1 /* 2 * linux/fs/ext4/super.c 3 * 4 * Copyright (C) 1992, 1993, 1994, 1995 5 * Remy Card (card@masi.ibp.fr) 6 * Laboratoire MASI - Institut Blaise Pascal 7 * Universite Pierre et Marie Curie (Paris VI) 8 * 9 * from 10 * 11 * linux/fs/minix/inode.c 12 * 13 * Copyright (C) 1991, 1992 Linus Torvalds 14 * 15 * Big-endian to little-endian byte-swapping/bitmaps by 16 * David S. Miller (davem@caip.rutgers.edu), 1995 17 */ 18 19 #include <linux/module.h> 20 #include <linux/string.h> 21 #include <linux/fs.h> 22 #include <linux/time.h> 23 #include <linux/jbd2.h> 24 #include <linux/ext4_fs.h> 25 #include <linux/ext4_jbd2.h> 26 #include <linux/slab.h> 27 #include <linux/init.h> 28 #include <linux/blkdev.h> 29 #include <linux/parser.h> 30 #include <linux/smp_lock.h> 31 #include <linux/buffer_head.h> 32 #include <linux/vfs.h> 33 #include <linux/random.h> 34 #include <linux/mount.h> 35 #include <linux/namei.h> 36 #include <linux/quotaops.h> 37 #include <linux/seq_file.h> 38 39 #include <asm/uaccess.h> 40 41 #include "xattr.h" 42 #include "acl.h" 43 #include "namei.h" 44 45 static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 46 unsigned long journal_devnum); 47 static int ext4_create_journal(struct super_block *, struct ext4_super_block *, 48 unsigned int); 49 static void ext4_commit_super (struct super_block * sb, 50 struct ext4_super_block * es, 51 int sync); 52 static void ext4_mark_recovery_complete(struct super_block * sb, 53 struct ext4_super_block * es); 54 static void ext4_clear_journal_err(struct super_block * sb, 55 struct ext4_super_block * es); 56 static int ext4_sync_fs(struct super_block *sb, int wait); 57 static const char *ext4_decode_error(struct super_block * sb, int errno, 58 char nbuf[16]); 59 static int ext4_remount (struct super_block * sb, int * flags, char * data); 60 static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf); 61 static void ext4_unlockfs(struct super_block *sb); 62 static void ext4_write_super (struct super_block * sb); 63 static void ext4_write_super_lockfs(struct super_block *sb); 64 65 66 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, 67 struct ext4_group_desc *bg) 68 { 69 return le32_to_cpu(bg->bg_block_bitmap) | 70 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 71 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); 72 } 73 74 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, 75 struct ext4_group_desc *bg) 76 { 77 return le32_to_cpu(bg->bg_inode_bitmap) | 78 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 79 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); 80 } 81 82 ext4_fsblk_t ext4_inode_table(struct super_block *sb, 83 struct ext4_group_desc *bg) 84 { 85 return le32_to_cpu(bg->bg_inode_table) | 86 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 87 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); 88 } 89 90 void ext4_block_bitmap_set(struct super_block *sb, 91 struct ext4_group_desc *bg, ext4_fsblk_t blk) 92 { 93 bg->bg_block_bitmap = cpu_to_le32((u32)blk); 94 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 95 bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32); 96 } 97 98 void ext4_inode_bitmap_set(struct super_block *sb, 99 struct ext4_group_desc *bg, ext4_fsblk_t blk) 100 { 101 bg->bg_inode_bitmap = cpu_to_le32((u32)blk); 102 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 103 bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32); 104 } 105 106 void ext4_inode_table_set(struct super_block *sb, 107 struct ext4_group_desc *bg, ext4_fsblk_t blk) 108 { 109 bg->bg_inode_table = cpu_to_le32((u32)blk); 110 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 111 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32); 112 } 113 114 /* 115 * Wrappers for jbd2_journal_start/end. 116 * 117 * The only special thing we need to do here is to make sure that all 118 * journal_end calls result in the superblock being marked dirty, so 119 * that sync() will call the filesystem's write_super callback if 120 * appropriate. 121 */ 122 handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) 123 { 124 journal_t *journal; 125 126 if (sb->s_flags & MS_RDONLY) 127 return ERR_PTR(-EROFS); 128 129 /* Special case here: if the journal has aborted behind our 130 * backs (eg. EIO in the commit thread), then we still need to 131 * take the FS itself readonly cleanly. */ 132 journal = EXT4_SB(sb)->s_journal; 133 if (is_journal_aborted(journal)) { 134 ext4_abort(sb, __FUNCTION__, 135 "Detected aborted journal"); 136 return ERR_PTR(-EROFS); 137 } 138 139 return jbd2_journal_start(journal, nblocks); 140 } 141 142 /* 143 * The only special thing we need to do here is to make sure that all 144 * jbd2_journal_stop calls result in the superblock being marked dirty, so 145 * that sync() will call the filesystem's write_super callback if 146 * appropriate. 147 */ 148 int __ext4_journal_stop(const char *where, handle_t *handle) 149 { 150 struct super_block *sb; 151 int err; 152 int rc; 153 154 sb = handle->h_transaction->t_journal->j_private; 155 err = handle->h_err; 156 rc = jbd2_journal_stop(handle); 157 158 if (!err) 159 err = rc; 160 if (err) 161 __ext4_std_error(sb, where, err); 162 return err; 163 } 164 165 void ext4_journal_abort_handle(const char *caller, const char *err_fn, 166 struct buffer_head *bh, handle_t *handle, int err) 167 { 168 char nbuf[16]; 169 const char *errstr = ext4_decode_error(NULL, err, nbuf); 170 171 if (bh) 172 BUFFER_TRACE(bh, "abort"); 173 174 if (!handle->h_err) 175 handle->h_err = err; 176 177 if (is_handle_aborted(handle)) 178 return; 179 180 printk(KERN_ERR "%s: aborting transaction: %s in %s\n", 181 caller, errstr, err_fn); 182 183 jbd2_journal_abort_handle(handle); 184 } 185 186 /* Deal with the reporting of failure conditions on a filesystem such as 187 * inconsistencies detected or read IO failures. 188 * 189 * On ext2, we can store the error state of the filesystem in the 190 * superblock. That is not possible on ext4, because we may have other 191 * write ordering constraints on the superblock which prevent us from 192 * writing it out straight away; and given that the journal is about to 193 * be aborted, we can't rely on the current, or future, transactions to 194 * write out the superblock safely. 195 * 196 * We'll just use the jbd2_journal_abort() error code to record an error in 197 * the journal instead. On recovery, the journal will compain about 198 * that error until we've noted it down and cleared it. 199 */ 200 201 static void ext4_handle_error(struct super_block *sb) 202 { 203 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 204 205 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 206 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 207 208 if (sb->s_flags & MS_RDONLY) 209 return; 210 211 if (!test_opt (sb, ERRORS_CONT)) { 212 journal_t *journal = EXT4_SB(sb)->s_journal; 213 214 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; 215 if (journal) 216 jbd2_journal_abort(journal, -EIO); 217 } 218 if (test_opt (sb, ERRORS_RO)) { 219 printk (KERN_CRIT "Remounting filesystem read-only\n"); 220 sb->s_flags |= MS_RDONLY; 221 } 222 ext4_commit_super(sb, es, 1); 223 if (test_opt(sb, ERRORS_PANIC)) 224 panic("EXT4-fs (device %s): panic forced after error\n", 225 sb->s_id); 226 } 227 228 void ext4_error (struct super_block * sb, const char * function, 229 const char * fmt, ...) 230 { 231 va_list args; 232 233 va_start(args, fmt); 234 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ",sb->s_id, function); 235 vprintk(fmt, args); 236 printk("\n"); 237 va_end(args); 238 239 ext4_handle_error(sb); 240 } 241 242 static const char *ext4_decode_error(struct super_block * sb, int errno, 243 char nbuf[16]) 244 { 245 char *errstr = NULL; 246 247 switch (errno) { 248 case -EIO: 249 errstr = "IO failure"; 250 break; 251 case -ENOMEM: 252 errstr = "Out of memory"; 253 break; 254 case -EROFS: 255 if (!sb || EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT) 256 errstr = "Journal has aborted"; 257 else 258 errstr = "Readonly filesystem"; 259 break; 260 default: 261 /* If the caller passed in an extra buffer for unknown 262 * errors, textualise them now. Else we just return 263 * NULL. */ 264 if (nbuf) { 265 /* Check for truncated error codes... */ 266 if (snprintf(nbuf, 16, "error %d", -errno) >= 0) 267 errstr = nbuf; 268 } 269 break; 270 } 271 272 return errstr; 273 } 274 275 /* __ext4_std_error decodes expected errors from journaling functions 276 * automatically and invokes the appropriate error response. */ 277 278 void __ext4_std_error (struct super_block * sb, const char * function, 279 int errno) 280 { 281 char nbuf[16]; 282 const char *errstr; 283 284 /* Special case: if the error is EROFS, and we're not already 285 * inside a transaction, then there's really no point in logging 286 * an error. */ 287 if (errno == -EROFS && journal_current_handle() == NULL && 288 (sb->s_flags & MS_RDONLY)) 289 return; 290 291 errstr = ext4_decode_error(sb, errno, nbuf); 292 printk (KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n", 293 sb->s_id, function, errstr); 294 295 ext4_handle_error(sb); 296 } 297 298 /* 299 * ext4_abort is a much stronger failure handler than ext4_error. The 300 * abort function may be used to deal with unrecoverable failures such 301 * as journal IO errors or ENOMEM at a critical moment in log management. 302 * 303 * We unconditionally force the filesystem into an ABORT|READONLY state, 304 * unless the error response on the fs has been set to panic in which 305 * case we take the easy way out and panic immediately. 306 */ 307 308 void ext4_abort (struct super_block * sb, const char * function, 309 const char * fmt, ...) 310 { 311 va_list args; 312 313 printk (KERN_CRIT "ext4_abort called.\n"); 314 315 va_start(args, fmt); 316 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ",sb->s_id, function); 317 vprintk(fmt, args); 318 printk("\n"); 319 va_end(args); 320 321 if (test_opt(sb, ERRORS_PANIC)) 322 panic("EXT4-fs panic from previous error\n"); 323 324 if (sb->s_flags & MS_RDONLY) 325 return; 326 327 printk(KERN_CRIT "Remounting filesystem read-only\n"); 328 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 329 sb->s_flags |= MS_RDONLY; 330 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; 331 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); 332 } 333 334 void ext4_warning (struct super_block * sb, const char * function, 335 const char * fmt, ...) 336 { 337 va_list args; 338 339 va_start(args, fmt); 340 printk(KERN_WARNING "EXT4-fs warning (device %s): %s: ", 341 sb->s_id, function); 342 vprintk(fmt, args); 343 printk("\n"); 344 va_end(args); 345 } 346 347 void ext4_update_dynamic_rev(struct super_block *sb) 348 { 349 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 350 351 if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) 352 return; 353 354 ext4_warning(sb, __FUNCTION__, 355 "updating to rev %d because of new feature flag, " 356 "running e2fsck is recommended", 357 EXT4_DYNAMIC_REV); 358 359 es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO); 360 es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE); 361 es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV); 362 /* leave es->s_feature_*compat flags alone */ 363 /* es->s_uuid will be set by e2fsck if empty */ 364 365 /* 366 * The rest of the superblock fields should be zero, and if not it 367 * means they are likely already in use, so leave them alone. We 368 * can leave it up to e2fsck to clean up any inconsistencies there. 369 */ 370 } 371 372 /* 373 * Open the external journal device 374 */ 375 static struct block_device *ext4_blkdev_get(dev_t dev) 376 { 377 struct block_device *bdev; 378 char b[BDEVNAME_SIZE]; 379 380 bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); 381 if (IS_ERR(bdev)) 382 goto fail; 383 return bdev; 384 385 fail: 386 printk(KERN_ERR "EXT4: failed to open journal device %s: %ld\n", 387 __bdevname(dev, b), PTR_ERR(bdev)); 388 return NULL; 389 } 390 391 /* 392 * Release the journal device 393 */ 394 static int ext4_blkdev_put(struct block_device *bdev) 395 { 396 bd_release(bdev); 397 return blkdev_put(bdev); 398 } 399 400 static int ext4_blkdev_remove(struct ext4_sb_info *sbi) 401 { 402 struct block_device *bdev; 403 int ret = -ENODEV; 404 405 bdev = sbi->journal_bdev; 406 if (bdev) { 407 ret = ext4_blkdev_put(bdev); 408 sbi->journal_bdev = NULL; 409 } 410 return ret; 411 } 412 413 static inline struct inode *orphan_list_entry(struct list_head *l) 414 { 415 return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode; 416 } 417 418 static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi) 419 { 420 struct list_head *l; 421 422 printk(KERN_ERR "sb orphan head is %d\n", 423 le32_to_cpu(sbi->s_es->s_last_orphan)); 424 425 printk(KERN_ERR "sb_info orphan list:\n"); 426 list_for_each(l, &sbi->s_orphan) { 427 struct inode *inode = orphan_list_entry(l); 428 printk(KERN_ERR " " 429 "inode %s:%lu at %p: mode %o, nlink %d, next %d\n", 430 inode->i_sb->s_id, inode->i_ino, inode, 431 inode->i_mode, inode->i_nlink, 432 NEXT_ORPHAN(inode)); 433 } 434 } 435 436 static void ext4_put_super (struct super_block * sb) 437 { 438 struct ext4_sb_info *sbi = EXT4_SB(sb); 439 struct ext4_super_block *es = sbi->s_es; 440 int i; 441 442 ext4_ext_release(sb); 443 ext4_xattr_put_super(sb); 444 jbd2_journal_destroy(sbi->s_journal); 445 if (!(sb->s_flags & MS_RDONLY)) { 446 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 447 es->s_state = cpu_to_le16(sbi->s_mount_state); 448 BUFFER_TRACE(sbi->s_sbh, "marking dirty"); 449 mark_buffer_dirty(sbi->s_sbh); 450 ext4_commit_super(sb, es, 1); 451 } 452 453 for (i = 0; i < sbi->s_gdb_count; i++) 454 brelse(sbi->s_group_desc[i]); 455 kfree(sbi->s_group_desc); 456 percpu_counter_destroy(&sbi->s_freeblocks_counter); 457 percpu_counter_destroy(&sbi->s_freeinodes_counter); 458 percpu_counter_destroy(&sbi->s_dirs_counter); 459 brelse(sbi->s_sbh); 460 #ifdef CONFIG_QUOTA 461 for (i = 0; i < MAXQUOTAS; i++) 462 kfree(sbi->s_qf_names[i]); 463 #endif 464 465 /* Debugging code just in case the in-memory inode orphan list 466 * isn't empty. The on-disk one can be non-empty if we've 467 * detected an error and taken the fs readonly, but the 468 * in-memory list had better be clean by this point. */ 469 if (!list_empty(&sbi->s_orphan)) 470 dump_orphan_list(sb, sbi); 471 J_ASSERT(list_empty(&sbi->s_orphan)); 472 473 invalidate_bdev(sb->s_bdev); 474 if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) { 475 /* 476 * Invalidate the journal device's buffers. We don't want them 477 * floating about in memory - the physical journal device may 478 * hotswapped, and it breaks the `ro-after' testing code. 479 */ 480 sync_blockdev(sbi->journal_bdev); 481 invalidate_bdev(sbi->journal_bdev); 482 ext4_blkdev_remove(sbi); 483 } 484 sb->s_fs_info = NULL; 485 kfree(sbi); 486 return; 487 } 488 489 static struct kmem_cache *ext4_inode_cachep; 490 491 /* 492 * Called inside transaction, so use GFP_NOFS 493 */ 494 static struct inode *ext4_alloc_inode(struct super_block *sb) 495 { 496 struct ext4_inode_info *ei; 497 498 ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); 499 if (!ei) 500 return NULL; 501 #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL 502 ei->i_acl = EXT4_ACL_NOT_CACHED; 503 ei->i_default_acl = EXT4_ACL_NOT_CACHED; 504 #endif 505 ei->i_block_alloc_info = NULL; 506 ei->vfs_inode.i_version = 1; 507 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); 508 return &ei->vfs_inode; 509 } 510 511 static void ext4_destroy_inode(struct inode *inode) 512 { 513 kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); 514 } 515 516 static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) 517 { 518 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; 519 520 if (flags & SLAB_CTOR_CONSTRUCTOR) { 521 INIT_LIST_HEAD(&ei->i_orphan); 522 #ifdef CONFIG_EXT4DEV_FS_XATTR 523 init_rwsem(&ei->xattr_sem); 524 #endif 525 mutex_init(&ei->truncate_mutex); 526 inode_init_once(&ei->vfs_inode); 527 } 528 } 529 530 static int init_inodecache(void) 531 { 532 ext4_inode_cachep = kmem_cache_create("ext4_inode_cache", 533 sizeof(struct ext4_inode_info), 534 0, (SLAB_RECLAIM_ACCOUNT| 535 SLAB_MEM_SPREAD), 536 init_once, NULL); 537 if (ext4_inode_cachep == NULL) 538 return -ENOMEM; 539 return 0; 540 } 541 542 static void destroy_inodecache(void) 543 { 544 kmem_cache_destroy(ext4_inode_cachep); 545 } 546 547 static void ext4_clear_inode(struct inode *inode) 548 { 549 struct ext4_block_alloc_info *rsv = EXT4_I(inode)->i_block_alloc_info; 550 #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL 551 if (EXT4_I(inode)->i_acl && 552 EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) { 553 posix_acl_release(EXT4_I(inode)->i_acl); 554 EXT4_I(inode)->i_acl = EXT4_ACL_NOT_CACHED; 555 } 556 if (EXT4_I(inode)->i_default_acl && 557 EXT4_I(inode)->i_default_acl != EXT4_ACL_NOT_CACHED) { 558 posix_acl_release(EXT4_I(inode)->i_default_acl); 559 EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED; 560 } 561 #endif 562 ext4_discard_reservation(inode); 563 EXT4_I(inode)->i_block_alloc_info = NULL; 564 if (unlikely(rsv)) 565 kfree(rsv); 566 } 567 568 static inline void ext4_show_quota_options(struct seq_file *seq, struct super_block *sb) 569 { 570 #if defined(CONFIG_QUOTA) 571 struct ext4_sb_info *sbi = EXT4_SB(sb); 572 573 if (sbi->s_jquota_fmt) 574 seq_printf(seq, ",jqfmt=%s", 575 (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold": "vfsv0"); 576 577 if (sbi->s_qf_names[USRQUOTA]) 578 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); 579 580 if (sbi->s_qf_names[GRPQUOTA]) 581 seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); 582 583 if (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) 584 seq_puts(seq, ",usrquota"); 585 586 if (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) 587 seq_puts(seq, ",grpquota"); 588 #endif 589 } 590 591 static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) 592 { 593 struct super_block *sb = vfs->mnt_sb; 594 595 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 596 seq_puts(seq, ",data=journal"); 597 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 598 seq_puts(seq, ",data=ordered"); 599 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) 600 seq_puts(seq, ",data=writeback"); 601 602 ext4_show_quota_options(seq, sb); 603 604 return 0; 605 } 606 607 608 static struct dentry *ext4_get_dentry(struct super_block *sb, void *vobjp) 609 { 610 __u32 *objp = vobjp; 611 unsigned long ino = objp[0]; 612 __u32 generation = objp[1]; 613 struct inode *inode; 614 struct dentry *result; 615 616 if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) 617 return ERR_PTR(-ESTALE); 618 if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)) 619 return ERR_PTR(-ESTALE); 620 621 /* iget isn't really right if the inode is currently unallocated!! 622 * 623 * ext4_read_inode will return a bad_inode if the inode had been 624 * deleted, so we should be safe. 625 * 626 * Currently we don't know the generation for parent directory, so 627 * a generation of 0 means "accept any" 628 */ 629 inode = iget(sb, ino); 630 if (inode == NULL) 631 return ERR_PTR(-ENOMEM); 632 if (is_bad_inode(inode) || 633 (generation && inode->i_generation != generation)) { 634 iput(inode); 635 return ERR_PTR(-ESTALE); 636 } 637 /* now to find a dentry. 638 * If possible, get a well-connected one 639 */ 640 result = d_alloc_anon(inode); 641 if (!result) { 642 iput(inode); 643 return ERR_PTR(-ENOMEM); 644 } 645 return result; 646 } 647 648 #ifdef CONFIG_QUOTA 649 #define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") 650 #define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) 651 652 static int ext4_dquot_initialize(struct inode *inode, int type); 653 static int ext4_dquot_drop(struct inode *inode); 654 static int ext4_write_dquot(struct dquot *dquot); 655 static int ext4_acquire_dquot(struct dquot *dquot); 656 static int ext4_release_dquot(struct dquot *dquot); 657 static int ext4_mark_dquot_dirty(struct dquot *dquot); 658 static int ext4_write_info(struct super_block *sb, int type); 659 static int ext4_quota_on(struct super_block *sb, int type, int format_id, char *path); 660 static int ext4_quota_on_mount(struct super_block *sb, int type); 661 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 662 size_t len, loff_t off); 663 static ssize_t ext4_quota_write(struct super_block *sb, int type, 664 const char *data, size_t len, loff_t off); 665 666 static struct dquot_operations ext4_quota_operations = { 667 .initialize = ext4_dquot_initialize, 668 .drop = ext4_dquot_drop, 669 .alloc_space = dquot_alloc_space, 670 .alloc_inode = dquot_alloc_inode, 671 .free_space = dquot_free_space, 672 .free_inode = dquot_free_inode, 673 .transfer = dquot_transfer, 674 .write_dquot = ext4_write_dquot, 675 .acquire_dquot = ext4_acquire_dquot, 676 .release_dquot = ext4_release_dquot, 677 .mark_dirty = ext4_mark_dquot_dirty, 678 .write_info = ext4_write_info 679 }; 680 681 static struct quotactl_ops ext4_qctl_operations = { 682 .quota_on = ext4_quota_on, 683 .quota_off = vfs_quota_off, 684 .quota_sync = vfs_quota_sync, 685 .get_info = vfs_get_dqinfo, 686 .set_info = vfs_set_dqinfo, 687 .get_dqblk = vfs_get_dqblk, 688 .set_dqblk = vfs_set_dqblk 689 }; 690 #endif 691 692 static const struct super_operations ext4_sops = { 693 .alloc_inode = ext4_alloc_inode, 694 .destroy_inode = ext4_destroy_inode, 695 .read_inode = ext4_read_inode, 696 .write_inode = ext4_write_inode, 697 .dirty_inode = ext4_dirty_inode, 698 .delete_inode = ext4_delete_inode, 699 .put_super = ext4_put_super, 700 .write_super = ext4_write_super, 701 .sync_fs = ext4_sync_fs, 702 .write_super_lockfs = ext4_write_super_lockfs, 703 .unlockfs = ext4_unlockfs, 704 .statfs = ext4_statfs, 705 .remount_fs = ext4_remount, 706 .clear_inode = ext4_clear_inode, 707 .show_options = ext4_show_options, 708 #ifdef CONFIG_QUOTA 709 .quota_read = ext4_quota_read, 710 .quota_write = ext4_quota_write, 711 #endif 712 }; 713 714 static struct export_operations ext4_export_ops = { 715 .get_parent = ext4_get_parent, 716 .get_dentry = ext4_get_dentry, 717 }; 718 719 enum { 720 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, 721 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, 722 Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, 723 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 724 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, 725 Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, 726 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 727 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 728 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, 729 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, 730 Opt_grpquota, Opt_extents, 731 }; 732 733 static match_table_t tokens = { 734 {Opt_bsd_df, "bsddf"}, 735 {Opt_minix_df, "minixdf"}, 736 {Opt_grpid, "grpid"}, 737 {Opt_grpid, "bsdgroups"}, 738 {Opt_nogrpid, "nogrpid"}, 739 {Opt_nogrpid, "sysvgroups"}, 740 {Opt_resgid, "resgid=%u"}, 741 {Opt_resuid, "resuid=%u"}, 742 {Opt_sb, "sb=%u"}, 743 {Opt_err_cont, "errors=continue"}, 744 {Opt_err_panic, "errors=panic"}, 745 {Opt_err_ro, "errors=remount-ro"}, 746 {Opt_nouid32, "nouid32"}, 747 {Opt_nocheck, "nocheck"}, 748 {Opt_nocheck, "check=none"}, 749 {Opt_debug, "debug"}, 750 {Opt_oldalloc, "oldalloc"}, 751 {Opt_orlov, "orlov"}, 752 {Opt_user_xattr, "user_xattr"}, 753 {Opt_nouser_xattr, "nouser_xattr"}, 754 {Opt_acl, "acl"}, 755 {Opt_noacl, "noacl"}, 756 {Opt_reservation, "reservation"}, 757 {Opt_noreservation, "noreservation"}, 758 {Opt_noload, "noload"}, 759 {Opt_nobh, "nobh"}, 760 {Opt_bh, "bh"}, 761 {Opt_commit, "commit=%u"}, 762 {Opt_journal_update, "journal=update"}, 763 {Opt_journal_inum, "journal=%u"}, 764 {Opt_journal_dev, "journal_dev=%u"}, 765 {Opt_abort, "abort"}, 766 {Opt_data_journal, "data=journal"}, 767 {Opt_data_ordered, "data=ordered"}, 768 {Opt_data_writeback, "data=writeback"}, 769 {Opt_offusrjquota, "usrjquota="}, 770 {Opt_usrjquota, "usrjquota=%s"}, 771 {Opt_offgrpjquota, "grpjquota="}, 772 {Opt_grpjquota, "grpjquota=%s"}, 773 {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, 774 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, 775 {Opt_grpquota, "grpquota"}, 776 {Opt_noquota, "noquota"}, 777 {Opt_quota, "quota"}, 778 {Opt_usrquota, "usrquota"}, 779 {Opt_barrier, "barrier=%u"}, 780 {Opt_extents, "extents"}, 781 {Opt_err, NULL}, 782 {Opt_resize, "resize"}, 783 }; 784 785 static ext4_fsblk_t get_sb_block(void **data) 786 { 787 ext4_fsblk_t sb_block; 788 char *options = (char *) *data; 789 790 if (!options || strncmp(options, "sb=", 3) != 0) 791 return 1; /* Default location */ 792 options += 3; 793 /*todo: use simple_strtoll with >32bit ext4 */ 794 sb_block = simple_strtoul(options, &options, 0); 795 if (*options && *options != ',') { 796 printk("EXT4-fs: Invalid sb specification: %s\n", 797 (char *) *data); 798 return 1; 799 } 800 if (*options == ',') 801 options++; 802 *data = (void *) options; 803 return sb_block; 804 } 805 806 static int parse_options (char *options, struct super_block *sb, 807 unsigned int *inum, unsigned long *journal_devnum, 808 ext4_fsblk_t *n_blocks_count, int is_remount) 809 { 810 struct ext4_sb_info *sbi = EXT4_SB(sb); 811 char * p; 812 substring_t args[MAX_OPT_ARGS]; 813 int data_opt = 0; 814 int option; 815 #ifdef CONFIG_QUOTA 816 int qtype; 817 char *qname; 818 #endif 819 820 if (!options) 821 return 1; 822 823 while ((p = strsep (&options, ",")) != NULL) { 824 int token; 825 if (!*p) 826 continue; 827 828 token = match_token(p, tokens, args); 829 switch (token) { 830 case Opt_bsd_df: 831 clear_opt (sbi->s_mount_opt, MINIX_DF); 832 break; 833 case Opt_minix_df: 834 set_opt (sbi->s_mount_opt, MINIX_DF); 835 break; 836 case Opt_grpid: 837 set_opt (sbi->s_mount_opt, GRPID); 838 break; 839 case Opt_nogrpid: 840 clear_opt (sbi->s_mount_opt, GRPID); 841 break; 842 case Opt_resuid: 843 if (match_int(&args[0], &option)) 844 return 0; 845 sbi->s_resuid = option; 846 break; 847 case Opt_resgid: 848 if (match_int(&args[0], &option)) 849 return 0; 850 sbi->s_resgid = option; 851 break; 852 case Opt_sb: 853 /* handled by get_sb_block() instead of here */ 854 /* *sb_block = match_int(&args[0]); */ 855 break; 856 case Opt_err_panic: 857 clear_opt (sbi->s_mount_opt, ERRORS_CONT); 858 clear_opt (sbi->s_mount_opt, ERRORS_RO); 859 set_opt (sbi->s_mount_opt, ERRORS_PANIC); 860 break; 861 case Opt_err_ro: 862 clear_opt (sbi->s_mount_opt, ERRORS_CONT); 863 clear_opt (sbi->s_mount_opt, ERRORS_PANIC); 864 set_opt (sbi->s_mount_opt, ERRORS_RO); 865 break; 866 case Opt_err_cont: 867 clear_opt (sbi->s_mount_opt, ERRORS_RO); 868 clear_opt (sbi->s_mount_opt, ERRORS_PANIC); 869 set_opt (sbi->s_mount_opt, ERRORS_CONT); 870 break; 871 case Opt_nouid32: 872 set_opt (sbi->s_mount_opt, NO_UID32); 873 break; 874 case Opt_nocheck: 875 clear_opt (sbi->s_mount_opt, CHECK); 876 break; 877 case Opt_debug: 878 set_opt (sbi->s_mount_opt, DEBUG); 879 break; 880 case Opt_oldalloc: 881 set_opt (sbi->s_mount_opt, OLDALLOC); 882 break; 883 case Opt_orlov: 884 clear_opt (sbi->s_mount_opt, OLDALLOC); 885 break; 886 #ifdef CONFIG_EXT4DEV_FS_XATTR 887 case Opt_user_xattr: 888 set_opt (sbi->s_mount_opt, XATTR_USER); 889 break; 890 case Opt_nouser_xattr: 891 clear_opt (sbi->s_mount_opt, XATTR_USER); 892 break; 893 #else 894 case Opt_user_xattr: 895 case Opt_nouser_xattr: 896 printk("EXT4 (no)user_xattr options not supported\n"); 897 break; 898 #endif 899 #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL 900 case Opt_acl: 901 set_opt(sbi->s_mount_opt, POSIX_ACL); 902 break; 903 case Opt_noacl: 904 clear_opt(sbi->s_mount_opt, POSIX_ACL); 905 break; 906 #else 907 case Opt_acl: 908 case Opt_noacl: 909 printk("EXT4 (no)acl options not supported\n"); 910 break; 911 #endif 912 case Opt_reservation: 913 set_opt(sbi->s_mount_opt, RESERVATION); 914 break; 915 case Opt_noreservation: 916 clear_opt(sbi->s_mount_opt, RESERVATION); 917 break; 918 case Opt_journal_update: 919 /* @@@ FIXME */ 920 /* Eventually we will want to be able to create 921 a journal file here. For now, only allow the 922 user to specify an existing inode to be the 923 journal file. */ 924 if (is_remount) { 925 printk(KERN_ERR "EXT4-fs: cannot specify " 926 "journal on remount\n"); 927 return 0; 928 } 929 set_opt (sbi->s_mount_opt, UPDATE_JOURNAL); 930 break; 931 case Opt_journal_inum: 932 if (is_remount) { 933 printk(KERN_ERR "EXT4-fs: cannot specify " 934 "journal on remount\n"); 935 return 0; 936 } 937 if (match_int(&args[0], &option)) 938 return 0; 939 *inum = option; 940 break; 941 case Opt_journal_dev: 942 if (is_remount) { 943 printk(KERN_ERR "EXT4-fs: cannot specify " 944 "journal on remount\n"); 945 return 0; 946 } 947 if (match_int(&args[0], &option)) 948 return 0; 949 *journal_devnum = option; 950 break; 951 case Opt_noload: 952 set_opt (sbi->s_mount_opt, NOLOAD); 953 break; 954 case Opt_commit: 955 if (match_int(&args[0], &option)) 956 return 0; 957 if (option < 0) 958 return 0; 959 if (option == 0) 960 option = JBD_DEFAULT_MAX_COMMIT_AGE; 961 sbi->s_commit_interval = HZ * option; 962 break; 963 case Opt_data_journal: 964 data_opt = EXT4_MOUNT_JOURNAL_DATA; 965 goto datacheck; 966 case Opt_data_ordered: 967 data_opt = EXT4_MOUNT_ORDERED_DATA; 968 goto datacheck; 969 case Opt_data_writeback: 970 data_opt = EXT4_MOUNT_WRITEBACK_DATA; 971 datacheck: 972 if (is_remount) { 973 if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS) 974 != data_opt) { 975 printk(KERN_ERR 976 "EXT4-fs: cannot change data " 977 "mode on remount\n"); 978 return 0; 979 } 980 } else { 981 sbi->s_mount_opt &= ~EXT4_MOUNT_DATA_FLAGS; 982 sbi->s_mount_opt |= data_opt; 983 } 984 break; 985 #ifdef CONFIG_QUOTA 986 case Opt_usrjquota: 987 qtype = USRQUOTA; 988 goto set_qf_name; 989 case Opt_grpjquota: 990 qtype = GRPQUOTA; 991 set_qf_name: 992 if (sb_any_quota_enabled(sb)) { 993 printk(KERN_ERR 994 "EXT4-fs: Cannot change journalled " 995 "quota options when quota turned on.\n"); 996 return 0; 997 } 998 qname = match_strdup(&args[0]); 999 if (!qname) { 1000 printk(KERN_ERR 1001 "EXT4-fs: not enough memory for " 1002 "storing quotafile name.\n"); 1003 return 0; 1004 } 1005 if (sbi->s_qf_names[qtype] && 1006 strcmp(sbi->s_qf_names[qtype], qname)) { 1007 printk(KERN_ERR 1008 "EXT4-fs: %s quota file already " 1009 "specified.\n", QTYPE2NAME(qtype)); 1010 kfree(qname); 1011 return 0; 1012 } 1013 sbi->s_qf_names[qtype] = qname; 1014 if (strchr(sbi->s_qf_names[qtype], '/')) { 1015 printk(KERN_ERR 1016 "EXT4-fs: quotafile must be on " 1017 "filesystem root.\n"); 1018 kfree(sbi->s_qf_names[qtype]); 1019 sbi->s_qf_names[qtype] = NULL; 1020 return 0; 1021 } 1022 set_opt(sbi->s_mount_opt, QUOTA); 1023 break; 1024 case Opt_offusrjquota: 1025 qtype = USRQUOTA; 1026 goto clear_qf_name; 1027 case Opt_offgrpjquota: 1028 qtype = GRPQUOTA; 1029 clear_qf_name: 1030 if (sb_any_quota_enabled(sb)) { 1031 printk(KERN_ERR "EXT4-fs: Cannot change " 1032 "journalled quota options when " 1033 "quota turned on.\n"); 1034 return 0; 1035 } 1036 /* 1037 * The space will be released later when all options 1038 * are confirmed to be correct 1039 */ 1040 sbi->s_qf_names[qtype] = NULL; 1041 break; 1042 case Opt_jqfmt_vfsold: 1043 sbi->s_jquota_fmt = QFMT_VFS_OLD; 1044 break; 1045 case Opt_jqfmt_vfsv0: 1046 sbi->s_jquota_fmt = QFMT_VFS_V0; 1047 break; 1048 case Opt_quota: 1049 case Opt_usrquota: 1050 set_opt(sbi->s_mount_opt, QUOTA); 1051 set_opt(sbi->s_mount_opt, USRQUOTA); 1052 break; 1053 case Opt_grpquota: 1054 set_opt(sbi->s_mount_opt, QUOTA); 1055 set_opt(sbi->s_mount_opt, GRPQUOTA); 1056 break; 1057 case Opt_noquota: 1058 if (sb_any_quota_enabled(sb)) { 1059 printk(KERN_ERR "EXT4-fs: Cannot change quota " 1060 "options when quota turned on.\n"); 1061 return 0; 1062 } 1063 clear_opt(sbi->s_mount_opt, QUOTA); 1064 clear_opt(sbi->s_mount_opt, USRQUOTA); 1065 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1066 break; 1067 #else 1068 case Opt_quota: 1069 case Opt_usrquota: 1070 case Opt_grpquota: 1071 case Opt_usrjquota: 1072 case Opt_grpjquota: 1073 case Opt_offusrjquota: 1074 case Opt_offgrpjquota: 1075 case Opt_jqfmt_vfsold: 1076 case Opt_jqfmt_vfsv0: 1077 printk(KERN_ERR 1078 "EXT4-fs: journalled quota options not " 1079 "supported.\n"); 1080 break; 1081 case Opt_noquota: 1082 break; 1083 #endif 1084 case Opt_abort: 1085 set_opt(sbi->s_mount_opt, ABORT); 1086 break; 1087 case Opt_barrier: 1088 if (match_int(&args[0], &option)) 1089 return 0; 1090 if (option) 1091 set_opt(sbi->s_mount_opt, BARRIER); 1092 else 1093 clear_opt(sbi->s_mount_opt, BARRIER); 1094 break; 1095 case Opt_ignore: 1096 break; 1097 case Opt_resize: 1098 if (!is_remount) { 1099 printk("EXT4-fs: resize option only available " 1100 "for remount\n"); 1101 return 0; 1102 } 1103 if (match_int(&args[0], &option) != 0) 1104 return 0; 1105 *n_blocks_count = option; 1106 break; 1107 case Opt_nobh: 1108 set_opt(sbi->s_mount_opt, NOBH); 1109 break; 1110 case Opt_bh: 1111 clear_opt(sbi->s_mount_opt, NOBH); 1112 break; 1113 case Opt_extents: 1114 set_opt (sbi->s_mount_opt, EXTENTS); 1115 break; 1116 default: 1117 printk (KERN_ERR 1118 "EXT4-fs: Unrecognized mount option \"%s\" " 1119 "or missing value\n", p); 1120 return 0; 1121 } 1122 } 1123 #ifdef CONFIG_QUOTA 1124 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { 1125 if ((sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) && 1126 sbi->s_qf_names[USRQUOTA]) 1127 clear_opt(sbi->s_mount_opt, USRQUOTA); 1128 1129 if ((sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) && 1130 sbi->s_qf_names[GRPQUOTA]) 1131 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1132 1133 if ((sbi->s_qf_names[USRQUOTA] && 1134 (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) || 1135 (sbi->s_qf_names[GRPQUOTA] && 1136 (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) { 1137 printk(KERN_ERR "EXT4-fs: old and new quota " 1138 "format mixing.\n"); 1139 return 0; 1140 } 1141 1142 if (!sbi->s_jquota_fmt) { 1143 printk(KERN_ERR "EXT4-fs: journalled quota format " 1144 "not specified.\n"); 1145 return 0; 1146 } 1147 } else { 1148 if (sbi->s_jquota_fmt) { 1149 printk(KERN_ERR "EXT4-fs: journalled quota format " 1150 "specified with no journalling " 1151 "enabled.\n"); 1152 return 0; 1153 } 1154 } 1155 #endif 1156 return 1; 1157 } 1158 1159 static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, 1160 int read_only) 1161 { 1162 struct ext4_sb_info *sbi = EXT4_SB(sb); 1163 int res = 0; 1164 1165 if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { 1166 printk (KERN_ERR "EXT4-fs warning: revision level too high, " 1167 "forcing read-only mode\n"); 1168 res = MS_RDONLY; 1169 } 1170 if (read_only) 1171 return res; 1172 if (!(sbi->s_mount_state & EXT4_VALID_FS)) 1173 printk (KERN_WARNING "EXT4-fs warning: mounting unchecked fs, " 1174 "running e2fsck is recommended\n"); 1175 else if ((sbi->s_mount_state & EXT4_ERROR_FS)) 1176 printk (KERN_WARNING 1177 "EXT4-fs warning: mounting fs with errors, " 1178 "running e2fsck is recommended\n"); 1179 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && 1180 le16_to_cpu(es->s_mnt_count) >= 1181 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) 1182 printk (KERN_WARNING 1183 "EXT4-fs warning: maximal mount count reached, " 1184 "running e2fsck is recommended\n"); 1185 else if (le32_to_cpu(es->s_checkinterval) && 1186 (le32_to_cpu(es->s_lastcheck) + 1187 le32_to_cpu(es->s_checkinterval) <= get_seconds())) 1188 printk (KERN_WARNING 1189 "EXT4-fs warning: checktime reached, " 1190 "running e2fsck is recommended\n"); 1191 #if 0 1192 /* @@@ We _will_ want to clear the valid bit if we find 1193 * inconsistencies, to force a fsck at reboot. But for 1194 * a plain journaled filesystem we can keep it set as 1195 * valid forever! :) 1196 */ 1197 es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & ~EXT4_VALID_FS); 1198 #endif 1199 if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) 1200 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); 1201 es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1); 1202 es->s_mtime = cpu_to_le32(get_seconds()); 1203 ext4_update_dynamic_rev(sb); 1204 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 1205 1206 ext4_commit_super(sb, es, 1); 1207 if (test_opt(sb, DEBUG)) 1208 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%lu, " 1209 "bpg=%lu, ipg=%lu, mo=%04lx]\n", 1210 sb->s_blocksize, 1211 sbi->s_groups_count, 1212 EXT4_BLOCKS_PER_GROUP(sb), 1213 EXT4_INODES_PER_GROUP(sb), 1214 sbi->s_mount_opt); 1215 1216 printk(KERN_INFO "EXT4 FS on %s, ", sb->s_id); 1217 if (EXT4_SB(sb)->s_journal->j_inode == NULL) { 1218 char b[BDEVNAME_SIZE]; 1219 1220 printk("external journal on %s\n", 1221 bdevname(EXT4_SB(sb)->s_journal->j_dev, b)); 1222 } else { 1223 printk("internal journal\n"); 1224 } 1225 return res; 1226 } 1227 1228 /* Called at mount-time, super-block is locked */ 1229 static int ext4_check_descriptors (struct super_block * sb) 1230 { 1231 struct ext4_sb_info *sbi = EXT4_SB(sb); 1232 ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); 1233 ext4_fsblk_t last_block; 1234 ext4_fsblk_t block_bitmap; 1235 ext4_fsblk_t inode_bitmap; 1236 ext4_fsblk_t inode_table; 1237 struct ext4_group_desc * gdp = NULL; 1238 int desc_block = 0; 1239 int i; 1240 1241 ext4_debug ("Checking group descriptors"); 1242 1243 for (i = 0; i < sbi->s_groups_count; i++) 1244 { 1245 if (i == sbi->s_groups_count - 1) 1246 last_block = ext4_blocks_count(sbi->s_es) - 1; 1247 else 1248 last_block = first_block + 1249 (EXT4_BLOCKS_PER_GROUP(sb) - 1); 1250 1251 if ((i % EXT4_DESC_PER_BLOCK(sb)) == 0) 1252 gdp = (struct ext4_group_desc *) 1253 sbi->s_group_desc[desc_block++]->b_data; 1254 block_bitmap = ext4_block_bitmap(sb, gdp); 1255 if (block_bitmap < first_block || block_bitmap > last_block) 1256 { 1257 ext4_error (sb, "ext4_check_descriptors", 1258 "Block bitmap for group %d" 1259 " not in group (block %llu)!", 1260 i, block_bitmap); 1261 return 0; 1262 } 1263 inode_bitmap = ext4_inode_bitmap(sb, gdp); 1264 if (inode_bitmap < first_block || inode_bitmap > last_block) 1265 { 1266 ext4_error (sb, "ext4_check_descriptors", 1267 "Inode bitmap for group %d" 1268 " not in group (block %llu)!", 1269 i, inode_bitmap); 1270 return 0; 1271 } 1272 inode_table = ext4_inode_table(sb, gdp); 1273 if (inode_table < first_block || 1274 inode_table + sbi->s_itb_per_group > last_block) 1275 { 1276 ext4_error (sb, "ext4_check_descriptors", 1277 "Inode table for group %d" 1278 " not in group (block %llu)!", 1279 i, inode_table); 1280 return 0; 1281 } 1282 first_block += EXT4_BLOCKS_PER_GROUP(sb); 1283 gdp = (struct ext4_group_desc *) 1284 ((__u8 *)gdp + EXT4_DESC_SIZE(sb)); 1285 } 1286 1287 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); 1288 sbi->s_es->s_free_inodes_count=cpu_to_le32(ext4_count_free_inodes(sb)); 1289 return 1; 1290 } 1291 1292 1293 /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at 1294 * the superblock) which were deleted from all directories, but held open by 1295 * a process at the time of a crash. We walk the list and try to delete these 1296 * inodes at recovery time (only with a read-write filesystem). 1297 * 1298 * In order to keep the orphan inode chain consistent during traversal (in 1299 * case of crash during recovery), we link each inode into the superblock 1300 * orphan list_head and handle it the same way as an inode deletion during 1301 * normal operation (which journals the operations for us). 1302 * 1303 * We only do an iget() and an iput() on each inode, which is very safe if we 1304 * accidentally point at an in-use or already deleted inode. The worst that 1305 * can happen in this case is that we get a "bit already cleared" message from 1306 * ext4_free_inode(). The only reason we would point at a wrong inode is if 1307 * e2fsck was run on this filesystem, and it must have already done the orphan 1308 * inode cleanup for us, so we can safely abort without any further action. 1309 */ 1310 static void ext4_orphan_cleanup (struct super_block * sb, 1311 struct ext4_super_block * es) 1312 { 1313 unsigned int s_flags = sb->s_flags; 1314 int nr_orphans = 0, nr_truncates = 0; 1315 #ifdef CONFIG_QUOTA 1316 int i; 1317 #endif 1318 if (!es->s_last_orphan) { 1319 jbd_debug(4, "no orphan inodes to clean up\n"); 1320 return; 1321 } 1322 1323 if (bdev_read_only(sb->s_bdev)) { 1324 printk(KERN_ERR "EXT4-fs: write access " 1325 "unavailable, skipping orphan cleanup.\n"); 1326 return; 1327 } 1328 1329 if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { 1330 if (es->s_last_orphan) 1331 jbd_debug(1, "Errors on filesystem, " 1332 "clearing orphan list.\n"); 1333 es->s_last_orphan = 0; 1334 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); 1335 return; 1336 } 1337 1338 if (s_flags & MS_RDONLY) { 1339 printk(KERN_INFO "EXT4-fs: %s: orphan cleanup on readonly fs\n", 1340 sb->s_id); 1341 sb->s_flags &= ~MS_RDONLY; 1342 } 1343 #ifdef CONFIG_QUOTA 1344 /* Needed for iput() to work correctly and not trash data */ 1345 sb->s_flags |= MS_ACTIVE; 1346 /* Turn on quotas so that they are updated correctly */ 1347 for (i = 0; i < MAXQUOTAS; i++) { 1348 if (EXT4_SB(sb)->s_qf_names[i]) { 1349 int ret = ext4_quota_on_mount(sb, i); 1350 if (ret < 0) 1351 printk(KERN_ERR 1352 "EXT4-fs: Cannot turn on journalled " 1353 "quota: error %d\n", ret); 1354 } 1355 } 1356 #endif 1357 1358 while (es->s_last_orphan) { 1359 struct inode *inode; 1360 1361 if (!(inode = 1362 ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)))) { 1363 es->s_last_orphan = 0; 1364 break; 1365 } 1366 1367 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); 1368 DQUOT_INIT(inode); 1369 if (inode->i_nlink) { 1370 printk(KERN_DEBUG 1371 "%s: truncating inode %lu to %Ld bytes\n", 1372 __FUNCTION__, inode->i_ino, inode->i_size); 1373 jbd_debug(2, "truncating inode %lu to %Ld bytes\n", 1374 inode->i_ino, inode->i_size); 1375 ext4_truncate(inode); 1376 nr_truncates++; 1377 } else { 1378 printk(KERN_DEBUG 1379 "%s: deleting unreferenced inode %lu\n", 1380 __FUNCTION__, inode->i_ino); 1381 jbd_debug(2, "deleting unreferenced inode %lu\n", 1382 inode->i_ino); 1383 nr_orphans++; 1384 } 1385 iput(inode); /* The delete magic happens here! */ 1386 } 1387 1388 #define PLURAL(x) (x), ((x)==1) ? "" : "s" 1389 1390 if (nr_orphans) 1391 printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n", 1392 sb->s_id, PLURAL(nr_orphans)); 1393 if (nr_truncates) 1394 printk(KERN_INFO "EXT4-fs: %s: %d truncate%s cleaned up\n", 1395 sb->s_id, PLURAL(nr_truncates)); 1396 #ifdef CONFIG_QUOTA 1397 /* Turn quotas off */ 1398 for (i = 0; i < MAXQUOTAS; i++) { 1399 if (sb_dqopt(sb)->files[i]) 1400 vfs_quota_off(sb, i); 1401 } 1402 #endif 1403 sb->s_flags = s_flags; /* Restore MS_RDONLY status */ 1404 } 1405 1406 #define log2(n) ffz(~(n)) 1407 1408 /* 1409 * Maximal file size. There is a direct, and {,double-,triple-}indirect 1410 * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks. 1411 * We need to be 1 filesystem block less than the 2^32 sector limit. 1412 */ 1413 static loff_t ext4_max_size(int bits) 1414 { 1415 loff_t res = EXT4_NDIR_BLOCKS; 1416 /* This constant is calculated to be the largest file size for a 1417 * dense, 4k-blocksize file such that the total number of 1418 * sectors in the file, including data and all indirect blocks, 1419 * does not exceed 2^32. */ 1420 const loff_t upper_limit = 0x1ff7fffd000LL; 1421 1422 res += 1LL << (bits-2); 1423 res += 1LL << (2*(bits-2)); 1424 res += 1LL << (3*(bits-2)); 1425 res <<= bits; 1426 if (res > upper_limit) 1427 res = upper_limit; 1428 return res; 1429 } 1430 1431 static ext4_fsblk_t descriptor_loc(struct super_block *sb, 1432 ext4_fsblk_t logical_sb_block, int nr) 1433 { 1434 struct ext4_sb_info *sbi = EXT4_SB(sb); 1435 unsigned long bg, first_meta_bg; 1436 int has_super = 0; 1437 1438 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); 1439 1440 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) || 1441 nr < first_meta_bg) 1442 return logical_sb_block + nr + 1; 1443 bg = sbi->s_desc_per_block * nr; 1444 if (ext4_bg_has_super(sb, bg)) 1445 has_super = 1; 1446 return (has_super + ext4_group_first_block_no(sb, bg)); 1447 } 1448 1449 1450 static int ext4_fill_super (struct super_block *sb, void *data, int silent) 1451 { 1452 struct buffer_head * bh; 1453 struct ext4_super_block *es = NULL; 1454 struct ext4_sb_info *sbi; 1455 ext4_fsblk_t block; 1456 ext4_fsblk_t sb_block = get_sb_block(&data); 1457 ext4_fsblk_t logical_sb_block; 1458 unsigned long offset = 0; 1459 unsigned int journal_inum = 0; 1460 unsigned long journal_devnum = 0; 1461 unsigned long def_mount_opts; 1462 struct inode *root; 1463 int blocksize; 1464 int hblock; 1465 int db_count; 1466 int i; 1467 int needs_recovery; 1468 __le32 features; 1469 __u64 blocks_count; 1470 1471 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 1472 if (!sbi) 1473 return -ENOMEM; 1474 sb->s_fs_info = sbi; 1475 sbi->s_mount_opt = 0; 1476 sbi->s_resuid = EXT4_DEF_RESUID; 1477 sbi->s_resgid = EXT4_DEF_RESGID; 1478 1479 unlock_kernel(); 1480 1481 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); 1482 if (!blocksize) { 1483 printk(KERN_ERR "EXT4-fs: unable to set blocksize\n"); 1484 goto out_fail; 1485 } 1486 1487 /* 1488 * The ext4 superblock will not be buffer aligned for other than 1kB 1489 * block sizes. We need to calculate the offset from buffer start. 1490 */ 1491 if (blocksize != EXT4_MIN_BLOCK_SIZE) { 1492 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 1493 offset = do_div(logical_sb_block, blocksize); 1494 } else { 1495 logical_sb_block = sb_block; 1496 } 1497 1498 if (!(bh = sb_bread(sb, logical_sb_block))) { 1499 printk (KERN_ERR "EXT4-fs: unable to read superblock\n"); 1500 goto out_fail; 1501 } 1502 /* 1503 * Note: s_es must be initialized as soon as possible because 1504 * some ext4 macro-instructions depend on its value 1505 */ 1506 es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); 1507 sbi->s_es = es; 1508 sb->s_magic = le16_to_cpu(es->s_magic); 1509 if (sb->s_magic != EXT4_SUPER_MAGIC) 1510 goto cantfind_ext4; 1511 1512 /* Set defaults before we parse the mount options */ 1513 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 1514 if (def_mount_opts & EXT4_DEFM_DEBUG) 1515 set_opt(sbi->s_mount_opt, DEBUG); 1516 if (def_mount_opts & EXT4_DEFM_BSDGROUPS) 1517 set_opt(sbi->s_mount_opt, GRPID); 1518 if (def_mount_opts & EXT4_DEFM_UID16) 1519 set_opt(sbi->s_mount_opt, NO_UID32); 1520 #ifdef CONFIG_EXT4DEV_FS_XATTR 1521 if (def_mount_opts & EXT4_DEFM_XATTR_USER) 1522 set_opt(sbi->s_mount_opt, XATTR_USER); 1523 #endif 1524 #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL 1525 if (def_mount_opts & EXT4_DEFM_ACL) 1526 set_opt(sbi->s_mount_opt, POSIX_ACL); 1527 #endif 1528 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) 1529 sbi->s_mount_opt |= EXT4_MOUNT_JOURNAL_DATA; 1530 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) 1531 sbi->s_mount_opt |= EXT4_MOUNT_ORDERED_DATA; 1532 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) 1533 sbi->s_mount_opt |= EXT4_MOUNT_WRITEBACK_DATA; 1534 1535 if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) 1536 set_opt(sbi->s_mount_opt, ERRORS_PANIC); 1537 else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_RO) 1538 set_opt(sbi->s_mount_opt, ERRORS_RO); 1539 else 1540 set_opt(sbi->s_mount_opt, ERRORS_CONT); 1541 1542 sbi->s_resuid = le16_to_cpu(es->s_def_resuid); 1543 sbi->s_resgid = le16_to_cpu(es->s_def_resgid); 1544 1545 set_opt(sbi->s_mount_opt, RESERVATION); 1546 1547 if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, 1548 NULL, 0)) 1549 goto failed_mount; 1550 1551 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 1552 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 1553 1554 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && 1555 (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || 1556 EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) || 1557 EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U))) 1558 printk(KERN_WARNING 1559 "EXT4-fs warning: feature flags set on rev 0 fs, " 1560 "running e2fsck is recommended\n"); 1561 /* 1562 * Check feature flags regardless of the revision level, since we 1563 * previously didn't change the revision level when setting the flags, 1564 * so there is a chance incompat flags are set on a rev 0 filesystem. 1565 */ 1566 features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP); 1567 if (features) { 1568 printk(KERN_ERR "EXT4-fs: %s: couldn't mount because of " 1569 "unsupported optional features (%x).\n", 1570 sb->s_id, le32_to_cpu(features)); 1571 goto failed_mount; 1572 } 1573 features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP); 1574 if (!(sb->s_flags & MS_RDONLY) && features) { 1575 printk(KERN_ERR "EXT4-fs: %s: couldn't mount RDWR because of " 1576 "unsupported optional features (%x).\n", 1577 sb->s_id, le32_to_cpu(features)); 1578 goto failed_mount; 1579 } 1580 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); 1581 1582 if (blocksize < EXT4_MIN_BLOCK_SIZE || 1583 blocksize > EXT4_MAX_BLOCK_SIZE) { 1584 printk(KERN_ERR 1585 "EXT4-fs: Unsupported filesystem blocksize %d on %s.\n", 1586 blocksize, sb->s_id); 1587 goto failed_mount; 1588 } 1589 1590 hblock = bdev_hardsect_size(sb->s_bdev); 1591 if (sb->s_blocksize != blocksize) { 1592 /* 1593 * Make sure the blocksize for the filesystem is larger 1594 * than the hardware sectorsize for the machine. 1595 */ 1596 if (blocksize < hblock) { 1597 printk(KERN_ERR "EXT4-fs: blocksize %d too small for " 1598 "device blocksize %d.\n", blocksize, hblock); 1599 goto failed_mount; 1600 } 1601 1602 brelse (bh); 1603 sb_set_blocksize(sb, blocksize); 1604 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 1605 offset = do_div(logical_sb_block, blocksize); 1606 bh = sb_bread(sb, logical_sb_block); 1607 if (!bh) { 1608 printk(KERN_ERR 1609 "EXT4-fs: Can't read superblock on 2nd try.\n"); 1610 goto failed_mount; 1611 } 1612 es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); 1613 sbi->s_es = es; 1614 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { 1615 printk (KERN_ERR 1616 "EXT4-fs: Magic mismatch, very weird !\n"); 1617 goto failed_mount; 1618 } 1619 } 1620 1621 sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits); 1622 1623 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { 1624 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; 1625 sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO; 1626 } else { 1627 sbi->s_inode_size = le16_to_cpu(es->s_inode_size); 1628 sbi->s_first_ino = le32_to_cpu(es->s_first_ino); 1629 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || 1630 (sbi->s_inode_size & (sbi->s_inode_size - 1)) || 1631 (sbi->s_inode_size > blocksize)) { 1632 printk (KERN_ERR 1633 "EXT4-fs: unsupported inode size: %d\n", 1634 sbi->s_inode_size); 1635 goto failed_mount; 1636 } 1637 } 1638 sbi->s_frag_size = EXT4_MIN_FRAG_SIZE << 1639 le32_to_cpu(es->s_log_frag_size); 1640 if (blocksize != sbi->s_frag_size) { 1641 printk(KERN_ERR 1642 "EXT4-fs: fragsize %lu != blocksize %u (unsupported)\n", 1643 sbi->s_frag_size, blocksize); 1644 goto failed_mount; 1645 } 1646 sbi->s_desc_size = le16_to_cpu(es->s_desc_size); 1647 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { 1648 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || 1649 sbi->s_desc_size > EXT4_MAX_DESC_SIZE || 1650 sbi->s_desc_size & (sbi->s_desc_size - 1)) { 1651 printk(KERN_ERR 1652 "EXT4-fs: unsupported descriptor size %lu\n", 1653 sbi->s_desc_size); 1654 goto failed_mount; 1655 } 1656 } else 1657 sbi->s_desc_size = EXT4_MIN_DESC_SIZE; 1658 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); 1659 sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group); 1660 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); 1661 if (EXT4_INODE_SIZE(sb) == 0) 1662 goto cantfind_ext4; 1663 sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); 1664 if (sbi->s_inodes_per_block == 0) 1665 goto cantfind_ext4; 1666 sbi->s_itb_per_group = sbi->s_inodes_per_group / 1667 sbi->s_inodes_per_block; 1668 sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); 1669 sbi->s_sbh = bh; 1670 sbi->s_mount_state = le16_to_cpu(es->s_state); 1671 sbi->s_addr_per_block_bits = log2(EXT4_ADDR_PER_BLOCK(sb)); 1672 sbi->s_desc_per_block_bits = log2(EXT4_DESC_PER_BLOCK(sb)); 1673 for (i=0; i < 4; i++) 1674 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); 1675 sbi->s_def_hash_version = es->s_def_hash_version; 1676 1677 if (sbi->s_blocks_per_group > blocksize * 8) { 1678 printk (KERN_ERR 1679 "EXT4-fs: #blocks per group too big: %lu\n", 1680 sbi->s_blocks_per_group); 1681 goto failed_mount; 1682 } 1683 if (sbi->s_frags_per_group > blocksize * 8) { 1684 printk (KERN_ERR 1685 "EXT4-fs: #fragments per group too big: %lu\n", 1686 sbi->s_frags_per_group); 1687 goto failed_mount; 1688 } 1689 if (sbi->s_inodes_per_group > blocksize * 8) { 1690 printk (KERN_ERR 1691 "EXT4-fs: #inodes per group too big: %lu\n", 1692 sbi->s_inodes_per_group); 1693 goto failed_mount; 1694 } 1695 1696 if (ext4_blocks_count(es) > 1697 (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { 1698 printk(KERN_ERR "EXT4-fs: filesystem on %s:" 1699 " too large to mount safely\n", sb->s_id); 1700 if (sizeof(sector_t) < 8) 1701 printk(KERN_WARNING "EXT4-fs: CONFIG_LBD not " 1702 "enabled\n"); 1703 goto failed_mount; 1704 } 1705 1706 if (EXT4_BLOCKS_PER_GROUP(sb) == 0) 1707 goto cantfind_ext4; 1708 blocks_count = (ext4_blocks_count(es) - 1709 le32_to_cpu(es->s_first_data_block) + 1710 EXT4_BLOCKS_PER_GROUP(sb) - 1); 1711 do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); 1712 sbi->s_groups_count = blocks_count; 1713 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / 1714 EXT4_DESC_PER_BLOCK(sb); 1715 sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *), 1716 GFP_KERNEL); 1717 if (sbi->s_group_desc == NULL) { 1718 printk (KERN_ERR "EXT4-fs: not enough memory\n"); 1719 goto failed_mount; 1720 } 1721 1722 bgl_lock_init(&sbi->s_blockgroup_lock); 1723 1724 for (i = 0; i < db_count; i++) { 1725 block = descriptor_loc(sb, logical_sb_block, i); 1726 sbi->s_group_desc[i] = sb_bread(sb, block); 1727 if (!sbi->s_group_desc[i]) { 1728 printk (KERN_ERR "EXT4-fs: " 1729 "can't read group descriptor %d\n", i); 1730 db_count = i; 1731 goto failed_mount2; 1732 } 1733 } 1734 if (!ext4_check_descriptors (sb)) { 1735 printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n"); 1736 goto failed_mount2; 1737 } 1738 sbi->s_gdb_count = db_count; 1739 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 1740 spin_lock_init(&sbi->s_next_gen_lock); 1741 1742 percpu_counter_init(&sbi->s_freeblocks_counter, 1743 ext4_count_free_blocks(sb)); 1744 percpu_counter_init(&sbi->s_freeinodes_counter, 1745 ext4_count_free_inodes(sb)); 1746 percpu_counter_init(&sbi->s_dirs_counter, 1747 ext4_count_dirs(sb)); 1748 1749 /* per fileystem reservation list head & lock */ 1750 spin_lock_init(&sbi->s_rsv_window_lock); 1751 sbi->s_rsv_window_root = RB_ROOT; 1752 /* Add a single, static dummy reservation to the start of the 1753 * reservation window list --- it gives us a placeholder for 1754 * append-at-start-of-list which makes the allocation logic 1755 * _much_ simpler. */ 1756 sbi->s_rsv_window_head.rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; 1757 sbi->s_rsv_window_head.rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; 1758 sbi->s_rsv_window_head.rsv_alloc_hit = 0; 1759 sbi->s_rsv_window_head.rsv_goal_size = 0; 1760 ext4_rsv_window_add(sb, &sbi->s_rsv_window_head); 1761 1762 /* 1763 * set up enough so that it can read an inode 1764 */ 1765 sb->s_op = &ext4_sops; 1766 sb->s_export_op = &ext4_export_ops; 1767 sb->s_xattr = ext4_xattr_handlers; 1768 #ifdef CONFIG_QUOTA 1769 sb->s_qcop = &ext4_qctl_operations; 1770 sb->dq_op = &ext4_quota_operations; 1771 #endif 1772 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 1773 1774 sb->s_root = NULL; 1775 1776 needs_recovery = (es->s_last_orphan != 0 || 1777 EXT4_HAS_INCOMPAT_FEATURE(sb, 1778 EXT4_FEATURE_INCOMPAT_RECOVER)); 1779 1780 /* 1781 * The first inode we look at is the journal inode. Don't try 1782 * root first: it may be modified in the journal! 1783 */ 1784 if (!test_opt(sb, NOLOAD) && 1785 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { 1786 if (ext4_load_journal(sb, es, journal_devnum)) 1787 goto failed_mount3; 1788 } else if (journal_inum) { 1789 if (ext4_create_journal(sb, es, journal_inum)) 1790 goto failed_mount3; 1791 } else { 1792 if (!silent) 1793 printk (KERN_ERR 1794 "ext4: No journal on filesystem on %s\n", 1795 sb->s_id); 1796 goto failed_mount3; 1797 } 1798 1799 /* We have now updated the journal if required, so we can 1800 * validate the data journaling mode. */ 1801 switch (test_opt(sb, DATA_FLAGS)) { 1802 case 0: 1803 /* No mode set, assume a default based on the journal 1804 * capabilities: ORDERED_DATA if the journal can 1805 * cope, else JOURNAL_DATA 1806 */ 1807 if (jbd2_journal_check_available_features 1808 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) 1809 set_opt(sbi->s_mount_opt, ORDERED_DATA); 1810 else 1811 set_opt(sbi->s_mount_opt, JOURNAL_DATA); 1812 break; 1813 1814 case EXT4_MOUNT_ORDERED_DATA: 1815 case EXT4_MOUNT_WRITEBACK_DATA: 1816 if (!jbd2_journal_check_available_features 1817 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { 1818 printk(KERN_ERR "EXT4-fs: Journal does not support " 1819 "requested data journaling mode\n"); 1820 goto failed_mount4; 1821 } 1822 default: 1823 break; 1824 } 1825 1826 if (test_opt(sb, NOBH)) { 1827 if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { 1828 printk(KERN_WARNING "EXT4-fs: Ignoring nobh option - " 1829 "its supported only with writeback mode\n"); 1830 clear_opt(sbi->s_mount_opt, NOBH); 1831 } 1832 } 1833 /* 1834 * The jbd2_journal_load will have done any necessary log recovery, 1835 * so we can safely mount the rest of the filesystem now. 1836 */ 1837 1838 root = iget(sb, EXT4_ROOT_INO); 1839 sb->s_root = d_alloc_root(root); 1840 if (!sb->s_root) { 1841 printk(KERN_ERR "EXT4-fs: get root inode failed\n"); 1842 iput(root); 1843 goto failed_mount4; 1844 } 1845 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 1846 dput(sb->s_root); 1847 sb->s_root = NULL; 1848 printk(KERN_ERR "EXT4-fs: corrupt root inode, run e2fsck\n"); 1849 goto failed_mount4; 1850 } 1851 1852 ext4_setup_super (sb, es, sb->s_flags & MS_RDONLY); 1853 /* 1854 * akpm: core read_super() calls in here with the superblock locked. 1855 * That deadlocks, because orphan cleanup needs to lock the superblock 1856 * in numerous places. Here we just pop the lock - it's relatively 1857 * harmless, because we are now ready to accept write_super() requests, 1858 * and aviro says that's the only reason for hanging onto the 1859 * superblock lock. 1860 */ 1861 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; 1862 ext4_orphan_cleanup(sb, es); 1863 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; 1864 if (needs_recovery) 1865 printk (KERN_INFO "EXT4-fs: recovery complete.\n"); 1866 ext4_mark_recovery_complete(sb, es); 1867 printk (KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n", 1868 test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal": 1869 test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered": 1870 "writeback"); 1871 1872 ext4_ext_init(sb); 1873 1874 lock_kernel(); 1875 return 0; 1876 1877 cantfind_ext4: 1878 if (!silent) 1879 printk(KERN_ERR "VFS: Can't find ext4 filesystem on dev %s.\n", 1880 sb->s_id); 1881 goto failed_mount; 1882 1883 failed_mount4: 1884 jbd2_journal_destroy(sbi->s_journal); 1885 failed_mount3: 1886 percpu_counter_destroy(&sbi->s_freeblocks_counter); 1887 percpu_counter_destroy(&sbi->s_freeinodes_counter); 1888 percpu_counter_destroy(&sbi->s_dirs_counter); 1889 failed_mount2: 1890 for (i = 0; i < db_count; i++) 1891 brelse(sbi->s_group_desc[i]); 1892 kfree(sbi->s_group_desc); 1893 failed_mount: 1894 #ifdef CONFIG_QUOTA 1895 for (i = 0; i < MAXQUOTAS; i++) 1896 kfree(sbi->s_qf_names[i]); 1897 #endif 1898 ext4_blkdev_remove(sbi); 1899 brelse(bh); 1900 out_fail: 1901 sb->s_fs_info = NULL; 1902 kfree(sbi); 1903 lock_kernel(); 1904 return -EINVAL; 1905 } 1906 1907 /* 1908 * Setup any per-fs journal parameters now. We'll do this both on 1909 * initial mount, once the journal has been initialised but before we've 1910 * done any recovery; and again on any subsequent remount. 1911 */ 1912 static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) 1913 { 1914 struct ext4_sb_info *sbi = EXT4_SB(sb); 1915 1916 if (sbi->s_commit_interval) 1917 journal->j_commit_interval = sbi->s_commit_interval; 1918 /* We could also set up an ext4-specific default for the commit 1919 * interval here, but for now we'll just fall back to the jbd 1920 * default. */ 1921 1922 spin_lock(&journal->j_state_lock); 1923 if (test_opt(sb, BARRIER)) 1924 journal->j_flags |= JBD2_BARRIER; 1925 else 1926 journal->j_flags &= ~JBD2_BARRIER; 1927 spin_unlock(&journal->j_state_lock); 1928 } 1929 1930 static journal_t *ext4_get_journal(struct super_block *sb, 1931 unsigned int journal_inum) 1932 { 1933 struct inode *journal_inode; 1934 journal_t *journal; 1935 1936 /* First, test for the existence of a valid inode on disk. Bad 1937 * things happen if we iget() an unused inode, as the subsequent 1938 * iput() will try to delete it. */ 1939 1940 journal_inode = iget(sb, journal_inum); 1941 if (!journal_inode) { 1942 printk(KERN_ERR "EXT4-fs: no journal found.\n"); 1943 return NULL; 1944 } 1945 if (!journal_inode->i_nlink) { 1946 make_bad_inode(journal_inode); 1947 iput(journal_inode); 1948 printk(KERN_ERR "EXT4-fs: journal inode is deleted.\n"); 1949 return NULL; 1950 } 1951 1952 jbd_debug(2, "Journal inode found at %p: %Ld bytes\n", 1953 journal_inode, journal_inode->i_size); 1954 if (is_bad_inode(journal_inode) || !S_ISREG(journal_inode->i_mode)) { 1955 printk(KERN_ERR "EXT4-fs: invalid journal inode.\n"); 1956 iput(journal_inode); 1957 return NULL; 1958 } 1959 1960 journal = jbd2_journal_init_inode(journal_inode); 1961 if (!journal) { 1962 printk(KERN_ERR "EXT4-fs: Could not load journal inode\n"); 1963 iput(journal_inode); 1964 return NULL; 1965 } 1966 journal->j_private = sb; 1967 ext4_init_journal_params(sb, journal); 1968 return journal; 1969 } 1970 1971 static journal_t *ext4_get_dev_journal(struct super_block *sb, 1972 dev_t j_dev) 1973 { 1974 struct buffer_head * bh; 1975 journal_t *journal; 1976 ext4_fsblk_t start; 1977 ext4_fsblk_t len; 1978 int hblock, blocksize; 1979 ext4_fsblk_t sb_block; 1980 unsigned long offset; 1981 struct ext4_super_block * es; 1982 struct block_device *bdev; 1983 1984 bdev = ext4_blkdev_get(j_dev); 1985 if (bdev == NULL) 1986 return NULL; 1987 1988 if (bd_claim(bdev, sb)) { 1989 printk(KERN_ERR 1990 "EXT4: failed to claim external journal device.\n"); 1991 blkdev_put(bdev); 1992 return NULL; 1993 } 1994 1995 blocksize = sb->s_blocksize; 1996 hblock = bdev_hardsect_size(bdev); 1997 if (blocksize < hblock) { 1998 printk(KERN_ERR 1999 "EXT4-fs: blocksize too small for journal device.\n"); 2000 goto out_bdev; 2001 } 2002 2003 sb_block = EXT4_MIN_BLOCK_SIZE / blocksize; 2004 offset = EXT4_MIN_BLOCK_SIZE % blocksize; 2005 set_blocksize(bdev, blocksize); 2006 if (!(bh = __bread(bdev, sb_block, blocksize))) { 2007 printk(KERN_ERR "EXT4-fs: couldn't read superblock of " 2008 "external journal\n"); 2009 goto out_bdev; 2010 } 2011 2012 es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); 2013 if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || 2014 !(le32_to_cpu(es->s_feature_incompat) & 2015 EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { 2016 printk(KERN_ERR "EXT4-fs: external journal has " 2017 "bad superblock\n"); 2018 brelse(bh); 2019 goto out_bdev; 2020 } 2021 2022 if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { 2023 printk(KERN_ERR "EXT4-fs: journal UUID does not match\n"); 2024 brelse(bh); 2025 goto out_bdev; 2026 } 2027 2028 len = ext4_blocks_count(es); 2029 start = sb_block + 1; 2030 brelse(bh); /* we're done with the superblock */ 2031 2032 journal = jbd2_journal_init_dev(bdev, sb->s_bdev, 2033 start, len, blocksize); 2034 if (!journal) { 2035 printk(KERN_ERR "EXT4-fs: failed to create device journal\n"); 2036 goto out_bdev; 2037 } 2038 journal->j_private = sb; 2039 ll_rw_block(READ, 1, &journal->j_sb_buffer); 2040 wait_on_buffer(journal->j_sb_buffer); 2041 if (!buffer_uptodate(journal->j_sb_buffer)) { 2042 printk(KERN_ERR "EXT4-fs: I/O error on journal device\n"); 2043 goto out_journal; 2044 } 2045 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { 2046 printk(KERN_ERR "EXT4-fs: External journal has more than one " 2047 "user (unsupported) - %d\n", 2048 be32_to_cpu(journal->j_superblock->s_nr_users)); 2049 goto out_journal; 2050 } 2051 EXT4_SB(sb)->journal_bdev = bdev; 2052 ext4_init_journal_params(sb, journal); 2053 return journal; 2054 out_journal: 2055 jbd2_journal_destroy(journal); 2056 out_bdev: 2057 ext4_blkdev_put(bdev); 2058 return NULL; 2059 } 2060 2061 static int ext4_load_journal(struct super_block *sb, 2062 struct ext4_super_block *es, 2063 unsigned long journal_devnum) 2064 { 2065 journal_t *journal; 2066 unsigned int journal_inum = le32_to_cpu(es->s_journal_inum); 2067 dev_t journal_dev; 2068 int err = 0; 2069 int really_read_only; 2070 2071 if (journal_devnum && 2072 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 2073 printk(KERN_INFO "EXT4-fs: external journal device major/minor " 2074 "numbers have changed\n"); 2075 journal_dev = new_decode_dev(journal_devnum); 2076 } else 2077 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); 2078 2079 really_read_only = bdev_read_only(sb->s_bdev); 2080 2081 /* 2082 * Are we loading a blank journal or performing recovery after a 2083 * crash? For recovery, we need to check in advance whether we 2084 * can get read-write access to the device. 2085 */ 2086 2087 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 2088 if (sb->s_flags & MS_RDONLY) { 2089 printk(KERN_INFO "EXT4-fs: INFO: recovery " 2090 "required on readonly filesystem.\n"); 2091 if (really_read_only) { 2092 printk(KERN_ERR "EXT4-fs: write access " 2093 "unavailable, cannot proceed.\n"); 2094 return -EROFS; 2095 } 2096 printk (KERN_INFO "EXT4-fs: write access will " 2097 "be enabled during recovery.\n"); 2098 } 2099 } 2100 2101 if (journal_inum && journal_dev) { 2102 printk(KERN_ERR "EXT4-fs: filesystem has both journal " 2103 "and inode journals!\n"); 2104 return -EINVAL; 2105 } 2106 2107 if (journal_inum) { 2108 if (!(journal = ext4_get_journal(sb, journal_inum))) 2109 return -EINVAL; 2110 } else { 2111 if (!(journal = ext4_get_dev_journal(sb, journal_dev))) 2112 return -EINVAL; 2113 } 2114 2115 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { 2116 err = jbd2_journal_update_format(journal); 2117 if (err) { 2118 printk(KERN_ERR "EXT4-fs: error updating journal.\n"); 2119 jbd2_journal_destroy(journal); 2120 return err; 2121 } 2122 } 2123 2124 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) 2125 err = jbd2_journal_wipe(journal, !really_read_only); 2126 if (!err) 2127 err = jbd2_journal_load(journal); 2128 2129 if (err) { 2130 printk(KERN_ERR "EXT4-fs: error loading journal.\n"); 2131 jbd2_journal_destroy(journal); 2132 return err; 2133 } 2134 2135 EXT4_SB(sb)->s_journal = journal; 2136 ext4_clear_journal_err(sb, es); 2137 2138 if (journal_devnum && 2139 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 2140 es->s_journal_dev = cpu_to_le32(journal_devnum); 2141 sb->s_dirt = 1; 2142 2143 /* Make sure we flush the recovery flag to disk. */ 2144 ext4_commit_super(sb, es, 1); 2145 } 2146 2147 return 0; 2148 } 2149 2150 static int ext4_create_journal(struct super_block * sb, 2151 struct ext4_super_block * es, 2152 unsigned int journal_inum) 2153 { 2154 journal_t *journal; 2155 2156 if (sb->s_flags & MS_RDONLY) { 2157 printk(KERN_ERR "EXT4-fs: readonly filesystem when trying to " 2158 "create journal.\n"); 2159 return -EROFS; 2160 } 2161 2162 if (!(journal = ext4_get_journal(sb, journal_inum))) 2163 return -EINVAL; 2164 2165 printk(KERN_INFO "EXT4-fs: creating new journal on inode %u\n", 2166 journal_inum); 2167 2168 if (jbd2_journal_create(journal)) { 2169 printk(KERN_ERR "EXT4-fs: error creating journal.\n"); 2170 jbd2_journal_destroy(journal); 2171 return -EIO; 2172 } 2173 2174 EXT4_SB(sb)->s_journal = journal; 2175 2176 ext4_update_dynamic_rev(sb); 2177 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 2178 EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL); 2179 2180 es->s_journal_inum = cpu_to_le32(journal_inum); 2181 sb->s_dirt = 1; 2182 2183 /* Make sure we flush the recovery flag to disk. */ 2184 ext4_commit_super(sb, es, 1); 2185 2186 return 0; 2187 } 2188 2189 static void ext4_commit_super (struct super_block * sb, 2190 struct ext4_super_block * es, 2191 int sync) 2192 { 2193 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; 2194 2195 if (!sbh) 2196 return; 2197 es->s_wtime = cpu_to_le32(get_seconds()); 2198 ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb)); 2199 es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); 2200 BUFFER_TRACE(sbh, "marking dirty"); 2201 mark_buffer_dirty(sbh); 2202 if (sync) 2203 sync_dirty_buffer(sbh); 2204 } 2205 2206 2207 /* 2208 * Have we just finished recovery? If so, and if we are mounting (or 2209 * remounting) the filesystem readonly, then we will end up with a 2210 * consistent fs on disk. Record that fact. 2211 */ 2212 static void ext4_mark_recovery_complete(struct super_block * sb, 2213 struct ext4_super_block * es) 2214 { 2215 journal_t *journal = EXT4_SB(sb)->s_journal; 2216 2217 jbd2_journal_lock_updates(journal); 2218 jbd2_journal_flush(journal); 2219 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && 2220 sb->s_flags & MS_RDONLY) { 2221 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 2222 sb->s_dirt = 0; 2223 ext4_commit_super(sb, es, 1); 2224 } 2225 jbd2_journal_unlock_updates(journal); 2226 } 2227 2228 /* 2229 * If we are mounting (or read-write remounting) a filesystem whose journal 2230 * has recorded an error from a previous lifetime, move that error to the 2231 * main filesystem now. 2232 */ 2233 static void ext4_clear_journal_err(struct super_block * sb, 2234 struct ext4_super_block * es) 2235 { 2236 journal_t *journal; 2237 int j_errno; 2238 const char *errstr; 2239 2240 journal = EXT4_SB(sb)->s_journal; 2241 2242 /* 2243 * Now check for any error status which may have been recorded in the 2244 * journal by a prior ext4_error() or ext4_abort() 2245 */ 2246 2247 j_errno = jbd2_journal_errno(journal); 2248 if (j_errno) { 2249 char nbuf[16]; 2250 2251 errstr = ext4_decode_error(sb, j_errno, nbuf); 2252 ext4_warning(sb, __FUNCTION__, "Filesystem error recorded " 2253 "from previous mount: %s", errstr); 2254 ext4_warning(sb, __FUNCTION__, "Marking fs in need of " 2255 "filesystem check."); 2256 2257 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 2258 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 2259 ext4_commit_super (sb, es, 1); 2260 2261 jbd2_journal_clear_err(journal); 2262 } 2263 } 2264 2265 /* 2266 * Force the running and committing transactions to commit, 2267 * and wait on the commit. 2268 */ 2269 int ext4_force_commit(struct super_block *sb) 2270 { 2271 journal_t *journal; 2272 int ret; 2273 2274 if (sb->s_flags & MS_RDONLY) 2275 return 0; 2276 2277 journal = EXT4_SB(sb)->s_journal; 2278 sb->s_dirt = 0; 2279 ret = ext4_journal_force_commit(journal); 2280 return ret; 2281 } 2282 2283 /* 2284 * Ext4 always journals updates to the superblock itself, so we don't 2285 * have to propagate any other updates to the superblock on disk at this 2286 * point. Just start an async writeback to get the buffers on their way 2287 * to the disk. 2288 * 2289 * This implicitly triggers the writebehind on sync(). 2290 */ 2291 2292 static void ext4_write_super (struct super_block * sb) 2293 { 2294 if (mutex_trylock(&sb->s_lock) != 0) 2295 BUG(); 2296 sb->s_dirt = 0; 2297 } 2298 2299 static int ext4_sync_fs(struct super_block *sb, int wait) 2300 { 2301 tid_t target; 2302 2303 sb->s_dirt = 0; 2304 if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { 2305 if (wait) 2306 jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target); 2307 } 2308 return 0; 2309 } 2310 2311 /* 2312 * LVM calls this function before a (read-only) snapshot is created. This 2313 * gives us a chance to flush the journal completely and mark the fs clean. 2314 */ 2315 static void ext4_write_super_lockfs(struct super_block *sb) 2316 { 2317 sb->s_dirt = 0; 2318 2319 if (!(sb->s_flags & MS_RDONLY)) { 2320 journal_t *journal = EXT4_SB(sb)->s_journal; 2321 2322 /* Now we set up the journal barrier. */ 2323 jbd2_journal_lock_updates(journal); 2324 jbd2_journal_flush(journal); 2325 2326 /* Journal blocked and flushed, clear needs_recovery flag. */ 2327 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 2328 ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); 2329 } 2330 } 2331 2332 /* 2333 * Called by LVM after the snapshot is done. We need to reset the RECOVER 2334 * flag here, even though the filesystem is not technically dirty yet. 2335 */ 2336 static void ext4_unlockfs(struct super_block *sb) 2337 { 2338 if (!(sb->s_flags & MS_RDONLY)) { 2339 lock_super(sb); 2340 /* Reser the needs_recovery flag before the fs is unlocked. */ 2341 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 2342 ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); 2343 unlock_super(sb); 2344 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 2345 } 2346 } 2347 2348 static int ext4_remount (struct super_block * sb, int * flags, char * data) 2349 { 2350 struct ext4_super_block * es; 2351 struct ext4_sb_info *sbi = EXT4_SB(sb); 2352 ext4_fsblk_t n_blocks_count = 0; 2353 unsigned long old_sb_flags; 2354 struct ext4_mount_options old_opts; 2355 int err; 2356 #ifdef CONFIG_QUOTA 2357 int i; 2358 #endif 2359 2360 /* Store the original options */ 2361 old_sb_flags = sb->s_flags; 2362 old_opts.s_mount_opt = sbi->s_mount_opt; 2363 old_opts.s_resuid = sbi->s_resuid; 2364 old_opts.s_resgid = sbi->s_resgid; 2365 old_opts.s_commit_interval = sbi->s_commit_interval; 2366 #ifdef CONFIG_QUOTA 2367 old_opts.s_jquota_fmt = sbi->s_jquota_fmt; 2368 for (i = 0; i < MAXQUOTAS; i++) 2369 old_opts.s_qf_names[i] = sbi->s_qf_names[i]; 2370 #endif 2371 2372 /* 2373 * Allow the "check" option to be passed as a remount option. 2374 */ 2375 if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) { 2376 err = -EINVAL; 2377 goto restore_opts; 2378 } 2379 2380 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) 2381 ext4_abort(sb, __FUNCTION__, "Abort forced by user"); 2382 2383 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 2384 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 2385 2386 es = sbi->s_es; 2387 2388 ext4_init_journal_params(sb, sbi->s_journal); 2389 2390 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || 2391 n_blocks_count > ext4_blocks_count(es)) { 2392 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) { 2393 err = -EROFS; 2394 goto restore_opts; 2395 } 2396 2397 if (*flags & MS_RDONLY) { 2398 /* 2399 * First of all, the unconditional stuff we have to do 2400 * to disable replay of the journal when we next remount 2401 */ 2402 sb->s_flags |= MS_RDONLY; 2403 2404 /* 2405 * OK, test if we are remounting a valid rw partition 2406 * readonly, and if so set the rdonly flag and then 2407 * mark the partition as valid again. 2408 */ 2409 if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) && 2410 (sbi->s_mount_state & EXT4_VALID_FS)) 2411 es->s_state = cpu_to_le16(sbi->s_mount_state); 2412 2413 ext4_mark_recovery_complete(sb, es); 2414 } else { 2415 __le32 ret; 2416 if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, 2417 ~EXT4_FEATURE_RO_COMPAT_SUPP))) { 2418 printk(KERN_WARNING "EXT4-fs: %s: couldn't " 2419 "remount RDWR because of unsupported " 2420 "optional features (%x).\n", 2421 sb->s_id, le32_to_cpu(ret)); 2422 err = -EROFS; 2423 goto restore_opts; 2424 } 2425 2426 /* 2427 * If we have an unprocessed orphan list hanging 2428 * around from a previously readonly bdev mount, 2429 * require a full umount/remount for now. 2430 */ 2431 if (es->s_last_orphan) { 2432 printk(KERN_WARNING "EXT4-fs: %s: couldn't " 2433 "remount RDWR because of unprocessed " 2434 "orphan inode list. Please " 2435 "umount/remount instead.\n", 2436 sb->s_id); 2437 err = -EINVAL; 2438 goto restore_opts; 2439 } 2440 2441 /* 2442 * Mounting a RDONLY partition read-write, so reread 2443 * and store the current valid flag. (It may have 2444 * been changed by e2fsck since we originally mounted 2445 * the partition.) 2446 */ 2447 ext4_clear_journal_err(sb, es); 2448 sbi->s_mount_state = le16_to_cpu(es->s_state); 2449 if ((err = ext4_group_extend(sb, es, n_blocks_count))) 2450 goto restore_opts; 2451 if (!ext4_setup_super (sb, es, 0)) 2452 sb->s_flags &= ~MS_RDONLY; 2453 } 2454 } 2455 #ifdef CONFIG_QUOTA 2456 /* Release old quota file names */ 2457 for (i = 0; i < MAXQUOTAS; i++) 2458 if (old_opts.s_qf_names[i] && 2459 old_opts.s_qf_names[i] != sbi->s_qf_names[i]) 2460 kfree(old_opts.s_qf_names[i]); 2461 #endif 2462 return 0; 2463 restore_opts: 2464 sb->s_flags = old_sb_flags; 2465 sbi->s_mount_opt = old_opts.s_mount_opt; 2466 sbi->s_resuid = old_opts.s_resuid; 2467 sbi->s_resgid = old_opts.s_resgid; 2468 sbi->s_commit_interval = old_opts.s_commit_interval; 2469 #ifdef CONFIG_QUOTA 2470 sbi->s_jquota_fmt = old_opts.s_jquota_fmt; 2471 for (i = 0; i < MAXQUOTAS; i++) { 2472 if (sbi->s_qf_names[i] && 2473 old_opts.s_qf_names[i] != sbi->s_qf_names[i]) 2474 kfree(sbi->s_qf_names[i]); 2475 sbi->s_qf_names[i] = old_opts.s_qf_names[i]; 2476 } 2477 #endif 2478 return err; 2479 } 2480 2481 static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf) 2482 { 2483 struct super_block *sb = dentry->d_sb; 2484 struct ext4_sb_info *sbi = EXT4_SB(sb); 2485 struct ext4_super_block *es = sbi->s_es; 2486 ext4_fsblk_t overhead; 2487 int i; 2488 u64 fsid; 2489 2490 if (test_opt (sb, MINIX_DF)) 2491 overhead = 0; 2492 else { 2493 unsigned long ngroups; 2494 ngroups = EXT4_SB(sb)->s_groups_count; 2495 smp_rmb(); 2496 2497 /* 2498 * Compute the overhead (FS structures) 2499 */ 2500 2501 /* 2502 * All of the blocks before first_data_block are 2503 * overhead 2504 */ 2505 overhead = le32_to_cpu(es->s_first_data_block); 2506 2507 /* 2508 * Add the overhead attributed to the superblock and 2509 * block group descriptors. If the sparse superblocks 2510 * feature is turned on, then not all groups have this. 2511 */ 2512 for (i = 0; i < ngroups; i++) { 2513 overhead += ext4_bg_has_super(sb, i) + 2514 ext4_bg_num_gdb(sb, i); 2515 cond_resched(); 2516 } 2517 2518 /* 2519 * Every block group has an inode bitmap, a block 2520 * bitmap, and an inode table. 2521 */ 2522 overhead += (ngroups * (2 + EXT4_SB(sb)->s_itb_per_group)); 2523 } 2524 2525 buf->f_type = EXT4_SUPER_MAGIC; 2526 buf->f_bsize = sb->s_blocksize; 2527 buf->f_blocks = ext4_blocks_count(es) - overhead; 2528 buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter); 2529 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); 2530 if (buf->f_bfree < ext4_r_blocks_count(es)) 2531 buf->f_bavail = 0; 2532 buf->f_files = le32_to_cpu(es->s_inodes_count); 2533 buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter); 2534 buf->f_namelen = EXT4_NAME_LEN; 2535 fsid = le64_to_cpup((void *)es->s_uuid) ^ 2536 le64_to_cpup((void *)es->s_uuid + sizeof(u64)); 2537 buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; 2538 buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; 2539 return 0; 2540 } 2541 2542 /* Helper function for writing quotas on sync - we need to start transaction before quota file 2543 * is locked for write. Otherwise the are possible deadlocks: 2544 * Process 1 Process 2 2545 * ext4_create() quota_sync() 2546 * jbd2_journal_start() write_dquot() 2547 * DQUOT_INIT() down(dqio_mutex) 2548 * down(dqio_mutex) jbd2_journal_start() 2549 * 2550 */ 2551 2552 #ifdef CONFIG_QUOTA 2553 2554 static inline struct inode *dquot_to_inode(struct dquot *dquot) 2555 { 2556 return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; 2557 } 2558 2559 static int ext4_dquot_initialize(struct inode *inode, int type) 2560 { 2561 handle_t *handle; 2562 int ret, err; 2563 2564 /* We may create quota structure so we need to reserve enough blocks */ 2565 handle = ext4_journal_start(inode, 2*EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)); 2566 if (IS_ERR(handle)) 2567 return PTR_ERR(handle); 2568 ret = dquot_initialize(inode, type); 2569 err = ext4_journal_stop(handle); 2570 if (!ret) 2571 ret = err; 2572 return ret; 2573 } 2574 2575 static int ext4_dquot_drop(struct inode *inode) 2576 { 2577 handle_t *handle; 2578 int ret, err; 2579 2580 /* We may delete quota structure so we need to reserve enough blocks */ 2581 handle = ext4_journal_start(inode, 2*EXT4_QUOTA_DEL_BLOCKS(inode->i_sb)); 2582 if (IS_ERR(handle)) 2583 return PTR_ERR(handle); 2584 ret = dquot_drop(inode); 2585 err = ext4_journal_stop(handle); 2586 if (!ret) 2587 ret = err; 2588 return ret; 2589 } 2590 2591 static int ext4_write_dquot(struct dquot *dquot) 2592 { 2593 int ret, err; 2594 handle_t *handle; 2595 struct inode *inode; 2596 2597 inode = dquot_to_inode(dquot); 2598 handle = ext4_journal_start(inode, 2599 EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); 2600 if (IS_ERR(handle)) 2601 return PTR_ERR(handle); 2602 ret = dquot_commit(dquot); 2603 err = ext4_journal_stop(handle); 2604 if (!ret) 2605 ret = err; 2606 return ret; 2607 } 2608 2609 static int ext4_acquire_dquot(struct dquot *dquot) 2610 { 2611 int ret, err; 2612 handle_t *handle; 2613 2614 handle = ext4_journal_start(dquot_to_inode(dquot), 2615 EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); 2616 if (IS_ERR(handle)) 2617 return PTR_ERR(handle); 2618 ret = dquot_acquire(dquot); 2619 err = ext4_journal_stop(handle); 2620 if (!ret) 2621 ret = err; 2622 return ret; 2623 } 2624 2625 static int ext4_release_dquot(struct dquot *dquot) 2626 { 2627 int ret, err; 2628 handle_t *handle; 2629 2630 handle = ext4_journal_start(dquot_to_inode(dquot), 2631 EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); 2632 if (IS_ERR(handle)) 2633 return PTR_ERR(handle); 2634 ret = dquot_release(dquot); 2635 err = ext4_journal_stop(handle); 2636 if (!ret) 2637 ret = err; 2638 return ret; 2639 } 2640 2641 static int ext4_mark_dquot_dirty(struct dquot *dquot) 2642 { 2643 /* Are we journalling quotas? */ 2644 if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || 2645 EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { 2646 dquot_mark_dquot_dirty(dquot); 2647 return ext4_write_dquot(dquot); 2648 } else { 2649 return dquot_mark_dquot_dirty(dquot); 2650 } 2651 } 2652 2653 static int ext4_write_info(struct super_block *sb, int type) 2654 { 2655 int ret, err; 2656 handle_t *handle; 2657 2658 /* Data block + inode block */ 2659 handle = ext4_journal_start(sb->s_root->d_inode, 2); 2660 if (IS_ERR(handle)) 2661 return PTR_ERR(handle); 2662 ret = dquot_commit_info(sb, type); 2663 err = ext4_journal_stop(handle); 2664 if (!ret) 2665 ret = err; 2666 return ret; 2667 } 2668 2669 /* 2670 * Turn on quotas during mount time - we need to find 2671 * the quota file and such... 2672 */ 2673 static int ext4_quota_on_mount(struct super_block *sb, int type) 2674 { 2675 return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], 2676 EXT4_SB(sb)->s_jquota_fmt, type); 2677 } 2678 2679 /* 2680 * Standard function to be called on quota_on 2681 */ 2682 static int ext4_quota_on(struct super_block *sb, int type, int format_id, 2683 char *path) 2684 { 2685 int err; 2686 struct nameidata nd; 2687 2688 if (!test_opt(sb, QUOTA)) 2689 return -EINVAL; 2690 /* Not journalling quota? */ 2691 if (!EXT4_SB(sb)->s_qf_names[USRQUOTA] && 2692 !EXT4_SB(sb)->s_qf_names[GRPQUOTA]) 2693 return vfs_quota_on(sb, type, format_id, path); 2694 err = path_lookup(path, LOOKUP_FOLLOW, &nd); 2695 if (err) 2696 return err; 2697 /* Quotafile not on the same filesystem? */ 2698 if (nd.mnt->mnt_sb != sb) { 2699 path_release(&nd); 2700 return -EXDEV; 2701 } 2702 /* Quotafile not of fs root? */ 2703 if (nd.dentry->d_parent->d_inode != sb->s_root->d_inode) 2704 printk(KERN_WARNING 2705 "EXT4-fs: Quota file not on filesystem root. " 2706 "Journalled quota will not work.\n"); 2707 path_release(&nd); 2708 return vfs_quota_on(sb, type, format_id, path); 2709 } 2710 2711 /* Read data from quotafile - avoid pagecache and such because we cannot afford 2712 * acquiring the locks... As quota files are never truncated and quota code 2713 * itself serializes the operations (and noone else should touch the files) 2714 * we don't have to be afraid of races */ 2715 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 2716 size_t len, loff_t off) 2717 { 2718 struct inode *inode = sb_dqopt(sb)->files[type]; 2719 sector_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 2720 int err = 0; 2721 int offset = off & (sb->s_blocksize - 1); 2722 int tocopy; 2723 size_t toread; 2724 struct buffer_head *bh; 2725 loff_t i_size = i_size_read(inode); 2726 2727 if (off > i_size) 2728 return 0; 2729 if (off+len > i_size) 2730 len = i_size-off; 2731 toread = len; 2732 while (toread > 0) { 2733 tocopy = sb->s_blocksize - offset < toread ? 2734 sb->s_blocksize - offset : toread; 2735 bh = ext4_bread(NULL, inode, blk, 0, &err); 2736 if (err) 2737 return err; 2738 if (!bh) /* A hole? */ 2739 memset(data, 0, tocopy); 2740 else 2741 memcpy(data, bh->b_data+offset, tocopy); 2742 brelse(bh); 2743 offset = 0; 2744 toread -= tocopy; 2745 data += tocopy; 2746 blk++; 2747 } 2748 return len; 2749 } 2750 2751 /* Write to quotafile (we know the transaction is already started and has 2752 * enough credits) */ 2753 static ssize_t ext4_quota_write(struct super_block *sb, int type, 2754 const char *data, size_t len, loff_t off) 2755 { 2756 struct inode *inode = sb_dqopt(sb)->files[type]; 2757 sector_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 2758 int err = 0; 2759 int offset = off & (sb->s_blocksize - 1); 2760 int tocopy; 2761 int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL; 2762 size_t towrite = len; 2763 struct buffer_head *bh; 2764 handle_t *handle = journal_current_handle(); 2765 2766 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); 2767 while (towrite > 0) { 2768 tocopy = sb->s_blocksize - offset < towrite ? 2769 sb->s_blocksize - offset : towrite; 2770 bh = ext4_bread(handle, inode, blk, 1, &err); 2771 if (!bh) 2772 goto out; 2773 if (journal_quota) { 2774 err = ext4_journal_get_write_access(handle, bh); 2775 if (err) { 2776 brelse(bh); 2777 goto out; 2778 } 2779 } 2780 lock_buffer(bh); 2781 memcpy(bh->b_data+offset, data, tocopy); 2782 flush_dcache_page(bh->b_page); 2783 unlock_buffer(bh); 2784 if (journal_quota) 2785 err = ext4_journal_dirty_metadata(handle, bh); 2786 else { 2787 /* Always do at least ordered writes for quotas */ 2788 err = ext4_journal_dirty_data(handle, bh); 2789 mark_buffer_dirty(bh); 2790 } 2791 brelse(bh); 2792 if (err) 2793 goto out; 2794 offset = 0; 2795 towrite -= tocopy; 2796 data += tocopy; 2797 blk++; 2798 } 2799 out: 2800 if (len == towrite) 2801 return err; 2802 if (inode->i_size < off+len-towrite) { 2803 i_size_write(inode, off+len-towrite); 2804 EXT4_I(inode)->i_disksize = inode->i_size; 2805 } 2806 inode->i_version++; 2807 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 2808 ext4_mark_inode_dirty(handle, inode); 2809 mutex_unlock(&inode->i_mutex); 2810 return len - towrite; 2811 } 2812 2813 #endif 2814 2815 static int ext4_get_sb(struct file_system_type *fs_type, 2816 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 2817 { 2818 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); 2819 } 2820 2821 static struct file_system_type ext4dev_fs_type = { 2822 .owner = THIS_MODULE, 2823 .name = "ext4dev", 2824 .get_sb = ext4_get_sb, 2825 .kill_sb = kill_block_super, 2826 .fs_flags = FS_REQUIRES_DEV, 2827 }; 2828 2829 static int __init init_ext4_fs(void) 2830 { 2831 int err = init_ext4_xattr(); 2832 if (err) 2833 return err; 2834 err = init_inodecache(); 2835 if (err) 2836 goto out1; 2837 err = register_filesystem(&ext4dev_fs_type); 2838 if (err) 2839 goto out; 2840 return 0; 2841 out: 2842 destroy_inodecache(); 2843 out1: 2844 exit_ext4_xattr(); 2845 return err; 2846 } 2847 2848 static void __exit exit_ext4_fs(void) 2849 { 2850 unregister_filesystem(&ext4dev_fs_type); 2851 destroy_inodecache(); 2852 exit_ext4_xattr(); 2853 } 2854 2855 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); 2856 MODULE_DESCRIPTION("Fourth Extended Filesystem with extents"); 2857 MODULE_LICENSE("GPL"); 2858 module_init(init_ext4_fs) 2859 module_exit(exit_ext4_fs) 2860