1 /* 2 * linux/fs/ext4/super.c 3 * 4 * Copyright (C) 1992, 1993, 1994, 1995 5 * Remy Card (card@masi.ibp.fr) 6 * Laboratoire MASI - Institut Blaise Pascal 7 * Universite Pierre et Marie Curie (Paris VI) 8 * 9 * from 10 * 11 * linux/fs/minix/inode.c 12 * 13 * Copyright (C) 1991, 1992 Linus Torvalds 14 * 15 * Big-endian to little-endian byte-swapping/bitmaps by 16 * David S. Miller (davem@caip.rutgers.edu), 1995 17 */ 18 19 #include <linux/module.h> 20 #include <linux/string.h> 21 #include <linux/fs.h> 22 #include <linux/time.h> 23 #include <linux/vmalloc.h> 24 #include <linux/jbd2.h> 25 #include <linux/slab.h> 26 #include <linux/init.h> 27 #include <linux/blkdev.h> 28 #include <linux/parser.h> 29 #include <linux/buffer_head.h> 30 #include <linux/exportfs.h> 31 #include <linux/vfs.h> 32 #include <linux/random.h> 33 #include <linux/mount.h> 34 #include <linux/namei.h> 35 #include <linux/quotaops.h> 36 #include <linux/seq_file.h> 37 #include <linux/proc_fs.h> 38 #include <linux/ctype.h> 39 #include <linux/log2.h> 40 #include <linux/crc16.h> 41 #include <linux/cleancache.h> 42 #include <asm/uaccess.h> 43 44 #include <linux/kthread.h> 45 #include <linux/freezer.h> 46 47 #include "ext4.h" 48 #include "ext4_jbd2.h" 49 #include "xattr.h" 50 #include "acl.h" 51 #include "mballoc.h" 52 53 #define CREATE_TRACE_POINTS 54 #include <trace/events/ext4.h> 55 56 static struct proc_dir_entry *ext4_proc_root; 57 static struct kset *ext4_kset; 58 static struct ext4_lazy_init *ext4_li_info; 59 static struct mutex ext4_li_mtx; 60 static struct ext4_features *ext4_feat; 61 62 static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 63 unsigned long journal_devnum); 64 static int ext4_commit_super(struct super_block *sb, int sync); 65 static void ext4_mark_recovery_complete(struct super_block *sb, 66 struct ext4_super_block *es); 67 static void ext4_clear_journal_err(struct super_block *sb, 68 struct ext4_super_block *es); 69 static int ext4_sync_fs(struct super_block *sb, int wait); 70 static const char *ext4_decode_error(struct super_block *sb, int errno, 71 char nbuf[16]); 72 static int ext4_remount(struct super_block *sb, int *flags, char *data); 73 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); 74 static int ext4_unfreeze(struct super_block *sb); 75 static void ext4_write_super(struct super_block *sb); 76 static int ext4_freeze(struct super_block *sb); 77 static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, 78 const char *dev_name, void *data); 79 static inline int ext2_feature_set_ok(struct super_block *sb); 80 static inline int ext3_feature_set_ok(struct super_block *sb); 81 static int ext4_feature_set_ok(struct super_block *sb, int readonly); 82 static void ext4_destroy_lazyinit_thread(void); 83 static void ext4_unregister_li_request(struct super_block *sb); 84 static void ext4_clear_request_list(void); 85 86 #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 87 static struct file_system_type ext2_fs_type = { 88 .owner = THIS_MODULE, 89 .name = "ext2", 90 .mount = ext4_mount, 91 .kill_sb = kill_block_super, 92 .fs_flags = FS_REQUIRES_DEV, 93 }; 94 #define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type) 95 #else 96 #define IS_EXT2_SB(sb) (0) 97 #endif 98 99 100 #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 101 static struct file_system_type ext3_fs_type = { 102 .owner = THIS_MODULE, 103 .name = "ext3", 104 .mount = ext4_mount, 105 .kill_sb = kill_block_super, 106 .fs_flags = FS_REQUIRES_DEV, 107 }; 108 #define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type) 109 #else 110 #define IS_EXT3_SB(sb) (0) 111 #endif 112 113 void *ext4_kvmalloc(size_t size, gfp_t flags) 114 { 115 void *ret; 116 117 ret = kmalloc(size, flags); 118 if (!ret) 119 ret = __vmalloc(size, flags, PAGE_KERNEL); 120 return ret; 121 } 122 123 void *ext4_kvzalloc(size_t size, gfp_t flags) 124 { 125 void *ret; 126 127 ret = kzalloc(size, flags); 128 if (!ret) 129 ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL); 130 return ret; 131 } 132 133 void ext4_kvfree(void *ptr) 134 { 135 if (is_vmalloc_addr(ptr)) 136 vfree(ptr); 137 else 138 kfree(ptr); 139 140 } 141 142 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, 143 struct ext4_group_desc *bg) 144 { 145 return le32_to_cpu(bg->bg_block_bitmap_lo) | 146 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 147 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); 148 } 149 150 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, 151 struct ext4_group_desc *bg) 152 { 153 return le32_to_cpu(bg->bg_inode_bitmap_lo) | 154 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 155 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); 156 } 157 158 ext4_fsblk_t ext4_inode_table(struct super_block *sb, 159 struct ext4_group_desc *bg) 160 { 161 return le32_to_cpu(bg->bg_inode_table_lo) | 162 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 163 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); 164 } 165 166 __u32 ext4_free_blks_count(struct super_block *sb, 167 struct ext4_group_desc *bg) 168 { 169 return le16_to_cpu(bg->bg_free_blocks_count_lo) | 170 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 171 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0); 172 } 173 174 __u32 ext4_free_inodes_count(struct super_block *sb, 175 struct ext4_group_desc *bg) 176 { 177 return le16_to_cpu(bg->bg_free_inodes_count_lo) | 178 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 179 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0); 180 } 181 182 __u32 ext4_used_dirs_count(struct super_block *sb, 183 struct ext4_group_desc *bg) 184 { 185 return le16_to_cpu(bg->bg_used_dirs_count_lo) | 186 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 187 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0); 188 } 189 190 __u32 ext4_itable_unused_count(struct super_block *sb, 191 struct ext4_group_desc *bg) 192 { 193 return le16_to_cpu(bg->bg_itable_unused_lo) | 194 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 195 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0); 196 } 197 198 void ext4_block_bitmap_set(struct super_block *sb, 199 struct ext4_group_desc *bg, ext4_fsblk_t blk) 200 { 201 bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk); 202 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 203 bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32); 204 } 205 206 void ext4_inode_bitmap_set(struct super_block *sb, 207 struct ext4_group_desc *bg, ext4_fsblk_t blk) 208 { 209 bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk); 210 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 211 bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32); 212 } 213 214 void ext4_inode_table_set(struct super_block *sb, 215 struct ext4_group_desc *bg, ext4_fsblk_t blk) 216 { 217 bg->bg_inode_table_lo = cpu_to_le32((u32)blk); 218 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 219 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32); 220 } 221 222 void ext4_free_blks_set(struct super_block *sb, 223 struct ext4_group_desc *bg, __u32 count) 224 { 225 bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count); 226 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 227 bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16); 228 } 229 230 void ext4_free_inodes_set(struct super_block *sb, 231 struct ext4_group_desc *bg, __u32 count) 232 { 233 bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count); 234 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 235 bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16); 236 } 237 238 void ext4_used_dirs_set(struct super_block *sb, 239 struct ext4_group_desc *bg, __u32 count) 240 { 241 bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count); 242 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 243 bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16); 244 } 245 246 void ext4_itable_unused_set(struct super_block *sb, 247 struct ext4_group_desc *bg, __u32 count) 248 { 249 bg->bg_itable_unused_lo = cpu_to_le16((__u16)count); 250 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 251 bg->bg_itable_unused_hi = cpu_to_le16(count >> 16); 252 } 253 254 255 /* Just increment the non-pointer handle value */ 256 static handle_t *ext4_get_nojournal(void) 257 { 258 handle_t *handle = current->journal_info; 259 unsigned long ref_cnt = (unsigned long)handle; 260 261 BUG_ON(ref_cnt >= EXT4_NOJOURNAL_MAX_REF_COUNT); 262 263 ref_cnt++; 264 handle = (handle_t *)ref_cnt; 265 266 current->journal_info = handle; 267 return handle; 268 } 269 270 271 /* Decrement the non-pointer handle value */ 272 static void ext4_put_nojournal(handle_t *handle) 273 { 274 unsigned long ref_cnt = (unsigned long)handle; 275 276 BUG_ON(ref_cnt == 0); 277 278 ref_cnt--; 279 handle = (handle_t *)ref_cnt; 280 281 current->journal_info = handle; 282 } 283 284 /* 285 * Wrappers for jbd2_journal_start/end. 286 * 287 * The only special thing we need to do here is to make sure that all 288 * journal_end calls result in the superblock being marked dirty, so 289 * that sync() will call the filesystem's write_super callback if 290 * appropriate. 291 * 292 * To avoid j_barrier hold in userspace when a user calls freeze(), 293 * ext4 prevents a new handle from being started by s_frozen, which 294 * is in an upper layer. 295 */ 296 handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) 297 { 298 journal_t *journal; 299 handle_t *handle; 300 301 trace_ext4_journal_start(sb, nblocks, _RET_IP_); 302 if (sb->s_flags & MS_RDONLY) 303 return ERR_PTR(-EROFS); 304 305 journal = EXT4_SB(sb)->s_journal; 306 handle = ext4_journal_current_handle(); 307 308 /* 309 * If a handle has been started, it should be allowed to 310 * finish, otherwise deadlock could happen between freeze 311 * and others(e.g. truncate) due to the restart of the 312 * journal handle if the filesystem is forzen and active 313 * handles are not stopped. 314 */ 315 if (!handle) 316 vfs_check_frozen(sb, SB_FREEZE_TRANS); 317 318 if (!journal) 319 return ext4_get_nojournal(); 320 /* 321 * Special case here: if the journal has aborted behind our 322 * backs (eg. EIO in the commit thread), then we still need to 323 * take the FS itself readonly cleanly. 324 */ 325 if (is_journal_aborted(journal)) { 326 ext4_abort(sb, "Detected aborted journal"); 327 return ERR_PTR(-EROFS); 328 } 329 return jbd2_journal_start(journal, nblocks); 330 } 331 332 /* 333 * The only special thing we need to do here is to make sure that all 334 * jbd2_journal_stop calls result in the superblock being marked dirty, so 335 * that sync() will call the filesystem's write_super callback if 336 * appropriate. 337 */ 338 int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle) 339 { 340 struct super_block *sb; 341 int err; 342 int rc; 343 344 if (!ext4_handle_valid(handle)) { 345 ext4_put_nojournal(handle); 346 return 0; 347 } 348 sb = handle->h_transaction->t_journal->j_private; 349 err = handle->h_err; 350 rc = jbd2_journal_stop(handle); 351 352 if (!err) 353 err = rc; 354 if (err) 355 __ext4_std_error(sb, where, line, err); 356 return err; 357 } 358 359 void ext4_journal_abort_handle(const char *caller, unsigned int line, 360 const char *err_fn, struct buffer_head *bh, 361 handle_t *handle, int err) 362 { 363 char nbuf[16]; 364 const char *errstr = ext4_decode_error(NULL, err, nbuf); 365 366 BUG_ON(!ext4_handle_valid(handle)); 367 368 if (bh) 369 BUFFER_TRACE(bh, "abort"); 370 371 if (!handle->h_err) 372 handle->h_err = err; 373 374 if (is_handle_aborted(handle)) 375 return; 376 377 printk(KERN_ERR "%s:%d: aborting transaction: %s in %s\n", 378 caller, line, errstr, err_fn); 379 380 jbd2_journal_abort_handle(handle); 381 } 382 383 static void __save_error_info(struct super_block *sb, const char *func, 384 unsigned int line) 385 { 386 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 387 388 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 389 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 390 es->s_last_error_time = cpu_to_le32(get_seconds()); 391 strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func)); 392 es->s_last_error_line = cpu_to_le32(line); 393 if (!es->s_first_error_time) { 394 es->s_first_error_time = es->s_last_error_time; 395 strncpy(es->s_first_error_func, func, 396 sizeof(es->s_first_error_func)); 397 es->s_first_error_line = cpu_to_le32(line); 398 es->s_first_error_ino = es->s_last_error_ino; 399 es->s_first_error_block = es->s_last_error_block; 400 } 401 /* 402 * Start the daily error reporting function if it hasn't been 403 * started already 404 */ 405 if (!es->s_error_count) 406 mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ); 407 es->s_error_count = cpu_to_le32(le32_to_cpu(es->s_error_count) + 1); 408 } 409 410 static void save_error_info(struct super_block *sb, const char *func, 411 unsigned int line) 412 { 413 __save_error_info(sb, func, line); 414 ext4_commit_super(sb, 1); 415 } 416 417 418 /* Deal with the reporting of failure conditions on a filesystem such as 419 * inconsistencies detected or read IO failures. 420 * 421 * On ext2, we can store the error state of the filesystem in the 422 * superblock. That is not possible on ext4, because we may have other 423 * write ordering constraints on the superblock which prevent us from 424 * writing it out straight away; and given that the journal is about to 425 * be aborted, we can't rely on the current, or future, transactions to 426 * write out the superblock safely. 427 * 428 * We'll just use the jbd2_journal_abort() error code to record an error in 429 * the journal instead. On recovery, the journal will complain about 430 * that error until we've noted it down and cleared it. 431 */ 432 433 static void ext4_handle_error(struct super_block *sb) 434 { 435 if (sb->s_flags & MS_RDONLY) 436 return; 437 438 if (!test_opt(sb, ERRORS_CONT)) { 439 journal_t *journal = EXT4_SB(sb)->s_journal; 440 441 EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; 442 if (journal) 443 jbd2_journal_abort(journal, -EIO); 444 } 445 if (test_opt(sb, ERRORS_RO)) { 446 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); 447 sb->s_flags |= MS_RDONLY; 448 } 449 if (test_opt(sb, ERRORS_PANIC)) 450 panic("EXT4-fs (device %s): panic forced after error\n", 451 sb->s_id); 452 } 453 454 void __ext4_error(struct super_block *sb, const char *function, 455 unsigned int line, const char *fmt, ...) 456 { 457 struct va_format vaf; 458 va_list args; 459 460 va_start(args, fmt); 461 vaf.fmt = fmt; 462 vaf.va = &args; 463 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n", 464 sb->s_id, function, line, current->comm, &vaf); 465 va_end(args); 466 467 ext4_handle_error(sb); 468 } 469 470 void ext4_error_inode(struct inode *inode, const char *function, 471 unsigned int line, ext4_fsblk_t block, 472 const char *fmt, ...) 473 { 474 va_list args; 475 struct va_format vaf; 476 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; 477 478 es->s_last_error_ino = cpu_to_le32(inode->i_ino); 479 es->s_last_error_block = cpu_to_le64(block); 480 save_error_info(inode->i_sb, function, line); 481 va_start(args, fmt); 482 vaf.fmt = fmt; 483 vaf.va = &args; 484 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: inode #%lu: ", 485 inode->i_sb->s_id, function, line, inode->i_ino); 486 if (block) 487 printk(KERN_CONT "block %llu: ", block); 488 printk(KERN_CONT "comm %s: %pV\n", current->comm, &vaf); 489 va_end(args); 490 491 ext4_handle_error(inode->i_sb); 492 } 493 494 void ext4_error_file(struct file *file, const char *function, 495 unsigned int line, ext4_fsblk_t block, 496 const char *fmt, ...) 497 { 498 va_list args; 499 struct va_format vaf; 500 struct ext4_super_block *es; 501 struct inode *inode = file->f_dentry->d_inode; 502 char pathname[80], *path; 503 504 es = EXT4_SB(inode->i_sb)->s_es; 505 es->s_last_error_ino = cpu_to_le32(inode->i_ino); 506 save_error_info(inode->i_sb, function, line); 507 path = d_path(&(file->f_path), pathname, sizeof(pathname)); 508 if (IS_ERR(path)) 509 path = "(unknown)"; 510 printk(KERN_CRIT 511 "EXT4-fs error (device %s): %s:%d: inode #%lu: ", 512 inode->i_sb->s_id, function, line, inode->i_ino); 513 if (block) 514 printk(KERN_CONT "block %llu: ", block); 515 va_start(args, fmt); 516 vaf.fmt = fmt; 517 vaf.va = &args; 518 printk(KERN_CONT "comm %s: path %s: %pV\n", current->comm, path, &vaf); 519 va_end(args); 520 521 ext4_handle_error(inode->i_sb); 522 } 523 524 static const char *ext4_decode_error(struct super_block *sb, int errno, 525 char nbuf[16]) 526 { 527 char *errstr = NULL; 528 529 switch (errno) { 530 case -EIO: 531 errstr = "IO failure"; 532 break; 533 case -ENOMEM: 534 errstr = "Out of memory"; 535 break; 536 case -EROFS: 537 if (!sb || (EXT4_SB(sb)->s_journal && 538 EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT)) 539 errstr = "Journal has aborted"; 540 else 541 errstr = "Readonly filesystem"; 542 break; 543 default: 544 /* If the caller passed in an extra buffer for unknown 545 * errors, textualise them now. Else we just return 546 * NULL. */ 547 if (nbuf) { 548 /* Check for truncated error codes... */ 549 if (snprintf(nbuf, 16, "error %d", -errno) >= 0) 550 errstr = nbuf; 551 } 552 break; 553 } 554 555 return errstr; 556 } 557 558 /* __ext4_std_error decodes expected errors from journaling functions 559 * automatically and invokes the appropriate error response. */ 560 561 void __ext4_std_error(struct super_block *sb, const char *function, 562 unsigned int line, int errno) 563 { 564 char nbuf[16]; 565 const char *errstr; 566 567 /* Special case: if the error is EROFS, and we're not already 568 * inside a transaction, then there's really no point in logging 569 * an error. */ 570 if (errno == -EROFS && journal_current_handle() == NULL && 571 (sb->s_flags & MS_RDONLY)) 572 return; 573 574 errstr = ext4_decode_error(sb, errno, nbuf); 575 printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n", 576 sb->s_id, function, line, errstr); 577 save_error_info(sb, function, line); 578 579 ext4_handle_error(sb); 580 } 581 582 /* 583 * ext4_abort is a much stronger failure handler than ext4_error. The 584 * abort function may be used to deal with unrecoverable failures such 585 * as journal IO errors or ENOMEM at a critical moment in log management. 586 * 587 * We unconditionally force the filesystem into an ABORT|READONLY state, 588 * unless the error response on the fs has been set to panic in which 589 * case we take the easy way out and panic immediately. 590 */ 591 592 void __ext4_abort(struct super_block *sb, const char *function, 593 unsigned int line, const char *fmt, ...) 594 { 595 va_list args; 596 597 save_error_info(sb, function, line); 598 va_start(args, fmt); 599 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: ", sb->s_id, 600 function, line); 601 vprintk(fmt, args); 602 printk("\n"); 603 va_end(args); 604 605 if ((sb->s_flags & MS_RDONLY) == 0) { 606 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); 607 sb->s_flags |= MS_RDONLY; 608 EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; 609 if (EXT4_SB(sb)->s_journal) 610 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); 611 save_error_info(sb, function, line); 612 } 613 if (test_opt(sb, ERRORS_PANIC)) 614 panic("EXT4-fs panic from previous error\n"); 615 } 616 617 void ext4_msg(struct super_block *sb, const char *prefix, const char *fmt, ...) 618 { 619 struct va_format vaf; 620 va_list args; 621 622 va_start(args, fmt); 623 vaf.fmt = fmt; 624 vaf.va = &args; 625 printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf); 626 va_end(args); 627 } 628 629 void __ext4_warning(struct super_block *sb, const char *function, 630 unsigned int line, const char *fmt, ...) 631 { 632 struct va_format vaf; 633 va_list args; 634 635 va_start(args, fmt); 636 vaf.fmt = fmt; 637 vaf.va = &args; 638 printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n", 639 sb->s_id, function, line, &vaf); 640 va_end(args); 641 } 642 643 void __ext4_grp_locked_error(const char *function, unsigned int line, 644 struct super_block *sb, ext4_group_t grp, 645 unsigned long ino, ext4_fsblk_t block, 646 const char *fmt, ...) 647 __releases(bitlock) 648 __acquires(bitlock) 649 { 650 struct va_format vaf; 651 va_list args; 652 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 653 654 es->s_last_error_ino = cpu_to_le32(ino); 655 es->s_last_error_block = cpu_to_le64(block); 656 __save_error_info(sb, function, line); 657 658 va_start(args, fmt); 659 660 vaf.fmt = fmt; 661 vaf.va = &args; 662 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ", 663 sb->s_id, function, line, grp); 664 if (ino) 665 printk(KERN_CONT "inode %lu: ", ino); 666 if (block) 667 printk(KERN_CONT "block %llu:", (unsigned long long) block); 668 printk(KERN_CONT "%pV\n", &vaf); 669 va_end(args); 670 671 if (test_opt(sb, ERRORS_CONT)) { 672 ext4_commit_super(sb, 0); 673 return; 674 } 675 676 ext4_unlock_group(sb, grp); 677 ext4_handle_error(sb); 678 /* 679 * We only get here in the ERRORS_RO case; relocking the group 680 * may be dangerous, but nothing bad will happen since the 681 * filesystem will have already been marked read/only and the 682 * journal has been aborted. We return 1 as a hint to callers 683 * who might what to use the return value from 684 * ext4_grp_locked_error() to distinguish between the 685 * ERRORS_CONT and ERRORS_RO case, and perhaps return more 686 * aggressively from the ext4 function in question, with a 687 * more appropriate error code. 688 */ 689 ext4_lock_group(sb, grp); 690 return; 691 } 692 693 void ext4_update_dynamic_rev(struct super_block *sb) 694 { 695 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 696 697 if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) 698 return; 699 700 ext4_warning(sb, 701 "updating to rev %d because of new feature flag, " 702 "running e2fsck is recommended", 703 EXT4_DYNAMIC_REV); 704 705 es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO); 706 es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE); 707 es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV); 708 /* leave es->s_feature_*compat flags alone */ 709 /* es->s_uuid will be set by e2fsck if empty */ 710 711 /* 712 * The rest of the superblock fields should be zero, and if not it 713 * means they are likely already in use, so leave them alone. We 714 * can leave it up to e2fsck to clean up any inconsistencies there. 715 */ 716 } 717 718 /* 719 * Open the external journal device 720 */ 721 static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb) 722 { 723 struct block_device *bdev; 724 char b[BDEVNAME_SIZE]; 725 726 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb); 727 if (IS_ERR(bdev)) 728 goto fail; 729 return bdev; 730 731 fail: 732 ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld", 733 __bdevname(dev, b), PTR_ERR(bdev)); 734 return NULL; 735 } 736 737 /* 738 * Release the journal device 739 */ 740 static int ext4_blkdev_put(struct block_device *bdev) 741 { 742 return blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); 743 } 744 745 static int ext4_blkdev_remove(struct ext4_sb_info *sbi) 746 { 747 struct block_device *bdev; 748 int ret = -ENODEV; 749 750 bdev = sbi->journal_bdev; 751 if (bdev) { 752 ret = ext4_blkdev_put(bdev); 753 sbi->journal_bdev = NULL; 754 } 755 return ret; 756 } 757 758 static inline struct inode *orphan_list_entry(struct list_head *l) 759 { 760 return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode; 761 } 762 763 static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi) 764 { 765 struct list_head *l; 766 767 ext4_msg(sb, KERN_ERR, "sb orphan head is %d", 768 le32_to_cpu(sbi->s_es->s_last_orphan)); 769 770 printk(KERN_ERR "sb_info orphan list:\n"); 771 list_for_each(l, &sbi->s_orphan) { 772 struct inode *inode = orphan_list_entry(l); 773 printk(KERN_ERR " " 774 "inode %s:%lu at %p: mode %o, nlink %d, next %d\n", 775 inode->i_sb->s_id, inode->i_ino, inode, 776 inode->i_mode, inode->i_nlink, 777 NEXT_ORPHAN(inode)); 778 } 779 } 780 781 static void ext4_put_super(struct super_block *sb) 782 { 783 struct ext4_sb_info *sbi = EXT4_SB(sb); 784 struct ext4_super_block *es = sbi->s_es; 785 int i, err; 786 787 ext4_unregister_li_request(sb); 788 dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); 789 790 flush_workqueue(sbi->dio_unwritten_wq); 791 destroy_workqueue(sbi->dio_unwritten_wq); 792 793 lock_super(sb); 794 if (sb->s_dirt) 795 ext4_commit_super(sb, 1); 796 797 if (sbi->s_journal) { 798 err = jbd2_journal_destroy(sbi->s_journal); 799 sbi->s_journal = NULL; 800 if (err < 0) 801 ext4_abort(sb, "Couldn't clean up the journal"); 802 } 803 804 del_timer(&sbi->s_err_report); 805 ext4_release_system_zone(sb); 806 ext4_mb_release(sb); 807 ext4_ext_release(sb); 808 ext4_xattr_put_super(sb); 809 810 if (!(sb->s_flags & MS_RDONLY)) { 811 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 812 es->s_state = cpu_to_le16(sbi->s_mount_state); 813 ext4_commit_super(sb, 1); 814 } 815 if (sbi->s_proc) { 816 remove_proc_entry(sb->s_id, ext4_proc_root); 817 } 818 kobject_del(&sbi->s_kobj); 819 820 for (i = 0; i < sbi->s_gdb_count; i++) 821 brelse(sbi->s_group_desc[i]); 822 ext4_kvfree(sbi->s_group_desc); 823 ext4_kvfree(sbi->s_flex_groups); 824 percpu_counter_destroy(&sbi->s_freeblocks_counter); 825 percpu_counter_destroy(&sbi->s_freeinodes_counter); 826 percpu_counter_destroy(&sbi->s_dirs_counter); 827 percpu_counter_destroy(&sbi->s_dirtyblocks_counter); 828 brelse(sbi->s_sbh); 829 #ifdef CONFIG_QUOTA 830 for (i = 0; i < MAXQUOTAS; i++) 831 kfree(sbi->s_qf_names[i]); 832 #endif 833 834 /* Debugging code just in case the in-memory inode orphan list 835 * isn't empty. The on-disk one can be non-empty if we've 836 * detected an error and taken the fs readonly, but the 837 * in-memory list had better be clean by this point. */ 838 if (!list_empty(&sbi->s_orphan)) 839 dump_orphan_list(sb, sbi); 840 J_ASSERT(list_empty(&sbi->s_orphan)); 841 842 invalidate_bdev(sb->s_bdev); 843 if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) { 844 /* 845 * Invalidate the journal device's buffers. We don't want them 846 * floating about in memory - the physical journal device may 847 * hotswapped, and it breaks the `ro-after' testing code. 848 */ 849 sync_blockdev(sbi->journal_bdev); 850 invalidate_bdev(sbi->journal_bdev); 851 ext4_blkdev_remove(sbi); 852 } 853 if (sbi->s_mmp_tsk) 854 kthread_stop(sbi->s_mmp_tsk); 855 sb->s_fs_info = NULL; 856 /* 857 * Now that we are completely done shutting down the 858 * superblock, we need to actually destroy the kobject. 859 */ 860 unlock_super(sb); 861 kobject_put(&sbi->s_kobj); 862 wait_for_completion(&sbi->s_kobj_unregister); 863 kfree(sbi->s_blockgroup_lock); 864 kfree(sbi); 865 } 866 867 static struct kmem_cache *ext4_inode_cachep; 868 869 /* 870 * Called inside transaction, so use GFP_NOFS 871 */ 872 static struct inode *ext4_alloc_inode(struct super_block *sb) 873 { 874 struct ext4_inode_info *ei; 875 876 ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); 877 if (!ei) 878 return NULL; 879 880 ei->vfs_inode.i_version = 1; 881 ei->vfs_inode.i_data.writeback_index = 0; 882 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); 883 INIT_LIST_HEAD(&ei->i_prealloc_list); 884 spin_lock_init(&ei->i_prealloc_lock); 885 ei->i_reserved_data_blocks = 0; 886 ei->i_reserved_meta_blocks = 0; 887 ei->i_allocated_meta_blocks = 0; 888 ei->i_da_metadata_calc_len = 0; 889 spin_lock_init(&(ei->i_block_reservation_lock)); 890 #ifdef CONFIG_QUOTA 891 ei->i_reserved_quota = 0; 892 #endif 893 ei->jinode = NULL; 894 INIT_LIST_HEAD(&ei->i_completed_io_list); 895 spin_lock_init(&ei->i_completed_io_lock); 896 ei->cur_aio_dio = NULL; 897 ei->i_sync_tid = 0; 898 ei->i_datasync_tid = 0; 899 atomic_set(&ei->i_ioend_count, 0); 900 atomic_set(&ei->i_aiodio_unwritten, 0); 901 902 return &ei->vfs_inode; 903 } 904 905 static int ext4_drop_inode(struct inode *inode) 906 { 907 int drop = generic_drop_inode(inode); 908 909 trace_ext4_drop_inode(inode, drop); 910 return drop; 911 } 912 913 static void ext4_i_callback(struct rcu_head *head) 914 { 915 struct inode *inode = container_of(head, struct inode, i_rcu); 916 INIT_LIST_HEAD(&inode->i_dentry); 917 kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); 918 } 919 920 static void ext4_destroy_inode(struct inode *inode) 921 { 922 ext4_ioend_wait(inode); 923 if (!list_empty(&(EXT4_I(inode)->i_orphan))) { 924 ext4_msg(inode->i_sb, KERN_ERR, 925 "Inode %lu (%p): orphan list check failed!", 926 inode->i_ino, EXT4_I(inode)); 927 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, 928 EXT4_I(inode), sizeof(struct ext4_inode_info), 929 true); 930 dump_stack(); 931 } 932 call_rcu(&inode->i_rcu, ext4_i_callback); 933 } 934 935 static void init_once(void *foo) 936 { 937 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; 938 939 INIT_LIST_HEAD(&ei->i_orphan); 940 #ifdef CONFIG_EXT4_FS_XATTR 941 init_rwsem(&ei->xattr_sem); 942 #endif 943 init_rwsem(&ei->i_data_sem); 944 inode_init_once(&ei->vfs_inode); 945 } 946 947 static int init_inodecache(void) 948 { 949 ext4_inode_cachep = kmem_cache_create("ext4_inode_cache", 950 sizeof(struct ext4_inode_info), 951 0, (SLAB_RECLAIM_ACCOUNT| 952 SLAB_MEM_SPREAD), 953 init_once); 954 if (ext4_inode_cachep == NULL) 955 return -ENOMEM; 956 return 0; 957 } 958 959 static void destroy_inodecache(void) 960 { 961 kmem_cache_destroy(ext4_inode_cachep); 962 } 963 964 void ext4_clear_inode(struct inode *inode) 965 { 966 invalidate_inode_buffers(inode); 967 end_writeback(inode); 968 dquot_drop(inode); 969 ext4_discard_preallocations(inode); 970 if (EXT4_I(inode)->jinode) { 971 jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode), 972 EXT4_I(inode)->jinode); 973 jbd2_free_inode(EXT4_I(inode)->jinode); 974 EXT4_I(inode)->jinode = NULL; 975 } 976 } 977 978 static inline void ext4_show_quota_options(struct seq_file *seq, 979 struct super_block *sb) 980 { 981 #if defined(CONFIG_QUOTA) 982 struct ext4_sb_info *sbi = EXT4_SB(sb); 983 984 if (sbi->s_jquota_fmt) { 985 char *fmtname = ""; 986 987 switch (sbi->s_jquota_fmt) { 988 case QFMT_VFS_OLD: 989 fmtname = "vfsold"; 990 break; 991 case QFMT_VFS_V0: 992 fmtname = "vfsv0"; 993 break; 994 case QFMT_VFS_V1: 995 fmtname = "vfsv1"; 996 break; 997 } 998 seq_printf(seq, ",jqfmt=%s", fmtname); 999 } 1000 1001 if (sbi->s_qf_names[USRQUOTA]) 1002 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); 1003 1004 if (sbi->s_qf_names[GRPQUOTA]) 1005 seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); 1006 1007 if (test_opt(sb, USRQUOTA)) 1008 seq_puts(seq, ",usrquota"); 1009 1010 if (test_opt(sb, GRPQUOTA)) 1011 seq_puts(seq, ",grpquota"); 1012 #endif 1013 } 1014 1015 /* 1016 * Show an option if 1017 * - it's set to a non-default value OR 1018 * - if the per-sb default is different from the global default 1019 */ 1020 static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) 1021 { 1022 int def_errors; 1023 unsigned long def_mount_opts; 1024 struct super_block *sb = vfs->mnt_sb; 1025 struct ext4_sb_info *sbi = EXT4_SB(sb); 1026 struct ext4_super_block *es = sbi->s_es; 1027 1028 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 1029 def_errors = le16_to_cpu(es->s_errors); 1030 1031 if (sbi->s_sb_block != 1) 1032 seq_printf(seq, ",sb=%llu", sbi->s_sb_block); 1033 if (test_opt(sb, MINIX_DF)) 1034 seq_puts(seq, ",minixdf"); 1035 if (test_opt(sb, GRPID) && !(def_mount_opts & EXT4_DEFM_BSDGROUPS)) 1036 seq_puts(seq, ",grpid"); 1037 if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS)) 1038 seq_puts(seq, ",nogrpid"); 1039 if (sbi->s_resuid != EXT4_DEF_RESUID || 1040 le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) { 1041 seq_printf(seq, ",resuid=%u", sbi->s_resuid); 1042 } 1043 if (sbi->s_resgid != EXT4_DEF_RESGID || 1044 le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) { 1045 seq_printf(seq, ",resgid=%u", sbi->s_resgid); 1046 } 1047 if (test_opt(sb, ERRORS_RO)) { 1048 if (def_errors == EXT4_ERRORS_PANIC || 1049 def_errors == EXT4_ERRORS_CONTINUE) { 1050 seq_puts(seq, ",errors=remount-ro"); 1051 } 1052 } 1053 if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE) 1054 seq_puts(seq, ",errors=continue"); 1055 if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC) 1056 seq_puts(seq, ",errors=panic"); 1057 if (test_opt(sb, NO_UID32) && !(def_mount_opts & EXT4_DEFM_UID16)) 1058 seq_puts(seq, ",nouid32"); 1059 if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) 1060 seq_puts(seq, ",debug"); 1061 if (test_opt(sb, OLDALLOC)) 1062 seq_puts(seq, ",oldalloc"); 1063 #ifdef CONFIG_EXT4_FS_XATTR 1064 if (test_opt(sb, XATTR_USER)) 1065 seq_puts(seq, ",user_xattr"); 1066 if (!test_opt(sb, XATTR_USER)) 1067 seq_puts(seq, ",nouser_xattr"); 1068 #endif 1069 #ifdef CONFIG_EXT4_FS_POSIX_ACL 1070 if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) 1071 seq_puts(seq, ",acl"); 1072 if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) 1073 seq_puts(seq, ",noacl"); 1074 #endif 1075 if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) { 1076 seq_printf(seq, ",commit=%u", 1077 (unsigned) (sbi->s_commit_interval / HZ)); 1078 } 1079 if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) { 1080 seq_printf(seq, ",min_batch_time=%u", 1081 (unsigned) sbi->s_min_batch_time); 1082 } 1083 if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) { 1084 seq_printf(seq, ",max_batch_time=%u", 1085 (unsigned) sbi->s_min_batch_time); 1086 } 1087 1088 /* 1089 * We're changing the default of barrier mount option, so 1090 * let's always display its mount state so it's clear what its 1091 * status is. 1092 */ 1093 seq_puts(seq, ",barrier="); 1094 seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0"); 1095 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) 1096 seq_puts(seq, ",journal_async_commit"); 1097 else if (test_opt(sb, JOURNAL_CHECKSUM)) 1098 seq_puts(seq, ",journal_checksum"); 1099 if (test_opt(sb, I_VERSION)) 1100 seq_puts(seq, ",i_version"); 1101 if (!test_opt(sb, DELALLOC) && 1102 !(def_mount_opts & EXT4_DEFM_NODELALLOC)) 1103 seq_puts(seq, ",nodelalloc"); 1104 1105 if (!test_opt(sb, MBLK_IO_SUBMIT)) 1106 seq_puts(seq, ",nomblk_io_submit"); 1107 if (sbi->s_stripe) 1108 seq_printf(seq, ",stripe=%lu", sbi->s_stripe); 1109 /* 1110 * journal mode get enabled in different ways 1111 * So just print the value even if we didn't specify it 1112 */ 1113 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 1114 seq_puts(seq, ",data=journal"); 1115 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 1116 seq_puts(seq, ",data=ordered"); 1117 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) 1118 seq_puts(seq, ",data=writeback"); 1119 1120 if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS) 1121 seq_printf(seq, ",inode_readahead_blks=%u", 1122 sbi->s_inode_readahead_blks); 1123 1124 if (test_opt(sb, DATA_ERR_ABORT)) 1125 seq_puts(seq, ",data_err=abort"); 1126 1127 if (test_opt(sb, NO_AUTO_DA_ALLOC)) 1128 seq_puts(seq, ",noauto_da_alloc"); 1129 1130 if (test_opt(sb, DISCARD) && !(def_mount_opts & EXT4_DEFM_DISCARD)) 1131 seq_puts(seq, ",discard"); 1132 1133 if (test_opt(sb, NOLOAD)) 1134 seq_puts(seq, ",norecovery"); 1135 1136 if (test_opt(sb, DIOREAD_NOLOCK)) 1137 seq_puts(seq, ",dioread_nolock"); 1138 1139 if (test_opt(sb, BLOCK_VALIDITY) && 1140 !(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)) 1141 seq_puts(seq, ",block_validity"); 1142 1143 if (!test_opt(sb, INIT_INODE_TABLE)) 1144 seq_puts(seq, ",noinit_inode_table"); 1145 else if (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT) 1146 seq_printf(seq, ",init_inode_table=%u", 1147 (unsigned) sbi->s_li_wait_mult); 1148 1149 ext4_show_quota_options(seq, sb); 1150 1151 return 0; 1152 } 1153 1154 static struct inode *ext4_nfs_get_inode(struct super_block *sb, 1155 u64 ino, u32 generation) 1156 { 1157 struct inode *inode; 1158 1159 if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) 1160 return ERR_PTR(-ESTALE); 1161 if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)) 1162 return ERR_PTR(-ESTALE); 1163 1164 /* iget isn't really right if the inode is currently unallocated!! 1165 * 1166 * ext4_read_inode will return a bad_inode if the inode had been 1167 * deleted, so we should be safe. 1168 * 1169 * Currently we don't know the generation for parent directory, so 1170 * a generation of 0 means "accept any" 1171 */ 1172 inode = ext4_iget(sb, ino); 1173 if (IS_ERR(inode)) 1174 return ERR_CAST(inode); 1175 if (generation && inode->i_generation != generation) { 1176 iput(inode); 1177 return ERR_PTR(-ESTALE); 1178 } 1179 1180 return inode; 1181 } 1182 1183 static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid, 1184 int fh_len, int fh_type) 1185 { 1186 return generic_fh_to_dentry(sb, fid, fh_len, fh_type, 1187 ext4_nfs_get_inode); 1188 } 1189 1190 static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, 1191 int fh_len, int fh_type) 1192 { 1193 return generic_fh_to_parent(sb, fid, fh_len, fh_type, 1194 ext4_nfs_get_inode); 1195 } 1196 1197 /* 1198 * Try to release metadata pages (indirect blocks, directories) which are 1199 * mapped via the block device. Since these pages could have journal heads 1200 * which would prevent try_to_free_buffers() from freeing them, we must use 1201 * jbd2 layer's try_to_free_buffers() function to release them. 1202 */ 1203 static int bdev_try_to_free_page(struct super_block *sb, struct page *page, 1204 gfp_t wait) 1205 { 1206 journal_t *journal = EXT4_SB(sb)->s_journal; 1207 1208 WARN_ON(PageChecked(page)); 1209 if (!page_has_buffers(page)) 1210 return 0; 1211 if (journal) 1212 return jbd2_journal_try_to_free_buffers(journal, page, 1213 wait & ~__GFP_WAIT); 1214 return try_to_free_buffers(page); 1215 } 1216 1217 #ifdef CONFIG_QUOTA 1218 #define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group") 1219 #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) 1220 1221 static int ext4_write_dquot(struct dquot *dquot); 1222 static int ext4_acquire_dquot(struct dquot *dquot); 1223 static int ext4_release_dquot(struct dquot *dquot); 1224 static int ext4_mark_dquot_dirty(struct dquot *dquot); 1225 static int ext4_write_info(struct super_block *sb, int type); 1226 static int ext4_quota_on(struct super_block *sb, int type, int format_id, 1227 struct path *path); 1228 static int ext4_quota_off(struct super_block *sb, int type); 1229 static int ext4_quota_on_mount(struct super_block *sb, int type); 1230 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 1231 size_t len, loff_t off); 1232 static ssize_t ext4_quota_write(struct super_block *sb, int type, 1233 const char *data, size_t len, loff_t off); 1234 1235 static const struct dquot_operations ext4_quota_operations = { 1236 .get_reserved_space = ext4_get_reserved_space, 1237 .write_dquot = ext4_write_dquot, 1238 .acquire_dquot = ext4_acquire_dquot, 1239 .release_dquot = ext4_release_dquot, 1240 .mark_dirty = ext4_mark_dquot_dirty, 1241 .write_info = ext4_write_info, 1242 .alloc_dquot = dquot_alloc, 1243 .destroy_dquot = dquot_destroy, 1244 }; 1245 1246 static const struct quotactl_ops ext4_qctl_operations = { 1247 .quota_on = ext4_quota_on, 1248 .quota_off = ext4_quota_off, 1249 .quota_sync = dquot_quota_sync, 1250 .get_info = dquot_get_dqinfo, 1251 .set_info = dquot_set_dqinfo, 1252 .get_dqblk = dquot_get_dqblk, 1253 .set_dqblk = dquot_set_dqblk 1254 }; 1255 #endif 1256 1257 static const struct super_operations ext4_sops = { 1258 .alloc_inode = ext4_alloc_inode, 1259 .destroy_inode = ext4_destroy_inode, 1260 .write_inode = ext4_write_inode, 1261 .dirty_inode = ext4_dirty_inode, 1262 .drop_inode = ext4_drop_inode, 1263 .evict_inode = ext4_evict_inode, 1264 .put_super = ext4_put_super, 1265 .sync_fs = ext4_sync_fs, 1266 .freeze_fs = ext4_freeze, 1267 .unfreeze_fs = ext4_unfreeze, 1268 .statfs = ext4_statfs, 1269 .remount_fs = ext4_remount, 1270 .show_options = ext4_show_options, 1271 #ifdef CONFIG_QUOTA 1272 .quota_read = ext4_quota_read, 1273 .quota_write = ext4_quota_write, 1274 #endif 1275 .bdev_try_to_free_page = bdev_try_to_free_page, 1276 }; 1277 1278 static const struct super_operations ext4_nojournal_sops = { 1279 .alloc_inode = ext4_alloc_inode, 1280 .destroy_inode = ext4_destroy_inode, 1281 .write_inode = ext4_write_inode, 1282 .dirty_inode = ext4_dirty_inode, 1283 .drop_inode = ext4_drop_inode, 1284 .evict_inode = ext4_evict_inode, 1285 .write_super = ext4_write_super, 1286 .put_super = ext4_put_super, 1287 .statfs = ext4_statfs, 1288 .remount_fs = ext4_remount, 1289 .show_options = ext4_show_options, 1290 #ifdef CONFIG_QUOTA 1291 .quota_read = ext4_quota_read, 1292 .quota_write = ext4_quota_write, 1293 #endif 1294 .bdev_try_to_free_page = bdev_try_to_free_page, 1295 }; 1296 1297 static const struct export_operations ext4_export_ops = { 1298 .fh_to_dentry = ext4_fh_to_dentry, 1299 .fh_to_parent = ext4_fh_to_parent, 1300 .get_parent = ext4_get_parent, 1301 }; 1302 1303 enum { 1304 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, 1305 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, 1306 Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov, 1307 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 1308 Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, Opt_nobh, Opt_bh, 1309 Opt_commit, Opt_min_batch_time, Opt_max_batch_time, 1310 Opt_journal_update, Opt_journal_dev, 1311 Opt_journal_checksum, Opt_journal_async_commit, 1312 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 1313 Opt_data_err_abort, Opt_data_err_ignore, 1314 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 1315 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, 1316 Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, 1317 Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version, 1318 Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit, 1319 Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, 1320 Opt_inode_readahead_blks, Opt_journal_ioprio, 1321 Opt_dioread_nolock, Opt_dioread_lock, 1322 Opt_discard, Opt_nodiscard, 1323 Opt_init_inode_table, Opt_noinit_inode_table, 1324 }; 1325 1326 static const match_table_t tokens = { 1327 {Opt_bsd_df, "bsddf"}, 1328 {Opt_minix_df, "minixdf"}, 1329 {Opt_grpid, "grpid"}, 1330 {Opt_grpid, "bsdgroups"}, 1331 {Opt_nogrpid, "nogrpid"}, 1332 {Opt_nogrpid, "sysvgroups"}, 1333 {Opt_resgid, "resgid=%u"}, 1334 {Opt_resuid, "resuid=%u"}, 1335 {Opt_sb, "sb=%u"}, 1336 {Opt_err_cont, "errors=continue"}, 1337 {Opt_err_panic, "errors=panic"}, 1338 {Opt_err_ro, "errors=remount-ro"}, 1339 {Opt_nouid32, "nouid32"}, 1340 {Opt_debug, "debug"}, 1341 {Opt_oldalloc, "oldalloc"}, 1342 {Opt_orlov, "orlov"}, 1343 {Opt_user_xattr, "user_xattr"}, 1344 {Opt_nouser_xattr, "nouser_xattr"}, 1345 {Opt_acl, "acl"}, 1346 {Opt_noacl, "noacl"}, 1347 {Opt_noload, "noload"}, 1348 {Opt_noload, "norecovery"}, 1349 {Opt_nobh, "nobh"}, 1350 {Opt_bh, "bh"}, 1351 {Opt_commit, "commit=%u"}, 1352 {Opt_min_batch_time, "min_batch_time=%u"}, 1353 {Opt_max_batch_time, "max_batch_time=%u"}, 1354 {Opt_journal_update, "journal=update"}, 1355 {Opt_journal_dev, "journal_dev=%u"}, 1356 {Opt_journal_checksum, "journal_checksum"}, 1357 {Opt_journal_async_commit, "journal_async_commit"}, 1358 {Opt_abort, "abort"}, 1359 {Opt_data_journal, "data=journal"}, 1360 {Opt_data_ordered, "data=ordered"}, 1361 {Opt_data_writeback, "data=writeback"}, 1362 {Opt_data_err_abort, "data_err=abort"}, 1363 {Opt_data_err_ignore, "data_err=ignore"}, 1364 {Opt_offusrjquota, "usrjquota="}, 1365 {Opt_usrjquota, "usrjquota=%s"}, 1366 {Opt_offgrpjquota, "grpjquota="}, 1367 {Opt_grpjquota, "grpjquota=%s"}, 1368 {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, 1369 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, 1370 {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, 1371 {Opt_grpquota, "grpquota"}, 1372 {Opt_noquota, "noquota"}, 1373 {Opt_quota, "quota"}, 1374 {Opt_usrquota, "usrquota"}, 1375 {Opt_barrier, "barrier=%u"}, 1376 {Opt_barrier, "barrier"}, 1377 {Opt_nobarrier, "nobarrier"}, 1378 {Opt_i_version, "i_version"}, 1379 {Opt_stripe, "stripe=%u"}, 1380 {Opt_resize, "resize"}, 1381 {Opt_delalloc, "delalloc"}, 1382 {Opt_nodelalloc, "nodelalloc"}, 1383 {Opt_mblk_io_submit, "mblk_io_submit"}, 1384 {Opt_nomblk_io_submit, "nomblk_io_submit"}, 1385 {Opt_block_validity, "block_validity"}, 1386 {Opt_noblock_validity, "noblock_validity"}, 1387 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, 1388 {Opt_journal_ioprio, "journal_ioprio=%u"}, 1389 {Opt_auto_da_alloc, "auto_da_alloc=%u"}, 1390 {Opt_auto_da_alloc, "auto_da_alloc"}, 1391 {Opt_noauto_da_alloc, "noauto_da_alloc"}, 1392 {Opt_dioread_nolock, "dioread_nolock"}, 1393 {Opt_dioread_lock, "dioread_lock"}, 1394 {Opt_discard, "discard"}, 1395 {Opt_nodiscard, "nodiscard"}, 1396 {Opt_init_inode_table, "init_itable=%u"}, 1397 {Opt_init_inode_table, "init_itable"}, 1398 {Opt_noinit_inode_table, "noinit_itable"}, 1399 {Opt_err, NULL}, 1400 }; 1401 1402 static ext4_fsblk_t get_sb_block(void **data) 1403 { 1404 ext4_fsblk_t sb_block; 1405 char *options = (char *) *data; 1406 1407 if (!options || strncmp(options, "sb=", 3) != 0) 1408 return 1; /* Default location */ 1409 1410 options += 3; 1411 /* TODO: use simple_strtoll with >32bit ext4 */ 1412 sb_block = simple_strtoul(options, &options, 0); 1413 if (*options && *options != ',') { 1414 printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", 1415 (char *) *data); 1416 return 1; 1417 } 1418 if (*options == ',') 1419 options++; 1420 *data = (void *) options; 1421 1422 return sb_block; 1423 } 1424 1425 #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) 1426 static char deprecated_msg[] = "Mount option \"%s\" will be removed by %s\n" 1427 "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n"; 1428 1429 #ifdef CONFIG_QUOTA 1430 static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) 1431 { 1432 struct ext4_sb_info *sbi = EXT4_SB(sb); 1433 char *qname; 1434 1435 if (sb_any_quota_loaded(sb) && 1436 !sbi->s_qf_names[qtype]) { 1437 ext4_msg(sb, KERN_ERR, 1438 "Cannot change journaled " 1439 "quota options when quota turned on"); 1440 return 0; 1441 } 1442 qname = match_strdup(args); 1443 if (!qname) { 1444 ext4_msg(sb, KERN_ERR, 1445 "Not enough memory for storing quotafile name"); 1446 return 0; 1447 } 1448 if (sbi->s_qf_names[qtype] && 1449 strcmp(sbi->s_qf_names[qtype], qname)) { 1450 ext4_msg(sb, KERN_ERR, 1451 "%s quota file already specified", QTYPE2NAME(qtype)); 1452 kfree(qname); 1453 return 0; 1454 } 1455 sbi->s_qf_names[qtype] = qname; 1456 if (strchr(sbi->s_qf_names[qtype], '/')) { 1457 ext4_msg(sb, KERN_ERR, 1458 "quotafile must be on filesystem root"); 1459 kfree(sbi->s_qf_names[qtype]); 1460 sbi->s_qf_names[qtype] = NULL; 1461 return 0; 1462 } 1463 set_opt(sb, QUOTA); 1464 return 1; 1465 } 1466 1467 static int clear_qf_name(struct super_block *sb, int qtype) 1468 { 1469 1470 struct ext4_sb_info *sbi = EXT4_SB(sb); 1471 1472 if (sb_any_quota_loaded(sb) && 1473 sbi->s_qf_names[qtype]) { 1474 ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options" 1475 " when quota turned on"); 1476 return 0; 1477 } 1478 /* 1479 * The space will be released later when all options are confirmed 1480 * to be correct 1481 */ 1482 sbi->s_qf_names[qtype] = NULL; 1483 return 1; 1484 } 1485 #endif 1486 1487 static int parse_options(char *options, struct super_block *sb, 1488 unsigned long *journal_devnum, 1489 unsigned int *journal_ioprio, 1490 ext4_fsblk_t *n_blocks_count, int is_remount) 1491 { 1492 struct ext4_sb_info *sbi = EXT4_SB(sb); 1493 char *p; 1494 substring_t args[MAX_OPT_ARGS]; 1495 int data_opt = 0; 1496 int option; 1497 #ifdef CONFIG_QUOTA 1498 int qfmt; 1499 #endif 1500 1501 if (!options) 1502 return 1; 1503 1504 while ((p = strsep(&options, ",")) != NULL) { 1505 int token; 1506 if (!*p) 1507 continue; 1508 1509 /* 1510 * Initialize args struct so we know whether arg was 1511 * found; some options take optional arguments. 1512 */ 1513 args[0].to = args[0].from = NULL; 1514 token = match_token(p, tokens, args); 1515 switch (token) { 1516 case Opt_bsd_df: 1517 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1518 clear_opt(sb, MINIX_DF); 1519 break; 1520 case Opt_minix_df: 1521 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1522 set_opt(sb, MINIX_DF); 1523 1524 break; 1525 case Opt_grpid: 1526 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1527 set_opt(sb, GRPID); 1528 1529 break; 1530 case Opt_nogrpid: 1531 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1532 clear_opt(sb, GRPID); 1533 1534 break; 1535 case Opt_resuid: 1536 if (match_int(&args[0], &option)) 1537 return 0; 1538 sbi->s_resuid = option; 1539 break; 1540 case Opt_resgid: 1541 if (match_int(&args[0], &option)) 1542 return 0; 1543 sbi->s_resgid = option; 1544 break; 1545 case Opt_sb: 1546 /* handled by get_sb_block() instead of here */ 1547 /* *sb_block = match_int(&args[0]); */ 1548 break; 1549 case Opt_err_panic: 1550 clear_opt(sb, ERRORS_CONT); 1551 clear_opt(sb, ERRORS_RO); 1552 set_opt(sb, ERRORS_PANIC); 1553 break; 1554 case Opt_err_ro: 1555 clear_opt(sb, ERRORS_CONT); 1556 clear_opt(sb, ERRORS_PANIC); 1557 set_opt(sb, ERRORS_RO); 1558 break; 1559 case Opt_err_cont: 1560 clear_opt(sb, ERRORS_RO); 1561 clear_opt(sb, ERRORS_PANIC); 1562 set_opt(sb, ERRORS_CONT); 1563 break; 1564 case Opt_nouid32: 1565 set_opt(sb, NO_UID32); 1566 break; 1567 case Opt_debug: 1568 set_opt(sb, DEBUG); 1569 break; 1570 case Opt_oldalloc: 1571 set_opt(sb, OLDALLOC); 1572 break; 1573 case Opt_orlov: 1574 clear_opt(sb, OLDALLOC); 1575 break; 1576 #ifdef CONFIG_EXT4_FS_XATTR 1577 case Opt_user_xattr: 1578 set_opt(sb, XATTR_USER); 1579 break; 1580 case Opt_nouser_xattr: 1581 clear_opt(sb, XATTR_USER); 1582 break; 1583 #else 1584 case Opt_user_xattr: 1585 case Opt_nouser_xattr: 1586 ext4_msg(sb, KERN_ERR, "(no)user_xattr options not supported"); 1587 break; 1588 #endif 1589 #ifdef CONFIG_EXT4_FS_POSIX_ACL 1590 case Opt_acl: 1591 set_opt(sb, POSIX_ACL); 1592 break; 1593 case Opt_noacl: 1594 clear_opt(sb, POSIX_ACL); 1595 break; 1596 #else 1597 case Opt_acl: 1598 case Opt_noacl: 1599 ext4_msg(sb, KERN_ERR, "(no)acl options not supported"); 1600 break; 1601 #endif 1602 case Opt_journal_update: 1603 /* @@@ FIXME */ 1604 /* Eventually we will want to be able to create 1605 a journal file here. For now, only allow the 1606 user to specify an existing inode to be the 1607 journal file. */ 1608 if (is_remount) { 1609 ext4_msg(sb, KERN_ERR, 1610 "Cannot specify journal on remount"); 1611 return 0; 1612 } 1613 set_opt(sb, UPDATE_JOURNAL); 1614 break; 1615 case Opt_journal_dev: 1616 if (is_remount) { 1617 ext4_msg(sb, KERN_ERR, 1618 "Cannot specify journal on remount"); 1619 return 0; 1620 } 1621 if (match_int(&args[0], &option)) 1622 return 0; 1623 *journal_devnum = option; 1624 break; 1625 case Opt_journal_checksum: 1626 set_opt(sb, JOURNAL_CHECKSUM); 1627 break; 1628 case Opt_journal_async_commit: 1629 set_opt(sb, JOURNAL_ASYNC_COMMIT); 1630 set_opt(sb, JOURNAL_CHECKSUM); 1631 break; 1632 case Opt_noload: 1633 set_opt(sb, NOLOAD); 1634 break; 1635 case Opt_commit: 1636 if (match_int(&args[0], &option)) 1637 return 0; 1638 if (option < 0) 1639 return 0; 1640 if (option == 0) 1641 option = JBD2_DEFAULT_MAX_COMMIT_AGE; 1642 sbi->s_commit_interval = HZ * option; 1643 break; 1644 case Opt_max_batch_time: 1645 if (match_int(&args[0], &option)) 1646 return 0; 1647 if (option < 0) 1648 return 0; 1649 if (option == 0) 1650 option = EXT4_DEF_MAX_BATCH_TIME; 1651 sbi->s_max_batch_time = option; 1652 break; 1653 case Opt_min_batch_time: 1654 if (match_int(&args[0], &option)) 1655 return 0; 1656 if (option < 0) 1657 return 0; 1658 sbi->s_min_batch_time = option; 1659 break; 1660 case Opt_data_journal: 1661 data_opt = EXT4_MOUNT_JOURNAL_DATA; 1662 goto datacheck; 1663 case Opt_data_ordered: 1664 data_opt = EXT4_MOUNT_ORDERED_DATA; 1665 goto datacheck; 1666 case Opt_data_writeback: 1667 data_opt = EXT4_MOUNT_WRITEBACK_DATA; 1668 datacheck: 1669 if (is_remount) { 1670 if (test_opt(sb, DATA_FLAGS) != data_opt) { 1671 ext4_msg(sb, KERN_ERR, 1672 "Cannot change data mode on remount"); 1673 return 0; 1674 } 1675 } else { 1676 clear_opt(sb, DATA_FLAGS); 1677 sbi->s_mount_opt |= data_opt; 1678 } 1679 break; 1680 case Opt_data_err_abort: 1681 set_opt(sb, DATA_ERR_ABORT); 1682 break; 1683 case Opt_data_err_ignore: 1684 clear_opt(sb, DATA_ERR_ABORT); 1685 break; 1686 #ifdef CONFIG_QUOTA 1687 case Opt_usrjquota: 1688 if (!set_qf_name(sb, USRQUOTA, &args[0])) 1689 return 0; 1690 break; 1691 case Opt_grpjquota: 1692 if (!set_qf_name(sb, GRPQUOTA, &args[0])) 1693 return 0; 1694 break; 1695 case Opt_offusrjquota: 1696 if (!clear_qf_name(sb, USRQUOTA)) 1697 return 0; 1698 break; 1699 case Opt_offgrpjquota: 1700 if (!clear_qf_name(sb, GRPQUOTA)) 1701 return 0; 1702 break; 1703 1704 case Opt_jqfmt_vfsold: 1705 qfmt = QFMT_VFS_OLD; 1706 goto set_qf_format; 1707 case Opt_jqfmt_vfsv0: 1708 qfmt = QFMT_VFS_V0; 1709 goto set_qf_format; 1710 case Opt_jqfmt_vfsv1: 1711 qfmt = QFMT_VFS_V1; 1712 set_qf_format: 1713 if (sb_any_quota_loaded(sb) && 1714 sbi->s_jquota_fmt != qfmt) { 1715 ext4_msg(sb, KERN_ERR, "Cannot change " 1716 "journaled quota options when " 1717 "quota turned on"); 1718 return 0; 1719 } 1720 sbi->s_jquota_fmt = qfmt; 1721 break; 1722 case Opt_quota: 1723 case Opt_usrquota: 1724 set_opt(sb, QUOTA); 1725 set_opt(sb, USRQUOTA); 1726 break; 1727 case Opt_grpquota: 1728 set_opt(sb, QUOTA); 1729 set_opt(sb, GRPQUOTA); 1730 break; 1731 case Opt_noquota: 1732 if (sb_any_quota_loaded(sb)) { 1733 ext4_msg(sb, KERN_ERR, "Cannot change quota " 1734 "options when quota turned on"); 1735 return 0; 1736 } 1737 clear_opt(sb, QUOTA); 1738 clear_opt(sb, USRQUOTA); 1739 clear_opt(sb, GRPQUOTA); 1740 break; 1741 #else 1742 case Opt_quota: 1743 case Opt_usrquota: 1744 case Opt_grpquota: 1745 ext4_msg(sb, KERN_ERR, 1746 "quota options not supported"); 1747 break; 1748 case Opt_usrjquota: 1749 case Opt_grpjquota: 1750 case Opt_offusrjquota: 1751 case Opt_offgrpjquota: 1752 case Opt_jqfmt_vfsold: 1753 case Opt_jqfmt_vfsv0: 1754 case Opt_jqfmt_vfsv1: 1755 ext4_msg(sb, KERN_ERR, 1756 "journaled quota options not supported"); 1757 break; 1758 case Opt_noquota: 1759 break; 1760 #endif 1761 case Opt_abort: 1762 sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; 1763 break; 1764 case Opt_nobarrier: 1765 clear_opt(sb, BARRIER); 1766 break; 1767 case Opt_barrier: 1768 if (args[0].from) { 1769 if (match_int(&args[0], &option)) 1770 return 0; 1771 } else 1772 option = 1; /* No argument, default to 1 */ 1773 if (option) 1774 set_opt(sb, BARRIER); 1775 else 1776 clear_opt(sb, BARRIER); 1777 break; 1778 case Opt_ignore: 1779 break; 1780 case Opt_resize: 1781 if (!is_remount) { 1782 ext4_msg(sb, KERN_ERR, 1783 "resize option only available " 1784 "for remount"); 1785 return 0; 1786 } 1787 if (match_int(&args[0], &option) != 0) 1788 return 0; 1789 *n_blocks_count = option; 1790 break; 1791 case Opt_nobh: 1792 ext4_msg(sb, KERN_WARNING, 1793 "Ignoring deprecated nobh option"); 1794 break; 1795 case Opt_bh: 1796 ext4_msg(sb, KERN_WARNING, 1797 "Ignoring deprecated bh option"); 1798 break; 1799 case Opt_i_version: 1800 set_opt(sb, I_VERSION); 1801 sb->s_flags |= MS_I_VERSION; 1802 break; 1803 case Opt_nodelalloc: 1804 clear_opt(sb, DELALLOC); 1805 break; 1806 case Opt_mblk_io_submit: 1807 set_opt(sb, MBLK_IO_SUBMIT); 1808 break; 1809 case Opt_nomblk_io_submit: 1810 clear_opt(sb, MBLK_IO_SUBMIT); 1811 break; 1812 case Opt_stripe: 1813 if (match_int(&args[0], &option)) 1814 return 0; 1815 if (option < 0) 1816 return 0; 1817 sbi->s_stripe = option; 1818 break; 1819 case Opt_delalloc: 1820 set_opt(sb, DELALLOC); 1821 break; 1822 case Opt_block_validity: 1823 set_opt(sb, BLOCK_VALIDITY); 1824 break; 1825 case Opt_noblock_validity: 1826 clear_opt(sb, BLOCK_VALIDITY); 1827 break; 1828 case Opt_inode_readahead_blks: 1829 if (match_int(&args[0], &option)) 1830 return 0; 1831 if (option < 0 || option > (1 << 30)) 1832 return 0; 1833 if (option && !is_power_of_2(option)) { 1834 ext4_msg(sb, KERN_ERR, 1835 "EXT4-fs: inode_readahead_blks" 1836 " must be a power of 2"); 1837 return 0; 1838 } 1839 sbi->s_inode_readahead_blks = option; 1840 break; 1841 case Opt_journal_ioprio: 1842 if (match_int(&args[0], &option)) 1843 return 0; 1844 if (option < 0 || option > 7) 1845 break; 1846 *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 1847 option); 1848 break; 1849 case Opt_noauto_da_alloc: 1850 set_opt(sb, NO_AUTO_DA_ALLOC); 1851 break; 1852 case Opt_auto_da_alloc: 1853 if (args[0].from) { 1854 if (match_int(&args[0], &option)) 1855 return 0; 1856 } else 1857 option = 1; /* No argument, default to 1 */ 1858 if (option) 1859 clear_opt(sb, NO_AUTO_DA_ALLOC); 1860 else 1861 set_opt(sb,NO_AUTO_DA_ALLOC); 1862 break; 1863 case Opt_discard: 1864 set_opt(sb, DISCARD); 1865 break; 1866 case Opt_nodiscard: 1867 clear_opt(sb, DISCARD); 1868 break; 1869 case Opt_dioread_nolock: 1870 set_opt(sb, DIOREAD_NOLOCK); 1871 break; 1872 case Opt_dioread_lock: 1873 clear_opt(sb, DIOREAD_NOLOCK); 1874 break; 1875 case Opt_init_inode_table: 1876 set_opt(sb, INIT_INODE_TABLE); 1877 if (args[0].from) { 1878 if (match_int(&args[0], &option)) 1879 return 0; 1880 } else 1881 option = EXT4_DEF_LI_WAIT_MULT; 1882 if (option < 0) 1883 return 0; 1884 sbi->s_li_wait_mult = option; 1885 break; 1886 case Opt_noinit_inode_table: 1887 clear_opt(sb, INIT_INODE_TABLE); 1888 break; 1889 default: 1890 ext4_msg(sb, KERN_ERR, 1891 "Unrecognized mount option \"%s\" " 1892 "or missing value", p); 1893 return 0; 1894 } 1895 } 1896 #ifdef CONFIG_QUOTA 1897 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { 1898 if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) 1899 clear_opt(sb, USRQUOTA); 1900 1901 if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA]) 1902 clear_opt(sb, GRPQUOTA); 1903 1904 if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) { 1905 ext4_msg(sb, KERN_ERR, "old and new quota " 1906 "format mixing"); 1907 return 0; 1908 } 1909 1910 if (!sbi->s_jquota_fmt) { 1911 ext4_msg(sb, KERN_ERR, "journaled quota format " 1912 "not specified"); 1913 return 0; 1914 } 1915 } else { 1916 if (sbi->s_jquota_fmt) { 1917 ext4_msg(sb, KERN_ERR, "journaled quota format " 1918 "specified with no journaling " 1919 "enabled"); 1920 return 0; 1921 } 1922 } 1923 #endif 1924 return 1; 1925 } 1926 1927 static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, 1928 int read_only) 1929 { 1930 struct ext4_sb_info *sbi = EXT4_SB(sb); 1931 int res = 0; 1932 1933 if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { 1934 ext4_msg(sb, KERN_ERR, "revision level too high, " 1935 "forcing read-only mode"); 1936 res = MS_RDONLY; 1937 } 1938 if (read_only) 1939 return res; 1940 if (!(sbi->s_mount_state & EXT4_VALID_FS)) 1941 ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, " 1942 "running e2fsck is recommended"); 1943 else if ((sbi->s_mount_state & EXT4_ERROR_FS)) 1944 ext4_msg(sb, KERN_WARNING, 1945 "warning: mounting fs with errors, " 1946 "running e2fsck is recommended"); 1947 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 && 1948 le16_to_cpu(es->s_mnt_count) >= 1949 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) 1950 ext4_msg(sb, KERN_WARNING, 1951 "warning: maximal mount count reached, " 1952 "running e2fsck is recommended"); 1953 else if (le32_to_cpu(es->s_checkinterval) && 1954 (le32_to_cpu(es->s_lastcheck) + 1955 le32_to_cpu(es->s_checkinterval) <= get_seconds())) 1956 ext4_msg(sb, KERN_WARNING, 1957 "warning: checktime reached, " 1958 "running e2fsck is recommended"); 1959 if (!sbi->s_journal) 1960 es->s_state &= cpu_to_le16(~EXT4_VALID_FS); 1961 if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) 1962 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); 1963 le16_add_cpu(&es->s_mnt_count, 1); 1964 es->s_mtime = cpu_to_le32(get_seconds()); 1965 ext4_update_dynamic_rev(sb); 1966 if (sbi->s_journal) 1967 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 1968 1969 ext4_commit_super(sb, 1); 1970 if (test_opt(sb, DEBUG)) 1971 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " 1972 "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n", 1973 sb->s_blocksize, 1974 sbi->s_groups_count, 1975 EXT4_BLOCKS_PER_GROUP(sb), 1976 EXT4_INODES_PER_GROUP(sb), 1977 sbi->s_mount_opt, sbi->s_mount_opt2); 1978 1979 cleancache_init_fs(sb); 1980 return res; 1981 } 1982 1983 static int ext4_fill_flex_info(struct super_block *sb) 1984 { 1985 struct ext4_sb_info *sbi = EXT4_SB(sb); 1986 struct ext4_group_desc *gdp = NULL; 1987 ext4_group_t flex_group_count; 1988 ext4_group_t flex_group; 1989 int groups_per_flex = 0; 1990 size_t size; 1991 int i; 1992 1993 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; 1994 groups_per_flex = 1 << sbi->s_log_groups_per_flex; 1995 1996 if (groups_per_flex < 2) { 1997 sbi->s_log_groups_per_flex = 0; 1998 return 1; 1999 } 2000 2001 /* We allocate both existing and potentially added groups */ 2002 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + 2003 ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << 2004 EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex; 2005 size = flex_group_count * sizeof(struct flex_groups); 2006 sbi->s_flex_groups = ext4_kvzalloc(size, GFP_KERNEL); 2007 if (sbi->s_flex_groups == NULL) { 2008 ext4_msg(sb, KERN_ERR, "not enough memory for %u flex groups", 2009 flex_group_count); 2010 goto failed; 2011 } 2012 2013 for (i = 0; i < sbi->s_groups_count; i++) { 2014 gdp = ext4_get_group_desc(sb, i, NULL); 2015 2016 flex_group = ext4_flex_group(sbi, i); 2017 atomic_add(ext4_free_inodes_count(sb, gdp), 2018 &sbi->s_flex_groups[flex_group].free_inodes); 2019 atomic_add(ext4_free_blks_count(sb, gdp), 2020 &sbi->s_flex_groups[flex_group].free_blocks); 2021 atomic_add(ext4_used_dirs_count(sb, gdp), 2022 &sbi->s_flex_groups[flex_group].used_dirs); 2023 } 2024 2025 return 1; 2026 failed: 2027 return 0; 2028 } 2029 2030 __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, 2031 struct ext4_group_desc *gdp) 2032 { 2033 __u16 crc = 0; 2034 2035 if (sbi->s_es->s_feature_ro_compat & 2036 cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { 2037 int offset = offsetof(struct ext4_group_desc, bg_checksum); 2038 __le32 le_group = cpu_to_le32(block_group); 2039 2040 crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); 2041 crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); 2042 crc = crc16(crc, (__u8 *)gdp, offset); 2043 offset += sizeof(gdp->bg_checksum); /* skip checksum */ 2044 /* for checksum of struct ext4_group_desc do the rest...*/ 2045 if ((sbi->s_es->s_feature_incompat & 2046 cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) && 2047 offset < le16_to_cpu(sbi->s_es->s_desc_size)) 2048 crc = crc16(crc, (__u8 *)gdp + offset, 2049 le16_to_cpu(sbi->s_es->s_desc_size) - 2050 offset); 2051 } 2052 2053 return cpu_to_le16(crc); 2054 } 2055 2056 int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group, 2057 struct ext4_group_desc *gdp) 2058 { 2059 if ((sbi->s_es->s_feature_ro_compat & 2060 cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) && 2061 (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp))) 2062 return 0; 2063 2064 return 1; 2065 } 2066 2067 /* Called at mount-time, super-block is locked */ 2068 static int ext4_check_descriptors(struct super_block *sb, 2069 ext4_group_t *first_not_zeroed) 2070 { 2071 struct ext4_sb_info *sbi = EXT4_SB(sb); 2072 ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); 2073 ext4_fsblk_t last_block; 2074 ext4_fsblk_t block_bitmap; 2075 ext4_fsblk_t inode_bitmap; 2076 ext4_fsblk_t inode_table; 2077 int flexbg_flag = 0; 2078 ext4_group_t i, grp = sbi->s_groups_count; 2079 2080 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 2081 flexbg_flag = 1; 2082 2083 ext4_debug("Checking group descriptors"); 2084 2085 for (i = 0; i < sbi->s_groups_count; i++) { 2086 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); 2087 2088 if (i == sbi->s_groups_count - 1 || flexbg_flag) 2089 last_block = ext4_blocks_count(sbi->s_es) - 1; 2090 else 2091 last_block = first_block + 2092 (EXT4_BLOCKS_PER_GROUP(sb) - 1); 2093 2094 if ((grp == sbi->s_groups_count) && 2095 !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) 2096 grp = i; 2097 2098 block_bitmap = ext4_block_bitmap(sb, gdp); 2099 if (block_bitmap < first_block || block_bitmap > last_block) { 2100 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 2101 "Block bitmap for group %u not in group " 2102 "(block %llu)!", i, block_bitmap); 2103 return 0; 2104 } 2105 inode_bitmap = ext4_inode_bitmap(sb, gdp); 2106 if (inode_bitmap < first_block || inode_bitmap > last_block) { 2107 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 2108 "Inode bitmap for group %u not in group " 2109 "(block %llu)!", i, inode_bitmap); 2110 return 0; 2111 } 2112 inode_table = ext4_inode_table(sb, gdp); 2113 if (inode_table < first_block || 2114 inode_table + sbi->s_itb_per_group - 1 > last_block) { 2115 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 2116 "Inode table for group %u not in group " 2117 "(block %llu)!", i, inode_table); 2118 return 0; 2119 } 2120 ext4_lock_group(sb, i); 2121 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { 2122 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 2123 "Checksum for group %u failed (%u!=%u)", 2124 i, le16_to_cpu(ext4_group_desc_csum(sbi, i, 2125 gdp)), le16_to_cpu(gdp->bg_checksum)); 2126 if (!(sb->s_flags & MS_RDONLY)) { 2127 ext4_unlock_group(sb, i); 2128 return 0; 2129 } 2130 } 2131 ext4_unlock_group(sb, i); 2132 if (!flexbg_flag) 2133 first_block += EXT4_BLOCKS_PER_GROUP(sb); 2134 } 2135 if (NULL != first_not_zeroed) 2136 *first_not_zeroed = grp; 2137 2138 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); 2139 sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb)); 2140 return 1; 2141 } 2142 2143 /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at 2144 * the superblock) which were deleted from all directories, but held open by 2145 * a process at the time of a crash. We walk the list and try to delete these 2146 * inodes at recovery time (only with a read-write filesystem). 2147 * 2148 * In order to keep the orphan inode chain consistent during traversal (in 2149 * case of crash during recovery), we link each inode into the superblock 2150 * orphan list_head and handle it the same way as an inode deletion during 2151 * normal operation (which journals the operations for us). 2152 * 2153 * We only do an iget() and an iput() on each inode, which is very safe if we 2154 * accidentally point at an in-use or already deleted inode. The worst that 2155 * can happen in this case is that we get a "bit already cleared" message from 2156 * ext4_free_inode(). The only reason we would point at a wrong inode is if 2157 * e2fsck was run on this filesystem, and it must have already done the orphan 2158 * inode cleanup for us, so we can safely abort without any further action. 2159 */ 2160 static void ext4_orphan_cleanup(struct super_block *sb, 2161 struct ext4_super_block *es) 2162 { 2163 unsigned int s_flags = sb->s_flags; 2164 int nr_orphans = 0, nr_truncates = 0; 2165 #ifdef CONFIG_QUOTA 2166 int i; 2167 #endif 2168 if (!es->s_last_orphan) { 2169 jbd_debug(4, "no orphan inodes to clean up\n"); 2170 return; 2171 } 2172 2173 if (bdev_read_only(sb->s_bdev)) { 2174 ext4_msg(sb, KERN_ERR, "write access " 2175 "unavailable, skipping orphan cleanup"); 2176 return; 2177 } 2178 2179 /* Check if feature set would not allow a r/w mount */ 2180 if (!ext4_feature_set_ok(sb, 0)) { 2181 ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to " 2182 "unknown ROCOMPAT features"); 2183 return; 2184 } 2185 2186 if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { 2187 if (es->s_last_orphan) 2188 jbd_debug(1, "Errors on filesystem, " 2189 "clearing orphan list.\n"); 2190 es->s_last_orphan = 0; 2191 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); 2192 return; 2193 } 2194 2195 if (s_flags & MS_RDONLY) { 2196 ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs"); 2197 sb->s_flags &= ~MS_RDONLY; 2198 } 2199 #ifdef CONFIG_QUOTA 2200 /* Needed for iput() to work correctly and not trash data */ 2201 sb->s_flags |= MS_ACTIVE; 2202 /* Turn on quotas so that they are updated correctly */ 2203 for (i = 0; i < MAXQUOTAS; i++) { 2204 if (EXT4_SB(sb)->s_qf_names[i]) { 2205 int ret = ext4_quota_on_mount(sb, i); 2206 if (ret < 0) 2207 ext4_msg(sb, KERN_ERR, 2208 "Cannot turn on journaled " 2209 "quota: error %d", ret); 2210 } 2211 } 2212 #endif 2213 2214 while (es->s_last_orphan) { 2215 struct inode *inode; 2216 2217 inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)); 2218 if (IS_ERR(inode)) { 2219 es->s_last_orphan = 0; 2220 break; 2221 } 2222 2223 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); 2224 dquot_initialize(inode); 2225 if (inode->i_nlink) { 2226 ext4_msg(sb, KERN_DEBUG, 2227 "%s: truncating inode %lu to %lld bytes", 2228 __func__, inode->i_ino, inode->i_size); 2229 jbd_debug(2, "truncating inode %lu to %lld bytes\n", 2230 inode->i_ino, inode->i_size); 2231 ext4_truncate(inode); 2232 nr_truncates++; 2233 } else { 2234 ext4_msg(sb, KERN_DEBUG, 2235 "%s: deleting unreferenced inode %lu", 2236 __func__, inode->i_ino); 2237 jbd_debug(2, "deleting unreferenced inode %lu\n", 2238 inode->i_ino); 2239 nr_orphans++; 2240 } 2241 iput(inode); /* The delete magic happens here! */ 2242 } 2243 2244 #define PLURAL(x) (x), ((x) == 1) ? "" : "s" 2245 2246 if (nr_orphans) 2247 ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted", 2248 PLURAL(nr_orphans)); 2249 if (nr_truncates) 2250 ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up", 2251 PLURAL(nr_truncates)); 2252 #ifdef CONFIG_QUOTA 2253 /* Turn quotas off */ 2254 for (i = 0; i < MAXQUOTAS; i++) { 2255 if (sb_dqopt(sb)->files[i]) 2256 dquot_quota_off(sb, i); 2257 } 2258 #endif 2259 sb->s_flags = s_flags; /* Restore MS_RDONLY status */ 2260 } 2261 2262 /* 2263 * Maximal extent format file size. 2264 * Resulting logical blkno at s_maxbytes must fit in our on-disk 2265 * extent format containers, within a sector_t, and within i_blocks 2266 * in the vfs. ext4 inode has 48 bits of i_block in fsblock units, 2267 * so that won't be a limiting factor. 2268 * 2269 * However there is other limiting factor. We do store extents in the form 2270 * of starting block and length, hence the resulting length of the extent 2271 * covering maximum file size must fit into on-disk format containers as 2272 * well. Given that length is always by 1 unit bigger than max unit (because 2273 * we count 0 as well) we have to lower the s_maxbytes by one fs block. 2274 * 2275 * Note, this does *not* consider any metadata overhead for vfs i_blocks. 2276 */ 2277 static loff_t ext4_max_size(int blkbits, int has_huge_files) 2278 { 2279 loff_t res; 2280 loff_t upper_limit = MAX_LFS_FILESIZE; 2281 2282 /* small i_blocks in vfs inode? */ 2283 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 2284 /* 2285 * CONFIG_LBDAF is not enabled implies the inode 2286 * i_block represent total blocks in 512 bytes 2287 * 32 == size of vfs inode i_blocks * 8 2288 */ 2289 upper_limit = (1LL << 32) - 1; 2290 2291 /* total blocks in file system block size */ 2292 upper_limit >>= (blkbits - 9); 2293 upper_limit <<= blkbits; 2294 } 2295 2296 /* 2297 * 32-bit extent-start container, ee_block. We lower the maxbytes 2298 * by one fs block, so ee_len can cover the extent of maximum file 2299 * size 2300 */ 2301 res = (1LL << 32) - 1; 2302 res <<= blkbits; 2303 2304 /* Sanity check against vm- & vfs- imposed limits */ 2305 if (res > upper_limit) 2306 res = upper_limit; 2307 2308 return res; 2309 } 2310 2311 /* 2312 * Maximal bitmap file size. There is a direct, and {,double-,triple-}indirect 2313 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks. 2314 * We need to be 1 filesystem block less than the 2^48 sector limit. 2315 */ 2316 static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) 2317 { 2318 loff_t res = EXT4_NDIR_BLOCKS; 2319 int meta_blocks; 2320 loff_t upper_limit; 2321 /* This is calculated to be the largest file size for a dense, block 2322 * mapped file such that the file's total number of 512-byte sectors, 2323 * including data and all indirect blocks, does not exceed (2^48 - 1). 2324 * 2325 * __u32 i_blocks_lo and _u16 i_blocks_high represent the total 2326 * number of 512-byte sectors of the file. 2327 */ 2328 2329 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 2330 /* 2331 * !has_huge_files or CONFIG_LBDAF not enabled implies that 2332 * the inode i_block field represents total file blocks in 2333 * 2^32 512-byte sectors == size of vfs inode i_blocks * 8 2334 */ 2335 upper_limit = (1LL << 32) - 1; 2336 2337 /* total blocks in file system block size */ 2338 upper_limit >>= (bits - 9); 2339 2340 } else { 2341 /* 2342 * We use 48 bit ext4_inode i_blocks 2343 * With EXT4_HUGE_FILE_FL set the i_blocks 2344 * represent total number of blocks in 2345 * file system block size 2346 */ 2347 upper_limit = (1LL << 48) - 1; 2348 2349 } 2350 2351 /* indirect blocks */ 2352 meta_blocks = 1; 2353 /* double indirect blocks */ 2354 meta_blocks += 1 + (1LL << (bits-2)); 2355 /* tripple indirect blocks */ 2356 meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2))); 2357 2358 upper_limit -= meta_blocks; 2359 upper_limit <<= bits; 2360 2361 res += 1LL << (bits-2); 2362 res += 1LL << (2*(bits-2)); 2363 res += 1LL << (3*(bits-2)); 2364 res <<= bits; 2365 if (res > upper_limit) 2366 res = upper_limit; 2367 2368 if (res > MAX_LFS_FILESIZE) 2369 res = MAX_LFS_FILESIZE; 2370 2371 return res; 2372 } 2373 2374 static ext4_fsblk_t descriptor_loc(struct super_block *sb, 2375 ext4_fsblk_t logical_sb_block, int nr) 2376 { 2377 struct ext4_sb_info *sbi = EXT4_SB(sb); 2378 ext4_group_t bg, first_meta_bg; 2379 int has_super = 0; 2380 2381 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); 2382 2383 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) || 2384 nr < first_meta_bg) 2385 return logical_sb_block + nr + 1; 2386 bg = sbi->s_desc_per_block * nr; 2387 if (ext4_bg_has_super(sb, bg)) 2388 has_super = 1; 2389 2390 return (has_super + ext4_group_first_block_no(sb, bg)); 2391 } 2392 2393 /** 2394 * ext4_get_stripe_size: Get the stripe size. 2395 * @sbi: In memory super block info 2396 * 2397 * If we have specified it via mount option, then 2398 * use the mount option value. If the value specified at mount time is 2399 * greater than the blocks per group use the super block value. 2400 * If the super block value is greater than blocks per group return 0. 2401 * Allocator needs it be less than blocks per group. 2402 * 2403 */ 2404 static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) 2405 { 2406 unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride); 2407 unsigned long stripe_width = 2408 le32_to_cpu(sbi->s_es->s_raid_stripe_width); 2409 int ret; 2410 2411 if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group) 2412 ret = sbi->s_stripe; 2413 else if (stripe_width <= sbi->s_blocks_per_group) 2414 ret = stripe_width; 2415 else if (stride <= sbi->s_blocks_per_group) 2416 ret = stride; 2417 else 2418 ret = 0; 2419 2420 /* 2421 * If the stripe width is 1, this makes no sense and 2422 * we set it to 0 to turn off stripe handling code. 2423 */ 2424 if (ret <= 1) 2425 ret = 0; 2426 2427 return ret; 2428 } 2429 2430 /* sysfs supprt */ 2431 2432 struct ext4_attr { 2433 struct attribute attr; 2434 ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *); 2435 ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, 2436 const char *, size_t); 2437 int offset; 2438 }; 2439 2440 static int parse_strtoul(const char *buf, 2441 unsigned long max, unsigned long *value) 2442 { 2443 char *endp; 2444 2445 *value = simple_strtoul(skip_spaces(buf), &endp, 0); 2446 endp = skip_spaces(endp); 2447 if (*endp || *value > max) 2448 return -EINVAL; 2449 2450 return 0; 2451 } 2452 2453 static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a, 2454 struct ext4_sb_info *sbi, 2455 char *buf) 2456 { 2457 return snprintf(buf, PAGE_SIZE, "%llu\n", 2458 (s64) percpu_counter_sum(&sbi->s_dirtyblocks_counter)); 2459 } 2460 2461 static ssize_t session_write_kbytes_show(struct ext4_attr *a, 2462 struct ext4_sb_info *sbi, char *buf) 2463 { 2464 struct super_block *sb = sbi->s_buddy_cache->i_sb; 2465 2466 if (!sb->s_bdev->bd_part) 2467 return snprintf(buf, PAGE_SIZE, "0\n"); 2468 return snprintf(buf, PAGE_SIZE, "%lu\n", 2469 (part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 2470 sbi->s_sectors_written_start) >> 1); 2471 } 2472 2473 static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a, 2474 struct ext4_sb_info *sbi, char *buf) 2475 { 2476 struct super_block *sb = sbi->s_buddy_cache->i_sb; 2477 2478 if (!sb->s_bdev->bd_part) 2479 return snprintf(buf, PAGE_SIZE, "0\n"); 2480 return snprintf(buf, PAGE_SIZE, "%llu\n", 2481 (unsigned long long)(sbi->s_kbytes_written + 2482 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 2483 EXT4_SB(sb)->s_sectors_written_start) >> 1))); 2484 } 2485 2486 static ssize_t extent_cache_hits_show(struct ext4_attr *a, 2487 struct ext4_sb_info *sbi, char *buf) 2488 { 2489 return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->extent_cache_hits); 2490 } 2491 2492 static ssize_t extent_cache_misses_show(struct ext4_attr *a, 2493 struct ext4_sb_info *sbi, char *buf) 2494 { 2495 return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->extent_cache_misses); 2496 } 2497 2498 static ssize_t inode_readahead_blks_store(struct ext4_attr *a, 2499 struct ext4_sb_info *sbi, 2500 const char *buf, size_t count) 2501 { 2502 unsigned long t; 2503 2504 if (parse_strtoul(buf, 0x40000000, &t)) 2505 return -EINVAL; 2506 2507 if (t && !is_power_of_2(t)) 2508 return -EINVAL; 2509 2510 sbi->s_inode_readahead_blks = t; 2511 return count; 2512 } 2513 2514 static ssize_t sbi_ui_show(struct ext4_attr *a, 2515 struct ext4_sb_info *sbi, char *buf) 2516 { 2517 unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); 2518 2519 return snprintf(buf, PAGE_SIZE, "%u\n", *ui); 2520 } 2521 2522 static ssize_t sbi_ui_store(struct ext4_attr *a, 2523 struct ext4_sb_info *sbi, 2524 const char *buf, size_t count) 2525 { 2526 unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); 2527 unsigned long t; 2528 2529 if (parse_strtoul(buf, 0xffffffff, &t)) 2530 return -EINVAL; 2531 *ui = t; 2532 return count; 2533 } 2534 2535 #define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \ 2536 static struct ext4_attr ext4_attr_##_name = { \ 2537 .attr = {.name = __stringify(_name), .mode = _mode }, \ 2538 .show = _show, \ 2539 .store = _store, \ 2540 .offset = offsetof(struct ext4_sb_info, _elname), \ 2541 } 2542 #define EXT4_ATTR(name, mode, show, store) \ 2543 static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) 2544 2545 #define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL) 2546 #define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) 2547 #define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) 2548 #define EXT4_RW_ATTR_SBI_UI(name, elname) \ 2549 EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) 2550 #define ATTR_LIST(name) &ext4_attr_##name.attr 2551 2552 EXT4_RO_ATTR(delayed_allocation_blocks); 2553 EXT4_RO_ATTR(session_write_kbytes); 2554 EXT4_RO_ATTR(lifetime_write_kbytes); 2555 EXT4_RO_ATTR(extent_cache_hits); 2556 EXT4_RO_ATTR(extent_cache_misses); 2557 EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, 2558 inode_readahead_blks_store, s_inode_readahead_blks); 2559 EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); 2560 EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); 2561 EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); 2562 EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); 2563 EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); 2564 EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); 2565 EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); 2566 EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump); 2567 2568 static struct attribute *ext4_attrs[] = { 2569 ATTR_LIST(delayed_allocation_blocks), 2570 ATTR_LIST(session_write_kbytes), 2571 ATTR_LIST(lifetime_write_kbytes), 2572 ATTR_LIST(extent_cache_hits), 2573 ATTR_LIST(extent_cache_misses), 2574 ATTR_LIST(inode_readahead_blks), 2575 ATTR_LIST(inode_goal), 2576 ATTR_LIST(mb_stats), 2577 ATTR_LIST(mb_max_to_scan), 2578 ATTR_LIST(mb_min_to_scan), 2579 ATTR_LIST(mb_order2_req), 2580 ATTR_LIST(mb_stream_req), 2581 ATTR_LIST(mb_group_prealloc), 2582 ATTR_LIST(max_writeback_mb_bump), 2583 NULL, 2584 }; 2585 2586 /* Features this copy of ext4 supports */ 2587 EXT4_INFO_ATTR(lazy_itable_init); 2588 EXT4_INFO_ATTR(batched_discard); 2589 2590 static struct attribute *ext4_feat_attrs[] = { 2591 ATTR_LIST(lazy_itable_init), 2592 ATTR_LIST(batched_discard), 2593 NULL, 2594 }; 2595 2596 static ssize_t ext4_attr_show(struct kobject *kobj, 2597 struct attribute *attr, char *buf) 2598 { 2599 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, 2600 s_kobj); 2601 struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); 2602 2603 return a->show ? a->show(a, sbi, buf) : 0; 2604 } 2605 2606 static ssize_t ext4_attr_store(struct kobject *kobj, 2607 struct attribute *attr, 2608 const char *buf, size_t len) 2609 { 2610 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, 2611 s_kobj); 2612 struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); 2613 2614 return a->store ? a->store(a, sbi, buf, len) : 0; 2615 } 2616 2617 static void ext4_sb_release(struct kobject *kobj) 2618 { 2619 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, 2620 s_kobj); 2621 complete(&sbi->s_kobj_unregister); 2622 } 2623 2624 static const struct sysfs_ops ext4_attr_ops = { 2625 .show = ext4_attr_show, 2626 .store = ext4_attr_store, 2627 }; 2628 2629 static struct kobj_type ext4_ktype = { 2630 .default_attrs = ext4_attrs, 2631 .sysfs_ops = &ext4_attr_ops, 2632 .release = ext4_sb_release, 2633 }; 2634 2635 static void ext4_feat_release(struct kobject *kobj) 2636 { 2637 complete(&ext4_feat->f_kobj_unregister); 2638 } 2639 2640 static struct kobj_type ext4_feat_ktype = { 2641 .default_attrs = ext4_feat_attrs, 2642 .sysfs_ops = &ext4_attr_ops, 2643 .release = ext4_feat_release, 2644 }; 2645 2646 /* 2647 * Check whether this filesystem can be mounted based on 2648 * the features present and the RDONLY/RDWR mount requested. 2649 * Returns 1 if this filesystem can be mounted as requested, 2650 * 0 if it cannot be. 2651 */ 2652 static int ext4_feature_set_ok(struct super_block *sb, int readonly) 2653 { 2654 if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP)) { 2655 ext4_msg(sb, KERN_ERR, 2656 "Couldn't mount because of " 2657 "unsupported optional features (%x)", 2658 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & 2659 ~EXT4_FEATURE_INCOMPAT_SUPP)); 2660 return 0; 2661 } 2662 2663 if (readonly) 2664 return 1; 2665 2666 /* Check that feature set is OK for a read-write mount */ 2667 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) { 2668 ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of " 2669 "unsupported optional features (%x)", 2670 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & 2671 ~EXT4_FEATURE_RO_COMPAT_SUPP)); 2672 return 0; 2673 } 2674 /* 2675 * Large file size enabled file system can only be mounted 2676 * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF 2677 */ 2678 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { 2679 if (sizeof(blkcnt_t) < sizeof(u64)) { 2680 ext4_msg(sb, KERN_ERR, "Filesystem with huge files " 2681 "cannot be mounted RDWR without " 2682 "CONFIG_LBDAF"); 2683 return 0; 2684 } 2685 } 2686 return 1; 2687 } 2688 2689 /* 2690 * This function is called once a day if we have errors logged 2691 * on the file system 2692 */ 2693 static void print_daily_error_info(unsigned long arg) 2694 { 2695 struct super_block *sb = (struct super_block *) arg; 2696 struct ext4_sb_info *sbi; 2697 struct ext4_super_block *es; 2698 2699 sbi = EXT4_SB(sb); 2700 es = sbi->s_es; 2701 2702 if (es->s_error_count) 2703 ext4_msg(sb, KERN_NOTICE, "error count: %u", 2704 le32_to_cpu(es->s_error_count)); 2705 if (es->s_first_error_time) { 2706 printk(KERN_NOTICE "EXT4-fs (%s): initial error at %u: %.*s:%d", 2707 sb->s_id, le32_to_cpu(es->s_first_error_time), 2708 (int) sizeof(es->s_first_error_func), 2709 es->s_first_error_func, 2710 le32_to_cpu(es->s_first_error_line)); 2711 if (es->s_first_error_ino) 2712 printk(": inode %u", 2713 le32_to_cpu(es->s_first_error_ino)); 2714 if (es->s_first_error_block) 2715 printk(": block %llu", (unsigned long long) 2716 le64_to_cpu(es->s_first_error_block)); 2717 printk("\n"); 2718 } 2719 if (es->s_last_error_time) { 2720 printk(KERN_NOTICE "EXT4-fs (%s): last error at %u: %.*s:%d", 2721 sb->s_id, le32_to_cpu(es->s_last_error_time), 2722 (int) sizeof(es->s_last_error_func), 2723 es->s_last_error_func, 2724 le32_to_cpu(es->s_last_error_line)); 2725 if (es->s_last_error_ino) 2726 printk(": inode %u", 2727 le32_to_cpu(es->s_last_error_ino)); 2728 if (es->s_last_error_block) 2729 printk(": block %llu", (unsigned long long) 2730 le64_to_cpu(es->s_last_error_block)); 2731 printk("\n"); 2732 } 2733 mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */ 2734 } 2735 2736 /* Find next suitable group and run ext4_init_inode_table */ 2737 static int ext4_run_li_request(struct ext4_li_request *elr) 2738 { 2739 struct ext4_group_desc *gdp = NULL; 2740 ext4_group_t group, ngroups; 2741 struct super_block *sb; 2742 unsigned long timeout = 0; 2743 int ret = 0; 2744 2745 sb = elr->lr_super; 2746 ngroups = EXT4_SB(sb)->s_groups_count; 2747 2748 for (group = elr->lr_next_group; group < ngroups; group++) { 2749 gdp = ext4_get_group_desc(sb, group, NULL); 2750 if (!gdp) { 2751 ret = 1; 2752 break; 2753 } 2754 2755 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) 2756 break; 2757 } 2758 2759 if (group == ngroups) 2760 ret = 1; 2761 2762 if (!ret) { 2763 timeout = jiffies; 2764 ret = ext4_init_inode_table(sb, group, 2765 elr->lr_timeout ? 0 : 1); 2766 if (elr->lr_timeout == 0) { 2767 timeout = (jiffies - timeout) * 2768 elr->lr_sbi->s_li_wait_mult; 2769 elr->lr_timeout = timeout; 2770 } 2771 elr->lr_next_sched = jiffies + elr->lr_timeout; 2772 elr->lr_next_group = group + 1; 2773 } 2774 2775 return ret; 2776 } 2777 2778 /* 2779 * Remove lr_request from the list_request and free the 2780 * request structure. Should be called with li_list_mtx held 2781 */ 2782 static void ext4_remove_li_request(struct ext4_li_request *elr) 2783 { 2784 struct ext4_sb_info *sbi; 2785 2786 if (!elr) 2787 return; 2788 2789 sbi = elr->lr_sbi; 2790 2791 list_del(&elr->lr_request); 2792 sbi->s_li_request = NULL; 2793 kfree(elr); 2794 } 2795 2796 static void ext4_unregister_li_request(struct super_block *sb) 2797 { 2798 mutex_lock(&ext4_li_mtx); 2799 if (!ext4_li_info) { 2800 mutex_unlock(&ext4_li_mtx); 2801 return; 2802 } 2803 2804 mutex_lock(&ext4_li_info->li_list_mtx); 2805 ext4_remove_li_request(EXT4_SB(sb)->s_li_request); 2806 mutex_unlock(&ext4_li_info->li_list_mtx); 2807 mutex_unlock(&ext4_li_mtx); 2808 } 2809 2810 static struct task_struct *ext4_lazyinit_task; 2811 2812 /* 2813 * This is the function where ext4lazyinit thread lives. It walks 2814 * through the request list searching for next scheduled filesystem. 2815 * When such a fs is found, run the lazy initialization request 2816 * (ext4_rn_li_request) and keep track of the time spend in this 2817 * function. Based on that time we compute next schedule time of 2818 * the request. When walking through the list is complete, compute 2819 * next waking time and put itself into sleep. 2820 */ 2821 static int ext4_lazyinit_thread(void *arg) 2822 { 2823 struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg; 2824 struct list_head *pos, *n; 2825 struct ext4_li_request *elr; 2826 unsigned long next_wakeup, cur; 2827 2828 BUG_ON(NULL == eli); 2829 2830 cont_thread: 2831 while (true) { 2832 next_wakeup = MAX_JIFFY_OFFSET; 2833 2834 mutex_lock(&eli->li_list_mtx); 2835 if (list_empty(&eli->li_request_list)) { 2836 mutex_unlock(&eli->li_list_mtx); 2837 goto exit_thread; 2838 } 2839 2840 list_for_each_safe(pos, n, &eli->li_request_list) { 2841 elr = list_entry(pos, struct ext4_li_request, 2842 lr_request); 2843 2844 if (time_after_eq(jiffies, elr->lr_next_sched)) { 2845 if (ext4_run_li_request(elr) != 0) { 2846 /* error, remove the lazy_init job */ 2847 ext4_remove_li_request(elr); 2848 continue; 2849 } 2850 } 2851 2852 if (time_before(elr->lr_next_sched, next_wakeup)) 2853 next_wakeup = elr->lr_next_sched; 2854 } 2855 mutex_unlock(&eli->li_list_mtx); 2856 2857 if (freezing(current)) 2858 refrigerator(); 2859 2860 cur = jiffies; 2861 if ((time_after_eq(cur, next_wakeup)) || 2862 (MAX_JIFFY_OFFSET == next_wakeup)) { 2863 cond_resched(); 2864 continue; 2865 } 2866 2867 schedule_timeout_interruptible(next_wakeup - cur); 2868 2869 if (kthread_should_stop()) { 2870 ext4_clear_request_list(); 2871 goto exit_thread; 2872 } 2873 } 2874 2875 exit_thread: 2876 /* 2877 * It looks like the request list is empty, but we need 2878 * to check it under the li_list_mtx lock, to prevent any 2879 * additions into it, and of course we should lock ext4_li_mtx 2880 * to atomically free the list and ext4_li_info, because at 2881 * this point another ext4 filesystem could be registering 2882 * new one. 2883 */ 2884 mutex_lock(&ext4_li_mtx); 2885 mutex_lock(&eli->li_list_mtx); 2886 if (!list_empty(&eli->li_request_list)) { 2887 mutex_unlock(&eli->li_list_mtx); 2888 mutex_unlock(&ext4_li_mtx); 2889 goto cont_thread; 2890 } 2891 mutex_unlock(&eli->li_list_mtx); 2892 kfree(ext4_li_info); 2893 ext4_li_info = NULL; 2894 mutex_unlock(&ext4_li_mtx); 2895 2896 return 0; 2897 } 2898 2899 static void ext4_clear_request_list(void) 2900 { 2901 struct list_head *pos, *n; 2902 struct ext4_li_request *elr; 2903 2904 mutex_lock(&ext4_li_info->li_list_mtx); 2905 list_for_each_safe(pos, n, &ext4_li_info->li_request_list) { 2906 elr = list_entry(pos, struct ext4_li_request, 2907 lr_request); 2908 ext4_remove_li_request(elr); 2909 } 2910 mutex_unlock(&ext4_li_info->li_list_mtx); 2911 } 2912 2913 static int ext4_run_lazyinit_thread(void) 2914 { 2915 ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread, 2916 ext4_li_info, "ext4lazyinit"); 2917 if (IS_ERR(ext4_lazyinit_task)) { 2918 int err = PTR_ERR(ext4_lazyinit_task); 2919 ext4_clear_request_list(); 2920 kfree(ext4_li_info); 2921 ext4_li_info = NULL; 2922 printk(KERN_CRIT "EXT4: error %d creating inode table " 2923 "initialization thread\n", 2924 err); 2925 return err; 2926 } 2927 ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING; 2928 return 0; 2929 } 2930 2931 /* 2932 * Check whether it make sense to run itable init. thread or not. 2933 * If there is at least one uninitialized inode table, return 2934 * corresponding group number, else the loop goes through all 2935 * groups and return total number of groups. 2936 */ 2937 static ext4_group_t ext4_has_uninit_itable(struct super_block *sb) 2938 { 2939 ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count; 2940 struct ext4_group_desc *gdp = NULL; 2941 2942 for (group = 0; group < ngroups; group++) { 2943 gdp = ext4_get_group_desc(sb, group, NULL); 2944 if (!gdp) 2945 continue; 2946 2947 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) 2948 break; 2949 } 2950 2951 return group; 2952 } 2953 2954 static int ext4_li_info_new(void) 2955 { 2956 struct ext4_lazy_init *eli = NULL; 2957 2958 eli = kzalloc(sizeof(*eli), GFP_KERNEL); 2959 if (!eli) 2960 return -ENOMEM; 2961 2962 INIT_LIST_HEAD(&eli->li_request_list); 2963 mutex_init(&eli->li_list_mtx); 2964 2965 eli->li_state |= EXT4_LAZYINIT_QUIT; 2966 2967 ext4_li_info = eli; 2968 2969 return 0; 2970 } 2971 2972 static struct ext4_li_request *ext4_li_request_new(struct super_block *sb, 2973 ext4_group_t start) 2974 { 2975 struct ext4_sb_info *sbi = EXT4_SB(sb); 2976 struct ext4_li_request *elr; 2977 unsigned long rnd; 2978 2979 elr = kzalloc(sizeof(*elr), GFP_KERNEL); 2980 if (!elr) 2981 return NULL; 2982 2983 elr->lr_super = sb; 2984 elr->lr_sbi = sbi; 2985 elr->lr_next_group = start; 2986 2987 /* 2988 * Randomize first schedule time of the request to 2989 * spread the inode table initialization requests 2990 * better. 2991 */ 2992 get_random_bytes(&rnd, sizeof(rnd)); 2993 elr->lr_next_sched = jiffies + (unsigned long)rnd % 2994 (EXT4_DEF_LI_MAX_START_DELAY * HZ); 2995 2996 return elr; 2997 } 2998 2999 static int ext4_register_li_request(struct super_block *sb, 3000 ext4_group_t first_not_zeroed) 3001 { 3002 struct ext4_sb_info *sbi = EXT4_SB(sb); 3003 struct ext4_li_request *elr; 3004 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; 3005 int ret = 0; 3006 3007 if (sbi->s_li_request != NULL) { 3008 /* 3009 * Reset timeout so it can be computed again, because 3010 * s_li_wait_mult might have changed. 3011 */ 3012 sbi->s_li_request->lr_timeout = 0; 3013 return 0; 3014 } 3015 3016 if (first_not_zeroed == ngroups || 3017 (sb->s_flags & MS_RDONLY) || 3018 !test_opt(sb, INIT_INODE_TABLE)) 3019 return 0; 3020 3021 elr = ext4_li_request_new(sb, first_not_zeroed); 3022 if (!elr) 3023 return -ENOMEM; 3024 3025 mutex_lock(&ext4_li_mtx); 3026 3027 if (NULL == ext4_li_info) { 3028 ret = ext4_li_info_new(); 3029 if (ret) 3030 goto out; 3031 } 3032 3033 mutex_lock(&ext4_li_info->li_list_mtx); 3034 list_add(&elr->lr_request, &ext4_li_info->li_request_list); 3035 mutex_unlock(&ext4_li_info->li_list_mtx); 3036 3037 sbi->s_li_request = elr; 3038 /* 3039 * set elr to NULL here since it has been inserted to 3040 * the request_list and the removal and free of it is 3041 * handled by ext4_clear_request_list from now on. 3042 */ 3043 elr = NULL; 3044 3045 if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) { 3046 ret = ext4_run_lazyinit_thread(); 3047 if (ret) 3048 goto out; 3049 } 3050 out: 3051 mutex_unlock(&ext4_li_mtx); 3052 if (ret) 3053 kfree(elr); 3054 return ret; 3055 } 3056 3057 /* 3058 * We do not need to lock anything since this is called on 3059 * module unload. 3060 */ 3061 static void ext4_destroy_lazyinit_thread(void) 3062 { 3063 /* 3064 * If thread exited earlier 3065 * there's nothing to be done. 3066 */ 3067 if (!ext4_li_info || !ext4_lazyinit_task) 3068 return; 3069 3070 kthread_stop(ext4_lazyinit_task); 3071 } 3072 3073 static int ext4_fill_super(struct super_block *sb, void *data, int silent) 3074 __releases(kernel_lock) 3075 __acquires(kernel_lock) 3076 { 3077 char *orig_data = kstrdup(data, GFP_KERNEL); 3078 struct buffer_head *bh; 3079 struct ext4_super_block *es = NULL; 3080 struct ext4_sb_info *sbi; 3081 ext4_fsblk_t block; 3082 ext4_fsblk_t sb_block = get_sb_block(&data); 3083 ext4_fsblk_t logical_sb_block; 3084 unsigned long offset = 0; 3085 unsigned long journal_devnum = 0; 3086 unsigned long def_mount_opts; 3087 struct inode *root; 3088 char *cp; 3089 const char *descr; 3090 int ret = -ENOMEM; 3091 int blocksize; 3092 unsigned int db_count; 3093 unsigned int i; 3094 int needs_recovery, has_huge_files; 3095 __u64 blocks_count; 3096 int err; 3097 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 3098 ext4_group_t first_not_zeroed; 3099 3100 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 3101 if (!sbi) 3102 goto out_free_orig; 3103 3104 sbi->s_blockgroup_lock = 3105 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); 3106 if (!sbi->s_blockgroup_lock) { 3107 kfree(sbi); 3108 goto out_free_orig; 3109 } 3110 sb->s_fs_info = sbi; 3111 sbi->s_mount_opt = 0; 3112 sbi->s_resuid = EXT4_DEF_RESUID; 3113 sbi->s_resgid = EXT4_DEF_RESGID; 3114 sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; 3115 sbi->s_sb_block = sb_block; 3116 if (sb->s_bdev->bd_part) 3117 sbi->s_sectors_written_start = 3118 part_stat_read(sb->s_bdev->bd_part, sectors[1]); 3119 3120 /* Cleanup superblock name */ 3121 for (cp = sb->s_id; (cp = strchr(cp, '/'));) 3122 *cp = '!'; 3123 3124 ret = -EINVAL; 3125 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); 3126 if (!blocksize) { 3127 ext4_msg(sb, KERN_ERR, "unable to set blocksize"); 3128 goto out_fail; 3129 } 3130 3131 /* 3132 * The ext4 superblock will not be buffer aligned for other than 1kB 3133 * block sizes. We need to calculate the offset from buffer start. 3134 */ 3135 if (blocksize != EXT4_MIN_BLOCK_SIZE) { 3136 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 3137 offset = do_div(logical_sb_block, blocksize); 3138 } else { 3139 logical_sb_block = sb_block; 3140 } 3141 3142 if (!(bh = sb_bread(sb, logical_sb_block))) { 3143 ext4_msg(sb, KERN_ERR, "unable to read superblock"); 3144 goto out_fail; 3145 } 3146 /* 3147 * Note: s_es must be initialized as soon as possible because 3148 * some ext4 macro-instructions depend on its value 3149 */ 3150 es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); 3151 sbi->s_es = es; 3152 sb->s_magic = le16_to_cpu(es->s_magic); 3153 if (sb->s_magic != EXT4_SUPER_MAGIC) 3154 goto cantfind_ext4; 3155 sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written); 3156 3157 /* Set defaults before we parse the mount options */ 3158 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 3159 set_opt(sb, INIT_INODE_TABLE); 3160 if (def_mount_opts & EXT4_DEFM_DEBUG) 3161 set_opt(sb, DEBUG); 3162 if (def_mount_opts & EXT4_DEFM_BSDGROUPS) { 3163 ext4_msg(sb, KERN_WARNING, deprecated_msg, "bsdgroups", 3164 "2.6.38"); 3165 set_opt(sb, GRPID); 3166 } 3167 if (def_mount_opts & EXT4_DEFM_UID16) 3168 set_opt(sb, NO_UID32); 3169 /* xattr user namespace & acls are now defaulted on */ 3170 #ifdef CONFIG_EXT4_FS_XATTR 3171 set_opt(sb, XATTR_USER); 3172 #endif 3173 #ifdef CONFIG_EXT4_FS_POSIX_ACL 3174 set_opt(sb, POSIX_ACL); 3175 #endif 3176 set_opt(sb, MBLK_IO_SUBMIT); 3177 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) 3178 set_opt(sb, JOURNAL_DATA); 3179 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) 3180 set_opt(sb, ORDERED_DATA); 3181 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) 3182 set_opt(sb, WRITEBACK_DATA); 3183 3184 if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) 3185 set_opt(sb, ERRORS_PANIC); 3186 else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE) 3187 set_opt(sb, ERRORS_CONT); 3188 else 3189 set_opt(sb, ERRORS_RO); 3190 if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY) 3191 set_opt(sb, BLOCK_VALIDITY); 3192 if (def_mount_opts & EXT4_DEFM_DISCARD) 3193 set_opt(sb, DISCARD); 3194 3195 sbi->s_resuid = le16_to_cpu(es->s_def_resuid); 3196 sbi->s_resgid = le16_to_cpu(es->s_def_resgid); 3197 sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; 3198 sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; 3199 sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; 3200 3201 if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0) 3202 set_opt(sb, BARRIER); 3203 3204 /* 3205 * enable delayed allocation by default 3206 * Use -o nodelalloc to turn it off 3207 */ 3208 if (!IS_EXT3_SB(sb) && 3209 ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0)) 3210 set_opt(sb, DELALLOC); 3211 3212 /* 3213 * set default s_li_wait_mult for lazyinit, for the case there is 3214 * no mount option specified. 3215 */ 3216 sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; 3217 3218 if (!parse_options((char *) sbi->s_es->s_mount_opts, sb, 3219 &journal_devnum, &journal_ioprio, NULL, 0)) { 3220 ext4_msg(sb, KERN_WARNING, 3221 "failed to parse options in superblock: %s", 3222 sbi->s_es->s_mount_opts); 3223 } 3224 if (!parse_options((char *) data, sb, &journal_devnum, 3225 &journal_ioprio, NULL, 0)) 3226 goto failed_mount; 3227 3228 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 3229 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); 3230 3231 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && 3232 (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || 3233 EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) || 3234 EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U))) 3235 ext4_msg(sb, KERN_WARNING, 3236 "feature flags set on rev 0 fs, " 3237 "running e2fsck is recommended"); 3238 3239 if (IS_EXT2_SB(sb)) { 3240 if (ext2_feature_set_ok(sb)) 3241 ext4_msg(sb, KERN_INFO, "mounting ext2 file system " 3242 "using the ext4 subsystem"); 3243 else { 3244 ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due " 3245 "to feature incompatibilities"); 3246 goto failed_mount; 3247 } 3248 } 3249 3250 if (IS_EXT3_SB(sb)) { 3251 if (ext3_feature_set_ok(sb)) 3252 ext4_msg(sb, KERN_INFO, "mounting ext3 file system " 3253 "using the ext4 subsystem"); 3254 else { 3255 ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due " 3256 "to feature incompatibilities"); 3257 goto failed_mount; 3258 } 3259 } 3260 3261 /* 3262 * Check feature flags regardless of the revision level, since we 3263 * previously didn't change the revision level when setting the flags, 3264 * so there is a chance incompat flags are set on a rev 0 filesystem. 3265 */ 3266 if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY))) 3267 goto failed_mount; 3268 3269 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); 3270 3271 if (blocksize < EXT4_MIN_BLOCK_SIZE || 3272 blocksize > EXT4_MAX_BLOCK_SIZE) { 3273 ext4_msg(sb, KERN_ERR, 3274 "Unsupported filesystem blocksize %d", blocksize); 3275 goto failed_mount; 3276 } 3277 3278 if (sb->s_blocksize != blocksize) { 3279 /* Validate the filesystem blocksize */ 3280 if (!sb_set_blocksize(sb, blocksize)) { 3281 ext4_msg(sb, KERN_ERR, "bad block size %d", 3282 blocksize); 3283 goto failed_mount; 3284 } 3285 3286 brelse(bh); 3287 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 3288 offset = do_div(logical_sb_block, blocksize); 3289 bh = sb_bread(sb, logical_sb_block); 3290 if (!bh) { 3291 ext4_msg(sb, KERN_ERR, 3292 "Can't read superblock on 2nd try"); 3293 goto failed_mount; 3294 } 3295 es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); 3296 sbi->s_es = es; 3297 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { 3298 ext4_msg(sb, KERN_ERR, 3299 "Magic mismatch, very weird!"); 3300 goto failed_mount; 3301 } 3302 } 3303 3304 has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, 3305 EXT4_FEATURE_RO_COMPAT_HUGE_FILE); 3306 sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, 3307 has_huge_files); 3308 sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); 3309 3310 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { 3311 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; 3312 sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO; 3313 } else { 3314 sbi->s_inode_size = le16_to_cpu(es->s_inode_size); 3315 sbi->s_first_ino = le32_to_cpu(es->s_first_ino); 3316 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || 3317 (!is_power_of_2(sbi->s_inode_size)) || 3318 (sbi->s_inode_size > blocksize)) { 3319 ext4_msg(sb, KERN_ERR, 3320 "unsupported inode size: %d", 3321 sbi->s_inode_size); 3322 goto failed_mount; 3323 } 3324 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) 3325 sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); 3326 } 3327 3328 sbi->s_desc_size = le16_to_cpu(es->s_desc_size); 3329 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { 3330 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || 3331 sbi->s_desc_size > EXT4_MAX_DESC_SIZE || 3332 !is_power_of_2(sbi->s_desc_size)) { 3333 ext4_msg(sb, KERN_ERR, 3334 "unsupported descriptor size %lu", 3335 sbi->s_desc_size); 3336 goto failed_mount; 3337 } 3338 } else 3339 sbi->s_desc_size = EXT4_MIN_DESC_SIZE; 3340 3341 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); 3342 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); 3343 if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0) 3344 goto cantfind_ext4; 3345 3346 sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); 3347 if (sbi->s_inodes_per_block == 0) 3348 goto cantfind_ext4; 3349 sbi->s_itb_per_group = sbi->s_inodes_per_group / 3350 sbi->s_inodes_per_block; 3351 sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); 3352 sbi->s_sbh = bh; 3353 sbi->s_mount_state = le16_to_cpu(es->s_state); 3354 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); 3355 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); 3356 3357 for (i = 0; i < 4; i++) 3358 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); 3359 sbi->s_def_hash_version = es->s_def_hash_version; 3360 i = le32_to_cpu(es->s_flags); 3361 if (i & EXT2_FLAGS_UNSIGNED_HASH) 3362 sbi->s_hash_unsigned = 3; 3363 else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) { 3364 #ifdef __CHAR_UNSIGNED__ 3365 es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH); 3366 sbi->s_hash_unsigned = 3; 3367 #else 3368 es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); 3369 #endif 3370 sb->s_dirt = 1; 3371 } 3372 3373 if (sbi->s_blocks_per_group > blocksize * 8) { 3374 ext4_msg(sb, KERN_ERR, 3375 "#blocks per group too big: %lu", 3376 sbi->s_blocks_per_group); 3377 goto failed_mount; 3378 } 3379 if (sbi->s_inodes_per_group > blocksize * 8) { 3380 ext4_msg(sb, KERN_ERR, 3381 "#inodes per group too big: %lu", 3382 sbi->s_inodes_per_group); 3383 goto failed_mount; 3384 } 3385 3386 /* 3387 * Test whether we have more sectors than will fit in sector_t, 3388 * and whether the max offset is addressable by the page cache. 3389 */ 3390 err = generic_check_addressable(sb->s_blocksize_bits, 3391 ext4_blocks_count(es)); 3392 if (err) { 3393 ext4_msg(sb, KERN_ERR, "filesystem" 3394 " too large to mount safely on this system"); 3395 if (sizeof(sector_t) < 8) 3396 ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); 3397 ret = err; 3398 goto failed_mount; 3399 } 3400 3401 if (EXT4_BLOCKS_PER_GROUP(sb) == 0) 3402 goto cantfind_ext4; 3403 3404 /* check blocks count against device size */ 3405 blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; 3406 if (blocks_count && ext4_blocks_count(es) > blocks_count) { 3407 ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu " 3408 "exceeds size of device (%llu blocks)", 3409 ext4_blocks_count(es), blocks_count); 3410 goto failed_mount; 3411 } 3412 3413 /* 3414 * It makes no sense for the first data block to be beyond the end 3415 * of the filesystem. 3416 */ 3417 if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { 3418 ext4_msg(sb, KERN_WARNING, "bad geometry: first data" 3419 "block %u is beyond end of filesystem (%llu)", 3420 le32_to_cpu(es->s_first_data_block), 3421 ext4_blocks_count(es)); 3422 goto failed_mount; 3423 } 3424 blocks_count = (ext4_blocks_count(es) - 3425 le32_to_cpu(es->s_first_data_block) + 3426 EXT4_BLOCKS_PER_GROUP(sb) - 1); 3427 do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); 3428 if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { 3429 ext4_msg(sb, KERN_WARNING, "groups count too large: %u " 3430 "(block count %llu, first data block %u, " 3431 "blocks per group %lu)", sbi->s_groups_count, 3432 ext4_blocks_count(es), 3433 le32_to_cpu(es->s_first_data_block), 3434 EXT4_BLOCKS_PER_GROUP(sb)); 3435 goto failed_mount; 3436 } 3437 sbi->s_groups_count = blocks_count; 3438 sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, 3439 (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); 3440 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / 3441 EXT4_DESC_PER_BLOCK(sb); 3442 sbi->s_group_desc = ext4_kvmalloc(db_count * 3443 sizeof(struct buffer_head *), 3444 GFP_KERNEL); 3445 if (sbi->s_group_desc == NULL) { 3446 ext4_msg(sb, KERN_ERR, "not enough memory"); 3447 goto failed_mount; 3448 } 3449 3450 #ifdef CONFIG_PROC_FS 3451 if (ext4_proc_root) 3452 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); 3453 #endif 3454 3455 bgl_lock_init(sbi->s_blockgroup_lock); 3456 3457 for (i = 0; i < db_count; i++) { 3458 block = descriptor_loc(sb, logical_sb_block, i); 3459 sbi->s_group_desc[i] = sb_bread(sb, block); 3460 if (!sbi->s_group_desc[i]) { 3461 ext4_msg(sb, KERN_ERR, 3462 "can't read group descriptor %d", i); 3463 db_count = i; 3464 goto failed_mount2; 3465 } 3466 } 3467 if (!ext4_check_descriptors(sb, &first_not_zeroed)) { 3468 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); 3469 goto failed_mount2; 3470 } 3471 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 3472 if (!ext4_fill_flex_info(sb)) { 3473 ext4_msg(sb, KERN_ERR, 3474 "unable to initialize " 3475 "flex_bg meta info!"); 3476 goto failed_mount2; 3477 } 3478 3479 sbi->s_gdb_count = db_count; 3480 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 3481 spin_lock_init(&sbi->s_next_gen_lock); 3482 3483 init_timer(&sbi->s_err_report); 3484 sbi->s_err_report.function = print_daily_error_info; 3485 sbi->s_err_report.data = (unsigned long) sb; 3486 3487 err = percpu_counter_init(&sbi->s_freeblocks_counter, 3488 ext4_count_free_blocks(sb)); 3489 if (!err) { 3490 err = percpu_counter_init(&sbi->s_freeinodes_counter, 3491 ext4_count_free_inodes(sb)); 3492 } 3493 if (!err) { 3494 err = percpu_counter_init(&sbi->s_dirs_counter, 3495 ext4_count_dirs(sb)); 3496 } 3497 if (!err) { 3498 err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); 3499 } 3500 if (err) { 3501 ext4_msg(sb, KERN_ERR, "insufficient memory"); 3502 goto failed_mount3; 3503 } 3504 3505 sbi->s_stripe = ext4_get_stripe_size(sbi); 3506 sbi->s_max_writeback_mb_bump = 128; 3507 3508 /* 3509 * set up enough so that it can read an inode 3510 */ 3511 if (!test_opt(sb, NOLOAD) && 3512 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) 3513 sb->s_op = &ext4_sops; 3514 else 3515 sb->s_op = &ext4_nojournal_sops; 3516 sb->s_export_op = &ext4_export_ops; 3517 sb->s_xattr = ext4_xattr_handlers; 3518 #ifdef CONFIG_QUOTA 3519 sb->s_qcop = &ext4_qctl_operations; 3520 sb->dq_op = &ext4_quota_operations; 3521 #endif 3522 memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid)); 3523 3524 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 3525 mutex_init(&sbi->s_orphan_lock); 3526 sbi->s_resize_flags = 0; 3527 3528 sb->s_root = NULL; 3529 3530 needs_recovery = (es->s_last_orphan != 0 || 3531 EXT4_HAS_INCOMPAT_FEATURE(sb, 3532 EXT4_FEATURE_INCOMPAT_RECOVER)); 3533 3534 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) && 3535 !(sb->s_flags & MS_RDONLY)) 3536 if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block))) 3537 goto failed_mount3; 3538 3539 /* 3540 * The first inode we look at is the journal inode. Don't try 3541 * root first: it may be modified in the journal! 3542 */ 3543 if (!test_opt(sb, NOLOAD) && 3544 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { 3545 if (ext4_load_journal(sb, es, journal_devnum)) 3546 goto failed_mount3; 3547 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && 3548 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 3549 ext4_msg(sb, KERN_ERR, "required journal recovery " 3550 "suppressed and not mounted read-only"); 3551 goto failed_mount_wq; 3552 } else { 3553 clear_opt(sb, DATA_FLAGS); 3554 sbi->s_journal = NULL; 3555 needs_recovery = 0; 3556 goto no_journal; 3557 } 3558 3559 if (ext4_blocks_count(es) > 0xffffffffULL && 3560 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, 3561 JBD2_FEATURE_INCOMPAT_64BIT)) { 3562 ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); 3563 goto failed_mount_wq; 3564 } 3565 3566 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { 3567 jbd2_journal_set_features(sbi->s_journal, 3568 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 3569 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 3570 } else if (test_opt(sb, JOURNAL_CHECKSUM)) { 3571 jbd2_journal_set_features(sbi->s_journal, 3572 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0); 3573 jbd2_journal_clear_features(sbi->s_journal, 0, 0, 3574 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 3575 } else { 3576 jbd2_journal_clear_features(sbi->s_journal, 3577 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 3578 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 3579 } 3580 3581 /* We have now updated the journal if required, so we can 3582 * validate the data journaling mode. */ 3583 switch (test_opt(sb, DATA_FLAGS)) { 3584 case 0: 3585 /* No mode set, assume a default based on the journal 3586 * capabilities: ORDERED_DATA if the journal can 3587 * cope, else JOURNAL_DATA 3588 */ 3589 if (jbd2_journal_check_available_features 3590 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) 3591 set_opt(sb, ORDERED_DATA); 3592 else 3593 set_opt(sb, JOURNAL_DATA); 3594 break; 3595 3596 case EXT4_MOUNT_ORDERED_DATA: 3597 case EXT4_MOUNT_WRITEBACK_DATA: 3598 if (!jbd2_journal_check_available_features 3599 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { 3600 ext4_msg(sb, KERN_ERR, "Journal does not support " 3601 "requested data journaling mode"); 3602 goto failed_mount_wq; 3603 } 3604 default: 3605 break; 3606 } 3607 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); 3608 3609 /* 3610 * The journal may have updated the bg summary counts, so we 3611 * need to update the global counters. 3612 */ 3613 percpu_counter_set(&sbi->s_freeblocks_counter, 3614 ext4_count_free_blocks(sb)); 3615 percpu_counter_set(&sbi->s_freeinodes_counter, 3616 ext4_count_free_inodes(sb)); 3617 percpu_counter_set(&sbi->s_dirs_counter, 3618 ext4_count_dirs(sb)); 3619 percpu_counter_set(&sbi->s_dirtyblocks_counter, 0); 3620 3621 no_journal: 3622 /* 3623 * The maximum number of concurrent works can be high and 3624 * concurrency isn't really necessary. Limit it to 1. 3625 */ 3626 EXT4_SB(sb)->dio_unwritten_wq = 3627 alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM | WQ_UNBOUND, 1); 3628 if (!EXT4_SB(sb)->dio_unwritten_wq) { 3629 printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); 3630 goto failed_mount_wq; 3631 } 3632 3633 /* 3634 * The jbd2_journal_load will have done any necessary log recovery, 3635 * so we can safely mount the rest of the filesystem now. 3636 */ 3637 3638 root = ext4_iget(sb, EXT4_ROOT_INO); 3639 if (IS_ERR(root)) { 3640 ext4_msg(sb, KERN_ERR, "get root inode failed"); 3641 ret = PTR_ERR(root); 3642 root = NULL; 3643 goto failed_mount4; 3644 } 3645 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 3646 ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck"); 3647 goto failed_mount4; 3648 } 3649 sb->s_root = d_alloc_root(root); 3650 if (!sb->s_root) { 3651 ext4_msg(sb, KERN_ERR, "get root dentry failed"); 3652 ret = -ENOMEM; 3653 goto failed_mount4; 3654 } 3655 3656 ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY); 3657 3658 /* determine the minimum size of new large inodes, if present */ 3659 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { 3660 sbi->s_want_extra_isize = sizeof(struct ext4_inode) - 3661 EXT4_GOOD_OLD_INODE_SIZE; 3662 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 3663 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) { 3664 if (sbi->s_want_extra_isize < 3665 le16_to_cpu(es->s_want_extra_isize)) 3666 sbi->s_want_extra_isize = 3667 le16_to_cpu(es->s_want_extra_isize); 3668 if (sbi->s_want_extra_isize < 3669 le16_to_cpu(es->s_min_extra_isize)) 3670 sbi->s_want_extra_isize = 3671 le16_to_cpu(es->s_min_extra_isize); 3672 } 3673 } 3674 /* Check if enough inode space is available */ 3675 if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > 3676 sbi->s_inode_size) { 3677 sbi->s_want_extra_isize = sizeof(struct ext4_inode) - 3678 EXT4_GOOD_OLD_INODE_SIZE; 3679 ext4_msg(sb, KERN_INFO, "required extra inode space not" 3680 "available"); 3681 } 3682 3683 if (test_opt(sb, DELALLOC) && 3684 (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) { 3685 ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - " 3686 "requested data journaling mode"); 3687 clear_opt(sb, DELALLOC); 3688 } 3689 if (test_opt(sb, DIOREAD_NOLOCK)) { 3690 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { 3691 ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock " 3692 "option - requested data journaling mode"); 3693 clear_opt(sb, DIOREAD_NOLOCK); 3694 } 3695 if (sb->s_blocksize < PAGE_SIZE) { 3696 ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock " 3697 "option - block size is too small"); 3698 clear_opt(sb, DIOREAD_NOLOCK); 3699 } 3700 } 3701 3702 err = ext4_setup_system_zone(sb); 3703 if (err) { 3704 ext4_msg(sb, KERN_ERR, "failed to initialize system " 3705 "zone (%d)", err); 3706 goto failed_mount4; 3707 } 3708 3709 ext4_ext_init(sb); 3710 err = ext4_mb_init(sb, needs_recovery); 3711 if (err) { 3712 ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)", 3713 err); 3714 goto failed_mount4; 3715 } 3716 3717 err = ext4_register_li_request(sb, first_not_zeroed); 3718 if (err) 3719 goto failed_mount4; 3720 3721 sbi->s_kobj.kset = ext4_kset; 3722 init_completion(&sbi->s_kobj_unregister); 3723 err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, 3724 "%s", sb->s_id); 3725 if (err) { 3726 ext4_mb_release(sb); 3727 ext4_ext_release(sb); 3728 goto failed_mount4; 3729 }; 3730 3731 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; 3732 ext4_orphan_cleanup(sb, es); 3733 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; 3734 if (needs_recovery) { 3735 ext4_msg(sb, KERN_INFO, "recovery complete"); 3736 ext4_mark_recovery_complete(sb, es); 3737 } 3738 if (EXT4_SB(sb)->s_journal) { 3739 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 3740 descr = " journalled data mode"; 3741 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 3742 descr = " ordered data mode"; 3743 else 3744 descr = " writeback data mode"; 3745 } else 3746 descr = "out journal"; 3747 3748 ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " 3749 "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts, 3750 *sbi->s_es->s_mount_opts ? "; " : "", orig_data); 3751 3752 if (es->s_error_count) 3753 mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ 3754 3755 kfree(orig_data); 3756 return 0; 3757 3758 cantfind_ext4: 3759 if (!silent) 3760 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); 3761 goto failed_mount; 3762 3763 failed_mount4: 3764 iput(root); 3765 sb->s_root = NULL; 3766 ext4_msg(sb, KERN_ERR, "mount failed"); 3767 destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); 3768 failed_mount_wq: 3769 ext4_release_system_zone(sb); 3770 if (sbi->s_journal) { 3771 jbd2_journal_destroy(sbi->s_journal); 3772 sbi->s_journal = NULL; 3773 } 3774 failed_mount3: 3775 del_timer(&sbi->s_err_report); 3776 if (sbi->s_flex_groups) 3777 ext4_kvfree(sbi->s_flex_groups); 3778 percpu_counter_destroy(&sbi->s_freeblocks_counter); 3779 percpu_counter_destroy(&sbi->s_freeinodes_counter); 3780 percpu_counter_destroy(&sbi->s_dirs_counter); 3781 percpu_counter_destroy(&sbi->s_dirtyblocks_counter); 3782 if (sbi->s_mmp_tsk) 3783 kthread_stop(sbi->s_mmp_tsk); 3784 failed_mount2: 3785 for (i = 0; i < db_count; i++) 3786 brelse(sbi->s_group_desc[i]); 3787 ext4_kvfree(sbi->s_group_desc); 3788 failed_mount: 3789 if (sbi->s_proc) { 3790 remove_proc_entry(sb->s_id, ext4_proc_root); 3791 } 3792 #ifdef CONFIG_QUOTA 3793 for (i = 0; i < MAXQUOTAS; i++) 3794 kfree(sbi->s_qf_names[i]); 3795 #endif 3796 ext4_blkdev_remove(sbi); 3797 brelse(bh); 3798 out_fail: 3799 sb->s_fs_info = NULL; 3800 kfree(sbi->s_blockgroup_lock); 3801 kfree(sbi); 3802 out_free_orig: 3803 kfree(orig_data); 3804 return ret; 3805 } 3806 3807 /* 3808 * Setup any per-fs journal parameters now. We'll do this both on 3809 * initial mount, once the journal has been initialised but before we've 3810 * done any recovery; and again on any subsequent remount. 3811 */ 3812 static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) 3813 { 3814 struct ext4_sb_info *sbi = EXT4_SB(sb); 3815 3816 journal->j_commit_interval = sbi->s_commit_interval; 3817 journal->j_min_batch_time = sbi->s_min_batch_time; 3818 journal->j_max_batch_time = sbi->s_max_batch_time; 3819 3820 write_lock(&journal->j_state_lock); 3821 if (test_opt(sb, BARRIER)) 3822 journal->j_flags |= JBD2_BARRIER; 3823 else 3824 journal->j_flags &= ~JBD2_BARRIER; 3825 if (test_opt(sb, DATA_ERR_ABORT)) 3826 journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR; 3827 else 3828 journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR; 3829 write_unlock(&journal->j_state_lock); 3830 } 3831 3832 static journal_t *ext4_get_journal(struct super_block *sb, 3833 unsigned int journal_inum) 3834 { 3835 struct inode *journal_inode; 3836 journal_t *journal; 3837 3838 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 3839 3840 /* First, test for the existence of a valid inode on disk. Bad 3841 * things happen if we iget() an unused inode, as the subsequent 3842 * iput() will try to delete it. */ 3843 3844 journal_inode = ext4_iget(sb, journal_inum); 3845 if (IS_ERR(journal_inode)) { 3846 ext4_msg(sb, KERN_ERR, "no journal found"); 3847 return NULL; 3848 } 3849 if (!journal_inode->i_nlink) { 3850 make_bad_inode(journal_inode); 3851 iput(journal_inode); 3852 ext4_msg(sb, KERN_ERR, "journal inode is deleted"); 3853 return NULL; 3854 } 3855 3856 jbd_debug(2, "Journal inode found at %p: %lld bytes\n", 3857 journal_inode, journal_inode->i_size); 3858 if (!S_ISREG(journal_inode->i_mode)) { 3859 ext4_msg(sb, KERN_ERR, "invalid journal inode"); 3860 iput(journal_inode); 3861 return NULL; 3862 } 3863 3864 journal = jbd2_journal_init_inode(journal_inode); 3865 if (!journal) { 3866 ext4_msg(sb, KERN_ERR, "Could not load journal inode"); 3867 iput(journal_inode); 3868 return NULL; 3869 } 3870 journal->j_private = sb; 3871 ext4_init_journal_params(sb, journal); 3872 return journal; 3873 } 3874 3875 static journal_t *ext4_get_dev_journal(struct super_block *sb, 3876 dev_t j_dev) 3877 { 3878 struct buffer_head *bh; 3879 journal_t *journal; 3880 ext4_fsblk_t start; 3881 ext4_fsblk_t len; 3882 int hblock, blocksize; 3883 ext4_fsblk_t sb_block; 3884 unsigned long offset; 3885 struct ext4_super_block *es; 3886 struct block_device *bdev; 3887 3888 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 3889 3890 bdev = ext4_blkdev_get(j_dev, sb); 3891 if (bdev == NULL) 3892 return NULL; 3893 3894 blocksize = sb->s_blocksize; 3895 hblock = bdev_logical_block_size(bdev); 3896 if (blocksize < hblock) { 3897 ext4_msg(sb, KERN_ERR, 3898 "blocksize too small for journal device"); 3899 goto out_bdev; 3900 } 3901 3902 sb_block = EXT4_MIN_BLOCK_SIZE / blocksize; 3903 offset = EXT4_MIN_BLOCK_SIZE % blocksize; 3904 set_blocksize(bdev, blocksize); 3905 if (!(bh = __bread(bdev, sb_block, blocksize))) { 3906 ext4_msg(sb, KERN_ERR, "couldn't read superblock of " 3907 "external journal"); 3908 goto out_bdev; 3909 } 3910 3911 es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); 3912 if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || 3913 !(le32_to_cpu(es->s_feature_incompat) & 3914 EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { 3915 ext4_msg(sb, KERN_ERR, "external journal has " 3916 "bad superblock"); 3917 brelse(bh); 3918 goto out_bdev; 3919 } 3920 3921 if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { 3922 ext4_msg(sb, KERN_ERR, "journal UUID does not match"); 3923 brelse(bh); 3924 goto out_bdev; 3925 } 3926 3927 len = ext4_blocks_count(es); 3928 start = sb_block + 1; 3929 brelse(bh); /* we're done with the superblock */ 3930 3931 journal = jbd2_journal_init_dev(bdev, sb->s_bdev, 3932 start, len, blocksize); 3933 if (!journal) { 3934 ext4_msg(sb, KERN_ERR, "failed to create device journal"); 3935 goto out_bdev; 3936 } 3937 journal->j_private = sb; 3938 ll_rw_block(READ, 1, &journal->j_sb_buffer); 3939 wait_on_buffer(journal->j_sb_buffer); 3940 if (!buffer_uptodate(journal->j_sb_buffer)) { 3941 ext4_msg(sb, KERN_ERR, "I/O error on journal device"); 3942 goto out_journal; 3943 } 3944 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { 3945 ext4_msg(sb, KERN_ERR, "External journal has more than one " 3946 "user (unsupported) - %d", 3947 be32_to_cpu(journal->j_superblock->s_nr_users)); 3948 goto out_journal; 3949 } 3950 EXT4_SB(sb)->journal_bdev = bdev; 3951 ext4_init_journal_params(sb, journal); 3952 return journal; 3953 3954 out_journal: 3955 jbd2_journal_destroy(journal); 3956 out_bdev: 3957 ext4_blkdev_put(bdev); 3958 return NULL; 3959 } 3960 3961 static int ext4_load_journal(struct super_block *sb, 3962 struct ext4_super_block *es, 3963 unsigned long journal_devnum) 3964 { 3965 journal_t *journal; 3966 unsigned int journal_inum = le32_to_cpu(es->s_journal_inum); 3967 dev_t journal_dev; 3968 int err = 0; 3969 int really_read_only; 3970 3971 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 3972 3973 if (journal_devnum && 3974 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 3975 ext4_msg(sb, KERN_INFO, "external journal device major/minor " 3976 "numbers have changed"); 3977 journal_dev = new_decode_dev(journal_devnum); 3978 } else 3979 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); 3980 3981 really_read_only = bdev_read_only(sb->s_bdev); 3982 3983 /* 3984 * Are we loading a blank journal or performing recovery after a 3985 * crash? For recovery, we need to check in advance whether we 3986 * can get read-write access to the device. 3987 */ 3988 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 3989 if (sb->s_flags & MS_RDONLY) { 3990 ext4_msg(sb, KERN_INFO, "INFO: recovery " 3991 "required on readonly filesystem"); 3992 if (really_read_only) { 3993 ext4_msg(sb, KERN_ERR, "write access " 3994 "unavailable, cannot proceed"); 3995 return -EROFS; 3996 } 3997 ext4_msg(sb, KERN_INFO, "write access will " 3998 "be enabled during recovery"); 3999 } 4000 } 4001 4002 if (journal_inum && journal_dev) { 4003 ext4_msg(sb, KERN_ERR, "filesystem has both journal " 4004 "and inode journals!"); 4005 return -EINVAL; 4006 } 4007 4008 if (journal_inum) { 4009 if (!(journal = ext4_get_journal(sb, journal_inum))) 4010 return -EINVAL; 4011 } else { 4012 if (!(journal = ext4_get_dev_journal(sb, journal_dev))) 4013 return -EINVAL; 4014 } 4015 4016 if (!(journal->j_flags & JBD2_BARRIER)) 4017 ext4_msg(sb, KERN_INFO, "barriers disabled"); 4018 4019 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { 4020 err = jbd2_journal_update_format(journal); 4021 if (err) { 4022 ext4_msg(sb, KERN_ERR, "error updating journal"); 4023 jbd2_journal_destroy(journal); 4024 return err; 4025 } 4026 } 4027 4028 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) 4029 err = jbd2_journal_wipe(journal, !really_read_only); 4030 if (!err) { 4031 char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL); 4032 if (save) 4033 memcpy(save, ((char *) es) + 4034 EXT4_S_ERR_START, EXT4_S_ERR_LEN); 4035 err = jbd2_journal_load(journal); 4036 if (save) 4037 memcpy(((char *) es) + EXT4_S_ERR_START, 4038 save, EXT4_S_ERR_LEN); 4039 kfree(save); 4040 } 4041 4042 if (err) { 4043 ext4_msg(sb, KERN_ERR, "error loading journal"); 4044 jbd2_journal_destroy(journal); 4045 return err; 4046 } 4047 4048 EXT4_SB(sb)->s_journal = journal; 4049 ext4_clear_journal_err(sb, es); 4050 4051 if (!really_read_only && journal_devnum && 4052 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 4053 es->s_journal_dev = cpu_to_le32(journal_devnum); 4054 4055 /* Make sure we flush the recovery flag to disk. */ 4056 ext4_commit_super(sb, 1); 4057 } 4058 4059 return 0; 4060 } 4061 4062 static int ext4_commit_super(struct super_block *sb, int sync) 4063 { 4064 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 4065 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; 4066 int error = 0; 4067 4068 if (!sbh) 4069 return error; 4070 if (buffer_write_io_error(sbh)) { 4071 /* 4072 * Oh, dear. A previous attempt to write the 4073 * superblock failed. This could happen because the 4074 * USB device was yanked out. Or it could happen to 4075 * be a transient write error and maybe the block will 4076 * be remapped. Nothing we can do but to retry the 4077 * write and hope for the best. 4078 */ 4079 ext4_msg(sb, KERN_ERR, "previous I/O error to " 4080 "superblock detected"); 4081 clear_buffer_write_io_error(sbh); 4082 set_buffer_uptodate(sbh); 4083 } 4084 /* 4085 * If the file system is mounted read-only, don't update the 4086 * superblock write time. This avoids updating the superblock 4087 * write time when we are mounting the root file system 4088 * read/only but we need to replay the journal; at that point, 4089 * for people who are east of GMT and who make their clock 4090 * tick in localtime for Windows bug-for-bug compatibility, 4091 * the clock is set in the future, and this will cause e2fsck 4092 * to complain and force a full file system check. 4093 */ 4094 if (!(sb->s_flags & MS_RDONLY)) 4095 es->s_wtime = cpu_to_le32(get_seconds()); 4096 if (sb->s_bdev->bd_part) 4097 es->s_kbytes_written = 4098 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + 4099 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 4100 EXT4_SB(sb)->s_sectors_written_start) >> 1)); 4101 else 4102 es->s_kbytes_written = 4103 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); 4104 ext4_free_blocks_count_set(es, percpu_counter_sum_positive( 4105 &EXT4_SB(sb)->s_freeblocks_counter)); 4106 es->s_free_inodes_count = 4107 cpu_to_le32(percpu_counter_sum_positive( 4108 &EXT4_SB(sb)->s_freeinodes_counter)); 4109 sb->s_dirt = 0; 4110 BUFFER_TRACE(sbh, "marking dirty"); 4111 mark_buffer_dirty(sbh); 4112 if (sync) { 4113 error = sync_dirty_buffer(sbh); 4114 if (error) 4115 return error; 4116 4117 error = buffer_write_io_error(sbh); 4118 if (error) { 4119 ext4_msg(sb, KERN_ERR, "I/O error while writing " 4120 "superblock"); 4121 clear_buffer_write_io_error(sbh); 4122 set_buffer_uptodate(sbh); 4123 } 4124 } 4125 return error; 4126 } 4127 4128 /* 4129 * Have we just finished recovery? If so, and if we are mounting (or 4130 * remounting) the filesystem readonly, then we will end up with a 4131 * consistent fs on disk. Record that fact. 4132 */ 4133 static void ext4_mark_recovery_complete(struct super_block *sb, 4134 struct ext4_super_block *es) 4135 { 4136 journal_t *journal = EXT4_SB(sb)->s_journal; 4137 4138 if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { 4139 BUG_ON(journal != NULL); 4140 return; 4141 } 4142 jbd2_journal_lock_updates(journal); 4143 if (jbd2_journal_flush(journal) < 0) 4144 goto out; 4145 4146 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && 4147 sb->s_flags & MS_RDONLY) { 4148 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 4149 ext4_commit_super(sb, 1); 4150 } 4151 4152 out: 4153 jbd2_journal_unlock_updates(journal); 4154 } 4155 4156 /* 4157 * If we are mounting (or read-write remounting) a filesystem whose journal 4158 * has recorded an error from a previous lifetime, move that error to the 4159 * main filesystem now. 4160 */ 4161 static void ext4_clear_journal_err(struct super_block *sb, 4162 struct ext4_super_block *es) 4163 { 4164 journal_t *journal; 4165 int j_errno; 4166 const char *errstr; 4167 4168 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 4169 4170 journal = EXT4_SB(sb)->s_journal; 4171 4172 /* 4173 * Now check for any error status which may have been recorded in the 4174 * journal by a prior ext4_error() or ext4_abort() 4175 */ 4176 4177 j_errno = jbd2_journal_errno(journal); 4178 if (j_errno) { 4179 char nbuf[16]; 4180 4181 errstr = ext4_decode_error(sb, j_errno, nbuf); 4182 ext4_warning(sb, "Filesystem error recorded " 4183 "from previous mount: %s", errstr); 4184 ext4_warning(sb, "Marking fs in need of filesystem check."); 4185 4186 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 4187 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 4188 ext4_commit_super(sb, 1); 4189 4190 jbd2_journal_clear_err(journal); 4191 } 4192 } 4193 4194 /* 4195 * Force the running and committing transactions to commit, 4196 * and wait on the commit. 4197 */ 4198 int ext4_force_commit(struct super_block *sb) 4199 { 4200 journal_t *journal; 4201 int ret = 0; 4202 4203 if (sb->s_flags & MS_RDONLY) 4204 return 0; 4205 4206 journal = EXT4_SB(sb)->s_journal; 4207 if (journal) { 4208 vfs_check_frozen(sb, SB_FREEZE_TRANS); 4209 ret = ext4_journal_force_commit(journal); 4210 } 4211 4212 return ret; 4213 } 4214 4215 static void ext4_write_super(struct super_block *sb) 4216 { 4217 lock_super(sb); 4218 ext4_commit_super(sb, 1); 4219 unlock_super(sb); 4220 } 4221 4222 static int ext4_sync_fs(struct super_block *sb, int wait) 4223 { 4224 int ret = 0; 4225 tid_t target; 4226 struct ext4_sb_info *sbi = EXT4_SB(sb); 4227 4228 trace_ext4_sync_fs(sb, wait); 4229 flush_workqueue(sbi->dio_unwritten_wq); 4230 if (jbd2_journal_start_commit(sbi->s_journal, &target)) { 4231 if (wait) 4232 jbd2_log_wait_commit(sbi->s_journal, target); 4233 } 4234 return ret; 4235 } 4236 4237 /* 4238 * LVM calls this function before a (read-only) snapshot is created. This 4239 * gives us a chance to flush the journal completely and mark the fs clean. 4240 * 4241 * Note that only this function cannot bring a filesystem to be in a clean 4242 * state independently, because ext4 prevents a new handle from being started 4243 * by @sb->s_frozen, which stays in an upper layer. It thus needs help from 4244 * the upper layer. 4245 */ 4246 static int ext4_freeze(struct super_block *sb) 4247 { 4248 int error = 0; 4249 journal_t *journal; 4250 4251 if (sb->s_flags & MS_RDONLY) 4252 return 0; 4253 4254 journal = EXT4_SB(sb)->s_journal; 4255 4256 /* Now we set up the journal barrier. */ 4257 jbd2_journal_lock_updates(journal); 4258 4259 /* 4260 * Don't clear the needs_recovery flag if we failed to flush 4261 * the journal. 4262 */ 4263 error = jbd2_journal_flush(journal); 4264 if (error < 0) 4265 goto out; 4266 4267 /* Journal blocked and flushed, clear needs_recovery flag. */ 4268 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 4269 error = ext4_commit_super(sb, 1); 4270 out: 4271 /* we rely on s_frozen to stop further updates */ 4272 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 4273 return error; 4274 } 4275 4276 /* 4277 * Called by LVM after the snapshot is done. We need to reset the RECOVER 4278 * flag here, even though the filesystem is not technically dirty yet. 4279 */ 4280 static int ext4_unfreeze(struct super_block *sb) 4281 { 4282 if (sb->s_flags & MS_RDONLY) 4283 return 0; 4284 4285 lock_super(sb); 4286 /* Reset the needs_recovery flag before the fs is unlocked. */ 4287 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 4288 ext4_commit_super(sb, 1); 4289 unlock_super(sb); 4290 return 0; 4291 } 4292 4293 /* 4294 * Structure to save mount options for ext4_remount's benefit 4295 */ 4296 struct ext4_mount_options { 4297 unsigned long s_mount_opt; 4298 unsigned long s_mount_opt2; 4299 uid_t s_resuid; 4300 gid_t s_resgid; 4301 unsigned long s_commit_interval; 4302 u32 s_min_batch_time, s_max_batch_time; 4303 #ifdef CONFIG_QUOTA 4304 int s_jquota_fmt; 4305 char *s_qf_names[MAXQUOTAS]; 4306 #endif 4307 }; 4308 4309 static int ext4_remount(struct super_block *sb, int *flags, char *data) 4310 { 4311 struct ext4_super_block *es; 4312 struct ext4_sb_info *sbi = EXT4_SB(sb); 4313 ext4_fsblk_t n_blocks_count = 0; 4314 unsigned long old_sb_flags; 4315 struct ext4_mount_options old_opts; 4316 int enable_quota = 0; 4317 ext4_group_t g; 4318 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 4319 int err = 0; 4320 #ifdef CONFIG_QUOTA 4321 int i; 4322 #endif 4323 char *orig_data = kstrdup(data, GFP_KERNEL); 4324 4325 /* Store the original options */ 4326 lock_super(sb); 4327 old_sb_flags = sb->s_flags; 4328 old_opts.s_mount_opt = sbi->s_mount_opt; 4329 old_opts.s_mount_opt2 = sbi->s_mount_opt2; 4330 old_opts.s_resuid = sbi->s_resuid; 4331 old_opts.s_resgid = sbi->s_resgid; 4332 old_opts.s_commit_interval = sbi->s_commit_interval; 4333 old_opts.s_min_batch_time = sbi->s_min_batch_time; 4334 old_opts.s_max_batch_time = sbi->s_max_batch_time; 4335 #ifdef CONFIG_QUOTA 4336 old_opts.s_jquota_fmt = sbi->s_jquota_fmt; 4337 for (i = 0; i < MAXQUOTAS; i++) 4338 old_opts.s_qf_names[i] = sbi->s_qf_names[i]; 4339 #endif 4340 if (sbi->s_journal && sbi->s_journal->j_task->io_context) 4341 journal_ioprio = sbi->s_journal->j_task->io_context->ioprio; 4342 4343 /* 4344 * Allow the "check" option to be passed as a remount option. 4345 */ 4346 if (!parse_options(data, sb, NULL, &journal_ioprio, 4347 &n_blocks_count, 1)) { 4348 err = -EINVAL; 4349 goto restore_opts; 4350 } 4351 4352 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) 4353 ext4_abort(sb, "Abort forced by user"); 4354 4355 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 4356 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); 4357 4358 es = sbi->s_es; 4359 4360 if (sbi->s_journal) { 4361 ext4_init_journal_params(sb, sbi->s_journal); 4362 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); 4363 } 4364 4365 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || 4366 n_blocks_count > ext4_blocks_count(es)) { 4367 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) { 4368 err = -EROFS; 4369 goto restore_opts; 4370 } 4371 4372 if (*flags & MS_RDONLY) { 4373 err = dquot_suspend(sb, -1); 4374 if (err < 0) 4375 goto restore_opts; 4376 4377 /* 4378 * First of all, the unconditional stuff we have to do 4379 * to disable replay of the journal when we next remount 4380 */ 4381 sb->s_flags |= MS_RDONLY; 4382 4383 /* 4384 * OK, test if we are remounting a valid rw partition 4385 * readonly, and if so set the rdonly flag and then 4386 * mark the partition as valid again. 4387 */ 4388 if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) && 4389 (sbi->s_mount_state & EXT4_VALID_FS)) 4390 es->s_state = cpu_to_le16(sbi->s_mount_state); 4391 4392 if (sbi->s_journal) 4393 ext4_mark_recovery_complete(sb, es); 4394 } else { 4395 /* Make sure we can mount this feature set readwrite */ 4396 if (!ext4_feature_set_ok(sb, 0)) { 4397 err = -EROFS; 4398 goto restore_opts; 4399 } 4400 /* 4401 * Make sure the group descriptor checksums 4402 * are sane. If they aren't, refuse to remount r/w. 4403 */ 4404 for (g = 0; g < sbi->s_groups_count; g++) { 4405 struct ext4_group_desc *gdp = 4406 ext4_get_group_desc(sb, g, NULL); 4407 4408 if (!ext4_group_desc_csum_verify(sbi, g, gdp)) { 4409 ext4_msg(sb, KERN_ERR, 4410 "ext4_remount: Checksum for group %u failed (%u!=%u)", 4411 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), 4412 le16_to_cpu(gdp->bg_checksum)); 4413 err = -EINVAL; 4414 goto restore_opts; 4415 } 4416 } 4417 4418 /* 4419 * If we have an unprocessed orphan list hanging 4420 * around from a previously readonly bdev mount, 4421 * require a full umount/remount for now. 4422 */ 4423 if (es->s_last_orphan) { 4424 ext4_msg(sb, KERN_WARNING, "Couldn't " 4425 "remount RDWR because of unprocessed " 4426 "orphan inode list. Please " 4427 "umount/remount instead"); 4428 err = -EINVAL; 4429 goto restore_opts; 4430 } 4431 4432 /* 4433 * Mounting a RDONLY partition read-write, so reread 4434 * and store the current valid flag. (It may have 4435 * been changed by e2fsck since we originally mounted 4436 * the partition.) 4437 */ 4438 if (sbi->s_journal) 4439 ext4_clear_journal_err(sb, es); 4440 sbi->s_mount_state = le16_to_cpu(es->s_state); 4441 if ((err = ext4_group_extend(sb, es, n_blocks_count))) 4442 goto restore_opts; 4443 if (!ext4_setup_super(sb, es, 0)) 4444 sb->s_flags &= ~MS_RDONLY; 4445 if (EXT4_HAS_INCOMPAT_FEATURE(sb, 4446 EXT4_FEATURE_INCOMPAT_MMP)) 4447 if (ext4_multi_mount_protect(sb, 4448 le64_to_cpu(es->s_mmp_block))) { 4449 err = -EROFS; 4450 goto restore_opts; 4451 } 4452 enable_quota = 1; 4453 } 4454 } 4455 4456 /* 4457 * Reinitialize lazy itable initialization thread based on 4458 * current settings 4459 */ 4460 if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE)) 4461 ext4_unregister_li_request(sb); 4462 else { 4463 ext4_group_t first_not_zeroed; 4464 first_not_zeroed = ext4_has_uninit_itable(sb); 4465 ext4_register_li_request(sb, first_not_zeroed); 4466 } 4467 4468 ext4_setup_system_zone(sb); 4469 if (sbi->s_journal == NULL) 4470 ext4_commit_super(sb, 1); 4471 4472 #ifdef CONFIG_QUOTA 4473 /* Release old quota file names */ 4474 for (i = 0; i < MAXQUOTAS; i++) 4475 if (old_opts.s_qf_names[i] && 4476 old_opts.s_qf_names[i] != sbi->s_qf_names[i]) 4477 kfree(old_opts.s_qf_names[i]); 4478 #endif 4479 unlock_super(sb); 4480 if (enable_quota) 4481 dquot_resume(sb, -1); 4482 4483 ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data); 4484 kfree(orig_data); 4485 return 0; 4486 4487 restore_opts: 4488 sb->s_flags = old_sb_flags; 4489 sbi->s_mount_opt = old_opts.s_mount_opt; 4490 sbi->s_mount_opt2 = old_opts.s_mount_opt2; 4491 sbi->s_resuid = old_opts.s_resuid; 4492 sbi->s_resgid = old_opts.s_resgid; 4493 sbi->s_commit_interval = old_opts.s_commit_interval; 4494 sbi->s_min_batch_time = old_opts.s_min_batch_time; 4495 sbi->s_max_batch_time = old_opts.s_max_batch_time; 4496 #ifdef CONFIG_QUOTA 4497 sbi->s_jquota_fmt = old_opts.s_jquota_fmt; 4498 for (i = 0; i < MAXQUOTAS; i++) { 4499 if (sbi->s_qf_names[i] && 4500 old_opts.s_qf_names[i] != sbi->s_qf_names[i]) 4501 kfree(sbi->s_qf_names[i]); 4502 sbi->s_qf_names[i] = old_opts.s_qf_names[i]; 4503 } 4504 #endif 4505 unlock_super(sb); 4506 kfree(orig_data); 4507 return err; 4508 } 4509 4510 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) 4511 { 4512 struct super_block *sb = dentry->d_sb; 4513 struct ext4_sb_info *sbi = EXT4_SB(sb); 4514 struct ext4_super_block *es = sbi->s_es; 4515 u64 fsid; 4516 s64 bfree; 4517 4518 if (test_opt(sb, MINIX_DF)) { 4519 sbi->s_overhead_last = 0; 4520 } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { 4521 ext4_group_t i, ngroups = ext4_get_groups_count(sb); 4522 ext4_fsblk_t overhead = 0; 4523 4524 /* 4525 * Compute the overhead (FS structures). This is constant 4526 * for a given filesystem unless the number of block groups 4527 * changes so we cache the previous value until it does. 4528 */ 4529 4530 /* 4531 * All of the blocks before first_data_block are 4532 * overhead 4533 */ 4534 overhead = le32_to_cpu(es->s_first_data_block); 4535 4536 /* 4537 * Add the overhead attributed to the superblock and 4538 * block group descriptors. If the sparse superblocks 4539 * feature is turned on, then not all groups have this. 4540 */ 4541 for (i = 0; i < ngroups; i++) { 4542 overhead += ext4_bg_has_super(sb, i) + 4543 ext4_bg_num_gdb(sb, i); 4544 cond_resched(); 4545 } 4546 4547 /* 4548 * Every block group has an inode bitmap, a block 4549 * bitmap, and an inode table. 4550 */ 4551 overhead += ngroups * (2 + sbi->s_itb_per_group); 4552 sbi->s_overhead_last = overhead; 4553 smp_wmb(); 4554 sbi->s_blocks_last = ext4_blocks_count(es); 4555 } 4556 4557 buf->f_type = EXT4_SUPER_MAGIC; 4558 buf->f_bsize = sb->s_blocksize; 4559 buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; 4560 bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - 4561 percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); 4562 /* prevent underflow in case that few free space is available */ 4563 buf->f_bfree = max_t(s64, bfree, 0); 4564 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); 4565 if (buf->f_bfree < ext4_r_blocks_count(es)) 4566 buf->f_bavail = 0; 4567 buf->f_files = le32_to_cpu(es->s_inodes_count); 4568 buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); 4569 buf->f_namelen = EXT4_NAME_LEN; 4570 fsid = le64_to_cpup((void *)es->s_uuid) ^ 4571 le64_to_cpup((void *)es->s_uuid + sizeof(u64)); 4572 buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; 4573 buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; 4574 4575 return 0; 4576 } 4577 4578 /* Helper function for writing quotas on sync - we need to start transaction 4579 * before quota file is locked for write. Otherwise the are possible deadlocks: 4580 * Process 1 Process 2 4581 * ext4_create() quota_sync() 4582 * jbd2_journal_start() write_dquot() 4583 * dquot_initialize() down(dqio_mutex) 4584 * down(dqio_mutex) jbd2_journal_start() 4585 * 4586 */ 4587 4588 #ifdef CONFIG_QUOTA 4589 4590 static inline struct inode *dquot_to_inode(struct dquot *dquot) 4591 { 4592 return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; 4593 } 4594 4595 static int ext4_write_dquot(struct dquot *dquot) 4596 { 4597 int ret, err; 4598 handle_t *handle; 4599 struct inode *inode; 4600 4601 inode = dquot_to_inode(dquot); 4602 handle = ext4_journal_start(inode, 4603 EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); 4604 if (IS_ERR(handle)) 4605 return PTR_ERR(handle); 4606 ret = dquot_commit(dquot); 4607 err = ext4_journal_stop(handle); 4608 if (!ret) 4609 ret = err; 4610 return ret; 4611 } 4612 4613 static int ext4_acquire_dquot(struct dquot *dquot) 4614 { 4615 int ret, err; 4616 handle_t *handle; 4617 4618 handle = ext4_journal_start(dquot_to_inode(dquot), 4619 EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); 4620 if (IS_ERR(handle)) 4621 return PTR_ERR(handle); 4622 ret = dquot_acquire(dquot); 4623 err = ext4_journal_stop(handle); 4624 if (!ret) 4625 ret = err; 4626 return ret; 4627 } 4628 4629 static int ext4_release_dquot(struct dquot *dquot) 4630 { 4631 int ret, err; 4632 handle_t *handle; 4633 4634 handle = ext4_journal_start(dquot_to_inode(dquot), 4635 EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); 4636 if (IS_ERR(handle)) { 4637 /* Release dquot anyway to avoid endless cycle in dqput() */ 4638 dquot_release(dquot); 4639 return PTR_ERR(handle); 4640 } 4641 ret = dquot_release(dquot); 4642 err = ext4_journal_stop(handle); 4643 if (!ret) 4644 ret = err; 4645 return ret; 4646 } 4647 4648 static int ext4_mark_dquot_dirty(struct dquot *dquot) 4649 { 4650 /* Are we journaling quotas? */ 4651 if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || 4652 EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { 4653 dquot_mark_dquot_dirty(dquot); 4654 return ext4_write_dquot(dquot); 4655 } else { 4656 return dquot_mark_dquot_dirty(dquot); 4657 } 4658 } 4659 4660 static int ext4_write_info(struct super_block *sb, int type) 4661 { 4662 int ret, err; 4663 handle_t *handle; 4664 4665 /* Data block + inode block */ 4666 handle = ext4_journal_start(sb->s_root->d_inode, 2); 4667 if (IS_ERR(handle)) 4668 return PTR_ERR(handle); 4669 ret = dquot_commit_info(sb, type); 4670 err = ext4_journal_stop(handle); 4671 if (!ret) 4672 ret = err; 4673 return ret; 4674 } 4675 4676 /* 4677 * Turn on quotas during mount time - we need to find 4678 * the quota file and such... 4679 */ 4680 static int ext4_quota_on_mount(struct super_block *sb, int type) 4681 { 4682 return dquot_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], 4683 EXT4_SB(sb)->s_jquota_fmt, type); 4684 } 4685 4686 /* 4687 * Standard function to be called on quota_on 4688 */ 4689 static int ext4_quota_on(struct super_block *sb, int type, int format_id, 4690 struct path *path) 4691 { 4692 int err; 4693 4694 if (!test_opt(sb, QUOTA)) 4695 return -EINVAL; 4696 4697 /* Quotafile not on the same filesystem? */ 4698 if (path->mnt->mnt_sb != sb) 4699 return -EXDEV; 4700 /* Journaling quota? */ 4701 if (EXT4_SB(sb)->s_qf_names[type]) { 4702 /* Quotafile not in fs root? */ 4703 if (path->dentry->d_parent != sb->s_root) 4704 ext4_msg(sb, KERN_WARNING, 4705 "Quota file not on filesystem root. " 4706 "Journaled quota will not work"); 4707 } 4708 4709 /* 4710 * When we journal data on quota file, we have to flush journal to see 4711 * all updates to the file when we bypass pagecache... 4712 */ 4713 if (EXT4_SB(sb)->s_journal && 4714 ext4_should_journal_data(path->dentry->d_inode)) { 4715 /* 4716 * We don't need to lock updates but journal_flush() could 4717 * otherwise be livelocked... 4718 */ 4719 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 4720 err = jbd2_journal_flush(EXT4_SB(sb)->s_journal); 4721 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 4722 if (err) 4723 return err; 4724 } 4725 4726 return dquot_quota_on(sb, type, format_id, path); 4727 } 4728 4729 static int ext4_quota_off(struct super_block *sb, int type) 4730 { 4731 struct inode *inode = sb_dqopt(sb)->files[type]; 4732 handle_t *handle; 4733 4734 /* Force all delayed allocation blocks to be allocated. 4735 * Caller already holds s_umount sem */ 4736 if (test_opt(sb, DELALLOC)) 4737 sync_filesystem(sb); 4738 4739 if (!inode) 4740 goto out; 4741 4742 /* Update modification times of quota files when userspace can 4743 * start looking at them */ 4744 handle = ext4_journal_start(inode, 1); 4745 if (IS_ERR(handle)) 4746 goto out; 4747 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 4748 ext4_mark_inode_dirty(handle, inode); 4749 ext4_journal_stop(handle); 4750 4751 out: 4752 return dquot_quota_off(sb, type); 4753 } 4754 4755 /* Read data from quotafile - avoid pagecache and such because we cannot afford 4756 * acquiring the locks... As quota files are never truncated and quota code 4757 * itself serializes the operations (and no one else should touch the files) 4758 * we don't have to be afraid of races */ 4759 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 4760 size_t len, loff_t off) 4761 { 4762 struct inode *inode = sb_dqopt(sb)->files[type]; 4763 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 4764 int err = 0; 4765 int offset = off & (sb->s_blocksize - 1); 4766 int tocopy; 4767 size_t toread; 4768 struct buffer_head *bh; 4769 loff_t i_size = i_size_read(inode); 4770 4771 if (off > i_size) 4772 return 0; 4773 if (off+len > i_size) 4774 len = i_size-off; 4775 toread = len; 4776 while (toread > 0) { 4777 tocopy = sb->s_blocksize - offset < toread ? 4778 sb->s_blocksize - offset : toread; 4779 bh = ext4_bread(NULL, inode, blk, 0, &err); 4780 if (err) 4781 return err; 4782 if (!bh) /* A hole? */ 4783 memset(data, 0, tocopy); 4784 else 4785 memcpy(data, bh->b_data+offset, tocopy); 4786 brelse(bh); 4787 offset = 0; 4788 toread -= tocopy; 4789 data += tocopy; 4790 blk++; 4791 } 4792 return len; 4793 } 4794 4795 /* Write to quotafile (we know the transaction is already started and has 4796 * enough credits) */ 4797 static ssize_t ext4_quota_write(struct super_block *sb, int type, 4798 const char *data, size_t len, loff_t off) 4799 { 4800 struct inode *inode = sb_dqopt(sb)->files[type]; 4801 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 4802 int err = 0; 4803 int offset = off & (sb->s_blocksize - 1); 4804 struct buffer_head *bh; 4805 handle_t *handle = journal_current_handle(); 4806 4807 if (EXT4_SB(sb)->s_journal && !handle) { 4808 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" 4809 " cancelled because transaction is not started", 4810 (unsigned long long)off, (unsigned long long)len); 4811 return -EIO; 4812 } 4813 /* 4814 * Since we account only one data block in transaction credits, 4815 * then it is impossible to cross a block boundary. 4816 */ 4817 if (sb->s_blocksize - offset < len) { 4818 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" 4819 " cancelled because not block aligned", 4820 (unsigned long long)off, (unsigned long long)len); 4821 return -EIO; 4822 } 4823 4824 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); 4825 bh = ext4_bread(handle, inode, blk, 1, &err); 4826 if (!bh) 4827 goto out; 4828 err = ext4_journal_get_write_access(handle, bh); 4829 if (err) { 4830 brelse(bh); 4831 goto out; 4832 } 4833 lock_buffer(bh); 4834 memcpy(bh->b_data+offset, data, len); 4835 flush_dcache_page(bh->b_page); 4836 unlock_buffer(bh); 4837 err = ext4_handle_dirty_metadata(handle, NULL, bh); 4838 brelse(bh); 4839 out: 4840 if (err) { 4841 mutex_unlock(&inode->i_mutex); 4842 return err; 4843 } 4844 if (inode->i_size < off + len) { 4845 i_size_write(inode, off + len); 4846 EXT4_I(inode)->i_disksize = inode->i_size; 4847 ext4_mark_inode_dirty(handle, inode); 4848 } 4849 mutex_unlock(&inode->i_mutex); 4850 return len; 4851 } 4852 4853 #endif 4854 4855 static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, 4856 const char *dev_name, void *data) 4857 { 4858 return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super); 4859 } 4860 4861 #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 4862 static inline void register_as_ext2(void) 4863 { 4864 int err = register_filesystem(&ext2_fs_type); 4865 if (err) 4866 printk(KERN_WARNING 4867 "EXT4-fs: Unable to register as ext2 (%d)\n", err); 4868 } 4869 4870 static inline void unregister_as_ext2(void) 4871 { 4872 unregister_filesystem(&ext2_fs_type); 4873 } 4874 4875 static inline int ext2_feature_set_ok(struct super_block *sb) 4876 { 4877 if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP)) 4878 return 0; 4879 if (sb->s_flags & MS_RDONLY) 4880 return 1; 4881 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP)) 4882 return 0; 4883 return 1; 4884 } 4885 MODULE_ALIAS("ext2"); 4886 #else 4887 static inline void register_as_ext2(void) { } 4888 static inline void unregister_as_ext2(void) { } 4889 static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; } 4890 #endif 4891 4892 #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 4893 static inline void register_as_ext3(void) 4894 { 4895 int err = register_filesystem(&ext3_fs_type); 4896 if (err) 4897 printk(KERN_WARNING 4898 "EXT4-fs: Unable to register as ext3 (%d)\n", err); 4899 } 4900 4901 static inline void unregister_as_ext3(void) 4902 { 4903 unregister_filesystem(&ext3_fs_type); 4904 } 4905 4906 static inline int ext3_feature_set_ok(struct super_block *sb) 4907 { 4908 if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP)) 4909 return 0; 4910 if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) 4911 return 0; 4912 if (sb->s_flags & MS_RDONLY) 4913 return 1; 4914 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP)) 4915 return 0; 4916 return 1; 4917 } 4918 MODULE_ALIAS("ext3"); 4919 #else 4920 static inline void register_as_ext3(void) { } 4921 static inline void unregister_as_ext3(void) { } 4922 static inline int ext3_feature_set_ok(struct super_block *sb) { return 0; } 4923 #endif 4924 4925 static struct file_system_type ext4_fs_type = { 4926 .owner = THIS_MODULE, 4927 .name = "ext4", 4928 .mount = ext4_mount, 4929 .kill_sb = kill_block_super, 4930 .fs_flags = FS_REQUIRES_DEV, 4931 }; 4932 4933 static int __init ext4_init_feat_adverts(void) 4934 { 4935 struct ext4_features *ef; 4936 int ret = -ENOMEM; 4937 4938 ef = kzalloc(sizeof(struct ext4_features), GFP_KERNEL); 4939 if (!ef) 4940 goto out; 4941 4942 ef->f_kobj.kset = ext4_kset; 4943 init_completion(&ef->f_kobj_unregister); 4944 ret = kobject_init_and_add(&ef->f_kobj, &ext4_feat_ktype, NULL, 4945 "features"); 4946 if (ret) { 4947 kfree(ef); 4948 goto out; 4949 } 4950 4951 ext4_feat = ef; 4952 ret = 0; 4953 out: 4954 return ret; 4955 } 4956 4957 static void ext4_exit_feat_adverts(void) 4958 { 4959 kobject_put(&ext4_feat->f_kobj); 4960 wait_for_completion(&ext4_feat->f_kobj_unregister); 4961 kfree(ext4_feat); 4962 } 4963 4964 /* Shared across all ext4 file systems */ 4965 wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; 4966 struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; 4967 4968 static int __init ext4_init_fs(void) 4969 { 4970 int i, err; 4971 4972 ext4_check_flag_values(); 4973 4974 for (i = 0; i < EXT4_WQ_HASH_SZ; i++) { 4975 mutex_init(&ext4__aio_mutex[i]); 4976 init_waitqueue_head(&ext4__ioend_wq[i]); 4977 } 4978 4979 err = ext4_init_pageio(); 4980 if (err) 4981 return err; 4982 err = ext4_init_system_zone(); 4983 if (err) 4984 goto out7; 4985 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); 4986 if (!ext4_kset) 4987 goto out6; 4988 ext4_proc_root = proc_mkdir("fs/ext4", NULL); 4989 if (!ext4_proc_root) 4990 goto out5; 4991 4992 err = ext4_init_feat_adverts(); 4993 if (err) 4994 goto out4; 4995 4996 err = ext4_init_mballoc(); 4997 if (err) 4998 goto out3; 4999 5000 err = ext4_init_xattr(); 5001 if (err) 5002 goto out2; 5003 err = init_inodecache(); 5004 if (err) 5005 goto out1; 5006 register_as_ext3(); 5007 register_as_ext2(); 5008 err = register_filesystem(&ext4_fs_type); 5009 if (err) 5010 goto out; 5011 5012 ext4_li_info = NULL; 5013 mutex_init(&ext4_li_mtx); 5014 return 0; 5015 out: 5016 unregister_as_ext2(); 5017 unregister_as_ext3(); 5018 destroy_inodecache(); 5019 out1: 5020 ext4_exit_xattr(); 5021 out2: 5022 ext4_exit_mballoc(); 5023 out3: 5024 ext4_exit_feat_adverts(); 5025 out4: 5026 remove_proc_entry("fs/ext4", NULL); 5027 out5: 5028 kset_unregister(ext4_kset); 5029 out6: 5030 ext4_exit_system_zone(); 5031 out7: 5032 ext4_exit_pageio(); 5033 return err; 5034 } 5035 5036 static void __exit ext4_exit_fs(void) 5037 { 5038 ext4_destroy_lazyinit_thread(); 5039 unregister_as_ext2(); 5040 unregister_as_ext3(); 5041 unregister_filesystem(&ext4_fs_type); 5042 destroy_inodecache(); 5043 ext4_exit_xattr(); 5044 ext4_exit_mballoc(); 5045 ext4_exit_feat_adverts(); 5046 remove_proc_entry("fs/ext4", NULL); 5047 kset_unregister(ext4_kset); 5048 ext4_exit_system_zone(); 5049 ext4_exit_pageio(); 5050 } 5051 5052 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); 5053 MODULE_DESCRIPTION("Fourth Extended Filesystem"); 5054 MODULE_LICENSE("GPL"); 5055 module_init(ext4_init_fs) 5056 module_exit(ext4_exit_fs) 5057