xref: /linux/fs/ext4/super.c (revision 6a3335b43342b42dd6c69b4bbbde15d622cb49ca)
1 /*
2  *  linux/fs/ext4/super.c
3  *
4  * Copyright (C) 1992, 1993, 1994, 1995
5  * Remy Card (card@masi.ibp.fr)
6  * Laboratoire MASI - Institut Blaise Pascal
7  * Universite Pierre et Marie Curie (Paris VI)
8  *
9  *  from
10  *
11  *  linux/fs/minix/inode.c
12  *
13  *  Copyright (C) 1991, 1992  Linus Torvalds
14  *
15  *  Big-endian to little-endian byte-swapping/bitmaps by
16  *        David S. Miller (davem@caip.rutgers.edu), 1995
17  */
18 
19 #include <linux/module.h>
20 #include <linux/string.h>
21 #include <linux/fs.h>
22 #include <linux/time.h>
23 #include <linux/jbd2.h>
24 #include <linux/slab.h>
25 #include <linux/init.h>
26 #include <linux/blkdev.h>
27 #include <linux/parser.h>
28 #include <linux/smp_lock.h>
29 #include <linux/buffer_head.h>
30 #include <linux/exportfs.h>
31 #include <linux/vfs.h>
32 #include <linux/random.h>
33 #include <linux/mount.h>
34 #include <linux/namei.h>
35 #include <linux/quotaops.h>
36 #include <linux/seq_file.h>
37 #include <linux/proc_fs.h>
38 #include <linux/marker.h>
39 #include <linux/log2.h>
40 #include <linux/crc16.h>
41 #include <asm/uaccess.h>
42 
43 #include "ext4.h"
44 #include "ext4_jbd2.h"
45 #include "xattr.h"
46 #include "acl.h"
47 #include "namei.h"
48 #include "group.h"
49 
/*
 * Parent proc directory for ext4.  ext4_put_super() removes the
 * per-superblock entry (named after sb->s_id) from underneath it.
 */
struct proc_dir_entry *ext4_proc_root;

/*
 * Forward declarations of file-local helpers that are referenced before
 * their definitions.
 */
static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
			     unsigned long journal_devnum);
static void ext4_mark_recovery_complete(struct super_block *sb,
					struct ext4_super_block *es);
static void ext4_clear_journal_err(struct super_block *sb,
				   struct ext4_super_block *es);
static const char *ext4_decode_error(struct super_block *sb, int errno,
				     char nbuf[16]);
static int ext4_commit_super(struct super_block *sb,
			      struct ext4_super_block *es, int sync);

/* Callbacks installed in ext4_sops. */
static void ext4_write_super(struct super_block *sb);
static int ext4_sync_fs(struct super_block *sb, int wait);
static int ext4_freeze(struct super_block *sb);
static int ext4_unfreeze(struct super_block *sb);
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
static int ext4_remount(struct super_block *sb, int *flags, char *data);

68 
69 
70 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
71 			       struct ext4_group_desc *bg)
72 {
73 	return le32_to_cpu(bg->bg_block_bitmap_lo) |
74 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
75 		(ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
76 }
77 
78 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
79 			       struct ext4_group_desc *bg)
80 {
81 	return le32_to_cpu(bg->bg_inode_bitmap_lo) |
82 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
83 		(ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
84 }
85 
86 ext4_fsblk_t ext4_inode_table(struct super_block *sb,
87 			      struct ext4_group_desc *bg)
88 {
89 	return le32_to_cpu(bg->bg_inode_table_lo) |
90 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
91 		(ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
92 }
93 
94 __u32 ext4_free_blks_count(struct super_block *sb,
95 			      struct ext4_group_desc *bg)
96 {
97 	return le16_to_cpu(bg->bg_free_blocks_count_lo) |
98 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
99 		(__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
100 }
101 
102 __u32 ext4_free_inodes_count(struct super_block *sb,
103 			      struct ext4_group_desc *bg)
104 {
105 	return le16_to_cpu(bg->bg_free_inodes_count_lo) |
106 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
107 		(__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
108 }
109 
110 __u32 ext4_used_dirs_count(struct super_block *sb,
111 			      struct ext4_group_desc *bg)
112 {
113 	return le16_to_cpu(bg->bg_used_dirs_count_lo) |
114 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
115 		(__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
116 }
117 
118 __u32 ext4_itable_unused_count(struct super_block *sb,
119 			      struct ext4_group_desc *bg)
120 {
121 	return le16_to_cpu(bg->bg_itable_unused_lo) |
122 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
123 		(__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
124 }
125 
126 void ext4_block_bitmap_set(struct super_block *sb,
127 			   struct ext4_group_desc *bg, ext4_fsblk_t blk)
128 {
129 	bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
130 	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
131 		bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
132 }
133 
134 void ext4_inode_bitmap_set(struct super_block *sb,
135 			   struct ext4_group_desc *bg, ext4_fsblk_t blk)
136 {
137 	bg->bg_inode_bitmap_lo  = cpu_to_le32((u32)blk);
138 	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
139 		bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
140 }
141 
142 void ext4_inode_table_set(struct super_block *sb,
143 			  struct ext4_group_desc *bg, ext4_fsblk_t blk)
144 {
145 	bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
146 	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
147 		bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
148 }
149 
150 void ext4_free_blks_set(struct super_block *sb,
151 			  struct ext4_group_desc *bg, __u32 count)
152 {
153 	bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
154 	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
155 		bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
156 }
157 
158 void ext4_free_inodes_set(struct super_block *sb,
159 			  struct ext4_group_desc *bg, __u32 count)
160 {
161 	bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
162 	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
163 		bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
164 }
165 
166 void ext4_used_dirs_set(struct super_block *sb,
167 			  struct ext4_group_desc *bg, __u32 count)
168 {
169 	bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
170 	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
171 		bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
172 }
173 
174 void ext4_itable_unused_set(struct super_block *sb,
175 			  struct ext4_group_desc *bg, __u32 count)
176 {
177 	bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
178 	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
179 		bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
180 }
181 
182 /*
183  * Wrappers for jbd2_journal_start/end.
184  *
185  * The only special thing we need to do here is to make sure that all
186  * journal_end calls result in the superblock being marked dirty, so
187  * that sync() will call the filesystem's write_super callback if
188  * appropriate.
189  */
190 handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
191 {
192 	journal_t *journal;
193 
194 	if (sb->s_flags & MS_RDONLY)
195 		return ERR_PTR(-EROFS);
196 
197 	/* Special case here: if the journal has aborted behind our
198 	 * backs (eg. EIO in the commit thread), then we still need to
199 	 * take the FS itself readonly cleanly. */
200 	journal = EXT4_SB(sb)->s_journal;
201 	if (journal) {
202 		if (is_journal_aborted(journal)) {
203 			ext4_abort(sb, __func__,
204 				   "Detected aborted journal");
205 			return ERR_PTR(-EROFS);
206 		}
207 		return jbd2_journal_start(journal, nblocks);
208 	}
209 	/*
210 	 * We're not journaling, return the appropriate indication.
211 	 */
212 	current->journal_info = EXT4_NOJOURNAL_HANDLE;
213 	return current->journal_info;
214 }
215 
216 /*
217  * The only special thing we need to do here is to make sure that all
218  * jbd2_journal_stop calls result in the superblock being marked dirty, so
219  * that sync() will call the filesystem's write_super callback if
220  * appropriate.
221  */
222 int __ext4_journal_stop(const char *where, handle_t *handle)
223 {
224 	struct super_block *sb;
225 	int err;
226 	int rc;
227 
228 	if (!ext4_handle_valid(handle)) {
229 		/*
230 		 * Do this here since we don't call jbd2_journal_stop() in
231 		 * no-journal mode.
232 		 */
233 		current->journal_info = NULL;
234 		return 0;
235 	}
236 	sb = handle->h_transaction->t_journal->j_private;
237 	err = handle->h_err;
238 	rc = jbd2_journal_stop(handle);
239 
240 	if (!err)
241 		err = rc;
242 	if (err)
243 		__ext4_std_error(sb, where, err);
244 	return err;
245 }
246 
247 void ext4_journal_abort_handle(const char *caller, const char *err_fn,
248 		struct buffer_head *bh, handle_t *handle, int err)
249 {
250 	char nbuf[16];
251 	const char *errstr = ext4_decode_error(NULL, err, nbuf);
252 
253 	BUG_ON(!ext4_handle_valid(handle));
254 
255 	if (bh)
256 		BUFFER_TRACE(bh, "abort");
257 
258 	if (!handle->h_err)
259 		handle->h_err = err;
260 
261 	if (is_handle_aborted(handle))
262 		return;
263 
264 	printk(KERN_ERR "%s: aborting transaction: %s in %s\n",
265 	       caller, errstr, err_fn);
266 
267 	jbd2_journal_abort_handle(handle);
268 }
269 
270 /* Deal with the reporting of failure conditions on a filesystem such as
271  * inconsistencies detected or read IO failures.
272  *
273  * On ext2, we can store the error state of the filesystem in the
274  * superblock.  That is not possible on ext4, because we may have other
275  * write ordering constraints on the superblock which prevent us from
276  * writing it out straight away; and given that the journal is about to
277  * be aborted, we can't rely on the current, or future, transactions to
278  * write out the superblock safely.
279  *
280  * We'll just use the jbd2_journal_abort() error code to record an error in
 * the journal instead.  On recovery, the journal will complain about
282  * that error until we've noted it down and cleared it.
283  */
284 
285 static void ext4_handle_error(struct super_block *sb)
286 {
287 	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
288 
289 	EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
290 	es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
291 
292 	if (sb->s_flags & MS_RDONLY)
293 		return;
294 
295 	if (!test_opt(sb, ERRORS_CONT)) {
296 		journal_t *journal = EXT4_SB(sb)->s_journal;
297 
298 		EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
299 		if (journal)
300 			jbd2_journal_abort(journal, -EIO);
301 	}
302 	if (test_opt(sb, ERRORS_RO)) {
303 		printk(KERN_CRIT "Remounting filesystem read-only\n");
304 		sb->s_flags |= MS_RDONLY;
305 	}
306 	ext4_commit_super(sb, es, 1);
307 	if (test_opt(sb, ERRORS_PANIC))
308 		panic("EXT4-fs (device %s): panic forced after error\n",
309 			sb->s_id);
310 }
311 
312 void ext4_error(struct super_block *sb, const char *function,
313 		const char *fmt, ...)
314 {
315 	va_list args;
316 
317 	va_start(args, fmt);
318 	printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
319 	vprintk(fmt, args);
320 	printk("\n");
321 	va_end(args);
322 
323 	ext4_handle_error(sb);
324 }
325 
326 static const char *ext4_decode_error(struct super_block *sb, int errno,
327 				     char nbuf[16])
328 {
329 	char *errstr = NULL;
330 
331 	switch (errno) {
332 	case -EIO:
333 		errstr = "IO failure";
334 		break;
335 	case -ENOMEM:
336 		errstr = "Out of memory";
337 		break;
338 	case -EROFS:
339 		if (!sb || EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT)
340 			errstr = "Journal has aborted";
341 		else
342 			errstr = "Readonly filesystem";
343 		break;
344 	default:
345 		/* If the caller passed in an extra buffer for unknown
346 		 * errors, textualise them now.  Else we just return
347 		 * NULL. */
348 		if (nbuf) {
349 			/* Check for truncated error codes... */
350 			if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
351 				errstr = nbuf;
352 		}
353 		break;
354 	}
355 
356 	return errstr;
357 }
358 
359 /* __ext4_std_error decodes expected errors from journaling functions
360  * automatically and invokes the appropriate error response.  */
361 
362 void __ext4_std_error(struct super_block *sb, const char *function, int errno)
363 {
364 	char nbuf[16];
365 	const char *errstr;
366 
367 	/* Special case: if the error is EROFS, and we're not already
368 	 * inside a transaction, then there's really no point in logging
369 	 * an error. */
370 	if (errno == -EROFS && journal_current_handle() == NULL &&
371 	    (sb->s_flags & MS_RDONLY))
372 		return;
373 
374 	errstr = ext4_decode_error(sb, errno, nbuf);
375 	printk(KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n",
376 	       sb->s_id, function, errstr);
377 
378 	ext4_handle_error(sb);
379 }
380 
381 /*
382  * ext4_abort is a much stronger failure handler than ext4_error.  The
383  * abort function may be used to deal with unrecoverable failures such
384  * as journal IO errors or ENOMEM at a critical moment in log management.
385  *
386  * We unconditionally force the filesystem into an ABORT|READONLY state,
387  * unless the error response on the fs has been set to panic in which
388  * case we take the easy way out and panic immediately.
389  */
390 
391 void ext4_abort(struct super_block *sb, const char *function,
392 		const char *fmt, ...)
393 {
394 	va_list args;
395 
396 	printk(KERN_CRIT "ext4_abort called.\n");
397 
398 	va_start(args, fmt);
399 	printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
400 	vprintk(fmt, args);
401 	printk("\n");
402 	va_end(args);
403 
404 	if (test_opt(sb, ERRORS_PANIC))
405 		panic("EXT4-fs panic from previous error\n");
406 
407 	if (sb->s_flags & MS_RDONLY)
408 		return;
409 
410 	printk(KERN_CRIT "Remounting filesystem read-only\n");
411 	EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
412 	sb->s_flags |= MS_RDONLY;
413 	EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
414 	if (EXT4_SB(sb)->s_journal)
415 		jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
416 }
417 
418 void ext4_warning(struct super_block *sb, const char *function,
419 		  const char *fmt, ...)
420 {
421 	va_list args;
422 
423 	va_start(args, fmt);
424 	printk(KERN_WARNING "EXT4-fs warning (device %s): %s: ",
425 	       sb->s_id, function);
426 	vprintk(fmt, args);
427 	printk("\n");
428 	va_end(args);
429 }
430 
431 void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp,
432 				const char *function, const char *fmt, ...)
433 __releases(bitlock)
434 __acquires(bitlock)
435 {
436 	va_list args;
437 	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
438 
439 	va_start(args, fmt);
440 	printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
441 	vprintk(fmt, args);
442 	printk("\n");
443 	va_end(args);
444 
445 	if (test_opt(sb, ERRORS_CONT)) {
446 		EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
447 		es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
448 		ext4_commit_super(sb, es, 0);
449 		return;
450 	}
451 	ext4_unlock_group(sb, grp);
452 	ext4_handle_error(sb);
453 	/*
454 	 * We only get here in the ERRORS_RO case; relocking the group
455 	 * may be dangerous, but nothing bad will happen since the
456 	 * filesystem will have already been marked read/only and the
457 	 * journal has been aborted.  We return 1 as a hint to callers
458 	 * who might what to use the return value from
459 	 * ext4_grp_locked_error() to distinguish beween the
460 	 * ERRORS_CONT and ERRORS_RO case, and perhaps return more
461 	 * aggressively from the ext4 function in question, with a
462 	 * more appropriate error code.
463 	 */
464 	ext4_lock_group(sb, grp);
465 	return;
466 }
467 
468 
469 void ext4_update_dynamic_rev(struct super_block *sb)
470 {
471 	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
472 
473 	if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
474 		return;
475 
476 	ext4_warning(sb, __func__,
477 		     "updating to rev %d because of new feature flag, "
478 		     "running e2fsck is recommended",
479 		     EXT4_DYNAMIC_REV);
480 
481 	es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
482 	es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
483 	es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
484 	/* leave es->s_feature_*compat flags alone */
485 	/* es->s_uuid will be set by e2fsck if empty */
486 
487 	/*
488 	 * The rest of the superblock fields should be zero, and if not it
489 	 * means they are likely already in use, so leave them alone.  We
490 	 * can leave it up to e2fsck to clean up any inconsistencies there.
491 	 */
492 }
493 
494 /*
495  * Open the external journal device
496  */
497 static struct block_device *ext4_blkdev_get(dev_t dev)
498 {
499 	struct block_device *bdev;
500 	char b[BDEVNAME_SIZE];
501 
502 	bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
503 	if (IS_ERR(bdev))
504 		goto fail;
505 	return bdev;
506 
507 fail:
508 	printk(KERN_ERR "EXT4-fs: failed to open journal device %s: %ld\n",
509 			__bdevname(dev, b), PTR_ERR(bdev));
510 	return NULL;
511 }
512 
513 /*
514  * Release the journal device
515  */
516 static int ext4_blkdev_put(struct block_device *bdev)
517 {
518 	bd_release(bdev);
519 	return blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
520 }
521 
522 static int ext4_blkdev_remove(struct ext4_sb_info *sbi)
523 {
524 	struct block_device *bdev;
525 	int ret = -ENODEV;
526 
527 	bdev = sbi->journal_bdev;
528 	if (bdev) {
529 		ret = ext4_blkdev_put(bdev);
530 		sbi->journal_bdev = NULL;
531 	}
532 	return ret;
533 }
534 
535 static inline struct inode *orphan_list_entry(struct list_head *l)
536 {
537 	return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
538 }
539 
540 static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
541 {
542 	struct list_head *l;
543 
544 	printk(KERN_ERR "sb orphan head is %d\n",
545 	       le32_to_cpu(sbi->s_es->s_last_orphan));
546 
547 	printk(KERN_ERR "sb_info orphan list:\n");
548 	list_for_each(l, &sbi->s_orphan) {
549 		struct inode *inode = orphan_list_entry(l);
550 		printk(KERN_ERR "  "
551 		       "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
552 		       inode->i_sb->s_id, inode->i_ino, inode,
553 		       inode->i_mode, inode->i_nlink,
554 		       NEXT_ORPHAN(inode));
555 	}
556 }
557 
558 static void ext4_put_super(struct super_block *sb)
559 {
560 	struct ext4_sb_info *sbi = EXT4_SB(sb);
561 	struct ext4_super_block *es = sbi->s_es;
562 	int i, err;
563 
564 	ext4_mb_release(sb);
565 	ext4_ext_release(sb);
566 	ext4_xattr_put_super(sb);
567 	if (sbi->s_journal) {
568 		err = jbd2_journal_destroy(sbi->s_journal);
569 		sbi->s_journal = NULL;
570 		if (err < 0)
571 			ext4_abort(sb, __func__,
572 				   "Couldn't clean up the journal");
573 	}
574 	if (!(sb->s_flags & MS_RDONLY)) {
575 		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
576 		es->s_state = cpu_to_le16(sbi->s_mount_state);
577 		ext4_commit_super(sb, es, 1);
578 	}
579 	if (sbi->s_proc) {
580 		remove_proc_entry("inode_readahead_blks", sbi->s_proc);
581 		remove_proc_entry(sb->s_id, ext4_proc_root);
582 	}
583 
584 	for (i = 0; i < sbi->s_gdb_count; i++)
585 		brelse(sbi->s_group_desc[i]);
586 	kfree(sbi->s_group_desc);
587 	kfree(sbi->s_flex_groups);
588 	percpu_counter_destroy(&sbi->s_freeblocks_counter);
589 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
590 	percpu_counter_destroy(&sbi->s_dirs_counter);
591 	percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
592 	brelse(sbi->s_sbh);
593 #ifdef CONFIG_QUOTA
594 	for (i = 0; i < MAXQUOTAS; i++)
595 		kfree(sbi->s_qf_names[i]);
596 #endif
597 
598 	/* Debugging code just in case the in-memory inode orphan list
599 	 * isn't empty.  The on-disk one can be non-empty if we've
600 	 * detected an error and taken the fs readonly, but the
601 	 * in-memory list had better be clean by this point. */
602 	if (!list_empty(&sbi->s_orphan))
603 		dump_orphan_list(sb, sbi);
604 	J_ASSERT(list_empty(&sbi->s_orphan));
605 
606 	invalidate_bdev(sb->s_bdev);
607 	if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
608 		/*
609 		 * Invalidate the journal device's buffers.  We don't want them
610 		 * floating about in memory - the physical journal device may
611 		 * hotswapped, and it breaks the `ro-after' testing code.
612 		 */
613 		sync_blockdev(sbi->journal_bdev);
614 		invalidate_bdev(sbi->journal_bdev);
615 		ext4_blkdev_remove(sbi);
616 	}
617 	sb->s_fs_info = NULL;
618 	kfree(sbi);
619 	return;
620 }
621 
622 static struct kmem_cache *ext4_inode_cachep;
623 
624 /*
625  * Called inside transaction, so use GFP_NOFS
626  */
627 static struct inode *ext4_alloc_inode(struct super_block *sb)
628 {
629 	struct ext4_inode_info *ei;
630 
631 	ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
632 	if (!ei)
633 		return NULL;
634 #ifdef CONFIG_EXT4_FS_POSIX_ACL
635 	ei->i_acl = EXT4_ACL_NOT_CACHED;
636 	ei->i_default_acl = EXT4_ACL_NOT_CACHED;
637 #endif
638 	ei->vfs_inode.i_version = 1;
639 	ei->vfs_inode.i_data.writeback_index = 0;
640 	memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
641 	INIT_LIST_HEAD(&ei->i_prealloc_list);
642 	spin_lock_init(&ei->i_prealloc_lock);
643 	/*
644 	 * Note:  We can be called before EXT4_SB(sb)->s_journal is set,
645 	 * therefore it can be null here.  Don't check it, just initialize
646 	 * jinode.
647 	 */
648 	jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
649 	ei->i_reserved_data_blocks = 0;
650 	ei->i_reserved_meta_blocks = 0;
651 	ei->i_allocated_meta_blocks = 0;
652 	ei->i_delalloc_reserved_flag = 0;
653 	spin_lock_init(&(ei->i_block_reservation_lock));
654 	return &ei->vfs_inode;
655 }
656 
657 static void ext4_destroy_inode(struct inode *inode)
658 {
659 	if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
660 		printk("EXT4 Inode %p: orphan list check failed!\n",
661 			EXT4_I(inode));
662 		print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
663 				EXT4_I(inode), sizeof(struct ext4_inode_info),
664 				true);
665 		dump_stack();
666 	}
667 	kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
668 }
669 
670 static void init_once(void *foo)
671 {
672 	struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
673 
674 	INIT_LIST_HEAD(&ei->i_orphan);
675 #ifdef CONFIG_EXT4_FS_XATTR
676 	init_rwsem(&ei->xattr_sem);
677 #endif
678 	init_rwsem(&ei->i_data_sem);
679 	inode_init_once(&ei->vfs_inode);
680 }
681 
682 static int init_inodecache(void)
683 {
684 	ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
685 					     sizeof(struct ext4_inode_info),
686 					     0, (SLAB_RECLAIM_ACCOUNT|
687 						SLAB_MEM_SPREAD),
688 					     init_once);
689 	if (ext4_inode_cachep == NULL)
690 		return -ENOMEM;
691 	return 0;
692 }
693 
694 static void destroy_inodecache(void)
695 {
696 	kmem_cache_destroy(ext4_inode_cachep);
697 }
698 
699 static void ext4_clear_inode(struct inode *inode)
700 {
701 #ifdef CONFIG_EXT4_FS_POSIX_ACL
702 	if (EXT4_I(inode)->i_acl &&
703 			EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) {
704 		posix_acl_release(EXT4_I(inode)->i_acl);
705 		EXT4_I(inode)->i_acl = EXT4_ACL_NOT_CACHED;
706 	}
707 	if (EXT4_I(inode)->i_default_acl &&
708 			EXT4_I(inode)->i_default_acl != EXT4_ACL_NOT_CACHED) {
709 		posix_acl_release(EXT4_I(inode)->i_default_acl);
710 		EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED;
711 	}
712 #endif
713 	ext4_discard_preallocations(inode);
714 	if (EXT4_JOURNAL(inode))
715 		jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
716 				       &EXT4_I(inode)->jinode);
717 }
718 
/*
 * Append the quota-related mount options of @sb to @seq.  Emits nothing
 * when CONFIG_QUOTA is not set.
 */
static inline void ext4_show_quota_options(struct seq_file *seq,
					   struct super_block *sb)
{
#if defined(CONFIG_QUOTA)
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	const char *usr_file = sbi->s_qf_names[USRQUOTA];
	const char *grp_file = sbi->s_qf_names[GRPQUOTA];

	if (sbi->s_jquota_fmt) {
		const char *fmt_name;

		if (sbi->s_jquota_fmt == QFMT_VFS_OLD)
			fmt_name = "vfsold";
		else
			fmt_name = "vfsv0";
		seq_printf(seq, ",jqfmt=%s", fmt_name);
	}

	if (usr_file)
		seq_printf(seq, ",usrjquota=%s", usr_file);

	if (grp_file)
		seq_printf(seq, ",grpjquota=%s", grp_file);

	if (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA)
		seq_puts(seq, ",usrquota");

	if (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)
		seq_puts(seq, ",grpquota");
#endif
}
742 
743 /*
744  * Show an option if
745  *  - it's set to a non-default value OR
746  *  - if the per-sb default is different from the global default
747  */
748 static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
749 {
750 	int def_errors;
751 	unsigned long def_mount_opts;
752 	struct super_block *sb = vfs->mnt_sb;
753 	struct ext4_sb_info *sbi = EXT4_SB(sb);
754 	struct ext4_super_block *es = sbi->s_es;
755 
756 	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
757 	def_errors     = le16_to_cpu(es->s_errors);
758 
759 	if (sbi->s_sb_block != 1)
760 		seq_printf(seq, ",sb=%llu", sbi->s_sb_block);
761 	if (test_opt(sb, MINIX_DF))
762 		seq_puts(seq, ",minixdf");
763 	if (test_opt(sb, GRPID) && !(def_mount_opts & EXT4_DEFM_BSDGROUPS))
764 		seq_puts(seq, ",grpid");
765 	if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS))
766 		seq_puts(seq, ",nogrpid");
767 	if (sbi->s_resuid != EXT4_DEF_RESUID ||
768 	    le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) {
769 		seq_printf(seq, ",resuid=%u", sbi->s_resuid);
770 	}
771 	if (sbi->s_resgid != EXT4_DEF_RESGID ||
772 	    le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) {
773 		seq_printf(seq, ",resgid=%u", sbi->s_resgid);
774 	}
775 	if (test_opt(sb, ERRORS_RO)) {
776 		if (def_errors == EXT4_ERRORS_PANIC ||
777 		    def_errors == EXT4_ERRORS_CONTINUE) {
778 			seq_puts(seq, ",errors=remount-ro");
779 		}
780 	}
781 	if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
782 		seq_puts(seq, ",errors=continue");
783 	if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
784 		seq_puts(seq, ",errors=panic");
785 	if (test_opt(sb, NO_UID32) && !(def_mount_opts & EXT4_DEFM_UID16))
786 		seq_puts(seq, ",nouid32");
787 	if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG))
788 		seq_puts(seq, ",debug");
789 	if (test_opt(sb, OLDALLOC))
790 		seq_puts(seq, ",oldalloc");
791 #ifdef CONFIG_EXT4_FS_XATTR
792 	if (test_opt(sb, XATTR_USER) &&
793 		!(def_mount_opts & EXT4_DEFM_XATTR_USER))
794 		seq_puts(seq, ",user_xattr");
795 	if (!test_opt(sb, XATTR_USER) &&
796 	    (def_mount_opts & EXT4_DEFM_XATTR_USER)) {
797 		seq_puts(seq, ",nouser_xattr");
798 	}
799 #endif
800 #ifdef CONFIG_EXT4_FS_POSIX_ACL
801 	if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL))
802 		seq_puts(seq, ",acl");
803 	if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL))
804 		seq_puts(seq, ",noacl");
805 #endif
806 	if (!test_opt(sb, RESERVATION))
807 		seq_puts(seq, ",noreservation");
808 	if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
809 		seq_printf(seq, ",commit=%u",
810 			   (unsigned) (sbi->s_commit_interval / HZ));
811 	}
812 	if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) {
813 		seq_printf(seq, ",min_batch_time=%u",
814 			   (unsigned) sbi->s_min_batch_time);
815 	}
816 	if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) {
817 		seq_printf(seq, ",max_batch_time=%u",
818 			   (unsigned) sbi->s_min_batch_time);
819 	}
820 
821 	/*
822 	 * We're changing the default of barrier mount option, so
823 	 * let's always display its mount state so it's clear what its
824 	 * status is.
825 	 */
826 	seq_puts(seq, ",barrier=");
827 	seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0");
828 	if (test_opt(sb, JOURNAL_ASYNC_COMMIT))
829 		seq_puts(seq, ",journal_async_commit");
830 	if (test_opt(sb, NOBH))
831 		seq_puts(seq, ",nobh");
832 	if (test_opt(sb, I_VERSION))
833 		seq_puts(seq, ",i_version");
834 	if (!test_opt(sb, DELALLOC))
835 		seq_puts(seq, ",nodelalloc");
836 
837 
838 	if (sbi->s_stripe)
839 		seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
840 	/*
841 	 * journal mode get enabled in different ways
842 	 * So just print the value even if we didn't specify it
843 	 */
844 	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
845 		seq_puts(seq, ",data=journal");
846 	else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
847 		seq_puts(seq, ",data=ordered");
848 	else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
849 		seq_puts(seq, ",data=writeback");
850 
851 	if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
852 		seq_printf(seq, ",inode_readahead_blks=%u",
853 			   sbi->s_inode_readahead_blks);
854 
855 	if (test_opt(sb, DATA_ERR_ABORT))
856 		seq_puts(seq, ",data_err=abort");
857 
858 	ext4_show_quota_options(seq, sb);
859 	return 0;
860 }
861 
862 
863 static struct inode *ext4_nfs_get_inode(struct super_block *sb,
864 		u64 ino, u32 generation)
865 {
866 	struct inode *inode;
867 
868 	if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
869 		return ERR_PTR(-ESTALE);
870 	if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
871 		return ERR_PTR(-ESTALE);
872 
873 	/* iget isn't really right if the inode is currently unallocated!!
874 	 *
875 	 * ext4_read_inode will return a bad_inode if the inode had been
876 	 * deleted, so we should be safe.
877 	 *
878 	 * Currently we don't know the generation for parent directory, so
879 	 * a generation of 0 means "accept any"
880 	 */
881 	inode = ext4_iget(sb, ino);
882 	if (IS_ERR(inode))
883 		return ERR_CAST(inode);
884 	if (generation && inode->i_generation != generation) {
885 		iput(inode);
886 		return ERR_PTR(-ESTALE);
887 	}
888 
889 	return inode;
890 }
891 
892 static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
893 		int fh_len, int fh_type)
894 {
895 	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
896 				    ext4_nfs_get_inode);
897 }
898 
899 static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
900 		int fh_len, int fh_type)
901 {
902 	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
903 				    ext4_nfs_get_inode);
904 }
905 
906 /*
907  * Try to release metadata pages (indirect blocks, directories) which are
908  * mapped via the block device.  Since these pages could have journal heads
909  * which would prevent try_to_free_buffers() from freeing them, we must use
910  * jbd2 layer's try_to_free_buffers() function to release them.
911  */
912 static int bdev_try_to_free_page(struct super_block *sb, struct page *page, gfp_t wait)
913 {
914 	journal_t *journal = EXT4_SB(sb)->s_journal;
915 
916 	WARN_ON(PageChecked(page));
917 	if (!page_has_buffers(page))
918 		return 0;
919 	if (journal)
920 		return jbd2_journal_try_to_free_buffers(journal, page,
921 							wait & ~__GFP_WAIT);
922 	return try_to_free_buffers(page);
923 }
924 
925 #ifdef CONFIG_QUOTA
/* Human-readable name of quota type @t: "user" for USRQUOTA, else "group". */
#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
/*
 * Build the identifier <on>USRJQUOTA or <on>GRPJQUOTA depending on quota
 * type @t.  @on must be pasted bare: wrapping it in parentheses would
 * paste ")" onto the identifier, which is not a valid preprocessing token.
 */
#define QTYPE2MOPT(on, t) ((t) == USRQUOTA ? (on##USRJQUOTA) : (on##GRPJQUOTA))
928 
929 static int ext4_write_dquot(struct dquot *dquot);
930 static int ext4_acquire_dquot(struct dquot *dquot);
931 static int ext4_release_dquot(struct dquot *dquot);
932 static int ext4_mark_dquot_dirty(struct dquot *dquot);
933 static int ext4_write_info(struct super_block *sb, int type);
934 static int ext4_quota_on(struct super_block *sb, int type, int format_id,
935 				char *path, int remount);
936 static int ext4_quota_on_mount(struct super_block *sb, int type);
937 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
938 			       size_t len, loff_t off);
939 static ssize_t ext4_quota_write(struct super_block *sb, int type,
940 				const char *data, size_t len, loff_t off);
941 
942 static struct dquot_operations ext4_quota_operations = {
943 	.initialize	= dquot_initialize,
944 	.drop		= dquot_drop,
945 	.alloc_space	= dquot_alloc_space,
946 	.reserve_space	= dquot_reserve_space,
947 	.claim_space	= dquot_claim_space,
948 	.release_rsv	= dquot_release_reserved_space,
949 	.get_reserved_space = ext4_get_reserved_space,
950 	.alloc_inode	= dquot_alloc_inode,
951 	.free_space	= dquot_free_space,
952 	.free_inode	= dquot_free_inode,
953 	.transfer	= dquot_transfer,
954 	.write_dquot	= ext4_write_dquot,
955 	.acquire_dquot	= ext4_acquire_dquot,
956 	.release_dquot	= ext4_release_dquot,
957 	.mark_dirty	= ext4_mark_dquot_dirty,
958 	.write_info	= ext4_write_info,
959 	.alloc_dquot	= dquot_alloc,
960 	.destroy_dquot	= dquot_destroy,
961 };
962 
963 static struct quotactl_ops ext4_qctl_operations = {
964 	.quota_on	= ext4_quota_on,
965 	.quota_off	= vfs_quota_off,
966 	.quota_sync	= vfs_quota_sync,
967 	.get_info	= vfs_get_dqinfo,
968 	.set_info	= vfs_set_dqinfo,
969 	.get_dqblk	= vfs_get_dqblk,
970 	.set_dqblk	= vfs_set_dqblk
971 };
972 #endif
973 
974 static const struct super_operations ext4_sops = {
975 	.alloc_inode	= ext4_alloc_inode,
976 	.destroy_inode	= ext4_destroy_inode,
977 	.write_inode	= ext4_write_inode,
978 	.dirty_inode	= ext4_dirty_inode,
979 	.delete_inode	= ext4_delete_inode,
980 	.put_super	= ext4_put_super,
981 	.write_super	= ext4_write_super,
982 	.sync_fs	= ext4_sync_fs,
983 	.freeze_fs	= ext4_freeze,
984 	.unfreeze_fs	= ext4_unfreeze,
985 	.statfs		= ext4_statfs,
986 	.remount_fs	= ext4_remount,
987 	.clear_inode	= ext4_clear_inode,
988 	.show_options	= ext4_show_options,
989 #ifdef CONFIG_QUOTA
990 	.quota_read	= ext4_quota_read,
991 	.quota_write	= ext4_quota_write,
992 #endif
993 	.bdev_try_to_free_page = bdev_try_to_free_page,
994 };
995 
996 static const struct export_operations ext4_export_ops = {
997 	.fh_to_dentry = ext4_fh_to_dentry,
998 	.fh_to_parent = ext4_fh_to_parent,
999 	.get_parent = ext4_get_parent,
1000 };
1001 
/*
 * Mount option token identifiers.  The enumerators keep their original
 * order, so their numeric values are unchanged.
 */
enum {
	Opt_bsd_df,
	Opt_minix_df,
	Opt_grpid,
	Opt_nogrpid,
	Opt_resgid,
	Opt_resuid,
	Opt_sb,
	Opt_err_cont,
	Opt_err_panic,
	Opt_err_ro,
	Opt_nouid32,
	Opt_debug,
	Opt_oldalloc,
	Opt_orlov,
	Opt_user_xattr,
	Opt_nouser_xattr,
	Opt_acl,
	Opt_noacl,
	Opt_reservation,
	Opt_noreservation,
	Opt_noload,
	Opt_nobh,
	Opt_bh,
	Opt_commit,
	Opt_min_batch_time,
	Opt_max_batch_time,
	Opt_journal_update,
	Opt_journal_dev,
	Opt_journal_checksum,
	Opt_journal_async_commit,
	Opt_abort,
	Opt_data_journal,
	Opt_data_ordered,
	Opt_data_writeback,
	Opt_data_err_abort,
	Opt_data_err_ignore,
	Opt_usrjquota,
	Opt_grpjquota,
	Opt_offusrjquota,
	Opt_offgrpjquota,
	Opt_jqfmt_vfsold,
	Opt_jqfmt_vfsv0,
	Opt_quota,
	Opt_noquota,
	Opt_ignore,
	Opt_barrier,
	Opt_err,
	Opt_resize,
	Opt_usrquota,
	Opt_grpquota,
	Opt_i_version,
	Opt_stripe,
	Opt_delalloc,
	Opt_nodelalloc,
	Opt_inode_readahead_blks,
	Opt_journal_ioprio
};
1020 
1021 static const match_table_t tokens = {
1022 	{Opt_bsd_df, "bsddf"},
1023 	{Opt_minix_df, "minixdf"},
1024 	{Opt_grpid, "grpid"},
1025 	{Opt_grpid, "bsdgroups"},
1026 	{Opt_nogrpid, "nogrpid"},
1027 	{Opt_nogrpid, "sysvgroups"},
1028 	{Opt_resgid, "resgid=%u"},
1029 	{Opt_resuid, "resuid=%u"},
1030 	{Opt_sb, "sb=%u"},
1031 	{Opt_err_cont, "errors=continue"},
1032 	{Opt_err_panic, "errors=panic"},
1033 	{Opt_err_ro, "errors=remount-ro"},
1034 	{Opt_nouid32, "nouid32"},
1035 	{Opt_debug, "debug"},
1036 	{Opt_oldalloc, "oldalloc"},
1037 	{Opt_orlov, "orlov"},
1038 	{Opt_user_xattr, "user_xattr"},
1039 	{Opt_nouser_xattr, "nouser_xattr"},
1040 	{Opt_acl, "acl"},
1041 	{Opt_noacl, "noacl"},
1042 	{Opt_reservation, "reservation"},
1043 	{Opt_noreservation, "noreservation"},
1044 	{Opt_noload, "noload"},
1045 	{Opt_nobh, "nobh"},
1046 	{Opt_bh, "bh"},
1047 	{Opt_commit, "commit=%u"},
1048 	{Opt_min_batch_time, "min_batch_time=%u"},
1049 	{Opt_max_batch_time, "max_batch_time=%u"},
1050 	{Opt_journal_update, "journal=update"},
1051 	{Opt_journal_dev, "journal_dev=%u"},
1052 	{Opt_journal_checksum, "journal_checksum"},
1053 	{Opt_journal_async_commit, "journal_async_commit"},
1054 	{Opt_abort, "abort"},
1055 	{Opt_data_journal, "data=journal"},
1056 	{Opt_data_ordered, "data=ordered"},
1057 	{Opt_data_writeback, "data=writeback"},
1058 	{Opt_data_err_abort, "data_err=abort"},
1059 	{Opt_data_err_ignore, "data_err=ignore"},
1060 	{Opt_offusrjquota, "usrjquota="},
1061 	{Opt_usrjquota, "usrjquota=%s"},
1062 	{Opt_offgrpjquota, "grpjquota="},
1063 	{Opt_grpjquota, "grpjquota=%s"},
1064 	{Opt_jqfmt_vfsold, "jqfmt=vfsold"},
1065 	{Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
1066 	{Opt_grpquota, "grpquota"},
1067 	{Opt_noquota, "noquota"},
1068 	{Opt_quota, "quota"},
1069 	{Opt_usrquota, "usrquota"},
1070 	{Opt_barrier, "barrier=%u"},
1071 	{Opt_i_version, "i_version"},
1072 	{Opt_stripe, "stripe=%u"},
1073 	{Opt_resize, "resize"},
1074 	{Opt_delalloc, "delalloc"},
1075 	{Opt_nodelalloc, "nodelalloc"},
1076 	{Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
1077 	{Opt_journal_ioprio, "journal_ioprio=%u"},
1078 	{Opt_err, NULL},
1079 };
1080 
1081 static ext4_fsblk_t get_sb_block(void **data)
1082 {
1083 	ext4_fsblk_t	sb_block;
1084 	char		*options = (char *) *data;
1085 
1086 	if (!options || strncmp(options, "sb=", 3) != 0)
1087 		return 1;	/* Default location */
1088 	options += 3;
1089 	/*todo: use simple_strtoll with >32bit ext4 */
1090 	sb_block = simple_strtoul(options, &options, 0);
1091 	if (*options && *options != ',') {
1092 		printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
1093 		       (char *) *data);
1094 		return 1;
1095 	}
1096 	if (*options == ',')
1097 		options++;
1098 	*data = (void *) options;
1099 	return sb_block;
1100 }
1101 
1102 #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
1103 
1104 static int parse_options(char *options, struct super_block *sb,
1105 			 unsigned long *journal_devnum,
1106 			 unsigned int *journal_ioprio,
1107 			 ext4_fsblk_t *n_blocks_count, int is_remount)
1108 {
1109 	struct ext4_sb_info *sbi = EXT4_SB(sb);
1110 	char *p;
1111 	substring_t args[MAX_OPT_ARGS];
1112 	int data_opt = 0;
1113 	int option;
1114 #ifdef CONFIG_QUOTA
1115 	int qtype, qfmt;
1116 	char *qname;
1117 #endif
1118 
1119 	if (!options)
1120 		return 1;
1121 
1122 	while ((p = strsep(&options, ",")) != NULL) {
1123 		int token;
1124 		if (!*p)
1125 			continue;
1126 
1127 		token = match_token(p, tokens, args);
1128 		switch (token) {
1129 		case Opt_bsd_df:
1130 			clear_opt(sbi->s_mount_opt, MINIX_DF);
1131 			break;
1132 		case Opt_minix_df:
1133 			set_opt(sbi->s_mount_opt, MINIX_DF);
1134 			break;
1135 		case Opt_grpid:
1136 			set_opt(sbi->s_mount_opt, GRPID);
1137 			break;
1138 		case Opt_nogrpid:
1139 			clear_opt(sbi->s_mount_opt, GRPID);
1140 			break;
1141 		case Opt_resuid:
1142 			if (match_int(&args[0], &option))
1143 				return 0;
1144 			sbi->s_resuid = option;
1145 			break;
1146 		case Opt_resgid:
1147 			if (match_int(&args[0], &option))
1148 				return 0;
1149 			sbi->s_resgid = option;
1150 			break;
1151 		case Opt_sb:
1152 			/* handled by get_sb_block() instead of here */
1153 			/* *sb_block = match_int(&args[0]); */
1154 			break;
1155 		case Opt_err_panic:
1156 			clear_opt(sbi->s_mount_opt, ERRORS_CONT);
1157 			clear_opt(sbi->s_mount_opt, ERRORS_RO);
1158 			set_opt(sbi->s_mount_opt, ERRORS_PANIC);
1159 			break;
1160 		case Opt_err_ro:
1161 			clear_opt(sbi->s_mount_opt, ERRORS_CONT);
1162 			clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
1163 			set_opt(sbi->s_mount_opt, ERRORS_RO);
1164 			break;
1165 		case Opt_err_cont:
1166 			clear_opt(sbi->s_mount_opt, ERRORS_RO);
1167 			clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
1168 			set_opt(sbi->s_mount_opt, ERRORS_CONT);
1169 			break;
1170 		case Opt_nouid32:
1171 			set_opt(sbi->s_mount_opt, NO_UID32);
1172 			break;
1173 		case Opt_debug:
1174 			set_opt(sbi->s_mount_opt, DEBUG);
1175 			break;
1176 		case Opt_oldalloc:
1177 			set_opt(sbi->s_mount_opt, OLDALLOC);
1178 			break;
1179 		case Opt_orlov:
1180 			clear_opt(sbi->s_mount_opt, OLDALLOC);
1181 			break;
1182 #ifdef CONFIG_EXT4_FS_XATTR
1183 		case Opt_user_xattr:
1184 			set_opt(sbi->s_mount_opt, XATTR_USER);
1185 			break;
1186 		case Opt_nouser_xattr:
1187 			clear_opt(sbi->s_mount_opt, XATTR_USER);
1188 			break;
1189 #else
1190 		case Opt_user_xattr:
1191 		case Opt_nouser_xattr:
1192 			printk(KERN_ERR "EXT4 (no)user_xattr options "
1193 			       "not supported\n");
1194 			break;
1195 #endif
1196 #ifdef CONFIG_EXT4_FS_POSIX_ACL
1197 		case Opt_acl:
1198 			set_opt(sbi->s_mount_opt, POSIX_ACL);
1199 			break;
1200 		case Opt_noacl:
1201 			clear_opt(sbi->s_mount_opt, POSIX_ACL);
1202 			break;
1203 #else
1204 		case Opt_acl:
1205 		case Opt_noacl:
1206 			printk(KERN_ERR "EXT4 (no)acl options "
1207 			       "not supported\n");
1208 			break;
1209 #endif
1210 		case Opt_reservation:
1211 			set_opt(sbi->s_mount_opt, RESERVATION);
1212 			break;
1213 		case Opt_noreservation:
1214 			clear_opt(sbi->s_mount_opt, RESERVATION);
1215 			break;
1216 		case Opt_journal_update:
1217 			/* @@@ FIXME */
1218 			/* Eventually we will want to be able to create
1219 			   a journal file here.  For now, only allow the
1220 			   user to specify an existing inode to be the
1221 			   journal file. */
1222 			if (is_remount) {
1223 				printk(KERN_ERR "EXT4-fs: cannot specify "
1224 				       "journal on remount\n");
1225 				return 0;
1226 			}
1227 			set_opt(sbi->s_mount_opt, UPDATE_JOURNAL);
1228 			break;
1229 		case Opt_journal_dev:
1230 			if (is_remount) {
1231 				printk(KERN_ERR "EXT4-fs: cannot specify "
1232 				       "journal on remount\n");
1233 				return 0;
1234 			}
1235 			if (match_int(&args[0], &option))
1236 				return 0;
1237 			*journal_devnum = option;
1238 			break;
1239 		case Opt_journal_checksum:
1240 			set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
1241 			break;
1242 		case Opt_journal_async_commit:
1243 			set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT);
1244 			set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
1245 			break;
1246 		case Opt_noload:
1247 			set_opt(sbi->s_mount_opt, NOLOAD);
1248 			break;
1249 		case Opt_commit:
1250 			if (match_int(&args[0], &option))
1251 				return 0;
1252 			if (option < 0)
1253 				return 0;
1254 			if (option == 0)
1255 				option = JBD2_DEFAULT_MAX_COMMIT_AGE;
1256 			sbi->s_commit_interval = HZ * option;
1257 			break;
1258 		case Opt_max_batch_time:
1259 			if (match_int(&args[0], &option))
1260 				return 0;
1261 			if (option < 0)
1262 				return 0;
1263 			if (option == 0)
1264 				option = EXT4_DEF_MAX_BATCH_TIME;
1265 			sbi->s_max_batch_time = option;
1266 			break;
1267 		case Opt_min_batch_time:
1268 			if (match_int(&args[0], &option))
1269 				return 0;
1270 			if (option < 0)
1271 				return 0;
1272 			sbi->s_min_batch_time = option;
1273 			break;
1274 		case Opt_data_journal:
1275 			data_opt = EXT4_MOUNT_JOURNAL_DATA;
1276 			goto datacheck;
1277 		case Opt_data_ordered:
1278 			data_opt = EXT4_MOUNT_ORDERED_DATA;
1279 			goto datacheck;
1280 		case Opt_data_writeback:
1281 			data_opt = EXT4_MOUNT_WRITEBACK_DATA;
1282 		datacheck:
1283 			if (is_remount) {
1284 				if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS)
1285 						!= data_opt) {
1286 					printk(KERN_ERR
1287 						"EXT4-fs: cannot change data "
1288 						"mode on remount\n");
1289 					return 0;
1290 				}
1291 			} else {
1292 				sbi->s_mount_opt &= ~EXT4_MOUNT_DATA_FLAGS;
1293 				sbi->s_mount_opt |= data_opt;
1294 			}
1295 			break;
1296 		case Opt_data_err_abort:
1297 			set_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1298 			break;
1299 		case Opt_data_err_ignore:
1300 			clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1301 			break;
1302 #ifdef CONFIG_QUOTA
1303 		case Opt_usrjquota:
1304 			qtype = USRQUOTA;
1305 			goto set_qf_name;
1306 		case Opt_grpjquota:
1307 			qtype = GRPQUOTA;
1308 set_qf_name:
1309 			if (sb_any_quota_loaded(sb) &&
1310 			    !sbi->s_qf_names[qtype]) {
1311 				printk(KERN_ERR
1312 				       "EXT4-fs: Cannot change journaled "
1313 				       "quota options when quota turned on.\n");
1314 				return 0;
1315 			}
1316 			qname = match_strdup(&args[0]);
1317 			if (!qname) {
1318 				printk(KERN_ERR
1319 					"EXT4-fs: not enough memory for "
1320 					"storing quotafile name.\n");
1321 				return 0;
1322 			}
1323 			if (sbi->s_qf_names[qtype] &&
1324 			    strcmp(sbi->s_qf_names[qtype], qname)) {
1325 				printk(KERN_ERR
1326 					"EXT4-fs: %s quota file already "
1327 					"specified.\n", QTYPE2NAME(qtype));
1328 				kfree(qname);
1329 				return 0;
1330 			}
1331 			sbi->s_qf_names[qtype] = qname;
1332 			if (strchr(sbi->s_qf_names[qtype], '/')) {
1333 				printk(KERN_ERR
1334 					"EXT4-fs: quotafile must be on "
1335 					"filesystem root.\n");
1336 				kfree(sbi->s_qf_names[qtype]);
1337 				sbi->s_qf_names[qtype] = NULL;
1338 				return 0;
1339 			}
1340 			set_opt(sbi->s_mount_opt, QUOTA);
1341 			break;
1342 		case Opt_offusrjquota:
1343 			qtype = USRQUOTA;
1344 			goto clear_qf_name;
1345 		case Opt_offgrpjquota:
1346 			qtype = GRPQUOTA;
1347 clear_qf_name:
1348 			if (sb_any_quota_loaded(sb) &&
1349 			    sbi->s_qf_names[qtype]) {
1350 				printk(KERN_ERR "EXT4-fs: Cannot change "
1351 					"journaled quota options when "
1352 					"quota turned on.\n");
1353 				return 0;
1354 			}
1355 			/*
1356 			 * The space will be released later when all options
1357 			 * are confirmed to be correct
1358 			 */
1359 			sbi->s_qf_names[qtype] = NULL;
1360 			break;
1361 		case Opt_jqfmt_vfsold:
1362 			qfmt = QFMT_VFS_OLD;
1363 			goto set_qf_format;
1364 		case Opt_jqfmt_vfsv0:
1365 			qfmt = QFMT_VFS_V0;
1366 set_qf_format:
1367 			if (sb_any_quota_loaded(sb) &&
1368 			    sbi->s_jquota_fmt != qfmt) {
1369 				printk(KERN_ERR "EXT4-fs: Cannot change "
1370 					"journaled quota options when "
1371 					"quota turned on.\n");
1372 				return 0;
1373 			}
1374 			sbi->s_jquota_fmt = qfmt;
1375 			break;
1376 		case Opt_quota:
1377 		case Opt_usrquota:
1378 			set_opt(sbi->s_mount_opt, QUOTA);
1379 			set_opt(sbi->s_mount_opt, USRQUOTA);
1380 			break;
1381 		case Opt_grpquota:
1382 			set_opt(sbi->s_mount_opt, QUOTA);
1383 			set_opt(sbi->s_mount_opt, GRPQUOTA);
1384 			break;
1385 		case Opt_noquota:
1386 			if (sb_any_quota_loaded(sb)) {
1387 				printk(KERN_ERR "EXT4-fs: Cannot change quota "
1388 					"options when quota turned on.\n");
1389 				return 0;
1390 			}
1391 			clear_opt(sbi->s_mount_opt, QUOTA);
1392 			clear_opt(sbi->s_mount_opt, USRQUOTA);
1393 			clear_opt(sbi->s_mount_opt, GRPQUOTA);
1394 			break;
1395 #else
1396 		case Opt_quota:
1397 		case Opt_usrquota:
1398 		case Opt_grpquota:
1399 			printk(KERN_ERR
1400 				"EXT4-fs: quota options not supported.\n");
1401 			break;
1402 		case Opt_usrjquota:
1403 		case Opt_grpjquota:
1404 		case Opt_offusrjquota:
1405 		case Opt_offgrpjquota:
1406 		case Opt_jqfmt_vfsold:
1407 		case Opt_jqfmt_vfsv0:
1408 			printk(KERN_ERR
1409 				"EXT4-fs: journaled quota options not "
1410 				"supported.\n");
1411 			break;
1412 		case Opt_noquota:
1413 			break;
1414 #endif
1415 		case Opt_abort:
1416 			set_opt(sbi->s_mount_opt, ABORT);
1417 			break;
1418 		case Opt_barrier:
1419 			if (match_int(&args[0], &option))
1420 				return 0;
1421 			if (option)
1422 				set_opt(sbi->s_mount_opt, BARRIER);
1423 			else
1424 				clear_opt(sbi->s_mount_opt, BARRIER);
1425 			break;
1426 		case Opt_ignore:
1427 			break;
1428 		case Opt_resize:
1429 			if (!is_remount) {
1430 				printk("EXT4-fs: resize option only available "
1431 					"for remount\n");
1432 				return 0;
1433 			}
1434 			if (match_int(&args[0], &option) != 0)
1435 				return 0;
1436 			*n_blocks_count = option;
1437 			break;
1438 		case Opt_nobh:
1439 			set_opt(sbi->s_mount_opt, NOBH);
1440 			break;
1441 		case Opt_bh:
1442 			clear_opt(sbi->s_mount_opt, NOBH);
1443 			break;
1444 		case Opt_i_version:
1445 			set_opt(sbi->s_mount_opt, I_VERSION);
1446 			sb->s_flags |= MS_I_VERSION;
1447 			break;
1448 		case Opt_nodelalloc:
1449 			clear_opt(sbi->s_mount_opt, DELALLOC);
1450 			break;
1451 		case Opt_stripe:
1452 			if (match_int(&args[0], &option))
1453 				return 0;
1454 			if (option < 0)
1455 				return 0;
1456 			sbi->s_stripe = option;
1457 			break;
1458 		case Opt_delalloc:
1459 			set_opt(sbi->s_mount_opt, DELALLOC);
1460 			break;
1461 		case Opt_inode_readahead_blks:
1462 			if (match_int(&args[0], &option))
1463 				return 0;
1464 			if (option < 0 || option > (1 << 30))
1465 				return 0;
1466 			sbi->s_inode_readahead_blks = option;
1467 			break;
1468 		case Opt_journal_ioprio:
1469 			if (match_int(&args[0], &option))
1470 				return 0;
1471 			if (option < 0 || option > 7)
1472 				break;
1473 			*journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE,
1474 							    option);
1475 			break;
1476 		default:
1477 			printk(KERN_ERR
1478 			       "EXT4-fs: Unrecognized mount option \"%s\" "
1479 			       "or missing value\n", p);
1480 			return 0;
1481 		}
1482 	}
1483 #ifdef CONFIG_QUOTA
1484 	if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
1485 		if ((sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) &&
1486 		     sbi->s_qf_names[USRQUOTA])
1487 			clear_opt(sbi->s_mount_opt, USRQUOTA);
1488 
1489 		if ((sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) &&
1490 		     sbi->s_qf_names[GRPQUOTA])
1491 			clear_opt(sbi->s_mount_opt, GRPQUOTA);
1492 
1493 		if ((sbi->s_qf_names[USRQUOTA] &&
1494 				(sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) ||
1495 		    (sbi->s_qf_names[GRPQUOTA] &&
1496 				(sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) {
1497 			printk(KERN_ERR "EXT4-fs: old and new quota "
1498 					"format mixing.\n");
1499 			return 0;
1500 		}
1501 
1502 		if (!sbi->s_jquota_fmt) {
1503 			printk(KERN_ERR "EXT4-fs: journaled quota format "
1504 					"not specified.\n");
1505 			return 0;
1506 		}
1507 	} else {
1508 		if (sbi->s_jquota_fmt) {
1509 			printk(KERN_ERR "EXT4-fs: journaled quota format "
1510 					"specified with no journaling "
1511 					"enabled.\n");
1512 			return 0;
1513 		}
1514 	}
1515 #endif
1516 	return 1;
1517 }
1518 
1519 static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1520 			    int read_only)
1521 {
1522 	struct ext4_sb_info *sbi = EXT4_SB(sb);
1523 	int res = 0;
1524 
1525 	if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
1526 		printk(KERN_ERR "EXT4-fs warning: revision level too high, "
1527 		       "forcing read-only mode\n");
1528 		res = MS_RDONLY;
1529 	}
1530 	if (read_only)
1531 		return res;
1532 	if (!(sbi->s_mount_state & EXT4_VALID_FS))
1533 		printk(KERN_WARNING "EXT4-fs warning: mounting unchecked fs, "
1534 		       "running e2fsck is recommended\n");
1535 	else if ((sbi->s_mount_state & EXT4_ERROR_FS))
1536 		printk(KERN_WARNING
1537 		       "EXT4-fs warning: mounting fs with errors, "
1538 		       "running e2fsck is recommended\n");
1539 	else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
1540 		 le16_to_cpu(es->s_mnt_count) >=
1541 		 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
1542 		printk(KERN_WARNING
1543 		       "EXT4-fs warning: maximal mount count reached, "
1544 		       "running e2fsck is recommended\n");
1545 	else if (le32_to_cpu(es->s_checkinterval) &&
1546 		(le32_to_cpu(es->s_lastcheck) +
1547 			le32_to_cpu(es->s_checkinterval) <= get_seconds()))
1548 		printk(KERN_WARNING
1549 		       "EXT4-fs warning: checktime reached, "
1550 		       "running e2fsck is recommended\n");
1551 	if (!sbi->s_journal)
1552 		es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
1553 	if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
1554 		es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
1555 	le16_add_cpu(&es->s_mnt_count, 1);
1556 	es->s_mtime = cpu_to_le32(get_seconds());
1557 	ext4_update_dynamic_rev(sb);
1558 	if (sbi->s_journal)
1559 		EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
1560 
1561 	ext4_commit_super(sb, es, 1);
1562 	if (test_opt(sb, DEBUG))
1563 		printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
1564 				"bpg=%lu, ipg=%lu, mo=%04lx]\n",
1565 			sb->s_blocksize,
1566 			sbi->s_groups_count,
1567 			EXT4_BLOCKS_PER_GROUP(sb),
1568 			EXT4_INODES_PER_GROUP(sb),
1569 			sbi->s_mount_opt);
1570 
1571 	if (EXT4_SB(sb)->s_journal) {
1572 		printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n",
1573 		       sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" :
1574 		       "external", EXT4_SB(sb)->s_journal->j_devname);
1575 	} else {
1576 		printk(KERN_INFO "EXT4 FS on %s, no journal\n", sb->s_id);
1577 	}
1578 	return res;
1579 }
1580 
1581 static int ext4_fill_flex_info(struct super_block *sb)
1582 {
1583 	struct ext4_sb_info *sbi = EXT4_SB(sb);
1584 	struct ext4_group_desc *gdp = NULL;
1585 	struct buffer_head *bh;
1586 	ext4_group_t flex_group_count;
1587 	ext4_group_t flex_group;
1588 	int groups_per_flex = 0;
1589 	int i;
1590 
1591 	if (!sbi->s_es->s_log_groups_per_flex) {
1592 		sbi->s_log_groups_per_flex = 0;
1593 		return 1;
1594 	}
1595 
1596 	sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
1597 	groups_per_flex = 1 << sbi->s_log_groups_per_flex;
1598 
1599 	/* We allocate both existing and potentially added groups */
1600 	flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
1601 			((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
1602 			      EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex;
1603 	sbi->s_flex_groups = kzalloc(flex_group_count *
1604 				     sizeof(struct flex_groups), GFP_KERNEL);
1605 	if (sbi->s_flex_groups == NULL) {
1606 		printk(KERN_ERR "EXT4-fs: not enough memory for "
1607 				"%u flex groups\n", flex_group_count);
1608 		goto failed;
1609 	}
1610 
1611 	for (i = 0; i < sbi->s_groups_count; i++) {
1612 		gdp = ext4_get_group_desc(sb, i, &bh);
1613 
1614 		flex_group = ext4_flex_group(sbi, i);
1615 		sbi->s_flex_groups[flex_group].free_inodes +=
1616 			ext4_free_inodes_count(sb, gdp);
1617 		sbi->s_flex_groups[flex_group].free_blocks +=
1618 			ext4_free_blks_count(sb, gdp);
1619 	}
1620 
1621 	return 1;
1622 failed:
1623 	return 0;
1624 }
1625 
1626 __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
1627 			    struct ext4_group_desc *gdp)
1628 {
1629 	__u16 crc = 0;
1630 
1631 	if (sbi->s_es->s_feature_ro_compat &
1632 	    cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
1633 		int offset = offsetof(struct ext4_group_desc, bg_checksum);
1634 		__le32 le_group = cpu_to_le32(block_group);
1635 
1636 		crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
1637 		crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
1638 		crc = crc16(crc, (__u8 *)gdp, offset);
1639 		offset += sizeof(gdp->bg_checksum); /* skip checksum */
1640 		/* for checksum of struct ext4_group_desc do the rest...*/
1641 		if ((sbi->s_es->s_feature_incompat &
1642 		     cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
1643 		    offset < le16_to_cpu(sbi->s_es->s_desc_size))
1644 			crc = crc16(crc, (__u8 *)gdp + offset,
1645 				    le16_to_cpu(sbi->s_es->s_desc_size) -
1646 					offset);
1647 	}
1648 
1649 	return cpu_to_le16(crc);
1650 }
1651 
1652 int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group,
1653 				struct ext4_group_desc *gdp)
1654 {
1655 	if ((sbi->s_es->s_feature_ro_compat &
1656 	     cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) &&
1657 	    (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp)))
1658 		return 0;
1659 
1660 	return 1;
1661 }
1662 
1663 /* Called at mount-time, super-block is locked */
1664 static int ext4_check_descriptors(struct super_block *sb)
1665 {
1666 	struct ext4_sb_info *sbi = EXT4_SB(sb);
1667 	ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
1668 	ext4_fsblk_t last_block;
1669 	ext4_fsblk_t block_bitmap;
1670 	ext4_fsblk_t inode_bitmap;
1671 	ext4_fsblk_t inode_table;
1672 	int flexbg_flag = 0;
1673 	ext4_group_t i;
1674 
1675 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
1676 		flexbg_flag = 1;
1677 
1678 	ext4_debug("Checking group descriptors");
1679 
1680 	for (i = 0; i < sbi->s_groups_count; i++) {
1681 		struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
1682 
1683 		if (i == sbi->s_groups_count - 1 || flexbg_flag)
1684 			last_block = ext4_blocks_count(sbi->s_es) - 1;
1685 		else
1686 			last_block = first_block +
1687 				(EXT4_BLOCKS_PER_GROUP(sb) - 1);
1688 
1689 		block_bitmap = ext4_block_bitmap(sb, gdp);
1690 		if (block_bitmap < first_block || block_bitmap > last_block) {
1691 			printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1692 			       "Block bitmap for group %u not in group "
1693 			       "(block %llu)!\n", i, block_bitmap);
1694 			return 0;
1695 		}
1696 		inode_bitmap = ext4_inode_bitmap(sb, gdp);
1697 		if (inode_bitmap < first_block || inode_bitmap > last_block) {
1698 			printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1699 			       "Inode bitmap for group %u not in group "
1700 			       "(block %llu)!\n", i, inode_bitmap);
1701 			return 0;
1702 		}
1703 		inode_table = ext4_inode_table(sb, gdp);
1704 		if (inode_table < first_block ||
1705 		    inode_table + sbi->s_itb_per_group - 1 > last_block) {
1706 			printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1707 			       "Inode table for group %u not in group "
1708 			       "(block %llu)!\n", i, inode_table);
1709 			return 0;
1710 		}
1711 		spin_lock(sb_bgl_lock(sbi, i));
1712 		if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
1713 			printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1714 			       "Checksum for group %u failed (%u!=%u)\n",
1715 			       i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
1716 			       gdp)), le16_to_cpu(gdp->bg_checksum));
1717 			if (!(sb->s_flags & MS_RDONLY)) {
1718 				spin_unlock(sb_bgl_lock(sbi, i));
1719 				return 0;
1720 			}
1721 		}
1722 		spin_unlock(sb_bgl_lock(sbi, i));
1723 		if (!flexbg_flag)
1724 			first_block += EXT4_BLOCKS_PER_GROUP(sb);
1725 	}
1726 
1727 	ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
1728 	sbi->s_es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb));
1729 	return 1;
1730 }
1731 
1732 /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
1733  * the superblock) which were deleted from all directories, but held open by
1734  * a process at the time of a crash.  We walk the list and try to delete these
1735  * inodes at recovery time (only with a read-write filesystem).
1736  *
1737  * In order to keep the orphan inode chain consistent during traversal (in
1738  * case of crash during recovery), we link each inode into the superblock
1739  * orphan list_head and handle it the same way as an inode deletion during
1740  * normal operation (which journals the operations for us).
1741  *
1742  * We only do an iget() and an iput() on each inode, which is very safe if we
1743  * accidentally point at an in-use or already deleted inode.  The worst that
1744  * can happen in this case is that we get a "bit already cleared" message from
1745  * ext4_free_inode().  The only reason we would point at a wrong inode is if
1746  * e2fsck was run on this filesystem, and it must have already done the orphan
1747  * inode cleanup for us, so we can safely abort without any further action.
1748  */
1749 static void ext4_orphan_cleanup(struct super_block *sb,
1750 				struct ext4_super_block *es)
1751 {
1752 	unsigned int s_flags = sb->s_flags;
1753 	int nr_orphans = 0, nr_truncates = 0;
1754 #ifdef CONFIG_QUOTA
1755 	int i;
1756 #endif
1757 	if (!es->s_last_orphan) {
1758 		jbd_debug(4, "no orphan inodes to clean up\n");
1759 		return;
1760 	}
1761 
1762 	if (bdev_read_only(sb->s_bdev)) {
1763 		printk(KERN_ERR "EXT4-fs: write access "
1764 			"unavailable, skipping orphan cleanup.\n");
1765 		return;
1766 	}
1767 
1768 	if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
1769 		if (es->s_last_orphan)
1770 			jbd_debug(1, "Errors on filesystem, "
1771 				  "clearing orphan list.\n");
1772 		es->s_last_orphan = 0;
1773 		jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
1774 		return;
1775 	}
1776 
1777 	if (s_flags & MS_RDONLY) {
1778 		printk(KERN_INFO "EXT4-fs: %s: orphan cleanup on readonly fs\n",
1779 		       sb->s_id);
1780 		sb->s_flags &= ~MS_RDONLY;
1781 	}
1782 #ifdef CONFIG_QUOTA
1783 	/* Needed for iput() to work correctly and not trash data */
1784 	sb->s_flags |= MS_ACTIVE;
1785 	/* Turn on quotas so that they are updated correctly */
1786 	for (i = 0; i < MAXQUOTAS; i++) {
1787 		if (EXT4_SB(sb)->s_qf_names[i]) {
1788 			int ret = ext4_quota_on_mount(sb, i);
1789 			if (ret < 0)
1790 				printk(KERN_ERR
1791 					"EXT4-fs: Cannot turn on journaled "
1792 					"quota: error %d\n", ret);
1793 		}
1794 	}
1795 #endif
1796 
1797 	while (es->s_last_orphan) {
1798 		struct inode *inode;
1799 
1800 		inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
1801 		if (IS_ERR(inode)) {
1802 			es->s_last_orphan = 0;
1803 			break;
1804 		}
1805 
1806 		list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
1807 		vfs_dq_init(inode);
1808 		if (inode->i_nlink) {
1809 			printk(KERN_DEBUG
1810 				"%s: truncating inode %lu to %lld bytes\n",
1811 				__func__, inode->i_ino, inode->i_size);
1812 			jbd_debug(2, "truncating inode %lu to %lld bytes\n",
1813 				  inode->i_ino, inode->i_size);
1814 			ext4_truncate(inode);
1815 			nr_truncates++;
1816 		} else {
1817 			printk(KERN_DEBUG
1818 				"%s: deleting unreferenced inode %lu\n",
1819 				__func__, inode->i_ino);
1820 			jbd_debug(2, "deleting unreferenced inode %lu\n",
1821 				  inode->i_ino);
1822 			nr_orphans++;
1823 		}
1824 		iput(inode);  /* The delete magic happens here! */
1825 	}
1826 
1827 #define PLURAL(x) (x), ((x) == 1) ? "" : "s"
1828 
1829 	if (nr_orphans)
1830 		printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n",
1831 		       sb->s_id, PLURAL(nr_orphans));
1832 	if (nr_truncates)
1833 		printk(KERN_INFO "EXT4-fs: %s: %d truncate%s cleaned up\n",
1834 		       sb->s_id, PLURAL(nr_truncates));
1835 #ifdef CONFIG_QUOTA
1836 	/* Turn quotas off */
1837 	for (i = 0; i < MAXQUOTAS; i++) {
1838 		if (sb_dqopt(sb)->files[i])
1839 			vfs_quota_off(sb, i, 0);
1840 	}
1841 #endif
1842 	sb->s_flags = s_flags; /* Restore MS_RDONLY status */
1843 }
1844 /*
1845  * Maximal extent format file size.
1846  * Resulting logical blkno at s_maxbytes must fit in our on-disk
1847  * extent format containers, within a sector_t, and within i_blocks
1848  * in the vfs.  ext4 inode has 48 bits of i_block in fsblock units,
1849  * so that won't be a limiting factor.
1850  *
1851  * Note, this does *not* consider any metadata overhead for vfs i_blocks.
1852  */
1853 static loff_t ext4_max_size(int blkbits, int has_huge_files)
1854 {
1855 	loff_t res;
1856 	loff_t upper_limit = MAX_LFS_FILESIZE;
1857 
1858 	/* small i_blocks in vfs inode? */
1859 	if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
1860 		/*
1861 		 * CONFIG_LBD is not enabled implies the inode
1862 		 * i_block represent total blocks in 512 bytes
1863 		 * 32 == size of vfs inode i_blocks * 8
1864 		 */
1865 		upper_limit = (1LL << 32) - 1;
1866 
1867 		/* total blocks in file system block size */
1868 		upper_limit >>= (blkbits - 9);
1869 		upper_limit <<= blkbits;
1870 	}
1871 
1872 	/* 32-bit extent-start container, ee_block */
1873 	res = 1LL << 32;
1874 	res <<= blkbits;
1875 	res -= 1;
1876 
1877 	/* Sanity check against vm- & vfs- imposed limits */
1878 	if (res > upper_limit)
1879 		res = upper_limit;
1880 
1881 	return res;
1882 }
1883 
1884 /*
1885  * Maximal bitmap file size.  There is a direct, and {,double-,triple-}indirect
1886  * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
1887  * We need to be 1 filesystem block less than the 2^48 sector limit.
1888  */
1889 static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
1890 {
1891 	loff_t res = EXT4_NDIR_BLOCKS;
1892 	int meta_blocks;
1893 	loff_t upper_limit;
1894 	/* This is calculated to be the largest file size for a
1895 	 * dense, bitmapped file such that the total number of
1896 	 * sectors in the file, including data and all indirect blocks,
1897 	 * does not exceed 2^48 -1
1898 	 * __u32 i_blocks_lo and _u16 i_blocks_high representing the
1899 	 * total number of  512 bytes blocks of the file
1900 	 */
1901 
1902 	if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
1903 		/*
1904 		 * !has_huge_files or CONFIG_LBD is not enabled
1905 		 * implies the inode i_block represent total blocks in
1906 		 * 512 bytes 32 == size of vfs inode i_blocks * 8
1907 		 */
1908 		upper_limit = (1LL << 32) - 1;
1909 
1910 		/* total blocks in file system block size */
1911 		upper_limit >>= (bits - 9);
1912 
1913 	} else {
1914 		/*
1915 		 * We use 48 bit ext4_inode i_blocks
1916 		 * With EXT4_HUGE_FILE_FL set the i_blocks
1917 		 * represent total number of blocks in
1918 		 * file system block size
1919 		 */
1920 		upper_limit = (1LL << 48) - 1;
1921 
1922 	}
1923 
1924 	/* indirect blocks */
1925 	meta_blocks = 1;
1926 	/* double indirect blocks */
1927 	meta_blocks += 1 + (1LL << (bits-2));
1928 	/* tripple indirect blocks */
1929 	meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
1930 
1931 	upper_limit -= meta_blocks;
1932 	upper_limit <<= bits;
1933 
1934 	res += 1LL << (bits-2);
1935 	res += 1LL << (2*(bits-2));
1936 	res += 1LL << (3*(bits-2));
1937 	res <<= bits;
1938 	if (res > upper_limit)
1939 		res = upper_limit;
1940 
1941 	if (res > MAX_LFS_FILESIZE)
1942 		res = MAX_LFS_FILESIZE;
1943 
1944 	return res;
1945 }
1946 
1947 static ext4_fsblk_t descriptor_loc(struct super_block *sb,
1948 				ext4_fsblk_t logical_sb_block, int nr)
1949 {
1950 	struct ext4_sb_info *sbi = EXT4_SB(sb);
1951 	ext4_group_t bg, first_meta_bg;
1952 	int has_super = 0;
1953 
1954 	first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
1955 
1956 	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
1957 	    nr < first_meta_bg)
1958 		return logical_sb_block + nr + 1;
1959 	bg = sbi->s_desc_per_block * nr;
1960 	if (ext4_bg_has_super(sb, bg))
1961 		has_super = 1;
1962 	return (has_super + ext4_group_first_block_no(sb, bg));
1963 }
1964 
1965 /**
1966  * ext4_get_stripe_size: Get the stripe size.
1967  * @sbi: In memory super block info
1968  *
1969  * If we have specified it via mount option, then
1970  * use the mount option value. If the value specified at mount time is
1971  * greater than the blocks per group use the super block value.
1972  * If the super block value is greater than blocks per group return 0.
1973  * Allocator needs it be less than blocks per group.
1974  *
1975  */
1976 static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
1977 {
1978 	unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
1979 	unsigned long stripe_width =
1980 			le32_to_cpu(sbi->s_es->s_raid_stripe_width);
1981 
1982 	if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
1983 		return sbi->s_stripe;
1984 
1985 	if (stripe_width <= sbi->s_blocks_per_group)
1986 		return stripe_width;
1987 
1988 	if (stride <= sbi->s_blocks_per_group)
1989 		return stride;
1990 
1991 	return 0;
1992 }
1993 
1994 static int ext4_fill_super(struct super_block *sb, void *data, int silent)
1995 				__releases(kernel_lock)
1996 				__acquires(kernel_lock)
1997 
1998 {
1999 	struct buffer_head *bh;
2000 	struct ext4_super_block *es = NULL;
2001 	struct ext4_sb_info *sbi;
2002 	ext4_fsblk_t block;
2003 	ext4_fsblk_t sb_block = get_sb_block(&data);
2004 	ext4_fsblk_t logical_sb_block;
2005 	unsigned long offset = 0;
2006 	unsigned long journal_devnum = 0;
2007 	unsigned long def_mount_opts;
2008 	struct inode *root;
2009 	char *cp;
2010 	const char *descr;
2011 	int ret = -EINVAL;
2012 	int blocksize;
2013 	unsigned int db_count;
2014 	unsigned int i;
2015 	int needs_recovery, has_huge_files;
2016 	int features;
2017 	__u64 blocks_count;
2018 	int err;
2019 	unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
2020 
2021 	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
2022 	if (!sbi)
2023 		return -ENOMEM;
2024 	sb->s_fs_info = sbi;
2025 	sbi->s_mount_opt = 0;
2026 	sbi->s_resuid = EXT4_DEF_RESUID;
2027 	sbi->s_resgid = EXT4_DEF_RESGID;
2028 	sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
2029 	sbi->s_sb_block = sb_block;
2030 
2031 	unlock_kernel();
2032 
2033 	/* Cleanup superblock name */
2034 	for (cp = sb->s_id; (cp = strchr(cp, '/'));)
2035 		*cp = '!';
2036 
2037 	blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
2038 	if (!blocksize) {
2039 		printk(KERN_ERR "EXT4-fs: unable to set blocksize\n");
2040 		goto out_fail;
2041 	}
2042 
2043 	/*
2044 	 * The ext4 superblock will not be buffer aligned for other than 1kB
2045 	 * block sizes.  We need to calculate the offset from buffer start.
2046 	 */
2047 	if (blocksize != EXT4_MIN_BLOCK_SIZE) {
2048 		logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
2049 		offset = do_div(logical_sb_block, blocksize);
2050 	} else {
2051 		logical_sb_block = sb_block;
2052 	}
2053 
2054 	if (!(bh = sb_bread(sb, logical_sb_block))) {
2055 		printk(KERN_ERR "EXT4-fs: unable to read superblock\n");
2056 		goto out_fail;
2057 	}
2058 	/*
2059 	 * Note: s_es must be initialized as soon as possible because
2060 	 *       some ext4 macro-instructions depend on its value
2061 	 */
2062 	es = (struct ext4_super_block *) (((char *)bh->b_data) + offset);
2063 	sbi->s_es = es;
2064 	sb->s_magic = le16_to_cpu(es->s_magic);
2065 	if (sb->s_magic != EXT4_SUPER_MAGIC)
2066 		goto cantfind_ext4;
2067 
2068 	/* Set defaults before we parse the mount options */
2069 	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
2070 	if (def_mount_opts & EXT4_DEFM_DEBUG)
2071 		set_opt(sbi->s_mount_opt, DEBUG);
2072 	if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
2073 		set_opt(sbi->s_mount_opt, GRPID);
2074 	if (def_mount_opts & EXT4_DEFM_UID16)
2075 		set_opt(sbi->s_mount_opt, NO_UID32);
2076 #ifdef CONFIG_EXT4_FS_XATTR
2077 	if (def_mount_opts & EXT4_DEFM_XATTR_USER)
2078 		set_opt(sbi->s_mount_opt, XATTR_USER);
2079 #endif
2080 #ifdef CONFIG_EXT4_FS_POSIX_ACL
2081 	if (def_mount_opts & EXT4_DEFM_ACL)
2082 		set_opt(sbi->s_mount_opt, POSIX_ACL);
2083 #endif
2084 	if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
2085 		sbi->s_mount_opt |= EXT4_MOUNT_JOURNAL_DATA;
2086 	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
2087 		sbi->s_mount_opt |= EXT4_MOUNT_ORDERED_DATA;
2088 	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
2089 		sbi->s_mount_opt |= EXT4_MOUNT_WRITEBACK_DATA;
2090 
2091 	if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
2092 		set_opt(sbi->s_mount_opt, ERRORS_PANIC);
2093 	else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
2094 		set_opt(sbi->s_mount_opt, ERRORS_CONT);
2095 	else
2096 		set_opt(sbi->s_mount_opt, ERRORS_RO);
2097 
2098 	sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
2099 	sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
2100 	sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
2101 	sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
2102 	sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
2103 
2104 	set_opt(sbi->s_mount_opt, RESERVATION);
2105 	set_opt(sbi->s_mount_opt, BARRIER);
2106 
2107 	/*
2108 	 * enable delayed allocation by default
2109 	 * Use -o nodelalloc to turn it off
2110 	 */
2111 	set_opt(sbi->s_mount_opt, DELALLOC);
2112 
2113 
2114 	if (!parse_options((char *) data, sb, &journal_devnum,
2115 			   &journal_ioprio, NULL, 0))
2116 		goto failed_mount;
2117 
2118 	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2119 		((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
2120 
2121 	if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
2122 	    (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) ||
2123 	     EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
2124 	     EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U)))
2125 		printk(KERN_WARNING
2126 		       "EXT4-fs warning: feature flags set on rev 0 fs, "
2127 		       "running e2fsck is recommended\n");
2128 
2129 	/*
2130 	 * Check feature flags regardless of the revision level, since we
2131 	 * previously didn't change the revision level when setting the flags,
2132 	 * so there is a chance incompat flags are set on a rev 0 filesystem.
2133 	 */
2134 	features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP);
2135 	if (features) {
2136 		printk(KERN_ERR "EXT4-fs: %s: couldn't mount because of "
2137 		       "unsupported optional features (%x).\n", sb->s_id,
2138 			(le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
2139 			~EXT4_FEATURE_INCOMPAT_SUPP));
2140 		goto failed_mount;
2141 	}
2142 	features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP);
2143 	if (!(sb->s_flags & MS_RDONLY) && features) {
2144 		printk(KERN_ERR "EXT4-fs: %s: couldn't mount RDWR because of "
2145 		       "unsupported optional features (%x).\n", sb->s_id,
2146 			(le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
2147 			~EXT4_FEATURE_RO_COMPAT_SUPP));
2148 		goto failed_mount;
2149 	}
2150 	has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
2151 				    EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
2152 	if (has_huge_files) {
2153 		/*
2154 		 * Large file size enabled file system can only be
2155 		 * mount if kernel is build with CONFIG_LBD
2156 		 */
2157 		if (sizeof(root->i_blocks) < sizeof(u64) &&
2158 				!(sb->s_flags & MS_RDONLY)) {
2159 			printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge "
2160 					"files cannot be mounted read-write "
2161 					"without CONFIG_LBD.\n", sb->s_id);
2162 			goto failed_mount;
2163 		}
2164 	}
2165 	blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
2166 
2167 	if (blocksize < EXT4_MIN_BLOCK_SIZE ||
2168 	    blocksize > EXT4_MAX_BLOCK_SIZE) {
2169 		printk(KERN_ERR
2170 		       "EXT4-fs: Unsupported filesystem blocksize %d on %s.\n",
2171 		       blocksize, sb->s_id);
2172 		goto failed_mount;
2173 	}
2174 
2175 	if (sb->s_blocksize != blocksize) {
2176 
2177 		/* Validate the filesystem blocksize */
2178 		if (!sb_set_blocksize(sb, blocksize)) {
2179 			printk(KERN_ERR "EXT4-fs: bad block size %d.\n",
2180 					blocksize);
2181 			goto failed_mount;
2182 		}
2183 
2184 		brelse(bh);
2185 		logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
2186 		offset = do_div(logical_sb_block, blocksize);
2187 		bh = sb_bread(sb, logical_sb_block);
2188 		if (!bh) {
2189 			printk(KERN_ERR
2190 			       "EXT4-fs: Can't read superblock on 2nd try.\n");
2191 			goto failed_mount;
2192 		}
2193 		es = (struct ext4_super_block *)(((char *)bh->b_data) + offset);
2194 		sbi->s_es = es;
2195 		if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
2196 			printk(KERN_ERR
2197 			       "EXT4-fs: Magic mismatch, very weird !\n");
2198 			goto failed_mount;
2199 		}
2200 	}
2201 
2202 	sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
2203 						      has_huge_files);
2204 	sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
2205 
2206 	if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
2207 		sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
2208 		sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
2209 	} else {
2210 		sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
2211 		sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
2212 		if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
2213 		    (!is_power_of_2(sbi->s_inode_size)) ||
2214 		    (sbi->s_inode_size > blocksize)) {
2215 			printk(KERN_ERR
2216 			       "EXT4-fs: unsupported inode size: %d\n",
2217 			       sbi->s_inode_size);
2218 			goto failed_mount;
2219 		}
2220 		if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
2221 			sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
2222 	}
2223 	sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
2224 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) {
2225 		if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
2226 		    sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
2227 		    !is_power_of_2(sbi->s_desc_size)) {
2228 			printk(KERN_ERR
2229 			       "EXT4-fs: unsupported descriptor size %lu\n",
2230 			       sbi->s_desc_size);
2231 			goto failed_mount;
2232 		}
2233 	} else
2234 		sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
2235 	sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
2236 	sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
2237 	if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0)
2238 		goto cantfind_ext4;
2239 	sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
2240 	if (sbi->s_inodes_per_block == 0)
2241 		goto cantfind_ext4;
2242 	sbi->s_itb_per_group = sbi->s_inodes_per_group /
2243 					sbi->s_inodes_per_block;
2244 	sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
2245 	sbi->s_sbh = bh;
2246 	sbi->s_mount_state = le16_to_cpu(es->s_state);
2247 	sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
2248 	sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
2249 	for (i = 0; i < 4; i++)
2250 		sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
2251 	sbi->s_def_hash_version = es->s_def_hash_version;
2252 	i = le32_to_cpu(es->s_flags);
2253 	if (i & EXT2_FLAGS_UNSIGNED_HASH)
2254 		sbi->s_hash_unsigned = 3;
2255 	else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
2256 #ifdef __CHAR_UNSIGNED__
2257 		es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
2258 		sbi->s_hash_unsigned = 3;
2259 #else
2260 		es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
2261 #endif
2262 		sb->s_dirt = 1;
2263 	}
2264 
2265 	if (sbi->s_blocks_per_group > blocksize * 8) {
2266 		printk(KERN_ERR
2267 		       "EXT4-fs: #blocks per group too big: %lu\n",
2268 		       sbi->s_blocks_per_group);
2269 		goto failed_mount;
2270 	}
2271 	if (sbi->s_inodes_per_group > blocksize * 8) {
2272 		printk(KERN_ERR
2273 		       "EXT4-fs: #inodes per group too big: %lu\n",
2274 		       sbi->s_inodes_per_group);
2275 		goto failed_mount;
2276 	}
2277 
2278 	if (ext4_blocks_count(es) >
2279 		    (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
2280 		printk(KERN_ERR "EXT4-fs: filesystem on %s:"
2281 			" too large to mount safely\n", sb->s_id);
2282 		if (sizeof(sector_t) < 8)
2283 			printk(KERN_WARNING "EXT4-fs: CONFIG_LBD not "
2284 					"enabled\n");
2285 		goto failed_mount;
2286 	}
2287 
2288 	if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
2289 		goto cantfind_ext4;
2290 
2291         /*
2292          * It makes no sense for the first data block to be beyond the end
2293          * of the filesystem.
2294          */
2295         if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
2296                 printk(KERN_WARNING "EXT4-fs: bad geometry: first data"
2297 		       "block %u is beyond end of filesystem (%llu)\n",
2298 		       le32_to_cpu(es->s_first_data_block),
2299 		       ext4_blocks_count(es));
2300 		goto failed_mount;
2301 	}
2302 	blocks_count = (ext4_blocks_count(es) -
2303 			le32_to_cpu(es->s_first_data_block) +
2304 			EXT4_BLOCKS_PER_GROUP(sb) - 1);
2305 	do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
2306 	if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
2307 		printk(KERN_WARNING "EXT4-fs: groups count too large: %u "
2308 		       "(block count %llu, first data block %u, "
2309 		       "blocks per group %lu)\n", sbi->s_groups_count,
2310 		       ext4_blocks_count(es),
2311 		       le32_to_cpu(es->s_first_data_block),
2312 		       EXT4_BLOCKS_PER_GROUP(sb));
2313 		goto failed_mount;
2314 	}
2315 	sbi->s_groups_count = blocks_count;
2316 	db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
2317 		   EXT4_DESC_PER_BLOCK(sb);
2318 	sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *),
2319 				    GFP_KERNEL);
2320 	if (sbi->s_group_desc == NULL) {
2321 		printk(KERN_ERR "EXT4-fs: not enough memory\n");
2322 		goto failed_mount;
2323 	}
2324 
2325 #ifdef CONFIG_PROC_FS
2326 	if (ext4_proc_root)
2327 		sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
2328 
2329 	if (sbi->s_proc)
2330 		proc_create_data("inode_readahead_blks", 0644, sbi->s_proc,
2331 				 &ext4_ui_proc_fops,
2332 				 &sbi->s_inode_readahead_blks);
2333 #endif
2334 
2335 	bgl_lock_init(&sbi->s_blockgroup_lock);
2336 
2337 	for (i = 0; i < db_count; i++) {
2338 		block = descriptor_loc(sb, logical_sb_block, i);
2339 		sbi->s_group_desc[i] = sb_bread(sb, block);
2340 		if (!sbi->s_group_desc[i]) {
2341 			printk(KERN_ERR "EXT4-fs: "
2342 			       "can't read group descriptor %d\n", i);
2343 			db_count = i;
2344 			goto failed_mount2;
2345 		}
2346 	}
2347 	if (!ext4_check_descriptors(sb)) {
2348 		printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n");
2349 		goto failed_mount2;
2350 	}
2351 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
2352 		if (!ext4_fill_flex_info(sb)) {
2353 			printk(KERN_ERR
2354 			       "EXT4-fs: unable to initialize "
2355 			       "flex_bg meta info!\n");
2356 			goto failed_mount2;
2357 		}
2358 
2359 	sbi->s_gdb_count = db_count;
2360 	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
2361 	spin_lock_init(&sbi->s_next_gen_lock);
2362 
2363 	err = percpu_counter_init(&sbi->s_freeblocks_counter,
2364 			ext4_count_free_blocks(sb));
2365 	if (!err) {
2366 		err = percpu_counter_init(&sbi->s_freeinodes_counter,
2367 				ext4_count_free_inodes(sb));
2368 	}
2369 	if (!err) {
2370 		err = percpu_counter_init(&sbi->s_dirs_counter,
2371 				ext4_count_dirs(sb));
2372 	}
2373 	if (!err) {
2374 		err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
2375 	}
2376 	if (err) {
2377 		printk(KERN_ERR "EXT4-fs: insufficient memory\n");
2378 		goto failed_mount3;
2379 	}
2380 
2381 	sbi->s_stripe = ext4_get_stripe_size(sbi);
2382 
2383 	/*
2384 	 * set up enough so that it can read an inode
2385 	 */
2386 	sb->s_op = &ext4_sops;
2387 	sb->s_export_op = &ext4_export_ops;
2388 	sb->s_xattr = ext4_xattr_handlers;
2389 #ifdef CONFIG_QUOTA
2390 	sb->s_qcop = &ext4_qctl_operations;
2391 	sb->dq_op = &ext4_quota_operations;
2392 #endif
2393 	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
2394 
2395 	sb->s_root = NULL;
2396 
2397 	needs_recovery = (es->s_last_orphan != 0 ||
2398 			  EXT4_HAS_INCOMPAT_FEATURE(sb,
2399 				    EXT4_FEATURE_INCOMPAT_RECOVER));
2400 
2401 	/*
2402 	 * The first inode we look at is the journal inode.  Don't try
2403 	 * root first: it may be modified in the journal!
2404 	 */
2405 	if (!test_opt(sb, NOLOAD) &&
2406 	    EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
2407 		if (ext4_load_journal(sb, es, journal_devnum))
2408 			goto failed_mount3;
2409 		if (!(sb->s_flags & MS_RDONLY) &&
2410 		    EXT4_SB(sb)->s_journal->j_failed_commit) {
2411 			printk(KERN_CRIT "EXT4-fs error (device %s): "
2412 			       "ext4_fill_super: Journal transaction "
2413 			       "%u is corrupt\n", sb->s_id,
2414 			       EXT4_SB(sb)->s_journal->j_failed_commit);
2415 			if (test_opt(sb, ERRORS_RO)) {
2416 				printk(KERN_CRIT
2417 				       "Mounting filesystem read-only\n");
2418 				sb->s_flags |= MS_RDONLY;
2419 				EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2420 				es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2421 			}
2422 			if (test_opt(sb, ERRORS_PANIC)) {
2423 				EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2424 				es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2425 				ext4_commit_super(sb, es, 1);
2426 				goto failed_mount4;
2427 			}
2428 		}
2429 	} else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
2430 	      EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
2431 		printk(KERN_ERR "EXT4-fs: required journal recovery "
2432 		       "suppressed and not mounted read-only\n");
2433 		goto failed_mount4;
2434 	} else {
2435 		clear_opt(sbi->s_mount_opt, DATA_FLAGS);
2436 		set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
2437 		sbi->s_journal = NULL;
2438 		needs_recovery = 0;
2439 		goto no_journal;
2440 	}
2441 
2442 	if (ext4_blocks_count(es) > 0xffffffffULL &&
2443 	    !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
2444 				       JBD2_FEATURE_INCOMPAT_64BIT)) {
2445 		printk(KERN_ERR "EXT4-fs: Failed to set 64-bit journal feature\n");
2446 		goto failed_mount4;
2447 	}
2448 
2449 	if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
2450 		jbd2_journal_set_features(sbi->s_journal,
2451 				JBD2_FEATURE_COMPAT_CHECKSUM, 0,
2452 				JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2453 	} else if (test_opt(sb, JOURNAL_CHECKSUM)) {
2454 		jbd2_journal_set_features(sbi->s_journal,
2455 				JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0);
2456 		jbd2_journal_clear_features(sbi->s_journal, 0, 0,
2457 				JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2458 	} else {
2459 		jbd2_journal_clear_features(sbi->s_journal,
2460 				JBD2_FEATURE_COMPAT_CHECKSUM, 0,
2461 				JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2462 	}
2463 
2464 	/* We have now updated the journal if required, so we can
2465 	 * validate the data journaling mode. */
2466 	switch (test_opt(sb, DATA_FLAGS)) {
2467 	case 0:
2468 		/* No mode set, assume a default based on the journal
2469 		 * capabilities: ORDERED_DATA if the journal can
2470 		 * cope, else JOURNAL_DATA
2471 		 */
2472 		if (jbd2_journal_check_available_features
2473 		    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
2474 			set_opt(sbi->s_mount_opt, ORDERED_DATA);
2475 		else
2476 			set_opt(sbi->s_mount_opt, JOURNAL_DATA);
2477 		break;
2478 
2479 	case EXT4_MOUNT_ORDERED_DATA:
2480 	case EXT4_MOUNT_WRITEBACK_DATA:
2481 		if (!jbd2_journal_check_available_features
2482 		    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
2483 			printk(KERN_ERR "EXT4-fs: Journal does not support "
2484 			       "requested data journaling mode\n");
2485 			goto failed_mount4;
2486 		}
2487 	default:
2488 		break;
2489 	}
2490 	set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
2491 
2492 no_journal:
2493 
2494 	if (test_opt(sb, NOBH)) {
2495 		if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
2496 			printk(KERN_WARNING "EXT4-fs: Ignoring nobh option - "
2497 				"its supported only with writeback mode\n");
2498 			clear_opt(sbi->s_mount_opt, NOBH);
2499 		}
2500 	}
2501 	/*
2502 	 * The jbd2_journal_load will have done any necessary log recovery,
2503 	 * so we can safely mount the rest of the filesystem now.
2504 	 */
2505 
2506 	root = ext4_iget(sb, EXT4_ROOT_INO);
2507 	if (IS_ERR(root)) {
2508 		printk(KERN_ERR "EXT4-fs: get root inode failed\n");
2509 		ret = PTR_ERR(root);
2510 		goto failed_mount4;
2511 	}
2512 	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
2513 		iput(root);
2514 		printk(KERN_ERR "EXT4-fs: corrupt root inode, run e2fsck\n");
2515 		goto failed_mount4;
2516 	}
2517 	sb->s_root = d_alloc_root(root);
2518 	if (!sb->s_root) {
2519 		printk(KERN_ERR "EXT4-fs: get root dentry failed\n");
2520 		iput(root);
2521 		ret = -ENOMEM;
2522 		goto failed_mount4;
2523 	}
2524 
2525 	ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY);
2526 
2527 	/* determine the minimum size of new large inodes, if present */
2528 	if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
2529 		sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
2530 						     EXT4_GOOD_OLD_INODE_SIZE;
2531 		if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
2532 				       EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) {
2533 			if (sbi->s_want_extra_isize <
2534 			    le16_to_cpu(es->s_want_extra_isize))
2535 				sbi->s_want_extra_isize =
2536 					le16_to_cpu(es->s_want_extra_isize);
2537 			if (sbi->s_want_extra_isize <
2538 			    le16_to_cpu(es->s_min_extra_isize))
2539 				sbi->s_want_extra_isize =
2540 					le16_to_cpu(es->s_min_extra_isize);
2541 		}
2542 	}
2543 	/* Check if enough inode space is available */
2544 	if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
2545 							sbi->s_inode_size) {
2546 		sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
2547 						       EXT4_GOOD_OLD_INODE_SIZE;
2548 		printk(KERN_INFO "EXT4-fs: required extra inode space not"
2549 			"available.\n");
2550 	}
2551 
2552 	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
2553 		printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
2554 				"requested data journaling mode\n");
2555 		clear_opt(sbi->s_mount_opt, DELALLOC);
2556 	} else if (test_opt(sb, DELALLOC))
2557 		printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n");
2558 
2559 	ext4_ext_init(sb);
2560 	err = ext4_mb_init(sb, needs_recovery);
2561 	if (err) {
2562 		printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n",
2563 		       err);
2564 		goto failed_mount4;
2565 	}
2566 
2567 	/*
2568 	 * akpm: core read_super() calls in here with the superblock locked.
2569 	 * That deadlocks, because orphan cleanup needs to lock the superblock
2570 	 * in numerous places.  Here we just pop the lock - it's relatively
2571 	 * harmless, because we are now ready to accept write_super() requests,
2572 	 * and aviro says that's the only reason for hanging onto the
2573 	 * superblock lock.
2574 	 */
2575 	EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
2576 	ext4_orphan_cleanup(sb, es);
2577 	EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
2578 	if (needs_recovery) {
2579 		printk(KERN_INFO "EXT4-fs: recovery complete.\n");
2580 		ext4_mark_recovery_complete(sb, es);
2581 	}
2582 	if (EXT4_SB(sb)->s_journal) {
2583 		if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
2584 			descr = " journalled data mode";
2585 		else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
2586 			descr = " ordered data mode";
2587 		else
2588 			descr = " writeback data mode";
2589 	} else
2590 		descr = "out journal";
2591 
2592 	printk(KERN_INFO "EXT4-fs: mounted filesystem %s with%s\n",
2593 	       sb->s_id, descr);
2594 
2595 	lock_kernel();
2596 	return 0;
2597 
2598 cantfind_ext4:
2599 	if (!silent)
2600 		printk(KERN_ERR "VFS: Can't find ext4 filesystem on dev %s.\n",
2601 		       sb->s_id);
2602 	goto failed_mount;
2603 
2604 failed_mount4:
2605 	printk(KERN_ERR "EXT4-fs (device %s): mount failed\n", sb->s_id);
2606 	if (sbi->s_journal) {
2607 		jbd2_journal_destroy(sbi->s_journal);
2608 		sbi->s_journal = NULL;
2609 	}
2610 failed_mount3:
2611 	percpu_counter_destroy(&sbi->s_freeblocks_counter);
2612 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
2613 	percpu_counter_destroy(&sbi->s_dirs_counter);
2614 	percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
2615 failed_mount2:
2616 	for (i = 0; i < db_count; i++)
2617 		brelse(sbi->s_group_desc[i]);
2618 	kfree(sbi->s_group_desc);
2619 failed_mount:
2620 	if (sbi->s_proc) {
2621 		remove_proc_entry("inode_readahead_blks", sbi->s_proc);
2622 		remove_proc_entry(sb->s_id, ext4_proc_root);
2623 	}
2624 #ifdef CONFIG_QUOTA
2625 	for (i = 0; i < MAXQUOTAS; i++)
2626 		kfree(sbi->s_qf_names[i]);
2627 #endif
2628 	ext4_blkdev_remove(sbi);
2629 	brelse(bh);
2630 out_fail:
2631 	sb->s_fs_info = NULL;
2632 	kfree(sbi);
2633 	lock_kernel();
2634 	return ret;
2635 }
2636 
2637 /*
2638  * Setup any per-fs journal parameters now.  We'll do this both on
2639  * initial mount, once the journal has been initialised but before we've
2640  * done any recovery; and again on any subsequent remount.
2641  */
2642 static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
2643 {
2644 	struct ext4_sb_info *sbi = EXT4_SB(sb);
2645 
2646 	journal->j_commit_interval = sbi->s_commit_interval;
2647 	journal->j_min_batch_time = sbi->s_min_batch_time;
2648 	journal->j_max_batch_time = sbi->s_max_batch_time;
2649 
2650 	spin_lock(&journal->j_state_lock);
2651 	if (test_opt(sb, BARRIER))
2652 		journal->j_flags |= JBD2_BARRIER;
2653 	else
2654 		journal->j_flags &= ~JBD2_BARRIER;
2655 	if (test_opt(sb, DATA_ERR_ABORT))
2656 		journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
2657 	else
2658 		journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
2659 	spin_unlock(&journal->j_state_lock);
2660 }
2661 
2662 static journal_t *ext4_get_journal(struct super_block *sb,
2663 				   unsigned int journal_inum)
2664 {
2665 	struct inode *journal_inode;
2666 	journal_t *journal;
2667 
2668 	BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
2669 
2670 	/* First, test for the existence of a valid inode on disk.  Bad
2671 	 * things happen if we iget() an unused inode, as the subsequent
2672 	 * iput() will try to delete it. */
2673 
2674 	journal_inode = ext4_iget(sb, journal_inum);
2675 	if (IS_ERR(journal_inode)) {
2676 		printk(KERN_ERR "EXT4-fs: no journal found.\n");
2677 		return NULL;
2678 	}
2679 	if (!journal_inode->i_nlink) {
2680 		make_bad_inode(journal_inode);
2681 		iput(journal_inode);
2682 		printk(KERN_ERR "EXT4-fs: journal inode is deleted.\n");
2683 		return NULL;
2684 	}
2685 
2686 	jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
2687 		  journal_inode, journal_inode->i_size);
2688 	if (!S_ISREG(journal_inode->i_mode)) {
2689 		printk(KERN_ERR "EXT4-fs: invalid journal inode.\n");
2690 		iput(journal_inode);
2691 		return NULL;
2692 	}
2693 
2694 	journal = jbd2_journal_init_inode(journal_inode);
2695 	if (!journal) {
2696 		printk(KERN_ERR "EXT4-fs: Could not load journal inode\n");
2697 		iput(journal_inode);
2698 		return NULL;
2699 	}
2700 	journal->j_private = sb;
2701 	ext4_init_journal_params(sb, journal);
2702 	return journal;
2703 }
2704 
2705 static journal_t *ext4_get_dev_journal(struct super_block *sb,
2706 				       dev_t j_dev)
2707 {
2708 	struct buffer_head *bh;
2709 	journal_t *journal;
2710 	ext4_fsblk_t start;
2711 	ext4_fsblk_t len;
2712 	int hblock, blocksize;
2713 	ext4_fsblk_t sb_block;
2714 	unsigned long offset;
2715 	struct ext4_super_block *es;
2716 	struct block_device *bdev;
2717 
2718 	BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
2719 
2720 	bdev = ext4_blkdev_get(j_dev);
2721 	if (bdev == NULL)
2722 		return NULL;
2723 
2724 	if (bd_claim(bdev, sb)) {
2725 		printk(KERN_ERR
2726 			"EXT4-fs: failed to claim external journal device.\n");
2727 		blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
2728 		return NULL;
2729 	}
2730 
2731 	blocksize = sb->s_blocksize;
2732 	hblock = bdev_hardsect_size(bdev);
2733 	if (blocksize < hblock) {
2734 		printk(KERN_ERR
2735 			"EXT4-fs: blocksize too small for journal device.\n");
2736 		goto out_bdev;
2737 	}
2738 
2739 	sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
2740 	offset = EXT4_MIN_BLOCK_SIZE % blocksize;
2741 	set_blocksize(bdev, blocksize);
2742 	if (!(bh = __bread(bdev, sb_block, blocksize))) {
2743 		printk(KERN_ERR "EXT4-fs: couldn't read superblock of "
2744 		       "external journal\n");
2745 		goto out_bdev;
2746 	}
2747 
2748 	es = (struct ext4_super_block *) (((char *)bh->b_data) + offset);
2749 	if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
2750 	    !(le32_to_cpu(es->s_feature_incompat) &
2751 	      EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
2752 		printk(KERN_ERR "EXT4-fs: external journal has "
2753 					"bad superblock\n");
2754 		brelse(bh);
2755 		goto out_bdev;
2756 	}
2757 
2758 	if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
2759 		printk(KERN_ERR "EXT4-fs: journal UUID does not match\n");
2760 		brelse(bh);
2761 		goto out_bdev;
2762 	}
2763 
2764 	len = ext4_blocks_count(es);
2765 	start = sb_block + 1;
2766 	brelse(bh);	/* we're done with the superblock */
2767 
2768 	journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
2769 					start, len, blocksize);
2770 	if (!journal) {
2771 		printk(KERN_ERR "EXT4-fs: failed to create device journal\n");
2772 		goto out_bdev;
2773 	}
2774 	journal->j_private = sb;
2775 	ll_rw_block(READ, 1, &journal->j_sb_buffer);
2776 	wait_on_buffer(journal->j_sb_buffer);
2777 	if (!buffer_uptodate(journal->j_sb_buffer)) {
2778 		printk(KERN_ERR "EXT4-fs: I/O error on journal device\n");
2779 		goto out_journal;
2780 	}
2781 	if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
2782 		printk(KERN_ERR "EXT4-fs: External journal has more than one "
2783 					"user (unsupported) - %d\n",
2784 			be32_to_cpu(journal->j_superblock->s_nr_users));
2785 		goto out_journal;
2786 	}
2787 	EXT4_SB(sb)->journal_bdev = bdev;
2788 	ext4_init_journal_params(sb, journal);
2789 	return journal;
2790 out_journal:
2791 	jbd2_journal_destroy(journal);
2792 out_bdev:
2793 	ext4_blkdev_put(bdev);
2794 	return NULL;
2795 }
2796 
2797 static int ext4_load_journal(struct super_block *sb,
2798 			     struct ext4_super_block *es,
2799 			     unsigned long journal_devnum)
2800 {
2801 	journal_t *journal;
2802 	unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
2803 	dev_t journal_dev;
2804 	int err = 0;
2805 	int really_read_only;
2806 
2807 	BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
2808 
2809 	if (journal_devnum &&
2810 	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
2811 		printk(KERN_INFO "EXT4-fs: external journal device major/minor "
2812 			"numbers have changed\n");
2813 		journal_dev = new_decode_dev(journal_devnum);
2814 	} else
2815 		journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
2816 
2817 	really_read_only = bdev_read_only(sb->s_bdev);
2818 
2819 	/*
2820 	 * Are we loading a blank journal or performing recovery after a
2821 	 * crash?  For recovery, we need to check in advance whether we
2822 	 * can get read-write access to the device.
2823 	 */
2824 
2825 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
2826 		if (sb->s_flags & MS_RDONLY) {
2827 			printk(KERN_INFO "EXT4-fs: INFO: recovery "
2828 					"required on readonly filesystem.\n");
2829 			if (really_read_only) {
2830 				printk(KERN_ERR "EXT4-fs: write access "
2831 					"unavailable, cannot proceed.\n");
2832 				return -EROFS;
2833 			}
2834 			printk(KERN_INFO "EXT4-fs: write access will "
2835 			       "be enabled during recovery.\n");
2836 		}
2837 	}
2838 
2839 	if (journal_inum && journal_dev) {
2840 		printk(KERN_ERR "EXT4-fs: filesystem has both journal "
2841 		       "and inode journals!\n");
2842 		return -EINVAL;
2843 	}
2844 
2845 	if (journal_inum) {
2846 		if (!(journal = ext4_get_journal(sb, journal_inum)))
2847 			return -EINVAL;
2848 	} else {
2849 		if (!(journal = ext4_get_dev_journal(sb, journal_dev)))
2850 			return -EINVAL;
2851 	}
2852 
2853 	if (journal->j_flags & JBD2_BARRIER)
2854 		printk(KERN_INFO "EXT4-fs: barriers enabled\n");
2855 	else
2856 		printk(KERN_INFO "EXT4-fs: barriers disabled\n");
2857 
2858 	if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
2859 		err = jbd2_journal_update_format(journal);
2860 		if (err)  {
2861 			printk(KERN_ERR "EXT4-fs: error updating journal.\n");
2862 			jbd2_journal_destroy(journal);
2863 			return err;
2864 		}
2865 	}
2866 
2867 	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER))
2868 		err = jbd2_journal_wipe(journal, !really_read_only);
2869 	if (!err)
2870 		err = jbd2_journal_load(journal);
2871 
2872 	if (err) {
2873 		printk(KERN_ERR "EXT4-fs: error loading journal.\n");
2874 		jbd2_journal_destroy(journal);
2875 		return err;
2876 	}
2877 
2878 	EXT4_SB(sb)->s_journal = journal;
2879 	ext4_clear_journal_err(sb, es);
2880 
2881 	if (journal_devnum &&
2882 	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
2883 		es->s_journal_dev = cpu_to_le32(journal_devnum);
2884 		sb->s_dirt = 1;
2885 
2886 		/* Make sure we flush the recovery flag to disk. */
2887 		ext4_commit_super(sb, es, 1);
2888 	}
2889 
2890 	return 0;
2891 }
2892 
2893 static int ext4_commit_super(struct super_block *sb,
2894 			      struct ext4_super_block *es, int sync)
2895 {
2896 	struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
2897 	int error = 0;
2898 
2899 	if (!sbh)
2900 		return error;
2901 	if (buffer_write_io_error(sbh)) {
2902 		/*
2903 		 * Oh, dear.  A previous attempt to write the
2904 		 * superblock failed.  This could happen because the
2905 		 * USB device was yanked out.  Or it could happen to
2906 		 * be a transient write error and maybe the block will
2907 		 * be remapped.  Nothing we can do but to retry the
2908 		 * write and hope for the best.
2909 		 */
2910 		printk(KERN_ERR "EXT4-fs: previous I/O error to "
2911 		       "superblock detected for %s.\n", sb->s_id);
2912 		clear_buffer_write_io_error(sbh);
2913 		set_buffer_uptodate(sbh);
2914 	}
2915 	es->s_wtime = cpu_to_le32(get_seconds());
2916 	ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
2917 					&EXT4_SB(sb)->s_freeblocks_counter));
2918 	es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
2919 					&EXT4_SB(sb)->s_freeinodes_counter));
2920 
2921 	BUFFER_TRACE(sbh, "marking dirty");
2922 	mark_buffer_dirty(sbh);
2923 	if (sync) {
2924 		error = sync_dirty_buffer(sbh);
2925 		if (error)
2926 			return error;
2927 
2928 		error = buffer_write_io_error(sbh);
2929 		if (error) {
2930 			printk(KERN_ERR "EXT4-fs: I/O error while writing "
2931 			       "superblock for %s.\n", sb->s_id);
2932 			clear_buffer_write_io_error(sbh);
2933 			set_buffer_uptodate(sbh);
2934 		}
2935 	}
2936 	return error;
2937 }
2938 
2939 
2940 /*
2941  * Have we just finished recovery?  If so, and if we are mounting (or
2942  * remounting) the filesystem readonly, then we will end up with a
2943  * consistent fs on disk.  Record that fact.
2944  */
2945 static void ext4_mark_recovery_complete(struct super_block *sb,
2946 					struct ext4_super_block *es)
2947 {
2948 	journal_t *journal = EXT4_SB(sb)->s_journal;
2949 
2950 	if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
2951 		BUG_ON(journal != NULL);
2952 		return;
2953 	}
2954 	jbd2_journal_lock_updates(journal);
2955 	if (jbd2_journal_flush(journal) < 0)
2956 		goto out;
2957 
2958 	lock_super(sb);
2959 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
2960 	    sb->s_flags & MS_RDONLY) {
2961 		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
2962 		sb->s_dirt = 0;
2963 		ext4_commit_super(sb, es, 1);
2964 	}
2965 	unlock_super(sb);
2966 
2967 out:
2968 	jbd2_journal_unlock_updates(journal);
2969 }
2970 
2971 /*
2972  * If we are mounting (or read-write remounting) a filesystem whose journal
2973  * has recorded an error from a previous lifetime, move that error to the
2974  * main filesystem now.
2975  */
2976 static void ext4_clear_journal_err(struct super_block *sb,
2977 				   struct ext4_super_block *es)
2978 {
2979 	journal_t *journal;
2980 	int j_errno;
2981 	const char *errstr;
2982 
2983 	BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
2984 
2985 	journal = EXT4_SB(sb)->s_journal;
2986 
2987 	/*
2988 	 * Now check for any error status which may have been recorded in the
2989 	 * journal by a prior ext4_error() or ext4_abort()
2990 	 */
2991 
2992 	j_errno = jbd2_journal_errno(journal);
2993 	if (j_errno) {
2994 		char nbuf[16];
2995 
2996 		errstr = ext4_decode_error(sb, j_errno, nbuf);
2997 		ext4_warning(sb, __func__, "Filesystem error recorded "
2998 			     "from previous mount: %s", errstr);
2999 		ext4_warning(sb, __func__, "Marking fs in need of "
3000 			     "filesystem check.");
3001 
3002 		EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
3003 		es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
3004 		ext4_commit_super(sb, es, 1);
3005 
3006 		jbd2_journal_clear_err(journal);
3007 	}
3008 }
3009 
3010 /*
3011  * Force the running and committing transactions to commit,
3012  * and wait on the commit.
3013  */
3014 int ext4_force_commit(struct super_block *sb)
3015 {
3016 	journal_t *journal;
3017 	int ret = 0;
3018 
3019 	if (sb->s_flags & MS_RDONLY)
3020 		return 0;
3021 
3022 	journal = EXT4_SB(sb)->s_journal;
3023 	if (journal) {
3024 		sb->s_dirt = 0;
3025 		ret = ext4_journal_force_commit(journal);
3026 	}
3027 
3028 	return ret;
3029 }
3030 
3031 /*
3032  * Ext4 always journals updates to the superblock itself, so we don't
3033  * have to propagate any other updates to the superblock on disk at this
3034  * point.  (We can probably nuke this function altogether, and remove
3035  * any mention to sb->s_dirt in all of fs/ext4; eventual cleanup...)
3036  */
3037 static void ext4_write_super(struct super_block *sb)
3038 {
3039 	if (EXT4_SB(sb)->s_journal) {
3040 		if (mutex_trylock(&sb->s_lock) != 0)
3041 			BUG();
3042 		sb->s_dirt = 0;
3043 	} else {
3044 		ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
3045 	}
3046 }
3047 
3048 static int ext4_sync_fs(struct super_block *sb, int wait)
3049 {
3050 	int ret = 0;
3051 	tid_t target;
3052 
3053 	trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
3054 	sb->s_dirt = 0;
3055 	if (EXT4_SB(sb)->s_journal) {
3056 		if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal,
3057 					      &target)) {
3058 			if (wait)
3059 				jbd2_log_wait_commit(EXT4_SB(sb)->s_journal,
3060 						     target);
3061 		}
3062 	} else {
3063 		ext4_commit_super(sb, EXT4_SB(sb)->s_es, wait);
3064 	}
3065 	return ret;
3066 }
3067 
3068 /*
3069  * LVM calls this function before a (read-only) snapshot is created.  This
3070  * gives us a chance to flush the journal completely and mark the fs clean.
3071  */
3072 static int ext4_freeze(struct super_block *sb)
3073 {
3074 	int error = 0;
3075 	journal_t *journal;
3076 	sb->s_dirt = 0;
3077 
3078 	if (!(sb->s_flags & MS_RDONLY)) {
3079 		journal = EXT4_SB(sb)->s_journal;
3080 
3081 		if (journal) {
3082 			/* Now we set up the journal barrier. */
3083 			jbd2_journal_lock_updates(journal);
3084 
3085 			/*
3086 			 * We don't want to clear needs_recovery flag when we
3087 			 * failed to flush the journal.
3088 			 */
3089 			error = jbd2_journal_flush(journal);
3090 			if (error < 0)
3091 				goto out;
3092 		}
3093 
3094 		/* Journal blocked and flushed, clear needs_recovery flag. */
3095 		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
3096 		error = ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
3097 		if (error)
3098 			goto out;
3099 	}
3100 	return 0;
3101 out:
3102 	jbd2_journal_unlock_updates(journal);
3103 	return error;
3104 }
3105 
3106 /*
3107  * Called by LVM after the snapshot is done.  We need to reset the RECOVER
3108  * flag here, even though the filesystem is not technically dirty yet.
3109  */
3110 static int ext4_unfreeze(struct super_block *sb)
3111 {
3112 	if (EXT4_SB(sb)->s_journal && !(sb->s_flags & MS_RDONLY)) {
3113 		lock_super(sb);
3114 		/* Reser the needs_recovery flag before the fs is unlocked. */
3115 		EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
3116 		ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
3117 		unlock_super(sb);
3118 		jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
3119 	}
3120 	return 0;
3121 }
3122 
3123 static int ext4_remount(struct super_block *sb, int *flags, char *data)
3124 {
3125 	struct ext4_super_block *es;
3126 	struct ext4_sb_info *sbi = EXT4_SB(sb);
3127 	ext4_fsblk_t n_blocks_count = 0;
3128 	unsigned long old_sb_flags;
3129 	struct ext4_mount_options old_opts;
3130 	ext4_group_t g;
3131 	unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
3132 	int err;
3133 #ifdef CONFIG_QUOTA
3134 	int i;
3135 #endif
3136 
3137 	/* Store the original options */
3138 	old_sb_flags = sb->s_flags;
3139 	old_opts.s_mount_opt = sbi->s_mount_opt;
3140 	old_opts.s_resuid = sbi->s_resuid;
3141 	old_opts.s_resgid = sbi->s_resgid;
3142 	old_opts.s_commit_interval = sbi->s_commit_interval;
3143 	old_opts.s_min_batch_time = sbi->s_min_batch_time;
3144 	old_opts.s_max_batch_time = sbi->s_max_batch_time;
3145 #ifdef CONFIG_QUOTA
3146 	old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
3147 	for (i = 0; i < MAXQUOTAS; i++)
3148 		old_opts.s_qf_names[i] = sbi->s_qf_names[i];
3149 #endif
3150 	if (sbi->s_journal && sbi->s_journal->j_task->io_context)
3151 		journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
3152 
3153 	/*
3154 	 * Allow the "check" option to be passed as a remount option.
3155 	 */
3156 	if (!parse_options(data, sb, NULL, &journal_ioprio,
3157 			   &n_blocks_count, 1)) {
3158 		err = -EINVAL;
3159 		goto restore_opts;
3160 	}
3161 
3162 	if (sbi->s_mount_opt & EXT4_MOUNT_ABORT)
3163 		ext4_abort(sb, __func__, "Abort forced by user");
3164 
3165 	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3166 		((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
3167 
3168 	es = sbi->s_es;
3169 
3170 	if (sbi->s_journal) {
3171 		ext4_init_journal_params(sb, sbi->s_journal);
3172 		set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
3173 	}
3174 
3175 	if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
3176 		n_blocks_count > ext4_blocks_count(es)) {
3177 		if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) {
3178 			err = -EROFS;
3179 			goto restore_opts;
3180 		}
3181 
3182 		if (*flags & MS_RDONLY) {
3183 			/*
3184 			 * First of all, the unconditional stuff we have to do
3185 			 * to disable replay of the journal when we next remount
3186 			 */
3187 			sb->s_flags |= MS_RDONLY;
3188 
3189 			/*
3190 			 * OK, test if we are remounting a valid rw partition
3191 			 * readonly, and if so set the rdonly flag and then
3192 			 * mark the partition as valid again.
3193 			 */
3194 			if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
3195 			    (sbi->s_mount_state & EXT4_VALID_FS))
3196 				es->s_state = cpu_to_le16(sbi->s_mount_state);
3197 
3198 			/*
3199 			 * We have to unlock super so that we can wait for
3200 			 * transactions.
3201 			 */
3202 			if (sbi->s_journal) {
3203 				unlock_super(sb);
3204 				ext4_mark_recovery_complete(sb, es);
3205 				lock_super(sb);
3206 			}
3207 		} else {
3208 			int ret;
3209 			if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb,
3210 					~EXT4_FEATURE_RO_COMPAT_SUPP))) {
3211 				printk(KERN_WARNING "EXT4-fs: %s: couldn't "
3212 				       "remount RDWR because of unsupported "
3213 				       "optional features (%x).\n", sb->s_id,
3214 				(le32_to_cpu(sbi->s_es->s_feature_ro_compat) &
3215 					~EXT4_FEATURE_RO_COMPAT_SUPP));
3216 				err = -EROFS;
3217 				goto restore_opts;
3218 			}
3219 
3220 			/*
3221 			 * Make sure the group descriptor checksums
3222 			 * are sane.  If they aren't, refuse to
3223 			 * remount r/w.
3224 			 */
3225 			for (g = 0; g < sbi->s_groups_count; g++) {
3226 				struct ext4_group_desc *gdp =
3227 					ext4_get_group_desc(sb, g, NULL);
3228 
3229 				if (!ext4_group_desc_csum_verify(sbi, g, gdp)) {
3230 					printk(KERN_ERR
3231 	       "EXT4-fs: ext4_remount: "
3232 		"Checksum for group %u failed (%u!=%u)\n",
3233 		g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
3234 					       le16_to_cpu(gdp->bg_checksum));
3235 					err = -EINVAL;
3236 					goto restore_opts;
3237 				}
3238 			}
3239 
3240 			/*
3241 			 * If we have an unprocessed orphan list hanging
3242 			 * around from a previously readonly bdev mount,
3243 			 * require a full umount/remount for now.
3244 			 */
3245 			if (es->s_last_orphan) {
3246 				printk(KERN_WARNING "EXT4-fs: %s: couldn't "
3247 				       "remount RDWR because of unprocessed "
3248 				       "orphan inode list.  Please "
3249 				       "umount/remount instead.\n",
3250 				       sb->s_id);
3251 				err = -EINVAL;
3252 				goto restore_opts;
3253 			}
3254 
3255 			/*
3256 			 * Mounting a RDONLY partition read-write, so reread
3257 			 * and store the current valid flag.  (It may have
3258 			 * been changed by e2fsck since we originally mounted
3259 			 * the partition.)
3260 			 */
3261 			if (sbi->s_journal)
3262 				ext4_clear_journal_err(sb, es);
3263 			sbi->s_mount_state = le16_to_cpu(es->s_state);
3264 			if ((err = ext4_group_extend(sb, es, n_blocks_count)))
3265 				goto restore_opts;
3266 			if (!ext4_setup_super(sb, es, 0))
3267 				sb->s_flags &= ~MS_RDONLY;
3268 		}
3269 	}
3270 	if (sbi->s_journal == NULL)
3271 		ext4_commit_super(sb, es, 1);
3272 
3273 #ifdef CONFIG_QUOTA
3274 	/* Release old quota file names */
3275 	for (i = 0; i < MAXQUOTAS; i++)
3276 		if (old_opts.s_qf_names[i] &&
3277 		    old_opts.s_qf_names[i] != sbi->s_qf_names[i])
3278 			kfree(old_opts.s_qf_names[i]);
3279 #endif
3280 	return 0;
3281 restore_opts:
3282 	sb->s_flags = old_sb_flags;
3283 	sbi->s_mount_opt = old_opts.s_mount_opt;
3284 	sbi->s_resuid = old_opts.s_resuid;
3285 	sbi->s_resgid = old_opts.s_resgid;
3286 	sbi->s_commit_interval = old_opts.s_commit_interval;
3287 	sbi->s_min_batch_time = old_opts.s_min_batch_time;
3288 	sbi->s_max_batch_time = old_opts.s_max_batch_time;
3289 #ifdef CONFIG_QUOTA
3290 	sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
3291 	for (i = 0; i < MAXQUOTAS; i++) {
3292 		if (sbi->s_qf_names[i] &&
3293 		    old_opts.s_qf_names[i] != sbi->s_qf_names[i])
3294 			kfree(sbi->s_qf_names[i]);
3295 		sbi->s_qf_names[i] = old_opts.s_qf_names[i];
3296 	}
3297 #endif
3298 	return err;
3299 }
3300 
3301 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
3302 {
3303 	struct super_block *sb = dentry->d_sb;
3304 	struct ext4_sb_info *sbi = EXT4_SB(sb);
3305 	struct ext4_super_block *es = sbi->s_es;
3306 	u64 fsid;
3307 
3308 	if (test_opt(sb, MINIX_DF)) {
3309 		sbi->s_overhead_last = 0;
3310 	} else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
3311 		ext4_group_t ngroups = sbi->s_groups_count, i;
3312 		ext4_fsblk_t overhead = 0;
3313 		smp_rmb();
3314 
3315 		/*
3316 		 * Compute the overhead (FS structures).  This is constant
3317 		 * for a given filesystem unless the number of block groups
3318 		 * changes so we cache the previous value until it does.
3319 		 */
3320 
3321 		/*
3322 		 * All of the blocks before first_data_block are
3323 		 * overhead
3324 		 */
3325 		overhead = le32_to_cpu(es->s_first_data_block);
3326 
3327 		/*
3328 		 * Add the overhead attributed to the superblock and
3329 		 * block group descriptors.  If the sparse superblocks
3330 		 * feature is turned on, then not all groups have this.
3331 		 */
3332 		for (i = 0; i < ngroups; i++) {
3333 			overhead += ext4_bg_has_super(sb, i) +
3334 				ext4_bg_num_gdb(sb, i);
3335 			cond_resched();
3336 		}
3337 
3338 		/*
3339 		 * Every block group has an inode bitmap, a block
3340 		 * bitmap, and an inode table.
3341 		 */
3342 		overhead += ngroups * (2 + sbi->s_itb_per_group);
3343 		sbi->s_overhead_last = overhead;
3344 		smp_wmb();
3345 		sbi->s_blocks_last = ext4_blocks_count(es);
3346 	}
3347 
3348 	buf->f_type = EXT4_SUPER_MAGIC;
3349 	buf->f_bsize = sb->s_blocksize;
3350 	buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
3351 	buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
3352 		       percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
3353 	ext4_free_blocks_count_set(es, buf->f_bfree);
3354 	buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
3355 	if (buf->f_bfree < ext4_r_blocks_count(es))
3356 		buf->f_bavail = 0;
3357 	buf->f_files = le32_to_cpu(es->s_inodes_count);
3358 	buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
3359 	es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
3360 	buf->f_namelen = EXT4_NAME_LEN;
3361 	fsid = le64_to_cpup((void *)es->s_uuid) ^
3362 	       le64_to_cpup((void *)es->s_uuid + sizeof(u64));
3363 	buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
3364 	buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
3365 	return 0;
3366 }
3367 
/*
 * Helper functions for writing quotas on sync - we need to start a
 * transaction before the quota file is locked for write. Otherwise there
 * are possible deadlocks:
 * Process 1                         Process 2
 * ext4_create()                     quota_sync()
 *   jbd2_journal_start()                  write_dquot()
 *   vfs_dq_init()                         down(dqio_mutex)
 *     down(dqio_mutex)                    jbd2_journal_start()
 *
 */
3377 
3378 #ifdef CONFIG_QUOTA
3379 
3380 static inline struct inode *dquot_to_inode(struct dquot *dquot)
3381 {
3382 	return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
3383 }
3384 
3385 static int ext4_write_dquot(struct dquot *dquot)
3386 {
3387 	int ret, err;
3388 	handle_t *handle;
3389 	struct inode *inode;
3390 
3391 	inode = dquot_to_inode(dquot);
3392 	handle = ext4_journal_start(inode,
3393 					EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
3394 	if (IS_ERR(handle))
3395 		return PTR_ERR(handle);
3396 	ret = dquot_commit(dquot);
3397 	err = ext4_journal_stop(handle);
3398 	if (!ret)
3399 		ret = err;
3400 	return ret;
3401 }
3402 
3403 static int ext4_acquire_dquot(struct dquot *dquot)
3404 {
3405 	int ret, err;
3406 	handle_t *handle;
3407 
3408 	handle = ext4_journal_start(dquot_to_inode(dquot),
3409 					EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
3410 	if (IS_ERR(handle))
3411 		return PTR_ERR(handle);
3412 	ret = dquot_acquire(dquot);
3413 	err = ext4_journal_stop(handle);
3414 	if (!ret)
3415 		ret = err;
3416 	return ret;
3417 }
3418 
3419 static int ext4_release_dquot(struct dquot *dquot)
3420 {
3421 	int ret, err;
3422 	handle_t *handle;
3423 
3424 	handle = ext4_journal_start(dquot_to_inode(dquot),
3425 					EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
3426 	if (IS_ERR(handle)) {
3427 		/* Release dquot anyway to avoid endless cycle in dqput() */
3428 		dquot_release(dquot);
3429 		return PTR_ERR(handle);
3430 	}
3431 	ret = dquot_release(dquot);
3432 	err = ext4_journal_stop(handle);
3433 	if (!ret)
3434 		ret = err;
3435 	return ret;
3436 }
3437 
3438 static int ext4_mark_dquot_dirty(struct dquot *dquot)
3439 {
3440 	/* Are we journaling quotas? */
3441 	if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
3442 	    EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
3443 		dquot_mark_dquot_dirty(dquot);
3444 		return ext4_write_dquot(dquot);
3445 	} else {
3446 		return dquot_mark_dquot_dirty(dquot);
3447 	}
3448 }
3449 
3450 static int ext4_write_info(struct super_block *sb, int type)
3451 {
3452 	int ret, err;
3453 	handle_t *handle;
3454 
3455 	/* Data block + inode block */
3456 	handle = ext4_journal_start(sb->s_root->d_inode, 2);
3457 	if (IS_ERR(handle))
3458 		return PTR_ERR(handle);
3459 	ret = dquot_commit_info(sb, type);
3460 	err = ext4_journal_stop(handle);
3461 	if (!ret)
3462 		ret = err;
3463 	return ret;
3464 }
3465 
3466 /*
3467  * Turn on quotas during mount time - we need to find
3468  * the quota file and such...
3469  */
3470 static int ext4_quota_on_mount(struct super_block *sb, int type)
3471 {
3472 	return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
3473 			EXT4_SB(sb)->s_jquota_fmt, type);
3474 }
3475 
3476 /*
3477  * Standard function to be called on quota_on
3478  */
3479 static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3480 			 char *name, int remount)
3481 {
3482 	int err;
3483 	struct path path;
3484 
3485 	if (!test_opt(sb, QUOTA))
3486 		return -EINVAL;
3487 	/* When remounting, no checks are needed and in fact, name is NULL */
3488 	if (remount)
3489 		return vfs_quota_on(sb, type, format_id, name, remount);
3490 
3491 	err = kern_path(name, LOOKUP_FOLLOW, &path);
3492 	if (err)
3493 		return err;
3494 
3495 	/* Quotafile not on the same filesystem? */
3496 	if (path.mnt->mnt_sb != sb) {
3497 		path_put(&path);
3498 		return -EXDEV;
3499 	}
3500 	/* Journaling quota? */
3501 	if (EXT4_SB(sb)->s_qf_names[type]) {
3502 		/* Quotafile not in fs root? */
3503 		if (path.dentry->d_parent != sb->s_root)
3504 			printk(KERN_WARNING
3505 				"EXT4-fs: Quota file not on filesystem root. "
3506 				"Journaled quota will not work.\n");
3507 	}
3508 
3509 	/*
3510 	 * When we journal data on quota file, we have to flush journal to see
3511 	 * all updates to the file when we bypass pagecache...
3512 	 */
3513 	if (EXT4_SB(sb)->s_journal &&
3514 	    ext4_should_journal_data(path.dentry->d_inode)) {
3515 		/*
3516 		 * We don't need to lock updates but journal_flush() could
3517 		 * otherwise be livelocked...
3518 		 */
3519 		jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
3520 		err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
3521 		jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
3522 		if (err) {
3523 			path_put(&path);
3524 			return err;
3525 		}
3526 	}
3527 
3528 	err = vfs_quota_on_path(sb, type, format_id, &path);
3529 	path_put(&path);
3530 	return err;
3531 }
3532 
3533 /* Read data from quotafile - avoid pagecache and such because we cannot afford
3534  * acquiring the locks... As quota files are never truncated and quota code
3535  * itself serializes the operations (and noone else should touch the files)
3536  * we don't have to be afraid of races */
3537 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
3538 			       size_t len, loff_t off)
3539 {
3540 	struct inode *inode = sb_dqopt(sb)->files[type];
3541 	ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
3542 	int err = 0;
3543 	int offset = off & (sb->s_blocksize - 1);
3544 	int tocopy;
3545 	size_t toread;
3546 	struct buffer_head *bh;
3547 	loff_t i_size = i_size_read(inode);
3548 
3549 	if (off > i_size)
3550 		return 0;
3551 	if (off+len > i_size)
3552 		len = i_size-off;
3553 	toread = len;
3554 	while (toread > 0) {
3555 		tocopy = sb->s_blocksize - offset < toread ?
3556 				sb->s_blocksize - offset : toread;
3557 		bh = ext4_bread(NULL, inode, blk, 0, &err);
3558 		if (err)
3559 			return err;
3560 		if (!bh)	/* A hole? */
3561 			memset(data, 0, tocopy);
3562 		else
3563 			memcpy(data, bh->b_data+offset, tocopy);
3564 		brelse(bh);
3565 		offset = 0;
3566 		toread -= tocopy;
3567 		data += tocopy;
3568 		blk++;
3569 	}
3570 	return len;
3571 }
3572 
3573 /* Write to quotafile (we know the transaction is already started and has
3574  * enough credits) */
3575 static ssize_t ext4_quota_write(struct super_block *sb, int type,
3576 				const char *data, size_t len, loff_t off)
3577 {
3578 	struct inode *inode = sb_dqopt(sb)->files[type];
3579 	ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
3580 	int err = 0;
3581 	int offset = off & (sb->s_blocksize - 1);
3582 	int tocopy;
3583 	int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL;
3584 	size_t towrite = len;
3585 	struct buffer_head *bh;
3586 	handle_t *handle = journal_current_handle();
3587 
3588 	if (EXT4_SB(sb)->s_journal && !handle) {
3589 		printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)"
3590 			" cancelled because transaction is not started.\n",
3591 			(unsigned long long)off, (unsigned long long)len);
3592 		return -EIO;
3593 	}
3594 	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
3595 	while (towrite > 0) {
3596 		tocopy = sb->s_blocksize - offset < towrite ?
3597 				sb->s_blocksize - offset : towrite;
3598 		bh = ext4_bread(handle, inode, blk, 1, &err);
3599 		if (!bh)
3600 			goto out;
3601 		if (journal_quota) {
3602 			err = ext4_journal_get_write_access(handle, bh);
3603 			if (err) {
3604 				brelse(bh);
3605 				goto out;
3606 			}
3607 		}
3608 		lock_buffer(bh);
3609 		memcpy(bh->b_data+offset, data, tocopy);
3610 		flush_dcache_page(bh->b_page);
3611 		unlock_buffer(bh);
3612 		if (journal_quota)
3613 			err = ext4_handle_dirty_metadata(handle, NULL, bh);
3614 		else {
3615 			/* Always do at least ordered writes for quotas */
3616 			err = ext4_jbd2_file_inode(handle, inode);
3617 			mark_buffer_dirty(bh);
3618 		}
3619 		brelse(bh);
3620 		if (err)
3621 			goto out;
3622 		offset = 0;
3623 		towrite -= tocopy;
3624 		data += tocopy;
3625 		blk++;
3626 	}
3627 out:
3628 	if (len == towrite) {
3629 		mutex_unlock(&inode->i_mutex);
3630 		return err;
3631 	}
3632 	if (inode->i_size < off+len-towrite) {
3633 		i_size_write(inode, off+len-towrite);
3634 		EXT4_I(inode)->i_disksize = inode->i_size;
3635 	}
3636 	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
3637 	ext4_mark_inode_dirty(handle, inode);
3638 	mutex_unlock(&inode->i_mutex);
3639 	return len - towrite;
3640 }
3641 
3642 #endif
3643 
3644 static int ext4_get_sb(struct file_system_type *fs_type,
3645 	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
3646 {
3647 	return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
3648 }
3649 
3650 #ifdef CONFIG_PROC_FS
3651 static int ext4_ui_proc_show(struct seq_file *m, void *v)
3652 {
3653 	unsigned int *p = m->private;
3654 
3655 	seq_printf(m, "%u\n", *p);
3656 	return 0;
3657 }
3658 
3659 static int ext4_ui_proc_open(struct inode *inode, struct file *file)
3660 {
3661 	return single_open(file, ext4_ui_proc_show, PDE(inode)->data);
3662 }
3663 
3664 static ssize_t ext4_ui_proc_write(struct file *file, const char __user *buf,
3665 			       size_t cnt, loff_t *ppos)
3666 {
3667 	unsigned long *p = PDE(file->f_path.dentry->d_inode)->data;
3668 	char str[32];
3669 
3670 	if (cnt >= sizeof(str))
3671 		return -EINVAL;
3672 	if (copy_from_user(str, buf, cnt))
3673 		return -EFAULT;
3674 
3675 	*p = simple_strtoul(str, NULL, 0);
3676 	return cnt;
3677 }
3678 
3679 const struct file_operations ext4_ui_proc_fops = {
3680 	.owner		= THIS_MODULE,
3681 	.open		= ext4_ui_proc_open,
3682 	.read		= seq_read,
3683 	.llseek		= seq_lseek,
3684 	.release	= single_release,
3685 	.write		= ext4_ui_proc_write,
3686 };
3687 #endif
3688 
3689 static struct file_system_type ext4_fs_type = {
3690 	.owner		= THIS_MODULE,
3691 	.name		= "ext4",
3692 	.get_sb		= ext4_get_sb,
3693 	.kill_sb	= kill_block_super,
3694 	.fs_flags	= FS_REQUIRES_DEV,
3695 };
3696 
3697 #ifdef CONFIG_EXT4DEV_COMPAT
3698 static int ext4dev_get_sb(struct file_system_type *fs_type,
3699 	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
3700 {
3701 	printk(KERN_WARNING "EXT4-fs: Update your userspace programs "
3702 	       "to mount using ext4\n");
3703 	printk(KERN_WARNING "EXT4-fs: ext4dev backwards compatibility "
3704 	       "will go away by 2.6.31\n");
3705 	return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
3706 }
3707 
3708 static struct file_system_type ext4dev_fs_type = {
3709 	.owner		= THIS_MODULE,
3710 	.name		= "ext4dev",
3711 	.get_sb		= ext4dev_get_sb,
3712 	.kill_sb	= kill_block_super,
3713 	.fs_flags	= FS_REQUIRES_DEV,
3714 };
3715 MODULE_ALIAS("ext4dev");
3716 #endif
3717 
3718 static int __init init_ext4_fs(void)
3719 {
3720 	int err;
3721 
3722 	ext4_proc_root = proc_mkdir("fs/ext4", NULL);
3723 	err = init_ext4_mballoc();
3724 	if (err)
3725 		return err;
3726 
3727 	err = init_ext4_xattr();
3728 	if (err)
3729 		goto out2;
3730 	err = init_inodecache();
3731 	if (err)
3732 		goto out1;
3733 	err = register_filesystem(&ext4_fs_type);
3734 	if (err)
3735 		goto out;
3736 #ifdef CONFIG_EXT4DEV_COMPAT
3737 	err = register_filesystem(&ext4dev_fs_type);
3738 	if (err) {
3739 		unregister_filesystem(&ext4_fs_type);
3740 		goto out;
3741 	}
3742 #endif
3743 	return 0;
3744 out:
3745 	destroy_inodecache();
3746 out1:
3747 	exit_ext4_xattr();
3748 out2:
3749 	exit_ext4_mballoc();
3750 	return err;
3751 }
3752 
3753 static void __exit exit_ext4_fs(void)
3754 {
3755 	unregister_filesystem(&ext4_fs_type);
3756 #ifdef CONFIG_EXT4DEV_COMPAT
3757 	unregister_filesystem(&ext4dev_fs_type);
3758 #endif
3759 	destroy_inodecache();
3760 	exit_ext4_xattr();
3761 	exit_ext4_mballoc();
3762 	remove_proc_entry("fs/ext4", NULL);
3763 }
3764 
3765 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
3766 MODULE_DESCRIPTION("Fourth Extended Filesystem");
3767 MODULE_LICENSE("GPL");
3768 module_init(init_ext4_fs)
3769 module_exit(exit_ext4_fs)
3770