// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/ext4/super.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/inode.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  Big-endian to little-endian byte-swapping/bitmaps by
 *        David S. Miller (davem@caip.rutgers.edu), 1995
 */

#include <linux/module.h>
#include <linux/string.h>
#include <linux/fs.h>
#include <linux/time.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/parser.h>
#include <linux/buffer_head.h>
#include <linux/exportfs.h>
#include <linux/vfs.h>
#include <linux/random.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/quotaops.h>
#include <linux/seq_file.h>
#include <linux/ctype.h>
#include <linux/log2.h>
#include <linux/crc16.h>
#include <linux/dax.h>
#include <linux/uaccess.h>
#include <linux/iversion.h>
#include <linux/unicode.h>
#include <linux/part_stat.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/fsnotify.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>

#include "ext4.h"
#include "ext4_extents.h"	/* Needed for trace points definition */
#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"
#include "mballoc.h"
#include "fsmap.h"

#define CREATE_TRACE_POINTS
#include <trace/events/ext4.h>

static struct ext4_lazy_init *ext4_li_info;
static DEFINE_MUTEX(ext4_li_mtx);
static struct ratelimit_state ext4_mount_msg_ratelimit;

static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
			     unsigned long journal_devnum);
static int ext4_show_options(struct seq_file *seq, struct dentry *root);
static void ext4_update_super(struct super_block *sb);
static int ext4_commit_super(struct super_block *sb);
static int ext4_mark_recovery_complete(struct super_block *sb,
				       struct ext4_super_block *es);
static int ext4_clear_journal_err(struct super_block *sb,
				  struct ext4_super_block *es);
static int ext4_sync_fs(struct super_block *sb, int wait);
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
static int ext4_unfreeze(struct super_block *sb);
static int ext4_freeze(struct super_block *sb);
static inline int ext2_feature_set_ok(struct super_block *sb);
static inline int ext3_feature_set_ok(struct super_block *sb);
static void ext4_destroy_lazyinit_thread(void);
static void ext4_unregister_li_request(struct super_block *sb);
static void ext4_clear_request_list(void);
static struct inode *ext4_get_journal_inode(struct super_block *sb,
					    unsigned int journal_inum);
static int ext4_validate_options(struct fs_context *fc);
static int ext4_check_opt_consistency(struct fs_context *fc,
				      struct super_block *sb);
static void ext4_apply_options(struct fs_context *fc, struct super_block *sb);
static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param);
static int ext4_get_tree(struct fs_context *fc);
static int ext4_reconfigure(struct fs_context *fc);
static void ext4_fc_free(struct fs_context *fc);
static int ext4_init_fs_context(struct fs_context *fc);
static void ext4_kill_sb(struct super_block *sb);
static const struct fs_parameter_spec ext4_param_specs[];

/*
 * Lock ordering
 *
 * page fault path:
 * mmap_lock -> sb_start_pagefault -> invalidate_lock (r) -> transaction start
 *   -> page lock -> i_data_sem (rw)
 *
 * buffered write path:
 * sb_start_write -> i_mutex -> mmap_lock
 * sb_start_write -> i_mutex -> transaction start -> page lock ->
 *   i_data_sem (rw)
 *
 * truncate:
 * sb_start_write -> i_mutex -> invalidate_lock (w) -> i_mmap_rwsem (w) ->
 *   page lock
 * sb_start_write -> i_mutex -> invalidate_lock (w) -> transaction start ->
 *   i_data_sem (rw)
 *
 * direct IO:
 * sb_start_write -> i_mutex -> mmap_lock
 * sb_start_write -> i_mutex -> transaction start -> i_data_sem (rw)
 *
 * writepages:
 * transaction start -> page lock(s) -> i_data_sem (rw)
 */
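
/*
 * Illustrative note (an inference from the table above, not an extra rule):
 * since the page fault path nests a transaction start inside mmap_lock,
 * code that already holds a running transaction handle must not acquire
 * mmap_lock; inverting the documented order like that could deadlock
 * against a concurrent fault.
 */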

static const struct fs_context_operations ext4_context_ops = {
	.parse_param	= ext4_parse_param,
	.get_tree	= ext4_get_tree,
	.reconfigure	= ext4_reconfigure,
	.free		= ext4_fc_free,
};


#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
static struct file_system_type ext2_fs_type = {
	.owner			= THIS_MODULE,
	.name			= "ext2",
	.init_fs_context	= ext4_init_fs_context,
	.parameters		= ext4_param_specs,
	.kill_sb		= ext4_kill_sb,
	.fs_flags		= FS_REQUIRES_DEV,
};
MODULE_ALIAS_FS("ext2");
MODULE_ALIAS("ext2");
#define IS_EXT2_SB(sb) ((sb)->s_type == &ext2_fs_type)
#else
#define IS_EXT2_SB(sb) (0)
#endif


static struct file_system_type ext3_fs_type = {
	.owner			= THIS_MODULE,
	.name			= "ext3",
	.init_fs_context	= ext4_init_fs_context,
	.parameters		= ext4_param_specs,
	.kill_sb		= ext4_kill_sb,
	.fs_flags		= FS_REQUIRES_DEV,
};
MODULE_ALIAS_FS("ext3");
MODULE_ALIAS("ext3");
#define IS_EXT3_SB(sb) ((sb)->s_type == &ext3_fs_type)


static inline void __ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags,
				  bh_end_io_t *end_io)
{
	/*
	 * The buffer's verified bit is no longer valid after a write-out
	 * error forces the block to be re-read from disk; clear it so
	 * that the buffer contents are rechecked.
	 */
	clear_buffer_verified(bh);

	bh->b_end_io = end_io ? end_io : end_buffer_read_sync;
	get_bh(bh);
	submit_bh(REQ_OP_READ | op_flags, bh);
}

void ext4_read_bh_nowait(struct buffer_head *bh, blk_opf_t op_flags,
			 bh_end_io_t *end_io)
{
	BUG_ON(!buffer_locked(bh));

	if (ext4_buffer_uptodate(bh)) {
		unlock_buffer(bh);
		return;
	}
	__ext4_read_bh(bh, op_flags, end_io);
}

int ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags, bh_end_io_t *end_io)
{
	BUG_ON(!buffer_locked(bh));

	if (ext4_buffer_uptodate(bh)) {
		unlock_buffer(bh);
		return 0;
	}

	__ext4_read_bh(bh, op_flags, end_io);

	wait_on_buffer(bh);
	if (buffer_uptodate(bh))
		return 0;
	return -EIO;
}

int ext4_read_bh_lock(struct buffer_head *bh, blk_opf_t op_flags, bool wait)
{
	lock_buffer(bh);
	if (!wait) {
		ext4_read_bh_nowait(bh, op_flags, NULL);
		return 0;
	}
	return ext4_read_bh(bh, op_flags, NULL);
}
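
/*
 * Usage note (illustrative): ext4_read_bh_lock() takes the buffer lock
 * itself. With wait == false the read is submitted asynchronously and the
 * buffer is unlocked later by the end_io handler; with wait == true the
 * call blocks until the read completes and returns 0 or -EIO.
 */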

/*
 * This works like __bread_gfp() except it uses ERR_PTR for error
 * returns. Currently with sb_bread it's impossible to distinguish
 * between ENOMEM and EIO situations (since both result in a NULL
 * return).
 */
static struct buffer_head *__ext4_sb_bread_gfp(struct super_block *sb,
					       sector_t block,
					       blk_opf_t op_flags, gfp_t gfp)
{
	struct buffer_head *bh;
	int ret;

	bh = sb_getblk_gfp(sb, block, gfp);
	if (bh == NULL)
		return ERR_PTR(-ENOMEM);
	if (ext4_buffer_uptodate(bh))
		return bh;

	ret = ext4_read_bh_lock(bh, REQ_META | op_flags, true);
	if (ret) {
		put_bh(bh);
		return ERR_PTR(ret);
	}
	return bh;
}

struct buffer_head *ext4_sb_bread(struct super_block *sb, sector_t block,
				  blk_opf_t op_flags)
{
	gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_mapping,
			~__GFP_FS) | __GFP_MOVABLE;

	return __ext4_sb_bread_gfp(sb, block, op_flags, gfp);
}
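
/*
 * A minimal caller-side sketch (illustrative only): this family returns
 * ERR_PTR() values rather than NULL, so callers check with IS_ERR() and
 * PTR_ERR() instead of a NULL test, e.g.:
 *
 *	bh = ext4_sb_bread(sb, block, 0);
 *	if (IS_ERR(bh))
 *		return PTR_ERR(bh);	/- -ENOMEM or -EIO -/
 *	...
 *	brelse(bh);
 */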

struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb,
					    sector_t block)
{
	gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_mapping,
			~__GFP_FS);

	return __ext4_sb_bread_gfp(sb, block, 0, gfp);
}

void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block)
{
	struct buffer_head *bh = bdev_getblk(sb->s_bdev, block,
			sb->s_blocksize, GFP_NOWAIT | __GFP_NOWARN);

	if (likely(bh)) {
		if (trylock_buffer(bh))
			ext4_read_bh_nowait(bh, REQ_RAHEAD, NULL);
		brelse(bh);
	}
}

static int ext4_verify_csum_type(struct super_block *sb,
				 struct ext4_super_block *es)
{
	if (!ext4_has_feature_metadata_csum(sb))
		return 1;

	return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
}

__le32 ext4_superblock_csum(struct super_block *sb,
			    struct ext4_super_block *es)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int offset = offsetof(struct ext4_super_block, s_checksum);
	__u32 csum;

	csum = ext4_chksum(sbi, ~0, (char *)es, offset);

	return cpu_to_le32(csum);
}
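
/*
 * Note: the checksum above covers all superblock bytes preceding the
 * s_checksum field, so the stored checksum is never included in its own
 * computation.
 */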

static int ext4_superblock_csum_verify(struct super_block *sb,
				       struct ext4_super_block *es)
{
	if (!ext4_has_metadata_csum(sb))
		return 1;

	return es->s_checksum == ext4_superblock_csum(sb, es);
}

void ext4_superblock_csum_set(struct super_block *sb)
{
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;

	if (!ext4_has_metadata_csum(sb))
		return;

	es->s_checksum = ext4_superblock_csum(sb, es);
}

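/*
 * The block-group descriptor accessors below combine _lo/_hi field pairs:
 * when the descriptor size is at least EXT4_MIN_DESC_SIZE_64BIT the high
 * half is folded in, otherwise it is treated as zero. For example
 * (illustrative), a block bitmap at 0x123456789 is stored as
 * bg_block_bitmap_lo = 0x23456789 and bg_block_bitmap_hi = 0x1.
 */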
ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
			       struct ext4_group_desc *bg)
{
	return le32_to_cpu(bg->bg_block_bitmap_lo) |
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
		 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
}

ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
			       struct ext4_group_desc *bg)
{
	return le32_to_cpu(bg->bg_inode_bitmap_lo) |
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
		 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
}

ext4_fsblk_t ext4_inode_table(struct super_block *sb,
			      struct ext4_group_desc *bg)
{
	return le32_to_cpu(bg->bg_inode_table_lo) |
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
		 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
}

__u32 ext4_free_group_clusters(struct super_block *sb,
			       struct ext4_group_desc *bg)
{
	return le16_to_cpu(bg->bg_free_blocks_count_lo) |
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
		 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
}

__u32 ext4_free_inodes_count(struct super_block *sb,
			     struct ext4_group_desc *bg)
{
	return le16_to_cpu(bg->bg_free_inodes_count_lo) |
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
		 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
}

__u32 ext4_used_dirs_count(struct super_block *sb,
			   struct ext4_group_desc *bg)
{
	return le16_to_cpu(bg->bg_used_dirs_count_lo) |
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
		 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
}

__u32 ext4_itable_unused_count(struct super_block *sb,
			       struct ext4_group_desc *bg)
{
	return le16_to_cpu(bg->bg_itable_unused_lo) |
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
		 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
}

void ext4_block_bitmap_set(struct super_block *sb,
			   struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
	bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
		bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
}

void ext4_inode_bitmap_set(struct super_block *sb,
			   struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
	bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk);
	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
		bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
}

void ext4_inode_table_set(struct super_block *sb,
			  struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
	bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
		bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
}

void ext4_free_group_clusters_set(struct super_block *sb,
				  struct ext4_group_desc *bg, __u32 count)
{
	bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
		bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
}

void ext4_free_inodes_set(struct super_block *sb,
			  struct ext4_group_desc *bg, __u32 count)
{
	bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
		bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
}

void ext4_used_dirs_set(struct super_block *sb,
			struct ext4_group_desc *bg, __u32 count)
{
	bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
		bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
}

void ext4_itable_unused_set(struct super_block *sb,
			    struct ext4_group_desc *bg, __u32 count)
{
	bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
		bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
}

static void __ext4_update_tstamp(__le32 *lo, __u8 *hi, time64_t now)
{
	now = clamp_val(now, 0, (1ull << 40) - 1);

	*lo = cpu_to_le32(lower_32_bits(now));
	*hi = upper_32_bits(now);
}

static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi)
{
	return ((time64_t)(*hi) << 32) + le32_to_cpu(*lo);
}
#define ext4_update_tstamp(es, tstamp) \
	__ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi, \
			     ktime_get_real_seconds())
#define ext4_get_tstamp(es, tstamp) \
	__ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
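
/*
 * Worked example (illustrative): timestamps are stored as 40 bits (a
 * 32-bit _lo word plus an 8-bit _hi overflow byte), clamped to
 * [0, 2^40 - 1]. A post-2106 time such as now = 5000000000
 * (0x12A05F200) is split into lo = 0x2A05F200 and hi = 0x01;
 * __ext4_get_tstamp() reassembles (hi << 32) + lo = 5000000000.
 */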

#define EXT4_SB_REFRESH_INTERVAL_SEC (3600) /* seconds (1 hour) */
#define EXT4_SB_REFRESH_INTERVAL_KB (16384) /* kilobytes (16MB) */

/*
 * The ext4_maybe_update_superblock() function checks and updates the
 * superblock if needed.
 *
 * This function is designed to update the on-disk superblock only under
 * certain conditions to prevent excessive disk writes and unnecessary
 * waking of the disk from sleep. The superblock will be updated if:
 * 1. More than an hour has passed since the last superblock update, and
 * 2. More than 16MB have been written since the last superblock update.
 *
 * @sb: The superblock
 */
static void ext4_maybe_update_superblock(struct super_block *sb)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	journal_t *journal = sbi->s_journal;
	time64_t now;
	__u64 last_update;
	__u64 lifetime_write_kbytes;
	__u64 diff_size;

	if (sb_rdonly(sb) || !(sb->s_flags & SB_ACTIVE) ||
	    !journal || (journal->j_flags & JBD2_UNMOUNT))
		return;

	now = ktime_get_real_seconds();
	last_update = ext4_get_tstamp(es, s_wtime);

	if (likely(now - last_update < EXT4_SB_REFRESH_INTERVAL_SEC))
		return;

	lifetime_write_kbytes = sbi->s_kbytes_written +
		((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) -
		  sbi->s_sectors_written_start) >> 1);

	/* Compute the number of kilobytes written since the statistics
	 * in the on-disk superblock were last updated, and compare it
	 * against a 16MB threshold. This bounds how often the next
	 * superblock commit occurs (i.e. not more often than once per
	 * 16MB if less than that was written within the hour).
	 */
	diff_size = lifetime_write_kbytes - le64_to_cpu(es->s_kbytes_written);

	if (diff_size > EXT4_SB_REFRESH_INTERVAL_KB)
		schedule_work(&EXT4_SB(sb)->s_sb_upd_work);
}
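
/*
 * Worked example (illustrative): if s_wtime was last updated two hours
 * ago and 20MB have been written since, diff_size = 20480KB > 16384KB
 * and the superblock update work is scheduled; with only 4MB written,
 * the superblock is left alone despite the elapsed hour.
 */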

static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
{
	struct super_block *sb = journal->j_private;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int error = is_journal_aborted(journal);
	struct ext4_journal_cb_entry *jce;

	BUG_ON(txn->t_state == T_FINISHED);

	ext4_process_freed_data(sb, txn->t_tid);
	ext4_maybe_update_superblock(sb);

	spin_lock(&sbi->s_md_lock);
	while (!list_empty(&txn->t_private_list)) {
		jce = list_entry(txn->t_private_list.next,
				 struct ext4_journal_cb_entry, jce_list);
		list_del_init(&jce->jce_list);
		spin_unlock(&sbi->s_md_lock);
		jce->jce_func(sb, jce, error);
		spin_lock(&sbi->s_md_lock);
	}
	spin_unlock(&sbi->s_md_lock);
}

/*
 * This writepage callback for write_cache_pages()
 * takes care of a few cases after page cleaning.
 *
 * write_cache_pages() already checks for dirty pages and calls
 * clear_page_dirty_for_io(), which is what we want in order to
 * write-protect the pages.
 *
 * However, we may have to redirty a page (see below).
 */
static int ext4_journalled_writepage_callback(struct folio *folio,
					      struct writeback_control *wbc,
					      void *data)
{
	transaction_t *transaction = (transaction_t *) data;
	struct buffer_head *bh, *head;
	struct journal_head *jh;

	bh = head = folio_buffers(folio);
	do {
		/*
		 * We have to redirty a page in these cases:
		 * 1) If buffer is dirty, it means the page was dirty because it
		 * contains a buffer that needs checkpointing. So the dirty bit
		 * needs to be preserved so that checkpointing writes the buffer
		 * properly.
		 * 2) If buffer is not part of the committing transaction
		 * (we may have just accidentally come across this buffer because
		 * inode range tracking is not exact) or if the currently running
		 * transaction already contains this buffer as well, dirty bit
		 * needs to be preserved so that the buffer gets writeprotected
		 * properly on running transaction's commit.
		 */
		jh = bh2jh(bh);
		if (buffer_dirty(bh) ||
		    (jh && (jh->b_transaction != transaction ||
			    jh->b_next_transaction))) {
			folio_redirty_for_writepage(wbc, folio);
			goto out;
		}
	} while ((bh = bh->b_this_page) != head);

out:
	return AOP_WRITEPAGE_ACTIVATE;
}

static int ext4_journalled_submit_inode_data_buffers(struct jbd2_inode *jinode)
{
	struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
		.range_start = jinode->i_dirty_start,
		.range_end = jinode->i_dirty_end,
	};

	return write_cache_pages(mapping, &wbc,
				 ext4_journalled_writepage_callback,
				 jinode->i_transaction);
}

static int ext4_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
{
	int ret;

	if (ext4_should_journal_data(jinode->i_vfs_inode))
		ret = ext4_journalled_submit_inode_data_buffers(jinode);
	else
		ret = ext4_normal_submit_inode_data_buffers(jinode);
	return ret;
}

static int ext4_journal_finish_inode_data_buffers(struct jbd2_inode *jinode)
{
	int ret = 0;

	if (!ext4_should_journal_data(jinode->i_vfs_inode))
		ret = jbd2_journal_finish_inode_data_buffers(jinode);

	return ret;
}

static bool system_going_down(void)
{
	return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
		|| system_state == SYSTEM_RESTART;
}

struct ext4_err_translation {
	int code;
	int errno;
};

#define EXT4_ERR_TRANSLATE(err) { .code = EXT4_ERR_##err, .errno = err }

static struct ext4_err_translation err_translation[] = {
	EXT4_ERR_TRANSLATE(EIO),
	EXT4_ERR_TRANSLATE(ENOMEM),
	EXT4_ERR_TRANSLATE(EFSBADCRC),
	EXT4_ERR_TRANSLATE(EFSCORRUPTED),
	EXT4_ERR_TRANSLATE(ENOSPC),
	EXT4_ERR_TRANSLATE(ENOKEY),
	EXT4_ERR_TRANSLATE(EROFS),
	EXT4_ERR_TRANSLATE(EFBIG),
	EXT4_ERR_TRANSLATE(EEXIST),
	EXT4_ERR_TRANSLATE(ERANGE),
	EXT4_ERR_TRANSLATE(EOVERFLOW),
	EXT4_ERR_TRANSLATE(EBUSY),
	EXT4_ERR_TRANSLATE(ENOTDIR),
	EXT4_ERR_TRANSLATE(ENOTEMPTY),
	EXT4_ERR_TRANSLATE(ESHUTDOWN),
	EXT4_ERR_TRANSLATE(EFAULT),
};

static int ext4_errno_to_code(int errno)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(err_translation); i++)
		if (err_translation[i].errno == errno)
			return err_translation[i].code;
	return EXT4_ERR_UNKNOWN;
}
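
/*
 * For example (illustrative), EXT4_ERR_TRANSLATE(EIO) above expands to
 * { .code = EXT4_ERR_EIO, .errno = EIO }, so ext4_errno_to_code(EIO)
 * returns EXT4_ERR_EIO; an errno with no table entry maps to
 * EXT4_ERR_UNKNOWN.
 */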

static void save_error_info(struct super_block *sb, int error,
			    __u32 ino, __u64 block,
			    const char *func, unsigned int line)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	/* We default to EFSCORRUPTED error... */
	if (error == 0)
		error = EFSCORRUPTED;

	spin_lock(&sbi->s_error_lock);
	sbi->s_add_error_count++;
	sbi->s_last_error_code = error;
	sbi->s_last_error_line = line;
	sbi->s_last_error_ino = ino;
	sbi->s_last_error_block = block;
	sbi->s_last_error_func = func;
	sbi->s_last_error_time = ktime_get_real_seconds();
	if (!sbi->s_first_error_time) {
		sbi->s_first_error_code = error;
		sbi->s_first_error_line = line;
		sbi->s_first_error_ino = ino;
		sbi->s_first_error_block = block;
		sbi->s_first_error_func = func;
		sbi->s_first_error_time = sbi->s_last_error_time;
	}
	spin_unlock(&sbi->s_error_lock);
}

/* Deal with the reporting of failure conditions on a filesystem, such as
 * detected inconsistencies or read IO failures.
 *
 * On ext2, we can store the error state of the filesystem in the
 * superblock. That is not possible on ext4, because we may have other
 * write ordering constraints on the superblock which prevent us from
 * writing it out straight away; and given that the journal is about to
 * be aborted, we can't rely on the current, or future, transactions to
 * write out the superblock safely.
 *
 * We'll just use the jbd2_journal_abort() error code to record an error in
 * the journal instead. On recovery, the journal will complain about
 * that error until we've noted it down and cleared it.
 *
 * If force_ro is set, we unconditionally force the filesystem into an
 * ABORT|READONLY state, unless the error response on the fs has been set
 * to panic, in which case we take the easy way out and panic immediately.
 * This is used to deal with unrecoverable failures such as journal IO
 * errors or ENOMEM at a critical moment in log management.
 */
static void ext4_handle_error(struct super_block *sb, bool force_ro, int error,
			      __u32 ino, __u64 block,
			      const char *func, unsigned int line)
{
	journal_t *journal = EXT4_SB(sb)->s_journal;
	bool continue_fs = !force_ro && test_opt(sb, ERRORS_CONT);

	EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
	if (test_opt(sb, WARN_ON_ERROR))
		WARN_ON_ONCE(1);

	if (!continue_fs && !sb_rdonly(sb)) {
		set_bit(EXT4_FLAGS_SHUTDOWN, &EXT4_SB(sb)->s_ext4_flags);
		if (journal)
			jbd2_journal_abort(journal, -EIO);
	}

	if (!bdev_read_only(sb->s_bdev)) {
		save_error_info(sb, error, ino, block, func, line);
		/*
		 * In case the fs should keep running, we need to write out
		 * the superblock through the journal. Due to lock ordering
		 * constraints, it may not be safe to do it right here, so
		 * we defer superblock flushing to a workqueue.
		 */
		if (continue_fs && journal)
			schedule_work(&EXT4_SB(sb)->s_sb_upd_work);
		else
			ext4_commit_super(sb);
	}

	/*
	 * We force ERRORS_RO behavior when system is rebooting. Otherwise we
	 * could panic during 'reboot -f' as the underlying device got already
	 * disabled.
	 */
	if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) {
		panic("EXT4-fs (device %s): panic forced after error\n",
			sb->s_id);
	}

	if (sb_rdonly(sb) || continue_fs)
		return;

	ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
	/*
	 * EXT4_FLAGS_SHUTDOWN was set which stops all filesystem
	 * modifications. We don't set SB_RDONLY because that requires
	 * sb->s_umount semaphore and setting it without proper remount
	 * procedure is confusing code such as freeze_super() leading to
	 * deadlocks and other problems.
	 */
}

static void update_super_work(struct work_struct *work)
{
	struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info,
						s_sb_upd_work);
	journal_t *journal = sbi->s_journal;
	handle_t *handle;

	/*
	 * If the journal is still running, we have to write out the
	 * superblock through the journal to avoid collisions with other
	 * journalled sb updates.
	 *
	 * We use jbd2 functions directly here to avoid recursing back into
	 * ext4 error handling code during handling of previous errors.
	 */
	if (!sb_rdonly(sbi->s_sb) && journal) {
		struct buffer_head *sbh = sbi->s_sbh;
		bool call_notify_err = false;

		handle = jbd2_journal_start(journal, 1);
		if (IS_ERR(handle))
			goto write_directly;
		if (jbd2_journal_get_write_access(handle, sbh)) {
			jbd2_journal_stop(handle);
			goto write_directly;
		}

		if (sbi->s_add_error_count > 0)
			call_notify_err = true;

		ext4_update_super(sbi->s_sb);
		if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
			ext4_msg(sbi->s_sb, KERN_ERR, "previous I/O error to "
				 "superblock detected");
			clear_buffer_write_io_error(sbh);
			set_buffer_uptodate(sbh);
		}

		if (jbd2_journal_dirty_metadata(handle, sbh)) {
			jbd2_journal_stop(handle);
			goto write_directly;
		}
		jbd2_journal_stop(handle);

		if (call_notify_err)
			ext4_notify_error_sysfs(sbi);

		return;
	}
write_directly:
	/*
	 * Write through journal failed. Write sb directly to get error info
	 * out and hope for the best.
	 */
	ext4_commit_super(sbi->s_sb);
	ext4_notify_error_sysfs(sbi);
}

#define ext4_error_ratelimit(sb)					\
		___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state),	\
			     "EXT4-fs error")

void __ext4_error(struct super_block *sb, const char *function,
		  unsigned int line, bool force_ro, int error, __u64 block,
		  const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	if (unlikely(ext4_forced_shutdown(sb)))
		return;

	trace_ext4_error(sb, function, line);
	if (ext4_error_ratelimit(sb)) {
		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
		printk(KERN_CRIT
		       "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
		       sb->s_id, function, line, current->comm, &vaf);
		va_end(args);
	}
	fsnotify_sb_error(sb, NULL, error ? error : EFSCORRUPTED);

	ext4_handle_error(sb, force_ro, error, 0, block, function, line);
}

void __ext4_error_inode(struct inode *inode, const char *function,
			unsigned int line, ext4_fsblk_t block, int error,
			const char *fmt, ...)
{
	va_list args;
	struct va_format vaf;

	if (unlikely(ext4_forced_shutdown(inode->i_sb)))
		return;

	trace_ext4_error(inode->i_sb, function, line);
	if (ext4_error_ratelimit(inode->i_sb)) {
		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
		if (block)
			printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
			       "inode #%lu: block %llu: comm %s: %pV\n",
			       inode->i_sb->s_id, function, line, inode->i_ino,
			       block, current->comm, &vaf);
		else
			printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
			       "inode #%lu: comm %s: %pV\n",
			       inode->i_sb->s_id, function, line, inode->i_ino,
			       current->comm, &vaf);
		va_end(args);
	}
	fsnotify_sb_error(inode->i_sb, inode, error ? error : EFSCORRUPTED);

	ext4_handle_error(inode->i_sb, false, error, inode->i_ino, block,
			  function, line);
}

void __ext4_error_file(struct file *file, const char *function,
		       unsigned int line, ext4_fsblk_t block,
		       const char *fmt, ...)
{
	va_list args;
	struct va_format vaf;
	struct inode *inode = file_inode(file);
	char pathname[80], *path;

	if (unlikely(ext4_forced_shutdown(inode->i_sb)))
		return;

	trace_ext4_error(inode->i_sb, function, line);
	if (ext4_error_ratelimit(inode->i_sb)) {
		path = file_path(file, pathname, sizeof(pathname));
		if (IS_ERR(path))
			path = "(unknown)";
		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
		if (block)
			printk(KERN_CRIT
			       "EXT4-fs error (device %s): %s:%d: inode #%lu: "
			       "block %llu: comm %s: path %s: %pV\n",
			       inode->i_sb->s_id, function, line, inode->i_ino,
			       block, current->comm, path, &vaf);
		else
			printk(KERN_CRIT
			       "EXT4-fs error (device %s): %s:%d: inode #%lu: "
			       "comm %s: path %s: %pV\n",
			       inode->i_sb->s_id, function, line, inode->i_ino,
			       current->comm, path, &vaf);
		va_end(args);
	}
	fsnotify_sb_error(inode->i_sb, inode, EFSCORRUPTED);

	ext4_handle_error(inode->i_sb, false, EFSCORRUPTED, inode->i_ino, block,
			  function, line);
}

const char *ext4_decode_error(struct super_block *sb, int errno,
			      char nbuf[16])
{
	char *errstr = NULL;

	switch (errno) {
	case -EFSCORRUPTED:
		errstr = "Corrupt filesystem";
		break;
	case -EFSBADCRC:
		errstr = "Filesystem failed CRC";
		break;
	case -EIO:
		errstr = "IO failure";
		break;
	case -ENOMEM:
		errstr = "Out of memory";
		break;
	case -EROFS:
		if (!sb || (EXT4_SB(sb)->s_journal &&
			    EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT))
			errstr = "Journal has aborted";
		else
			errstr = "Readonly filesystem";
		break;
	default:
		/* If the caller passed in an extra buffer for unknown
		 * errors, textualise them now.  Else we just return
		 * NULL. */
		if (nbuf) {
			/* Check for truncated error codes... */
			if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
				errstr = nbuf;
		}
		break;
	}

	return errstr;
}

/* __ext4_std_error decodes expected errors from journaling functions
 * automatically and invokes the appropriate error response. */

void __ext4_std_error(struct super_block *sb, const char *function,
		      unsigned int line, int errno)
{
	char nbuf[16];
	const char *errstr;

	if (unlikely(ext4_forced_shutdown(sb)))
		return;

	/* Special case: if the error is EROFS, and we're not already
	 * inside a transaction, then there's really no point in logging
	 * an error. */
	if (errno == -EROFS && journal_current_handle() == NULL && sb_rdonly(sb))
		return;

	if (ext4_error_ratelimit(sb)) {
		errstr = ext4_decode_error(sb, errno, nbuf);
		printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
		       sb->s_id, function, line, errstr);
	}
	fsnotify_sb_error(sb, NULL, errno ? errno : EFSCORRUPTED);

	ext4_handle_error(sb, false, -errno, 0, 0, function, line);
}

void __ext4_msg(struct super_block *sb,
		const char *prefix, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	if (sb) {
		atomic_inc(&EXT4_SB(sb)->s_msg_count);
		if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state),
				  "EXT4-fs"))
			return;
	}

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	if (sb)
		printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
	else
		printk("%sEXT4-fs: %pV\n", prefix, &vaf);
	va_end(args);
}

static int ext4_warning_ratelimit(struct super_block *sb)
{
	atomic_inc(&EXT4_SB(sb)->s_warning_count);
	return ___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state),
			    "EXT4-fs warning");
}

void __ext4_warning(struct super_block *sb, const char *function,
		    unsigned int line, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	if (!ext4_warning_ratelimit(sb))
		return;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
	       sb->s_id, function, line, &vaf);
	va_end(args);
}

void __ext4_warning_inode(const struct inode *inode, const char *function,
			  unsigned int line, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	if (!ext4_warning_ratelimit(inode->i_sb))
		return;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: "
	       "inode #%lu: comm %s: %pV\n", inode->i_sb->s_id,
	       function, line, inode->i_ino, current->comm, &vaf);
	va_end(args);
}

void __ext4_grp_locked_error(const char *function, unsigned int line,
			     struct super_block *sb, ext4_group_t grp,
			     unsigned long ino, ext4_fsblk_t block,
			     const char *fmt, ...)
__releases(bitlock)
__acquires(bitlock)
{
	struct va_format vaf;
	va_list args;

	if (unlikely(ext4_forced_shutdown(sb)))
		return;

	trace_ext4_error(sb, function, line);
	if (ext4_error_ratelimit(sb)) {
		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
		printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
		       sb->s_id, function, line, grp);
		if (ino)
			printk(KERN_CONT "inode %lu: ", ino);
		if (block)
			printk(KERN_CONT "block %llu:",
			       (unsigned long long) block);
		printk(KERN_CONT "%pV\n", &vaf);
		va_end(args);
	}

	if (test_opt(sb, ERRORS_CONT)) {
		if (test_opt(sb, WARN_ON_ERROR))
			WARN_ON_ONCE(1);
		EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
		if (!bdev_read_only(sb->s_bdev)) {
			save_error_info(sb, EFSCORRUPTED, ino, block, function,
					line);
			schedule_work(&EXT4_SB(sb)->s_sb_upd_work);
		}
		return;
	}
	ext4_unlock_group(sb, grp);
	ext4_handle_error(sb, false, EFSCORRUPTED, ino, block, function, line);
	/*
	 * We only get here in the ERRORS_RO case; relocking the group
	 * may be dangerous, but nothing bad will happen since the
	 * filesystem will have already been marked read/only and the
	 * journal has been aborted. We return 1 as a hint to callers
	 * who might want to use the return value from
	 * ext4_grp_locked_error() to distinguish between the
	 * ERRORS_CONT and ERRORS_RO case, and perhaps return more
	 * aggressively from the ext4 function in question, with a
	 * more appropriate error code.
	 */
	ext4_lock_group(sb, grp);
	return;
}

void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
				      ext4_group_t group,
				      unsigned int flags)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
	struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
	int ret;

	if (!grp || !gdp)
		return;
	if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) {
		ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
					    &grp->bb_state);
		if (!ret)
			percpu_counter_sub(&sbi->s_freeclusters_counter,
					   grp->bb_free);
	}

	if (flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT) {
		ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT,
					    &grp->bb_state);
		if (!ret && gdp) {
			int count;

			count = ext4_free_inodes_count(sb, gdp);
			percpu_counter_sub(&sbi->s_freeinodes_counter,
					   count);
		}
	}
}

void ext4_update_dynamic_rev(struct super_block *sb)
{
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;

	if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
		return;

	ext4_warning(sb,
		     "updating to rev %d because of new feature flag, "
		     "running e2fsck is recommended",
		     EXT4_DYNAMIC_REV);

	es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
	es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
	es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
	/* leave es->s_feature_*compat flags alone */
	/* es->s_uuid will be set by e2fsck if empty */

	/*
	 * The rest of the superblock fields should be zero, and if not it
	 * means they are likely already in use, so leave them alone. We
	 * can leave it up to e2fsck to clean up any inconsistencies there.
	 */
}

static inline struct inode *orphan_list_entry(struct list_head *l)
{
	return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
}

static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
{
	struct list_head *l;

	ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
		 le32_to_cpu(sbi->s_es->s_last_orphan));

	printk(KERN_ERR "sb_info orphan list:\n");
	list_for_each(l, &sbi->s_orphan) {
		struct inode *inode = orphan_list_entry(l);
		printk(KERN_ERR "  "
		       "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
		       inode->i_sb->s_id, inode->i_ino, inode,
		       inode->i_mode, inode->i_nlink,
		       NEXT_ORPHAN(inode));
	}
}

#ifdef CONFIG_QUOTA
static int ext4_quota_off(struct super_block *sb, int type);

static inline void ext4_quotas_off(struct super_block *sb, int type)
{
	BUG_ON(type > EXT4_MAXQUOTAS);

	/* Use our quota_off function to clear inode flags etc. */
	for (type--; type >= 0; type--)
		ext4_quota_off(sb, type);
}

/*
 * This is a helper function which is used in the mount/remount
 * codepaths (which hold s_umount) to fetch the quota file name.
 */
static inline char *get_qf_name(struct super_block *sb,
				struct ext4_sb_info *sbi,
				int type)
{
	return rcu_dereference_protected(sbi->s_qf_names[type],
					 lockdep_is_held(&sb->s_umount));
}
#else
static inline void ext4_quotas_off(struct super_block *sb, int type)
{
}
#endif

static int ext4_percpu_param_init(struct ext4_sb_info *sbi)
{
	ext4_fsblk_t block;
	int err;

	block = ext4_count_free_clusters(sbi->s_sb);
	ext4_free_blocks_count_set(sbi->s_es, EXT4_C2B(sbi, block));
	err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
				  GFP_KERNEL);
	if (!err) {
		unsigned long freei = ext4_count_free_inodes(sbi->s_sb);
		sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
		err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
					  GFP_KERNEL);
	}
	if (!err)
		err = percpu_counter_init(&sbi->s_dirs_counter,
					  ext4_count_dirs(sbi->s_sb), GFP_KERNEL);
	if (!err)
		err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
					  GFP_KERNEL);
	if (!err)
		err = percpu_counter_init(&sbi->s_sra_exceeded_retry_limit, 0,
					  GFP_KERNEL);
	if (!err)
		err = percpu_init_rwsem(&sbi->s_writepages_rwsem);

	if (err)
		ext4_msg(sbi->s_sb, KERN_ERR, "insufficient memory");

	return err;
}

static void ext4_percpu_param_destroy(struct ext4_sb_info *sbi)
{
	percpu_counter_destroy(&sbi->s_freeclusters_counter);
	percpu_counter_destroy(&sbi->s_freeinodes_counter);
	percpu_counter_destroy(&sbi->s_dirs_counter);
	percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
	percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit);
	percpu_free_rwsem(&sbi->s_writepages_rwsem);
}

static void ext4_group_desc_free(struct ext4_sb_info *sbi)
{
	struct buffer_head **group_desc;
	int i;

	rcu_read_lock();
	group_desc = rcu_dereference(sbi->s_group_desc);
	for (i = 0; i < sbi->s_gdb_count; i++)
		brelse(group_desc[i]);
	kvfree(group_desc);
	rcu_read_unlock();
}

static void ext4_flex_groups_free(struct ext4_sb_info *sbi)
{
	struct flex_groups **flex_groups;
	int i;

	rcu_read_lock();
	flex_groups = rcu_dereference(sbi->s_flex_groups);
	if (flex_groups) {
		for (i = 0; i < sbi->s_flex_groups_allocated; i++)
			kvfree(flex_groups[i]);
		kvfree(flex_groups);
	}
	rcu_read_unlock();
}

static void ext4_put_super(struct super_block *sb)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	int aborted = 0;
	int err;

	/*
	 * Unregister sysfs before destroying the jbd2 journal: we could
	 * otherwise still access the attr_journal_task attribute via the
	 * sysfs path after sbi->s_journal->j_task has become NULL.
	 * Also unregister sysfs before flushing sbi->s_sb_upd_work: a user
	 * may read /proc/fs/ext4/xx/mb_groups during umount, and if a
	 * metadata read then fails verification, error work is queued and
	 * update_super_work would call start_this_handle, which may
	 * trigger a BUG_ON.
	 */
	ext4_unregister_sysfs(sb);

	if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs unmount"))
		ext4_msg(sb, KERN_INFO, "unmounting filesystem %pU.",
			 &sb->s_uuid);

	ext4_unregister_li_request(sb);
	ext4_quotas_off(sb, EXT4_MAXQUOTAS);

	flush_work(&sbi->s_sb_upd_work);
	destroy_workqueue(sbi->rsv_conversion_wq);
	ext4_release_orphan_info(sb);

	if (sbi->s_journal) {
		aborted = is_journal_aborted(sbi->s_journal);
		err = jbd2_journal_destroy(sbi->s_journal);
		sbi->s_journal = NULL;
		if ((err < 0) && !aborted) {
			ext4_abort(sb, -err, "Couldn't clean up the journal");
		}
	}

	ext4_es_unregister_shrinker(sbi);
	timer_shutdown_sync(&sbi->s_err_report);
	ext4_release_system_zone(sb);
	ext4_mb_release(sb);
	ext4_ext_release(sb);

	if (!sb_rdonly(sb) && !aborted) {
		ext4_clear_feature_journal_needs_recovery(sb);
		ext4_clear_feature_orphan_present(sb);
		es->s_state = cpu_to_le16(sbi->s_mount_state);
	}
	if (!sb_rdonly(sb))
		ext4_commit_super(sb);

	ext4_group_desc_free(sbi);
	ext4_flex_groups_free(sbi);

	WARN_ON_ONCE(!(sbi->s_mount_state & EXT4_ERROR_FS) &&
		     percpu_counter_sum(&sbi->s_dirtyclusters_counter));
	ext4_percpu_param_destroy(sbi);
#ifdef CONFIG_QUOTA
	for (int i = 0; i < EXT4_MAXQUOTAS; i++)
		kfree(get_qf_name(sb, sbi, i));
#endif

	/* Debugging code just in case the in-memory inode orphan list
	 * isn't empty.  The on-disk one can be non-empty if we've
	 * detected an error and taken the fs readonly, but the
	 * in-memory list had better be clean by this point. */
	if (!list_empty(&sbi->s_orphan))
		dump_orphan_list(sb, sbi);
	ASSERT(list_empty(&sbi->s_orphan));

	sync_blockdev(sb->s_bdev);
	invalidate_bdev(sb->s_bdev);
	if (sbi->s_journal_bdev_file) {
		/*
		 * Invalidate the journal device's buffers.  We don't want
		 * them floating about in memory - the physical journal
		 * device may be hotswapped, and it breaks the `ro-after'
		 * testing code.
		 */
		sync_blockdev(file_bdev(sbi->s_journal_bdev_file));
		invalidate_bdev(file_bdev(sbi->s_journal_bdev_file));
	}

	ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
	sbi->s_ea_inode_cache = NULL;

	ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
	sbi->s_ea_block_cache = NULL;

	ext4_stop_mmpd(sbi);

	brelse(sbi->s_sbh);
	sb->s_fs_info = NULL;
	/*
	 * Now that we are completely done shutting down the
	 * superblock, we need to actually destroy the kobject.
	 */
	kobject_put(&sbi->s_kobj);
	wait_for_completion(&sbi->s_kobj_unregister);
	if (sbi->s_chksum_driver)
		crypto_free_shash(sbi->s_chksum_driver);
	kfree(sbi->s_blockgroup_lock);
	fs_put_dax(sbi->s_daxdev, NULL);
	fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
#if IS_ENABLED(CONFIG_UNICODE)
	utf8_unload(sb->s_encoding);
#endif
	kfree(sbi);
}

static struct kmem_cache *ext4_inode_cachep;

/*
 * Called inside transaction, so use GFP_NOFS
 */
static struct inode *ext4_alloc_inode(struct super_block *sb)
{
	struct ext4_inode_info *ei;

	ei = alloc_inode_sb(sb, ext4_inode_cachep, GFP_NOFS);
	if (!ei)
		return NULL;

	inode_set_iversion(&ei->vfs_inode, 1);
	ei->i_flags = 0;
	spin_lock_init(&ei->i_raw_lock);
	ei->i_prealloc_node = RB_ROOT;
	atomic_set(&ei->i_prealloc_active, 0);
	rwlock_init(&ei->i_prealloc_lock);
	ext4_es_init_tree(&ei->i_es_tree);
	rwlock_init(&ei->i_es_lock);
	INIT_LIST_HEAD(&ei->i_es_list);
	ei->i_es_all_nr = 0;
	ei->i_es_shk_nr = 0;
	ei->i_es_shrink_lblk = 0;
	ei->i_reserved_data_blocks = 0;
	spin_lock_init(&(ei->i_block_reservation_lock));
	ext4_init_pending_tree(&ei->i_pending_tree);
#ifdef CONFIG_QUOTA
	ei->i_reserved_quota = 0;
	memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
#endif
	ei->jinode = NULL;
	INIT_LIST_HEAD(&ei->i_rsv_conversion_list);
	spin_lock_init(&ei->i_completed_io_lock);
	ei->i_sync_tid = 0;
	ei->i_datasync_tid = 0;
	atomic_set(&ei->i_unwritten, 0);
	INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
	ext4_fc_init_inode(&ei->vfs_inode);
	mutex_init(&ei->i_fc_lock);
	return &ei->vfs_inode;
}

static int ext4_drop_inode(struct inode *inode)
{
	int drop = generic_drop_inode(inode);

	if (!drop)
		drop = fscrypt_drop_inode(inode);

	trace_ext4_drop_inode(inode, drop);
	return drop;
}

static void ext4_free_in_core_inode(struct inode *inode)
{
	fscrypt_free_inode(inode);
	if (!list_empty(&(EXT4_I(inode)->i_fc_list))) {
		pr_warn("%s: inode %ld still in fc list",
			__func__, inode->i_ino);
	}
	kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
}

static void ext4_destroy_inode(struct inode *inode)
{
	if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
		ext4_msg(inode->i_sb, KERN_ERR,
			 "Inode %lu (%p): orphan list check failed!",
			 inode->i_ino, EXT4_I(inode));
		print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
			       EXT4_I(inode), sizeof(struct ext4_inode_info),
			       true);
		dump_stack();
	}

	if (!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ERROR_FS) &&
	    WARN_ON_ONCE(EXT4_I(inode)->i_reserved_data_blocks))
		ext4_msg(inode->i_sb, KERN_ERR,
			 "Inode %lu (%p): i_reserved_data_blocks (%u) not cleared!",
			 inode->i_ino, EXT4_I(inode),
			 EXT4_I(inode)->i_reserved_data_blocks);
}

static void ext4_shutdown(struct super_block *sb)
{
	ext4_force_shutdown(sb, EXT4_GOING_FLAGS_NOLOGFLUSH);
}

static void init_once(void *foo)
{
	struct ext4_inode_info *ei = foo;

	INIT_LIST_HEAD(&ei->i_orphan);
	init_rwsem(&ei->xattr_sem);
	init_rwsem(&ei->i_data_sem);
	inode_init_once(&ei->vfs_inode);
	ext4_fc_init_inode(&ei->vfs_inode);
}

static int __init init_inodecache(void)
{
	ext4_inode_cachep = kmem_cache_create_usercopy("ext4_inode_cache",
				sizeof(struct ext4_inode_info), 0,
				SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
				offsetof(struct ext4_inode_info, i_data),
				sizeof_field(struct ext4_inode_info, i_data),
				init_once);
	if (ext4_inode_cachep == NULL)
		return -ENOMEM;
	return 0;
}

static void destroy_inodecache(void)
{
	/*
	 * Make sure all delayed rcu free inodes are flushed before we
	 * destroy cache.
	 */
	rcu_barrier();
	kmem_cache_destroy(ext4_inode_cachep);
}

void ext4_clear_inode(struct inode *inode)
{
	ext4_fc_del(inode);
	invalidate_inode_buffers(inode);
	clear_inode(inode);
	ext4_discard_preallocations(inode);
	ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
	dquot_drop(inode);
	if (EXT4_I(inode)->jinode) {
		jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
					       EXT4_I(inode)->jinode);
		jbd2_free_inode(EXT4_I(inode)->jinode);
		EXT4_I(inode)->jinode = NULL;
	}
	fscrypt_put_encryption_info(inode);
	fsverity_cleanup_inode(inode);
}

static struct inode *ext4_nfs_get_inode(struct super_block *sb,
					u64 ino, u32 generation)
{
	struct inode *inode;

	/*
	 * Currently we don't know the generation for parent directory, so
	 * a generation of 0 means "accept any"
	 */
	inode = ext4_iget(sb, ino, EXT4_IGET_HANDLE);
	if (IS_ERR(inode))
		return ERR_CAST(inode);
	if (generation && inode->i_generation != generation) {
		iput(inode);
		return ERR_PTR(-ESTALE);
	}

	return inode;
}

static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
					int fh_len, int fh_type)
{
	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
				    ext4_nfs_get_inode);
}

static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
					int fh_len, int fh_type)
{
	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
				    ext4_nfs_get_inode);
}

static int ext4_nfs_commit_metadata(struct inode *inode)
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL
	};

	trace_ext4_nfs_commit_metadata(inode);
	return ext4_write_inode(inode, &wbc);
}

#ifdef CONFIG_QUOTA
static const char * const quotatypes[] = INITQFNAMES;
#define QTYPE2NAME(t) (quotatypes[t])

static int ext4_write_dquot(struct dquot *dquot);
static int ext4_acquire_dquot(struct dquot *dquot);
static int ext4_release_dquot(struct dquot *dquot);
static int ext4_mark_dquot_dirty(struct dquot *dquot);
static int ext4_write_info(struct super_block *sb, int type);
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
			 const struct path *path);
static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
			       size_t len, loff_t off);
static ssize_t ext4_quota_write(struct super_block *sb, int type,
				const char *data, size_t len, loff_t off);
static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
			     unsigned int flags);

static struct dquot __rcu **ext4_get_dquots(struct inode *inode)
{
	return EXT4_I(inode)->i_dquot;
}

static const struct dquot_operations ext4_quota_operations = {
	.get_reserved_space	= ext4_get_reserved_space,
	.write_dquot		= ext4_write_dquot,
	.acquire_dquot		= ext4_acquire_dquot,
	.release_dquot		= ext4_release_dquot,
	.mark_dirty		= ext4_mark_dquot_dirty,
	.write_info		= ext4_write_info,
	.alloc_dquot		= dquot_alloc,
	.destroy_dquot		= dquot_destroy,
	.get_projid		= ext4_get_projid,
	.get_inode_usage	= ext4_get_inode_usage,
	.get_next_id		= dquot_get_next_id,
};

static const struct quotactl_ops ext4_qctl_operations = {
	.quota_on	= ext4_quota_on,
	.quota_off	= ext4_quota_off,
	.quota_sync	= dquot_quota_sync,
	.get_state	= dquot_get_state,
	.set_info	= dquot_set_dqinfo,
	.get_dqblk	= dquot_get_dqblk,
	.set_dqblk	= dquot_set_dqblk,
	.get_nextdqblk	= dquot_get_next_dqblk,
};
#endif

static const struct super_operations ext4_sops = {
	.alloc_inode	= ext4_alloc_inode,
	.free_inode	= ext4_free_in_core_inode,
	.destroy_inode	= ext4_destroy_inode,
	.write_inode	= ext4_write_inode,
	.dirty_inode	= ext4_dirty_inode,
	.drop_inode	= ext4_drop_inode,
	.evict_inode	= ext4_evict_inode,
	.put_super	= ext4_put_super,
	.sync_fs	= ext4_sync_fs,
	.freeze_fs	= ext4_freeze,
	.unfreeze_fs	= ext4_unfreeze,
	.statfs		= ext4_statfs,
	.show_options	= ext4_show_options,
	.shutdown	= ext4_shutdown,
#ifdef CONFIG_QUOTA
	.quota_read	= ext4_quota_read,
	.quota_write	= ext4_quota_write,
	.get_dquots	= ext4_get_dquots,
#endif
};

static const struct export_operations ext4_export_ops = {
	.encode_fh = generic_encode_ino32_fh,
	.fh_to_dentry = ext4_fh_to_dentry,
	.fh_to_parent = ext4_fh_to_parent,
	.get_parent = ext4_get_parent,
	.commit_metadata = ext4_nfs_commit_metadata,
};

enum {
	Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
	Opt_resgid, Opt_resuid, Opt_sb,
	Opt_nouid32, Opt_debug, Opt_removed,
	Opt_user_xattr, Opt_acl,
	Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload,
	Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev,
	Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit,
	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
	Opt_data_err_abort, Opt_data_err_ignore, Opt_test_dummy_encryption,
	Opt_inlinecrypt,
	Opt_usrjquota, Opt_grpjquota, Opt_quota,
	Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
	Opt_usrquota, Opt_grpquota, Opt_prjquota,
	Opt_dax, Opt_dax_always, Opt_dax_inode, Opt_dax_never,
	Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error,
	Opt_nowarn_on_error, Opt_mblk_io_submit, Opt_debug_want_extra_isize,
	Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
	Opt_inode_readahead_blks, Opt_journal_ioprio,
	Opt_dioread_nolock, Opt_dioread_lock,
	Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
	Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
	Opt_no_prefetch_block_bitmaps, Opt_mb_optimize_scan,
	Opt_errors, Opt_data, Opt_data_err, Opt_jqfmt, Opt_dax_type,
#ifdef CONFIG_EXT4_DEBUG
	Opt_fc_debug_max_replay, Opt_fc_debug_force
#endif
};
1680
1681 static const struct constant_table ext4_param_errors[] = {
1682 {"continue", EXT4_MOUNT_ERRORS_CONT},
1683 {"panic", EXT4_MOUNT_ERRORS_PANIC},
1684 {"remount-ro", EXT4_MOUNT_ERRORS_RO},
1685 {}
1686 };
1687
1688 static const struct constant_table ext4_param_data[] = {
1689 {"journal", EXT4_MOUNT_JOURNAL_DATA},
1690 {"ordered", EXT4_MOUNT_ORDERED_DATA},
1691 {"writeback", EXT4_MOUNT_WRITEBACK_DATA},
1692 {}
1693 };
1694
1695 static const struct constant_table ext4_param_data_err[] = {
1696 {"abort", Opt_data_err_abort},
1697 {"ignore", Opt_data_err_ignore},
1698 {}
1699 };
1700
1701 static const struct constant_table ext4_param_jqfmt[] = {
1702 {"vfsold", QFMT_VFS_OLD},
1703 {"vfsv0", QFMT_VFS_V0},
1704 {"vfsv1", QFMT_VFS_V1},
1705 {}
1706 };
1707
1708 static const struct constant_table ext4_param_dax[] = {
1709 {"always", Opt_dax_always},
1710 {"inode", Opt_dax_inode},
1711 {"never", Opt_dax_never},
1712 {}
1713 };
1714
1715 /*
1716 * Mount option specification
1717 * We don't use fsparam_flag_no because of the way we set the
1718 * options and the way we show them in _ext4_show_options(). To
1719 * keep the changes to a minimum, let's keep the negative options
1720 * separate for now.
1721 */
1722 static const struct fs_parameter_spec ext4_param_specs[] = {
1723 fsparam_flag ("bsddf", Opt_bsd_df),
1724 fsparam_flag ("minixdf", Opt_minix_df),
1725 fsparam_flag ("grpid", Opt_grpid),
1726 fsparam_flag ("bsdgroups", Opt_grpid),
1727 fsparam_flag ("nogrpid", Opt_nogrpid),
1728 fsparam_flag ("sysvgroups", Opt_nogrpid),
1729 fsparam_gid ("resgid", Opt_resgid),
1730 fsparam_uid ("resuid", Opt_resuid),
1731 fsparam_u32 ("sb", Opt_sb),
1732 fsparam_enum ("errors", Opt_errors, ext4_param_errors),
1733 fsparam_flag ("nouid32", Opt_nouid32),
1734 fsparam_flag ("debug", Opt_debug),
1735 fsparam_flag ("oldalloc", Opt_removed),
1736 fsparam_flag ("orlov", Opt_removed),
1737 fsparam_flag ("user_xattr", Opt_user_xattr),
1738 fsparam_flag ("acl", Opt_acl),
1739 fsparam_flag ("norecovery", Opt_noload),
1740 fsparam_flag ("noload", Opt_noload),
1741 fsparam_flag ("bh", Opt_removed),
1742 fsparam_flag ("nobh", Opt_removed),
1743 fsparam_u32 ("commit", Opt_commit),
1744 fsparam_u32 ("min_batch_time", Opt_min_batch_time),
1745 fsparam_u32 ("max_batch_time", Opt_max_batch_time),
1746 fsparam_u32 ("journal_dev", Opt_journal_dev),
1747 fsparam_bdev ("journal_path", Opt_journal_path),
1748 fsparam_flag ("journal_checksum", Opt_journal_checksum),
1749 fsparam_flag ("nojournal_checksum", Opt_nojournal_checksum),
1750 fsparam_flag ("journal_async_commit",Opt_journal_async_commit),
1751 fsparam_flag ("abort", Opt_abort),
1752 fsparam_enum ("data", Opt_data, ext4_param_data),
1753 fsparam_enum ("data_err", Opt_data_err,
1754 ext4_param_data_err),
1755 fsparam_string_empty
1756 ("usrjquota", Opt_usrjquota),
1757 fsparam_string_empty
1758 ("grpjquota", Opt_grpjquota),
1759 fsparam_enum ("jqfmt", Opt_jqfmt, ext4_param_jqfmt),
1760 fsparam_flag ("grpquota", Opt_grpquota),
1761 fsparam_flag ("quota", Opt_quota),
1762 fsparam_flag ("noquota", Opt_noquota),
1763 fsparam_flag ("usrquota", Opt_usrquota),
1764 fsparam_flag ("prjquota", Opt_prjquota),
1765 fsparam_flag ("barrier", Opt_barrier),
1766 fsparam_u32 ("barrier", Opt_barrier),
1767 fsparam_flag ("nobarrier", Opt_nobarrier),
1768 fsparam_flag ("i_version", Opt_removed),
1769 fsparam_flag ("dax", Opt_dax),
1770 fsparam_enum ("dax", Opt_dax_type, ext4_param_dax),
1771 fsparam_u32 ("stripe", Opt_stripe),
1772 fsparam_flag ("delalloc", Opt_delalloc),
1773 fsparam_flag ("nodelalloc", Opt_nodelalloc),
1774 fsparam_flag ("warn_on_error", Opt_warn_on_error),
1775 fsparam_flag ("nowarn_on_error", Opt_nowarn_on_error),
1776 fsparam_u32 ("debug_want_extra_isize",
1777 Opt_debug_want_extra_isize),
1778 fsparam_flag ("mblk_io_submit", Opt_removed),
1779 fsparam_flag ("nomblk_io_submit", Opt_removed),
1780 fsparam_flag ("block_validity", Opt_block_validity),
1781 fsparam_flag ("noblock_validity", Opt_noblock_validity),
1782 fsparam_u32 ("inode_readahead_blks",
1783 Opt_inode_readahead_blks),
1784 fsparam_u32 ("journal_ioprio", Opt_journal_ioprio),
1785 fsparam_u32 ("auto_da_alloc", Opt_auto_da_alloc),
1786 fsparam_flag ("auto_da_alloc", Opt_auto_da_alloc),
1787 fsparam_flag ("noauto_da_alloc", Opt_noauto_da_alloc),
1788 fsparam_flag ("dioread_nolock", Opt_dioread_nolock),
1789 fsparam_flag ("nodioread_nolock", Opt_dioread_lock),
1790 fsparam_flag ("dioread_lock", Opt_dioread_lock),
1791 fsparam_flag ("discard", Opt_discard),
1792 fsparam_flag ("nodiscard", Opt_nodiscard),
1793 fsparam_u32 ("init_itable", Opt_init_itable),
1794 fsparam_flag ("init_itable", Opt_init_itable),
1795 fsparam_flag ("noinit_itable", Opt_noinit_itable),
1796 #ifdef CONFIG_EXT4_DEBUG
1797 fsparam_flag ("fc_debug_force", Opt_fc_debug_force),
1798 fsparam_u32 ("fc_debug_max_replay", Opt_fc_debug_max_replay),
1799 #endif
1800 fsparam_u32 ("max_dir_size_kb", Opt_max_dir_size_kb),
1801 fsparam_flag ("test_dummy_encryption",
1802 Opt_test_dummy_encryption),
1803 fsparam_string ("test_dummy_encryption",
1804 Opt_test_dummy_encryption),
1805 fsparam_flag ("inlinecrypt", Opt_inlinecrypt),
1806 fsparam_flag ("nombcache", Opt_nombcache),
1807 fsparam_flag ("no_mbcache", Opt_nombcache), /* for backward compatibility */
1808 fsparam_flag ("prefetch_block_bitmaps",
1809 Opt_removed),
1810 fsparam_flag ("no_prefetch_block_bitmaps",
1811 Opt_no_prefetch_block_bitmaps),
1812 fsparam_s32 ("mb_optimize_scan", Opt_mb_optimize_scan),
1813 fsparam_string ("check", Opt_removed), /* mount option from ext2/3 */
1814 fsparam_flag ("nocheck", Opt_removed), /* mount option from ext2/3 */
1815 fsparam_flag ("reservation", Opt_removed), /* mount option from ext2/3 */
1816 fsparam_flag ("noreservation", Opt_removed), /* mount option from ext2/3 */
1817 fsparam_u32 ("journal", Opt_removed), /* mount option from ext2/3 */
1818 {}
1819 };
1820
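/*
 * A note on the default below: journal I/O is issued at best-effort
 * priority 3 unless overridden; the journal_ioprio mount option accepts
 * 0-7 (lower means higher priority) within the same best-effort class,
 * as validated in ext4_parse_param().
 */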
1821 #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
1822
1823 #define MOPT_SET 0x0001
1824 #define MOPT_CLEAR 0x0002
1825 #define MOPT_NOSUPPORT 0x0004
1826 #define MOPT_EXPLICIT 0x0008
1827 #ifdef CONFIG_QUOTA
1828 #define MOPT_Q 0
1829 #define MOPT_QFMT 0x0010
1830 #else
1831 #define MOPT_Q MOPT_NOSUPPORT
1832 #define MOPT_QFMT MOPT_NOSUPPORT
1833 #endif
1834 #define MOPT_NO_EXT2 0x0020
1835 #define MOPT_NO_EXT3 0x0040
1836 #define MOPT_EXT4_ONLY (MOPT_NO_EXT2 | MOPT_NO_EXT3)
1837 #define MOPT_SKIP 0x0080
1838 #define MOPT_2 0x0100
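/*
 * Of the flags above: MOPT_2 means the bit lives in s_mount_opt2 rather
 * than s_mount_opt, MOPT_EXPLICIT records that the user named the option
 * explicitly (see the EXT4_MOUNT2_EXPLICIT_* handling in
 * ext4_parse_param()), and MOPT_SKIP hides the option from
 * _ext4_show_options().
 */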
1839
1840 static const struct mount_opts {
1841 int token;
1842 int mount_opt;
1843 int flags;
1844 } ext4_mount_opts[] = {
1845 {Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET},
1846 {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
1847 {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
1848 {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
1849 {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
1850 {Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
1851 {Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK,
1852 MOPT_EXT4_ONLY | MOPT_SET},
1853 {Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK,
1854 MOPT_EXT4_ONLY | MOPT_CLEAR},
1855 {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET},
1856 {Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR},
1857 {Opt_delalloc, EXT4_MOUNT_DELALLOC,
1858 MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
1859 {Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
1860 MOPT_EXT4_ONLY | MOPT_CLEAR},
1861 {Opt_warn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_SET},
1862 {Opt_nowarn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_CLEAR},
1863 {Opt_commit, 0, MOPT_NO_EXT2},
1864 {Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
1865 MOPT_EXT4_ONLY | MOPT_CLEAR},
1866 {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
1867 MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
1868 {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
1869 EXT4_MOUNT_JOURNAL_CHECKSUM),
1870 MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
1871 {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET},
1872 {Opt_data_err, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_NO_EXT2},
1873 {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
1874 {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
1875 {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
1876 {Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
1877 {Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
1878 {Opt_dax_type, 0, MOPT_EXT4_ONLY},
1879 {Opt_journal_dev, 0, MOPT_NO_EXT2},
1880 {Opt_journal_path, 0, MOPT_NO_EXT2},
1881 {Opt_journal_ioprio, 0, MOPT_NO_EXT2},
1882 {Opt_data, 0, MOPT_NO_EXT2},
1883 {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
1884 #ifdef CONFIG_EXT4_FS_POSIX_ACL
1885 {Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
1886 #else
1887 {Opt_acl, 0, MOPT_NOSUPPORT},
1888 #endif
1889 {Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET},
1890 {Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET},
1891 {Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q},
1892 {Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA,
1893 MOPT_SET | MOPT_Q},
1894 {Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA,
1895 MOPT_SET | MOPT_Q},
1896 {Opt_prjquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_PRJQUOTA,
1897 MOPT_SET | MOPT_Q},
1898 {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
1899 EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA),
1900 MOPT_CLEAR | MOPT_Q},
1901 {Opt_usrjquota, 0, MOPT_Q},
1902 {Opt_grpjquota, 0, MOPT_Q},
1903 {Opt_jqfmt, 0, MOPT_QFMT},
1904 {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
1905 {Opt_no_prefetch_block_bitmaps, EXT4_MOUNT_NO_PREFETCH_BLOCK_BITMAPS,
1906 MOPT_SET},
1907 #ifdef CONFIG_EXT4_DEBUG
1908 {Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT,
1909 MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY},
1910 #endif
1911 {Opt_abort, EXT4_MOUNT2_ABORT, MOPT_SET | MOPT_2},
1912 {Opt_err, 0, 0}
1913 };
1914
1915 #if IS_ENABLED(CONFIG_UNICODE)
1916 static const struct ext4_sb_encodings {
1917 __u16 magic;
1918 char *name;
1919 unsigned int version;
1920 } ext4_sb_encoding_map[] = {
1921 {EXT4_ENC_UTF8_12_1, "utf8", UNICODE_AGE(12, 1, 0)},
1922 };
1923
1924 static const struct ext4_sb_encodings *
1925 ext4_sb_read_encoding(const struct ext4_super_block *es)
1926 {
1927 __u16 magic = le16_to_cpu(es->s_encoding);
1928 int i;
1929
1930 for (i = 0; i < ARRAY_SIZE(ext4_sb_encoding_map); i++)
1931 if (magic == ext4_sb_encoding_map[i].magic)
1932 return &ext4_sb_encoding_map[i];
1933
1934 return NULL;
1935 }
1936 #endif
1937
1938 #define EXT4_SPEC_JQUOTA (1 << 0)
1939 #define EXT4_SPEC_JQFMT (1 << 1)
1940 #define EXT4_SPEC_DATAJ (1 << 2)
1941 #define EXT4_SPEC_SB_BLOCK (1 << 3)
1942 #define EXT4_SPEC_JOURNAL_DEV (1 << 4)
1943 #define EXT4_SPEC_JOURNAL_IOPRIO (1 << 5)
1944 #define EXT4_SPEC_s_want_extra_isize (1 << 7)
1945 #define EXT4_SPEC_s_max_batch_time (1 << 8)
1946 #define EXT4_SPEC_s_min_batch_time (1 << 9)
1947 #define EXT4_SPEC_s_inode_readahead_blks (1 << 10)
1948 #define EXT4_SPEC_s_li_wait_mult (1 << 11)
1949 #define EXT4_SPEC_s_max_dir_size_kb (1 << 12)
1950 #define EXT4_SPEC_s_stripe (1 << 13)
1951 #define EXT4_SPEC_s_resuid (1 << 14)
1952 #define EXT4_SPEC_s_resgid (1 << 15)
1953 #define EXT4_SPEC_s_commit_interval (1 << 16)
1954 #define EXT4_SPEC_s_fc_debug_max_replay (1 << 17)
1955 #define EXT4_SPEC_s_sb_block (1 << 18)
1956 #define EXT4_SPEC_mb_optimize_scan (1 << 19)
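/*
 * The EXT4_SPEC_* bits record which values were explicitly specified
 * during parsing, so that ext4_apply_options() (via the APPLY() macro
 * below) only overwrites the sb-info fields the user actually set.
 */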
1957
1958 struct ext4_fs_context {
1959 char *s_qf_names[EXT4_MAXQUOTAS];
1960 struct fscrypt_dummy_policy dummy_enc_policy;
1961 int s_jquota_fmt; /* Format of quota to use */
1962 #ifdef CONFIG_EXT4_DEBUG
1963 int s_fc_debug_max_replay;
1964 #endif
1965 unsigned short qname_spec;
1966 unsigned long vals_s_flags; /* Bits to set in s_flags */
1967 unsigned long mask_s_flags; /* Bits changed in s_flags */
1968 unsigned long journal_devnum;
1969 unsigned long s_commit_interval;
1970 unsigned long s_stripe;
1971 unsigned int s_inode_readahead_blks;
1972 unsigned int s_want_extra_isize;
1973 unsigned int s_li_wait_mult;
1974 unsigned int s_max_dir_size_kb;
1975 unsigned int journal_ioprio;
1976 unsigned int vals_s_mount_opt;
1977 unsigned int mask_s_mount_opt;
1978 unsigned int vals_s_mount_opt2;
1979 unsigned int mask_s_mount_opt2;
1980 unsigned int opt_flags; /* MOPT flags */
1981 unsigned int spec;
1982 u32 s_max_batch_time;
1983 u32 s_min_batch_time;
1984 kuid_t s_resuid;
1985 kgid_t s_resgid;
1986 ext4_fsblk_t s_sb_block;
1987 };
1988
1989 static void ext4_fc_free(struct fs_context *fc)
1990 {
1991 struct ext4_fs_context *ctx = fc->fs_private;
1992 int i;
1993
1994 if (!ctx)
1995 return;
1996
1997 for (i = 0; i < EXT4_MAXQUOTAS; i++)
1998 kfree(ctx->s_qf_names[i]);
1999
2000 fscrypt_free_dummy_policy(&ctx->dummy_enc_policy);
2001 kfree(ctx);
2002 }
2003
2004 int ext4_init_fs_context(struct fs_context *fc)
2005 {
2006 struct ext4_fs_context *ctx;
2007
2008 ctx = kzalloc(sizeof(struct ext4_fs_context), GFP_KERNEL);
2009 if (!ctx)
2010 return -ENOMEM;
2011
2012 fc->fs_private = ctx;
2013 fc->ops = &ext4_context_ops;
2014
2015 return 0;
2016 }
2017
2018 #ifdef CONFIG_QUOTA
2019 /*
2020 * Note the name of the specified quota file.
2021 */
2022 static int note_qf_name(struct fs_context *fc, int qtype,
2023 struct fs_parameter *param)
2024 {
2025 struct ext4_fs_context *ctx = fc->fs_private;
2026 char *qname;
2027
2028 if (param->size < 1) {
2029 ext4_msg(NULL, KERN_ERR, "Missing quota name");
2030 return -EINVAL;
2031 }
2032 if (strchr(param->string, '/')) {
2033 ext4_msg(NULL, KERN_ERR,
2034 "quotafile must be on filesystem root");
2035 return -EINVAL;
2036 }
2037 if (ctx->s_qf_names[qtype]) {
2038 if (strcmp(ctx->s_qf_names[qtype], param->string) != 0) {
2039 ext4_msg(NULL, KERN_ERR,
2040 "%s quota file already specified",
2041 QTYPE2NAME(qtype));
2042 return -EINVAL;
2043 }
2044 return 0;
2045 }
2046
2047 qname = kmemdup_nul(param->string, param->size, GFP_KERNEL);
2048 if (!qname) {
2049 ext4_msg(NULL, KERN_ERR,
2050 "Not enough memory for storing quotafile name");
2051 return -ENOMEM;
2052 }
2053 ctx->s_qf_names[qtype] = qname;
2054 ctx->qname_spec |= 1 << qtype;
2055 ctx->spec |= EXT4_SPEC_JQUOTA;
2056 return 0;
2057 }
2058
2059 /*
2060 * Clear the name of the specified quota file.
2061 */
2062 static int unnote_qf_name(struct fs_context *fc, int qtype)
2063 {
2064 struct ext4_fs_context *ctx = fc->fs_private;
2065
2066 kfree(ctx->s_qf_names[qtype]);
2067
2068 ctx->s_qf_names[qtype] = NULL;
2069 ctx->qname_spec |= 1 << qtype;
2070 ctx->spec |= EXT4_SPEC_JQUOTA;
2071 return 0;
2072 }
2073 #endif
2074
2075 static int ext4_parse_test_dummy_encryption(const struct fs_parameter *param,
2076 struct ext4_fs_context *ctx)
2077 {
2078 int err;
2079
2080 if (!IS_ENABLED(CONFIG_FS_ENCRYPTION)) {
2081 ext4_msg(NULL, KERN_WARNING,
2082 "test_dummy_encryption option not supported");
2083 return -EINVAL;
2084 }
2085 err = fscrypt_parse_test_dummy_encryption(param,
2086 &ctx->dummy_enc_policy);
2087 if (err == -EINVAL) {
2088 ext4_msg(NULL, KERN_WARNING,
2089 "Value of option \"%s\" is unrecognized", param->key);
2090 } else if (err == -EEXIST) {
2091 ext4_msg(NULL, KERN_WARNING,
2092 "Conflicting test_dummy_encryption options");
2093 return -EINVAL;
2094 }
2095 return err;
2096 }
2097
2098 #define EXT4_SET_CTX(name) \
2099 static inline void ctx_set_##name(struct ext4_fs_context *ctx, \
2100 unsigned long flag) \
2101 { \
2102 ctx->mask_s_##name |= flag; \
2103 ctx->vals_s_##name |= flag; \
2104 }
2105
2106 #define EXT4_CLEAR_CTX(name) \
2107 static inline void ctx_clear_##name(struct ext4_fs_context *ctx, \
2108 unsigned long flag) \
2109 { \
2110 ctx->mask_s_##name |= flag; \
2111 ctx->vals_s_##name &= ~flag; \
2112 }
2113
2114 #define EXT4_TEST_CTX(name) \
2115 static inline unsigned long \
2116 ctx_test_##name(struct ext4_fs_context *ctx, unsigned long flag) \
2117 { \
2118 return (ctx->vals_s_##name & flag); \
2119 }
2120
2121 EXT4_SET_CTX(flags); /* set only */
2122 EXT4_SET_CTX(mount_opt);
2123 EXT4_CLEAR_CTX(mount_opt);
2124 EXT4_TEST_CTX(mount_opt);
2125 EXT4_SET_CTX(mount_opt2);
2126 EXT4_CLEAR_CTX(mount_opt2);
2127 EXT4_TEST_CTX(mount_opt2);
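/*
 * As a concrete example, EXT4_SET_CTX(mount_opt) above expands to:
 *
 *	static inline void ctx_set_mount_opt(struct ext4_fs_context *ctx,
 *					     unsigned long flag)
 *	{
 *		ctx->mask_s_mount_opt |= flag;
 *		ctx->vals_s_mount_opt |= flag;
 *	}
 *
 * i.e. each helper records both which bits were touched (mask) and the
 * values those bits were set to (vals), so that ext4_apply_options()
 * can apply only the explicitly changed bits to the superblock.
 */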
2128
2129 static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param)
2130 {
2131 struct ext4_fs_context *ctx = fc->fs_private;
2132 struct fs_parse_result result;
2133 const struct mount_opts *m;
2134 int is_remount;
2135 int token;
2136
2137 token = fs_parse(fc, ext4_param_specs, param, &result);
2138 if (token < 0)
2139 return token;
2140 is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE;
2141
2142 for (m = ext4_mount_opts; m->token != Opt_err; m++)
2143 if (token == m->token)
2144 break;
2145
2146 ctx->opt_flags |= m->flags;
2147
2148 if (m->flags & MOPT_EXPLICIT) {
2149 if (m->mount_opt & EXT4_MOUNT_DELALLOC) {
2150 ctx_set_mount_opt2(ctx, EXT4_MOUNT2_EXPLICIT_DELALLOC);
2151 } else if (m->mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) {
2152 ctx_set_mount_opt2(ctx,
2153 EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM);
2154 } else
2155 return -EINVAL;
2156 }
2157
2158 if (m->flags & MOPT_NOSUPPORT) {
2159 ext4_msg(NULL, KERN_ERR, "%s option not supported",
2160 param->key);
2161 return 0;
2162 }
2163
2164 switch (token) {
2165 #ifdef CONFIG_QUOTA
2166 case Opt_usrjquota:
2167 if (!*param->string)
2168 return unnote_qf_name(fc, USRQUOTA);
2169 else
2170 return note_qf_name(fc, USRQUOTA, param);
2171 case Opt_grpjquota:
2172 if (!*param->string)
2173 return unnote_qf_name(fc, GRPQUOTA);
2174 else
2175 return note_qf_name(fc, GRPQUOTA, param);
2176 #endif
2177 case Opt_sb:
2178 if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
2179 ext4_msg(NULL, KERN_WARNING,
2180 "Ignoring %s option on remount", param->key);
2181 } else {
2182 ctx->s_sb_block = result.uint_32;
2183 ctx->spec |= EXT4_SPEC_s_sb_block;
2184 }
2185 return 0;
2186 case Opt_removed:
2187 ext4_msg(NULL, KERN_WARNING, "Ignoring removed %s option",
2188 param->key);
2189 return 0;
2190 case Opt_inlinecrypt:
2191 #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
2192 ctx_set_flags(ctx, SB_INLINECRYPT);
2193 #else
2194 ext4_msg(NULL, KERN_ERR, "inline encryption not supported");
2195 #endif
2196 return 0;
2197 case Opt_errors:
2198 ctx_clear_mount_opt(ctx, EXT4_MOUNT_ERRORS_MASK);
2199 ctx_set_mount_opt(ctx, result.uint_32);
2200 return 0;
2201 #ifdef CONFIG_QUOTA
2202 case Opt_jqfmt:
2203 ctx->s_jquota_fmt = result.uint_32;
2204 ctx->spec |= EXT4_SPEC_JQFMT;
2205 return 0;
2206 #endif
2207 case Opt_data:
2208 ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS);
2209 ctx_set_mount_opt(ctx, result.uint_32);
2210 ctx->spec |= EXT4_SPEC_DATAJ;
2211 return 0;
2212 case Opt_commit:
2213 if (result.uint_32 == 0)
2214 result.uint_32 = JBD2_DEFAULT_MAX_COMMIT_AGE;
2215 else if (result.uint_32 > INT_MAX / HZ) {
2216 ext4_msg(NULL, KERN_ERR,
2217 "Invalid commit interval %d, "
2218 "must be smaller than %d",
2219 result.uint_32, INT_MAX / HZ);
2220 return -EINVAL;
2221 }
2222 ctx->s_commit_interval = HZ * result.uint_32;
2223 ctx->spec |= EXT4_SPEC_s_commit_interval;
2224 return 0;
2225 case Opt_debug_want_extra_isize:
2226 if ((result.uint_32 & 1) || (result.uint_32 < 4)) {
2227 ext4_msg(NULL, KERN_ERR,
2228 "Invalid want_extra_isize %d", result.uint_32);
2229 return -EINVAL;
2230 }
2231 ctx->s_want_extra_isize = result.uint_32;
2232 ctx->spec |= EXT4_SPEC_s_want_extra_isize;
2233 return 0;
2234 case Opt_max_batch_time:
2235 ctx->s_max_batch_time = result.uint_32;
2236 ctx->spec |= EXT4_SPEC_s_max_batch_time;
2237 return 0;
2238 case Opt_min_batch_time:
2239 ctx->s_min_batch_time = result.uint_32;
2240 ctx->spec |= EXT4_SPEC_s_min_batch_time;
2241 return 0;
2242 case Opt_inode_readahead_blks:
2243 if (result.uint_32 &&
2244 (result.uint_32 > (1 << 30) ||
2245 !is_power_of_2(result.uint_32))) {
2246 ext4_msg(NULL, KERN_ERR,
2247 "EXT4-fs: inode_readahead_blks must be "
2248 "0 or a power of 2 smaller than 2^31");
2249 return -EINVAL;
2250 }
2251 ctx->s_inode_readahead_blks = result.uint_32;
2252 ctx->spec |= EXT4_SPEC_s_inode_readahead_blks;
2253 return 0;
2254 case Opt_init_itable:
2255 ctx_set_mount_opt(ctx, EXT4_MOUNT_INIT_INODE_TABLE);
2256 ctx->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
2257 if (param->type == fs_value_is_string)
2258 ctx->s_li_wait_mult = result.uint_32;
2259 ctx->spec |= EXT4_SPEC_s_li_wait_mult;
2260 return 0;
2261 case Opt_max_dir_size_kb:
2262 ctx->s_max_dir_size_kb = result.uint_32;
2263 ctx->spec |= EXT4_SPEC_s_max_dir_size_kb;
2264 return 0;
2265 #ifdef CONFIG_EXT4_DEBUG
2266 case Opt_fc_debug_max_replay:
2267 ctx->s_fc_debug_max_replay = result.uint_32;
2268 ctx->spec |= EXT4_SPEC_s_fc_debug_max_replay;
2269 return 0;
2270 #endif
2271 case Opt_stripe:
2272 ctx->s_stripe = result.uint_32;
2273 ctx->spec |= EXT4_SPEC_s_stripe;
2274 return 0;
2275 case Opt_resuid:
2276 ctx->s_resuid = result.uid;
2277 ctx->spec |= EXT4_SPEC_s_resuid;
2278 return 0;
2279 case Opt_resgid:
2280 ctx->s_resgid = result.gid;
2281 ctx->spec |= EXT4_SPEC_s_resgid;
2282 return 0;
2283 case Opt_journal_dev:
2284 if (is_remount) {
2285 ext4_msg(NULL, KERN_ERR,
2286 "Cannot specify journal on remount");
2287 return -EINVAL;
2288 }
2289 ctx->journal_devnum = result.uint_32;
2290 ctx->spec |= EXT4_SPEC_JOURNAL_DEV;
2291 return 0;
2292 case Opt_journal_path:
2293 {
2294 struct inode *journal_inode;
2295 struct path path;
2296 int error;
2297
2298 if (is_remount) {
2299 ext4_msg(NULL, KERN_ERR,
2300 "Cannot specify journal on remount");
2301 return -EINVAL;
2302 }
2303
2304 error = fs_lookup_param(fc, param, 1, LOOKUP_FOLLOW, &path);
2305 if (error) {
2306 ext4_msg(NULL, KERN_ERR, "error: could not find "
2307 "journal device path");
2308 return -EINVAL;
2309 }
2310
2311 journal_inode = d_inode(path.dentry);
2312 ctx->journal_devnum = new_encode_dev(journal_inode->i_rdev);
2313 ctx->spec |= EXT4_SPEC_JOURNAL_DEV;
2314 path_put(&path);
2315 return 0;
2316 }
2317 case Opt_journal_ioprio:
2318 if (result.uint_32 > 7) {
2319 ext4_msg(NULL, KERN_ERR, "Invalid journal IO priority"
2320 " (must be 0-7)");
2321 return -EINVAL;
2322 }
2323 ctx->journal_ioprio =
2324 IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, result.uint_32);
2325 ctx->spec |= EXT4_SPEC_JOURNAL_IOPRIO;
2326 return 0;
2327 case Opt_test_dummy_encryption:
2328 return ext4_parse_test_dummy_encryption(param, ctx);
2329 case Opt_dax:
2330 case Opt_dax_type:
2331 #ifdef CONFIG_FS_DAX
2332 {
2333 int type = (token == Opt_dax) ?
2334 Opt_dax : result.uint_32;
2335
2336 switch (type) {
2337 case Opt_dax:
2338 case Opt_dax_always:
2339 ctx_set_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
2340 ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
2341 break;
2342 case Opt_dax_never:
2343 ctx_set_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
2344 ctx_clear_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
2345 break;
2346 case Opt_dax_inode:
2347 ctx_clear_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
2348 ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
2349 /* Strictly for printing options */
2350 ctx_set_mount_opt2(ctx, EXT4_MOUNT2_DAX_INODE);
2351 break;
2352 }
2353 return 0;
2354 }
2355 #else
2356 ext4_msg(NULL, KERN_INFO, "dax option not supported");
2357 return -EINVAL;
2358 #endif
2359 case Opt_data_err:
2360 if (result.uint_32 == Opt_data_err_abort)
2361 ctx_set_mount_opt(ctx, m->mount_opt);
2362 else if (result.uint_32 == Opt_data_err_ignore)
2363 ctx_clear_mount_opt(ctx, m->mount_opt);
2364 return 0;
2365 case Opt_mb_optimize_scan:
2366 if (result.int_32 == 1) {
2367 ctx_set_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN);
2368 ctx->spec |= EXT4_SPEC_mb_optimize_scan;
2369 } else if (result.int_32 == 0) {
2370 ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN);
2371 ctx->spec |= EXT4_SPEC_mb_optimize_scan;
2372 } else {
2373 ext4_msg(NULL, KERN_WARNING,
2374 "mb_optimize_scan should be set to 0 or 1.");
2375 return -EINVAL;
2376 }
2377 return 0;
2378 }
2379
2380 /*
2381 * At this point we should only be getting options requiring MOPT_SET
2382 * or MOPT_CLEAR. Anything else is a bug.
2383 */
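/*
 * Example: "nobarrier" matches Opt_nobarrier, whose table entry carries
 * EXT4_MOUNT_BARRIER with MOPT_CLEAR. The flag-type parameter yields
 * set = 1 below, MOPT_CLEAR flips it to 0, and ctx_clear_mount_opt()
 * clears the barrier bit in the context.
 */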
2384 if (m->token == Opt_err) {
2385 ext4_msg(NULL, KERN_WARNING, "buggy handling of option %s",
2386 param->key);
2387 WARN_ON(1);
2388 return -EINVAL;
2389 }
2390
2391 else {
2392 unsigned int set = 0;
2393
2394 if ((param->type == fs_value_is_flag) ||
2395 result.uint_32 > 0)
2396 set = 1;
2397
2398 if (m->flags & MOPT_CLEAR)
2399 set = !set;
2400 else if (unlikely(!(m->flags & MOPT_SET))) {
2401 ext4_msg(NULL, KERN_WARNING,
2402 "buggy handling of option %s",
2403 param->key);
2404 WARN_ON(1);
2405 return -EINVAL;
2406 }
2407 if (m->flags & MOPT_2) {
2408 if (set != 0)
2409 ctx_set_mount_opt2(ctx, m->mount_opt);
2410 else
2411 ctx_clear_mount_opt2(ctx, m->mount_opt);
2412 } else {
2413 if (set != 0)
2414 ctx_set_mount_opt(ctx, m->mount_opt);
2415 else
2416 ctx_clear_mount_opt(ctx, m->mount_opt);
2417 }
2418 }
2419
2420 return 0;
2421 }
2422
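/*
 * Parse a comma-separated legacy option string into fs_parameter calls.
 * For example, options = "commit=30,barrier" results in two calls to
 * ext4_parse_param(): one with { .key = "commit", .string = "30",
 * .type = fs_value_is_string, .size = 2 } and one with
 * { .key = "barrier", .type = fs_value_is_flag, .size = 0 }.
 */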
2423 static int parse_options(struct fs_context *fc, char *options)
2424 {
2425 struct fs_parameter param;
2426 int ret;
2427 char *key;
2428
2429 if (!options)
2430 return 0;
2431
2432 while ((key = strsep(&options, ",")) != NULL) {
2433 if (*key) {
2434 size_t v_len = 0;
2435 char *value = strchr(key, '=');
2436
2437 param.type = fs_value_is_flag;
2438 param.string = NULL;
2439
2440 if (value) {
2441 if (value == key)
2442 continue;
2443
2444 *value++ = 0;
2445 v_len = strlen(value);
2446 param.string = kmemdup_nul(value, v_len,
2447 GFP_KERNEL);
2448 if (!param.string)
2449 return -ENOMEM;
2450 param.type = fs_value_is_string;
2451 }
2452
2453 param.key = key;
2454 param.size = v_len;
2455
2456 ret = ext4_parse_param(fc, &param);
2457 kfree(param.string);
2458 if (ret < 0)
2459 return ret;
2460 }
2461 }
2462
2463 ret = ext4_validate_options(fc);
2464 if (ret < 0)
2465 return ret;
2466
2467 return 0;
2468 }
2469
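/*
 * Parse the options stored in the superblock's s_mount_opts field and
 * fold the journal settings into the live mount context. A parse or
 * consistency failure is logged but deliberately returns 0: a bad
 * s_mount_opts string must not prevent the filesystem from mounting.
 */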
2470 static int parse_apply_sb_mount_options(struct super_block *sb,
2471 struct ext4_fs_context *m_ctx)
2472 {
2473 struct ext4_sb_info *sbi = EXT4_SB(sb);
2474 char *s_mount_opts = NULL;
2475 struct ext4_fs_context *s_ctx = NULL;
2476 struct fs_context *fc = NULL;
2477 int ret = -ENOMEM;
2478
2479 if (!sbi->s_es->s_mount_opts[0])
2480 return 0;
2481
2482 s_mount_opts = kstrndup(sbi->s_es->s_mount_opts,
2483 sizeof(sbi->s_es->s_mount_opts),
2484 GFP_KERNEL);
2485 if (!s_mount_opts)
2486 return ret;
2487
2488 fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
2489 if (!fc)
2490 goto out_free;
2491
2492 s_ctx = kzalloc(sizeof(struct ext4_fs_context), GFP_KERNEL);
2493 if (!s_ctx)
2494 goto out_free;
2495
2496 fc->fs_private = s_ctx;
2497 fc->s_fs_info = sbi;
2498
2499 ret = parse_options(fc, s_mount_opts);
2500 if (ret < 0)
2501 goto parse_failed;
2502
2503 ret = ext4_check_opt_consistency(fc, sb);
2504 if (ret < 0) {
2505 parse_failed:
2506 ext4_msg(sb, KERN_WARNING,
2507 "failed to parse options in superblock: %s",
2508 s_mount_opts);
2509 ret = 0;
2510 goto out_free;
2511 }
2512
2513 if (s_ctx->spec & EXT4_SPEC_JOURNAL_DEV)
2514 m_ctx->journal_devnum = s_ctx->journal_devnum;
2515 if (s_ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)
2516 m_ctx->journal_ioprio = s_ctx->journal_ioprio;
2517
2518 ext4_apply_options(fc, sb);
2519 ret = 0;
2520
2521 out_free:
2522 if (fc) {
2523 ext4_fc_free(fc);
2524 kfree(fc);
2525 }
2526 kfree(s_mount_opts);
2527 return ret;
2528 }
2529
2530 static void ext4_apply_quota_options(struct fs_context *fc,
2531 struct super_block *sb)
2532 {
2533 #ifdef CONFIG_QUOTA
2534 bool quota_feature = ext4_has_feature_quota(sb);
2535 struct ext4_fs_context *ctx = fc->fs_private;
2536 struct ext4_sb_info *sbi = EXT4_SB(sb);
2537 char *qname;
2538 int i;
2539
2540 if (quota_feature)
2541 return;
2542
2543 if (ctx->spec & EXT4_SPEC_JQUOTA) {
2544 for (i = 0; i < EXT4_MAXQUOTAS; i++) {
2545 if (!(ctx->qname_spec & (1 << i)))
2546 continue;
2547
2548 qname = ctx->s_qf_names[i]; /* May be NULL */
2549 if (qname)
2550 set_opt(sb, QUOTA);
2551 ctx->s_qf_names[i] = NULL;
2552 qname = rcu_replace_pointer(sbi->s_qf_names[i], qname,
2553 lockdep_is_held(&sb->s_umount));
2554 if (qname)
2555 kfree_rcu_mightsleep(qname);
2556 }
2557 }
2558
2559 if (ctx->spec & EXT4_SPEC_JQFMT)
2560 sbi->s_jquota_fmt = ctx->s_jquota_fmt;
2561 #endif
2562 }
2563
2564 /*
2565 * Check quota settings consistency.
2566 */
2567 static int ext4_check_quota_consistency(struct fs_context *fc,
2568 struct super_block *sb)
2569 {
2570 #ifdef CONFIG_QUOTA
2571 struct ext4_fs_context *ctx = fc->fs_private;
2572 struct ext4_sb_info *sbi = EXT4_SB(sb);
2573 bool quota_feature = ext4_has_feature_quota(sb);
2574 bool quota_loaded = sb_any_quota_loaded(sb);
2575 bool usr_qf_name, grp_qf_name, usrquota, grpquota;
2576 int quota_flags, i;
2577
2578 /*
2579 * We do the test below only for project quotas. 'usrquota' and
2580 * 'grpquota' mount options are allowed even without quota feature
2581 * to support legacy quotas in quota files.
2582 */
2583 if (ctx_test_mount_opt(ctx, EXT4_MOUNT_PRJQUOTA) &&
2584 !ext4_has_feature_project(sb)) {
2585 ext4_msg(NULL, KERN_ERR, "Project quota feature not enabled. "
2586 "Cannot enable project quota enforcement.");
2587 return -EINVAL;
2588 }
2589
2590 quota_flags = EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
2591 EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA;
2592 if (quota_loaded &&
2593 ctx->mask_s_mount_opt & quota_flags &&
2594 !ctx_test_mount_opt(ctx, quota_flags))
2595 goto err_quota_change;
2596
2597 if (ctx->spec & EXT4_SPEC_JQUOTA) {
2598
2599 for (i = 0; i < EXT4_MAXQUOTAS; i++) {
2600 if (!(ctx->qname_spec & (1 << i)))
2601 continue;
2602
2603 if (quota_loaded &&
2604 !!sbi->s_qf_names[i] != !!ctx->s_qf_names[i])
2605 goto err_jquota_change;
2606
2607 if (sbi->s_qf_names[i] && ctx->s_qf_names[i] &&
2608 strcmp(get_qf_name(sb, sbi, i),
2609 ctx->s_qf_names[i]) != 0)
2610 goto err_jquota_specified;
2611 }
2612
2613 if (quota_feature) {
2614 ext4_msg(NULL, KERN_INFO,
2615 "Journaled quota options ignored when "
2616 "QUOTA feature is enabled");
2617 return 0;
2618 }
2619 }
2620
2621 if (ctx->spec & EXT4_SPEC_JQFMT) {
2622 if (sbi->s_jquota_fmt != ctx->s_jquota_fmt && quota_loaded)
2623 goto err_jquota_change;
2624 if (quota_feature) {
2625 ext4_msg(NULL, KERN_INFO, "Quota format mount options "
2626 "ignored when QUOTA feature is enabled");
2627 return 0;
2628 }
2629 }
2630
2631 /* Make sure we don't mix old and new quota format */
2632 usr_qf_name = (get_qf_name(sb, sbi, USRQUOTA) ||
2633 ctx->s_qf_names[USRQUOTA]);
2634 grp_qf_name = (get_qf_name(sb, sbi, GRPQUOTA) ||
2635 ctx->s_qf_names[GRPQUOTA]);
2636
2637 usrquota = (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) ||
2638 test_opt(sb, USRQUOTA));
2639
2640 grpquota = (ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA) ||
2641 test_opt(sb, GRPQUOTA));
2642
2643 if (usr_qf_name) {
2644 ctx_clear_mount_opt(ctx, EXT4_MOUNT_USRQUOTA);
2645 usrquota = false;
2646 }
2647 if (grp_qf_name) {
2648 ctx_clear_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA);
2649 grpquota = false;
2650 }
2651
2652 if (usr_qf_name || grp_qf_name) {
2653 if (usrquota || grpquota) {
2654 ext4_msg(NULL, KERN_ERR, "old and new quota "
2655 "format mixing");
2656 return -EINVAL;
2657 }
2658
2659 if (!(ctx->spec & EXT4_SPEC_JQFMT || sbi->s_jquota_fmt)) {
2660 ext4_msg(NULL, KERN_ERR, "journaled quota format "
2661 "not specified");
2662 return -EINVAL;
2663 }
2664 }
2665
2666 return 0;
2667
2668 err_quota_change:
2669 ext4_msg(NULL, KERN_ERR,
2670 "Cannot change quota options when quota turned on");
2671 return -EINVAL;
2672 err_jquota_change:
2673 ext4_msg(NULL, KERN_ERR, "Cannot change journaled quota "
2674 "options when quota turned on");
2675 return -EINVAL;
2676 err_jquota_specified:
2677 ext4_msg(NULL, KERN_ERR, "%s quota file already specified",
2678 QTYPE2NAME(i));
2679 return -EINVAL;
2680 #else
2681 return 0;
2682 #endif
2683 }
2684
2685 static int ext4_check_test_dummy_encryption(const struct fs_context *fc,
2686 struct super_block *sb)
2687 {
2688 const struct ext4_fs_context *ctx = fc->fs_private;
2689 const struct ext4_sb_info *sbi = EXT4_SB(sb);
2690
2691 if (!fscrypt_is_dummy_policy_set(&ctx->dummy_enc_policy))
2692 return 0;
2693
2694 if (!ext4_has_feature_encrypt(sb)) {
2695 ext4_msg(NULL, KERN_WARNING,
2696 "test_dummy_encryption requires encrypt feature");
2697 return -EINVAL;
2698 }
2699 /*
2700 * This mount option is just for testing, and it's not worthwhile to
2701 * implement the extra complexity (e.g. RCU protection) that would be
2702 * needed to allow it to be set or changed during remount. We do allow
2703 * it to be specified during remount, but only if there is no change.
2704 */
2705 if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
2706 if (fscrypt_dummy_policies_equal(&sbi->s_dummy_enc_policy,
2707 &ctx->dummy_enc_policy))
2708 return 0;
2709 ext4_msg(NULL, KERN_WARNING,
2710 "Can't set or change test_dummy_encryption on remount");
2711 return -EINVAL;
2712 }
2713 /* Also make sure s_mount_opts didn't contain a conflicting value. */
2714 if (fscrypt_is_dummy_policy_set(&sbi->s_dummy_enc_policy)) {
2715 if (fscrypt_dummy_policies_equal(&sbi->s_dummy_enc_policy,
2716 &ctx->dummy_enc_policy))
2717 return 0;
2718 ext4_msg(NULL, KERN_WARNING,
2719 "Conflicting test_dummy_encryption options");
2720 return -EINVAL;
2721 }
2722 return 0;
2723 }
2724
2725 static void ext4_apply_test_dummy_encryption(struct ext4_fs_context *ctx,
2726 struct super_block *sb)
2727 {
2728 if (!fscrypt_is_dummy_policy_set(&ctx->dummy_enc_policy) ||
2729 /* if already set, it was already verified to be the same */
2730 fscrypt_is_dummy_policy_set(&EXT4_SB(sb)->s_dummy_enc_policy))
2731 return;
2732 EXT4_SB(sb)->s_dummy_enc_policy = ctx->dummy_enc_policy;
2733 memset(&ctx->dummy_enc_policy, 0, sizeof(ctx->dummy_enc_policy));
2734 ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled");
2735 }
2736
2737 static int ext4_check_opt_consistency(struct fs_context *fc,
2738 struct super_block *sb)
2739 {
2740 struct ext4_fs_context *ctx = fc->fs_private;
2741 struct ext4_sb_info *sbi = fc->s_fs_info;
2742 int is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE;
2743 int err;
2744
2745 if ((ctx->opt_flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) {
2746 ext4_msg(NULL, KERN_ERR,
2747 "Mount option(s) incompatible with ext2");
2748 return -EINVAL;
2749 }
2750 if ((ctx->opt_flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) {
2751 ext4_msg(NULL, KERN_ERR,
2752 "Mount option(s) incompatible with ext3");
2753 return -EINVAL;
2754 }
2755
2756 if (ctx->s_want_extra_isize >
2757 (sbi->s_inode_size - EXT4_GOOD_OLD_INODE_SIZE)) {
2758 ext4_msg(NULL, KERN_ERR,
2759 "Invalid want_extra_isize %d",
2760 ctx->s_want_extra_isize);
2761 return -EINVAL;
2762 }
2763
2764 err = ext4_check_test_dummy_encryption(fc, sb);
2765 if (err)
2766 return err;
2767
2768 if ((ctx->spec & EXT4_SPEC_DATAJ) && is_remount) {
2769 if (!sbi->s_journal) {
2770 ext4_msg(NULL, KERN_WARNING,
2771 "Remounting file system with no journal "
2772 "so ignoring journalled data option");
2773 ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS);
2774 } else if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS) !=
2775 test_opt(sb, DATA_FLAGS)) {
2776 ext4_msg(NULL, KERN_ERR, "Cannot change data mode "
2777 "on remount");
2778 return -EINVAL;
2779 }
2780 }
2781
2782 if (is_remount) {
2783 if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS) &&
2784 (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) {
2785 ext4_msg(NULL, KERN_ERR, "can't mount with "
2786 "both data=journal and dax");
2787 return -EINVAL;
2788 }
2789
2790 if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS) &&
2791 (!(sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
2792 (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER))) {
2793 fail_dax_change_remount:
2794 ext4_msg(NULL, KERN_ERR, "can't change "
2795 "dax mount option while remounting");
2796 return -EINVAL;
2797 } else if (ctx_test_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER) &&
2798 (!(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
2799 (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS))) {
2800 goto fail_dax_change_remount;
2801 } else if (ctx_test_mount_opt2(ctx, EXT4_MOUNT2_DAX_INODE) &&
2802 ((sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
2803 (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
2804 !(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_INODE))) {
2805 goto fail_dax_change_remount;
2806 }
2807 }
2808
2809 return ext4_check_quota_consistency(fc, sb);
2810 }
2811
2812 static void ext4_apply_options(struct fs_context *fc, struct super_block *sb)
2813 {
2814 struct ext4_fs_context *ctx = fc->fs_private;
2815 struct ext4_sb_info *sbi = fc->s_fs_info;
2816
2817 sbi->s_mount_opt &= ~ctx->mask_s_mount_opt;
2818 sbi->s_mount_opt |= ctx->vals_s_mount_opt;
2819 sbi->s_mount_opt2 &= ~ctx->mask_s_mount_opt2;
2820 sbi->s_mount_opt2 |= ctx->vals_s_mount_opt2;
2821 sb->s_flags &= ~ctx->mask_s_flags;
2822 sb->s_flags |= ctx->vals_s_flags;
2823
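/* Copy a parsed value into the sb-info only if it was explicitly set. */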
2824 #define APPLY(X) ({ if (ctx->spec & EXT4_SPEC_##X) sbi->X = ctx->X; })
2825 APPLY(s_commit_interval);
2826 APPLY(s_stripe);
2827 APPLY(s_max_batch_time);
2828 APPLY(s_min_batch_time);
2829 APPLY(s_want_extra_isize);
2830 APPLY(s_inode_readahead_blks);
2831 APPLY(s_max_dir_size_kb);
2832 APPLY(s_li_wait_mult);
2833 APPLY(s_resgid);
2834 APPLY(s_resuid);
2835
2836 #ifdef CONFIG_EXT4_DEBUG
2837 APPLY(s_fc_debug_max_replay);
2838 #endif
2839
2840 ext4_apply_quota_options(fc, sb);
2841 ext4_apply_test_dummy_encryption(ctx, sb);
2842 }
2843
2844
2845 static int ext4_validate_options(struct fs_context *fc)
2846 {
2847 #ifdef CONFIG_QUOTA
2848 struct ext4_fs_context *ctx = fc->fs_private;
2849 char *usr_qf_name, *grp_qf_name;
2850
2851 usr_qf_name = ctx->s_qf_names[USRQUOTA];
2852 grp_qf_name = ctx->s_qf_names[GRPQUOTA];
2853
2854 if (usr_qf_name || grp_qf_name) {
2855 if (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) && usr_qf_name)
2856 ctx_clear_mount_opt(ctx, EXT4_MOUNT_USRQUOTA);
2857
2858 if (ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA) && grp_qf_name)
2859 ctx_clear_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA);
2860
2861 if (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) ||
2862 ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA)) {
2863 ext4_msg(NULL, KERN_ERR, "old and new quota "
2864 "format mixing");
2865 return -EINVAL;
2866 }
2867 }
2868 #endif
2869 return 1;
2870 }
2871
2872 static inline void ext4_show_quota_options(struct seq_file *seq,
2873 struct super_block *sb)
2874 {
2875 #if defined(CONFIG_QUOTA)
2876 struct ext4_sb_info *sbi = EXT4_SB(sb);
2877 char *usr_qf_name, *grp_qf_name;
2878
2879 if (sbi->s_jquota_fmt) {
2880 char *fmtname = "";
2881
2882 switch (sbi->s_jquota_fmt) {
2883 case QFMT_VFS_OLD:
2884 fmtname = "vfsold";
2885 break;
2886 case QFMT_VFS_V0:
2887 fmtname = "vfsv0";
2888 break;
2889 case QFMT_VFS_V1:
2890 fmtname = "vfsv1";
2891 break;
2892 }
2893 seq_printf(seq, ",jqfmt=%s", fmtname);
2894 }
2895
2896 rcu_read_lock();
2897 usr_qf_name = rcu_dereference(sbi->s_qf_names[USRQUOTA]);
2898 grp_qf_name = rcu_dereference(sbi->s_qf_names[GRPQUOTA]);
2899 if (usr_qf_name)
2900 seq_show_option(seq, "usrjquota", usr_qf_name);
2901 if (grp_qf_name)
2902 seq_show_option(seq, "grpjquota", grp_qf_name);
2903 rcu_read_unlock();
2904 #endif
2905 }
2906
2907 static const char *token2str(int token)
2908 {
2909 const struct fs_parameter_spec *spec;
2910
2911 for (spec = ext4_param_specs; spec->name != NULL; spec++)
2912 if (spec->opt == token && !spec->type)
2913 break;
2914 return spec->name;
2915 }
2916
2917 /*
2918 * Show an option if
2919 * - it's set to a non-default value OR
2920 * - the per-sb default is different from the global default
2921 */
2922 static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
2923 int nodefs)
2924 {
2925 struct ext4_sb_info *sbi = EXT4_SB(sb);
2926 struct ext4_super_block *es = sbi->s_es;
2927 int def_errors;
2928 const struct mount_opts *m;
2929 char sep = nodefs ? '\n' : ',';
2930
2931 #define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep)
2932 #define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg)
2933
2934 if (sbi->s_sb_block != 1)
2935 SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block);
2936
2937 for (m = ext4_mount_opts; m->token != Opt_err; m++) {
2938 int want_set = m->flags & MOPT_SET;
2939 int opt_2 = m->flags & MOPT_2;
2940 unsigned int mount_opt, def_mount_opt;
2941
2942 if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
2943 m->flags & MOPT_SKIP)
2944 continue;
2945
2946 if (opt_2) {
2947 mount_opt = sbi->s_mount_opt2;
2948 def_mount_opt = sbi->s_def_mount_opt2;
2949 } else {
2950 mount_opt = sbi->s_mount_opt;
2951 def_mount_opt = sbi->s_def_mount_opt;
2952 }
2953 /* skip if same as the default */
2954 if (!nodefs && !(m->mount_opt & (mount_opt ^ def_mount_opt)))
2955 continue;
2956 /* select Opt_noFoo vs Opt_Foo */
2957 if ((want_set &&
2958 (mount_opt & m->mount_opt) != m->mount_opt) ||
2959 (!want_set && (mount_opt & m->mount_opt)))
2960 continue;
2961 SEQ_OPTS_PRINT("%s", token2str(m->token));
2962 }
2963
2964 if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) ||
2965 le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
2966 SEQ_OPTS_PRINT("resuid=%u",
2967 from_kuid_munged(&init_user_ns, sbi->s_resuid));
2968 if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) ||
2969 le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
2970 SEQ_OPTS_PRINT("resgid=%u",
2971 from_kgid_munged(&init_user_ns, sbi->s_resgid));
2972 def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
2973 if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO)
2974 SEQ_OPTS_PUTS("errors=remount-ro");
2975 if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
2976 SEQ_OPTS_PUTS("errors=continue");
2977 if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
2978 SEQ_OPTS_PUTS("errors=panic");
2979 if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
2980 SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ);
2981 if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME)
2982 SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
2983 if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
2984 SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
2985 if (nodefs || sbi->s_stripe)
2986 SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
2987 if (nodefs || EXT4_MOUNT_DATA_FLAGS &
2988 (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
2989 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
2990 SEQ_OPTS_PUTS("data=journal");
2991 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
2992 SEQ_OPTS_PUTS("data=ordered");
2993 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
2994 SEQ_OPTS_PUTS("data=writeback");
2995 }
2996 if (nodefs ||
2997 sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
2998 SEQ_OPTS_PRINT("inode_readahead_blks=%u",
2999 sbi->s_inode_readahead_blks);
3000
3001 if (test_opt(sb, INIT_INODE_TABLE) && (nodefs ||
3002 (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
3003 SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
3004 if (nodefs || sbi->s_max_dir_size_kb)
3005 SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
3006 if (test_opt(sb, DATA_ERR_ABORT))
3007 SEQ_OPTS_PUTS("data_err=abort");
3008
3009 fscrypt_show_test_dummy_encryption(seq, sep, sb);
3010
3011 if (sb->s_flags & SB_INLINECRYPT)
3012 SEQ_OPTS_PUTS("inlinecrypt");
3013
3014 if (test_opt(sb, DAX_ALWAYS)) {
3015 if (IS_EXT2_SB(sb))
3016 SEQ_OPTS_PUTS("dax");
3017 else
3018 SEQ_OPTS_PUTS("dax=always");
3019 } else if (test_opt2(sb, DAX_NEVER)) {
3020 SEQ_OPTS_PUTS("dax=never");
3021 } else if (test_opt2(sb, DAX_INODE)) {
3022 SEQ_OPTS_PUTS("dax=inode");
3023 }
3024
3025 if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD &&
3026 !test_opt2(sb, MB_OPTIMIZE_SCAN)) {
3027 SEQ_OPTS_PUTS("mb_optimize_scan=0");
3028 } else if (sbi->s_groups_count < MB_DEFAULT_LINEAR_SCAN_THRESHOLD &&
3029 test_opt2(sb, MB_OPTIMIZE_SCAN)) {
3030 SEQ_OPTS_PUTS("mb_optimize_scan=1");
3031 }
3032
3033 ext4_show_quota_options(seq, sb);
3034 return 0;
3035 }
3036
3037 static int ext4_show_options(struct seq_file *seq, struct dentry *root)
3038 {
3039 return _ext4_show_options(seq, root->d_sb, 0);
3040 }
3041
3042 int ext4_seq_options_show(struct seq_file *seq, void *offset)
3043 {
3044 struct super_block *sb = seq->private;
3045 int rc;
3046
3047 seq_puts(seq, sb_rdonly(sb) ? "ro" : "rw");
3048 rc = _ext4_show_options(seq, sb, 1);
3049 seq_putc(seq, '\n');
3050 return rc;
3051 }
3052
3053 static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
3054 int read_only)
3055 {
3056 struct ext4_sb_info *sbi = EXT4_SB(sb);
3057 int err = 0;
3058
3059 if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
3060 ext4_msg(sb, KERN_ERR, "revision level too high, "
3061 "forcing read-only mode");
3062 err = -EROFS;
3063 goto done;
3064 }
3065 if (read_only)
3066 goto done;
3067 if (!(sbi->s_mount_state & EXT4_VALID_FS))
3068 ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
3069 "running e2fsck is recommended");
3070 else if (sbi->s_mount_state & EXT4_ERROR_FS)
3071 ext4_msg(sb, KERN_WARNING,
3072 "warning: mounting fs with errors, "
3073 "running e2fsck is recommended");
3074 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
3075 le16_to_cpu(es->s_mnt_count) >=
3076 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
3077 ext4_msg(sb, KERN_WARNING,
3078 "warning: maximal mount count reached, "
3079 "running e2fsck is recommended");
3080 else if (le32_to_cpu(es->s_checkinterval) &&
3081 (ext4_get_tstamp(es, s_lastcheck) +
3082 le32_to_cpu(es->s_checkinterval) <= ktime_get_real_seconds()))
3083 ext4_msg(sb, KERN_WARNING,
3084 "warning: checktime reached, "
3085 "running e2fsck is recommended");
3086 if (!sbi->s_journal)
3087 es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
3088 if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
3089 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
3090 le16_add_cpu(&es->s_mnt_count, 1);
3091 ext4_update_tstamp(es, s_mtime);
3092 if (sbi->s_journal) {
3093 ext4_set_feature_journal_needs_recovery(sb);
3094 if (ext4_has_feature_orphan_file(sb))
3095 ext4_set_feature_orphan_present(sb);
3096 }
3097
3098 err = ext4_commit_super(sb);
3099 done:
3100 if (test_opt(sb, DEBUG))
3101 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
3102 "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
3103 sb->s_blocksize,
3104 sbi->s_groups_count,
3105 EXT4_BLOCKS_PER_GROUP(sb),
3106 EXT4_INODES_PER_GROUP(sb),
3107 sbi->s_mount_opt, sbi->s_mount_opt2);
3108 return err;
3109 }
3110
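/*
 * Grow the s_flex_groups pointer array to cover ngroup groups. The
 * resize is RCU-style: a larger array is populated from the old one,
 * published with rcu_assign_pointer(), and the old array is freed only
 * after a grace period via ext4_kvfree_array_rcu().
 */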
3111 int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
3112 {
3113 struct ext4_sb_info *sbi = EXT4_SB(sb);
3114 struct flex_groups **old_groups, **new_groups;
3115 int size, i, j;
3116
3117 if (!sbi->s_log_groups_per_flex)
3118 return 0;
3119
3120 size = ext4_flex_group(sbi, ngroup - 1) + 1;
3121 if (size <= sbi->s_flex_groups_allocated)
3122 return 0;
3123
3124 new_groups = kvzalloc(roundup_pow_of_two(size *
3125 sizeof(*sbi->s_flex_groups)), GFP_KERNEL);
3126 if (!new_groups) {
3127 ext4_msg(sb, KERN_ERR,
3128 "not enough memory for %d flex group pointers", size);
3129 return -ENOMEM;
3130 }
3131 for (i = sbi->s_flex_groups_allocated; i < size; i++) {
3132 new_groups[i] = kvzalloc(roundup_pow_of_two(
3133 sizeof(struct flex_groups)),
3134 GFP_KERNEL);
3135 if (!new_groups[i]) {
3136 for (j = sbi->s_flex_groups_allocated; j < i; j++)
3137 kvfree(new_groups[j]);
3138 kvfree(new_groups);
3139 ext4_msg(sb, KERN_ERR,
3140 "not enough memory for %d flex groups", size);
3141 return -ENOMEM;
3142 }
3143 }
3144 rcu_read_lock();
3145 old_groups = rcu_dereference(sbi->s_flex_groups);
3146 if (old_groups)
3147 memcpy(new_groups, old_groups,
3148 (sbi->s_flex_groups_allocated *
3149 sizeof(struct flex_groups *)));
3150 rcu_read_unlock();
3151 rcu_assign_pointer(sbi->s_flex_groups, new_groups);
3152 sbi->s_flex_groups_allocated = size;
3153 if (old_groups)
3154 ext4_kvfree_array_rcu(old_groups);
3155 return 0;
3156 }
3157
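/* Returns 1 on success and 0 on failure (note: not an errno). */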
3158 static int ext4_fill_flex_info(struct super_block *sb)
3159 {
3160 struct ext4_sb_info *sbi = EXT4_SB(sb);
3161 struct ext4_group_desc *gdp = NULL;
3162 struct flex_groups *fg;
3163 ext4_group_t flex_group;
3164 int i, err;
3165
3166 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
3167 if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
3168 sbi->s_log_groups_per_flex = 0;
3169 return 1;
3170 }
3171
3172 err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count);
3173 if (err)
3174 goto failed;
3175
3176 for (i = 0; i < sbi->s_groups_count; i++) {
3177 gdp = ext4_get_group_desc(sb, i, NULL);
3178
3179 flex_group = ext4_flex_group(sbi, i);
3180 fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group);
3181 atomic_add(ext4_free_inodes_count(sb, gdp), &fg->free_inodes);
3182 atomic64_add(ext4_free_group_clusters(sb, gdp),
3183 &fg->free_clusters);
3184 atomic_add(ext4_used_dirs_count(sb, gdp), &fg->used_dirs);
3185 }
3186
3187 return 1;
3188 failed:
3189 return 0;
3190 }
3191
3192 static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group,
3193 struct ext4_group_desc *gdp)
3194 {
3195 int offset = offsetof(struct ext4_group_desc, bg_checksum);
3196 __u16 crc = 0;
3197 __le32 le_group = cpu_to_le32(block_group);
3198 struct ext4_sb_info *sbi = EXT4_SB(sb);
3199
3200 if (ext4_has_metadata_csum(sbi->s_sb)) {
3201 /* Use new metadata_csum algorithm */
3202 __u32 csum32;
3203 __u16 dummy_csum = 0;
3204
3205 csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group,
3206 sizeof(le_group));
3207 csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp, offset);
3208 csum32 = ext4_chksum(sbi, csum32, (__u8 *)&dummy_csum,
3209 sizeof(dummy_csum));
3210 offset += sizeof(dummy_csum);
3211 if (offset < sbi->s_desc_size)
3212 csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp + offset,
3213 sbi->s_desc_size - offset);
3214
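/* bg_checksum is only 16 bits wide; keep the low half of the 32-bit csum */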
3215 crc = csum32 & 0xFFFF;
3216 goto out;
3217 }
3218
3219 /* old crc16 code */
3220 if (!ext4_has_feature_gdt_csum(sb))
3221 return 0;
3222
3223 crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
3224 crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
3225 crc = crc16(crc, (__u8 *)gdp, offset);
3226 offset += sizeof(gdp->bg_checksum); /* skip checksum */
3227 /* for checksum of struct ext4_group_desc do the rest...*/
3228 if (ext4_has_feature_64bit(sb) && offset < sbi->s_desc_size)
3229 crc = crc16(crc, (__u8 *)gdp + offset,
3230 sbi->s_desc_size - offset);
3231
3232 out:
3233 return cpu_to_le16(crc);
3234 }
3235
3236 int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group,
3237 struct ext4_group_desc *gdp)
3238 {
3239 if (ext4_has_group_desc_csum(sb) &&
3240 (gdp->bg_checksum != ext4_group_desc_csum(sb, block_group, gdp)))
3241 return 0;
3242
3243 return 1;
3244 }
3245
3246 void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group,
3247 struct ext4_group_desc *gdp)
3248 {
3249 if (!ext4_has_group_desc_csum(sb))
3250 return;
3251 gdp->bg_checksum = ext4_group_desc_csum(sb, block_group, gdp);
3252 }
3253
3254 /* Called at mount-time, super-block is locked */
3255 static int ext4_check_descriptors(struct super_block *sb,
3256 ext4_fsblk_t sb_block,
3257 ext4_group_t *first_not_zeroed)
3258 {
3259 struct ext4_sb_info *sbi = EXT4_SB(sb);
3260 ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
3261 ext4_fsblk_t last_block;
3262 ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0);
3263 ext4_fsblk_t block_bitmap;
3264 ext4_fsblk_t inode_bitmap;
3265 ext4_fsblk_t inode_table;
3266 int flexbg_flag = 0;
3267 ext4_group_t i, grp = sbi->s_groups_count;
3268
3269 if (ext4_has_feature_flex_bg(sb))
3270 flexbg_flag = 1;
3271
3272 ext4_debug("Checking group descriptors");
3273
3274 for (i = 0; i < sbi->s_groups_count; i++) {
3275 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
3276
3277 if (i == sbi->s_groups_count - 1 || flexbg_flag)
3278 last_block = ext4_blocks_count(sbi->s_es) - 1;
3279 else
3280 last_block = first_block +
3281 (EXT4_BLOCKS_PER_GROUP(sb) - 1);
3282
3283 if ((grp == sbi->s_groups_count) &&
3284 !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3285 grp = i;
3286
3287 block_bitmap = ext4_block_bitmap(sb, gdp);
3288 if (block_bitmap == sb_block) {
3289 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3290 "Block bitmap for group %u overlaps "
3291 "superblock", i);
3292 if (!sb_rdonly(sb))
3293 return 0;
3294 }
3295 if (block_bitmap >= sb_block + 1 &&
3296 block_bitmap <= last_bg_block) {
3297 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3298 "Block bitmap for group %u overlaps "
3299 "block group descriptors", i);
3300 if (!sb_rdonly(sb))
3301 return 0;
3302 }
3303 if (block_bitmap < first_block || block_bitmap > last_block) {
3304 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3305 "Block bitmap for group %u not in group "
3306 "(block %llu)!", i, block_bitmap);
3307 return 0;
3308 }
3309 inode_bitmap = ext4_inode_bitmap(sb, gdp);
3310 if (inode_bitmap == sb_block) {
3311 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3312 "Inode bitmap for group %u overlaps "
3313 "superblock", i);
3314 if (!sb_rdonly(sb))
3315 return 0;
3316 }
3317 if (inode_bitmap >= sb_block + 1 &&
3318 inode_bitmap <= last_bg_block) {
3319 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3320 "Inode bitmap for group %u overlaps "
3321 "block group descriptors", i);
3322 if (!sb_rdonly(sb))
3323 return 0;
3324 }
3325 if (inode_bitmap < first_block || inode_bitmap > last_block) {
3326 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3327 "Inode bitmap for group %u not in group "
3328 "(block %llu)!", i, inode_bitmap);
3329 return 0;
3330 }
3331 inode_table = ext4_inode_table(sb, gdp);
3332 if (inode_table == sb_block) {
3333 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3334 "Inode table for group %u overlaps "
3335 "superblock", i);
3336 if (!sb_rdonly(sb))
3337 return 0;
3338 }
3339 if (inode_table >= sb_block + 1 &&
3340 inode_table <= last_bg_block) {
3341 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3342 "Inode table for group %u overlaps "
3343 "block group descriptors", i);
3344 if (!sb_rdonly(sb))
3345 return 0;
3346 }
3347 if (inode_table < first_block ||
3348 inode_table + sbi->s_itb_per_group - 1 > last_block) {
3349 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3350 "Inode table for group %u not in group "
3351 "(block %llu)!", i, inode_table);
3352 return 0;
3353 }
3354 ext4_lock_group(sb, i);
3355 if (!ext4_group_desc_csum_verify(sb, i, gdp)) {
3356 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3357 "Checksum for group %u failed (%u!=%u)",
3358 i, le16_to_cpu(ext4_group_desc_csum(sb, i,
3359 gdp)), le16_to_cpu(gdp->bg_checksum));
3360 if (!sb_rdonly(sb)) {
3361 ext4_unlock_group(sb, i);
3362 return 0;
3363 }
3364 }
3365 ext4_unlock_group(sb, i);
3366 if (!flexbg_flag)
3367 first_block += EXT4_BLOCKS_PER_GROUP(sb);
3368 }
3369 if (NULL != first_not_zeroed)
3370 *first_not_zeroed = grp;
3371 return 1;
3372 }
3373
3374 /*
3375 * Maximal extent format file size.
3376 * Resulting logical blkno at s_maxbytes must fit in our on-disk
3377 * extent format containers, within a sector_t, and within i_blocks
3378 * in the vfs. ext4 inode has 48 bits of i_block in fsblock units,
3379 * so that won't be a limiting factor.
3380 *
3381 * However, there is another limiting factor: we store extents as a
3382 * starting block plus a length, so the length of the extent covering the
3383 * maximum file size must also fit into the on-disk format containers.
3384 * Since an extent's length is one greater than the highest offset it
3385 * covers (block 0 counts too), we have to lower s_maxbytes by one fs block.
3386 *
3387 * Note, this does *not* consider any metadata overhead for vfs i_blocks.
3388 */
3389 static loff_t ext4_max_size(int blkbits, int has_huge_files)
3390 {
3391 loff_t res;
3392 loff_t upper_limit = MAX_LFS_FILESIZE;
3393
3394 BUILD_BUG_ON(sizeof(blkcnt_t) < sizeof(u64));
3395
3396 if (!has_huge_files) {
3397 upper_limit = (1LL << 32) - 1;
3398
3399 /* total blocks in file system block size */
3400 upper_limit >>= (blkbits - 9);
3401 upper_limit <<= blkbits;
3402 }
3403
3404 /*
3405 * 32-bit extent-start container, ee_block. We lower the maxbytes
3406 * by one fs block, so ee_len can cover the extent of maximum file
3407 * size
3408 */
3409 res = (1LL << 32) - 1;
3410 res <<= blkbits;
3411
3412 /* Sanity check against vm- & vfs- imposed limits */
3413 if (res > upper_limit)
3414 res = upper_limit;
3415
3416 return res;
3417 }
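
/*
 * Worked example (illustrative): with 4KiB blocks (blkbits == 12) and
 * huge_file enabled, res = ((1LL << 32) - 1) << 12, i.e. 16TiB minus one
 * 4KiB block. Without huge_file, upper_limit is 2^32 - 1 512-byte
 * sectors, i.e. (((1LL << 32) - 1) >> 3) << 12 == 2TiB - 4KiB, which
 * then becomes the cap.
 */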
3418
3419 /*
3420 * Maximal bitmap file size. There is a direct, and {,double-,triple-}indirect
3421 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
3422 * We need to be 1 filesystem block less than the 2^48 sector limit.
3423 */
3424 static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
3425 {
3426 loff_t upper_limit, res = EXT4_NDIR_BLOCKS;
3427 int meta_blocks;
3428 unsigned int ppb = 1 << (bits - 2);
3429
3430 /*
3431 * This is calculated to be the largest file size for a dense, block
3432 * mapped file such that the file's total number of 512-byte sectors,
3433 * including data and all indirect blocks, does not exceed (2^48 - 1).
3434 *
3435 * __u32 i_blocks_lo and __u16 i_blocks_high represent the total
3436 * number of 512-byte sectors of the file.
3437 */
3438 if (!has_huge_files) {
3439 /*
3440 * !has_huge_files implies that the inode i_blocks field
3441 * represents the total file size in 512-byte sectors,
3442 * limited to 2^32 of them (the vfs i_blocks unit).
3443 */
3444 upper_limit = (1LL << 32) - 1;
3445
3446 /* total blocks in file system block size */
3447 upper_limit >>= (bits - 9);
3448
3449 } else {
3450 /*
3451 * We use 48 bit ext4_inode i_blocks
3452 * With EXT4_HUGE_FILE_FL set the i_blocks
3453 * represent total number of blocks in
3454 * file system block size
3455 */
3456 upper_limit = (1LL << 48) - 1;
3457
3458 }
3459
3460 /* Compute how many blocks we can address by block tree */
3461 res += ppb;
3462 res += ppb * ppb;
3463 res += ((loff_t)ppb) * ppb * ppb;
3464 /* Compute how many metadata blocks are needed */
3465 meta_blocks = 1;
3466 meta_blocks += 1 + ppb;
3467 meta_blocks += 1 + ppb + ppb * ppb;
3468 /* Does block tree limit file size? */
3469 if (res + meta_blocks <= upper_limit)
3470 goto check_lfs;
3471
3472 res = upper_limit;
3473 /* How many metadata blocks are needed for addressing upper_limit? */
3474 upper_limit -= EXT4_NDIR_BLOCKS;
3475 /* indirect blocks */
3476 meta_blocks = 1;
3477 upper_limit -= ppb;
3478 /* double indirect blocks */
3479 if (upper_limit < ppb * ppb) {
3480 meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb);
3481 res -= meta_blocks;
3482 goto check_lfs;
3483 }
3484 meta_blocks += 1 + ppb;
3485 upper_limit -= ppb * ppb;
3486 /* triple indirect blocks for the rest */
3487 meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb) +
3488 DIV_ROUND_UP_ULL(upper_limit, ppb*ppb);
3489 res -= meta_blocks;
3490 check_lfs:
3491 res <<= bits;
3492 if (res > MAX_LFS_FILESIZE)
3493 res = MAX_LFS_FILESIZE;
3494
3495 return res;
3496 }
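
/*
 * Worked example (illustrative): with 4KiB blocks (bits == 12),
 * ppb == 1024, so the block tree addresses
 * 12 + 1024 + 1024^2 + 1024^3 data blocks (a bit over 2^30), and
 * res <<= 12 gives roughly 4TiB. With huge_file the 2^48-sector
 * i_blocks limit is far larger, so it is the block tree that limits
 * bitmap-mapped files at this block size.
 */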
3497
3498 static ext4_fsblk_t descriptor_loc(struct super_block *sb,
3499 ext4_fsblk_t logical_sb_block, int nr)
3500 {
3501 struct ext4_sb_info *sbi = EXT4_SB(sb);
3502 ext4_group_t bg, first_meta_bg;
3503 int has_super = 0;
3504
3505 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
3506
3507 if (!ext4_has_feature_meta_bg(sb) || nr < first_meta_bg)
3508 return logical_sb_block + nr + 1;
3509 bg = sbi->s_desc_per_block * nr;
3510 if (ext4_bg_has_super(sb, bg))
3511 has_super = 1;
3512
3513 /*
3514 * If we have a meta_bg fs with 1k blocks, group 0's GDT is at
3515 * block 2, not 1. If s_first_data_block == 0 (bigalloc is enabled
3516 * on modern mke2fs or blksize > 1k on older mke2fs) then we must
3517 * compensate.
3518 */
3519 if (sb->s_blocksize == 1024 && nr == 0 &&
3520 le32_to_cpu(sbi->s_es->s_first_data_block) == 0)
3521 has_super++;
3522
3523 return (has_super + ext4_group_first_block_no(sb, bg));
3524 }
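
/*
 * Example (illustrative): without meta_bg, group descriptor block nr
 * simply follows the superblock copy, so for the primary superblock at
 * block 0 of a 4KiB filesystem, descriptor block 0 lives at block 1.
 * With meta_bg, descriptor block nr lives inside block group
 * s_desc_per_block * nr, right after that group's superblock backup if
 * it has one.
 */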
3525
3526 /**
3527 * ext4_get_stripe_size: Get the stripe size.
3528 * @sbi: In memory super block info
3529 *
3530 * If a stripe size was specified via mount option, use that value. If
3531 * the value specified at mount time is greater than the blocks per
3532 * group, fall back to the super block value. If the super block value
3533 * is also greater than blocks per group, return 0.
3534 * The allocator needs it to be less than blocks per group.
3535 *
3536 */
3537 static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
3538 {
3539 unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
3540 unsigned long stripe_width =
3541 le32_to_cpu(sbi->s_es->s_raid_stripe_width);
3542 int ret;
3543
3544 if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
3545 ret = sbi->s_stripe;
3546 else if (stripe_width && stripe_width <= sbi->s_blocks_per_group)
3547 ret = stripe_width;
3548 else if (stride && stride <= sbi->s_blocks_per_group)
3549 ret = stride;
3550 else
3551 ret = 0;
3552
3553 /*
3554 * If the stripe width is 1, this makes no sense and
3555 * we set it to 0 to turn off stripe handling code.
3556 */
3557 if (ret <= 1)
3558 ret = 0;
3559
3560 return ret;
3561 }
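
/*
 * Example (illustrative): on a RAID volume with s_raid_stride == 16 and
 * s_raid_stripe_width == 64, mounting without -o stripe yields a stripe
 * size of 64 blocks; "-o stripe=32" overrides that to 32. A value of 1,
 * or one exceeding s_blocks_per_group, disables stripe alignment
 * entirely (returns 0).
 */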
3562
3563 /*
3564 * Check whether this filesystem can be mounted based on
3565 * the features present and the RDONLY/RDWR mount requested.
3566 * Returns 1 if this filesystem can be mounted as requested,
3567 * 0 if it cannot be.
3568 */
3569 int ext4_feature_set_ok(struct super_block *sb, int readonly)
3570 {
3571 if (ext4_has_unknown_ext4_incompat_features(sb)) {
3572 ext4_msg(sb, KERN_ERR,
3573 "Couldn't mount because of "
3574 "unsupported optional features (%x)",
3575 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
3576 ~EXT4_FEATURE_INCOMPAT_SUPP));
3577 return 0;
3578 }
3579
3580 if (!IS_ENABLED(CONFIG_UNICODE) && ext4_has_feature_casefold(sb)) {
3581 ext4_msg(sb, KERN_ERR,
3582 "Filesystem with casefold feature cannot be "
3583 "mounted without CONFIG_UNICODE");
3584 return 0;
3585 }
3586
3587 if (readonly)
3588 return 1;
3589
3590 if (ext4_has_feature_readonly(sb)) {
3591 ext4_msg(sb, KERN_INFO, "filesystem is read-only");
3592 sb->s_flags |= SB_RDONLY;
3593 return 1;
3594 }
3595
3596 /* Check that feature set is OK for a read-write mount */
3597 if (ext4_has_unknown_ext4_ro_compat_features(sb)) {
3598 ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of "
3599 "unsupported optional features (%x)",
3600 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
3601 ~EXT4_FEATURE_RO_COMPAT_SUPP));
3602 return 0;
3603 }
3604 if (ext4_has_feature_bigalloc(sb) && !ext4_has_feature_extents(sb)) {
3605 ext4_msg(sb, KERN_ERR,
3606 "Can't support bigalloc feature without "
3607 "extents feature\n");
3608 return 0;
3609 }
3610
3611 #if !IS_ENABLED(CONFIG_QUOTA) || !IS_ENABLED(CONFIG_QFMT_V2)
3612 if (!readonly && (ext4_has_feature_quota(sb) ||
3613 ext4_has_feature_project(sb))) {
3614 ext4_msg(sb, KERN_ERR,
3615 "The kernel was not built with CONFIG_QUOTA and CONFIG_QFMT_V2");
3616 return 0;
3617 }
3618 #endif /* !CONFIG_QUOTA || !CONFIG_QFMT_V2 */
3619 return 1;
3620 }
3621
3622 /*
3623 * This function is called once a day if we have errors logged
3624 * on the file system
3625 */
3626 static void print_daily_error_info(struct timer_list *t)
3627 {
3628 struct ext4_sb_info *sbi = from_timer(sbi, t, s_err_report);
3629 struct super_block *sb = sbi->s_sb;
3630 struct ext4_super_block *es = sbi->s_es;
3631
3632 if (es->s_error_count)
3633 /* fsck newer than v1.41.13 is needed to clean this condition. */
3634 ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
3635 le32_to_cpu(es->s_error_count));
3636 if (es->s_first_error_time) {
3637 printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %llu: %.*s:%d",
3638 sb->s_id,
3639 ext4_get_tstamp(es, s_first_error_time),
3640 (int) sizeof(es->s_first_error_func),
3641 es->s_first_error_func,
3642 le32_to_cpu(es->s_first_error_line));
3643 if (es->s_first_error_ino)
3644 printk(KERN_CONT ": inode %u",
3645 le32_to_cpu(es->s_first_error_ino));
3646 if (es->s_first_error_block)
3647 printk(KERN_CONT ": block %llu", (unsigned long long)
3648 le64_to_cpu(es->s_first_error_block));
3649 printk(KERN_CONT "\n");
3650 }
3651 if (es->s_last_error_time) {
3652 printk(KERN_NOTICE "EXT4-fs (%s): last error at time %llu: %.*s:%d",
3653 sb->s_id,
3654 ext4_get_tstamp(es, s_last_error_time),
3655 (int) sizeof(es->s_last_error_func),
3656 es->s_last_error_func,
3657 le32_to_cpu(es->s_last_error_line));
3658 if (es->s_last_error_ino)
3659 printk(KERN_CONT ": inode %u",
3660 le32_to_cpu(es->s_last_error_ino));
3661 if (es->s_last_error_block)
3662 printk(KERN_CONT ": block %llu", (unsigned long long)
3663 le64_to_cpu(es->s_last_error_block));
3664 printk(KERN_CONT "\n");
3665 }
3666 mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */
3667 }
3668
3669 /* Find next suitable group and run ext4_init_inode_table */
3670 static int ext4_run_li_request(struct ext4_li_request *elr)
3671 {
3672 struct ext4_group_desc *gdp = NULL;
3673 struct super_block *sb = elr->lr_super;
3674 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
3675 ext4_group_t group = elr->lr_next_group;
3676 unsigned int prefetch_ios = 0;
3677 int ret = 0;
3678 int nr = EXT4_SB(sb)->s_mb_prefetch;
3679 u64 start_time;
3680
3681 if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP) {
3682 elr->lr_next_group = ext4_mb_prefetch(sb, group, nr, &prefetch_ios);
3683 ext4_mb_prefetch_fini(sb, elr->lr_next_group, nr);
3684 trace_ext4_prefetch_bitmaps(sb, group, elr->lr_next_group, nr);
3685 if (group >= elr->lr_next_group) {
3686 ret = 1;
3687 if (elr->lr_first_not_zeroed != ngroups &&
3688 !sb_rdonly(sb) && test_opt(sb, INIT_INODE_TABLE)) {
3689 elr->lr_next_group = elr->lr_first_not_zeroed;
3690 elr->lr_mode = EXT4_LI_MODE_ITABLE;
3691 ret = 0;
3692 }
3693 }
3694 return ret;
3695 }
3696
3697 for (; group < ngroups; group++) {
3698 gdp = ext4_get_group_desc(sb, group, NULL);
3699 if (!gdp) {
3700 ret = 1;
3701 break;
3702 }
3703
3704 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3705 break;
3706 }
3707
3708 if (group >= ngroups)
3709 ret = 1;
3710
3711 if (!ret) {
3712 start_time = ktime_get_real_ns();
3713 ret = ext4_init_inode_table(sb, group,
3714 elr->lr_timeout ? 0 : 1);
3715 trace_ext4_lazy_itable_init(sb, group);
3716 if (elr->lr_timeout == 0) {
3717 elr->lr_timeout = nsecs_to_jiffies((ktime_get_real_ns() - start_time) *
3718 EXT4_SB(elr->lr_super)->s_li_wait_mult);
3719 }
3720 elr->lr_next_sched = jiffies + elr->lr_timeout;
3721 elr->lr_next_group = group + 1;
3722 }
3723 return ret;
3724 }
3725
3726 /*
3727 * Remove lr_request from the request list and free the
3728 * request structure. Should be called with li_list_mtx held.
3729 */
3730 static void ext4_remove_li_request(struct ext4_li_request *elr)
3731 {
3732 if (!elr)
3733 return;
3734
3735 list_del(&elr->lr_request);
3736 EXT4_SB(elr->lr_super)->s_li_request = NULL;
3737 kfree(elr);
3738 }
3739
3740 static void ext4_unregister_li_request(struct super_block *sb)
3741 {
3742 mutex_lock(&ext4_li_mtx);
3743 if (!ext4_li_info) {
3744 mutex_unlock(&ext4_li_mtx);
3745 return;
3746 }
3747
3748 mutex_lock(&ext4_li_info->li_list_mtx);
3749 ext4_remove_li_request(EXT4_SB(sb)->s_li_request);
3750 mutex_unlock(&ext4_li_info->li_list_mtx);
3751 mutex_unlock(&ext4_li_mtx);
3752 }
3753
3754 static struct task_struct *ext4_lazyinit_task;
3755
3756 /*
3757 * This is the function where the ext4lazyinit thread lives. It walks
3758 * through the request list searching for the next scheduled filesystem.
3759 * When such a fs is found, run the lazy initialization request
3760 * (ext4_run_li_request) and keep track of the time spent in this
3761 * function. Based on that time we compute the next schedule time of
3762 * the request. When the walk through the list is complete, compute
3763 * the next wakeup time and put itself to sleep.
3764 */
3765 static int ext4_lazyinit_thread(void *arg)
3766 {
3767 struct ext4_lazy_init *eli = arg;
3768 struct list_head *pos, *n;
3769 struct ext4_li_request *elr;
3770 unsigned long next_wakeup, cur;
3771
3772 BUG_ON(NULL == eli);
3773 set_freezable();
3774
3775 cont_thread:
3776 while (true) {
3777 next_wakeup = MAX_JIFFY_OFFSET;
3778
3779 mutex_lock(&eli->li_list_mtx);
3780 if (list_empty(&eli->li_request_list)) {
3781 mutex_unlock(&eli->li_list_mtx);
3782 goto exit_thread;
3783 }
3784 list_for_each_safe(pos, n, &eli->li_request_list) {
3785 int err = 0;
3786 int progress = 0;
3787 elr = list_entry(pos, struct ext4_li_request,
3788 lr_request);
3789
3790 if (time_before(jiffies, elr->lr_next_sched)) {
3791 if (time_before(elr->lr_next_sched, next_wakeup))
3792 next_wakeup = elr->lr_next_sched;
3793 continue;
3794 }
3795 if (down_read_trylock(&elr->lr_super->s_umount)) {
3796 if (sb_start_write_trylock(elr->lr_super)) {
3797 progress = 1;
3798 /*
3799 * We hold sb->s_umount, sb can not
3800 * be removed from the list, it is
3801 * now safe to drop li_list_mtx
3802 */
3803 mutex_unlock(&eli->li_list_mtx);
3804 err = ext4_run_li_request(elr);
3805 sb_end_write(elr->lr_super);
3806 mutex_lock(&eli->li_list_mtx);
3807 n = pos->next;
3808 }
3809 up_read((&elr->lr_super->s_umount));
3810 }
3811 /* error, remove the lazy_init job */
3812 if (err) {
3813 ext4_remove_li_request(elr);
3814 continue;
3815 }
3816 if (!progress) {
3817 elr->lr_next_sched = jiffies +
3818 get_random_u32_below(EXT4_DEF_LI_MAX_START_DELAY * HZ);
3819 }
3820 if (time_before(elr->lr_next_sched, next_wakeup))
3821 next_wakeup = elr->lr_next_sched;
3822 }
3823 mutex_unlock(&eli->li_list_mtx);
3824
3825 try_to_freeze();
3826
3827 cur = jiffies;
3828 if ((time_after_eq(cur, next_wakeup)) ||
3829 (MAX_JIFFY_OFFSET == next_wakeup)) {
3830 cond_resched();
3831 continue;
3832 }
3833
3834 schedule_timeout_interruptible(next_wakeup - cur);
3835
3836 if (kthread_should_stop()) {
3837 ext4_clear_request_list();
3838 goto exit_thread;
3839 }
3840 }
3841
3842 exit_thread:
3843 /*
3844 * It looks like the request list is empty, but we need
3845 * to check it under the li_list_mtx lock, to prevent any
3846 * additions into it, and of course we should lock ext4_li_mtx
3847 * to atomically free the list and ext4_li_info, because at
3848 * this point another ext4 filesystem could be registering
3849 * a new one.
3850 */
3851 mutex_lock(&ext4_li_mtx);
3852 mutex_lock(&eli->li_list_mtx);
3853 if (!list_empty(&eli->li_request_list)) {
3854 mutex_unlock(&eli->li_list_mtx);
3855 mutex_unlock(&ext4_li_mtx);
3856 goto cont_thread;
3857 }
3858 mutex_unlock(&eli->li_list_mtx);
3859 kfree(ext4_li_info);
3860 ext4_li_info = NULL;
3861 mutex_unlock(&ext4_li_mtx);
3862
3863 return 0;
3864 }
3865
3866 static void ext4_clear_request_list(void)
3867 {
3868 struct list_head *pos, *n;
3869 struct ext4_li_request *elr;
3870
3871 mutex_lock(&ext4_li_info->li_list_mtx);
3872 list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
3873 elr = list_entry(pos, struct ext4_li_request,
3874 lr_request);
3875 ext4_remove_li_request(elr);
3876 }
3877 mutex_unlock(&ext4_li_info->li_list_mtx);
3878 }
3879
3880 static int ext4_run_lazyinit_thread(void)
3881 {
3882 ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
3883 ext4_li_info, "ext4lazyinit");
3884 if (IS_ERR(ext4_lazyinit_task)) {
3885 int err = PTR_ERR(ext4_lazyinit_task);
3886 ext4_clear_request_list();
3887 kfree(ext4_li_info);
3888 ext4_li_info = NULL;
3889 printk(KERN_CRIT "EXT4-fs: error %d creating inode table "
3890 "initialization thread\n",
3891 err);
3892 return err;
3893 }
3894 ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
3895 return 0;
3896 }
3897
3898 /*
3899 * Check whether it makes sense to run the itable init thread or not.
3900 * If there is at least one uninitialized inode table, return the
3901 * corresponding group number; otherwise the loop goes through all
3902 * groups and returns the total number of groups.
3903 */
3904 static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
3905 {
3906 ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
3907 struct ext4_group_desc *gdp = NULL;
3908
3909 if (!ext4_has_group_desc_csum(sb))
3910 return ngroups;
3911
3912 for (group = 0; group < ngroups; group++) {
3913 gdp = ext4_get_group_desc(sb, group, NULL);
3914 if (!gdp)
3915 continue;
3916
3917 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3918 break;
3919 }
3920
3921 return group;
3922 }
3923
3924 static int ext4_li_info_new(void)
3925 {
3926 struct ext4_lazy_init *eli = NULL;
3927
3928 eli = kzalloc(sizeof(*eli), GFP_KERNEL);
3929 if (!eli)
3930 return -ENOMEM;
3931
3932 INIT_LIST_HEAD(&eli->li_request_list);
3933 mutex_init(&eli->li_list_mtx);
3934
3935 eli->li_state |= EXT4_LAZYINIT_QUIT;
3936
3937 ext4_li_info = eli;
3938
3939 return 0;
3940 }
3941
3942 static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
3943 ext4_group_t start)
3944 {
3945 struct ext4_li_request *elr;
3946
3947 elr = kzalloc(sizeof(*elr), GFP_KERNEL);
3948 if (!elr)
3949 return NULL;
3950
3951 elr->lr_super = sb;
3952 elr->lr_first_not_zeroed = start;
3953 if (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS)) {
3954 elr->lr_mode = EXT4_LI_MODE_ITABLE;
3955 elr->lr_next_group = start;
3956 } else {
3957 elr->lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP;
3958 }
3959
3960 /*
3961 * Randomize first schedule time of the request to
3962 * spread the inode table initialization requests
3963 * better.
3964 */
3965 elr->lr_next_sched = jiffies + get_random_u32_below(EXT4_DEF_LI_MAX_START_DELAY * HZ);
3966 return elr;
3967 }
3968
3969 int ext4_register_li_request(struct super_block *sb,
3970 ext4_group_t first_not_zeroed)
3971 {
3972 struct ext4_sb_info *sbi = EXT4_SB(sb);
3973 struct ext4_li_request *elr = NULL;
3974 ext4_group_t ngroups = sbi->s_groups_count;
3975 int ret = 0;
3976
3977 mutex_lock(&ext4_li_mtx);
3978 if (sbi->s_li_request != NULL) {
3979 /*
3980 * Reset timeout so it can be computed again, because
3981 * s_li_wait_mult might have changed.
3982 */
3983 sbi->s_li_request->lr_timeout = 0;
3984 goto out;
3985 }
3986
3987 if (sb_rdonly(sb) ||
3988 (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS) &&
3989 (first_not_zeroed == ngroups || !test_opt(sb, INIT_INODE_TABLE))))
3990 goto out;
3991
3992 elr = ext4_li_request_new(sb, first_not_zeroed);
3993 if (!elr) {
3994 ret = -ENOMEM;
3995 goto out;
3996 }
3997
3998 if (NULL == ext4_li_info) {
3999 ret = ext4_li_info_new();
4000 if (ret)
4001 goto out;
4002 }
4003
4004 mutex_lock(&ext4_li_info->li_list_mtx);
4005 list_add(&elr->lr_request, &ext4_li_info->li_request_list);
4006 mutex_unlock(&ext4_li_info->li_list_mtx);
4007
4008 sbi->s_li_request = elr;
4009 /*
4010 * set elr to NULL here since it has been inserted into
4011 * the request_list and its removal and freeing are
4012 * handled by ext4_clear_request_list from now on.
4013 */
4014 elr = NULL;
4015
4016 if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
4017 ret = ext4_run_lazyinit_thread();
4018 if (ret)
4019 goto out;
4020 }
4021 out:
4022 mutex_unlock(&ext4_li_mtx);
4023 if (ret)
4024 kfree(elr);
4025 return ret;
4026 }
4027
4028 /*
4029 * We do not need to lock anything since this is called on
4030 * module unload.
4031 */
4032 static void ext4_destroy_lazyinit_thread(void)
4033 {
4034 /*
4035 * If thread exited earlier
4036 * there's nothing to be done.
4037 */
4038 if (!ext4_li_info || !ext4_lazyinit_task)
4039 return;
4040
4041 kthread_stop(ext4_lazyinit_task);
4042 }
4043
4044 static int set_journal_csum_feature_set(struct super_block *sb)
4045 {
4046 int ret = 1;
4047 int compat, incompat;
4048 struct ext4_sb_info *sbi = EXT4_SB(sb);
4049
4050 if (ext4_has_metadata_csum(sb)) {
4051 /* journal checksum v3 */
4052 compat = 0;
4053 incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
4054 } else {
4055 /* journal checksum v1 */
4056 compat = JBD2_FEATURE_COMPAT_CHECKSUM;
4057 incompat = 0;
4058 }
4059
4060 jbd2_journal_clear_features(sbi->s_journal,
4061 JBD2_FEATURE_COMPAT_CHECKSUM, 0,
4062 JBD2_FEATURE_INCOMPAT_CSUM_V3 |
4063 JBD2_FEATURE_INCOMPAT_CSUM_V2);
4064 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
4065 ret = jbd2_journal_set_features(sbi->s_journal,
4066 compat, 0,
4067 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
4068 incompat);
4069 } else if (test_opt(sb, JOURNAL_CHECKSUM)) {
4070 ret = jbd2_journal_set_features(sbi->s_journal,
4071 compat, 0,
4072 incompat);
4073 jbd2_journal_clear_features(sbi->s_journal, 0, 0,
4074 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
4075 } else {
4076 jbd2_journal_clear_features(sbi->s_journal, 0, 0,
4077 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
4078 }
4079
4080 return ret;
4081 }
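
/*
 * Summary of the resulting combinations (illustrative): when journalled
 * checksums are enabled, metadata_csum selects incompat CSUM_V3,
 * otherwise compat CHECKSUM (v1); journal_async_commit additionally
 * sets ASYNC_COMMIT, which plain journal_checksum (and the default
 * case) clears.
 */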
4082
4083 /*
4084 * Note: calculating the overhead so we can be compatible with
4085 * historical BSD practice is quite difficult in the face of
4086 * clusters/bigalloc. This is because multiple metadata blocks from
4087 * different block group can end up in the same allocation cluster.
4088 * Calculating the exact overhead in the face of clustered allocation
4089 * requires either O(all block bitmaps) in memory or O(number of block
4090 * groups**2) in time. We will still calculate the superblock for
4091 * older file systems --- and if we come across with a bigalloc file
4092 * system with zero in s_overhead_clusters the estimate will be close to
4093 * correct especially for very large cluster sizes --- but for newer
4094 * file systems, it's better to calculate this figure once at mkfs
4095 * time, and store it in the superblock. If the superblock value is
4096 * present (even for non-bigalloc file systems), we will use it.
4097 */
4098 static int count_overhead(struct super_block *sb, ext4_group_t grp,
4099 char *buf)
4100 {
4101 struct ext4_sb_info *sbi = EXT4_SB(sb);
4102 struct ext4_group_desc *gdp;
4103 ext4_fsblk_t first_block, last_block, b;
4104 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
4105 int s, j, count = 0;
4106 int has_super = ext4_bg_has_super(sb, grp);
4107
4108 if (!ext4_has_feature_bigalloc(sb))
4109 return (has_super + ext4_bg_num_gdb(sb, grp) +
4110 (has_super ? le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0) +
4111 sbi->s_itb_per_group + 2);
4112
4113 first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
4114 (grp * EXT4_BLOCKS_PER_GROUP(sb));
4115 last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1;
4116 for (i = 0; i < ngroups; i++) {
4117 gdp = ext4_get_group_desc(sb, i, NULL);
4118 b = ext4_block_bitmap(sb, gdp);
4119 if (b >= first_block && b <= last_block) {
4120 ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
4121 count++;
4122 }
4123 b = ext4_inode_bitmap(sb, gdp);
4124 if (b >= first_block && b <= last_block) {
4125 ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
4126 count++;
4127 }
4128 b = ext4_inode_table(sb, gdp);
4129 if (b >= first_block && b + sbi->s_itb_per_group <= last_block)
4130 for (j = 0; j < sbi->s_itb_per_group; j++, b++) {
4131 int c = EXT4_B2C(sbi, b - first_block);
4132 ext4_set_bit(c, buf);
4133 count++;
4134 }
4135 if (i != grp)
4136 continue;
4137 s = 0;
4138 if (ext4_bg_has_super(sb, grp)) {
4139 ext4_set_bit(s++, buf);
4140 count++;
4141 }
4142 j = ext4_bg_num_gdb(sb, grp);
4143 if (s + j > EXT4_BLOCKS_PER_GROUP(sb)) {
4144 ext4_error(sb, "Invalid number of block group "
4145 "descriptor blocks: %d", j);
4146 j = EXT4_BLOCKS_PER_GROUP(sb) - s;
4147 }
4148 count += j;
4149 for (; j > 0; j--)
4150 ext4_set_bit(EXT4_B2C(sbi, s++), buf);
4151 }
4152 if (!count)
4153 return 0;
4154 return EXT4_CLUSTERS_PER_GROUP(sb) -
4155 ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8);
4156 }
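
/*
 * Example (illustrative): for a non-bigalloc filesystem, a group that
 * carries a superblock backup costs
 * 1 (super) + num_gdb + s_reserved_gdt_blocks + s_itb_per_group + 2
 * blocks, the final 2 being the block and inode bitmaps; groups
 * without a backup cost only s_itb_per_group + 2.
 */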
4157
4158 /*
4159 * Compute the overhead and stash it in sbi->s_overhead
4160 */
4161 int ext4_calculate_overhead(struct super_block *sb)
4162 {
4163 struct ext4_sb_info *sbi = EXT4_SB(sb);
4164 struct ext4_super_block *es = sbi->s_es;
4165 struct inode *j_inode;
4166 unsigned int j_blocks, j_inum = le32_to_cpu(es->s_journal_inum);
4167 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
4168 ext4_fsblk_t overhead = 0;
4169 char *buf = (char *) get_zeroed_page(GFP_NOFS);
4170
4171 if (!buf)
4172 return -ENOMEM;
4173
4174 /*
4175 * Compute the overhead (FS structures). This is constant
4176 * for a given filesystem unless the number of block groups
4177 * changes so we cache the previous value until it does.
4178 */
4179
4180 /*
4181 * All of the blocks before first_data_block are overhead
4182 */
4183 overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
4184
4185 /*
4186 * Add the overhead found in each block group
4187 */
4188 for (i = 0; i < ngroups; i++) {
4189 int blks;
4190
4191 blks = count_overhead(sb, i, buf);
4192 overhead += blks;
4193 if (blks)
4194 memset(buf, 0, PAGE_SIZE);
4195 cond_resched();
4196 }
4197
4198 /*
4199 * Add the internal journal blocks whether the journal has been
4200 * loaded or not
4201 */
4202 if (sbi->s_journal && !sbi->s_journal_bdev_file)
4203 overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_total_len);
4204 else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) {
4205 /* j_inum for internal journal is non-zero */
4206 j_inode = ext4_get_journal_inode(sb, j_inum);
4207 if (!IS_ERR(j_inode)) {
4208 j_blocks = j_inode->i_size >> sb->s_blocksize_bits;
4209 overhead += EXT4_NUM_B2C(sbi, j_blocks);
4210 iput(j_inode);
4211 } else {
4212 ext4_msg(sb, KERN_ERR, "can't get journal size");
4213 }
4214 }
4215 sbi->s_overhead = overhead;
4216 smp_wmb();
4217 free_page((unsigned long) buf);
4218 return 0;
4219 }
4220
4221 static void ext4_set_resv_clusters(struct super_block *sb)
4222 {
4223 ext4_fsblk_t resv_clusters;
4224 struct ext4_sb_info *sbi = EXT4_SB(sb);
4225
4226 /*
4227 * There's no need to reserve anything when we aren't using extents.
4228 * The space estimates are exact, there are no unwritten extents,
4229 * hole punching doesn't need new metadata... This is needed especially
4230 * to keep ext2/3 backward compatibility.
4231 */
4232 if (!ext4_has_feature_extents(sb))
4233 return;
4234 /*
4235 * By default we reserve 2% or 4096 clusters, whichever is smaller.
4236 * This should cover the situations where we cannot afford to run
4237 * out of space, for example punching a hole or converting
4238 * unwritten extents in the delalloc path. In most cases such an
4239 * allocation would require 1 or 2 blocks; higher numbers are
4240 * very rare.
4241 */
4242 resv_clusters = (ext4_blocks_count(sbi->s_es) >>
4243 sbi->s_cluster_bits);
4244
4245 do_div(resv_clusters, 50);
4246 resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
4247
4248 atomic64_set(&sbi->s_resv_clusters, resv_clusters);
4249 }
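
/*
 * Worked example (illustrative): a 4TiB filesystem with 4KiB blocks and
 * no bigalloc has 2^30 clusters; 2% of that (~21.5M) exceeds the
 * 4096-cluster ceiling, so 4096 clusters (16MiB) are reserved. Only
 * filesystems smaller than roughly 800MiB end up below the ceiling.
 */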
4250
4251 static const char *ext4_quota_mode(struct super_block *sb)
4252 {
4253 #ifdef CONFIG_QUOTA
4254 if (!ext4_quota_capable(sb))
4255 return "none";
4256
4257 if (EXT4_SB(sb)->s_journal && ext4_is_quota_journalled(sb))
4258 return "journalled";
4259 else
4260 return "writeback";
4261 #else
4262 return "disabled";
4263 #endif
4264 }
4265
4266 static void ext4_setup_csum_trigger(struct super_block *sb,
4267 enum ext4_journal_trigger_type type,
4268 void (*trigger)(
4269 struct jbd2_buffer_trigger_type *type,
4270 struct buffer_head *bh,
4271 void *mapped_data,
4272 size_t size))
4273 {
4274 struct ext4_sb_info *sbi = EXT4_SB(sb);
4275
4276 sbi->s_journal_triggers[type].sb = sb;
4277 sbi->s_journal_triggers[type].tr_triggers.t_frozen = trigger;
4278 }
4279
4280 static void ext4_free_sbi(struct ext4_sb_info *sbi)
4281 {
4282 if (!sbi)
4283 return;
4284
4285 kfree(sbi->s_blockgroup_lock);
4286 fs_put_dax(sbi->s_daxdev, NULL);
4287 kfree(sbi);
4288 }
4289
4290 static struct ext4_sb_info *ext4_alloc_sbi(struct super_block *sb)
4291 {
4292 struct ext4_sb_info *sbi;
4293
4294 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
4295 if (!sbi)
4296 return NULL;
4297
4298 sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->s_dax_part_off,
4299 NULL, NULL);
4300
4301 sbi->s_blockgroup_lock =
4302 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
4303
4304 if (!sbi->s_blockgroup_lock)
4305 goto err_out;
4306
4307 sb->s_fs_info = sbi;
4308 sbi->s_sb = sb;
4309 return sbi;
4310 err_out:
4311 fs_put_dax(sbi->s_daxdev, NULL);
4312 kfree(sbi);
4313 return NULL;
4314 }
4315
4316 static void ext4_set_def_opts(struct super_block *sb,
4317 struct ext4_super_block *es)
4318 {
4319 unsigned long def_mount_opts;
4320
4321 /* Set defaults before we parse the mount options */
4322 def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
4323 set_opt(sb, INIT_INODE_TABLE);
4324 if (def_mount_opts & EXT4_DEFM_DEBUG)
4325 set_opt(sb, DEBUG);
4326 if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
4327 set_opt(sb, GRPID);
4328 if (def_mount_opts & EXT4_DEFM_UID16)
4329 set_opt(sb, NO_UID32);
4330 /* xattr user namespace & acls are now defaulted on */
4331 set_opt(sb, XATTR_USER);
4332 #ifdef CONFIG_EXT4_FS_POSIX_ACL
4333 set_opt(sb, POSIX_ACL);
4334 #endif
4335 if (ext4_has_feature_fast_commit(sb))
4336 set_opt2(sb, JOURNAL_FAST_COMMIT);
4337 /* don't forget to enable journal_csum when metadata_csum is enabled. */
4338 if (ext4_has_metadata_csum(sb))
4339 set_opt(sb, JOURNAL_CHECKSUM);
4340
4341 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
4342 set_opt(sb, JOURNAL_DATA);
4343 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
4344 set_opt(sb, ORDERED_DATA);
4345 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
4346 set_opt(sb, WRITEBACK_DATA);
4347
4348 if (le16_to_cpu(es->s_errors) == EXT4_ERRORS_PANIC)
4349 set_opt(sb, ERRORS_PANIC);
4350 else if (le16_to_cpu(es->s_errors) == EXT4_ERRORS_CONTINUE)
4351 set_opt(sb, ERRORS_CONT);
4352 else
4353 set_opt(sb, ERRORS_RO);
4354 /* block_validity enabled by default; disable with noblock_validity */
4355 set_opt(sb, BLOCK_VALIDITY);
4356 if (def_mount_opts & EXT4_DEFM_DISCARD)
4357 set_opt(sb, DISCARD);
4358
4359 if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
4360 set_opt(sb, BARRIER);
4361
4362 /*
4363 * enable delayed allocation by default
4364 * Use -o nodelalloc to turn it off
4365 */
4366 if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) &&
4367 ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
4368 set_opt(sb, DELALLOC);
4369
4370 if (sb->s_blocksize <= PAGE_SIZE)
4371 set_opt(sb, DIOREAD_NOLOCK);
4372 }
4373
4374 static int ext4_handle_clustersize(struct super_block *sb)
4375 {
4376 struct ext4_sb_info *sbi = EXT4_SB(sb);
4377 struct ext4_super_block *es = sbi->s_es;
4378 int clustersize;
4379
4380 /* Handle clustersize */
4381 clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
4382 if (ext4_has_feature_bigalloc(sb)) {
4383 if (clustersize < sb->s_blocksize) {
4384 ext4_msg(sb, KERN_ERR,
4385 "cluster size (%d) smaller than "
4386 "block size (%lu)", clustersize, sb->s_blocksize);
4387 return -EINVAL;
4388 }
4389 sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
4390 le32_to_cpu(es->s_log_block_size);
4391 } else {
4392 if (clustersize != sb->s_blocksize) {
4393 ext4_msg(sb, KERN_ERR,
4394 "fragment/cluster size (%d) != "
4395 "block size (%lu)", clustersize, sb->s_blocksize);
4396 return -EINVAL;
4397 }
4398 if (sbi->s_blocks_per_group > sb->s_blocksize * 8) {
4399 ext4_msg(sb, KERN_ERR,
4400 "#blocks per group too big: %lu",
4401 sbi->s_blocks_per_group);
4402 return -EINVAL;
4403 }
4404 sbi->s_cluster_bits = 0;
4405 }
4406 sbi->s_clusters_per_group = le32_to_cpu(es->s_clusters_per_group);
4407 if (sbi->s_clusters_per_group > sb->s_blocksize * 8) {
4408 ext4_msg(sb, KERN_ERR, "#clusters per group too big: %lu",
4409 sbi->s_clusters_per_group);
4410 return -EINVAL;
4411 }
4412 if (sbi->s_blocks_per_group !=
4413 (sbi->s_clusters_per_group * (clustersize / sb->s_blocksize))) {
4414 ext4_msg(sb, KERN_ERR,
4415 "blocks per group (%lu) and clusters per group (%lu) inconsistent",
4416 sbi->s_blocks_per_group, sbi->s_clusters_per_group);
4417 return -EINVAL;
4418 }
4419 sbi->s_cluster_ratio = clustersize / sb->s_blocksize;
4420
4421 /* Do we have standard group size of clustersize * 8 blocks ? */
4422 if (sbi->s_blocks_per_group == clustersize << 3)
4423 set_opt2(sb, STD_GROUP_SIZE);
4424
4425 return 0;
4426 }
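
/*
 * Worked example (illustrative): a bigalloc filesystem with 4KiB
 * blocks (s_log_block_size == 2) and 64KiB clusters
 * (s_log_cluster_size == 6) gets s_cluster_bits == 4 and
 * s_cluster_ratio == 16, so s_blocks_per_group must equal
 * 16 * s_clusters_per_group.
 */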
4427
4428 static void ext4_fast_commit_init(struct super_block *sb)
4429 {
4430 struct ext4_sb_info *sbi = EXT4_SB(sb);
4431
4432 /* Initialize fast commit stuff */
4433 atomic_set(&sbi->s_fc_subtid, 0);
4434 INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_MAIN]);
4435 INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_STAGING]);
4436 INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]);
4437 INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]);
4438 sbi->s_fc_bytes = 0;
4439 ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
4440 sbi->s_fc_ineligible_tid = 0;
4441 spin_lock_init(&sbi->s_fc_lock);
4442 memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
4443 sbi->s_fc_replay_state.fc_regions = NULL;
4444 sbi->s_fc_replay_state.fc_regions_size = 0;
4445 sbi->s_fc_replay_state.fc_regions_used = 0;
4446 sbi->s_fc_replay_state.fc_regions_valid = 0;
4447 sbi->s_fc_replay_state.fc_modified_inodes = NULL;
4448 sbi->s_fc_replay_state.fc_modified_inodes_size = 0;
4449 sbi->s_fc_replay_state.fc_modified_inodes_used = 0;
4450 }
4451
4452 static int ext4_inode_info_init(struct super_block *sb,
4453 struct ext4_super_block *es)
4454 {
4455 struct ext4_sb_info *sbi = EXT4_SB(sb);
4456
4457 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
4458 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
4459 sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
4460 } else {
4461 sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
4462 sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
4463 if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) {
4464 ext4_msg(sb, KERN_ERR, "invalid first ino: %u",
4465 sbi->s_first_ino);
4466 return -EINVAL;
4467 }
4468 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
4469 (!is_power_of_2(sbi->s_inode_size)) ||
4470 (sbi->s_inode_size > sb->s_blocksize)) {
4471 ext4_msg(sb, KERN_ERR,
4472 "unsupported inode size: %d",
4473 sbi->s_inode_size);
4474 ext4_msg(sb, KERN_ERR, "blocksize: %lu", sb->s_blocksize);
4475 return -EINVAL;
4476 }
4477 /*
4478 * i_atime_extra is the last extra field available for
4479 * [acm]times in struct ext4_inode. Checking for that
4480 * field should suffice to ensure we have extra space
4481 * for all three.
4482 */
4483 if (sbi->s_inode_size >= offsetof(struct ext4_inode, i_atime_extra) +
4484 sizeof(((struct ext4_inode *)0)->i_atime_extra)) {
4485 sb->s_time_gran = 1;
4486 sb->s_time_max = EXT4_EXTRA_TIMESTAMP_MAX;
4487 } else {
4488 sb->s_time_gran = NSEC_PER_SEC;
4489 sb->s_time_max = EXT4_NON_EXTRA_TIMESTAMP_MAX;
4490 }
4491 sb->s_time_min = EXT4_TIMESTAMP_MIN;
4492 }
4493
4494 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
4495 sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
4496 EXT4_GOOD_OLD_INODE_SIZE;
4497 if (ext4_has_feature_extra_isize(sb)) {
4498 unsigned v, max = (sbi->s_inode_size -
4499 EXT4_GOOD_OLD_INODE_SIZE);
4500
4501 v = le16_to_cpu(es->s_want_extra_isize);
4502 if (v > max) {
4503 ext4_msg(sb, KERN_ERR,
4504 "bad s_want_extra_isize: %d", v);
4505 return -EINVAL;
4506 }
4507 if (sbi->s_want_extra_isize < v)
4508 sbi->s_want_extra_isize = v;
4509
4510 v = le16_to_cpu(es->s_min_extra_isize);
4511 if (v > max) {
4512 ext4_msg(sb, KERN_ERR,
4513 "bad s_min_extra_isize: %d", v);
4514 return -EINVAL;
4515 }
4516 if (sbi->s_want_extra_isize < v)
4517 sbi->s_want_extra_isize = v;
4518 }
4519 }
4520
4521 return 0;
4522 }
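
/*
 * Example (illustrative): with 256-byte on-disk inodes,
 * s_want_extra_isize starts at sizeof(struct ext4_inode) - 128 and is
 * only raised by the superblock's s_{want,min}_extra_isize hints;
 * values larger than 256 - 128 bytes are rejected as corruption.
 */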
4523
4524 #if IS_ENABLED(CONFIG_UNICODE)
4525 static int ext4_encoding_init(struct super_block *sb, struct ext4_super_block *es)
4526 {
4527 const struct ext4_sb_encodings *encoding_info;
4528 struct unicode_map *encoding;
4529 __u16 encoding_flags = le16_to_cpu(es->s_encoding_flags);
4530
4531 if (!ext4_has_feature_casefold(sb) || sb->s_encoding)
4532 return 0;
4533
4534 encoding_info = ext4_sb_read_encoding(es);
4535 if (!encoding_info) {
4536 ext4_msg(sb, KERN_ERR,
4537 "Encoding requested by superblock is unknown");
4538 return -EINVAL;
4539 }
4540
4541 encoding = utf8_load(encoding_info->version);
4542 if (IS_ERR(encoding)) {
4543 ext4_msg(sb, KERN_ERR,
4544 "can't mount with superblock charset: %s-%u.%u.%u "
4545 "not supported by the kernel. flags: 0x%x.",
4546 encoding_info->name,
4547 unicode_major(encoding_info->version),
4548 unicode_minor(encoding_info->version),
4549 unicode_rev(encoding_info->version),
4550 encoding_flags);
4551 return -EINVAL;
4552 }
4553 ext4_msg(sb, KERN_INFO, "Using encoding defined by superblock: "
4554 "%s-%u.%u.%u with flags 0x%hx", encoding_info->name,
4555 unicode_major(encoding_info->version),
4556 unicode_minor(encoding_info->version),
4557 unicode_rev(encoding_info->version),
4558 encoding_flags);
4559
4560 sb->s_encoding = encoding;
4561 sb->s_encoding_flags = encoding_flags;
4562
4563 return 0;
4564 }
4565 #else
4566 static inline int ext4_encoding_init(struct super_block *sb, struct ext4_super_block *es)
4567 {
4568 return 0;
4569 }
4570 #endif
4571
4572 static int ext4_init_metadata_csum(struct super_block *sb, struct ext4_super_block *es)
4573 {
4574 struct ext4_sb_info *sbi = EXT4_SB(sb);
4575
4576 /* Warn if metadata_csum and gdt_csum are both set. */
4577 if (ext4_has_feature_metadata_csum(sb) &&
4578 ext4_has_feature_gdt_csum(sb))
4579 ext4_warning(sb, "metadata_csum and uninit_bg are "
4580 "redundant flags; please run fsck.");
4581
4582 /* Check for a known checksum algorithm */
4583 if (!ext4_verify_csum_type(sb, es)) {
4584 ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
4585 "unknown checksum algorithm.");
4586 return -EINVAL;
4587 }
4588 ext4_setup_csum_trigger(sb, EXT4_JTR_ORPHAN_FILE,
4589 ext4_orphan_file_block_trigger);
4590
4591 /* Load the checksum driver */
4592 sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
4593 if (IS_ERR(sbi->s_chksum_driver)) {
4594 int ret = PTR_ERR(sbi->s_chksum_driver);
4595 ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
4596 sbi->s_chksum_driver = NULL;
4597 return ret;
4598 }
4599
4600 /* Check superblock checksum */
4601 if (!ext4_superblock_csum_verify(sb, es)) {
4602 ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
4603 "invalid superblock checksum. Run e2fsck?");
4604 return -EFSBADCRC;
4605 }
4606
4607 /* Precompute checksum seed for all metadata */
4608 if (ext4_has_feature_csum_seed(sb))
4609 sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed);
4610 else if (ext4_has_metadata_csum(sb) || ext4_has_feature_ea_inode(sb))
4611 sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
4612 sizeof(es->s_uuid));
4613 return 0;
4614 }
4615
4616 static int ext4_check_feature_compatibility(struct super_block *sb,
4617 struct ext4_super_block *es,
4618 int silent)
4619 {
4620 struct ext4_sb_info *sbi = EXT4_SB(sb);
4621
4622 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
4623 (ext4_has_compat_features(sb) ||
4624 ext4_has_ro_compat_features(sb) ||
4625 ext4_has_incompat_features(sb)))
4626 ext4_msg(sb, KERN_WARNING,
4627 "feature flags set on rev 0 fs, "
4628 "running e2fsck is recommended");
4629
4630 if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) {
4631 set_opt2(sb, HURD_COMPAT);
4632 if (ext4_has_feature_64bit(sb)) {
4633 ext4_msg(sb, KERN_ERR,
4634 "The Hurd can't support 64-bit file systems");
4635 return -EINVAL;
4636 }
4637
4638 /*
4639 * ea_inode feature uses l_i_version field which is not
4640 * available in HURD_COMPAT mode.
4641 */
4642 if (ext4_has_feature_ea_inode(sb)) {
4643 ext4_msg(sb, KERN_ERR,
4644 "ea_inode feature is not supported for Hurd");
4645 return -EINVAL;
4646 }
4647 }
4648
4649 if (IS_EXT2_SB(sb)) {
4650 if (ext2_feature_set_ok(sb))
4651 ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
4652 "using the ext4 subsystem");
4653 else {
4654 /*
4655 * If we're probing, be silent if this looks like
4656 * it's actually an ext[34] filesystem.
4657 */
4658 if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
4659 return -EINVAL;
4660 ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
4661 "to feature incompatibilities");
4662 return -EINVAL;
4663 }
4664 }
4665
4666 if (IS_EXT3_SB(sb)) {
4667 if (ext3_feature_set_ok(sb))
4668 ext4_msg(sb, KERN_INFO, "mounting ext3 file system "
4669 "using the ext4 subsystem");
4670 else {
4671 /*
4672 * If we're probing, be silent if this looks like
4673 * it's actually an ext4 filesystem.
4674 */
4675 if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
4676 return -EINVAL;
4677 ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
4678 "to feature incompatibilities");
4679 return -EINVAL;
4680 }
4681 }
4682
4683 /*
4684 * Check feature flags regardless of the revision level, since we
4685 * previously didn't change the revision level when setting the flags,
4686 * so there is a chance incompat flags are set on a rev 0 filesystem.
4687 */
4688 if (!ext4_feature_set_ok(sb, (sb_rdonly(sb))))
4689 return -EINVAL;
4690
4691 if (sbi->s_daxdev) {
4692 if (sb->s_blocksize == PAGE_SIZE)
4693 set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
4694 else
4695 ext4_msg(sb, KERN_ERR, "unsupported blocksize for DAX\n");
4696 }
4697
4698 if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
4699 if (ext4_has_feature_inline_data(sb)) {
4700 ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
4701 " that may contain inline data");
4702 return -EINVAL;
4703 }
4704 if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags)) {
4705 ext4_msg(sb, KERN_ERR,
4706 "DAX unsupported by block device.");
4707 return -EINVAL;
4708 }
4709 }
4710
4711 if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) {
4712 ext4_msg(sb, KERN_ERR, "Unsupported encryption level %d",
4713 es->s_encryption_level);
4714 return -EINVAL;
4715 }
4716
4717 return 0;
4718 }
4719
4720 static int ext4_check_geometry(struct super_block *sb,
4721 struct ext4_super_block *es)
4722 {
4723 struct ext4_sb_info *sbi = EXT4_SB(sb);
4724 __u64 blocks_count;
4725 int err;
4726
4727 if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (sb->s_blocksize / 4)) {
4728 ext4_msg(sb, KERN_ERR,
4729 "Number of reserved GDT blocks insanely large: %d",
4730 le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks));
4731 return -EINVAL;
4732 }
4733 /*
4734 * Test whether we have more sectors than will fit in sector_t,
4735 * and whether the max offset is addressable by the page cache.
4736 */
4737 err = generic_check_addressable(sb->s_blocksize_bits,
4738 ext4_blocks_count(es));
4739 if (err) {
4740 ext4_msg(sb, KERN_ERR, "filesystem"
4741 " too large to mount safely on this system");
4742 return err;
4743 }
4744
4745 /* check blocks count against device size */
4746 blocks_count = sb_bdev_nr_blocks(sb);
4747 if (blocks_count && ext4_blocks_count(es) > blocks_count) {
4748 ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
4749 "exceeds size of device (%llu blocks)",
4750 ext4_blocks_count(es), blocks_count);
4751 return -EINVAL;
4752 }
4753
4754 /*
4755 * It makes no sense for the first data block to be beyond the end
4756 * of the filesystem.
4757 */
4758 if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
4759 ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
4760 "block %u is beyond end of filesystem (%llu)",
4761 le32_to_cpu(es->s_first_data_block),
4762 ext4_blocks_count(es));
4763 return -EINVAL;
4764 }
4765 if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) &&
4766 (sbi->s_cluster_ratio == 1)) {
4767 ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
4768 "block is 0 with a 1k block and cluster size");
4769 return -EINVAL;
4770 }
4771
4772 blocks_count = (ext4_blocks_count(es) -
4773 le32_to_cpu(es->s_first_data_block) +
4774 EXT4_BLOCKS_PER_GROUP(sb) - 1);
4775 do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
4776 if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
4777 ext4_msg(sb, KERN_WARNING, "groups count too large: %llu "
4778 "(block count %llu, first data block %u, "
4779 "blocks per group %lu)", blocks_count,
4780 ext4_blocks_count(es),
4781 le32_to_cpu(es->s_first_data_block),
4782 EXT4_BLOCKS_PER_GROUP(sb));
4783 return -EINVAL;
4784 }
4785 sbi->s_groups_count = blocks_count;
4786 sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
4787 (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
4788 if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) !=
4789 le32_to_cpu(es->s_inodes_count)) {
4790 ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu",
4791 le32_to_cpu(es->s_inodes_count),
4792 ((u64)sbi->s_groups_count * sbi->s_inodes_per_group));
4793 return -EINVAL;
4794 }
4795
4796 return 0;
4797 }
4798
4799 static int ext4_group_desc_init(struct super_block *sb,
4800 struct ext4_super_block *es,
4801 ext4_fsblk_t logical_sb_block,
4802 ext4_group_t *first_not_zeroed)
4803 {
4804 struct ext4_sb_info *sbi = EXT4_SB(sb);
4805 unsigned int db_count;
4806 ext4_fsblk_t block;
4807 int i;
4808
4809 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
4810 EXT4_DESC_PER_BLOCK(sb);
4811 if (ext4_has_feature_meta_bg(sb)) {
4812 if (le32_to_cpu(es->s_first_meta_bg) > db_count) {
4813 ext4_msg(sb, KERN_WARNING,
4814 "first meta block group too large: %u "
4815 "(group descriptor block count %u)",
4816 le32_to_cpu(es->s_first_meta_bg), db_count);
4817 return -EINVAL;
4818 }
4819 }
4820 rcu_assign_pointer(sbi->s_group_desc,
4821 kvmalloc_array(db_count,
4822 sizeof(struct buffer_head *),
4823 GFP_KERNEL));
4824 if (sbi->s_group_desc == NULL) {
4825 ext4_msg(sb, KERN_ERR, "not enough memory");
4826 return -ENOMEM;
4827 }
4828
4829 bgl_lock_init(sbi->s_blockgroup_lock);
4830
4831 /* Pre-read the descriptors into the buffer cache */
4832 for (i = 0; i < db_count; i++) {
4833 block = descriptor_loc(sb, logical_sb_block, i);
4834 ext4_sb_breadahead_unmovable(sb, block);
4835 }
4836
4837 for (i = 0; i < db_count; i++) {
4838 struct buffer_head *bh;
4839
4840 block = descriptor_loc(sb, logical_sb_block, i);
4841 bh = ext4_sb_bread_unmovable(sb, block);
4842 if (IS_ERR(bh)) {
4843 ext4_msg(sb, KERN_ERR,
4844 "can't read group descriptor %d", i);
4845 sbi->s_gdb_count = i;
4846 return PTR_ERR(bh);
4847 }
4848 rcu_read_lock();
4849 rcu_dereference(sbi->s_group_desc)[i] = bh;
4850 rcu_read_unlock();
4851 }
4852 sbi->s_gdb_count = db_count;
4853 if (!ext4_check_descriptors(sb, logical_sb_block, first_not_zeroed)) {
4854 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
4855 return -EFSCORRUPTED;
4856 }
4857
4858 return 0;
4859 }
4860
4861 static int ext4_load_and_init_journal(struct super_block *sb,
4862 struct ext4_super_block *es,
4863 struct ext4_fs_context *ctx)
4864 {
4865 struct ext4_sb_info *sbi = EXT4_SB(sb);
4866 int err;
4867
4868 err = ext4_load_journal(sb, es, ctx->journal_devnum);
4869 if (err)
4870 return err;
4871
4872 if (ext4_has_feature_64bit(sb) &&
4873 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
4874 JBD2_FEATURE_INCOMPAT_64BIT)) {
4875 ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
4876 goto out;
4877 }
4878
4879 if (!set_journal_csum_feature_set(sb)) {
4880 ext4_msg(sb, KERN_ERR, "Failed to set journal checksum "
4881 "feature set");
4882 goto out;
4883 }
4884
4885 if (test_opt2(sb, JOURNAL_FAST_COMMIT) &&
4886 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
4887 JBD2_FEATURE_INCOMPAT_FAST_COMMIT)) {
4888 ext4_msg(sb, KERN_ERR,
4889 "Failed to set fast commit journal feature");
4890 goto out;
4891 }
4892
4893 /* We have now updated the journal if required, so we can
4894 * validate the data journaling mode. */
4895 switch (test_opt(sb, DATA_FLAGS)) {
4896 case 0:
4897 /* No mode set, assume a default based on the journal
4898 * capabilities: ORDERED_DATA if the journal can
4899 * cope, else JOURNAL_DATA
4900 */
4901 if (jbd2_journal_check_available_features
4902 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
4903 set_opt(sb, ORDERED_DATA);
4904 sbi->s_def_mount_opt |= EXT4_MOUNT_ORDERED_DATA;
4905 } else {
4906 set_opt(sb, JOURNAL_DATA);
4907 sbi->s_def_mount_opt |= EXT4_MOUNT_JOURNAL_DATA;
4908 }
4909 break;
4910
4911 case EXT4_MOUNT_ORDERED_DATA:
4912 case EXT4_MOUNT_WRITEBACK_DATA:
4913 if (!jbd2_journal_check_available_features
4914 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
4915 ext4_msg(sb, KERN_ERR, "Journal does not support "
4916 "requested data journaling mode");
4917 goto out;
4918 }
4919 break;
4920 default:
4921 break;
4922 }
4923
4924 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
4925 test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
4926 ext4_msg(sb, KERN_ERR, "can't mount with "
4927 "journal_async_commit in data=ordered mode");
4928 goto out;
4929 }
4930
4931 set_task_ioprio(sbi->s_journal->j_task, ctx->journal_ioprio);
4932
4933 sbi->s_journal->j_submit_inode_data_buffers =
4934 ext4_journal_submit_inode_data_buffers;
4935 sbi->s_journal->j_finish_inode_data_buffers =
4936 ext4_journal_finish_inode_data_buffers;
4937
4938 return 0;
4939
4940 out:
4941 /* flush s_sb_upd_work before destroying the journal. */
4942 flush_work(&sbi->s_sb_upd_work);
4943 jbd2_journal_destroy(sbi->s_journal);
4944 sbi->s_journal = NULL;
4945 return -EINVAL;
4946 }
4947
4948 static int ext4_check_journal_data_mode(struct super_block *sb)
4949 {
4950 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
4951 printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with "
4952 "data=journal disables delayed allocation, "
4953 "dioread_nolock, O_DIRECT and fast_commit support!\n");
4954 /* can't mount with both data=journal and dioread_nolock. */
4955 clear_opt(sb, DIOREAD_NOLOCK);
4956 clear_opt2(sb, JOURNAL_FAST_COMMIT);
4957 if (test_opt2(sb, EXPLICIT_DELALLOC)) {
4958 ext4_msg(sb, KERN_ERR, "can't mount with "
4959 "both data=journal and delalloc");
4960 return -EINVAL;
4961 }
4962 if (test_opt(sb, DAX_ALWAYS)) {
4963 ext4_msg(sb, KERN_ERR, "can't mount with "
4964 "both data=journal and dax");
4965 return -EINVAL;
4966 }
4967 if (ext4_has_feature_encrypt(sb)) {
4968 ext4_msg(sb, KERN_WARNING,
4969 "encrypted files will use data=ordered "
4970 "instead of data journaling mode");
4971 }
4972 if (test_opt(sb, DELALLOC))
4973 clear_opt(sb, DELALLOC);
4974 } else {
4975 sb->s_iflags |= SB_I_CGROUPWB;
4976 }
4977
4978 return 0;
4979 }
4980
4981 static int ext4_load_super(struct super_block *sb, ext4_fsblk_t *lsb,
4982 int silent)
4983 {
4984 struct ext4_sb_info *sbi = EXT4_SB(sb);
4985 struct ext4_super_block *es;
4986 ext4_fsblk_t logical_sb_block;
4987 unsigned long offset = 0;
4988 struct buffer_head *bh;
4989 int ret = -EINVAL;
4990 int blocksize;
4991
4992 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
4993 if (!blocksize) {
4994 ext4_msg(sb, KERN_ERR, "unable to set blocksize");
4995 return -EINVAL;
4996 }
4997
4998 /*
4999 * The ext4 superblock will not be buffer aligned for other than 1kB
5000 * block sizes. We need to calculate the offset from buffer start.
5001 */
5002 if (blocksize != EXT4_MIN_BLOCK_SIZE) {
5003 logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE;
5004 offset = do_div(logical_sb_block, blocksize);
5005 } else {
5006 logical_sb_block = sbi->s_sb_block;
5007 }
5008
5009 bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
5010 if (IS_ERR(bh)) {
5011 ext4_msg(sb, KERN_ERR, "unable to read superblock");
5012 return PTR_ERR(bh);
5013 }
5014 /*
5015 * Note: s_es must be initialized as soon as possible because
5016 * some ext4 macro-instructions depend on its value
5017 */
5018 es = (struct ext4_super_block *) (bh->b_data + offset);
5019 sbi->s_es = es;
5020 sb->s_magic = le16_to_cpu(es->s_magic);
5021 if (sb->s_magic != EXT4_SUPER_MAGIC) {
5022 if (!silent)
5023 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
5024 goto out;
5025 }
5026
5027 if (le32_to_cpu(es->s_log_block_size) >
5028 (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
5029 ext4_msg(sb, KERN_ERR,
5030 "Invalid log block size: %u",
5031 le32_to_cpu(es->s_log_block_size));
5032 goto out;
5033 }
5034 if (le32_to_cpu(es->s_log_cluster_size) >
5035 (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
5036 ext4_msg(sb, KERN_ERR,
5037 "Invalid log cluster size: %u",
5038 le32_to_cpu(es->s_log_cluster_size));
5039 goto out;
5040 }
5041
5042 blocksize = EXT4_MIN_BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
5043
5044 /*
5045 * If the default block size is not the same as the real block size,
5046 * we need to reload it.
5047 */
5048 if (sb->s_blocksize == blocksize) {
5049 *lsb = logical_sb_block;
5050 sbi->s_sbh = bh;
5051 return 0;
5052 }
5053
5054 /*
5055	 * bh must be released before kill_bdev(), otherwise neither the
5056	 * buffer nor its page will be freed. kill_bdev() is called by
5057	 * sb_set_blocksize().
5058 */
5059 brelse(bh);
5060 /* Validate the filesystem blocksize */
5061 if (!sb_set_blocksize(sb, blocksize)) {
5062 ext4_msg(sb, KERN_ERR, "bad block size %d",
5063 blocksize);
5064 bh = NULL;
5065 goto out;
5066 }
5067
5068 logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE;
5069 offset = do_div(logical_sb_block, blocksize);
5070 bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
5071 if (IS_ERR(bh)) {
5072 ext4_msg(sb, KERN_ERR, "Can't read superblock on 2nd try");
5073 ret = PTR_ERR(bh);
5074 bh = NULL;
5075 goto out;
5076 }
5077 es = (struct ext4_super_block *)(bh->b_data + offset);
5078 sbi->s_es = es;
5079 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
5080 ext4_msg(sb, KERN_ERR, "Magic mismatch, very weird!");
5081 goto out;
5082 }
5083 *lsb = logical_sb_block;
5084 sbi->s_sbh = bh;
5085 return 0;
5086 out:
5087 brelse(bh);
5088 return ret;
5089 }
5090
5091 static int ext4_hash_info_init(struct super_block *sb)
5092 {
5093 struct ext4_sb_info *sbi = EXT4_SB(sb);
5094 struct ext4_super_block *es = sbi->s_es;
5095 unsigned int i;
5096
5097 sbi->s_def_hash_version = es->s_def_hash_version;
5098
5099 if (sbi->s_def_hash_version > DX_HASH_LAST) {
5100 ext4_msg(sb, KERN_ERR,
5101 "Invalid default hash set in the superblock");
5102 return -EINVAL;
5103 } else if (sbi->s_def_hash_version == DX_HASH_SIPHASH) {
5104 ext4_msg(sb, KERN_ERR,
5105 "SIPHASH is not a valid default hash value");
5106 return -EINVAL;
5107 }
5108
5109 for (i = 0; i < 4; i++)
5110 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
5111
5112 if (ext4_has_feature_dir_index(sb)) {
5113 i = le32_to_cpu(es->s_flags);
5114 if (i & EXT2_FLAGS_UNSIGNED_HASH)
5115 sbi->s_hash_unsigned = 3;
5116 else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
5117 #ifdef __CHAR_UNSIGNED__
5118 if (!sb_rdonly(sb))
5119 es->s_flags |=
5120 cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
5121 sbi->s_hash_unsigned = 3;
5122 #else
5123 if (!sb_rdonly(sb))
5124 es->s_flags |=
5125 cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
5126 #endif
5127 }
5128 }
5129 return 0;
5130 }
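/*
 * Historical note: the legacy dx hash mixed bytes through plain "char",
 * whose signedness varies by architecture. The superblock flags record
 * which variant an existing filesystem uses so directory hashes keep
 * matching; on a first r/w mount with neither flag set, the flag
 * matching the current architecture is written back above.
 */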
5131
5132 static int ext4_block_group_meta_init(struct super_block *sb, int silent)
5133 {
5134 struct ext4_sb_info *sbi = EXT4_SB(sb);
5135 struct ext4_super_block *es = sbi->s_es;
5136 int has_huge_files;
5137
5138 has_huge_files = ext4_has_feature_huge_file(sb);
5139 sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
5140 has_huge_files);
5141 sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
5142
5143 sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
5144 if (ext4_has_feature_64bit(sb)) {
5145 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
5146 sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
5147 !is_power_of_2(sbi->s_desc_size)) {
5148 ext4_msg(sb, KERN_ERR,
5149 "unsupported descriptor size %lu",
5150 sbi->s_desc_size);
5151 return -EINVAL;
5152 }
5153 } else
5154 sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
5155
5156 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
5157 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
5158
5159 sbi->s_inodes_per_block = sb->s_blocksize / EXT4_INODE_SIZE(sb);
5160 if (sbi->s_inodes_per_block == 0 || sbi->s_blocks_per_group == 0) {
5161 if (!silent)
5162 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
5163 return -EINVAL;
5164 }
5165 if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
5166 sbi->s_inodes_per_group > sb->s_blocksize * 8) {
5167 ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n",
5168 sbi->s_inodes_per_group);
5169 return -EINVAL;
5170 }
5171 sbi->s_itb_per_group = sbi->s_inodes_per_group /
5172 sbi->s_inodes_per_block;
5173 sbi->s_desc_per_block = sb->s_blocksize / EXT4_DESC_SIZE(sb);
5174 sbi->s_mount_state = le16_to_cpu(es->s_state) & ~EXT4_FC_REPLAY;
5175 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
5176 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
5177
5178 return 0;
5179 }
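/*
 * Worked example: with 4kB blocks and 256-byte on-disk inodes,
 * s_inodes_per_block = 4096 / 256 = 16; with the common 8192 inodes per
 * group, s_itb_per_group = 8192 / 16 = 512 inode-table blocks per
 * group. The sb->s_blocksize * 8 bound above reflects the inode bitmap:
 * one 4kB block of bits can track at most 32768 inodes.
 */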
5180
5181 /*
5182	 * It is hard to get stripe-aligned blocks if the stripe is not aligned
5183	 * with the cluster size, so disable the stripe and alert the user: this
5184	 * simplifies the code and avoids stripe-aligned allocation, which would rarely succeed.
5185 */
5186 static bool ext4_is_stripe_incompatible(struct super_block *sb, unsigned long stripe)
5187 {
5188 struct ext4_sb_info *sbi = EXT4_SB(sb);
5189 return (stripe > 0 && sbi->s_cluster_ratio > 1 &&
5190 stripe % sbi->s_cluster_ratio != 0);
5191 }
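/*
 * For example, with bigalloc and s_cluster_ratio = 16 (16 blocks per
 * cluster), stripe = 32 is compatible (32 % 16 == 0) while stripe = 24
 * is not (24 % 16 == 8), so the caller disables it.
 */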
5192
5193 static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
5194 {
5195 struct ext4_super_block *es = NULL;
5196 struct ext4_sb_info *sbi = EXT4_SB(sb);
5197 ext4_fsblk_t logical_sb_block;
5198 struct inode *root;
5199 int needs_recovery;
5200 int err;
5201 ext4_group_t first_not_zeroed;
5202 struct ext4_fs_context *ctx = fc->fs_private;
5203 int silent = fc->sb_flags & SB_SILENT;
5204
5205 /* Set defaults for the variables that will be set during parsing */
5206 if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO))
5207 ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
5208
5209 sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
5210 sbi->s_sectors_written_start =
5211 part_stat_read(sb->s_bdev, sectors[STAT_WRITE]);
5212
5213 err = ext4_load_super(sb, &logical_sb_block, silent);
5214 if (err)
5215 goto out_fail;
5216
5217 es = sbi->s_es;
5218 sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
5219
5220 err = ext4_init_metadata_csum(sb, es);
5221 if (err)
5222 goto failed_mount;
5223
5224 ext4_set_def_opts(sb, es);
5225
5226 sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
5227 sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
5228 sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
5229 sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
5230 sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
5231
5232 /*
5233	 * Set the default s_li_wait_mult for lazyinit, in case no mount
5234	 * option is specified.
5235 */
5236 sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
5237
5238 err = ext4_inode_info_init(sb, es);
5239 if (err)
5240 goto failed_mount;
5241
5242 err = parse_apply_sb_mount_options(sb, ctx);
5243 if (err < 0)
5244 goto failed_mount;
5245
5246 sbi->s_def_mount_opt = sbi->s_mount_opt;
5247 sbi->s_def_mount_opt2 = sbi->s_mount_opt2;
5248
5249 err = ext4_check_opt_consistency(fc, sb);
5250 if (err < 0)
5251 goto failed_mount;
5252
5253 ext4_apply_options(fc, sb);
5254
5255 err = ext4_encoding_init(sb, es);
5256 if (err)
5257 goto failed_mount;
5258
5259 err = ext4_check_journal_data_mode(sb);
5260 if (err)
5261 goto failed_mount;
5262
5263 sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
5264 (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
5265
5266 /* i_version is always enabled now */
5267 sb->s_flags |= SB_I_VERSION;
5268
5269 err = ext4_check_feature_compatibility(sb, es, silent);
5270 if (err)
5271 goto failed_mount;
5272
5273 err = ext4_block_group_meta_init(sb, silent);
5274 if (err)
5275 goto failed_mount;
5276
5277 err = ext4_hash_info_init(sb);
5278 if (err)
5279 goto failed_mount;
5280
5281 err = ext4_handle_clustersize(sb);
5282 if (err)
5283 goto failed_mount;
5284
5285 err = ext4_check_geometry(sb, es);
5286 if (err)
5287 goto failed_mount;
5288
5289 timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
5290 spin_lock_init(&sbi->s_error_lock);
5291 INIT_WORK(&sbi->s_sb_upd_work, update_super_work);
5292
5293 err = ext4_group_desc_init(sb, es, logical_sb_block, &first_not_zeroed);
5294 if (err)
5295 goto failed_mount3;
5296
5297 err = ext4_es_register_shrinker(sbi);
5298 if (err)
5299 goto failed_mount3;
5300
5301 sbi->s_stripe = ext4_get_stripe_size(sbi);
5302 if (ext4_is_stripe_incompatible(sb, sbi->s_stripe)) {
5303 ext4_msg(sb, KERN_WARNING,
5304 "stripe (%lu) is not aligned with cluster size (%u), "
5305 "stripe is disabled",
5306 sbi->s_stripe, sbi->s_cluster_ratio);
5307 sbi->s_stripe = 0;
5308 }
5309 sbi->s_extent_max_zeroout_kb = 32;
5310
5311 /*
5312 * set up enough so that it can read an inode
5313 */
5314 sb->s_op = &ext4_sops;
5315 sb->s_export_op = &ext4_export_ops;
5316 sb->s_xattr = ext4_xattr_handlers;
5317 #ifdef CONFIG_FS_ENCRYPTION
5318 sb->s_cop = &ext4_cryptops;
5319 #endif
5320 #ifdef CONFIG_FS_VERITY
5321 sb->s_vop = &ext4_verityops;
5322 #endif
5323 #ifdef CONFIG_QUOTA
5324 sb->dq_op = &ext4_quota_operations;
5325 if (ext4_has_feature_quota(sb))
5326 sb->s_qcop = &dquot_quotactl_sysfile_ops;
5327 else
5328 sb->s_qcop = &ext4_qctl_operations;
5329 sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
5330 #endif
5331 super_set_uuid(sb, es->s_uuid, sizeof(es->s_uuid));
5332 super_set_sysfs_name_bdev(sb);
5333
5334 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
5335 mutex_init(&sbi->s_orphan_lock);
5336
5337 spin_lock_init(&sbi->s_bdev_wb_lock);
5338
5339 ext4_fast_commit_init(sb);
5340
5341 sb->s_root = NULL;
5342
5343 needs_recovery = (es->s_last_orphan != 0 ||
5344 ext4_has_feature_orphan_present(sb) ||
5345 ext4_has_feature_journal_needs_recovery(sb));
5346
5347 if (ext4_has_feature_mmp(sb) && !sb_rdonly(sb)) {
5348 err = ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block));
5349 if (err)
5350 goto failed_mount3a;
5351 }
5352
5353 err = -EINVAL;
5354 /*
5355 * The first inode we look at is the journal inode. Don't try
5356 * root first: it may be modified in the journal!
5357 */
5358 if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) {
5359 err = ext4_load_and_init_journal(sb, es, ctx);
5360 if (err)
5361 goto failed_mount3a;
5362 } else if (test_opt(sb, NOLOAD) && !sb_rdonly(sb) &&
5363 ext4_has_feature_journal_needs_recovery(sb)) {
5364 ext4_msg(sb, KERN_ERR, "required journal recovery "
5365 "suppressed and not mounted read-only");
5366 goto failed_mount3a;
5367 } else {
5368 /* Nojournal mode, all journal mount options are illegal */
5369 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
5370 ext4_msg(sb, KERN_ERR, "can't mount with "
5371 "journal_async_commit, fs mounted w/o journal");
5372 goto failed_mount3a;
5373 }
5374
5375 if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
5376 ext4_msg(sb, KERN_ERR, "can't mount with "
5377 "journal_checksum, fs mounted w/o journal");
5378 goto failed_mount3a;
5379 }
5380 if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
5381 ext4_msg(sb, KERN_ERR, "can't mount with "
5382 "commit=%lu, fs mounted w/o journal",
5383 sbi->s_commit_interval / HZ);
5384 goto failed_mount3a;
5385 }
5386 if (EXT4_MOUNT_DATA_FLAGS &
5387 (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
5388 ext4_msg(sb, KERN_ERR, "can't mount with "
5389 "data=, fs mounted w/o journal");
5390 goto failed_mount3a;
5391 }
5392 sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM;
5393 clear_opt(sb, JOURNAL_CHECKSUM);
5394 clear_opt(sb, DATA_FLAGS);
5395 clear_opt2(sb, JOURNAL_FAST_COMMIT);
5396 sbi->s_journal = NULL;
5397 needs_recovery = 0;
5398 }
5399
5400 if (!test_opt(sb, NO_MBCACHE)) {
5401 sbi->s_ea_block_cache = ext4_xattr_create_cache();
5402 if (!sbi->s_ea_block_cache) {
5403 ext4_msg(sb, KERN_ERR,
5404 "Failed to create ea_block_cache");
5405 err = -EINVAL;
5406 goto failed_mount_wq;
5407 }
5408
5409 if (ext4_has_feature_ea_inode(sb)) {
5410 sbi->s_ea_inode_cache = ext4_xattr_create_cache();
5411 if (!sbi->s_ea_inode_cache) {
5412 ext4_msg(sb, KERN_ERR,
5413 "Failed to create ea_inode_cache");
5414 err = -EINVAL;
5415 goto failed_mount_wq;
5416 }
5417 }
5418 }
5419
5420 /*
5421 * Get the # of file system overhead blocks from the
5422 * superblock if present.
5423 */
5424 sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
5425 /* ignore the precalculated value if it is ridiculous */
5426 if (sbi->s_overhead > ext4_blocks_count(es))
5427 sbi->s_overhead = 0;
5428 /*
5429 * If the bigalloc feature is not enabled recalculating the
5430 * overhead doesn't take long, so we might as well just redo
5431 * it to make sure we are using the correct value.
5432 */
5433 if (!ext4_has_feature_bigalloc(sb))
5434 sbi->s_overhead = 0;
5435 if (sbi->s_overhead == 0) {
5436 err = ext4_calculate_overhead(sb);
5437 if (err)
5438 goto failed_mount_wq;
5439 }
5440
5441 /*
5442 * The maximum number of concurrent works can be high and
5443 * concurrency isn't really necessary. Limit it to 1.
5444 */
5445 EXT4_SB(sb)->rsv_conversion_wq =
5446 alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
5447 if (!EXT4_SB(sb)->rsv_conversion_wq) {
5448 printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
5449 err = -ENOMEM;
5450 goto failed_mount4;
5451 }
5452
5453 /*
5454 * The jbd2_journal_load will have done any necessary log recovery,
5455 * so we can safely mount the rest of the filesystem now.
5456 */
5457
5458 root = ext4_iget(sb, EXT4_ROOT_INO, EXT4_IGET_SPECIAL);
5459 if (IS_ERR(root)) {
5460 ext4_msg(sb, KERN_ERR, "get root inode failed");
5461 err = PTR_ERR(root);
5462 root = NULL;
5463 goto failed_mount4;
5464 }
5465 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
5466 ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
5467 iput(root);
5468 err = -EFSCORRUPTED;
5469 goto failed_mount4;
5470 }
5471
5472 generic_set_sb_d_ops(sb);
5473 sb->s_root = d_make_root(root);
5474 if (!sb->s_root) {
5475 ext4_msg(sb, KERN_ERR, "get root dentry failed");
5476 err = -ENOMEM;
5477 goto failed_mount4;
5478 }
5479
5480 err = ext4_setup_super(sb, es, sb_rdonly(sb));
5481 if (err == -EROFS) {
5482 sb->s_flags |= SB_RDONLY;
5483 } else if (err)
5484 goto failed_mount4a;
5485
5486 ext4_set_resv_clusters(sb);
5487
5488 if (test_opt(sb, BLOCK_VALIDITY)) {
5489 err = ext4_setup_system_zone(sb);
5490 if (err) {
5491 ext4_msg(sb, KERN_ERR, "failed to initialize system "
5492 "zone (%d)", err);
5493 goto failed_mount4a;
5494 }
5495 }
5496 ext4_fc_replay_cleanup(sb);
5497
5498 ext4_ext_init(sb);
5499
5500 /*
5501 * Enable optimize_scan if number of groups is > threshold. This can be
5502 * turned off by passing "mb_optimize_scan=0". This can also be
5503 * turned on forcefully by passing "mb_optimize_scan=1".
5504 */
5505 if (!(ctx->spec & EXT4_SPEC_mb_optimize_scan)) {
5506 if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD)
5507 set_opt2(sb, MB_OPTIMIZE_SCAN);
5508 else
5509 clear_opt2(sb, MB_OPTIMIZE_SCAN);
5510 }
5511
5512 err = ext4_mb_init(sb);
5513 if (err) {
5514 ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
5515 err);
5516 goto failed_mount5;
5517 }
5518
5519 /*
5520 * We can only set up the journal commit callback once
5521 * mballoc is initialized
5522 */
5523 if (sbi->s_journal)
5524 sbi->s_journal->j_commit_callback =
5525 ext4_journal_commit_callback;
5526
5527 err = ext4_percpu_param_init(sbi);
5528 if (err)
5529 goto failed_mount6;
5530
5531 if (ext4_has_feature_flex_bg(sb))
5532 if (!ext4_fill_flex_info(sb)) {
5533 ext4_msg(sb, KERN_ERR,
5534 "unable to initialize "
5535 "flex_bg meta info!");
5536 err = -ENOMEM;
5537 goto failed_mount6;
5538 }
5539
5540 err = ext4_register_li_request(sb, first_not_zeroed);
5541 if (err)
5542 goto failed_mount6;
5543
5544 err = ext4_init_orphan_info(sb);
5545 if (err)
5546 goto failed_mount7;
5547 #ifdef CONFIG_QUOTA
5548 /* Enable quota usage during mount. */
5549 if (ext4_has_feature_quota(sb) && !sb_rdonly(sb)) {
5550 err = ext4_enable_quotas(sb);
5551 if (err)
5552 goto failed_mount8;
5553 }
5554 #endif /* CONFIG_QUOTA */
5555
5556 /*
5557 * Save the original bdev mapping's wb_err value which could be
5558 * used to detect the metadata async write error.
5559 */
5560 errseq_check_and_advance(&sb->s_bdev->bd_mapping->wb_err,
5561 &sbi->s_bdev_wb_err);
5562 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
5563 ext4_orphan_cleanup(sb, es);
5564 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
5565 /*
5566 * Update the checksum after updating free space/inode counters and
5567 * ext4_orphan_cleanup. Otherwise the superblock can have an incorrect
5568 * checksum in the buffer cache until it is written out and
5569 * e2fsprogs programs trying to open a file system immediately
5570 * after it is mounted can fail.
5571 */
5572 ext4_superblock_csum_set(sb);
5573 if (needs_recovery) {
5574 ext4_msg(sb, KERN_INFO, "recovery complete");
5575 err = ext4_mark_recovery_complete(sb, es);
5576 if (err)
5577 goto failed_mount9;
5578 }
5579
5580 if (test_opt(sb, DISCARD) && !bdev_max_discard_sectors(sb->s_bdev))
5581 ext4_msg(sb, KERN_WARNING,
5582 "mounting with \"discard\" option, but the device does not support discard");
5583
5584 if (es->s_error_count)
5585 mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
5586
5587 /* Enable message ratelimiting. Default is 10 messages per 5 secs. */
5588 ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10);
5589 ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
5590 ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
5591 atomic_set(&sbi->s_warning_count, 0);
5592 atomic_set(&sbi->s_msg_count, 0);
5593
5594 /* Register sysfs after all initializations are complete. */
5595 err = ext4_register_sysfs(sb);
5596 if (err)
5597 goto failed_mount9;
5598
5599 return 0;
5600
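	/*
	 * The error labels below unwind in reverse order of setup: each
	 * label releases what was acquired after the previous one, so a
	 * failure at any step jumps to the label that tears down exactly
	 * the state built so far.
	 */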
5601 failed_mount9:
5602 ext4_quotas_off(sb, EXT4_MAXQUOTAS);
5603 failed_mount8: __maybe_unused
5604 ext4_release_orphan_info(sb);
5605 failed_mount7:
5606 ext4_unregister_li_request(sb);
5607 failed_mount6:
5608 ext4_mb_release(sb);
5609 ext4_flex_groups_free(sbi);
5610 ext4_percpu_param_destroy(sbi);
5611 failed_mount5:
5612 ext4_ext_release(sb);
5613 ext4_release_system_zone(sb);
5614 failed_mount4a:
5615 dput(sb->s_root);
5616 sb->s_root = NULL;
5617 failed_mount4:
5618 ext4_msg(sb, KERN_ERR, "mount failed");
5619 if (EXT4_SB(sb)->rsv_conversion_wq)
5620 destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
5621 failed_mount_wq:
5622 ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
5623 sbi->s_ea_inode_cache = NULL;
5624
5625 ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
5626 sbi->s_ea_block_cache = NULL;
5627
5628 if (sbi->s_journal) {
5629 /* flush s_sb_upd_work before journal destroy. */
5630 flush_work(&sbi->s_sb_upd_work);
5631 jbd2_journal_destroy(sbi->s_journal);
5632 sbi->s_journal = NULL;
5633 }
5634 failed_mount3a:
5635 ext4_es_unregister_shrinker(sbi);
5636 failed_mount3:
5637 /* flush s_sb_upd_work before sbi destroy */
5638 flush_work(&sbi->s_sb_upd_work);
5639 ext4_stop_mmpd(sbi);
5640 del_timer_sync(&sbi->s_err_report);
5641 ext4_group_desc_free(sbi);
5642 failed_mount:
5643 if (sbi->s_chksum_driver)
5644 crypto_free_shash(sbi->s_chksum_driver);
5645
5646 #if IS_ENABLED(CONFIG_UNICODE)
5647 utf8_unload(sb->s_encoding);
5648 #endif
5649
5650 #ifdef CONFIG_QUOTA
5651 for (unsigned int i = 0; i < EXT4_MAXQUOTAS; i++)
5652 kfree(get_qf_name(sb, sbi, i));
5653 #endif
5654 fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
5655 brelse(sbi->s_sbh);
5656 if (sbi->s_journal_bdev_file) {
5657 invalidate_bdev(file_bdev(sbi->s_journal_bdev_file));
5658 bdev_fput(sbi->s_journal_bdev_file);
5659 }
5660 out_fail:
5661 invalidate_bdev(sb->s_bdev);
5662 sb->s_fs_info = NULL;
5663 return err;
5664 }
5665
5666 static int ext4_fill_super(struct super_block *sb, struct fs_context *fc)
5667 {
5668 struct ext4_fs_context *ctx = fc->fs_private;
5669 struct ext4_sb_info *sbi;
5670 const char *descr;
5671 int ret;
5672
5673 sbi = ext4_alloc_sbi(sb);
5674 if (!sbi)
5675 return -ENOMEM;
5676
5677 fc->s_fs_info = sbi;
5678
5679 /* Cleanup superblock name */
5680 strreplace(sb->s_id, '/', '!');
5681
5682 sbi->s_sb_block = 1; /* Default super block location */
5683 if (ctx->spec & EXT4_SPEC_s_sb_block)
5684 sbi->s_sb_block = ctx->s_sb_block;
5685
5686 ret = __ext4_fill_super(fc, sb);
5687 if (ret < 0)
5688 goto free_sbi;
5689
5690 if (sbi->s_journal) {
5691 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
5692 descr = " journalled data mode";
5693 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
5694 descr = " ordered data mode";
5695 else
5696 descr = " writeback data mode";
5697 } else
5698 descr = "out journal";
5699
5700 if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
5701 ext4_msg(sb, KERN_INFO, "mounted filesystem %pU %s with%s. "
5702 "Quota mode: %s.", &sb->s_uuid,
5703 sb_rdonly(sb) ? "ro" : "r/w", descr,
5704 ext4_quota_mode(sb));
5705
5706 /* Update the s_overhead_clusters if necessary */
5707 ext4_update_overhead(sb, false);
5708 return 0;
5709
5710 free_sbi:
5711 ext4_free_sbi(sbi);
5712 fc->s_fs_info = NULL;
5713 return ret;
5714 }
5715
5716 static int ext4_get_tree(struct fs_context *fc)
5717 {
5718 return get_tree_bdev(fc, ext4_fill_super);
5719 }
5720
5721 /*
5722 * Setup any per-fs journal parameters now. We'll do this both on
5723 * initial mount, once the journal has been initialised but before we've
5724 * done any recovery; and again on any subsequent remount.
5725 */
5726 static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
5727 {
5728 struct ext4_sb_info *sbi = EXT4_SB(sb);
5729
5730 journal->j_commit_interval = sbi->s_commit_interval;
5731 journal->j_min_batch_time = sbi->s_min_batch_time;
5732 journal->j_max_batch_time = sbi->s_max_batch_time;
5733 ext4_fc_init(sb, journal);
5734
5735 write_lock(&journal->j_state_lock);
5736 if (test_opt(sb, BARRIER))
5737 journal->j_flags |= JBD2_BARRIER;
5738 else
5739 journal->j_flags &= ~JBD2_BARRIER;
5740 if (test_opt(sb, DATA_ERR_ABORT))
5741 journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
5742 else
5743 journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
5744 /*
5745	 * Always enable the journal cycle record option, letting the journal
5746	 * record transactions continuously across mounts.
5747 */
5748 journal->j_flags |= JBD2_CYCLE_RECORD;
5749 write_unlock(&journal->j_state_lock);
5750 }
5751
5752 static struct inode *ext4_get_journal_inode(struct super_block *sb,
5753 unsigned int journal_inum)
5754 {
5755 struct inode *journal_inode;
5756
5757 /*
5758 * Test for the existence of a valid inode on disk. Bad things
5759 * happen if we iget() an unused inode, as the subsequent iput()
5760 * will try to delete it.
5761 */
5762 journal_inode = ext4_iget(sb, journal_inum, EXT4_IGET_SPECIAL);
5763 if (IS_ERR(journal_inode)) {
5764 ext4_msg(sb, KERN_ERR, "no journal found");
5765 return ERR_CAST(journal_inode);
5766 }
5767 if (!journal_inode->i_nlink) {
5768 make_bad_inode(journal_inode);
5769 iput(journal_inode);
5770 ext4_msg(sb, KERN_ERR, "journal inode is deleted");
5771 return ERR_PTR(-EFSCORRUPTED);
5772 }
5773 if (!S_ISREG(journal_inode->i_mode) || IS_ENCRYPTED(journal_inode)) {
5774 ext4_msg(sb, KERN_ERR, "invalid journal inode");
5775 iput(journal_inode);
5776 return ERR_PTR(-EFSCORRUPTED);
5777 }
5778
5779 ext4_debug("Journal inode found at %p: %lld bytes\n",
5780 journal_inode, journal_inode->i_size);
5781 return journal_inode;
5782 }
5783
5784 static int ext4_journal_bmap(journal_t *journal, sector_t *block)
5785 {
5786 struct ext4_map_blocks map;
5787 int ret;
5788
5789 if (journal->j_inode == NULL)
5790 return 0;
5791
5792 map.m_lblk = *block;
5793 map.m_len = 1;
5794 ret = ext4_map_blocks(NULL, journal->j_inode, &map, 0);
5795 if (ret <= 0) {
5796 ext4_msg(journal->j_inode->i_sb, KERN_CRIT,
5797 "journal bmap failed: block %llu ret %d\n",
5798 *block, ret);
5799 jbd2_journal_abort(journal, ret ? ret : -EIO);
5800 return ret;
5801 }
5802 *block = map.m_pblk;
5803 return 0;
5804 }
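/*
 * Example: if logical block 7 of an inode journal maps to physical
 * block 34061 (hypothetical numbers), a caller passing *block == 7 gets
 * *block rewritten to 34061. An unmapped journal block indicates
 * corruption, so the journal is aborted rather than writing to a bogus
 * location.
 */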
5805
5806 static journal_t *ext4_open_inode_journal(struct super_block *sb,
5807 unsigned int journal_inum)
5808 {
5809 struct inode *journal_inode;
5810 journal_t *journal;
5811
5812 journal_inode = ext4_get_journal_inode(sb, journal_inum);
5813 if (IS_ERR(journal_inode))
5814 return ERR_CAST(journal_inode);
5815
5816 journal = jbd2_journal_init_inode(journal_inode);
5817 if (IS_ERR(journal)) {
5818 ext4_msg(sb, KERN_ERR, "Could not load journal inode");
5819 iput(journal_inode);
5820 return ERR_CAST(journal);
5821 }
5822 journal->j_private = sb;
5823 journal->j_bmap = ext4_journal_bmap;
5824 ext4_init_journal_params(sb, journal);
5825 return journal;
5826 }
5827
5828 static struct file *ext4_get_journal_blkdev(struct super_block *sb,
5829 dev_t j_dev, ext4_fsblk_t *j_start,
5830 ext4_fsblk_t *j_len)
5831 {
5832 struct buffer_head *bh;
5833 struct block_device *bdev;
5834 struct file *bdev_file;
5835 int hblock, blocksize;
5836 ext4_fsblk_t sb_block;
5837 unsigned long offset;
5838 struct ext4_super_block *es;
5839 int errno;
5840
5841 bdev_file = bdev_file_open_by_dev(j_dev,
5842 BLK_OPEN_READ | BLK_OPEN_WRITE | BLK_OPEN_RESTRICT_WRITES,
5843 sb, &fs_holder_ops);
5844 if (IS_ERR(bdev_file)) {
5845 ext4_msg(sb, KERN_ERR,
5846 "failed to open journal device unknown-block(%u,%u) %ld",
5847 MAJOR(j_dev), MINOR(j_dev), PTR_ERR(bdev_file));
5848 return bdev_file;
5849 }
5850
5851 bdev = file_bdev(bdev_file);
5852 blocksize = sb->s_blocksize;
5853 hblock = bdev_logical_block_size(bdev);
5854 if (blocksize < hblock) {
5855 ext4_msg(sb, KERN_ERR,
5856 "blocksize too small for journal device");
5857 errno = -EINVAL;
5858 goto out_bdev;
5859 }
5860
5861 sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
5862 offset = EXT4_MIN_BLOCK_SIZE % blocksize;
5863 set_blocksize(bdev_file, blocksize);
5864 bh = __bread(bdev, sb_block, blocksize);
5865 if (!bh) {
5866 ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
5867 "external journal");
5868 errno = -EINVAL;
5869 goto out_bdev;
5870 }
5871
5872 es = (struct ext4_super_block *) (bh->b_data + offset);
5873 if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
5874 !(le32_to_cpu(es->s_feature_incompat) &
5875 EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
5876 ext4_msg(sb, KERN_ERR, "external journal has bad superblock");
5877 errno = -EFSCORRUPTED;
5878 goto out_bh;
5879 }
5880
5881 if ((le32_to_cpu(es->s_feature_ro_compat) &
5882 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
5883 es->s_checksum != ext4_superblock_csum(sb, es)) {
5884 ext4_msg(sb, KERN_ERR, "external journal has corrupt superblock");
5885 errno = -EFSCORRUPTED;
5886 goto out_bh;
5887 }
5888
5889 if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
5890 ext4_msg(sb, KERN_ERR, "journal UUID does not match");
5891 errno = -EFSCORRUPTED;
5892 goto out_bh;
5893 }
5894
5895 *j_start = sb_block + 1;
5896 *j_len = ext4_blocks_count(es);
5897 brelse(bh);
5898 return bdev_file;
5899
5900 out_bh:
5901 brelse(bh);
5902 out_bdev:
5903 bdev_fput(bdev_file);
5904 return ERR_PTR(errno);
5905 }
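/*
 * The external journal's superblock also lives at byte offset 1024 of
 * its device: with a 4kB filesystem block size, sb_block = 1024 / 4096
 * = 0 and offset = 1024 % 4096 = 1024; with 1kB blocks, sb_block = 1
 * and offset = 0. The journal proper starts at sb_block + 1.
 */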
5906
5907 static journal_t *ext4_open_dev_journal(struct super_block *sb,
5908 dev_t j_dev)
5909 {
5910 journal_t *journal;
5911 ext4_fsblk_t j_start;
5912 ext4_fsblk_t j_len;
5913 struct file *bdev_file;
5914 int errno = 0;
5915
5916 bdev_file = ext4_get_journal_blkdev(sb, j_dev, &j_start, &j_len);
5917 if (IS_ERR(bdev_file))
5918 return ERR_CAST(bdev_file);
5919
5920 journal = jbd2_journal_init_dev(file_bdev(bdev_file), sb->s_bdev, j_start,
5921 j_len, sb->s_blocksize);
5922 if (IS_ERR(journal)) {
5923 ext4_msg(sb, KERN_ERR, "failed to create device journal");
5924 errno = PTR_ERR(journal);
5925 goto out_bdev;
5926 }
5927 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
5928 ext4_msg(sb, KERN_ERR, "External journal has more than one "
5929 "user (unsupported) - %d",
5930 be32_to_cpu(journal->j_superblock->s_nr_users));
5931 errno = -EINVAL;
5932 goto out_journal;
5933 }
5934 journal->j_private = sb;
5935 EXT4_SB(sb)->s_journal_bdev_file = bdev_file;
5936 ext4_init_journal_params(sb, journal);
5937 return journal;
5938
5939 out_journal:
5940 jbd2_journal_destroy(journal);
5941 out_bdev:
5942 bdev_fput(bdev_file);
5943 return ERR_PTR(errno);
5944 }
5945
5946 static int ext4_load_journal(struct super_block *sb,
5947 struct ext4_super_block *es,
5948 unsigned long journal_devnum)
5949 {
5950 journal_t *journal;
5951 unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
5952 dev_t journal_dev;
5953 int err = 0;
5954 int really_read_only;
5955 int journal_dev_ro;
5956
5957 if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
5958 return -EFSCORRUPTED;
5959
5960 if (journal_devnum &&
5961 journal_devnum != le32_to_cpu(es->s_journal_dev)) {
5962 ext4_msg(sb, KERN_INFO, "external journal device major/minor "
5963 "numbers have changed");
5964 journal_dev = new_decode_dev(journal_devnum);
5965 } else
5966 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
5967
5968 if (journal_inum && journal_dev) {
5969 ext4_msg(sb, KERN_ERR,
5970 "filesystem has both journal inode and journal device!");
5971 return -EINVAL;
5972 }
5973
5974 if (journal_inum) {
5975 journal = ext4_open_inode_journal(sb, journal_inum);
5976 if (IS_ERR(journal))
5977 return PTR_ERR(journal);
5978 } else {
5979 journal = ext4_open_dev_journal(sb, journal_dev);
5980 if (IS_ERR(journal))
5981 return PTR_ERR(journal);
5982 }
5983
5984 journal_dev_ro = bdev_read_only(journal->j_dev);
5985 really_read_only = bdev_read_only(sb->s_bdev) | journal_dev_ro;
5986
5987 if (journal_dev_ro && !sb_rdonly(sb)) {
5988 ext4_msg(sb, KERN_ERR,
5989 "journal device read-only, try mounting with '-o ro'");
5990 err = -EROFS;
5991 goto err_out;
5992 }
5993
5994 /*
5995 * Are we loading a blank journal or performing recovery after a
5996 * crash? For recovery, we need to check in advance whether we
5997 * can get read-write access to the device.
5998 */
5999 if (ext4_has_feature_journal_needs_recovery(sb)) {
6000 if (sb_rdonly(sb)) {
6001 ext4_msg(sb, KERN_INFO, "INFO: recovery "
6002 "required on readonly filesystem");
6003 if (really_read_only) {
6004 ext4_msg(sb, KERN_ERR, "write access "
6005 "unavailable, cannot proceed "
6006 "(try mounting with noload)");
6007 err = -EROFS;
6008 goto err_out;
6009 }
6010 ext4_msg(sb, KERN_INFO, "write access will "
6011 "be enabled during recovery");
6012 }
6013 }
6014
6015 if (!(journal->j_flags & JBD2_BARRIER))
6016 ext4_msg(sb, KERN_INFO, "barriers disabled");
6017
6018 if (!ext4_has_feature_journal_needs_recovery(sb))
6019 err = jbd2_journal_wipe(journal, !really_read_only);
6020 if (!err) {
6021 char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
6022 __le16 orig_state;
6023 bool changed = false;
6024
6025 if (save)
6026 memcpy(save, ((char *) es) +
6027 EXT4_S_ERR_START, EXT4_S_ERR_LEN);
6028 err = jbd2_journal_load(journal);
6029 if (save && memcmp(((char *) es) + EXT4_S_ERR_START,
6030 save, EXT4_S_ERR_LEN)) {
6031 memcpy(((char *) es) + EXT4_S_ERR_START,
6032 save, EXT4_S_ERR_LEN);
6033 changed = true;
6034 }
6035 kfree(save);
6036 orig_state = es->s_state;
6037 es->s_state |= cpu_to_le16(EXT4_SB(sb)->s_mount_state &
6038 EXT4_ERROR_FS);
6039 if (orig_state != es->s_state)
6040 changed = true;
6041 /* Write out restored error information to the superblock */
6042 if (changed && !really_read_only) {
6043 int err2;
6044 err2 = ext4_commit_super(sb);
6045 err = err ? : err2;
6046 }
6047 }
6048
6049 if (err) {
6050 ext4_msg(sb, KERN_ERR, "error loading journal");
6051 goto err_out;
6052 }
6053
6054 EXT4_SB(sb)->s_journal = journal;
6055 err = ext4_clear_journal_err(sb, es);
6056 if (err) {
6057 EXT4_SB(sb)->s_journal = NULL;
6058 jbd2_journal_destroy(journal);
6059 return err;
6060 }
6061
6062 if (!really_read_only && journal_devnum &&
6063 journal_devnum != le32_to_cpu(es->s_journal_dev)) {
6064 es->s_journal_dev = cpu_to_le32(journal_devnum);
6065 ext4_commit_super(sb);
6066 }
6067 if (!really_read_only && journal_inum &&
6068 journal_inum != le32_to_cpu(es->s_journal_inum)) {
6069 es->s_journal_inum = cpu_to_le32(journal_inum);
6070 ext4_commit_super(sb);
6071 }
6072
6073 return 0;
6074
6075 err_out:
6076 jbd2_journal_destroy(journal);
6077 return err;
6078 }
6079
6080 /* Copy state of EXT4_SB(sb) into buffer for on-disk superblock */
6081 static void ext4_update_super(struct super_block *sb)
6082 {
6083 struct ext4_sb_info *sbi = EXT4_SB(sb);
6084 struct ext4_super_block *es = sbi->s_es;
6085 struct buffer_head *sbh = sbi->s_sbh;
6086
6087 lock_buffer(sbh);
6088 /*
6089 * If the file system is mounted read-only, don't update the
6090 * superblock write time. This avoids updating the superblock
6091 * write time when we are mounting the root file system
6092 * read/only but we need to replay the journal; at that point,
6093 * for people who are east of GMT and who make their clock
6094 * tick in localtime for Windows bug-for-bug compatibility,
6095 * the clock is set in the future, and this will cause e2fsck
6096 * to complain and force a full file system check.
6097 */
6098 if (!sb_rdonly(sb))
6099 ext4_update_tstamp(es, s_wtime);
6100 es->s_kbytes_written =
6101 cpu_to_le64(sbi->s_kbytes_written +
6102 ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) -
6103 sbi->s_sectors_written_start) >> 1));
6104 if (percpu_counter_initialized(&sbi->s_freeclusters_counter))
6105 ext4_free_blocks_count_set(es,
6106 EXT4_C2B(sbi, percpu_counter_sum_positive(
6107 &sbi->s_freeclusters_counter)));
6108 if (percpu_counter_initialized(&sbi->s_freeinodes_counter))
6109 es->s_free_inodes_count =
6110 cpu_to_le32(percpu_counter_sum_positive(
6111 &sbi->s_freeinodes_counter));
6112 /* Copy error information to the on-disk superblock */
6113 spin_lock(&sbi->s_error_lock);
6114 if (sbi->s_add_error_count > 0) {
6115 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
6116 if (!es->s_first_error_time && !es->s_first_error_time_hi) {
6117 __ext4_update_tstamp(&es->s_first_error_time,
6118 &es->s_first_error_time_hi,
6119 sbi->s_first_error_time);
6120 strtomem_pad(es->s_first_error_func,
6121 sbi->s_first_error_func, 0);
6122 es->s_first_error_line =
6123 cpu_to_le32(sbi->s_first_error_line);
6124 es->s_first_error_ino =
6125 cpu_to_le32(sbi->s_first_error_ino);
6126 es->s_first_error_block =
6127 cpu_to_le64(sbi->s_first_error_block);
6128 es->s_first_error_errcode =
6129 ext4_errno_to_code(sbi->s_first_error_code);
6130 }
6131 __ext4_update_tstamp(&es->s_last_error_time,
6132 &es->s_last_error_time_hi,
6133 sbi->s_last_error_time);
6134 strtomem_pad(es->s_last_error_func, sbi->s_last_error_func, 0);
6135 es->s_last_error_line = cpu_to_le32(sbi->s_last_error_line);
6136 es->s_last_error_ino = cpu_to_le32(sbi->s_last_error_ino);
6137 es->s_last_error_block = cpu_to_le64(sbi->s_last_error_block);
6138 es->s_last_error_errcode =
6139 ext4_errno_to_code(sbi->s_last_error_code);
6140 /*
6141 * Start the daily error reporting function if it hasn't been
6142 * started already
6143 */
6144 if (!es->s_error_count)
6145 mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);
6146 le32_add_cpu(&es->s_error_count, sbi->s_add_error_count);
6147 sbi->s_add_error_count = 0;
6148 }
6149 spin_unlock(&sbi->s_error_lock);
6150
6151 ext4_superblock_csum_set(sb);
6152 unlock_buffer(sbh);
6153 }
6154
6155 static int ext4_commit_super(struct super_block *sb)
6156 {
6157 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
6158
6159 if (!sbh)
6160 return -EINVAL;
6161
6162 ext4_update_super(sb);
6163
6164 lock_buffer(sbh);
6165 /* Buffer got discarded which means block device got invalidated */
6166 if (!buffer_mapped(sbh)) {
6167 unlock_buffer(sbh);
6168 return -EIO;
6169 }
6170
6171 if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
6172 /*
6173 * Oh, dear. A previous attempt to write the
6174 * superblock failed. This could happen because the
6175 * USB device was yanked out. Or it could happen to
6176 * be a transient write error and maybe the block will
6177 * be remapped. Nothing we can do but to retry the
6178 * write and hope for the best.
6179 */
6180 ext4_msg(sb, KERN_ERR, "previous I/O error to "
6181 "superblock detected");
6182 clear_buffer_write_io_error(sbh);
6183 set_buffer_uptodate(sbh);
6184 }
6185 get_bh(sbh);
6186 /* Clear potential dirty bit if it was journalled update */
6187 clear_buffer_dirty(sbh);
6188 sbh->b_end_io = end_buffer_write_sync;
6189 submit_bh(REQ_OP_WRITE | REQ_SYNC |
6190 (test_opt(sb, BARRIER) ? REQ_FUA : 0), sbh);
6191 wait_on_buffer(sbh);
6192 if (buffer_write_io_error(sbh)) {
6193 ext4_msg(sb, KERN_ERR, "I/O error while writing "
6194 "superblock");
6195 clear_buffer_write_io_error(sbh);
6196 set_buffer_uptodate(sbh);
6197 return -EIO;
6198 }
6199 return 0;
6200 }
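/*
 * With barriers enabled, the superblock write above is issued with
 * REQ_FUA, so completion is not reported until the data reaches stable
 * media rather than just the device cache; without barriers, a plain
 * REQ_SYNC write is used.
 */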
6201
6202 /*
6203 * Have we just finished recovery? If so, and if we are mounting (or
6204 * remounting) the filesystem readonly, then we will end up with a
6205 * consistent fs on disk. Record that fact.
6206 */
6207 static int ext4_mark_recovery_complete(struct super_block *sb,
6208 struct ext4_super_block *es)
6209 {
6210 int err;
6211 journal_t *journal = EXT4_SB(sb)->s_journal;
6212
6213 if (!ext4_has_feature_journal(sb)) {
6214 if (journal != NULL) {
6215 ext4_error(sb, "Journal got removed while the fs was "
6216 "mounted!");
6217 return -EFSCORRUPTED;
6218 }
6219 return 0;
6220 }
6221 jbd2_journal_lock_updates(journal);
6222 err = jbd2_journal_flush(journal, 0);
6223 if (err < 0)
6224 goto out;
6225
6226 if (sb_rdonly(sb) && (ext4_has_feature_journal_needs_recovery(sb) ||
6227 ext4_has_feature_orphan_present(sb))) {
6228 if (!ext4_orphan_file_empty(sb)) {
6229 ext4_error(sb, "Orphan file not empty on read-only fs.");
6230 err = -EFSCORRUPTED;
6231 goto out;
6232 }
6233 ext4_clear_feature_journal_needs_recovery(sb);
6234 ext4_clear_feature_orphan_present(sb);
6235 ext4_commit_super(sb);
6236 }
6237 out:
6238 jbd2_journal_unlock_updates(journal);
6239 return err;
6240 }
6241
6242 /*
6243 * If we are mounting (or read-write remounting) a filesystem whose journal
6244 * has recorded an error from a previous lifetime, move that error to the
6245 * main filesystem now.
6246 */
6247 static int ext4_clear_journal_err(struct super_block *sb,
6248 struct ext4_super_block *es)
6249 {
6250 journal_t *journal;
6251 int j_errno;
6252 const char *errstr;
6253
6254 if (!ext4_has_feature_journal(sb)) {
6255 ext4_error(sb, "Journal got removed while the fs was mounted!");
6256 return -EFSCORRUPTED;
6257 }
6258
6259 journal = EXT4_SB(sb)->s_journal;
6260
6261 /*
6262 * Now check for any error status which may have been recorded in the
6263 * journal by a prior ext4_error() or ext4_abort()
6264 */
6265
6266 j_errno = jbd2_journal_errno(journal);
6267 if (j_errno) {
6268 char nbuf[16];
6269
6270 errstr = ext4_decode_error(sb, j_errno, nbuf);
6271 ext4_warning(sb, "Filesystem error recorded "
6272 "from previous mount: %s", errstr);
6273
6274 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
6275 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
6276 j_errno = ext4_commit_super(sb);
6277 if (j_errno)
6278 return j_errno;
6279 ext4_warning(sb, "Marked fs in need of filesystem check.");
6280
6281 jbd2_journal_clear_err(journal);
6282 jbd2_journal_update_sb_errno(journal);
6283 }
6284 return 0;
6285 }
6286
6287 /*
6288 * Force the running and committing transactions to commit,
6289 * and wait on the commit.
6290 */
6291 int ext4_force_commit(struct super_block *sb)
6292 {
6293 return ext4_journal_force_commit(EXT4_SB(sb)->s_journal);
6294 }
6295
6296 static int ext4_sync_fs(struct super_block *sb, int wait)
6297 {
6298 int ret = 0;
6299 tid_t target;
6300 bool needs_barrier = false;
6301 struct ext4_sb_info *sbi = EXT4_SB(sb);
6302
6303 if (unlikely(ext4_forced_shutdown(sb)))
6304 return 0;
6305
6306 trace_ext4_sync_fs(sb, wait);
6307 flush_workqueue(sbi->rsv_conversion_wq);
6308 /*
6309 * Writeback quota in non-journalled quota case - journalled quota has
6310 * no dirty dquots
6311 */
6312 dquot_writeback_dquots(sb, -1);
6313 /*
6314	 * Data writeback is possible without a journal transaction, so a
6315	 * barrier must be sent at the end of the function. But we can skip
6316	 * it if the transaction commit will do it for us.
6317 */
6318 if (sbi->s_journal) {
6319 target = jbd2_get_latest_transaction(sbi->s_journal);
6320 if (wait && sbi->s_journal->j_flags & JBD2_BARRIER &&
6321 !jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
6322 needs_barrier = true;
6323
6324 if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
6325 if (wait)
6326 ret = jbd2_log_wait_commit(sbi->s_journal,
6327 target);
6328 }
6329 } else if (wait && test_opt(sb, BARRIER))
6330 needs_barrier = true;
6331 if (needs_barrier) {
6332 int err;
6333 err = blkdev_issue_flush(sb->s_bdev);
6334 if (!ret)
6335 ret = err;
6336 }
6337
6338 return ret;
6339 }
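/*
 * Example of the barrier decision above: when syncing a journalled
 * filesystem, if the committing transaction will itself issue a flush
 * (jbd2_trans_will_send_data_barrier()), the explicit
 * blkdev_issue_flush() is skipped; with no journal and barriers
 * enabled, a waiting sync always flushes the device cache.
 */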
6340
6341 /*
6342 * LVM calls this function before a (read-only) snapshot is created. This
6343 * gives us a chance to flush the journal completely and mark the fs clean.
6344 *
6345	 * Note that this function alone cannot bring the filesystem to a
6346	 * clean state. It relies on the upper layer to stop all data and
6347	 * metadata modifications.
6348 */
6349 static int ext4_freeze(struct super_block *sb)
6350 {
6351 int error = 0;
6352 journal_t *journal = EXT4_SB(sb)->s_journal;
6353
6354 if (journal) {
6355 /* Now we set up the journal barrier. */
6356 jbd2_journal_lock_updates(journal);
6357
6358 /*
6359 * Don't clear the needs_recovery flag if we failed to
6360 * flush the journal.
6361 */
6362 error = jbd2_journal_flush(journal, 0);
6363 if (error < 0)
6364 goto out;
6365
6366 /* Journal blocked and flushed, clear needs_recovery flag. */
6367 ext4_clear_feature_journal_needs_recovery(sb);
6368 if (ext4_orphan_file_empty(sb))
6369 ext4_clear_feature_orphan_present(sb);
6370 }
6371
6372 error = ext4_commit_super(sb);
6373 out:
6374 if (journal)
6375 /* we rely on upper layer to stop further updates */
6376 jbd2_journal_unlock_updates(journal);
6377 return error;
6378 }
6379
6380 /*
6381 * Called by LVM after the snapshot is done. We need to reset the RECOVER
6382 * flag here, even though the filesystem is not technically dirty yet.
6383 */
6384 static int ext4_unfreeze(struct super_block *sb)
6385 {
6386 if (ext4_forced_shutdown(sb))
6387 return 0;
6388
6389 if (EXT4_SB(sb)->s_journal) {
6390 /* Reset the needs_recovery flag before the fs is unlocked. */
6391 ext4_set_feature_journal_needs_recovery(sb);
6392 if (ext4_has_feature_orphan_file(sb))
6393 ext4_set_feature_orphan_present(sb);
6394 }
6395
6396 ext4_commit_super(sb);
6397 return 0;
6398 }
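/*
 * ext4_freeze()/ext4_unfreeze() back the generic freeze_super() path,
 * e.g. (illustrative mount point):
 *
 *     fsfreeze -f /mnt    # journal flushed, needs_recovery cleared
 *     ... take the LVM snapshot ...
 *     fsfreeze -u /mnt    # needs_recovery set again, super committed
 */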
6399
6400 /*
6401 * Structure to save mount options for ext4_remount's benefit
6402 */
6403 struct ext4_mount_options {
6404 unsigned long s_mount_opt;
6405 unsigned long s_mount_opt2;
6406 kuid_t s_resuid;
6407 kgid_t s_resgid;
6408 unsigned long s_commit_interval;
6409 u32 s_min_batch_time, s_max_batch_time;
6410 #ifdef CONFIG_QUOTA
6411 int s_jquota_fmt;
6412 char *s_qf_names[EXT4_MAXQUOTAS];
6413 #endif
6414 };
6415
6416 static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
6417 {
6418 struct ext4_fs_context *ctx = fc->fs_private;
6419 struct ext4_super_block *es;
6420 struct ext4_sb_info *sbi = EXT4_SB(sb);
6421 unsigned long old_sb_flags;
6422 struct ext4_mount_options old_opts;
6423 ext4_group_t g;
6424 int err = 0;
6425 int alloc_ctx;
6426 #ifdef CONFIG_QUOTA
6427 int enable_quota = 0;
6428 int i, j;
6429 char *to_free[EXT4_MAXQUOTAS];
6430 #endif
6431
6432
6433 /* Store the original options */
6434 old_sb_flags = sb->s_flags;
6435 old_opts.s_mount_opt = sbi->s_mount_opt;
6436 old_opts.s_mount_opt2 = sbi->s_mount_opt2;
6437 old_opts.s_resuid = sbi->s_resuid;
6438 old_opts.s_resgid = sbi->s_resgid;
6439 old_opts.s_commit_interval = sbi->s_commit_interval;
6440 old_opts.s_min_batch_time = sbi->s_min_batch_time;
6441 old_opts.s_max_batch_time = sbi->s_max_batch_time;
6442 #ifdef CONFIG_QUOTA
6443 old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
6444 for (i = 0; i < EXT4_MAXQUOTAS; i++)
6445 if (sbi->s_qf_names[i]) {
6446 char *qf_name = get_qf_name(sb, sbi, i);
6447
6448 old_opts.s_qf_names[i] = kstrdup(qf_name, GFP_KERNEL);
6449 if (!old_opts.s_qf_names[i]) {
6450 for (j = 0; j < i; j++)
6451 kfree(old_opts.s_qf_names[j]);
6452 return -ENOMEM;
6453 }
6454 } else
6455 old_opts.s_qf_names[i] = NULL;
6456 #endif
6457 if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)) {
6458 if (sbi->s_journal && sbi->s_journal->j_task->io_context)
6459 ctx->journal_ioprio =
6460 sbi->s_journal->j_task->io_context->ioprio;
6461 else
6462 ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
6463
6464 }
6465
6466 if ((ctx->spec & EXT4_SPEC_s_stripe) &&
6467 ext4_is_stripe_incompatible(sb, ctx->s_stripe)) {
6468 ext4_msg(sb, KERN_WARNING,
6469 "stripe (%lu) is not aligned with cluster size (%u), "
6470 "stripe is disabled",
6471 ctx->s_stripe, sbi->s_cluster_ratio);
6472 ctx->s_stripe = 0;
6473 }
6474
6475 /*
6476 * Changing the DIOREAD_NOLOCK or DELALLOC mount options may cause
6477 * two calls to ext4_should_dioread_nolock() to return inconsistent
6478	 * values, triggering a WARN_ON in ext4_add_complete_io(). We grab
6479	 * s_writepages_rwsem here to avoid races between writepages
6480	 * operations and remount.
6481 */
6482 alloc_ctx = ext4_writepages_down_write(sb);
6483 ext4_apply_options(fc, sb);
6484 ext4_writepages_up_write(sb, alloc_ctx);
6485
6486 if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
6487 test_opt(sb, JOURNAL_CHECKSUM)) {
6488 ext4_msg(sb, KERN_ERR, "changing journal_checksum "
6489 "during remount not supported; ignoring");
6490 sbi->s_mount_opt ^= EXT4_MOUNT_JOURNAL_CHECKSUM;
6491 }
6492
6493 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
6494 if (test_opt2(sb, EXPLICIT_DELALLOC)) {
6495 ext4_msg(sb, KERN_ERR, "can't mount with "
6496 "both data=journal and delalloc");
6497 err = -EINVAL;
6498 goto restore_opts;
6499 }
6500 if (test_opt(sb, DIOREAD_NOLOCK)) {
6501 ext4_msg(sb, KERN_ERR, "can't mount with "
6502 "both data=journal and dioread_nolock");
6503 err = -EINVAL;
6504 goto restore_opts;
6505 }
6506 } else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) {
6507 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
6508 ext4_msg(sb, KERN_ERR, "can't mount with "
6509 "journal_async_commit in data=ordered mode");
6510 err = -EINVAL;
6511 goto restore_opts;
6512 }
6513 }
6514
6515 if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_NO_MBCACHE) {
6516 ext4_msg(sb, KERN_ERR, "can't enable nombcache during remount");
6517 err = -EINVAL;
6518 goto restore_opts;
6519 }
6520
6521 if (test_opt2(sb, ABORT))
6522 ext4_abort(sb, ESHUTDOWN, "Abort forced by user");
6523
6524 sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
6525 (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
6526
6527 es = sbi->s_es;
6528
6529 if (sbi->s_journal) {
6530 ext4_init_journal_params(sb, sbi->s_journal);
6531 set_task_ioprio(sbi->s_journal->j_task, ctx->journal_ioprio);
6532 }
6533
6534 /* Flush outstanding errors before changing fs state */
6535 flush_work(&sbi->s_sb_upd_work);
6536
6537 if ((bool)(fc->sb_flags & SB_RDONLY) != sb_rdonly(sb)) {
6538 if (ext4_forced_shutdown(sb)) {
6539 err = -EROFS;
6540 goto restore_opts;
6541 }
6542
6543 if (fc->sb_flags & SB_RDONLY) {
6544 err = sync_filesystem(sb);
6545 if (err < 0)
6546 goto restore_opts;
6547 err = dquot_suspend(sb, -1);
6548 if (err < 0)
6549 goto restore_opts;
6550
6551 /*
6552 * First of all, the unconditional stuff we have to do
6553 * to disable replay of the journal when we next remount
6554 */
6555 sb->s_flags |= SB_RDONLY;
6556
6557 /*
6558 * OK, test if we are remounting a valid rw partition
6559 * readonly, and if so set the rdonly flag and then
6560 * mark the partition as valid again.
6561 */
6562 if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
6563 (sbi->s_mount_state & EXT4_VALID_FS))
6564 es->s_state = cpu_to_le16(sbi->s_mount_state);
6565
6566 if (sbi->s_journal) {
6567 /*
6568 * We let remount-ro finish even if marking fs
6569 * as clean failed...
6570 */
6571 ext4_mark_recovery_complete(sb, es);
6572 }
6573 } else {
6574 /* Make sure we can mount this feature set readwrite */
6575 if (ext4_has_feature_readonly(sb) ||
6576 !ext4_feature_set_ok(sb, 0)) {
6577 err = -EROFS;
6578 goto restore_opts;
6579 }
6580 /*
6581 * Make sure the group descriptor checksums
6582 * are sane. If they aren't, refuse to remount r/w.
6583 */
6584 for (g = 0; g < sbi->s_groups_count; g++) {
6585 struct ext4_group_desc *gdp =
6586 ext4_get_group_desc(sb, g, NULL);
6587
6588 if (!ext4_group_desc_csum_verify(sb, g, gdp)) {
6589 ext4_msg(sb, KERN_ERR,
6590 "ext4_remount: Checksum for group %u failed (%u!=%u)",
6591 g, le16_to_cpu(ext4_group_desc_csum(sb, g, gdp)),
6592 le16_to_cpu(gdp->bg_checksum));
6593 err = -EFSBADCRC;
6594 goto restore_opts;
6595 }
6596 }
6597
6598 /*
6599 * If we have an unprocessed orphan list hanging
6600 * around from a previously readonly bdev mount,
6601 * require a full umount/remount for now.
6602 */
6603 if (es->s_last_orphan || !ext4_orphan_file_empty(sb)) {
6604 ext4_msg(sb, KERN_WARNING, "Couldn't "
6605 "remount RDWR because of unprocessed "
6606 "orphan inode list. Please "
6607 "umount/remount instead");
6608 err = -EINVAL;
6609 goto restore_opts;
6610 }
6611
6612 /*
6613 * Mounting a RDONLY partition read-write, so reread
6614 * and store the current valid flag. (It may have
6615 * been changed by e2fsck since we originally mounted
6616 * the partition.)
6617 */
6618 if (sbi->s_journal) {
6619 err = ext4_clear_journal_err(sb, es);
6620 if (err)
6621 goto restore_opts;
6622 }
6623 sbi->s_mount_state = (le16_to_cpu(es->s_state) &
6624 ~EXT4_FC_REPLAY);
6625
6626 err = ext4_setup_super(sb, es, 0);
6627 if (err)
6628 goto restore_opts;
6629
6630 sb->s_flags &= ~SB_RDONLY;
6631 if (ext4_has_feature_mmp(sb)) {
6632 err = ext4_multi_mount_protect(sb,
6633 le64_to_cpu(es->s_mmp_block));
6634 if (err)
6635 goto restore_opts;
6636 }
6637 #ifdef CONFIG_QUOTA
6638 enable_quota = 1;
6639 #endif
6640 }
6641 }
6642
6643 /*
6644 * Handle creation of system zone data early because it can fail.
6645	 * Existing data is released once we are sure the remount will
6646	 * succeed.
6647 */
6648 if (test_opt(sb, BLOCK_VALIDITY) && !sbi->s_system_blks) {
6649 err = ext4_setup_system_zone(sb);
6650 if (err)
6651 goto restore_opts;
6652 }
6653
6654 if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) {
6655 err = ext4_commit_super(sb);
6656 if (err)
6657 goto restore_opts;
6658 }
6659
6660 #ifdef CONFIG_QUOTA
6661 if (enable_quota) {
6662 if (sb_any_quota_suspended(sb))
6663 dquot_resume(sb, -1);
6664 else if (ext4_has_feature_quota(sb)) {
6665 err = ext4_enable_quotas(sb);
6666 if (err)
6667 goto restore_opts;
6668 }
6669 }
6670 /* Release old quota file names */
6671 for (i = 0; i < EXT4_MAXQUOTAS; i++)
6672 kfree(old_opts.s_qf_names[i]);
6673 #endif
6674 if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
6675 ext4_release_system_zone(sb);
6676
6677 /*
6678 * Reinitialize lazy itable initialization thread based on
6679 * current settings
6680 */
6681 if (sb_rdonly(sb) || !test_opt(sb, INIT_INODE_TABLE))
6682 ext4_unregister_li_request(sb);
6683 else {
6684 ext4_group_t first_not_zeroed;
6685 first_not_zeroed = ext4_has_uninit_itable(sb);
6686 ext4_register_li_request(sb, first_not_zeroed);
6687 }
6688
6689 if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb))
6690 ext4_stop_mmpd(sbi);
6691
6692 return 0;
6693
6694 restore_opts:
6695 /*
6696 * If there was a failing r/w to ro transition, we may need to
6697 * re-enable quota
6698 */
6699 if (sb_rdonly(sb) && !(old_sb_flags & SB_RDONLY) &&
6700 sb_any_quota_suspended(sb))
6701 dquot_resume(sb, -1);
6702
6703 alloc_ctx = ext4_writepages_down_write(sb);
6704 sb->s_flags = old_sb_flags;
6705 sbi->s_mount_opt = old_opts.s_mount_opt;
6706 sbi->s_mount_opt2 = old_opts.s_mount_opt2;
6707 sbi->s_resuid = old_opts.s_resuid;
6708 sbi->s_resgid = old_opts.s_resgid;
6709 sbi->s_commit_interval = old_opts.s_commit_interval;
6710 sbi->s_min_batch_time = old_opts.s_min_batch_time;
6711 sbi->s_max_batch_time = old_opts.s_max_batch_time;
6712 ext4_writepages_up_write(sb, alloc_ctx);
6713
6714 if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
6715 ext4_release_system_zone(sb);
6716 #ifdef CONFIG_QUOTA
6717 sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
6718 for (i = 0; i < EXT4_MAXQUOTAS; i++) {
6719 to_free[i] = get_qf_name(sb, sbi, i);
6720 rcu_assign_pointer(sbi->s_qf_names[i], old_opts.s_qf_names[i]);
6721 }
6722 synchronize_rcu();
6723 for (i = 0; i < EXT4_MAXQUOTAS; i++)
6724 kfree(to_free[i]);
6725 #endif
6726 if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb))
6727 ext4_stop_mmpd(sbi);
6728 return err;
6729 }
6730
6731 static int ext4_reconfigure(struct fs_context *fc)
6732 {
6733 struct super_block *sb = fc->root->d_sb;
6734 int ret;
6735
6736 fc->s_fs_info = EXT4_SB(sb);
6737
6738 ret = ext4_check_opt_consistency(fc, sb);
6739 if (ret < 0)
6740 return ret;
6741
6742 ret = __ext4_remount(fc, sb);
6743 if (ret < 0)
6744 return ret;
6745
6746 ext4_msg(sb, KERN_INFO, "re-mounted %pU %s. Quota mode: %s.",
6747 &sb->s_uuid, sb_rdonly(sb) ? "ro" : "r/w",
6748 ext4_quota_mode(sb));
6749
6750 return 0;
6751 }
6752
6753 #ifdef CONFIG_QUOTA
6754 static int ext4_statfs_project(struct super_block *sb,
6755 kprojid_t projid, struct kstatfs *buf)
6756 {
6757 struct kqid qid;
6758 struct dquot *dquot;
6759 u64 limit;
6760 u64 curblock;
6761
6762 qid = make_kqid_projid(projid);
6763 dquot = dqget(sb, qid);
6764 if (IS_ERR(dquot))
6765 return PTR_ERR(dquot);
6766 spin_lock(&dquot->dq_dqb_lock);
6767
6768 limit = min_not_zero(dquot->dq_dqb.dqb_bsoftlimit,
6769 dquot->dq_dqb.dqb_bhardlimit);
6770 limit >>= sb->s_blocksize_bits;
6771
6772 if (limit && buf->f_blocks > limit) {
6773 curblock = (dquot->dq_dqb.dqb_curspace +
6774 dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits;
6775 buf->f_blocks = limit;
6776 buf->f_bfree = buf->f_bavail =
6777 (buf->f_blocks > curblock) ?
6778 (buf->f_blocks - curblock) : 0;
6779 }
6780
6781 limit = min_not_zero(dquot->dq_dqb.dqb_isoftlimit,
6782 dquot->dq_dqb.dqb_ihardlimit);
6783 if (limit && buf->f_files > limit) {
6784 buf->f_files = limit;
6785 buf->f_ffree =
6786 (buf->f_files > dquot->dq_dqb.dqb_curinodes) ?
6787 (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
6788 }
6789
6790 spin_unlock(&dquot->dq_dqb_lock);
6791 dqput(dquot);
6792 return 0;
6793 }
6794 #endif
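/*
 * Worked example: a project block soft limit of 1 GiB on a 4kB-block
 * filesystem yields limit = (1 << 30) >> 12 = 262144 blocks; if the
 * project already uses 200000 blocks, statfs reports f_blocks = 262144
 * and f_bfree = f_bavail = 62144 for files carrying that project ID.
 */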
6795
6796 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
6797 {
6798 struct super_block *sb = dentry->d_sb;
6799 struct ext4_sb_info *sbi = EXT4_SB(sb);
6800 struct ext4_super_block *es = sbi->s_es;
6801 ext4_fsblk_t overhead = 0, resv_blocks;
6802 s64 bfree;
6803 resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));
6804
6805 if (!test_opt(sb, MINIX_DF))
6806 overhead = sbi->s_overhead;
6807
6808 buf->f_type = EXT4_SUPER_MAGIC;
6809 buf->f_bsize = sb->s_blocksize;
6810 buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, overhead);
6811 bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
6812 percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
6813	/* prevent underflow in case little free space is available */
6814 buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
6815 buf->f_bavail = buf->f_bfree -
6816 (ext4_r_blocks_count(es) + resv_blocks);
6817 if (buf->f_bfree < (ext4_r_blocks_count(es) + resv_blocks))
6818 buf->f_bavail = 0;
6819 buf->f_files = le32_to_cpu(es->s_inodes_count);
6820 buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
6821 buf->f_namelen = EXT4_NAME_LEN;
6822 buf->f_fsid = uuid_to_fsid(es->s_uuid);
6823
6824 #ifdef CONFIG_QUOTA
6825 if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
6826 sb_has_quota_limits_enabled(sb, PRJQUOTA))
6827 ext4_statfs_project(sb, EXT4_I(dentry->d_inode)->i_projid, buf);
6828 #endif
6829 return 0;
6830 }
6831
6832
6833 #ifdef CONFIG_QUOTA
6834
6835 /*
6836	 * Helper functions so that a transaction is started before we acquire
6837	 * dqio_sem, keeping the correct lock ordering of transaction > dqio_sem
6838 */
static inline struct inode *dquot_to_inode(struct dquot *dquot)
{
	return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
}

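/*
 * The dquot operation wrappers below share one pattern: start a quota
 * transaction, call the generic dquot operation, then stop the handle.
 * The operation's error takes precedence; a failure of ext4_journal_stop()
 * is only reported when the operation itself succeeded.
 */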
static int ext4_write_dquot(struct dquot *dquot)
{
	int ret, err;
	handle_t *handle;
	struct inode *inode;

	inode = dquot_to_inode(dquot);
	handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
				    EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
	if (IS_ERR(handle))
		return PTR_ERR(handle);
	ret = dquot_commit(dquot);
	if (ret < 0)
		ext4_error_err(dquot->dq_sb, -ret,
			       "Failed to commit dquot type %d",
			       dquot->dq_id.type);
	err = ext4_journal_stop(handle);
	if (!ret)
		ret = err;
	return ret;
}

static int ext4_acquire_dquot(struct dquot *dquot)
{
	int ret, err;
	handle_t *handle;

	handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
				    EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
	if (IS_ERR(handle))
		return PTR_ERR(handle);
	ret = dquot_acquire(dquot);
	if (ret < 0)
		ext4_error_err(dquot->dq_sb, -ret,
			       "Failed to acquire dquot type %d",
			       dquot->dq_id.type);
	err = ext4_journal_stop(handle);
	if (!ret)
		ret = err;
	return ret;
}

static int ext4_release_dquot(struct dquot *dquot)
{
	int ret, err;
	handle_t *handle;

	handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
				    EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
	if (IS_ERR(handle)) {
		/* Release dquot anyway to avoid endless cycle in dqput() */
		dquot_release(dquot);
		return PTR_ERR(handle);
	}
	ret = dquot_release(dquot);
	if (ret < 0)
		ext4_error_err(dquot->dq_sb, -ret,
			       "Failed to release dquot type %d",
			       dquot->dq_id.type);
	err = ext4_journal_stop(handle);
	if (!ret)
		ret = err;
	return ret;
}

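/*
 * With journalled quota, write the dquot through the journal right away;
 * otherwise just mark it dirty and let the quota subsystem write it back
 * later.
 */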
static int ext4_mark_dquot_dirty(struct dquot *dquot)
{
	struct super_block *sb = dquot->dq_sb;

	if (ext4_is_quota_journalled(sb)) {
		dquot_mark_dquot_dirty(dquot);
		return ext4_write_dquot(dquot);
	} else {
		return dquot_mark_dquot_dirty(dquot);
	}
}

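/*
 * Write quota file header information (e.g. grace times) under a
 * transaction.
 */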
static int ext4_write_info(struct super_block *sb, int type)
{
	int ret, err;
	handle_t *handle;

	/* Data block + inode block */
	handle = ext4_journal_start_sb(sb, EXT4_HT_QUOTA, 2);
	if (IS_ERR(handle))
		return PTR_ERR(handle);
	ret = dquot_commit_info(sb, type);
	err = ext4_journal_stop(handle);
	if (!ret)
		ret = err;
	return ret;
}

static void lockdep_set_quota_inode(struct inode *inode, int subclass)
{
	struct ext4_inode_info *ei = EXT4_I(inode);

	/* The first argument of lockdep_set_subclass has to be
	 * *exactly* the same as the argument to init_rwsem() --- in
	 * this case, in init_once() --- or lockdep gets unhappy
	 * because the name of the lock is set using the
	 * stringification of the argument to init_rwsem().
	 */
	(void) ei;	/* shut up clang warning if !CONFIG_LOCKDEP */
	lockdep_set_subclass(&ei->i_data_sem, subclass);
}

/*
 * Standard function to be called on quota_on.  Rejects quota files that
 * live on another filesystem or are already in use, and marks the quota
 * file NOATIME and IMMUTABLE so userspace cannot tamper with it.
 */
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
			 const struct path *path)
{
	int err;

	if (!test_opt(sb, QUOTA))
		return -EINVAL;

	/* Quotafile not on the same filesystem? */
	if (path->dentry->d_sb != sb)
		return -EXDEV;

	/* Quota already enabled for this file? */
	if (IS_NOQUOTA(d_inode(path->dentry)))
		return -EBUSY;

	/* Journaling quota? */
	if (EXT4_SB(sb)->s_qf_names[type]) {
		/* Quotafile not in fs root? */
		if (path->dentry->d_parent != sb->s_root)
			ext4_msg(sb, KERN_WARNING,
				 "Quota file not on filesystem root. "
				 "Journaled quota will not work");
		sb_dqopt(sb)->flags |= DQUOT_NOLIST_DIRTY;
	} else {
		/*
		 * Clear the flag just in case mount options changed since
		 * last time.
		 */
		sb_dqopt(sb)->flags &= ~DQUOT_NOLIST_DIRTY;
	}

	lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA);
	err = dquot_quota_on(sb, type, format_id, path);
	if (!err) {
		struct inode *inode = d_inode(path->dentry);
		handle_t *handle;

		/*
		 * Set inode flags to prevent userspace from messing with quota
		 * files. If this fails, we return success anyway since quotas
		 * are already enabled and this is not a hard failure.
		 */
		inode_lock(inode);
		handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
		if (IS_ERR(handle))
			goto unlock_inode;
		EXT4_I(inode)->i_flags |= EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL;
		inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
				S_NOATIME | S_IMMUTABLE);
		err = ext4_mark_inode_dirty(handle, inode);
		ext4_journal_stop(handle);
	unlock_inode:
		inode_unlock(inode);
		if (err)
			dquot_quota_off(sb, type);
	}
	if (err)
		lockdep_set_quota_inode(path->dentry->d_inode,
					I_DATA_SEM_NORMAL);
	return err;
}

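/*
 * User and group quota files must live in their fixed reserved inodes;
 * the project quota inode merely has to lie outside the reserved range.
 */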
static inline bool ext4_check_quota_inum(int type, unsigned long qf_inum)
{
	switch (type) {
	case USRQUOTA:
		return qf_inum == EXT4_USR_QUOTA_INO;
	case GRPQUOTA:
		return qf_inum == EXT4_GRP_QUOTA_INO;
	case PRJQUOTA:
		return qf_inum >= EXT4_GOOD_OLD_FIRST_INO;
	default:
		BUG();
	}
}

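/*
 * Enable quota accounting on one of the hidden quota inodes recorded in
 * the superblock.  Used with the QUOTA feature, where quota files are
 * system inodes rather than visible files, so the path-based
 * ext4_quota_on() does not apply.
 */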
static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
			     unsigned int flags)
{
	int err;
	struct inode *qf_inode;
	unsigned long qf_inums[EXT4_MAXQUOTAS] = {
		le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
		le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
		le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
	};

	BUG_ON(!ext4_has_feature_quota(sb));

	if (!qf_inums[type])
		return -EPERM;

	if (!ext4_check_quota_inum(type, qf_inums[type])) {
		ext4_error(sb, "Bad quota inum: %lu, type: %d",
			   qf_inums[type], type);
		return -EUCLEAN;
	}

	qf_inode = ext4_iget(sb, qf_inums[type], EXT4_IGET_SPECIAL);
	if (IS_ERR(qf_inode)) {
		ext4_error(sb, "Bad quota inode: %lu, type: %d",
			   qf_inums[type], type);
		return PTR_ERR(qf_inode);
	}

	/* Don't account quota for quota files to avoid recursion */
	qf_inode->i_flags |= S_NOQUOTA;
	lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA);
	err = dquot_load_quota_inode(qf_inode, type, format_id, flags);
	if (err)
		lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL);
	iput(qf_inode);

	return err;
}

/* Enable usage tracking for all quota types. */
int ext4_enable_quotas(struct super_block *sb)
{
	int type, err = 0;
	unsigned long qf_inums[EXT4_MAXQUOTAS] = {
		le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
		le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
		le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
	};
	bool quota_mopt[EXT4_MAXQUOTAS] = {
		test_opt(sb, USRQUOTA),
		test_opt(sb, GRPQUOTA),
		test_opt(sb, PRJQUOTA),
	};

	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
	for (type = 0; type < EXT4_MAXQUOTAS; type++) {
		if (qf_inums[type]) {
			err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
				DQUOT_USAGE_ENABLED |
				(quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
			if (err) {
				ext4_warning(sb,
					"Failed to enable quota tracking "
					"(type=%d, err=%d, ino=%lu). "
					"Please run e2fsck to fix.", type,
					err, qf_inums[type]);

				ext4_quotas_off(sb, type);
				return err;
			}
		}
	}
	return 0;
}

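/*
 * Turn quotas off.  For path-based (non-feature) quota files, also clear
 * the NOATIME/IMMUTABLE flags that ext4_quota_on() set and refresh the
 * timestamps so userspace can see that the file changed.
 */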
static int ext4_quota_off(struct super_block *sb, int type)
{
	struct inode *inode = sb_dqopt(sb)->files[type];
	handle_t *handle;
	int err;

	/* Force all delayed allocation blocks to be allocated.
	 * Caller already holds s_umount sem */
	if (test_opt(sb, DELALLOC))
		sync_filesystem(sb);

	if (!inode || !igrab(inode))
		goto out;

	err = dquot_quota_off(sb, type);
	if (err || ext4_has_feature_quota(sb))
		goto out_put;
	/*
	 * If the filesystem was remounted read-only first, we cannot clean up
	 * the inode flags here. Bad luck, but people should be using the
	 * QUOTA feature these days anyway.
	 */
	if (sb_rdonly(sb))
		goto out_put;

	inode_lock(inode);
	/*
	 * Update modification times of quota files when userspace can
	 * start looking at them. If we fail, we return success anyway since
	 * this is not a hard failure and quotas are already disabled.
	 */
	handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
	if (IS_ERR(handle)) {
		err = PTR_ERR(handle);
		goto out_unlock;
	}
	EXT4_I(inode)->i_flags &= ~(EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL);
	inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
	inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
	err = ext4_mark_inode_dirty(handle, inode);
	ext4_journal_stop(handle);
out_unlock:
	inode_unlock(inode);
out_put:
	lockdep_set_quota_inode(inode, I_DATA_SEM_NORMAL);
	iput(inode);
	return err;
out:
	return dquot_quota_off(sb, type);
}


/*
 * Read data from the quota file - avoid the page cache and such because
 * we cannot afford acquiring the locks...  As quota files are never
 * truncated and the quota code itself serializes the operations (and no
 * one else should touch the files) we don't have to be afraid of races.
 */
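/*
 * Illustrative example (assuming 4 KiB blocks): a read at off=5000 starts
 * at blk = 5000 >> 12 = 1, offset = 5000 & 4095 = 904, and the loop below
 * then copies block by block until len bytes have been read.
 */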
static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
			       size_t len, loff_t off)
{
	struct inode *inode = sb_dqopt(sb)->files[type];
	ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
	int offset = off & (sb->s_blocksize - 1);
	int tocopy;
	size_t toread;
	struct buffer_head *bh;
	loff_t i_size = i_size_read(inode);

	if (off > i_size)
		return 0;
	if (off + len > i_size)
		len = i_size - off;
	toread = len;
	while (toread > 0) {
		tocopy = min_t(unsigned long, sb->s_blocksize - offset, toread);
		bh = ext4_bread(NULL, inode, blk, 0);
		if (IS_ERR(bh))
			return PTR_ERR(bh);
		if (!bh)	/* A hole? */
			memset(data, 0, tocopy);
		else
			memcpy(data, bh->b_data + offset, tocopy);
		brelse(bh);
		offset = 0;
		toread -= tocopy;
		data += tocopy;
		blk++;
	}
	return len;
}

/*
 * Write to the quota file (we know the transaction is already started and
 * has enough credits).
 */
static ssize_t ext4_quota_write(struct super_block *sb, int type,
				const char *data, size_t len, loff_t off)
{
	struct inode *inode = sb_dqopt(sb)->files[type];
	ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
	int err = 0, err2 = 0, offset = off & (sb->s_blocksize - 1);
	int retries = 0;
	struct buffer_head *bh;
	handle_t *handle = journal_current_handle();

	if (!handle) {
		ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
			 " cancelled because transaction is not started",
			 (unsigned long long)off, (unsigned long long)len);
		return -EIO;
	}
	/*
	 * Since we account for only one data block in the transaction
	 * credits, it is impossible to cross a block boundary.
	 */
	if (sb->s_blocksize - offset < len) {
		ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
			 " cancelled because not block aligned",
			 (unsigned long long)off, (unsigned long long)len);
		return -EIO;
	}

	do {
		bh = ext4_bread(handle, inode, blk,
				EXT4_GET_BLOCKS_CREATE |
				EXT4_GET_BLOCKS_METADATA_NOFAIL);
	} while (PTR_ERR(bh) == -ENOSPC &&
		 ext4_should_retry_alloc(inode->i_sb, &retries));
	if (IS_ERR(bh))
		return PTR_ERR(bh);
	if (!bh)
		goto out;
	BUFFER_TRACE(bh, "get write access");
	err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE);
	if (err) {
		brelse(bh);
		return err;
	}
	lock_buffer(bh);
	memcpy(bh->b_data + offset, data, len);
	flush_dcache_page(bh->b_page);
	unlock_buffer(bh);
	err = ext4_handle_dirty_metadata(handle, NULL, bh);
	brelse(bh);
out:
	if (inode->i_size < off + len) {
		i_size_write(inode, off + len);
		EXT4_I(inode)->i_disksize = inode->i_size;
		err2 = ext4_mark_inode_dirty(handle, inode);
		if (unlikely(err2 && !err))
			err = err2;
	}
	return err ? err : len;
}
#endif

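/*
 * ext4 can also service ext2 and ext3 filesystems.  The "ext3" alias is
 * registered unconditionally (ext4 is the only ext3 driver), while the
 * "ext2" alias is registered only when ext4 is configured to stand in for
 * a missing ext2 driver (CONFIG_EXT4_USE_FOR_EXT2).  The
 * *_feature_set_ok() helpers refuse feature combinations the emulated
 * filesystem could never have written.
 */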
#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
static inline void register_as_ext2(void)
{
	int err = register_filesystem(&ext2_fs_type);
	if (err)
		printk(KERN_WARNING
		       "EXT4-fs: Unable to register as ext2 (%d)\n", err);
}

static inline void unregister_as_ext2(void)
{
	unregister_filesystem(&ext2_fs_type);
}

static inline int ext2_feature_set_ok(struct super_block *sb)
{
	if (ext4_has_unknown_ext2_incompat_features(sb))
		return 0;
	if (sb_rdonly(sb))
		return 1;
	if (ext4_has_unknown_ext2_ro_compat_features(sb))
		return 0;
	return 1;
}
#else
static inline void register_as_ext2(void) { }
static inline void unregister_as_ext2(void) { }
static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; }
#endif

static inline void register_as_ext3(void)
{
	int err = register_filesystem(&ext3_fs_type);
	if (err)
		printk(KERN_WARNING
		       "EXT4-fs: Unable to register as ext3 (%d)\n", err);
}

static inline void unregister_as_ext3(void)
{
	unregister_filesystem(&ext3_fs_type);
}

static inline int ext3_feature_set_ok(struct super_block *sb)
{
	if (ext4_has_unknown_ext3_incompat_features(sb))
		return 0;
	if (!ext4_has_feature_journal(sb))
		return 0;
	if (sb_rdonly(sb))
		return 1;
	if (ext4_has_unknown_ext3_ro_compat_features(sb))
		return 0;
	return 1;
}

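/*
 * Tear down the superblock.  A reference to an external journal device,
 * if one is open, is dropped only after kill_block_super() so that the
 * journal block device outlives the final superblock shutdown.
 */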
static void ext4_kill_sb(struct super_block *sb)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct file *bdev_file = sbi ? sbi->s_journal_bdev_file : NULL;

	kill_block_super(sb);

	if (bdev_file)
		bdev_fput(bdev_file);
}

static struct file_system_type ext4_fs_type = {
	.owner			= THIS_MODULE,
	.name			= "ext4",
	.init_fs_context	= ext4_init_fs_context,
	.parameters		= ext4_param_specs,
	.kill_sb		= ext4_kill_sb,
	.fs_flags		= FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
};
MODULE_ALIAS_FS("ext4");

/* Shared across all ext4 file systems */
wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];

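/*
 * Module initialization: set up internal caches and subsystems in order,
 * then register the filesystem types.  On failure, the out* labels unwind
 * the completed steps in reverse.
 */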
static int __init ext4_init_fs(void)
{
	int i, err;

	ratelimit_state_init(&ext4_mount_msg_ratelimit, 30 * HZ, 64);
	ext4_li_info = NULL;

	/* Build-time check for flags consistency */
	ext4_check_flag_values();

	for (i = 0; i < EXT4_WQ_HASH_SZ; i++)
		init_waitqueue_head(&ext4__ioend_wq[i]);

	err = ext4_init_es();
	if (err)
		return err;

	err = ext4_init_pending();
	if (err)
		goto out7;

	err = ext4_init_post_read_processing();
	if (err)
		goto out6;

	err = ext4_init_pageio();
	if (err)
		goto out5;

	err = ext4_init_system_zone();
	if (err)
		goto out4;

	err = ext4_init_sysfs();
	if (err)
		goto out3;

	err = ext4_init_mballoc();
	if (err)
		goto out2;

	err = init_inodecache();
	if (err)
		goto out1;

	err = ext4_fc_init_dentry_cache();
	if (err)
		goto out05;

	register_as_ext3();
	register_as_ext2();
	err = register_filesystem(&ext4_fs_type);
	if (err)
		goto out;

	return 0;
out:
	unregister_as_ext2();
	unregister_as_ext3();
	ext4_fc_destroy_dentry_cache();
out05:
	destroy_inodecache();
out1:
	ext4_exit_mballoc();
out2:
	ext4_exit_sysfs();
out3:
	ext4_exit_system_zone();
out4:
	ext4_exit_pageio();
out5:
	ext4_exit_post_read_processing();
out6:
	ext4_exit_pending();
out7:
	ext4_exit_es();

	return err;
}

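/*
 * Module exit: stop the lazy-init machinery, unregister the filesystem
 * types, and tear down everything ext4_init_fs() set up.
 */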
static void __exit ext4_exit_fs(void)
{
	ext4_destroy_lazyinit_thread();
	unregister_as_ext2();
	unregister_as_ext3();
	unregister_filesystem(&ext4_fs_type);
	ext4_fc_destroy_dentry_cache();
	destroy_inodecache();
	ext4_exit_mballoc();
	ext4_exit_sysfs();
	ext4_exit_system_zone();
	ext4_exit_pageio();
	ext4_exit_post_read_processing();
	ext4_exit_es();
	ext4_exit_pending();
}

MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
MODULE_DESCRIPTION("Fourth Extended Filesystem");
MODULE_LICENSE("GPL");
MODULE_SOFTDEP("pre: crc32c");
module_init(ext4_init_fs)
module_exit(ext4_exit_fs)