xref: /linux/fs/erofs/super.c (revision 3607ac37a4f378cd5f673d6bdb3776e45a899e2c)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2017-2018 HUAWEI, Inc.
4  *             https://www.huawei.com/
5  * Copyright (C) 2021, Alibaba Cloud
6  */
7 #include <linux/statfs.h>
8 #include <linux/seq_file.h>
9 #include <linux/crc32c.h>
10 #include <linux/fs_context.h>
11 #include <linux/fs_parser.h>
12 #include <linux/exportfs.h>
13 #include "xattr.h"
14 
15 #define CREATE_TRACE_POINTS
16 #include <trace/events/erofs.h>
17 
18 static struct kmem_cache *erofs_inode_cachep __read_mostly;
19 
20 void _erofs_err(struct super_block *sb, const char *func, const char *fmt, ...)
21 {
22 	struct va_format vaf;
23 	va_list args;
24 
25 	va_start(args, fmt);
26 
27 	vaf.fmt = fmt;
28 	vaf.va = &args;
29 
30 	pr_err("(device %s): %s: %pV", sb->s_id, func, &vaf);
31 	va_end(args);
32 }
33 
34 void _erofs_info(struct super_block *sb, const char *func, const char *fmt, ...)
35 {
36 	struct va_format vaf;
37 	va_list args;
38 
39 	va_start(args, fmt);
40 
41 	vaf.fmt = fmt;
42 	vaf.va = &args;
43 
44 	pr_info("(device %s): %pV", sb->s_id, &vaf);
45 	va_end(args);
46 }
47 
48 static int erofs_superblock_csum_verify(struct super_block *sb, void *sbdata)
49 {
50 	size_t len = 1 << EROFS_SB(sb)->blkszbits;
51 	struct erofs_super_block *dsb;
52 	u32 expected_crc, crc;
53 
54 	if (len > EROFS_SUPER_OFFSET)
55 		len -= EROFS_SUPER_OFFSET;
56 
57 	dsb = kmemdup(sbdata + EROFS_SUPER_OFFSET, len, GFP_KERNEL);
58 	if (!dsb)
59 		return -ENOMEM;
60 
61 	expected_crc = le32_to_cpu(dsb->checksum);
62 	dsb->checksum = 0;
63 	/* superblock sits past x86 boot sectors; CRC covers it with the checksum field cleared */
64 	crc = crc32c(~0, dsb, len);
65 	kfree(dsb);
66 
67 	if (crc != expected_crc) {
68 		erofs_err(sb, "invalid checksum 0x%08x, 0x%08x expected",
69 			  crc, expected_crc);
70 		return -EBADMSG;
71 	}
72 	return 0;
73 }
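/*
 * In short (a sketch of the check above, not a format spec): the remainder
 * of the first fs block past EROFS_SUPER_OFFSET is copied, the on-disk
 * 'checksum' field is cleared, and crc32c() seeded with ~0 must reproduce
 * the stored value; otherwise the mount fails with -EBADMSG.
 */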
74 
75 static void erofs_inode_init_once(void *ptr)
76 {
77 	struct erofs_inode *vi = ptr;
78 
79 	inode_init_once(&vi->vfs_inode);
80 }
81 
82 static struct inode *erofs_alloc_inode(struct super_block *sb)
83 {
84 	struct erofs_inode *vi =
85 		alloc_inode_sb(sb, erofs_inode_cachep, GFP_KERNEL);
86 
87 	if (!vi)
88 		return NULL;
89 
90 	/* zero out everything except vfs_inode */
91 	memset(vi, 0, offsetof(struct erofs_inode, vfs_inode));
92 	return &vi->vfs_inode;
93 }
94 
95 static void erofs_free_inode(struct inode *inode)
96 {
97 	struct erofs_inode *vi = EROFS_I(inode);
98 
99 	if (inode->i_op == &erofs_fast_symlink_iops)
100 		kfree(inode->i_link);
101 	kfree(vi->xattr_shared_xattrs);
102 	kmem_cache_free(erofs_inode_cachep, vi);
103 }
104 
105 static bool check_layout_compatibility(struct super_block *sb,
106 				       struct erofs_super_block *dsb)
107 {
108 	const unsigned int feature = le32_to_cpu(dsb->feature_incompat);
109 
110 	EROFS_SB(sb)->feature_incompat = feature;
111 
112 	/* check if current kernel meets all mandatory requirements */
113 	if (feature & (~EROFS_ALL_FEATURE_INCOMPAT)) {
114 		erofs_err(sb, "unidentified incompatible feature %x, please upgrade kernel",
115 			   feature & ~EROFS_ALL_FEATURE_INCOMPAT);
116 		return false;
117 	}
118 	return true;
119 }
120 
121 /* read variable-sized metadata, offset will be aligned to a 4-byte boundary */
122 void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf,
123 			  erofs_off_t *offset, int *lengthp)
124 {
125 	u8 *buffer, *ptr;
126 	int len, i, cnt;
127 
128 	*offset = round_up(*offset, 4);
129 	ptr = erofs_bread(buf, erofs_blknr(sb, *offset), EROFS_KMAP);
130 	if (IS_ERR(ptr))
131 		return ptr;
132 
133 	len = le16_to_cpu(*(__le16 *)&ptr[erofs_blkoff(sb, *offset)]);
134 	if (!len)
135 		len = U16_MAX + 1;
136 	buffer = kmalloc(len, GFP_KERNEL);
137 	if (!buffer)
138 		return ERR_PTR(-ENOMEM);
139 	*offset += sizeof(__le16);
140 	*lengthp = len;
141 
142 	for (i = 0; i < len; i += cnt) {
143 		cnt = min_t(int, sb->s_blocksize - erofs_blkoff(sb, *offset),
144 			    len - i);
145 		ptr = erofs_bread(buf, erofs_blknr(sb, *offset), EROFS_KMAP);
146 		if (IS_ERR(ptr)) {
147 			kfree(buffer);
148 			return ptr;
149 		}
150 		memcpy(buffer + i, ptr + erofs_blkoff(sb, *offset), cnt);
151 		*offset += cnt;
152 	}
153 	return buffer;
154 }
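/*
 * Shape of the records erofs_read_metadata() consumes, as implied by the
 * code above (a sketch, not an authoritative on-disk definition):
 *
 *	__le16 len;	   4-byte aligned; a stored 0 means 65536 bytes
 *	u8     data[len];  may straddle fs block boundaries
 *
 * The payload is copied block by block into a kmalloc'd buffer, *offset is
 * advanced past the record, and the caller owns (and frees) the buffer.
 */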
155 
156 #ifndef CONFIG_EROFS_FS_ZIP
157 static int z_erofs_parse_cfgs(struct super_block *sb,
158 			      struct erofs_super_block *dsb)
159 {
160 	if (!dsb->u1.available_compr_algs)
161 		return 0;
162 
163 	erofs_err(sb, "compression disabled, unable to mount compressed EROFS");
164 	return -EOPNOTSUPP;
165 }
166 #endif
167 
168 static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb,
169 			     struct erofs_device_info *dif, erofs_off_t *pos)
170 {
171 	struct erofs_sb_info *sbi = EROFS_SB(sb);
172 	struct erofs_fscache *fscache;
173 	struct erofs_deviceslot *dis;
174 	struct bdev_handle *bdev_handle;
175 	void *ptr;
176 
177 	ptr = erofs_read_metabuf(buf, sb, erofs_blknr(sb, *pos), EROFS_KMAP);
178 	if (IS_ERR(ptr))
179 		return PTR_ERR(ptr);
180 	dis = ptr + erofs_blkoff(sb, *pos);
181 
182 	if (!sbi->devs->flatdev && !dif->path) {
183 		if (!dis->tag[0]) {
184 			erofs_err(sb, "empty device tag @ pos %llu", *pos);
185 			return -EINVAL;
186 		}
187 		dif->path = kmemdup_nul(dis->tag, sizeof(dis->tag), GFP_KERNEL);
188 		if (!dif->path)
189 			return -ENOMEM;
190 	}
191 
192 	if (erofs_is_fscache_mode(sb)) {
193 		fscache = erofs_fscache_register_cookie(sb, dif->path, 0);
194 		if (IS_ERR(fscache))
195 			return PTR_ERR(fscache);
196 		dif->fscache = fscache;
197 	} else if (!sbi->devs->flatdev) {
198 		bdev_handle = bdev_open_by_path(dif->path, BLK_OPEN_READ,
199 						sb->s_type, NULL);
200 		if (IS_ERR(bdev_handle))
201 			return PTR_ERR(bdev_handle);
202 		dif->bdev_handle = bdev_handle;
203 		dif->dax_dev = fs_dax_get_by_bdev(bdev_handle->bdev,
204 				&dif->dax_part_off, NULL, NULL);
205 	}
206 
207 	dif->blocks = le32_to_cpu(dis->blocks);
208 	dif->mapped_blkaddr = le32_to_cpu(dis->mapped_blkaddr);
209 	sbi->total_blocks += dif->blocks;
210 	*pos += EROFS_DEVT_SLOT_SIZE;
211 	return 0;
212 }
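/*
 * Each slot parsed by erofs_init_device() is EROFS_DEVT_SLOT_SIZE bytes and
 * provides a tag, a block count and a mapped block address (a summary of the
 * code above, not of the on-disk spec).  The tag becomes the device path
 * unless one was already supplied via 'device='; in fscache mode it names a
 * cookie instead, and in flatdev mode no extra device is opened at all.
 */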
213 
214 static int erofs_scan_devices(struct super_block *sb,
215 			      struct erofs_super_block *dsb)
216 {
217 	struct erofs_sb_info *sbi = EROFS_SB(sb);
218 	unsigned int ondisk_extradevs;
219 	erofs_off_t pos;
220 	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
221 	struct erofs_device_info *dif;
222 	int id, err = 0;
223 
224 	sbi->total_blocks = sbi->primarydevice_blocks;
225 	if (!erofs_sb_has_device_table(sbi))
226 		ondisk_extradevs = 0;
227 	else
228 		ondisk_extradevs = le16_to_cpu(dsb->extra_devices);
229 
230 	if (sbi->devs->extra_devices &&
231 	    ondisk_extradevs != sbi->devs->extra_devices) {
232 		erofs_err(sb, "extra devices don't match (ondisk %u, given %u)",
233 			  ondisk_extradevs, sbi->devs->extra_devices);
234 		return -EINVAL;
235 	}
236 	if (!ondisk_extradevs)
237 		return 0;
238 
239 	if (!sbi->devs->extra_devices && !erofs_is_fscache_mode(sb))
240 		sbi->devs->flatdev = true;
241 
242 	sbi->device_id_mask = roundup_pow_of_two(ondisk_extradevs + 1) - 1;
243 	pos = le16_to_cpu(dsb->devt_slotoff) * EROFS_DEVT_SLOT_SIZE;
244 	down_read(&sbi->devs->rwsem);
245 	if (sbi->devs->extra_devices) {
246 		idr_for_each_entry(&sbi->devs->tree, dif, id) {
247 			err = erofs_init_device(&buf, sb, dif, &pos);
248 			if (err)
249 				break;
250 		}
251 	} else {
252 		for (id = 0; id < ondisk_extradevs; id++) {
253 			dif = kzalloc(sizeof(*dif), GFP_KERNEL);
254 			if (!dif) {
255 				err = -ENOMEM;
256 				break;
257 			}
258 
259 			err = idr_alloc(&sbi->devs->tree, dif, 0, 0, GFP_KERNEL);
260 			if (err < 0) {
261 				kfree(dif);
262 				break;
263 			}
264 			++sbi->devs->extra_devices;
265 
266 			err = erofs_init_device(&buf, sb, dif, &pos);
267 			if (err)
268 				break;
269 		}
270 	}
271 	up_read(&sbi->devs->rwsem);
272 	erofs_put_metabuf(&buf);
273 	return err;
274 }
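/*
 * Two paths through erofs_scan_devices() above: entries created from
 * 'device=' options are filled in from successive slots of the on-disk
 * device table, while otherwise (flatdev or fscache mode) the entries are
 * allocated here directly from that table.  The on-disk extra-device count
 * must match any user-supplied count exactly.
 */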
275 
276 static int erofs_read_superblock(struct super_block *sb)
277 {
278 	struct erofs_sb_info *sbi;
279 	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
280 	struct erofs_super_block *dsb;
281 	void *data;
282 	int ret;
283 
284 	data = erofs_read_metabuf(&buf, sb, 0, EROFS_KMAP);
285 	if (IS_ERR(data)) {
286 		erofs_err(sb, "cannot read erofs superblock");
287 		return PTR_ERR(data);
288 	}
289 
290 	sbi = EROFS_SB(sb);
291 	dsb = (struct erofs_super_block *)(data + EROFS_SUPER_OFFSET);
292 
293 	ret = -EINVAL;
294 	if (le32_to_cpu(dsb->magic) != EROFS_SUPER_MAGIC_V1) {
295 		erofs_err(sb, "cannot find valid erofs superblock");
296 		goto out;
297 	}
298 
299 	sbi->blkszbits  = dsb->blkszbits;
300 	if (sbi->blkszbits < 9 || sbi->blkszbits > PAGE_SHIFT) {
301 		erofs_err(sb, "blkszbits %u isn't supported", sbi->blkszbits);
302 		goto out;
303 	}
304 	if (dsb->dirblkbits) {
305 		erofs_err(sb, "dirblkbits %u isn't supported", dsb->dirblkbits);
306 		goto out;
307 	}
308 
309 	sbi->feature_compat = le32_to_cpu(dsb->feature_compat);
310 	if (erofs_sb_has_sb_chksum(sbi)) {
311 		ret = erofs_superblock_csum_verify(sb, data);
312 		if (ret)
313 			goto out;
314 	}
315 
316 	ret = -EINVAL;
317 	if (!check_layout_compatibility(sb, dsb))
318 		goto out;
319 
320 	sbi->sb_size = 128 + dsb->sb_extslots * EROFS_SB_EXTSLOT_SIZE;
321 	if (sbi->sb_size > PAGE_SIZE - EROFS_SUPER_OFFSET) {
322 		erofs_err(sb, "invalid sb_extslots %u (more than a fs block)",
323 			  sbi->sb_size);
324 		goto out;
325 	}
326 	sbi->primarydevice_blocks = le32_to_cpu(dsb->blocks);
327 	sbi->meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr);
328 #ifdef CONFIG_EROFS_FS_XATTR
329 	sbi->xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr);
330 	sbi->xattr_prefix_start = le32_to_cpu(dsb->xattr_prefix_start);
331 	sbi->xattr_prefix_count = dsb->xattr_prefix_count;
332 	sbi->xattr_filter_reserved = dsb->xattr_filter_reserved;
333 #endif
334 	sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact));
335 	sbi->root_nid = le16_to_cpu(dsb->root_nid);
336 	sbi->packed_nid = le64_to_cpu(dsb->packed_nid);
337 	sbi->inos = le64_to_cpu(dsb->inos);
338 
339 	sbi->build_time = le64_to_cpu(dsb->build_time);
340 	sbi->build_time_nsec = le32_to_cpu(dsb->build_time_nsec);
341 
342 	memcpy(&sb->s_uuid, dsb->uuid, sizeof(dsb->uuid));
343 
344 	ret = strscpy(sbi->volume_name, dsb->volume_name,
345 		      sizeof(dsb->volume_name));
346 	if (ret < 0) {	/* -E2BIG */
347 		erofs_err(sb, "bad volume name without NUL terminator");
348 		ret = -EFSCORRUPTED;
349 		goto out;
350 	}
351 
352 	/* parse on-disk compression configurations */
353 	ret = z_erofs_parse_cfgs(sb, dsb);
354 	if (ret < 0)
355 		goto out;
356 
357 	/* handle multiple devices */
358 	ret = erofs_scan_devices(sb, dsb);
359 
360 	if (erofs_is_fscache_mode(sb))
361 		erofs_info(sb, "EXPERIMENTAL fscache-based on-demand read feature in use. Use at your own risk!");
362 out:
363 	erofs_put_metabuf(&buf);
364 	return ret;
365 }
366 
367 static void erofs_default_options(struct erofs_fs_context *ctx)
368 {
369 #ifdef CONFIG_EROFS_FS_ZIP
370 	ctx->opt.cache_strategy = EROFS_ZIP_CACHE_READAROUND;
371 	ctx->opt.max_sync_decompress_pages = 3;
372 	ctx->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_AUTO;
373 #endif
374 #ifdef CONFIG_EROFS_FS_XATTR
375 	set_opt(&ctx->opt, XATTR_USER);
376 #endif
377 #ifdef CONFIG_EROFS_FS_POSIX_ACL
378 	set_opt(&ctx->opt, POSIX_ACL);
379 #endif
380 }
381 
382 enum {
383 	Opt_user_xattr,
384 	Opt_acl,
385 	Opt_cache_strategy,
386 	Opt_dax,
387 	Opt_dax_enum,
388 	Opt_device,
389 	Opt_fsid,
390 	Opt_domain_id,
391 	Opt_err
392 };
393 
394 static const struct constant_table erofs_param_cache_strategy[] = {
395 	{"disabled",	EROFS_ZIP_CACHE_DISABLED},
396 	{"readahead",	EROFS_ZIP_CACHE_READAHEAD},
397 	{"readaround",	EROFS_ZIP_CACHE_READAROUND},
398 	{}
399 };
400 
401 static const struct constant_table erofs_dax_param_enums[] = {
402 	{"always",	EROFS_MOUNT_DAX_ALWAYS},
403 	{"never",	EROFS_MOUNT_DAX_NEVER},
404 	{}
405 };
406 
407 static const struct fs_parameter_spec erofs_fs_parameters[] = {
408 	fsparam_flag_no("user_xattr",	Opt_user_xattr),
409 	fsparam_flag_no("acl",		Opt_acl),
410 	fsparam_enum("cache_strategy",	Opt_cache_strategy,
411 		     erofs_param_cache_strategy),
412 	fsparam_flag("dax",             Opt_dax),
413 	fsparam_enum("dax",		Opt_dax_enum, erofs_dax_param_enums),
414 	fsparam_string("device",	Opt_device),
415 	fsparam_string("fsid",		Opt_fsid),
416 	fsparam_string("domain_id",	Opt_domain_id),
417 	{}
418 };
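/*
 * Illustrative mount invocations for the options declared above (device
 * names and identifiers below are placeholders, not defaults):
 *
 *	mount -t erofs -o noacl,nouser_xattr /dev/vdb /mnt/erofs
 *	mount -t erofs -o cache_strategy=readahead,dax=always /dev/vdb /mnt/erofs
 *	mount -t erofs -o device=/dev/vdc,device=/dev/vdd /dev/vdb /mnt/erofs
 *	mount -t erofs -o fsid=myimage,domain_id=mydomain none /mnt/erofs
 *
 * The last form only applies when CONFIG_EROFS_FS_ONDEMAND is enabled and
 * data is served through fscache rather than a local block device.
 */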
419 
420 static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode)
421 {
422 #ifdef CONFIG_FS_DAX
423 	struct erofs_fs_context *ctx = fc->fs_private;
424 
425 	switch (mode) {
426 	case EROFS_MOUNT_DAX_ALWAYS:
427 		warnfc(fc, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
428 		set_opt(&ctx->opt, DAX_ALWAYS);
429 		clear_opt(&ctx->opt, DAX_NEVER);
430 		return true;
431 	case EROFS_MOUNT_DAX_NEVER:
432 		set_opt(&ctx->opt, DAX_NEVER);
433 		clear_opt(&ctx->opt, DAX_ALWAYS);
434 		return true;
435 	default:
436 		DBG_BUGON(1);
437 		return false;
438 	}
439 #else
440 	errorfc(fc, "dax options not supported");
441 	return false;
442 #endif
443 }
444 
445 static int erofs_fc_parse_param(struct fs_context *fc,
446 				struct fs_parameter *param)
447 {
448 	struct erofs_fs_context *ctx = fc->fs_private;
449 	struct fs_parse_result result;
450 	struct erofs_device_info *dif;
451 	int opt, ret;
452 
453 	opt = fs_parse(fc, erofs_fs_parameters, param, &result);
454 	if (opt < 0)
455 		return opt;
456 
457 	switch (opt) {
458 	case Opt_user_xattr:
459 #ifdef CONFIG_EROFS_FS_XATTR
460 		if (result.boolean)
461 			set_opt(&ctx->opt, XATTR_USER);
462 		else
463 			clear_opt(&ctx->opt, XATTR_USER);
464 #else
465 		errorfc(fc, "{,no}user_xattr options not supported");
466 #endif
467 		break;
468 	case Opt_acl:
469 #ifdef CONFIG_EROFS_FS_POSIX_ACL
470 		if (result.boolean)
471 			set_opt(&ctx->opt, POSIX_ACL);
472 		else
473 			clear_opt(&ctx->opt, POSIX_ACL);
474 #else
475 		errorfc(fc, "{,no}acl options not supported");
476 #endif
477 		break;
478 	case Opt_cache_strategy:
479 #ifdef CONFIG_EROFS_FS_ZIP
480 		ctx->opt.cache_strategy = result.uint_32;
481 #else
482 		errorfc(fc, "compression not supported, cache_strategy ignored");
483 #endif
484 		break;
485 	case Opt_dax:
486 		if (!erofs_fc_set_dax_mode(fc, EROFS_MOUNT_DAX_ALWAYS))
487 			return -EINVAL;
488 		break;
489 	case Opt_dax_enum:
490 		if (!erofs_fc_set_dax_mode(fc, result.uint_32))
491 			return -EINVAL;
492 		break;
493 	case Opt_device:
494 		dif = kzalloc(sizeof(*dif), GFP_KERNEL);
495 		if (!dif)
496 			return -ENOMEM;
497 		dif->path = kstrdup(param->string, GFP_KERNEL);
498 		if (!dif->path) {
499 			kfree(dif);
500 			return -ENOMEM;
501 		}
502 		down_write(&ctx->devs->rwsem);
503 		ret = idr_alloc(&ctx->devs->tree, dif, 0, 0, GFP_KERNEL);
504 		up_write(&ctx->devs->rwsem);
505 		if (ret < 0) {
506 			kfree(dif->path);
507 			kfree(dif);
508 			return ret;
509 		}
510 		++ctx->devs->extra_devices;
511 		break;
512 #ifdef CONFIG_EROFS_FS_ONDEMAND
513 	case Opt_fsid:
514 		kfree(ctx->fsid);
515 		ctx->fsid = kstrdup(param->string, GFP_KERNEL);
516 		if (!ctx->fsid)
517 			return -ENOMEM;
518 		break;
519 	case Opt_domain_id:
520 		kfree(ctx->domain_id);
521 		ctx->domain_id = kstrdup(param->string, GFP_KERNEL);
522 		if (!ctx->domain_id)
523 			return -ENOMEM;
524 		break;
525 #else
526 	case Opt_fsid:
527 	case Opt_domain_id:
528 		errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name);
529 		break;
530 #endif
531 	default:
532 		return -ENOPARAM;
533 	}
534 	return 0;
535 }
536 
537 static struct inode *erofs_nfs_get_inode(struct super_block *sb,
538 					 u64 ino, u32 generation)
539 {
540 	return erofs_iget(sb, ino);
541 }
542 
543 static struct dentry *erofs_fh_to_dentry(struct super_block *sb,
544 		struct fid *fid, int fh_len, int fh_type)
545 {
546 	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
547 				    erofs_nfs_get_inode);
548 }
549 
550 static struct dentry *erofs_fh_to_parent(struct super_block *sb,
551 		struct fid *fid, int fh_len, int fh_type)
552 {
553 	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
554 				    erofs_nfs_get_inode);
555 }
556 
557 static struct dentry *erofs_get_parent(struct dentry *child)
558 {
559 	erofs_nid_t nid;
560 	unsigned int d_type;
561 	int err;
562 
563 	err = erofs_namei(d_inode(child), &dotdot_name, &nid, &d_type);
564 	if (err)
565 		return ERR_PTR(err);
566 	return d_obtain_alias(erofs_iget(child->d_sb, nid));
567 }
568 
569 static const struct export_operations erofs_export_ops = {
570 	.fh_to_dentry = erofs_fh_to_dentry,
571 	.fh_to_parent = erofs_fh_to_parent,
572 	.get_parent = erofs_get_parent,
573 };
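/*
 * NFS export support (a reading of the helpers above): handles are decoded
 * by the generic inode-number helpers, so the nid comes back to
 * erofs_nfs_get_inode() as the inode number, and '..' is resolved on disk
 * via erofs_namei() rather than from the dcache.
 */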
574 
575 static int erofs_fc_fill_pseudo_super(struct super_block *sb, struct fs_context *fc)
576 {
577 	static const struct tree_descr empty_descr = {""};
578 
579 	return simple_fill_super(sb, EROFS_SUPER_MAGIC, &empty_descr);
580 }
581 
582 static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
583 {
584 	struct inode *inode;
585 	struct erofs_sb_info *sbi;
586 	struct erofs_fs_context *ctx = fc->fs_private;
587 	int err;
588 
589 	sb->s_magic = EROFS_SUPER_MAGIC;
590 	sb->s_flags |= SB_RDONLY | SB_NOATIME;
591 	sb->s_maxbytes = MAX_LFS_FILESIZE;
592 	sb->s_op = &erofs_sops;
593 
594 	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
595 	if (!sbi)
596 		return -ENOMEM;
597 
598 	sb->s_fs_info = sbi;
599 	sbi->opt = ctx->opt;
600 	sbi->devs = ctx->devs;
601 	ctx->devs = NULL;
602 	sbi->fsid = ctx->fsid;
603 	ctx->fsid = NULL;
604 	sbi->domain_id = ctx->domain_id;
605 	ctx->domain_id = NULL;
606 
607 	sbi->blkszbits = PAGE_SHIFT;
608 	if (erofs_is_fscache_mode(sb)) {
609 		sb->s_blocksize = PAGE_SIZE;
610 		sb->s_blocksize_bits = PAGE_SHIFT;
611 
612 		err = erofs_fscache_register_fs(sb);
613 		if (err)
614 			return err;
615 
616 		err = super_setup_bdi(sb);
617 		if (err)
618 			return err;
619 	} else {
620 		if (!sb_set_blocksize(sb, PAGE_SIZE)) {
621 			errorfc(fc, "failed to set initial blksize");
622 			return -EINVAL;
623 		}
624 
625 		sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev,
626 						  &sbi->dax_part_off,
627 						  NULL, NULL);
628 	}
629 
630 	err = erofs_read_superblock(sb);
631 	if (err)
632 		return err;
633 
634 	if (sb->s_blocksize_bits != sbi->blkszbits) {
635 		if (erofs_is_fscache_mode(sb)) {
636 			errorfc(fc, "unsupported blksize for fscache mode");
637 			return -EINVAL;
638 		}
639 		if (!sb_set_blocksize(sb, 1 << sbi->blkszbits)) {
640 			errorfc(fc, "failed to set erofs blksize");
641 			return -EINVAL;
642 		}
643 	}
644 
645 	if (test_opt(&sbi->opt, DAX_ALWAYS)) {
646 		if (!sbi->dax_dev) {
647 			errorfc(fc, "DAX unsupported by block device. Turning off DAX.");
648 			clear_opt(&sbi->opt, DAX_ALWAYS);
649 		} else if (sbi->blkszbits != PAGE_SHIFT) {
650 			errorfc(fc, "unsupported blocksize for DAX");
651 			clear_opt(&sbi->opt, DAX_ALWAYS);
652 		}
653 	}
654 
655 	sb->s_time_gran = 1;
656 	sb->s_xattr = erofs_xattr_handlers;
657 	sb->s_export_op = &erofs_export_ops;
658 
659 	if (test_opt(&sbi->opt, POSIX_ACL))
660 		sb->s_flags |= SB_POSIXACL;
661 	else
662 		sb->s_flags &= ~SB_POSIXACL;
663 
664 #ifdef CONFIG_EROFS_FS_ZIP
665 	xa_init(&sbi->managed_pslots);
666 #endif
667 
668 	inode = erofs_iget(sb, sbi->root_nid);
669 	if (IS_ERR(inode))
670 		return PTR_ERR(inode);
671 
672 	if (!S_ISDIR(inode->i_mode)) {
673 		erofs_err(sb, "rootino(nid %llu) is not a directory(i_mode %o)",
674 			  sbi->root_nid, inode->i_mode);
675 		iput(inode);
676 		return -EINVAL;
677 	}
678 
679 	sb->s_root = d_make_root(inode);
680 	if (!sb->s_root)
681 		return -ENOMEM;
682 
683 	erofs_shrinker_register(sb);
684 	if (erofs_sb_has_fragments(sbi) && sbi->packed_nid) {
685 		sbi->packed_inode = erofs_iget(sb, sbi->packed_nid);
686 		if (IS_ERR(sbi->packed_inode)) {
687 			err = PTR_ERR(sbi->packed_inode);
688 			sbi->packed_inode = NULL;
689 			return err;
690 		}
691 	}
692 	err = erofs_init_managed_cache(sb);
693 	if (err)
694 		return err;
695 
696 	err = erofs_xattr_prefixes_init(sb);
697 	if (err)
698 		return err;
699 
700 	err = erofs_register_sysfs(sb);
701 	if (err)
702 		return err;
703 
704 	erofs_info(sb, "mounted with root inode @ nid %llu.", sbi->root_nid);
705 	return 0;
706 }
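/*
 * Rough order of operations in erofs_fc_fill_super() above: take over the
 * parsed context, pick an initial PAGE_SIZE block size (later adjusted to
 * the on-disk blkszbits), read and validate the superblock, sanity-check
 * DAX, instantiate the root directory, register the shrinker, then set up
 * the packed inode, managed cache, xattr prefixes and sysfs entries.
 */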
707 
708 static int erofs_fc_anon_get_tree(struct fs_context *fc)
709 {
710 	return get_tree_nodev(fc, erofs_fc_fill_pseudo_super);
711 }
712 
713 static int erofs_fc_get_tree(struct fs_context *fc)
714 {
715 	struct erofs_fs_context *ctx = fc->fs_private;
716 
717 	if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && ctx->fsid)
718 		return get_tree_nodev(fc, erofs_fc_fill_super);
719 
720 	return get_tree_bdev(fc, erofs_fc_fill_super);
721 }
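/*
 * Mode selection in erofs_fc_get_tree(): with CONFIG_EROFS_FS_ONDEMAND built
 * in and an fsid= supplied, the superblock is created without a block device
 * (get_tree_nodev) and data is served through fscache; otherwise a local
 * block device is required (get_tree_bdev).
 */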
722 
723 static int erofs_fc_reconfigure(struct fs_context *fc)
724 {
725 	struct super_block *sb = fc->root->d_sb;
726 	struct erofs_sb_info *sbi = EROFS_SB(sb);
727 	struct erofs_fs_context *ctx = fc->fs_private;
728 
729 	DBG_BUGON(!sb_rdonly(sb));
730 
731 	if (ctx->fsid || ctx->domain_id)
732 		erofs_info(sb, "ignoring reconfiguration for fsid|domain_id.");
733 
734 	if (test_opt(&ctx->opt, POSIX_ACL))
735 		fc->sb_flags |= SB_POSIXACL;
736 	else
737 		fc->sb_flags &= ~SB_POSIXACL;
738 
739 	sbi->opt = ctx->opt;
740 
741 	fc->sb_flags |= SB_RDONLY;
742 	return 0;
743 }
744 
745 static int erofs_release_device_info(int id, void *ptr, void *data)
746 {
747 	struct erofs_device_info *dif = ptr;
748 
749 	fs_put_dax(dif->dax_dev, NULL);
750 	if (dif->bdev_handle)
751 		bdev_release(dif->bdev_handle);
752 	erofs_fscache_unregister_cookie(dif->fscache);
753 	dif->fscache = NULL;
754 	kfree(dif->path);
755 	kfree(dif);
756 	return 0;
757 }
758 
759 static void erofs_free_dev_context(struct erofs_dev_context *devs)
760 {
761 	if (!devs)
762 		return;
763 	idr_for_each(&devs->tree, &erofs_release_device_info, NULL);
764 	idr_destroy(&devs->tree);
765 	kfree(devs);
766 }
767 
768 static void erofs_fc_free(struct fs_context *fc)
769 {
770 	struct erofs_fs_context *ctx = fc->fs_private;
771 
772 	erofs_free_dev_context(ctx->devs);
773 	kfree(ctx->fsid);
774 	kfree(ctx->domain_id);
775 	kfree(ctx);
776 }
777 
778 static const struct fs_context_operations erofs_context_ops = {
779 	.parse_param	= erofs_fc_parse_param,
780 	.get_tree       = erofs_fc_get_tree,
781 	.reconfigure    = erofs_fc_reconfigure,
782 	.free		= erofs_fc_free,
783 };
784 
785 static const struct fs_context_operations erofs_anon_context_ops = {
786 	.get_tree       = erofs_fc_anon_get_tree,
787 };
788 
789 static int erofs_init_fs_context(struct fs_context *fc)
790 {
791 	struct erofs_fs_context *ctx;
792 
793 	/* pseudo mount for anon inodes */
794 	if (fc->sb_flags & SB_KERNMOUNT) {
795 		fc->ops = &erofs_anon_context_ops;
796 		return 0;
797 	}
798 
799 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
800 	if (!ctx)
801 		return -ENOMEM;
802 	ctx->devs = kzalloc(sizeof(struct erofs_dev_context), GFP_KERNEL);
803 	if (!ctx->devs) {
804 		kfree(ctx);
805 		return -ENOMEM;
806 	}
807 	fc->fs_private = ctx;
808 
809 	idr_init(&ctx->devs->tree);
810 	init_rwsem(&ctx->devs->rwsem);
811 	erofs_default_options(ctx);
812 	fc->ops = &erofs_context_ops;
813 	return 0;
814 }
815 
816 static void erofs_kill_sb(struct super_block *sb)
817 {
818 	struct erofs_sb_info *sbi;
819 
820 	/* pseudo mount for anon inodes */
821 	if (sb->s_flags & SB_KERNMOUNT) {
822 		kill_anon_super(sb);
823 		return;
824 	}
825 
826 	if (erofs_is_fscache_mode(sb))
827 		kill_anon_super(sb);
828 	else
829 		kill_block_super(sb);
830 
831 	sbi = EROFS_SB(sb);
832 	if (!sbi)
833 		return;
834 
835 	erofs_free_dev_context(sbi->devs);
836 	fs_put_dax(sbi->dax_dev, NULL);
837 	erofs_fscache_unregister_fs(sb);
838 	kfree(sbi->fsid);
839 	kfree(sbi->domain_id);
840 	kfree(sbi);
841 	sb->s_fs_info = NULL;
842 }
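/*
 * Teardown note: erofs_kill_sb() also runs for mounts that failed before
 * ->put_super() could, so the device context, dax and fscache references are
 * dropped here as well; erofs_put_super() clears sbi->devs first, which
 * keeps the two paths from freeing the same context twice.
 */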
843 
844 static void erofs_put_super(struct super_block *sb)
845 {
846 	struct erofs_sb_info *const sbi = EROFS_SB(sb);
847 
848 	DBG_BUGON(!sbi);
849 
850 	erofs_unregister_sysfs(sb);
851 	erofs_shrinker_unregister(sb);
852 	erofs_xattr_prefixes_cleanup(sb);
853 #ifdef CONFIG_EROFS_FS_ZIP
854 	iput(sbi->managed_cache);
855 	sbi->managed_cache = NULL;
856 #endif
857 	iput(sbi->packed_inode);
858 	sbi->packed_inode = NULL;
859 	erofs_free_dev_context(sbi->devs);
860 	sbi->devs = NULL;
861 	erofs_fscache_unregister_fs(sb);
862 }
863 
864 struct file_system_type erofs_fs_type = {
865 	.owner          = THIS_MODULE,
866 	.name           = "erofs",
867 	.init_fs_context = erofs_init_fs_context,
868 	.kill_sb        = erofs_kill_sb,
869 	.fs_flags       = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
870 };
871 MODULE_ALIAS_FS("erofs");
872 
873 static int __init erofs_module_init(void)
874 {
875 	int err;
876 
877 	erofs_check_ondisk_layout_definitions();
878 
879 	erofs_inode_cachep = kmem_cache_create("erofs_inode",
880 			sizeof(struct erofs_inode), 0,
881 			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT,
882 			erofs_inode_init_once);
883 	if (!erofs_inode_cachep)
884 		return -ENOMEM;
885 
886 	err = erofs_init_shrinker();
887 	if (err)
888 		goto shrinker_err;
889 
890 	err = z_erofs_lzma_init();
891 	if (err)
892 		goto lzma_err;
893 
894 	err = z_erofs_deflate_init();
895 	if (err)
896 		goto deflate_err;
897 
898 	erofs_pcpubuf_init();
899 	err = z_erofs_init_zip_subsystem();
900 	if (err)
901 		goto zip_err;
902 
903 	err = erofs_init_sysfs();
904 	if (err)
905 		goto sysfs_err;
906 
907 	err = register_filesystem(&erofs_fs_type);
908 	if (err)
909 		goto fs_err;
910 
911 	return 0;
912 
913 fs_err:
914 	erofs_exit_sysfs();
915 sysfs_err:
916 	z_erofs_exit_zip_subsystem();
917 zip_err:
918 	z_erofs_deflate_exit();
919 deflate_err:
920 	z_erofs_lzma_exit();
921 lzma_err:
922 	erofs_exit_shrinker();
923 shrinker_err:
924 	kmem_cache_destroy(erofs_inode_cachep);
925 	return err;
926 }
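/*
 * The error labels above unwind strictly in reverse order of initialization,
 * so a failure at any step only tears down what was already set up.
 */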
927 
928 static void __exit erofs_module_exit(void)
929 {
930 	unregister_filesystem(&erofs_fs_type);
931 
932 	/* Ensure all RCU free inodes / pclusters are safe to be destroyed. */
933 	rcu_barrier();
934 
935 	erofs_exit_sysfs();
936 	z_erofs_exit_zip_subsystem();
937 	z_erofs_deflate_exit();
938 	z_erofs_lzma_exit();
939 	erofs_exit_shrinker();
940 	kmem_cache_destroy(erofs_inode_cachep);
941 	erofs_pcpubuf_exit();
942 }
943 
944 static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
945 {
946 	struct super_block *sb = dentry->d_sb;
947 	struct erofs_sb_info *sbi = EROFS_SB(sb);
948 	u64 id = 0;
949 
950 	if (!erofs_is_fscache_mode(sb))
951 		id = huge_encode_dev(sb->s_bdev->bd_dev);
952 
953 	buf->f_type = sb->s_magic;
954 	buf->f_bsize = sb->s_blocksize;
955 	buf->f_blocks = sbi->total_blocks;
956 	buf->f_bfree = buf->f_bavail = 0;
957 
958 	buf->f_files = ULLONG_MAX;
959 	buf->f_ffree = ULLONG_MAX - sbi->inos;
960 
961 	buf->f_namelen = EROFS_NAME_LEN;
962 
963 	buf->f_fsid    = u64_to_fsid(id);
964 	return 0;
965 }
966 
967 static int erofs_show_options(struct seq_file *seq, struct dentry *root)
968 {
969 	struct erofs_sb_info *sbi = EROFS_SB(root->d_sb);
970 	struct erofs_mount_opts *opt = &sbi->opt;
971 
972 #ifdef CONFIG_EROFS_FS_XATTR
973 	if (test_opt(opt, XATTR_USER))
974 		seq_puts(seq, ",user_xattr");
975 	else
976 		seq_puts(seq, ",nouser_xattr");
977 #endif
978 #ifdef CONFIG_EROFS_FS_POSIX_ACL
979 	if (test_opt(opt, POSIX_ACL))
980 		seq_puts(seq, ",acl");
981 	else
982 		seq_puts(seq, ",noacl");
983 #endif
984 #ifdef CONFIG_EROFS_FS_ZIP
985 	if (opt->cache_strategy == EROFS_ZIP_CACHE_DISABLED)
986 		seq_puts(seq, ",cache_strategy=disabled");
987 	else if (opt->cache_strategy == EROFS_ZIP_CACHE_READAHEAD)
988 		seq_puts(seq, ",cache_strategy=readahead");
989 	else if (opt->cache_strategy == EROFS_ZIP_CACHE_READAROUND)
990 		seq_puts(seq, ",cache_strategy=readaround");
991 #endif
992 	if (test_opt(opt, DAX_ALWAYS))
993 		seq_puts(seq, ",dax=always");
994 	if (test_opt(opt, DAX_NEVER))
995 		seq_puts(seq, ",dax=never");
996 #ifdef CONFIG_EROFS_FS_ONDEMAND
997 	if (sbi->fsid)
998 		seq_printf(seq, ",fsid=%s", sbi->fsid);
999 	if (sbi->domain_id)
1000 		seq_printf(seq, ",domain_id=%s", sbi->domain_id);
1001 #endif
1002 	return 0;
1003 }
1004 
1005 const struct super_operations erofs_sops = {
1006 	.put_super = erofs_put_super,
1007 	.alloc_inode = erofs_alloc_inode,
1008 	.free_inode = erofs_free_inode,
1009 	.statfs = erofs_statfs,
1010 	.show_options = erofs_show_options,
1011 };
1012 
1013 module_init(erofs_module_init);
1014 module_exit(erofs_module_exit);
1015 
1016 MODULE_DESCRIPTION("Enhanced ROM File System");
1017 MODULE_AUTHOR("Gao Xiang, Chao Yu, Miao Xie, CONSUMER BG, HUAWEI Inc.");
1018 MODULE_LICENSE("GPL");
1019