xref: /linux/fs/erofs/super.c (revision 8e07e0e3964ca4e23ce7b68e2096fe660a888942)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2017-2018 HUAWEI, Inc.
4  *             https://www.huawei.com/
5  * Copyright (C) 2021, Alibaba Cloud
6  */
7 #include <linux/statfs.h>
8 #include <linux/seq_file.h>
9 #include <linux/crc32c.h>
10 #include <linux/fs_context.h>
11 #include <linux/fs_parser.h>
12 #include <linux/exportfs.h>
13 #include "xattr.h"
14 
15 #define CREATE_TRACE_POINTS
16 #include <trace/events/erofs.h>
17 
/* slab cache backing every struct erofs_inode; created at module init */
static struct kmem_cache *erofs_inode_cachep __read_mostly;
19 
20 void _erofs_err(struct super_block *sb, const char *func, const char *fmt, ...)
21 {
22 	struct va_format vaf;
23 	va_list args;
24 
25 	va_start(args, fmt);
26 
27 	vaf.fmt = fmt;
28 	vaf.va = &args;
29 
30 	pr_err("(device %s): %s: %pV", sb->s_id, func, &vaf);
31 	va_end(args);
32 }
33 
34 void _erofs_info(struct super_block *sb, const char *func, const char *fmt, ...)
35 {
36 	struct va_format vaf;
37 	va_list args;
38 
39 	va_start(args, fmt);
40 
41 	vaf.fmt = fmt;
42 	vaf.va = &args;
43 
44 	pr_info("(device %s): %pV", sb->s_id, &vaf);
45 	va_end(args);
46 }
47 
48 static int erofs_superblock_csum_verify(struct super_block *sb, void *sbdata)
49 {
50 	size_t len = 1 << EROFS_SB(sb)->blkszbits;
51 	struct erofs_super_block *dsb;
52 	u32 expected_crc, crc;
53 
54 	if (len > EROFS_SUPER_OFFSET)
55 		len -= EROFS_SUPER_OFFSET;
56 
57 	dsb = kmemdup(sbdata + EROFS_SUPER_OFFSET, len, GFP_KERNEL);
58 	if (!dsb)
59 		return -ENOMEM;
60 
61 	expected_crc = le32_to_cpu(dsb->checksum);
62 	dsb->checksum = 0;
63 	/* to allow for x86 boot sectors and other oddities. */
64 	crc = crc32c(~0, dsb, len);
65 	kfree(dsb);
66 
67 	if (crc != expected_crc) {
68 		erofs_err(sb, "invalid checksum 0x%08x, 0x%08x expected",
69 			  crc, expected_crc);
70 		return -EBADMSG;
71 	}
72 	return 0;
73 }
74 
75 static void erofs_inode_init_once(void *ptr)
76 {
77 	struct erofs_inode *vi = ptr;
78 
79 	inode_init_once(&vi->vfs_inode);
80 }
81 
82 static struct inode *erofs_alloc_inode(struct super_block *sb)
83 {
84 	struct erofs_inode *vi =
85 		alloc_inode_sb(sb, erofs_inode_cachep, GFP_KERNEL);
86 
87 	if (!vi)
88 		return NULL;
89 
90 	/* zero out everything except vfs_inode */
91 	memset(vi, 0, offsetof(struct erofs_inode, vfs_inode));
92 	return &vi->vfs_inode;
93 }
94 
95 static void erofs_free_inode(struct inode *inode)
96 {
97 	struct erofs_inode *vi = EROFS_I(inode);
98 
99 	if (inode->i_op == &erofs_fast_symlink_iops)
100 		kfree(inode->i_link);
101 	kfree(vi->xattr_shared_xattrs);
102 	kmem_cache_free(erofs_inode_cachep, vi);
103 }
104 
105 static bool check_layout_compatibility(struct super_block *sb,
106 				       struct erofs_super_block *dsb)
107 {
108 	const unsigned int feature = le32_to_cpu(dsb->feature_incompat);
109 
110 	EROFS_SB(sb)->feature_incompat = feature;
111 
112 	/* check if current kernel meets all mandatory requirements */
113 	if (feature & (~EROFS_ALL_FEATURE_INCOMPAT)) {
114 		erofs_err(sb, "unidentified incompatible feature %x, please upgrade kernel",
115 			   feature & ~EROFS_ALL_FEATURE_INCOMPAT);
116 		return false;
117 	}
118 	return true;
119 }
120 
121 /* read variable-sized metadata, offset will be aligned by 4-byte */
122 void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf,
123 			  erofs_off_t *offset, int *lengthp)
124 {
125 	u8 *buffer, *ptr;
126 	int len, i, cnt;
127 
128 	*offset = round_up(*offset, 4);
129 	ptr = erofs_bread(buf, erofs_blknr(sb, *offset), EROFS_KMAP);
130 	if (IS_ERR(ptr))
131 		return ptr;
132 
133 	len = le16_to_cpu(*(__le16 *)&ptr[erofs_blkoff(sb, *offset)]);
134 	if (!len)
135 		len = U16_MAX + 1;
136 	buffer = kmalloc(len, GFP_KERNEL);
137 	if (!buffer)
138 		return ERR_PTR(-ENOMEM);
139 	*offset += sizeof(__le16);
140 	*lengthp = len;
141 
142 	for (i = 0; i < len; i += cnt) {
143 		cnt = min_t(int, sb->s_blocksize - erofs_blkoff(sb, *offset),
144 			    len - i);
145 		ptr = erofs_bread(buf, erofs_blknr(sb, *offset), EROFS_KMAP);
146 		if (IS_ERR(ptr)) {
147 			kfree(buffer);
148 			return ptr;
149 		}
150 		memcpy(buffer + i, ptr + erofs_blkoff(sb, *offset), cnt);
151 		*offset += cnt;
152 	}
153 	return buffer;
154 }
155 
156 #ifndef CONFIG_EROFS_FS_ZIP
157 static int z_erofs_parse_cfgs(struct super_block *sb,
158 			      struct erofs_super_block *dsb)
159 {
160 	if (!dsb->u1.available_compr_algs)
161 		return 0;
162 
163 	erofs_err(sb, "compression disabled, unable to mount compressed EROFS");
164 	return -EOPNOTSUPP;
165 }
166 #endif
167 
168 static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb,
169 			     struct erofs_device_info *dif, erofs_off_t *pos)
170 {
171 	struct erofs_sb_info *sbi = EROFS_SB(sb);
172 	struct erofs_fscache *fscache;
173 	struct erofs_deviceslot *dis;
174 	struct bdev_handle *bdev_handle;
175 	void *ptr;
176 
177 	ptr = erofs_read_metabuf(buf, sb, erofs_blknr(sb, *pos), EROFS_KMAP);
178 	if (IS_ERR(ptr))
179 		return PTR_ERR(ptr);
180 	dis = ptr + erofs_blkoff(sb, *pos);
181 
182 	if (!sbi->devs->flatdev && !dif->path) {
183 		if (!dis->tag[0]) {
184 			erofs_err(sb, "empty device tag @ pos %llu", *pos);
185 			return -EINVAL;
186 		}
187 		dif->path = kmemdup_nul(dis->tag, sizeof(dis->tag), GFP_KERNEL);
188 		if (!dif->path)
189 			return -ENOMEM;
190 	}
191 
192 	if (erofs_is_fscache_mode(sb)) {
193 		fscache = erofs_fscache_register_cookie(sb, dif->path, 0);
194 		if (IS_ERR(fscache))
195 			return PTR_ERR(fscache);
196 		dif->fscache = fscache;
197 	} else if (!sbi->devs->flatdev) {
198 		bdev_handle = bdev_open_by_path(dif->path, BLK_OPEN_READ,
199 						sb->s_type, NULL);
200 		if (IS_ERR(bdev_handle))
201 			return PTR_ERR(bdev_handle);
202 		dif->bdev_handle = bdev_handle;
203 		dif->dax_dev = fs_dax_get_by_bdev(bdev_handle->bdev,
204 				&dif->dax_part_off, NULL, NULL);
205 	}
206 
207 	dif->blocks = le32_to_cpu(dis->blocks);
208 	dif->mapped_blkaddr = le32_to_cpu(dis->mapped_blkaddr);
209 	sbi->total_blocks += dif->blocks;
210 	*pos += EROFS_DEVT_SLOT_SIZE;
211 	return 0;
212 }
213 
214 static int erofs_scan_devices(struct super_block *sb,
215 			      struct erofs_super_block *dsb)
216 {
217 	struct erofs_sb_info *sbi = EROFS_SB(sb);
218 	unsigned int ondisk_extradevs;
219 	erofs_off_t pos;
220 	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
221 	struct erofs_device_info *dif;
222 	int id, err = 0;
223 
224 	sbi->total_blocks = sbi->primarydevice_blocks;
225 	if (!erofs_sb_has_device_table(sbi))
226 		ondisk_extradevs = 0;
227 	else
228 		ondisk_extradevs = le16_to_cpu(dsb->extra_devices);
229 
230 	if (sbi->devs->extra_devices &&
231 	    ondisk_extradevs != sbi->devs->extra_devices) {
232 		erofs_err(sb, "extra devices don't match (ondisk %u, given %u)",
233 			  ondisk_extradevs, sbi->devs->extra_devices);
234 		return -EINVAL;
235 	}
236 	if (!ondisk_extradevs)
237 		return 0;
238 
239 	if (!sbi->devs->extra_devices && !erofs_is_fscache_mode(sb))
240 		sbi->devs->flatdev = true;
241 
242 	sbi->device_id_mask = roundup_pow_of_two(ondisk_extradevs + 1) - 1;
243 	pos = le16_to_cpu(dsb->devt_slotoff) * EROFS_DEVT_SLOT_SIZE;
244 	down_read(&sbi->devs->rwsem);
245 	if (sbi->devs->extra_devices) {
246 		idr_for_each_entry(&sbi->devs->tree, dif, id) {
247 			err = erofs_init_device(&buf, sb, dif, &pos);
248 			if (err)
249 				break;
250 		}
251 	} else {
252 		for (id = 0; id < ondisk_extradevs; id++) {
253 			dif = kzalloc(sizeof(*dif), GFP_KERNEL);
254 			if (!dif) {
255 				err = -ENOMEM;
256 				break;
257 			}
258 
259 			err = idr_alloc(&sbi->devs->tree, dif, 0, 0, GFP_KERNEL);
260 			if (err < 0) {
261 				kfree(dif);
262 				break;
263 			}
264 			++sbi->devs->extra_devices;
265 
266 			err = erofs_init_device(&buf, sb, dif, &pos);
267 			if (err)
268 				break;
269 		}
270 	}
271 	up_read(&sbi->devs->rwsem);
272 	erofs_put_metabuf(&buf);
273 	return err;
274 }
275 
276 static int erofs_read_superblock(struct super_block *sb)
277 {
278 	struct erofs_sb_info *sbi;
279 	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
280 	struct erofs_super_block *dsb;
281 	void *data;
282 	int ret;
283 
284 	data = erofs_read_metabuf(&buf, sb, 0, EROFS_KMAP);
285 	if (IS_ERR(data)) {
286 		erofs_err(sb, "cannot read erofs superblock");
287 		return PTR_ERR(data);
288 	}
289 
290 	sbi = EROFS_SB(sb);
291 	dsb = (struct erofs_super_block *)(data + EROFS_SUPER_OFFSET);
292 
293 	ret = -EINVAL;
294 	if (le32_to_cpu(dsb->magic) != EROFS_SUPER_MAGIC_V1) {
295 		erofs_err(sb, "cannot find valid erofs superblock");
296 		goto out;
297 	}
298 
299 	sbi->blkszbits  = dsb->blkszbits;
300 	if (sbi->blkszbits < 9 || sbi->blkszbits > PAGE_SHIFT) {
301 		erofs_err(sb, "blkszbits %u isn't supported", sbi->blkszbits);
302 		goto out;
303 	}
304 	if (dsb->dirblkbits) {
305 		erofs_err(sb, "dirblkbits %u isn't supported", dsb->dirblkbits);
306 		goto out;
307 	}
308 
309 	sbi->feature_compat = le32_to_cpu(dsb->feature_compat);
310 	if (erofs_sb_has_sb_chksum(sbi)) {
311 		ret = erofs_superblock_csum_verify(sb, data);
312 		if (ret)
313 			goto out;
314 	}
315 
316 	ret = -EINVAL;
317 	if (!check_layout_compatibility(sb, dsb))
318 		goto out;
319 
320 	sbi->sb_size = 128 + dsb->sb_extslots * EROFS_SB_EXTSLOT_SIZE;
321 	if (sbi->sb_size > PAGE_SIZE - EROFS_SUPER_OFFSET) {
322 		erofs_err(sb, "invalid sb_extslots %u (more than a fs block)",
323 			  sbi->sb_size);
324 		goto out;
325 	}
326 	sbi->primarydevice_blocks = le32_to_cpu(dsb->blocks);
327 	sbi->meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr);
328 #ifdef CONFIG_EROFS_FS_XATTR
329 	sbi->xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr);
330 	sbi->xattr_prefix_start = le32_to_cpu(dsb->xattr_prefix_start);
331 	sbi->xattr_prefix_count = dsb->xattr_prefix_count;
332 	sbi->xattr_filter_reserved = dsb->xattr_filter_reserved;
333 #endif
334 	sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact));
335 	sbi->root_nid = le16_to_cpu(dsb->root_nid);
336 	sbi->packed_nid = le64_to_cpu(dsb->packed_nid);
337 	sbi->inos = le64_to_cpu(dsb->inos);
338 
339 	sbi->build_time = le64_to_cpu(dsb->build_time);
340 	sbi->build_time_nsec = le32_to_cpu(dsb->build_time_nsec);
341 
342 	memcpy(&sb->s_uuid, dsb->uuid, sizeof(dsb->uuid));
343 
344 	ret = strscpy(sbi->volume_name, dsb->volume_name,
345 		      sizeof(dsb->volume_name));
346 	if (ret < 0) {	/* -E2BIG */
347 		erofs_err(sb, "bad volume name without NIL terminator");
348 		ret = -EFSCORRUPTED;
349 		goto out;
350 	}
351 
352 	/* parse on-disk compression configurations */
353 	ret = z_erofs_parse_cfgs(sb, dsb);
354 	if (ret < 0)
355 		goto out;
356 
357 	/* handle multiple devices */
358 	ret = erofs_scan_devices(sb, dsb);
359 
360 	if (erofs_is_fscache_mode(sb))
361 		erofs_info(sb, "EXPERIMENTAL fscache-based on-demand read feature in use. Use at your own risk!");
362 out:
363 	erofs_put_metabuf(&buf);
364 	return ret;
365 }
366 
/*
 * Seed a new fs context with the default mount options; each group only
 * exists when the corresponding feature is compiled in.
 */
static void erofs_default_options(struct erofs_fs_context *ctx)
{
#ifdef CONFIG_EROFS_FS_ZIP
	/* compression: readaround caching, automatic sync decompression */
	ctx->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_AUTO;
	ctx->opt.max_sync_decompress_pages = 3;
	ctx->opt.cache_strategy = EROFS_ZIP_CACHE_READAROUND;
#endif
#ifdef CONFIG_EROFS_FS_XATTR
	/* user xattrs are on by default */
	set_opt(&ctx->opt, XATTR_USER);
#endif
#ifdef CONFIG_EROFS_FS_POSIX_ACL
	/* POSIX ACLs are on by default */
	set_opt(&ctx->opt, POSIX_ACL);
#endif
}
381 
/* tokens identifying the mount parameters listed in erofs_fs_parameters */
enum {
	Opt_user_xattr = 0,	/* "user_xattr" / "nouser_xattr" */
	Opt_acl,		/* "acl" / "noacl" */
	Opt_cache_strategy,	/* "cache_strategy=<name>" */
	Opt_dax,		/* bare "dax" flag */
	Opt_dax_enum,		/* "dax=<mode>" */
	Opt_device,		/* "device=<path>" */
	Opt_fsid,		/* "fsid=<id>" */
	Opt_domain_id,		/* "domain_id=<id>" */
	Opt_err
};
393 
394 static const struct constant_table erofs_param_cache_strategy[] = {
395 	{"disabled",	EROFS_ZIP_CACHE_DISABLED},
396 	{"readahead",	EROFS_ZIP_CACHE_READAHEAD},
397 	{"readaround",	EROFS_ZIP_CACHE_READAROUND},
398 	{}
399 };
400 
401 static const struct constant_table erofs_dax_param_enums[] = {
402 	{"always",	EROFS_MOUNT_DAX_ALWAYS},
403 	{"never",	EROFS_MOUNT_DAX_NEVER},
404 	{}
405 };
406 
407 static const struct fs_parameter_spec erofs_fs_parameters[] = {
408 	fsparam_flag_no("user_xattr",	Opt_user_xattr),
409 	fsparam_flag_no("acl",		Opt_acl),
410 	fsparam_enum("cache_strategy",	Opt_cache_strategy,
411 		     erofs_param_cache_strategy),
412 	fsparam_flag("dax",             Opt_dax),
413 	fsparam_enum("dax",		Opt_dax_enum, erofs_dax_param_enums),
414 	fsparam_string("device",	Opt_device),
415 	fsparam_string("fsid",		Opt_fsid),
416 	fsparam_string("domain_id",	Opt_domain_id),
417 	{}
418 };
419 
/*
 * Apply a DAX mount mode to the fs context.
 *
 * @fc:   fs context being configured.
 * @mode: EROFS_MOUNT_DAX_ALWAYS or EROFS_MOUNT_DAX_NEVER.
 *
 * Setting one mode clears the other. Without CONFIG_FS_DAX every request is
 * rejected with an error message.
 *
 * Return: true if the mode was applied, false otherwise.
 */
static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode)
{
#ifdef CONFIG_FS_DAX
	struct erofs_fs_context *ctx = fc->fs_private;

	if (mode == EROFS_MOUNT_DAX_ALWAYS) {
		warnfc(fc, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
		set_opt(&ctx->opt, DAX_ALWAYS);
		clear_opt(&ctx->opt, DAX_NEVER);
		return true;
	}
	if (mode == EROFS_MOUNT_DAX_NEVER) {
		set_opt(&ctx->opt, DAX_NEVER);
		clear_opt(&ctx->opt, DAX_ALWAYS);
		return true;
	}

	/* any other mode is unexpected here */
	DBG_BUGON(1);
	return false;
#else
	errorfc(fc, "dax options not supported");
	return false;
#endif
}
444 
445 static int erofs_fc_parse_param(struct fs_context *fc,
446 				struct fs_parameter *param)
447 {
448 	struct erofs_fs_context *ctx = fc->fs_private;
449 	struct fs_parse_result result;
450 	struct erofs_device_info *dif;
451 	int opt, ret;
452 
453 	opt = fs_parse(fc, erofs_fs_parameters, param, &result);
454 	if (opt < 0)
455 		return opt;
456 
457 	switch (opt) {
458 	case Opt_user_xattr:
459 #ifdef CONFIG_EROFS_FS_XATTR
460 		if (result.boolean)
461 			set_opt(&ctx->opt, XATTR_USER);
462 		else
463 			clear_opt(&ctx->opt, XATTR_USER);
464 #else
465 		errorfc(fc, "{,no}user_xattr options not supported");
466 #endif
467 		break;
468 	case Opt_acl:
469 #ifdef CONFIG_EROFS_FS_POSIX_ACL
470 		if (result.boolean)
471 			set_opt(&ctx->opt, POSIX_ACL);
472 		else
473 			clear_opt(&ctx->opt, POSIX_ACL);
474 #else
475 		errorfc(fc, "{,no}acl options not supported");
476 #endif
477 		break;
478 	case Opt_cache_strategy:
479 #ifdef CONFIG_EROFS_FS_ZIP
480 		ctx->opt.cache_strategy = result.uint_32;
481 #else
482 		errorfc(fc, "compression not supported, cache_strategy ignored");
483 #endif
484 		break;
485 	case Opt_dax:
486 		if (!erofs_fc_set_dax_mode(fc, EROFS_MOUNT_DAX_ALWAYS))
487 			return -EINVAL;
488 		break;
489 	case Opt_dax_enum:
490 		if (!erofs_fc_set_dax_mode(fc, result.uint_32))
491 			return -EINVAL;
492 		break;
493 	case Opt_device:
494 		dif = kzalloc(sizeof(*dif), GFP_KERNEL);
495 		if (!dif)
496 			return -ENOMEM;
497 		dif->path = kstrdup(param->string, GFP_KERNEL);
498 		if (!dif->path) {
499 			kfree(dif);
500 			return -ENOMEM;
501 		}
502 		down_write(&ctx->devs->rwsem);
503 		ret = idr_alloc(&ctx->devs->tree, dif, 0, 0, GFP_KERNEL);
504 		up_write(&ctx->devs->rwsem);
505 		if (ret < 0) {
506 			kfree(dif->path);
507 			kfree(dif);
508 			return ret;
509 		}
510 		++ctx->devs->extra_devices;
511 		break;
512 #ifdef CONFIG_EROFS_FS_ONDEMAND
513 	case Opt_fsid:
514 		kfree(ctx->fsid);
515 		ctx->fsid = kstrdup(param->string, GFP_KERNEL);
516 		if (!ctx->fsid)
517 			return -ENOMEM;
518 		break;
519 	case Opt_domain_id:
520 		kfree(ctx->domain_id);
521 		ctx->domain_id = kstrdup(param->string, GFP_KERNEL);
522 		if (!ctx->domain_id)
523 			return -ENOMEM;
524 		break;
525 #else
526 	case Opt_fsid:
527 	case Opt_domain_id:
528 		errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name);
529 		break;
530 #endif
531 	default:
532 		return -ENOPARAM;
533 	}
534 	return 0;
535 }
536 
537 static struct inode *erofs_nfs_get_inode(struct super_block *sb,
538 					 u64 ino, u32 generation)
539 {
540 	return erofs_iget(sb, ino);
541 }
542 
543 static struct dentry *erofs_fh_to_dentry(struct super_block *sb,
544 		struct fid *fid, int fh_len, int fh_type)
545 {
546 	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
547 				    erofs_nfs_get_inode);
548 }
549 
550 static struct dentry *erofs_fh_to_parent(struct super_block *sb,
551 		struct fid *fid, int fh_len, int fh_type)
552 {
553 	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
554 				    erofs_nfs_get_inode);
555 }
556 
557 static struct dentry *erofs_get_parent(struct dentry *child)
558 {
559 	erofs_nid_t nid;
560 	unsigned int d_type;
561 	int err;
562 
563 	err = erofs_namei(d_inode(child), &dotdot_name, &nid, &d_type);
564 	if (err)
565 		return ERR_PTR(err);
566 	return d_obtain_alias(erofs_iget(child->d_sb, nid));
567 }
568 
569 static const struct export_operations erofs_export_ops = {
570 	.encode_fh = generic_encode_ino32_fh,
571 	.fh_to_dentry = erofs_fh_to_dentry,
572 	.fh_to_parent = erofs_fh_to_parent,
573 	.get_parent = erofs_get_parent,
574 };
575 
576 static int erofs_fc_fill_pseudo_super(struct super_block *sb, struct fs_context *fc)
577 {
578 	static const struct tree_descr empty_descr = {""};
579 
580 	return simple_fill_super(sb, EROFS_SUPER_MAGIC, &empty_descr);
581 }
582 
583 static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
584 {
585 	struct inode *inode;
586 	struct erofs_sb_info *sbi;
587 	struct erofs_fs_context *ctx = fc->fs_private;
588 	int err;
589 
590 	sb->s_magic = EROFS_SUPER_MAGIC;
591 	sb->s_flags |= SB_RDONLY | SB_NOATIME;
592 	sb->s_maxbytes = MAX_LFS_FILESIZE;
593 	sb->s_op = &erofs_sops;
594 
595 	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
596 	if (!sbi)
597 		return -ENOMEM;
598 
599 	sb->s_fs_info = sbi;
600 	sbi->opt = ctx->opt;
601 	sbi->devs = ctx->devs;
602 	ctx->devs = NULL;
603 	sbi->fsid = ctx->fsid;
604 	ctx->fsid = NULL;
605 	sbi->domain_id = ctx->domain_id;
606 	ctx->domain_id = NULL;
607 
608 	sbi->blkszbits = PAGE_SHIFT;
609 	if (erofs_is_fscache_mode(sb)) {
610 		sb->s_blocksize = PAGE_SIZE;
611 		sb->s_blocksize_bits = PAGE_SHIFT;
612 
613 		err = erofs_fscache_register_fs(sb);
614 		if (err)
615 			return err;
616 
617 		err = super_setup_bdi(sb);
618 		if (err)
619 			return err;
620 	} else {
621 		if (!sb_set_blocksize(sb, PAGE_SIZE)) {
622 			errorfc(fc, "failed to set initial blksize");
623 			return -EINVAL;
624 		}
625 
626 		sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev,
627 						  &sbi->dax_part_off,
628 						  NULL, NULL);
629 	}
630 
631 	err = erofs_read_superblock(sb);
632 	if (err)
633 		return err;
634 
635 	if (sb->s_blocksize_bits != sbi->blkszbits) {
636 		if (erofs_is_fscache_mode(sb)) {
637 			errorfc(fc, "unsupported blksize for fscache mode");
638 			return -EINVAL;
639 		}
640 		if (!sb_set_blocksize(sb, 1 << sbi->blkszbits)) {
641 			errorfc(fc, "failed to set erofs blksize");
642 			return -EINVAL;
643 		}
644 	}
645 
646 	if (test_opt(&sbi->opt, DAX_ALWAYS)) {
647 		if (!sbi->dax_dev) {
648 			errorfc(fc, "DAX unsupported by block device. Turning off DAX.");
649 			clear_opt(&sbi->opt, DAX_ALWAYS);
650 		} else if (sbi->blkszbits != PAGE_SHIFT) {
651 			errorfc(fc, "unsupported blocksize for DAX");
652 			clear_opt(&sbi->opt, DAX_ALWAYS);
653 		}
654 	}
655 
656 	sb->s_time_gran = 1;
657 	sb->s_xattr = erofs_xattr_handlers;
658 	sb->s_export_op = &erofs_export_ops;
659 
660 	if (test_opt(&sbi->opt, POSIX_ACL))
661 		sb->s_flags |= SB_POSIXACL;
662 	else
663 		sb->s_flags &= ~SB_POSIXACL;
664 
665 #ifdef CONFIG_EROFS_FS_ZIP
666 	xa_init(&sbi->managed_pslots);
667 #endif
668 
669 	inode = erofs_iget(sb, sbi->root_nid);
670 	if (IS_ERR(inode))
671 		return PTR_ERR(inode);
672 
673 	if (!S_ISDIR(inode->i_mode)) {
674 		erofs_err(sb, "rootino(nid %llu) is not a directory(i_mode %o)",
675 			  sbi->root_nid, inode->i_mode);
676 		iput(inode);
677 		return -EINVAL;
678 	}
679 
680 	sb->s_root = d_make_root(inode);
681 	if (!sb->s_root)
682 		return -ENOMEM;
683 
684 	erofs_shrinker_register(sb);
685 	if (erofs_sb_has_fragments(sbi) && sbi->packed_nid) {
686 		sbi->packed_inode = erofs_iget(sb, sbi->packed_nid);
687 		if (IS_ERR(sbi->packed_inode)) {
688 			err = PTR_ERR(sbi->packed_inode);
689 			sbi->packed_inode = NULL;
690 			return err;
691 		}
692 	}
693 	err = erofs_init_managed_cache(sb);
694 	if (err)
695 		return err;
696 
697 	err = erofs_xattr_prefixes_init(sb);
698 	if (err)
699 		return err;
700 
701 	err = erofs_register_sysfs(sb);
702 	if (err)
703 		return err;
704 
705 	erofs_info(sb, "mounted with root inode @ nid %llu.", sbi->root_nid);
706 	return 0;
707 }
708 
709 static int erofs_fc_anon_get_tree(struct fs_context *fc)
710 {
711 	return get_tree_nodev(fc, erofs_fc_fill_pseudo_super);
712 }
713 
714 static int erofs_fc_get_tree(struct fs_context *fc)
715 {
716 	struct erofs_fs_context *ctx = fc->fs_private;
717 
718 	if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && ctx->fsid)
719 		return get_tree_nodev(fc, erofs_fc_fill_super);
720 
721 	return get_tree_bdev(fc, erofs_fc_fill_super);
722 }
723 
724 static int erofs_fc_reconfigure(struct fs_context *fc)
725 {
726 	struct super_block *sb = fc->root->d_sb;
727 	struct erofs_sb_info *sbi = EROFS_SB(sb);
728 	struct erofs_fs_context *ctx = fc->fs_private;
729 
730 	DBG_BUGON(!sb_rdonly(sb));
731 
732 	if (ctx->fsid || ctx->domain_id)
733 		erofs_info(sb, "ignoring reconfiguration for fsid|domain_id.");
734 
735 	if (test_opt(&ctx->opt, POSIX_ACL))
736 		fc->sb_flags |= SB_POSIXACL;
737 	else
738 		fc->sb_flags &= ~SB_POSIXACL;
739 
740 	sbi->opt = ctx->opt;
741 
742 	fc->sb_flags |= SB_RDONLY;
743 	return 0;
744 }
745 
746 static int erofs_release_device_info(int id, void *ptr, void *data)
747 {
748 	struct erofs_device_info *dif = ptr;
749 
750 	fs_put_dax(dif->dax_dev, NULL);
751 	if (dif->bdev_handle)
752 		bdev_release(dif->bdev_handle);
753 	erofs_fscache_unregister_cookie(dif->fscache);
754 	dif->fscache = NULL;
755 	kfree(dif->path);
756 	kfree(dif);
757 	return 0;
758 }
759 
760 static void erofs_free_dev_context(struct erofs_dev_context *devs)
761 {
762 	if (!devs)
763 		return;
764 	idr_for_each(&devs->tree, &erofs_release_device_info, NULL);
765 	idr_destroy(&devs->tree);
766 	kfree(devs);
767 }
768 
769 static void erofs_fc_free(struct fs_context *fc)
770 {
771 	struct erofs_fs_context *ctx = fc->fs_private;
772 
773 	erofs_free_dev_context(ctx->devs);
774 	kfree(ctx->fsid);
775 	kfree(ctx->domain_id);
776 	kfree(ctx);
777 }
778 
779 static const struct fs_context_operations erofs_context_ops = {
780 	.parse_param	= erofs_fc_parse_param,
781 	.get_tree       = erofs_fc_get_tree,
782 	.reconfigure    = erofs_fc_reconfigure,
783 	.free		= erofs_fc_free,
784 };
785 
786 static const struct fs_context_operations erofs_anon_context_ops = {
787 	.get_tree       = erofs_fc_anon_get_tree,
788 };
789 
790 static int erofs_init_fs_context(struct fs_context *fc)
791 {
792 	struct erofs_fs_context *ctx;
793 
794 	/* pseudo mount for anon inodes */
795 	if (fc->sb_flags & SB_KERNMOUNT) {
796 		fc->ops = &erofs_anon_context_ops;
797 		return 0;
798 	}
799 
800 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
801 	if (!ctx)
802 		return -ENOMEM;
803 	ctx->devs = kzalloc(sizeof(struct erofs_dev_context), GFP_KERNEL);
804 	if (!ctx->devs) {
805 		kfree(ctx);
806 		return -ENOMEM;
807 	}
808 	fc->fs_private = ctx;
809 
810 	idr_init(&ctx->devs->tree);
811 	init_rwsem(&ctx->devs->rwsem);
812 	erofs_default_options(ctx);
813 	fc->ops = &erofs_context_ops;
814 	return 0;
815 }
816 
817 static void erofs_kill_sb(struct super_block *sb)
818 {
819 	struct erofs_sb_info *sbi;
820 
821 	/* pseudo mount for anon inodes */
822 	if (sb->s_flags & SB_KERNMOUNT) {
823 		kill_anon_super(sb);
824 		return;
825 	}
826 
827 	if (erofs_is_fscache_mode(sb))
828 		kill_anon_super(sb);
829 	else
830 		kill_block_super(sb);
831 
832 	sbi = EROFS_SB(sb);
833 	if (!sbi)
834 		return;
835 
836 	erofs_free_dev_context(sbi->devs);
837 	fs_put_dax(sbi->dax_dev, NULL);
838 	erofs_fscache_unregister_fs(sb);
839 	kfree(sbi->fsid);
840 	kfree(sbi->domain_id);
841 	kfree(sbi);
842 	sb->s_fs_info = NULL;
843 }
844 
845 static void erofs_put_super(struct super_block *sb)
846 {
847 	struct erofs_sb_info *const sbi = EROFS_SB(sb);
848 
849 	DBG_BUGON(!sbi);
850 
851 	erofs_unregister_sysfs(sb);
852 	erofs_shrinker_unregister(sb);
853 	erofs_xattr_prefixes_cleanup(sb);
854 #ifdef CONFIG_EROFS_FS_ZIP
855 	iput(sbi->managed_cache);
856 	sbi->managed_cache = NULL;
857 #endif
858 	iput(sbi->packed_inode);
859 	sbi->packed_inode = NULL;
860 	erofs_free_dev_context(sbi->devs);
861 	sbi->devs = NULL;
862 	erofs_fscache_unregister_fs(sb);
863 }
864 
865 struct file_system_type erofs_fs_type = {
866 	.owner          = THIS_MODULE,
867 	.name           = "erofs",
868 	.init_fs_context = erofs_init_fs_context,
869 	.kill_sb        = erofs_kill_sb,
870 	.fs_flags       = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
871 };
872 MODULE_ALIAS_FS("erofs");
873 
874 static int __init erofs_module_init(void)
875 {
876 	int err;
877 
878 	erofs_check_ondisk_layout_definitions();
879 
880 	erofs_inode_cachep = kmem_cache_create("erofs_inode",
881 			sizeof(struct erofs_inode), 0,
882 			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT,
883 			erofs_inode_init_once);
884 	if (!erofs_inode_cachep)
885 		return -ENOMEM;
886 
887 	err = erofs_init_shrinker();
888 	if (err)
889 		goto shrinker_err;
890 
891 	err = z_erofs_lzma_init();
892 	if (err)
893 		goto lzma_err;
894 
895 	err = z_erofs_deflate_init();
896 	if (err)
897 		goto deflate_err;
898 
899 	erofs_pcpubuf_init();
900 	err = z_erofs_init_zip_subsystem();
901 	if (err)
902 		goto zip_err;
903 
904 	err = erofs_init_sysfs();
905 	if (err)
906 		goto sysfs_err;
907 
908 	err = register_filesystem(&erofs_fs_type);
909 	if (err)
910 		goto fs_err;
911 
912 	return 0;
913 
914 fs_err:
915 	erofs_exit_sysfs();
916 sysfs_err:
917 	z_erofs_exit_zip_subsystem();
918 zip_err:
919 	z_erofs_deflate_exit();
920 deflate_err:
921 	z_erofs_lzma_exit();
922 lzma_err:
923 	erofs_exit_shrinker();
924 shrinker_err:
925 	kmem_cache_destroy(erofs_inode_cachep);
926 	return err;
927 }
928 
929 static void __exit erofs_module_exit(void)
930 {
931 	unregister_filesystem(&erofs_fs_type);
932 
933 	/* Ensure all RCU free inodes / pclusters are safe to be destroyed. */
934 	rcu_barrier();
935 
936 	erofs_exit_sysfs();
937 	z_erofs_exit_zip_subsystem();
938 	z_erofs_deflate_exit();
939 	z_erofs_lzma_exit();
940 	erofs_exit_shrinker();
941 	kmem_cache_destroy(erofs_inode_cachep);
942 	erofs_pcpubuf_exit();
943 }
944 
945 static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
946 {
947 	struct super_block *sb = dentry->d_sb;
948 	struct erofs_sb_info *sbi = EROFS_SB(sb);
949 	u64 id = 0;
950 
951 	if (!erofs_is_fscache_mode(sb))
952 		id = huge_encode_dev(sb->s_bdev->bd_dev);
953 
954 	buf->f_type = sb->s_magic;
955 	buf->f_bsize = sb->s_blocksize;
956 	buf->f_blocks = sbi->total_blocks;
957 	buf->f_bfree = buf->f_bavail = 0;
958 
959 	buf->f_files = ULLONG_MAX;
960 	buf->f_ffree = ULLONG_MAX - sbi->inos;
961 
962 	buf->f_namelen = EROFS_NAME_LEN;
963 
964 	buf->f_fsid    = u64_to_fsid(id);
965 	return 0;
966 }
967 
968 static int erofs_show_options(struct seq_file *seq, struct dentry *root)
969 {
970 	struct erofs_sb_info *sbi = EROFS_SB(root->d_sb);
971 	struct erofs_mount_opts *opt = &sbi->opt;
972 
973 #ifdef CONFIG_EROFS_FS_XATTR
974 	if (test_opt(opt, XATTR_USER))
975 		seq_puts(seq, ",user_xattr");
976 	else
977 		seq_puts(seq, ",nouser_xattr");
978 #endif
979 #ifdef CONFIG_EROFS_FS_POSIX_ACL
980 	if (test_opt(opt, POSIX_ACL))
981 		seq_puts(seq, ",acl");
982 	else
983 		seq_puts(seq, ",noacl");
984 #endif
985 #ifdef CONFIG_EROFS_FS_ZIP
986 	if (opt->cache_strategy == EROFS_ZIP_CACHE_DISABLED)
987 		seq_puts(seq, ",cache_strategy=disabled");
988 	else if (opt->cache_strategy == EROFS_ZIP_CACHE_READAHEAD)
989 		seq_puts(seq, ",cache_strategy=readahead");
990 	else if (opt->cache_strategy == EROFS_ZIP_CACHE_READAROUND)
991 		seq_puts(seq, ",cache_strategy=readaround");
992 #endif
993 	if (test_opt(opt, DAX_ALWAYS))
994 		seq_puts(seq, ",dax=always");
995 	if (test_opt(opt, DAX_NEVER))
996 		seq_puts(seq, ",dax=never");
997 #ifdef CONFIG_EROFS_FS_ONDEMAND
998 	if (sbi->fsid)
999 		seq_printf(seq, ",fsid=%s", sbi->fsid);
1000 	if (sbi->domain_id)
1001 		seq_printf(seq, ",domain_id=%s", sbi->domain_id);
1002 #endif
1003 	return 0;
1004 }
1005 
1006 const struct super_operations erofs_sops = {
1007 	.put_super = erofs_put_super,
1008 	.alloc_inode = erofs_alloc_inode,
1009 	.free_inode = erofs_free_inode,
1010 	.statfs = erofs_statfs,
1011 	.show_options = erofs_show_options,
1012 };
1013 
1014 module_init(erofs_module_init);
1015 module_exit(erofs_module_exit);
1016 
1017 MODULE_DESCRIPTION("Enhanced ROM File System");
1018 MODULE_AUTHOR("Gao Xiang, Chao Yu, Miao Xie, CONSUMER BG, HUAWEI Inc.");
1019 MODULE_LICENSE("GPL");
1020