// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017-2018 HUAWEI, Inc.
 *             https://www.huawei.com/
 * Copyright (C) 2021, Alibaba Cloud
 */
#include <linux/statfs.h>
#include <linux/seq_file.h>
#include <linux/crc32c.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/exportfs.h>
#include <linux/backing-dev.h>
#include <linux/pseudo_fs.h>
#include "xattr.h"

#define CREATE_TRACE_POINTS
#include <trace/events/erofs.h>

/* slab cache backing every struct erofs_inode; created in erofs_module_init() */
static struct kmem_cache *erofs_inode_cachep __read_mostly;

/*
 * Print a message prefixed with "erofs: " or, when @sb is non-NULL,
 * "erofs (device <s_id>): ".
 *
 * The log level is extracted from the head of @fmt with printk_get_level()
 * and re-emitted in front of the prefix; the remainder of @fmt and the
 * variadic arguments are forwarded through %pV.
 */
void _erofs_printk(struct super_block *sb, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;
	int level;

	va_start(args, fmt);

	level = printk_get_level(fmt);
	vaf.fmt = printk_skip_level(fmt);
	vaf.va = &args;
	if (sb)
		printk("%c%cerofs (device %s): %pV",
				KERN_SOH_ASCII, level, sb->s_id, &vaf);
	else
		printk("%c%cerofs: %pV", KERN_SOH_ASCII, level, &vaf);
	va_end(args);
}

/*
 * Verify the superblock checksum.
 *
 * @sbdata points at the start of the buffer holding the superblock; the
 * on-disk superblock itself sits EROFS_SUPER_OFFSET bytes into it.
 *
 * CRC32C (seed 0x5045B54A) is computed over the bytes that follow the
 * checksum field.  The length starts from the block size (1 << blkszbits),
 * has EROFS_SUPER_OFFSET subtracted only when the block size is larger than
 * that offset, and finally excludes everything up to and including the
 * checksum field.
 *
 * Returns 0 if the computed value equals dsb->checksum, -EBADMSG otherwise.
 */
static int erofs_superblock_csum_verify(struct super_block *sb, void *sbdata)
{
	struct erofs_super_block *dsb = sbdata + EROFS_SUPER_OFFSET;
	u32 len = 1 << EROFS_SB(sb)->blkszbits, crc;

	if (len > EROFS_SUPER_OFFSET)
		len -= EROFS_SUPER_OFFSET;
	len -= offsetof(struct erofs_super_block, checksum) +
			sizeof(dsb->checksum);

	/* skip .magic(pre-verified) and .checksum(0) fields */
	crc = crc32c(0x5045B54A, (&dsb->checksum) + 1, len);
	if (crc == le32_to_cpu(dsb->checksum))
		return 0;
	erofs_err(sb, "invalid checksum 0x%08x, 0x%08x expected",
		  crc, le32_to_cpu(dsb->checksum));
	return -EBADMSG;
}

/* slab constructor for erofs_inode_cachep: initialise the embedded VFS inode */
static void erofs_inode_init_once(void *ptr)
{
	struct erofs_inode *vi = ptr;

	inode_init_once(&vi->vfs_inode);
}

/*
 * ->alloc_inode: take an erofs_inode from the slab cache and clear the
 * EROFS-private fields that precede the embedded vfs_inode.
 *
 * Returns the embedded VFS inode, or NULL if the allocation failed.
 */
static struct inode *erofs_alloc_inode(struct super_block *sb)
{
	struct erofs_inode *vi =
		alloc_inode_sb(sb, erofs_inode_cachep, GFP_KERNEL);

	if (!vi)
		return NULL;

	/* zero out everything except vfs_inode */
	memset(vi, 0, offsetof(struct erofs_inode, vfs_inode));
	return &vi->vfs_inode;
}

/*
 * ->free_inode: release per-inode allocations and return the object to the
 * slab cache.  i_link is only freed for inodes using
 * erofs_fast_symlink_iops.
 */
static void erofs_free_inode(struct inode *inode)
{
	struct erofs_inode *vi = EROFS_I(inode);

	if (inode->i_op == &erofs_fast_symlink_iops)
		kfree(inode->i_link);
	kfree(vi->xattr_shared_xattrs);
	kmem_cache_free(erofs_inode_cachep, vi);
}

/*
 * read variable-sized metadata, offset will be aligned by 4-byte
 *
 * The record at *@offset is a little-endian 16-bit length followed by that
 * many bytes of payload; a stored length of 0 stands for U16_MAX + 1 bytes.
 * The payload is copied into a freshly kmalloc()ed buffer, one block-bounded
 * chunk at a time.
 *
 * On success the buffer is returned (it is not freed here, so the caller
 * owns it), *@lengthp holds the payload length and *@offset points just past
 * the payload.  On failure an ERR_PTR() is returned; note that *@offset (and,
 * once the length has been read, *@lengthp) may already have been modified.
 */
void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf,
			  erofs_off_t *offset, int *lengthp)
{
	u8 *buffer, *ptr;
	int len, i, cnt;

	*offset = round_up(*offset, 4);
	ptr = erofs_bread(buf, *offset, true);
	if (IS_ERR(ptr))
		return ptr;

	len = le16_to_cpu(*(__le16 *)ptr);
	if (!len)
		len = U16_MAX + 1;
	buffer = kmalloc(len, GFP_KERNEL);
	if (!buffer)
		return ERR_PTR(-ENOMEM);
	*offset += sizeof(__le16);
	*lengthp = len;

	for (i = 0; i < len; i += cnt) {
		/* never copy past the end of the block containing *offset */
		cnt = min_t(int, sb->s_blocksize - erofs_blkoff(sb, *offset),
			    len - i);
		ptr = erofs_bread(buf, *offset, true);
		if (IS_ERR(ptr)) {
			kfree(buffer);
			return ptr;
		}
		memcpy(buffer + i, ptr, cnt);
		*offset += cnt;
	}
	return buffer;
}

/*
 * Initialise one extra device from the on-disk device slot at *@pos.
 *
 * Unless the device table is in "flatdev" mode:
 *  - a missing dif->path is filled in from the slot's tag (an empty tag is
 *    rejected with -EINVAL);
 *  - the path is opened, as a file in file-backed mode or as a block device
 *    otherwise, and stored in dif->file; in file-backed mode anything but a
 *    regular file is rejected;
 *  - for block devices a DAX device is looked up; if none is available while
 *    DAX_ALWAYS is set, the option is cleared with a notice.
 *
 * In all modes the slot's block count and unified address are recorded (the
 * high 16 bits are only used when the 48-bit feature is set),
 * sbi->total_blocks is increased and *@pos advances by EROFS_DEVT_SLOT_SIZE.
 *
 * Returns 0 or a negative errno.  A dif->path allocated here is not released
 * on the error paths of this function; erofs_release_device_info() frees it
 * together with @dif.
 */
static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb,
			     struct erofs_device_info *dif, erofs_off_t *pos)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_deviceslot *dis;
	struct file *file;
	bool _48bit;

	dis = erofs_read_metabuf(buf, sb, *pos, false);
	if (IS_ERR(dis))
		return PTR_ERR(dis);

	if (!sbi->devs->flatdev && !dif->path) {
		if (!dis->tag[0]) {
			erofs_err(sb, "empty device tag @ pos %llu", *pos);
			return -EINVAL;
		}
		dif->path = kmemdup_nul(dis->tag, sizeof(dis->tag), GFP_KERNEL);
		if (!dif->path)
			return -ENOMEM;
	}

	if (!sbi->devs->flatdev) {
		file = erofs_is_fileio_mode(sbi) ?
				filp_open(dif->path, O_RDONLY | O_LARGEFILE, 0) :
				bdev_file_open_by_path(dif->path,
						BLK_OPEN_READ, sb->s_type, NULL);
		if (IS_ERR(file)) {
			/*
			 * NOTE(review): presumably remapped so that the
			 * -ENOTBLK fallback in erofs_fc_get_tree() is not
			 * triggered by an extra device -- confirm.
			 */
			if (file == ERR_PTR(-ENOTBLK))
				return -EINVAL;
			return PTR_ERR(file);
		}

		if (!erofs_is_fileio_mode(sbi)) {
			dif->dax_dev = fs_dax_get_by_bdev(file_bdev(file),
					&dif->dax_part_off, NULL, NULL);
		} else if (!S_ISREG(file_inode(file)->i_mode)) {
			fput(file);
			return -EINVAL;
		}
		if (!dif->dax_dev && test_opt(&sbi->opt, DAX_ALWAYS)) {
			erofs_info(sb, "DAX unsupported by %s. Turning off DAX.",
				   dif->path);
			clear_opt(&sbi->opt, DAX_ALWAYS);
		}
		dif->file = file;
	}

	_48bit = erofs_sb_has_48bit(sbi);
	dif->blocks = le32_to_cpu(dis->blocks_lo) |
		(_48bit ? (u64)le16_to_cpu(dis->blocks_hi) << 32 : 0);
	dif->uniaddr = le32_to_cpu(dis->uniaddr_lo) |
		(_48bit ? (u64)le16_to_cpu(dis->uniaddr_hi) << 32 : 0);
	sbi->total_blocks += dif->blocks;
	*pos += EROFS_DEVT_SLOT_SIZE;
	return 0;
}

/*
 * Walk the on-disk device table and initialise every extra device.
 *
 * The on-disk extra device count is 0 unless the device-table feature is
 * set.  If devices were given at mount time their number must match the
 * on-disk count, otherwise -EINVAL is returned.  If none were given, the
 * table is handled in "flatdev" mode and one erofs_device_info per on-disk
 * slot is allocated and inserted into sbi->devs->tree here.
 *
 * Also clears DAX_ALWAYS (with a notice) when the primary device has no DAX
 * device.
 *
 * Returns 0 or a negative errno.
 */
static int erofs_scan_devices(struct super_block *sb,
			      struct erofs_super_block *dsb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	unsigned int ondisk_extradevs;
	erofs_off_t pos;
	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
	struct erofs_device_info *dif;
	int id, err = 0;

	sbi->total_blocks = sbi->dif0.blocks;
	if (!erofs_sb_has_device_table(sbi))
		ondisk_extradevs = 0;
	else
		ondisk_extradevs = le16_to_cpu(dsb->extra_devices);

	if (sbi->devs->extra_devices &&
	    ondisk_extradevs != sbi->devs->extra_devices) {
		erofs_err(sb, "extra devices don't match (ondisk %u, given %u)",
			  ondisk_extradevs, sbi->devs->extra_devices);
		return -EINVAL;
	}

	if (test_opt(&sbi->opt, DAX_ALWAYS) && !sbi->dif0.dax_dev) {
		erofs_info(sb, "DAX unsupported by block device. Turning off DAX.");
		clear_opt(&sbi->opt, DAX_ALWAYS);
	}
	if (!ondisk_extradevs)
		return 0;

	/* no device= options were given: treat the device table as flat */
	if (!sbi->devs->extra_devices)
		sbi->devs->flatdev = true;

	sbi->device_id_mask = roundup_pow_of_two(ondisk_extradevs + 1) - 1;
	pos = le16_to_cpu(dsb->devt_slotoff) * EROFS_DEVT_SLOT_SIZE;
	down_read(&sbi->devs->rwsem);
	if (sbi->devs->extra_devices) {
		/* devices registered via mount options, visited in id order */
		idr_for_each_entry(&sbi->devs->tree, dif, id) {
			err = erofs_init_device(&buf, sb, dif, &pos);
			if (err)
				break;
		}
	} else {
		for (id = 0; id < ondisk_extradevs; id++) {
			dif = kzalloc_obj(*dif);
			if (!dif) {
				err = -ENOMEM;
				break;
			}

			/*
			 * A non-negative return is the allocated id; it is
			 * overwritten by the erofs_init_device() result below.
			 */
			err = idr_alloc(&sbi->devs->tree, dif, 0, 0, GFP_KERNEL);
			if (err < 0) {
				kfree(dif);
				break;
			}
			++sbi->devs->extra_devices;

			err = erofs_init_device(&buf, sb, dif, &pos);
			if (err)
				break;
		}
	}
	up_read(&sbi->devs->rwsem);
	erofs_put_metabuf(&buf);
	return err;
}

/*
 * Read and validate the on-disk superblock and populate the in-memory
 * erofs_sb_info from it.
 *
 * Checks performed here: magic number, block size bits (9..PAGE_SHIFT),
 * dirblkbits (must be 0), optional checksum, unknown incompatible features,
 * superblock size, ishare xattr prefix id and metabox nid.  Compression
 * configuration parsing and device scanning are delegated to
 * z_erofs_parse_cfgs() and erofs_scan_devices().
 *
 * Returns 0 or a negative errno.  The metadata buffer is always released
 * before returning.
 */
static int erofs_read_superblock(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
	struct erofs_super_block *dsb;
	void *data;
	int ret;

	data = erofs_read_metabuf(&buf, sb, 0, false);
	if (IS_ERR(data)) {
		erofs_err(sb, "cannot read erofs superblock");
		return PTR_ERR(data);
	}

	dsb = (struct erofs_super_block *)(data + EROFS_SUPER_OFFSET);
	ret = -EINVAL;
	if (le32_to_cpu(dsb->magic) != EROFS_SUPER_MAGIC_V1) {
		erofs_err(sb, "cannot find valid erofs superblock");
		goto out;
	}

	sbi->blkszbits = dsb->blkszbits;
	if (sbi->blkszbits < 9 || sbi->blkszbits > PAGE_SHIFT) {
		erofs_err(sb, "blkszbits %u isn't supported", sbi->blkszbits);
		goto out;
	}
	if (dsb->dirblkbits) {
		erofs_err(sb, "dirblkbits %u isn't supported", dsb->dirblkbits);
		goto out;
	}

	sbi->feature_compat = le32_to_cpu(dsb->feature_compat);
	if (erofs_sb_has_sb_chksum(sbi)) {
		ret = erofs_superblock_csum_verify(sb, data);
		if (ret)
			goto out;
	}

	/* ret may have been overwritten by the checksum check above */
	ret = -EINVAL;
	sbi->feature_incompat = le32_to_cpu(dsb->feature_incompat);
	if (sbi->feature_incompat & ~EROFS_ALL_FEATURE_INCOMPAT) {
		erofs_err(sb, "unidentified incompatible feature %x, please upgrade kernel",
			  sbi->feature_incompat & ~EROFS_ALL_FEATURE_INCOMPAT);
		goto out;
	}

	sbi->sb_size = 128 + dsb->sb_extslots * EROFS_SB_EXTSLOT_SIZE;
	if (sbi->sb_size > PAGE_SIZE - EROFS_SUPER_OFFSET) {
		erofs_err(sb, "invalid sb_extslots %u (more than a fs block)",
			  sbi->sb_size);
		goto out;
	}
	sbi->dif0.blocks = le32_to_cpu(dsb->blocks_lo);
	sbi->meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr);
#ifdef CONFIG_EROFS_FS_XATTR
	sbi->xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr);
	sbi->xattr_prefix_start = le32_to_cpu(dsb->xattr_prefix_start);
	sbi->xattr_prefix_count = dsb->xattr_prefix_count;
	sbi->xattr_filter_reserved = dsb->xattr_filter_reserved;
	if (erofs_sb_has_ishare_xattrs(sbi)) {
		if (dsb->ishare_xattr_prefix_id >= sbi->xattr_prefix_count) {
			erofs_err(sb, "invalid ishare xattr prefix id %u",
				  dsb->ishare_xattr_prefix_id);
			ret = -EFSCORRUPTED;
			goto out;
		}
		sbi->ishare_xattr_prefix_id = dsb->ishare_xattr_prefix_id;
	}
#endif
	sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact));
	/*
	 * With the 48-bit feature and a non-zero 8-byte root nid, the "rb"
	 * field carries the high bits of the block count; otherwise it
	 * carries a 2-byte root nid.
	 */
	if (erofs_sb_has_48bit(sbi) && dsb->rootnid_8b) {
		sbi->root_nid = le64_to_cpu(dsb->rootnid_8b);
		sbi->dif0.blocks = sbi->dif0.blocks |
				((u64)le16_to_cpu(dsb->rb.blocks_hi) << 32);
	} else {
		sbi->root_nid = le16_to_cpu(dsb->rb.rootnid_2b);
	}
	sbi->packed_nid = le64_to_cpu(dsb->packed_nid);
	if (erofs_sb_has_metabox(sbi)) {
		ret = -EFSCORRUPTED;
		/* the superblock must be large enough to hold metabox_nid */
		if (sbi->sb_size <= offsetof(struct erofs_super_block,
					     metabox_nid))
			goto out;
		sbi->metabox_nid = le64_to_cpu(dsb->metabox_nid);
		if (sbi->metabox_nid & BIT_ULL(EROFS_DIRENT_NID_METABOX_BIT))
			goto out;	/* self-loop detection */
	}
	sbi->inos = le64_to_cpu(dsb->inos);

	sbi->epoch = (s64)le64_to_cpu(dsb->epoch);
	sbi->fixed_nsec = le32_to_cpu(dsb->fixed_nsec);
	super_set_uuid(sb, (void *)dsb->uuid, sizeof(dsb->uuid));

	if (dsb->volume_name[0]) {
		sbi->volume_name = kstrndup(dsb->volume_name,
					    sizeof(dsb->volume_name), GFP_KERNEL);
		if (!sbi->volume_name) {
			ret = -ENOMEM;
			goto out;
		}
	}

	if (IS_ENABLED(CONFIG_EROFS_FS_ZIP)) {
		ret = z_erofs_parse_cfgs(sb, dsb);
		if (ret < 0)
			goto out;
	} else if (dsb->u1.available_compr_algs ||
		   erofs_sb_has_lz4_0padding(sbi)) {
		erofs_err(sb, "compression disabled, unable to mount compressed EROFS");
		ret = -EOPNOTSUPP;
		goto out;
	}

	/* the result is returned as-is; the notices below are printed either way */
	ret = erofs_scan_devices(sb, dsb);

	if (erofs_sb_has_48bit(sbi))
		erofs_info(sb, "EXPERIMENTAL 48-bit layout support in use. Use at your own risk!");
	if (erofs_sb_has_metabox(sbi))
		erofs_info(sb, "EXPERIMENTAL metadata compression support in use. Use at your own risk!");
out:
	erofs_put_metabuf(&buf);
	return ret;
}

/*
 * Apply the mount option defaults that depend on the kernel configuration:
 * readaround cache strategy and automatic sync decompression when
 * compression is built in, user xattrs and POSIX ACLs when those are built
 * in.
 */
static void erofs_default_options(struct erofs_sb_info *sbi)
{
#ifdef CONFIG_EROFS_FS_ZIP
	sbi->opt.cache_strategy = EROFS_ZIP_CACHE_READAROUND;
	sbi->sync_decompress = EROFS_SYNC_DECOMPRESS_AUTO;
#endif
	if (IS_ENABLED(CONFIG_EROFS_FS_XATTR))
		set_opt(&sbi->opt, XATTR_USER);
	if (IS_ENABLED(CONFIG_EROFS_FS_POSIX_ACL))
		set_opt(&sbi->opt, POSIX_ACL);
}

/*
 * Mount option identifiers.  erofs_fc_parse_param() uses these values as
 * indices into erofs_fs_parameters[], so the order here has to stay in sync
 * with that table.
 */
enum {
	Opt_user_xattr, Opt_acl, Opt_cache_strategy, Opt_dax, Opt_dax_enum,
	Opt_device, Opt_domain_id, Opt_directio, Opt_fsoffset, Opt_inode_share,
};

/*
 * Accepted values of "cache_strategy=".  erofs_show_options() indexes this
 * table by opt->cache_strategy (presumably the EROFS_ZIP_CACHE_* values are
 * 0, 1, 2 in this order -- confirm against their definition).
 */
static const struct constant_table erofs_param_cache_strategy[] = {
	{"disabled",	EROFS_ZIP_CACHE_DISABLED},
	{"readahead",	EROFS_ZIP_CACHE_READAHEAD},
	{"readaround",	EROFS_ZIP_CACHE_READAROUND},
	{}
};

/* accepted values of "dax=" */
static const struct constant_table erofs_dax_param_enums[] = {
	{"always",	EROFS_MOUNT_DAX_ALWAYS},
	{"never",	EROFS_MOUNT_DAX_NEVER},
	{}
};

/* mount parameter table handed to fs_parse() */
static const struct fs_parameter_spec erofs_fs_parameters[] = {
	fsparam_flag_no("user_xattr",	Opt_user_xattr),
	fsparam_flag_no("acl",		Opt_acl),
	fsparam_enum("cache_strategy",	Opt_cache_strategy,
		     erofs_param_cache_strategy),
	fsparam_flag("dax",		Opt_dax),
	fsparam_enum("dax",		Opt_dax_enum, erofs_dax_param_enums),
	fsparam_string("device",	Opt_device),
	fsparam_string("domain_id",	Opt_domain_id),
	fsparam_flag_no("directio",	Opt_directio),
	fsparam_u64("fsoffset",		Opt_fsoffset),
	fsparam_flag("inode_share",	Opt_inode_share),
	{}
};

/*
 * Record the requested DAX mode in the mount options; DAX_ALWAYS and
 * DAX_NEVER are kept mutually exclusive.
 *
 * Returns true on success.  Returns false when CONFIG_FS_DAX is disabled
 * (after reporting an error on @fc) or when @mode is neither of the two
 * known values.
 */
static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode)
{
	if (IS_ENABLED(CONFIG_FS_DAX)) {
		struct erofs_sb_info *sbi = fc->s_fs_info;

		if (mode == EROFS_MOUNT_DAX_ALWAYS) {
			set_opt(&sbi->opt, DAX_ALWAYS);
			clear_opt(&sbi->opt, DAX_NEVER);
			return true;
		} else if (mode == EROFS_MOUNT_DAX_NEVER) {
			set_opt(&sbi->opt, DAX_NEVER);
			clear_opt(&sbi->opt, DAX_ALWAYS);
			return true;
		}
		DBG_BUGON(1);
		return false;
	}
	errorfc(fc, "dax options not supported");
	return false;
}

/*
 * ->parse_param: handle a single mount parameter.
 *
 * Options whose feature is compiled out are reported through errorfc() but
 * do not fail the parse (0 is still returned); only the DAX options return
 * -EINVAL in that case.  Each "device=" option allocates an
 * erofs_device_info and inserts it into sbi->devs->tree.
 *
 * Returns 0, a negative value from fs_parse(), or a negative errno.
 */
static int erofs_fc_parse_param(struct fs_context *fc,
				struct fs_parameter *param)
{
	struct erofs_sb_info *sbi = fc->s_fs_info;
	struct fs_parse_result result;
	struct erofs_device_info *dif;
	int opt, ret;

	opt = fs_parse(fc, erofs_fs_parameters, param, &result);
	if (opt < 0)
		return opt;

	switch (opt) {
	case Opt_user_xattr:
		if (!IS_ENABLED(CONFIG_EROFS_FS_XATTR))
			errorfc(fc, "{,no}user_xattr options not supported");
		else if (result.boolean)
			set_opt(&sbi->opt, XATTR_USER);
		else
			clear_opt(&sbi->opt, XATTR_USER);
		break;
	case Opt_acl:
		if (!IS_ENABLED(CONFIG_EROFS_FS_POSIX_ACL))
			errorfc(fc, "{,no}acl options not supported");
		else if (result.boolean)
			set_opt(&sbi->opt, POSIX_ACL);
		else
			clear_opt(&sbi->opt, POSIX_ACL);
		break;
	case Opt_cache_strategy:
		if (!IS_ENABLED(CONFIG_EROFS_FS_ZIP))
			errorfc(fc, "compression not supported, cache_strategy ignored");
		else
			sbi->opt.cache_strategy = result.uint_32;
		break;
	case Opt_dax:
		/* bare "dax" is the same as "dax=always" */
		if (!erofs_fc_set_dax_mode(fc, EROFS_MOUNT_DAX_ALWAYS))
			return -EINVAL;
		break;
	case Opt_dax_enum:
		if (!erofs_fc_set_dax_mode(fc, result.uint_32))
			return -EINVAL;
		break;
	case Opt_device:
		dif = kzalloc_obj(*dif);
		if (!dif)
			return -ENOMEM;
		dif->path = kstrdup(param->string, GFP_KERNEL);
		if (!dif->path) {
			kfree(dif);
			return -ENOMEM;
		}
		down_write(&sbi->devs->rwsem);
		ret = idr_alloc(&sbi->devs->tree, dif, 0, 0, GFP_KERNEL);
		up_write(&sbi->devs->rwsem);
		if (ret < 0) {
			kfree(dif->path);
			kfree(dif);
			return ret;
		}
		++sbi->devs->extra_devices;
		break;
	case Opt_domain_id:
		if (!IS_ENABLED(CONFIG_EROFS_FS_PAGE_CACHE_SHARE)) {
			errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name);
		} else {
			/* a repeated option replaces the previous value */
			kfree_sensitive(sbi->domain_id);
			/* take ownership of the string away from @param */
			sbi->domain_id = no_free_ptr(param->string);
		}
		break;
	case Opt_directio:
		if (!IS_ENABLED(CONFIG_EROFS_FS_BACKED_BY_FILE))
			errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name);
		else if (result.boolean)
			set_opt(&sbi->opt, DIRECT_IO);
		else
			clear_opt(&sbi->opt, DIRECT_IO);
		break;
	case Opt_fsoffset:
		/* alignment is checked later in erofs_fc_fill_super() */
		sbi->dif0.fsoff = result.uint_64;
		break;
	case Opt_inode_share:
		if (!IS_ENABLED(CONFIG_EROFS_FS_PAGE_CACHE_SHARE))
			errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name);
		else
			set_opt(&sbi->opt, INODE_SHARE);
		break;
	}
	return 0;
}

/*
 * ->encode_fh: encode a file handle.
 *
 * Layout, in 32-bit words: nid high, nid low, i_generation, optionally
 * followed by the same three words for @parent.  *@max_len is in 32-bit
 * words; if it is too small it is set to the required length and
 * FILEID_INVALID is returned.
 *
 * Returns FILEID_INO64_GEN, or FILEID_INO64_GEN_PARENT when @parent is
 * given.
 */
static int erofs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
			   struct inode *parent)
{
	erofs_nid_t nid = EROFS_I(inode)->nid;
	int len = parent ? 6 : 3;

	if (*max_len < len) {
		*max_len = len;
		return FILEID_INVALID;
	}

	fh[0] = (u32)(nid >> 32);
	fh[1] = (u32)(nid & 0xffffffff);
	fh[2] = inode->i_generation;

	if (parent) {
		nid = EROFS_I(parent)->nid;

		fh[3] = (u32)(nid >> 32);
		fh[4] = (u32)(nid & 0xffffffff);
		fh[5] = parent->i_generation;
	}

	*max_len = len;
	return parent ? FILEID_INO64_GEN_PARENT : FILEID_INO64_GEN;
}

/*
 * ->fh_to_dentry: look up the inode whose nid is stored in words 0-1 of the
 * handle.  Returns NULL for an unknown handle type or a handle shorter than
 * three words.  The generation word is not checked here.
 */
static struct dentry *erofs_fh_to_dentry(struct super_block *sb,
		struct fid *fid, int fh_len, int fh_type)
{
	if ((fh_type != FILEID_INO64_GEN &&
	     fh_type != FILEID_INO64_GEN_PARENT) || fh_len < 3)
		return NULL;

	return d_obtain_alias(erofs_iget(sb,
		((u64)fid->raw[0] << 32) | fid->raw[1]));
}

/*
 * ->fh_to_parent: look up the parent inode whose nid is stored in words 3-4
 * of the handle.  Returns NULL unless the handle is of type
 * FILEID_INO64_GEN_PARENT and at least six words long.
 */
static struct dentry *erofs_fh_to_parent(struct super_block *sb,
		struct fid *fid, int fh_len, int fh_type)
{
	if (fh_type != FILEID_INO64_GEN_PARENT || fh_len < 6)
		return NULL;

	return d_obtain_alias(erofs_iget(sb,
		((u64)fid->raw[3] << 32) | fid->raw[4]));
}

/*
 * ->get_parent: resolve the dotdot_name entry of @child through
 * erofs_namei() and return a dentry for the resulting nid, or an ERR_PTR()
 * if the lookup fails.
 */
static struct dentry *erofs_get_parent(struct dentry *child)
{
	erofs_nid_t nid;
	unsigned int d_type;
	int err;

	err = erofs_namei(d_inode(child), &dotdot_name, &nid, &d_type);
	if (err)
		return ERR_PTR(err);
	return d_obtain_alias(erofs_iget(child->d_sb, nid));
}

/* export operations, installed as sb->s_export_op in erofs_fc_fill_super() */
static const struct export_operations erofs_export_ops = {
	.encode_fh = erofs_encode_fh,
	.fh_to_dentry = erofs_fh_to_dentry,
	.fh_to_parent = erofs_fh_to_parent,
	.get_parent = erofs_get_parent,
};

/*
 * Choose the sysfs name of the superblock: the bdi device name for
 * file-backed mounts, the generic id-based name otherwise.
 */
static void erofs_set_sysfs_name(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	if (erofs_is_fileio_mode(sbi))
		super_set_sysfs_name_generic(sb, "%s",
					     bdi_dev_name(sb->s_bdi));
	else
		super_set_sysfs_name_id(sb);
}

/*
 * Fill in a new superblock; passed as the fill_super callback to
 * get_tree_bdev_flags()/get_tree_nodev() by erofs_fc_get_tree().
 *
 * Sequence: validate option combinations, set up the initial block size
 * (and the bdi or DAX device depending on the backing type), read the
 * on-disk superblock, switch to the on-disk block size, drop options the
 * image or device cannot honour, load the packed/metabox/root inodes,
 * create the root dentry, then register the shrinker, the xattr prefixes
 * and sysfs.
 *
 * Returns 0 or a negative errno.  Error returns do not undo earlier steps
 * here (presumably left to erofs_kill_sb()/erofs_put_super() -- confirm
 * against the VFS mount error path).
 *
 * NOTE(review): two error messages below say "inode_ishare" while the mount
 * option is spelled "inode_share" -- confirm the intended wording.
 */
static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
{
	struct inode *inode;
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	int err;

	sb->s_magic = EROFS_SUPER_MAGIC;
	sb->s_flags |= SB_RDONLY | SB_NOATIME;
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_op = &erofs_sops;

	if (!sbi->domain_id && test_opt(&sbi->opt, INODE_SHARE)) {
		errorfc(fc, "domain_id is needed when inode_ishare is on");
		return -EINVAL;
	}
	if (test_opt(&sbi->opt, DAX_ALWAYS) && test_opt(&sbi->opt, INODE_SHARE)) {
		errorfc(fc, "FSDAX is not allowed when inode_ishare is on");
		return -EINVAL;
	}

	/* provisional value until the on-disk superblock has been read */
	sbi->blkszbits = PAGE_SHIFT;
	if (!sb->s_bdev) {
		/*
		 * (File-backed mounts) EROFS claims it's safe to nest other
		 * fs contexts (including its own) due to self-controlled RO
		 * accesses/contexts and no side-effect changes that need to
		 * context save & restore so it can reuse the current thread
		 * context.
		 * However, we still need to prevent kernel stack overflow due
		 * to filesystem nesting: just ensure that s_stack_depth is 0
		 * to disallow mounting EROFS on stacked filesystems.
		 * Note: s_stack_depth is not incremented here for now, since
		 * EROFS is the only fs supporting file-backed mounts for now.
		 * It MUST change if another fs plans to support them, which
		 * may also require adjusting FILESYSTEM_MAX_STACK_DEPTH.
		 */
		if (erofs_is_fileio_mode(sbi)) {
			inode = file_inode(sbi->dif0.file);
			if ((inode->i_sb->s_op == &erofs_sops &&
			     !inode->i_sb->s_bdev) ||
			    inode->i_sb->s_stack_depth) {
				erofs_err(sb, "file-backed mounts cannot be applied to stacked fses");
				return -ENOTBLK;
			}
		}
		sb->s_blocksize = PAGE_SIZE;
		sb->s_blocksize_bits = PAGE_SHIFT;

		err = super_setup_bdi(sb);
		if (err)
			return err;
	} else {
		if (!sb_set_blocksize(sb, PAGE_SIZE)) {
			errorfc(fc, "failed to set initial blksize");
			return -EINVAL;
		}

		sbi->dif0.dax_dev = fs_dax_get_by_bdev(sb->s_bdev,
				&sbi->dif0.dax_part_off, NULL, NULL);
	}

	err = erofs_read_superblock(sb);
	if (err)
		return err;

	/* switch from the provisional block size to the on-disk one */
	if (sb->s_blocksize_bits != sbi->blkszbits) {
		if (erofs_is_fileio_mode(sbi)) {
			sb->s_blocksize = 1 << sbi->blkszbits;
			sb->s_blocksize_bits = sbi->blkszbits;
		} else if (!sb_set_blocksize(sb, 1 << sbi->blkszbits)) {
			errorfc(fc, "failed to set erofs blksize");
			return -EINVAL;
		}
	}
	if (sbi->dif0.fsoff & (sb->s_blocksize - 1))
		return invalfc(fc, "fsoffset %llu is not aligned to block size %lu",
			       sbi->dif0.fsoff, sb->s_blocksize);

	if (test_opt(&sbi->opt, DAX_ALWAYS) && sbi->blkszbits != PAGE_SHIFT) {
		erofs_info(sb, "unsupported blocksize for DAX");
		clear_opt(&sbi->opt, DAX_ALWAYS);
	}
	if (test_opt(&sbi->opt, INODE_SHARE) && !erofs_sb_has_ishare_xattrs(sbi)) {
		erofs_info(sb, "on-disk ishare xattrs not found. Turning off inode_share.");
		clear_opt(&sbi->opt, INODE_SHARE);
	}
	if (test_opt(&sbi->opt, INODE_SHARE))
		erofs_info(sb, "EXPERIMENTAL EROFS page cache share support in use. Use at your own risk!");

	sb->s_time_gran = 1;
	sb->s_xattr = erofs_xattr_handlers;
	sb->s_export_op = &erofs_export_ops;

	if (test_opt(&sbi->opt, POSIX_ACL))
		sb->s_flags |= SB_POSIXACL;
	else
		sb->s_flags &= ~SB_POSIXACL;

	err = z_erofs_init_super(sb);
	if (err)
		return err;

	/* the packed inode is only loaded when its nid is non-zero */
	if (erofs_sb_has_fragments(sbi) && sbi->packed_nid) {
		inode = erofs_iget(sb, sbi->packed_nid);
		if (IS_ERR(inode))
			return PTR_ERR(inode);
		sbi->packed_inode = inode;
	}
	if (erofs_sb_has_metabox(sbi)) {
		inode = erofs_iget(sb, sbi->metabox_nid);
		if (IS_ERR(inode))
			return PTR_ERR(inode);
		sbi->metabox_inode = inode;
	}

	inode = erofs_iget(sb, sbi->root_nid);
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	if (!S_ISDIR(inode->i_mode)) {
		erofs_err(sb, "rootino(nid %llu) is not a directory(i_mode %o)",
			  sbi->root_nid, inode->i_mode);
		iput(inode);
		return -EINVAL;
	}
	sb->s_root = d_make_root(inode);
	if (!sb->s_root)
		return -ENOMEM;

	erofs_shrinker_register(sb);
	err = erofs_xattr_prefixes_init(sb);
	if (err)
		return err;

	erofs_set_sysfs_name(sb);
	err = erofs_register_sysfs(sb);
	if (err)
		return err;

	sbi->dir_ra_bytes = EROFS_DIR_RA_BYTES;
	erofs_info(sb, "mounted with root inode @ nid %llu.", sbi->root_nid);
	return 0;
}

/*
 * ->get_tree: try a block-device mount first.
 *
 * If that returns -ENOTBLK and file-backed mounts are compiled in,
 * fc->source is opened as a file instead; when it is a regular file whose
 * mapping provides ->read_folio, the mount proceeds through
 * get_tree_nodev().  Otherwise the original error is returned; the opened
 * file stays in sbi->dif0.file, which erofs_sb_free() releases.
 */
static int erofs_fc_get_tree(struct fs_context *fc)
{
	int ret;

	ret = get_tree_bdev_flags(fc, erofs_fc_fill_super,
		IS_ENABLED(CONFIG_EROFS_FS_BACKED_BY_FILE) ?
			GET_TREE_BDEV_QUIET_LOOKUP : 0);
	if (IS_ENABLED(CONFIG_EROFS_FS_BACKED_BY_FILE) && ret == -ENOTBLK) {
		struct erofs_sb_info *sbi = fc->s_fs_info;
		struct file *file;

		if (!fc->source)
			return invalf(fc, "No source specified");
		file = filp_open(fc->source, O_RDONLY | O_LARGEFILE, 0);
		if (IS_ERR(file))
			return PTR_ERR(file);
		sbi->dif0.file = file;

		if (S_ISREG(file_inode(sbi->dif0.file)->i_mode) &&
		    sbi->dif0.file->f_mapping->a_ops->read_folio)
			return get_tree_nodev(fc, erofs_fc_fill_super);
	}
	return ret;
}

/*
 * ->reconfigure (remount): replace the live mount options with the newly
 * parsed ones and keep the filesystem read-only.  A domain_id supplied on
 * remount is ignored with a notice.  Always returns 0.
 */
static int erofs_fc_reconfigure(struct fs_context *fc)
{
	struct super_block *sb = fc->root->d_sb;
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_sb_info *new_sbi = fc->s_fs_info;

	DBG_BUGON(!sb_rdonly(sb));

	if (new_sbi->domain_id)
		erofs_info(sb, "ignoring reconfiguration for domain_id.");

	if (test_opt(&new_sbi->opt, POSIX_ACL))
		fc->sb_flags |= SB_POSIXACL;
	else
		fc->sb_flags &= ~SB_POSIXACL;

	sbi->opt = new_sbi->opt;

	fc->sb_flags |= SB_RDONLY;
	return 0;
}

/*
 * idr_for_each() callback: release everything owned by one
 * erofs_device_info (DAX device, backing file, path) and the object itself.
 * @id and @data are unused.  Always returns 0.
 */
static int erofs_release_device_info(int id, void *ptr, void *data)
{
	struct erofs_device_info *dif = ptr;

	fs_put_dax(dif->dax_dev, NULL);
	if (dif->file)
		fput(dif->file);
	kfree(dif->path);
	kfree(dif);
	return 0;
}

/* Free a device context and every device it holds; NULL is accepted. */
static void erofs_free_dev_context(struct erofs_dev_context *devs)
{
	if (!devs)
		return;
	idr_for_each(&devs->tree, &erofs_release_device_info, NULL);
	idr_destroy(&devs->tree);
	kfree(devs);
}

/*
 * Free an erofs_sb_info together with its device context, domain id,
 * primary backing file reference and volume name.
 */
static void erofs_sb_free(struct erofs_sb_info *sbi)
{
	erofs_free_dev_context(sbi->devs);
	kfree_sensitive(sbi->domain_id);
	if (sbi->dif0.file)
		fput(sbi->dif0.file);
	kfree(sbi->volume_name);
	kfree(sbi);
}

/* ->free: release the sb info if the context still owns it */
static void erofs_fc_free(struct fs_context *fc)
{
	struct erofs_sb_info *sbi = fc->s_fs_info;

	if (sbi) /* free here if an error occurs before transferring to sb */
		erofs_sb_free(sbi);
}

static const struct fs_context_operations erofs_context_ops = {
	.parse_param	= erofs_fc_parse_param,
	.get_tree       = erofs_fc_get_tree,
	.reconfigure    = erofs_fc_reconfigure,
	.free		= erofs_fc_free,
};

/*
 * ->init_fs_context: allocate the sb info and its device context, apply the
 * default mount options and install the context operations.
 *
 * Returns 0 or -ENOMEM.
 */
static int erofs_init_fs_context(struct fs_context *fc)
{
	struct erofs_sb_info *sbi;

	sbi = kzalloc_obj(*sbi);
	if (!sbi)
		return -ENOMEM;

	sbi->devs = kzalloc_obj(struct erofs_dev_context);
	if (!sbi->devs) {
		kfree(sbi);
		return -ENOMEM;
	}
	fc->s_fs_info = sbi;

	idr_init(&sbi->devs->tree);
	init_rwsem(&sbi->devs->rwsem);
	erofs_default_options(sbi);
	fc->ops = &erofs_context_ops;
	return 0;
}

/*
 * Drop the references on the filesystem-internal inodes (packed, metabox
 * and, with compression built in, the managed cache).  The pointers are
 * cleared afterwards, so a second call puts nothing.
 */
static void erofs_drop_internal_inodes(struct erofs_sb_info *sbi)
{
	iput(sbi->packed_inode);
	sbi->packed_inode = NULL;
	iput(sbi->metabox_inode);
	sbi->metabox_inode = NULL;
#ifdef CONFIG_EROFS_FS_ZIP
	iput(sbi->managed_cache);
	sbi->managed_cache = NULL;
#endif
}

/*
 * ->kill_sb: tear down the superblock with the helper matching how it was
 * mounted (anonymous for file-backed mounts, block otherwise), then release
 * the internal inodes, the primary DAX device and the sb info.
 */
static void erofs_kill_sb(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	if (sbi->dif0.file)
		kill_anon_super(sb);
	else
		kill_block_super(sb);
	erofs_drop_internal_inodes(sbi);
	fs_put_dax(sbi->dif0.dax_dev, NULL);
	erofs_sb_free(sbi);
	sb->s_fs_info = NULL;
}

/*
 * ->put_super: undo the registrations made in erofs_fc_fill_super() and
 * release the internal inodes and the device context.  sbi->devs is cleared
 * so that the later erofs_sb_free() does not free it a second time.
 */
static void erofs_put_super(struct super_block *sb)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);

	erofs_unregister_sysfs(sb);
	erofs_shrinker_unregister(sb);
	erofs_xattr_prefixes_cleanup(sb);
	erofs_drop_internal_inodes(sbi);
	erofs_free_dev_context(sbi->devs);
	sbi->devs = NULL;
}

static struct file_system_type erofs_fs_type = {
	.owner          = THIS_MODULE,
	.name           = "erofs",
	.init_fs_context = erofs_init_fs_context,
	.kill_sb        = erofs_kill_sb,
	.fs_flags       = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
};
MODULE_ALIAS_FS("erofs");

#ifdef CONFIG_EROFS_FS_PAGE_CACHE_SHARE
/*
 * ->free_inode for the anonymous pseudo filesystem: free the fingerprint
 * buffer and return the inode to the slab cache.
 */
static void erofs_free_anon_inode(struct inode *inode)
{
	struct erofs_inode *vi = EROFS_I(inode);

	kfree(vi->fingerprint.opaque);
	kmem_cache_free(erofs_inode_cachep, vi);
}

static const struct super_operations erofs_anon_sops = {
	.alloc_inode = erofs_alloc_inode,
	.drop_inode = inode_just_drop,
	.free_inode = erofs_free_anon_inode,
};

/*
 * ->init_fs_context of the pseudo filesystem: set up a pseudo fs context
 * with the EROFS magic and erofs_anon_sops.  Returns 0 or -ENOMEM.
 */
static int erofs_anon_init_fs_context(struct fs_context *fc)
{
	struct pseudo_fs_context *ctx;

	ctx = init_pseudo(fc, EROFS_SUPER_MAGIC);
	if (!ctx)
		return -ENOMEM;
	ctx->ops = &erofs_anon_sops;
	return 0;
}

/* not static: presumably referenced by the page cache share code -- confirm */
struct file_system_type erofs_anon_fs_type = {
	.name           = "pseudo_erofs",
	.init_fs_context = erofs_anon_init_fs_context,
	.kill_sb        = kill_anon_super,
};
#endif

/*
 * Module entry point: create the inode slab cache, initialise the shrinker,
 * compression, sysfs and ishare subsystems, then register the filesystem.
 * On failure the steps already completed are undone in reverse order.
 */
static int __init erofs_module_init(void)
{
	int err;

	erofs_check_ondisk_layout_definitions();

	erofs_inode_cachep = kmem_cache_create("erofs_inode",
			sizeof(struct erofs_inode), 0,
			SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
			erofs_inode_init_once);
	if (!erofs_inode_cachep)
		return -ENOMEM;

	err = erofs_init_shrinker();
	if (err)
		goto shrinker_err;

	err = z_erofs_init_subsystem();
	if (err)
		goto zip_err;

	err = erofs_init_sysfs();
	if (err)
		goto sysfs_err;

	err = erofs_init_ishare();
	if (err)
		goto ishare_err;

	err = register_filesystem(&erofs_fs_type);
	if (err)
		goto fs_err;

	return 0;

fs_err:
	erofs_exit_ishare();
ishare_err:
	erofs_exit_sysfs();
sysfs_err:
	z_erofs_exit_subsystem();
zip_err:
	erofs_exit_shrinker();
shrinker_err:
	kmem_cache_destroy(erofs_inode_cachep);
	return err;
}

/*
 * Module exit: unregister the filesystem, then tear the subsystems down in
 * the reverse order of erofs_module_init().
 */
static void __exit erofs_module_exit(void)
{
	unregister_filesystem(&erofs_fs_type);
	erofs_exit_ishare();

	/* ensure all delayed rcu free inodes & pclusters are flushed */
	rcu_barrier();

	erofs_exit_sysfs();
	z_erofs_exit_subsystem();
	erofs_exit_shrinker();
	kmem_cache_destroy(erofs_inode_cachep);
}

/*
 * ->statfs: report filesystem statistics.
 *
 * Free and available block counts are always 0.  f_files is ULLONG_MAX and
 * f_ffree is that value minus the on-disk inode count.  f_fsid is derived
 * from the UUID; if the UUID is null it falls back to the block device
 * number, or 0 when there is no block device.
 */
static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct super_block *sb = dentry->d_sb;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	buf->f_type = sb->s_magic;
	buf->f_bsize = sb->s_blocksize;
	buf->f_blocks = sbi->total_blocks;
	buf->f_bfree = buf->f_bavail = 0;
	buf->f_files = ULLONG_MAX;
	buf->f_ffree = ULLONG_MAX - sbi->inos;
	buf->f_namelen = EROFS_NAME_LEN;

	if (uuid_is_null(&sb->s_uuid))
		buf->f_fsid = u64_to_fsid(!sb->s_bdev ? 0 :
				huge_encode_dev(sb->s_bdev->bd_dev));
	else
		buf->f_fsid = uuid_to_fsid(sb->s_uuid.b);
	return 0;
}

/*
 * ->show_options: print the active mount options.
 *
 * Options whose feature is compiled out are omitted; "directio" is only
 * shown for file-backed mounts and "fsoffset" only when it is non-zero.
 */
static int erofs_show_options(struct seq_file *seq, struct dentry *root)
{
	struct erofs_sb_info *sbi = EROFS_SB(root->d_sb);
	struct erofs_mount_opts *opt = &sbi->opt;

	if (IS_ENABLED(CONFIG_EROFS_FS_XATTR))
		seq_puts(seq, test_opt(opt, XATTR_USER) ?
				",user_xattr" : ",nouser_xattr");
	if (IS_ENABLED(CONFIG_EROFS_FS_POSIX_ACL))
		seq_puts(seq, test_opt(opt, POSIX_ACL) ? ",acl" : ",noacl");
	if (IS_ENABLED(CONFIG_EROFS_FS_ZIP))
		seq_printf(seq, ",cache_strategy=%s",
			  erofs_param_cache_strategy[opt->cache_strategy].name);
	if (test_opt(opt, DAX_ALWAYS))
		seq_puts(seq, ",dax=always");
	if (test_opt(opt, DAX_NEVER))
		seq_puts(seq, ",dax=never");
	if (erofs_is_fileio_mode(sbi) && test_opt(opt, DIRECT_IO))
		seq_puts(seq, ",directio");
	if (sbi->dif0.fsoff)
		seq_printf(seq, ",fsoffset=%llu", sbi->dif0.fsoff);
	if (test_opt(opt, INODE_SHARE))
		seq_puts(seq, ",inode_share");
	return 0;
}

/*
 * ->evict_inode: break DAX layouts for DAX inodes, release the ishare state
 * through erofs_ishare_free_inode(), truncate the page cache and clear the
 * inode.
 */
static void erofs_evict_inode(struct inode *inode)
{
	if (IS_DAX(inode))
		dax_break_layout_final(inode);
	erofs_ishare_free_inode(inode);
	truncate_inode_pages_final(&inode->i_data);
	clear_inode(inode);
}

/* superblock operations, installed as sb->s_op in erofs_fc_fill_super() */
const struct super_operations erofs_sops = {
	.put_super = erofs_put_super,
	.alloc_inode = erofs_alloc_inode,
	.free_inode = erofs_free_inode,
	.evict_inode = erofs_evict_inode,
	.statfs = erofs_statfs,
	.show_options = erofs_show_options,
};

module_init(erofs_module_init);
module_exit(erofs_module_exit);

MODULE_DESCRIPTION("Enhanced ROM File System");
MODULE_AUTHOR("Gao Xiang, Chao Yu, Miao Xie, CONSUMER BG, HUAWEI Inc.");
MODULE_LICENSE("GPL");