// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017-2018 HUAWEI, Inc.
 *             https://www.huawei.com/
 * Copyright (C) 2021, Alibaba Cloud
 */
#include <linux/statfs.h>
#include <linux/seq_file.h>
#include <linux/crc32c.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/exportfs.h>
#include <linux/backing-dev.h>
#include <linux/pseudo_fs.h>
#include "xattr.h"

#define CREATE_TRACE_POINTS
#include <trace/events/erofs.h>

/* slab cache that every struct erofs_inode in this file is allocated from */
static struct kmem_cache *erofs_inode_cachep __read_mostly;

/*
 * Print a message through printk(), prefixed with "erofs (device <s_id>): "
 * when @sb is non-NULL and with "erofs: " otherwise.  The log level is the
 * one encoded at the start of @fmt; the rest of @fmt and the variadic
 * arguments are forwarded through %pV.
 */
void _erofs_printk(struct super_block *sb, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;
	int level;

	va_start(args, fmt);

	/* split the KERN_* level prefix off the caller's format string */
	level = printk_get_level(fmt);
	vaf.fmt = printk_skip_level(fmt);
	vaf.va = &args;
	if (sb)
		printk("%c%cerofs (device %s): %pV",
				KERN_SOH_ASCII, level, sb->s_id, &vaf);
	else
		printk("%c%cerofs: %pV", KERN_SOH_ASCII, level, &vaf);
	va_end(args);
}

/*
 * Check the crc32c checksum of the on-disk superblock located at
 * EROFS_SUPER_OFFSET inside @sbdata.
 *
 * Returns 0 when the computed value equals dsb->checksum, otherwise logs
 * both values and returns -EBADMSG.
 */
static int erofs_superblock_csum_verify(struct super_block *sb, void *sbdata)
{
	struct erofs_super_block *dsb = sbdata + EROFS_SUPER_OFFSET;
	u32 len = 1 << EROFS_SB(sb)->blkszbits, crc;

	/*
	 * Start from one filesystem block, drop the leading
	 * EROFS_SUPER_OFFSET bytes when the block is larger than that, then
	 * drop everything up to and including the .checksum field.
	 */
	if (len > EROFS_SUPER_OFFSET)
		len -= EROFS_SUPER_OFFSET;
	len -= offsetof(struct erofs_super_block, checksum) +
			sizeof(dsb->checksum);

	/* skip .magic(pre-verified) and .checksum(0) fields */
	crc = crc32c(0x5045B54A, (&dsb->checksum) + 1, len);
	if (crc == le32_to_cpu(dsb->checksum))
		return 0;
	erofs_err(sb, "invalid checksum 0x%08x, 0x%08x expected",
		  crc, le32_to_cpu(dsb->checksum));
	return -EBADMSG;
}

/*
 * Constructor callback for erofs_inode objects: initializes only the
 * embedded VFS inode via inode_init_once().
 */
static void erofs_inode_init_once(void *ptr)
{
	struct erofs_inode *vi = ptr;

	inode_init_once(&vi->vfs_inode);
}

/*
 * Allocate an erofs_inode from erofs_inode_cachep and return its embedded
 * VFS inode, or NULL if the allocation fails.
 */
static struct inode *erofs_alloc_inode(struct super_block *sb)
{
	struct erofs_inode *vi =
			alloc_inode_sb(sb, erofs_inode_cachep, GFP_KERNEL);

	if (!vi)
		return NULL;

	/* zero out everything except vfs_inode */
	memset(vi, 0, offsetof(struct erofs_inode, vfs_inode));
	return &vi->vfs_inode;
}

/*
 * Release an inode: frees inode->i_link when the inode uses
 * erofs_fast_symlink_iops, frees the xattr_shared_xattrs array, then hands
 * the erofs_inode back to erofs_inode_cachep.
 */
static void erofs_free_inode(struct inode *inode)
{
	struct erofs_inode *vi = EROFS_I(inode);

	if (inode->i_op == &erofs_fast_symlink_iops)
		kfree(inode->i_link);
	kfree(vi->xattr_shared_xattrs);
	kmem_cache_free(erofs_inode_cachep, vi);
}

/*
 * read variable-sized metadata, offset will be aligned by 4-byte
 *
 * The record read here is a __le16 length followed by that many payload
 * bytes; a stored length of 0 stands for U16_MAX + 1 bytes.
 *
 * @sb:      superblock (used for the block size)
 * @buf:     metadata buffer handed to erofs_bread()
 * @offset:  in: position of the record; it is first rounded up to 4 bytes
 *           and then advanced past the length field and the copied payload
 * @lengthp: out: payload length in bytes
 *
 * Returns a kmalloc()ed copy of the payload (not freed here, so the caller
 * owns it), or an ERR_PTR().  Note that *offset and *lengthp may already
 * have been updated when a read fails part-way through the payload.
 */
void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf,
			  erofs_off_t *offset, int *lengthp)
{
	u8 *buffer, *ptr;
	int len, i, cnt;

	*offset = round_up(*offset, 4);
	ptr = erofs_bread(buf, *offset, true);
	if (IS_ERR(ptr))
		return ptr;

	len = le16_to_cpu(*(__le16 *)ptr);
	if (!len)
		len = U16_MAX + 1;
	buffer = kmalloc(len, GFP_KERNEL);
	if (!buffer)
		return ERR_PTR(-ENOMEM);
	*offset += sizeof(__le16);
	*lengthp = len;

	/* copy piecewise; each step is capped by s_blocksize minus erofs_blkoff() */
	for (i = 0; i < len; i += cnt) {
		cnt = min_t(int, sb->s_blocksize - erofs_blkoff(sb, *offset),
			    len - i);
		ptr = erofs_bread(buf, *offset, true);
		if (IS_ERR(ptr)) {
			kfree(buffer);
			return ptr;
		}
		memcpy(buffer + i, ptr, cnt);
		*offset += cnt;
	}
	return buffer;
}

/*
 * Set up one extra device from the on-disk device slot at *@pos.
 *
 * @buf: metadata buffer used to read the slot
 * @sb:  superblock being mounted
 * @dif: device descriptor to fill in (its ->path may already be set)
 * @pos: in/out byte position of the slot; advanced by EROFS_DEVT_SLOT_SIZE
 *       on success
 *
 * Unless the device table is flagged as "flatdev", a missing ->path is
 * taken from the slot's tag.  Then, depending on the mode, an fscache
 * cookie is registered or the backing file / block device is opened;
 * nothing is opened for flat devices.  Finally the block count and uniaddr
 * are read from the slot and the block count is added to
 * sbi->total_blocks.
 *
 * Returns 0 or a negative errno.
 */
static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb,
			     struct erofs_device_info *dif, erofs_off_t *pos)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_fscache *fscache;
	struct erofs_deviceslot *dis;
	struct file *file;

	dis = erofs_read_metabuf(buf, sb, *pos, false);
	if (IS_ERR(dis))
		return PTR_ERR(dis);

	/* no path given for this device: fall back to the on-disk tag */
	if (!sbi->devs->flatdev && !dif->path) {
		if (!dis->tag[0]) {
			erofs_err(sb, "empty device tag @ pos %llu", *pos);
			return -EINVAL;
		}
		dif->path = kmemdup_nul(dis->tag, sizeof(dis->tag), GFP_KERNEL);
		if (!dif->path)
			return -ENOMEM;
	}

	if (erofs_is_fscache_mode(sb)) {
		fscache = erofs_fscache_register_cookie(sb, dif->path, 0);
		if (IS_ERR(fscache))
			return PTR_ERR(fscache);
		dif->fscache = fscache;
	} else if (!sbi->devs->flatdev) {
		/* file-backed mounts open a file, otherwise a block device */
		file = erofs_is_fileio_mode(sbi) ?
				filp_open(dif->path, O_RDONLY | O_LARGEFILE, 0) :
				bdev_file_open_by_path(dif->path,
						BLK_OPEN_READ, sb->s_type, NULL);
		if (IS_ERR(file)) {
			/*
			 * NOTE(review): presumably -ENOTBLK is remapped so it
			 * is not mistaken for the -ENOTBLK that
			 * erofs_fc_get_tree() treats as "retry as a
			 * file-backed mount" -- confirm.
			 */
			if (file == ERR_PTR(-ENOTBLK))
				return -EINVAL;
			return PTR_ERR(file);
		}

		if (!erofs_is_fileio_mode(sbi)) {
			dif->dax_dev = fs_dax_get_by_bdev(file_bdev(file),
					&dif->dax_part_off, NULL, NULL);
		} else if (!S_ISREG(file_inode(file)->i_mode)) {
			/* in file-backed mode only regular files are accepted */
			fput(file);
			return -EINVAL;
		}
		/* dax=always is dropped as soon as one device has no dax_dev */
		if (!dif->dax_dev && test_opt(&sbi->opt, DAX_ALWAYS)) {
			erofs_info(sb, "DAX unsupported by %s. Turning off DAX.",
				   dif->path);
			clear_opt(&sbi->opt, DAX_ALWAYS);
		}
		dif->file = file;
	}

	dif->blocks = le32_to_cpu(dis->blocks_lo);
	dif->uniaddr = le32_to_cpu(dis->uniaddr_lo);
	sbi->total_blocks += dif->blocks;
	*pos += EROFS_DEVT_SLOT_SIZE;
	return 0;
}

/*
 * Walk the on-disk device table and initialize every extra device.
 *
 * The number of on-disk extra devices is 0 unless the superblock has the
 * device-table feature.  If devices were given at mount time (non-zero
 * sbi->devs->extra_devices) their count must equal the on-disk count and
 * each of them is initialized in idr order.  Otherwise one
 * erofs_device_info is allocated and inserted into the idr per on-disk
 * slot; outside fscache mode the table is then marked as "flatdev".
 *
 * Returns 0 or a negative errno.  Entries already inserted into the idr
 * are left there on failure.
 */
static int erofs_scan_devices(struct super_block *sb,
			      struct erofs_super_block *dsb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	unsigned int ondisk_extradevs;
	erofs_off_t pos;
	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
	struct erofs_device_info *dif;
	int id, err = 0;

	/* extra devices add their block counts on top of the primary one */
	sbi->total_blocks = sbi->dif0.blocks;
	if (!erofs_sb_has_device_table(sbi))
		ondisk_extradevs = 0;
	else
		ondisk_extradevs = le16_to_cpu(dsb->extra_devices);

	if (sbi->devs->extra_devices &&
	    ondisk_extradevs != sbi->devs->extra_devices) {
		erofs_err(sb, "extra devices don't match (ondisk %u, given %u)",
			  ondisk_extradevs, sbi->devs->extra_devices);
		return -EINVAL;
	}

	if (test_opt(&sbi->opt, DAX_ALWAYS) && !sbi->dif0.dax_dev) {
		erofs_info(sb, "DAX unsupported by block device. Turning off DAX.");
		clear_opt(&sbi->opt, DAX_ALWAYS);
	}
	if (!ondisk_extradevs)
		return 0;

	/* on-disk devices exist but none were passed in: use a flat layout */
	if (!sbi->devs->extra_devices && !erofs_is_fscache_mode(sb))
		sbi->devs->flatdev = true;

	sbi->device_id_mask = roundup_pow_of_two(ondisk_extradevs + 1) - 1;
	pos = le16_to_cpu(dsb->devt_slotoff) * EROFS_DEVT_SLOT_SIZE;
	down_read(&sbi->devs->rwsem);
	if (sbi->devs->extra_devices) {
		/* devices supplied at mount time: consume one slot for each */
		idr_for_each_entry(&sbi->devs->tree, dif, id) {
			err = erofs_init_device(&buf, sb, dif, &pos);
			if (err)
				break;
		}
	} else {
		/* no devices supplied: create one entry per on-disk slot */
		for (id = 0; id < ondisk_extradevs; id++) {
			dif = kzalloc(sizeof(*dif), GFP_KERNEL);
			if (!dif) {
				err = -ENOMEM;
				break;
			}

			err = idr_alloc(&sbi->devs->tree, dif, 0, 0, GFP_KERNEL);
			if (err < 0) {
				kfree(dif);
				break;
			}
			++sbi->devs->extra_devices;

			/* overwrites the id that idr_alloc() stored in err */
			err = erofs_init_device(&buf, sb, dif, &pos);
			if (err)
				break;
		}
	}
	up_read(&sbi->devs->rwsem);
	erofs_put_metabuf(&buf);
	return err;
}

/*
 * Read and validate the on-disk superblock and populate the in-memory
 * erofs_sb_info of @sb.
 *
 * The checks are, in order: magic, blkszbits range (9..PAGE_SHIFT),
 * dirblkbits == 0, checksum (only with the sb_chksum feature), unknown
 * incompat feature bits and the total superblock size.  After that the
 * individual fields are copied into sbi, the compression configuration is
 * parsed (or compressed images are rejected when CONFIG_EROFS_FS_ZIP is
 * off) and the device table is scanned.
 *
 * Returns 0 or a negative errno; the metadata buffer is released on every
 * path that acquired it.
 */
static int erofs_read_superblock(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
	struct erofs_super_block *dsb;
	void *data;
	int ret;

	data = erofs_read_metabuf(&buf, sb, 0, false);
	if (IS_ERR(data)) {
		erofs_err(sb, "cannot read erofs superblock");
		return PTR_ERR(data);
	}

	dsb = (struct erofs_super_block *)(data + EROFS_SUPER_OFFSET);
	ret = -EINVAL;
	if (le32_to_cpu(dsb->magic) != EROFS_SUPER_MAGIC_V1) {
		erofs_err(sb, "cannot find valid erofs superblock");
		goto out;
	}

	sbi->blkszbits = dsb->blkszbits;
	if (sbi->blkszbits < 9 || sbi->blkszbits > PAGE_SHIFT) {
		erofs_err(sb, "blkszbits %u isn't supported", sbi->blkszbits);
		goto out;
	}
	if (dsb->dirblkbits) {
		erofs_err(sb, "dirblkbits %u isn't supported", dsb->dirblkbits);
		goto out;
	}

	sbi->feature_compat = le32_to_cpu(dsb->feature_compat);
	if (erofs_sb_has_sb_chksum(sbi)) {
		ret = erofs_superblock_csum_verify(sb, data);
		if (ret)
			goto out;
	}

	/* ret may have been reset to 0 by the checksum check above */
	ret = -EINVAL;
	sbi->feature_incompat = le32_to_cpu(dsb->feature_incompat);
	if (sbi->feature_incompat & ~EROFS_ALL_FEATURE_INCOMPAT) {
		erofs_err(sb, "unidentified incompatible feature %x, please upgrade kernel",
			  sbi->feature_incompat & ~EROFS_ALL_FEATURE_INCOMPAT);
		goto out;
	}

	/* 128 bytes plus sb_extslots slots of EROFS_SB_EXTSLOT_SIZE each */
	sbi->sb_size = 128 + dsb->sb_extslots * EROFS_SB_EXTSLOT_SIZE;
	if (sbi->sb_size > PAGE_SIZE - EROFS_SUPER_OFFSET) {
		erofs_err(sb, "invalid sb_extslots %u (more than a fs block)",
			  sbi->sb_size);
		goto out;
	}
	sbi->dif0.blocks = le32_to_cpu(dsb->blocks_lo);
	sbi->meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr);
#ifdef CONFIG_EROFS_FS_XATTR
	sbi->xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr);
	sbi->xattr_prefix_start = le32_to_cpu(dsb->xattr_prefix_start);
	sbi->xattr_prefix_count = dsb->xattr_prefix_count;
	sbi->xattr_filter_reserved = dsb->xattr_filter_reserved;
	if (erofs_sb_has_ishare_xattrs(sbi)) {
		/* the ishare prefix id must index an existing xattr prefix */
		if (dsb->ishare_xattr_prefix_id >= sbi->xattr_prefix_count) {
			erofs_err(sb, "invalid ishare xattr prefix id %u",
				  dsb->ishare_xattr_prefix_id);
			ret = -EFSCORRUPTED;
			goto out;
		}
		sbi->ishare_xattr_prefix_id = dsb->ishare_xattr_prefix_id;
	}
#endif
	sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact));
	/*
	 * With the 48bit feature and a non-zero rootnid_8b, the root nid is
	 * 64 bits wide and rb.blocks_hi supplies bits 32+ of the block
	 * count; otherwise the 16-bit rb.rootnid_2b is used.
	 */
	if (erofs_sb_has_48bit(sbi) && dsb->rootnid_8b) {
		sbi->root_nid = le64_to_cpu(dsb->rootnid_8b);
		sbi->dif0.blocks = sbi->dif0.blocks |
				((u64)le16_to_cpu(dsb->rb.blocks_hi) << 32);
	} else {
		sbi->root_nid = le16_to_cpu(dsb->rb.rootnid_2b);
	}
	sbi->packed_nid = le64_to_cpu(dsb->packed_nid);
	if (erofs_sb_has_metabox(sbi)) {
		ret = -EFSCORRUPTED;
		/* the superblock must be large enough to hold metabox_nid */
		if (sbi->sb_size <= offsetof(struct erofs_super_block,
					     metabox_nid))
			goto out;
		sbi->metabox_nid = le64_to_cpu(dsb->metabox_nid);
		if (sbi->metabox_nid & BIT_ULL(EROFS_DIRENT_NID_METABOX_BIT))
			goto out;	/* self-loop detection */
	}
	sbi->inos = le64_to_cpu(dsb->inos);

	sbi->epoch = (s64)le64_to_cpu(dsb->epoch);
	sbi->fixed_nsec = le32_to_cpu(dsb->fixed_nsec);
	super_set_uuid(sb, (void *)dsb->uuid, sizeof(dsb->uuid));

	if (dsb->volume_name[0]) {
		sbi->volume_name = kstrndup(dsb->volume_name,
					    sizeof(dsb->volume_name), GFP_KERNEL);
		if (!sbi->volume_name) {
			ret = -ENOMEM;
			goto out;
		}
	}

	if (IS_ENABLED(CONFIG_EROFS_FS_ZIP)) {
		ret = z_erofs_parse_cfgs(sb, dsb);
		if (ret < 0)
			goto out;
	} else if (dsb->u1.available_compr_algs ||
		   erofs_sb_has_lz4_0padding(sbi)) {
		erofs_err(sb, "compression disabled, unable to mount compressed EROFS");
		ret = -EOPNOTSUPP;
		goto out;
	}

	ret = erofs_scan_devices(sb, dsb);

	/* these notices are printed whether or not the device scan succeeded */
	if (erofs_sb_has_48bit(sbi))
		erofs_info(sb, "EXPERIMENTAL 48-bit layout support in use. Use at your own risk!");
	if (erofs_sb_has_metabox(sbi))
		erofs_info(sb, "EXPERIMENTAL metadata compression support in use. Use at your own risk!");
	if (erofs_is_fscache_mode(sb))
		erofs_info(sb, "[deprecated] fscache-based on-demand read feature in use. Use at your own risk!");
out:
	erofs_put_metabuf(&buf);
	return ret;
}

/*
 * Install the default mount options: the readaround cache strategy and
 * automatic sync decompression when compression support is built in, plus
 * user xattrs / POSIX ACLs when the corresponding configs are enabled.
 */
static void erofs_default_options(struct erofs_sb_info *sbi)
{
#ifdef CONFIG_EROFS_FS_ZIP
	sbi->opt.cache_strategy = EROFS_ZIP_CACHE_READAROUND;
	sbi->sync_decompress = EROFS_SYNC_DECOMPRESS_AUTO;
#endif
	if (IS_ENABLED(CONFIG_EROFS_FS_XATTR))
		set_opt(&sbi->opt, XATTR_USER);
	if (IS_ENABLED(CONFIG_EROFS_FS_POSIX_ACL))
		set_opt(&sbi->opt, POSIX_ACL);
}

/*
 * Mount option identifiers.  The order matches erofs_fs_parameters[]
 * below, which erofs_fc_parse_param() indexes by option value to obtain
 * the option name for its "not supported" messages.
 */
enum {
	Opt_user_xattr, Opt_acl, Opt_cache_strategy, Opt_dax, Opt_dax_enum,
	Opt_device, Opt_fsid, Opt_domain_id, Opt_directio, Opt_fsoffset,
	Opt_inode_share,
};

/* accepted values of "cache_strategy="; also used to print the option back */
static const struct constant_table erofs_param_cache_strategy[] = {
	{"disabled",	EROFS_ZIP_CACHE_DISABLED},
	{"readahead",	EROFS_ZIP_CACHE_READAHEAD},
	{"readaround",	EROFS_ZIP_CACHE_READAROUND},
	{}
};

/* accepted values of "dax=" */
static const struct constant_table erofs_dax_param_enums[] = {
	{"always",	EROFS_MOUNT_DAX_ALWAYS},
	{"never",	EROFS_MOUNT_DAX_NEVER},
	{}
};

/* mount option table; "dax" is accepted both as a flag and as an enum */
static const struct fs_parameter_spec erofs_fs_parameters[] = {
	fsparam_flag_no("user_xattr",	Opt_user_xattr),
	fsparam_flag_no("acl",		Opt_acl),
	fsparam_enum("cache_strategy",	Opt_cache_strategy,
		     erofs_param_cache_strategy),
	fsparam_flag("dax",		Opt_dax),
	fsparam_enum("dax",		Opt_dax_enum, erofs_dax_param_enums),
	fsparam_string("device",	Opt_device),
	fsparam_string("fsid",		Opt_fsid),
	fsparam_string("domain_id",	Opt_domain_id),
	fsparam_flag_no("directio",	Opt_directio),
	fsparam_u64("fsoffset",		Opt_fsoffset),
	fsparam_flag("inode_share",	Opt_inode_share),
	{}
};

/*
 * Apply a DAX mode to the mount options held in fc->s_fs_info.
 *
 * DAX_ALWAYS and DAX_NEVER are kept mutually exclusive.  Returns true on
 * success; returns false for an unknown @mode, or (after reporting an
 * error on @fc) when CONFIG_FS_DAX is disabled.
 */
static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode)
{
#ifdef CONFIG_FS_DAX
	struct erofs_sb_info *sbi = fc->s_fs_info;

	switch (mode) {
	case EROFS_MOUNT_DAX_ALWAYS:
		set_opt(&sbi->opt, DAX_ALWAYS);
		clear_opt(&sbi->opt, DAX_NEVER);
		return true;
	case EROFS_MOUNT_DAX_NEVER:
		set_opt(&sbi->opt, DAX_NEVER);
		clear_opt(&sbi->opt, DAX_ALWAYS);
		return true;
	default:
		DBG_BUGON(1);
		return false;
	}
#else
	errorfc(fc, "dax options not supported");
	return false;
#endif
}

/*
 * ->parse_param: handle a single mount parameter.
 *
 * Options whose feature is compiled out are reported through errorfc()
 * but still return 0; only the dax options fail with -EINVAL in that
 * case.  Returns the negative value from fs_parse() for unrecognized
 * parameters, -ENOMEM / the idr_alloc() error on allocation failures, and
 * 0 otherwise.
 */
static int erofs_fc_parse_param(struct fs_context *fc,
				struct fs_parameter *param)
{
	struct erofs_sb_info *sbi = fc->s_fs_info;
	struct fs_parse_result result;
	struct erofs_device_info *dif;
	int opt, ret;

	opt = fs_parse(fc, erofs_fs_parameters, param, &result);
	if (opt < 0)
		return opt;

	switch (opt) {
	case Opt_user_xattr:
#ifdef CONFIG_EROFS_FS_XATTR
		if (result.boolean)
			set_opt(&sbi->opt, XATTR_USER);
		else
			clear_opt(&sbi->opt, XATTR_USER);
#else
		errorfc(fc, "{,no}user_xattr options not supported");
#endif
		break;
	case Opt_acl:
#ifdef CONFIG_EROFS_FS_POSIX_ACL
		if (result.boolean)
			set_opt(&sbi->opt, POSIX_ACL);
		else
			clear_opt(&sbi->opt, POSIX_ACL);
#else
		errorfc(fc, "{,no}acl options not supported");
#endif
		break;
	case Opt_cache_strategy:
#ifdef CONFIG_EROFS_FS_ZIP
		sbi->opt.cache_strategy = result.uint_32;
#else
		errorfc(fc, "compression not supported, cache_strategy ignored");
#endif
		break;
	case Opt_dax:
		/* a bare "dax" means the same as "dax=always" */
		if (!erofs_fc_set_dax_mode(fc, EROFS_MOUNT_DAX_ALWAYS))
			return -EINVAL;
		break;
	case Opt_dax_enum:
		if (!erofs_fc_set_dax_mode(fc, result.uint_32))
			return -EINVAL;
		break;
	case Opt_device:
		/* each "device=" adds one entry with a private copy of the path */
		dif = kzalloc(sizeof(*dif), GFP_KERNEL);
		if (!dif)
			return -ENOMEM;
		dif->path = kstrdup(param->string, GFP_KERNEL);
		if (!dif->path) {
			kfree(dif);
			return -ENOMEM;
		}
		down_write(&sbi->devs->rwsem);
		ret = idr_alloc(&sbi->devs->tree, dif, 0, 0, GFP_KERNEL);
		up_write(&sbi->devs->rwsem);
		if (ret < 0) {
			kfree(dif->path);
			kfree(dif);
			return ret;
		}
		++sbi->devs->extra_devices;
		break;
#ifdef CONFIG_EROFS_FS_ONDEMAND
	case Opt_fsid:
		/* a repeated "fsid=" replaces the previous value */
		kfree(sbi->fsid);
		sbi->fsid = kstrdup(param->string, GFP_KERNEL);
		if (!sbi->fsid)
			return -ENOMEM;
		break;
#endif
#if defined(CONFIG_EROFS_FS_ONDEMAND) || defined(CONFIG_EROFS_FS_PAGE_CACHE_SHARE)
	case Opt_domain_id:
		/* take over param->string itself instead of duplicating it */
		kfree_sensitive(sbi->domain_id);
		sbi->domain_id = no_free_ptr(param->string);
		break;
#else
	case Opt_fsid:
	case Opt_domain_id:
		errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name);
		break;
#endif
	case Opt_directio:
#ifdef CONFIG_EROFS_FS_BACKED_BY_FILE
		if (result.boolean)
			set_opt(&sbi->opt, DIRECT_IO);
		else
			clear_opt(&sbi->opt, DIRECT_IO);
#else
		errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name);
#endif
		break;
	case Opt_fsoffset:
		sbi->dif0.fsoff = result.uint_64;
		break;
	case Opt_inode_share:
#ifdef CONFIG_EROFS_FS_PAGE_CACHE_SHARE
		set_opt(&sbi->opt, INODE_SHARE);
#else
		errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name);
#endif
		break;
	}
	return 0;
}

/*
 * ->encode_fh: encode @inode (and optionally @parent) into a file handle.
 *
 * Layout in 32-bit words: fh[0] = nid high half, fh[1] = nid low half,
 * fh[2] = i_generation; with @parent the same triple follows in fh[3..5].
 * When *@max_len is too small it is set to the required length (3 or 6)
 * and FILEID_INVALID is returned; otherwise *@max_len is set to the length
 * used and FILEID_INO64_GEN or FILEID_INO64_GEN_PARENT is returned.
 */
static int erofs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
			   struct inode *parent)
{
	erofs_nid_t nid = EROFS_I(inode)->nid;
	int len = parent ? 6 : 3;

	if (*max_len < len) {
		*max_len = len;
		return FILEID_INVALID;
	}

	fh[0] = (u32)(nid >> 32);
	fh[1] = (u32)(nid & 0xffffffff);
	fh[2] = inode->i_generation;

	if (parent) {
		nid = EROFS_I(parent)->nid;

		fh[3] = (u32)(nid >> 32);
		fh[4] = (u32)(nid & 0xffffffff);
		fh[5] = parent->i_generation;
	}

	*max_len = len;
	return parent ? FILEID_INO64_GEN_PARENT : FILEID_INO64_GEN;
}

/*
 * ->fh_to_dentry: look up the inode whose 64-bit nid is stored in
 * fid->raw[0..1].  Returns NULL for other handle types or handles shorter
 * than 3 words.  The generation word (raw[2]) is not consulted here.
 */
static struct dentry *erofs_fh_to_dentry(struct super_block *sb,
		struct fid *fid, int fh_len, int fh_type)
{
	if ((fh_type != FILEID_INO64_GEN &&
	     fh_type != FILEID_INO64_GEN_PARENT) || fh_len < 3)
		return NULL;

	return d_obtain_alias(erofs_iget(sb,
		((u64)fid->raw[0] << 32) | fid->raw[1]));
}

/*
 * ->fh_to_parent: look up the parent whose 64-bit nid is stored in
 * fid->raw[3..4].  Returns NULL unless the handle is of type
 * FILEID_INO64_GEN_PARENT and at least 6 words long.
 */
static struct dentry *erofs_fh_to_parent(struct super_block *sb,
		struct fid *fid, int fh_len, int fh_type)
{
	if (fh_type != FILEID_INO64_GEN_PARENT || fh_len < 6)
		return NULL;

	return d_obtain_alias(erofs_iget(sb,
		((u64)fid->raw[3] << 32) | fid->raw[4]));
}

/*
 * ->get_parent: resolve dotdot_name inside @child with erofs_namei() and
 * return a dentry for the resulting nid, or an ERR_PTR() if the lookup
 * fails.
 */
static struct dentry *erofs_get_parent(struct dentry *child)
{
	erofs_nid_t nid;
	unsigned int d_type;
	int err;

	err = erofs_namei(d_inode(child), &dotdot_name, &nid, &d_type);
	if (err)
		return ERR_PTR(err);
	return d_obtain_alias(erofs_iget(child->d_sb, nid));
}

/* export operations installed as sb->s_export_op by erofs_fc_fill_super() */
static const struct export_operations erofs_export_ops = {
	.encode_fh = erofs_encode_fh,
	.fh_to_dentry = erofs_fh_to_dentry,
	.fh_to_parent = erofs_fh_to_parent,
	.get_parent = erofs_get_parent,
};

/*
 * Choose the sysfs name of the superblock: "<domain_id>,<fsid>" or
 * "<fsid>" when an fsid is set, the bdi device name for file-backed
 * mounts, and the generic id-based name otherwise.
 */
static void erofs_set_sysfs_name(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	if (sbi->domain_id && sbi->fsid)
		super_set_sysfs_name_generic(sb, "%s,%s", sbi->domain_id,
					     sbi->fsid);
	else if (sbi->fsid)
		super_set_sysfs_name_generic(sb, "%s", sbi->fsid);
	else if (erofs_is_fileio_mode(sbi))
		super_set_sysfs_name_generic(sb, "%s",
					     bdi_dev_name(sb->s_bdi));
	else
		super_set_sysfs_name_id(sb);
}

/*
 * Fill in @sb for a new mount; passed as the fill_super callback to
 * get_tree_nodev() / get_tree_bdev_flags() by erofs_fc_get_tree().
 *
 * Rough sequence: validate option combinations, set a provisional
 * PAGE_SIZE block size, read the on-disk superblock, switch to the real
 * block size, downgrade options the image or device cannot honour, load
 * the packed / metabox / root inodes and finally register the shrinker,
 * xattr prefixes and sysfs entry.
 *
 * Returns 0 or a negative errno.  The error paths below do not undo the
 * earlier steps themselves.
 */
static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
{
	struct inode *inode;
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	int err;

	sb->s_magic = EROFS_SUPER_MAGIC;
	sb->s_flags |= SB_RDONLY | SB_NOATIME;
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_op = &erofs_sops;

	if (!sbi->domain_id && test_opt(&sbi->opt, INODE_SHARE)) {
		errorfc(fc, "domain_id is needed when inode_ishare is on");
		return -EINVAL;
	}
	if (test_opt(&sbi->opt, DAX_ALWAYS) && test_opt(&sbi->opt, INODE_SHARE)) {
		errorfc(fc, "FSDAX is not allowed when inode_ishare is on");
		return -EINVAL;
	}

	/* provisional value until the on-disk superblock has been read */
	sbi->blkszbits = PAGE_SHIFT;
	if (!sb->s_bdev) {
		/*
		 * (File-backed mounts) EROFS claims it's safe to nest other
		 * fs contexts (including its own) due to self-controlled RO
		 * accesses/contexts and no side-effect changes that need to
		 * context save & restore so it can reuse the current thread
		 * context.
		 * However, we still need to prevent kernel stack overflow due
		 * to filesystem nesting: just ensure that s_stack_depth is 0
		 * to disallow mounting EROFS on stacked filesystems.
		 * Note: s_stack_depth is not incremented here for now, since
		 * EROFS is the only fs supporting file-backed mounts for now.
		 * It MUST change if another fs plans to support them, which
		 * may also require adjusting FILESYSTEM_MAX_STACK_DEPTH.
		 */
		if (erofs_is_fileio_mode(sbi)) {
			inode = file_inode(sbi->dif0.file);
			/*
			 * reject backing files that live on a bdev-less EROFS
			 * mount or on any filesystem with a stack depth
			 */
			if ((inode->i_sb->s_op == &erofs_sops &&
			     !inode->i_sb->s_bdev) ||
			    inode->i_sb->s_stack_depth) {
				erofs_err(sb, "file-backed mounts cannot be applied to stacked fses");
				return -ENOTBLK;
			}
		}
		sb->s_blocksize = PAGE_SIZE;
		sb->s_blocksize_bits = PAGE_SHIFT;

		if (erofs_is_fscache_mode(sb)) {
			err = erofs_fscache_register_fs(sb);
			if (err)
				return err;
		}
		err = super_setup_bdi(sb);
		if (err)
			return err;
	} else {
		if (!sb_set_blocksize(sb, PAGE_SIZE)) {
			errorfc(fc, "failed to set initial blksize");
			return -EINVAL;
		}

		sbi->dif0.dax_dev = fs_dax_get_by_bdev(sb->s_bdev,
				&sbi->dif0.dax_part_off, NULL, NULL);
	}

	err = erofs_read_superblock(sb);
	if (err)
		return err;

	/* switch from the provisional block size to the on-disk one */
	if (sb->s_blocksize_bits != sbi->blkszbits) {
		if (erofs_is_fscache_mode(sb)) {
			errorfc(fc, "unsupported blksize for fscache mode");
			return -EINVAL;
		}

		if (erofs_is_fileio_mode(sbi)) {
			sb->s_blocksize = 1 << sbi->blkszbits;
			sb->s_blocksize_bits = sbi->blkszbits;
		} else if (!sb_set_blocksize(sb, 1 << sbi->blkszbits)) {
			errorfc(fc, "failed to set erofs blksize");
			return -EINVAL;
		}
	}

	if (sbi->dif0.fsoff) {
		if (sbi->dif0.fsoff & (sb->s_blocksize - 1))
			return invalfc(fc, "fsoffset %llu is not aligned to block size %lu",
				       sbi->dif0.fsoff, sb->s_blocksize);
		if (erofs_is_fscache_mode(sb))
			return invalfc(fc, "cannot use fsoffset in fscache mode");
	}

	/* options the image cannot honour are turned off, not rejected */
	if (test_opt(&sbi->opt, DAX_ALWAYS) && sbi->blkszbits != PAGE_SHIFT) {
		erofs_info(sb, "unsupported blocksize for DAX");
		clear_opt(&sbi->opt, DAX_ALWAYS);
	}
	if (test_opt(&sbi->opt, INODE_SHARE) && !erofs_sb_has_ishare_xattrs(sbi)) {
		erofs_info(sb, "on-disk ishare xattrs not found. Turning off inode_share.");
		clear_opt(&sbi->opt, INODE_SHARE);
	}
	if (test_opt(&sbi->opt, INODE_SHARE))
		erofs_info(sb, "EXPERIMENTAL EROFS page cache share support in use. Use at your own risk!");

	sb->s_time_gran = 1;
	sb->s_xattr = erofs_xattr_handlers;
	sb->s_export_op = &erofs_export_ops;

	if (test_opt(&sbi->opt, POSIX_ACL))
		sb->s_flags |= SB_POSIXACL;
	else
		sb->s_flags &= ~SB_POSIXACL;

	err = z_erofs_init_super(sb);
	if (err)
		return err;

	/* internal inodes; erofs_drop_internal_inodes() puts them again */
	if (erofs_sb_has_fragments(sbi) && sbi->packed_nid) {
		inode = erofs_iget(sb, sbi->packed_nid);
		if (IS_ERR(inode))
			return PTR_ERR(inode);
		sbi->packed_inode = inode;
	}
	if (erofs_sb_has_metabox(sbi)) {
		inode = erofs_iget(sb, sbi->metabox_nid);
		if (IS_ERR(inode))
			return PTR_ERR(inode);
		sbi->metabox_inode = inode;
	}

	inode = erofs_iget(sb, sbi->root_nid);
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	if (!S_ISDIR(inode->i_mode)) {
		erofs_err(sb, "rootino(nid %llu) is not a directory(i_mode %o)",
			  sbi->root_nid, inode->i_mode);
		iput(inode);
		return -EINVAL;
	}
	sb->s_root = d_make_root(inode);
	if (!sb->s_root)
		return -ENOMEM;

	erofs_shrinker_register(sb);
	err = erofs_xattr_prefixes_init(sb);
	if (err)
		return err;

	erofs_set_sysfs_name(sb);
	err = erofs_register_sysfs(sb);
	if (err)
		return err;

	sbi->dir_ra_bytes = EROFS_DIR_RA_BYTES;
	erofs_info(sb, "mounted with root inode @ nid %llu.", sbi->root_nid);
	return 0;
}

/*
 * ->get_tree: create the superblock.
 *
 * With an fsid (CONFIG_EROFS_FS_ONDEMAND) no device is involved.
 * Otherwise a block-device mount is attempted first; if that ends in
 * -ENOTBLK and file-backed mounts are built in, fc->source is opened as a
 * file and the mount is retried without a device, provided the file is a
 * regular one whose mapping has a ->read_folio method.
 */
static int erofs_fc_get_tree(struct fs_context *fc)
{
	struct erofs_sb_info *sbi = fc->s_fs_info;
	int ret;

	if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && sbi->fsid)
		return get_tree_nodev(fc, erofs_fc_fill_super);

	ret = get_tree_bdev_flags(fc, erofs_fc_fill_super,
		IS_ENABLED(CONFIG_EROFS_FS_BACKED_BY_FILE) ?
			GET_TREE_BDEV_QUIET_LOOKUP : 0);
#ifdef CONFIG_EROFS_FS_BACKED_BY_FILE
	if (ret == -ENOTBLK) {
		struct file *file;

		if (!fc->source)
			return invalf(fc, "No source specified");
		file = filp_open(fc->source, O_RDONLY | O_LARGEFILE, 0);
		if (IS_ERR(file))
			return PTR_ERR(file);
		/* stored in sbi, so erofs_sb_free() drops it on any later path */
		sbi->dif0.file = file;

		if (S_ISREG(file_inode(sbi->dif0.file)->i_mode) &&
		    sbi->dif0.file->f_mapping->a_ops->read_folio)
			return get_tree_nodev(fc, erofs_fc_fill_super);
	}
#endif
	return ret;
}

/*
 * ->reconfigure: adopt the newly parsed mount options.
 *
 * fsid / domain_id given on remount are ignored with a notice.
 * SB_POSIXACL in fc->sb_flags follows the new acl option, and SB_RDONLY is
 * always kept set.
 */
static int erofs_fc_reconfigure(struct fs_context *fc)
{
	struct super_block *sb = fc->root->d_sb;
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_sb_info *new_sbi = fc->s_fs_info;

	DBG_BUGON(!sb_rdonly(sb));

	if (new_sbi->fsid || new_sbi->domain_id)
		erofs_info(sb, "ignoring reconfiguration for fsid|domain_id.");

	if (test_opt(&new_sbi->opt, POSIX_ACL))
		fc->sb_flags |= SB_POSIXACL;
	else
		fc->sb_flags &= ~SB_POSIXACL;

	sbi->opt = new_sbi->opt;

	fc->sb_flags |= SB_RDONLY;
	return 0;
}

/*
 * idr_for_each() callback: release everything held by one
 * erofs_device_info (dax device, file, fscache cookie, path) and free the
 * structure itself.  Always returns 0 so that the iteration continues.
 */
static int erofs_release_device_info(int id, void *ptr, void *data)
{
	struct erofs_device_info *dif = ptr;

	fs_put_dax(dif->dax_dev, NULL);
	if (dif->file)
		fput(dif->file);
	erofs_fscache_unregister_cookie(dif->fscache);
	dif->fscache = NULL;
	kfree(dif->path);
	kfree(dif);
	return 0;
}

/* Release every device in @devs, then the idr and @devs itself; NULL is ok. */
static void erofs_free_dev_context(struct erofs_dev_context *devs)
{
	if (!devs)
		return;
	idr_for_each(&devs->tree, &erofs_release_device_info, NULL);
	idr_destroy(&devs->tree);
	kfree(devs);
}

/*
 * Free @sbi together with what it owns: the device context, the fsid and
 * domain_id strings, the primary backing file (if any) and the volume
 * name.
 */
static void erofs_sb_free(struct erofs_sb_info *sbi)
{
	erofs_free_dev_context(sbi->devs);
	kfree(sbi->fsid);
	kfree_sensitive(sbi->domain_id);
	if (sbi->dif0.file)
		fput(sbi->dif0.file);
	kfree(sbi->volume_name);
	kfree(sbi);
}

/* ->free: dispose of the sb info if the context still owns it. */
static void erofs_fc_free(struct fs_context *fc)
{
	struct erofs_sb_info *sbi = fc->s_fs_info;

	if (sbi) /* free here if an error occurs before transferring to sb */
		erofs_sb_free(sbi);
}

static const struct fs_context_operations erofs_context_ops = {
	.parse_param	= erofs_fc_parse_param,
	.get_tree       = erofs_fc_get_tree,
	.reconfigure    = erofs_fc_reconfigure,
	.free		= erofs_fc_free,
};

/*
 * ->init_fs_context: allocate the erofs_sb_info and its device context,
 * attach them to @fc, and install the default options and context
 * operations.  Returns 0 or -ENOMEM.
 */
static int erofs_init_fs_context(struct fs_context *fc)
{
	struct erofs_sb_info *sbi;

	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
	if (!sbi)
		return -ENOMEM;

	sbi->devs = kzalloc(sizeof(struct erofs_dev_context), GFP_KERNEL);
	if (!sbi->devs) {
		kfree(sbi);
		return -ENOMEM;
	}
	fc->s_fs_info = sbi;

	idr_init(&sbi->devs->tree);
	init_rwsem(&sbi->devs->rwsem);
	erofs_default_options(sbi);
	fc->ops = &erofs_context_ops;
	return 0;
}

/*
 * Put the internal inodes referenced from @sbi (packed, metabox and, with
 * compression support, the managed cache) and clear the pointers so that
 * calling this again puts nothing twice.
 */
static void erofs_drop_internal_inodes(struct erofs_sb_info *sbi)
{
	iput(sbi->packed_inode);
	sbi->packed_inode = NULL;
	iput(sbi->metabox_inode);
	sbi->metabox_inode = NULL;
#ifdef CONFIG_EROFS_FS_ZIP
	iput(sbi->managed_cache);
	sbi->managed_cache = NULL;
#endif
}

/*
 * ->kill_sb: tear down the superblock and free its erofs_sb_info.
 *
 * fsid-based (CONFIG_EROFS_FS_ONDEMAND) and file-backed mounts go through
 * kill_anon_super(), everything else through kill_block_super().  The
 * remaining resources are released afterwards.
 */
static void erofs_kill_sb(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	if ((IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && sbi->fsid) ||
	    sbi->dif0.file)
		kill_anon_super(sb);
	else
		kill_block_super(sb);
	erofs_drop_internal_inodes(sbi);
	fs_put_dax(sbi->dif0.dax_dev, NULL);
	erofs_fscache_unregister_fs(sb);
	erofs_sb_free(sbi);
	sb->s_fs_info = NULL;
}

/*
 * ->put_super: unregister sysfs, the shrinker and the xattr prefixes, drop
 * the internal inodes and release the extra devices.  sbi->devs is set to
 * NULL so that the later erofs_sb_free() skips the device context.
 */
static void erofs_put_super(struct super_block *sb)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);

	erofs_unregister_sysfs(sb);
	erofs_shrinker_unregister(sb);
	erofs_xattr_prefixes_cleanup(sb);
	erofs_drop_internal_inodes(sbi);
	erofs_free_dev_context(sbi->devs);
	sbi->devs = NULL;
	erofs_fscache_unregister_fs(sb);
}

965 static struct file_system_type erofs_fs_type = { 966 .owner = THIS_MODULE, 967 .name = "erofs", 968 .init_fs_context = erofs_init_fs_context, 969 .kill_sb = erofs_kill_sb, 970 .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, 971 }; 972 MODULE_ALIAS_FS("erofs"); 973 974 #if defined(CONFIG_EROFS_FS_ONDEMAND) || defined(CONFIG_EROFS_FS_PAGE_CACHE_SHARE) 975 static void erofs_free_anon_inode(struct inode *inode) 976 { 977 struct erofs_inode *vi = EROFS_I(inode); 978 979 #ifdef CONFIG_EROFS_FS_PAGE_CACHE_SHARE 980 kfree(vi->fingerprint.opaque); 981 #endif 982 kmem_cache_free(erofs_inode_cachep, vi); 983 } 984 985 static const struct super_operations erofs_anon_sops = { 986 .alloc_inode = erofs_alloc_inode, 987 .drop_inode = inode_just_drop, 988 .free_inode = erofs_free_anon_inode, 989 }; 990 991 static int erofs_anon_init_fs_context(struct fs_context *fc) 992 { 993 struct pseudo_fs_context *ctx; 994 995 ctx = init_pseudo(fc, EROFS_SUPER_MAGIC); 996 if (!ctx) 997 return -ENOMEM; 998 ctx->ops = &erofs_anon_sops; 999 return 0; 1000 } 1001 1002 struct file_system_type erofs_anon_fs_type = { 1003 .name = "pseudo_erofs", 1004 .init_fs_context = erofs_anon_init_fs_context, 1005 .kill_sb = kill_anon_super, 1006 }; 1007 #endif 1008 1009 static int __init erofs_module_init(void) 1010 { 1011 int err; 1012 1013 erofs_check_ondisk_layout_definitions(); 1014 1015 erofs_inode_cachep = kmem_cache_create("erofs_inode", 1016 sizeof(struct erofs_inode), 0, 1017 SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT, 1018 erofs_inode_init_once); 1019 if (!erofs_inode_cachep) 1020 return -ENOMEM; 1021 1022 err = erofs_init_shrinker(); 1023 if (err) 1024 goto shrinker_err; 1025 1026 err = z_erofs_init_subsystem(); 1027 if (err) 1028 goto zip_err; 1029 1030 err = erofs_init_sysfs(); 1031 if (err) 1032 goto sysfs_err; 1033 1034 err = erofs_init_ishare(); 1035 if (err) 1036 goto ishare_err; 1037 1038 err = register_filesystem(&erofs_fs_type); 1039 if (err) 1040 goto fs_err; 1041 1042 return 0; 1043 1044 fs_err: 
1045 erofs_exit_ishare(); 1046 ishare_err: 1047 erofs_exit_sysfs(); 1048 sysfs_err: 1049 z_erofs_exit_subsystem(); 1050 zip_err: 1051 erofs_exit_shrinker(); 1052 shrinker_err: 1053 kmem_cache_destroy(erofs_inode_cachep); 1054 return err; 1055 } 1056 1057 static void __exit erofs_module_exit(void) 1058 { 1059 unregister_filesystem(&erofs_fs_type); 1060 1061 /* Ensure all RCU free inodes / pclusters are safe to be destroyed. */ 1062 rcu_barrier(); 1063 1064 erofs_exit_ishare(); 1065 erofs_exit_sysfs(); 1066 z_erofs_exit_subsystem(); 1067 erofs_exit_shrinker(); 1068 kmem_cache_destroy(erofs_inode_cachep); 1069 } 1070 1071 static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf) 1072 { 1073 struct super_block *sb = dentry->d_sb; 1074 struct erofs_sb_info *sbi = EROFS_SB(sb); 1075 1076 buf->f_type = sb->s_magic; 1077 buf->f_bsize = sb->s_blocksize; 1078 buf->f_blocks = sbi->total_blocks; 1079 buf->f_bfree = buf->f_bavail = 0; 1080 buf->f_files = ULLONG_MAX; 1081 buf->f_ffree = ULLONG_MAX - sbi->inos; 1082 buf->f_namelen = EROFS_NAME_LEN; 1083 1084 if (uuid_is_null(&sb->s_uuid)) 1085 buf->f_fsid = u64_to_fsid(!sb->s_bdev ? 0 : 1086 huge_encode_dev(sb->s_bdev->bd_dev)); 1087 else 1088 buf->f_fsid = uuid_to_fsid(sb->s_uuid.b); 1089 return 0; 1090 } 1091 1092 static int erofs_show_options(struct seq_file *seq, struct dentry *root) 1093 { 1094 struct erofs_sb_info *sbi = EROFS_SB(root->d_sb); 1095 struct erofs_mount_opts *opt = &sbi->opt; 1096 1097 if (IS_ENABLED(CONFIG_EROFS_FS_XATTR)) 1098 seq_puts(seq, test_opt(opt, XATTR_USER) ? 1099 ",user_xattr" : ",nouser_xattr"); 1100 if (IS_ENABLED(CONFIG_EROFS_FS_POSIX_ACL)) 1101 seq_puts(seq, test_opt(opt, POSIX_ACL) ? 
",acl" : ",noacl"); 1102 if (IS_ENABLED(CONFIG_EROFS_FS_ZIP)) 1103 seq_printf(seq, ",cache_strategy=%s", 1104 erofs_param_cache_strategy[opt->cache_strategy].name); 1105 if (test_opt(opt, DAX_ALWAYS)) 1106 seq_puts(seq, ",dax=always"); 1107 if (test_opt(opt, DAX_NEVER)) 1108 seq_puts(seq, ",dax=never"); 1109 if (erofs_is_fileio_mode(sbi) && test_opt(opt, DIRECT_IO)) 1110 seq_puts(seq, ",directio"); 1111 #ifdef CONFIG_EROFS_FS_ONDEMAND 1112 if (sbi->fsid) 1113 seq_printf(seq, ",fsid=%s", sbi->fsid); 1114 if (sbi->domain_id) 1115 seq_printf(seq, ",domain_id=%s", sbi->domain_id); 1116 #endif 1117 if (sbi->dif0.fsoff) 1118 seq_printf(seq, ",fsoffset=%llu", sbi->dif0.fsoff); 1119 if (test_opt(opt, INODE_SHARE)) 1120 seq_puts(seq, ",inode_share"); 1121 return 0; 1122 } 1123 1124 static void erofs_evict_inode(struct inode *inode) 1125 { 1126 if (IS_DAX(inode)) 1127 dax_break_layout_final(inode); 1128 erofs_ishare_free_inode(inode); 1129 truncate_inode_pages_final(&inode->i_data); 1130 clear_inode(inode); 1131 } 1132 1133 const struct super_operations erofs_sops = { 1134 .put_super = erofs_put_super, 1135 .alloc_inode = erofs_alloc_inode, 1136 .free_inode = erofs_free_inode, 1137 .evict_inode = erofs_evict_inode, 1138 .statfs = erofs_statfs, 1139 .show_options = erofs_show_options, 1140 }; 1141 1142 module_init(erofs_module_init); 1143 module_exit(erofs_module_exit); 1144 1145 MODULE_DESCRIPTION("Enhanced ROM File System"); 1146 MODULE_AUTHOR("Gao Xiang, Chao Yu, Miao Xie, CONSUMER BG, HUAWEI Inc."); 1147 MODULE_LICENSE("GPL"); 1148