// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017-2018 HUAWEI, Inc.
 *             https://www.huawei.com/
 * Copyright (C) 2021, Alibaba Cloud
 */
#include <linux/module.h>
#include <linux/buffer_head.h>
#include <linux/statfs.h>
#include <linux/parser.h>
#include <linux/seq_file.h>
#include <linux/crc32c.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/dax.h>
#include <linux/exportfs.h>
#include "xattr.h"

#define CREATE_TRACE_POINTS
#include <trace/events/erofs.h>

static struct kmem_cache *erofs_inode_cachep __read_mostly;

void _erofs_err(struct super_block *sb, const char *function,
		const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);

	vaf.fmt = fmt;
	vaf.va = &args;

	pr_err("(device %s): %s: %pV", sb->s_id, function, &vaf);
	va_end(args);
}

void _erofs_info(struct super_block *sb, const char *function,
		 const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);

	vaf.fmt = fmt;
	vaf.va = &args;

	pr_info("(device %s): %pV", sb->s_id, &vaf);
	va_end(args);
}

static int erofs_superblock_csum_verify(struct super_block *sb, void *sbdata)
{
	struct erofs_super_block *dsb;
	u32 expected_crc, crc;

	dsb = kmemdup(sbdata + EROFS_SUPER_OFFSET,
		      EROFS_BLKSIZ - EROFS_SUPER_OFFSET, GFP_KERNEL);
	if (!dsb)
		return -ENOMEM;

	expected_crc = le32_to_cpu(dsb->checksum);
	dsb->checksum = 0;
	/* to allow for x86 boot sectors and other oddities. */
	crc = crc32c(~0, dsb, EROFS_BLKSIZ - EROFS_SUPER_OFFSET);
	kfree(dsb);

	if (crc != expected_crc) {
		erofs_err(sb, "invalid checksum 0x%08x, 0x%08x expected",
			  crc, expected_crc);
		return -EBADMSG;
	}
	return 0;
}

static void erofs_inode_init_once(void *ptr)
{
	struct erofs_inode *vi = ptr;

	inode_init_once(&vi->vfs_inode);
}

static struct inode *erofs_alloc_inode(struct super_block *sb)
{
	struct erofs_inode *vi =
		alloc_inode_sb(sb, erofs_inode_cachep, GFP_KERNEL);

	if (!vi)
		return NULL;

	/* zero out everything except vfs_inode */
	memset(vi, 0, offsetof(struct erofs_inode, vfs_inode));
	return &vi->vfs_inode;
}

static void erofs_free_inode(struct inode *inode)
{
	struct erofs_inode *vi = EROFS_I(inode);

	/* be careful of RCU symlink path */
	if (inode->i_op == &erofs_fast_symlink_iops)
		kfree(inode->i_link);
	kfree(vi->xattr_shared_xattrs);

	kmem_cache_free(erofs_inode_cachep, vi);
}

static bool check_layout_compatibility(struct super_block *sb,
				       struct erofs_super_block *dsb)
{
	const unsigned int feature = le32_to_cpu(dsb->feature_incompat);

	EROFS_SB(sb)->feature_incompat = feature;

	/* check if current kernel meets all mandatory requirements */
	if (feature & (~EROFS_ALL_FEATURE_INCOMPAT)) {
		erofs_err(sb,
			  "unidentified incompatible feature %x, please upgrade kernel version",
			  feature & ~EROFS_ALL_FEATURE_INCOMPAT);
		return false;
	}
	return true;
}

#ifdef CONFIG_EROFS_FS_ZIP
/* read variable-sized metadata, offset will be aligned by 4-byte */
static void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf,
				 erofs_off_t *offset, int *lengthp)
{
	u8 *buffer, *ptr;
	int len, i, cnt;

	*offset = round_up(*offset, 4);
	ptr = erofs_read_metabuf(buf, sb, erofs_blknr(*offset), EROFS_KMAP);
	if (IS_ERR(ptr))
		return ptr;

	len = le16_to_cpu(*(__le16 *)&ptr[erofs_blkoff(*offset)]);
	if (!len)
		len = U16_MAX + 1;
	buffer = kmalloc(len, GFP_KERNEL);
	if (!buffer)
		return ERR_PTR(-ENOMEM);
	*offset += sizeof(__le16);
	*lengthp = len;

	for (i = 0; i < len; i += cnt) {
		cnt = min(EROFS_BLKSIZ - (int)erofs_blkoff(*offset), len - i);
		ptr = erofs_read_metabuf(buf, sb, erofs_blknr(*offset),
					 EROFS_KMAP);
		if (IS_ERR(ptr)) {
			kfree(buffer);
			return ptr;
		}
		memcpy(buffer + i, ptr + erofs_blkoff(*offset), cnt);
		*offset += cnt;
	}
	return buffer;
}

static int erofs_load_compr_cfgs(struct super_block *sb,
				 struct erofs_super_block *dsb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
	unsigned int algs, alg;
	erofs_off_t offset;
	int size, ret = 0;

	sbi->available_compr_algs = le16_to_cpu(dsb->u1.available_compr_algs);
	if (sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS) {
		erofs_err(sb, "try to load compressed fs with unsupported algorithms %x",
			  sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS);
		return -EINVAL;
	}

	offset = EROFS_SUPER_OFFSET + sbi->sb_size;
	alg = 0;
	for (algs = sbi->available_compr_algs; algs; algs >>= 1, ++alg) {
		void *data;

		if (!(algs & 1))
			continue;

		data = erofs_read_metadata(sb, &buf, &offset, &size);
		if (IS_ERR(data)) {
			ret = PTR_ERR(data);
			break;
		}

		switch (alg) {
		case Z_EROFS_COMPRESSION_LZ4:
			ret = z_erofs_load_lz4_config(sb, dsb, data, size);
			break;
		case Z_EROFS_COMPRESSION_LZMA:
			ret = z_erofs_load_lzma_config(sb, dsb, data, size);
			break;
		default:
			DBG_BUGON(1);
			ret = -EFAULT;
		}
		kfree(data);
		if (ret)
			break;
	}
	erofs_put_metabuf(&buf);
	return ret;
}
#else
static int erofs_load_compr_cfgs(struct super_block *sb,
				 struct erofs_super_block *dsb)
{
	if (dsb->u1.available_compr_algs) {
		erofs_err(sb, "try to load compressed fs when compression is disabled");
		return -EINVAL;
	}
	return 0;
}
#endif

static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb,
			     struct erofs_device_info *dif, erofs_off_t *pos)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_fscache *fscache;
	struct erofs_deviceslot *dis;
	struct block_device *bdev;
	void *ptr;

	ptr = erofs_read_metabuf(buf, sb, erofs_blknr(*pos), EROFS_KMAP);
	if (IS_ERR(ptr))
		return PTR_ERR(ptr);
	dis = ptr + erofs_blkoff(*pos);

	if (!dif->path) {
		if (!dis->tag[0]) {
			erofs_err(sb, "empty device tag @ pos %llu", *pos);
			return -EINVAL;
		}
		dif->path = kmemdup_nul(dis->tag, sizeof(dis->tag), GFP_KERNEL);
		if (!dif->path)
			return -ENOMEM;
	}

	if (erofs_is_fscache_mode(sb)) {
		fscache = erofs_fscache_register_cookie(sb, dif->path, 0);
		if (IS_ERR(fscache))
			return PTR_ERR(fscache);
		dif->fscache = fscache;
	} else {
		bdev = blkdev_get_by_path(dif->path, FMODE_READ | FMODE_EXCL,
					  sb->s_type);
		if (IS_ERR(bdev))
			return PTR_ERR(bdev);
		dif->bdev = bdev;
		dif->dax_dev = fs_dax_get_by_bdev(bdev, &dif->dax_part_off,
						  NULL, NULL);
	}

	dif->blocks = le32_to_cpu(dis->blocks);
	dif->mapped_blkaddr = le32_to_cpu(dis->mapped_blkaddr);
	sbi->total_blocks += dif->blocks;
	*pos += EROFS_DEVT_SLOT_SIZE;
	return 0;
}

static int erofs_scan_devices(struct super_block *sb,
			      struct erofs_super_block *dsb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	unsigned int ondisk_extradevs;
	erofs_off_t pos;
	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
	struct erofs_device_info *dif;
	int id, err = 0;

	sbi->total_blocks = sbi->primarydevice_blocks;
	if (!erofs_sb_has_device_table(sbi))
		ondisk_extradevs = 0;
	else
		ondisk_extradevs = le16_to_cpu(dsb->extra_devices);

	if (sbi->devs->extra_devices &&
	    ondisk_extradevs != sbi->devs->extra_devices) {
		erofs_err(sb, "extra devices don't match (ondisk %u, given %u)",
			  ondisk_extradevs, sbi->devs->extra_devices);
		return -EINVAL;
	}
	if (!ondisk_extradevs)
		return 0;

	sbi->device_id_mask = roundup_pow_of_two(ondisk_extradevs + 1) - 1;
	pos = le16_to_cpu(dsb->devt_slotoff) * EROFS_DEVT_SLOT_SIZE;
	down_read(&sbi->devs->rwsem);
	if (sbi->devs->extra_devices) {
		idr_for_each_entry(&sbi->devs->tree, dif, id) {
			err = erofs_init_device(&buf, sb, dif, &pos);
			if (err)
				break;
		}
	} else {
		for (id = 0; id < ondisk_extradevs; id++) {
			dif = kzalloc(sizeof(*dif), GFP_KERNEL);
			if (!dif) {
				err = -ENOMEM;
				break;
			}

			err = idr_alloc(&sbi->devs->tree, dif, 0, 0, GFP_KERNEL);
			if (err < 0) {
				kfree(dif);
				break;
			}
			++sbi->devs->extra_devices;

			err = erofs_init_device(&buf, sb, dif, &pos);
			if (err)
				break;
		}
	}
	up_read(&sbi->devs->rwsem);
	erofs_put_metabuf(&buf);
	return err;
}

static int erofs_read_superblock(struct super_block *sb)
{
	struct erofs_sb_info *sbi;
	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
	struct erofs_super_block *dsb;
	unsigned int blkszbits;
	void *data;
	int ret;

	data = erofs_read_metabuf(&buf, sb, 0, EROFS_KMAP);
	if (IS_ERR(data)) {
		erofs_err(sb, "cannot read erofs superblock");
		return PTR_ERR(data);
	}

	sbi = EROFS_SB(sb);
	dsb = (struct erofs_super_block *)(data + EROFS_SUPER_OFFSET);

	ret = -EINVAL;
	if (le32_to_cpu(dsb->magic) != EROFS_SUPER_MAGIC_V1) {
		erofs_err(sb, "cannot find valid erofs superblock");
		goto out;
	}

	sbi->feature_compat = le32_to_cpu(dsb->feature_compat);
	if (erofs_sb_has_sb_chksum(sbi)) {
		ret = erofs_superblock_csum_verify(sb, data);
		if (ret)
			goto out;
	}

	ret = -EINVAL;
	blkszbits = dsb->blkszbits;
	/* 9(512 bytes) + LOG_SECTORS_PER_BLOCK == LOG_BLOCK_SIZE */
	if (blkszbits != LOG_BLOCK_SIZE) {
		erofs_err(sb, "blkszbits %u isn't supported on this platform",
			  blkszbits);
		goto out;
	}

	if (!check_layout_compatibility(sb, dsb))
		goto out;

	sbi->sb_size = 128 + dsb->sb_extslots * EROFS_SB_EXTSLOT_SIZE;
	if (sbi->sb_size > EROFS_BLKSIZ) {
		erofs_err(sb, "invalid sb_extslots %u (more than a fs block)",
			  sbi->sb_size);
		goto out;
	}
	sbi->primarydevice_blocks = le32_to_cpu(dsb->blocks);
	sbi->meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr);
#ifdef CONFIG_EROFS_FS_XATTR
	sbi->xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr);
#endif
	sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact));
	sbi->root_nid = le16_to_cpu(dsb->root_nid);
#ifdef CONFIG_EROFS_FS_ZIP
	sbi->packed_inode = NULL;
	if (erofs_sb_has_fragments(sbi) && dsb->packed_nid) {
		sbi->packed_inode =
			erofs_iget(sb, le64_to_cpu(dsb->packed_nid));
		if (IS_ERR(sbi->packed_inode)) {
			ret = PTR_ERR(sbi->packed_inode);
			goto out;
		}
	}
#endif
	sbi->inos = le64_to_cpu(dsb->inos);

	sbi->build_time = le64_to_cpu(dsb->build_time);
	sbi->build_time_nsec = le32_to_cpu(dsb->build_time_nsec);

	memcpy(&sb->s_uuid, dsb->uuid, sizeof(dsb->uuid));

	ret = strscpy(sbi->volume_name, dsb->volume_name,
		      sizeof(dsb->volume_name));
	if (ret < 0) {	/* -E2BIG */
		erofs_err(sb, "bad volume name without NIL terminator");
		ret = -EFSCORRUPTED;
		goto out;
	}

	/* parse on-disk compression configurations */
	if (erofs_sb_has_compr_cfgs(sbi))
		ret = erofs_load_compr_cfgs(sb, dsb);
	else
		ret = z_erofs_load_lz4_config(sb, dsb, NULL, 0);
	if (ret < 0)
		goto out;

	/* handle multiple devices */
	ret = erofs_scan_devices(sb, dsb);

	if (erofs_sb_has_ztailpacking(sbi))
		erofs_info(sb, "EXPERIMENTAL compressed inline data feature in use. Use at your own risk!");
	if (erofs_is_fscache_mode(sb))
		erofs_info(sb, "EXPERIMENTAL fscache-based on-demand read feature in use. Use at your own risk!");
	if (erofs_sb_has_fragments(sbi))
		erofs_info(sb, "EXPERIMENTAL compressed fragments feature in use. Use at your own risk!");
	if (erofs_sb_has_dedupe(sbi))
		erofs_info(sb, "EXPERIMENTAL global deduplication feature in use. Use at your own risk!");
out:
	erofs_put_metabuf(&buf);
	return ret;
}

/* set up default EROFS parameters */
static void erofs_default_options(struct erofs_fs_context *ctx)
{
#ifdef CONFIG_EROFS_FS_ZIP
	ctx->opt.cache_strategy = EROFS_ZIP_CACHE_READAROUND;
	ctx->opt.max_sync_decompress_pages = 3;
	ctx->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_AUTO;
#endif
#ifdef CONFIG_EROFS_FS_XATTR
	set_opt(&ctx->opt, XATTR_USER);
#endif
#ifdef CONFIG_EROFS_FS_POSIX_ACL
	set_opt(&ctx->opt, POSIX_ACL);
#endif
}

enum {
	Opt_user_xattr,
	Opt_acl,
	Opt_cache_strategy,
	Opt_dax,
	Opt_dax_enum,
	Opt_device,
	Opt_fsid,
	Opt_domain_id,
	Opt_err
};

static const struct constant_table erofs_param_cache_strategy[] = {
	{"disabled",	EROFS_ZIP_CACHE_DISABLED},
	{"readahead",	EROFS_ZIP_CACHE_READAHEAD},
	{"readaround",	EROFS_ZIP_CACHE_READAROUND},
	{}
};

static const struct constant_table erofs_dax_param_enums[] = {
	{"always",	EROFS_MOUNT_DAX_ALWAYS},
	{"never",	EROFS_MOUNT_DAX_NEVER},
	{}
};

static const struct fs_parameter_spec erofs_fs_parameters[] = {
	fsparam_flag_no("user_xattr",	Opt_user_xattr),
	fsparam_flag_no("acl",		Opt_acl),
	fsparam_enum("cache_strategy",	Opt_cache_strategy,
		     erofs_param_cache_strategy),
	fsparam_flag("dax",		Opt_dax),
	fsparam_enum("dax",		Opt_dax_enum, erofs_dax_param_enums),
	fsparam_string("device",	Opt_device),
	fsparam_string("fsid",		Opt_fsid),
	fsparam_string("domain_id",	Opt_domain_id),
	{}
};

static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode)
{
#ifdef CONFIG_FS_DAX
	struct erofs_fs_context *ctx = fc->fs_private;

	switch (mode) {
	case EROFS_MOUNT_DAX_ALWAYS:
		warnfc(fc, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
		set_opt(&ctx->opt, DAX_ALWAYS);
		clear_opt(&ctx->opt, DAX_NEVER);
		return true;
	case EROFS_MOUNT_DAX_NEVER:
		set_opt(&ctx->opt, DAX_NEVER);
		clear_opt(&ctx->opt, DAX_ALWAYS);
		return true;
	default:
		DBG_BUGON(1);
		return false;
	}
#else
	errorfc(fc, "dax options not supported");
	return false;
#endif
}

static int erofs_fc_parse_param(struct fs_context *fc,
				struct fs_parameter *param)
{
	struct erofs_fs_context *ctx = fc->fs_private;
	struct fs_parse_result result;
	struct erofs_device_info *dif;
	int opt, ret;

	opt = fs_parse(fc, erofs_fs_parameters, param, &result);
	if (opt < 0)
		return opt;

	switch (opt) {
	case Opt_user_xattr:
#ifdef CONFIG_EROFS_FS_XATTR
		if (result.boolean)
			set_opt(&ctx->opt, XATTR_USER);
		else
			clear_opt(&ctx->opt, XATTR_USER);
#else
		errorfc(fc, "{,no}user_xattr options not supported");
#endif
		break;
	case Opt_acl:
#ifdef CONFIG_EROFS_FS_POSIX_ACL
		if (result.boolean)
			set_opt(&ctx->opt, POSIX_ACL);
		else
			clear_opt(&ctx->opt, POSIX_ACL);
#else
		errorfc(fc, "{,no}acl options not supported");
#endif
		break;
	case Opt_cache_strategy:
#ifdef CONFIG_EROFS_FS_ZIP
		ctx->opt.cache_strategy = result.uint_32;
#else
		errorfc(fc, "compression not supported, cache_strategy ignored");
#endif
		break;
	case Opt_dax:
		if (!erofs_fc_set_dax_mode(fc, EROFS_MOUNT_DAX_ALWAYS))
			return -EINVAL;
		break;
	case Opt_dax_enum:
		if (!erofs_fc_set_dax_mode(fc, result.uint_32))
			return -EINVAL;
		break;
	case Opt_device:
		dif = kzalloc(sizeof(*dif), GFP_KERNEL);
		if (!dif)
			return -ENOMEM;
		dif->path = kstrdup(param->string, GFP_KERNEL);
		if (!dif->path) {
			kfree(dif);
			return -ENOMEM;
		}
		down_write(&ctx->devs->rwsem);
		ret = idr_alloc(&ctx->devs->tree, dif, 0, 0, GFP_KERNEL);
		up_write(&ctx->devs->rwsem);
		if (ret < 0) {
			kfree(dif->path);
			kfree(dif);
			return ret;
		}
		++ctx->devs->extra_devices;
		break;
	case Opt_fsid:
#ifdef CONFIG_EROFS_FS_ONDEMAND
		kfree(ctx->fsid);
		ctx->fsid = kstrdup(param->string, GFP_KERNEL);
		if (!ctx->fsid)
			return -ENOMEM;
#else
		errorfc(fc, "fsid option not supported");
#endif
		break;
	case Opt_domain_id:
#ifdef CONFIG_EROFS_FS_ONDEMAND
		kfree(ctx->domain_id);
		ctx->domain_id = kstrdup(param->string, GFP_KERNEL);
		if (!ctx->domain_id)
			return -ENOMEM;
#else
		errorfc(fc, "domain_id option not supported");
#endif
		break;
	default:
		return -ENOPARAM;
	}
	return 0;
}

#ifdef CONFIG_EROFS_FS_ZIP
static const struct address_space_operations managed_cache_aops;

static bool erofs_managed_cache_release_folio(struct folio *folio, gfp_t gfp)
{
	bool ret = true;
	struct address_space *const mapping = folio->mapping;

	DBG_BUGON(!folio_test_locked(folio));
	DBG_BUGON(mapping->a_ops != &managed_cache_aops);

	if (folio_test_private(folio))
		ret = erofs_try_to_free_cached_page(&folio->page);

	return ret;
}

/*
 * It will be called only on inode eviction. In case that there are still some
 * decompression requests in progress, wait with rescheduling for a bit here.
 * We could introduce an extra locking instead but it seems unnecessary.
 */
static void erofs_managed_cache_invalidate_folio(struct folio *folio,
						 size_t offset, size_t length)
{
	const size_t stop = length + offset;

	DBG_BUGON(!folio_test_locked(folio));

	/* Check for potential overflow in debug mode */
	DBG_BUGON(stop > folio_size(folio) || stop < length);

	if (offset == 0 && stop == folio_size(folio))
		while (!erofs_managed_cache_release_folio(folio, GFP_NOFS))
			cond_resched();
}

static const struct address_space_operations managed_cache_aops = {
	.release_folio = erofs_managed_cache_release_folio,
	.invalidate_folio = erofs_managed_cache_invalidate_folio,
};

static int erofs_init_managed_cache(struct super_block *sb)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);
	struct inode *const inode = new_inode(sb);

	if (!inode)
		return -ENOMEM;

	set_nlink(inode, 1);
	inode->i_size = OFFSET_MAX;

	inode->i_mapping->a_ops = &managed_cache_aops;
	mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
	sbi->managed_cache = inode;
	return 0;
}
#else
static int erofs_init_managed_cache(struct super_block *sb) { return 0; }
#endif

static struct inode *erofs_nfs_get_inode(struct super_block *sb,
					 u64 ino, u32 generation)
{
	return erofs_iget(sb, ino);
}

static struct dentry *erofs_fh_to_dentry(struct super_block *sb,
		struct fid *fid, int fh_len, int fh_type)
{
	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
				    erofs_nfs_get_inode);
}

static struct dentry *erofs_fh_to_parent(struct super_block *sb,
		struct fid *fid, int fh_len, int fh_type)
{
	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
				    erofs_nfs_get_inode);
}

static struct dentry *erofs_get_parent(struct dentry *child)
{
	erofs_nid_t nid;
	unsigned int d_type;
	int err;

	err = erofs_namei(d_inode(child), &dotdot_name, &nid, &d_type);
	if (err)
		return ERR_PTR(err);
	return d_obtain_alias(erofs_iget(child->d_sb, nid));
}

static const struct export_operations erofs_export_ops = {
	.fh_to_dentry = erofs_fh_to_dentry,
	.fh_to_parent = erofs_fh_to_parent,
	.get_parent = erofs_get_parent,
};

static int erofs_fc_fill_pseudo_super(struct super_block *sb, struct fs_context *fc)
{
	static const struct tree_descr empty_descr = {""};

	return simple_fill_super(sb, EROFS_SUPER_MAGIC, &empty_descr);
}

static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
{
	struct inode *inode;
	struct erofs_sb_info *sbi;
	struct erofs_fs_context *ctx = fc->fs_private;
	int err;

	sb->s_magic = EROFS_SUPER_MAGIC;
	sb->s_flags |= SB_RDONLY | SB_NOATIME;
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_op = &erofs_sops;

	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
	if (!sbi)
		return -ENOMEM;

	sb->s_fs_info = sbi;
	sbi->opt = ctx->opt;
	sbi->devs = ctx->devs;
	ctx->devs = NULL;
	sbi->fsid = ctx->fsid;
	ctx->fsid = NULL;
	sbi->domain_id = ctx->domain_id;
	ctx->domain_id = NULL;

	if (erofs_is_fscache_mode(sb)) {
		sb->s_blocksize = EROFS_BLKSIZ;
		sb->s_blocksize_bits = LOG_BLOCK_SIZE;

		err = erofs_fscache_register_fs(sb);
		if (err)
			return err;

		err = super_setup_bdi(sb);
		if (err)
			return err;
	} else {
		if (!sb_set_blocksize(sb, EROFS_BLKSIZ)) {
			erofs_err(sb, "failed to set erofs blksize");
			return -EINVAL;
		}

		sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev,
						  &sbi->dax_part_off,
						  NULL, NULL);
	}

	err = erofs_read_superblock(sb);
	if (err)
		return err;

	if (test_opt(&sbi->opt, DAX_ALWAYS)) {
		BUILD_BUG_ON(EROFS_BLKSIZ != PAGE_SIZE);

		if (!sbi->dax_dev) {
			errorfc(fc, "DAX unsupported by block device. Turning off DAX.");
			clear_opt(&sbi->opt, DAX_ALWAYS);
		}
	}

	sb->s_time_gran = 1;
	sb->s_xattr = erofs_xattr_handlers;
	sb->s_export_op = &erofs_export_ops;

	if (test_opt(&sbi->opt, POSIX_ACL))
		sb->s_flags |= SB_POSIXACL;
	else
		sb->s_flags &= ~SB_POSIXACL;

#ifdef CONFIG_EROFS_FS_ZIP
	xa_init(&sbi->managed_pslots);
#endif

	/* get the root inode */
	inode = erofs_iget(sb, ROOT_NID(sbi));
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	if (!S_ISDIR(inode->i_mode)) {
		erofs_err(sb, "rootino(nid %llu) is not a directory(i_mode %o)",
			  ROOT_NID(sbi), inode->i_mode);
		iput(inode);
		return -EINVAL;
	}

	sb->s_root = d_make_root(inode);
	if (!sb->s_root)
		return -ENOMEM;

	erofs_shrinker_register(sb);
	/* sb->s_umount is already locked, SB_ACTIVE and SB_BORN are not set */
	err = erofs_init_managed_cache(sb);
	if (err)
		return err;

	err = erofs_register_sysfs(sb);
	if (err)
		return err;

	erofs_info(sb, "mounted with root inode @ nid %llu.", ROOT_NID(sbi));
	return 0;
}

static int erofs_fc_anon_get_tree(struct fs_context *fc)
{
	return get_tree_nodev(fc, erofs_fc_fill_pseudo_super);
}

static int erofs_fc_get_tree(struct fs_context *fc)
{
	struct erofs_fs_context *ctx = fc->fs_private;

	if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && ctx->fsid)
		return get_tree_nodev(fc, erofs_fc_fill_super);

	return get_tree_bdev(fc, erofs_fc_fill_super);
}

static int erofs_fc_reconfigure(struct fs_context *fc)
{
	struct super_block *sb = fc->root->d_sb;
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_fs_context *ctx = fc->fs_private;

	DBG_BUGON(!sb_rdonly(sb));

	if (ctx->fsid || ctx->domain_id)
		erofs_info(sb, "ignoring reconfiguration for fsid|domain_id.");

	if (test_opt(&ctx->opt, POSIX_ACL))
		fc->sb_flags |= SB_POSIXACL;
	else
		fc->sb_flags &= ~SB_POSIXACL;

	sbi->opt = ctx->opt;

	fc->sb_flags |= SB_RDONLY;
	return 0;
}

static int erofs_release_device_info(int id, void *ptr, void *data)
{
	struct erofs_device_info *dif = ptr;

	fs_put_dax(dif->dax_dev, NULL);
	if (dif->bdev)
		blkdev_put(dif->bdev, FMODE_READ | FMODE_EXCL);
	erofs_fscache_unregister_cookie(dif->fscache);
	dif->fscache = NULL;
	kfree(dif->path);
	kfree(dif);
	return 0;
}

static void erofs_free_dev_context(struct erofs_dev_context *devs)
{
	if (!devs)
		return;
	idr_for_each(&devs->tree, &erofs_release_device_info, NULL);
	idr_destroy(&devs->tree);
	kfree(devs);
}

static void erofs_fc_free(struct fs_context *fc)
{
	struct erofs_fs_context *ctx = fc->fs_private;

	erofs_free_dev_context(ctx->devs);
	kfree(ctx->fsid);
	kfree(ctx->domain_id);
	kfree(ctx);
}

static const struct fs_context_operations erofs_context_ops = {
	.parse_param	= erofs_fc_parse_param,
	.get_tree	= erofs_fc_get_tree,
	.reconfigure	= erofs_fc_reconfigure,
	.free		= erofs_fc_free,
};

static const struct fs_context_operations erofs_anon_context_ops = {
	.get_tree	= erofs_fc_anon_get_tree,
};

static int erofs_init_fs_context(struct fs_context *fc)
{
	struct erofs_fs_context *ctx;

	/* pseudo mount for anon inodes */
	if (fc->sb_flags & SB_KERNMOUNT) {
		fc->ops = &erofs_anon_context_ops;
		return 0;
	}

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;
	ctx->devs = kzalloc(sizeof(struct erofs_dev_context), GFP_KERNEL);
	if (!ctx->devs) {
		kfree(ctx);
		return -ENOMEM;
	}
	fc->fs_private = ctx;

	idr_init(&ctx->devs->tree);
	init_rwsem(&ctx->devs->rwsem);
	erofs_default_options(ctx);
	fc->ops = &erofs_context_ops;
	return 0;
}

/*
 * could be triggered after deactivate_locked_super()
 * is called, thus including umount and failed to initialize.
 */
static void erofs_kill_sb(struct super_block *sb)
{
	struct erofs_sb_info *sbi;

	WARN_ON(sb->s_magic != EROFS_SUPER_MAGIC);

	/* pseudo mount for anon inodes */
	if (sb->s_flags & SB_KERNMOUNT) {
		kill_anon_super(sb);
		return;
	}

	if (erofs_is_fscache_mode(sb))
		kill_anon_super(sb);
	else
		kill_block_super(sb);

	sbi = EROFS_SB(sb);
	if (!sbi)
		return;

	erofs_free_dev_context(sbi->devs);
	fs_put_dax(sbi->dax_dev, NULL);
	erofs_fscache_unregister_fs(sb);
	kfree(sbi->fsid);
	kfree(sbi->domain_id);
	kfree(sbi);
	sb->s_fs_info = NULL;
}

/* called when ->s_root is non-NULL */
static void erofs_put_super(struct super_block *sb)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);

	DBG_BUGON(!sbi);

	erofs_unregister_sysfs(sb);
	erofs_shrinker_unregister(sb);
#ifdef CONFIG_EROFS_FS_ZIP
	iput(sbi->managed_cache);
	sbi->managed_cache = NULL;
	iput(sbi->packed_inode);
	sbi->packed_inode = NULL;
#endif
	erofs_fscache_unregister_fs(sb);
}

struct file_system_type erofs_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "erofs",
	.init_fs_context = erofs_init_fs_context,
	.kill_sb	= erofs_kill_sb,
	.fs_flags	= FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
};
MODULE_ALIAS_FS("erofs");

static int __init erofs_module_init(void)
{
	int err;

	erofs_check_ondisk_layout_definitions();

	erofs_inode_cachep = kmem_cache_create("erofs_inode",
					       sizeof(struct erofs_inode), 0,
					       SLAB_RECLAIM_ACCOUNT,
					       erofs_inode_init_once);
	if (!erofs_inode_cachep) {
		err = -ENOMEM;
		goto icache_err;
	}

	err = erofs_init_shrinker();
	if (err)
		goto shrinker_err;

	err = z_erofs_lzma_init();
	if (err)
		goto lzma_err;

	erofs_pcpubuf_init();
	err = z_erofs_init_zip_subsystem();
	if (err)
		goto zip_err;

	err = erofs_init_sysfs();
	if (err)
		goto sysfs_err;

	err = register_filesystem(&erofs_fs_type);
	if (err)
		goto fs_err;

	return 0;

fs_err:
	erofs_exit_sysfs();
sysfs_err:
	z_erofs_exit_zip_subsystem();
zip_err:
	z_erofs_lzma_exit();
lzma_err:
	erofs_exit_shrinker();
shrinker_err:
	kmem_cache_destroy(erofs_inode_cachep);
icache_err:
	return err;
}

static void __exit erofs_module_exit(void)
{
	unregister_filesystem(&erofs_fs_type);

	/* Ensure all RCU free inodes / pclusters are safe to be destroyed. */
	rcu_barrier();

	erofs_exit_sysfs();
	z_erofs_exit_zip_subsystem();
	z_erofs_lzma_exit();
	erofs_exit_shrinker();
	kmem_cache_destroy(erofs_inode_cachep);
	erofs_pcpubuf_exit();
}

/* get filesystem statistics */
static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct super_block *sb = dentry->d_sb;
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	u64 id = 0;

	if (!erofs_is_fscache_mode(sb))
		id = huge_encode_dev(sb->s_bdev->bd_dev);

	buf->f_type = sb->s_magic;
	buf->f_bsize = EROFS_BLKSIZ;
	buf->f_blocks = sbi->total_blocks;
	buf->f_bfree = buf->f_bavail = 0;

	buf->f_files = ULLONG_MAX;
	buf->f_ffree = ULLONG_MAX - sbi->inos;

	buf->f_namelen = EROFS_NAME_LEN;

	buf->f_fsid = u64_to_fsid(id);
	return 0;
}

static int erofs_show_options(struct seq_file *seq, struct dentry *root)
{
	struct erofs_sb_info *sbi = EROFS_SB(root->d_sb);
	struct erofs_mount_opts *opt = &sbi->opt;

#ifdef CONFIG_EROFS_FS_XATTR
	if (test_opt(opt, XATTR_USER))
		seq_puts(seq, ",user_xattr");
	else
		seq_puts(seq, ",nouser_xattr");
#endif
#ifdef CONFIG_EROFS_FS_POSIX_ACL
	if (test_opt(opt, POSIX_ACL))
		seq_puts(seq, ",acl");
	else
		seq_puts(seq, ",noacl");
#endif
#ifdef CONFIG_EROFS_FS_ZIP
	if (opt->cache_strategy == EROFS_ZIP_CACHE_DISABLED)
		seq_puts(seq, ",cache_strategy=disabled");
	else if (opt->cache_strategy == EROFS_ZIP_CACHE_READAHEAD)
		seq_puts(seq, ",cache_strategy=readahead");
	else if (opt->cache_strategy == EROFS_ZIP_CACHE_READAROUND)
		seq_puts(seq, ",cache_strategy=readaround");
#endif
	if (test_opt(opt, DAX_ALWAYS))
		seq_puts(seq, ",dax=always");
	if (test_opt(opt, DAX_NEVER))
		seq_puts(seq, ",dax=never");
#ifdef CONFIG_EROFS_FS_ONDEMAND
	if (sbi->fsid)
		seq_printf(seq, ",fsid=%s", sbi->fsid);
	if (sbi->domain_id)
		seq_printf(seq, ",domain_id=%s", sbi->domain_id);
#endif
	return 0;
}

const struct super_operations erofs_sops = {
	.put_super = erofs_put_super,
	.alloc_inode = erofs_alloc_inode,
	.free_inode = erofs_free_inode,
	.statfs = erofs_statfs,
	.show_options = erofs_show_options,
};

module_init(erofs_module_init);
module_exit(erofs_module_exit);

MODULE_DESCRIPTION("Enhanced ROM File System");
MODULE_AUTHOR("Gao Xiang, Chao Yu, Miao Xie, CONSUMER BG, HUAWEI Inc.");
MODULE_LICENSE("GPL");