1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2017-2018 HUAWEI, Inc. 4 * https://www.huawei.com/ 5 * Copyright (C) 2021, Alibaba Cloud 6 */ 7 #include <linux/module.h> 8 #include <linux/buffer_head.h> 9 #include <linux/statfs.h> 10 #include <linux/parser.h> 11 #include <linux/seq_file.h> 12 #include <linux/crc32c.h> 13 #include <linux/fs_context.h> 14 #include <linux/fs_parser.h> 15 #include <linux/dax.h> 16 #include <linux/exportfs.h> 17 #include "xattr.h" 18 19 #define CREATE_TRACE_POINTS 20 #include <trace/events/erofs.h> 21 22 static struct kmem_cache *erofs_inode_cachep __read_mostly; 23 24 void _erofs_err(struct super_block *sb, const char *function, 25 const char *fmt, ...) 26 { 27 struct va_format vaf; 28 va_list args; 29 30 va_start(args, fmt); 31 32 vaf.fmt = fmt; 33 vaf.va = &args; 34 35 pr_err("(device %s): %s: %pV", sb->s_id, function, &vaf); 36 va_end(args); 37 } 38 39 void _erofs_info(struct super_block *sb, const char *function, 40 const char *fmt, ...) 41 { 42 struct va_format vaf; 43 va_list args; 44 45 va_start(args, fmt); 46 47 vaf.fmt = fmt; 48 vaf.va = &args; 49 50 pr_info("(device %s): %pV", sb->s_id, &vaf); 51 va_end(args); 52 } 53 54 static int erofs_superblock_csum_verify(struct super_block *sb, void *sbdata) 55 { 56 struct erofs_super_block *dsb; 57 u32 expected_crc, crc; 58 59 dsb = kmemdup(sbdata + EROFS_SUPER_OFFSET, 60 EROFS_BLKSIZ - EROFS_SUPER_OFFSET, GFP_KERNEL); 61 if (!dsb) 62 return -ENOMEM; 63 64 expected_crc = le32_to_cpu(dsb->checksum); 65 dsb->checksum = 0; 66 /* to allow for x86 boot sectors and other oddities. */ 67 crc = crc32c(~0, dsb, EROFS_BLKSIZ - EROFS_SUPER_OFFSET); 68 kfree(dsb); 69 70 if (crc != expected_crc) { 71 erofs_err(sb, "invalid checksum 0x%08x, 0x%08x expected", 72 crc, expected_crc); 73 return -EBADMSG; 74 } 75 return 0; 76 } 77 78 static void erofs_inode_init_once(void *ptr) 79 { 80 struct erofs_inode *vi = ptr; 81 82 inode_init_once(&vi->vfs_inode); 83 } 84 85 static struct inode *erofs_alloc_inode(struct super_block *sb) 86 { 87 struct erofs_inode *vi = 88 alloc_inode_sb(sb, erofs_inode_cachep, GFP_KERNEL); 89 90 if (!vi) 91 return NULL; 92 93 /* zero out everything except vfs_inode */ 94 memset(vi, 0, offsetof(struct erofs_inode, vfs_inode)); 95 return &vi->vfs_inode; 96 } 97 98 static void erofs_free_inode(struct inode *inode) 99 { 100 struct erofs_inode *vi = EROFS_I(inode); 101 102 /* be careful of RCU symlink path */ 103 if (inode->i_op == &erofs_fast_symlink_iops) 104 kfree(inode->i_link); 105 kfree(vi->xattr_shared_xattrs); 106 107 kmem_cache_free(erofs_inode_cachep, vi); 108 } 109 110 static bool check_layout_compatibility(struct super_block *sb, 111 struct erofs_super_block *dsb) 112 { 113 const unsigned int feature = le32_to_cpu(dsb->feature_incompat); 114 115 EROFS_SB(sb)->feature_incompat = feature; 116 117 /* check if current kernel meets all mandatory requirements */ 118 if (feature & (~EROFS_ALL_FEATURE_INCOMPAT)) { 119 erofs_err(sb, 120 "unidentified incompatible feature %x, please upgrade kernel version", 121 feature & ~EROFS_ALL_FEATURE_INCOMPAT); 122 return false; 123 } 124 return true; 125 } 126 127 #ifdef CONFIG_EROFS_FS_ZIP 128 /* read variable-sized metadata, offset will be aligned by 4-byte */ 129 static void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf, 130 erofs_off_t *offset, int *lengthp) 131 { 132 u8 *buffer, *ptr; 133 int len, i, cnt; 134 135 *offset = round_up(*offset, 4); 136 ptr = erofs_read_metabuf(buf, sb, erofs_blknr(*offset), EROFS_KMAP); 137 if (IS_ERR(ptr)) 138 return ptr; 139 140 len = le16_to_cpu(*(__le16 *)&ptr[erofs_blkoff(*offset)]); 141 if (!len) 142 len = U16_MAX + 1; 143 buffer = kmalloc(len, GFP_KERNEL); 144 if (!buffer) 145 return ERR_PTR(-ENOMEM); 146 *offset += sizeof(__le16); 147 *lengthp = len; 148 149 for (i = 0; i < len; i += cnt) { 150 cnt = min(EROFS_BLKSIZ - (int)erofs_blkoff(*offset), len - i); 151 ptr = erofs_read_metabuf(buf, sb, erofs_blknr(*offset), 152 EROFS_KMAP); 153 if (IS_ERR(ptr)) { 154 kfree(buffer); 155 return ptr; 156 } 157 memcpy(buffer + i, ptr + erofs_blkoff(*offset), cnt); 158 *offset += cnt; 159 } 160 return buffer; 161 } 162 163 static int erofs_load_compr_cfgs(struct super_block *sb, 164 struct erofs_super_block *dsb) 165 { 166 struct erofs_sb_info *sbi = EROFS_SB(sb); 167 struct erofs_buf buf = __EROFS_BUF_INITIALIZER; 168 unsigned int algs, alg; 169 erofs_off_t offset; 170 int size, ret = 0; 171 172 sbi->available_compr_algs = le16_to_cpu(dsb->u1.available_compr_algs); 173 if (sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS) { 174 erofs_err(sb, "try to load compressed fs with unsupported algorithms %x", 175 sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS); 176 return -EINVAL; 177 } 178 179 offset = EROFS_SUPER_OFFSET + sbi->sb_size; 180 alg = 0; 181 for (algs = sbi->available_compr_algs; algs; algs >>= 1, ++alg) { 182 void *data; 183 184 if (!(algs & 1)) 185 continue; 186 187 data = erofs_read_metadata(sb, &buf, &offset, &size); 188 if (IS_ERR(data)) { 189 ret = PTR_ERR(data); 190 break; 191 } 192 193 switch (alg) { 194 case Z_EROFS_COMPRESSION_LZ4: 195 ret = z_erofs_load_lz4_config(sb, dsb, data, size); 196 break; 197 case Z_EROFS_COMPRESSION_LZMA: 198 ret = z_erofs_load_lzma_config(sb, dsb, data, size); 199 break; 200 default: 201 DBG_BUGON(1); 202 ret = -EFAULT; 203 } 204 kfree(data); 205 if (ret) 206 break; 207 } 208 erofs_put_metabuf(&buf); 209 return ret; 210 } 211 #else 212 static int erofs_load_compr_cfgs(struct super_block *sb, 213 struct erofs_super_block *dsb) 214 { 215 if (dsb->u1.available_compr_algs) { 216 erofs_err(sb, "try to load compressed fs when compression is disabled"); 217 return -EINVAL; 218 } 219 return 0; 220 } 221 #endif 222 223 static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb, 224 struct erofs_device_info *dif, erofs_off_t *pos) 225 { 226 struct erofs_sb_info *sbi = EROFS_SB(sb); 227 struct erofs_fscache *fscache; 228 struct erofs_deviceslot *dis; 229 struct block_device *bdev; 230 void *ptr; 231 232 ptr = erofs_read_metabuf(buf, sb, erofs_blknr(*pos), EROFS_KMAP); 233 if (IS_ERR(ptr)) 234 return PTR_ERR(ptr); 235 dis = ptr + erofs_blkoff(*pos); 236 237 if (!dif->path) { 238 if (!dis->tag[0]) { 239 erofs_err(sb, "empty device tag @ pos %llu", *pos); 240 return -EINVAL; 241 } 242 dif->path = kmemdup_nul(dis->tag, sizeof(dis->tag), GFP_KERNEL); 243 if (!dif->path) 244 return -ENOMEM; 245 } 246 247 if (erofs_is_fscache_mode(sb)) { 248 fscache = erofs_fscache_register_cookie(sb, dif->path, false); 249 if (IS_ERR(fscache)) 250 return PTR_ERR(fscache); 251 dif->fscache = fscache; 252 } else { 253 bdev = blkdev_get_by_path(dif->path, FMODE_READ | FMODE_EXCL, 254 sb->s_type); 255 if (IS_ERR(bdev)) 256 return PTR_ERR(bdev); 257 dif->bdev = bdev; 258 dif->dax_dev = fs_dax_get_by_bdev(bdev, &dif->dax_part_off, 259 NULL, NULL); 260 } 261 262 dif->blocks = le32_to_cpu(dis->blocks); 263 dif->mapped_blkaddr = le32_to_cpu(dis->mapped_blkaddr); 264 sbi->total_blocks += dif->blocks; 265 *pos += EROFS_DEVT_SLOT_SIZE; 266 return 0; 267 } 268 269 static int erofs_scan_devices(struct super_block *sb, 270 struct erofs_super_block *dsb) 271 { 272 struct erofs_sb_info *sbi = EROFS_SB(sb); 273 unsigned int ondisk_extradevs; 274 erofs_off_t pos; 275 struct erofs_buf buf = __EROFS_BUF_INITIALIZER; 276 struct erofs_device_info *dif; 277 int id, err = 0; 278 279 sbi->total_blocks = sbi->primarydevice_blocks; 280 if (!erofs_sb_has_device_table(sbi)) 281 ondisk_extradevs = 0; 282 else 283 ondisk_extradevs = le16_to_cpu(dsb->extra_devices); 284 285 if (sbi->devs->extra_devices && 286 ondisk_extradevs != sbi->devs->extra_devices) { 287 erofs_err(sb, "extra devices don't match (ondisk %u, given %u)", 288 ondisk_extradevs, sbi->devs->extra_devices); 289 return -EINVAL; 290 } 291 if (!ondisk_extradevs) 292 return 0; 293 294 sbi->device_id_mask = roundup_pow_of_two(ondisk_extradevs + 1) - 1; 295 pos = le16_to_cpu(dsb->devt_slotoff) * EROFS_DEVT_SLOT_SIZE; 296 down_read(&sbi->devs->rwsem); 297 if (sbi->devs->extra_devices) { 298 idr_for_each_entry(&sbi->devs->tree, dif, id) { 299 err = erofs_init_device(&buf, sb, dif, &pos); 300 if (err) 301 break; 302 } 303 } else { 304 for (id = 0; id < ondisk_extradevs; id++) { 305 dif = kzalloc(sizeof(*dif), GFP_KERNEL); 306 if (!dif) { 307 err = -ENOMEM; 308 break; 309 } 310 311 err = idr_alloc(&sbi->devs->tree, dif, 0, 0, GFP_KERNEL); 312 if (err < 0) { 313 kfree(dif); 314 break; 315 } 316 ++sbi->devs->extra_devices; 317 318 err = erofs_init_device(&buf, sb, dif, &pos); 319 if (err) 320 break; 321 } 322 } 323 up_read(&sbi->devs->rwsem); 324 erofs_put_metabuf(&buf); 325 return err; 326 } 327 328 static int erofs_read_superblock(struct super_block *sb) 329 { 330 struct erofs_sb_info *sbi; 331 struct erofs_buf buf = __EROFS_BUF_INITIALIZER; 332 struct erofs_super_block *dsb; 333 unsigned int blkszbits; 334 void *data; 335 int ret; 336 337 data = erofs_read_metabuf(&buf, sb, 0, EROFS_KMAP); 338 if (IS_ERR(data)) { 339 erofs_err(sb, "cannot read erofs superblock"); 340 return PTR_ERR(data); 341 } 342 343 sbi = EROFS_SB(sb); 344 dsb = (struct erofs_super_block *)(data + EROFS_SUPER_OFFSET); 345 346 ret = -EINVAL; 347 if (le32_to_cpu(dsb->magic) != EROFS_SUPER_MAGIC_V1) { 348 erofs_err(sb, "cannot find valid erofs superblock"); 349 goto out; 350 } 351 352 sbi->feature_compat = le32_to_cpu(dsb->feature_compat); 353 if (erofs_sb_has_sb_chksum(sbi)) { 354 ret = erofs_superblock_csum_verify(sb, data); 355 if (ret) 356 goto out; 357 } 358 359 ret = -EINVAL; 360 blkszbits = dsb->blkszbits; 361 /* 9(512 bytes) + LOG_SECTORS_PER_BLOCK == LOG_BLOCK_SIZE */ 362 if (blkszbits != LOG_BLOCK_SIZE) { 363 erofs_err(sb, "blkszbits %u isn't supported on this platform", 364 blkszbits); 365 goto out; 366 } 367 368 if (!check_layout_compatibility(sb, dsb)) 369 goto out; 370 371 sbi->sb_size = 128 + dsb->sb_extslots * EROFS_SB_EXTSLOT_SIZE; 372 if (sbi->sb_size > EROFS_BLKSIZ) { 373 erofs_err(sb, "invalid sb_extslots %u (more than a fs block)", 374 sbi->sb_size); 375 goto out; 376 } 377 sbi->primarydevice_blocks = le32_to_cpu(dsb->blocks); 378 sbi->meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr); 379 #ifdef CONFIG_EROFS_FS_XATTR 380 sbi->xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr); 381 #endif 382 sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact)); 383 sbi->root_nid = le16_to_cpu(dsb->root_nid); 384 #ifdef CONFIG_EROFS_FS_ZIP 385 sbi->packed_inode = NULL; 386 if (erofs_sb_has_fragments(sbi) && dsb->packed_nid) { 387 sbi->packed_inode = 388 erofs_iget(sb, le64_to_cpu(dsb->packed_nid)); 389 if (IS_ERR(sbi->packed_inode)) { 390 ret = PTR_ERR(sbi->packed_inode); 391 goto out; 392 } 393 } 394 #endif 395 sbi->inos = le64_to_cpu(dsb->inos); 396 397 sbi->build_time = le64_to_cpu(dsb->build_time); 398 sbi->build_time_nsec = le32_to_cpu(dsb->build_time_nsec); 399 400 memcpy(&sb->s_uuid, dsb->uuid, sizeof(dsb->uuid)); 401 402 ret = strscpy(sbi->volume_name, dsb->volume_name, 403 sizeof(dsb->volume_name)); 404 if (ret < 0) { /* -E2BIG */ 405 erofs_err(sb, "bad volume name without NIL terminator"); 406 ret = -EFSCORRUPTED; 407 goto out; 408 } 409 410 /* parse on-disk compression configurations */ 411 if (erofs_sb_has_compr_cfgs(sbi)) 412 ret = erofs_load_compr_cfgs(sb, dsb); 413 else 414 ret = z_erofs_load_lz4_config(sb, dsb, NULL, 0); 415 if (ret < 0) 416 goto out; 417 418 /* handle multiple devices */ 419 ret = erofs_scan_devices(sb, dsb); 420 421 if (erofs_sb_has_ztailpacking(sbi)) 422 erofs_info(sb, "EXPERIMENTAL compressed inline data feature in use. Use at your own risk!"); 423 if (erofs_is_fscache_mode(sb)) 424 erofs_info(sb, "EXPERIMENTAL fscache-based on-demand read feature in use. Use at your own risk!"); 425 if (erofs_sb_has_fragments(sbi)) 426 erofs_info(sb, "EXPERIMENTAL compressed fragments feature in use. Use at your own risk!"); 427 if (erofs_sb_has_dedupe(sbi)) 428 erofs_info(sb, "EXPERIMENTAL global deduplication feature in use. Use at your own risk!"); 429 out: 430 erofs_put_metabuf(&buf); 431 return ret; 432 } 433 434 /* set up default EROFS parameters */ 435 static void erofs_default_options(struct erofs_fs_context *ctx) 436 { 437 #ifdef CONFIG_EROFS_FS_ZIP 438 ctx->opt.cache_strategy = EROFS_ZIP_CACHE_READAROUND; 439 ctx->opt.max_sync_decompress_pages = 3; 440 ctx->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_AUTO; 441 #endif 442 #ifdef CONFIG_EROFS_FS_XATTR 443 set_opt(&ctx->opt, XATTR_USER); 444 #endif 445 #ifdef CONFIG_EROFS_FS_POSIX_ACL 446 set_opt(&ctx->opt, POSIX_ACL); 447 #endif 448 } 449 450 enum { 451 Opt_user_xattr, 452 Opt_acl, 453 Opt_cache_strategy, 454 Opt_dax, 455 Opt_dax_enum, 456 Opt_device, 457 Opt_fsid, 458 Opt_domain_id, 459 Opt_err 460 }; 461 462 static const struct constant_table erofs_param_cache_strategy[] = { 463 {"disabled", EROFS_ZIP_CACHE_DISABLED}, 464 {"readahead", EROFS_ZIP_CACHE_READAHEAD}, 465 {"readaround", EROFS_ZIP_CACHE_READAROUND}, 466 {} 467 }; 468 469 static const struct constant_table erofs_dax_param_enums[] = { 470 {"always", EROFS_MOUNT_DAX_ALWAYS}, 471 {"never", EROFS_MOUNT_DAX_NEVER}, 472 {} 473 }; 474 475 static const struct fs_parameter_spec erofs_fs_parameters[] = { 476 fsparam_flag_no("user_xattr", Opt_user_xattr), 477 fsparam_flag_no("acl", Opt_acl), 478 fsparam_enum("cache_strategy", Opt_cache_strategy, 479 erofs_param_cache_strategy), 480 fsparam_flag("dax", Opt_dax), 481 fsparam_enum("dax", Opt_dax_enum, erofs_dax_param_enums), 482 fsparam_string("device", Opt_device), 483 fsparam_string("fsid", Opt_fsid), 484 fsparam_string("domain_id", Opt_domain_id), 485 {} 486 }; 487 488 static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode) 489 { 490 #ifdef CONFIG_FS_DAX 491 struct erofs_fs_context *ctx = fc->fs_private; 492 493 switch (mode) { 494 case EROFS_MOUNT_DAX_ALWAYS: 495 warnfc(fc, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); 496 set_opt(&ctx->opt, DAX_ALWAYS); 497 clear_opt(&ctx->opt, DAX_NEVER); 498 return true; 499 case EROFS_MOUNT_DAX_NEVER: 500 set_opt(&ctx->opt, DAX_NEVER); 501 clear_opt(&ctx->opt, DAX_ALWAYS); 502 return true; 503 default: 504 DBG_BUGON(1); 505 return false; 506 } 507 #else 508 errorfc(fc, "dax options not supported"); 509 return false; 510 #endif 511 } 512 513 static int erofs_fc_parse_param(struct fs_context *fc, 514 struct fs_parameter *param) 515 { 516 struct erofs_fs_context *ctx = fc->fs_private; 517 struct fs_parse_result result; 518 struct erofs_device_info *dif; 519 int opt, ret; 520 521 opt = fs_parse(fc, erofs_fs_parameters, param, &result); 522 if (opt < 0) 523 return opt; 524 525 switch (opt) { 526 case Opt_user_xattr: 527 #ifdef CONFIG_EROFS_FS_XATTR 528 if (result.boolean) 529 set_opt(&ctx->opt, XATTR_USER); 530 else 531 clear_opt(&ctx->opt, XATTR_USER); 532 #else 533 errorfc(fc, "{,no}user_xattr options not supported"); 534 #endif 535 break; 536 case Opt_acl: 537 #ifdef CONFIG_EROFS_FS_POSIX_ACL 538 if (result.boolean) 539 set_opt(&ctx->opt, POSIX_ACL); 540 else 541 clear_opt(&ctx->opt, POSIX_ACL); 542 #else 543 errorfc(fc, "{,no}acl options not supported"); 544 #endif 545 break; 546 case Opt_cache_strategy: 547 #ifdef CONFIG_EROFS_FS_ZIP 548 ctx->opt.cache_strategy = result.uint_32; 549 #else 550 errorfc(fc, "compression not supported, cache_strategy ignored"); 551 #endif 552 break; 553 case Opt_dax: 554 if (!erofs_fc_set_dax_mode(fc, EROFS_MOUNT_DAX_ALWAYS)) 555 return -EINVAL; 556 break; 557 case Opt_dax_enum: 558 if (!erofs_fc_set_dax_mode(fc, result.uint_32)) 559 return -EINVAL; 560 break; 561 case Opt_device: 562 dif = kzalloc(sizeof(*dif), GFP_KERNEL); 563 if (!dif) 564 return -ENOMEM; 565 dif->path = kstrdup(param->string, GFP_KERNEL); 566 if (!dif->path) { 567 kfree(dif); 568 return -ENOMEM; 569 } 570 down_write(&ctx->devs->rwsem); 571 ret = idr_alloc(&ctx->devs->tree, dif, 0, 0, GFP_KERNEL); 572 up_write(&ctx->devs->rwsem); 573 if (ret < 0) { 574 kfree(dif->path); 575 kfree(dif); 576 return ret; 577 } 578 ++ctx->devs->extra_devices; 579 break; 580 case Opt_fsid: 581 #ifdef CONFIG_EROFS_FS_ONDEMAND 582 kfree(ctx->opt.fsid); 583 ctx->opt.fsid = kstrdup(param->string, GFP_KERNEL); 584 if (!ctx->opt.fsid) 585 return -ENOMEM; 586 #else 587 errorfc(fc, "fsid option not supported"); 588 #endif 589 break; 590 case Opt_domain_id: 591 #ifdef CONFIG_EROFS_FS_ONDEMAND 592 kfree(ctx->opt.domain_id); 593 ctx->opt.domain_id = kstrdup(param->string, GFP_KERNEL); 594 if (!ctx->opt.domain_id) 595 return -ENOMEM; 596 #else 597 errorfc(fc, "domain_id option not supported"); 598 #endif 599 break; 600 default: 601 return -ENOPARAM; 602 } 603 return 0; 604 } 605 606 #ifdef CONFIG_EROFS_FS_ZIP 607 static const struct address_space_operations managed_cache_aops; 608 609 static bool erofs_managed_cache_release_folio(struct folio *folio, gfp_t gfp) 610 { 611 bool ret = true; 612 struct address_space *const mapping = folio->mapping; 613 614 DBG_BUGON(!folio_test_locked(folio)); 615 DBG_BUGON(mapping->a_ops != &managed_cache_aops); 616 617 if (folio_test_private(folio)) 618 ret = erofs_try_to_free_cached_page(&folio->page); 619 620 return ret; 621 } 622 623 /* 624 * It will be called only on inode eviction. In case that there are still some 625 * decompression requests in progress, wait with rescheduling for a bit here. 626 * We could introduce an extra locking instead but it seems unnecessary. 627 */ 628 static void erofs_managed_cache_invalidate_folio(struct folio *folio, 629 size_t offset, size_t length) 630 { 631 const size_t stop = length + offset; 632 633 DBG_BUGON(!folio_test_locked(folio)); 634 635 /* Check for potential overflow in debug mode */ 636 DBG_BUGON(stop > folio_size(folio) || stop < length); 637 638 if (offset == 0 && stop == folio_size(folio)) 639 while (!erofs_managed_cache_release_folio(folio, GFP_NOFS)) 640 cond_resched(); 641 } 642 643 static const struct address_space_operations managed_cache_aops = { 644 .release_folio = erofs_managed_cache_release_folio, 645 .invalidate_folio = erofs_managed_cache_invalidate_folio, 646 }; 647 648 static int erofs_init_managed_cache(struct super_block *sb) 649 { 650 struct erofs_sb_info *const sbi = EROFS_SB(sb); 651 struct inode *const inode = new_inode(sb); 652 653 if (!inode) 654 return -ENOMEM; 655 656 set_nlink(inode, 1); 657 inode->i_size = OFFSET_MAX; 658 659 inode->i_mapping->a_ops = &managed_cache_aops; 660 mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); 661 sbi->managed_cache = inode; 662 return 0; 663 } 664 #else 665 static int erofs_init_managed_cache(struct super_block *sb) { return 0; } 666 #endif 667 668 static struct inode *erofs_nfs_get_inode(struct super_block *sb, 669 u64 ino, u32 generation) 670 { 671 return erofs_iget(sb, ino); 672 } 673 674 static struct dentry *erofs_fh_to_dentry(struct super_block *sb, 675 struct fid *fid, int fh_len, int fh_type) 676 { 677 return generic_fh_to_dentry(sb, fid, fh_len, fh_type, 678 erofs_nfs_get_inode); 679 } 680 681 static struct dentry *erofs_fh_to_parent(struct super_block *sb, 682 struct fid *fid, int fh_len, int fh_type) 683 { 684 return generic_fh_to_parent(sb, fid, fh_len, fh_type, 685 erofs_nfs_get_inode); 686 } 687 688 static struct dentry *erofs_get_parent(struct dentry *child) 689 { 690 erofs_nid_t nid; 691 unsigned int d_type; 692 int err; 693 694 err = erofs_namei(d_inode(child), &dotdot_name, &nid, &d_type); 695 if (err) 696 return ERR_PTR(err); 697 return d_obtain_alias(erofs_iget(child->d_sb, nid)); 698 } 699 700 static const struct export_operations erofs_export_ops = { 701 .fh_to_dentry = erofs_fh_to_dentry, 702 .fh_to_parent = erofs_fh_to_parent, 703 .get_parent = erofs_get_parent, 704 }; 705 706 static int erofs_fc_fill_pseudo_super(struct super_block *sb, struct fs_context *fc) 707 { 708 static const struct tree_descr empty_descr = {""}; 709 710 return simple_fill_super(sb, EROFS_SUPER_MAGIC, &empty_descr); 711 } 712 713 static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) 714 { 715 struct inode *inode; 716 struct erofs_sb_info *sbi; 717 struct erofs_fs_context *ctx = fc->fs_private; 718 int err; 719 720 sb->s_magic = EROFS_SUPER_MAGIC; 721 sb->s_flags |= SB_RDONLY | SB_NOATIME; 722 sb->s_maxbytes = MAX_LFS_FILESIZE; 723 sb->s_op = &erofs_sops; 724 725 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 726 if (!sbi) 727 return -ENOMEM; 728 729 sb->s_fs_info = sbi; 730 sbi->opt = ctx->opt; 731 ctx->opt.fsid = NULL; 732 ctx->opt.domain_id = NULL; 733 sbi->devs = ctx->devs; 734 ctx->devs = NULL; 735 736 if (erofs_is_fscache_mode(sb)) { 737 sb->s_blocksize = EROFS_BLKSIZ; 738 sb->s_blocksize_bits = LOG_BLOCK_SIZE; 739 740 err = erofs_fscache_register_fs(sb); 741 if (err) 742 return err; 743 744 err = super_setup_bdi(sb); 745 if (err) 746 return err; 747 } else { 748 if (!sb_set_blocksize(sb, EROFS_BLKSIZ)) { 749 erofs_err(sb, "failed to set erofs blksize"); 750 return -EINVAL; 751 } 752 753 sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev, 754 &sbi->dax_part_off, 755 NULL, NULL); 756 } 757 758 err = erofs_read_superblock(sb); 759 if (err) 760 return err; 761 762 if (test_opt(&sbi->opt, DAX_ALWAYS)) { 763 BUILD_BUG_ON(EROFS_BLKSIZ != PAGE_SIZE); 764 765 if (!sbi->dax_dev) { 766 errorfc(fc, "DAX unsupported by block device. Turning off DAX."); 767 clear_opt(&sbi->opt, DAX_ALWAYS); 768 } 769 } 770 771 sb->s_time_gran = 1; 772 sb->s_xattr = erofs_xattr_handlers; 773 sb->s_export_op = &erofs_export_ops; 774 775 if (test_opt(&sbi->opt, POSIX_ACL)) 776 sb->s_flags |= SB_POSIXACL; 777 else 778 sb->s_flags &= ~SB_POSIXACL; 779 780 #ifdef CONFIG_EROFS_FS_ZIP 781 xa_init(&sbi->managed_pslots); 782 #endif 783 784 /* get the root inode */ 785 inode = erofs_iget(sb, ROOT_NID(sbi)); 786 if (IS_ERR(inode)) 787 return PTR_ERR(inode); 788 789 if (!S_ISDIR(inode->i_mode)) { 790 erofs_err(sb, "rootino(nid %llu) is not a directory(i_mode %o)", 791 ROOT_NID(sbi), inode->i_mode); 792 iput(inode); 793 return -EINVAL; 794 } 795 796 sb->s_root = d_make_root(inode); 797 if (!sb->s_root) 798 return -ENOMEM; 799 800 erofs_shrinker_register(sb); 801 /* sb->s_umount is already locked, SB_ACTIVE and SB_BORN are not set */ 802 err = erofs_init_managed_cache(sb); 803 if (err) 804 return err; 805 806 err = erofs_register_sysfs(sb); 807 if (err) 808 return err; 809 810 erofs_info(sb, "mounted with root inode @ nid %llu.", ROOT_NID(sbi)); 811 return 0; 812 } 813 814 static int erofs_fc_anon_get_tree(struct fs_context *fc) 815 { 816 return get_tree_nodev(fc, erofs_fc_fill_pseudo_super); 817 } 818 819 static int erofs_fc_get_tree(struct fs_context *fc) 820 { 821 struct erofs_fs_context *ctx = fc->fs_private; 822 823 if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && ctx->opt.fsid) 824 return get_tree_nodev(fc, erofs_fc_fill_super); 825 826 return get_tree_bdev(fc, erofs_fc_fill_super); 827 } 828 829 static int erofs_fc_reconfigure(struct fs_context *fc) 830 { 831 struct super_block *sb = fc->root->d_sb; 832 struct erofs_sb_info *sbi = EROFS_SB(sb); 833 struct erofs_fs_context *ctx = fc->fs_private; 834 835 DBG_BUGON(!sb_rdonly(sb)); 836 837 if (test_opt(&ctx->opt, POSIX_ACL)) 838 fc->sb_flags |= SB_POSIXACL; 839 else 840 fc->sb_flags &= ~SB_POSIXACL; 841 842 sbi->opt = ctx->opt; 843 844 fc->sb_flags |= SB_RDONLY; 845 return 0; 846 } 847 848 static int erofs_release_device_info(int id, void *ptr, void *data) 849 { 850 struct erofs_device_info *dif = ptr; 851 852 fs_put_dax(dif->dax_dev, NULL); 853 if (dif->bdev) 854 blkdev_put(dif->bdev, FMODE_READ | FMODE_EXCL); 855 erofs_fscache_unregister_cookie(dif->fscache); 856 dif->fscache = NULL; 857 kfree(dif->path); 858 kfree(dif); 859 return 0; 860 } 861 862 static void erofs_free_dev_context(struct erofs_dev_context *devs) 863 { 864 if (!devs) 865 return; 866 idr_for_each(&devs->tree, &erofs_release_device_info, NULL); 867 idr_destroy(&devs->tree); 868 kfree(devs); 869 } 870 871 static void erofs_fc_free(struct fs_context *fc) 872 { 873 struct erofs_fs_context *ctx = fc->fs_private; 874 875 erofs_free_dev_context(ctx->devs); 876 kfree(ctx->opt.fsid); 877 kfree(ctx->opt.domain_id); 878 kfree(ctx); 879 } 880 881 static const struct fs_context_operations erofs_context_ops = { 882 .parse_param = erofs_fc_parse_param, 883 .get_tree = erofs_fc_get_tree, 884 .reconfigure = erofs_fc_reconfigure, 885 .free = erofs_fc_free, 886 }; 887 888 static const struct fs_context_operations erofs_anon_context_ops = { 889 .get_tree = erofs_fc_anon_get_tree, 890 }; 891 892 static int erofs_init_fs_context(struct fs_context *fc) 893 { 894 struct erofs_fs_context *ctx; 895 896 /* pseudo mount for anon inodes */ 897 if (fc->sb_flags & SB_KERNMOUNT) { 898 fc->ops = &erofs_anon_context_ops; 899 return 0; 900 } 901 902 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); 903 if (!ctx) 904 return -ENOMEM; 905 ctx->devs = kzalloc(sizeof(struct erofs_dev_context), GFP_KERNEL); 906 if (!ctx->devs) { 907 kfree(ctx); 908 return -ENOMEM; 909 } 910 fc->fs_private = ctx; 911 912 idr_init(&ctx->devs->tree); 913 init_rwsem(&ctx->devs->rwsem); 914 erofs_default_options(ctx); 915 fc->ops = &erofs_context_ops; 916 return 0; 917 } 918 919 /* 920 * could be triggered after deactivate_locked_super() 921 * is called, thus including umount and failed to initialize. 922 */ 923 static void erofs_kill_sb(struct super_block *sb) 924 { 925 struct erofs_sb_info *sbi; 926 927 WARN_ON(sb->s_magic != EROFS_SUPER_MAGIC); 928 929 /* pseudo mount for anon inodes */ 930 if (sb->s_flags & SB_KERNMOUNT) { 931 kill_anon_super(sb); 932 return; 933 } 934 935 if (erofs_is_fscache_mode(sb)) 936 kill_anon_super(sb); 937 else 938 kill_block_super(sb); 939 940 sbi = EROFS_SB(sb); 941 if (!sbi) 942 return; 943 944 erofs_free_dev_context(sbi->devs); 945 fs_put_dax(sbi->dax_dev, NULL); 946 erofs_fscache_unregister_fs(sb); 947 kfree(sbi->opt.fsid); 948 kfree(sbi->opt.domain_id); 949 kfree(sbi); 950 sb->s_fs_info = NULL; 951 } 952 953 /* called when ->s_root is non-NULL */ 954 static void erofs_put_super(struct super_block *sb) 955 { 956 struct erofs_sb_info *const sbi = EROFS_SB(sb); 957 958 DBG_BUGON(!sbi); 959 960 erofs_unregister_sysfs(sb); 961 erofs_shrinker_unregister(sb); 962 #ifdef CONFIG_EROFS_FS_ZIP 963 iput(sbi->managed_cache); 964 sbi->managed_cache = NULL; 965 iput(sbi->packed_inode); 966 sbi->packed_inode = NULL; 967 #endif 968 erofs_fscache_unregister_fs(sb); 969 } 970 971 struct file_system_type erofs_fs_type = { 972 .owner = THIS_MODULE, 973 .name = "erofs", 974 .init_fs_context = erofs_init_fs_context, 975 .kill_sb = erofs_kill_sb, 976 .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, 977 }; 978 MODULE_ALIAS_FS("erofs"); 979 980 static int __init erofs_module_init(void) 981 { 982 int err; 983 984 erofs_check_ondisk_layout_definitions(); 985 986 erofs_inode_cachep = kmem_cache_create("erofs_inode", 987 sizeof(struct erofs_inode), 0, 988 SLAB_RECLAIM_ACCOUNT, 989 erofs_inode_init_once); 990 if (!erofs_inode_cachep) { 991 err = -ENOMEM; 992 goto icache_err; 993 } 994 995 err = erofs_init_shrinker(); 996 if (err) 997 goto shrinker_err; 998 999 err = z_erofs_lzma_init(); 1000 if (err) 1001 goto lzma_err; 1002 1003 erofs_pcpubuf_init(); 1004 err = z_erofs_init_zip_subsystem(); 1005 if (err) 1006 goto zip_err; 1007 1008 err = erofs_init_sysfs(); 1009 if (err) 1010 goto sysfs_err; 1011 1012 err = register_filesystem(&erofs_fs_type); 1013 if (err) 1014 goto fs_err; 1015 1016 return 0; 1017 1018 fs_err: 1019 erofs_exit_sysfs(); 1020 sysfs_err: 1021 z_erofs_exit_zip_subsystem(); 1022 zip_err: 1023 z_erofs_lzma_exit(); 1024 lzma_err: 1025 erofs_exit_shrinker(); 1026 shrinker_err: 1027 kmem_cache_destroy(erofs_inode_cachep); 1028 icache_err: 1029 return err; 1030 } 1031 1032 static void __exit erofs_module_exit(void) 1033 { 1034 unregister_filesystem(&erofs_fs_type); 1035 1036 /* Ensure all RCU free inodes / pclusters are safe to be destroyed. */ 1037 rcu_barrier(); 1038 1039 erofs_exit_sysfs(); 1040 z_erofs_exit_zip_subsystem(); 1041 z_erofs_lzma_exit(); 1042 erofs_exit_shrinker(); 1043 kmem_cache_destroy(erofs_inode_cachep); 1044 erofs_pcpubuf_exit(); 1045 } 1046 1047 /* get filesystem statistics */ 1048 static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf) 1049 { 1050 struct super_block *sb = dentry->d_sb; 1051 struct erofs_sb_info *sbi = EROFS_SB(sb); 1052 u64 id = 0; 1053 1054 if (!erofs_is_fscache_mode(sb)) 1055 id = huge_encode_dev(sb->s_bdev->bd_dev); 1056 1057 buf->f_type = sb->s_magic; 1058 buf->f_bsize = EROFS_BLKSIZ; 1059 buf->f_blocks = sbi->total_blocks; 1060 buf->f_bfree = buf->f_bavail = 0; 1061 1062 buf->f_files = ULLONG_MAX; 1063 buf->f_ffree = ULLONG_MAX - sbi->inos; 1064 1065 buf->f_namelen = EROFS_NAME_LEN; 1066 1067 buf->f_fsid = u64_to_fsid(id); 1068 return 0; 1069 } 1070 1071 static int erofs_show_options(struct seq_file *seq, struct dentry *root) 1072 { 1073 struct erofs_sb_info *sbi = EROFS_SB(root->d_sb); 1074 struct erofs_mount_opts *opt = &sbi->opt; 1075 1076 #ifdef CONFIG_EROFS_FS_XATTR 1077 if (test_opt(opt, XATTR_USER)) 1078 seq_puts(seq, ",user_xattr"); 1079 else 1080 seq_puts(seq, ",nouser_xattr"); 1081 #endif 1082 #ifdef CONFIG_EROFS_FS_POSIX_ACL 1083 if (test_opt(opt, POSIX_ACL)) 1084 seq_puts(seq, ",acl"); 1085 else 1086 seq_puts(seq, ",noacl"); 1087 #endif 1088 #ifdef CONFIG_EROFS_FS_ZIP 1089 if (opt->cache_strategy == EROFS_ZIP_CACHE_DISABLED) 1090 seq_puts(seq, ",cache_strategy=disabled"); 1091 else if (opt->cache_strategy == EROFS_ZIP_CACHE_READAHEAD) 1092 seq_puts(seq, ",cache_strategy=readahead"); 1093 else if (opt->cache_strategy == EROFS_ZIP_CACHE_READAROUND) 1094 seq_puts(seq, ",cache_strategy=readaround"); 1095 #endif 1096 if (test_opt(opt, DAX_ALWAYS)) 1097 seq_puts(seq, ",dax=always"); 1098 if (test_opt(opt, DAX_NEVER)) 1099 seq_puts(seq, ",dax=never"); 1100 #ifdef CONFIG_EROFS_FS_ONDEMAND 1101 if (opt->fsid) 1102 seq_printf(seq, ",fsid=%s", opt->fsid); 1103 if (opt->domain_id) 1104 seq_printf(seq, ",domain_id=%s", opt->domain_id); 1105 #endif 1106 return 0; 1107 } 1108 1109 const struct super_operations erofs_sops = { 1110 .put_super = erofs_put_super, 1111 .alloc_inode = erofs_alloc_inode, 1112 .free_inode = erofs_free_inode, 1113 .statfs = erofs_statfs, 1114 .show_options = erofs_show_options, 1115 }; 1116 1117 module_init(erofs_module_init); 1118 module_exit(erofs_module_exit); 1119 1120 MODULE_DESCRIPTION("Enhanced ROM File System"); 1121 MODULE_AUTHOR("Gao Xiang, Chao Yu, Miao Xie, CONSUMER BG, HUAWEI Inc."); 1122 MODULE_LICENSE("GPL"); 1123