// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2022, Alibaba Cloud
 * Copyright (C) 2022, Bytedance Inc. All rights reserved.
 */
#include <linux/pseudo_fs.h>
#include <linux/fscache.h>
#include "internal.h"

static DEFINE_MUTEX(erofs_domain_list_lock);
static DEFINE_MUTEX(erofs_domain_cookies_lock);
static LIST_HEAD(erofs_domain_list);
static LIST_HEAD(erofs_domain_cookies_list);
static struct vfsmount *erofs_pseudo_mnt;

static int erofs_anon_init_fs_context(struct fs_context *fc)
{
	return init_pseudo(fc, EROFS_SUPER_MAGIC) ? 0 : -ENOMEM;
}

static struct file_system_type erofs_anon_fs_type = {
	.owner		 = THIS_MODULE,
	.name		 = "pseudo_erofs",
	.init_fs_context = erofs_anon_init_fs_context,
	.kill_sb	 = kill_anon_super,
};

struct erofs_fscache_io {
	struct netfs_cache_resources cres;
	struct iov_iter		iter;
	netfs_io_terminated_t	end_io;
	void			*private;
	refcount_t		ref;
};

struct erofs_fscache_rq {
	struct address_space	*mapping;	/* The mapping being accessed */
	loff_t			start;		/* Start position */
	size_t			len;		/* Length of the request */
	size_t			submitted;	/* Bytes submitted so far */
	short			error;		/* 0 or error that occurred */
	refcount_t		ref;
};

static bool erofs_fscache_io_put(struct erofs_fscache_io *io)
{
	if (!refcount_dec_and_test(&io->ref))
		return false;
	if (io->cres.ops)
		io->cres.ops->end_operation(&io->cres);
	kfree(io);
	return true;
}

static void erofs_fscache_req_complete(struct erofs_fscache_rq *req)
{
	struct folio *folio;
	bool failed = req->error;
	pgoff_t start_page = req->start / PAGE_SIZE;
	pgoff_t last_page = ((req->start + req->len) / PAGE_SIZE) - 1;

	XA_STATE(xas, &req->mapping->i_pages, start_page);

	rcu_read_lock();
	xas_for_each(&xas, folio, last_page) {
		if (xas_retry(&xas, folio))
			continue;
		if (!failed)
			folio_mark_uptodate(folio);
		folio_unlock(folio);
	}
	rcu_read_unlock();
}

static void erofs_fscache_req_put(struct erofs_fscache_rq *req)
{
	if (!refcount_dec_and_test(&req->ref))
		return;
	erofs_fscache_req_complete(req);
	kfree(req);
}

static struct erofs_fscache_rq *erofs_fscache_req_alloc(struct address_space *mapping,
						loff_t start, size_t len)
{
	struct erofs_fscache_rq *req = kzalloc(sizeof(*req), GFP_KERNEL);

	if (!req)
		return NULL;
	req->mapping = mapping;
	req->start = start;
	req->len = len;
	refcount_set(&req->ref, 1);
	return req;
}

static void erofs_fscache_req_io_put(struct erofs_fscache_io *io)
{
	struct erofs_fscache_rq *req = io->private;

	if (erofs_fscache_io_put(io))
		erofs_fscache_req_put(req);
}

static void erofs_fscache_req_end_io(void *priv,
		ssize_t transferred_or_error, bool was_async)
{
	struct erofs_fscache_io *io = priv;
	struct erofs_fscache_rq *req = io->private;

	if (IS_ERR_VALUE(transferred_or_error))
		req->error = transferred_or_error;
	erofs_fscache_req_io_put(io);
}

static struct erofs_fscache_io *erofs_fscache_req_io_alloc(struct erofs_fscache_rq *req)
{
	struct erofs_fscache_io *io = kzalloc(sizeof(*io), GFP_KERNEL);

	if (!io)
		return NULL;
	io->end_io = erofs_fscache_req_end_io;
	io->private = req;
	refcount_inc(&req->ref);
	refcount_set(&io->ref, 1);
	return io;
}
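/*
 * Note on lifetimes: each erofs_fscache_io submitted for a request holds an
 * extra reference on its owning erofs_fscache_rq, so the request is only
 * completed (marking folios uptodate and unlocking them in
 * erofs_fscache_req_complete()) once the last in-flight I/O has ended.
 */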
/*
 * Read data from fscache described by cookie at pstart physical address
 * offset, and fill the read data into buffer described by io->iter.
 */
static int erofs_fscache_read_io_async(struct fscache_cookie *cookie,
		loff_t pstart, struct erofs_fscache_io *io)
{
	enum netfs_io_source source;
	struct netfs_cache_resources *cres = &io->cres;
	struct iov_iter *iter = &io->iter;
	int ret;

	ret = fscache_begin_read_operation(cres, cookie);
	if (ret)
		return ret;

	while (iov_iter_count(iter)) {
		size_t orig_count = iov_iter_count(iter), len = orig_count;
		unsigned long flags = 1 << NETFS_SREQ_ONDEMAND;

		source = cres->ops->prepare_ondemand_read(cres,
				pstart, &len, LLONG_MAX, &flags, 0);
		if (WARN_ON(len == 0))
			source = NETFS_INVALID_READ;
		if (source != NETFS_READ_FROM_CACHE) {
			erofs_err(NULL, "prepare_read failed (source %d)", source);
			return -EIO;
		}

		iov_iter_truncate(iter, len);
		refcount_inc(&io->ref);
		ret = fscache_read(cres, pstart, iter, NETFS_READ_HOLE_FAIL,
				   io->end_io, io);
		if (ret == -EIOCBQUEUED)
			ret = 0;
		if (ret) {
			erofs_err(NULL, "fscache_read failed (ret %d)", ret);
			return ret;
		}
		if (WARN_ON(iov_iter_count(iter)))
			return -EIO;

		iov_iter_reexpand(iter, orig_count - len);
		pstart += len;
	}
	return 0;
}

struct erofs_fscache_bio {
	struct erofs_fscache_io io;
	struct bio bio;		/* w/o bdev to share bio_add_page/endio() */
	struct bio_vec bvecs[BIO_MAX_VECS];
};

static void erofs_fscache_bio_endio(void *priv,
		ssize_t transferred_or_error, bool was_async)
{
	struct erofs_fscache_bio *io = priv;

	if (IS_ERR_VALUE(transferred_or_error))
		io->bio.bi_status = errno_to_blk_status(transferred_or_error);
	io->bio.bi_end_io(&io->bio);
	BUILD_BUG_ON(offsetof(struct erofs_fscache_bio, io) != 0);
	erofs_fscache_io_put(&io->io);
}

struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev)
{
	struct erofs_fscache_bio *io;

	io = kmalloc(sizeof(*io), GFP_KERNEL | __GFP_NOFAIL);
	bio_init(&io->bio, NULL, io->bvecs, BIO_MAX_VECS, REQ_OP_READ);
	io->io.private = mdev->m_fscache->cookie;
	io->io.end_io = erofs_fscache_bio_endio;
	refcount_set(&io->io.ref, 1);
	return &io->bio;
}

void erofs_fscache_submit_bio(struct bio *bio)
{
	struct erofs_fscache_bio *io = container_of(bio,
			struct erofs_fscache_bio, bio);
	int ret;

	iov_iter_bvec(&io->io.iter, ITER_DEST, io->bvecs, bio->bi_vcnt,
		      bio->bi_iter.bi_size);
	ret = erofs_fscache_read_io_async(io->io.private,
			bio->bi_iter.bi_sector << 9, &io->io);
	erofs_fscache_io_put(&io->io);
	if (!ret)
		return;
	bio->bi_status = errno_to_blk_status(ret);
	bio->bi_end_io(bio);
}

static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
{
	struct erofs_fscache *ctx = folio->mapping->host->i_private;
	int ret = -ENOMEM;
	struct erofs_fscache_rq *req;
	struct erofs_fscache_io *io;

	req = erofs_fscache_req_alloc(folio->mapping,
				folio_pos(folio), folio_size(folio));
	if (!req) {
		folio_unlock(folio);
		return ret;
	}

	io = erofs_fscache_req_io_alloc(req);
	if (!io) {
		req->error = ret;
		goto out;
	}
	iov_iter_xarray(&io->iter, ITER_DEST, &folio->mapping->i_pages,
			folio_pos(folio), folio_size(folio));

	ret = erofs_fscache_read_io_async(ctx->cookie, folio_pos(folio), io);
	if (ret)
		req->error = ret;

	erofs_fscache_req_io_put(io);
out:
	erofs_fscache_req_put(req);
	return ret;
}
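/*
 * Read one slice of the request starting at req->start + req->submitted.
 * Depending on the block mapping, the slice is either copied from inline
 * (tail-packed) metadata, zeroed for an unmapped hole, or read from the
 * backing blob through fscache.
 */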
static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
{
	struct address_space *mapping = req->mapping;
	struct inode *inode = mapping->host;
	struct super_block *sb = inode->i_sb;
	struct erofs_fscache_io *io;
	struct erofs_map_blocks map;
	struct erofs_map_dev mdev;
	loff_t pos = req->start + req->submitted;
	size_t count;
	int ret;

	map.m_la = pos;
	ret = erofs_map_blocks(inode, &map);
	if (ret)
		return ret;

	if (map.m_flags & EROFS_MAP_META) {
		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
		struct iov_iter iter;
		erofs_blk_t blknr;
		size_t offset, size;
		void *src;

		/* For tail packing layout, the offset may be non-zero. */
		offset = erofs_blkoff(sb, map.m_pa);
		blknr = erofs_blknr(sb, map.m_pa);
		size = map.m_llen;

		src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP);
		if (IS_ERR(src))
			return PTR_ERR(src);

		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, PAGE_SIZE);
		if (copy_to_iter(src + offset, size, &iter) != size) {
			erofs_put_metabuf(&buf);
			return -EFAULT;
		}
		iov_iter_zero(PAGE_SIZE - size, &iter);
		erofs_put_metabuf(&buf);
		req->submitted += PAGE_SIZE;
		return 0;
	}

	count = req->len - req->submitted;
	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
		struct iov_iter iter;

		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count);
		iov_iter_zero(count, &iter);
		req->submitted += count;
		return 0;
	}

	count = min_t(size_t, map.m_llen - (pos - map.m_la), count);
	DBG_BUGON(!count || count % PAGE_SIZE);

	mdev = (struct erofs_map_dev) {
		.m_deviceid = map.m_deviceid,
		.m_pa = map.m_pa,
	};
	ret = erofs_map_dev(sb, &mdev);
	if (ret)
		return ret;

	io = erofs_fscache_req_io_alloc(req);
	if (!io)
		return -ENOMEM;
	iov_iter_xarray(&io->iter, ITER_DEST, &mapping->i_pages, pos, count);
	ret = erofs_fscache_read_io_async(mdev.m_fscache->cookie,
			mdev.m_pa + (pos - map.m_la), io);
	erofs_fscache_req_io_put(io);

	req->submitted += count;
	return ret;
}

static int erofs_fscache_data_read(struct erofs_fscache_rq *req)
{
	int ret;

	do {
		ret = erofs_fscache_data_read_slice(req);
		if (ret)
			req->error = ret;
	} while (!ret && req->submitted < req->len);
	return ret;
}

static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
{
	struct erofs_fscache_rq *req;
	int ret;

	req = erofs_fscache_req_alloc(folio->mapping,
			folio_pos(folio), folio_size(folio));
	if (!req) {
		folio_unlock(folio);
		return -ENOMEM;
	}

	ret = erofs_fscache_data_read(req);
	erofs_fscache_req_put(req);
	return ret;
}
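/*
 * Readahead consumes all folios from the readahead control up front; they
 * stay locked while the request is in flight and are unlocked collectively
 * by erofs_fscache_req_complete() when the last reference is dropped.
 */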
static void erofs_fscache_readahead(struct readahead_control *rac)
{
	struct erofs_fscache_rq *req;

	if (!readahead_count(rac))
		return;

	req = erofs_fscache_req_alloc(rac->mapping,
			readahead_pos(rac), readahead_length(rac));
	if (!req)
		return;

	/* The request completion will drop refs on the folios. */
	while (readahead_folio(rac))
		;

	erofs_fscache_data_read(req);
	erofs_fscache_req_put(req);
}

static const struct address_space_operations erofs_fscache_meta_aops = {
	.read_folio = erofs_fscache_meta_read_folio,
};

const struct address_space_operations erofs_fscache_access_aops = {
	.read_folio = erofs_fscache_read_folio,
	.readahead = erofs_fscache_readahead,
};

static void erofs_fscache_domain_put(struct erofs_domain *domain)
{
	mutex_lock(&erofs_domain_list_lock);
	if (refcount_dec_and_test(&domain->ref)) {
		list_del(&domain->list);
		if (list_empty(&erofs_domain_list)) {
			kern_unmount(erofs_pseudo_mnt);
			erofs_pseudo_mnt = NULL;
		}
		fscache_relinquish_volume(domain->volume, NULL, false);
		mutex_unlock(&erofs_domain_list_lock);
		kfree(domain->domain_id);
		kfree(domain);
		return;
	}
	mutex_unlock(&erofs_domain_list_lock);
}

static int erofs_fscache_register_volume(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	char *domain_id = sbi->domain_id;
	struct fscache_volume *volume;
	char *name;
	int ret = 0;

	name = kasprintf(GFP_KERNEL, "erofs,%s",
			 domain_id ? domain_id : sbi->fsid);
	if (!name)
		return -ENOMEM;

	volume = fscache_acquire_volume(name, NULL, NULL, 0);
	if (IS_ERR_OR_NULL(volume)) {
		erofs_err(sb, "failed to register volume for %s", name);
		ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP;
		volume = NULL;
	}

	sbi->volume = volume;
	kfree(name);
	return ret;
}

static int erofs_fscache_init_domain(struct super_block *sb)
{
	int err;
	struct erofs_domain *domain;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL);
	if (!domain)
		return -ENOMEM;

	domain->domain_id = kstrdup(sbi->domain_id, GFP_KERNEL);
	if (!domain->domain_id) {
		kfree(domain);
		return -ENOMEM;
	}

	err = erofs_fscache_register_volume(sb);
	if (err)
		goto out;

	if (!erofs_pseudo_mnt) {
		struct vfsmount *mnt = kern_mount(&erofs_anon_fs_type);

		if (IS_ERR(mnt)) {
			err = PTR_ERR(mnt);
			goto out;
		}
		erofs_pseudo_mnt = mnt;
	}

	domain->volume = sbi->volume;
	refcount_set(&domain->ref, 1);
	list_add(&domain->list, &erofs_domain_list);
	sbi->domain = domain;
	return 0;
out:
	kfree(domain->domain_id);
	kfree(domain);
	return err;
}

static int erofs_fscache_register_domain(struct super_block *sb)
{
	int err;
	struct erofs_domain *domain;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_lock(&erofs_domain_list_lock);
	list_for_each_entry(domain, &erofs_domain_list, list) {
		if (!strcmp(domain->domain_id, sbi->domain_id)) {
			sbi->domain = domain;
			sbi->volume = domain->volume;
			refcount_inc(&domain->ref);
			mutex_unlock(&erofs_domain_list_lock);
			return 0;
		}
	}
	err = erofs_fscache_init_domain(sb);
	mutex_unlock(&erofs_domain_list_lock);
	return err;
}
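/*
 * A struct erofs_fscache ties an fscache cookie to an anonymous inode whose
 * mapping (driven by erofs_fscache_meta_aops) caches the blob contents;
 * erofs_fscache_relinquish_cookie() tears both down again.
 */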
static struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
		char *name, unsigned int flags)
{
	struct fscache_volume *volume = EROFS_SB(sb)->volume;
	struct erofs_fscache *ctx;
	struct fscache_cookie *cookie;
	struct super_block *isb;
	struct inode *inode;
	int ret;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&ctx->node);
	refcount_set(&ctx->ref, 1);

	cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE,
					name, strlen(name), NULL, 0, 0);
	if (!cookie) {
		erofs_err(sb, "failed to get cookie for %s", name);
		ret = -EINVAL;
		goto err;
	}
	fscache_use_cookie(cookie, false);

	/*
	 * Allocate anonymous inode in global pseudo mount for shareable blobs,
	 * so that they are accessible among erofs fs instances.
	 */
	isb = flags & EROFS_REG_COOKIE_SHARE ? erofs_pseudo_mnt->mnt_sb : sb;
	inode = new_inode(isb);
	if (!inode) {
		erofs_err(sb, "failed to get anon inode for %s", name);
		ret = -ENOMEM;
		goto err_cookie;
	}

	inode->i_size = OFFSET_MAX;
	inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
	mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
	inode->i_blkbits = EROFS_SB(sb)->blkszbits;
	inode->i_private = ctx;

	ctx->cookie = cookie;
	ctx->inode = inode;
	return ctx;

err_cookie:
	fscache_unuse_cookie(cookie, NULL, NULL);
	fscache_relinquish_cookie(cookie, false);
err:
	kfree(ctx);
	return ERR_PTR(ret);
}

static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx)
{
	fscache_unuse_cookie(ctx->cookie, NULL, NULL);
	fscache_relinquish_cookie(ctx->cookie, false);
	iput(ctx->inode);
	kfree(ctx->name);
	kfree(ctx);
}

static struct erofs_fscache *erofs_domain_init_cookie(struct super_block *sb,
		char *name, unsigned int flags)
{
	struct erofs_fscache *ctx;
	struct erofs_domain *domain = EROFS_SB(sb)->domain;

	ctx = erofs_fscache_acquire_cookie(sb, name, flags);
	if (IS_ERR(ctx))
		return ctx;

	ctx->name = kstrdup(name, GFP_KERNEL);
	if (!ctx->name) {
		erofs_fscache_relinquish_cookie(ctx);
		return ERR_PTR(-ENOMEM);
	}

	refcount_inc(&domain->ref);
	ctx->domain = domain;
	list_add(&ctx->node, &erofs_domain_cookies_list);
	return ctx;
}

static struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
		char *name, unsigned int flags)
{
	struct erofs_fscache *ctx;
	struct erofs_domain *domain = EROFS_SB(sb)->domain;

	flags |= EROFS_REG_COOKIE_SHARE;
	mutex_lock(&erofs_domain_cookies_lock);
	list_for_each_entry(ctx, &erofs_domain_cookies_list, node) {
		if (ctx->domain != domain || strcmp(ctx->name, name))
			continue;
		if (!(flags & EROFS_REG_COOKIE_NEED_NOEXIST)) {
			refcount_inc(&ctx->ref);
		} else {
			erofs_err(sb, "%s already exists in domain %s", name,
				  domain->domain_id);
			ctx = ERR_PTR(-EEXIST);
		}
		mutex_unlock(&erofs_domain_cookies_lock);
		return ctx;
	}
	ctx = erofs_domain_init_cookie(sb, name, flags);
	mutex_unlock(&erofs_domain_cookies_lock);
	return ctx;
}
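/*
 * Cookie registration dispatches on the mount options: with domain_id set,
 * cookies are shared across all filesystems in that domain; otherwise each
 * filesystem registers a private cookie in its own volume.
 */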
struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
						    char *name,
						    unsigned int flags)
{
	if (EROFS_SB(sb)->domain_id)
		return erofs_domain_register_cookie(sb, name, flags);
	return erofs_fscache_acquire_cookie(sb, name, flags);
}

void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx)
{
	struct erofs_domain *domain = NULL;

	if (!ctx)
		return;
	if (!ctx->domain)
		return erofs_fscache_relinquish_cookie(ctx);

	mutex_lock(&erofs_domain_cookies_lock);
	if (refcount_dec_and_test(&ctx->ref)) {
		domain = ctx->domain;
		list_del(&ctx->node);
		erofs_fscache_relinquish_cookie(ctx);
	}
	mutex_unlock(&erofs_domain_cookies_lock);
	if (domain)
		erofs_fscache_domain_put(domain);
}

int erofs_fscache_register_fs(struct super_block *sb)
{
	int ret;
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_fscache *fscache;
	unsigned int flags = 0;

	if (sbi->domain_id)
		ret = erofs_fscache_register_domain(sb);
	else
		ret = erofs_fscache_register_volume(sb);
	if (ret)
		return ret;

	/*
	 * When shared domain is enabled, use NEED_NOEXIST to guarantee
	 * the primary data blob (aka fsid) is unique in the shared domain.
	 *
	 * For the non-shared-domain case, fscache_acquire_volume() invoked by
	 * erofs_fscache_register_volume() has already guaranteed
	 * the uniqueness of the primary data blob.
	 *
	 * Acquired domain/volume will be relinquished in kill_sb() on error.
	 */
	if (sbi->domain_id)
		flags |= EROFS_REG_COOKIE_NEED_NOEXIST;
	fscache = erofs_fscache_register_cookie(sb, sbi->fsid, flags);
	if (IS_ERR(fscache))
		return PTR_ERR(fscache);

	sbi->s_fscache = fscache;
	return 0;
}

void erofs_fscache_unregister_fs(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	erofs_fscache_unregister_cookie(sbi->s_fscache);

	if (sbi->domain)
		erofs_fscache_domain_put(sbi->domain);
	else
		fscache_relinquish_volume(sbi->volume, NULL, false);

	sbi->s_fscache = NULL;
	sbi->volume = NULL;
	sbi->domain = NULL;
}