// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2022, Alibaba Cloud
 * Copyright (C) 2022, Bytedance Inc. All rights reserved.
 */
#include <linux/pseudo_fs.h>
#include <linux/fscache.h>
#include "internal.h"

static DEFINE_MUTEX(erofs_domain_list_lock);
static DEFINE_MUTEX(erofs_domain_cookies_lock);
static LIST_HEAD(erofs_domain_list);
static LIST_HEAD(erofs_domain_cookies_list);
static struct vfsmount *erofs_pseudo_mnt;

static int erofs_anon_init_fs_context(struct fs_context *fc)
{
	return init_pseudo(fc, EROFS_SUPER_MAGIC) ? 0 : -ENOMEM;
}

static struct file_system_type erofs_anon_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "pseudo_erofs",
	.init_fs_context = erofs_anon_init_fs_context,
	.kill_sb	= kill_anon_super,
};

struct erofs_fscache_io {
	struct netfs_cache_resources cres;
	struct iov_iter		iter;
	netfs_io_terminated_t	end_io;
	void			*private;
	refcount_t		ref;
};

struct erofs_fscache_rq {
	struct address_space	*mapping;	/* The mapping being accessed */
	loff_t			start;		/* Start position */
	size_t			len;		/* Length of the request */
	size_t			submitted;	/* Length submitted so far */
	short			error;		/* 0 or error that occurred */
	refcount_t		ref;
};

static bool erofs_fscache_io_put(struct erofs_fscache_io *io)
{
	if (!refcount_dec_and_test(&io->ref))
		return false;
	if (io->cres.ops)
		io->cres.ops->end_operation(&io->cres);
	kfree(io);
	return true;
}

static void erofs_fscache_req_complete(struct erofs_fscache_rq *req)
{
	struct folio *folio;
	bool failed = req->error;
	pgoff_t start_page = req->start / PAGE_SIZE;
	pgoff_t last_page = ((req->start + req->len) / PAGE_SIZE) - 1;

	XA_STATE(xas, &req->mapping->i_pages, start_page);

	rcu_read_lock();
	xas_for_each(&xas, folio, last_page) {
		if (xas_retry(&xas, folio))
			continue;
		if (!failed)
			folio_mark_uptodate(folio);
		folio_unlock(folio);
	}
	rcu_read_unlock();
}

static void erofs_fscache_req_put(struct erofs_fscache_rq *req)
{
	if (!refcount_dec_and_test(&req->ref))
		return;
	erofs_fscache_req_complete(req);
	kfree(req);
}

static struct erofs_fscache_rq *erofs_fscache_req_alloc(struct address_space *mapping,
						loff_t start, size_t len)
{
	struct erofs_fscache_rq *req = kzalloc(sizeof(*req), GFP_KERNEL);

	if (!req)
		return NULL;
	req->mapping = mapping;
	req->start = start;
	req->len = len;
	refcount_set(&req->ref, 1);
	return req;
}

static void erofs_fscache_req_io_put(struct erofs_fscache_io *io)
{
	struct erofs_fscache_rq *req = io->private;

	if (erofs_fscache_io_put(io))
		erofs_fscache_req_put(req);
}

static void erofs_fscache_req_end_io(void *priv,
		ssize_t transferred_or_error, bool was_async)
{
	struct erofs_fscache_io *io = priv;
	struct erofs_fscache_rq *req = io->private;

	if (IS_ERR_VALUE(transferred_or_error))
		req->error = transferred_or_error;
	erofs_fscache_req_io_put(io);
}

static struct erofs_fscache_io *erofs_fscache_req_io_alloc(struct erofs_fscache_rq *req)
{
	struct erofs_fscache_io *io = kzalloc(sizeof(*io), GFP_KERNEL);

	if (!io)
		return NULL;
	io->end_io = erofs_fscache_req_end_io;
	io->private = req;
	refcount_inc(&req->ref);
	refcount_set(&io->ref, 1);
	return io;
}
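/*
 * Request/io lifecycle: an erofs_fscache_rq covers a folio range and, once
 * its last reference is dropped, completes that range by unlocking every
 * folio (marking it uptodate unless req->error is set).  Each
 * erofs_fscache_io holds a reference on its owning request, so completion
 * is deferred until every in-flight io has terminated.  Roughly (a sketch
 * using only the helpers above):
 *
 *	req = erofs_fscache_req_alloc(mapping, pos, len);
 *	io = erofs_fscache_req_io_alloc(req);	// also takes a ref on req
 *	// ... issue cache reads completing through io->end_io ...
 *	erofs_fscache_req_io_put(io);	// drops io; may drop its req ref
 *	erofs_fscache_req_put(req);	// folios complete on the last put
 */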
/*
 * Read data from fscache described by cookie at pstart physical address
 * offset, and fill the read data into the buffer described by io->iter.
 */
static int erofs_fscache_read_io_async(struct fscache_cookie *cookie,
		loff_t pstart, struct erofs_fscache_io *io)
{
	enum netfs_io_source source;
	struct netfs_cache_resources *cres = &io->cres;
	struct iov_iter *iter = &io->iter;
	int ret;

	ret = fscache_begin_read_operation(cres, cookie);
	if (ret)
		return ret;

	while (iov_iter_count(iter)) {
		size_t orig_count = iov_iter_count(iter), len = orig_count;
		unsigned long flags = 1 << NETFS_SREQ_ONDEMAND;

		source = cres->ops->prepare_ondemand_read(cres,
				pstart, &len, LLONG_MAX, &flags, 0);
		if (WARN_ON(len == 0))
			source = NETFS_INVALID_READ;
		if (source != NETFS_READ_FROM_CACHE) {
			erofs_err(NULL, "prepare_ondemand_read failed (source %d)", source);
			return -EIO;
		}

		iov_iter_truncate(iter, len);
		refcount_inc(&io->ref);
		ret = fscache_read(cres, pstart, iter, NETFS_READ_HOLE_FAIL,
				   io->end_io, io);
		if (ret == -EIOCBQUEUED)
			ret = 0;
		if (ret) {
			erofs_err(NULL, "fscache_read failed (ret %d)", ret);
			return ret;
		}
		if (WARN_ON(iov_iter_count(iter)))
			return -EIO;

		iov_iter_reexpand(iter, orig_count - len);
		pstart += len;
	}
	return 0;
}

struct erofs_fscache_bio {
	struct erofs_fscache_io io;
	struct bio bio;		/* w/o bdev to share bio_add_page/endio() */
	struct bio_vec bvecs[BIO_MAX_VECS];
};

static void erofs_fscache_bio_endio(void *priv,
		ssize_t transferred_or_error, bool was_async)
{
	struct erofs_fscache_bio *io = priv;

	if (IS_ERR_VALUE(transferred_or_error))
		io->bio.bi_status = errno_to_blk_status(transferred_or_error);
	io->bio.bi_end_io(&io->bio);
	BUILD_BUG_ON(offsetof(struct erofs_fscache_bio, io) != 0);
	erofs_fscache_io_put(&io->io);
}

struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev)
{
	struct erofs_fscache_bio *io;

	io = kmalloc(sizeof(*io), GFP_KERNEL | __GFP_NOFAIL);
	bio_init(&io->bio, NULL, io->bvecs, BIO_MAX_VECS, REQ_OP_READ);
	io->io.private = mdev->m_fscache->cookie;
	io->io.end_io = erofs_fscache_bio_endio;
	refcount_set(&io->io.ref, 1);
	return &io->bio;
}

void erofs_fscache_submit_bio(struct bio *bio)
{
	struct erofs_fscache_bio *io = container_of(bio,
			struct erofs_fscache_bio, bio);
	int ret;

	iov_iter_bvec(&io->io.iter, ITER_DEST, io->bvecs, bio->bi_vcnt,
		      bio->bi_iter.bi_size);
	ret = erofs_fscache_read_io_async(io->io.private,
			bio->bi_iter.bi_sector << 9, &io->io);
	erofs_fscache_io_put(&io->io);
	if (!ret)
		return;
	bio->bi_status = errno_to_blk_status(ret);
	bio->bi_end_io(bio);
}

static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
{
	struct erofs_fscache *ctx = folio->mapping->host->i_private;
	int ret = -ENOMEM;
	struct erofs_fscache_rq *req;
	struct erofs_fscache_io *io;

	req = erofs_fscache_req_alloc(folio->mapping,
				folio_pos(folio), folio_size(folio));
	if (!req) {
		folio_unlock(folio);
		return ret;
	}

	io = erofs_fscache_req_io_alloc(req);
	if (!io) {
		req->error = ret;
		goto out;
	}
	iov_iter_xarray(&io->iter, ITER_DEST, &folio->mapping->i_pages,
			folio_pos(folio), folio_size(folio));

	ret = erofs_fscache_read_io_async(ctx->cookie, folio_pos(folio), io);
	if (ret)
		req->error = ret;

	erofs_fscache_req_io_put(io);
out:
	erofs_fscache_req_put(req);
	return ret;
}
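/*
 * Serve one slice of a data read request: map the logical extent at
 * (req->start + req->submitted), then either copy data stored in the
 * metadata area, zero-fill an unmapped hole, or fire an asynchronous
 * fscache read against the backing blob, advancing req->submitted by the
 * number of bytes handled.  Callers loop over this until the whole
 * request has been submitted.
 */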
static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
{
	struct address_space *mapping = req->mapping;
	struct inode *inode = mapping->host;
	struct super_block *sb = inode->i_sb;
	struct erofs_fscache_io *io;
	struct erofs_map_blocks map;
	struct erofs_map_dev mdev;
	loff_t pos = req->start + req->submitted;
	size_t count;
	int ret;

	map.m_la = pos;
	ret = erofs_map_blocks(inode, &map);
	if (ret)
		return ret;

	if (map.m_flags & EROFS_MAP_META) {
		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
		struct iov_iter iter;
		size_t size = map.m_llen;
		void *src;

		src = erofs_read_metabuf(&buf, sb, map.m_pa, EROFS_KMAP);
		if (IS_ERR(src))
			return PTR_ERR(src);

		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, PAGE_SIZE);
		if (copy_to_iter(src, size, &iter) != size) {
			erofs_put_metabuf(&buf);
			return -EFAULT;
		}
		iov_iter_zero(PAGE_SIZE - size, &iter);
		erofs_put_metabuf(&buf);
		req->submitted += PAGE_SIZE;
		return 0;
	}

	count = req->len - req->submitted;
	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
		struct iov_iter iter;

		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count);
		iov_iter_zero(count, &iter);
		req->submitted += count;
		return 0;
	}

	count = min_t(size_t, map.m_llen - (pos - map.m_la), count);
	DBG_BUGON(!count || count % PAGE_SIZE);

	mdev = (struct erofs_map_dev) {
		.m_deviceid = map.m_deviceid,
		.m_pa = map.m_pa,
	};
	ret = erofs_map_dev(sb, &mdev);
	if (ret)
		return ret;

	io = erofs_fscache_req_io_alloc(req);
	if (!io)
		return -ENOMEM;
	iov_iter_xarray(&io->iter, ITER_DEST, &mapping->i_pages, pos, count);
	ret = erofs_fscache_read_io_async(mdev.m_fscache->cookie,
			mdev.m_pa + (pos - map.m_la), io);
	erofs_fscache_req_io_put(io);

	req->submitted += count;
	return ret;
}

static int erofs_fscache_data_read(struct erofs_fscache_rq *req)
{
	int ret;

	do {
		ret = erofs_fscache_data_read_slice(req);
		if (ret)
			req->error = ret;
	} while (!ret && req->submitted < req->len);
	return ret;
}

static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
{
	struct erofs_fscache_rq *req;
	int ret;

	req = erofs_fscache_req_alloc(folio->mapping,
			folio_pos(folio), folio_size(folio));
	if (!req) {
		folio_unlock(folio);
		return -ENOMEM;
	}

	ret = erofs_fscache_data_read(req);
	erofs_fscache_req_put(req);
	return ret;
}

static void erofs_fscache_readahead(struct readahead_control *rac)
{
	struct erofs_fscache_rq *req;

	if (!readahead_count(rac))
		return;

	req = erofs_fscache_req_alloc(rac->mapping,
			readahead_pos(rac), readahead_length(rac));
	if (!req)
		return;

	/* The request completion will drop refs on the folios. */
	while (readahead_folio(rac))
		;

	erofs_fscache_data_read(req);
	erofs_fscache_req_put(req);
}

static const struct address_space_operations erofs_fscache_meta_aops = {
	.read_folio = erofs_fscache_meta_read_folio,
};

const struct address_space_operations erofs_fscache_access_aops = {
	.read_folio = erofs_fscache_read_folio,
	.readahead = erofs_fscache_readahead,
};
static void erofs_fscache_domain_put(struct erofs_domain *domain)
{
	mutex_lock(&erofs_domain_list_lock);
	if (refcount_dec_and_test(&domain->ref)) {
		list_del(&domain->list);
		if (list_empty(&erofs_domain_list)) {
			kern_unmount(erofs_pseudo_mnt);
			erofs_pseudo_mnt = NULL;
		}
		fscache_relinquish_volume(domain->volume, NULL, false);
		mutex_unlock(&erofs_domain_list_lock);
		kfree(domain->domain_id);
		kfree(domain);
		return;
	}
	mutex_unlock(&erofs_domain_list_lock);
}

static int erofs_fscache_register_volume(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	char *domain_id = sbi->domain_id;
	struct fscache_volume *volume;
	char *name;
	int ret = 0;

	name = kasprintf(GFP_KERNEL, "erofs,%s",
			 domain_id ? domain_id : sbi->fsid);
	if (!name)
		return -ENOMEM;

	volume = fscache_acquire_volume(name, NULL, NULL, 0);
	if (IS_ERR_OR_NULL(volume)) {
		erofs_err(sb, "failed to register volume for %s", name);
		ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP;
		volume = NULL;
	}

	sbi->volume = volume;
	kfree(name);
	return ret;
}

static int erofs_fscache_init_domain(struct super_block *sb)
{
	int err;
	struct erofs_domain *domain;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL);
	if (!domain)
		return -ENOMEM;

	domain->domain_id = kstrdup(sbi->domain_id, GFP_KERNEL);
	if (!domain->domain_id) {
		kfree(domain);
		return -ENOMEM;
	}

	err = erofs_fscache_register_volume(sb);
	if (err)
		goto out;

	if (!erofs_pseudo_mnt) {
		struct vfsmount *mnt = kern_mount(&erofs_anon_fs_type);

		if (IS_ERR(mnt)) {
			err = PTR_ERR(mnt);
			goto out;
		}
		erofs_pseudo_mnt = mnt;
	}

	domain->volume = sbi->volume;
	refcount_set(&domain->ref, 1);
	list_add(&domain->list, &erofs_domain_list);
	sbi->domain = domain;
	return 0;
out:
	kfree(domain->domain_id);
	kfree(domain);
	return err;
}

static int erofs_fscache_register_domain(struct super_block *sb)
{
	int err;
	struct erofs_domain *domain;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_lock(&erofs_domain_list_lock);
	list_for_each_entry(domain, &erofs_domain_list, list) {
		if (!strcmp(domain->domain_id, sbi->domain_id)) {
			sbi->domain = domain;
			sbi->volume = domain->volume;
			refcount_inc(&domain->ref);
			mutex_unlock(&erofs_domain_list_lock);
			return 0;
		}
	}
	err = erofs_fscache_init_domain(sb);
	mutex_unlock(&erofs_domain_list_lock);
	return err;
}
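/*
 * An erofs_fscache context ties together an fscache cookie (the handle
 * used for on-demand reads) and a backing anonymous inode whose pagecache
 * holds the blob's data.  With EROFS_REG_COOKIE_SHARE set, the inode is
 * allocated from the global pseudo mount instead of the mounting super
 * block, so that the same blob can be shared across the erofs instances
 * of a domain.
 */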
static struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
		char *name, unsigned int flags)
{
	struct fscache_volume *volume = EROFS_SB(sb)->volume;
	struct erofs_fscache *ctx;
	struct fscache_cookie *cookie;
	struct super_block *isb;
	struct inode *inode;
	int ret;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&ctx->node);
	refcount_set(&ctx->ref, 1);

	cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE,
					name, strlen(name), NULL, 0, 0);
	if (!cookie) {
		erofs_err(sb, "failed to get cookie for %s", name);
		ret = -EINVAL;
		goto err;
	}
	fscache_use_cookie(cookie, false);

	/*
	 * Allocate an anonymous inode in the global pseudo mount for
	 * shareable blobs, so that they are accessible among erofs fs
	 * instances.
	 */
	isb = flags & EROFS_REG_COOKIE_SHARE ? erofs_pseudo_mnt->mnt_sb : sb;
	inode = new_inode(isb);
	if (!inode) {
		erofs_err(sb, "failed to get anon inode for %s", name);
		ret = -ENOMEM;
		goto err_cookie;
	}

	inode->i_size = OFFSET_MAX;
	inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
	mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
	inode->i_blkbits = EROFS_SB(sb)->blkszbits;
	inode->i_private = ctx;

	ctx->cookie = cookie;
	ctx->inode = inode;
	return ctx;

err_cookie:
	fscache_unuse_cookie(cookie, NULL, NULL);
	fscache_relinquish_cookie(cookie, false);
err:
	kfree(ctx);
	return ERR_PTR(ret);
}

static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx)
{
	fscache_unuse_cookie(ctx->cookie, NULL, NULL);
	fscache_relinquish_cookie(ctx->cookie, false);
	iput(ctx->inode);
	kfree(ctx->name);
	kfree(ctx);
}

static struct erofs_fscache *erofs_domain_init_cookie(struct super_block *sb,
		char *name, unsigned int flags)
{
	struct erofs_fscache *ctx;
	struct erofs_domain *domain = EROFS_SB(sb)->domain;

	ctx = erofs_fscache_acquire_cookie(sb, name, flags);
	if (IS_ERR(ctx))
		return ctx;

	ctx->name = kstrdup(name, GFP_KERNEL);
	if (!ctx->name) {
		erofs_fscache_relinquish_cookie(ctx);
		return ERR_PTR(-ENOMEM);
	}

	refcount_inc(&domain->ref);
	ctx->domain = domain;
	list_add(&ctx->node, &erofs_domain_cookies_list);
	return ctx;
}

static struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
		char *name, unsigned int flags)
{
	struct erofs_fscache *ctx;
	struct erofs_domain *domain = EROFS_SB(sb)->domain;

	flags |= EROFS_REG_COOKIE_SHARE;
	mutex_lock(&erofs_domain_cookies_lock);
	list_for_each_entry(ctx, &erofs_domain_cookies_list, node) {
		if (ctx->domain != domain || strcmp(ctx->name, name))
			continue;
		if (!(flags & EROFS_REG_COOKIE_NEED_NOEXIST)) {
			refcount_inc(&ctx->ref);
		} else {
			erofs_err(sb, "%s already exists in domain %s", name,
				  domain->domain_id);
			ctx = ERR_PTR(-EEXIST);
		}
		mutex_unlock(&erofs_domain_cookies_lock);
		return ctx;
	}
	ctx = erofs_domain_init_cookie(sb, name, flags);
	mutex_unlock(&erofs_domain_cookies_lock);
	return ctx;
}
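/*
 * Register a cookie for the blob `name`.  In domain mode, cookies are
 * shared per (domain, name) pair via erofs_domain_cookies_list; otherwise
 * a private cookie is acquired directly against this super block's own
 * volume.
 */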
struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
						    char *name,
						    unsigned int flags)
{
	if (EROFS_SB(sb)->domain_id)
		return erofs_domain_register_cookie(sb, name, flags);
	return erofs_fscache_acquire_cookie(sb, name, flags);
}

void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx)
{
	struct erofs_domain *domain = NULL;

	if (!ctx)
		return;
	if (!ctx->domain)
		return erofs_fscache_relinquish_cookie(ctx);

	mutex_lock(&erofs_domain_cookies_lock);
	if (refcount_dec_and_test(&ctx->ref)) {
		domain = ctx->domain;
		list_del(&ctx->node);
		erofs_fscache_relinquish_cookie(ctx);
	}
	mutex_unlock(&erofs_domain_cookies_lock);
	if (domain)
		erofs_fscache_domain_put(domain);
}

int erofs_fscache_register_fs(struct super_block *sb)
{
	int ret;
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_fscache *fscache;
	unsigned int flags = 0;

	if (sbi->domain_id)
		ret = erofs_fscache_register_domain(sb);
	else
		ret = erofs_fscache_register_volume(sb);
	if (ret)
		return ret;

	/*
	 * When a shared domain is enabled, use NEED_NOEXIST to guarantee
	 * that the primary data blob (aka fsid) is unique in the shared
	 * domain.
	 *
	 * For the non-shared-domain case, fscache_acquire_volume() invoked
	 * by erofs_fscache_register_volume() has already guaranteed the
	 * uniqueness of the primary data blob.
	 *
	 * An acquired domain/volume will be relinquished in kill_sb() on
	 * error.
	 */
	if (sbi->domain_id)
		flags |= EROFS_REG_COOKIE_NEED_NOEXIST;
	fscache = erofs_fscache_register_cookie(sb, sbi->fsid, flags);
	if (IS_ERR(fscache))
		return PTR_ERR(fscache);

	sbi->s_fscache = fscache;
	return 0;
}

void erofs_fscache_unregister_fs(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	erofs_fscache_unregister_cookie(sbi->s_fscache);

	if (sbi->domain)
		erofs_fscache_domain_put(sbi->domain);
	else
		fscache_relinquish_volume(sbi->volume, NULL, false);

	sbi->s_fscache = NULL;
	sbi->volume = NULL;
	sbi->domain = NULL;
}