// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2022, Alibaba Cloud
 * Copyright (C) 2022, Bytedance Inc. All rights reserved.
 */
#include <linux/pseudo_fs.h>
#include <linux/fscache.h>
#include "internal.h"

static DEFINE_MUTEX(erofs_domain_list_lock);
static DEFINE_MUTEX(erofs_domain_cookies_lock);
static LIST_HEAD(erofs_domain_list);
static LIST_HEAD(erofs_domain_cookies_list);
static struct vfsmount *erofs_pseudo_mnt;

static int erofs_anon_init_fs_context(struct fs_context *fc)
{
	return init_pseudo(fc, EROFS_SUPER_MAGIC) ? 0 : -ENOMEM;
}

static struct file_system_type erofs_anon_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "pseudo_erofs",
	.init_fs_context = erofs_anon_init_fs_context,
	.kill_sb	= kill_anon_super,
};

struct erofs_fscache_io {
	struct netfs_cache_resources cres;
	struct iov_iter		iter;
	netfs_io_terminated_t	end_io;
	void			*private;
	refcount_t		ref;
};

struct erofs_fscache_rq {
	struct address_space	*mapping;	/* The mapping being accessed */
	loff_t			start;		/* Start position */
	size_t			len;		/* Length of the request */
	size_t			submitted;	/* Length submitted so far */
	short			error;		/* 0 or error that occurred */
	refcount_t		ref;
};

static bool erofs_fscache_io_put(struct erofs_fscache_io *io)
{
	if (!refcount_dec_and_test(&io->ref))
		return false;
	if (io->cres.ops)
		io->cres.ops->end_operation(&io->cres);
	kfree(io);
	return true;
}

/* Mark all folios covered by the request uptodate (on success) and unlock them */
static void erofs_fscache_req_complete(struct erofs_fscache_rq *req)
{
	struct folio *folio;
	bool failed = req->error;
	pgoff_t start_page = req->start / PAGE_SIZE;
	pgoff_t last_page = ((req->start + req->len) / PAGE_SIZE) - 1;

	XA_STATE(xas, &req->mapping->i_pages, start_page);

	rcu_read_lock();
	xas_for_each(&xas, folio, last_page) {
		if (xas_retry(&xas, folio))
			continue;
		if (!failed)
			folio_mark_uptodate(folio);
		folio_unlock(folio);
	}
	rcu_read_unlock();
}

static void erofs_fscache_req_put(struct erofs_fscache_rq *req)
{
	if (!refcount_dec_and_test(&req->ref))
		return;
	erofs_fscache_req_complete(req);
	kfree(req);
}

static struct erofs_fscache_rq *erofs_fscache_req_alloc(struct address_space *mapping,
						loff_t start, size_t len)
{
	struct erofs_fscache_rq *req = kzalloc(sizeof(*req), GFP_KERNEL);

	if (!req)
		return NULL;
	req->mapping = mapping;
	req->start = start;
	req->len = len;
	refcount_set(&req->ref, 1);
	return req;
}

static void erofs_fscache_req_io_put(struct erofs_fscache_io *io)
{
	struct erofs_fscache_rq *req = io->private;

	if (erofs_fscache_io_put(io))
		erofs_fscache_req_put(req);
}

static void erofs_fscache_req_end_io(void *priv, ssize_t transferred_or_error)
{
	struct erofs_fscache_io *io = priv;
	struct erofs_fscache_rq *req = io->private;

	if (IS_ERR_VALUE(transferred_or_error))
		req->error = transferred_or_error;
	erofs_fscache_req_io_put(io);
}

static struct erofs_fscache_io *erofs_fscache_req_io_alloc(struct erofs_fscache_rq *req)
{
	struct erofs_fscache_io *io = kzalloc(sizeof(*io), GFP_KERNEL);

	if (!io)
		return NULL;
	io->end_io = erofs_fscache_req_end_io;
	io->private = req;
	refcount_inc(&req->ref);
	refcount_set(&io->ref, 1);
	return io;
}
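/*
 * Lifecycle sketch (illustrative only, not compiled): a request starts with
 * one reference, each io allocated against it takes another, and the folios
 * are only unlocked once the final reference is dropped:
 *
 *	req = erofs_fscache_req_alloc(mapping, pos, len);  // req->ref == 1
 *	io = erofs_fscache_req_io_alloc(req);              // req->ref == 2
 *	// ... submit reads; each in-flight read holds an io->ref ...
 *	erofs_fscache_req_io_put(io);  // drops io->ref, maybe req->ref too
 *	erofs_fscache_req_put(req);    // completion runs on the last put
 */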
/*
 * Read data from fscache described by cookie at pstart physical address
 * offset, and fill the read data into buffer described by io->iter.
 */
static int erofs_fscache_read_io_async(struct fscache_cookie *cookie,
		loff_t pstart, struct erofs_fscache_io *io)
{
	enum netfs_io_source source;
	struct netfs_cache_resources *cres = &io->cres;
	struct iov_iter *iter = &io->iter;
	int ret;

	ret = fscache_begin_read_operation(cres, cookie);
	if (ret)
		return ret;

	while (iov_iter_count(iter)) {
		size_t orig_count = iov_iter_count(iter), len = orig_count;
		unsigned long flags = 1 << NETFS_SREQ_ONDEMAND;

		source = cres->ops->prepare_ondemand_read(cres,
				pstart, &len, LLONG_MAX, &flags, 0);
		if (WARN_ON(len == 0))
			source = NETFS_INVALID_READ;
		if (source != NETFS_READ_FROM_CACHE) {
			erofs_err(NULL, "prepare_ondemand_read failed (source %d)", source);
			return -EIO;
		}

		iov_iter_truncate(iter, len);
		refcount_inc(&io->ref);
		ret = fscache_read(cres, pstart, iter, NETFS_READ_HOLE_FAIL,
				   io->end_io, io);
		if (ret == -EIOCBQUEUED)
			ret = 0;
		if (ret) {
			erofs_err(NULL, "fscache_read failed (ret %d)", ret);
			return ret;
		}
		if (WARN_ON(iov_iter_count(iter)))
			return -EIO;

		iov_iter_reexpand(iter, orig_count - len);
		pstart += len;
	}
	return 0;
}

struct erofs_fscache_bio {
	struct erofs_fscache_io io;
	struct bio bio;		/* w/o bdev to share bio_add_page/endio() */
	struct bio_vec bvecs[BIO_MAX_VECS];
};

static void erofs_fscache_bio_endio(void *priv, ssize_t transferred_or_error)
{
	struct erofs_fscache_bio *io = priv;

	if (IS_ERR_VALUE(transferred_or_error))
		io->bio.bi_status = errno_to_blk_status(transferred_or_error);
	io->bio.bi_end_io(&io->bio);
	BUILD_BUG_ON(offsetof(struct erofs_fscache_bio, io) != 0);
	erofs_fscache_io_put(&io->io);
}

struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev)
{
	struct erofs_fscache_bio *io;

	io = kmalloc(sizeof(*io), GFP_KERNEL | __GFP_NOFAIL);
	bio_init(&io->bio, NULL, io->bvecs, BIO_MAX_VECS, REQ_OP_READ);
	io->io.private = mdev->m_dif->fscache->cookie;
	io->io.end_io = erofs_fscache_bio_endio;
	refcount_set(&io->io.ref, 1);
	return &io->bio;
}

void erofs_fscache_submit_bio(struct bio *bio)
{
	struct erofs_fscache_bio *io = container_of(bio,
			struct erofs_fscache_bio, bio);
	int ret;

	iov_iter_bvec(&io->io.iter, ITER_DEST, io->bvecs, bio->bi_vcnt,
		      bio->bi_iter.bi_size);
	ret = erofs_fscache_read_io_async(io->io.private,
			bio->bi_iter.bi_sector << 9, &io->io);
	erofs_fscache_io_put(&io->io);
	if (!ret)
		return;
	bio->bi_status = errno_to_blk_status(ret);
	bio->bi_end_io(bio);
}

static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
{
	struct erofs_fscache *ctx = folio->mapping->host->i_private;
	int ret = -ENOMEM;
	struct erofs_fscache_rq *req;
	struct erofs_fscache_io *io;

	req = erofs_fscache_req_alloc(folio->mapping,
				folio_pos(folio), folio_size(folio));
	if (!req) {
		folio_unlock(folio);
		return ret;
	}

	io = erofs_fscache_req_io_alloc(req);
	if (!io) {
		req->error = ret;
		goto out;
	}
	iov_iter_xarray(&io->iter, ITER_DEST, &folio->mapping->i_pages,
			folio_pos(folio), folio_size(folio));

	ret = erofs_fscache_read_io_async(ctx->cookie, folio_pos(folio), io);
	if (ret)
		req->error = ret;

	erofs_fscache_req_io_put(io);
out:
	erofs_fscache_req_put(req);
	return ret;
}
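/*
 * Illustrative caller (a sketch, not part of this file; 'index' is a
 * hypothetical folio index): metadata folios of the anonymous fscache inode
 * are populated through the read_folio hook above, e.g. via the generic
 * page cache helper:
 *
 *	struct folio *folio;
 *
 *	folio = read_mapping_folio(ctx->inode->i_mapping, index, NULL);
 *	if (IS_ERR(folio))
 *		return PTR_ERR(folio);	// -ENOMEM/-EIO raised by the hook
 */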
static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
{
	struct address_space *mapping = req->mapping;
	struct inode *inode = mapping->host;
	struct super_block *sb = inode->i_sb;
	struct erofs_fscache_io *io;
	struct erofs_map_blocks map;
	struct erofs_map_dev mdev;
	loff_t pos = req->start + req->submitted;
	size_t count;
	int ret;

	map.m_la = pos;
	ret = erofs_map_blocks(inode, &map);
	if (ret)
		return ret;

	if (map.m_flags & EROFS_MAP_META) {
		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
		struct iov_iter iter;
		size_t size = map.m_llen;
		void *src;

		src = erofs_read_metabuf(&buf, sb, map.m_pa,
					 erofs_inode_in_metabox(inode));
		if (IS_ERR(src))
			return PTR_ERR(src);

		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, PAGE_SIZE);
		if (copy_to_iter(src, size, &iter) != size) {
			erofs_put_metabuf(&buf);
			return -EFAULT;
		}
		iov_iter_zero(PAGE_SIZE - size, &iter);
		erofs_put_metabuf(&buf);
		req->submitted += PAGE_SIZE;
		return 0;
	}

	count = req->len - req->submitted;
	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
		struct iov_iter iter;

		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count);
		iov_iter_zero(count, &iter);
		req->submitted += count;
		return 0;
	}

	count = min_t(size_t, map.m_llen - (pos - map.m_la), count);
	DBG_BUGON(!count || count % PAGE_SIZE);

	mdev = (struct erofs_map_dev) {
		.m_deviceid = map.m_deviceid,
		.m_pa = map.m_pa,
	};
	ret = erofs_map_dev(sb, &mdev);
	if (ret)
		return ret;

	io = erofs_fscache_req_io_alloc(req);
	if (!io)
		return -ENOMEM;
	iov_iter_xarray(&io->iter, ITER_DEST, &mapping->i_pages, pos, count);
	ret = erofs_fscache_read_io_async(mdev.m_dif->fscache->cookie,
			mdev.m_pa + (pos - map.m_la), io);
	erofs_fscache_req_io_put(io);

	req->submitted += count;
	return ret;
}

static int erofs_fscache_data_read(struct erofs_fscache_rq *req)
{
	int ret;

	do {
		ret = erofs_fscache_data_read_slice(req);
		if (ret)
			req->error = ret;
	} while (!ret && req->submitted < req->len);
	return ret;
}

static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
{
	struct erofs_fscache_rq *req;
	int ret;

	req = erofs_fscache_req_alloc(folio->mapping,
			folio_pos(folio), folio_size(folio));
	if (!req) {
		folio_unlock(folio);
		return -ENOMEM;
	}

	ret = erofs_fscache_data_read(req);
	erofs_fscache_req_put(req);
	return ret;
}
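/*
 * How one request gets sliced (a sketch with hypothetical extents): a read
 * spanning [meta][hole][mapped] is served in three passes of the slice
 * helper above,
 *
 *	slice 1: EROFS_MAP_META    -> copy from metabuf, zero-pad one page
 *	slice 2: !EROFS_MAP_MAPPED -> iov_iter_zero() the hole
 *	slice 3: EROFS_MAP_MAPPED  -> erofs_fscache_read_io_async() from cache
 *
 * and erofs_fscache_data_read() loops until req->submitted reaches req->len.
 */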
static void erofs_fscache_readahead(struct readahead_control *rac)
{
	struct erofs_fscache_rq *req;

	if (!readahead_count(rac))
		return;

	req = erofs_fscache_req_alloc(rac->mapping,
			readahead_pos(rac), readahead_length(rac));
	if (!req)
		return;

	/* The request completion will drop refs on the folios. */
	while (readahead_folio(rac))
		;

	erofs_fscache_data_read(req);
	erofs_fscache_req_put(req);
}

static const struct address_space_operations erofs_fscache_meta_aops = {
	.read_folio = erofs_fscache_meta_read_folio,
};

const struct address_space_operations erofs_fscache_access_aops = {
	.read_folio = erofs_fscache_read_folio,
	.readahead = erofs_fscache_readahead,
};

static void erofs_fscache_domain_put(struct erofs_domain *domain)
{
	mutex_lock(&erofs_domain_list_lock);
	if (refcount_dec_and_test(&domain->ref)) {
		list_del(&domain->list);
		if (list_empty(&erofs_domain_list)) {
			kern_unmount(erofs_pseudo_mnt);
			erofs_pseudo_mnt = NULL;
		}
		fscache_relinquish_volume(domain->volume, NULL, false);
		mutex_unlock(&erofs_domain_list_lock);
		kfree(domain->domain_id);
		kfree(domain);
		return;
	}
	mutex_unlock(&erofs_domain_list_lock);
}

static int erofs_fscache_register_volume(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	char *domain_id = sbi->domain_id;
	struct fscache_volume *volume;
	char *name;
	int ret = 0;

	name = kasprintf(GFP_KERNEL, "erofs,%s",
			 domain_id ? domain_id : sbi->fsid);
	if (!name)
		return -ENOMEM;

	volume = fscache_acquire_volume(name, NULL, NULL, 0);
	if (IS_ERR_OR_NULL(volume)) {
		erofs_err(sb, "failed to register volume for %s", name);
		ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP;
		volume = NULL;
	}

	sbi->volume = volume;
	kfree(name);
	return ret;
}

static int erofs_fscache_init_domain(struct super_block *sb)
{
	int err;
	struct erofs_domain *domain;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL);
	if (!domain)
		return -ENOMEM;

	domain->domain_id = kstrdup(sbi->domain_id, GFP_KERNEL);
	if (!domain->domain_id) {
		kfree(domain);
		return -ENOMEM;
	}

	err = erofs_fscache_register_volume(sb);
	if (err)
		goto out;

	if (!erofs_pseudo_mnt) {
		struct vfsmount *mnt = kern_mount(&erofs_anon_fs_type);

		if (IS_ERR(mnt)) {
			err = PTR_ERR(mnt);
			goto out;
		}
		erofs_pseudo_mnt = mnt;
	}

	domain->volume = sbi->volume;
	refcount_set(&domain->ref, 1);
	list_add(&domain->list, &erofs_domain_list);
	sbi->domain = domain;
	return 0;
out:
	kfree(domain->domain_id);
	kfree(domain);
	return err;
}

static int erofs_fscache_register_domain(struct super_block *sb)
{
	int err;
	struct erofs_domain *domain;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_lock(&erofs_domain_list_lock);
	list_for_each_entry(domain, &erofs_domain_list, list) {
		if (!strcmp(domain->domain_id, sbi->domain_id)) {
			sbi->domain = domain;
			sbi->volume = domain->volume;
			refcount_inc(&domain->ref);
			mutex_unlock(&erofs_domain_list_lock);
			return 0;
		}
	}
	err = erofs_fscache_init_domain(sb);
	mutex_unlock(&erofs_domain_list_lock);
	return err;
}
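/*
 * Domain sharing sketch (mount syntax shown for illustration only): two
 * fscache-mode mounts passing the same domain_id reuse a single struct
 * erofs_domain, and hence a single fscache volume, e.g.
 *
 *	mount -t erofs none -o fsid=imgA,domain_id=shared /mnt/a
 *	mount -t erofs none -o fsid=imgB,domain_id=shared /mnt/b
 *
 * The first mount creates the domain via erofs_fscache_init_domain(); the
 * second merely bumps domain->ref under erofs_domain_list_lock.
 */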
static struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
		char *name, unsigned int flags)
{
	struct fscache_volume *volume = EROFS_SB(sb)->volume;
	struct erofs_fscache *ctx;
	struct fscache_cookie *cookie;
	struct super_block *isb;
	struct inode *inode;
	int ret;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&ctx->node);
	refcount_set(&ctx->ref, 1);

	cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE,
					name, strlen(name), NULL, 0, 0);
	if (!cookie) {
		erofs_err(sb, "failed to get cookie for %s", name);
		ret = -EINVAL;
		goto err;
	}
	fscache_use_cookie(cookie, false);

	/*
	 * Allocate an anonymous inode in the global pseudo mount for shareable
	 * blobs, so that they are accessible among erofs fs instances.
	 */
	isb = flags & EROFS_REG_COOKIE_SHARE ? erofs_pseudo_mnt->mnt_sb : sb;
	inode = new_inode(isb);
	if (!inode) {
		erofs_err(sb, "failed to get anon inode for %s", name);
		ret = -ENOMEM;
		goto err_cookie;
	}

	inode->i_size = OFFSET_MAX;
	inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
	mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
	inode->i_blkbits = EROFS_SB(sb)->blkszbits;
	inode->i_private = ctx;

	ctx->cookie = cookie;
	ctx->inode = inode;
	return ctx;

err_cookie:
	fscache_unuse_cookie(cookie, NULL, NULL);
	fscache_relinquish_cookie(cookie, false);
err:
	kfree(ctx);
	return ERR_PTR(ret);
}

static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx)
{
	fscache_unuse_cookie(ctx->cookie, NULL, NULL);
	fscache_relinquish_cookie(ctx->cookie, false);
	iput(ctx->inode);
	kfree(ctx->name);
	kfree(ctx);
}

static struct erofs_fscache *erofs_domain_init_cookie(struct super_block *sb,
		char *name, unsigned int flags)
{
	struct erofs_fscache *ctx;
	struct erofs_domain *domain = EROFS_SB(sb)->domain;

	ctx = erofs_fscache_acquire_cookie(sb, name, flags);
	if (IS_ERR(ctx))
		return ctx;

	ctx->name = kstrdup(name, GFP_KERNEL);
	if (!ctx->name) {
		erofs_fscache_relinquish_cookie(ctx);
		return ERR_PTR(-ENOMEM);
	}

	refcount_inc(&domain->ref);
	ctx->domain = domain;
	list_add(&ctx->node, &erofs_domain_cookies_list);
	return ctx;
}

static struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
		char *name, unsigned int flags)
{
	struct erofs_fscache *ctx;
	struct erofs_domain *domain = EROFS_SB(sb)->domain;

	flags |= EROFS_REG_COOKIE_SHARE;
	mutex_lock(&erofs_domain_cookies_lock);
	list_for_each_entry(ctx, &erofs_domain_cookies_list, node) {
		if (ctx->domain != domain || strcmp(ctx->name, name))
			continue;
		if (!(flags & EROFS_REG_COOKIE_NEED_NOEXIST)) {
			refcount_inc(&ctx->ref);
		} else {
			erofs_err(sb, "%s already exists in domain %s", name,
				  domain->domain_id);
			ctx = ERR_PTR(-EEXIST);
		}
		mutex_unlock(&erofs_domain_cookies_lock);
		return ctx;
	}
	ctx = erofs_domain_init_cookie(sb, name, flags);
	mutex_unlock(&erofs_domain_cookies_lock);
	return ctx;
}

struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
						    char *name,
						    unsigned int flags)
{
	if (EROFS_SB(sb)->domain_id)
		return erofs_domain_register_cookie(sb, name, flags);
	return erofs_fscache_acquire_cookie(sb, name, flags);
}
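/*
 * Typical pairing (a sketch; the surrounding code is assumed, not shown):
 * auxiliary data blobs such as device-table entries each register a cookie
 * on mount and drop it on teardown:
 *
 *	struct erofs_fscache *ctx;
 *
 *	ctx = erofs_fscache_register_cookie(sb, dif->path, 0);
 *	if (IS_ERR(ctx))
 *		return PTR_ERR(ctx);
 *	...
 *	erofs_fscache_unregister_cookie(ctx);
 */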
void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx)
{
	struct erofs_domain *domain = NULL;

	if (!ctx)
		return;
	if (!ctx->domain)
		return erofs_fscache_relinquish_cookie(ctx);

	mutex_lock(&erofs_domain_cookies_lock);
	if (refcount_dec_and_test(&ctx->ref)) {
		domain = ctx->domain;
		list_del(&ctx->node);
		erofs_fscache_relinquish_cookie(ctx);
	}
	mutex_unlock(&erofs_domain_cookies_lock);
	if (domain)
		erofs_fscache_domain_put(domain);
}

int erofs_fscache_register_fs(struct super_block *sb)
{
	int ret;
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_fscache *fscache;
	unsigned int flags = 0;

	if (sbi->domain_id)
		ret = erofs_fscache_register_domain(sb);
	else
		ret = erofs_fscache_register_volume(sb);
	if (ret)
		return ret;

	/*
	 * When a shared domain is enabled, use NEED_NOEXIST to guarantee that
	 * the primary data blob (aka fsid) is unique in the shared domain.
	 *
	 * For the non-shared-domain case, fscache_acquire_volume() invoked by
	 * erofs_fscache_register_volume() has already guaranteed the
	 * uniqueness of the primary data blob.
	 *
	 * An acquired domain/volume will be relinquished in kill_sb() on error.
	 */
	if (sbi->domain_id)
		flags |= EROFS_REG_COOKIE_NEED_NOEXIST;
	fscache = erofs_fscache_register_cookie(sb, sbi->fsid, flags);
	if (IS_ERR(fscache))
		return PTR_ERR(fscache);

	sbi->dif0.fscache = fscache;
	return 0;
}

void erofs_fscache_unregister_fs(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	erofs_fscache_unregister_cookie(sbi->dif0.fscache);

	if (sbi->domain)
		erofs_fscache_domain_put(sbi->domain);
	else
		fscache_relinquish_volume(sbi->volume, NULL, false);

	sbi->dif0.fscache = NULL;
	sbi->volume = NULL;
	sbi->domain = NULL;
}
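/*
 * Mount-time pairing (a sketch; exact call sites may differ by kernel
 * version): the fill_super path registers the primary data blob, and
 * kill_sb unconditionally undoes it, even after a partial setup failure:
 *
 *	err = erofs_fscache_register_fs(sb);	// e.g. in fill_super
 *	if (err)
 *		return err;
 *	...
 *	erofs_fscache_unregister_fs(sb);	// e.g. in kill_sb
 */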