// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2022, Alibaba Cloud
 * Copyright (C) 2022, Bytedance Inc. All rights reserved.
 */
#include <linux/fscache.h>
#include "internal.h"

static DEFINE_MUTEX(erofs_domain_list_lock);
static DEFINE_MUTEX(erofs_domain_cookies_lock);
static LIST_HEAD(erofs_domain_list);
static LIST_HEAD(erofs_domain_cookies_list);
static struct vfsmount *erofs_pseudo_mnt;

/*
 * One in-flight read against the cache.  An io holds a reference on its
 * owning request (if any) via ->private, and is refcounted itself since
 * fscache_read() completes asynchronously.
 */
struct erofs_fscache_io {
	struct netfs_cache_resources cres;
	struct iov_iter iter;
	netfs_io_terminated_t end_io;
	void *private;		/* owning request, or the cookie for bio I/O */
	refcount_t ref;
};

/* A page-cache read request, possibly split into several cache reads (ios). */
struct erofs_fscache_rq {
	struct address_space *mapping;	/* The mapping being accessed */
	loff_t start;			/* Start position */
	size_t len;			/* Length of the request */
	size_t submitted;		/* Length of submitted */
	short error;			/* 0 or error that occurred */
	refcount_t ref;
};

/* Drop a reference on @io; returns true if this was the last one. */
static bool erofs_fscache_io_put(struct erofs_fscache_io *io)
{
	if (!refcount_dec_and_test(&io->ref))
		return false;
	if (io->cres.ops)
		io->cres.ops->end_operation(&io->cres);
	kfree(io);
	return true;
}

/*
 * Finish a request: unlock every locked folio in the covered range,
 * marking them uptodate unless any part of the request failed.
 */
static void erofs_fscache_req_complete(struct erofs_fscache_rq *req)
{
	struct folio *folio;
	bool failed = req->error;
	pgoff_t start_page = req->start / PAGE_SIZE;
	pgoff_t last_page = ((req->start + req->len) / PAGE_SIZE) - 1;

	XA_STATE(xas, &req->mapping->i_pages, start_page);

	rcu_read_lock();
	xas_for_each(&xas, folio, last_page) {
		if (xas_retry(&xas, folio))
			continue;
		if (!failed)
			folio_mark_uptodate(folio);
		folio_unlock(folio);
	}
	rcu_read_unlock();
}

/* Drop a reference on @req, completing and freeing it on the last put. */
static void erofs_fscache_req_put(struct erofs_fscache_rq *req)
{
	if (!refcount_dec_and_test(&req->ref))
		return;
	erofs_fscache_req_complete(req);
	kfree(req);
}

static struct erofs_fscache_rq *erofs_fscache_req_alloc(struct address_space *mapping,
						loff_t start, size_t len)
{
	struct erofs_fscache_rq *req = kzalloc(sizeof(*req), GFP_KERNEL);

	if (!req)
		return NULL;
	req->mapping = mapping;
	req->start = start;
	req->len = len;
	refcount_set(&req->ref, 1);
	return req;
}

/* Drop an io that belongs to a request; the last io put also releases
 * the reference the io held on its request. */
static void erofs_fscache_req_io_put(struct erofs_fscache_io *io)
{
	struct erofs_fscache_rq *req = io->private;

	if (erofs_fscache_io_put(io))
		erofs_fscache_req_put(req);
}

/* Async completion for request-backed ios: record the error (if any). */
static void erofs_fscache_req_end_io(void *priv, ssize_t transferred_or_error)
{
	struct erofs_fscache_io *io = priv;
	struct erofs_fscache_rq *req = io->private;

	if (IS_ERR_VALUE(transferred_or_error))
		req->error = transferred_or_error;
	erofs_fscache_req_io_put(io);
}

/* Allocate an io for @req; takes an extra reference on the request. */
static struct erofs_fscache_io *erofs_fscache_req_io_alloc(struct erofs_fscache_rq *req)
{
	struct erofs_fscache_io *io = kzalloc(sizeof(*io), GFP_KERNEL);

	if (!io)
		return NULL;
	io->end_io = erofs_fscache_req_end_io;
	io->private = req;
	refcount_inc(&req->ref);
	refcount_set(&io->ref, 1);
	return io;
}

/*
 * Read data from fscache described by cookie at pstart physical address
 * offset, and fill the read data into buffer described by io->iter.
 *
 * The iter may need to be split into multiple fscache_read() calls; each
 * in-flight read pins @io with its own reference, dropped in io->end_io.
 */
static int erofs_fscache_read_io_async(struct fscache_cookie *cookie,
		loff_t pstart, struct erofs_fscache_io *io)
{
	enum netfs_io_source source;
	struct netfs_cache_resources *cres = &io->cres;
	struct iov_iter *iter = &io->iter;
	int ret;

	ret = fscache_begin_read_operation(cres, cookie);
	if (ret)
		return ret;

	while (iov_iter_count(iter)) {
		size_t orig_count = iov_iter_count(iter), len = orig_count;
		unsigned long flags = 1 << NETFS_SREQ_ONDEMAND;

		source = cres->ops->prepare_ondemand_read(cres,
				pstart, &len, LLONG_MAX, &flags, 0);
		if (WARN_ON(len == 0))
			source = NETFS_INVALID_READ;
		if (source != NETFS_READ_FROM_CACHE) {
			erofs_err(NULL, "prepare_ondemand_read failed (source %d)", source);
			return -EIO;
		}

		/* Shrink the iter to the chunk the cache can serve now. */
		iov_iter_truncate(iter, len);
		refcount_inc(&io->ref);
		ret = fscache_read(cres, pstart, iter, NETFS_READ_HOLE_FAIL,
				   io->end_io, io);
		if (ret == -EIOCBQUEUED)
			ret = 0;
		if (ret) {
			erofs_err(NULL, "fscache_read failed (ret %d)", ret);
			return ret;
		}
		if (WARN_ON(iov_iter_count(iter)))
			return -EIO;

		/* Restore the remainder of the original range and advance. */
		iov_iter_reexpand(iter, orig_count - len);
		pstart += len;
	}
	return 0;
}

struct erofs_fscache_bio {
	struct erofs_fscache_io io;
	struct bio bio;		/* w/o bdev to share bio_add_page/endio() */
	struct bio_vec bvecs[BIO_MAX_VECS];
};

static void erofs_fscache_bio_endio(void *priv, ssize_t transferred_or_error)
{
	struct erofs_fscache_bio *io = priv;

	if (IS_ERR_VALUE(transferred_or_error))
		io->bio.bi_status = errno_to_blk_status(transferred_or_error);
	bio_endio(&io->bio);
	/* ->io must stay first so kfree(&io->io) frees the whole struct */
	BUILD_BUG_ON(offsetof(struct erofs_fscache_bio, io) != 0);
	erofs_fscache_io_put(&io->io);
}

struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev)
{
	struct erofs_fscache_bio *io;

	io = kmalloc(sizeof(*io), GFP_KERNEL | __GFP_NOFAIL);
	bio_init(&io->bio, NULL, io->bvecs, BIO_MAX_VECS, REQ_OP_READ);
	io->io.private = mdev->m_dif->fscache->cookie;
	io->io.end_io = erofs_fscache_bio_endio;
	refcount_set(&io->io.ref, 1);
	return &io->bio;
}

void erofs_fscache_submit_bio(struct bio *bio)
{
	struct erofs_fscache_bio *io = container_of(bio,
			struct erofs_fscache_bio, bio);
	int ret;

	iov_iter_bvec(&io->io.iter, ITER_DEST, io->bvecs, bio->bi_vcnt,
		      bio->bi_iter.bi_size);
	ret = erofs_fscache_read_io_async(io->io.private,
			bio->bi_iter.bi_sector << 9, &io->io);
	/* drop the initial ref; in-flight reads keep the io alive */
	erofs_fscache_io_put(&io->io);
	if (!ret)
		return;
	bio->bi_status = errno_to_blk_status(ret);
	bio_endio(bio);
}

/* read_folio for the anonymous inodes backing metadata blobs */
static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
{
	struct erofs_fscache *ctx = folio->mapping->host->i_private;
	int ret = -ENOMEM;
	struct erofs_fscache_rq *req;
	struct erofs_fscache_io *io;

	req = erofs_fscache_req_alloc(folio->mapping,
				folio_pos(folio), folio_size(folio));
	if (!req) {
		folio_unlock(folio);
		return ret;
	}

	io = erofs_fscache_req_io_alloc(req);
	if (!io) {
		req->error = ret;
		goto out;
	}
	iov_iter_xarray(&io->iter, ITER_DEST, &folio->mapping->i_pages,
			folio_pos(folio), folio_size(folio));

	ret = erofs_fscache_read_io_async(ctx->cookie, folio_pos(folio), io);
	if (ret)
		req->error = ret;

	erofs_fscache_req_io_put(io);
out:
	erofs_fscache_req_put(req);
	return ret;
}

/*
 * Submit one extent's worth of @req, starting at req->start + req->submitted.
 * Inline (META) data is copied from the metabuf, unmapped holes are zeroed,
 * and mapped extents are read asynchronously from the cache.
 */
static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
{
	struct address_space *mapping = req->mapping;
	struct inode *inode = mapping->host;
	struct super_block *sb = inode->i_sb;
	struct erofs_fscache_io *io;
	struct erofs_map_blocks map;
	struct erofs_map_dev mdev;
	loff_t pos = req->start + req->submitted;
	size_t count;
	int ret;

	map.m_la = pos;
	ret = erofs_map_blocks(inode, &map);
	if (ret)
		return ret;

	if (map.m_flags & EROFS_MAP_META) {
		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
		struct iov_iter iter;
		size_t size = map.m_llen;
		void *src;

		src = erofs_read_metabuf(&buf, sb, map.m_pa,
					 erofs_inode_in_metabox(inode));
		if (IS_ERR(src))
			return PTR_ERR(src);

		/* inline data fits in one page; zero-fill the tail */
		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, PAGE_SIZE);
		if (copy_to_iter(src, size, &iter) != size) {
			erofs_put_metabuf(&buf);
			return -EFAULT;
		}
		iov_iter_zero(PAGE_SIZE - size, &iter);
		erofs_put_metabuf(&buf);
		req->submitted += PAGE_SIZE;
		return 0;
	}

	count = req->len - req->submitted;
	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
		struct iov_iter iter;

		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count);
		iov_iter_zero(count, &iter);
		req->submitted += count;
		return 0;
	}

	/* clamp to the remainder of the current extent */
	count = min_t(size_t, map.m_llen - (pos - map.m_la), count);
	DBG_BUGON(!count || count % PAGE_SIZE);

	mdev = (struct erofs_map_dev) {
		.m_deviceid = map.m_deviceid,
		.m_pa = map.m_pa,
	};
	ret = erofs_map_dev(sb, &mdev);
	if (ret)
		return ret;

	io = erofs_fscache_req_io_alloc(req);
	if (!io)
		return -ENOMEM;
	iov_iter_xarray(&io->iter, ITER_DEST, &mapping->i_pages, pos, count);
	ret = erofs_fscache_read_io_async(mdev.m_dif->fscache->cookie,
			mdev.m_pa + (pos - map.m_la), io);
	erofs_fscache_req_io_put(io);

	req->submitted += count;
	return ret;
}

/* Submit slices until the whole request range is covered or an error hits. */
static int erofs_fscache_data_read(struct erofs_fscache_rq *req)
{
	int ret;

	do {
		ret = erofs_fscache_data_read_slice(req);
		if (ret)
			req->error = ret;
	} while (!ret && req->submitted < req->len);
	return ret;
}

static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
{
	struct erofs_fscache_rq *req;
	int ret;

	req = erofs_fscache_req_alloc(folio->mapping,
			folio_pos(folio), folio_size(folio));
	if (!req) {
		folio_unlock(folio);
		return -ENOMEM;
	}

	ret = erofs_fscache_data_read(req);
	erofs_fscache_req_put(req);
	return ret;
}

static void erofs_fscache_readahead(struct readahead_control *rac)
{
	struct erofs_fscache_rq *req;

	if (!readahead_count(rac))
		return;

	req = erofs_fscache_req_alloc(rac->mapping,
			readahead_pos(rac), readahead_length(rac));
	if (!req)
		return;

	/* The request completion will drop refs on the folios. */
	while (readahead_folio(rac))
		;

	erofs_fscache_data_read(req);
	erofs_fscache_req_put(req);
}

static const struct address_space_operations erofs_fscache_meta_aops = {
	.read_folio = erofs_fscache_meta_read_folio,
};

const struct address_space_operations erofs_fscache_access_aops = {
	.read_folio = erofs_fscache_read_folio,
	.readahead = erofs_fscache_readahead,
};

/*
 * Drop a reference on @domain; on the last put, unhash it (the list and
 * erofs_pseudo_mnt are protected by erofs_domain_list_lock) and tear down
 * its volume.  Freeing happens after the lock is dropped.
 */
static void erofs_fscache_domain_put(struct erofs_domain *domain)
{
	mutex_lock(&erofs_domain_list_lock);
	if (refcount_dec_and_test(&domain->ref)) {
		list_del(&domain->list);
		if (list_empty(&erofs_domain_list)) {
			kern_unmount(erofs_pseudo_mnt);
			erofs_pseudo_mnt = NULL;
		}
		fscache_relinquish_volume(domain->volume, NULL, false);
		mutex_unlock(&erofs_domain_list_lock);
		kfree_sensitive(domain->domain_id);
		kfree(domain);
		return;
	}
	mutex_unlock(&erofs_domain_list_lock);
}

/* Acquire the fscache volume named "erofs,<domain_id|fsid>" for this sb. */
static int erofs_fscache_register_volume(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	char *domain_id = sbi->domain_id;
	struct fscache_volume *volume;
	char *name;
	int ret = 0;

	name = kasprintf(GFP_KERNEL, "erofs,%s",
			 domain_id ? domain_id : sbi->fsid);
	if (!name)
		return -ENOMEM;

	volume = fscache_acquire_volume(name, NULL, NULL, 0);
	if (IS_ERR_OR_NULL(volume)) {
		erofs_err(sb, "failed to register volume for %s", name);
		ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP;
		volume = NULL;
	}

	sbi->volume = volume;
	kfree(name);
	return ret;
}

/* Create a new shared domain (caller holds erofs_domain_list_lock). */
static int erofs_fscache_init_domain(struct super_block *sb)
{
	int err;
	struct erofs_domain *domain;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL);
	if (!domain)
		return -ENOMEM;

	domain->domain_id = kstrdup(sbi->domain_id, GFP_KERNEL);
	if (!domain->domain_id) {
		kfree(domain);
		return -ENOMEM;
	}

	err = erofs_fscache_register_volume(sb);
	if (err)
		goto out;

	/* first domain mounts the pseudo fs hosting shared anon inodes */
	if (!erofs_pseudo_mnt) {
		struct vfsmount *mnt = kern_mount(&erofs_anon_fs_type);
		if (IS_ERR(mnt)) {
			err = PTR_ERR(mnt);
			goto out;
		}
		erofs_pseudo_mnt = mnt;
	}

	domain->volume = sbi->volume;
	refcount_set(&domain->ref, 1);
	list_add(&domain->list, &erofs_domain_list);
	sbi->domain = domain;
	return 0;
out:
	kfree_sensitive(domain->domain_id);
	kfree(domain);
	return err;
}

/* Attach to an existing domain with a matching id, or create a new one. */
static int erofs_fscache_register_domain(struct super_block *sb)
{
	int err;
	struct erofs_domain *domain;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_lock(&erofs_domain_list_lock);
	list_for_each_entry(domain, &erofs_domain_list, list) {
		if (!strcmp(domain->domain_id, sbi->domain_id)) {
			sbi->domain = domain;
			sbi->volume = domain->volume;
			refcount_inc(&domain->ref);
			mutex_unlock(&erofs_domain_list_lock);
			return 0;
		}
	}
	err = erofs_fscache_init_domain(sb);
	mutex_unlock(&erofs_domain_list_lock);
	return err;
}

/*
 * Acquire a cookie for the blob @name and set up the anonymous inode whose
 * page cache backs it.  Returns the new context or an ERR_PTR.
 */
static struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
						char *name, unsigned int flags)
{
	struct fscache_volume *volume = EROFS_SB(sb)->volume;
	struct erofs_fscache *ctx;
	struct fscache_cookie *cookie;
	struct super_block *isb;
	struct inode *inode;
	int ret;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&ctx->node);
	refcount_set(&ctx->ref, 1);

	cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE,
					name, strlen(name), NULL, 0, 0);
	if (!cookie) {
		erofs_err(sb, "failed to get cookie for %s", name);
		ret = -EINVAL;
		goto err;
	}
	fscache_use_cookie(cookie, false);

	/*
	 * Allocate anonymous inode in global pseudo mount for shareable blobs,
	 * so that they are accessible among erofs fs instances.
	 */
	isb = flags & EROFS_REG_COOKIE_SHARE ? erofs_pseudo_mnt->mnt_sb : sb;
	inode = new_inode(isb);
	if (!inode) {
		erofs_err(sb, "failed to get anon inode for %s", name);
		ret = -ENOMEM;
		goto err_cookie;
	}

	inode->i_size = OFFSET_MAX;
	inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
	mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
	inode->i_blkbits = EROFS_SB(sb)->blkszbits;
	inode->i_private = ctx;

	ctx->cookie = cookie;
	ctx->inode = inode;
	return ctx;

err_cookie:
	fscache_unuse_cookie(cookie, NULL, NULL);
	fscache_relinquish_cookie(cookie, false);
err:
	kfree(ctx);
	return ERR_PTR(ret);
}

static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx)
{
	fscache_unuse_cookie(ctx->cookie, NULL, NULL);
	fscache_relinquish_cookie(ctx->cookie, false);
	iput(ctx->inode);
	kfree(ctx->name);
	kfree(ctx);
}

/* Create a domain-shared cookie (caller holds erofs_domain_cookies_lock). */
static struct erofs_fscache *erofs_domain_init_cookie(struct super_block *sb,
						char *name, unsigned int flags)
{
	struct erofs_fscache *ctx;
	struct erofs_domain *domain = EROFS_SB(sb)->domain;

	ctx = erofs_fscache_acquire_cookie(sb, name, flags);
	if (IS_ERR(ctx))
		return ctx;

	ctx->name = kstrdup(name, GFP_KERNEL);
	if (!ctx->name) {
		erofs_fscache_relinquish_cookie(ctx);
		return ERR_PTR(-ENOMEM);
	}

	refcount_inc(&domain->ref);
	ctx->domain = domain;
	list_add(&ctx->node, &erofs_domain_cookies_list);
	return ctx;
}

/* Look up a shared cookie by (domain, name); create it if absent. */
static struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
						char *name, unsigned int flags)
{
	struct erofs_fscache *ctx;
	struct erofs_domain *domain = EROFS_SB(sb)->domain;

	flags |= EROFS_REG_COOKIE_SHARE;
	mutex_lock(&erofs_domain_cookies_lock);
	list_for_each_entry(ctx, &erofs_domain_cookies_list, node) {
		if (ctx->domain != domain || strcmp(ctx->name, name))
			continue;
		if (!(flags & EROFS_REG_COOKIE_NEED_NOEXIST)) {
			refcount_inc(&ctx->ref);
		} else {
			erofs_err(sb, "%s already exists in domain %s", name,
				  domain->domain_id);
			ctx = ERR_PTR(-EEXIST);
		}
		mutex_unlock(&erofs_domain_cookies_lock);
		return ctx;
	}
	ctx = erofs_domain_init_cookie(sb, name, flags);
	mutex_unlock(&erofs_domain_cookies_lock);
	return ctx;
}

struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
						    char *name,
						    unsigned int flags)
{
	if (EROFS_SB(sb)->domain_id)
		return erofs_domain_register_cookie(sb, name, flags);
	return erofs_fscache_acquire_cookie(sb, name, flags);
}

void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx)
{
	struct erofs_domain *domain = NULL;

	if (!ctx)
		return;
	if (!ctx->domain)
		return erofs_fscache_relinquish_cookie(ctx);

	mutex_lock(&erofs_domain_cookies_lock);
	if (refcount_dec_and_test(&ctx->ref)) {
		domain = ctx->domain;
		list_del(&ctx->node);
		erofs_fscache_relinquish_cookie(ctx);
	}
	mutex_unlock(&erofs_domain_cookies_lock);
	/* drop the domain ref outside the cookies lock */
	if (domain)
		erofs_fscache_domain_put(domain);
}

int erofs_fscache_register_fs(struct super_block *sb)
{
	int ret;
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_fscache *fscache;
	unsigned int flags = 0;

	if (sbi->domain_id)
		ret = erofs_fscache_register_domain(sb);
	else
		ret = erofs_fscache_register_volume(sb);
	if (ret)
		return ret;

	/*
	 * When shared domain is enabled, using NEED_NOEXIST to guarantee
	 * the primary data blob (aka fsid) is unique in the shared domain.
	 *
	 * For non-shared-domain case, fscache_acquire_volume() invoked by
	 * erofs_fscache_register_volume() has already guaranteed
	 * the uniqueness of primary data blob.
	 *
	 * Acquired domain/volume will be relinquished in kill_sb() on error.
	 */
	if (sbi->domain_id)
		flags |= EROFS_REG_COOKIE_NEED_NOEXIST;
	fscache = erofs_fscache_register_cookie(sb, sbi->fsid, flags);
	if (IS_ERR(fscache))
		return PTR_ERR(fscache);

	sbi->dif0.fscache = fscache;
	return 0;
}

void erofs_fscache_unregister_fs(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	erofs_fscache_unregister_cookie(sbi->dif0.fscache);

	if (sbi->domain)
		erofs_fscache_domain_put(sbi->domain);
	else
		fscache_relinquish_volume(sbi->volume, NULL, false);

	sbi->dif0.fscache = NULL;
	sbi->volume = NULL;
	sbi->domain = NULL;
}