// SPDX-License-Identifier: GPL-2.0-or-later
/* Network filesystem high-level buffered read support.
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"

/*
 * [DEPRECATED] Unlock the folios in a read operation for when the filesystem
 * is using PG_private_2 and direct writing to the cache from here rather than
 * marking the page for writeback.
 *
 * Note that we don't touch folio->private in this code.
 */
static void netfs_rreq_unlock_folios_pgpriv2(struct netfs_io_request *rreq,
					     size_t *account)
{
	struct netfs_io_subrequest *subreq;
	struct folio *folio;
	pgoff_t start_page = rreq->start / PAGE_SIZE;
	pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
	bool subreq_failed = false;

	XA_STATE(xas, &rreq->mapping->i_pages, start_page);

	/* Walk through the pagecache and the I/O request lists simultaneously.
	 * We may have a mixture of cached and uncached sections and we only
	 * really want to write out the uncached sections. This is slightly
	 * complicated by the possibility that we might have huge pages with a
	 * mixture inside.
	 */
	subreq = list_first_entry(&rreq->subrequests,
				  struct netfs_io_subrequest, rreq_link);
	subreq_failed = (subreq->error < 0);

	trace_netfs_rreq(rreq, netfs_rreq_trace_unlock_pgpriv2);

	rcu_read_lock();
	xas_for_each(&xas, folio, last_page) {
		loff_t pg_end;
		bool pg_failed = false;
		bool folio_started = false;

		if (xas_retry(&xas, folio))
			continue;

		pg_end = folio_pos(folio) + folio_size(folio) - 1;

		for (;;) {
			loff_t sreq_end;

			if (!subreq) {
				pg_failed = true;
				break;
			}

			if (!folio_started &&
			    test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags) &&
			    fscache_operation_valid(&rreq->cache_resources)) {
				trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
				folio_start_private_2(folio);
				folio_started = true;
			}

			pg_failed |= subreq_failed;
			sreq_end = subreq->start + subreq->len - 1;
			if (pg_end < sreq_end)
				break;

			*account += subreq->transferred;
			if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
				subreq = list_next_entry(subreq, rreq_link);
				subreq_failed = (subreq->error < 0);
			} else {
				subreq = NULL;
				subreq_failed = false;
			}

			if (pg_end == sreq_end)
				break;
		}

		if (!pg_failed) {
			flush_dcache_folio(folio);
			folio_mark_uptodate(folio);
		}

		if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
			if (folio->index == rreq->no_unlock_folio &&
			    test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
				_debug("no unlock");
			else
				folio_unlock(folio);
		}
	}
	rcu_read_unlock();
}

/*
 * Unlock the folios in a read operation. We need to set PG_writeback on any
 * folios we're going to write back before we unlock them.
 *
 * Note that if the deprecated NETFS_RREQ_USE_PGPRIV2 is set then we use
 * PG_private_2 and do a direct write to the cache from here instead.
 */
void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
{
	struct netfs_io_subrequest *subreq;
	struct netfs_folio *finfo;
	struct folio *folio;
	pgoff_t start_page = rreq->start / PAGE_SIZE;
	pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
	size_t account = 0;
	bool subreq_failed = false;

	XA_STATE(xas, &rreq->mapping->i_pages, start_page);

	if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) {
		__clear_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags);
		list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
			__clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
		}
	}

	/* Handle deprecated PG_private_2 case. */
	if (test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) {
		netfs_rreq_unlock_folios_pgpriv2(rreq, &account);
		goto out;
	}

	/* Walk through the pagecache and the I/O request lists simultaneously.
	 * We may have a mixture of cached and uncached sections and we only
	 * really want to write out the uncached sections. This is slightly
	 * complicated by the possibility that we might have huge pages with a
	 * mixture inside.
	 */
	subreq = list_first_entry(&rreq->subrequests,
				  struct netfs_io_subrequest, rreq_link);
	subreq_failed = (subreq->error < 0);

	trace_netfs_rreq(rreq, netfs_rreq_trace_unlock);

	rcu_read_lock();
	xas_for_each(&xas, folio, last_page) {
		loff_t pg_end;
		bool pg_failed = false;
		bool wback_to_cache = false;

		if (xas_retry(&xas, folio))
			continue;

		pg_end = folio_pos(folio) + folio_size(folio) - 1;

		for (;;) {
			loff_t sreq_end;

			if (!subreq) {
				pg_failed = true;
				break;
			}

			wback_to_cache |= test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
			pg_failed |= subreq_failed;
			sreq_end = subreq->start + subreq->len - 1;
			if (pg_end < sreq_end)
				break;

			account += subreq->transferred;
			if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
				subreq = list_next_entry(subreq, rreq_link);
				subreq_failed = (subreq->error < 0);
			} else {
				subreq = NULL;
				subreq_failed = false;
			}

			if (pg_end == sreq_end)
				break;
		}

		if (!pg_failed) {
			flush_dcache_folio(folio);
			finfo = netfs_folio_info(folio);
			if (finfo) {
				trace_netfs_folio(folio, netfs_folio_trace_filled_gaps);
				if (finfo->netfs_group)
					folio_change_private(folio, finfo->netfs_group);
				else
					folio_detach_private(folio);
				kfree(finfo);
			}
			folio_mark_uptodate(folio);
			if (wback_to_cache && !WARN_ON_ONCE(folio_get_private(folio) != NULL)) {
				trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
				folio_attach_private(folio, NETFS_FOLIO_COPY_TO_CACHE);
				filemap_dirty_folio(folio->mapping, folio);
			}
		}

		if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
			if (folio->index == rreq->no_unlock_folio &&
			    test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
				_debug("no unlock");
			else
				folio_unlock(folio);
		}
	}
	rcu_read_unlock();

out:
	task_io_account_read(account);
	if (rreq->netfs_ops->done)
		rreq->netfs_ops->done(rreq);
}

static void netfs_cache_expand_readahead(struct netfs_io_request *rreq,
					 unsigned long long *_start,
					 unsigned long long *_len,
					 unsigned long long i_size)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	if (cres->ops && cres->ops->expand_readahead)
		cres->ops->expand_readahead(cres, _start, _len, i_size);
}

static void netfs_rreq_expand(struct netfs_io_request *rreq,
			      struct readahead_control *ractl)
{
	/* Give the cache a chance to change the request parameters. The
	 * resultant request must contain the original region.
	 */
	netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size);

	/* Give the netfs a chance to change the request parameters. The
	 * resultant request must contain the original region.
	 */
	if (rreq->netfs_ops->expand_readahead)
		rreq->netfs_ops->expand_readahead(rreq);

	/* Expand the request if the cache wants it to start earlier. Note
	 * that the expansion may get further extended if the VM wishes to
	 * insert THPs and the preferred start and/or end wind up in the middle
	 * of THPs.
	 *
	 * If this is the case, however, the THP size should be an integer
	 * multiple of the cache granule size, so we get a whole number of
	 * granules to deal with.
	 */
	if (rreq->start != readahead_pos(ractl) ||
	    rreq->len != readahead_length(ractl)) {
		readahead_expand(ractl, rreq->start, rreq->len);
		rreq->start = readahead_pos(ractl);
		rreq->len = readahead_length(ractl);

		trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
				 netfs_read_trace_expanded);
	}
}

/*
 * Begin an operation, and fetch the stored zero point value from the cookie if
 * available.
 */
static int netfs_begin_cache_read(struct netfs_io_request *rreq, struct netfs_inode *ctx)
{
	return fscache_begin_read_operation(&rreq->cache_resources, netfs_i_cookie(ctx));
}

/**
 * netfs_readahead - Helper to manage a read request
 * @ractl: The description of the readahead request
 *
 * Fulfil a readahead request by drawing data from the cache if possible, or
 * the netfs if not. Space beyond the EOF is zero-filled. Multiple I/O
 * requests from different sources will get munged together. If necessary, the
 * readahead window can be expanded in either direction to a more convenient
 * alignment for RPC efficiency or to make storage in the cache feasible.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
void netfs_readahead(struct readahead_control *ractl)
{
	struct netfs_io_request *rreq;
	struct netfs_inode *ctx = netfs_inode(ractl->mapping->host);
	int ret;

	_enter("%lx,%x", readahead_index(ractl), readahead_count(ractl));

	if (readahead_count(ractl) == 0)
		return;

	rreq = netfs_alloc_request(ractl->mapping, ractl->file,
				   readahead_pos(ractl),
				   readahead_length(ractl),
				   NETFS_READAHEAD);
	if (IS_ERR(rreq))
		return;

	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto cleanup_free;

	netfs_stat(&netfs_n_rh_readahead);
	trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
			 netfs_read_trace_readahead);

	netfs_rreq_expand(rreq, ractl);

	/* Set up the output buffer */
	iov_iter_xarray(&rreq->iter, ITER_DEST, &ractl->mapping->i_pages,
			rreq->start, rreq->len);

	/* Drop the refs on the folios here rather than in the cache or
	 * filesystem. The locks will be dropped in netfs_rreq_unlock().
	 */
	while (readahead_folio(ractl))
		;

	netfs_begin_read(rreq, false);
	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
	return;

cleanup_free:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
	return;
}
EXPORT_SYMBOL(netfs_readahead);

/**
 * netfs_read_folio - Helper to manage a read_folio request
 * @file: The file to read from
 * @folio: The folio to read
 *
 * Fulfil a read_folio request by drawing data from the cache if
 * possible, or the netfs if not. Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
int netfs_read_folio(struct file *file, struct folio *folio)
{
	struct address_space *mapping = folio->mapping;
	struct netfs_io_request *rreq;
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	struct folio *sink = NULL;
	int ret;

	_enter("%lx", folio->index);

	rreq = netfs_alloc_request(mapping, file,
				   folio_pos(folio), folio_size(folio),
				   NETFS_READPAGE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto alloc_error;
	}

	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto discard;

	netfs_stat(&netfs_n_rh_read_folio);
	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);

	/* Set up the output buffer */
	if (folio_test_dirty(folio)) {
		/* Handle someone trying to read from an unflushed streaming
		 * write. We fiddle the buffer so that a gap at the beginning
		 * and/or a gap at the end get copied to, but the middle is
		 * discarded.
		 */
		struct netfs_folio *finfo = netfs_folio_info(folio);
		struct bio_vec *bvec;
		unsigned int from = finfo->dirty_offset;
		unsigned int to = from + finfo->dirty_len;
		unsigned int off = 0, i = 0;
		size_t flen = folio_size(folio);
		size_t nr_bvec = flen / PAGE_SIZE + 2;
		size_t part;

		ret = -ENOMEM;
		bvec = kmalloc_array(nr_bvec, sizeof(*bvec), GFP_KERNEL);
		if (!bvec)
			goto discard;

		sink = folio_alloc(GFP_KERNEL, 0);
		if (!sink)
			goto discard;

		trace_netfs_folio(folio, netfs_folio_trace_read_gaps);

		rreq->direct_bv = bvec;
		rreq->direct_bv_count = nr_bvec;
		if (from > 0) {
			bvec_set_folio(&bvec[i++], folio, from, 0);
			off = from;
		}
		while (off < to) {
			part = min_t(size_t, to - off, PAGE_SIZE);
			bvec_set_folio(&bvec[i++], sink, part, 0);
			off += part;
		}
		if (to < flen)
			bvec_set_folio(&bvec[i++], folio, flen - to, to);
		iov_iter_bvec(&rreq->iter, ITER_DEST, bvec, i, rreq->len);
	} else {
		iov_iter_xarray(&rreq->iter, ITER_DEST, &mapping->i_pages,
				rreq->start, rreq->len);
	}

	ret = netfs_begin_read(rreq, true);
	if (sink)
		folio_put(sink);
	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
	return ret < 0 ? ret : 0;

discard:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
alloc_error:
	folio_unlock(folio);
	return ret;
}
EXPORT_SYMBOL(netfs_read_folio);
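
/*
 * Example (an illustrative sketch, not lifted from any in-tree filesystem): a
 * network filesystem would normally plug the two helpers above straight into
 * its address_space_operations and initialise the netfs context alongside its
 * VFS inode. All "myfs" names below are hypothetical.
 *
 *	const struct address_space_operations myfs_aops = {
 *		.read_folio		= netfs_read_folio,
 *		.readahead		= netfs_readahead,
 *		.dirty_folio		= netfs_dirty_folio,
 *		.release_folio		= netfs_release_folio,
 *		.invalidate_folio	= netfs_invalidate_folio,
 *	};
 *
 *	// At inode set-up time, after initialising the VFS inode:
 *	netfs_inode_init(&myfs_inode->netfs, &myfs_req_ops, false);
 *
 * Here myfs_req_ops is the filesystem's netfs_request_operations table, of
 * which issue_read is the one method a read path must supply; the exact
 * netfs_inode_init() argument list varies between kernel versions.
 */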

/*
 * Prepare a folio for writing without reading first
 * @folio: The folio being prepared
 * @pos: starting position for the write
 * @len: length of write
 * @always_fill: T if the folio should always be completely filled/cleared
 *
 * In some cases, write_begin doesn't need to read at all:
 * - full folio write
 * - write that lies in a folio that is completely beyond EOF
 * - write that covers the folio from start to EOF or beyond it
 *
 * If any of these criteria are met, then zero out the unwritten parts
 * of the folio and return true. Otherwise, return false.
 */
static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
				  bool always_fill)
{
	struct inode *inode = folio_inode(folio);
	loff_t i_size = i_size_read(inode);
	size_t offset = offset_in_folio(folio, pos);
	size_t plen = folio_size(folio);

	if (unlikely(always_fill)) {
		if (pos - offset + len <= i_size)
			return false; /* Page entirely before EOF */
		zero_user_segment(&folio->page, 0, plen);
		folio_mark_uptodate(folio);
		return true;
	}

	/* Full folio write */
	if (offset == 0 && len >= plen)
		return true;

	/* Page entirely beyond the end of the file */
	if (pos - offset >= i_size)
		goto zero_out;

	/* Write that covers from the start of the folio to EOF or beyond */
	if (offset == 0 && (pos + len) >= i_size)
		goto zero_out;

	return false;
zero_out:
	zero_user_segments(&folio->page, 0, offset, offset + len, plen);
	return true;
}
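
/*
 * A quick worked example of the criteria above (illustrative numbers only):
 * assume a 4096-byte folio at file position 0 and i_size = 100.
 *
 *  - write pos=0, len=4096: a full-folio write, so the read is skipped and
 *    nothing is zeroed (the caller will overwrite the lot);
 *  - write pos=0, len=300: the write runs from the start of the folio to
 *    beyond EOF, so bytes 300..4095 are zeroed and the read is skipped;
 *  - write pos=200, len=50 with i_size = 1000: no criterion matches, so false
 *    is returned and the caller must read the folio first.
 */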

/**
 * netfs_write_begin - Helper to prepare for writing [DEPRECATED]
 * @ctx: The netfs context
 * @file: The file to read from
 * @mapping: The mapping to read from
 * @pos: File position at which the write will begin
 * @len: The length of the write (may extend beyond the end of the folio chosen)
 * @_folio: Where to put the resultant folio
 * @_fsdata: Place for the netfs to store a cookie
 *
 * Pre-read data for a write-begin request by drawing data from the cache if
 * possible, or the netfs if not. Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together. If
 * necessary, the readahead window can be expanded in either direction to a
 * more convenient alignment for RPC efficiency or to make storage in the cache
 * feasible.
 *
 * The calling netfs must provide a table of operations, only one of which,
 * issue_read, is mandatory.
 *
 * The check_write_begin() operation can be provided to check for and flush
 * conflicting writes once the folio is grabbed and locked. It is passed a
 * pointer to the fsdata cookie that gets returned to the VM to be passed to
 * write_end. It is permitted to sleep. It should return 0 if the request
 * should go ahead or it may return an error. It may also unlock and put the
 * folio, provided it sets ``*foliop`` to NULL, in which case a return of 0
 * will cause the folio to be re-got and the process to be retried.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 *
 * Note that this should be considered deprecated and netfs_perform_write()
 * used instead.
 */
int netfs_write_begin(struct netfs_inode *ctx,
		      struct file *file, struct address_space *mapping,
		      loff_t pos, unsigned int len, struct folio **_folio,
		      void **_fsdata)
{
	struct netfs_io_request *rreq;
	struct folio *folio;
	pgoff_t index = pos >> PAGE_SHIFT;
	int ret;

	DEFINE_READAHEAD(ractl, file, NULL, mapping, index);

retry:
	folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
				    mapping_gfp_mask(mapping));
	if (IS_ERR(folio))
		return PTR_ERR(folio);

	if (ctx->ops->check_write_begin) {
		/* Allow the netfs (eg. ceph) to flush conflicts. */
		ret = ctx->ops->check_write_begin(file, pos, len, &folio, _fsdata);
		if (ret < 0) {
			trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
			goto error;
		}
		if (!folio)
			goto retry;
	}

	if (folio_test_uptodate(folio))
		goto have_folio;

	/* If the page is beyond the EOF, we want to clear it - unless it's
	 * within the cache granule containing the EOF, in which case we need
	 * to preload the granule.
	 */
	if (!netfs_is_cache_enabled(ctx) &&
	    netfs_skip_folio_read(folio, pos, len, false)) {
		netfs_stat(&netfs_n_rh_write_zskip);
		goto have_folio_no_wait;
	}

	rreq = netfs_alloc_request(mapping, file,
				   folio_pos(folio), folio_size(folio),
				   NETFS_READ_FOR_WRITE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto error;
	}
	rreq->no_unlock_folio = folio->index;
	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);

	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto error_put;

	netfs_stat(&netfs_n_rh_write_begin);
	trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);

	/* Expand the request to meet caching requirements and download
	 * preferences.
	 */
	ractl._nr_pages = folio_nr_pages(folio);
	netfs_rreq_expand(rreq, &ractl);

	/* Set up the output buffer */
	iov_iter_xarray(&rreq->iter, ITER_DEST, &mapping->i_pages,
			rreq->start, rreq->len);

	/* We hold the folio locks, so we can drop the references */
	folio_get(folio);
	while (readahead_folio(&ractl))
		;

	ret = netfs_begin_read(rreq, true);
	if (ret < 0)
		goto error;
	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);

have_folio:
	ret = folio_wait_private_2_killable(folio);
	if (ret < 0)
		goto error;
have_folio_no_wait:
	*_folio = folio;
	_leave(" = 0");
	return 0;

error_put:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
error:
	if (folio) {
		folio_unlock(folio);
		folio_put(folio);
	}
	_leave(" = %d", ret);
	return ret;
}
EXPORT_SYMBOL(netfs_write_begin);
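
/*
 * Example (hypothetical "myfs", sketched for illustration only): a filesystem
 * still using this deprecated path would typically just wrap the helper in
 * its ->write_begin() method, pointing check_write_begin() in its netfs ops
 * table at a conflict-flushing routine if it needs one:
 *
 *	static int myfs_write_begin(struct file *file,
 *				    struct address_space *mapping,
 *				    loff_t pos, unsigned int len,
 *				    struct folio **foliop, void **fsdata)
 *	{
 *		struct myfs_inode *mi = MYFS_I(mapping->host);
 *
 *		return netfs_write_begin(&mi->netfs, file, mapping, pos, len,
 *					 foliop, fsdata);
 *	}
 *
 * Note that the ->write_begin() prototype has changed across kernel versions
 * (struct page ** vs struct folio **); the helper itself always deals in
 * folios, so older callers convert with folio_page() or similar.
 */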

/*
 * Preload the data into a page we're proposing to write into.
 */
int netfs_prefetch_for_write(struct file *file, struct folio *folio,
			     size_t offset, size_t len)
{
	struct netfs_io_request *rreq;
	struct address_space *mapping = folio->mapping;
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	unsigned long long start = folio_pos(folio);
	size_t flen = folio_size(folio);
	int ret;

	_enter("%zx @%llx", flen, start);

	ret = -ENOMEM;

	rreq = netfs_alloc_request(mapping, file, start, flen,
				   NETFS_READ_FOR_WRITE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto error;
	}

	rreq->no_unlock_folio = folio->index;
	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto error_put;

	netfs_stat(&netfs_n_rh_write_begin);
	trace_netfs_read(rreq, start, flen, netfs_read_trace_prefetch_for_write);

	/* Set up the output buffer */
	iov_iter_xarray(&rreq->iter, ITER_DEST, &mapping->i_pages,
			rreq->start, rreq->len);

	ret = netfs_begin_read(rreq, true);
	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
	return ret;

error_put:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
error:
	_leave(" = %d", ret);
	return ret;
}

/**
 * netfs_buffered_read_iter - Filesystem buffered I/O read routine
 * @iocb: kernel I/O control block
 * @iter: destination for the data read
 *
 * This is the ->read_iter() routine for all filesystems that can use the page
 * cache directly.
 *
 * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
 * returned when no data can be read without waiting for I/O requests to
 * complete; it doesn't prevent readahead.
 *
 * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
 * shall be made for the read or for readahead. When no data can be read,
 * -EAGAIN shall be returned. When readahead would be triggered, a partial,
 * possibly empty read shall be returned.
 *
 * Return:
 * * number of bytes copied, even for partial reads
 * * negative error code (or 0 if IOCB_NOIO) if nothing was read
 */
ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct netfs_inode *ictx = netfs_inode(inode);
	ssize_t ret;

	if (WARN_ON_ONCE((iocb->ki_flags & IOCB_DIRECT) ||
			 test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags)))
		return -EINVAL;

	ret = netfs_start_io_read(inode);
	if (ret == 0) {
		ret = filemap_read(iocb, iter, 0);
		netfs_end_io_read(inode);
	}
	return ret;
}
EXPORT_SYMBOL(netfs_buffered_read_iter);

/**
 * netfs_file_read_iter - Generic filesystem read routine
 * @iocb: kernel I/O control block
 * @iter: destination for the data read
 *
 * This is the ->read_iter() routine for all filesystems that can use the page
 * cache directly.
 *
 * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
 * returned when no data can be read without waiting for I/O requests to
 * complete; it doesn't prevent readahead.
 *
 * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
 * shall be made for the read or for readahead. When no data can be read,
 * -EAGAIN shall be returned. When readahead would be triggered, a partial,
 * possibly empty read shall be returned.
 *
 * Return:
 * * number of bytes copied, even for partial reads
 * * negative error code (or 0 if IOCB_NOIO) if nothing was read
 */
ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct netfs_inode *ictx = netfs_inode(iocb->ki_filp->f_mapping->host);

	if ((iocb->ki_flags & IOCB_DIRECT) ||
	    test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags))
		return netfs_unbuffered_read_iter(iocb, iter);

	return netfs_buffered_read_iter(iocb, iter);
}
EXPORT_SYMBOL(netfs_file_read_iter);
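
/*
 * Example (hypothetical "myfs" once more): the iterator helpers above are
 * intended to slot straight into a filesystem's file_operations, with
 * netfs_file_read_iter() picking the buffered or unbuffered (O_DIRECT) path
 * on the caller's behalf:
 *
 *	const struct file_operations myfs_file_operations = {
 *		.open		= myfs_open,
 *		.release	= myfs_release,
 *		.llseek		= generic_file_llseek,
 *		.read_iter	= netfs_file_read_iter,
 *		.write_iter	= netfs_file_write_iter,
 *		.mmap		= myfs_mmap,
 *	};
 *
 * A filesystem that must revalidate the inode before each read would instead
 * supply its own ->read_iter() and call netfs_file_read_iter() (or
 * netfs_buffered_read_iter()) after the revalidation step.
 */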