// SPDX-License-Identifier: GPL-2.0-or-later
/* Network filesystem high-level buffered read support.
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"

static void netfs_cache_expand_readahead(struct netfs_io_request *rreq,
					 unsigned long long *_start,
					 unsigned long long *_len,
					 unsigned long long i_size)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	if (cres->ops && cres->ops->expand_readahead)
		cres->ops->expand_readahead(cres, _start, _len, i_size);
}

static void netfs_rreq_expand(struct netfs_io_request *rreq,
			      struct readahead_control *ractl)
{
	/* Give the cache a chance to change the request parameters. The
	 * resultant request must contain the original region.
	 */
	netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size);

	/* Give the netfs a chance to change the request parameters. The
	 * resultant request must contain the original region.
	 */
	if (rreq->netfs_ops->expand_readahead)
		rreq->netfs_ops->expand_readahead(rreq);

	/* Expand the request if the cache wants it to start earlier. Note
	 * that the expansion may get further extended if the VM wishes to
	 * insert THPs and the preferred start and/or end wind up in the middle
	 * of THPs.
	 *
	 * If this is the case, however, the THP size should be an integer
	 * multiple of the cache granule size, so we get a whole number of
	 * granules to deal with.
	 */
	if (rreq->start != readahead_pos(ractl) ||
	    rreq->len != readahead_length(ractl)) {
		readahead_expand(ractl, rreq->start, rreq->len);
		rreq->start = readahead_pos(ractl);
		rreq->len = readahead_length(ractl);

		trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
				 netfs_read_trace_expanded);
	}
}

/*
 * Begin an operation, and fetch the stored zero point value from the cookie if
 * available.
 */
static int netfs_begin_cache_read(struct netfs_io_request *rreq, struct netfs_inode *ctx)
{
	return fscache_begin_read_operation(&rreq->cache_resources, netfs_i_cookie(ctx));
}

/*
 * netfs_prepare_read_iterator - Prepare the subreq iterator for I/O
 * @subreq: The subrequest to be set up
 *
 * Prepare the I/O iterator representing the read buffer on a subrequest for
 * the filesystem to use for I/O (it can be passed directly to a socket). This
 * is intended to be called from the ->issue_read() method once the filesystem
 * has trimmed the request to the size it wants.
 *
 * Returns the limited size if successful and -ENOMEM if insufficient memory
 * available.
 *
 * [!] NOTE: This must be run in the same thread as ->issue_read() was called
 * in as we access the readahead_control struct.
 */
static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *rreq = subreq->rreq;
	size_t rsize = subreq->len;

	if (subreq->source == NETFS_DOWNLOAD_FROM_SERVER)
		rsize = umin(rsize, rreq->io_streams[0].sreq_max_len);

	if (rreq->ractl) {
		/* If we don't have sufficient folios in the rolling buffer,
		 * extract a folioq's worth from the readahead region at a time
		 * into the buffer. Note that this acquires a ref on each page
		 * that we will need to release later - but we don't want to do
		 * that until after we've started the I/O.
		 */
		struct folio_batch put_batch;

		folio_batch_init(&put_batch);
		while (rreq->submitted < subreq->start + rsize) {
			ssize_t added;

			added = rolling_buffer_load_from_ra(&rreq->buffer, rreq->ractl,
							    &put_batch);
			if (added < 0)
				return added;
			rreq->submitted += added;
		}
		folio_batch_release(&put_batch);
	}

	subreq->len = rsize;
	if (unlikely(rreq->io_streams[0].sreq_max_segs)) {
		size_t limit = netfs_limit_iter(&rreq->buffer.iter, 0, rsize,
						rreq->io_streams[0].sreq_max_segs);

		if (limit < rsize) {
			subreq->len = limit;
			trace_netfs_sreq(subreq, netfs_sreq_trace_limited);
		}
	}

	subreq->io_iter = rreq->buffer.iter;

	iov_iter_truncate(&subreq->io_iter, subreq->len);
	rolling_buffer_advance(&rreq->buffer, subreq->len);
	return subreq->len;
}
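
/*
 * Example (illustrative sketch, not part of the netfs API proper): by the
 * time ->issue_read() is called, subreq->io_iter spans exactly subreq->len
 * bytes of the request buffer, so a filesystem can hand it straight to its
 * transport and report the outcome with netfs_read_subreq_terminated().
 * Here myfs_issue_read() and myfs_send_read() are hypothetical:
 *
 *	static void myfs_issue_read(struct netfs_io_subrequest *subreq)
 *	{
 *		ssize_t n = myfs_send_read(subreq->start, subreq->len,
 *					   &subreq->io_iter);
 *		if (n < 0)
 *			subreq->error = n;
 *		else
 *			subreq->transferred = n;
 *		netfs_read_subreq_terminated(subreq);
 *	}
 */
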
static enum netfs_io_source netfs_cache_prepare_read(struct netfs_io_request *rreq,
						     struct netfs_io_subrequest *subreq,
						     loff_t i_size)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;
	enum netfs_io_source source;

	if (!cres->ops)
		return NETFS_DOWNLOAD_FROM_SERVER;
	source = cres->ops->prepare_read(subreq, i_size);
	trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
	return source;
}

/*
 * Issue a read against the cache.
 * - Eats the caller's ref on subreq.
 */
static void netfs_read_cache_to_pagecache(struct netfs_io_request *rreq,
					  struct netfs_io_subrequest *subreq)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	netfs_stat(&netfs_n_rh_read);
	cres->ops->read(cres, subreq->start, &subreq->io_iter, NETFS_READ_HOLE_IGNORE,
			netfs_cache_read_terminated, subreq);
}

static void netfs_issue_read(struct netfs_io_request *rreq,
			     struct netfs_io_subrequest *subreq)
{
	struct netfs_io_stream *stream = &rreq->io_streams[0];

	__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);

	/* We add to the end of the list whilst the collector may be walking
	 * the list. The collector only goes nextwards and uses the lock to
	 * remove entries off of the front.
	 */
	spin_lock(&rreq->lock);
	list_add_tail(&subreq->rreq_link, &stream->subrequests);
	if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
		stream->front = subreq;
		if (!stream->active) {
			stream->collected_to = stream->front->start;
			/* Store list pointers before active flag */
			smp_store_release(&stream->active, true);
		}
	}

	spin_unlock(&rreq->lock);

	switch (subreq->source) {
	case NETFS_DOWNLOAD_FROM_SERVER:
		rreq->netfs_ops->issue_read(subreq);
		break;
	case NETFS_READ_FROM_CACHE:
		netfs_read_cache_to_pagecache(rreq, subreq);
		break;
	default:
		__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
		subreq->error = 0;
		iov_iter_zero(subreq->len, &subreq->io_iter);
		subreq->transferred = subreq->len;
		netfs_read_subreq_terminated(subreq);
		break;
	}
}
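
/* A note on completion, added for clarity: the handoff above is asynchronous.
 * A server read finishes when the filesystem (or its completion handler)
 * calls netfs_read_subreq_terminated(); a cache read finishes via the
 * netfs_cache_read_terminated() callback passed to ->read(); only the
 * zero-fill default case is terminated synchronously in netfs_issue_read().
 */
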
/*
 * Perform a read to the pagecache from a series of sources of different types,
 * slicing up the region to be read according to available cache blocks and
 * network rsize.
 */
static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
{
	struct netfs_inode *ictx = netfs_inode(rreq->inode);
	unsigned long long start = rreq->start;
	ssize_t size = rreq->len;
	int ret = 0;

	do {
		struct netfs_io_subrequest *subreq;
		enum netfs_io_source source = NETFS_SOURCE_UNKNOWN;
		ssize_t slice;

		subreq = netfs_alloc_subrequest(rreq);
		if (!subreq) {
			ret = -ENOMEM;
			break;
		}

		subreq->start = start;
		subreq->len = size;

		source = netfs_cache_prepare_read(rreq, subreq, rreq->i_size);
		subreq->source = source;
		if (source == NETFS_DOWNLOAD_FROM_SERVER) {
			unsigned long long zp = umin(ictx->zero_point, rreq->i_size);
			size_t len = subreq->len;

			if (unlikely(rreq->origin == NETFS_READ_SINGLE))
				zp = rreq->i_size;
			if (subreq->start >= zp) {
				subreq->source = source = NETFS_FILL_WITH_ZEROES;
				goto fill_with_zeroes;
			}

			if (len > zp - subreq->start)
				len = zp - subreq->start;
			if (len == 0) {
				pr_err("ZERO-LEN READ: R=%08x[%x] l=%zx/%zx s=%llx z=%llx i=%llx",
				       rreq->debug_id, subreq->debug_index,
				       subreq->len, size,
				       subreq->start, ictx->zero_point, rreq->i_size);
				break;
			}
			subreq->len = len;

			netfs_stat(&netfs_n_rh_download);
			if (rreq->netfs_ops->prepare_read) {
				ret = rreq->netfs_ops->prepare_read(subreq);
				if (ret < 0) {
					subreq->error = ret;
					/* Not queued - release both refs. */
					netfs_put_subrequest(subreq, false,
							     netfs_sreq_trace_put_cancel);
					netfs_put_subrequest(subreq, false,
							     netfs_sreq_trace_put_cancel);
					break;
				}
				trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
			}
			goto issue;
		}

	fill_with_zeroes:
		if (source == NETFS_FILL_WITH_ZEROES) {
			subreq->source = NETFS_FILL_WITH_ZEROES;
			trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
			netfs_stat(&netfs_n_rh_zero);
			goto issue;
		}

		if (source == NETFS_READ_FROM_CACHE) {
			trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
			goto issue;
		}

		pr_err("Unexpected read source %u\n", source);
		WARN_ON_ONCE(1);
		break;

	issue:
		slice = netfs_prepare_read_iterator(subreq);
		if (slice < 0) {
			ret = slice;
			subreq->error = ret;
			trace_netfs_sreq(subreq, netfs_sreq_trace_cancel);
			/* Not queued - release both refs. */
			netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
			netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
			break;
		}
		size -= slice;
		start += slice;
		if (size <= 0) {
			smp_wmb(); /* Write lists before ALL_QUEUED. */
			set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
		}

		netfs_issue_read(rreq, subreq);
		cond_resched();
	} while (size > 0);

	if (unlikely(size > 0)) {
		smp_wmb(); /* Write lists before ALL_QUEUED. */
		set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
		netfs_wake_read_collector(rreq);
	}

	/* Defer error return as we may need to wait for outstanding I/O. */
	cmpxchg(&rreq->error, 0, ret);
}
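
/* Worked example, added for illustration (the sizes are hypothetical): with a
 * 256KiB cache granule and a 1MiB request of which only the first granule is
 * resident in the cache, the loop above would typically emit one
 * NETFS_READ_FROM_CACHE subrequest for the first 256KiB and then
 * NETFS_DOWNLOAD_FROM_SERVER subrequests for the rest, each capped at the
 * stream's sreq_max_len; any slice starting at or beyond the zero_point is
 * satisfied as NETFS_FILL_WITH_ZEROES without contacting the server.
 */
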
/**
 * netfs_readahead - Helper to manage a read request
 * @ractl: The description of the readahead request
 *
 * Fulfil a readahead request by drawing data from the cache if possible, or
 * the netfs if not. Space beyond the EOF is zero-filled. Multiple I/O
 * requests from different sources will get munged together. If necessary, the
 * readahead window can be expanded in either direction to a more convenient
 * alignment for RPC efficiency or to make storage in the cache feasible.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
void netfs_readahead(struct readahead_control *ractl)
{
	struct netfs_io_request *rreq;
	struct netfs_inode *ictx = netfs_inode(ractl->mapping->host);
	unsigned long long start = readahead_pos(ractl);
	size_t size = readahead_length(ractl);
	int ret;

	rreq = netfs_alloc_request(ractl->mapping, ractl->file, start, size,
				   NETFS_READAHEAD);
	if (IS_ERR(rreq))
		return;

	__set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags);

	ret = netfs_begin_cache_read(rreq, ictx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto cleanup_free;

	netfs_stat(&netfs_n_rh_readahead);
	trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
			 netfs_read_trace_readahead);

	netfs_rreq_expand(rreq, ractl);

	rreq->ractl = ractl;
	rreq->submitted = rreq->start;
	if (rolling_buffer_init(&rreq->buffer, rreq->debug_id, ITER_DEST) < 0)
		goto cleanup_free;
	netfs_read_to_pagecache(rreq);

	netfs_put_request(rreq, true, netfs_rreq_trace_put_return);
	return;

cleanup_free:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
	return;
}
EXPORT_SYMBOL(netfs_readahead);

/*
 * Create a rolling buffer with a single occupying folio.
 */
static int netfs_create_singular_buffer(struct netfs_io_request *rreq, struct folio *folio,
					unsigned int rollbuf_flags)
{
	ssize_t added;

	if (rolling_buffer_init(&rreq->buffer, rreq->debug_id, ITER_DEST) < 0)
		return -ENOMEM;

	added = rolling_buffer_append(&rreq->buffer, folio, rollbuf_flags);
	if (added < 0)
		return added;
	rreq->submitted = rreq->start + added;
	rreq->ractl = (struct readahead_control *)1UL;
	return 0;
}

/*
 * Read into gaps in a folio partially filled by a streaming write.
 */
static int netfs_read_gaps(struct file *file, struct folio *folio)
{
	struct netfs_io_request *rreq;
	struct address_space *mapping = folio->mapping;
	struct netfs_folio *finfo = netfs_folio_info(folio);
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	struct folio *sink = NULL;
	struct bio_vec *bvec;
	unsigned int from = finfo->dirty_offset;
	unsigned int to = from + finfo->dirty_len;
	unsigned int off = 0, i = 0;
	size_t flen = folio_size(folio);
	size_t nr_bvec = flen / PAGE_SIZE + 2;
	size_t part;
	int ret;

	_enter("%lx", folio->index);

	rreq = netfs_alloc_request(mapping, file, folio_pos(folio), flen, NETFS_READ_GAPS);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto alloc_error;
	}

	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto discard;

	netfs_stat(&netfs_n_rh_read_folio);
	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_read_gaps);

	/* Fiddle the buffer so that a gap at the beginning and/or a gap at the
	 * end get copied to, but the middle is discarded.
	 */
	ret = -ENOMEM;
	bvec = kmalloc_array(nr_bvec, sizeof(*bvec), GFP_KERNEL);
	if (!bvec)
		goto discard;

	sink = folio_alloc(GFP_KERNEL, 0);
	if (!sink) {
		kfree(bvec);
		goto discard;
	}

	trace_netfs_folio(folio, netfs_folio_trace_read_gaps);

	rreq->direct_bv = bvec;
	rreq->direct_bv_count = nr_bvec;
	if (from > 0) {
		bvec_set_folio(&bvec[i++], folio, from, 0);
		off = from;
	}
	while (off < to) {
		part = min_t(size_t, to - off, PAGE_SIZE);
		bvec_set_folio(&bvec[i++], sink, part, 0);
		off += part;
	}
	if (to < flen)
		bvec_set_folio(&bvec[i++], folio, flen - to, to);
	iov_iter_bvec(&rreq->buffer.iter, ITER_DEST, bvec, i, rreq->len);
	rreq->submitted = rreq->start + flen;

	netfs_read_to_pagecache(rreq);

	if (sink)
		folio_put(sink);

	ret = netfs_wait_for_read(rreq);
	if (ret >= 0) {
		flush_dcache_folio(folio);
		folio_mark_uptodate(folio);
	}
	folio_unlock(folio);
	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
	return ret < 0 ? ret : 0;

discard:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
alloc_error:
	folio_unlock(folio);
	return ret;
}

/**
 * netfs_read_folio - Helper to manage a read_folio request
 * @file: The file to read from
 * @folio: The folio to read
 *
 * Fulfil a read_folio request by drawing data from the cache if
 * possible, or the netfs if not. Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
int netfs_read_folio(struct file *file, struct folio *folio)
{
	struct address_space *mapping = folio->mapping;
	struct netfs_io_request *rreq;
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	int ret;

	if (folio_test_dirty(folio)) {
		trace_netfs_folio(folio, netfs_folio_trace_read_gaps);
		return netfs_read_gaps(file, folio);
	}

	_enter("%lx", folio->index);

	rreq = netfs_alloc_request(mapping, file,
				   folio_pos(folio), folio_size(folio),
				   NETFS_READPAGE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto alloc_error;
	}

	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto discard;

	netfs_stat(&netfs_n_rh_read_folio);
	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);

	/* Set up the output buffer */
	ret = netfs_create_singular_buffer(rreq, folio, 0);
	if (ret < 0)
		goto discard;

	netfs_read_to_pagecache(rreq);
	ret = netfs_wait_for_read(rreq);
	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
	return ret < 0 ? ret : 0;

discard:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
alloc_error:
	folio_unlock(folio);
	return ret;
}
EXPORT_SYMBOL(netfs_read_folio);

/*
 * Prepare a folio for writing without reading first
 * @folio: The folio being prepared
 * @pos: starting position for the write
 * @len: length of write
 * @always_fill: T if the folio should always be completely filled/cleared
 *
 * In some cases, write_begin doesn't need to read at all:
 * - full folio write
 * - write that lies in a folio that is completely beyond EOF
 * - write that covers the folio from start to EOF or beyond it
 *
 * If any of these criteria are met, then zero out the unwritten parts
 * of the folio and return true. Otherwise, return false.
 */
static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
				  bool always_fill)
{
	struct inode *inode = folio_inode(folio);
	loff_t i_size = i_size_read(inode);
	size_t offset = offset_in_folio(folio, pos);
	size_t plen = folio_size(folio);

	if (unlikely(always_fill)) {
		if (pos - offset + len <= i_size)
			return false; /* Page entirely before EOF */
		folio_zero_segment(folio, 0, plen);
		folio_mark_uptodate(folio);
		return true;
	}

	/* Full folio write */
	if (offset == 0 && len >= plen)
		return true;

	/* Page entirely beyond the end of the file */
	if (pos - offset >= i_size)
		goto zero_out;

	/* Write that covers from the start of the folio to EOF or beyond */
	if (offset == 0 && (pos + len) >= i_size)
		goto zero_out;

	return false;
zero_out:
	folio_zero_segments(folio, 0, offset, offset + len, plen);
	return true;
}
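
/* Illustration, added for clarity (values are hypothetical): for a 4KiB folio,
 * a 4KiB write at offset 0 is a full-folio write and skips the read without
 * zeroing; a 1KiB write at offset 512 into a folio lying wholly beyond EOF
 * zeroes bytes 0-511 and 1536-4095 and skips the read; the same write into a
 * folio that starts below EOF returns false, so the caller must read the
 * folio first.
 */
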
/**
 * netfs_write_begin - Helper to prepare for writing [DEPRECATED]
 * @ctx: The netfs context
 * @file: The file to read from
 * @mapping: The mapping to read from
 * @pos: File position at which the write will begin
 * @len: The length of the write (may extend beyond the end of the folio chosen)
 * @_folio: Where to put the resultant folio
 * @_fsdata: Place for the netfs to store a cookie
 *
 * Pre-read data for a write-begin request by drawing data from the cache if
 * possible, or the netfs if not. Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together.
 *
 * The calling netfs must provide a table of operations, only one of which,
 * issue_read, is mandatory.
 *
 * The check_write_begin() operation can be provided to check for and flush
 * conflicting writes once the folio is grabbed and locked. It is passed a
 * pointer to the fsdata cookie that gets returned to the VM to be passed to
 * write_end. It is permitted to sleep. It should return 0 if the request
 * should go ahead or it may return an error. It may also unlock and put the
 * folio, provided it sets ``*foliop`` to NULL, in which case a return of 0
 * will cause the folio to be re-got and the process to be retried.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 *
 * Note that this should be considered deprecated and netfs_perform_write()
 * used instead.
 */
int netfs_write_begin(struct netfs_inode *ctx,
		      struct file *file, struct address_space *mapping,
		      loff_t pos, unsigned int len, struct folio **_folio,
		      void **_fsdata)
{
	struct netfs_io_request *rreq;
	struct folio *folio;
	pgoff_t index = pos >> PAGE_SHIFT;
	int ret;

retry:
	folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
				    mapping_gfp_mask(mapping));
	if (IS_ERR(folio))
		return PTR_ERR(folio);

	if (ctx->ops->check_write_begin) {
		/* Allow the netfs (eg. ceph) to flush conflicts. */
		ret = ctx->ops->check_write_begin(file, pos, len, &folio, _fsdata);
		if (ret < 0) {
			trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
			goto error;
		}
		if (!folio)
			goto retry;
	}

	if (folio_test_uptodate(folio))
		goto have_folio;

	/* If the folio is beyond the EOF, we want to clear it - unless it's
	 * within the cache granule containing the EOF, in which case we need
	 * to preload the granule.
	 */
	if (!netfs_is_cache_enabled(ctx) &&
	    netfs_skip_folio_read(folio, pos, len, false)) {
		netfs_stat(&netfs_n_rh_write_zskip);
		goto have_folio_no_wait;
	}

	rreq = netfs_alloc_request(mapping, file,
				   folio_pos(folio), folio_size(folio),
				   NETFS_READ_FOR_WRITE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto error;
	}
	rreq->no_unlock_folio = folio->index;
	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);

	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto error_put;

	netfs_stat(&netfs_n_rh_write_begin);
	trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);

	/* Set up the output buffer */
	ret = netfs_create_singular_buffer(rreq, folio, 0);
	if (ret < 0)
		goto error_put;

	netfs_read_to_pagecache(rreq);
	ret = netfs_wait_for_read(rreq);
	if (ret < 0)
		goto error;
	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);

have_folio:
	ret = folio_wait_private_2_killable(folio);
	if (ret < 0)
		goto error;
have_folio_no_wait:
	*_folio = folio;
	_leave(" = 0");
	return 0;

error_put:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
error:
	if (folio) {
		folio_unlock(folio);
		folio_put(folio);
	}
	_leave(" = %d", ret);
	return ret;
}
EXPORT_SYMBOL(netfs_write_begin);
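
/*
 * Example (sketch only; myfs_write_begin is hypothetical and the exact
 * ->write_begin() prototype varies between kernel versions): a filesystem
 * still using this deprecated helper typically just forwards its arguments:
 *
 *	static int myfs_write_begin(struct file *file, struct address_space *mapping,
 *				    loff_t pos, unsigned int len,
 *				    struct folio **foliop, void **fsdata)
 *	{
 *		return netfs_write_begin(netfs_inode(mapping->host), file, mapping,
 *					 pos, len, foliop, fsdata);
 *	}
 */
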
/*
 * Preload the data into a folio we're proposing to write into.
 */
int netfs_prefetch_for_write(struct file *file, struct folio *folio,
			     size_t offset, size_t len)
{
	struct netfs_io_request *rreq;
	struct address_space *mapping = folio->mapping;
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	unsigned long long start = folio_pos(folio);
	size_t flen = folio_size(folio);
	int ret;

	_enter("%zx @%llx", flen, start);

	ret = -ENOMEM;

	rreq = netfs_alloc_request(mapping, file, start, flen,
				   NETFS_READ_FOR_WRITE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto error;
	}

	rreq->no_unlock_folio = folio->index;
	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto error_put;

	netfs_stat(&netfs_n_rh_write_begin);
	trace_netfs_read(rreq, start, flen, netfs_read_trace_prefetch_for_write);

	/* Set up the output buffer */
	ret = netfs_create_singular_buffer(rreq, folio, NETFS_ROLLBUF_PAGECACHE_MARK);
	if (ret < 0)
		goto error_put;

	netfs_read_to_pagecache(rreq);
	ret = netfs_wait_for_read(rreq);
	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
	return ret < 0 ? ret : 0;

error_put:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
error:
	_leave(" = %d", ret);
	return ret;
}

/**
 * netfs_buffered_read_iter - Filesystem buffered I/O read routine
 * @iocb: kernel I/O control block
 * @iter: destination for the data read
 *
 * This is the ->read_iter() routine for all filesystems that can use the page
 * cache directly.
 *
 * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
 * returned when no data can be read without waiting for I/O requests to
 * complete; it doesn't prevent readahead.
 *
 * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
 * shall be made for the read or for readahead. When no data can be read,
 * -EAGAIN shall be returned. When readahead would be triggered, a partial,
 * possibly empty read shall be returned.
 *
 * Return:
 * * number of bytes copied, even for partial reads
 * * negative error code (or 0 if IOCB_NOIO) if nothing was read
 */
ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct netfs_inode *ictx = netfs_inode(inode);
	ssize_t ret;

	if (WARN_ON_ONCE((iocb->ki_flags & IOCB_DIRECT) ||
			 test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags)))
		return -EINVAL;

	ret = netfs_start_io_read(inode);
	if (ret == 0) {
		ret = filemap_read(iocb, iter, 0);
		netfs_end_io_read(inode);
	}
	return ret;
}
EXPORT_SYMBOL(netfs_buffered_read_iter);

/**
 * netfs_file_read_iter - Generic filesystem read routine
 * @iocb: kernel I/O control block
 * @iter: destination for the data read
 *
 * This is the ->read_iter() routine for all filesystems that can use the page
 * cache directly.
 *
 * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
 * returned when no data can be read without waiting for I/O requests to
 * complete; it doesn't prevent readahead.
 *
 * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
 * shall be made for the read or for readahead. When no data can be read,
 * -EAGAIN shall be returned. When readahead would be triggered, a partial,
 * possibly empty read shall be returned.
 *
 * Return:
 * * number of bytes copied, even for partial reads
 * * negative error code (or 0 if IOCB_NOIO) if nothing was read
 */
ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct netfs_inode *ictx = netfs_inode(iocb->ki_filp->f_mapping->host);

	if ((iocb->ki_flags & IOCB_DIRECT) ||
	    test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags))
		return netfs_unbuffered_read_iter(iocb, iter);

	return netfs_buffered_read_iter(iocb, iter);
}
EXPORT_SYMBOL(netfs_file_read_iter);
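
/*
 * Example (sketch only; "myfs" is hypothetical): a network filesystem wires
 * these helpers into its operation tables roughly like so:
 *
 *	static const struct address_space_operations myfs_aops = {
 *		.read_folio	= netfs_read_folio,
 *		.readahead	= netfs_readahead,
 *		// plus the filesystem's write-side and release handlers
 *	};
 *
 *	static const struct file_operations myfs_file_ops = {
 *		.read_iter	= netfs_file_read_iter,
 *		// plus the remaining file operations
 *	};
 */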