// SPDX-License-Identifier: GPL-2.0-or-later
/* Network filesystem high-level buffered read support.
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"

static void netfs_cache_expand_readahead(struct netfs_io_request *rreq,
					 unsigned long long *_start,
					 unsigned long long *_len,
					 unsigned long long i_size)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	if (cres->ops && cres->ops->expand_readahead)
		cres->ops->expand_readahead(cres, _start, _len, i_size);
}

static void netfs_rreq_expand(struct netfs_io_request *rreq,
			      struct readahead_control *ractl)
{
	/* Give the cache a chance to change the request parameters.  The
	 * resultant request must contain the original region.
	 */
	netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size);

	/* Give the netfs a chance to change the request parameters.  The
	 * resultant request must contain the original region.
	 */
	if (rreq->netfs_ops->expand_readahead)
		rreq->netfs_ops->expand_readahead(rreq);

	/* Expand the request if the cache wants it to start earlier.  Note
	 * that the expansion may get further extended if the VM wishes to
	 * insert THPs and the preferred start and/or end wind up in the middle
	 * of THPs.
	 *
	 * If this is the case, however, the THP size should be an integer
	 * multiple of the cache granule size, so we get a whole number of
	 * granules to deal with.
	 */
	if (rreq->start != readahead_pos(ractl) ||
	    rreq->len != readahead_length(ractl)) {
		readahead_expand(ractl, rreq->start, rreq->len);
		rreq->start = readahead_pos(ractl);
		rreq->len = readahead_length(ractl);

		trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
				 netfs_read_trace_expanded);
	}
}

/*
 * Begin an operation, and fetch the stored zero point value from the cookie if
 * available.
 */
static int netfs_begin_cache_read(struct netfs_io_request *rreq, struct netfs_inode *ctx)
{
	return fscache_begin_read_operation(&rreq->cache_resources, netfs_i_cookie(ctx));
}

/*
 * netfs_prepare_read_iterator - Prepare the subreq iterator for I/O
 * @subreq: The subrequest to be set up
 *
 * Prepare the I/O iterator representing the read buffer on a subrequest for
 * the filesystem to use for I/O (it can be passed directly to a socket).  This
 * is intended to be called from the ->issue_read() method once the filesystem
 * has trimmed the request to the size it wants.
 *
 * Returns the limited size if successful and -ENOMEM if insufficient memory
 * available.
 *
 * [!] NOTE: This must be run in the same thread as ->issue_read() was called
 * in as we access the readahead_control struct.
 */
static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *rreq = subreq->rreq;
	size_t rsize = subreq->len;

	if (subreq->source == NETFS_DOWNLOAD_FROM_SERVER)
		rsize = umin(rsize, rreq->io_streams[0].sreq_max_len);

	if (rreq->ractl) {
		/* If we don't have sufficient folios in the rolling buffer,
		 * extract a folioq's worth from the readahead region at a time
		 * into the buffer.  Note that this acquires a ref on each page
		 * that we will need to release later - but we don't want to do
		 * that until after we've started the I/O.
		 */
		struct folio_batch put_batch;

		folio_batch_init(&put_batch);
		while (rreq->submitted < subreq->start + rsize) {
			ssize_t added;

			added = rolling_buffer_load_from_ra(&rreq->buffer, rreq->ractl,
							    &put_batch);
			if (added < 0)
				return added;
			rreq->submitted += added;
		}
		folio_batch_release(&put_batch);
	}

	subreq->len = rsize;
	if (unlikely(rreq->io_streams[0].sreq_max_segs)) {
		size_t limit = netfs_limit_iter(&rreq->buffer.iter, 0, rsize,
						rreq->io_streams[0].sreq_max_segs);

		if (limit < rsize) {
			subreq->len = limit;
			trace_netfs_sreq(subreq, netfs_sreq_trace_limited);
		}
	}

	subreq->io_iter = rreq->buffer.iter;

	iov_iter_truncate(&subreq->io_iter, subreq->len);
	rolling_buffer_advance(&rreq->buffer, subreq->len);
	return subreq->len;
}

static enum netfs_io_source netfs_cache_prepare_read(struct netfs_io_request *rreq,
						     struct netfs_io_subrequest *subreq,
						     loff_t i_size)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;
	enum netfs_io_source source;

	if (!cres->ops)
		return NETFS_DOWNLOAD_FROM_SERVER;
	source = cres->ops->prepare_read(subreq, i_size);
	trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
	return source;
}

/*
 * Issue a read against the cache.
 * - Eats the caller's ref on subreq.
 */
static void netfs_read_cache_to_pagecache(struct netfs_io_request *rreq,
					  struct netfs_io_subrequest *subreq)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	netfs_stat(&netfs_n_rh_read);
	cres->ops->read(cres, subreq->start, &subreq->io_iter, NETFS_READ_HOLE_IGNORE,
			netfs_cache_read_terminated, subreq);
}

static void netfs_queue_read(struct netfs_io_request *rreq,
			     struct netfs_io_subrequest *subreq,
			     bool last_subreq)
{
	struct netfs_io_stream *stream = &rreq->io_streams[0];

	__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);

	/* We add to the end of the list whilst the collector may be walking
	 * the list.  The collector only walks forwards and uses the lock to
	 * remove entries from the front.
	 */
	spin_lock(&rreq->lock);
	list_add_tail(&subreq->rreq_link, &stream->subrequests);
	if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
		stream->front = subreq;
		if (!stream->active) {
			stream->collected_to = stream->front->start;
			/* Store list pointers before active flag */
			smp_store_release(&stream->active, true);
		}
	}

	if (last_subreq) {
		smp_wmb(); /* Write lists before ALL_QUEUED. */
		set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
	}

	spin_unlock(&rreq->lock);
}

static void netfs_issue_read(struct netfs_io_request *rreq,
			     struct netfs_io_subrequest *subreq)
{
	switch (subreq->source) {
	case NETFS_DOWNLOAD_FROM_SERVER:
		rreq->netfs_ops->issue_read(subreq);
		break;
	case NETFS_READ_FROM_CACHE:
		netfs_read_cache_to_pagecache(rreq, subreq);
		break;
	default:
		__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
		subreq->error = 0;
		iov_iter_zero(subreq->len, &subreq->io_iter);
		subreq->transferred = subreq->len;
		netfs_read_subreq_terminated(subreq);
		break;
	}
}

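/*
 * Illustrative sketch, not part of this file: a minimal ->issue_read()
 * implementation of the kind invoked by netfs_issue_read() above.  "myfs" and
 * myfs_fetch_from_server() are hypothetical names; a real filesystem would do
 * its RPC here, filling subreq->io_iter, then report the outcome through
 * subreq->transferred or subreq->error before calling
 * netfs_read_subreq_terminated().
 */
#if 0
static void myfs_issue_read(struct netfs_io_subrequest *subreq)
{
	/* Hypothetical transport helper: read up to subreq->len bytes at
	 * subreq->start into the prepared iterator, returning the number of
	 * bytes transferred or a negative error code.
	 */
	ssize_t ret = myfs_fetch_from_server(subreq->rreq->inode, subreq->start,
					     subreq->len, &subreq->io_iter);

	if (ret < 0)
		subreq->error = ret;
	else
		subreq->transferred = ret;
	netfs_read_subreq_terminated(subreq);
}
#endif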

/*
 * Perform a read to the pagecache from a series of sources of different types,
 * slicing up the region to be read according to available cache blocks and
 * network rsize.
 */
static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
{
	struct netfs_inode *ictx = netfs_inode(rreq->inode);
	unsigned long long start = rreq->start;
	ssize_t size = rreq->len;
	int ret = 0;

	do {
		struct netfs_io_subrequest *subreq;
		enum netfs_io_source source = NETFS_SOURCE_UNKNOWN;
		ssize_t slice;

		subreq = netfs_alloc_subrequest(rreq);
		if (!subreq) {
			ret = -ENOMEM;
			break;
		}

		subreq->start = start;
		subreq->len = size;

		source = netfs_cache_prepare_read(rreq, subreq, rreq->i_size);
		subreq->source = source;
		if (source == NETFS_DOWNLOAD_FROM_SERVER) {
			unsigned long long zp = umin(ictx->zero_point, rreq->i_size);
			size_t len = subreq->len;

			if (unlikely(rreq->origin == NETFS_READ_SINGLE))
				zp = rreq->i_size;
			if (subreq->start >= zp) {
				subreq->source = source = NETFS_FILL_WITH_ZEROES;
				goto fill_with_zeroes;
			}

			if (len > zp - subreq->start)
				len = zp - subreq->start;
			if (len == 0) {
				pr_err("ZERO-LEN READ: R=%08x[%x] l=%zx/%zx s=%llx z=%llx i=%llx",
				       rreq->debug_id, subreq->debug_index,
				       subreq->len, size,
				       subreq->start, ictx->zero_point, rreq->i_size);
				break;
			}
			subreq->len = len;

			netfs_stat(&netfs_n_rh_download);
			if (rreq->netfs_ops->prepare_read) {
				ret = rreq->netfs_ops->prepare_read(subreq);
				if (ret < 0) {
					subreq->error = ret;
					/* Not queued - release both refs. */
					netfs_put_subrequest(subreq, false,
							     netfs_sreq_trace_put_cancel);
					netfs_put_subrequest(subreq, false,
							     netfs_sreq_trace_put_cancel);
					break;
				}
				trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
			}
			goto issue;
		}

	fill_with_zeroes:
		if (source == NETFS_FILL_WITH_ZEROES) {
			subreq->source = NETFS_FILL_WITH_ZEROES;
			trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
			netfs_stat(&netfs_n_rh_zero);
			goto issue;
		}

		if (source == NETFS_READ_FROM_CACHE) {
			trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
			goto issue;
		}

		pr_err("Unexpected read source %u\n", source);
		WARN_ON_ONCE(1);
		break;

	issue:
		slice = netfs_prepare_read_iterator(subreq);
		if (slice < 0) {
			ret = slice;
			subreq->error = ret;
			trace_netfs_sreq(subreq, netfs_sreq_trace_cancel);
			/* Not queued - release both refs. */
			netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
			netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
			break;
		}
		size -= slice;
		start += slice;

		netfs_queue_read(rreq, subreq, size <= 0);
		netfs_issue_read(rreq, subreq);
		cond_resched();
	} while (size > 0);

	if (unlikely(size > 0)) {
		smp_wmb(); /* Write lists before ALL_QUEUED. */
		set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
		netfs_wake_read_collector(rreq);
	}

	/* Defer error return as we may need to wait for outstanding I/O. */
	cmpxchg(&rreq->error, 0, ret);
}

/**
 * netfs_readahead - Helper to manage a read request
 * @ractl: The description of the readahead request
 *
 * Fulfil a readahead request by drawing data from the cache if possible, or
 * the netfs if not.  Space beyond the EOF is zero-filled.  Multiple I/O
 * requests from different sources will get munged together.  If necessary, the
 * readahead window can be expanded in either direction to a more convenient
 * alignment for RPC efficiency or to make storage in the cache feasible.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
void netfs_readahead(struct readahead_control *ractl)
{
	struct netfs_io_request *rreq;
	struct netfs_inode *ictx = netfs_inode(ractl->mapping->host);
	unsigned long long start = readahead_pos(ractl);
	size_t size = readahead_length(ractl);
	int ret;

	rreq = netfs_alloc_request(ractl->mapping, ractl->file, start, size,
				   NETFS_READAHEAD);
	if (IS_ERR(rreq))
		return;

	__set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags);

	ret = netfs_begin_cache_read(rreq, ictx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto cleanup_free;

	netfs_stat(&netfs_n_rh_readahead);
	trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
			 netfs_read_trace_readahead);

	netfs_rreq_expand(rreq, ractl);

	rreq->ractl = ractl;
	rreq->submitted = rreq->start;
	if (rolling_buffer_init(&rreq->buffer, rreq->debug_id, ITER_DEST) < 0)
		goto cleanup_free;
	netfs_read_to_pagecache(rreq);

	netfs_put_request(rreq, true, netfs_rreq_trace_put_return);
	return;

cleanup_free:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
	return;
}
EXPORT_SYMBOL(netfs_readahead);

/*
 * Create a rolling buffer with a single occupying folio.
 */
static int netfs_create_singular_buffer(struct netfs_io_request *rreq, struct folio *folio,
					unsigned int rollbuf_flags)
{
	ssize_t added;

	if (rolling_buffer_init(&rreq->buffer, rreq->debug_id, ITER_DEST) < 0)
		return -ENOMEM;

	added = rolling_buffer_append(&rreq->buffer, folio, rollbuf_flags);
	if (added < 0)
		return added;
	rreq->submitted = rreq->start + added;
	rreq->ractl = (struct readahead_control *)1UL;
	return 0;
}

/*
 * Read into gaps in a folio partially filled by a streaming write.
 */
static int netfs_read_gaps(struct file *file, struct folio *folio)
{
	struct netfs_io_request *rreq;
	struct address_space *mapping = folio->mapping;
	struct netfs_folio *finfo = netfs_folio_info(folio);
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	struct folio *sink = NULL;
	struct bio_vec *bvec;
	unsigned int from = finfo->dirty_offset;
	unsigned int to = from + finfo->dirty_len;
	unsigned int off = 0, i = 0;
	size_t flen = folio_size(folio);
	size_t nr_bvec = flen / PAGE_SIZE + 2;
	size_t part;
	int ret;

	_enter("%lx", folio->index);

	rreq = netfs_alloc_request(mapping, file, folio_pos(folio), flen, NETFS_READ_GAPS);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto alloc_error;
	}

	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto discard;

	netfs_stat(&netfs_n_rh_read_folio);
	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_read_gaps);

	/* Fiddle the buffer so that a gap at the beginning and/or a gap at the
	 * end get copied to, but the middle is discarded.
	 */
	ret = -ENOMEM;
	bvec = kmalloc_array(nr_bvec, sizeof(*bvec), GFP_KERNEL);
	if (!bvec)
		goto discard;

	sink = folio_alloc(GFP_KERNEL, 0);
	if (!sink) {
		kfree(bvec);
		goto discard;
	}

	trace_netfs_folio(folio, netfs_folio_trace_read_gaps);

	rreq->direct_bv = bvec;
	rreq->direct_bv_count = nr_bvec;
	if (from > 0) {
		bvec_set_folio(&bvec[i++], folio, from, 0);
		off = from;
	}
	while (off < to) {
		part = min_t(size_t, to - off, PAGE_SIZE);
		bvec_set_folio(&bvec[i++], sink, part, 0);
		off += part;
	}
	if (to < flen)
		bvec_set_folio(&bvec[i++], folio, flen - to, to);
	iov_iter_bvec(&rreq->buffer.iter, ITER_DEST, bvec, i, rreq->len);
	rreq->submitted = rreq->start + flen;

	netfs_read_to_pagecache(rreq);

	if (sink)
		folio_put(sink);

	ret = netfs_wait_for_read(rreq);
	if (ret >= 0) {
		flush_dcache_folio(folio);
		folio_mark_uptodate(folio);
	}
	folio_unlock(folio);
	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
	return ret < 0 ? ret : 0;

discard:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
alloc_error:
	folio_unlock(folio);
	return ret;
}

/**
 * netfs_read_folio - Helper to manage a read_folio request
 * @file: The file to read from
 * @folio: The folio to read
 *
 * Fulfil a read_folio request by drawing data from the cache if
 * possible, or the netfs if not.  Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
int netfs_read_folio(struct file *file, struct folio *folio)
{
	struct address_space *mapping = folio->mapping;
	struct netfs_io_request *rreq;
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	int ret;

	if (folio_test_dirty(folio)) {
		trace_netfs_folio(folio, netfs_folio_trace_read_gaps);
		return netfs_read_gaps(file, folio);
	}

	_enter("%lx", folio->index);

	rreq = netfs_alloc_request(mapping, file,
				   folio_pos(folio), folio_size(folio),
				   NETFS_READPAGE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto alloc_error;
	}

	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto discard;

	netfs_stat(&netfs_n_rh_read_folio);
	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);

	/* Set up the output buffer */
	ret = netfs_create_singular_buffer(rreq, folio, 0);
	if (ret < 0)
		goto discard;

	netfs_read_to_pagecache(rreq);
	ret = netfs_wait_for_read(rreq);
	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
	return ret < 0 ? ret : 0;

discard:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
alloc_error:
	folio_unlock(folio);
	return ret;
}
EXPORT_SYMBOL(netfs_read_folio);

/*
 * Prepare a folio for writing without reading first
 * @folio: The folio being prepared
 * @pos: starting position for the write
 * @len: length of write
 * @always_fill: T if the folio should always be completely filled/cleared
 *
 * In some cases, write_begin doesn't need to read at all:
 * - full folio write
 * - write that lies in a folio that is completely beyond EOF
 * - write that covers the folio from start to EOF or beyond it
 *
 * If any of these criteria are met, then zero out the unwritten parts
 * of the folio and return true. Otherwise, return false.
 */
static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
				  bool always_fill)
{
	struct inode *inode = folio_inode(folio);
	loff_t i_size = i_size_read(inode);
	size_t offset = offset_in_folio(folio, pos);
	size_t plen = folio_size(folio);

	if (unlikely(always_fill)) {
		if (pos - offset + len <= i_size)
			return false; /* Page entirely before EOF */
		folio_zero_segment(folio, 0, plen);
		folio_mark_uptodate(folio);
		return true;
	}

	/* Full folio write */
	if (offset == 0 && len >= plen)
		return true;

	/* Page entirely beyond the end of the file */
	if (pos - offset >= i_size)
		goto zero_out;

	/* Write that covers from the start of the folio to EOF or beyond */
	if (offset == 0 && (pos + len) >= i_size)
		goto zero_out;

	return false;
zero_out:
	folio_zero_segments(folio, 0, offset, offset + len, plen);
	return true;
}

/**
 * netfs_write_begin - Helper to prepare for writing [DEPRECATED]
 * @ctx: The netfs context
 * @file: The file to read from
 * @mapping: The mapping to read from
 * @pos: File position at which the write will begin
 * @len: The length of the write (may extend beyond the end of the folio chosen)
 * @_folio: Where to put the resultant folio
 * @_fsdata: Place for the netfs to store a cookie
 *
 * Pre-read data for a write-begin request by drawing data from the cache if
 * possible, or the netfs if not.  Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together.
 *
 * The calling netfs must provide a table of operations, only one of which,
 * issue_read, is mandatory.
 *
 * The check_write_begin() operation can be provided to check for and flush
 * conflicting writes once the folio is grabbed and locked.  It is passed a
 * pointer to the fsdata cookie that gets returned to the VM to be passed to
 * write_end.  It is permitted to sleep.  It should return 0 if the request
 * should go ahead or it may return an error.  It may also unlock and put the
 * folio, provided it sets ``*foliop`` to NULL, in which case a return of 0
 * will cause the folio to be re-got and the process to be retried.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 *
 * Note that this should be considered deprecated and netfs_perform_write()
 * used instead.
 */
int netfs_write_begin(struct netfs_inode *ctx,
		      struct file *file, struct address_space *mapping,
		      loff_t pos, unsigned int len, struct folio **_folio,
		      void **_fsdata)
{
	struct netfs_io_request *rreq;
	struct folio *folio;
	pgoff_t index = pos >> PAGE_SHIFT;
	int ret;

retry:
	folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
				    mapping_gfp_mask(mapping));
	if (IS_ERR(folio))
		return PTR_ERR(folio);

	if (ctx->ops->check_write_begin) {
		/* Allow the netfs (eg. ceph) to flush conflicts. */
		ret = ctx->ops->check_write_begin(file, pos, len, &folio, _fsdata);
		if (ret < 0) {
			trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
			goto error;
		}
		if (!folio)
			goto retry;
	}

	if (folio_test_uptodate(folio))
		goto have_folio;

	/* If the folio is beyond the EOF, we want to clear it - unless it's
	 * within the cache granule containing the EOF, in which case we need
	 * to preload the granule.
	 */
	if (!netfs_is_cache_enabled(ctx) &&
	    netfs_skip_folio_read(folio, pos, len, false)) {
		netfs_stat(&netfs_n_rh_write_zskip);
		goto have_folio_no_wait;
	}

	rreq = netfs_alloc_request(mapping, file,
				   folio_pos(folio), folio_size(folio),
				   NETFS_READ_FOR_WRITE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto error;
	}
	rreq->no_unlock_folio = folio->index;
	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);

	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto error_put;

	netfs_stat(&netfs_n_rh_write_begin);
	trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);

	/* Set up the output buffer */
	ret = netfs_create_singular_buffer(rreq, folio, 0);
	if (ret < 0)
		goto error_put;

	netfs_read_to_pagecache(rreq);
	ret = netfs_wait_for_read(rreq);
	if (ret < 0)
		goto error;
	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);

have_folio:
	ret = folio_wait_private_2_killable(folio);
	if (ret < 0)
		goto error;
have_folio_no_wait:
	*_folio = folio;
	_leave(" = 0");
	return 0;

error_put:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
error:
	if (folio) {
		folio_unlock(folio);
		folio_put(folio);
	}
	_leave(" = %d", ret);
	return ret;
}
EXPORT_SYMBOL(netfs_write_begin);

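/*
 * Illustrative sketch, not part of this file: a filesystem's ->write_begin()
 * address_space operation forwarding to the deprecated helper above.  "myfs"
 * is a hypothetical name, and the prototype assumes the folio-based
 * ->write_begin() form that matches netfs_write_begin()'s arguments.
 */
#if 0
static int myfs_write_begin(struct file *file, struct address_space *mapping,
			    loff_t pos, unsigned int len,
			    struct folio **foliop, void **fsdata)
{
	/* Pre-read (or zero) the affected folio, then hand it back locked. */
	return netfs_write_begin(netfs_inode(mapping->host), file, mapping,
				 pos, len, foliop, fsdata);
}
#endif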

/*
 * Preload the data into a folio we're proposing to write into.
 */
int netfs_prefetch_for_write(struct file *file, struct folio *folio,
			     size_t offset, size_t len)
{
	struct netfs_io_request *rreq;
	struct address_space *mapping = folio->mapping;
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	unsigned long long start = folio_pos(folio);
	size_t flen = folio_size(folio);
	int ret;

	_enter("%zx @%llx", flen, start);

	ret = -ENOMEM;

	rreq = netfs_alloc_request(mapping, file, start, flen,
				   NETFS_READ_FOR_WRITE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto error;
	}

	rreq->no_unlock_folio = folio->index;
	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto error_put;

	netfs_stat(&netfs_n_rh_write_begin);
	trace_netfs_read(rreq, start, flen, netfs_read_trace_prefetch_for_write);

	/* Set up the output buffer */
	ret = netfs_create_singular_buffer(rreq, folio, NETFS_ROLLBUF_PAGECACHE_MARK);
	if (ret < 0)
		goto error_put;

	netfs_read_to_pagecache(rreq);
	ret = netfs_wait_for_read(rreq);
	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
	return ret < 0 ? ret : 0;

error_put:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
error:
	_leave(" = %d", ret);
	return ret;
}

/**
 * netfs_buffered_read_iter - Filesystem buffered I/O read routine
 * @iocb: kernel I/O control block
 * @iter: destination for the data read
 *
 * This is the ->read_iter() routine for all filesystems that can use the page
 * cache directly.
 *
 * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
 * returned when no data can be read without waiting for I/O requests to
 * complete; it doesn't prevent readahead.
 *
 * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
 * shall be made for the read or for readahead.  When no data can be read,
 * -EAGAIN shall be returned.  When readahead would be triggered, a partial,
 * possibly empty read shall be returned.
 *
 * Return:
 * * number of bytes copied, even for partial reads
 * * negative error code (or 0 if IOCB_NOIO) if nothing was read
 */
ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct netfs_inode *ictx = netfs_inode(inode);
	ssize_t ret;

	if (WARN_ON_ONCE((iocb->ki_flags & IOCB_DIRECT) ||
			 test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags)))
		return -EINVAL;

	ret = netfs_start_io_read(inode);
	if (ret == 0) {
		ret = filemap_read(iocb, iter, 0);
		netfs_end_io_read(inode);
	}
	return ret;
}
EXPORT_SYMBOL(netfs_buffered_read_iter);

/**
 * netfs_file_read_iter - Generic filesystem read routine
 * @iocb: kernel I/O control block
 * @iter: destination for the data read
 *
 * This is the ->read_iter() routine for all filesystems that can use the page
 * cache directly.
 *
 * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
 * returned when no data can be read without waiting for I/O requests to
 * complete; it doesn't prevent readahead.
 *
 * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
 * shall be made for the read or for readahead.  When no data can be read,
 * -EAGAIN shall be returned.  When readahead would be triggered, a partial,
 * possibly empty read shall be returned.
 *
 * Return:
 * * number of bytes copied, even for partial reads
 * * negative error code (or 0 if IOCB_NOIO) if nothing was read
 */
ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct netfs_inode *ictx = netfs_inode(iocb->ki_filp->f_mapping->host);

	if ((iocb->ki_flags & IOCB_DIRECT) ||
	    test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags))
		return netfs_unbuffered_read_iter(iocb, iter);

	return netfs_buffered_read_iter(iocb, iter);
}
EXPORT_SYMBOL(netfs_file_read_iter);
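
/*
 * Illustrative sketch, not part of this file: how a network filesystem might
 * wire the exported read helpers above into its operation tables.  "myfs",
 * myfs_issue_read() and myfs_write_begin() are hypothetical names from the
 * sketches earlier in this file; only the members shown are implied by the
 * helpers above, and a real filesystem would fill in the rest of its ops.
 */
#if 0
static const struct netfs_request_ops myfs_req_ops = {
	.issue_read	= myfs_issue_read,	/* The only mandatory method. */
};

static const struct address_space_operations myfs_aops = {
	.read_folio	= netfs_read_folio,
	.readahead	= netfs_readahead,
	.write_begin	= myfs_write_begin,
	/* Plus the filesystem's write-side and invalidation methods. */
};

static const struct file_operations myfs_file_ops = {
	.read_iter	= netfs_file_read_iter,
	/* Plus open, release, write_iter, mmap, etc. */
};
#endif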