// SPDX-License-Identifier: GPL-2.0-or-later
/* Network filesystem high-level buffered read support.
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"

static void netfs_cache_expand_readahead(struct netfs_io_request *rreq,
					 unsigned long long *_start,
					 unsigned long long *_len,
					 unsigned long long i_size)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	if (cres->ops && cres->ops->expand_readahead)
		cres->ops->expand_readahead(cres, _start, _len, i_size);
}

static void netfs_rreq_expand(struct netfs_io_request *rreq,
			      struct readahead_control *ractl)
{
	/* Give the cache a chance to change the request parameters.  The
	 * resultant request must contain the original region.
	 */
	netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size);

	/* Give the netfs a chance to change the request parameters.  The
	 * resultant request must contain the original region.
	 */
	if (rreq->netfs_ops->expand_readahead)
		rreq->netfs_ops->expand_readahead(rreq);

	/* Expand the request if the cache wants it to start earlier.  Note
	 * that the expansion may get further extended if the VM wishes to
	 * insert THPs and the preferred start and/or end wind up in the middle
	 * of THPs.
	 *
	 * If this is the case, however, the THP size should be an integer
	 * multiple of the cache granule size, so we get a whole number of
	 * granules to deal with.
	 */
	if (rreq->start != readahead_pos(ractl) ||
	    rreq->len != readahead_length(ractl)) {
		readahead_expand(ractl, rreq->start, rreq->len);
		rreq->start = readahead_pos(ractl);
		rreq->len = readahead_length(ractl);

		trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
				 netfs_read_trace_expanded);
	}
}

/*
 * Begin an operation, and fetch the stored zero point value from the cookie if
 * available.
 */
static int netfs_begin_cache_read(struct netfs_io_request *rreq, struct netfs_inode *ctx)
{
	return fscache_begin_read_operation(&rreq->cache_resources, netfs_i_cookie(ctx));
}

/*
 * Decant the list of folios to read into a rolling buffer.
 */
static size_t netfs_load_buffer_from_ra(struct netfs_io_request *rreq,
					struct folio_queue *folioq,
					struct folio_batch *put_batch)
{
	unsigned int order, nr;
	size_t size = 0;

	nr = __readahead_batch(rreq->ractl, (struct page **)folioq->vec.folios,
			       ARRAY_SIZE(folioq->vec.folios));
	folioq->vec.nr = nr;
	for (int i = 0; i < nr; i++) {
		struct folio *folio = folioq_folio(folioq, i);

		trace_netfs_folio(folio, netfs_folio_trace_read);
		order = folio_order(folio);
		folioq->orders[i] = order;
		size += PAGE_SIZE << order;

		if (!folio_batch_add(put_batch, folio))
			folio_batch_release(put_batch);
	}

	for (int i = nr; i < folioq_nr_slots(folioq); i++)
		folioq_clear(folioq, i);

	return size;
}

/*
 * netfs_prepare_read_iterator - Prepare the subreq iterator for I/O
 * @subreq: The subrequest to be set up
 *
 * Prepare the I/O iterator representing the read buffer on a subrequest for
 * the filesystem to use for I/O (it can be passed directly to a socket).  This
 * is intended to be called from the ->issue_read() method once the filesystem
 * has trimmed the request to the size it wants.
 *
 * Returns the limited size if successful and -ENOMEM if insufficient memory
 * available.
 *
 * [!] NOTE: This must be run in the same thread as ->issue_read() was called
 * in as we access the readahead_control struct.
 */
static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *rreq = subreq->rreq;
	size_t rsize = subreq->len;

	if (subreq->source == NETFS_DOWNLOAD_FROM_SERVER)
		rsize = umin(rsize, rreq->io_streams[0].sreq_max_len);

	if (rreq->ractl) {
		/* If we don't have sufficient folios in the rolling buffer,
		 * extract a folioq's worth from the readahead region at a time
		 * into the buffer.  Note that this acquires a ref on each page
		 * that we will need to release later - but we don't want to do
		 * that until after we've started the I/O.
		 */
		struct folio_batch put_batch;

		folio_batch_init(&put_batch);
		while (rreq->submitted < subreq->start + rsize) {
			struct folio_queue *tail = rreq->buffer_tail, *new;
			size_t added;

			new = kmalloc(sizeof(*new), GFP_NOFS);
			if (!new)
				return -ENOMEM;
			netfs_stat(&netfs_n_folioq);
			folioq_init(new);
			new->prev = tail;
			tail->next = new;
			rreq->buffer_tail = new;
			added = netfs_load_buffer_from_ra(rreq, new, &put_batch);
			rreq->iter.count += added;
			rreq->submitted += added;
		}
		folio_batch_release(&put_batch);
	}

	subreq->len = rsize;
	if (unlikely(rreq->io_streams[0].sreq_max_segs)) {
		size_t limit = netfs_limit_iter(&rreq->iter, 0, rsize,
						rreq->io_streams[0].sreq_max_segs);

		if (limit < rsize) {
			subreq->len = limit;
			trace_netfs_sreq(subreq, netfs_sreq_trace_limited);
		}
	}

	subreq->io_iter = rreq->iter;

	if (iov_iter_is_folioq(&subreq->io_iter)) {
		if (subreq->io_iter.folioq_slot >= folioq_nr_slots(subreq->io_iter.folioq)) {
			subreq->io_iter.folioq = subreq->io_iter.folioq->next;
			subreq->io_iter.folioq_slot = 0;
		}
		subreq->curr_folioq = (struct folio_queue *)subreq->io_iter.folioq;
		subreq->curr_folioq_slot = subreq->io_iter.folioq_slot;
		subreq->curr_folio_order = subreq->curr_folioq->orders[subreq->curr_folioq_slot];
	}

	iov_iter_truncate(&subreq->io_iter, subreq->len);
	iov_iter_advance(&rreq->iter, subreq->len);
	return subreq->len;
}

static enum netfs_io_source netfs_cache_prepare_read(struct netfs_io_request *rreq,
						     struct netfs_io_subrequest *subreq,
						     loff_t i_size)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	if (!cres->ops)
		return NETFS_DOWNLOAD_FROM_SERVER;
	return cres->ops->prepare_read(subreq, i_size);
}

static void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error,
					bool was_async)
{
	struct netfs_io_subrequest *subreq = priv;

	if (transferred_or_error < 0) {
		netfs_read_subreq_terminated(subreq, transferred_or_error, was_async);
		return;
	}

	if (transferred_or_error > 0)
		subreq->transferred += transferred_or_error;
	netfs_read_subreq_terminated(subreq, 0, was_async);
}
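
/*
 * Illustrative sketch (not part of this file): a filesystem's ->issue_read()
 * typically pulls subreq->len bytes at subreq->start from the server into
 * subreq->io_iter (prepared above) and then signals completion in the same
 * way that netfs_cache_read_terminated() does.  "myfs" and myfs_fetch_data()
 * are hypothetical names used purely for illustration:
 *
 *	static void myfs_issue_read(struct netfs_io_subrequest *subreq)
 *	{
 *		ssize_t ret;
 *
 *		// Transfer into the prepared iterator; it may be handed
 *		// straight to a socket.
 *		ret = myfs_fetch_data(subreq->rreq->inode, subreq->start,
 *				      &subreq->io_iter);
 *		if (ret > 0)
 *			subreq->transferred += ret;
 *		netfs_read_subreq_terminated(subreq, ret < 0 ? ret : 0, false);
 *	}
 */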

/*
 * Issue a read against the cache.
 * - Eats the caller's ref on subreq.
 */
static void netfs_read_cache_to_pagecache(struct netfs_io_request *rreq,
					  struct netfs_io_subrequest *subreq)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	netfs_stat(&netfs_n_rh_read);
	cres->ops->read(cres, subreq->start, &subreq->io_iter, NETFS_READ_HOLE_IGNORE,
			netfs_cache_read_terminated, subreq);
}

/*
 * Perform a read to the pagecache from a series of sources of different types,
 * slicing up the region to be read according to available cache blocks and
 * network rsize.
 */
static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
{
	struct netfs_inode *ictx = netfs_inode(rreq->inode);
	unsigned long long start = rreq->start;
	ssize_t size = rreq->len;
	int ret = 0;

	atomic_inc(&rreq->nr_outstanding);

	do {
		struct netfs_io_subrequest *subreq;
		enum netfs_io_source source = NETFS_DOWNLOAD_FROM_SERVER;
		ssize_t slice;

		subreq = netfs_alloc_subrequest(rreq);
		if (!subreq) {
			ret = -ENOMEM;
			break;
		}

		subreq->start = start;
		subreq->len = size;

		atomic_inc(&rreq->nr_outstanding);
		spin_lock_bh(&rreq->lock);
		list_add_tail(&subreq->rreq_link, &rreq->subrequests);
		subreq->prev_donated = rreq->prev_donated;
		rreq->prev_donated = 0;
		trace_netfs_sreq(subreq, netfs_sreq_trace_added);
		spin_unlock_bh(&rreq->lock);

		source = netfs_cache_prepare_read(rreq, subreq, rreq->i_size);
		subreq->source = source;
		if (source == NETFS_DOWNLOAD_FROM_SERVER) {
			unsigned long long zp = umin(ictx->zero_point, rreq->i_size);
			size_t len = subreq->len;

			if (subreq->start >= zp) {
				subreq->source = source = NETFS_FILL_WITH_ZEROES;
				goto fill_with_zeroes;
			}

			if (len > zp - subreq->start)
				len = zp - subreq->start;
			if (len == 0) {
				pr_err("ZERO-LEN READ: R=%08x[%x] l=%zx/%zx s=%llx z=%llx i=%llx",
				       rreq->debug_id, subreq->debug_index,
				       subreq->len, size,
				       subreq->start, ictx->zero_point, rreq->i_size);
				break;
			}
			subreq->len = len;

			netfs_stat(&netfs_n_rh_download);
			if (rreq->netfs_ops->prepare_read) {
				ret = rreq->netfs_ops->prepare_read(subreq);
				if (ret < 0)
					goto prep_failed;
				trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
			}

			slice = netfs_prepare_read_iterator(subreq);
			if (slice < 0)
				goto prep_iter_failed;

			rreq->netfs_ops->issue_read(subreq);
			goto done;
		}

	fill_with_zeroes:
		if (source == NETFS_FILL_WITH_ZEROES) {
			subreq->source = NETFS_FILL_WITH_ZEROES;
			trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
			netfs_stat(&netfs_n_rh_zero);
			slice = netfs_prepare_read_iterator(subreq);
			if (slice < 0)
				goto prep_iter_failed;
			__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
			netfs_read_subreq_terminated(subreq, 0, false);
			goto done;
		}

		if (source == NETFS_READ_FROM_CACHE) {
			trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
			slice = netfs_prepare_read_iterator(subreq);
			if (slice < 0)
				goto prep_iter_failed;
			netfs_read_cache_to_pagecache(rreq, subreq);
			goto done;
		}

		pr_err("Unexpected read source %u\n", source);
		WARN_ON_ONCE(1);
		break;

	prep_iter_failed:
		ret = slice;
	prep_failed:
		subreq->error = ret;
		atomic_dec(&rreq->nr_outstanding);
		netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
		break;

	done:
		size -= slice;
		start += slice;
		cond_resched();
	} while (size > 0);

	if (atomic_dec_and_test(&rreq->nr_outstanding))
		netfs_rreq_terminated(rreq, false);

	/* Defer error return as we may need to wait for outstanding I/O. */
	cmpxchg(&rreq->error, 0, ret);
}

/*
 * Wait for the read operation to complete, successfully or otherwise.
 */
static int netfs_wait_for_read(struct netfs_io_request *rreq)
{
	int ret;

	trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip);
	wait_on_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS, TASK_UNINTERRUPTIBLE);
	ret = rreq->error;
	if (ret == 0 && rreq->submitted < rreq->len) {
		trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
		ret = -EIO;
	}

	return ret;
}

/*
 * Set up the initial folioq of buffer folios in the rolling buffer and set the
 * iterator to refer to it.
 */
static int netfs_prime_buffer(struct netfs_io_request *rreq)
{
	struct folio_queue *folioq;
	struct folio_batch put_batch;
	size_t added;

	folioq = kmalloc(sizeof(*folioq), GFP_KERNEL);
	if (!folioq)
		return -ENOMEM;
	netfs_stat(&netfs_n_folioq);
	folioq_init(folioq);
	rreq->buffer = folioq;
	rreq->buffer_tail = folioq;
	rreq->submitted = rreq->start;
	iov_iter_folio_queue(&rreq->iter, ITER_DEST, folioq, 0, 0, 0);

	folio_batch_init(&put_batch);
	added = netfs_load_buffer_from_ra(rreq, folioq, &put_batch);
	folio_batch_release(&put_batch);
	rreq->iter.count += added;
	rreq->submitted += added;
	return 0;
}

/**
 * netfs_readahead - Helper to manage a read request
 * @ractl: The description of the readahead request
 *
 * Fulfil a readahead request by drawing data from the cache if possible, or
 * the netfs if not.  Space beyond the EOF is zero-filled.  Multiple I/O
 * requests from different sources will get munged together.  If necessary, the
 * readahead window can be expanded in either direction to a more convenient
 * alignment for RPC efficiency or to make storage in the cache feasible.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
void netfs_readahead(struct readahead_control *ractl)
{
	struct netfs_io_request *rreq;
	struct netfs_inode *ictx = netfs_inode(ractl->mapping->host);
	unsigned long long start = readahead_pos(ractl);
	size_t size = readahead_length(ractl);
	int ret;

	rreq = netfs_alloc_request(ractl->mapping, ractl->file, start, size,
				   NETFS_READAHEAD);
	if (IS_ERR(rreq))
		return;

	ret = netfs_begin_cache_read(rreq, ictx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto cleanup_free;

	netfs_stat(&netfs_n_rh_readahead);
	trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
			 netfs_read_trace_readahead);

	netfs_rreq_expand(rreq, ractl);

	rreq->ractl = ractl;
	if (netfs_prime_buffer(rreq) < 0)
		goto cleanup_free;
	netfs_read_to_pagecache(rreq);

	netfs_put_request(rreq, true, netfs_rreq_trace_put_return);
	return;

cleanup_free:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
	return;
}
EXPORT_SYMBOL(netfs_readahead);
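
/*
 * Illustrative sketch (not part of this file): how a network filesystem might
 * hook the helper above into its address_space_operations.  "myfs" and
 * myfs_issue_read() are hypothetical names; in-tree users such as afs, ceph
 * and 9p follow the same pattern.
 *
 *	static const struct netfs_request_ops myfs_req_ops = {
 *		.issue_read	= myfs_issue_read,
 *	};
 *
 *	static const struct address_space_operations myfs_aops = {
 *		.read_folio	= netfs_read_folio,
 *		.readahead	= netfs_readahead,
 *		.dirty_folio	= netfs_dirty_folio,
 *		.release_folio	= netfs_release_folio,
 *		.invalidate_folio = netfs_invalidate_folio,
 *	};
 */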

/*
 * Create a rolling buffer with a single occupying folio.
 */
static int netfs_create_singular_buffer(struct netfs_io_request *rreq, struct folio *folio)
{
	struct folio_queue *folioq;

	folioq = kmalloc(sizeof(*folioq), GFP_KERNEL);
	if (!folioq)
		return -ENOMEM;

	netfs_stat(&netfs_n_folioq);
	folioq_init(folioq);
	folioq_append(folioq, folio);
	BUG_ON(folioq_folio(folioq, 0) != folio);
	BUG_ON(folioq_folio_order(folioq, 0) != folio_order(folio));
	rreq->buffer = folioq;
	rreq->buffer_tail = folioq;
	rreq->submitted = rreq->start + rreq->len;
	iov_iter_folio_queue(&rreq->iter, ITER_DEST, folioq, 0, 0, rreq->len);
	rreq->ractl = (struct readahead_control *)1UL;
	return 0;
}

/*
 * Read into gaps in a folio partially filled by a streaming write.
 */
static int netfs_read_gaps(struct file *file, struct folio *folio)
{
	struct netfs_io_request *rreq;
	struct address_space *mapping = folio->mapping;
	struct netfs_folio *finfo = netfs_folio_info(folio);
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	struct folio *sink = NULL;
	struct bio_vec *bvec;
	unsigned int from = finfo->dirty_offset;
	unsigned int to = from + finfo->dirty_len;
	unsigned int off = 0, i = 0;
	size_t flen = folio_size(folio);
	size_t nr_bvec = flen / PAGE_SIZE + 2;
	size_t part;
	int ret;

	_enter("%lx", folio->index);

	rreq = netfs_alloc_request(mapping, file, folio_pos(folio), flen, NETFS_READ_GAPS);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto alloc_error;
	}

	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto discard;

	netfs_stat(&netfs_n_rh_read_folio);
	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_read_gaps);

	/* Fiddle the buffer so that a gap at the beginning and/or a gap at the
	 * end get copied to, but the middle is discarded.
	 */
	ret = -ENOMEM;
	bvec = kmalloc_array(nr_bvec, sizeof(*bvec), GFP_KERNEL);
	if (!bvec)
		goto discard;

	sink = folio_alloc(GFP_KERNEL, 0);
	if (!sink) {
		kfree(bvec);
		goto discard;
	}

	trace_netfs_folio(folio, netfs_folio_trace_read_gaps);

	rreq->direct_bv = bvec;
	rreq->direct_bv_count = nr_bvec;
	if (from > 0) {
		bvec_set_folio(&bvec[i++], folio, from, 0);
		off = from;
	}
	while (off < to) {
		part = min_t(size_t, to - off, PAGE_SIZE);
		bvec_set_folio(&bvec[i++], sink, part, 0);
		off += part;
	}
	if (to < flen)
		bvec_set_folio(&bvec[i++], folio, flen - to, to);
	iov_iter_bvec(&rreq->iter, ITER_DEST, bvec, i, rreq->len);
	rreq->submitted = rreq->start + flen;

	netfs_read_to_pagecache(rreq);

	if (sink)
		folio_put(sink);

	ret = netfs_wait_for_read(rreq);
	if (ret == 0) {
		flush_dcache_folio(folio);
		folio_mark_uptodate(folio);
	}
	folio_unlock(folio);
	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
	return ret < 0 ? ret : 0;

discard:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
alloc_error:
	folio_unlock(folio);
	return ret;
}
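
/*
 * Worked example (illustrative only): for a 16KiB folio whose streaming write
 * dirtied bytes 4096..8191 (from = 4096, to = 8192, flen = 16384), the bvec
 * array built above would be:
 *
 *	bvec[0]: folio, offset 0,    len 4096  (leading gap - read for real)
 *	bvec[1]: sink,  offset 0,    len 4096  (dirty data - discarded)
 *	bvec[2]: folio, offset 8192, len 8192  (trailing gap - read for real)
 *
 * so the read fills the gaps without overwriting the dirty region.
 */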

/**
 * netfs_read_folio - Helper to manage a read_folio request
 * @file: The file to read from
 * @folio: The folio to read
 *
 * Fulfil a read_folio request by drawing data from the cache if
 * possible, or the netfs if not.  Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
int netfs_read_folio(struct file *file, struct folio *folio)
{
	struct address_space *mapping = folio->mapping;
	struct netfs_io_request *rreq;
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	int ret;

	if (folio_test_dirty(folio)) {
		trace_netfs_folio(folio, netfs_folio_trace_read_gaps);
		return netfs_read_gaps(file, folio);
	}

	_enter("%lx", folio->index);

	rreq = netfs_alloc_request(mapping, file,
				   folio_pos(folio), folio_size(folio),
				   NETFS_READPAGE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto alloc_error;
	}

	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto discard;

	netfs_stat(&netfs_n_rh_read_folio);
	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);

	/* Set up the output buffer */
	ret = netfs_create_singular_buffer(rreq, folio);
	if (ret < 0)
		goto discard;

	netfs_read_to_pagecache(rreq);
	ret = netfs_wait_for_read(rreq);
	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
	return ret < 0 ? ret : 0;

discard:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
alloc_error:
	folio_unlock(folio);
	return ret;
}
EXPORT_SYMBOL(netfs_read_folio);

/*
 * Prepare a folio for writing without reading first
 * @folio: The folio being prepared
 * @pos: starting position for the write
 * @len: length of write
 * @always_fill: T if the folio should always be completely filled/cleared
 *
 * In some cases, write_begin doesn't need to read at all:
 * - full folio write
 * - write that lies in a folio that is completely beyond EOF
 * - write that covers the folio from start to EOF or beyond it
 *
 * If any of these criteria are met, then zero out the unwritten parts
 * of the folio and return true.  Otherwise, return false.
 */
static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
				  bool always_fill)
{
	struct inode *inode = folio_inode(folio);
	loff_t i_size = i_size_read(inode);
	size_t offset = offset_in_folio(folio, pos);
	size_t plen = folio_size(folio);

	if (unlikely(always_fill)) {
		if (pos - offset + len <= i_size)
			return false; /* Page entirely before EOF */
		folio_zero_segment(folio, 0, plen);
		folio_mark_uptodate(folio);
		return true;
	}

	/* Full folio write */
	if (offset == 0 && len >= plen)
		return true;

	/* Page entirely beyond the end of the file */
	if (pos - offset >= i_size)
		goto zero_out;

	/* Write that covers from the start of the folio to EOF or beyond */
	if (offset == 0 && (pos + len) >= i_size)
		goto zero_out;

	return false;
zero_out:
	folio_zero_segments(folio, 0, offset, offset + len, plen);
	return true;
}

/**
 * netfs_write_begin - Helper to prepare for writing [DEPRECATED]
 * @ctx: The netfs context
 * @file: The file to read from
 * @mapping: The mapping to read from
 * @pos: File position at which the write will begin
 * @len: The length of the write (may extend beyond the end of the folio chosen)
 * @_folio: Where to put the resultant folio
 * @_fsdata: Place for the netfs to store a cookie
 *
 * Pre-read data for a write-begin request by drawing data from the cache if
 * possible, or the netfs if not.  Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together.
 *
 * The calling netfs must provide a table of operations, only one of which,
 * issue_read, is mandatory.
 *
 * The check_write_begin() operation can be provided to check for and flush
 * conflicting writes once the folio is grabbed and locked.  It is passed a
 * pointer to the fsdata cookie that gets returned to the VM to be passed to
 * write_end.  It is permitted to sleep.  It should return 0 if the request
 * should go ahead or it may return an error.  It may also unlock and put the
 * folio, provided it sets ``*foliop`` to NULL, in which case a return of 0
 * will cause the folio to be re-got and the process to be retried.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 *
 * Note that this should be considered deprecated and netfs_perform_write()
 * used instead.
 */
int netfs_write_begin(struct netfs_inode *ctx,
		      struct file *file, struct address_space *mapping,
		      loff_t pos, unsigned int len, struct folio **_folio,
		      void **_fsdata)
{
	struct netfs_io_request *rreq;
	struct folio *folio;
	pgoff_t index = pos >> PAGE_SHIFT;
	int ret;

retry:
	folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
				    mapping_gfp_mask(mapping));
	if (IS_ERR(folio))
		return PTR_ERR(folio);

	if (ctx->ops->check_write_begin) {
		/* Allow the netfs (eg. ceph) to flush conflicts. */
		ret = ctx->ops->check_write_begin(file, pos, len, &folio, _fsdata);
		if (ret < 0) {
			trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
			goto error;
		}
		if (!folio)
			goto retry;
	}

	if (folio_test_uptodate(folio))
		goto have_folio;

	/* If the folio is beyond the EOF, we want to clear it - unless it's
	 * within the cache granule containing the EOF, in which case we need
	 * to preload the granule.
	 */
	if (!netfs_is_cache_enabled(ctx) &&
	    netfs_skip_folio_read(folio, pos, len, false)) {
		netfs_stat(&netfs_n_rh_write_zskip);
		goto have_folio_no_wait;
	}

	rreq = netfs_alloc_request(mapping, file,
				   folio_pos(folio), folio_size(folio),
				   NETFS_READ_FOR_WRITE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto error;
	}
	rreq->no_unlock_folio = folio->index;
	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);

	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto error_put;

	netfs_stat(&netfs_n_rh_write_begin);
	trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);

	/* Set up the output buffer */
	ret = netfs_create_singular_buffer(rreq, folio);
	if (ret < 0)
		goto error_put;

	netfs_read_to_pagecache(rreq);
	ret = netfs_wait_for_read(rreq);
	if (ret < 0)
		goto error;
	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);

have_folio:
	ret = folio_wait_private_2_killable(folio);
	if (ret < 0)
		goto error;
have_folio_no_wait:
	*_folio = folio;
	_leave(" = 0");
	return 0;

error_put:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
error:
	if (folio) {
		folio_unlock(folio);
		folio_put(folio);
	}
	_leave(" = %d", ret);
	return ret;
}
EXPORT_SYMBOL(netfs_write_begin);
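
/*
 * Illustrative sketch (not part of this file): a check_write_begin()
 * implementation, per the kernel-doc above, may flush a conflicting write and
 * can drop the folio to force netfs_write_begin() to re-get it and retry.
 * "myfs" and myfs_flush_conflicting_write() are hypothetical names:
 *
 *	static int myfs_check_write_begin(struct file *file, loff_t pos,
 *					  unsigned int len, struct folio **foliop,
 *					  void **fsdata)
 *	{
 *		if (!myfs_flush_conflicting_write(file, *foliop))
 *			return 0;		// No conflict; carry on.
 *
 *		// Conflict: drop the folio and ask for a retry.
 *		folio_unlock(*foliop);
 *		folio_put(*foliop);
 *		*foliop = NULL;
 *		return 0;
 *	}
 */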

/*
 * Preload the data into a folio we're proposing to write into.
 */
int netfs_prefetch_for_write(struct file *file, struct folio *folio,
			     size_t offset, size_t len)
{
	struct netfs_io_request *rreq;
	struct address_space *mapping = folio->mapping;
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	unsigned long long start = folio_pos(folio);
	size_t flen = folio_size(folio);
	int ret;

	_enter("%zx @%llx", flen, start);

	ret = -ENOMEM;

	rreq = netfs_alloc_request(mapping, file, start, flen,
				   NETFS_READ_FOR_WRITE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto error;
	}

	rreq->no_unlock_folio = folio->index;
	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto error_put;

	netfs_stat(&netfs_n_rh_write_begin);
	trace_netfs_read(rreq, start, flen, netfs_read_trace_prefetch_for_write);

	/* Set up the output buffer */
	ret = netfs_create_singular_buffer(rreq, folio);
	if (ret < 0)
		goto error_put;

	folioq_mark2(rreq->buffer, 0);
	netfs_read_to_pagecache(rreq);
	ret = netfs_wait_for_read(rreq);
	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
	return ret;

error_put:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
error:
	_leave(" = %d", ret);
	return ret;
}

/**
 * netfs_buffered_read_iter - Filesystem buffered I/O read routine
 * @iocb: kernel I/O control block
 * @iter: destination for the data read
 *
 * This is the ->read_iter() routine for all filesystems that can use the page
 * cache directly.
 *
 * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
 * returned when no data can be read without waiting for I/O requests to
 * complete; it doesn't prevent readahead.
 *
 * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
 * shall be made for the read or for readahead.  When no data can be read,
 * -EAGAIN shall be returned.  When readahead would be triggered, a partial,
 * possibly empty read shall be returned.
 *
 * Return:
 * * number of bytes copied, even for partial reads
 * * negative error code (or 0 if IOCB_NOIO) if nothing was read
 */
ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct netfs_inode *ictx = netfs_inode(inode);
	ssize_t ret;

	if (WARN_ON_ONCE((iocb->ki_flags & IOCB_DIRECT) ||
			 test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags)))
		return -EINVAL;

	ret = netfs_start_io_read(inode);
	if (ret == 0) {
		ret = filemap_read(iocb, iter, 0);
		netfs_end_io_read(inode);
	}
	return ret;
}
EXPORT_SYMBOL(netfs_buffered_read_iter);

/**
 * netfs_file_read_iter - Generic filesystem read routine
 * @iocb: kernel I/O control block
 * @iter: destination for the data read
 *
 * This is the ->read_iter() routine for all filesystems that can use the page
 * cache directly.
 *
 * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
 * returned when no data can be read without waiting for I/O requests to
 * complete; it doesn't prevent readahead.
 *
 * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
 * shall be made for the read or for readahead.  When no data can be read,
 * -EAGAIN shall be returned.  When readahead would be triggered, a partial,
 * possibly empty read shall be returned.
 *
 * Return:
 * * number of bytes copied, even for partial reads
 * * negative error code (or 0 if IOCB_NOIO) if nothing was read
 */
ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct netfs_inode *ictx = netfs_inode(iocb->ki_filp->f_mapping->host);

	if ((iocb->ki_flags & IOCB_DIRECT) ||
	    test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags))
		return netfs_unbuffered_read_iter(iocb, iter);

	return netfs_buffered_read_iter(iocb, iter);
}
EXPORT_SYMBOL(netfs_file_read_iter);
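
/*
 * Illustrative sketch (not part of this file): wiring the read routines above
 * into a filesystem's file_operations.  "myfs" is a hypothetical name:
 *
 *	static const struct file_operations myfs_file_ops = {
 *		.llseek		= generic_file_llseek,
 *		.read_iter	= netfs_file_read_iter,
 *		.write_iter	= netfs_file_write_iter,
 *		.mmap		= generic_file_mmap,
 *	};
 */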