// SPDX-License-Identifier: GPL-2.0-or-later
/* Network filesystem high-level buffered read support.
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"

static void netfs_cache_expand_readahead(struct netfs_io_request *rreq,
					 unsigned long long *_start,
					 unsigned long long *_len,
					 unsigned long long i_size)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	if (cres->ops && cres->ops->expand_readahead)
		cres->ops->expand_readahead(cres, _start, _len, i_size);
}

static void netfs_rreq_expand(struct netfs_io_request *rreq,
			      struct readahead_control *ractl)
{
	/* Give the cache a chance to change the request parameters. The
	 * resultant request must contain the original region.
	 */
	netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size);

	/* Give the netfs a chance to change the request parameters. The
	 * resultant request must contain the original region.
	 */
	if (rreq->netfs_ops->expand_readahead)
		rreq->netfs_ops->expand_readahead(rreq);

	/* Expand the request if the cache wants it to start earlier. Note
	 * that the expansion may get further extended if the VM wishes to
	 * insert THPs and the preferred start and/or end wind up in the middle
	 * of THPs.
	 *
	 * If this is the case, however, the THP size should be an integer
	 * multiple of the cache granule size, so we get a whole number of
	 * granules to deal with.
	 */
	if (rreq->start != readahead_pos(ractl) ||
	    rreq->len != readahead_length(ractl)) {
		readahead_expand(ractl, rreq->start, rreq->len);
		rreq->start = readahead_pos(ractl);
		rreq->len = readahead_length(ractl);

		trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
				 netfs_read_trace_expanded);
	}
}
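
/*
 * Illustrative sketch only (not part of the upstream file): the sort of
 * ->expand_readahead() hook that netfs_rreq_expand() calls above.  A netfs
 * typically rounds the request out to its preferred I/O granule so that
 * storage in the cache stays aligned.  "examplefs" and the 256KiB granule
 * are assumptions made up for this example.
 */
static void __maybe_unused examplefs_expand_readahead(struct netfs_io_request *rreq)
{
	/* Hypothetical 256KiB granule (must be a power of two for round_*()). */
	const unsigned long long granule = 256 * 1024;
	unsigned long long start = round_down(rreq->start, granule);
	unsigned long long end = round_up(rreq->start + rreq->len, granule);

	rreq->start = start;
	rreq->len = end - start;
}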

/*
 * Begin an operation, and fetch the stored zero point value from the cookie if
 * available.
 */
static int netfs_begin_cache_read(struct netfs_io_request *rreq, struct netfs_inode *ctx)
{
	return fscache_begin_read_operation(&rreq->cache_resources, netfs_i_cookie(ctx));
}

/*
 * Decant the list of folios to read into a rolling buffer.
 */
static size_t netfs_load_buffer_from_ra(struct netfs_io_request *rreq,
					struct folio_queue *folioq,
					struct folio_batch *put_batch)
{
	unsigned int order, nr;
	size_t size = 0;

	nr = __readahead_batch(rreq->ractl, (struct page **)folioq->vec.folios,
			       ARRAY_SIZE(folioq->vec.folios));
	folioq->vec.nr = nr;
	for (int i = 0; i < nr; i++) {
		struct folio *folio = folioq_folio(folioq, i);

		trace_netfs_folio(folio, netfs_folio_trace_read);
		order = folio_order(folio);
		folioq->orders[i] = order;
		size += PAGE_SIZE << order;

		if (!folio_batch_add(put_batch, folio))
			folio_batch_release(put_batch);
	}

	for (int i = nr; i < folioq_nr_slots(folioq); i++)
		folioq_clear(folioq, i);

	return size;
}

/*
 * netfs_prepare_read_iterator - Prepare the subreq iterator for I/O
 * @subreq: The subrequest to be set up
 *
 * Prepare the I/O iterator representing the read buffer on a subrequest for
 * the filesystem to use for I/O (it can be passed directly to a socket). This
 * is intended to be called from the ->issue_read() method once the filesystem
 * has trimmed the request to the size it wants.
 *
 * Returns the limited size if successful and -ENOMEM if insufficient memory
 * available.
 *
 * [!] NOTE: This must be run in the same thread as ->issue_read() was called
 * in as we access the readahead_control struct.
 */
static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *rreq = subreq->rreq;
	size_t rsize = subreq->len;

	if (subreq->source == NETFS_DOWNLOAD_FROM_SERVER)
		rsize = umin(rsize, rreq->io_streams[0].sreq_max_len);

	if (rreq->ractl) {
		/* If we don't have sufficient folios in the rolling buffer,
		 * extract a folioq's worth from the readahead region at a time
		 * into the buffer. Note that this acquires a ref on each page
		 * that we will need to release later - but we don't want to do
		 * that until after we've started the I/O.
		 */
		struct folio_batch put_batch;

		folio_batch_init(&put_batch);
		while (rreq->submitted < subreq->start + rsize) {
			struct folio_queue *tail = rreq->buffer_tail, *new;
			size_t added;

			new = kmalloc(sizeof(*new), GFP_NOFS);
			if (!new)
				return -ENOMEM;
			netfs_stat(&netfs_n_folioq);
			folioq_init(new);
			new->prev = tail;
			tail->next = new;
			rreq->buffer_tail = new;
			added = netfs_load_buffer_from_ra(rreq, new, &put_batch);
			rreq->iter.count += added;
			rreq->submitted += added;
		}
		folio_batch_release(&put_batch);
	}

	subreq->len = rsize;
	if (unlikely(rreq->io_streams[0].sreq_max_segs)) {
		size_t limit = netfs_limit_iter(&rreq->iter, 0, rsize,
						rreq->io_streams[0].sreq_max_segs);

		if (limit < rsize) {
			subreq->len = limit;
			trace_netfs_sreq(subreq, netfs_sreq_trace_limited);
		}
	}

	subreq->io_iter = rreq->iter;

	if (iov_iter_is_folioq(&subreq->io_iter)) {
		if (subreq->io_iter.folioq_slot >= folioq_nr_slots(subreq->io_iter.folioq)) {
			subreq->io_iter.folioq = subreq->io_iter.folioq->next;
			subreq->io_iter.folioq_slot = 0;
		}
		subreq->curr_folioq = (struct folio_queue *)subreq->io_iter.folioq;
		subreq->curr_folioq_slot = subreq->io_iter.folioq_slot;
		subreq->curr_folio_order = subreq->curr_folioq->orders[subreq->curr_folioq_slot];
	}

	iov_iter_truncate(&subreq->io_iter, subreq->len);
	iov_iter_advance(&rreq->iter, subreq->len);
	return subreq->len;
}
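
/*
 * Illustrative sketch only (not part of the upstream file): a minimal
 * ->issue_read() implementation consuming the iterator prepared above.  A
 * real netfs would feed subreq->io_iter to its transport (e.g. a socket);
 * here the buffer is simply zero-filled so the sketch stays self-contained.
 * "examplefs" is an assumed name.
 */
static void __maybe_unused examplefs_issue_read(struct netfs_io_subrequest *subreq)
{
	size_t copied;

	/* Consume the prepared iterator as the transport would. */
	copied = iov_iter_zero(subreq->len, &subreq->io_iter);
	subreq->transferred += copied;

	/* Tell netfs the subrequest is done; 0 = no error, not async. */
	netfs_read_subreq_terminated(subreq, 0, false);
}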

static enum netfs_io_source netfs_cache_prepare_read(struct netfs_io_request *rreq,
						     struct netfs_io_subrequest *subreq,
						     loff_t i_size)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	if (!cres->ops)
		return NETFS_DOWNLOAD_FROM_SERVER;
	return cres->ops->prepare_read(subreq, i_size);
}

static void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error,
					bool was_async)
{
	struct netfs_io_subrequest *subreq = priv;

	if (transferred_or_error < 0) {
		netfs_read_subreq_terminated(subreq, transferred_or_error, was_async);
		return;
	}

	if (transferred_or_error > 0)
		subreq->transferred += transferred_or_error;
	netfs_read_subreq_terminated(subreq, 0, was_async);
}

/*
 * Issue a read against the cache.
 * - Eats the caller's ref on subreq.
 */
static void netfs_read_cache_to_pagecache(struct netfs_io_request *rreq,
					  struct netfs_io_subrequest *subreq)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	netfs_stat(&netfs_n_rh_read);
	cres->ops->read(cres, subreq->start, &subreq->io_iter, NETFS_READ_HOLE_IGNORE,
			netfs_cache_read_terminated, subreq);
}

/*
 * Perform a read to the pagecache from a series of sources of different types,
 * slicing up the region to be read according to available cache blocks and
 * network rsize.
 */
static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
{
	struct netfs_inode *ictx = netfs_inode(rreq->inode);
	unsigned long long start = rreq->start;
	ssize_t size = rreq->len;
	int ret = 0;

	atomic_inc(&rreq->nr_outstanding);

	do {
		struct netfs_io_subrequest *subreq;
		enum netfs_io_source source = NETFS_DOWNLOAD_FROM_SERVER;
		ssize_t slice;

		subreq = netfs_alloc_subrequest(rreq);
		if (!subreq) {
			ret = -ENOMEM;
			break;
		}

		subreq->start = start;
		subreq->len = size;

		atomic_inc(&rreq->nr_outstanding);
		spin_lock_bh(&rreq->lock);
		list_add_tail(&subreq->rreq_link, &rreq->subrequests);
		subreq->prev_donated = rreq->prev_donated;
		rreq->prev_donated = 0;
		trace_netfs_sreq(subreq, netfs_sreq_trace_added);
		spin_unlock_bh(&rreq->lock);

		source = netfs_cache_prepare_read(rreq, subreq, rreq->i_size);
		subreq->source = source;
		if (source == NETFS_DOWNLOAD_FROM_SERVER) {
			unsigned long long zp = umin(ictx->zero_point, rreq->i_size);
			size_t len = subreq->len;

			if (subreq->start >= zp) {
				subreq->source = source = NETFS_FILL_WITH_ZEROES;
				goto fill_with_zeroes;
			}

			if (len > zp - subreq->start)
				len = zp - subreq->start;
			if (len == 0) {
				pr_err("ZERO-LEN READ: R=%08x[%x] l=%zx/%zx s=%llx z=%llx i=%llx",
				       rreq->debug_id, subreq->debug_index,
				       subreq->len, size,
				       subreq->start, ictx->zero_point, rreq->i_size);
				break;
			}
			subreq->len = len;

			netfs_stat(&netfs_n_rh_download);
			if (rreq->netfs_ops->prepare_read) {
				ret = rreq->netfs_ops->prepare_read(subreq);
				if (ret < 0) {
					atomic_dec(&rreq->nr_outstanding);
					netfs_put_subrequest(subreq, false,
							     netfs_sreq_trace_put_cancel);
					break;
				}
				trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
			}

			slice = netfs_prepare_read_iterator(subreq);
			if (slice < 0) {
				atomic_dec(&rreq->nr_outstanding);
				netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
				ret = slice;
				break;
			}

			rreq->netfs_ops->issue_read(subreq);
			goto done;
		}

	fill_with_zeroes:
		if (source == NETFS_FILL_WITH_ZEROES) {
			subreq->source = NETFS_FILL_WITH_ZEROES;
			trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
			netfs_stat(&netfs_n_rh_zero);
			slice = netfs_prepare_read_iterator(subreq);
			__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
			netfs_read_subreq_terminated(subreq, 0, false);
			goto done;
		}

		if (source == NETFS_READ_FROM_CACHE) {
			trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
			slice = netfs_prepare_read_iterator(subreq);
			netfs_read_cache_to_pagecache(rreq, subreq);
			goto done;
		}

		pr_err("Unexpected read source %u\n", source);
		WARN_ON_ONCE(1);
		break;

	done:
		size -= slice;
		start += slice;
		cond_resched();
	} while (size > 0);

	if (atomic_dec_and_test(&rreq->nr_outstanding))
		netfs_rreq_terminated(rreq, false);

	/* Defer error return as we may need to wait for outstanding I/O. */
	cmpxchg(&rreq->error, 0, ret);
}
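
/*
 * Illustrative sketch only (not part of the upstream file): the optional
 * ->prepare_read() hook invoked from netfs_read_to_pagecache() above.  A
 * plausible pattern is to cap the stream's maximum subrequest length to
 * whatever the protocol negotiated; the 1MiB figure is an assumption made
 * for the example, as is the "examplefs" name.
 */
static int __maybe_unused examplefs_prepare_read(struct netfs_io_subrequest *subreq)
{
	/* Hypothetical wire limit; a real netfs would use its negotiated rsize. */
	subreq->rreq->io_streams[0].sreq_max_len = 1024 * 1024;
	return 0;
}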

/*
 * Wait for the read operation to complete, successfully or otherwise.
 */
static int netfs_wait_for_read(struct netfs_io_request *rreq)
{
	int ret;

	trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip);
	wait_on_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS, TASK_UNINTERRUPTIBLE);
	ret = rreq->error;
	if (ret == 0 && rreq->submitted < rreq->len) {
		trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
		ret = -EIO;
	}

	return ret;
}

/*
 * Set up the initial folioq of buffer folios in the rolling buffer and set the
 * iterator to refer to it.
 */
static int netfs_prime_buffer(struct netfs_io_request *rreq)
{
	struct folio_queue *folioq;
	struct folio_batch put_batch;
	size_t added;

	folioq = kmalloc(sizeof(*folioq), GFP_KERNEL);
	if (!folioq)
		return -ENOMEM;
	netfs_stat(&netfs_n_folioq);
	folioq_init(folioq);
	rreq->buffer = folioq;
	rreq->buffer_tail = folioq;
	rreq->submitted = rreq->start;
	iov_iter_folio_queue(&rreq->iter, ITER_DEST, folioq, 0, 0, 0);

	folio_batch_init(&put_batch);
	added = netfs_load_buffer_from_ra(rreq, folioq, &put_batch);
	folio_batch_release(&put_batch);
	rreq->iter.count += added;
	rreq->submitted += added;
	return 0;
}

/**
 * netfs_readahead - Helper to manage a read request
 * @ractl: The description of the readahead request
 *
 * Fulfil a readahead request by drawing data from the cache if possible, or
 * the netfs if not. Space beyond the EOF is zero-filled. Multiple I/O
 * requests from different sources will get munged together. If necessary, the
 * readahead window can be expanded in either direction to a more convenient
 * alignment for RPC efficiency or to make storage in the cache feasible.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
void netfs_readahead(struct readahead_control *ractl)
{
	struct netfs_io_request *rreq;
	struct netfs_inode *ictx = netfs_inode(ractl->mapping->host);
	unsigned long long start = readahead_pos(ractl);
	size_t size = readahead_length(ractl);
	int ret;

	rreq = netfs_alloc_request(ractl->mapping, ractl->file, start, size,
				   NETFS_READAHEAD);
	if (IS_ERR(rreq))
		return;

	ret = netfs_begin_cache_read(rreq, ictx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto cleanup_free;

	netfs_stat(&netfs_n_rh_readahead);
	trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
			 netfs_read_trace_readahead);

	netfs_rreq_expand(rreq, ractl);

	rreq->ractl = ractl;
	if (netfs_prime_buffer(rreq) < 0)
		goto cleanup_free;
	netfs_read_to_pagecache(rreq);

	netfs_put_request(rreq, true, netfs_rreq_trace_put_return);
	return;

cleanup_free:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
	return;
}
EXPORT_SYMBOL(netfs_readahead);
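
/*
 * Illustrative sketch only (not part of the upstream file): how the hooks
 * sketched earlier would be gathered into a netfs_request_ops table.  The
 * netfs context embedded in the inode ("contiguous to the vfs inode", as the
 * kernel-doc above puts it) is initialised against such a table, typically
 * via netfs_inode_init(), before netfs_readahead() is ever called.
 * "examplefs" and the hook implementations are assumptions.
 */
static const struct netfs_request_ops examplefs_req_ops __maybe_unused = {
	.expand_readahead	= examplefs_expand_readahead,
	.prepare_read		= examplefs_prepare_read,
	.issue_read		= examplefs_issue_read,
};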

/*
 * Create a rolling buffer with a single occupying folio.
 */
static int netfs_create_singular_buffer(struct netfs_io_request *rreq, struct folio *folio)
{
	struct folio_queue *folioq;

	folioq = kmalloc(sizeof(*folioq), GFP_KERNEL);
	if (!folioq)
		return -ENOMEM;

	netfs_stat(&netfs_n_folioq);
	folioq_init(folioq);
	folioq_append(folioq, folio);
	BUG_ON(folioq_folio(folioq, 0) != folio);
	BUG_ON(folioq_folio_order(folioq, 0) != folio_order(folio));
	rreq->buffer = folioq;
	rreq->buffer_tail = folioq;
	rreq->submitted = rreq->start + rreq->len;
	iov_iter_folio_queue(&rreq->iter, ITER_DEST, folioq, 0, 0, rreq->len);
	rreq->ractl = (struct readahead_control *)1UL;
	return 0;
}

/*
 * Read into gaps in a folio partially filled by a streaming write.
 */
static int netfs_read_gaps(struct file *file, struct folio *folio)
{
	struct netfs_io_request *rreq;
	struct address_space *mapping = folio->mapping;
	struct netfs_folio *finfo = netfs_folio_info(folio);
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	struct folio *sink = NULL;
	struct bio_vec *bvec;
	unsigned int from = finfo->dirty_offset;
	unsigned int to = from + finfo->dirty_len;
	unsigned int off = 0, i = 0;
	size_t flen = folio_size(folio);
	size_t nr_bvec = flen / PAGE_SIZE + 2;
	size_t part;
	int ret;

	_enter("%lx", folio->index);

	rreq = netfs_alloc_request(mapping, file, folio_pos(folio), flen, NETFS_READ_GAPS);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto alloc_error;
	}

	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto discard;

	netfs_stat(&netfs_n_rh_read_folio);
	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_read_gaps);

	/* Fiddle the buffer so that a gap at the beginning and/or a gap at the
	 * end get copied to, but the middle is discarded.
	 */
	ret = -ENOMEM;
	bvec = kmalloc_array(nr_bvec, sizeof(*bvec), GFP_KERNEL);
	if (!bvec)
		goto discard;

	sink = folio_alloc(GFP_KERNEL, 0);
	if (!sink) {
		kfree(bvec);
		goto discard;
	}

	trace_netfs_folio(folio, netfs_folio_trace_read_gaps);

	rreq->direct_bv = bvec;
	rreq->direct_bv_count = nr_bvec;
	if (from > 0) {
		bvec_set_folio(&bvec[i++], folio, from, 0);
		off = from;
	}
	while (off < to) {
		part = min_t(size_t, to - off, PAGE_SIZE);
		bvec_set_folio(&bvec[i++], sink, part, 0);
		off += part;
	}
	if (to < flen)
		bvec_set_folio(&bvec[i++], folio, flen - to, to);
	iov_iter_bvec(&rreq->iter, ITER_DEST, bvec, i, rreq->len);
	rreq->submitted = rreq->start + flen;

	netfs_read_to_pagecache(rreq);

	if (sink)
		folio_put(sink);

	ret = netfs_wait_for_read(rreq);
	if (ret == 0) {
		flush_dcache_folio(folio);
		folio_mark_uptodate(folio);
	}
	folio_unlock(folio);
	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
	return ret < 0 ? ret : 0;

discard:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
alloc_error:
	folio_unlock(folio);
	return ret;
}

/**
 * netfs_read_folio - Helper to manage a read_folio request
 * @file: The file to read from
 * @folio: The folio to read
 *
 * Fulfil a read_folio request by drawing data from the cache if
 * possible, or the netfs if not. Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
int netfs_read_folio(struct file *file, struct folio *folio)
{
	struct address_space *mapping = folio->mapping;
	struct netfs_io_request *rreq;
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	int ret;

	if (folio_test_dirty(folio)) {
		trace_netfs_folio(folio, netfs_folio_trace_read_gaps);
		return netfs_read_gaps(file, folio);
	}

	_enter("%lx", folio->index);

	rreq = netfs_alloc_request(mapping, file,
				   folio_pos(folio), folio_size(folio),
				   NETFS_READPAGE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto alloc_error;
	}

	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto discard;

	netfs_stat(&netfs_n_rh_read_folio);
	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);

	/* Set up the output buffer */
	ret = netfs_create_singular_buffer(rreq, folio);
	if (ret < 0)
		goto discard;

	netfs_read_to_pagecache(rreq);
	ret = netfs_wait_for_read(rreq);
	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
	return ret < 0 ? ret : 0;

discard:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
alloc_error:
	folio_unlock(folio);
	return ret;
}
EXPORT_SYMBOL(netfs_read_folio);
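
/*
 * Illustrative sketch only (not part of the upstream file): the two helpers
 * exported above are normally plugged straight into a filesystem's
 * address_space_operations.  Other netfs_* helpers (dirty_folio,
 * release_folio, invalidate_folio and friends) usually sit alongside them,
 * but only the entry points defined in this file are shown.  "examplefs" is
 * an assumed name.
 */
static const struct address_space_operations examplefs_aops __maybe_unused = {
	.read_folio	= netfs_read_folio,
	.readahead	= netfs_readahead,
};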

/*
 * Prepare a folio for writing without reading first
 * @folio: The folio being prepared
 * @pos: starting position for the write
 * @len: length of write
 * @always_fill: T if the folio should always be completely filled/cleared
 *
 * In some cases, write_begin doesn't need to read at all:
 * - full folio write
 * - write that lies in a folio that is completely beyond EOF
 * - write that covers the folio from start to EOF or beyond it
 *
 * If any of these criteria are met, then zero out the unwritten parts
 * of the folio and return true. Otherwise, return false.
 */
static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
				  bool always_fill)
{
	struct inode *inode = folio_inode(folio);
	loff_t i_size = i_size_read(inode);
	size_t offset = offset_in_folio(folio, pos);
	size_t plen = folio_size(folio);

	if (unlikely(always_fill)) {
		if (pos - offset + len <= i_size)
			return false; /* Page entirely before EOF */
		folio_zero_segment(folio, 0, plen);
		folio_mark_uptodate(folio);
		return true;
	}

	/* Full folio write */
	if (offset == 0 && len >= plen)
		return true;

	/* Page entirely beyond the end of the file */
	if (pos - offset >= i_size)
		goto zero_out;

	/* Write that covers from the start of the folio to EOF or beyond */
	if (offset == 0 && (pos + len) >= i_size)
		goto zero_out;

	return false;
zero_out:
	folio_zero_segments(folio, 0, offset, offset + len, plen);
	return true;
}

/**
 * netfs_write_begin - Helper to prepare for writing [DEPRECATED]
 * @ctx: The netfs context
 * @file: The file to read from
 * @mapping: The mapping to read from
 * @pos: File position at which the write will begin
 * @len: The length of the write (may extend beyond the end of the folio chosen)
 * @_folio: Where to put the resultant folio
 * @_fsdata: Place for the netfs to store a cookie
 *
 * Pre-read data for a write-begin request by drawing data from the cache if
 * possible, or the netfs if not. Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together.
 *
 * The calling netfs must provide a table of operations, only one of which,
 * issue_read, is mandatory.
 *
 * The check_write_begin() operation can be provided to check for and flush
 * conflicting writes once the folio is grabbed and locked. It is passed a
 * pointer to the fsdata cookie that gets returned to the VM to be passed to
 * write_end. It is permitted to sleep. It should return 0 if the request
 * should go ahead or it may return an error. It may also unlock and put the
 * folio, provided it sets ``*foliop`` to NULL, in which case a return of 0
 * will cause the folio to be re-got and the process to be retried.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 *
 * Note that this should be considered deprecated and netfs_perform_write()
 * used instead.
 */
int netfs_write_begin(struct netfs_inode *ctx,
		      struct file *file, struct address_space *mapping,
		      loff_t pos, unsigned int len, struct folio **_folio,
		      void **_fsdata)
{
	struct netfs_io_request *rreq;
	struct folio *folio;
	pgoff_t index = pos >> PAGE_SHIFT;
	int ret;

retry:
	folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
				    mapping_gfp_mask(mapping));
	if (IS_ERR(folio))
		return PTR_ERR(folio);

	if (ctx->ops->check_write_begin) {
		/* Allow the netfs (eg. ceph) to flush conflicts. */
		ret = ctx->ops->check_write_begin(file, pos, len, &folio, _fsdata);
		if (ret < 0) {
			trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
			goto error;
		}
		if (!folio)
			goto retry;
	}

	if (folio_test_uptodate(folio))
		goto have_folio;

	/* If the folio is beyond the EOF, we want to clear it - unless it's
	 * within the cache granule containing the EOF, in which case we need
	 * to preload the granule.
	 */
	if (!netfs_is_cache_enabled(ctx) &&
	    netfs_skip_folio_read(folio, pos, len, false)) {
		netfs_stat(&netfs_n_rh_write_zskip);
		goto have_folio_no_wait;
	}

	rreq = netfs_alloc_request(mapping, file,
				   folio_pos(folio), folio_size(folio),
				   NETFS_READ_FOR_WRITE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto error;
	}
	rreq->no_unlock_folio = folio->index;
	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);

	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto error_put;

	netfs_stat(&netfs_n_rh_write_begin);
	trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);

	/* Set up the output buffer */
	ret = netfs_create_singular_buffer(rreq, folio);
	if (ret < 0)
		goto error_put;

	netfs_read_to_pagecache(rreq);
	ret = netfs_wait_for_read(rreq);
	if (ret < 0)
		goto error;
	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);

have_folio:
	ret = folio_wait_private_2_killable(folio);
	if (ret < 0)
		goto error;
have_folio_no_wait:
	*_folio = folio;
	_leave(" = 0");
	return 0;

error_put:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
error:
	if (folio) {
		folio_unlock(folio);
		folio_put(folio);
	}
	_leave(" = %d", ret);
	return ret;
}
EXPORT_SYMBOL(netfs_write_begin);
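
/*
 * Illustrative sketch only (not part of the upstream file): a filesystem
 * still on the deprecated write_begin path typically wraps the helper from
 * its own ->write_begin() hook, deriving the netfs context from the
 * mapping's host inode.  The wrapper name and its exact parameter list are
 * assumptions for this example.
 */
static int __maybe_unused examplefs_write_begin(struct file *file,
						struct address_space *mapping,
						loff_t pos, unsigned int len,
						struct folio **foliop,
						void **fsdata)
{
	/* On success, *foliop is returned locked and ready for the write. */
	return netfs_write_begin(netfs_inode(mapping->host), file, mapping,
				 pos, len, foliop, fsdata);
}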

/*
 * Preload the data into a folio we're proposing to write into.
 */
int netfs_prefetch_for_write(struct file *file, struct folio *folio,
			     size_t offset, size_t len)
{
	struct netfs_io_request *rreq;
	struct address_space *mapping = folio->mapping;
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	unsigned long long start = folio_pos(folio);
	size_t flen = folio_size(folio);
	int ret;

	_enter("%zx @%llx", flen, start);

	ret = -ENOMEM;

	rreq = netfs_alloc_request(mapping, file, start, flen,
				   NETFS_READ_FOR_WRITE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto error;
	}

	rreq->no_unlock_folio = folio->index;
	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto error_put;

	netfs_stat(&netfs_n_rh_write_begin);
	trace_netfs_read(rreq, start, flen, netfs_read_trace_prefetch_for_write);

	/* Set up the output buffer */
	ret = netfs_create_singular_buffer(rreq, folio);
	if (ret < 0)
		goto error_put;

	folioq_mark2(rreq->buffer, 0);
	netfs_read_to_pagecache(rreq);
	ret = netfs_wait_for_read(rreq);
	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
	return ret;

error_put:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
error:
	_leave(" = %d", ret);
	return ret;
}

/**
 * netfs_buffered_read_iter - Filesystem buffered I/O read routine
 * @iocb: kernel I/O control block
 * @iter: destination for the data read
 *
 * This is the ->read_iter() routine for all filesystems that can use the page
 * cache directly.
 *
 * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
 * returned when no data can be read without waiting for I/O requests to
 * complete; it doesn't prevent readahead.
 *
 * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
 * shall be made for the read or for readahead. When no data can be read,
 * -EAGAIN shall be returned. When readahead would be triggered, a partial,
 * possibly empty read shall be returned.
 *
 * Return:
 * * number of bytes copied, even for partial reads
 * * negative error code (or 0 if IOCB_NOIO) if nothing was read
 */
ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct netfs_inode *ictx = netfs_inode(inode);
	ssize_t ret;

	if (WARN_ON_ONCE((iocb->ki_flags & IOCB_DIRECT) ||
			 test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags)))
		return -EINVAL;

	ret = netfs_start_io_read(inode);
	if (ret == 0) {
		ret = filemap_read(iocb, iter, 0);
		netfs_end_io_read(inode);
	}
	return ret;
}
EXPORT_SYMBOL(netfs_buffered_read_iter);

/**
 * netfs_file_read_iter - Generic filesystem read routine
 * @iocb: kernel I/O control block
 * @iter: destination for the data read
 *
 * This is the ->read_iter() routine for all filesystems that can use the page
 * cache directly.
 *
 * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
 * returned when no data can be read without waiting for I/O requests to
 * complete; it doesn't prevent readahead.
 *
 * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
 * shall be made for the read or for readahead. When no data can be read,
 * -EAGAIN shall be returned. When readahead would be triggered, a partial,
 * possibly empty read shall be returned.
 *
 * Return:
 * * number of bytes copied, even for partial reads
 * * negative error code (or 0 if IOCB_NOIO) if nothing was read
 */
ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct netfs_inode *ictx = netfs_inode(iocb->ki_filp->f_mapping->host);

	if ((iocb->ki_flags & IOCB_DIRECT) ||
	    test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags))
		return netfs_unbuffered_read_iter(iocb, iter);

	return netfs_buffered_read_iter(iocb, iter);
}
EXPORT_SYMBOL(netfs_file_read_iter);
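
/*
 * Illustrative sketch only (not part of the upstream file): the read side of
 * a netfs's file_operations usually just points ->read_iter at
 * netfs_file_read_iter(), which picks the buffered or unbuffered path as
 * described in the kernel-doc above.  "examplefs" is an assumed name; the
 * generic llseek helper is shown purely to make the table look realistic.
 */
static const struct file_operations examplefs_file_ops __maybe_unused = {
	.llseek		= generic_file_llseek,
	.read_iter	= netfs_file_read_iter,
};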