xref: /linux/fs/netfs/buffered_read.c (revision bf36793fa260cb68cc817f311f1f683788261796)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* Network filesystem high-level buffered read support.
3  *
4  * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
5  * Written by David Howells (dhowells@redhat.com)
6  */
7 
8 #include <linux/export.h>
9 #include <linux/task_io_accounting_ops.h>
10 #include "internal.h"
11 
12 /*
13  * [DEPRECATED] Unlock the folios in a read operation for when the filesystem
14  * is using PG_private_2 and direct writing to the cache from here rather than
15  * marking the page for writeback.
16  *
17  * Note that we don't touch folio->private in this code.
18  */
19 static void netfs_rreq_unlock_folios_pgpriv2(struct netfs_io_request *rreq,
20 					     size_t *account)
21 {
22 	struct netfs_io_subrequest *subreq;
23 	struct folio *folio;
24 	pgoff_t start_page = rreq->start / PAGE_SIZE;
25 	pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
26 	bool subreq_failed = false;
27 
28 	XA_STATE(xas, &rreq->mapping->i_pages, start_page);
29 
30 	/* Walk through the pagecache and the I/O request lists simultaneously.
31 	 * We may have a mixture of cached and uncached sections and we only
32 	 * really want to write out the uncached sections.  This is slightly
33 	 * complicated by the possibility that we might have huge pages with a
34 	 * mixture inside.
35 	 */
36 	subreq = list_first_entry(&rreq->subrequests,
37 				  struct netfs_io_subrequest, rreq_link);
38 	subreq_failed = (subreq->error < 0);
39 
40 	trace_netfs_rreq(rreq, netfs_rreq_trace_unlock_pgpriv2);
41 
42 	rcu_read_lock();
43 	xas_for_each(&xas, folio, last_page) {
44 		loff_t pg_end;
45 		bool pg_failed = false;
46 		bool folio_started = false;
47 
48 		if (xas_retry(&xas, folio))
49 			continue;
50 
51 		pg_end = folio_pos(folio) + folio_size(folio) - 1;
52 
53 		for (;;) {
54 			loff_t sreq_end;
55 
56 			if (!subreq) {
57 				pg_failed = true;
58 				break;
59 			}
60 
61 			if (!folio_started &&
62 			    test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags) &&
63 			    fscache_operation_valid(&rreq->cache_resources)) {
64 				trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
65 				folio_start_private_2(folio);
66 				folio_started = true;
67 			}
68 
69 			pg_failed |= subreq_failed;
70 			sreq_end = subreq->start + subreq->len - 1;
71 			if (pg_end < sreq_end)
72 				break;
73 
74 			*account += subreq->transferred;
75 			if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
76 				subreq = list_next_entry(subreq, rreq_link);
77 				subreq_failed = (subreq->error < 0);
78 			} else {
79 				subreq = NULL;
80 				subreq_failed = false;
81 			}
82 
83 			if (pg_end == sreq_end)
84 				break;
85 		}
86 
87 		if (!pg_failed) {
88 			flush_dcache_folio(folio);
89 			folio_mark_uptodate(folio);
90 		}
91 
92 		if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
93 			if (folio->index == rreq->no_unlock_folio &&
94 			    test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
95 				_debug("no unlock");
96 			else
97 				folio_unlock(folio);
98 		}
99 	}
100 	rcu_read_unlock();
101 }
102 
103 /*
104  * Unlock the folios in a read operation.  We need to set PG_writeback on any
105  * folios we're going to write back before we unlock them.
106  *
107  * Note that if the deprecated NETFS_RREQ_USE_PGPRIV2 is set then we use
108  * PG_private_2 and do a direct write to the cache from here instead.
109  */
110 void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
111 {
112 	struct netfs_io_subrequest *subreq;
113 	struct netfs_folio *finfo;
114 	struct folio *folio;
115 	pgoff_t start_page = rreq->start / PAGE_SIZE;
116 	pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
117 	size_t account = 0;
118 	bool subreq_failed = false;
119 
120 	XA_STATE(xas, &rreq->mapping->i_pages, start_page);
121 
122 	if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) {
123 		__clear_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags);
124 		list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
125 			__clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
126 		}
127 	}
128 
129 	/* Handle deprecated PG_private_2 case. */
130 	if (test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) {
131 		netfs_rreq_unlock_folios_pgpriv2(rreq, &account);
132 		goto out;
133 	}
134 
135 	/* Walk through the pagecache and the I/O request lists simultaneously.
136 	 * We may have a mixture of cached and uncached sections and we only
137 	 * really want to write out the uncached sections.  This is slightly
138 	 * complicated by the possibility that we might have huge pages with a
139 	 * mixture inside.
140 	 */
141 	subreq = list_first_entry(&rreq->subrequests,
142 				  struct netfs_io_subrequest, rreq_link);
143 	subreq_failed = (subreq->error < 0);
144 
145 	trace_netfs_rreq(rreq, netfs_rreq_trace_unlock);
146 
147 	rcu_read_lock();
148 	xas_for_each(&xas, folio, last_page) {
149 		loff_t pg_end;
150 		bool pg_failed = false;
151 		bool wback_to_cache = false;
152 
153 		if (xas_retry(&xas, folio))
154 			continue;
155 
156 		pg_end = folio_pos(folio) + folio_size(folio) - 1;
157 
158 		for (;;) {
159 			loff_t sreq_end;
160 
161 			if (!subreq) {
162 				pg_failed = true;
163 				break;
164 			}
165 
166 			wback_to_cache |= test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
167 			pg_failed |= subreq_failed;
168 			sreq_end = subreq->start + subreq->len - 1;
169 			if (pg_end < sreq_end)
170 				break;
171 
172 			account += subreq->transferred;
173 			if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
174 				subreq = list_next_entry(subreq, rreq_link);
175 				subreq_failed = (subreq->error < 0);
176 			} else {
177 				subreq = NULL;
178 				subreq_failed = false;
179 			}
180 
181 			if (pg_end == sreq_end)
182 				break;
183 		}
184 
185 		if (!pg_failed) {
186 			flush_dcache_folio(folio);
187 			finfo = netfs_folio_info(folio);
188 			if (finfo) {
189 				trace_netfs_folio(folio, netfs_folio_trace_filled_gaps);
190 				if (finfo->netfs_group)
191 					folio_change_private(folio, finfo->netfs_group);
192 				else
193 					folio_detach_private(folio);
194 				kfree(finfo);
195 			}
196 			folio_mark_uptodate(folio);
197 			if (wback_to_cache && !WARN_ON_ONCE(folio_get_private(folio) != NULL)) {
198 				trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
199 				folio_attach_private(folio, NETFS_FOLIO_COPY_TO_CACHE);
200 				filemap_dirty_folio(folio->mapping, folio);
201 			}
202 		}
203 
204 		if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
205 			if (folio->index == rreq->no_unlock_folio &&
206 			    test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
207 				_debug("no unlock");
208 			else
209 				folio_unlock(folio);
210 		}
211 	}
212 	rcu_read_unlock();
213 
214 out:
215 	task_io_account_read(account);
216 	if (rreq->netfs_ops->done)
217 		rreq->netfs_ops->done(rreq);
218 }
219 
220 static void netfs_cache_expand_readahead(struct netfs_io_request *rreq,
221 					 unsigned long long *_start,
222 					 unsigned long long *_len,
223 					 unsigned long long i_size)
224 {
225 	struct netfs_cache_resources *cres = &rreq->cache_resources;
226 
227 	if (cres->ops && cres->ops->expand_readahead)
228 		cres->ops->expand_readahead(cres, _start, _len, i_size);
229 }
230 
231 static void netfs_rreq_expand(struct netfs_io_request *rreq,
232 			      struct readahead_control *ractl)
233 {
234 	/* Give the cache a chance to change the request parameters.  The
235 	 * resultant request must contain the original region.
236 	 */
237 	netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size);
238 
239 	/* Give the netfs a chance to change the request parameters.  The
240 	 * resultant request must contain the original region.
241 	 */
242 	if (rreq->netfs_ops->expand_readahead)
243 		rreq->netfs_ops->expand_readahead(rreq);
244 
245 	/* Expand the request if the cache wants it to start earlier.  Note
246 	 * that the expansion may get further extended if the VM wishes to
247 	 * insert THPs and the preferred start and/or end wind up in the middle
248 	 * of THPs.
249 	 *
250 	 * If this is the case, however, the THP size should be an integer
251 	 * multiple of the cache granule size, so we get a whole number of
252 	 * granules to deal with.
253 	 */
254 	if (rreq->start  != readahead_pos(ractl) ||
255 	    rreq->len != readahead_length(ractl)) {
256 		readahead_expand(ractl, rreq->start, rreq->len);
257 		rreq->start  = readahead_pos(ractl);
258 		rreq->len = readahead_length(ractl);
259 
260 		trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
261 				 netfs_read_trace_expanded);
262 	}
263 }
264 
265 /*
266  * Begin an operation, and fetch the stored zero point value from the cookie if
267  * available.
268  */
269 static int netfs_begin_cache_read(struct netfs_io_request *rreq, struct netfs_inode *ctx)
270 {
271 	return fscache_begin_read_operation(&rreq->cache_resources, netfs_i_cookie(ctx));
272 }
273 
274 /**
275  * netfs_readahead - Helper to manage a read request
276  * @ractl: The description of the readahead request
277  *
278  * Fulfil a readahead request by drawing data from the cache if possible, or
279  * the netfs if not.  Space beyond the EOF is zero-filled.  Multiple I/O
280  * requests from different sources will get munged together.  If necessary, the
281  * readahead window can be expanded in either direction to a more convenient
282  * alighment for RPC efficiency or to make storage in the cache feasible.
283  *
284  * The calling netfs must initialise a netfs context contiguous to the vfs
285  * inode before calling this.
286  *
287  * This is usable whether or not caching is enabled.
288  */
289 void netfs_readahead(struct readahead_control *ractl)
290 {
291 	struct netfs_io_request *rreq;
292 	struct netfs_inode *ctx = netfs_inode(ractl->mapping->host);
293 	int ret;
294 
295 	_enter("%lx,%x", readahead_index(ractl), readahead_count(ractl));
296 
297 	if (readahead_count(ractl) == 0)
298 		return;
299 
300 	rreq = netfs_alloc_request(ractl->mapping, ractl->file,
301 				   readahead_pos(ractl),
302 				   readahead_length(ractl),
303 				   NETFS_READAHEAD);
304 	if (IS_ERR(rreq))
305 		return;
306 
307 	ret = netfs_begin_cache_read(rreq, ctx);
308 	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
309 		goto cleanup_free;
310 
311 	netfs_stat(&netfs_n_rh_readahead);
312 	trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
313 			 netfs_read_trace_readahead);
314 
315 	netfs_rreq_expand(rreq, ractl);
316 
317 	/* Set up the output buffer */
318 	iov_iter_xarray(&rreq->iter, ITER_DEST, &ractl->mapping->i_pages,
319 			rreq->start, rreq->len);
320 
321 	/* Drop the refs on the folios here rather than in the cache or
322 	 * filesystem.  The locks will be dropped in netfs_rreq_unlock().
323 	 */
324 	while (readahead_folio(ractl))
325 		;
326 
327 	netfs_begin_read(rreq, false);
328 	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
329 	return;
330 
331 cleanup_free:
332 	netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
333 	return;
334 }
335 EXPORT_SYMBOL(netfs_readahead);
336 
337 /**
338  * netfs_read_folio - Helper to manage a read_folio request
339  * @file: The file to read from
340  * @folio: The folio to read
341  *
342  * Fulfil a read_folio request by drawing data from the cache if
343  * possible, or the netfs if not.  Space beyond the EOF is zero-filled.
344  * Multiple I/O requests from different sources will get munged together.
345  *
346  * The calling netfs must initialise a netfs context contiguous to the vfs
347  * inode before calling this.
348  *
349  * This is usable whether or not caching is enabled.
350  */
351 int netfs_read_folio(struct file *file, struct folio *folio)
352 {
353 	struct address_space *mapping = folio->mapping;
354 	struct netfs_io_request *rreq;
355 	struct netfs_inode *ctx = netfs_inode(mapping->host);
356 	struct folio *sink = NULL;
357 	int ret;
358 
359 	_enter("%lx", folio->index);
360 
361 	rreq = netfs_alloc_request(mapping, file,
362 				   folio_pos(folio), folio_size(folio),
363 				   NETFS_READPAGE);
364 	if (IS_ERR(rreq)) {
365 		ret = PTR_ERR(rreq);
366 		goto alloc_error;
367 	}
368 
369 	ret = netfs_begin_cache_read(rreq, ctx);
370 	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
371 		goto discard;
372 
373 	netfs_stat(&netfs_n_rh_read_folio);
374 	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);
375 
376 	/* Set up the output buffer */
377 	if (folio_test_dirty(folio)) {
378 		/* Handle someone trying to read from an unflushed streaming
379 		 * write.  We fiddle the buffer so that a gap at the beginning
380 		 * and/or a gap at the end get copied to, but the middle is
381 		 * discarded.
382 		 */
383 		struct netfs_folio *finfo = netfs_folio_info(folio);
384 		struct bio_vec *bvec;
385 		unsigned int from = finfo->dirty_offset;
386 		unsigned int to = from + finfo->dirty_len;
387 		unsigned int off = 0, i = 0;
388 		size_t flen = folio_size(folio);
389 		size_t nr_bvec = flen / PAGE_SIZE + 2;
390 		size_t part;
391 
392 		ret = -ENOMEM;
393 		bvec = kmalloc_array(nr_bvec, sizeof(*bvec), GFP_KERNEL);
394 		if (!bvec)
395 			goto discard;
396 
397 		sink = folio_alloc(GFP_KERNEL, 0);
398 		if (!sink)
399 			goto discard;
400 
401 		trace_netfs_folio(folio, netfs_folio_trace_read_gaps);
402 
403 		rreq->direct_bv = bvec;
404 		rreq->direct_bv_count = nr_bvec;
405 		if (from > 0) {
406 			bvec_set_folio(&bvec[i++], folio, from, 0);
407 			off = from;
408 		}
409 		while (off < to) {
410 			part = min_t(size_t, to - off, PAGE_SIZE);
411 			bvec_set_folio(&bvec[i++], sink, part, 0);
412 			off += part;
413 		}
414 		if (to < flen)
415 			bvec_set_folio(&bvec[i++], folio, flen - to, to);
416 		iov_iter_bvec(&rreq->iter, ITER_DEST, bvec, i, rreq->len);
417 	} else {
418 		iov_iter_xarray(&rreq->iter, ITER_DEST, &mapping->i_pages,
419 				rreq->start, rreq->len);
420 	}
421 
422 	ret = netfs_begin_read(rreq, true);
423 	if (sink)
424 		folio_put(sink);
425 	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
426 	return ret < 0 ? ret : 0;
427 
428 discard:
429 	netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
430 alloc_error:
431 	folio_unlock(folio);
432 	return ret;
433 }
434 EXPORT_SYMBOL(netfs_read_folio);
435 
436 /*
437  * Prepare a folio for writing without reading first
438  * @folio: The folio being prepared
439  * @pos: starting position for the write
440  * @len: length of write
441  * @always_fill: T if the folio should always be completely filled/cleared
442  *
443  * In some cases, write_begin doesn't need to read at all:
444  * - full folio write
445  * - write that lies in a folio that is completely beyond EOF
446  * - write that covers the folio from start to EOF or beyond it
447  *
448  * If any of these criteria are met, then zero out the unwritten parts
449  * of the folio and return true. Otherwise, return false.
450  */
451 static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
452 				 bool always_fill)
453 {
454 	struct inode *inode = folio_inode(folio);
455 	loff_t i_size = i_size_read(inode);
456 	size_t offset = offset_in_folio(folio, pos);
457 	size_t plen = folio_size(folio);
458 
459 	if (unlikely(always_fill)) {
460 		if (pos - offset + len <= i_size)
461 			return false; /* Page entirely before EOF */
462 		zero_user_segment(&folio->page, 0, plen);
463 		folio_mark_uptodate(folio);
464 		return true;
465 	}
466 
467 	/* Full folio write */
468 	if (offset == 0 && len >= plen)
469 		return true;
470 
471 	/* Page entirely beyond the end of the file */
472 	if (pos - offset >= i_size)
473 		goto zero_out;
474 
475 	/* Write that covers from the start of the folio to EOF or beyond */
476 	if (offset == 0 && (pos + len) >= i_size)
477 		goto zero_out;
478 
479 	return false;
480 zero_out:
481 	zero_user_segments(&folio->page, 0, offset, offset + len, plen);
482 	return true;
483 }
484 
485 /**
486  * netfs_write_begin - Helper to prepare for writing [DEPRECATED]
487  * @ctx: The netfs context
488  * @file: The file to read from
489  * @mapping: The mapping to read from
490  * @pos: File position at which the write will begin
491  * @len: The length of the write (may extend beyond the end of the folio chosen)
492  * @_folio: Where to put the resultant folio
493  * @_fsdata: Place for the netfs to store a cookie
494  *
495  * Pre-read data for a write-begin request by drawing data from the cache if
496  * possible, or the netfs if not.  Space beyond the EOF is zero-filled.
497  * Multiple I/O requests from different sources will get munged together.  If
498  * necessary, the readahead window can be expanded in either direction to a
499  * more convenient alighment for RPC efficiency or to make storage in the cache
500  * feasible.
501  *
502  * The calling netfs must provide a table of operations, only one of which,
503  * issue_op, is mandatory.
504  *
505  * The check_write_begin() operation can be provided to check for and flush
506  * conflicting writes once the folio is grabbed and locked.  It is passed a
507  * pointer to the fsdata cookie that gets returned to the VM to be passed to
508  * write_end.  It is permitted to sleep.  It should return 0 if the request
509  * should go ahead or it may return an error.  It may also unlock and put the
510  * folio, provided it sets ``*foliop`` to NULL, in which case a return of 0
511  * will cause the folio to be re-got and the process to be retried.
512  *
513  * The calling netfs must initialise a netfs context contiguous to the vfs
514  * inode before calling this.
515  *
516  * This is usable whether or not caching is enabled.
517  *
518  * Note that this should be considered deprecated and netfs_perform_write()
519  * used instead.
520  */
521 int netfs_write_begin(struct netfs_inode *ctx,
522 		      struct file *file, struct address_space *mapping,
523 		      loff_t pos, unsigned int len, struct folio **_folio,
524 		      void **_fsdata)
525 {
526 	struct netfs_io_request *rreq;
527 	struct folio *folio;
528 	pgoff_t index = pos >> PAGE_SHIFT;
529 	int ret;
530 
531 	DEFINE_READAHEAD(ractl, file, NULL, mapping, index);
532 
533 retry:
534 	folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
535 				    mapping_gfp_mask(mapping));
536 	if (IS_ERR(folio))
537 		return PTR_ERR(folio);
538 
539 	if (ctx->ops->check_write_begin) {
540 		/* Allow the netfs (eg. ceph) to flush conflicts. */
541 		ret = ctx->ops->check_write_begin(file, pos, len, &folio, _fsdata);
542 		if (ret < 0) {
543 			trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
544 			goto error;
545 		}
546 		if (!folio)
547 			goto retry;
548 	}
549 
550 	if (folio_test_uptodate(folio))
551 		goto have_folio;
552 
553 	/* If the page is beyond the EOF, we want to clear it - unless it's
554 	 * within the cache granule containing the EOF, in which case we need
555 	 * to preload the granule.
556 	 */
557 	if (!netfs_is_cache_enabled(ctx) &&
558 	    netfs_skip_folio_read(folio, pos, len, false)) {
559 		netfs_stat(&netfs_n_rh_write_zskip);
560 		goto have_folio_no_wait;
561 	}
562 
563 	rreq = netfs_alloc_request(mapping, file,
564 				   folio_pos(folio), folio_size(folio),
565 				   NETFS_READ_FOR_WRITE);
566 	if (IS_ERR(rreq)) {
567 		ret = PTR_ERR(rreq);
568 		goto error;
569 	}
570 	rreq->no_unlock_folio	= folio->index;
571 	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
572 
573 	ret = netfs_begin_cache_read(rreq, ctx);
574 	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
575 		goto error_put;
576 
577 	netfs_stat(&netfs_n_rh_write_begin);
578 	trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);
579 
580 	/* Expand the request to meet caching requirements and download
581 	 * preferences.
582 	 */
583 	ractl._nr_pages = folio_nr_pages(folio);
584 	netfs_rreq_expand(rreq, &ractl);
585 
586 	/* Set up the output buffer */
587 	iov_iter_xarray(&rreq->iter, ITER_DEST, &mapping->i_pages,
588 			rreq->start, rreq->len);
589 
590 	/* We hold the folio locks, so we can drop the references */
591 	folio_get(folio);
592 	while (readahead_folio(&ractl))
593 		;
594 
595 	ret = netfs_begin_read(rreq, true);
596 	if (ret < 0)
597 		goto error;
598 	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
599 
600 have_folio:
601 	ret = folio_wait_private_2_killable(folio);
602 	if (ret < 0)
603 		goto error;
604 have_folio_no_wait:
605 	*_folio = folio;
606 	_leave(" = 0");
607 	return 0;
608 
609 error_put:
610 	netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
611 error:
612 	if (folio) {
613 		folio_unlock(folio);
614 		folio_put(folio);
615 	}
616 	_leave(" = %d", ret);
617 	return ret;
618 }
619 EXPORT_SYMBOL(netfs_write_begin);
620 
621 /*
622  * Preload the data into a page we're proposing to write into.
623  */
624 int netfs_prefetch_for_write(struct file *file, struct folio *folio,
625 			     size_t offset, size_t len)
626 {
627 	struct netfs_io_request *rreq;
628 	struct address_space *mapping = folio->mapping;
629 	struct netfs_inode *ctx = netfs_inode(mapping->host);
630 	unsigned long long start = folio_pos(folio);
631 	size_t flen = folio_size(folio);
632 	int ret;
633 
634 	_enter("%zx @%llx", flen, start);
635 
636 	ret = -ENOMEM;
637 
638 	rreq = netfs_alloc_request(mapping, file, start, flen,
639 				   NETFS_READ_FOR_WRITE);
640 	if (IS_ERR(rreq)) {
641 		ret = PTR_ERR(rreq);
642 		goto error;
643 	}
644 
645 	rreq->no_unlock_folio = folio->index;
646 	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
647 	ret = netfs_begin_cache_read(rreq, ctx);
648 	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
649 		goto error_put;
650 
651 	netfs_stat(&netfs_n_rh_write_begin);
652 	trace_netfs_read(rreq, start, flen, netfs_read_trace_prefetch_for_write);
653 
654 	/* Set up the output buffer */
655 	iov_iter_xarray(&rreq->iter, ITER_DEST, &mapping->i_pages,
656 			rreq->start, rreq->len);
657 
658 	ret = netfs_begin_read(rreq, true);
659 	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
660 	return ret;
661 
662 error_put:
663 	netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
664 error:
665 	_leave(" = %d", ret);
666 	return ret;
667 }
668 
669 /**
670  * netfs_buffered_read_iter - Filesystem buffered I/O read routine
671  * @iocb: kernel I/O control block
672  * @iter: destination for the data read
673  *
674  * This is the ->read_iter() routine for all filesystems that can use the page
675  * cache directly.
676  *
677  * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
678  * returned when no data can be read without waiting for I/O requests to
679  * complete; it doesn't prevent readahead.
680  *
681  * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
682  * shall be made for the read or for readahead.  When no data can be read,
683  * -EAGAIN shall be returned.  When readahead would be triggered, a partial,
684  * possibly empty read shall be returned.
685  *
686  * Return:
687  * * number of bytes copied, even for partial reads
688  * * negative error code (or 0 if IOCB_NOIO) if nothing was read
689  */
690 ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter)
691 {
692 	struct inode *inode = file_inode(iocb->ki_filp);
693 	struct netfs_inode *ictx = netfs_inode(inode);
694 	ssize_t ret;
695 
696 	if (WARN_ON_ONCE((iocb->ki_flags & IOCB_DIRECT) ||
697 			 test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags)))
698 		return -EINVAL;
699 
700 	ret = netfs_start_io_read(inode);
701 	if (ret == 0) {
702 		ret = filemap_read(iocb, iter, 0);
703 		netfs_end_io_read(inode);
704 	}
705 	return ret;
706 }
707 EXPORT_SYMBOL(netfs_buffered_read_iter);
708 
709 /**
710  * netfs_file_read_iter - Generic filesystem read routine
711  * @iocb: kernel I/O control block
712  * @iter: destination for the data read
713  *
714  * This is the ->read_iter() routine for all filesystems that can use the page
715  * cache directly.
716  *
717  * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
718  * returned when no data can be read without waiting for I/O requests to
719  * complete; it doesn't prevent readahead.
720  *
721  * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
722  * shall be made for the read or for readahead.  When no data can be read,
723  * -EAGAIN shall be returned.  When readahead would be triggered, a partial,
724  * possibly empty read shall be returned.
725  *
726  * Return:
727  * * number of bytes copied, even for partial reads
728  * * negative error code (or 0 if IOCB_NOIO) if nothing was read
729  */
730 ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
731 {
732 	struct netfs_inode *ictx = netfs_inode(iocb->ki_filp->f_mapping->host);
733 
734 	if ((iocb->ki_flags & IOCB_DIRECT) ||
735 	    test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags))
736 		return netfs_unbuffered_read_iter(iocb, iter);
737 
738 	return netfs_buffered_read_iter(iocb, iter);
739 }
740 EXPORT_SYMBOL(netfs_file_read_iter);
741