xref: /linux/fs/netfs/buffered_read.c (revision 5dfa01ef37a8b944773aef8dee747cd76dec4234)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* Network filesystem high-level buffered read support.
3  *
4  * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
5  * Written by David Howells (dhowells@redhat.com)
6  */
7 
8 #include <linux/export.h>
9 #include <linux/task_io_accounting_ops.h>
10 #include "internal.h"
11 
netfs_cache_expand_readahead(struct netfs_io_request * rreq,unsigned long long * _start,unsigned long long * _len,unsigned long long i_size)12 static void netfs_cache_expand_readahead(struct netfs_io_request *rreq,
13 					 unsigned long long *_start,
14 					 unsigned long long *_len,
15 					 unsigned long long i_size)
16 {
17 	struct netfs_cache_resources *cres = &rreq->cache_resources;
18 
19 	if (cres->ops && cres->ops->expand_readahead)
20 		cres->ops->expand_readahead(cres, _start, _len, i_size);
21 }
22 
netfs_rreq_expand(struct netfs_io_request * rreq,struct readahead_control * ractl)23 static void netfs_rreq_expand(struct netfs_io_request *rreq,
24 			      struct readahead_control *ractl)
25 {
26 	/* Give the cache a chance to change the request parameters.  The
27 	 * resultant request must contain the original region.
28 	 */
29 	netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size);
30 
31 	/* Give the netfs a chance to change the request parameters.  The
32 	 * resultant request must contain the original region.
33 	 */
34 	if (rreq->netfs_ops->expand_readahead)
35 		rreq->netfs_ops->expand_readahead(rreq);
36 
37 	/* Expand the request if the cache wants it to start earlier.  Note
38 	 * that the expansion may get further extended if the VM wishes to
39 	 * insert THPs and the preferred start and/or end wind up in the middle
40 	 * of THPs.
41 	 *
42 	 * If this is the case, however, the THP size should be an integer
43 	 * multiple of the cache granule size, so we get a whole number of
44 	 * granules to deal with.
45 	 */
46 	if (rreq->start  != readahead_pos(ractl) ||
47 	    rreq->len != readahead_length(ractl)) {
48 		readahead_expand(ractl, rreq->start, rreq->len);
49 		rreq->start  = readahead_pos(ractl);
50 		rreq->len = readahead_length(ractl);
51 
52 		trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
53 				 netfs_read_trace_expanded);
54 	}
55 }
56 
57 /*
58  * Begin an operation, and fetch the stored zero point value from the cookie if
59  * available.
60  */
netfs_begin_cache_read(struct netfs_io_request * rreq,struct netfs_inode * ctx)61 static int netfs_begin_cache_read(struct netfs_io_request *rreq, struct netfs_inode *ctx)
62 {
63 	return fscache_begin_read_operation(&rreq->cache_resources, netfs_i_cookie(ctx));
64 }
65 
66 /*
67  * netfs_prepare_read_iterator - Prepare the subreq iterator for I/O
68  * @subreq: The subrequest to be set up
69  *
70  * Prepare the I/O iterator representing the read buffer on a subrequest for
71  * the filesystem to use for I/O (it can be passed directly to a socket).  This
72  * is intended to be called from the ->issue_read() method once the filesystem
73  * has trimmed the request to the size it wants.
74  *
75  * Returns the limited size if successful and -ENOMEM if insufficient memory
76  * available.
77  *
78  * [!] NOTE: This must be run in the same thread as ->issue_read() was called
79  * in as we access the readahead_control struct.
80  */
netfs_prepare_read_iterator(struct netfs_io_subrequest * subreq,struct readahead_control * ractl)81 static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq,
82 					   struct readahead_control *ractl)
83 {
84 	struct netfs_io_request *rreq = subreq->rreq;
85 	size_t rsize = subreq->len;
86 
87 	if (subreq->source == NETFS_DOWNLOAD_FROM_SERVER)
88 		rsize = umin(rsize, rreq->io_streams[0].sreq_max_len);
89 
90 	if (ractl) {
91 		/* If we don't have sufficient folios in the rolling buffer,
92 		 * extract a folioq's worth from the readahead region at a time
93 		 * into the buffer.  Note that this acquires a ref on each page
94 		 * that we will need to release later - but we don't want to do
95 		 * that until after we've started the I/O.
96 		 */
97 		struct folio_batch put_batch;
98 
99 		folio_batch_init(&put_batch);
100 		while (rreq->submitted < subreq->start + rsize) {
101 			ssize_t added;
102 
103 			added = rolling_buffer_load_from_ra(&rreq->buffer, ractl,
104 							    &put_batch);
105 			if (added < 0)
106 				return added;
107 			rreq->submitted += added;
108 		}
109 		folio_batch_release(&put_batch);
110 	}
111 
112 	subreq->len = rsize;
113 	if (unlikely(rreq->io_streams[0].sreq_max_segs)) {
114 		size_t limit = netfs_limit_iter(&rreq->buffer.iter, 0, rsize,
115 						rreq->io_streams[0].sreq_max_segs);
116 
117 		if (limit < rsize) {
118 			subreq->len = limit;
119 			trace_netfs_sreq(subreq, netfs_sreq_trace_limited);
120 		}
121 	}
122 
123 	subreq->io_iter	= rreq->buffer.iter;
124 
125 	iov_iter_truncate(&subreq->io_iter, subreq->len);
126 	rolling_buffer_advance(&rreq->buffer, subreq->len);
127 	return subreq->len;
128 }
129 
/*
 * Ask the cache where the data for a subrequest should come from.  If the
 * request has no cache operations, the answer is always to download from the
 * server; otherwise the cache's ->prepare_read() decides and the result is
 * traced.
 */
static enum netfs_io_source netfs_cache_prepare_read(struct netfs_io_request *rreq,
						     struct netfs_io_subrequest *subreq,
						     loff_t i_size)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;
	enum netfs_io_source source = NETFS_DOWNLOAD_FROM_SERVER;

	if (cres->ops) {
		source = cres->ops->prepare_read(subreq, i_size);
		trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
	}

	return source;
}
144 
145 /*
146  * Issue a read against the cache.
147  * - Eats the caller's ref on subreq.
148  */
netfs_read_cache_to_pagecache(struct netfs_io_request * rreq,struct netfs_io_subrequest * subreq)149 static void netfs_read_cache_to_pagecache(struct netfs_io_request *rreq,
150 					  struct netfs_io_subrequest *subreq)
151 {
152 	struct netfs_cache_resources *cres = &rreq->cache_resources;
153 
154 	netfs_stat(&netfs_n_rh_read);
155 	cres->ops->read(cres, subreq->start, &subreq->io_iter, NETFS_READ_HOLE_IGNORE,
156 			netfs_cache_read_terminated, subreq);
157 }
158 
netfs_queue_read(struct netfs_io_request * rreq,struct netfs_io_subrequest * subreq)159 void netfs_queue_read(struct netfs_io_request *rreq,
160 		      struct netfs_io_subrequest *subreq)
161 {
162 	struct netfs_io_stream *stream = &rreq->io_streams[0];
163 
164 	__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
165 
166 	/* We add to the end of the list whilst the collector may be walking
167 	 * the list.  The collector only goes nextwards and uses the lock to
168 	 * remove entries off of the front.
169 	 */
170 	spin_lock(&rreq->lock);
171 	/* Write IN_PROGRESS before pointer to new subreq */
172 	list_add_tail_release(&subreq->rreq_link, &stream->subrequests);
173 	if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
174 		if (!stream->active) {
175 			stream->collected_to = subreq->start;
176 			/* Store list pointers before active flag */
177 			smp_store_release(&stream->active, true);
178 		}
179 	}
180 
181 	spin_unlock(&rreq->lock);
182 }
183 
netfs_issue_read(struct netfs_io_request * rreq,struct netfs_io_subrequest * subreq)184 static void netfs_issue_read(struct netfs_io_request *rreq,
185 			     struct netfs_io_subrequest *subreq)
186 {
187 	switch (subreq->source) {
188 	case NETFS_DOWNLOAD_FROM_SERVER:
189 		rreq->netfs_ops->issue_read(subreq);
190 		break;
191 	case NETFS_READ_FROM_CACHE:
192 		netfs_read_cache_to_pagecache(rreq, subreq);
193 		break;
194 	default:
195 		__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
196 		subreq->error = 0;
197 		iov_iter_zero(subreq->len, &subreq->io_iter);
198 		subreq->transferred = subreq->len;
199 		netfs_read_subreq_terminated(subreq);
200 		break;
201 	}
202 }
203 
204 /*
205  * Perform a read to the pagecache from a series of sources of different types,
206  * slicing up the region to be read according to available cache blocks and
207  * network rsize.
208  */
netfs_read_to_pagecache(struct netfs_io_request * rreq,struct readahead_control * ractl)209 static void netfs_read_to_pagecache(struct netfs_io_request *rreq,
210 				    struct readahead_control *ractl)
211 {
212 	unsigned long long start = rreq->start;
213 	ssize_t size = rreq->len;
214 	int ret = 0;
215 
216 	do {
217 		struct netfs_io_subrequest *subreq;
218 		enum netfs_io_source source = NETFS_SOURCE_UNKNOWN;
219 		ssize_t slice;
220 
221 		subreq = netfs_alloc_subrequest(rreq);
222 		if (!subreq) {
223 			ret = -ENOMEM;
224 			break;
225 		}
226 
227 		subreq->start	= start;
228 		subreq->len	= size;
229 
230 		netfs_queue_read(rreq, subreq);
231 
232 		source = netfs_cache_prepare_read(rreq, subreq, rreq->i_size);
233 		subreq->source = source;
234 		if (source == NETFS_DOWNLOAD_FROM_SERVER) {
235 			unsigned long long zero_point = netfs_read_zero_point(rreq->inode);
236 			unsigned long long zp = umin(zero_point, rreq->i_size);
237 			size_t len = subreq->len;
238 
239 			if (unlikely(rreq->origin == NETFS_READ_SINGLE))
240 				zp = rreq->i_size;
241 			if (subreq->start >= zp) {
242 				subreq->source = source = NETFS_FILL_WITH_ZEROES;
243 				goto fill_with_zeroes;
244 			}
245 
246 			if (len > zp - subreq->start)
247 				len = zp - subreq->start;
248 			if (len == 0) {
249 				pr_err("ZERO-LEN READ: R=%08x[%x] l=%zx/%zx s=%llx z=%llx i=%llx",
250 				       rreq->debug_id, subreq->debug_index,
251 				       subreq->len, size,
252 				       subreq->start, zero_point, rreq->i_size);
253 				netfs_cancel_read(subreq, ret);
254 				break;
255 			}
256 			subreq->len = len;
257 
258 			netfs_stat(&netfs_n_rh_download);
259 			if (rreq->netfs_ops->prepare_read) {
260 				ret = rreq->netfs_ops->prepare_read(subreq);
261 				if (ret < 0) {
262 					netfs_cancel_read(subreq, ret);
263 					break;
264 				}
265 				trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
266 			}
267 			goto issue;
268 		}
269 
270 	fill_with_zeroes:
271 		if (source == NETFS_FILL_WITH_ZEROES) {
272 			subreq->source = NETFS_FILL_WITH_ZEROES;
273 			trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
274 			netfs_stat(&netfs_n_rh_zero);
275 			goto issue;
276 		}
277 
278 		if (source == NETFS_READ_FROM_CACHE) {
279 			trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
280 			goto issue;
281 		}
282 
283 		pr_err("Unexpected read source %u\n", source);
284 		WARN_ON_ONCE(1);
285 		netfs_cancel_read(subreq, ret);
286 		break;
287 
288 	issue:
289 		slice = netfs_prepare_read_iterator(subreq, ractl);
290 		if (slice < 0) {
291 			ret = slice;
292 			netfs_cancel_read(subreq, ret);
293 			break;
294 		}
295 		start += slice;
296 		size -= slice;
297 		if (size <= 0) {
298 			smp_wmb(); /* Write lists before ALL_QUEUED. */
299 			set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
300 		}
301 
302 		netfs_issue_read(rreq, subreq);
303 
304 		if (test_bit(NETFS_RREQ_PAUSE, &rreq->flags))
305 			netfs_wait_for_paused_read(rreq);
306 		if (test_bit(NETFS_RREQ_FAILED, &rreq->flags))
307 			break;
308 		cond_resched();
309 	} while (size > 0);
310 
311 	if (unlikely(size > 0)) {
312 		smp_wmb(); /* Write lists before ALL_QUEUED. */
313 		set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
314 		netfs_wake_collector(rreq);
315 	}
316 
317 	/* Defer error return as we may need to wait for outstanding I/O. */
318 	cmpxchg(&rreq->error, 0, ret);
319 }
320 
321 /**
322  * netfs_readahead - Helper to manage a read request
323  * @ractl: The description of the readahead request
324  *
325  * Fulfil a readahead request by drawing data from the cache if possible, or
326  * the netfs if not.  Space beyond the EOF is zero-filled.  Multiple I/O
327  * requests from different sources will get munged together.  If necessary, the
328  * readahead window can be expanded in either direction to a more convenient
329  * alighment for RPC efficiency or to make storage in the cache feasible.
330  *
331  * The calling netfs must initialise a netfs context contiguous to the vfs
332  * inode before calling this.
333  *
334  * This is usable whether or not caching is enabled.
335  */
netfs_readahead(struct readahead_control * ractl)336 void netfs_readahead(struct readahead_control *ractl)
337 {
338 	struct netfs_io_request *rreq;
339 	struct netfs_inode *ictx = netfs_inode(ractl->mapping->host);
340 	unsigned long long start = readahead_pos(ractl);
341 	size_t size = readahead_length(ractl);
342 	int ret;
343 
344 	rreq = netfs_alloc_request(ractl->mapping, ractl->file, start, size,
345 				   NETFS_READAHEAD);
346 	if (IS_ERR(rreq))
347 		return;
348 
349 	__set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags);
350 
351 	ret = netfs_begin_cache_read(rreq, ictx);
352 	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
353 		goto cleanup_free;
354 
355 	netfs_stat(&netfs_n_rh_readahead);
356 	trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
357 			 netfs_read_trace_readahead);
358 
359 	netfs_rreq_expand(rreq, ractl);
360 
361 	rreq->submitted = rreq->start;
362 	if (rolling_buffer_init(&rreq->buffer, rreq->debug_id, ITER_DEST) < 0)
363 		goto cleanup_free;
364 	netfs_read_to_pagecache(rreq, ractl);
365 
366 	return netfs_put_request(rreq, netfs_rreq_trace_put_return);
367 
368 cleanup_free:
369 	return netfs_put_failed_request(rreq);
370 }
371 EXPORT_SYMBOL(netfs_readahead);
372 
373 /*
374  * Create a rolling buffer with a single occupying folio.
375  */
netfs_create_singular_buffer(struct netfs_io_request * rreq,struct folio * folio,unsigned int rollbuf_flags)376 static int netfs_create_singular_buffer(struct netfs_io_request *rreq, struct folio *folio,
377 					unsigned int rollbuf_flags)
378 {
379 	ssize_t added;
380 
381 	if (rolling_buffer_init(&rreq->buffer, rreq->debug_id, ITER_DEST) < 0)
382 		return -ENOMEM;
383 
384 	added = rolling_buffer_append(&rreq->buffer, folio, rollbuf_flags);
385 	if (added < 0)
386 		return added;
387 	rreq->submitted = rreq->start + added;
388 	return 0;
389 }
390 
391 /*
392  * Read into gaps in a folio partially filled by a streaming write.
393  */
netfs_read_gaps(struct file * file,struct folio * folio)394 static int netfs_read_gaps(struct file *file, struct folio *folio)
395 {
396 	struct netfs_io_request *rreq;
397 	struct address_space *mapping = folio->mapping;
398 	struct netfs_group *group = netfs_folio_group(folio);
399 	struct netfs_folio *finfo = netfs_folio_info(folio);
400 	struct netfs_inode *ctx = netfs_inode(mapping->host);
401 	struct folio *sink = NULL;
402 	struct bio_vec *bvec;
403 	unsigned int from = finfo->dirty_offset;
404 	unsigned int to = from + finfo->dirty_len;
405 	unsigned int off = 0, i = 0;
406 	size_t flen = folio_size(folio);
407 	size_t nr_bvec = flen / PAGE_SIZE + 2;
408 	size_t part;
409 	int ret;
410 
411 	_enter("%lx", folio->index);
412 
413 	rreq = netfs_alloc_request(mapping, file, folio_pos(folio), flen, NETFS_READ_GAPS);
414 	if (IS_ERR(rreq)) {
415 		ret = PTR_ERR(rreq);
416 		goto alloc_error;
417 	}
418 
419 	ret = netfs_begin_cache_read(rreq, ctx);
420 	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
421 		goto discard;
422 
423 	netfs_stat(&netfs_n_rh_read_folio);
424 	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_read_gaps);
425 
426 	/* Fiddle the buffer so that a gap at the beginning and/or a gap at the
427 	 * end get copied to, but the middle is discarded.
428 	 */
429 	ret = -ENOMEM;
430 	bvec = kmalloc_objs(*bvec, nr_bvec);
431 	if (!bvec)
432 		goto discard;
433 
434 	sink = folio_alloc(GFP_KERNEL, 0);
435 	if (!sink) {
436 		kfree(bvec);
437 		goto discard;
438 	}
439 
440 	trace_netfs_folio(folio, netfs_folio_trace_read_gaps);
441 
442 	rreq->direct_bv = bvec;
443 	rreq->direct_bv_count = nr_bvec;
444 	if (from > 0) {
445 		bvec_set_folio(&bvec[i++], folio, from, 0);
446 		off = from;
447 	}
448 	while (off < to) {
449 		part = min_t(size_t, to - off, PAGE_SIZE);
450 		bvec_set_folio(&bvec[i++], sink, part, 0);
451 		off += part;
452 	}
453 	if (to < flen)
454 		bvec_set_folio(&bvec[i++], folio, flen - to, to);
455 	iov_iter_bvec(&rreq->buffer.iter, ITER_DEST, bvec, i, rreq->len);
456 	rreq->submitted = rreq->start + flen;
457 
458 	netfs_read_to_pagecache(rreq, NULL);
459 
460 	ret = netfs_wait_for_read(rreq);
461 	if (ret >= 0) {
462 		if (group)
463 			folio_change_private(folio, group);
464 		else
465 			folio_detach_private(folio);
466 		kfree(finfo);
467 		trace_netfs_folio(folio, netfs_folio_trace_filled_gaps);
468 		flush_dcache_folio(folio);
469 		folio_mark_uptodate(folio);
470 	}
471 
472 	if (sink)
473 		folio_put(sink);
474 	folio_unlock(folio);
475 	netfs_put_request(rreq, netfs_rreq_trace_put_return);
476 	return ret < 0 ? ret : 0;
477 
478 discard:
479 	netfs_put_failed_request(rreq);
480 alloc_error:
481 	folio_unlock(folio);
482 	return ret;
483 }
484 
485 /**
486  * netfs_read_folio - Helper to manage a read_folio request
487  * @file: The file to read from
488  * @folio: The folio to read
489  *
490  * Fulfil a read_folio request by drawing data from the cache if
491  * possible, or the netfs if not.  Space beyond the EOF is zero-filled.
492  * Multiple I/O requests from different sources will get munged together.
493  *
494  * The calling netfs must initialise a netfs context contiguous to the vfs
495  * inode before calling this.
496  *
497  * This is usable whether or not caching is enabled.
498  */
netfs_read_folio(struct file * file,struct folio * folio)499 int netfs_read_folio(struct file *file, struct folio *folio)
500 {
501 	struct address_space *mapping = folio->mapping;
502 	struct netfs_io_request *rreq;
503 	struct netfs_inode *ctx = netfs_inode(mapping->host);
504 	int ret;
505 
506 	folio_wait_writeback(folio);
507 
508 	if (folio_test_dirty(folio))
509 		return netfs_read_gaps(file, folio);
510 
511 	_enter("%lx", folio->index);
512 
513 	rreq = netfs_alloc_request(mapping, file,
514 				   folio_pos(folio), folio_size(folio),
515 				   NETFS_READPAGE);
516 	if (IS_ERR(rreq)) {
517 		ret = PTR_ERR(rreq);
518 		goto alloc_error;
519 	}
520 
521 	ret = netfs_begin_cache_read(rreq, ctx);
522 	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
523 		goto discard;
524 
525 	netfs_stat(&netfs_n_rh_read_folio);
526 	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);
527 
528 	/* Set up the output buffer */
529 	ret = netfs_create_singular_buffer(rreq, folio, 0);
530 	if (ret < 0)
531 		goto discard;
532 
533 	netfs_read_to_pagecache(rreq, NULL);
534 	ret = netfs_wait_for_read(rreq);
535 	netfs_put_request(rreq, netfs_rreq_trace_put_return);
536 	return ret < 0 ? ret : 0;
537 
538 discard:
539 	netfs_put_failed_request(rreq);
540 alloc_error:
541 	folio_unlock(folio);
542 	return ret;
543 }
544 EXPORT_SYMBOL(netfs_read_folio);
545 
546 /*
547  * Prepare a folio for writing without reading first
548  * @folio: The folio being prepared
549  * @pos: starting position for the write
550  * @len: length of write
551  * @always_fill: T if the folio should always be completely filled/cleared
552  *
553  * In some cases, write_begin doesn't need to read at all:
554  * - full folio write
555  * - write that lies in a folio that is completely beyond EOF
556  * - write that covers the folio from start to EOF or beyond it
557  *
558  * If any of these criteria are met, then zero out the unwritten parts
559  * of the folio and return true. Otherwise, return false.
560  */
netfs_skip_folio_read(struct folio * folio,loff_t pos,size_t len,bool always_fill)561 static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
562 				 bool always_fill)
563 {
564 	struct inode *inode = folio_inode(folio);
565 	loff_t i_size = i_size_read(inode);
566 	size_t offset = offset_in_folio(folio, pos);
567 	size_t plen = folio_size(folio);
568 
569 	if (unlikely(always_fill)) {
570 		if (pos - offset + len <= i_size)
571 			return false; /* Page entirely before EOF */
572 		folio_zero_segment(folio, 0, plen);
573 		folio_mark_uptodate(folio);
574 		return true;
575 	}
576 
577 	/* Full folio write */
578 	if (offset == 0 && len >= plen)
579 		return true;
580 
581 	/* Page entirely beyond the end of the file */
582 	if (pos - offset >= i_size)
583 		goto zero_out;
584 
585 	/* Write that covers from the start of the folio to EOF or beyond */
586 	if (offset == 0 && (pos + len) >= i_size)
587 		goto zero_out;
588 
589 	return false;
590 zero_out:
591 	folio_zero_segments(folio, 0, offset, offset + len, plen);
592 	return true;
593 }
594 
595 /**
596  * netfs_write_begin - Helper to prepare for writing [DEPRECATED]
597  * @ctx: The netfs context
598  * @file: The file to read from
599  * @mapping: The mapping to read from
600  * @pos: File position at which the write will begin
601  * @len: The length of the write (may extend beyond the end of the folio chosen)
602  * @_folio: Where to put the resultant folio
603  * @_fsdata: Place for the netfs to store a cookie
604  *
605  * Pre-read data for a write-begin request by drawing data from the cache if
606  * possible, or the netfs if not.  Space beyond the EOF is zero-filled.
607  * Multiple I/O requests from different sources will get munged together.
608  *
609  * The calling netfs must provide a table of operations, only one of which,
610  * issue_read, is mandatory.
611  *
612  * The check_write_begin() operation can be provided to check for and flush
613  * conflicting writes once the folio is grabbed and locked.  It is passed a
614  * pointer to the fsdata cookie that gets returned to the VM to be passed to
615  * write_end.  It is permitted to sleep.  It should return 0 if the request
616  * should go ahead or it may return an error.  It may also unlock and put the
617  * folio, provided it sets ``*foliop`` to NULL, in which case a return of 0
618  * will cause the folio to be re-got and the process to be retried.
619  *
620  * The calling netfs must initialise a netfs context contiguous to the vfs
621  * inode before calling this.
622  *
623  * This is usable whether or not caching is enabled.
624  *
625  * Note that this should be considered deprecated and netfs_perform_write()
626  * used instead.
627  */
netfs_write_begin(struct netfs_inode * ctx,struct file * file,struct address_space * mapping,loff_t pos,unsigned int len,struct folio ** _folio,void ** _fsdata)628 int netfs_write_begin(struct netfs_inode *ctx,
629 		      struct file *file, struct address_space *mapping,
630 		      loff_t pos, unsigned int len, struct folio **_folio,
631 		      void **_fsdata)
632 {
633 	struct netfs_io_request *rreq;
634 	struct folio *folio;
635 	pgoff_t index = pos >> PAGE_SHIFT;
636 	int ret;
637 
638 retry:
639 	folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
640 				    mapping_gfp_mask(mapping));
641 	if (IS_ERR(folio))
642 		return PTR_ERR(folio);
643 
644 	if (ctx->ops->check_write_begin) {
645 		/* Allow the netfs (eg. ceph) to flush conflicts. */
646 		ret = ctx->ops->check_write_begin(file, pos, len, &folio, _fsdata);
647 		if (ret < 0) {
648 			trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
649 			goto error;
650 		}
651 		if (!folio)
652 			goto retry;
653 	}
654 
655 	if (folio_test_uptodate(folio))
656 		goto have_folio;
657 
658 	/* If the folio is beyond the EOF, we want to clear it - unless it's
659 	 * within the cache granule containing the EOF, in which case we need
660 	 * to preload the granule.
661 	 */
662 	if (!netfs_is_cache_enabled(ctx) &&
663 	    netfs_skip_folio_read(folio, pos, len, false)) {
664 		netfs_stat(&netfs_n_rh_write_zskip);
665 		goto have_folio_no_wait;
666 	}
667 
668 	rreq = netfs_alloc_request(mapping, file,
669 				   folio_pos(folio), folio_size(folio),
670 				   NETFS_READ_FOR_WRITE);
671 	if (IS_ERR(rreq)) {
672 		ret = PTR_ERR(rreq);
673 		goto error;
674 	}
675 	rreq->no_unlock_folio	= folio;
676 	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
677 
678 	ret = netfs_begin_cache_read(rreq, ctx);
679 	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
680 		goto error_put;
681 
682 	netfs_stat(&netfs_n_rh_write_begin);
683 	trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);
684 
685 	/* Set up the output buffer */
686 	ret = netfs_create_singular_buffer(rreq, folio, 0);
687 	if (ret < 0)
688 		goto error_put;
689 
690 	netfs_read_to_pagecache(rreq, NULL);
691 	ret = netfs_wait_for_read(rreq);
692 	netfs_put_request(rreq, netfs_rreq_trace_put_return);
693 	if (ret < 0)
694 		goto error;
695 
696 have_folio:
697 	ret = folio_wait_private_2_killable(folio);
698 	if (ret < 0)
699 		goto error;
700 have_folio_no_wait:
701 	*_folio = folio;
702 	_leave(" = 0");
703 	return 0;
704 
705 error_put:
706 	netfs_put_failed_request(rreq);
707 error:
708 	if (folio) {
709 		folio_unlock(folio);
710 		folio_put(folio);
711 	}
712 	_leave(" = %d", ret);
713 	return ret;
714 }
715 EXPORT_SYMBOL(netfs_write_begin);
716 
717 /*
718  * Preload the data into a folio we're proposing to write into.
719  */
netfs_prefetch_for_write(struct file * file,struct folio * folio,size_t offset,size_t len)720 int netfs_prefetch_for_write(struct file *file, struct folio *folio,
721 			     size_t offset, size_t len)
722 {
723 	struct netfs_io_request *rreq;
724 	struct address_space *mapping = folio->mapping;
725 	struct netfs_inode *ctx = netfs_inode(mapping->host);
726 	unsigned long long start = folio_pos(folio);
727 	size_t flen = folio_size(folio);
728 	int ret;
729 
730 	_enter("%zx @%llx", flen, start);
731 
732 	ret = -ENOMEM;
733 
734 	rreq = netfs_alloc_request(mapping, file, start, flen,
735 				   NETFS_READ_FOR_WRITE);
736 	if (IS_ERR(rreq)) {
737 		ret = PTR_ERR(rreq);
738 		goto error;
739 	}
740 
741 	rreq->no_unlock_folio = folio;
742 	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
743 	ret = netfs_begin_cache_read(rreq, ctx);
744 	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
745 		goto error_put;
746 
747 	netfs_stat(&netfs_n_rh_write_begin);
748 	trace_netfs_read(rreq, start, flen, netfs_read_trace_prefetch_for_write);
749 
750 	/* Set up the output buffer */
751 	ret = netfs_create_singular_buffer(rreq, folio, NETFS_ROLLBUF_PAGECACHE_MARK);
752 	if (ret < 0)
753 		goto error_put;
754 
755 	netfs_read_to_pagecache(rreq, NULL);
756 	ret = netfs_wait_for_read(rreq);
757 	netfs_put_request(rreq, netfs_rreq_trace_put_return);
758 	return ret < 0 ? ret : 0;
759 
760 error_put:
761 	netfs_put_failed_request(rreq);
762 error:
763 	_leave(" = %d", ret);
764 	return ret;
765 }
766 
767 /**
768  * netfs_buffered_read_iter - Filesystem buffered I/O read routine
769  * @iocb: kernel I/O control block
770  * @iter: destination for the data read
771  *
772  * This is the ->read_iter() routine for all filesystems that can use the page
773  * cache directly.
774  *
775  * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
776  * returned when no data can be read without waiting for I/O requests to
777  * complete; it doesn't prevent readahead.
778  *
779  * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
780  * shall be made for the read or for readahead.  When no data can be read,
781  * -EAGAIN shall be returned.  When readahead would be triggered, a partial,
782  * possibly empty read shall be returned.
783  *
784  * Return:
785  * * number of bytes copied, even for partial reads
786  * * negative error code (or 0 if IOCB_NOIO) if nothing was read
787  */
netfs_buffered_read_iter(struct kiocb * iocb,struct iov_iter * iter)788 ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter)
789 {
790 	struct inode *inode = file_inode(iocb->ki_filp);
791 	struct netfs_inode *ictx = netfs_inode(inode);
792 	ssize_t ret;
793 
794 	if (WARN_ON_ONCE((iocb->ki_flags & IOCB_DIRECT) ||
795 			 test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags)))
796 		return -EINVAL;
797 
798 	ret = netfs_start_io_read(inode);
799 	if (ret == 0) {
800 		ret = filemap_read(iocb, iter, 0);
801 		netfs_end_io_read(inode);
802 	}
803 	return ret;
804 }
805 EXPORT_SYMBOL(netfs_buffered_read_iter);
806 
807 /**
808  * netfs_file_read_iter - Generic filesystem read routine
809  * @iocb: kernel I/O control block
810  * @iter: destination for the data read
811  *
812  * This is the ->read_iter() routine for all filesystems that can use the page
813  * cache directly.
814  *
815  * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
816  * returned when no data can be read without waiting for I/O requests to
817  * complete; it doesn't prevent readahead.
818  *
819  * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
820  * shall be made for the read or for readahead.  When no data can be read,
821  * -EAGAIN shall be returned.  When readahead would be triggered, a partial,
822  * possibly empty read shall be returned.
823  *
824  * Return:
825  * * number of bytes copied, even for partial reads
826  * * negative error code (or 0 if IOCB_NOIO) if nothing was read
827  */
netfs_file_read_iter(struct kiocb * iocb,struct iov_iter * iter)828 ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
829 {
830 	struct netfs_inode *ictx = netfs_inode(iocb->ki_filp->f_mapping->host);
831 
832 	if ((iocb->ki_flags & IOCB_DIRECT) ||
833 	    test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags))
834 		return netfs_unbuffered_read_iter(iocb, iter);
835 
836 	return netfs_buffered_read_iter(iocb, iter);
837 }
838 EXPORT_SYMBOL(netfs_file_read_iter);
839