// SPDX-License-Identifier: GPL-2.0-or-later
/* Network filesystem high-level buffered read support.
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"

static void netfs_cache_expand_readahead(struct netfs_io_request *rreq,
					 unsigned long long *_start,
					 unsigned long long *_len,
					 unsigned long long i_size)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	if (cres->ops && cres->ops->expand_readahead)
		cres->ops->expand_readahead(cres, _start, _len, i_size);
}

static void netfs_rreq_expand(struct netfs_io_request *rreq,
			      struct readahead_control *ractl)
{
	/* Give the cache a chance to change the request parameters.  The
	 * resultant request must contain the original region.
	 */
	netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size);

	/* Give the netfs a chance to change the request parameters.  The
	 * resultant request must contain the original region.
	 */
	if (rreq->netfs_ops->expand_readahead)
		rreq->netfs_ops->expand_readahead(rreq);

	/* Expand the request if the cache wants it to start earlier.  Note
	 * that the expansion may get further extended if the VM wishes to
	 * insert THPs and the preferred start and/or end wind up in the middle
	 * of THPs.
	 *
	 * If this is the case, however, the THP size should be an integer
	 * multiple of the cache granule size, so we get a whole number of
	 * granules to deal with.
	 */
	if (rreq->start != readahead_pos(ractl) ||
	    rreq->len != readahead_length(ractl)) {
		readahead_expand(ractl, rreq->start, rreq->len);
		rreq->start = readahead_pos(ractl);
		rreq->len = readahead_length(ractl);

		trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
				 netfs_read_trace_expanded);
	}
}

/*
 * Begin an operation, and fetch the stored zero point value from the cookie if
 * available.
 */
static int netfs_begin_cache_read(struct netfs_io_request *rreq, struct netfs_inode *ctx)
{
	return fscache_begin_read_operation(&rreq->cache_resources, netfs_i_cookie(ctx));
}

/*
 * netfs_prepare_read_iterator - Prepare the subreq iterator for I/O
 * @subreq: The subrequest to be set up
 * @ractl: The readahead control, if the read is being driven by one
 *
 * Prepare the I/O iterator representing the read buffer on a subrequest for
 * the filesystem to use for I/O (it can be passed directly to a socket).  This
 * is called once the request has been trimmed to the size the filesystem
 * wants, before the subrequest is issued via ->issue_read().
 *
 * Returns the limited size if successful and -ENOMEM if there is insufficient
 * memory available.
 *
 * [!] NOTE: This must be run in the same thread as ->issue_read() is called
 * in as we access the readahead_control struct.
 */
static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq,
					   struct readahead_control *ractl)
{
	struct netfs_io_request *rreq = subreq->rreq;
	size_t rsize = subreq->len;

	if (subreq->source == NETFS_DOWNLOAD_FROM_SERVER)
		rsize = umin(rsize, rreq->io_streams[0].sreq_max_len);

	if (ractl) {
		/* If we don't have sufficient folios in the rolling buffer,
		 * extract a folioq's worth from the readahead region at a time
		 * into the buffer.  Note that this acquires a ref on each page
		 * that we will need to release later - but we don't want to do
		 * that until after we've started the I/O.
		 */
		struct folio_batch put_batch;

		folio_batch_init(&put_batch);
		while (rreq->submitted < subreq->start + rsize) {
			ssize_t added;

			added = rolling_buffer_load_from_ra(&rreq->buffer, ractl,
							    &put_batch);
			if (added < 0)
				return added;
			rreq->submitted += added;
		}
		folio_batch_release(&put_batch);
	}

	subreq->len = rsize;
	if (unlikely(rreq->io_streams[0].sreq_max_segs)) {
		size_t limit = netfs_limit_iter(&rreq->buffer.iter, 0, rsize,
						rreq->io_streams[0].sreq_max_segs);

		if (limit < rsize) {
			subreq->len = limit;
			trace_netfs_sreq(subreq, netfs_sreq_trace_limited);
		}
	}

	subreq->io_iter = rreq->buffer.iter;

	iov_iter_truncate(&subreq->io_iter, subreq->len);
	rolling_buffer_advance(&rreq->buffer, subreq->len);
	return subreq->len;
}

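/* Illustrative sketch (not part of this file): once the iterator has been
 * prepared, a filesystem's ->issue_read() typically hands subreq->io_iter
 * straight to its transport and then reports completion, much as the
 * zero-fill path in netfs_issue_read() below does.  Here myfs_issue_read()
 * and myfs_transport_read() stand in for hypothetical filesystem helpers:
 *
 *	static void myfs_issue_read(struct netfs_io_subrequest *subreq)
 *	{
 *		subreq->error = myfs_transport_read(subreq->rreq->netfs_priv,
 *						    subreq->start,
 *						    &subreq->io_iter,
 *						    &subreq->transferred);
 *		netfs_read_subreq_terminated(subreq);
 *	}
 */
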
static enum netfs_io_source netfs_cache_prepare_read(struct netfs_io_request *rreq,
						     struct netfs_io_subrequest *subreq,
						     loff_t i_size)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;
	enum netfs_io_source source;

	if (!cres->ops)
		return NETFS_DOWNLOAD_FROM_SERVER;
	source = cres->ops->prepare_read(subreq, i_size);
	trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
	return source;
}

/*
 * Issue a read against the cache.
 * - Eats the caller's ref on subreq.
 */
static void netfs_read_cache_to_pagecache(struct netfs_io_request *rreq,
					  struct netfs_io_subrequest *subreq)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	netfs_stat(&netfs_n_rh_read);
	cres->ops->read(cres, subreq->start, &subreq->io_iter, NETFS_READ_HOLE_IGNORE,
			netfs_cache_read_terminated, subreq);
}

static void netfs_queue_read(struct netfs_io_request *rreq,
			     struct netfs_io_subrequest *subreq,
			     bool last_subreq)
{
	struct netfs_io_stream *stream = &rreq->io_streams[0];

	__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);

	/* We add to the end of the list whilst the collector may be walking
	 * the list.  The collector only walks the list forwards and uses the
	 * lock to remove entries from the front.
	 */
	spin_lock(&rreq->lock);
	list_add_tail(&subreq->rreq_link, &stream->subrequests);
	if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
		if (!stream->active) {
			stream->collected_to = subreq->start;
			/* Store list pointers before active flag */
			smp_store_release(&stream->active, true);
		}
	}

	if (last_subreq) {
		smp_wmb(); /* Write lists before ALL_QUEUED. */
		set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
	}

	spin_unlock(&rreq->lock);
}

static void netfs_issue_read(struct netfs_io_request *rreq,
			     struct netfs_io_subrequest *subreq)
{
	switch (subreq->source) {
	case NETFS_DOWNLOAD_FROM_SERVER:
		rreq->netfs_ops->issue_read(subreq);
		break;
	case NETFS_READ_FROM_CACHE:
		netfs_read_cache_to_pagecache(rreq, subreq);
		break;
	default:
		__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
		subreq->error = 0;
		iov_iter_zero(subreq->len, &subreq->io_iter);
		subreq->transferred = subreq->len;
		netfs_read_subreq_terminated(subreq);
		break;
	}
}

/*
 * Perform a read to the pagecache from a series of sources of different types,
 * slicing up the region to be read according to available cache blocks and
 * network rsize.
 */
static void netfs_read_to_pagecache(struct netfs_io_request *rreq,
				    struct readahead_control *ractl)
{
	struct netfs_inode *ictx = netfs_inode(rreq->inode);
	unsigned long long start = rreq->start;
	ssize_t size = rreq->len;
	int ret = 0;

	do {
		struct netfs_io_subrequest *subreq;
		enum netfs_io_source source = NETFS_SOURCE_UNKNOWN;
		ssize_t slice;

		subreq = netfs_alloc_subrequest(rreq);
		if (!subreq) {
			ret = -ENOMEM;
			break;
		}

		subreq->start	= start;
		subreq->len	= size;

		source = netfs_cache_prepare_read(rreq, subreq, rreq->i_size);
		subreq->source = source;
		if (source == NETFS_DOWNLOAD_FROM_SERVER) {
			unsigned long long zp = umin(ictx->zero_point, rreq->i_size);
			size_t len = subreq->len;

			if (unlikely(rreq->origin == NETFS_READ_SINGLE))
				zp = rreq->i_size;
			if (subreq->start >= zp) {
				subreq->source = source = NETFS_FILL_WITH_ZEROES;
				goto fill_with_zeroes;
			}

			if (len > zp - subreq->start)
				len = zp - subreq->start;
			if (len == 0) {
				pr_err("ZERO-LEN READ: R=%08x[%x] l=%zx/%zx s=%llx z=%llx i=%llx",
				       rreq->debug_id, subreq->debug_index,
				       subreq->len, size,
				       subreq->start, ictx->zero_point, rreq->i_size);
				break;
			}
			subreq->len = len;

			netfs_stat(&netfs_n_rh_download);
			if (rreq->netfs_ops->prepare_read) {
				ret = rreq->netfs_ops->prepare_read(subreq);
				if (ret < 0) {
					subreq->error = ret;
					/* Not queued - release both refs. */
					netfs_put_subrequest(subreq,
							     netfs_sreq_trace_put_cancel);
					netfs_put_subrequest(subreq,
							     netfs_sreq_trace_put_cancel);
					break;
				}
				trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
			}
			goto issue;
		}

	fill_with_zeroes:
		if (source == NETFS_FILL_WITH_ZEROES) {
			subreq->source = NETFS_FILL_WITH_ZEROES;
			trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
			netfs_stat(&netfs_n_rh_zero);
			goto issue;
		}

		if (source == NETFS_READ_FROM_CACHE) {
			trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
			goto issue;
		}

		pr_err("Unexpected read source %u\n", source);
		WARN_ON_ONCE(1);
		break;

	issue:
		slice = netfs_prepare_read_iterator(subreq, ractl);
		if (slice < 0) {
			ret = slice;
			subreq->error = ret;
			trace_netfs_sreq(subreq, netfs_sreq_trace_cancel);
			/* Not queued - release both refs. */
			netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);
			netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);
			break;
		}
		size -= slice;
		start += slice;

		netfs_queue_read(rreq, subreq, size <= 0);
		netfs_issue_read(rreq, subreq);
		cond_resched();
	} while (size > 0);

	if (unlikely(size > 0)) {
		smp_wmb(); /* Write lists before ALL_QUEUED. */
		set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
		netfs_wake_collector(rreq);
	}

	/* Defer error return as we may need to wait for outstanding I/O. */
	cmpxchg(&rreq->error, 0, ret);
}

/**
 * netfs_readahead - Helper to manage a read request
 * @ractl: The description of the readahead request
 *
 * Fulfil a readahead request by drawing data from the cache if possible, or
 * the netfs if not.  Space beyond the EOF is zero-filled.  Multiple I/O
 * requests from different sources will get munged together.  If necessary, the
 * readahead window can be expanded in either direction to a more convenient
 * alignment for RPC efficiency or to make storage in the cache feasible.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
void netfs_readahead(struct readahead_control *ractl)
{
	struct netfs_io_request *rreq;
	struct netfs_inode *ictx = netfs_inode(ractl->mapping->host);
	unsigned long long start = readahead_pos(ractl);
	size_t size = readahead_length(ractl);
	int ret;

	rreq = netfs_alloc_request(ractl->mapping, ractl->file, start, size,
				   NETFS_READAHEAD);
	if (IS_ERR(rreq))
		return;

	__set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags);

	ret = netfs_begin_cache_read(rreq, ictx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto cleanup_free;

	netfs_stat(&netfs_n_rh_readahead);
	trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
			 netfs_read_trace_readahead);

	netfs_rreq_expand(rreq, ractl);

	rreq->submitted = rreq->start;
	if (rolling_buffer_init(&rreq->buffer, rreq->debug_id, ITER_DEST) < 0)
		goto cleanup_free;
	netfs_read_to_pagecache(rreq, ractl);

	return netfs_put_request(rreq, netfs_rreq_trace_put_return);

cleanup_free:
	return netfs_put_failed_request(rreq);
}
EXPORT_SYMBOL(netfs_readahead);

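/* Illustrative usage (not part of this file): network filesystems typically
 * install this helper directly in their address_space_operations; the "myfs"
 * name is hypothetical:
 *
 *	const struct address_space_operations myfs_aops = {
 *		.readahead	= netfs_readahead,
 *		...
 *	};
 */
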
/*
 * Create a rolling buffer with a single occupying folio.
 */
static int netfs_create_singular_buffer(struct netfs_io_request *rreq, struct folio *folio,
					unsigned int rollbuf_flags)
{
	ssize_t added;

	if (rolling_buffer_init(&rreq->buffer, rreq->debug_id, ITER_DEST) < 0)
		return -ENOMEM;

	added = rolling_buffer_append(&rreq->buffer, folio, rollbuf_flags);
	if (added < 0)
		return added;
	rreq->submitted = rreq->start + added;
	return 0;
}

/*
 * Read into gaps in a folio partially filled by a streaming write.
 */
static int netfs_read_gaps(struct file *file, struct folio *folio)
{
	struct netfs_io_request *rreq;
	struct address_space *mapping = folio->mapping;
	struct netfs_folio *finfo = netfs_folio_info(folio);
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	struct folio *sink = NULL;
	struct bio_vec *bvec;
	unsigned int from = finfo->dirty_offset;
	unsigned int to = from + finfo->dirty_len;
	unsigned int off = 0, i = 0;
	size_t flen = folio_size(folio);
	size_t nr_bvec = flen / PAGE_SIZE + 2;
	size_t part;
	int ret;

	_enter("%lx", folio->index);

	rreq = netfs_alloc_request(mapping, file, folio_pos(folio), flen, NETFS_READ_GAPS);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto alloc_error;
	}

	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto discard;

	netfs_stat(&netfs_n_rh_read_folio);
	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_read_gaps);

	/* Fiddle the buffer so that a gap at the beginning and/or a gap at the
	 * end get copied to, but the middle is discarded.
	 */
	ret = -ENOMEM;
	bvec = kmalloc_objs(*bvec, nr_bvec);
	if (!bvec)
		goto discard;

	sink = folio_alloc(GFP_KERNEL, 0);
	if (!sink) {
		kfree(bvec);
		goto discard;
	}

	trace_netfs_folio(folio, netfs_folio_trace_read_gaps);

	rreq->direct_bv = bvec;
	rreq->direct_bv_count = nr_bvec;
	/* The gap before the dirty data is read into the folio. */
	if (from > 0) {
		bvec_set_folio(&bvec[i++], folio, from, 0);
		off = from;
	}
	/* The span covered by the dirty data is read into the sink folio and
	 * discarded so that the streaming write isn't overwritten.
	 */
	while (off < to) {
		part = min_t(size_t, to - off, PAGE_SIZE);
		bvec_set_folio(&bvec[i++], sink, part, 0);
		off += part;
	}
	/* The gap after the dirty data is also read into the folio. */
	if (to < flen)
		bvec_set_folio(&bvec[i++], folio, flen - to, to);
	iov_iter_bvec(&rreq->buffer.iter, ITER_DEST, bvec, i, rreq->len);
	rreq->submitted = rreq->start + flen;

	netfs_read_to_pagecache(rreq, NULL);

	if (sink)
		folio_put(sink);

	ret = netfs_wait_for_read(rreq);
	if (ret >= 0) {
		flush_dcache_folio(folio);
		folio_mark_uptodate(folio);
	}
	folio_unlock(folio);
	netfs_put_request(rreq, netfs_rreq_trace_put_return);
	return ret < 0 ? ret : 0;

discard:
	netfs_put_failed_request(rreq);
alloc_error:
	folio_unlock(folio);
	return ret;
}

/**
 * netfs_read_folio - Helper to manage a read_folio request
 * @file: The file to read from
 * @folio: The folio to read
 *
 * Fulfil a read_folio request by drawing data from the cache if
 * possible, or the netfs if not.  Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
int netfs_read_folio(struct file *file, struct folio *folio)
{
	struct address_space *mapping = folio->mapping;
	struct netfs_io_request *rreq;
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	int ret;

	if (folio_test_dirty(folio)) {
		trace_netfs_folio(folio, netfs_folio_trace_read_gaps);
		return netfs_read_gaps(file, folio);
	}

	_enter("%lx", folio->index);

	rreq = netfs_alloc_request(mapping, file,
				   folio_pos(folio), folio_size(folio),
				   NETFS_READPAGE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto alloc_error;
	}

	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto discard;

	netfs_stat(&netfs_n_rh_read_folio);
	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);

	/* Set up the output buffer */
	ret = netfs_create_singular_buffer(rreq, folio, 0);
	if (ret < 0)
		goto discard;

	netfs_read_to_pagecache(rreq, NULL);
	ret = netfs_wait_for_read(rreq);
	netfs_put_request(rreq, netfs_rreq_trace_put_return);
	return ret < 0 ? ret : 0;

discard:
	netfs_put_failed_request(rreq);
alloc_error:
	folio_unlock(folio);
	return ret;
}
EXPORT_SYMBOL(netfs_read_folio);

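/* Illustrative usage (not part of this file): as with netfs_readahead(),
 * this helper is normally plugged straight into the filesystem's
 * address_space_operations (hypothetical "myfs" again):
 *
 *	const struct address_space_operations myfs_aops = {
 *		.read_folio	= netfs_read_folio,
 *		.readahead	= netfs_readahead,
 *		...
 *	};
 */
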
/*
 * Prepare a folio for writing without reading first
 * @folio: The folio being prepared
 * @pos: starting position for the write
 * @len: length of write
 * @always_fill: T if the folio should always be completely filled/cleared
 *
 * In some cases, write_begin doesn't need to read at all:
 * - full folio write
 * - write that lies in a folio that is completely beyond EOF
 * - write that covers the folio from start to EOF or beyond it
 *
 * If any of these criteria are met, then zero out the unwritten parts
 * of the folio and return true. Otherwise, return false.
 */
static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
				  bool always_fill)
{
	struct inode *inode = folio_inode(folio);
	loff_t i_size = i_size_read(inode);
	size_t offset = offset_in_folio(folio, pos);
	size_t plen = folio_size(folio);

	if (unlikely(always_fill)) {
		if (pos - offset + len <= i_size)
			return false; /* Page entirely before EOF */
		folio_zero_segment(folio, 0, plen);
		folio_mark_uptodate(folio);
		return true;
	}

	/* Full folio write */
	if (offset == 0 && len >= plen)
		return true;

	/* Page entirely beyond the end of the file */
	if (pos - offset >= i_size)
		goto zero_out;

	/* Write that covers from the start of the folio to EOF or beyond */
	if (offset == 0 && (pos + len) >= i_size)
		goto zero_out;

	return false;
zero_out:
	folio_zero_segments(folio, 0, offset, offset + len, plen);
	return true;
}

/**
 * netfs_write_begin - Helper to prepare for writing [DEPRECATED]
 * @ctx: The netfs context
 * @file: The file to read from
 * @mapping: The mapping to read from
 * @pos: File position at which the write will begin
 * @len: The length of the write (may extend beyond the end of the folio chosen)
 * @_folio: Where to put the resultant folio
 * @_fsdata: Place for the netfs to store a cookie
 *
 * Pre-read data for a write-begin request by drawing data from the cache if
 * possible, or the netfs if not.  Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together.
 *
 * The calling netfs must provide a table of operations, only one of which,
 * issue_read, is mandatory.
 *
 * The check_write_begin() operation can be provided to check for and flush
 * conflicting writes once the folio is grabbed and locked.  It is passed a
 * pointer to the fsdata cookie that gets returned to the VM to be passed to
 * write_end.  It is permitted to sleep.  It should return 0 if the request
 * should go ahead or it may return an error.  It may also unlock and put the
 * folio, provided it sets ``*foliop`` to NULL, in which case a return of 0
 * will cause the folio to be re-got and the process to be retried.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 *
 * Note that this should be considered deprecated and netfs_perform_write()
 * used instead.
 */
int netfs_write_begin(struct netfs_inode *ctx,
		      struct file *file, struct address_space *mapping,
		      loff_t pos, unsigned int len, struct folio **_folio,
		      void **_fsdata)
{
	struct netfs_io_request *rreq;
	struct folio *folio;
	pgoff_t index = pos >> PAGE_SHIFT;
	int ret;

retry:
	folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
				    mapping_gfp_mask(mapping));
	if (IS_ERR(folio))
		return PTR_ERR(folio);

	if (ctx->ops->check_write_begin) {
		/* Allow the netfs (eg. ceph) to flush conflicts. */
		ret = ctx->ops->check_write_begin(file, pos, len, &folio, _fsdata);
		if (ret < 0) {
			trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
			goto error;
		}
		if (!folio)
			goto retry;
	}

	if (folio_test_uptodate(folio))
		goto have_folio;

	/* If the folio is beyond the EOF, we want to clear it - unless it's
	 * within the cache granule containing the EOF, in which case we need
	 * to preload the granule.
	 */
	if (!netfs_is_cache_enabled(ctx) &&
	    netfs_skip_folio_read(folio, pos, len, false)) {
		netfs_stat(&netfs_n_rh_write_zskip);
		goto have_folio_no_wait;
	}

	rreq = netfs_alloc_request(mapping, file,
				   folio_pos(folio), folio_size(folio),
				   NETFS_READ_FOR_WRITE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto error;
	}
	rreq->no_unlock_folio = folio->index;
	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);

	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto error_put;

	netfs_stat(&netfs_n_rh_write_begin);
	trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);

	/* Set up the output buffer */
	ret = netfs_create_singular_buffer(rreq, folio, 0);
	if (ret < 0)
		goto error_put;

	netfs_read_to_pagecache(rreq, NULL);
	ret = netfs_wait_for_read(rreq);
	if (ret < 0)
		goto error;
	netfs_put_request(rreq, netfs_rreq_trace_put_return);

have_folio:
	ret = folio_wait_private_2_killable(folio);
	if (ret < 0)
		goto error;
have_folio_no_wait:
	*_folio = folio;
	_leave(" = 0");
	return 0;

error_put:
	netfs_put_failed_request(rreq);
error:
	if (folio) {
		folio_unlock(folio);
		folio_put(folio);
	}
	_leave(" = %d", ret);
	return ret;
}
EXPORT_SYMBOL(netfs_write_begin);

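/* Illustrative sketch (not part of this file): a filesystem still using this
 * deprecated helper usually wraps it in its ->write_begin() method so that it
 * can supply its own netfs context.  This assumes the folio-based
 * ->write_begin() signature; "myfs" is hypothetical:
 *
 *	static int myfs_write_begin(struct file *file,
 *				    struct address_space *mapping,
 *				    loff_t pos, unsigned int len,
 *				    struct folio **foliop, void **fsdata)
 *	{
 *		return netfs_write_begin(netfs_inode(mapping->host), file,
 *					 mapping, pos, len, foliop, fsdata);
 *	}
 */
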
/*
 * Preload the data into a folio we're proposing to write into.
 */
int netfs_prefetch_for_write(struct file *file, struct folio *folio,
			     size_t offset, size_t len)
{
	struct netfs_io_request *rreq;
	struct address_space *mapping = folio->mapping;
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	unsigned long long start = folio_pos(folio);
	size_t flen = folio_size(folio);
	int ret;

	_enter("%zx @%llx", flen, start);

	ret = -ENOMEM;

	rreq = netfs_alloc_request(mapping, file, start, flen,
				   NETFS_READ_FOR_WRITE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto error;
	}

	rreq->no_unlock_folio = folio->index;
	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto error_put;

	netfs_stat(&netfs_n_rh_write_begin);
	trace_netfs_read(rreq, start, flen, netfs_read_trace_prefetch_for_write);

	/* Set up the output buffer */
	ret = netfs_create_singular_buffer(rreq, folio, NETFS_ROLLBUF_PAGECACHE_MARK);
	if (ret < 0)
		goto error_put;

	netfs_read_to_pagecache(rreq, NULL);
	ret = netfs_wait_for_read(rreq);
	netfs_put_request(rreq, netfs_rreq_trace_put_return);
	return ret < 0 ? ret : 0;

error_put:
	netfs_put_failed_request(rreq);
error:
	_leave(" = %d", ret);
	return ret;
}

/**
 * netfs_buffered_read_iter - Filesystem buffered I/O read routine
 * @iocb: kernel I/O control block
 * @iter: destination for the data read
 *
 * This is the ->read_iter() routine for all filesystems that can use the page
 * cache directly.
 *
 * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
 * returned when no data can be read without waiting for I/O requests to
 * complete; it doesn't prevent readahead.
 *
 * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
 * shall be made for the read or for readahead.  When no data can be read,
 * -EAGAIN shall be returned.  When readahead would be triggered, a partial,
 * possibly empty read shall be returned.
 *
 * Return:
 * * number of bytes copied, even for partial reads
 * * negative error code (or 0 if IOCB_NOIO) if nothing was read
 */
ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct netfs_inode *ictx = netfs_inode(inode);
	ssize_t ret;

	if (WARN_ON_ONCE((iocb->ki_flags & IOCB_DIRECT) ||
			 test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags)))
		return -EINVAL;

	ret = netfs_start_io_read(inode);
	if (ret == 0) {
		ret = filemap_read(iocb, iter, 0);
		netfs_end_io_read(inode);
	}
	return ret;
}
EXPORT_SYMBOL(netfs_buffered_read_iter);

/**
 * netfs_file_read_iter - Generic filesystem read routine
 * @iocb: kernel I/O control block
 * @iter: destination for the data read
 *
 * This is the ->read_iter() routine for all filesystems that can use the page
 * cache directly.
 *
 * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
 * returned when no data can be read without waiting for I/O requests to
 * complete; it doesn't prevent readahead.
 *
 * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
 * shall be made for the read or for readahead.  When no data can be read,
 * -EAGAIN shall be returned.  When readahead would be triggered, a partial,
 * possibly empty read shall be returned.
 *
 * Return:
 * * number of bytes copied, even for partial reads
 * * negative error code (or 0 if IOCB_NOIO) if nothing was read
 */
ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct netfs_inode *ictx = netfs_inode(iocb->ki_filp->f_mapping->host);

	if ((iocb->ki_flags & IOCB_DIRECT) ||
	    test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags))
		return netfs_unbuffered_read_iter(iocb, iter);

	return netfs_buffered_read_iter(iocb, iter);
}
EXPORT_SYMBOL(netfs_file_read_iter);
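
/* Illustrative usage (not part of this file): this is normally installed
 * directly as a filesystem's ->read_iter() ("myfs" is hypothetical):
 *
 *	const struct file_operations myfs_file_ops = {
 *		.read_iter	= netfs_file_read_iter,
 *		...
 *	};
 */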