// SPDX-License-Identifier: GPL-2.0-or-later
/* Network filesystem high-level buffered read support.
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"

static void netfs_cache_expand_readahead(struct netfs_io_request *rreq,
					 unsigned long long *_start,
					 unsigned long long *_len,
					 unsigned long long i_size)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	if (cres->ops && cres->ops->expand_readahead)
		cres->ops->expand_readahead(cres, _start, _len, i_size);
}

static void netfs_rreq_expand(struct netfs_io_request *rreq,
			      struct readahead_control *ractl)
{
	/* Give the cache a chance to change the request parameters. The
	 * resultant request must contain the original region.
	 */
	netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size);

	/* Give the netfs a chance to change the request parameters. The
	 * resultant request must contain the original region.
	 */
	if (rreq->netfs_ops->expand_readahead)
		rreq->netfs_ops->expand_readahead(rreq);

	/* Expand the request if the cache wants it to start earlier. Note
	 * that the expansion may get further extended if the VM wishes to
	 * insert THPs and the preferred start and/or end wind up in the middle
	 * of THPs.
	 *
	 * If this is the case, however, the THP size should be an integer
	 * multiple of the cache granule size, so we get a whole number of
	 * granules to deal with.
	 */
	if (rreq->start != readahead_pos(ractl) ||
	    rreq->len != readahead_length(ractl)) {
		readahead_expand(ractl, rreq->start, rreq->len);
		rreq->start = readahead_pos(ractl);
		rreq->len = readahead_length(ractl);

		trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
				 netfs_read_trace_expanded);
	}
}

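/* Worked example (illustrative numbers, not taken from this file): with a
 * hypothetical cache granule size of 256KiB, a readahead window of [260KiB,
 * 520KiB) would be expanded by the cache to the enclosing granules, i.e.
 * [256KiB, 768KiB), and readahead_expand() is then asked to grow the VM
 * window to match. The original region is always contained in the result.
 */
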
/*
 * Begin an operation, and fetch the stored zero point value from the cookie if
 * available.
 */
static int netfs_begin_cache_read(struct netfs_io_request *rreq, struct netfs_inode *ctx)
{
	return fscache_begin_read_operation(&rreq->cache_resources, netfs_i_cookie(ctx));
}

/*
 * Decant the list of folios to read into a rolling buffer.
 */
static size_t netfs_load_buffer_from_ra(struct netfs_io_request *rreq,
					struct folio_queue *folioq)
{
	unsigned int order, nr;
	size_t size = 0;

	nr = __readahead_batch(rreq->ractl, (struct page **)folioq->vec.folios,
			       ARRAY_SIZE(folioq->vec.folios));
	folioq->vec.nr = nr;
	for (int i = 0; i < nr; i++) {
		struct folio *folio = folioq_folio(folioq, i);

		trace_netfs_folio(folio, netfs_folio_trace_read);
		order = folio_order(folio);
		folioq->orders[i] = order;
		size += PAGE_SIZE << order;
	}

	for (int i = nr; i < folioq_nr_slots(folioq); i++)
		folioq_clear(folioq, i);

	return size;
}

/*
 * netfs_prepare_read_iterator - Prepare the subreq iterator for I/O
 * @subreq: The subrequest to be set up
 *
 * Prepare the I/O iterator representing the read buffer on a subrequest for
 * the filesystem to use for I/O (it can be passed directly to a socket). This
 * is intended to be called from the ->issue_read() method once the filesystem
 * has trimmed the request to the size it wants.
 *
 * Returns the limited size if successful and -ENOMEM if insufficient memory
 * is available.
 *
 * [!] NOTE: This must be run in the same thread as ->issue_read() was called
 * in, as we access the readahead_control struct.
 */
static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *rreq = subreq->rreq;
	size_t rsize = subreq->len;

	if (subreq->source == NETFS_DOWNLOAD_FROM_SERVER)
		rsize = umin(rsize, rreq->io_streams[0].sreq_max_len);

	if (rreq->ractl) {
		/* If we don't have sufficient folios in the rolling buffer,
		 * extract a folioq's worth from the readahead region at a time
		 * into the buffer. Note that this acquires a ref on each page
		 * that we will need to release later - but we don't want to do
		 * that until after we've started the I/O.
		 */
		while (rreq->submitted < subreq->start + rsize) {
			struct folio_queue *tail = rreq->buffer_tail, *new;
			size_t added;

			new = kmalloc(sizeof(*new), GFP_NOFS);
			if (!new)
				return -ENOMEM;
			netfs_stat(&netfs_n_folioq);
			folioq_init(new);
			new->prev = tail;
			tail->next = new;
			rreq->buffer_tail = new;
			added = netfs_load_buffer_from_ra(rreq, new);
			rreq->iter.count += added;
			rreq->submitted += added;
		}
	}

	subreq->len = rsize;
	if (unlikely(rreq->io_streams[0].sreq_max_segs)) {
		size_t limit = netfs_limit_iter(&rreq->iter, 0, rsize,
						rreq->io_streams[0].sreq_max_segs);

		if (limit < rsize) {
			subreq->len = limit;
			trace_netfs_sreq(subreq, netfs_sreq_trace_limited);
		}
	}

	subreq->io_iter = rreq->iter;

	if (iov_iter_is_folioq(&subreq->io_iter)) {
		if (subreq->io_iter.folioq_slot >= folioq_nr_slots(subreq->io_iter.folioq)) {
			subreq->io_iter.folioq = subreq->io_iter.folioq->next;
			subreq->io_iter.folioq_slot = 0;
		}
		subreq->curr_folioq = (struct folio_queue *)subreq->io_iter.folioq;
		subreq->curr_folioq_slot = subreq->io_iter.folioq_slot;
		subreq->curr_folio_order = subreq->curr_folioq->orders[subreq->curr_folioq_slot];
	}

	iov_iter_truncate(&subreq->io_iter, subreq->len);
	iov_iter_advance(&rreq->iter, subreq->len);
	return subreq->len;
}

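/* A sketch of the rolling buffer maintained above (explanatory; assumes the
 * folio_queue layout from include/linux/folio_queue.h): the request buffer is
 * a chain of fixed-size folio_queue segments, each holding a batch of folios
 * taken from the readahead window, and each subrequest's io_iter is a
 * truncated copy of rreq->iter over that chain:
 *
 *	rreq->buffer -> [folioq] <-> [folioq] <-> [folioq] <- rreq->buffer_tail
 *	                     ^-- rreq->iter ... rreq->submitted --^
 */
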
static enum netfs_io_source netfs_cache_prepare_read(struct netfs_io_request *rreq,
						     struct netfs_io_subrequest *subreq,
						     loff_t i_size)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	if (!cres->ops)
		return NETFS_DOWNLOAD_FROM_SERVER;
	return cres->ops->prepare_read(subreq, i_size);
}

static void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error,
					bool was_async)
{
	struct netfs_io_subrequest *subreq = priv;

	if (transferred_or_error < 0) {
		netfs_read_subreq_terminated(subreq, transferred_or_error, was_async);
		return;
	}

	if (transferred_or_error > 0)
		subreq->transferred += transferred_or_error;
	netfs_read_subreq_terminated(subreq, 0, was_async);
}

/*
 * Issue a read against the cache.
 * - Eats the caller's ref on subreq.
 */
static void netfs_read_cache_to_pagecache(struct netfs_io_request *rreq,
					  struct netfs_io_subrequest *subreq)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	netfs_stat(&netfs_n_rh_read);
	cres->ops->read(cres, subreq->start, &subreq->io_iter, NETFS_READ_HOLE_IGNORE,
			netfs_cache_read_terminated, subreq);
}

/*
 * Perform a read to the pagecache from a series of sources of different types,
 * slicing up the region to be read according to available cache blocks and
 * network rsize.
 */
static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
{
	struct netfs_inode *ictx = netfs_inode(rreq->inode);
	unsigned long long start = rreq->start;
	ssize_t size = rreq->len;
	int ret = 0;

	atomic_inc(&rreq->nr_outstanding);

	do {
		struct netfs_io_subrequest *subreq;
		enum netfs_io_source source = NETFS_DOWNLOAD_FROM_SERVER;
		ssize_t slice;

		subreq = netfs_alloc_subrequest(rreq);
		if (!subreq) {
			ret = -ENOMEM;
			break;
		}

		subreq->start = start;
		subreq->len = size;

		atomic_inc(&rreq->nr_outstanding);
		spin_lock_bh(&rreq->lock);
		list_add_tail(&subreq->rreq_link, &rreq->subrequests);
		subreq->prev_donated = rreq->prev_donated;
		rreq->prev_donated = 0;
		trace_netfs_sreq(subreq, netfs_sreq_trace_added);
		spin_unlock_bh(&rreq->lock);

		source = netfs_cache_prepare_read(rreq, subreq, rreq->i_size);
		subreq->source = source;
		if (source == NETFS_DOWNLOAD_FROM_SERVER) {
			unsigned long long zp = umin(ictx->zero_point, rreq->i_size);
			size_t len = subreq->len;

			if (subreq->start >= zp) {
				subreq->source = source = NETFS_FILL_WITH_ZEROES;
				goto fill_with_zeroes;
			}

			if (len > zp - subreq->start)
				len = zp - subreq->start;
			if (len == 0) {
				pr_err("ZERO-LEN READ: R=%08x[%x] l=%zx/%zx s=%llx z=%llx i=%llx",
				       rreq->debug_id, subreq->debug_index,
				       subreq->len, size,
				       subreq->start, ictx->zero_point, rreq->i_size);
				break;
			}
			subreq->len = len;

			netfs_stat(&netfs_n_rh_download);
			if (rreq->netfs_ops->prepare_read) {
				ret = rreq->netfs_ops->prepare_read(subreq);
				if (ret < 0) {
					atomic_dec(&rreq->nr_outstanding);
					netfs_put_subrequest(subreq, false,
							     netfs_sreq_trace_put_cancel);
					break;
				}
				trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
			}

			slice = netfs_prepare_read_iterator(subreq);
			if (slice < 0) {
				atomic_dec(&rreq->nr_outstanding);
				netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
				ret = slice;
				break;
			}

			rreq->netfs_ops->issue_read(subreq);
			goto done;
		}

	fill_with_zeroes:
		if (source == NETFS_FILL_WITH_ZEROES) {
			subreq->source = NETFS_FILL_WITH_ZEROES;
			trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
			netfs_stat(&netfs_n_rh_zero);
			slice = netfs_prepare_read_iterator(subreq);
			__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
			netfs_read_subreq_terminated(subreq, 0, false);
			goto done;
		}

		if (source == NETFS_READ_FROM_CACHE) {
			trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
			slice = netfs_prepare_read_iterator(subreq);
			netfs_read_cache_to_pagecache(rreq, subreq);
			goto done;
		}

		pr_err("Unexpected read source %u\n", source);
		WARN_ON_ONCE(1);
		break;

	done:
		size -= slice;
		start += slice;
		cond_resched();
	} while (size > 0);

	if (atomic_dec_and_test(&rreq->nr_outstanding))
		netfs_rreq_terminated(rreq, false);

	/* Defer error return as we may need to wait for outstanding I/O. */
	cmpxchg(&rreq->error, 0, ret);
}

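/* Illustrative slicing (hypothetical numbers): for a 1MiB request where the
 * cache holds the first 256KiB, the zero point sits at 768KiB and the server
 * rsize is 256KiB, the loop above would issue four subrequests in turn:
 * 256KiB read from the cache, two 256KiB downloads capped by sreq_max_len,
 * then 256KiB filled with zeroes - each source chosen by
 * netfs_cache_prepare_read() and the zero point check.
 */
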
/*
 * Wait for the read operation to complete, successfully or otherwise.
 */
static int netfs_wait_for_read(struct netfs_io_request *rreq)
{
	int ret;

	trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip);
	wait_on_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS, TASK_UNINTERRUPTIBLE);
	ret = rreq->error;
	if (ret == 0 && rreq->submitted < rreq->len) {
		trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
		ret = -EIO;
	}

	return ret;
}

/*
 * Set up the initial folioq of buffer folios in the rolling buffer and set the
 * iterator to refer to it.
 */
static int netfs_prime_buffer(struct netfs_io_request *rreq)
{
	struct folio_queue *folioq;
	size_t added;

	folioq = kmalloc(sizeof(*folioq), GFP_KERNEL);
	if (!folioq)
		return -ENOMEM;
	netfs_stat(&netfs_n_folioq);
	folioq_init(folioq);
	rreq->buffer = folioq;
	rreq->buffer_tail = folioq;
	rreq->submitted = rreq->start;
	iov_iter_folio_queue(&rreq->iter, ITER_DEST, folioq, 0, 0, 0);

	added = netfs_load_buffer_from_ra(rreq, folioq);
	rreq->iter.count += added;
	rreq->submitted += added;
	return 0;
}

/*
 * Drop the ref on each folio that we inherited from the VM readahead code. We
 * still have the folio locks to pin the page until we complete the I/O.
 *
 * Note that we can't just release the batch in each queue struct as we use the
 * occupancy count in other places.
 */
static void netfs_put_ra_refs(struct folio_queue *folioq)
{
	struct folio_batch fbatch;

	folio_batch_init(&fbatch);
	while (folioq) {
		for (unsigned int slot = 0; slot < folioq_count(folioq); slot++) {
			struct folio *folio = folioq_folio(folioq, slot);
			if (!folio)
				continue;
			trace_netfs_folio(folio, netfs_folio_trace_read_put);
			if (!folio_batch_add(&fbatch, folio))
				folio_batch_release(&fbatch);
		}
		folioq = folioq->next;
	}

	folio_batch_release(&fbatch);
}

/**
 * netfs_readahead - Helper to manage a read request
 * @ractl: The description of the readahead request
 *
 * Fulfil a readahead request by drawing data from the cache if possible, or
 * the netfs if not. Space beyond the EOF is zero-filled. Multiple I/O
 * requests from different sources will get munged together. If necessary, the
 * readahead window can be expanded in either direction to a more convenient
 * alignment for RPC efficiency or to make storage in the cache feasible.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
void netfs_readahead(struct readahead_control *ractl)
{
	struct netfs_io_request *rreq;
	struct netfs_inode *ictx = netfs_inode(ractl->mapping->host);
	unsigned long long start = readahead_pos(ractl);
	size_t size = readahead_length(ractl);
	int ret;

	rreq = netfs_alloc_request(ractl->mapping, ractl->file, start, size,
				   NETFS_READAHEAD);
	if (IS_ERR(rreq))
		return;

	ret = netfs_begin_cache_read(rreq, ictx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto cleanup_free;

	netfs_stat(&netfs_n_rh_readahead);
	trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
			 netfs_read_trace_readahead);

	netfs_rreq_expand(rreq, ractl);

	rreq->ractl = ractl;
	if (netfs_prime_buffer(rreq) < 0)
		goto cleanup_free;
	netfs_read_to_pagecache(rreq);

	/* Release the folio refs whilst we're waiting for the I/O. */
	netfs_put_ra_refs(rreq->buffer);

	netfs_put_request(rreq, true, netfs_rreq_trace_put_return);
	return;

cleanup_free:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
	return;
}
EXPORT_SYMBOL(netfs_readahead);

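/* Usage sketch (illustrative; "myfs" is hypothetical): a network filesystem
 * typically plugs these helpers straight into its address_space_operations:
 *
 *	const struct address_space_operations myfs_aops = {
 *		.readahead	= netfs_readahead,
 *		.read_folio	= netfs_read_folio,
 *		...
 *	};
 *
 * provided the inode's netfs context (struct netfs_inode) has been
 * initialised first.
 */
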
/*
 * Create a rolling buffer with a single occupying folio.
 */
static int netfs_create_singular_buffer(struct netfs_io_request *rreq, struct folio *folio)
{
	struct folio_queue *folioq;

	folioq = kmalloc(sizeof(*folioq), GFP_KERNEL);
	if (!folioq)
		return -ENOMEM;

	netfs_stat(&netfs_n_folioq);
	folioq_init(folioq);
	folioq_append(folioq, folio);
	BUG_ON(folioq_folio(folioq, 0) != folio);
	BUG_ON(folioq_folio_order(folioq, 0) != folio_order(folio));
	rreq->buffer = folioq;
	rreq->buffer_tail = folioq;
	rreq->submitted = rreq->start + rreq->len;
	iov_iter_folio_queue(&rreq->iter, ITER_DEST, folioq, 0, 0, rreq->len);
	rreq->ractl = (struct readahead_control *)1UL;
	return 0;
}

/*
 * Read into gaps in a folio partially filled by a streaming write.
 */
static int netfs_read_gaps(struct file *file, struct folio *folio)
{
	struct netfs_io_request *rreq;
	struct address_space *mapping = folio->mapping;
	struct netfs_folio *finfo = netfs_folio_info(folio);
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	struct folio *sink = NULL;
	struct bio_vec *bvec;
	unsigned int from = finfo->dirty_offset;
	unsigned int to = from + finfo->dirty_len;
	unsigned int off = 0, i = 0;
	size_t flen = folio_size(folio);
	size_t nr_bvec = flen / PAGE_SIZE + 2;
	size_t part;
	int ret;

	_enter("%lx", folio->index);

	rreq = netfs_alloc_request(mapping, file, folio_pos(folio), flen, NETFS_READ_GAPS);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto alloc_error;
	}

	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto discard;

	netfs_stat(&netfs_n_rh_read_folio);
	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_read_gaps);

	/* Fiddle the buffer so that a gap at the beginning and/or a gap at the
	 * end get copied to, but the middle is discarded.
	 */
	ret = -ENOMEM;
	bvec = kmalloc_array(nr_bvec, sizeof(*bvec), GFP_KERNEL);
	if (!bvec)
		goto discard;

	sink = folio_alloc(GFP_KERNEL, 0);
	if (!sink) {
		kfree(bvec);
		goto discard;
	}

	trace_netfs_folio(folio, netfs_folio_trace_read_gaps);

	rreq->direct_bv = bvec;
	rreq->direct_bv_count = nr_bvec;
	if (from > 0) {
		bvec_set_folio(&bvec[i++], folio, from, 0);
		off = from;
	}
	while (off < to) {
		part = min_t(size_t, to - off, PAGE_SIZE);
		bvec_set_folio(&bvec[i++], sink, part, 0);
		off += part;
	}
	if (to < flen)
		bvec_set_folio(&bvec[i++], folio, flen - to, to);
	iov_iter_bvec(&rreq->iter, ITER_DEST, bvec, i, rreq->len);
	rreq->submitted = rreq->start + flen;

	netfs_read_to_pagecache(rreq);

	if (sink)
		folio_put(sink);

	ret = netfs_wait_for_read(rreq);
	if (ret == 0) {
		flush_dcache_folio(folio);
		folio_mark_uptodate(folio);
	}
	folio_unlock(folio);
	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
	return ret < 0 ? ret : 0;

discard:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
alloc_error:
	folio_unlock(folio);
	return ret;
}

/**
 * netfs_read_folio - Helper to manage a read_folio request
 * @file: The file to read from
 * @folio: The folio to read
 *
 * Fulfil a read_folio request by drawing data from the cache if
 * possible, or the netfs if not. Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
int netfs_read_folio(struct file *file, struct folio *folio)
{
	struct address_space *mapping = folio->mapping;
	struct netfs_io_request *rreq;
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	int ret;

	if (folio_test_dirty(folio)) {
		trace_netfs_folio(folio, netfs_folio_trace_read_gaps);
		return netfs_read_gaps(file, folio);
	}

	_enter("%lx", folio->index);

	rreq = netfs_alloc_request(mapping, file,
				   folio_pos(folio), folio_size(folio),
				   NETFS_READPAGE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto alloc_error;
	}

	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto discard;

	netfs_stat(&netfs_n_rh_read_folio);
	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);

	/* Set up the output buffer */
	ret = netfs_create_singular_buffer(rreq, folio);
	if (ret < 0)
		goto discard;

	netfs_read_to_pagecache(rreq);
	ret = netfs_wait_for_read(rreq);
	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
	return ret < 0 ? ret : 0;

discard:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
alloc_error:
	folio_unlock(folio);
	return ret;
}
EXPORT_SYMBOL(netfs_read_folio);

/*
 * Prepare a folio for writing without reading first
 * @folio: The folio being prepared
 * @pos: starting position for the write
 * @len: length of write
 * @always_fill: T if the folio should always be completely filled/cleared
 *
 * In some cases, write_begin doesn't need to read at all:
 * - full folio write
 * - write that lies in a folio that is completely beyond EOF
 * - write that covers the folio from start to EOF or beyond it
 *
 * If any of these criteria are met, then zero out the unwritten parts
 * of the folio and return true. Otherwise, return false.
 */
static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
				  bool always_fill)
{
	struct inode *inode = folio_inode(folio);
	loff_t i_size = i_size_read(inode);
	size_t offset = offset_in_folio(folio, pos);
	size_t plen = folio_size(folio);

	if (unlikely(always_fill)) {
		if (pos - offset + len <= i_size)
			return false; /* Page entirely before EOF */
		zero_user_segment(&folio->page, 0, plen);
		folio_mark_uptodate(folio);
		return true;
	}

	/* Full folio write */
	if (offset == 0 && len >= plen)
		return true;

	/* Page entirely beyond the end of the file */
	if (pos - offset >= i_size)
		goto zero_out;

	/* Write that covers from the start of the folio to EOF or beyond */
	if (offset == 0 && (pos + len) >= i_size)
		goto zero_out;

	return false;
zero_out:
	zero_user_segments(&folio->page, 0, offset, offset + len, plen);
	return true;
}

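/* Worked example (hypothetical numbers): take a 4KiB folio at file position 0
 * with i_size = 1KiB. A write of len = 512 at pos = 0 matches none of the
 * criteria above (it neither fills the folio nor reaches EOF), so false is
 * returned and the caller must read first. A write of len = 2KiB at pos = 0,
 * however, covers the folio from the start to beyond EOF, so bytes 2KiB-4KiB
 * are zeroed and true is returned.
 */
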
/**
 * netfs_write_begin - Helper to prepare for writing [DEPRECATED]
 * @ctx: The netfs context
 * @file: The file to read from
 * @mapping: The mapping to read from
 * @pos: File position at which the write will begin
 * @len: The length of the write (may extend beyond the end of the folio chosen)
 * @_folio: Where to put the resultant folio
 * @_fsdata: Place for the netfs to store a cookie
 *
 * Pre-read data for a write-begin request by drawing data from the cache if
 * possible, or the netfs if not. Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together.
 *
 * The calling netfs must provide a table of operations, only one of which,
 * issue_read, is mandatory.
 *
 * The check_write_begin() operation can be provided to check for and flush
 * conflicting writes once the folio is grabbed and locked. It is passed a
 * pointer to the fsdata cookie that gets returned to the VM to be passed to
 * write_end. It is permitted to sleep. It should return 0 if the request
 * should go ahead or it may return an error. It may also unlock and put the
 * folio, provided it sets ``*foliop`` to NULL, in which case a return of 0
 * will cause the folio to be re-got and the process to be retried.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 *
 * Note that this should be considered deprecated and netfs_perform_write()
 * used instead.
 */
int netfs_write_begin(struct netfs_inode *ctx,
		      struct file *file, struct address_space *mapping,
		      loff_t pos, unsigned int len, struct folio **_folio,
		      void **_fsdata)
{
	struct netfs_io_request *rreq;
	struct folio *folio;
	pgoff_t index = pos >> PAGE_SHIFT;
	int ret;

retry:
	folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
				    mapping_gfp_mask(mapping));
	if (IS_ERR(folio))
		return PTR_ERR(folio);

	if (ctx->ops->check_write_begin) {
		/* Allow the netfs (eg. ceph) to flush conflicts. */
		ret = ctx->ops->check_write_begin(file, pos, len, &folio, _fsdata);
		if (ret < 0) {
			trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
			goto error;
		}
		if (!folio)
			goto retry;
	}

	if (folio_test_uptodate(folio))
		goto have_folio;

	/* If the page is beyond the EOF, we want to clear it - unless it's
	 * within the cache granule containing the EOF, in which case we need
	 * to preload the granule.
	 */
	if (!netfs_is_cache_enabled(ctx) &&
	    netfs_skip_folio_read(folio, pos, len, false)) {
		netfs_stat(&netfs_n_rh_write_zskip);
		goto have_folio_no_wait;
	}

	rreq = netfs_alloc_request(mapping, file,
				   folio_pos(folio), folio_size(folio),
				   NETFS_READ_FOR_WRITE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto error;
	}
	rreq->no_unlock_folio = folio->index;
	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);

	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto error_put;

	netfs_stat(&netfs_n_rh_write_begin);
	trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);

	/* Set up the output buffer */
	ret = netfs_create_singular_buffer(rreq, folio);
	if (ret < 0)
		goto error_put;

	netfs_read_to_pagecache(rreq);
	ret = netfs_wait_for_read(rreq);
	if (ret < 0)
		goto error;
	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);

have_folio:
	ret = folio_wait_private_2_killable(folio);
	if (ret < 0)
		goto error;
have_folio_no_wait:
	*_folio = folio;
	_leave(" = 0");
	return 0;

error_put:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
error:
	if (folio) {
		folio_unlock(folio);
		folio_put(folio);
	}
	_leave(" = %d", ret);
	return ret;
}
EXPORT_SYMBOL(netfs_write_begin);

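/* Usage sketch (illustrative; the "myfs" names are hypothetical): a
 * filesystem still on the legacy write path would wrap this from its own
 * ->write_begin() aop, e.g.:
 *
 *	static int myfs_write_begin(struct file *file,
 *				    struct address_space *mapping,
 *				    loff_t pos, unsigned int len,
 *				    struct folio **foliop, void **fsdata)
 *	{
 *		return netfs_write_begin(netfs_inode(mapping->host), file,
 *					 mapping, pos, len, foliop, fsdata);
 *	}
 */
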
/*
 * Preload the data into a page we're proposing to write into.
 */
int netfs_prefetch_for_write(struct file *file, struct folio *folio,
			     size_t offset, size_t len)
{
	struct netfs_io_request *rreq;
	struct address_space *mapping = folio->mapping;
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	unsigned long long start = folio_pos(folio);
	size_t flen = folio_size(folio);
	int ret;

	_enter("%zx @%llx", flen, start);

	ret = -ENOMEM;

	rreq = netfs_alloc_request(mapping, file, start, flen,
				   NETFS_READ_FOR_WRITE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto error;
	}

	rreq->no_unlock_folio = folio->index;
	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto error_put;

	netfs_stat(&netfs_n_rh_write_begin);
	trace_netfs_read(rreq, start, flen, netfs_read_trace_prefetch_for_write);

	/* Set up the output buffer */
	ret = netfs_create_singular_buffer(rreq, folio);
	if (ret < 0)
		goto error_put;

	folioq_mark2(rreq->buffer, 0);
	netfs_read_to_pagecache(rreq);
	ret = netfs_wait_for_read(rreq);
	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
	return ret;

error_put:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
error:
	_leave(" = %d", ret);
	return ret;
}

/**
 * netfs_buffered_read_iter - Filesystem buffered I/O read routine
 * @iocb: kernel I/O control block
 * @iter: destination for the data read
 *
 * This is the ->read_iter() routine for all filesystems that can use the page
 * cache directly.
 *
 * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
 * returned when no data can be read without waiting for I/O requests to
 * complete; it doesn't prevent readahead.
 *
 * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
 * shall be made for the read or for readahead. When no data can be read,
 * -EAGAIN shall be returned. When readahead would be triggered, a partial,
 * possibly empty read shall be returned.
 *
 * Return:
 * * number of bytes copied, even for partial reads
 * * negative error code (or 0 if IOCB_NOIO) if nothing was read
 */
ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct netfs_inode *ictx = netfs_inode(inode);
	ssize_t ret;

	if (WARN_ON_ONCE((iocb->ki_flags & IOCB_DIRECT) ||
			 test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags)))
		return -EINVAL;

	ret = netfs_start_io_read(inode);
	if (ret == 0) {
		ret = filemap_read(iocb, iter, 0);
		netfs_end_io_read(inode);
	}
	return ret;
}
EXPORT_SYMBOL(netfs_buffered_read_iter);

/**
 * netfs_file_read_iter - Generic filesystem read routine
 * @iocb: kernel I/O control block
 * @iter: destination for the data read
 *
 * This is the ->read_iter() routine for all filesystems that can use the page
 * cache directly.
 *
 * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
 * returned when no data can be read without waiting for I/O requests to
 * complete; it doesn't prevent readahead.
 *
 * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
 * shall be made for the read or for readahead. When no data can be read,
 * -EAGAIN shall be returned. When readahead would be triggered, a partial,
 * possibly empty read shall be returned.
 *
 * Return:
 * * number of bytes copied, even for partial reads
 * * negative error code (or 0 if IOCB_NOIO) if nothing was read
 */
ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct netfs_inode *ictx = netfs_inode(iocb->ki_filp->f_mapping->host);

	if ((iocb->ki_flags & IOCB_DIRECT) ||
	    test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags))
		return netfs_unbuffered_read_iter(iocb, iter);

	return netfs_buffered_read_iter(iocb, iter);
}
EXPORT_SYMBOL(netfs_file_read_iter);
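
/* Usage sketch (illustrative; "myfs" is hypothetical): this is intended to be
 * plugged directly into a filesystem's file_operations, letting netfslib pick
 * buffered or unbuffered/direct reads as appropriate:
 *
 *	const struct file_operations myfs_file_operations = {
 *		.read_iter	= netfs_file_read_iter,
 *		...
 *	};
 */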