1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* Network filesystem high-level buffered read support.
3 *
4 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
5 * Written by David Howells (dhowells@redhat.com)
6 */
7
8 #include <linux/export.h>
9 #include <linux/task_io_accounting_ops.h>
10 #include "internal.h"
11
netfs_cache_expand_readahead(struct netfs_io_request * rreq,unsigned long long * _start,unsigned long long * _len,unsigned long long i_size)12 static void netfs_cache_expand_readahead(struct netfs_io_request *rreq,
13 unsigned long long *_start,
14 unsigned long long *_len,
15 unsigned long long i_size)
16 {
17 struct netfs_cache_resources *cres = &rreq->cache_resources;
18
19 if (cres->ops && cres->ops->expand_readahead)
20 cres->ops->expand_readahead(cres, _start, _len, i_size);
21 }
22
netfs_rreq_expand(struct netfs_io_request * rreq,struct readahead_control * ractl)23 static void netfs_rreq_expand(struct netfs_io_request *rreq,
24 struct readahead_control *ractl)
25 {
26 /* Give the cache a chance to change the request parameters. The
27 * resultant request must contain the original region.
28 */
29 netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size);
30
31 /* Give the netfs a chance to change the request parameters. The
32 * resultant request must contain the original region.
33 */
34 if (rreq->netfs_ops->expand_readahead)
35 rreq->netfs_ops->expand_readahead(rreq);
36
37 /* Expand the request if the cache wants it to start earlier. Note
38 * that the expansion may get further extended if the VM wishes to
39 * insert THPs and the preferred start and/or end wind up in the middle
40 * of THPs.
41 *
42 * If this is the case, however, the THP size should be an integer
43 * multiple of the cache granule size, so we get a whole number of
44 * granules to deal with.
45 */
46 if (rreq->start != readahead_pos(ractl) ||
47 rreq->len != readahead_length(ractl)) {
48 readahead_expand(ractl, rreq->start, rreq->len);
49 rreq->start = readahead_pos(ractl);
50 rreq->len = readahead_length(ractl);
51
52 trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
53 netfs_read_trace_expanded);
54 }
55 }
56
57 /*
58 * Begin an operation, and fetch the stored zero point value from the cookie if
59 * available.
60 */
netfs_begin_cache_read(struct netfs_io_request * rreq,struct netfs_inode * ctx)61 static int netfs_begin_cache_read(struct netfs_io_request *rreq, struct netfs_inode *ctx)
62 {
63 return fscache_begin_read_operation(&rreq->cache_resources, netfs_i_cookie(ctx));
64 }
65
66 /*
67 * netfs_prepare_read_iterator - Prepare the subreq iterator for I/O
68 * @subreq: The subrequest to be set up
69 *
70 * Prepare the I/O iterator representing the read buffer on a subrequest for
71 * the filesystem to use for I/O (it can be passed directly to a socket). This
72 * is intended to be called from the ->issue_read() method once the filesystem
73 * has trimmed the request to the size it wants.
74 *
75 * Returns the limited size if successful and -ENOMEM if insufficient memory
76 * available.
77 *
78 * [!] NOTE: This must be run in the same thread as ->issue_read() was called
79 * in as we access the readahead_control struct.
80 */
netfs_prepare_read_iterator(struct netfs_io_subrequest * subreq,struct readahead_control * ractl)81 static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq,
82 struct readahead_control *ractl)
83 {
84 struct netfs_io_request *rreq = subreq->rreq;
85 size_t rsize = subreq->len;
86
87 if (subreq->source == NETFS_DOWNLOAD_FROM_SERVER)
88 rsize = umin(rsize, rreq->io_streams[0].sreq_max_len);
89
90 if (ractl) {
91 /* If we don't have sufficient folios in the rolling buffer,
92 * extract a folioq's worth from the readahead region at a time
93 * into the buffer. Note that this acquires a ref on each page
94 * that we will need to release later - but we don't want to do
95 * that until after we've started the I/O.
96 */
97 struct folio_batch put_batch;
98
99 folio_batch_init(&put_batch);
100 while (rreq->submitted < subreq->start + rsize) {
101 ssize_t added;
102
103 added = rolling_buffer_load_from_ra(&rreq->buffer, ractl,
104 &put_batch);
105 if (added < 0)
106 return added;
107 rreq->submitted += added;
108 }
109 folio_batch_release(&put_batch);
110 }
111
112 subreq->len = rsize;
113 if (unlikely(rreq->io_streams[0].sreq_max_segs)) {
114 size_t limit = netfs_limit_iter(&rreq->buffer.iter, 0, rsize,
115 rreq->io_streams[0].sreq_max_segs);
116
117 if (limit < rsize) {
118 subreq->len = limit;
119 trace_netfs_sreq(subreq, netfs_sreq_trace_limited);
120 }
121 }
122
123 subreq->io_iter = rreq->buffer.iter;
124
125 iov_iter_truncate(&subreq->io_iter, subreq->len);
126 rolling_buffer_advance(&rreq->buffer, subreq->len);
127 return subreq->len;
128 }
129
/*
 * Ask the cache where the data for a subrequest should come from.  If no
 * cache operations are attached to the request, the answer is always to
 * download from the server; otherwise the backend's ->prepare_read() decides
 * and the preparation is traced.
 */
static enum netfs_io_source netfs_cache_prepare_read(struct netfs_io_request *rreq,
						     struct netfs_io_subrequest *subreq,
						     loff_t i_size)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;
	enum netfs_io_source source = NETFS_DOWNLOAD_FROM_SERVER;

	if (cres->ops) {
		source = cres->ops->prepare_read(subreq, i_size);
		trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
	}

	return source;
}
144
145 /*
146 * Issue a read against the cache.
147 * - Eats the caller's ref on subreq.
148 */
netfs_read_cache_to_pagecache(struct netfs_io_request * rreq,struct netfs_io_subrequest * subreq)149 static void netfs_read_cache_to_pagecache(struct netfs_io_request *rreq,
150 struct netfs_io_subrequest *subreq)
151 {
152 struct netfs_cache_resources *cres = &rreq->cache_resources;
153
154 netfs_stat(&netfs_n_rh_read);
155 cres->ops->read(cres, subreq->start, &subreq->io_iter, NETFS_READ_HOLE_IGNORE,
156 netfs_cache_read_terminated, subreq);
157 }
158
netfs_queue_read(struct netfs_io_request * rreq,struct netfs_io_subrequest * subreq)159 void netfs_queue_read(struct netfs_io_request *rreq,
160 struct netfs_io_subrequest *subreq)
161 {
162 struct netfs_io_stream *stream = &rreq->io_streams[0];
163
164 __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
165
166 /* We add to the end of the list whilst the collector may be walking
167 * the list. The collector only goes nextwards and uses the lock to
168 * remove entries off of the front.
169 */
170 spin_lock(&rreq->lock);
171 /* Write IN_PROGRESS before pointer to new subreq */
172 list_add_tail_release(&subreq->rreq_link, &stream->subrequests);
173 if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
174 if (!stream->active) {
175 stream->collected_to = subreq->start;
176 /* Store list pointers before active flag */
177 smp_store_release(&stream->active, true);
178 }
179 }
180
181 spin_unlock(&rreq->lock);
182 }
183
netfs_issue_read(struct netfs_io_request * rreq,struct netfs_io_subrequest * subreq)184 static void netfs_issue_read(struct netfs_io_request *rreq,
185 struct netfs_io_subrequest *subreq)
186 {
187 switch (subreq->source) {
188 case NETFS_DOWNLOAD_FROM_SERVER:
189 rreq->netfs_ops->issue_read(subreq);
190 break;
191 case NETFS_READ_FROM_CACHE:
192 netfs_read_cache_to_pagecache(rreq, subreq);
193 break;
194 default:
195 __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
196 subreq->error = 0;
197 iov_iter_zero(subreq->len, &subreq->io_iter);
198 subreq->transferred = subreq->len;
199 netfs_read_subreq_terminated(subreq);
200 break;
201 }
202 }
203
204 /*
205 * Perform a read to the pagecache from a series of sources of different types,
206 * slicing up the region to be read according to available cache blocks and
207 * network rsize.
208 */
netfs_read_to_pagecache(struct netfs_io_request * rreq,struct readahead_control * ractl)209 static void netfs_read_to_pagecache(struct netfs_io_request *rreq,
210 struct readahead_control *ractl)
211 {
212 unsigned long long start = rreq->start;
213 ssize_t size = rreq->len;
214 int ret = 0;
215
216 do {
217 struct netfs_io_subrequest *subreq;
218 enum netfs_io_source source = NETFS_SOURCE_UNKNOWN;
219 ssize_t slice;
220
221 subreq = netfs_alloc_subrequest(rreq);
222 if (!subreq) {
223 ret = -ENOMEM;
224 break;
225 }
226
227 subreq->start = start;
228 subreq->len = size;
229
230 netfs_queue_read(rreq, subreq);
231
232 source = netfs_cache_prepare_read(rreq, subreq, rreq->i_size);
233 subreq->source = source;
234 if (source == NETFS_DOWNLOAD_FROM_SERVER) {
235 unsigned long long zero_point = netfs_read_zero_point(rreq->inode);
236 unsigned long long zp = umin(zero_point, rreq->i_size);
237 size_t len = subreq->len;
238
239 if (unlikely(rreq->origin == NETFS_READ_SINGLE))
240 zp = rreq->i_size;
241 if (subreq->start >= zp) {
242 subreq->source = source = NETFS_FILL_WITH_ZEROES;
243 goto fill_with_zeroes;
244 }
245
246 if (len > zp - subreq->start)
247 len = zp - subreq->start;
248 if (len == 0) {
249 pr_err("ZERO-LEN READ: R=%08x[%x] l=%zx/%zx s=%llx z=%llx i=%llx",
250 rreq->debug_id, subreq->debug_index,
251 subreq->len, size,
252 subreq->start, zero_point, rreq->i_size);
253 netfs_cancel_read(subreq, ret);
254 break;
255 }
256 subreq->len = len;
257
258 netfs_stat(&netfs_n_rh_download);
259 if (rreq->netfs_ops->prepare_read) {
260 ret = rreq->netfs_ops->prepare_read(subreq);
261 if (ret < 0) {
262 netfs_cancel_read(subreq, ret);
263 break;
264 }
265 trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
266 }
267 goto issue;
268 }
269
270 fill_with_zeroes:
271 if (source == NETFS_FILL_WITH_ZEROES) {
272 subreq->source = NETFS_FILL_WITH_ZEROES;
273 trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
274 netfs_stat(&netfs_n_rh_zero);
275 goto issue;
276 }
277
278 if (source == NETFS_READ_FROM_CACHE) {
279 trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
280 goto issue;
281 }
282
283 pr_err("Unexpected read source %u\n", source);
284 WARN_ON_ONCE(1);
285 netfs_cancel_read(subreq, ret);
286 break;
287
288 issue:
289 slice = netfs_prepare_read_iterator(subreq, ractl);
290 if (slice < 0) {
291 ret = slice;
292 netfs_cancel_read(subreq, ret);
293 break;
294 }
295 start += slice;
296 size -= slice;
297 if (size <= 0) {
298 smp_wmb(); /* Write lists before ALL_QUEUED. */
299 set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
300 }
301
302 netfs_issue_read(rreq, subreq);
303
304 if (test_bit(NETFS_RREQ_PAUSE, &rreq->flags))
305 netfs_wait_for_paused_read(rreq);
306 if (test_bit(NETFS_RREQ_FAILED, &rreq->flags))
307 break;
308 cond_resched();
309 } while (size > 0);
310
311 if (unlikely(size > 0)) {
312 smp_wmb(); /* Write lists before ALL_QUEUED. */
313 set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
314 netfs_wake_collector(rreq);
315 }
316
317 /* Defer error return as we may need to wait for outstanding I/O. */
318 cmpxchg(&rreq->error, 0, ret);
319 }
320
321 /**
322 * netfs_readahead - Helper to manage a read request
323 * @ractl: The description of the readahead request
324 *
325 * Fulfil a readahead request by drawing data from the cache if possible, or
326 * the netfs if not. Space beyond the EOF is zero-filled. Multiple I/O
327 * requests from different sources will get munged together. If necessary, the
328 * readahead window can be expanded in either direction to a more convenient
329 * alighment for RPC efficiency or to make storage in the cache feasible.
330 *
331 * The calling netfs must initialise a netfs context contiguous to the vfs
332 * inode before calling this.
333 *
334 * This is usable whether or not caching is enabled.
335 */
netfs_readahead(struct readahead_control * ractl)336 void netfs_readahead(struct readahead_control *ractl)
337 {
338 struct netfs_io_request *rreq;
339 struct netfs_inode *ictx = netfs_inode(ractl->mapping->host);
340 unsigned long long start = readahead_pos(ractl);
341 size_t size = readahead_length(ractl);
342 int ret;
343
344 rreq = netfs_alloc_request(ractl->mapping, ractl->file, start, size,
345 NETFS_READAHEAD);
346 if (IS_ERR(rreq))
347 return;
348
349 __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags);
350
351 ret = netfs_begin_cache_read(rreq, ictx);
352 if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
353 goto cleanup_free;
354
355 netfs_stat(&netfs_n_rh_readahead);
356 trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
357 netfs_read_trace_readahead);
358
359 netfs_rreq_expand(rreq, ractl);
360
361 rreq->submitted = rreq->start;
362 if (rolling_buffer_init(&rreq->buffer, rreq->debug_id, ITER_DEST) < 0)
363 goto cleanup_free;
364 netfs_read_to_pagecache(rreq, ractl);
365
366 return netfs_put_request(rreq, netfs_rreq_trace_put_return);
367
368 cleanup_free:
369 return netfs_put_failed_request(rreq);
370 }
371 EXPORT_SYMBOL(netfs_readahead);
372
373 /*
374 * Create a rolling buffer with a single occupying folio.
375 */
netfs_create_singular_buffer(struct netfs_io_request * rreq,struct folio * folio,unsigned int rollbuf_flags)376 static int netfs_create_singular_buffer(struct netfs_io_request *rreq, struct folio *folio,
377 unsigned int rollbuf_flags)
378 {
379 ssize_t added;
380
381 if (rolling_buffer_init(&rreq->buffer, rreq->debug_id, ITER_DEST) < 0)
382 return -ENOMEM;
383
384 added = rolling_buffer_append(&rreq->buffer, folio, rollbuf_flags);
385 if (added < 0)
386 return added;
387 rreq->submitted = rreq->start + added;
388 return 0;
389 }
390
391 /*
392 * Read into gaps in a folio partially filled by a streaming write.
393 */
netfs_read_gaps(struct file * file,struct folio * folio)394 static int netfs_read_gaps(struct file *file, struct folio *folio)
395 {
396 struct netfs_io_request *rreq;
397 struct address_space *mapping = folio->mapping;
398 struct netfs_group *group = netfs_folio_group(folio);
399 struct netfs_folio *finfo = netfs_folio_info(folio);
400 struct netfs_inode *ctx = netfs_inode(mapping->host);
401 struct folio *sink = NULL;
402 struct bio_vec *bvec;
403 unsigned int from = finfo->dirty_offset;
404 unsigned int to = from + finfo->dirty_len;
405 unsigned int off = 0, i = 0;
406 size_t flen = folio_size(folio);
407 size_t nr_bvec = flen / PAGE_SIZE + 2;
408 size_t part;
409 int ret;
410
411 _enter("%lx", folio->index);
412
413 rreq = netfs_alloc_request(mapping, file, folio_pos(folio), flen, NETFS_READ_GAPS);
414 if (IS_ERR(rreq)) {
415 ret = PTR_ERR(rreq);
416 goto alloc_error;
417 }
418
419 ret = netfs_begin_cache_read(rreq, ctx);
420 if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
421 goto discard;
422
423 netfs_stat(&netfs_n_rh_read_folio);
424 trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_read_gaps);
425
426 /* Fiddle the buffer so that a gap at the beginning and/or a gap at the
427 * end get copied to, but the middle is discarded.
428 */
429 ret = -ENOMEM;
430 bvec = kmalloc_objs(*bvec, nr_bvec);
431 if (!bvec)
432 goto discard;
433
434 sink = folio_alloc(GFP_KERNEL, 0);
435 if (!sink) {
436 kfree(bvec);
437 goto discard;
438 }
439
440 trace_netfs_folio(folio, netfs_folio_trace_read_gaps);
441
442 rreq->direct_bv = bvec;
443 rreq->direct_bv_count = nr_bvec;
444 if (from > 0) {
445 bvec_set_folio(&bvec[i++], folio, from, 0);
446 off = from;
447 }
448 while (off < to) {
449 part = min_t(size_t, to - off, PAGE_SIZE);
450 bvec_set_folio(&bvec[i++], sink, part, 0);
451 off += part;
452 }
453 if (to < flen)
454 bvec_set_folio(&bvec[i++], folio, flen - to, to);
455 iov_iter_bvec(&rreq->buffer.iter, ITER_DEST, bvec, i, rreq->len);
456 rreq->submitted = rreq->start + flen;
457
458 netfs_read_to_pagecache(rreq, NULL);
459
460 ret = netfs_wait_for_read(rreq);
461 if (ret >= 0) {
462 if (group)
463 folio_change_private(folio, group);
464 else
465 folio_detach_private(folio);
466 kfree(finfo);
467 trace_netfs_folio(folio, netfs_folio_trace_filled_gaps);
468 flush_dcache_folio(folio);
469 folio_mark_uptodate(folio);
470 }
471
472 if (sink)
473 folio_put(sink);
474 folio_unlock(folio);
475 netfs_put_request(rreq, netfs_rreq_trace_put_return);
476 return ret < 0 ? ret : 0;
477
478 discard:
479 netfs_put_failed_request(rreq);
480 alloc_error:
481 folio_unlock(folio);
482 return ret;
483 }
484
485 /**
486 * netfs_read_folio - Helper to manage a read_folio request
487 * @file: The file to read from
488 * @folio: The folio to read
489 *
490 * Fulfil a read_folio request by drawing data from the cache if
491 * possible, or the netfs if not. Space beyond the EOF is zero-filled.
492 * Multiple I/O requests from different sources will get munged together.
493 *
494 * The calling netfs must initialise a netfs context contiguous to the vfs
495 * inode before calling this.
496 *
497 * This is usable whether or not caching is enabled.
498 */
netfs_read_folio(struct file * file,struct folio * folio)499 int netfs_read_folio(struct file *file, struct folio *folio)
500 {
501 struct address_space *mapping = folio->mapping;
502 struct netfs_io_request *rreq;
503 struct netfs_inode *ctx = netfs_inode(mapping->host);
504 int ret;
505
506 folio_wait_writeback(folio);
507
508 if (folio_test_dirty(folio))
509 return netfs_read_gaps(file, folio);
510
511 _enter("%lx", folio->index);
512
513 rreq = netfs_alloc_request(mapping, file,
514 folio_pos(folio), folio_size(folio),
515 NETFS_READPAGE);
516 if (IS_ERR(rreq)) {
517 ret = PTR_ERR(rreq);
518 goto alloc_error;
519 }
520
521 ret = netfs_begin_cache_read(rreq, ctx);
522 if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
523 goto discard;
524
525 netfs_stat(&netfs_n_rh_read_folio);
526 trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);
527
528 /* Set up the output buffer */
529 ret = netfs_create_singular_buffer(rreq, folio, 0);
530 if (ret < 0)
531 goto discard;
532
533 netfs_read_to_pagecache(rreq, NULL);
534 ret = netfs_wait_for_read(rreq);
535 netfs_put_request(rreq, netfs_rreq_trace_put_return);
536 return ret < 0 ? ret : 0;
537
538 discard:
539 netfs_put_failed_request(rreq);
540 alloc_error:
541 folio_unlock(folio);
542 return ret;
543 }
544 EXPORT_SYMBOL(netfs_read_folio);
545
546 /*
547 * Prepare a folio for writing without reading first
548 * @folio: The folio being prepared
549 * @pos: starting position for the write
550 * @len: length of write
551 * @always_fill: T if the folio should always be completely filled/cleared
552 *
553 * In some cases, write_begin doesn't need to read at all:
554 * - full folio write
555 * - write that lies in a folio that is completely beyond EOF
556 * - write that covers the folio from start to EOF or beyond it
557 *
558 * If any of these criteria are met, then zero out the unwritten parts
559 * of the folio and return true. Otherwise, return false.
560 */
netfs_skip_folio_read(struct folio * folio,loff_t pos,size_t len,bool always_fill)561 static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
562 bool always_fill)
563 {
564 struct inode *inode = folio_inode(folio);
565 loff_t i_size = i_size_read(inode);
566 size_t offset = offset_in_folio(folio, pos);
567 size_t plen = folio_size(folio);
568
569 if (unlikely(always_fill)) {
570 if (pos - offset + len <= i_size)
571 return false; /* Page entirely before EOF */
572 folio_zero_segment(folio, 0, plen);
573 folio_mark_uptodate(folio);
574 return true;
575 }
576
577 /* Full folio write */
578 if (offset == 0 && len >= plen)
579 return true;
580
581 /* Page entirely beyond the end of the file */
582 if (pos - offset >= i_size)
583 goto zero_out;
584
585 /* Write that covers from the start of the folio to EOF or beyond */
586 if (offset == 0 && (pos + len) >= i_size)
587 goto zero_out;
588
589 return false;
590 zero_out:
591 folio_zero_segments(folio, 0, offset, offset + len, plen);
592 return true;
593 }
594
595 /**
596 * netfs_write_begin - Helper to prepare for writing [DEPRECATED]
597 * @ctx: The netfs context
598 * @file: The file to read from
599 * @mapping: The mapping to read from
600 * @pos: File position at which the write will begin
601 * @len: The length of the write (may extend beyond the end of the folio chosen)
602 * @_folio: Where to put the resultant folio
603 * @_fsdata: Place for the netfs to store a cookie
604 *
605 * Pre-read data for a write-begin request by drawing data from the cache if
606 * possible, or the netfs if not. Space beyond the EOF is zero-filled.
607 * Multiple I/O requests from different sources will get munged together.
608 *
609 * The calling netfs must provide a table of operations, only one of which,
610 * issue_read, is mandatory.
611 *
612 * The check_write_begin() operation can be provided to check for and flush
613 * conflicting writes once the folio is grabbed and locked. It is passed a
614 * pointer to the fsdata cookie that gets returned to the VM to be passed to
615 * write_end. It is permitted to sleep. It should return 0 if the request
616 * should go ahead or it may return an error. It may also unlock and put the
617 * folio, provided it sets ``*foliop`` to NULL, in which case a return of 0
618 * will cause the folio to be re-got and the process to be retried.
619 *
620 * The calling netfs must initialise a netfs context contiguous to the vfs
621 * inode before calling this.
622 *
623 * This is usable whether or not caching is enabled.
624 *
625 * Note that this should be considered deprecated and netfs_perform_write()
626 * used instead.
627 */
netfs_write_begin(struct netfs_inode * ctx,struct file * file,struct address_space * mapping,loff_t pos,unsigned int len,struct folio ** _folio,void ** _fsdata)628 int netfs_write_begin(struct netfs_inode *ctx,
629 struct file *file, struct address_space *mapping,
630 loff_t pos, unsigned int len, struct folio **_folio,
631 void **_fsdata)
632 {
633 struct netfs_io_request *rreq;
634 struct folio *folio;
635 pgoff_t index = pos >> PAGE_SHIFT;
636 int ret;
637
638 retry:
639 folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
640 mapping_gfp_mask(mapping));
641 if (IS_ERR(folio))
642 return PTR_ERR(folio);
643
644 if (ctx->ops->check_write_begin) {
645 /* Allow the netfs (eg. ceph) to flush conflicts. */
646 ret = ctx->ops->check_write_begin(file, pos, len, &folio, _fsdata);
647 if (ret < 0) {
648 trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
649 goto error;
650 }
651 if (!folio)
652 goto retry;
653 }
654
655 if (folio_test_uptodate(folio))
656 goto have_folio;
657
658 /* If the folio is beyond the EOF, we want to clear it - unless it's
659 * within the cache granule containing the EOF, in which case we need
660 * to preload the granule.
661 */
662 if (!netfs_is_cache_enabled(ctx) &&
663 netfs_skip_folio_read(folio, pos, len, false)) {
664 netfs_stat(&netfs_n_rh_write_zskip);
665 goto have_folio_no_wait;
666 }
667
668 rreq = netfs_alloc_request(mapping, file,
669 folio_pos(folio), folio_size(folio),
670 NETFS_READ_FOR_WRITE);
671 if (IS_ERR(rreq)) {
672 ret = PTR_ERR(rreq);
673 goto error;
674 }
675 rreq->no_unlock_folio = folio;
676 __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
677
678 ret = netfs_begin_cache_read(rreq, ctx);
679 if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
680 goto error_put;
681
682 netfs_stat(&netfs_n_rh_write_begin);
683 trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);
684
685 /* Set up the output buffer */
686 ret = netfs_create_singular_buffer(rreq, folio, 0);
687 if (ret < 0)
688 goto error_put;
689
690 netfs_read_to_pagecache(rreq, NULL);
691 ret = netfs_wait_for_read(rreq);
692 netfs_put_request(rreq, netfs_rreq_trace_put_return);
693 if (ret < 0)
694 goto error;
695
696 have_folio:
697 ret = folio_wait_private_2_killable(folio);
698 if (ret < 0)
699 goto error;
700 have_folio_no_wait:
701 *_folio = folio;
702 _leave(" = 0");
703 return 0;
704
705 error_put:
706 netfs_put_failed_request(rreq);
707 error:
708 if (folio) {
709 folio_unlock(folio);
710 folio_put(folio);
711 }
712 _leave(" = %d", ret);
713 return ret;
714 }
715 EXPORT_SYMBOL(netfs_write_begin);
716
717 /*
718 * Preload the data into a folio we're proposing to write into.
719 */
netfs_prefetch_for_write(struct file * file,struct folio * folio,size_t offset,size_t len)720 int netfs_prefetch_for_write(struct file *file, struct folio *folio,
721 size_t offset, size_t len)
722 {
723 struct netfs_io_request *rreq;
724 struct address_space *mapping = folio->mapping;
725 struct netfs_inode *ctx = netfs_inode(mapping->host);
726 unsigned long long start = folio_pos(folio);
727 size_t flen = folio_size(folio);
728 int ret;
729
730 _enter("%zx @%llx", flen, start);
731
732 ret = -ENOMEM;
733
734 rreq = netfs_alloc_request(mapping, file, start, flen,
735 NETFS_READ_FOR_WRITE);
736 if (IS_ERR(rreq)) {
737 ret = PTR_ERR(rreq);
738 goto error;
739 }
740
741 rreq->no_unlock_folio = folio;
742 __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
743 ret = netfs_begin_cache_read(rreq, ctx);
744 if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
745 goto error_put;
746
747 netfs_stat(&netfs_n_rh_write_begin);
748 trace_netfs_read(rreq, start, flen, netfs_read_trace_prefetch_for_write);
749
750 /* Set up the output buffer */
751 ret = netfs_create_singular_buffer(rreq, folio, NETFS_ROLLBUF_PAGECACHE_MARK);
752 if (ret < 0)
753 goto error_put;
754
755 netfs_read_to_pagecache(rreq, NULL);
756 ret = netfs_wait_for_read(rreq);
757 netfs_put_request(rreq, netfs_rreq_trace_put_return);
758 return ret < 0 ? ret : 0;
759
760 error_put:
761 netfs_put_failed_request(rreq);
762 error:
763 _leave(" = %d", ret);
764 return ret;
765 }
766
767 /**
768 * netfs_buffered_read_iter - Filesystem buffered I/O read routine
769 * @iocb: kernel I/O control block
770 * @iter: destination for the data read
771 *
772 * This is the ->read_iter() routine for all filesystems that can use the page
773 * cache directly.
774 *
775 * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
776 * returned when no data can be read without waiting for I/O requests to
777 * complete; it doesn't prevent readahead.
778 *
779 * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
780 * shall be made for the read or for readahead. When no data can be read,
781 * -EAGAIN shall be returned. When readahead would be triggered, a partial,
782 * possibly empty read shall be returned.
783 *
784 * Return:
785 * * number of bytes copied, even for partial reads
786 * * negative error code (or 0 if IOCB_NOIO) if nothing was read
787 */
netfs_buffered_read_iter(struct kiocb * iocb,struct iov_iter * iter)788 ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter)
789 {
790 struct inode *inode = file_inode(iocb->ki_filp);
791 struct netfs_inode *ictx = netfs_inode(inode);
792 ssize_t ret;
793
794 if (WARN_ON_ONCE((iocb->ki_flags & IOCB_DIRECT) ||
795 test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags)))
796 return -EINVAL;
797
798 ret = netfs_start_io_read(inode);
799 if (ret == 0) {
800 ret = filemap_read(iocb, iter, 0);
801 netfs_end_io_read(inode);
802 }
803 return ret;
804 }
805 EXPORT_SYMBOL(netfs_buffered_read_iter);
806
807 /**
808 * netfs_file_read_iter - Generic filesystem read routine
809 * @iocb: kernel I/O control block
810 * @iter: destination for the data read
811 *
812 * This is the ->read_iter() routine for all filesystems that can use the page
813 * cache directly.
814 *
815 * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
816 * returned when no data can be read without waiting for I/O requests to
817 * complete; it doesn't prevent readahead.
818 *
819 * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
820 * shall be made for the read or for readahead. When no data can be read,
821 * -EAGAIN shall be returned. When readahead would be triggered, a partial,
822 * possibly empty read shall be returned.
823 *
824 * Return:
825 * * number of bytes copied, even for partial reads
826 * * negative error code (or 0 if IOCB_NOIO) if nothing was read
827 */
netfs_file_read_iter(struct kiocb * iocb,struct iov_iter * iter)828 ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
829 {
830 struct netfs_inode *ictx = netfs_inode(iocb->ki_filp->f_mapping->host);
831
832 if ((iocb->ki_flags & IOCB_DIRECT) ||
833 test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags))
834 return netfs_unbuffered_read_iter(iocb, iter);
835
836 return netfs_buffered_read_iter(iocb, iter);
837 }
838 EXPORT_SYMBOL(netfs_file_read_iter);
839