// SPDX-License-Identifier: GPL-2.0-or-later
/* Network filesystem high-level buffered read support.
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"

static void netfs_cache_expand_readahead(struct netfs_io_request *rreq,
                                         unsigned long long *_start,
                                         unsigned long long *_len,
                                         unsigned long long i_size)
{
        struct netfs_cache_resources *cres = &rreq->cache_resources;

        if (cres->ops && cres->ops->expand_readahead)
                cres->ops->expand_readahead(cres, _start, _len, i_size);
}
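
/* Illustration (an assumed sketch, not upstream code): a cache backend's
 * ->expand_readahead() typically rounds the requested region outwards so that
 * whole cache granules are covered, along the lines of:
 *
 *      start = round_down(*_start, granule_size);
 *      *_len += *_start - start;
 *      *_start = start;
 *      *_len = round_up(*_len, granule_size);
 *
 * (clamping against i_size omitted).  The granule size is a property of the
 * particular cache backend and is an assumption here.
 */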

static void netfs_rreq_expand(struct netfs_io_request *rreq,
                              struct readahead_control *ractl)
{
        /* Give the cache a chance to change the request parameters. The
         * resultant request must contain the original region.
         */
        netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size);

        /* Give the netfs a chance to change the request parameters. The
         * resultant request must contain the original region.
         */
        if (rreq->netfs_ops->expand_readahead)
                rreq->netfs_ops->expand_readahead(rreq);

        /* Expand the request if the cache wants it to start earlier. Note
         * that the expansion may get further extended if the VM wishes to
         * insert THPs and the preferred start and/or end wind up in the middle
         * of THPs.
         *
         * If this is the case, however, the THP size should be an integer
         * multiple of the cache granule size, so we get a whole number of
         * granules to deal with.
         */
        if (rreq->start != readahead_pos(ractl) ||
            rreq->len != readahead_length(ractl)) {
                readahead_expand(ractl, rreq->start, rreq->len);
                rreq->start = readahead_pos(ractl);
                rreq->len = readahead_length(ractl);

                trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
                                 netfs_read_trace_expanded);
        }
}

/*
 * Begin an operation, and fetch the stored zero point value from the cookie if
 * available.
 */
static int netfs_begin_cache_read(struct netfs_io_request *rreq, struct netfs_inode *ctx)
{
        return fscache_begin_read_operation(&rreq->cache_resources, netfs_i_cookie(ctx));
}

/*
 * netfs_prepare_read_iterator - Prepare the subreq iterator for I/O
 * @subreq: The subrequest to be set up
 *
 * Prepare the I/O iterator representing the read buffer on a subrequest for
 * the filesystem to use for I/O (it can be passed directly to a socket). This
 * is intended to be called from the ->issue_read() method once the filesystem
 * has trimmed the request to the size it wants.
 *
 * Returns the limited size if successful and -ENOMEM if insufficient memory
 * available.
 *
 * [!] NOTE: This must be run in the same thread as ->issue_read() was called
 * in as we access the readahead_control struct.
 */
static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq,
                                           struct readahead_control *ractl)
{
        struct netfs_io_request *rreq = subreq->rreq;
        size_t rsize = subreq->len;

        if (subreq->source == NETFS_DOWNLOAD_FROM_SERVER)
                rsize = umin(rsize, rreq->io_streams[0].sreq_max_len);

        if (ractl) {
                /* If we don't have sufficient folios in the rolling buffer,
                 * extract a folioq's worth from the readahead region at a time
                 * into the buffer. Note that this acquires a ref on each page
                 * that we will need to release later - but we don't want to do
                 * that until after we've started the I/O.
                 */
                struct folio_batch put_batch;

                folio_batch_init(&put_batch);
                while (rreq->submitted < subreq->start + rsize) {
                        ssize_t added;

                        added = rolling_buffer_load_from_ra(&rreq->buffer, ractl,
                                                            &put_batch);
                        if (added < 0)
                                return added;
                        rreq->submitted += added;
                }
                folio_batch_release(&put_batch);
        }

        subreq->len = rsize;
        if (unlikely(rreq->io_streams[0].sreq_max_segs)) {
                size_t limit = netfs_limit_iter(&rreq->buffer.iter, 0, rsize,
                                                rreq->io_streams[0].sreq_max_segs);

                if (limit < rsize) {
                        subreq->len = limit;
                        trace_netfs_sreq(subreq, netfs_sreq_trace_limited);
                }
        }

        subreq->io_iter = rreq->buffer.iter;

        iov_iter_truncate(&subreq->io_iter, subreq->len);
        rolling_buffer_advance(&rreq->buffer, subreq->len);
        return subreq->len;
}

static enum netfs_io_source netfs_cache_prepare_read(struct netfs_io_request *rreq,
                                                     struct netfs_io_subrequest *subreq,
                                                     loff_t i_size)
{
        struct netfs_cache_resources *cres = &rreq->cache_resources;
        enum netfs_io_source source;

        if (!cres->ops)
                return NETFS_DOWNLOAD_FROM_SERVER;
        source = cres->ops->prepare_read(subreq, i_size);
        trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
        return source;
}

/*
 * Issue a read against the cache.
 * - Eats the caller's ref on subreq.
 */
static void netfs_read_cache_to_pagecache(struct netfs_io_request *rreq,
                                          struct netfs_io_subrequest *subreq)
{
        struct netfs_cache_resources *cres = &rreq->cache_resources;

        netfs_stat(&netfs_n_rh_read);
        cres->ops->read(cres, subreq->start, &subreq->io_iter, NETFS_READ_HOLE_IGNORE,
                        netfs_cache_read_terminated, subreq);
}

static void netfs_queue_read(struct netfs_io_request *rreq,
                             struct netfs_io_subrequest *subreq,
                             bool last_subreq)
{
        struct netfs_io_stream *stream = &rreq->io_streams[0];

        __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);

        /* We add to the end of the list whilst the collector may be walking
         * the list. The collector only moves forwards along the list and uses
         * the lock to remove entries off of the front.
         */
        spin_lock(&rreq->lock);
        list_add_tail(&subreq->rreq_link, &stream->subrequests);
        if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
                stream->front = subreq;
                if (!stream->active) {
                        stream->collected_to = stream->front->start;
                        /* Store list pointers before active flag */
                        smp_store_release(&stream->active, true);
                }
        }

        if (last_subreq) {
                smp_wmb(); /* Write lists before ALL_QUEUED. */
                set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
        }

        spin_unlock(&rreq->lock);
}

static void netfs_issue_read(struct netfs_io_request *rreq,
                             struct netfs_io_subrequest *subreq)
{
        switch (subreq->source) {
        case NETFS_DOWNLOAD_FROM_SERVER:
                rreq->netfs_ops->issue_read(subreq);
                break;
        case NETFS_READ_FROM_CACHE:
                netfs_read_cache_to_pagecache(rreq, subreq);
                break;
        default:
                __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
                subreq->error = 0;
                iov_iter_zero(subreq->len, &subreq->io_iter);
                subreq->transferred = subreq->len;
                netfs_read_subreq_terminated(subreq);
                break;
        }
}

/*
 * Perform a read to the pagecache from a series of sources of different types,
 * slicing up the region to be read according to available cache blocks and
 * network rsize.
 */
static void netfs_read_to_pagecache(struct netfs_io_request *rreq,
                                    struct readahead_control *ractl)
{
        struct netfs_inode *ictx = netfs_inode(rreq->inode);
        unsigned long long start = rreq->start;
        ssize_t size = rreq->len;
        int ret = 0;

        do {
                struct netfs_io_subrequest *subreq;
                enum netfs_io_source source = NETFS_SOURCE_UNKNOWN;
                ssize_t slice;

                subreq = netfs_alloc_subrequest(rreq);
                if (!subreq) {
                        ret = -ENOMEM;
                        break;
                }

                subreq->start = start;
                subreq->len = size;

                source = netfs_cache_prepare_read(rreq, subreq, rreq->i_size);
                subreq->source = source;
                if (source == NETFS_DOWNLOAD_FROM_SERVER) {
                        unsigned long long zp = umin(ictx->zero_point, rreq->i_size);
                        size_t len = subreq->len;

                        if (unlikely(rreq->origin == NETFS_READ_SINGLE))
                                zp = rreq->i_size;
                        if (subreq->start >= zp) {
                                subreq->source = source = NETFS_FILL_WITH_ZEROES;
                                goto fill_with_zeroes;
                        }

                        if (len > zp - subreq->start)
                                len = zp - subreq->start;
                        if (len == 0) {
                                pr_err("ZERO-LEN READ: R=%08x[%x] l=%zx/%zx s=%llx z=%llx i=%llx",
                                       rreq->debug_id, subreq->debug_index,
                                       subreq->len, size,
                                       subreq->start, ictx->zero_point, rreq->i_size);
                                break;
                        }
                        subreq->len = len;

                        netfs_stat(&netfs_n_rh_download);
                        if (rreq->netfs_ops->prepare_read) {
                                ret = rreq->netfs_ops->prepare_read(subreq);
                                if (ret < 0) {
                                        subreq->error = ret;
                                        /* Not queued - release both refs. */
                                        netfs_put_subrequest(subreq,
                                                             netfs_sreq_trace_put_cancel);
                                        netfs_put_subrequest(subreq,
                                                             netfs_sreq_trace_put_cancel);
                                        break;
                                }
                                trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
                        }
                        goto issue;
                }

        fill_with_zeroes:
                if (source == NETFS_FILL_WITH_ZEROES) {
                        subreq->source = NETFS_FILL_WITH_ZEROES;
                        trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
                        netfs_stat(&netfs_n_rh_zero);
                        goto issue;
                }

                if (source == NETFS_READ_FROM_CACHE) {
                        trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
                        goto issue;
                }

                pr_err("Unexpected read source %u\n", source);
                WARN_ON_ONCE(1);
                break;

        issue:
                slice = netfs_prepare_read_iterator(subreq, ractl);
                if (slice < 0) {
                        ret = slice;
                        subreq->error = ret;
                        trace_netfs_sreq(subreq, netfs_sreq_trace_cancel);
                        /* Not queued - release both refs. */
                        netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);
                        netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);
                        break;
                }
                size -= slice;
                start += slice;

                netfs_queue_read(rreq, subreq, size <= 0);
                netfs_issue_read(rreq, subreq);
                cond_resched();
        } while (size > 0);

        if (unlikely(size > 0)) {
                smp_wmb(); /* Write lists before ALL_QUEUED. */
                set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
                netfs_wake_collector(rreq);
        }

        /* Defer error return as we may need to wait for outstanding I/O. */
        cmpxchg(&rreq->error, 0, ret);
}
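
/* Illustration (an assumed example, not from upstream): a 256KiB readahead
 * over a region whose first 64KiB is resident in the cache would typically be
 * sliced into one NETFS_READ_FROM_CACHE subrequest for the cached part, then
 * one or more NETFS_DOWNLOAD_FROM_SERVER subrequests capped by the stream's
 * sreq_max_len (the network rsize), with anything at or beyond the zero point
 * becoming NETFS_FILL_WITH_ZEROES rather than being fetched.
 */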

/**
 * netfs_readahead - Helper to manage a read request
 * @ractl: The description of the readahead request
 *
 * Fulfil a readahead request by drawing data from the cache if possible, or
 * the netfs if not. Space beyond the EOF is zero-filled. Multiple I/O
 * requests from different sources will get munged together. If necessary, the
 * readahead window can be expanded in either direction to a more convenient
 * alignment for RPC efficiency or to make storage in the cache feasible.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
void netfs_readahead(struct readahead_control *ractl)
{
        struct netfs_io_request *rreq;
        struct netfs_inode *ictx = netfs_inode(ractl->mapping->host);
        unsigned long long start = readahead_pos(ractl);
        size_t size = readahead_length(ractl);
        int ret;

        rreq = netfs_alloc_request(ractl->mapping, ractl->file, start, size,
                                   NETFS_READAHEAD);
        if (IS_ERR(rreq))
                return;

        __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags);

        ret = netfs_begin_cache_read(rreq, ictx);
        if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
                goto cleanup_free;

        netfs_stat(&netfs_n_rh_readahead);
        trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
                         netfs_read_trace_readahead);

        netfs_rreq_expand(rreq, ractl);

        rreq->submitted = rreq->start;
        if (rolling_buffer_init(&rreq->buffer, rreq->debug_id, ITER_DEST) < 0)
                goto cleanup_free;
        netfs_read_to_pagecache(rreq, ractl);

        return netfs_put_request(rreq, netfs_rreq_trace_put_return);

cleanup_free:
        return netfs_put_failed_request(rreq);
}
EXPORT_SYMBOL(netfs_readahead);
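
/* Sketch of the "netfs context contiguous to the vfs inode" requirement
 * mentioned above (illustrative only; myfs_inode and myfs_req_ops are
 * placeholder names, and helper arguments may vary by kernel version):
 *
 *      struct myfs_inode {
 *              struct netfs_inode netfs;       // Must come first; wraps the VFS inode
 *              ...
 *      };
 *
 * and, when the inode is set up:
 *
 *      netfs_inode_init(&mi->netfs, &myfs_req_ops, false);
 */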

/*
 * Create a rolling buffer with a single occupying folio.
 */
static int netfs_create_singular_buffer(struct netfs_io_request *rreq, struct folio *folio,
                                        unsigned int rollbuf_flags)
{
        ssize_t added;

        if (rolling_buffer_init(&rreq->buffer, rreq->debug_id, ITER_DEST) < 0)
                return -ENOMEM;

        added = rolling_buffer_append(&rreq->buffer, folio, rollbuf_flags);
        if (added < 0)
                return added;
        rreq->submitted = rreq->start + added;
        return 0;
}

/*
 * Read into gaps in a folio partially filled by a streaming write.
 */
static int netfs_read_gaps(struct file *file, struct folio *folio)
{
        struct netfs_io_request *rreq;
        struct address_space *mapping = folio->mapping;
        struct netfs_folio *finfo = netfs_folio_info(folio);
        struct netfs_inode *ctx = netfs_inode(mapping->host);
        struct folio *sink = NULL;
        struct bio_vec *bvec;
        unsigned int from = finfo->dirty_offset;
        unsigned int to = from + finfo->dirty_len;
        unsigned int off = 0, i = 0;
        size_t flen = folio_size(folio);
        size_t nr_bvec = flen / PAGE_SIZE + 2;
        size_t part;
        int ret;

        _enter("%lx", folio->index);

        rreq = netfs_alloc_request(mapping, file, folio_pos(folio), flen, NETFS_READ_GAPS);
        if (IS_ERR(rreq)) {
                ret = PTR_ERR(rreq);
                goto alloc_error;
        }

        ret = netfs_begin_cache_read(rreq, ctx);
        if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
                goto discard;

        netfs_stat(&netfs_n_rh_read_folio);
        trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_read_gaps);

        /* Fiddle the buffer so that a gap at the beginning and/or a gap at the
         * end get copied to, but the middle is discarded.
         */
        ret = -ENOMEM;
        bvec = kmalloc_array(nr_bvec, sizeof(*bvec), GFP_KERNEL);
        if (!bvec)
                goto discard;

        sink = folio_alloc(GFP_KERNEL, 0);
        if (!sink) {
                kfree(bvec);
                goto discard;
        }

        trace_netfs_folio(folio, netfs_folio_trace_read_gaps);

        rreq->direct_bv = bvec;
        rreq->direct_bv_count = nr_bvec;
        if (from > 0) {
                bvec_set_folio(&bvec[i++], folio, from, 0);
                off = from;
        }
        while (off < to) {
                part = min_t(size_t, to - off, PAGE_SIZE);
                bvec_set_folio(&bvec[i++], sink, part, 0);
                off += part;
        }
        if (to < flen)
                bvec_set_folio(&bvec[i++], folio, flen - to, to);
        iov_iter_bvec(&rreq->buffer.iter, ITER_DEST, bvec, i, rreq->len);
        rreq->submitted = rreq->start + flen;

        netfs_read_to_pagecache(rreq, NULL);

        if (sink)
                folio_put(sink);

        ret = netfs_wait_for_read(rreq);
        if (ret >= 0) {
                flush_dcache_folio(folio);
                folio_mark_uptodate(folio);
        }
        folio_unlock(folio);
        netfs_put_request(rreq, netfs_rreq_trace_put_return);
        return ret < 0 ? ret : 0;

discard:
        netfs_put_failed_request(rreq);
alloc_error:
        folio_unlock(folio);
        return ret;
}

/**
 * netfs_read_folio - Helper to manage a read_folio request
 * @file: The file to read from
 * @folio: The folio to read
 *
 * Fulfil a read_folio request by drawing data from the cache if
 * possible, or the netfs if not. Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
int netfs_read_folio(struct file *file, struct folio *folio)
{
        struct address_space *mapping = folio->mapping;
        struct netfs_io_request *rreq;
        struct netfs_inode *ctx = netfs_inode(mapping->host);
        int ret;

        if (folio_test_dirty(folio)) {
                trace_netfs_folio(folio, netfs_folio_trace_read_gaps);
                return netfs_read_gaps(file, folio);
        }

        _enter("%lx", folio->index);

        rreq = netfs_alloc_request(mapping, file,
                                   folio_pos(folio), folio_size(folio),
                                   NETFS_READPAGE);
        if (IS_ERR(rreq)) {
                ret = PTR_ERR(rreq);
                goto alloc_error;
        }

        ret = netfs_begin_cache_read(rreq, ctx);
        if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
                goto discard;

        netfs_stat(&netfs_n_rh_read_folio);
        trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);

        /* Set up the output buffer */
        ret = netfs_create_singular_buffer(rreq, folio, 0);
        if (ret < 0)
                goto discard;

        netfs_read_to_pagecache(rreq, NULL);
        ret = netfs_wait_for_read(rreq);
        netfs_put_request(rreq, netfs_rreq_trace_put_return);
        return ret < 0 ? ret : 0;

discard:
        netfs_put_failed_request(rreq);
alloc_error:
        folio_unlock(folio);
        return ret;
}
EXPORT_SYMBOL(netfs_read_folio);
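
/* Illustration (not part of this file): a filesystem built on netfslib
 * normally points its address_space_operations straight at these helpers;
 * the myfs_ name below is a placeholder:
 *
 *      const struct address_space_operations myfs_aops = {
 *              .read_folio     = netfs_read_folio,
 *              .readahead      = netfs_readahead,
 *              ...
 *      };
 */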

/*
 * Prepare a folio for writing without reading first
 * @folio: The folio being prepared
 * @pos: starting position for the write
 * @len: length of write
 * @always_fill: T if the folio should always be completely filled/cleared
 *
 * In some cases, write_begin doesn't need to read at all:
 * - full folio write
 * - write that lies in a folio that is completely beyond EOF
 * - write that covers the folio from start to EOF or beyond it
 *
 * If any of these criteria are met, then zero out the unwritten parts
 * of the folio and return true. Otherwise, return false.
 */
static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
                                  bool always_fill)
{
        struct inode *inode = folio_inode(folio);
        loff_t i_size = i_size_read(inode);
        size_t offset = offset_in_folio(folio, pos);
        size_t plen = folio_size(folio);

        if (unlikely(always_fill)) {
                if (pos - offset + len <= i_size)
                        return false; /* Page entirely before EOF */
                folio_zero_segment(folio, 0, plen);
                folio_mark_uptodate(folio);
                return true;
        }

        /* Full folio write */
        if (offset == 0 && len >= plen)
                return true;

        /* Page entirely beyond the end of the file */
        if (pos - offset >= i_size)
                goto zero_out;

        /* Write that covers from the start of the folio to EOF or beyond */
        if (offset == 0 && (pos + len) >= i_size)
                goto zero_out;

        return false;
zero_out:
        folio_zero_segments(folio, 0, offset, offset + len, plen);
        return true;
}
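
/* Worked example (assumed numbers, for illustration only): with a 4096-byte
 * folio that lies entirely past EOF and a 1024-byte write at offset 512
 * within it, the "entirely beyond EOF" case applies, so bytes 0-511 and
 * 1536-4095 are zeroed and the read is skipped; the caller's write then
 * fills in bytes 512-1535.
 */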

/**
 * netfs_write_begin - Helper to prepare for writing [DEPRECATED]
 * @ctx: The netfs context
 * @file: The file to read from
 * @mapping: The mapping to read from
 * @pos: File position at which the write will begin
 * @len: The length of the write (may extend beyond the end of the folio chosen)
 * @_folio: Where to put the resultant folio
 * @_fsdata: Place for the netfs to store a cookie
 *
 * Pre-read data for a write-begin request by drawing data from the cache if
 * possible, or the netfs if not. Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together.
 *
 * The calling netfs must provide a table of operations, only one of which,
 * issue_read, is mandatory.
 *
 * The check_write_begin() operation can be provided to check for and flush
 * conflicting writes once the folio is grabbed and locked. It is passed a
 * pointer to the fsdata cookie that gets returned to the VM to be passed to
 * write_end. It is permitted to sleep. It should return 0 if the request
 * should go ahead or it may return an error. It may also unlock and put the
 * folio, provided it sets ``*foliop`` to NULL, in which case a return of 0
 * will cause the folio to be re-got and the process to be retried.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 *
 * Note that this should be considered deprecated and netfs_perform_write()
 * used instead.
 */
int netfs_write_begin(struct netfs_inode *ctx,
                      struct file *file, struct address_space *mapping,
                      loff_t pos, unsigned int len, struct folio **_folio,
                      void **_fsdata)
{
        struct netfs_io_request *rreq;
        struct folio *folio;
        pgoff_t index = pos >> PAGE_SHIFT;
        int ret;

retry:
        folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
                                    mapping_gfp_mask(mapping));
        if (IS_ERR(folio))
                return PTR_ERR(folio);

        if (ctx->ops->check_write_begin) {
                /* Allow the netfs (eg. ceph) to flush conflicts. */
                ret = ctx->ops->check_write_begin(file, pos, len, &folio, _fsdata);
                if (ret < 0) {
                        trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
                        goto error;
                }
                if (!folio)
                        goto retry;
        }

        if (folio_test_uptodate(folio))
                goto have_folio;

        /* If the folio is beyond the EOF, we want to clear it - unless it's
         * within the cache granule containing the EOF, in which case we need
         * to preload the granule.
         */
        if (!netfs_is_cache_enabled(ctx) &&
            netfs_skip_folio_read(folio, pos, len, false)) {
                netfs_stat(&netfs_n_rh_write_zskip);
                goto have_folio_no_wait;
        }

        rreq = netfs_alloc_request(mapping, file,
                                   folio_pos(folio), folio_size(folio),
                                   NETFS_READ_FOR_WRITE);
        if (IS_ERR(rreq)) {
                ret = PTR_ERR(rreq);
                goto error;
        }
        rreq->no_unlock_folio = folio->index;
        __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);

        ret = netfs_begin_cache_read(rreq, ctx);
        if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
                goto error_put;

        netfs_stat(&netfs_n_rh_write_begin);
        trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);

        /* Set up the output buffer */
        ret = netfs_create_singular_buffer(rreq, folio, 0);
        if (ret < 0)
                goto error_put;

        netfs_read_to_pagecache(rreq, NULL);
        ret = netfs_wait_for_read(rreq);
        if (ret < 0)
                goto error;
        netfs_put_request(rreq, netfs_rreq_trace_put_return);

have_folio:
        ret = folio_wait_private_2_killable(folio);
        if (ret < 0)
                goto error;
have_folio_no_wait:
        *_folio = folio;
        _leave(" = 0");
        return 0;

error_put:
        netfs_put_failed_request(rreq);
error:
        if (folio) {
                folio_unlock(folio);
                folio_put(folio);
        }
        _leave(" = %d", ret);
        return ret;
}
EXPORT_SYMBOL(netfs_write_begin);
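
/* Illustration (not part of this file): a filesystem still on the
 * write_begin/write_end path typically calls the helper above from its own
 * ->write_begin() wrapper, along the lines of:
 *
 *      ret = netfs_write_begin(netfs_inode(mapping->host), file, mapping,
 *                              pos, len, &folio, fsdata);
 *
 * with any conflict flushing done via the ->check_write_begin() op rather
 * than in the caller.
 */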

/*
 * Preload the data into a folio we're proposing to write into.
 */
int netfs_prefetch_for_write(struct file *file, struct folio *folio,
                             size_t offset, size_t len)
{
        struct netfs_io_request *rreq;
        struct address_space *mapping = folio->mapping;
        struct netfs_inode *ctx = netfs_inode(mapping->host);
        unsigned long long start = folio_pos(folio);
        size_t flen = folio_size(folio);
        int ret;

        _enter("%zx @%llx", flen, start);

        ret = -ENOMEM;

        rreq = netfs_alloc_request(mapping, file, start, flen,
                                   NETFS_READ_FOR_WRITE);
        if (IS_ERR(rreq)) {
                ret = PTR_ERR(rreq);
                goto error;
        }

        rreq->no_unlock_folio = folio->index;
        __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
        ret = netfs_begin_cache_read(rreq, ctx);
        if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
                goto error_put;

        netfs_stat(&netfs_n_rh_write_begin);
        trace_netfs_read(rreq, start, flen, netfs_read_trace_prefetch_for_write);

        /* Set up the output buffer */
        ret = netfs_create_singular_buffer(rreq, folio, NETFS_ROLLBUF_PAGECACHE_MARK);
        if (ret < 0)
                goto error_put;

        netfs_read_to_pagecache(rreq, NULL);
        ret = netfs_wait_for_read(rreq);
        netfs_put_request(rreq, netfs_rreq_trace_put_return);
        return ret < 0 ? ret : 0;

error_put:
        netfs_put_failed_request(rreq);
error:
        _leave(" = %d", ret);
        return ret;
}

/**
 * netfs_buffered_read_iter - Filesystem buffered I/O read routine
 * @iocb: kernel I/O control block
 * @iter: destination for the data read
 *
 * This is the ->read_iter() routine for all filesystems that can use the page
 * cache directly.
 *
 * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
 * returned when no data can be read without waiting for I/O requests to
 * complete; it doesn't prevent readahead.
 *
 * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
 * shall be made for the read or for readahead. When no data can be read,
 * -EAGAIN shall be returned. When readahead would be triggered, a partial,
 * possibly empty read shall be returned.
 *
 * Return:
 * * number of bytes copied, even for partial reads
 * * negative error code (or 0 if IOCB_NOIO) if nothing was read
 */
ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
        struct inode *inode = file_inode(iocb->ki_filp);
        struct netfs_inode *ictx = netfs_inode(inode);
        ssize_t ret;

        if (WARN_ON_ONCE((iocb->ki_flags & IOCB_DIRECT) ||
                         test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags)))
                return -EINVAL;

        ret = netfs_start_io_read(inode);
        if (ret == 0) {
                ret = filemap_read(iocb, iter, 0);
                netfs_end_io_read(inode);
        }
        return ret;
}
EXPORT_SYMBOL(netfs_buffered_read_iter);
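
/* Illustration (placeholder code, not from upstream): a filesystem that must
 * do per-read work of its own (revalidation, lease checks, etc.) can wrap
 * this helper rather than using netfs_file_read_iter() below:
 *
 *      static ssize_t myfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
 *      {
 *              ssize_t ret = myfs_revalidate(file_inode(iocb->ki_filp));
 *
 *              if (ret < 0)
 *                      return ret;
 *              if (iocb->ki_flags & IOCB_DIRECT)
 *                      return netfs_unbuffered_read_iter(iocb, to);
 *              return netfs_buffered_read_iter(iocb, to);
 *      }
 */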

/**
 * netfs_file_read_iter - Generic filesystem read routine
 * @iocb: kernel I/O control block
 * @iter: destination for the data read
 *
 * This is the ->read_iter() routine for all filesystems that can use the page
 * cache directly.
 *
 * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
 * returned when no data can be read without waiting for I/O requests to
 * complete; it doesn't prevent readahead.
 *
 * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
 * shall be made for the read or for readahead. When no data can be read,
 * -EAGAIN shall be returned. When readahead would be triggered, a partial,
 * possibly empty read shall be returned.
 *
 * Return:
 * * number of bytes copied, even for partial reads
 * * negative error code (or 0 if IOCB_NOIO) if nothing was read
 */
ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
        struct netfs_inode *ictx = netfs_inode(iocb->ki_filp->f_mapping->host);

        if ((iocb->ki_flags & IOCB_DIRECT) ||
            test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags))
                return netfs_unbuffered_read_iter(iocb, iter);

        return netfs_buffered_read_iter(iocb, iter);
}
EXPORT_SYMBOL(netfs_file_read_iter);
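
/* Illustration (not part of this file): in the common case a netfslib-based
 * filesystem simply points ->read_iter at this helper, which then dispatches
 * between the buffered and unbuffered paths itself; the myfs_ name is a
 * placeholder:
 *
 *      const struct file_operations myfs_file_operations = {
 *              .read_iter      = netfs_file_read_iter,
 *              ...
 *      };
 */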