// SPDX-License-Identifier: GPL-2.0-or-later
/* Network filesystem high-level buffered read support.
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"

static void netfs_cache_expand_readahead(struct netfs_io_request *rreq,
					 unsigned long long *_start,
					 unsigned long long *_len,
					 unsigned long long i_size)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	if (cres->ops && cres->ops->expand_readahead)
		cres->ops->expand_readahead(cres, _start, _len, i_size);
}

static void netfs_rreq_expand(struct netfs_io_request *rreq,
			      struct readahead_control *ractl)
{
	/* Give the cache a chance to change the request parameters. The
	 * resultant request must contain the original region.
	 */
	netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size);

	/* Give the netfs a chance to change the request parameters. The
	 * resultant request must contain the original region.
	 */
	if (rreq->netfs_ops->expand_readahead)
		rreq->netfs_ops->expand_readahead(rreq);

	/* Expand the request if the cache wants it to start earlier. Note
	 * that the expansion may get further extended if the VM wishes to
	 * insert THPs and the preferred start and/or end wind up in the middle
	 * of THPs.
	 *
	 * If this is the case, however, the THP size should be an integer
	 * multiple of the cache granule size, so we get a whole number of
	 * granules to deal with.
	 */
	if (rreq->start != readahead_pos(ractl) ||
	    rreq->len != readahead_length(ractl)) {
		readahead_expand(ractl, rreq->start, rreq->len);
		rreq->start = readahead_pos(ractl);
		rreq->len = readahead_length(ractl);

		trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
				 netfs_read_trace_expanded);
	}
}

/*
 * Begin an operation, and fetch the stored zero point value from the cookie if
 * available.
 */
static int netfs_begin_cache_read(struct netfs_io_request *rreq, struct netfs_inode *ctx)
{
	return fscache_begin_read_operation(&rreq->cache_resources, netfs_i_cookie(ctx));
}

/*
 * netfs_prepare_read_iterator - Prepare the subreq iterator for I/O
 * @subreq: The subrequest to be set up
 *
 * Prepare the I/O iterator representing the read buffer on a subrequest for
 * the filesystem to use for I/O (it can be passed directly to a socket). This
 * is intended to be called from the ->issue_read() method once the filesystem
 * has trimmed the request to the size it wants.
 *
 * Returns the limited size if successful and -ENOMEM if insufficient memory
 * is available.
 *
 * [!] NOTE: This must be run in the same thread as ->issue_read() was called
 * in, as we access the readahead_control struct.
 */
static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq,
					   struct readahead_control *ractl)
{
	struct netfs_io_request *rreq = subreq->rreq;
	size_t rsize = subreq->len;

	if (subreq->source == NETFS_DOWNLOAD_FROM_SERVER)
		rsize = umin(rsize, rreq->io_streams[0].sreq_max_len);

	if (ractl) {
		/* If we don't have sufficient folios in the rolling buffer,
		 * extract a folioq's worth from the readahead region at a time
		 * into the buffer. Note that this acquires a ref on each page
		 * that we will need to release later - but we don't want to do
		 * that until after we've started the I/O.
		 */
		struct folio_batch put_batch;

		folio_batch_init(&put_batch);
		while (rreq->submitted < subreq->start + rsize) {
			ssize_t added;

			added = rolling_buffer_load_from_ra(&rreq->buffer, ractl,
							    &put_batch);
			if (added < 0)
				return added;
			rreq->submitted += added;
		}
		folio_batch_release(&put_batch);
	}

	subreq->len = rsize;
	if (unlikely(rreq->io_streams[0].sreq_max_segs)) {
		size_t limit = netfs_limit_iter(&rreq->buffer.iter, 0, rsize,
						rreq->io_streams[0].sreq_max_segs);

		if (limit < rsize) {
			subreq->len = limit;
			trace_netfs_sreq(subreq, netfs_sreq_trace_limited);
		}
	}

	subreq->io_iter = rreq->buffer.iter;

	iov_iter_truncate(&subreq->io_iter, subreq->len);
	rolling_buffer_advance(&rreq->buffer, subreq->len);
	return subreq->len;
}

static enum netfs_io_source netfs_cache_prepare_read(struct netfs_io_request *rreq,
						     struct netfs_io_subrequest *subreq,
						     loff_t i_size)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;
	enum netfs_io_source source;

	if (!cres->ops)
		return NETFS_DOWNLOAD_FROM_SERVER;
	source = cres->ops->prepare_read(subreq, i_size);
	trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
	return source;
}

/*
 * Issue a read against the cache.
 * - Eats the caller's ref on subreq.
 */
static void netfs_read_cache_to_pagecache(struct netfs_io_request *rreq,
					  struct netfs_io_subrequest *subreq)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	netfs_stat(&netfs_n_rh_read);
	cres->ops->read(cres, subreq->start, &subreq->io_iter, NETFS_READ_HOLE_IGNORE,
			netfs_cache_read_terminated, subreq);
}

static void netfs_queue_read(struct netfs_io_request *rreq,
			     struct netfs_io_subrequest *subreq,
			     bool last_subreq)
{
	struct netfs_io_stream *stream = &rreq->io_streams[0];

	__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);

	/* We add to the end of the list whilst the collector may be walking
	 * the list. The collector only goes forwards and uses the lock to
	 * remove entries from the front.
	 */
	spin_lock(&rreq->lock);
	list_add_tail(&subreq->rreq_link, &stream->subrequests);
	if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
		if (!stream->active) {
			stream->collected_to = subreq->start;
			/* Store list pointers before active flag */
			smp_store_release(&stream->active, true);
		}
	}

	if (last_subreq) {
		smp_wmb(); /* Write lists before ALL_QUEUED. */
		set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
	}

	spin_unlock(&rreq->lock);
}

static void netfs_issue_read(struct netfs_io_request *rreq,
			     struct netfs_io_subrequest *subreq)
{
	switch (subreq->source) {
	case NETFS_DOWNLOAD_FROM_SERVER:
		rreq->netfs_ops->issue_read(subreq);
		break;
	case NETFS_READ_FROM_CACHE:
		netfs_read_cache_to_pagecache(rreq, subreq);
		break;
	default:
		__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
		subreq->error = 0;
		iov_iter_zero(subreq->len, &subreq->io_iter);
		subreq->transferred = subreq->len;
		netfs_read_subreq_terminated(subreq);
		break;
	}
}

/*
 * Perform a read to the pagecache from a series of sources of different types,
 * slicing up the region to be read according to available cache blocks and
 * network rsize.
 */
static void netfs_read_to_pagecache(struct netfs_io_request *rreq,
				    struct readahead_control *ractl)
{
	struct netfs_inode *ictx = netfs_inode(rreq->inode);
	unsigned long long start = rreq->start;
	ssize_t size = rreq->len;
	int ret = 0;

	do {
		struct netfs_io_subrequest *subreq;
		enum netfs_io_source source = NETFS_SOURCE_UNKNOWN;
		ssize_t slice;

		subreq = netfs_alloc_subrequest(rreq);
		if (!subreq) {
			ret = -ENOMEM;
			break;
		}

		subreq->start = start;
		subreq->len = size;

		source = netfs_cache_prepare_read(rreq, subreq, rreq->i_size);
		subreq->source = source;
		if (source == NETFS_DOWNLOAD_FROM_SERVER) {
			unsigned long long zp = umin(ictx->zero_point, rreq->i_size);
			size_t len = subreq->len;

			if (unlikely(rreq->origin == NETFS_READ_SINGLE))
				zp = rreq->i_size;
			if (subreq->start >= zp) {
				subreq->source = source = NETFS_FILL_WITH_ZEROES;
				goto fill_with_zeroes;
			}

			if (len > zp - subreq->start)
				len = zp - subreq->start;
			if (len == 0) {
				pr_err("ZERO-LEN READ: R=%08x[%x] l=%zx/%zx s=%llx z=%llx i=%llx",
				       rreq->debug_id, subreq->debug_index,
				       subreq->len, size,
				       subreq->start, ictx->zero_point, rreq->i_size);
				break;
			}
			subreq->len = len;

			netfs_stat(&netfs_n_rh_download);
			if (rreq->netfs_ops->prepare_read) {
				ret = rreq->netfs_ops->prepare_read(subreq);
				if (ret < 0) {
					subreq->error = ret;
					/* Not queued - release both refs. */
					netfs_put_subrequest(subreq,
							     netfs_sreq_trace_put_cancel);
					netfs_put_subrequest(subreq,
							     netfs_sreq_trace_put_cancel);
					break;
				}
				trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
			}
			goto issue;
		}

	fill_with_zeroes:
		if (source == NETFS_FILL_WITH_ZEROES) {
			subreq->source = NETFS_FILL_WITH_ZEROES;
			trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
			netfs_stat(&netfs_n_rh_zero);
			goto issue;
		}

		if (source == NETFS_READ_FROM_CACHE) {
			trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
			goto issue;
		}

		pr_err("Unexpected read source %u\n", source);
		WARN_ON_ONCE(1);
		break;

	issue:
		slice = netfs_prepare_read_iterator(subreq, ractl);
		if (slice < 0) {
			ret = slice;
			subreq->error = ret;
			trace_netfs_sreq(subreq, netfs_sreq_trace_cancel);
			/* Not queued - release both refs. */
			netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);
			netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);
			break;
		}
		size -= slice;
		start += slice;

		netfs_queue_read(rreq, subreq, size <= 0);
		netfs_issue_read(rreq, subreq);
		cond_resched();
	} while (size > 0);

	if (unlikely(size > 0)) {
		smp_wmb(); /* Write lists before ALL_QUEUED. */
		set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
		netfs_wake_collector(rreq);
	}

	/* Defer error return as we may need to wait for outstanding I/O. */
	cmpxchg(&rreq->error, 0, ret);
}
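
/* For example (an illustrative slicing, with made-up sizes): a 256KiB request
 * over a cache that holds the first 64KiB and a server rsize of 128KiB might
 * be sliced into a 64KiB NETFS_READ_FROM_CACHE subrequest followed by a
 * 128KiB and then a 64KiB NETFS_DOWNLOAD_FROM_SERVER subrequest, each queued
 * and issued as its iterator is prepared.
 */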

/**
 * netfs_readahead - Helper to manage a read request
 * @ractl: The description of the readahead request
 *
 * Fulfil a readahead request by drawing data from the cache if possible, or
 * the netfs if not. Space beyond the EOF is zero-filled. Multiple I/O
 * requests from different sources will get munged together. If necessary, the
 * readahead window can be expanded in either direction to a more convenient
 * alignment for RPC efficiency or to make storage in the cache feasible.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
void netfs_readahead(struct readahead_control *ractl)
{
	struct netfs_io_request *rreq;
	struct netfs_inode *ictx = netfs_inode(ractl->mapping->host);
	unsigned long long start = readahead_pos(ractl);
	size_t size = readahead_length(ractl);
	int ret;

	rreq = netfs_alloc_request(ractl->mapping, ractl->file, start, size,
				   NETFS_READAHEAD);
	if (IS_ERR(rreq))
		return;

	__set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags);

	ret = netfs_begin_cache_read(rreq, ictx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto cleanup_free;

	netfs_stat(&netfs_n_rh_readahead);
	trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
			 netfs_read_trace_readahead);

	netfs_rreq_expand(rreq, ractl);

	rreq->submitted = rreq->start;
	if (rolling_buffer_init(&rreq->buffer, rreq->debug_id, ITER_DEST) < 0)
		goto cleanup_free;
	netfs_read_to_pagecache(rreq, ractl);

	return netfs_put_request(rreq, netfs_rreq_trace_put_return);

cleanup_free:
	return netfs_put_failed_request(rreq);
}
EXPORT_SYMBOL(netfs_readahead);
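
/* Illustrative usage (a minimal sketch; "myfs" is a hypothetical filesystem):
 * a netfs typically wires this helper, along with the other netfs helpers,
 * straight into its address_space_operations:
 *
 *	const struct address_space_operations myfs_aops = {
 *		.readahead		= netfs_readahead,
 *		.read_folio		= netfs_read_folio,
 *		.dirty_folio		= netfs_dirty_folio,
 *		.release_folio		= netfs_release_folio,
 *		.invalidate_folio	= netfs_invalidate_folio,
 *	};
 */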

/*
 * Create a rolling buffer with a single occupying folio.
 */
static int netfs_create_singular_buffer(struct netfs_io_request *rreq, struct folio *folio,
					unsigned int rollbuf_flags)
{
	ssize_t added;

	if (rolling_buffer_init(&rreq->buffer, rreq->debug_id, ITER_DEST) < 0)
		return -ENOMEM;

	added = rolling_buffer_append(&rreq->buffer, folio, rollbuf_flags);
	if (added < 0)
		return added;
	rreq->submitted = rreq->start + added;
	return 0;
}

/*
 * Read into gaps in a folio partially filled by a streaming write.
 */
static int netfs_read_gaps(struct file *file, struct folio *folio)
{
	struct netfs_io_request *rreq;
	struct address_space *mapping = folio->mapping;
	struct netfs_folio *finfo = netfs_folio_info(folio);
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	struct folio *sink = NULL;
	struct bio_vec *bvec;
	unsigned int from = finfo->dirty_offset;
	unsigned int to = from + finfo->dirty_len;
	unsigned int off = 0, i = 0;
	size_t flen = folio_size(folio);
	size_t nr_bvec = flen / PAGE_SIZE + 2;
	size_t part;
	int ret;

	_enter("%lx", folio->index);

	rreq = netfs_alloc_request(mapping, file, folio_pos(folio), flen, NETFS_READ_GAPS);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto alloc_error;
	}

	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto discard;

	netfs_stat(&netfs_n_rh_read_folio);
	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_read_gaps);

	/* Fiddle the buffer so that a gap at the beginning and/or a gap at the
	 * end get copied to, but the middle is discarded.
	 */
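	/* For example (illustrative numbers): with flen = 16KiB and dirty data
	 * at [from, to) = [0x1000, 0x2800), the bvec array describes the full
	 * folio range, but the dirty middle is pointed at a sink folio so that
	 * the read cannot clobber it:
	 *
	 *	bvec[0]: folio @0x0000, len 0x1000  - leading gap, kept
	 *	bvec[1]: sink  @0x0000, len 0x1000  - dirty data, discarded
	 *	bvec[2]: sink  @0x0000, len 0x0800  - dirty data, discarded
	 *	bvec[3]: folio @0x2800, len 0x1800  - trailing gap, kept
	 */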
	ret = -ENOMEM;
	bvec = kmalloc_objs(*bvec, nr_bvec);
	if (!bvec)
		goto discard;

	sink = folio_alloc(GFP_KERNEL, 0);
	if (!sink) {
		kfree(bvec);
		goto discard;
	}

	trace_netfs_folio(folio, netfs_folio_trace_read_gaps);

	rreq->direct_bv = bvec;
	rreq->direct_bv_count = nr_bvec;
	if (from > 0) {
		bvec_set_folio(&bvec[i++], folio, from, 0);
		off = from;
	}
	while (off < to) {
		part = min_t(size_t, to - off, PAGE_SIZE);
		bvec_set_folio(&bvec[i++], sink, part, 0);
		off += part;
	}
	if (to < flen)
		bvec_set_folio(&bvec[i++], folio, flen - to, to);
	iov_iter_bvec(&rreq->buffer.iter, ITER_DEST, bvec, i, rreq->len);
	rreq->submitted = rreq->start + flen;

	netfs_read_to_pagecache(rreq, NULL);

	if (sink)
		folio_put(sink);

	ret = netfs_wait_for_read(rreq);
	if (ret >= 0) {
		flush_dcache_folio(folio);
		folio_mark_uptodate(folio);
	}
	folio_unlock(folio);
	netfs_put_request(rreq, netfs_rreq_trace_put_return);
	return ret < 0 ? ret : 0;

discard:
	netfs_put_failed_request(rreq);
alloc_error:
	folio_unlock(folio);
	return ret;
}

/**
 * netfs_read_folio - Helper to manage a read_folio request
 * @file: The file to read from
 * @folio: The folio to read
 *
 * Fulfil a read_folio request by drawing data from the cache if
 * possible, or the netfs if not. Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
int netfs_read_folio(struct file *file, struct folio *folio)
{
	struct address_space *mapping = folio->mapping;
	struct netfs_io_request *rreq;
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	int ret;

	if (folio_test_dirty(folio)) {
		trace_netfs_folio(folio, netfs_folio_trace_read_gaps);
		return netfs_read_gaps(file, folio);
	}

	_enter("%lx", folio->index);

	rreq = netfs_alloc_request(mapping, file,
				   folio_pos(folio), folio_size(folio),
				   NETFS_READPAGE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto alloc_error;
	}

	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto discard;

	netfs_stat(&netfs_n_rh_read_folio);
	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);

	/* Set up the output buffer */
	ret = netfs_create_singular_buffer(rreq, folio, 0);
	if (ret < 0)
		goto discard;

	netfs_read_to_pagecache(rreq, NULL);
	ret = netfs_wait_for_read(rreq);
	netfs_put_request(rreq, netfs_rreq_trace_put_return);
	return ret < 0 ? ret : 0;

discard:
	netfs_put_failed_request(rreq);
alloc_error:
	folio_unlock(folio);
	return ret;
}
EXPORT_SYMBOL(netfs_read_folio);
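
/* Illustrative sketch of the one mandatory netfs op driven by the read paths
 * above: ->issue_read() starts I/O for [subreq->start, subreq->start +
 * subreq->len) into subreq->io_iter and signals completion through
 * netfs_read_subreq_terminated(), as the zero-fill case in netfs_issue_read()
 * does. With a hypothetical synchronous transport (myfs_fetch_from_server is
 * made up), it might look like:
 *
 *	static void myfs_issue_read(struct netfs_io_subrequest *subreq)
 *	{
 *		ssize_t n = myfs_fetch_from_server(subreq->rreq->inode,
 *						   subreq->start, subreq->len,
 *						   &subreq->io_iter);
 *		if (n < 0)
 *			subreq->error = n;
 *		else
 *			subreq->transferred = n;
 *		netfs_read_subreq_terminated(subreq);
 *	}
 */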

/*
 * Prepare a folio for writing without reading first
 * @folio: The folio being prepared
 * @pos: starting position for the write
 * @len: length of write
 * @always_fill: T if the folio should always be completely filled/cleared
 *
 * In some cases, write_begin doesn't need to read at all:
 * - full folio write
 * - write that lies in a folio that is completely beyond EOF
 * - write that covers the folio from start to EOF or beyond it
 *
 * If any of these criteria are met, then zero out the unwritten parts
 * of the folio and return true. Otherwise, return false.
 */
static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
				  bool always_fill)
{
	struct inode *inode = folio_inode(folio);
	loff_t i_size = i_size_read(inode);
	size_t offset = offset_in_folio(folio, pos);
	size_t plen = folio_size(folio);

	if (unlikely(always_fill)) {
		if (pos - offset + len <= i_size)
			return false; /* Page entirely before EOF */
		folio_zero_segment(folio, 0, plen);
		folio_mark_uptodate(folio);
		return true;
	}

	/* Full folio write */
	if (offset == 0 && len >= plen)
		return true;

	/* Page entirely beyond the end of the file */
	if (pos - offset >= i_size)
		goto zero_out;

	/* Write that covers from the start of the folio to EOF or beyond */
	if (offset == 0 && (pos + len) >= i_size)
		goto zero_out;

	return false;
zero_out:
	folio_zero_segments(folio, 0, offset, offset + len, plen);
	return true;
}
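
/* For example (illustrative numbers): with an 8KiB folio at file position 0
 * and i_size = 4KiB, a write at pos = 0, len = 6KiB starts at the folio and
 * runs past EOF, so netfs_skip_folio_read() zeroes [6KiB, 8KiB) and returns
 * true; no read is needed. A write at pos = 4KiB, len = 2KiB, however, leaves
 * [0, 4KiB) unknown, so it returns false and the caller must read first.
 */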

/**
 * netfs_write_begin - Helper to prepare for writing [DEPRECATED]
 * @ctx: The netfs context
 * @file: The file to read from
 * @mapping: The mapping to read from
 * @pos: File position at which the write will begin
 * @len: The length of the write (may extend beyond the end of the folio chosen)
 * @_folio: Where to put the resultant folio
 * @_fsdata: Place for the netfs to store a cookie
 *
 * Pre-read data for a write-begin request by drawing data from the cache if
 * possible, or the netfs if not. Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together.
 *
 * The calling netfs must provide a table of operations, only one of which,
 * issue_read, is mandatory.
 *
 * The check_write_begin() operation can be provided to check for and flush
 * conflicting writes once the folio is grabbed and locked. It is passed a
 * pointer to the fsdata cookie that gets returned to the VM to be passed to
 * write_end. It is permitted to sleep. It should return 0 if the request
 * should go ahead or it may return an error. It may also unlock and put the
 * folio, provided it sets ``*foliop`` to NULL, in which case a return of 0
 * will cause the folio to be re-got and the process to be retried.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 *
 * Note that this should be considered deprecated and netfs_perform_write()
 * used instead.
 */
int netfs_write_begin(struct netfs_inode *ctx,
		      struct file *file, struct address_space *mapping,
		      loff_t pos, unsigned int len, struct folio **_folio,
		      void **_fsdata)
{
	struct netfs_io_request *rreq;
	struct folio *folio;
	pgoff_t index = pos >> PAGE_SHIFT;
	int ret;

retry:
	folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
				    mapping_gfp_mask(mapping));
	if (IS_ERR(folio))
		return PTR_ERR(folio);

	if (ctx->ops->check_write_begin) {
		/* Allow the netfs (eg. ceph) to flush conflicts. */
		ret = ctx->ops->check_write_begin(file, pos, len, &folio, _fsdata);
		if (ret < 0) {
			trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
			goto error;
		}
		if (!folio)
			goto retry;
	}

	if (folio_test_uptodate(folio))
		goto have_folio;

	/* If the folio is beyond the EOF, we want to clear it - unless it's
	 * within the cache granule containing the EOF, in which case we need
	 * to preload the granule.
	 */
	if (!netfs_is_cache_enabled(ctx) &&
	    netfs_skip_folio_read(folio, pos, len, false)) {
		netfs_stat(&netfs_n_rh_write_zskip);
		goto have_folio_no_wait;
	}

	rreq = netfs_alloc_request(mapping, file,
				   folio_pos(folio), folio_size(folio),
				   NETFS_READ_FOR_WRITE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto error;
	}
	rreq->no_unlock_folio = folio->index;
	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);

	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto error_put;

	netfs_stat(&netfs_n_rh_write_begin);
	trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);

	/* Set up the output buffer */
	ret = netfs_create_singular_buffer(rreq, folio, 0);
	if (ret < 0)
		goto error_put;

	netfs_read_to_pagecache(rreq, NULL);
	ret = netfs_wait_for_read(rreq);
	if (ret < 0)
		goto error;
	netfs_put_request(rreq, netfs_rreq_trace_put_return);

have_folio:
	ret = folio_wait_private_2_killable(folio);
	if (ret < 0)
		goto error;
have_folio_no_wait:
	*_folio = folio;
	_leave(" = 0");
	return 0;

error_put:
	netfs_put_failed_request(rreq);
error:
	if (folio) {
		folio_unlock(folio);
		folio_put(folio);
	}
	_leave(" = %d", ret);
	return ret;
}
EXPORT_SYMBOL(netfs_write_begin);
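
/* Illustrative use (a minimal sketch; "myfs" is hypothetical and the
 * ->write_begin() prototype shown follows the folio-based variant, which may
 * differ between kernel versions): a netfs still on this legacy path would
 * typically call the helper from a thin ->write_begin() wrapper:
 *
 *	static int myfs_write_begin(struct file *file,
 *				    struct address_space *mapping,
 *				    loff_t pos, unsigned int len,
 *				    struct folio **foliop, void **fsdata)
 *	{
 *		return netfs_write_begin(netfs_inode(mapping->host), file,
 *					 mapping, pos, len, foliop, fsdata);
 *	}
 */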

/*
 * Preload the data into a folio we're proposing to write into.
 */
int netfs_prefetch_for_write(struct file *file, struct folio *folio,
			     size_t offset, size_t len)
{
	struct netfs_io_request *rreq;
	struct address_space *mapping = folio->mapping;
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	unsigned long long start = folio_pos(folio);
	size_t flen = folio_size(folio);
	int ret;

	_enter("%zx @%llx", flen, start);

	ret = -ENOMEM;

	rreq = netfs_alloc_request(mapping, file, start, flen,
				   NETFS_READ_FOR_WRITE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto error;
	}

	rreq->no_unlock_folio = folio->index;
	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto error_put;

	netfs_stat(&netfs_n_rh_write_begin);
	trace_netfs_read(rreq, start, flen, netfs_read_trace_prefetch_for_write);

	/* Set up the output buffer */
	ret = netfs_create_singular_buffer(rreq, folio, NETFS_ROLLBUF_PAGECACHE_MARK);
	if (ret < 0)
		goto error_put;

	netfs_read_to_pagecache(rreq, NULL);
	ret = netfs_wait_for_read(rreq);
	netfs_put_request(rreq, netfs_rreq_trace_put_return);
	return ret < 0 ? ret : 0;

error_put:
	netfs_put_failed_request(rreq);
error:
	_leave(" = %d", ret);
	return ret;
}

/**
 * netfs_buffered_read_iter - Filesystem buffered I/O read routine
 * @iocb: kernel I/O control block
 * @iter: destination for the data read
 *
 * This is the ->read_iter() routine for all filesystems that can use the page
 * cache directly.
 *
 * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
 * returned when no data can be read without waiting for I/O requests to
 * complete; it doesn't prevent readahead.
 *
 * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
 * shall be made for the read or for readahead. When no data can be read,
 * -EAGAIN shall be returned. When readahead would be triggered, a partial,
 * possibly empty read shall be returned.
 *
 * Return:
 * * number of bytes copied, even for partial reads
 * * negative error code (or 0 if IOCB_NOIO) if nothing was read
 */
ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct netfs_inode *ictx = netfs_inode(inode);
	ssize_t ret;

	if (WARN_ON_ONCE((iocb->ki_flags & IOCB_DIRECT) ||
			 test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags)))
		return -EINVAL;

	ret = netfs_start_io_read(inode);
	if (ret == 0) {
		ret = filemap_read(iocb, iter, 0);
		netfs_end_io_read(inode);
	}
	return ret;
}
EXPORT_SYMBOL(netfs_buffered_read_iter);

/**
 * netfs_file_read_iter - Generic filesystem read routine
 * @iocb: kernel I/O control block
 * @iter: destination for the data read
 *
 * This is the ->read_iter() routine for all filesystems that can use the page
 * cache directly.
 *
 * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
 * returned when no data can be read without waiting for I/O requests to
 * complete; it doesn't prevent readahead.
 *
 * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
 * shall be made for the read or for readahead. When no data can be read,
 * -EAGAIN shall be returned. When readahead would be triggered, a partial,
 * possibly empty read shall be returned.
 *
 * Return:
 * * number of bytes copied, even for partial reads
 * * negative error code (or 0 if IOCB_NOIO) if nothing was read
 */
ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct netfs_inode *ictx = netfs_inode(iocb->ki_filp->f_mapping->host);

	if ((iocb->ki_flags & IOCB_DIRECT) ||
	    test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags))
		return netfs_unbuffered_read_iter(iocb, iter);

	return netfs_buffered_read_iter(iocb, iter);
}
EXPORT_SYMBOL(netfs_file_read_iter);
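
/* Illustrative use (a minimal sketch; "myfs" is hypothetical): because
 * netfs_file_read_iter() dispatches between the buffered and unbuffered paths
 * itself, it can usually be plugged straight into file_operations:
 *
 *	const struct file_operations myfs_file_ops = {
 *		.read_iter	= netfs_file_read_iter,
 *		...
 *	};
 */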