// SPDX-License-Identifier: GPL-2.0-or-later
/* Direct I/O support.
 *
 * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/uio.h>
#include <linux/sched/mm.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/netfs.h>
#include "internal.h"

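/*
 * Clamp the length of a DIO read subrequest to the transport's limits and
 * carve out the corresponding slice of the request's destination iterator.
 */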
static void netfs_prepare_dio_read_iterator(struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *rreq = subreq->rreq;
	size_t rsize;

	rsize = umin(subreq->len, rreq->io_streams[0].sreq_max_len);
	subreq->len = rsize;

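	/* If the transport caps the number of segments per operation, trim
	 * the length so that this slice fits within that many segments.
	 */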
	if (unlikely(rreq->io_streams[0].sreq_max_segs)) {
		size_t limit = netfs_limit_iter(&rreq->iter, 0, rsize,
						rreq->io_streams[0].sreq_max_segs);

		if (limit < rsize) {
			subreq->len = limit;
			trace_netfs_sreq(subreq, netfs_sreq_trace_limited);
		}
	}

	trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);

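	/* Point the subrequest at its slice of the destination buffer and
	 * advance the master iterator past it.
	 */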
	subreq->io_iter = rreq->iter;
	iov_iter_truncate(&subreq->io_iter, subreq->len);
	iov_iter_advance(&rreq->iter, subreq->len);
}

/*
 * Perform a read to a buffer from the server, slicing up the region to be read
 * according to the network rsize.
 */
static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
{
	unsigned long long start = rreq->start;
	ssize_t size = rreq->len;
	int ret = 0;

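	/* Hold an outstanding count of one for the dispatch loop itself so
	 * that the request can't be marked as terminated before we've
	 * finished issuing subrequests.
	 */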
	atomic_set(&rreq->nr_outstanding, 1);

	do {
		struct netfs_io_subrequest *subreq;
		ssize_t slice;

		subreq = netfs_alloc_subrequest(rreq);
		if (!subreq) {
			ret = -ENOMEM;
			break;
		}

		subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
		subreq->start = start;
		subreq->len = size;

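		/* Chain the subrequest onto the request, inheriting any data
		 * donation left over from the preceding subrequest.
		 */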
		atomic_inc(&rreq->nr_outstanding);
		spin_lock_bh(&rreq->lock);
		list_add_tail(&subreq->rreq_link, &rreq->subrequests);
		subreq->prev_donated = rreq->prev_donated;
		rreq->prev_donated = 0;
		trace_netfs_sreq(subreq, netfs_sreq_trace_added);
		spin_unlock_bh(&rreq->lock);

		netfs_stat(&netfs_n_rh_download);
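		/* Give the filesystem a chance to adjust or veto the proposed
		 * subrequest; a negative return cancels this slice and stops
		 * dispatching.
		 */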
		if (rreq->netfs_ops->prepare_read) {
			ret = rreq->netfs_ops->prepare_read(subreq);
			if (ret < 0) {
				atomic_dec(&rreq->nr_outstanding);
				netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
				break;
			}
		}

		netfs_prepare_dio_read_iterator(subreq);
		slice = subreq->len;
		rreq->netfs_ops->issue_read(subreq);

		size -= slice;
		start += slice;
		rreq->submitted += slice;

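		/* If the caller asked for a non-blocking read and an issued
		 * subrequest indicated that it would have had to block, stop
		 * dispatching further slices.
		 */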
		if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) &&
		    test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags))
			break;
		cond_resched();
	} while (size > 0);

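	/* Drop the dispatch loop's count; if all the subrequests have already
	 * completed, collect the results now.
	 */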
	if (atomic_dec_and_test(&rreq->nr_outstanding))
		netfs_rreq_terminated(rreq, false);
	return ret;
}

/*
 * Perform a read to an application buffer, bypassing the pagecache and the
 * local disk cache.
 */
static int netfs_unbuffered_read(struct netfs_io_request *rreq, bool sync)
{
	int ret;

	_enter("R=%x %llx-%llx",
	       rreq->debug_id, rreq->start, rreq->start + rreq->len - 1);

	if (rreq->len == 0) {
		pr_err("Zero-sized read [R=%x]\n", rreq->debug_id);
		return -EIO;
	}

	// TODO: Use bounce buffer if requested

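	/* Count this as an in-flight direct I/O on the inode so that
	 * operations such as truncation can wait for it to complete.
	 */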
	inode_dio_begin(rreq->inode);

	ret = netfs_dispatch_unbuffered_reads(rreq);

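	/* If we didn't manage to issue any subrequests, there's nothing to
	 * wait for; undo the DIO accounting and report a zero-length read.
	 */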
	if (!rreq->submitted) {
		netfs_put_request(rreq, false, netfs_rreq_trace_put_no_submit);
		inode_dio_end(rreq->inode);
		ret = 0;
		goto out;
	}

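	/* For a synchronous read, wait for the I/O to complete and pick up
	 * the result; for an asynchronous read, completion is signalled
	 * through the iocb instead.
	 */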
	if (sync) {
		trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip);
		wait_on_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS,
			    TASK_UNINTERRUPTIBLE);

		ret = rreq->error;
		if (ret == 0 && rreq->submitted < rreq->len &&
		    rreq->origin != NETFS_DIO_READ) {
			trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
			ret = -EIO;
		}
	} else {
		ret = -EIOCBQUEUED;
	}

out:
	_leave(" = %d", ret);
	return ret;
}

/**
 * netfs_unbuffered_read_iter_locked - Perform an unbuffered or direct I/O read
 * @iocb: The I/O control descriptor describing the read
 * @iter: The output buffer (also specifies read length)
 *
 * Perform an unbuffered I/O or direct I/O from the file in @iocb to the
 * output buffer. No use is made of the pagecache.
 *
 * The caller must hold any appropriate locks.
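 *
 * Return: The number of bytes read on synchronous completion, -EIOCBQUEUED if
 * the read has been queued for asynchronous completion, or a negative error
 * code.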
 */
ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *iter)
{
	struct netfs_io_request *rreq;
	ssize_t ret;
	size_t orig_count = iov_iter_count(iter);
	bool sync = is_sync_kiocb(iocb);

	_enter("");

	if (!orig_count)
		return 0; /* Don't update atime */

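	/* Flush and wait for any pending writeback over the target range so
	 * that the direct read sees up-to-date data.
	 */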
	ret = kiocb_write_and_wait(iocb, orig_count);
	if (ret < 0)
		return ret;
	file_accessed(iocb->ki_filp);

	rreq = netfs_alloc_request(iocb->ki_filp->f_mapping, iocb->ki_filp,
				   iocb->ki_pos, orig_count,
				   NETFS_DIO_READ);
	if (IS_ERR(rreq))
		return PTR_ERR(rreq);

	netfs_stat(&netfs_n_rh_dio_read);
	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_dio_read);

	/* If this is an async op, we have to keep track of the destination
	 * buffer for ourselves as the caller's iterator will be trashed when
	 * we return.
	 *
	 * In such a case, extract an iterator to represent as much of the
	 * output buffer as we can manage. Note that the extraction might not
	 * be able to allocate a sufficiently large bvec array and may shorten
	 * the request.
	 */
	if (user_backed_iter(iter)) {
		ret = netfs_extract_user_iter(iter, rreq->len, &rreq->iter, 0);
		if (ret < 0)
			goto out;
		rreq->direct_bv = (struct bio_vec *)rreq->iter.bvec;
		rreq->direct_bv_count = ret;
		rreq->direct_bv_unpin = iov_iter_extract_will_pin(iter);
		rreq->len = iov_iter_count(&rreq->iter);
	} else {
		rreq->iter = *iter;
		rreq->len = orig_count;
		rreq->direct_bv_unpin = false;
		iov_iter_advance(iter, orig_count);
	}

	// TODO: Set up bounce buffer if needed

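	/* For an asynchronous read, stash the iocb so that the completion
	 * path can finish it off once the I/O is done.
	 */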
	if (!sync)
		rreq->iocb = iocb;

	ret = netfs_unbuffered_read(rreq, sync);
	if (ret < 0)
		goto out; /* May be -EIOCBQUEUED */
	if (sync) {
		// TODO: Copy from bounce buffer
		iocb->ki_pos += rreq->transferred;
		ret = rreq->transferred;
	}

out:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
	if (ret > 0)
		orig_count -= ret;
	return ret;
}
EXPORT_SYMBOL(netfs_unbuffered_read_iter_locked);

/**
 * netfs_unbuffered_read_iter - Perform an unbuffered or direct I/O read
 * @iocb: The I/O control descriptor describing the read
 * @iter: The output buffer (also specifies read length)
 *
 * Perform an unbuffered I/O or direct I/O from the file in @iocb to the
 * output buffer. No use is made of the pagecache.
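 *
 * Return: The number of bytes read, -EIOCBQUEUED if the read has been queued
 * for asynchronous completion, or a negative error code.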
 */
ssize_t netfs_unbuffered_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	ssize_t ret;

	if (!iter->count)
		return 0; /* Don't update atime */

	ret = netfs_start_io_direct(inode);
	if (ret == 0) {
		ret = netfs_unbuffered_read_iter_locked(iocb, iter);
		netfs_end_io_direct(inode);
	}
	return ret;
}
EXPORT_SYMBOL(netfs_unbuffered_read_iter);

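/*
 * Illustrative usage (a sketch, not part of this file): a network filesystem
 * would typically call netfs_unbuffered_read_iter() from its ->read_iter()
 * handler when the caller requested direct I/O, falling back to the buffered
 * path otherwise.  "myfs_file_read_iter" below is a hypothetical handler used
 * purely for illustration:
 *
 *	static ssize_t myfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 *	{
 *		if (iocb->ki_flags & IOCB_DIRECT)
 *			return netfs_unbuffered_read_iter(iocb, to);
 *		return netfs_file_read_iter(iocb, to);
 *	}
 */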