1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /* I/O iterator iteration building functions.
3 *
4 * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
5 * Written by David Howells (dhowells@redhat.com)
6 */
7
8 #ifndef _LINUX_IOV_ITER_H
9 #define _LINUX_IOV_ITER_H
10
11 #include <linux/uio.h>
12 #include <linux/bvec.h>
13 #include <linux/folio_queue.h>
14
15 typedef size_t (*iov_step_f)(void *iter_base, size_t progress, size_t len,
16 void *priv, void *priv2);
17 typedef size_t (*iov_ustep_f)(void __user *iter_base, size_t progress, size_t len,
18 void *priv, void *priv2);
19
20 /*
21 * Handle ITER_UBUF.
22 */
23 static __always_inline
iterate_ubuf(struct iov_iter * iter,size_t len,void * priv,void * priv2,iov_ustep_f step)24 size_t iterate_ubuf(struct iov_iter *iter, size_t len, void *priv, void *priv2,
25 iov_ustep_f step)
26 {
27 void __user *base = iter->ubuf;
28 size_t progress = 0, remain;
29
30 remain = step(base + iter->iov_offset, 0, len, priv, priv2);
31 progress = len - remain;
32 iter->iov_offset += progress;
33 iter->count -= progress;
34 return progress;
35 }
36
37 /*
38 * Handle ITER_IOVEC.
39 */
40 static __always_inline
iterate_iovec(struct iov_iter * iter,size_t len,void * priv,void * priv2,iov_ustep_f step)41 size_t iterate_iovec(struct iov_iter *iter, size_t len, void *priv, void *priv2,
42 iov_ustep_f step)
43 {
44 const struct iovec *p = iter->__iov;
45 size_t progress = 0, skip = iter->iov_offset;
46
47 do {
48 size_t remain, consumed;
49 size_t part = min(len, p->iov_len - skip);
50
51 if (likely(part)) {
52 remain = step(p->iov_base + skip, progress, part, priv, priv2);
53 consumed = part - remain;
54 progress += consumed;
55 skip += consumed;
56 len -= consumed;
57 if (skip < p->iov_len)
58 break;
59 }
60 p++;
61 skip = 0;
62 } while (len);
63
64 iter->nr_segs -= p - iter->__iov;
65 iter->__iov = p;
66 iter->iov_offset = skip;
67 iter->count -= progress;
68 return progress;
69 }
70
71 /*
72 * Handle ITER_KVEC.
73 */
74 static __always_inline
iterate_kvec(struct iov_iter * iter,size_t len,void * priv,void * priv2,iov_step_f step)75 size_t iterate_kvec(struct iov_iter *iter, size_t len, void *priv, void *priv2,
76 iov_step_f step)
77 {
78 const struct kvec *p = iter->kvec;
79 size_t progress = 0, skip = iter->iov_offset;
80
81 do {
82 size_t remain, consumed;
83 size_t part = min(len, p->iov_len - skip);
84
85 if (likely(part)) {
86 remain = step(p->iov_base + skip, progress, part, priv, priv2);
87 consumed = part - remain;
88 progress += consumed;
89 skip += consumed;
90 len -= consumed;
91 if (skip < p->iov_len)
92 break;
93 }
94 p++;
95 skip = 0;
96 } while (len);
97
98 iter->nr_segs -= p - iter->kvec;
99 iter->kvec = p;
100 iter->iov_offset = skip;
101 iter->count -= progress;
102 return progress;
103 }
104
105 /*
106 * Handle ITER_BVEC.
107 */
108 static __always_inline
iterate_bvec(struct iov_iter * iter,size_t len,void * priv,void * priv2,iov_step_f step)109 size_t iterate_bvec(struct iov_iter *iter, size_t len, void *priv, void *priv2,
110 iov_step_f step)
111 {
112 const struct bio_vec *p = iter->bvec;
113 size_t progress = 0, skip = iter->iov_offset;
114
115 do {
116 size_t remain, consumed;
117 size_t offset = p->bv_offset + skip, part;
118 void *kaddr = kmap_local_page(p->bv_page + offset / PAGE_SIZE);
119
120 part = min3(len,
121 (size_t)(p->bv_len - skip),
122 (size_t)(PAGE_SIZE - offset % PAGE_SIZE));
123 remain = step(kaddr + offset % PAGE_SIZE, progress, part, priv, priv2);
124 kunmap_local(kaddr);
125 consumed = part - remain;
126 len -= consumed;
127 progress += consumed;
128 skip += consumed;
129 if (skip >= p->bv_len) {
130 skip = 0;
131 p++;
132 }
133 if (remain)
134 break;
135 } while (len);
136
137 iter->nr_segs -= p - iter->bvec;
138 iter->bvec = p;
139 iter->iov_offset = skip;
140 iter->count -= progress;
141 return progress;
142 }
143
144 /*
145 * Handle ITER_FOLIOQ.
146 */
147 static __always_inline
iterate_folioq(struct iov_iter * iter,size_t len,void * priv,void * priv2,iov_step_f step)148 size_t iterate_folioq(struct iov_iter *iter, size_t len, void *priv, void *priv2,
149 iov_step_f step)
150 {
151 const struct folio_queue *folioq = iter->folioq;
152 unsigned int slot = iter->folioq_slot;
153 size_t progress = 0, skip = iter->iov_offset;
154
155 if (slot == folioq_nr_slots(folioq)) {
156 /* The iterator may have been extended. */
157 folioq = folioq->next;
158 slot = 0;
159 }
160
161 do {
162 struct folio *folio = folioq_folio(folioq, slot);
163 size_t part, remain = 0, consumed;
164 size_t fsize;
165 void *base;
166
167 if (!folio)
168 break;
169
170 fsize = folioq_folio_size(folioq, slot);
171 if (skip < fsize) {
172 base = kmap_local_folio(folio, skip);
173 part = umin(len, PAGE_SIZE - skip % PAGE_SIZE);
174 remain = step(base, progress, part, priv, priv2);
175 kunmap_local(base);
176 consumed = part - remain;
177 len -= consumed;
178 progress += consumed;
179 skip += consumed;
180 }
181 if (skip >= fsize) {
182 skip = 0;
183 slot++;
184 if (slot == folioq_nr_slots(folioq) && folioq->next) {
185 folioq = folioq->next;
186 slot = 0;
187 }
188 }
189 if (remain)
190 break;
191 } while (len);
192
193 iter->folioq_slot = slot;
194 iter->folioq = folioq;
195 iter->iov_offset = skip;
196 iter->count -= progress;
197 return progress;
198 }
199
200 /*
201 * Handle ITER_XARRAY.
202 */
203 static __always_inline
iterate_xarray(struct iov_iter * iter,size_t len,void * priv,void * priv2,iov_step_f step)204 size_t iterate_xarray(struct iov_iter *iter, size_t len, void *priv, void *priv2,
205 iov_step_f step)
206 {
207 struct folio *folio;
208 size_t progress = 0;
209 loff_t start = iter->xarray_start + iter->iov_offset;
210 pgoff_t index = start / PAGE_SIZE;
211 XA_STATE(xas, iter->xarray, index);
212
213 rcu_read_lock();
214 xas_for_each(&xas, folio, ULONG_MAX) {
215 size_t remain, consumed, offset, part, flen;
216
217 if (xas_retry(&xas, folio))
218 continue;
219 if (WARN_ON(xa_is_value(folio)))
220 break;
221 if (WARN_ON(folio_test_hugetlb(folio)))
222 break;
223
224 offset = offset_in_folio(folio, start + progress);
225 flen = min(folio_size(folio) - offset, len);
226
227 while (flen) {
228 void *base = kmap_local_folio(folio, offset);
229
230 part = min_t(size_t, flen,
231 PAGE_SIZE - offset_in_page(offset));
232 remain = step(base, progress, part, priv, priv2);
233 kunmap_local(base);
234
235 consumed = part - remain;
236 progress += consumed;
237 len -= consumed;
238
239 if (remain || len == 0)
240 goto out;
241 flen -= consumed;
242 offset += consumed;
243 }
244 }
245
246 out:
247 rcu_read_unlock();
248 iter->iov_offset += progress;
249 iter->count -= progress;
250 return progress;
251 }
252
253 /*
254 * Handle ITER_DISCARD.
255 */
256 static __always_inline
iterate_discard(struct iov_iter * iter,size_t len,void * priv,void * priv2,iov_step_f step)257 size_t iterate_discard(struct iov_iter *iter, size_t len, void *priv, void *priv2,
258 iov_step_f step)
259 {
260 size_t progress = len;
261
262 iter->count -= progress;
263 return progress;
264 }
265
266 /**
267 * iterate_and_advance2 - Iterate over an iterator
268 * @iter: The iterator to iterate over.
269 * @len: The amount to iterate over.
270 * @priv: Data for the step functions.
271 * @priv2: More data for the step functions.
272 * @ustep: Function for UBUF/IOVEC iterators; given __user addresses.
273 * @step: Function for other iterators; given kernel addresses.
274 *
275 * Iterate over the next part of an iterator, up to the specified length. The
276 * buffer is presented in segments, which for kernel iteration are broken up by
277 * physical pages and mapped, with the mapped address being presented.
278 *
279 * Two step functions, @step and @ustep, must be provided, one for handling
280 * mapped kernel addresses and the other is given user addresses which have the
281 * potential to fault since no pinning is performed.
282 *
283 * The step functions are passed the address and length of the segment, @priv,
284 * @priv2 and the amount of data so far iterated over (which can, for example,
285 * be added to @priv to point to the right part of a second buffer). The step
286 * functions should return the amount of the segment they didn't process (ie. 0
287 * indicates complete processsing).
288 *
289 * This function returns the amount of data processed (ie. 0 means nothing was
290 * processed and the value of @len means processes to completion).
291 */
292 static __always_inline
iterate_and_advance2(struct iov_iter * iter,size_t len,void * priv,void * priv2,iov_ustep_f ustep,iov_step_f step)293 size_t iterate_and_advance2(struct iov_iter *iter, size_t len, void *priv,
294 void *priv2, iov_ustep_f ustep, iov_step_f step)
295 {
296 if (unlikely(iter->count < len))
297 len = iter->count;
298 if (unlikely(!len))
299 return 0;
300
301 if (likely(iter_is_ubuf(iter)))
302 return iterate_ubuf(iter, len, priv, priv2, ustep);
303 if (likely(iter_is_iovec(iter)))
304 return iterate_iovec(iter, len, priv, priv2, ustep);
305 if (iov_iter_is_bvec(iter))
306 return iterate_bvec(iter, len, priv, priv2, step);
307 if (iov_iter_is_kvec(iter))
308 return iterate_kvec(iter, len, priv, priv2, step);
309 if (iov_iter_is_folioq(iter))
310 return iterate_folioq(iter, len, priv, priv2, step);
311 if (iov_iter_is_xarray(iter))
312 return iterate_xarray(iter, len, priv, priv2, step);
313 return iterate_discard(iter, len, priv, priv2, step);
314 }
315
316 /**
317 * iterate_and_advance - Iterate over an iterator
318 * @iter: The iterator to iterate over.
319 * @len: The amount to iterate over.
320 * @priv: Data for the step functions.
321 * @ustep: Function for UBUF/IOVEC iterators; given __user addresses.
322 * @step: Function for other iterators; given kernel addresses.
323 *
324 * As iterate_and_advance2(), but priv2 is always NULL.
325 */
326 static __always_inline
iterate_and_advance(struct iov_iter * iter,size_t len,void * priv,iov_ustep_f ustep,iov_step_f step)327 size_t iterate_and_advance(struct iov_iter *iter, size_t len, void *priv,
328 iov_ustep_f ustep, iov_step_f step)
329 {
330 return iterate_and_advance2(iter, len, priv, NULL, ustep, step);
331 }
332
333 /**
334 * iterate_and_advance_kernel - Iterate over a kernel-internal iterator
335 * @iter: The iterator to iterate over.
336 * @len: The amount to iterate over.
337 * @priv: Data for the step functions.
338 * @priv2: More data for the step functions.
339 * @step: Function for other iterators; given kernel addresses.
340 *
341 * Iterate over the next part of an iterator, up to the specified length. The
342 * buffer is presented in segments, which for kernel iteration are broken up by
343 * physical pages and mapped, with the mapped address being presented.
344 *
345 * [!] Note This will only handle BVEC, KVEC, FOLIOQ, XARRAY and DISCARD-type
346 * iterators; it will not handle UBUF or IOVEC-type iterators.
347 *
348 * A step functions, @step, must be provided, one for handling mapped kernel
349 * addresses and the other is given user addresses which have the potential to
350 * fault since no pinning is performed.
351 *
352 * The step functions are passed the address and length of the segment, @priv,
353 * @priv2 and the amount of data so far iterated over (which can, for example,
354 * be added to @priv to point to the right part of a second buffer). The step
355 * functions should return the amount of the segment they didn't process (ie. 0
356 * indicates complete processsing).
357 *
358 * This function returns the amount of data processed (ie. 0 means nothing was
359 * processed and the value of @len means processes to completion).
360 */
361 static __always_inline
iterate_and_advance_kernel(struct iov_iter * iter,size_t len,void * priv,void * priv2,iov_step_f step)362 size_t iterate_and_advance_kernel(struct iov_iter *iter, size_t len, void *priv,
363 void *priv2, iov_step_f step)
364 {
365 if (unlikely(iter->count < len))
366 len = iter->count;
367 if (unlikely(!len))
368 return 0;
369 if (iov_iter_is_bvec(iter))
370 return iterate_bvec(iter, len, priv, priv2, step);
371 if (iov_iter_is_kvec(iter))
372 return iterate_kvec(iter, len, priv, priv2, step);
373 if (iov_iter_is_folioq(iter))
374 return iterate_folioq(iter, len, priv, priv2, step);
375 if (iov_iter_is_xarray(iter))
376 return iterate_xarray(iter, len, priv, priv2, step);
377 return iterate_discard(iter, len, priv, priv2, step);
378 }
379
380 #endif /* _LINUX_IOV_ITER_H */
381