/* SPDX-License-Identifier: GPL-2.0-or-later */
/* I/O iterator iteration building functions.
 *
 * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#ifndef _LINUX_IOV_ITER_H
#define _LINUX_IOV_ITER_H

#include <linux/uio.h>
#include <linux/bvec.h>
#include <linux/folio_queue.h>

typedef size_t (*iov_step_f)(void *iter_base, size_t progress, size_t len,
			     void *priv, void *priv2);
typedef size_t (*iov_ustep_f)(void __user *iter_base, size_t progress, size_t len,
			      void *priv, void *priv2);

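/*
 * A step function is given the address of the current segment (@iter_base: a
 * mapped kernel address for iov_step_f, a userspace address for iov_ustep_f),
 * the amount already iterated over (@progress), the length of the segment
 * (@len) and two opaque cookies (@priv and @priv2).  It must return the
 * number of bytes of the segment it did not process; 0 means the segment was
 * consumed in full.
 */
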
/*
 * Handle ITER_UBUF.
 */
static __always_inline
size_t iterate_ubuf(struct iov_iter *iter, size_t len, void *priv, void *priv2,
		    iov_ustep_f step)
{
	void __user *base = iter->ubuf;
	size_t progress = 0, remain;

	remain = step(base + iter->iov_offset, 0, len, priv, priv2);
	progress = len - remain;
	iter->iov_offset += progress;
	iter->count -= progress;
	return progress;
}

/*
 * Handle ITER_IOVEC.
 */
static __always_inline
size_t iterate_iovec(struct iov_iter *iter, size_t len, void *priv, void *priv2,
		     iov_ustep_f step)
{
	const struct iovec *p = iter->__iov;
	size_t progress = 0, skip = iter->iov_offset;

	do {
		size_t remain, consumed;
		size_t part = min(len, p->iov_len - skip);

		if (likely(part)) {
			remain = step(p->iov_base + skip, progress, part, priv, priv2);
			consumed = part - remain;
			progress += consumed;
			skip += consumed;
			len -= consumed;
			if (skip < p->iov_len)
				break;
		}
		p++;
		skip = 0;
	} while (len);

	iter->nr_segs -= p - iter->__iov;
	iter->__iov = p;
	iter->iov_offset = skip;
	iter->count -= progress;
	return progress;
}

/*
 * Handle ITER_KVEC.
 */
static __always_inline
size_t iterate_kvec(struct iov_iter *iter, size_t len, void *priv, void *priv2,
		    iov_step_f step)
{
	const struct kvec *p = iter->kvec;
	size_t progress = 0, skip = iter->iov_offset;

	do {
		size_t remain, consumed;
		size_t part = min(len, p->iov_len - skip);

		if (likely(part)) {
			remain = step(p->iov_base + skip, progress, part, priv, priv2);
			consumed = part - remain;
			progress += consumed;
			skip += consumed;
			len -= consumed;
			if (skip < p->iov_len)
				break;
		}
		p++;
		skip = 0;
	} while (len);

	iter->nr_segs -= p - iter->kvec;
	iter->kvec = p;
	iter->iov_offset = skip;
	iter->count -= progress;
	return progress;
}

/*
 * Handle ITER_BVEC.
 */
static __always_inline
size_t iterate_bvec(struct iov_iter *iter, size_t len, void *priv, void *priv2,
		    iov_step_f step)
{
	const struct bio_vec *p = iter->bvec;
	size_t progress = 0, skip = iter->iov_offset;

	do {
		size_t remain, consumed;
		size_t offset = p->bv_offset + skip, part;
		void *kaddr = kmap_local_page(p->bv_page + offset / PAGE_SIZE);

		part = min3(len,
			   (size_t)(p->bv_len - skip),
			   (size_t)(PAGE_SIZE - offset % PAGE_SIZE));
		remain = step(kaddr + offset % PAGE_SIZE, progress, part, priv, priv2);
		kunmap_local(kaddr);
		consumed = part - remain;
		len -= consumed;
		progress += consumed;
		skip += consumed;
		if (skip >= p->bv_len) {
			skip = 0;
			p++;
		}
		if (remain)
			break;
	} while (len);

	iter->nr_segs -= p - iter->bvec;
	iter->bvec = p;
	iter->iov_offset = skip;
	iter->count -= progress;
	return progress;
}

/*
 * Handle ITER_FOLIOQ.
 */
static __always_inline
size_t iterate_folioq(struct iov_iter *iter, size_t len, void *priv, void *priv2,
		      iov_step_f step)
{
	const struct folio_queue *folioq = iter->folioq;
	unsigned int slot = iter->folioq_slot;
	size_t progress = 0, skip = iter->iov_offset;

	if (slot == folioq_nr_slots(folioq)) {
		/* The iterator may have been extended. */
		folioq = folioq->next;
		slot = 0;
	}

	do {
		struct folio *folio = folioq_folio(folioq, slot);
		size_t part, remain = 0, consumed;
		size_t fsize;
		void *base;

		if (!folio)
			break;

		fsize = folioq_folio_size(folioq, slot);
		if (skip < fsize) {
			base = kmap_local_folio(folio, skip);
			part = umin(len, PAGE_SIZE - skip % PAGE_SIZE);
			remain = step(base, progress, part, priv, priv2);
			kunmap_local(base);
			consumed = part - remain;
			len -= consumed;
			progress += consumed;
			skip += consumed;
		}
		if (skip >= fsize) {
			skip = 0;
			slot++;
			if (slot == folioq_nr_slots(folioq) && folioq->next) {
				folioq = folioq->next;
				slot = 0;
			}
		}
		if (remain)
			break;
	} while (len);

	iter->folioq_slot = slot;
	iter->folioq = folioq;
	iter->iov_offset = skip;
	iter->count -= progress;
	return progress;
}

/*
 * Handle ITER_XARRAY.
 */
static __always_inline
size_t iterate_xarray(struct iov_iter *iter, size_t len, void *priv, void *priv2,
		      iov_step_f step)
{
	struct folio *folio;
	size_t progress = 0;
	loff_t start = iter->xarray_start + iter->iov_offset;
	pgoff_t index = start / PAGE_SIZE;
	XA_STATE(xas, iter->xarray, index);

	rcu_read_lock();
	xas_for_each(&xas, folio, ULONG_MAX) {
		size_t remain, consumed, offset, part, flen;

		if (xas_retry(&xas, folio))
			continue;
		if (WARN_ON(xa_is_value(folio)))
			break;
		if (WARN_ON(folio_test_hugetlb(folio)))
			break;

		offset = offset_in_folio(folio, start + progress);
		flen = min(folio_size(folio) - offset, len);

		while (flen) {
			void *base = kmap_local_folio(folio, offset);

			part = min_t(size_t, flen,
				     PAGE_SIZE - offset_in_page(offset));
			remain = step(base, progress, part, priv, priv2);
			kunmap_local(base);

			consumed = part - remain;
			progress += consumed;
			len -= consumed;

			if (remain || len == 0)
				goto out;
			flen -= consumed;
			offset += consumed;
		}
	}

out:
	rcu_read_unlock();
	iter->iov_offset += progress;
	iter->count -= progress;
	return progress;
}
252 
253 /*
254  * Handle ITER_DISCARD.
255  */
256 static __always_inline
257 size_t iterate_discard(struct iov_iter *iter, size_t len, void *priv, void *priv2,
258 		      iov_step_f step)
259 {
260 	size_t progress = len;
261 
262 	iter->count -= progress;
263 	return progress;
264 }

/**
 * iterate_and_advance2 - Iterate over an iterator
 * @iter: The iterator to iterate over.
 * @len: The amount to iterate over.
 * @priv: Data for the step functions.
 * @priv2: More data for the step functions.
 * @ustep: Function for UBUF/IOVEC iterators; given __user addresses.
 * @step: Function for other iterators; given kernel addresses.
 *
 * Iterate over the next part of an iterator, up to the specified length.  The
 * buffer is presented in segments, which for kernel iteration are broken up by
 * physical pages and mapped, with the mapped address being presented.
 *
 * Two step functions, @step and @ustep, must be provided: @step handles
 * mapped kernel addresses and @ustep is given user addresses, which may fault
 * since no pinning is performed.
 *
 * The step functions are passed the address and length of the segment, @priv,
 * @priv2 and the amount of data so far iterated over (which can, for example,
 * be added to @priv to point to the right part of a second buffer).  The step
 * functions should return the amount of the segment they didn't process (i.e.
 * 0 indicates complete processing).
 *
 * This function returns the amount of data processed (i.e. 0 means nothing
 * was processed and a value of @len means it was processed to completion).
 */
static __always_inline
size_t iterate_and_advance2(struct iov_iter *iter, size_t len, void *priv,
			    void *priv2, iov_ustep_f ustep, iov_step_f step)
{
	if (unlikely(iter->count < len))
		len = iter->count;
	if (unlikely(!len))
		return 0;

	if (likely(iter_is_ubuf(iter)))
		return iterate_ubuf(iter, len, priv, priv2, ustep);
	if (likely(iter_is_iovec(iter)))
		return iterate_iovec(iter, len, priv, priv2, ustep);
	if (iov_iter_is_bvec(iter))
		return iterate_bvec(iter, len, priv, priv2, step);
	if (iov_iter_is_kvec(iter))
		return iterate_kvec(iter, len, priv, priv2, step);
	if (iov_iter_is_folioq(iter))
		return iterate_folioq(iter, len, priv, priv2, step);
	if (iov_iter_is_xarray(iter))
		return iterate_xarray(iter, len, priv, priv2, step);
	return iterate_discard(iter, len, priv, priv2, step);
}
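
/*
 * Example: a minimal sketch of a step-function pair that copies iterator data
 * into a flat kernel buffer passed via @priv, in the style of the copy
 * helpers in lib/iov_iter.c.  The names example_copy_from_user_step(),
 * example_memcpy_step() and example_copy_from_iter() are hypothetical, and
 * <linux/string.h> and <linux/uaccess.h> are assumed to be available.
 */
static inline
size_t example_copy_from_user_step(void __user *iter_base, size_t progress,
				   size_t len, void *priv, void *priv2)
{
	/* copy_from_user() returns the number of bytes left uncopied, which
	 * is exactly what a step function must return.
	 */
	return copy_from_user(priv + progress, iter_base, len);
}

static inline
size_t example_memcpy_step(void *iter_base, size_t progress, size_t len,
			   void *priv, void *priv2)
{
	memcpy(priv + progress, iter_base, len);
	return 0;	/* Kernel-mapped memory: whole segment handled. */
}

static inline
size_t example_copy_from_iter(void *buf, size_t len, struct iov_iter *iter)
{
	/* Returns the number of bytes actually copied and advances @iter. */
	return iterate_and_advance2(iter, len, buf, NULL,
				    example_copy_from_user_step,
				    example_memcpy_step);
}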

/**
 * iterate_and_advance - Iterate over an iterator
 * @iter: The iterator to iterate over.
 * @len: The amount to iterate over.
 * @priv: Data for the step functions.
 * @ustep: Function for UBUF/IOVEC iterators; given __user addresses.
 * @step: Function for other iterators; given kernel addresses.
 *
 * As iterate_and_advance2(), but priv2 is always NULL.
 */
static __always_inline
size_t iterate_and_advance(struct iov_iter *iter, size_t len, void *priv,
			   iov_ustep_f ustep, iov_step_f step)
{
	return iterate_and_advance2(iter, len, priv, NULL, ustep, step);
}

/**
 * iterate_and_advance_kernel - Iterate over a kernel-internal iterator
 * @iter: The iterator to iterate over.
 * @len: The amount to iterate over.
 * @priv: Data for the step function.
 * @priv2: More data for the step function.
 * @step: Function to call on each segment; given kernel addresses.
 *
 * Iterate over the next part of an iterator, up to the specified length.  The
 * buffer is presented in segments, which for kernel iteration are broken up by
 * physical pages and mapped, with the mapped address being presented.
 *
 * [!] Note: This will only handle BVEC, KVEC, FOLIOQ, XARRAY and DISCARD-type
 * iterators; it will not handle UBUF or IOVEC-type iterators.
 *
 * A single step function, @step, must be provided; it is only ever given
 * mapped kernel addresses, as UBUF and IOVEC iterators are not handled, so no
 * userspace step function is needed.
 *
 * The step function is passed the address and length of the segment, @priv,
 * @priv2 and the amount of data so far iterated over (which can, for example,
 * be added to @priv to point to the right part of a second buffer).  It
 * should return the amount of the segment it didn't process (i.e. 0 indicates
 * complete processing).
 *
 * This function returns the amount of data processed (i.e. 0 means nothing
 * was processed and a value of @len means it was processed to completion).
 */
static __always_inline
size_t iterate_and_advance_kernel(struct iov_iter *iter, size_t len, void *priv,
				  void *priv2, iov_step_f step)
{
	if (unlikely(iter->count < len))
		len = iter->count;
	if (unlikely(!len))
		return 0;
	if (iov_iter_is_bvec(iter))
		return iterate_bvec(iter, len, priv, priv2, step);
	if (iov_iter_is_kvec(iter))
		return iterate_kvec(iter, len, priv, priv2, step);
	if (iov_iter_is_folioq(iter))
		return iterate_folioq(iter, len, priv, priv2, step);
	if (iov_iter_is_xarray(iter))
		return iterate_xarray(iter, len, priv, priv2, step);
	return iterate_discard(iter, len, priv, priv2, step);
}
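
/*
 * Example: a minimal sketch of a kernel-only step function that zeroes
 * whatever the iterator describes, usable with iterate_and_advance_kernel()
 * on BVEC/KVEC/FOLIOQ/XARRAY iterators.  The names example_zero_step() and
 * example_zero_iter() are hypothetical, and <linux/string.h> is assumed to be
 * available for memset().
 */
static inline
size_t example_zero_step(void *iter_base, size_t progress, size_t len,
			 void *priv, void *priv2)
{
	memset(iter_base, 0, len);
	return 0;	/* Whole segment handled. */
}

static inline
size_t example_zero_iter(struct iov_iter *iter, size_t len)
{
	/* Returns the number of bytes zeroed and advances @iter. */
	return iterate_and_advance_kernel(iter, len, NULL, NULL,
					  example_zero_step);
}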

#endif /* _LINUX_IOV_ITER_H */