xref: /linux/fs/netfs/iterator.c (revision 266679ffd867cb247c36717ea4d7998e9304823b)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* Iterator helpers.
3  *
4  * Copyright (C) 2022 Red Hat, Inc. All Rights Reserved.
5  * Written by David Howells (dhowells@redhat.com)
6  */
7 
8 #include <linux/export.h>
9 #include <linux/slab.h>
10 #include <linux/mm.h>
11 #include <linux/uio.h>
12 #include <linux/scatterlist.h>
13 #include <linux/netfs.h>
14 #include "internal.h"
15 
16 /**
17  * netfs_extract_user_iter - Extract the pages from a user iterator into a bvec
18  * @orig: The original iterator
19  * @orig_len: The amount of iterator to copy
20  * @new: The iterator to be set up
21  * @extraction_flags: Flags to qualify the request
22  *
23  * Extract the page fragments from the given amount of the source iterator and
24  * build up a second iterator that refers to all of those bits.  This allows
25  * the original iterator to disposed of.
26  *
27  * @extraction_flags can have ITER_ALLOW_P2PDMA set to request peer-to-peer DMA be
28  * allowed on the pages extracted.
29  *
30  * On success, the number of elements in the bvec is returned, the original
31  * iterator will have been advanced by the amount extracted.
32  *
33  * The iov_iter_extract_mode() function should be used to query how cleanup
34  * should be performed.
35  */
36 ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len,
37 				struct iov_iter *new,
38 				iov_iter_extraction_t extraction_flags)
39 {
40 	struct bio_vec *bv = NULL;
41 	struct page **pages;
42 	unsigned int cur_npages;
43 	unsigned int max_pages;
44 	unsigned int npages = 0;
45 	unsigned int i;
46 	ssize_t ret;
47 	size_t count = orig_len, offset, len;
48 	size_t bv_size, pg_size;
49 
50 	if (WARN_ON_ONCE(!iter_is_ubuf(orig) && !iter_is_iovec(orig)))
51 		return -EIO;
52 
53 	max_pages = iov_iter_npages(orig, INT_MAX);
54 	bv_size = array_size(max_pages, sizeof(*bv));
55 	bv = kvmalloc(bv_size, GFP_KERNEL);
56 	if (!bv)
57 		return -ENOMEM;
58 
59 	/* Put the page list at the end of the bvec list storage.  bvec
60 	 * elements are larger than page pointers, so as long as we work
61 	 * 0->last, we should be fine.
62 	 */
63 	pg_size = array_size(max_pages, sizeof(*pages));
64 	pages = (void *)bv + bv_size - pg_size;
65 
66 	while (count && npages < max_pages) {
67 		ret = iov_iter_extract_pages(orig, &pages, count,
68 					     max_pages - npages, extraction_flags,
69 					     &offset);
70 		if (ret < 0) {
71 			pr_err("Couldn't get user pages (rc=%zd)\n", ret);
72 			break;
73 		}
74 
75 		if (ret > count) {
76 			pr_err("get_pages rc=%zd more than %zu\n", ret, count);
77 			break;
78 		}
79 
80 		count -= ret;
81 		ret += offset;
82 		cur_npages = DIV_ROUND_UP(ret, PAGE_SIZE);
83 
84 		if (npages + cur_npages > max_pages) {
85 			pr_err("Out of bvec array capacity (%u vs %u)\n",
86 			       npages + cur_npages, max_pages);
87 			break;
88 		}
89 
90 		for (i = 0; i < cur_npages; i++) {
91 			len = ret > PAGE_SIZE ? PAGE_SIZE : ret;
92 			bvec_set_page(bv + npages + i, *pages++, len - offset, offset);
93 			ret -= len;
94 			offset = 0;
95 		}
96 
97 		npages += cur_npages;
98 	}
99 
100 	iov_iter_bvec(new, orig->data_source, bv, npages, orig_len - count);
101 	return npages;
102 }
103 EXPORT_SYMBOL_GPL(netfs_extract_user_iter);
104 
105 /*
106  * Extract and pin a list of up to sg_max pages from UBUF- or IOVEC-class
107  * iterators, and add them to the scatterlist.
108  */
109 static ssize_t netfs_extract_user_to_sg(struct iov_iter *iter,
110 					ssize_t maxsize,
111 					struct sg_table *sgtable,
112 					unsigned int sg_max,
113 					iov_iter_extraction_t extraction_flags)
114 {
115 	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
116 	struct page **pages;
117 	unsigned int npages;
118 	ssize_t ret = 0, res;
119 	size_t len, off;
120 
121 	/* We decant the page list into the tail of the scatterlist */
122 	pages = (void *)sgtable->sgl + array_size(sg_max, sizeof(struct scatterlist));
123 	pages -= sg_max;
124 
125 	do {
126 		res = iov_iter_extract_pages(iter, &pages, maxsize, sg_max,
127 					     extraction_flags, &off);
128 		if (res < 0)
129 			goto failed;
130 
131 		len = res;
132 		maxsize -= len;
133 		ret += len;
134 		npages = DIV_ROUND_UP(off + len, PAGE_SIZE);
135 		sg_max -= npages;
136 
137 		for (; npages > 0; npages--) {
138 			struct page *page = *pages;
139 			size_t seg = min_t(size_t, PAGE_SIZE - off, len);
140 
141 			*pages++ = NULL;
142 			sg_set_page(sg, page, len, off);
143 			sgtable->nents++;
144 			sg++;
145 			len -= seg;
146 			off = 0;
147 		}
148 	} while (maxsize > 0 && sg_max > 0);
149 
150 	return ret;
151 
152 failed:
153 	while (sgtable->nents > sgtable->orig_nents)
154 		put_page(sg_page(&sgtable->sgl[--sgtable->nents]));
155 	return res;
156 }
157 
158 /*
159  * Extract up to sg_max pages from a BVEC-type iterator and add them to the
160  * scatterlist.  The pages are not pinned.
161  */
162 static ssize_t netfs_extract_bvec_to_sg(struct iov_iter *iter,
163 					ssize_t maxsize,
164 					struct sg_table *sgtable,
165 					unsigned int sg_max,
166 					iov_iter_extraction_t extraction_flags)
167 {
168 	const struct bio_vec *bv = iter->bvec;
169 	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
170 	unsigned long start = iter->iov_offset;
171 	unsigned int i;
172 	ssize_t ret = 0;
173 
174 	for (i = 0; i < iter->nr_segs; i++) {
175 		size_t off, len;
176 
177 		len = bv[i].bv_len;
178 		if (start >= len) {
179 			start -= len;
180 			continue;
181 		}
182 
183 		len = min_t(size_t, maxsize, len - start);
184 		off = bv[i].bv_offset + start;
185 
186 		sg_set_page(sg, bv[i].bv_page, len, off);
187 		sgtable->nents++;
188 		sg++;
189 		sg_max--;
190 
191 		ret += len;
192 		maxsize -= len;
193 		if (maxsize <= 0 || sg_max == 0)
194 			break;
195 		start = 0;
196 	}
197 
198 	if (ret > 0)
199 		iov_iter_advance(iter, ret);
200 	return ret;
201 }
202 
203 /*
204  * Extract up to sg_max pages from a KVEC-type iterator and add them to the
205  * scatterlist.  This can deal with vmalloc'd buffers as well as kmalloc'd or
206  * static buffers.  The pages are not pinned.
207  */
208 static ssize_t netfs_extract_kvec_to_sg(struct iov_iter *iter,
209 					ssize_t maxsize,
210 					struct sg_table *sgtable,
211 					unsigned int sg_max,
212 					iov_iter_extraction_t extraction_flags)
213 {
214 	const struct kvec *kv = iter->kvec;
215 	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
216 	unsigned long start = iter->iov_offset;
217 	unsigned int i;
218 	ssize_t ret = 0;
219 
220 	for (i = 0; i < iter->nr_segs; i++) {
221 		struct page *page;
222 		unsigned long kaddr;
223 		size_t off, len, seg;
224 
225 		len = kv[i].iov_len;
226 		if (start >= len) {
227 			start -= len;
228 			continue;
229 		}
230 
231 		kaddr = (unsigned long)kv[i].iov_base + start;
232 		off = kaddr & ~PAGE_MASK;
233 		len = min_t(size_t, maxsize, len - start);
234 		kaddr &= PAGE_MASK;
235 
236 		maxsize -= len;
237 		ret += len;
238 		do {
239 			seg = min_t(size_t, len, PAGE_SIZE - off);
240 			if (is_vmalloc_or_module_addr((void *)kaddr))
241 				page = vmalloc_to_page((void *)kaddr);
242 			else
243 				page = virt_to_page(kaddr);
244 
245 			sg_set_page(sg, page, len, off);
246 			sgtable->nents++;
247 			sg++;
248 			sg_max--;
249 
250 			len -= seg;
251 			kaddr += PAGE_SIZE;
252 			off = 0;
253 		} while (len > 0 && sg_max > 0);
254 
255 		if (maxsize <= 0 || sg_max == 0)
256 			break;
257 		start = 0;
258 	}
259 
260 	if (ret > 0)
261 		iov_iter_advance(iter, ret);
262 	return ret;
263 }
264 
265 /*
266  * Extract up to sg_max folios from an XARRAY-type iterator and add them to
267  * the scatterlist.  The pages are not pinned.
268  */
269 static ssize_t netfs_extract_xarray_to_sg(struct iov_iter *iter,
270 					  ssize_t maxsize,
271 					  struct sg_table *sgtable,
272 					  unsigned int sg_max,
273 					  iov_iter_extraction_t extraction_flags)
274 {
275 	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
276 	struct xarray *xa = iter->xarray;
277 	struct folio *folio;
278 	loff_t start = iter->xarray_start + iter->iov_offset;
279 	pgoff_t index = start / PAGE_SIZE;
280 	ssize_t ret = 0;
281 	size_t offset, len;
282 	XA_STATE(xas, xa, index);
283 
284 	rcu_read_lock();
285 
286 	xas_for_each(&xas, folio, ULONG_MAX) {
287 		if (xas_retry(&xas, folio))
288 			continue;
289 		if (WARN_ON(xa_is_value(folio)))
290 			break;
291 		if (WARN_ON(folio_test_hugetlb(folio)))
292 			break;
293 
294 		offset = offset_in_folio(folio, start);
295 		len = min_t(size_t, maxsize, folio_size(folio) - offset);
296 
297 		sg_set_page(sg, folio_page(folio, 0), len, offset);
298 		sgtable->nents++;
299 		sg++;
300 		sg_max--;
301 
302 		maxsize -= len;
303 		ret += len;
304 		if (maxsize <= 0 || sg_max == 0)
305 			break;
306 	}
307 
308 	rcu_read_unlock();
309 	if (ret > 0)
310 		iov_iter_advance(iter, ret);
311 	return ret;
312 }
313 
314 /**
315  * netfs_extract_iter_to_sg - Extract pages from an iterator and add ot an sglist
316  * @iter: The iterator to extract from
317  * @maxsize: The amount of iterator to copy
318  * @sgtable: The scatterlist table to fill in
319  * @sg_max: Maximum number of elements in @sgtable that may be filled
320  * @extraction_flags: Flags to qualify the request
321  *
322  * Extract the page fragments from the given amount of the source iterator and
323  * add them to a scatterlist that refers to all of those bits, to a maximum
324  * addition of @sg_max elements.
325  *
326  * The pages referred to by UBUF- and IOVEC-type iterators are extracted and
327  * pinned; BVEC-, KVEC- and XARRAY-type are extracted but aren't pinned; PIPE-
328  * and DISCARD-type are not supported.
329  *
330  * No end mark is placed on the scatterlist; that's left to the caller.
331  *
332  * @extraction_flags can have ITER_ALLOW_P2PDMA set to request peer-to-peer DMA
333  * be allowed on the pages extracted.
334  *
335  * If successul, @sgtable->nents is updated to include the number of elements
336  * added and the number of bytes added is returned.  @sgtable->orig_nents is
337  * left unaltered.
338  *
339  * The iov_iter_extract_mode() function should be used to query how cleanup
340  * should be performed.
341  */
342 ssize_t netfs_extract_iter_to_sg(struct iov_iter *iter, size_t maxsize,
343 				 struct sg_table *sgtable, unsigned int sg_max,
344 				 iov_iter_extraction_t extraction_flags)
345 {
346 	if (maxsize == 0)
347 		return 0;
348 
349 	switch (iov_iter_type(iter)) {
350 	case ITER_UBUF:
351 	case ITER_IOVEC:
352 		return netfs_extract_user_to_sg(iter, maxsize, sgtable, sg_max,
353 						extraction_flags);
354 	case ITER_BVEC:
355 		return netfs_extract_bvec_to_sg(iter, maxsize, sgtable, sg_max,
356 						extraction_flags);
357 	case ITER_KVEC:
358 		return netfs_extract_kvec_to_sg(iter, maxsize, sgtable, sg_max,
359 						extraction_flags);
360 	case ITER_XARRAY:
361 		return netfs_extract_xarray_to_sg(iter, maxsize, sgtable, sg_max,
362 						  extraction_flags);
363 	default:
364 		pr_err("%s(%u) unsupported\n", __func__, iov_iter_type(iter));
365 		WARN_ON_ONCE(1);
366 		return -EIO;
367 	}
368 }
369 EXPORT_SYMBOL_GPL(netfs_extract_iter_to_sg);
370