xref: /linux/lib/scatterlist.c (revision 23b0f90ba871f096474e1c27c3d14f455189d2d9)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2007 Jens Axboe <jens.axboe@oracle.com>
4  *
5  * Scatterlist handling helpers.
6  */
7 #include <linux/export.h>
8 #include <linux/slab.h>
9 #include <linux/scatterlist.h>
10 #include <linux/highmem.h>
11 #include <linux/kmemleak.h>
12 #include <linux/bvec.h>
13 #include <linux/uio.h>
14 #include <linux/folio_queue.h>
15 
16 /**
17  * sg_nents - return total count of entries in scatterlist
18  * @sg:		The scatterlist
19  *
20  * Description:
21  * Allows to know how many entries are in sg, taking into account
22  * chaining as well
23  *
24  **/
int sg_nents(struct scatterlist *sg)
{
	int count = 0;

	/* Step through the list with sg_next() until it reports the end. */
	while (sg) {
		count++;
		sg = sg_next(sg);
	}

	return count;
}
32 EXPORT_SYMBOL(sg_nents);
33 
34 /**
35  * sg_nents_for_len - return total count of entries in scatterlist
36  *                    needed to satisfy the supplied length
37  * @sg:		The scatterlist
38  * @len:	The total required length
39  *
40  * Description:
41  * Determines the number of entries in sg that are required to meet
42  * the supplied length, taking into account chaining as well
43  *
44  * Returns:
45  *   the number of sg entries needed, negative error on failure
46  *
47  **/
48 int sg_nents_for_len(struct scatterlist *sg, u64 len)
49 {
50 	int nents;
51 	u64 total;
52 
53 	if (!len)
54 		return 0;
55 
56 	for (nents = 0, total = 0; sg; sg = sg_next(sg)) {
57 		nents++;
58 		total += sg->length;
59 		if (total >= len)
60 			return nents;
61 	}
62 
63 	return -EINVAL;
64 }
65 EXPORT_SYMBOL(sg_nents_for_len);
66 
67 /**
68  * sg_nents_for_dma - return the count of DMA-capable entries in scatterlist
69  * @sgl:	The scatterlist
70  * @sglen:	The current number of entries
71  * @len:	The maximum length of DMA-capable block
72  *
73  * Description:
74  * Determines the number of entries in @sgl which would be permitted in
75  * DMA-capable transfer if list had been split accordingly, taking into
76  * account chaining as well.
77  *
78  * Returns:
79  *   the number of sgl entries needed
80  *
81  **/
82 int sg_nents_for_dma(struct scatterlist *sgl, unsigned int sglen, size_t len)
83 {
84 	struct scatterlist *sg;
85 	int i, nents = 0;
86 
87 	for_each_sg(sgl, sg, sglen, i)
88 		nents += DIV_ROUND_UP(sg_dma_len(sg), len);
89 	return nents;
90 }
91 EXPORT_SYMBOL(sg_nents_for_dma);
92 
93 /**
94  * sg_last - return the last scatterlist entry in a list
95  * @sgl:	First entry in the scatterlist
96  * @nents:	Number of entries in the scatterlist
97  *
98  * Description:
99  *   Should only be used casually, it (currently) scans the entire list
100  *   to get the last entry.
101  *
102  *   Note that the @sgl pointer passed in need not be the first one,
103  *   the important bit is that @nents denotes the number of entries that
104  *   exist from @sgl.
105  *
106  **/
107 struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents)
108 {
109 	struct scatterlist *sg, *ret = NULL;
110 	unsigned int i;
111 
112 	for_each_sg(sgl, sg, nents, i)
113 		ret = sg;
114 
115 	BUG_ON(!sg_is_last(ret));
116 	return ret;
117 }
118 EXPORT_SYMBOL(sg_last);
119 
120 /**
121  * sg_init_table - Initialize SG table
122  * @sgl:	   The SG table
123  * @nents:	   Number of entries in table
124  *
125  * Notes:
126  *   If this is part of a chained sg table, sg_mark_end() should be
127  *   used only on the last table part.
128  *
129  **/
130 void sg_init_table(struct scatterlist *sgl, unsigned int nents)
131 {
132 	memset(sgl, 0, sizeof(*sgl) * nents);
133 	sg_init_marker(sgl, nents);
134 }
135 EXPORT_SYMBOL(sg_init_table);
136 
137 /**
138  * sg_init_one - Initialize a single entry sg list
139  * @sg:		 SG entry
140  * @buf:	 Virtual address for IO
141  * @buflen:	 IO length
142  *
143  **/
void sg_init_one(struct scatterlist *sg, const void *buf, unsigned int buflen)
{
	/* Initialize @sg as a table of exactly one entry ... */
	sg_init_table(sg, 1);
	/* ... and make that entry describe @buflen bytes at @buf. */
	sg_set_buf(sg, buf, buflen);
}
149 EXPORT_SYMBOL(sg_init_one);
150 
151 /*
152  * The default behaviour of sg_alloc_table() is to use these kmalloc/kfree
153  * helpers.
154  */
155 static struct scatterlist *sg_kmalloc(unsigned int nents, gfp_t gfp_mask)
156 {
157 	if (nents == SG_MAX_SINGLE_ALLOC) {
158 		/*
159 		 * Kmemleak doesn't track page allocations as they are not
160 		 * commonly used (in a raw form) for kernel data structures.
161 		 * As we chain together a list of pages and then a normal
162 		 * kmalloc (tracked by kmemleak), in order to for that last
163 		 * allocation not to become decoupled (and thus a
164 		 * false-positive) we need to inform kmemleak of all the
165 		 * intermediate allocations.
166 		 */
167 		void *ptr = (void *) __get_free_page(gfp_mask);
168 		kmemleak_alloc(ptr, PAGE_SIZE, 1, gfp_mask);
169 		return ptr;
170 	} else
171 		return kmalloc_array(nents, sizeof(struct scatterlist),
172 				     gfp_mask);
173 }
174 
175 static void sg_kfree(struct scatterlist *sg, unsigned int nents)
176 {
177 	if (nents == SG_MAX_SINGLE_ALLOC) {
178 		kmemleak_free(sg);
179 		free_page((unsigned long) sg);
180 	} else
181 		kfree(sg);
182 }
183 
184 /**
185  * __sg_free_table - Free a previously mapped sg table
186  * @table:	The sg table header to use
187  * @max_ents:	The maximum number of entries per single scatterlist
188  * @nents_first_chunk: Number of entries in the (preallocated) first
189  * 	scatterlist chunk, 0 means no such preallocated first chunk
190  * @free_fn:	Free function
191  * @num_ents:	Number of entries in the table
192  *
193  *  Description:
194  *    Free an sg table previously allocated and setup with
195  *    __sg_alloc_table().  The @max_ents value must be identical to
196  *    that previously used with __sg_alloc_table().
197  *
198  **/
void __sg_free_table(struct sg_table *table, unsigned int max_ents,
		     unsigned int nents_first_chunk, sg_free_fn *free_fn,
		     unsigned int num_ents)
{
	struct scatterlist *sgl, *next;
	/* Capacity of the chunk being visited; the first one may be preallocated. */
	unsigned curr_max_ents = nents_first_chunk ?: max_ents;

	/* Nothing to release when no scatterlist is attached to the table. */
	if (unlikely(!table->sgl))
		return;

	sgl = table->sgl;
	/* Walk the chunks one at a time until all @num_ents entries are covered. */
	while (num_ents) {
		unsigned int alloc_size = num_ents;
		unsigned int sg_size;

		/*
		 * If we have more than max_ents segments left,
		 * then assign 'next' to the sg table after the current one.
		 * sg_size is then one less than alloc size, since the last
		 * element is the chain pointer.
		 */
		if (alloc_size > curr_max_ents) {
			next = sg_chain_ptr(&sgl[curr_max_ents - 1]);
			alloc_size = curr_max_ents;
			sg_size = alloc_size - 1;
		} else {
			sg_size = alloc_size;
			next = NULL;
		}

		num_ents -= sg_size;
		/*
		 * A preallocated first chunk is not passed to free_fn; clearing
		 * the count ensures every following chunk is freed.
		 */
		if (nents_first_chunk)
			nents_first_chunk = 0;
		else
			free_fn(sgl, alloc_size);
		sgl = next;
		/* Every chunk after the first holds up to max_ents entries. */
		curr_max_ents = max_ents;
	}

	/* Clear the pointer so the table no longer refers to the freed chunks. */
	table->sgl = NULL;
}
240 EXPORT_SYMBOL(__sg_free_table);
241 
242 /**
243  * sg_free_append_table - Free a previously allocated append sg table.
244  * @table:	 The mapped sg append table header
245  *
246  **/
void sg_free_append_table(struct sg_append_table *table)
{
	/*
	 * No preallocated first chunk (0); the number of entries to walk is
	 * the append table's own total_nents counter.
	 */
	__sg_free_table(&table->sgt, SG_MAX_SINGLE_ALLOC, 0, sg_kfree,
			table->total_nents);
}
252 EXPORT_SYMBOL(sg_free_append_table);
253 
254 
255 /**
256  * sg_free_table - Free a previously allocated sg table
257  * @table:	The mapped sg table header
258  *
259  **/
void sg_free_table(struct sg_table *table)
{
	/*
	 * No preallocated first chunk (0); the number of entries to walk is
	 * taken from table->orig_nents.
	 */
	__sg_free_table(table, SG_MAX_SINGLE_ALLOC, 0, sg_kfree,
			table->orig_nents);
}
265 EXPORT_SYMBOL(sg_free_table);
266 
267 /**
268  * __sg_alloc_table - Allocate and initialize an sg table with given allocator
269  * @table:	The sg table header to use
270  * @nents:	Number of entries in sg list
271  * @max_ents:	The maximum number of entries the allocator returns per call
272  * @first_chunk: first SGL if preallocated (may be %NULL)
273  * @nents_first_chunk: Number of entries in the (preallocated) first
274  * 	scatterlist chunk, 0 means no such preallocated chunk provided by user
275  * @gfp_mask:	GFP allocation mask
276  * @alloc_fn:	Allocator to use
277  *
278  * Description:
279  *   This function returns a @table @nents long. The allocator is
280  *   defined to return scatterlist chunks of maximum size @max_ents.
281  *   Thus if @nents is bigger than @max_ents, the scatterlists will be
282  *   chained in units of @max_ents.
283  *
284  * Notes:
285  *   If this function returns non-0 (eg failure), the caller must call
286  *   __sg_free_table() to cleanup any leftover allocations.
287  *
288  **/
int __sg_alloc_table(struct sg_table *table, unsigned int nents,
		     unsigned int max_ents, struct scatterlist *first_chunk,
		     unsigned int nents_first_chunk, gfp_t gfp_mask,
		     sg_alloc_fn *alloc_fn)
{
	struct scatterlist *sg, *prv;
	unsigned int left;
	/* Capacity of the chunk being set up; the first one may be preallocated. */
	unsigned curr_max_ents = nents_first_chunk ?: max_ents;
	/* Capacity of the previous chunk; only read once prv is non-NULL. */
	unsigned prv_max_ents;

	/* Start from a clean header so a failed call leaves consistent state. */
	memset(table, 0, sizeof(*table));

	if (nents == 0)
		return -EINVAL;
#ifdef CONFIG_ARCH_NO_SG_CHAIN
	/* Without chaining support everything must fit in a single chunk. */
	if (WARN_ON_ONCE(nents > max_ents))
		return -EINVAL;
#endif

	left = nents;
	prv = NULL;
	do {
		unsigned int sg_size, alloc_size = left;

		/*
		 * If more entries remain than fit in this chunk, fill the
		 * chunk completely and reserve its last slot for the chain
		 * link, so it carries one usable entry less than allocated.
		 */
		if (alloc_size > curr_max_ents) {
			alloc_size = curr_max_ents;
			sg_size = alloc_size - 1;
		} else
			sg_size = alloc_size;

		left -= sg_size;

		/* Consume the caller-supplied first chunk before allocating. */
		if (first_chunk) {
			sg = first_chunk;
			first_chunk = NULL;
		} else {
			sg = alloc_fn(alloc_size, gfp_mask);
		}
		if (unlikely(!sg)) {
			/*
			 * Adjust entry count to reflect that the last
			 * entry of the previous table won't be used for
			 * linkage.  Without this, sg_kfree() may get
			 * confused.
			 */
			if (prv)
				table->nents = ++table->orig_nents;

			return -ENOMEM;
		}

		sg_init_table(sg, alloc_size);
		/* Only the usable entries (not the chain slot) are counted. */
		table->nents = table->orig_nents += sg_size;

		/*
		 * If this is the first mapping, assign the sg table header.
		 * If this is not the first mapping, chain previous part.
		 */
		if (prv)
			sg_chain(prv, prv_max_ents, sg);
		else
			table->sgl = sg;

		/*
		 * If no more entries after this one, mark the end
		 */
		if (!left)
			sg_mark_end(&sg[sg_size - 1]);

		prv = sg;
		prv_max_ents = curr_max_ents;
		/* Every chunk after the first holds up to max_ents entries. */
		curr_max_ents = max_ents;
	} while (left);

	return 0;
}
365 EXPORT_SYMBOL(__sg_alloc_table);
366 
367 /**
368  * sg_alloc_table - Allocate and initialize an sg table
369  * @table:	The sg table header to use
370  * @nents:	Number of entries in sg list
371  * @gfp_mask:	GFP allocation mask
372  *
373  *  Description:
374  *    Allocate and initialize an sg table. If @nents is larger than
375  *    SG_MAX_SINGLE_ALLOC a chained sg table will be setup.
376  *
377  **/
378 int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask)
379 {
380 	int ret;
381 
382 	ret = __sg_alloc_table(table, nents, SG_MAX_SINGLE_ALLOC,
383 			       NULL, 0, gfp_mask, sg_kmalloc);
384 	if (unlikely(ret))
385 		sg_free_table(table);
386 	return ret;
387 }
388 EXPORT_SYMBOL(sg_alloc_table);
389 
390 static struct scatterlist *get_next_sg(struct sg_append_table *table,
391 				       struct scatterlist *cur,
392 				       unsigned long needed_sges,
393 				       gfp_t gfp_mask)
394 {
395 	struct scatterlist *new_sg, *next_sg;
396 	unsigned int alloc_size;
397 
398 	if (cur) {
399 		next_sg = sg_next(cur);
400 		/* Check if last entry should be keeped for chainning */
401 		if (!sg_is_last(next_sg) || needed_sges == 1)
402 			return next_sg;
403 	}
404 
405 	alloc_size = min_t(unsigned long, needed_sges, SG_MAX_SINGLE_ALLOC);
406 	new_sg = sg_kmalloc(alloc_size, gfp_mask);
407 	if (!new_sg)
408 		return ERR_PTR(-ENOMEM);
409 	sg_init_table(new_sg, alloc_size);
410 	if (cur) {
411 		table->total_nents += alloc_size - 1;
412 		__sg_chain(next_sg, new_sg);
413 	} else {
414 		table->sgt.sgl = new_sg;
415 		table->total_nents = alloc_size;
416 	}
417 	return new_sg;
418 }
419 
420 static bool pages_are_mergeable(struct page *a, struct page *b)
421 {
422 	if (page_to_pfn(a) != page_to_pfn(b) + 1)
423 		return false;
424 	if (!zone_device_pages_have_same_pgmap(a, b))
425 		return false;
426 	return true;
427 }
428 
429 /**
430  * sg_alloc_append_table_from_pages - Allocate and initialize an append sg
431  *                                    table from an array of pages
432  * @sgt_append:  The sg append table to use
433  * @pages:       Pointer to an array of page pointers
434  * @n_pages:     Number of pages in the pages array
435  * @offset:      Offset from start of the first page to the start of a buffer
436  * @size:        Number of valid bytes in the buffer (after offset)
437  * @max_segment: Maximum size of a scatterlist element in bytes
438  * @left_pages:  Number of pages the caller still has to add after this call
439  * @gfp_mask:	 GFP allocation mask
440  *
441  * Description:
442  *    On the first call it allocates and initializes an sg table from a list of
443  *    pages, else it reuses the scatterlist from sgt_append. Contiguous ranges of
444  *    the pages are squashed into a single scatterlist entry up to the maximum
445  *    size specified in @max_segment.  A user may provide an offset at a start
446  *    and a size of valid data in a buffer specified by the page array. The
447  *    returned sg table is released by sg_free_append_table
448  *
449  * Returns:
450  *   0 on success, negative error on failure
451  *
452  * Notes:
453  *   If this function returns non-0 (eg failure), the caller must call
454  *   sg_free_append_table() to cleanup any leftover allocations.
455  *
456  *   On the first call, sgt_append must be initialized.
457  */
int sg_alloc_append_table_from_pages(struct sg_append_table *sgt_append,
		struct page **pages, unsigned int n_pages, unsigned int offset,
		unsigned long size, unsigned int max_segment,
		unsigned int left_pages, gfp_t gfp_mask)
{
	unsigned int chunks, cur_page, seg_len, i, prv_len = 0;
	unsigned int added_nents = 0;
	/* Continue after the last entry filled by an earlier call, if any. */
	struct scatterlist *s = sgt_append->prv;
	struct page *last_pg;

	/*
	 * The algorithm below requires max_segment to be aligned to PAGE_SIZE
	 * otherwise it can overshoot.
	 */
	max_segment = ALIGN_DOWN(max_segment, PAGE_SIZE);
	if (WARN_ON(max_segment < PAGE_SIZE))
		return -EINVAL;

	/* Appending to an existing table is refused without sg chaining. */
	if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && sgt_append->prv)
		return -EOPNOTSUPP;

	if (sgt_append->prv) {
		unsigned long next_pfn;

		/* A non-zero offset is only accepted on the first call. */
		if (WARN_ON(offset))
			return -EINVAL;

		/* Merge contiguous pages into the last SG */
		prv_len = sgt_append->prv->length;
		/* Page frame number right behind the end of the previous entry. */
		next_pfn = (sg_phys(sgt_append->prv) + prv_len) / PAGE_SIZE;
		if (page_to_pfn(pages[0]) == next_pfn) {
			last_pg = pfn_to_page(next_pfn - 1);
			/*
			 * Grow the previous entry one page at a time while the
			 * next page is mergeable and max_segment is respected.
			 */
			while (n_pages && pages_are_mergeable(pages[0], last_pg)) {
				if (sgt_append->prv->length + PAGE_SIZE > max_segment)
					break;
				sgt_append->prv->length += PAGE_SIZE;
				last_pg = pages[0];
				pages++;
				n_pages--;
			}
			/* Every page was absorbed; no new entries are needed. */
			if (!n_pages)
				goto out;
		}
	}

	/* compute number of contiguous chunks */
	chunks = 1;
	seg_len = 0;
	for (i = 1; i < n_pages; i++) {
		seg_len += PAGE_SIZE;
		/* A chunk ends at max_segment or at the first non-mergeable page. */
		if (seg_len >= max_segment ||
		    !pages_are_mergeable(pages[i], pages[i - 1])) {
			chunks++;
			seg_len = 0;
		}
	}

	/* merging chunks and putting them into the scatterlist */
	cur_page = 0;
	for (i = 0; i < chunks; i++) {
		unsigned int j, chunk_size;

		/* look for the end of the current chunk */
		seg_len = 0;
		for (j = cur_page + 1; j < n_pages; j++) {
			seg_len += PAGE_SIZE;
			if (seg_len >= max_segment ||
			    !pages_are_mergeable(pages[j], pages[j - 1]))
				break;
		}

		/* Pass how many chunks might be left */
		s = get_next_sg(sgt_append, s, chunks - i + left_pages,
				gfp_mask);
		if (IS_ERR(s)) {
			/*
			 * Adjust entry length to be as before function was
			 * called.
			 */
			if (sgt_append->prv)
				sgt_append->prv->length = prv_len;
			return PTR_ERR(s);
		}
		/* Bytes spanned by pages [cur_page, j), minus the leading offset. */
		chunk_size = ((j - cur_page) << PAGE_SHIFT) - offset;
		/* The entry length is capped by the remaining @size. */
		sg_set_page(s, pages[cur_page],
			    min_t(unsigned long, size, chunk_size), offset);
		added_nents++;
		size -= chunk_size;
		/* Only the first new entry starts at a non-zero offset. */
		offset = 0;
		cur_page = j;
	}
	sgt_append->sgt.nents += added_nents;
	sgt_append->sgt.orig_nents = sgt_append->sgt.nents;
	/* Remember the last filled entry for a following append call. */
	sgt_append->prv = s;
out:
	/* Terminate the list only when no further pages are announced. */
	if (!left_pages)
		sg_mark_end(s);
	return 0;
}
557 EXPORT_SYMBOL(sg_alloc_append_table_from_pages);
558 
559 /**
560  * sg_alloc_table_from_pages_segment - Allocate and initialize an sg table from
561  *                                     an array of pages and given maximum
562  *                                     segment.
563  * @sgt:	 The sg table header to use
564  * @pages:	 Pointer to an array of page pointers
565  * @n_pages:	 Number of pages in the pages array
566  * @offset:      Offset from start of the first page to the start of a buffer
567  * @size:        Number of valid bytes in the buffer (after offset)
568  * @max_segment: Maximum size of a scatterlist element in bytes
569  * @gfp_mask:	 GFP allocation mask
570  *
571  *  Description:
572  *    Allocate and initialize an sg table from a list of pages. Contiguous
573  *    ranges of the pages are squashed into a single scatterlist node up to the
574  *    maximum size specified in @max_segment. A user may provide an offset at a
575  *    start and a size of valid data in a buffer specified by the page array.
576  *
577  *    The returned sg table is released by sg_free_table.
578  *
579  *  Returns:
580  *   0 on success, negative error on failure
581  */
582 int sg_alloc_table_from_pages_segment(struct sg_table *sgt, struct page **pages,
583 				unsigned int n_pages, unsigned int offset,
584 				unsigned long size, unsigned int max_segment,
585 				gfp_t gfp_mask)
586 {
587 	struct sg_append_table append = {};
588 	int err;
589 
590 	err = sg_alloc_append_table_from_pages(&append, pages, n_pages, offset,
591 					       size, max_segment, 0, gfp_mask);
592 	if (err) {
593 		sg_free_append_table(&append);
594 		return err;
595 	}
596 	memcpy(sgt, &append.sgt, sizeof(*sgt));
597 	WARN_ON(append.total_nents != sgt->orig_nents);
598 	return 0;
599 }
600 EXPORT_SYMBOL(sg_alloc_table_from_pages_segment);
601 
602 #ifdef CONFIG_SGL_ALLOC
603 
604 /**
605  * sgl_alloc_order - allocate a scatterlist and its pages
606  * @length: Length in bytes of the scatterlist. Must be at least one
607  * @order: Second argument for alloc_pages()
608  * @chainable: Whether or not to allocate an extra element in the scatterlist
609  *	for scatterlist chaining purposes
610  * @gfp: Memory allocation flags
611  * @nent_p: [out] Number of entries in the scatterlist that have pages
612  *
613  * Returns: A pointer to an initialized scatterlist or %NULL upon failure.
614  */
615 struct scatterlist *sgl_alloc_order(unsigned long long length,
616 				    unsigned int order, bool chainable,
617 				    gfp_t gfp, unsigned int *nent_p)
618 {
619 	struct scatterlist *sgl, *sg;
620 	struct page *page;
621 	unsigned int nent, nalloc;
622 	u32 elem_len;
623 
624 	nent = round_up(length, PAGE_SIZE << order) >> (PAGE_SHIFT + order);
625 	/* Check for integer overflow */
626 	if (length > (nent << (PAGE_SHIFT + order)))
627 		return NULL;
628 	nalloc = nent;
629 	if (chainable) {
630 		/* Check for integer overflow */
631 		if (nalloc + 1 < nalloc)
632 			return NULL;
633 		nalloc++;
634 	}
635 	sgl = kmalloc_array(nalloc, sizeof(struct scatterlist),
636 			    gfp & ~GFP_DMA);
637 	if (!sgl)
638 		return NULL;
639 
640 	sg_init_table(sgl, nalloc);
641 	sg = sgl;
642 	while (length) {
643 		elem_len = min_t(u64, length, PAGE_SIZE << order);
644 		page = alloc_pages(gfp, order);
645 		if (!page) {
646 			sgl_free_order(sgl, order);
647 			return NULL;
648 		}
649 
650 		sg_set_page(sg, page, elem_len, 0);
651 		length -= elem_len;
652 		sg = sg_next(sg);
653 	}
654 	WARN_ONCE(length, "length = %lld\n", length);
655 	if (nent_p)
656 		*nent_p = nent;
657 	return sgl;
658 }
659 EXPORT_SYMBOL(sgl_alloc_order);
660 
661 /**
662  * sgl_alloc - allocate a scatterlist and its pages
663  * @length: Length in bytes of the scatterlist
664  * @gfp: Memory allocation flags
665  * @nent_p: [out] Number of entries in the scatterlist
666  *
667  * Returns: A pointer to an initialized scatterlist or %NULL upon failure.
668  */
struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp,
			      unsigned int *nent_p)
{
	/* Order-0 allocations and no extra element reserved for chaining. */
	return sgl_alloc_order(length, 0, false, gfp, nent_p);
}
674 EXPORT_SYMBOL(sgl_alloc);
675 
676 /**
677  * sgl_free_n_order - free a scatterlist and its pages
678  * @sgl: Scatterlist with one or more elements
679  * @nents: Maximum number of elements to free
680  * @order: Second argument for __free_pages()
681  *
682  * Notes:
683  * - If several scatterlists have been chained and each chain element is
684  *   freed separately then it's essential to set nents correctly to avoid that a
685  *   page would get freed twice.
686  * - All pages in a chained scatterlist can be freed at once by setting @nents
687  *   to a high number.
688  */
689 void sgl_free_n_order(struct scatterlist *sgl, int nents, int order)
690 {
691 	struct scatterlist *sg;
692 	struct page *page;
693 	int i;
694 
695 	for_each_sg(sgl, sg, nents, i) {
696 		if (!sg)
697 			break;
698 		page = sg_page(sg);
699 		if (page)
700 			__free_pages(page, order);
701 	}
702 	kfree(sgl);
703 }
704 EXPORT_SYMBOL(sgl_free_n_order);
705 
706 /**
707  * sgl_free_order - free a scatterlist and its pages
708  * @sgl: Scatterlist with one or more elements
709  * @order: Second argument for __free_pages()
710  */
void sgl_free_order(struct scatterlist *sgl, int order)
{
	/*
	 * INT_MAX acts as "no limit": sgl_free_n_order() stops by itself once
	 * the list runs out of entries.
	 */
	sgl_free_n_order(sgl, INT_MAX, order);
}
715 EXPORT_SYMBOL(sgl_free_order);
716 
717 /**
718  * sgl_free - free a scatterlist and its pages
719  * @sgl: Scatterlist with one or more elements
720  */
void sgl_free(struct scatterlist *sgl)
{
	/* Free with order 0. */
	sgl_free_order(sgl, 0);
}
725 EXPORT_SYMBOL(sgl_free);
726 
727 #endif /* CONFIG_SGL_ALLOC */
728 
729 void __sg_page_iter_start(struct sg_page_iter *piter,
730 			  struct scatterlist *sglist, unsigned int nents,
731 			  unsigned long pgoffset)
732 {
733 	piter->__pg_advance = 0;
734 	piter->__nents = nents;
735 
736 	piter->sg = sglist;
737 	piter->sg_pgoffset = pgoffset;
738 }
739 EXPORT_SYMBOL(__sg_page_iter_start);
740 
/*
 * Number of pages touched by @sg: its offset plus length, rounded up to a
 * page boundary and expressed in pages.
 */
static int sg_page_count(struct scatterlist *sg)
{
	return PAGE_ALIGN(sg->offset + sg->length) >> PAGE_SHIFT;
}
745 
746 bool __sg_page_iter_next(struct sg_page_iter *piter)
747 {
748 	if (!piter->__nents || !piter->sg)
749 		return false;
750 
751 	piter->sg_pgoffset += piter->__pg_advance;
752 	piter->__pg_advance = 1;
753 
754 	while (piter->sg_pgoffset >= sg_page_count(piter->sg)) {
755 		piter->sg_pgoffset -= sg_page_count(piter->sg);
756 		piter->sg = sg_next(piter->sg);
757 		if (!--piter->__nents || !piter->sg)
758 			return false;
759 	}
760 
761 	return true;
762 }
763 EXPORT_SYMBOL(__sg_page_iter_next);
764 
/*
 * Same as sg_page_count(), but based on the DMA length of @sg as reported by
 * sg_dma_len() instead of sg->length.
 */
static int sg_dma_page_count(struct scatterlist *sg)
{
	return PAGE_ALIGN(sg->offset + sg_dma_len(sg)) >> PAGE_SHIFT;
}
769 
770 bool __sg_page_iter_dma_next(struct sg_dma_page_iter *dma_iter)
771 {
772 	struct sg_page_iter *piter = &dma_iter->base;
773 
774 	if (!piter->__nents || !piter->sg)
775 		return false;
776 
777 	piter->sg_pgoffset += piter->__pg_advance;
778 	piter->__pg_advance = 1;
779 
780 	while (piter->sg_pgoffset >= sg_dma_page_count(piter->sg)) {
781 		piter->sg_pgoffset -= sg_dma_page_count(piter->sg);
782 		piter->sg = sg_next(piter->sg);
783 		if (!--piter->__nents || !piter->sg)
784 			return false;
785 	}
786 
787 	return true;
788 }
789 EXPORT_SYMBOL(__sg_page_iter_dma_next);
790 
791 /**
792  * sg_miter_start - start mapping iteration over a sg list
793  * @miter: sg mapping iter to be started
794  * @sgl: sg list to iterate over
795  * @nents: number of sg entries
796  * @flags: sg iterator flags
797  *
798  * Description:
799  *   Starts mapping iterator @miter.
800  *
801  * Context:
802  *   Don't care.
803  */
804 void sg_miter_start(struct sg_mapping_iter *miter, struct scatterlist *sgl,
805 		    unsigned int nents, unsigned int flags)
806 {
807 	memset(miter, 0, sizeof(struct sg_mapping_iter));
808 
809 	__sg_page_iter_start(&miter->piter, sgl, nents, 0);
810 	WARN_ON(!(flags & (SG_MITER_TO_SG | SG_MITER_FROM_SG)));
811 	miter->__flags = flags;
812 }
813 EXPORT_SYMBOL(sg_miter_start);
814 
/*
 * Make sure @miter points at a page portion with bytes left in it, advancing
 * the underlying page iterator if the current portion is exhausted.
 * Returns false when the page iterator cannot advance any further.
 */
static bool sg_miter_get_next_page(struct sg_mapping_iter *miter)
{
	/* Only move on once the current page portion is used up. */
	if (!miter->__remaining) {
		struct scatterlist *sg;

		if (!__sg_page_iter_next(&miter->piter))
			return false;

		sg = miter->piter.sg;

		/*
		 * At page offset 0 of an entry, start at the entry's own byte
		 * offset; any later page starts at its beginning.
		 */
		miter->__offset = miter->piter.sg_pgoffset ? 0 : sg->offset;
		/*
		 * Fold whole pages contained in the offset into the page
		 * index and keep only the in-page remainder.
		 */
		miter->piter.sg_pgoffset += miter->__offset >> PAGE_SHIFT;
		miter->__offset &= PAGE_SIZE - 1;
		/* Bytes from the current position up to the end of the entry ... */
		miter->__remaining = sg->offset + sg->length -
				     (miter->piter.sg_pgoffset << PAGE_SHIFT) -
				     miter->__offset;
		/* ... limited to what is left of the current page. */
		miter->__remaining = min_t(unsigned long, miter->__remaining,
					   PAGE_SIZE - miter->__offset);
	}

	return true;
}
837 
838 /**
839  * sg_miter_skip - reposition mapping iterator
840  * @miter: sg mapping iter to be skipped
841  * @offset: number of bytes to add to the current location
842  *
843  * Description:
844  *   Sets the offset of @miter to its current location plus @offset bytes.
845  *   If mapping iterator @miter has been advanced by sg_miter_next(), this
846  *   stops @miter.
847  *
848  * Context:
849  *   Don't care.
850  *
851  * Returns:
852  *   true if @miter contains the valid mapping.  false if end of sg
853  *   list is reached.
854  */
855 bool sg_miter_skip(struct sg_mapping_iter *miter, off_t offset)
856 {
857 	sg_miter_stop(miter);
858 
859 	while (offset) {
860 		off_t consumed;
861 
862 		if (!sg_miter_get_next_page(miter))
863 			return false;
864 
865 		consumed = min_t(off_t, offset, miter->__remaining);
866 		miter->__offset += consumed;
867 		miter->__remaining -= consumed;
868 		offset -= consumed;
869 	}
870 
871 	return true;
872 }
873 EXPORT_SYMBOL(sg_miter_skip);
874 
875 /**
876  * sg_miter_next - proceed mapping iterator to the next mapping
877  * @miter: sg mapping iter to proceed
878  *
879  * Description:
880  *   Proceeds @miter to the next mapping.  @miter should have been started
881  *   using sg_miter_start().  On successful return, @miter->page,
882  *   @miter->addr and @miter->length point to the current mapping.
883  *
884  * Context:
885  *   May sleep if !SG_MITER_ATOMIC && !SG_MITER_LOCAL.
886  *
887  * Returns:
888  *   true if @miter contains the next mapping.  false if end of sg
889  *   list is reached.
890  */
891 bool sg_miter_next(struct sg_mapping_iter *miter)
892 {
893 	sg_miter_stop(miter);
894 
895 	/*
896 	 * Get to the next page if necessary.
897 	 * __remaining, __offset is adjusted by sg_miter_stop
898 	 */
899 	if (!sg_miter_get_next_page(miter))
900 		return false;
901 
902 	miter->page = sg_page_iter_page(&miter->piter);
903 	miter->consumed = miter->length = miter->__remaining;
904 
905 	if (miter->__flags & SG_MITER_ATOMIC)
906 		miter->addr = kmap_atomic(miter->page) + miter->__offset;
907 	else if (miter->__flags & SG_MITER_LOCAL)
908 		miter->addr = kmap_local_page(miter->page) + miter->__offset;
909 	else
910 		miter->addr = kmap(miter->page) + miter->__offset;
911 
912 	return true;
913 }
914 EXPORT_SYMBOL(sg_miter_next);
915 
916 /**
917  * sg_miter_stop - stop mapping iteration
918  * @miter: sg mapping iter to be stopped
919  *
920  * Description:
921  *   Stops mapping iterator @miter.  @miter should have been started
922  *   using sg_miter_start().  A stopped iteration can be resumed by
923  *   calling sg_miter_next() on it.  This is useful when resources (kmap)
924  *   need to be released during iteration.
925  *
926  * Context:
927  *   Don't care otherwise.
928  */
929 void sg_miter_stop(struct sg_mapping_iter *miter)
930 {
931 	WARN_ON(miter->consumed > miter->length);
932 
933 	/* drop resources from the last iteration */
934 	if (miter->addr) {
935 		miter->__offset += miter->consumed;
936 		miter->__remaining -= miter->consumed;
937 
938 		if (miter->__flags & SG_MITER_TO_SG)
939 			flush_dcache_page(miter->page);
940 
941 		if (miter->__flags & SG_MITER_ATOMIC) {
942 			WARN_ON_ONCE(!pagefault_disabled());
943 			kunmap_atomic(miter->addr);
944 		} else if (miter->__flags & SG_MITER_LOCAL)
945 			kunmap_local(miter->addr);
946 		else
947 			kunmap(miter->page);
948 
949 		miter->page = NULL;
950 		miter->addr = NULL;
951 		miter->length = 0;
952 		miter->consumed = 0;
953 	}
954 }
955 EXPORT_SYMBOL(sg_miter_stop);
956 
957 /**
958  * sg_copy_buffer - Copy data between a linear buffer and an SG list
959  * @sgl:		 The SG list
960  * @nents:		 Number of SG entries
961  * @buf:		 Linear buffer to copy to or from
962  * @buflen:		 The number of bytes to copy
963  * @skip:		 Number of bytes to skip before copying
964  * @to_buffer:		 transfer direction (true == from an sg list to a
965  *			 buffer, false == from a buffer to an sg list)
966  *
967  * Returns the number of copied bytes.
968  *
969  **/
970 size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf,
971 		      size_t buflen, off_t skip, bool to_buffer)
972 {
973 	unsigned int offset = 0;
974 	struct sg_mapping_iter miter;
975 	unsigned int sg_flags = SG_MITER_LOCAL;
976 
977 	if (to_buffer)
978 		sg_flags |= SG_MITER_FROM_SG;
979 	else
980 		sg_flags |= SG_MITER_TO_SG;
981 
982 	sg_miter_start(&miter, sgl, nents, sg_flags);
983 
984 	if (!sg_miter_skip(&miter, skip))
985 		return 0;
986 
987 	while ((offset < buflen) && sg_miter_next(&miter)) {
988 		unsigned int len;
989 
990 		len = min(miter.length, buflen - offset);
991 
992 		if (to_buffer)
993 			memcpy(buf + offset, miter.addr, len);
994 		else
995 			memcpy(miter.addr, buf + offset, len);
996 
997 		offset += len;
998 	}
999 
1000 	sg_miter_stop(&miter);
1001 
1002 	return offset;
1003 }
1004 EXPORT_SYMBOL(sg_copy_buffer);
1005 
1006 /**
1007  * sg_copy_from_buffer - Copy from a linear buffer to an SG list
1008  * @sgl:		 The SG list
1009  * @nents:		 Number of SG entries
1010  * @buf:		 Where to copy from
1011  * @buflen:		 The number of bytes to copy
1012  *
1013  * Returns the number of copied bytes.
1014  *
1015  **/
size_t sg_copy_from_buffer(struct scatterlist *sgl, unsigned int nents,
			   const void *buf, size_t buflen)
{
	/*
	 * No skip; to_buffer == false copies from @buf into the sg list, so
	 * @buf is only read despite the cast dropping const.
	 */
	return sg_copy_buffer(sgl, nents, (void *)buf, buflen, 0, false);
}
1021 EXPORT_SYMBOL(sg_copy_from_buffer);
1022 
1023 /**
1024  * sg_copy_to_buffer - Copy from an SG list to a linear buffer
1025  * @sgl:		 The SG list
1026  * @nents:		 Number of SG entries
1027  * @buf:		 Where to copy to
1028  * @buflen:		 The number of bytes to copy
1029  *
1030  * Returns the number of copied bytes.
1031  *
1032  **/
size_t sg_copy_to_buffer(struct scatterlist *sgl, unsigned int nents,
			 void *buf, size_t buflen)
{
	/* No skip; to_buffer == true copies from the sg list into @buf. */
	return sg_copy_buffer(sgl, nents, buf, buflen, 0, true);
}
1038 EXPORT_SYMBOL(sg_copy_to_buffer);
1039 
1040 /**
1041  * sg_pcopy_from_buffer - Copy from a linear buffer to an SG list
1042  * @sgl:		 The SG list
1043  * @nents:		 Number of SG entries
1044  * @buf:		 Where to copy from
1045  * @buflen:		 The number of bytes to copy
1046  * @skip:		 Number of bytes to skip before copying
1047  *
1048  * Returns the number of copied bytes.
1049  *
1050  **/
1051 size_t sg_pcopy_from_buffer(struct scatterlist *sgl, unsigned int nents,
1052 			    const void *buf, size_t buflen, off_t skip)
1053 {
1054 	return sg_copy_buffer(sgl, nents, (void *)buf, buflen, skip, false);
1055 }
1056 EXPORT_SYMBOL(sg_pcopy_from_buffer);
1057 
1058 /**
1059  * sg_pcopy_to_buffer - Copy from an SG list to a linear buffer
1060  * @sgl:		 The SG list
1061  * @nents:		 Number of SG entries
1062  * @buf:		 Where to copy to
1063  * @buflen:		 The number of bytes to copy
1064  * @skip:		 Number of bytes to skip before copying
1065  *
1066  * Returns the number of copied bytes.
1067  *
1068  **/
1069 size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents,
1070 			  void *buf, size_t buflen, off_t skip)
1071 {
1072 	return sg_copy_buffer(sgl, nents, buf, buflen, skip, true);
1073 }
1074 EXPORT_SYMBOL(sg_pcopy_to_buffer);
1075 
1076 /**
1077  * sg_zero_buffer - Zero-out a part of a SG list
1078  * @sgl:		 The SG list
1079  * @nents:		 Number of SG entries
1080  * @buflen:		 The number of bytes to zero out
1081  * @skip:		 Number of bytes to skip before zeroing
1082  *
1083  * Returns the number of bytes zeroed.
1084  **/
1085 size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents,
1086 		       size_t buflen, off_t skip)
1087 {
1088 	unsigned int offset = 0;
1089 	struct sg_mapping_iter miter;
1090 	unsigned int sg_flags = SG_MITER_LOCAL | SG_MITER_TO_SG;
1091 
1092 	sg_miter_start(&miter, sgl, nents, sg_flags);
1093 
1094 	if (!sg_miter_skip(&miter, skip))
1095 		return false;
1096 
1097 	while (offset < buflen && sg_miter_next(&miter)) {
1098 		unsigned int len;
1099 
1100 		len = min(miter.length, buflen - offset);
1101 		memset(miter.addr, 0, len);
1102 
1103 		offset += len;
1104 	}
1105 
1106 	sg_miter_stop(&miter);
1107 	return offset;
1108 }
1109 EXPORT_SYMBOL(sg_zero_buffer);
1110 
1111 /*
1112  * Extract and pin a list of up to sg_max pages from UBUF- or IOVEC-class
1113  * iterators, and add them to the scatterlist.
1114  */
1115 static ssize_t extract_user_to_sg(struct iov_iter *iter,
1116 				  ssize_t maxsize,
1117 				  struct sg_table *sgtable,
1118 				  unsigned int sg_max,
1119 				  iov_iter_extraction_t extraction_flags)
1120 {
1121 	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
1122 	struct page **pages;
1123 	unsigned int npages;
1124 	ssize_t ret = 0, res;
1125 	size_t len, off;
1126 
1127 	/* We decant the page list into the tail of the scatterlist */
1128 	pages = (void *)sgtable->sgl +
1129 		array_size(sg_max, sizeof(struct scatterlist));
1130 	pages -= sg_max;
1131 
1132 	do {
1133 		res = iov_iter_extract_pages(iter, &pages, maxsize, sg_max,
1134 					     extraction_flags, &off);
1135 		if (res <= 0)
1136 			goto failed;
1137 
1138 		len = res;
1139 		maxsize -= len;
1140 		ret += len;
1141 		npages = DIV_ROUND_UP(off + len, PAGE_SIZE);
1142 		sg_max -= npages;
1143 
1144 		for (; npages > 0; npages--) {
1145 			struct page *page = *pages;
1146 			size_t seg = min_t(size_t, PAGE_SIZE - off, len);
1147 
1148 			*pages++ = NULL;
1149 			sg_set_page(sg, page, seg, off);
1150 			sgtable->nents++;
1151 			sg++;
1152 			len -= seg;
1153 			off = 0;
1154 		}
1155 	} while (maxsize > 0 && sg_max > 0);
1156 
1157 	return ret;
1158 
1159 failed:
1160 	while (sgtable->nents > sgtable->orig_nents)
1161 		unpin_user_page(sg_page(&sgtable->sgl[--sgtable->nents]));
1162 	return res;
1163 }
1164 
1165 /*
1166  * Extract up to sg_max pages from a BVEC-type iterator and add them to the
1167  * scatterlist.  The pages are not pinned.
1168  */
1169 static ssize_t extract_bvec_to_sg(struct iov_iter *iter,
1170 				  ssize_t maxsize,
1171 				  struct sg_table *sgtable,
1172 				  unsigned int sg_max,
1173 				  iov_iter_extraction_t extraction_flags)
1174 {
1175 	const struct bio_vec *bv = iter->bvec;
1176 	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
1177 	unsigned long start = iter->iov_offset;
1178 	unsigned int i;
1179 	ssize_t ret = 0;
1180 
1181 	for (i = 0; i < iter->nr_segs; i++) {
1182 		size_t off, len;
1183 
1184 		len = bv[i].bv_len;
1185 		if (start >= len) {
1186 			start -= len;
1187 			continue;
1188 		}
1189 
1190 		len = min_t(size_t, maxsize, len - start);
1191 		off = bv[i].bv_offset + start;
1192 
1193 		sg_set_page(sg, bv[i].bv_page, len, off);
1194 		sgtable->nents++;
1195 		sg++;
1196 		sg_max--;
1197 
1198 		ret += len;
1199 		maxsize -= len;
1200 		if (maxsize <= 0 || sg_max == 0)
1201 			break;
1202 		start = 0;
1203 	}
1204 
1205 	if (ret > 0)
1206 		iov_iter_advance(iter, ret);
1207 	return ret;
1208 }
1209 
1210 /*
1211  * Extract up to sg_max pages from a KVEC-type iterator and add them to the
1212  * scatterlist.  This can deal with vmalloc'd buffers as well as kmalloc'd or
1213  * static buffers.  The pages are not pinned.
1214  */
1215 static ssize_t extract_kvec_to_sg(struct iov_iter *iter,
1216 				  ssize_t maxsize,
1217 				  struct sg_table *sgtable,
1218 				  unsigned int sg_max,
1219 				  iov_iter_extraction_t extraction_flags)
1220 {
1221 	const struct kvec *kv = iter->kvec;
1222 	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
1223 	unsigned long start = iter->iov_offset;
1224 	unsigned int i;
1225 	ssize_t ret = 0;
1226 
1227 	for (i = 0; i < iter->nr_segs; i++) {
1228 		struct page *page;
1229 		unsigned long kaddr;
1230 		size_t off, len, seg;
1231 
1232 		len = kv[i].iov_len;
1233 		if (start >= len) {
1234 			start -= len;
1235 			continue;
1236 		}
1237 
1238 		kaddr = (unsigned long)kv[i].iov_base + start;
1239 		off = kaddr & ~PAGE_MASK;
1240 		len = min_t(size_t, maxsize, len - start);
1241 		kaddr &= PAGE_MASK;
1242 
1243 		maxsize -= len;
1244 		ret += len;
1245 		do {
1246 			seg = min_t(size_t, len, PAGE_SIZE - off);
1247 			if (is_vmalloc_or_module_addr((void *)kaddr))
1248 				page = vmalloc_to_page((void *)kaddr);
1249 			else
1250 				page = virt_to_page((void *)kaddr);
1251 
1252 			sg_set_page(sg, page, len, off);
1253 			sgtable->nents++;
1254 			sg++;
1255 			sg_max--;
1256 
1257 			len -= seg;
1258 			kaddr += PAGE_SIZE;
1259 			off = 0;
1260 		} while (len > 0 && sg_max > 0);
1261 
1262 		if (maxsize <= 0 || sg_max == 0)
1263 			break;
1264 		start = 0;
1265 	}
1266 
1267 	if (ret > 0)
1268 		iov_iter_advance(iter, ret);
1269 	return ret;
1270 }
1271 
1272 /*
1273  * Extract up to sg_max folios from an FOLIOQ-type iterator and add them to
1274  * the scatterlist.  The pages are not pinned.
1275  */
1276 static ssize_t extract_folioq_to_sg(struct iov_iter *iter,
1277 				   ssize_t maxsize,
1278 				   struct sg_table *sgtable,
1279 				   unsigned int sg_max,
1280 				   iov_iter_extraction_t extraction_flags)
1281 {
1282 	const struct folio_queue *folioq = iter->folioq;
1283 	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
1284 	unsigned int slot = iter->folioq_slot;
1285 	ssize_t ret = 0;
1286 	size_t offset = iter->iov_offset;
1287 
1288 	BUG_ON(!folioq);
1289 
1290 	if (slot >= folioq_nr_slots(folioq)) {
1291 		folioq = folioq->next;
1292 		if (WARN_ON_ONCE(!folioq))
1293 			return 0;
1294 		slot = 0;
1295 	}
1296 
1297 	do {
1298 		struct folio *folio = folioq_folio(folioq, slot);
1299 		size_t fsize = folioq_folio_size(folioq, slot);
1300 
1301 		if (offset < fsize) {
1302 			size_t part = umin(maxsize - ret, fsize - offset);
1303 
1304 			sg_set_page(sg, folio_page(folio, 0), part, offset);
1305 			sgtable->nents++;
1306 			sg++;
1307 			sg_max--;
1308 			offset += part;
1309 			ret += part;
1310 		}
1311 
1312 		if (offset >= fsize) {
1313 			offset = 0;
1314 			slot++;
1315 			if (slot >= folioq_nr_slots(folioq)) {
1316 				if (!folioq->next) {
1317 					WARN_ON_ONCE(ret < iter->count);
1318 					break;
1319 				}
1320 				folioq = folioq->next;
1321 				slot = 0;
1322 			}
1323 		}
1324 	} while (sg_max > 0 && ret < maxsize);
1325 
1326 	iter->folioq = folioq;
1327 	iter->folioq_slot = slot;
1328 	iter->iov_offset = offset;
1329 	iter->count -= ret;
1330 	return ret;
1331 }
1332 
1333 /*
1334  * Extract up to sg_max folios from an XARRAY-type iterator and add them to
1335  * the scatterlist.  The pages are not pinned.
1336  */
1337 static ssize_t extract_xarray_to_sg(struct iov_iter *iter,
1338 				    ssize_t maxsize,
1339 				    struct sg_table *sgtable,
1340 				    unsigned int sg_max,
1341 				    iov_iter_extraction_t extraction_flags)
1342 {
1343 	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
1344 	struct xarray *xa = iter->xarray;
1345 	struct folio *folio;
1346 	loff_t start = iter->xarray_start + iter->iov_offset;
1347 	pgoff_t index = start / PAGE_SIZE;
1348 	ssize_t ret = 0;
1349 	size_t offset, len;
1350 	XA_STATE(xas, xa, index);
1351 
1352 	rcu_read_lock();
1353 
1354 	xas_for_each(&xas, folio, ULONG_MAX) {
1355 		if (xas_retry(&xas, folio))
1356 			continue;
1357 		if (WARN_ON(xa_is_value(folio)))
1358 			break;
1359 		if (WARN_ON(folio_test_hugetlb(folio)))
1360 			break;
1361 
1362 		offset = offset_in_folio(folio, start);
1363 		len = min_t(size_t, maxsize, folio_size(folio) - offset);
1364 
1365 		sg_set_page(sg, folio_page(folio, 0), len, offset);
1366 		sgtable->nents++;
1367 		sg++;
1368 		sg_max--;
1369 
1370 		maxsize -= len;
1371 		ret += len;
1372 		if (maxsize <= 0 || sg_max == 0)
1373 			break;
1374 	}
1375 
1376 	rcu_read_unlock();
1377 	if (ret > 0)
1378 		iov_iter_advance(iter, ret);
1379 	return ret;
1380 }
1381 
1382 /**
1383  * extract_iter_to_sg - Extract pages from an iterator and add to an sglist
1384  * @iter: The iterator to extract from
1385  * @maxsize: The amount of iterator to copy
1386  * @sgtable: The scatterlist table to fill in
1387  * @sg_max: Maximum number of elements in @sgtable that may be filled
1388  * @extraction_flags: Flags to qualify the request
1389  *
1390  * Extract the page fragments from the given amount of the source iterator and
1391  * add them to a scatterlist that refers to all of those bits, to a maximum
1392  * addition of @sg_max elements.
1393  *
1394  * The pages referred to by UBUF- and IOVEC-type iterators are extracted and
1395  * pinned; BVEC-, KVEC-, FOLIOQ- and XARRAY-type are extracted but aren't
1396  * pinned; DISCARD-type is not supported.
1397  *
1398  * No end mark is placed on the scatterlist; that's left to the caller.
1399  *
1400  * @extraction_flags can have ITER_ALLOW_P2PDMA set to request peer-to-peer DMA
1401  * be allowed on the pages extracted.
1402  *
1403  * If successful, @sgtable->nents is updated to include the number of elements
1404  * added and the number of bytes added is returned.  @sgtable->orig_nents is
1405  * left unaltered.
1406  *
1407  * The iov_iter_extract_mode() function should be used to query how cleanup
1408  * should be performed.
1409  */
1410 ssize_t extract_iter_to_sg(struct iov_iter *iter, size_t maxsize,
1411 			   struct sg_table *sgtable, unsigned int sg_max,
1412 			   iov_iter_extraction_t extraction_flags)
1413 {
1414 	if (maxsize == 0)
1415 		return 0;
1416 
1417 	switch (iov_iter_type(iter)) {
1418 	case ITER_UBUF:
1419 	case ITER_IOVEC:
1420 		return extract_user_to_sg(iter, maxsize, sgtable, sg_max,
1421 					  extraction_flags);
1422 	case ITER_BVEC:
1423 		return extract_bvec_to_sg(iter, maxsize, sgtable, sg_max,
1424 					  extraction_flags);
1425 	case ITER_KVEC:
1426 		return extract_kvec_to_sg(iter, maxsize, sgtable, sg_max,
1427 					  extraction_flags);
1428 	case ITER_FOLIOQ:
1429 		return extract_folioq_to_sg(iter, maxsize, sgtable, sg_max,
1430 					    extraction_flags);
1431 	case ITER_XARRAY:
1432 		return extract_xarray_to_sg(iter, maxsize, sgtable, sg_max,
1433 					    extraction_flags);
1434 	default:
1435 		pr_err("%s(%u) unsupported\n", __func__, iov_iter_type(iter));
1436 		WARN_ON_ONCE(1);
1437 		return -EIO;
1438 	}
1439 }
1440 EXPORT_SYMBOL_GPL(extract_iter_to_sg);
1441