xref: /linux/lib/scatterlist.c (revision 27b9989b87119da2f33f2c0fcbb8984ab4ebdf1a)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2007 Jens Axboe <jens.axboe@oracle.com>
4  *
5  * Scatterlist handling helpers.
6  */
7 #include <linux/export.h>
8 #include <linux/slab.h>
9 #include <linux/scatterlist.h>
10 #include <linux/highmem.h>
11 #include <linux/kmemleak.h>
12 #include <linux/bvec.h>
13 #include <linux/uio.h>
14 #include <linux/folio_queue.h>
15 
16 /**
17  * sg_nents - return total count of entries in scatterlist
18  * @sg:		The scatterlist
19  *
20  * Description:
21  * Allows to know how many entries are in sg, taking into account
22  * chaining as well
23  *
24  **/
int sg_nents(struct scatterlist *sg)
{
	int count = 0;

	/* Step through the list with sg_next() until it runs out. */
	while (sg) {
		count++;
		sg = sg_next(sg);
	}

	return count;
}
EXPORT_SYMBOL(sg_nents);
33 
34 /**
35  * sg_nents_for_len - return total count of entries in scatterlist
36  *                    needed to satisfy the supplied length
37  * @sg:		The scatterlist
38  * @len:	The total required length
39  *
40  * Description:
41  * Determines the number of entries in sg that are required to meet
42  * the supplied length, taking into account chaining as well
43  *
44  * Returns:
45  *   the number of sg entries needed, negative error on failure
46  *
47  **/
int sg_nents_for_len(struct scatterlist *sg, u64 len)
{
	u64 covered = 0;
	int count = 0;

	/* A zero-length request needs no entries at all. */
	if (len == 0)
		return 0;

	while (sg) {
		count++;
		covered += sg->length;
		/* Stop at the first entry that brings the running total up to @len. */
		if (covered >= len)
			return count;
		sg = sg_next(sg);
	}

	/* The list ended before @len bytes were accounted for. */
	return -EINVAL;
}
EXPORT_SYMBOL(sg_nents_for_len);
66 
67 /**
68  * sg_last - return the last scatterlist entry in a list
69  * @sgl:	First entry in the scatterlist
70  * @nents:	Number of entries in the scatterlist
71  *
72  * Description:
73  *   Should only be used casually, it (currently) scans the entire list
74  *   to get the last entry.
75  *
76  *   Note that the @sgl pointer passed in need not be the first one,
77  *   the important bit is that @nents denotes the number of entries that
78  *   exist from @sgl.
79  *
80  **/
sg_last(struct scatterlist * sgl,unsigned int nents)81 struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents)
82 {
83 	struct scatterlist *sg, *ret = NULL;
84 	unsigned int i;
85 
86 	for_each_sg(sgl, sg, nents, i)
87 		ret = sg;
88 
89 	BUG_ON(!sg_is_last(ret));
90 	return ret;
91 }
92 EXPORT_SYMBOL(sg_last);
93 
94 /**
95  * sg_init_table - Initialize SG table
96  * @sgl:	   The SG table
97  * @nents:	   Number of entries in table
98  *
99  * Notes:
100  *   If this is part of a chained sg table, sg_mark_end() should be
101  *   used only on the last table part.
102  *
103  **/
sg_init_table(struct scatterlist * sgl,unsigned int nents)104 void sg_init_table(struct scatterlist *sgl, unsigned int nents)
105 {
106 	memset(sgl, 0, sizeof(*sgl) * nents);
107 	sg_init_marker(sgl, nents);
108 }
109 EXPORT_SYMBOL(sg_init_table);
110 
111 /**
112  * sg_init_one - Initialize a single entry sg list
113  * @sg:		 SG entry
114  * @buf:	 Virtual address for IO
115  * @buflen:	 IO length
116  *
117  **/
void sg_init_one(struct scatterlist *sg, const void *buf, unsigned int buflen)
{
	/* Set up a one-entry table ... */
	sg_init_table(sg, 1);

	/* ... and attach the @buflen bytes at @buf to that entry. */
	sg_set_buf(sg, buf, buflen);
}
EXPORT_SYMBOL(sg_init_one);
124 
125 /*
126  * The default behaviour of sg_alloc_table() is to use these kmalloc/kfree
127  * helpers.
128  */
static struct scatterlist *sg_kmalloc(unsigned int nents, gfp_t gfp_mask)
{
	void *page;

	/* Anything other than a maximum-sized chunk is a plain array allocation. */
	if (nents != SG_MAX_SINGLE_ALLOC)
		return kmalloc_array(nents, sizeof(struct scatterlist),
				     gfp_mask);

	/*
	 * Kmemleak doesn't track page allocations as they are not commonly
	 * used (in a raw form) for kernel data structures. A chained table
	 * links a list of such pages and then ends in a normal kmalloc
	 * (tracked by kmemleak). To keep that last allocation from becoming
	 * decoupled (and thus a false positive), inform kmemleak of all the
	 * intermediate page allocations as well.
	 */
	page = (void *) __get_free_page(gfp_mask);
	kmemleak_alloc(page, PAGE_SIZE, 1, gfp_mask);
	return page;
}
148 
sg_kfree(struct scatterlist * sg,unsigned int nents)149 static void sg_kfree(struct scatterlist *sg, unsigned int nents)
150 {
151 	if (nents == SG_MAX_SINGLE_ALLOC) {
152 		kmemleak_free(sg);
153 		free_page((unsigned long) sg);
154 	} else
155 		kfree(sg);
156 }
157 
158 /**
159  * __sg_free_table - Free a previously mapped sg table
160  * @table:	The sg table header to use
161  * @max_ents:	The maximum number of entries per single scatterlist
162  * @nents_first_chunk: Number of entries in the (preallocated) first
163  * 	scatterlist chunk, 0 means no such preallocated first chunk
164  * @free_fn:	Free function
165  * @num_ents:	Number of entries in the table
166  *
167  *  Description:
168  *    Free an sg table previously allocated and setup with
169  *    __sg_alloc_table().  The @max_ents value must be identical to
170  *    that previously used with __sg_alloc_table().
171  *
172  **/
void __sg_free_table(struct sg_table *table, unsigned int max_ents,
		     unsigned int nents_first_chunk, sg_free_fn *free_fn,
		     unsigned int num_ents)
{
	struct scatterlist *chunk, *next_chunk;
	/* Capacity of the chunk currently being processed. */
	unsigned int chunk_cap = nents_first_chunk ? nents_first_chunk : max_ents;

	/* Nothing to release if no list is attached to the table. */
	if (unlikely(!table->sgl))
		return;

	for (chunk = table->sgl; num_ents; chunk = next_chunk) {
		unsigned int chunk_len = num_ents;
		unsigned int payload;

		if (chunk_len > chunk_cap) {
			/*
			 * More entries remain than this chunk can hold: the
			 * chunk is full and its final slot is the chain
			 * pointer to the next chunk, so it carries one entry
			 * less than its size.
			 */
			next_chunk = sg_chain_ptr(&chunk[chunk_cap - 1]);
			chunk_len = chunk_cap;
			payload = chunk_len - 1;
		} else {
			/* Final chunk: every remaining entry lives here. */
			payload = chunk_len;
			next_chunk = NULL;
		}

		num_ents -= payload;

		/*
		 * A preallocated first chunk is not handed to @free_fn; just
		 * clear the marker so that later chunks are freed.
		 */
		if (nents_first_chunk)
			nents_first_chunk = 0;
		else
			free_fn(chunk, chunk_len);

		/* Every chunk after the first is limited by @max_ents. */
		chunk_cap = max_ents;
	}

	table->sgl = NULL;
}
EXPORT_SYMBOL(__sg_free_table);
215 
216 /**
217  * sg_free_append_table - Free a previously allocated append sg table.
218  * @table:	 The mapped sg append table header
219  *
220  **/
sg_free_append_table(struct sg_append_table * table)221 void sg_free_append_table(struct sg_append_table *table)
222 {
223 	__sg_free_table(&table->sgt, SG_MAX_SINGLE_ALLOC, 0, sg_kfree,
224 			table->total_nents);
225 }
226 EXPORT_SYMBOL(sg_free_append_table);
227 
228 
229 /**
230  * sg_free_table - Free a previously allocated sg table
231  * @table:	The mapped sg table header
232  *
233  **/
sg_free_table(struct sg_table * table)234 void sg_free_table(struct sg_table *table)
235 {
236 	__sg_free_table(table, SG_MAX_SINGLE_ALLOC, 0, sg_kfree,
237 			table->orig_nents);
238 }
239 EXPORT_SYMBOL(sg_free_table);
240 
241 /**
242  * __sg_alloc_table - Allocate and initialize an sg table with given allocator
243  * @table:	The sg table header to use
244  * @nents:	Number of entries in sg list
245  * @max_ents:	The maximum number of entries the allocator returns per call
246  * @first_chunk: first SGL if preallocated (may be %NULL)
247  * @nents_first_chunk: Number of entries in the (preallocated) first
248  * 	scatterlist chunk, 0 means no such preallocated chunk provided by user
249  * @gfp_mask:	GFP allocation mask
250  * @alloc_fn:	Allocator to use
251  *
252  * Description:
253  *   This function returns a @table @nents long. The allocator is
254  *   defined to return scatterlist chunks of maximum size @max_ents.
255  *   Thus if @nents is bigger than @max_ents, the scatterlists will be
256  *   chained in units of @max_ents.
257  *
258  * Notes:
259  *   If this function returns non-0 (eg failure), the caller must call
260  *   __sg_free_table() to cleanup any leftover allocations.
261  *
262  **/
int __sg_alloc_table(struct sg_table *table, unsigned int nents,
		     unsigned int max_ents, struct scatterlist *first_chunk,
		     unsigned int nents_first_chunk, gfp_t gfp_mask,
		     sg_alloc_fn *alloc_fn)
{
	struct scatterlist *chunk;
	struct scatterlist *prev_chunk = NULL;
	unsigned int remaining;
	/* Capacity of the chunk being set up; the first one may be special. */
	unsigned int chunk_cap = nents_first_chunk ? nents_first_chunk : max_ents;
	unsigned int prev_cap;

	memset(table, 0, sizeof(*table));

	if (!nents)
		return -EINVAL;
#ifdef CONFIG_ARCH_NO_SG_CHAIN
	if (WARN_ON_ONCE(nents > max_ents))
		return -EINVAL;
#endif

	remaining = nents;
	do {
		unsigned int chunk_len = remaining;
		unsigned int payload;

		/*
		 * A chunk that cannot hold everything that is left is filled
		 * completely and gives up its final slot to the chain link.
		 */
		if (chunk_len > chunk_cap) {
			chunk_len = chunk_cap;
			payload = chunk_len - 1;
		} else {
			payload = chunk_len;
		}

		remaining -= payload;

		/* Use the caller-supplied first chunk once, then allocate. */
		if (first_chunk) {
			chunk = first_chunk;
			first_chunk = NULL;
		} else {
			chunk = alloc_fn(chunk_len, gfp_mask);
		}

		if (unlikely(!chunk)) {
			/*
			 * Adjust entry count to reflect that the last
			 * entry of the previous table won't be used for
			 * linkage.  Without this, sg_kfree() may get
			 * confused.
			 */
			if (prev_chunk) {
				table->orig_nents++;
				table->nents = table->orig_nents;
			}

			return -ENOMEM;
		}

		sg_init_table(chunk, chunk_len);
		table->orig_nents += payload;
		table->nents = table->orig_nents;

		/*
		 * The first chunk becomes the head of the table; every later
		 * chunk is chained onto the one before it.
		 */
		if (prev_chunk)
			sg_chain(prev_chunk, prev_cap, chunk);
		else
			table->sgl = chunk;

		/* Nothing left after this chunk: mark its last used entry. */
		if (!remaining)
			sg_mark_end(&chunk[payload - 1]);

		prev_chunk = chunk;
		prev_cap = chunk_cap;
		chunk_cap = max_ents;
	} while (remaining);

	return 0;
}
EXPORT_SYMBOL(__sg_alloc_table);
340 
341 /**
342  * sg_alloc_table - Allocate and initialize an sg table
343  * @table:	The sg table header to use
344  * @nents:	Number of entries in sg list
345  * @gfp_mask:	GFP allocation mask
346  *
347  *  Description:
348  *    Allocate and initialize an sg table. If @nents is larger than
349  *    SG_MAX_SINGLE_ALLOC a chained sg table will be setup.
350  *
351  **/
int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask)
{
	int err = __sg_alloc_table(table, nents, SG_MAX_SINGLE_ALLOC,
				   NULL, 0, gfp_mask, sg_kmalloc);

	/* On failure, release whatever was set up before the error. */
	if (unlikely(err))
		sg_free_table(table);

	return err;
}
EXPORT_SYMBOL(sg_alloc_table);
363 
static struct scatterlist *get_next_sg(struct sg_append_table *table,
				       struct scatterlist *cur,
				       unsigned long needed_sges,
				       gfp_t gfp_mask)
{
	struct scatterlist *fresh, *following;
	unsigned int n_alloc;

	if (cur) {
		following = sg_next(cur);
		/*
		 * Check whether the last entry should be kept for chaining:
		 * hand out the following entry unless it is the last one and
		 * more than one entry is still needed.
		 */
		if (!sg_is_last(following) || needed_sges == 1)
			return following;
	}

	/* Allocate a new chunk, capped at the single-allocation maximum. */
	n_alloc = min_t(unsigned long, needed_sges, SG_MAX_SINGLE_ALLOC);
	fresh = sg_kmalloc(n_alloc, gfp_mask);
	if (!fresh)
		return ERR_PTR(-ENOMEM);

	sg_init_table(fresh, n_alloc);

	if (!cur) {
		/* Very first chunk: it becomes the head of the table. */
		table->sgt.sgl = fresh;
		table->total_nents = n_alloc;
		return fresh;
	}

	/*
	 * The old last entry turns into the chain link, so the total grows
	 * by one less than the size of the new chunk.
	 */
	table->total_nents += n_alloc - 1;
	__sg_chain(following, fresh);
	return fresh;
}
393 
pages_are_mergeable(struct page * a,struct page * b)394 static bool pages_are_mergeable(struct page *a, struct page *b)
395 {
396 	if (page_to_pfn(a) != page_to_pfn(b) + 1)
397 		return false;
398 	if (!zone_device_pages_have_same_pgmap(a, b))
399 		return false;
400 	return true;
401 }
402 
403 /**
404  * sg_alloc_append_table_from_pages - Allocate and initialize an append sg
405  *                                    table from an array of pages
406  * @sgt_append:  The sg append table to use
407  * @pages:       Pointer to an array of page pointers
408  * @n_pages:     Number of pages in the pages array
409  * @offset:      Offset from start of the first page to the start of a buffer
410  * @size:        Number of valid bytes in the buffer (after offset)
411  * @max_segment: Maximum size of a scatterlist element in bytes
412  * @left_pages:  Number of pages the caller still has to add after this call
413  * @gfp_mask:	 GFP allocation mask
414  *
415  * Description:
416  *    In the first call it allocates and initializes an sg table from a list of
417  *    pages; otherwise it reuses the scatterlist from sgt_append. Contiguous ranges of
418  *    the pages are squashed into a single scatterlist entry up to the maximum
419  *    size specified in @max_segment.  A user may provide an offset at a start
420  *    and a size of valid data in a buffer specified by the page array. The
421  *    returned sg table is released by sg_free_append_table
422  *
423  * Returns:
424  *   0 on success, negative error on failure
425  *
426  * Notes:
427  *   If this function returns non-0 (eg failure), the caller must call
428  *   sg_free_append_table() to cleanup any leftover allocations.
429  *
430  *   In the first call, sgt_append must be initialized.
431  */
int sg_alloc_append_table_from_pages(struct sg_append_table *sgt_append,
		struct page **pages, unsigned int n_pages, unsigned int offset,
		unsigned long size, unsigned int max_segment,
		unsigned int left_pages, gfp_t gfp_mask)
{
	unsigned int n_chunks, first_pg, run_len, idx, saved_len = 0;
	unsigned int n_added = 0;
	/* Last entry filled by a previous call, or NULL on the first call. */
	struct scatterlist *prev = sgt_append->prv;
	struct scatterlist *sg = prev;
	struct page *tail_pg;

	/*
	 * The algorithm below requires max_segment to be aligned to PAGE_SIZE
	 * otherwise it can overshoot.
	 */
	max_segment = ALIGN_DOWN(max_segment, PAGE_SIZE);
	if (WARN_ON(max_segment < PAGE_SIZE))
		return -EINVAL;

	/* Appending to an existing table is refused without SG chaining. */
	if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && prev)
		return -EOPNOTSUPP;

	if (prev) {
		unsigned long next_pfn;

		/* An offset is only accepted on the first call. */
		if (WARN_ON(offset))
			return -EINVAL;

		/* Merge contiguous pages into the last SG */
		saved_len = prev->length;
		next_pfn = (sg_phys(prev) + saved_len) / PAGE_SIZE;
		if (page_to_pfn(pages[0]) == next_pfn) {
			tail_pg = pfn_to_page(next_pfn - 1);
			while (n_pages && pages_are_mergeable(pages[0], tail_pg)) {
				if (prev->length + PAGE_SIZE > max_segment)
					break;
				prev->length += PAGE_SIZE;
				tail_pg = pages[0];
				pages++;
				n_pages--;
			}
			/* Everything was absorbed by the previous entry. */
			if (!n_pages)
				goto out;
		}
	}

	/* compute number of contiguous chunks */
	n_chunks = 1;
	run_len = 0;
	for (idx = 1; idx < n_pages; idx++) {
		run_len += PAGE_SIZE;
		if (run_len >= max_segment ||
		    !pages_are_mergeable(pages[idx], pages[idx - 1])) {
			n_chunks++;
			run_len = 0;
		}
	}

	/* merging chunks and putting them into the scatterlist */
	first_pg = 0;
	for (idx = 0; idx < n_chunks; idx++) {
		unsigned int end_pg, chunk_bytes;

		/* look for the end of the current chunk */
		run_len = 0;
		for (end_pg = first_pg + 1; end_pg < n_pages; end_pg++) {
			run_len += PAGE_SIZE;
			if (run_len >= max_segment ||
			    !pages_are_mergeable(pages[end_pg],
						 pages[end_pg - 1]))
				break;
		}

		/* Pass how many chunks might be left */
		sg = get_next_sg(sgt_append, sg, n_chunks - idx + left_pages,
				 gfp_mask);
		if (IS_ERR(sg)) {
			/*
			 * Adjust entry length to be as before function was
			 * called.
			 */
			if (prev)
				prev->length = saved_len;
			return PTR_ERR(sg);
		}

		/* Only the first chunk is shortened by the starting offset. */
		chunk_bytes = ((end_pg - first_pg) << PAGE_SHIFT) - offset;
		sg_set_page(sg, pages[first_pg],
			    min_t(unsigned long, size, chunk_bytes), offset);
		n_added++;
		size -= chunk_bytes;
		offset = 0;
		first_pg = end_pg;
	}

	sgt_append->sgt.nents += n_added;
	sgt_append->sgt.orig_nents = sgt_append->sgt.nents;
	sgt_append->prv = sg;
out:
	/* With no further pages announced, terminate the list here. */
	if (!left_pages)
		sg_mark_end(sg);
	return 0;
}
EXPORT_SYMBOL(sg_alloc_append_table_from_pages);
532 
533 /**
534  * sg_alloc_table_from_pages_segment - Allocate and initialize an sg table from
535  *                                     an array of pages and given maximum
536  *                                     segment.
537  * @sgt:	 The sg table header to use
538  * @pages:	 Pointer to an array of page pointers
539  * @n_pages:	 Number of pages in the pages array
540  * @offset:      Offset from start of the first page to the start of a buffer
541  * @size:        Number of valid bytes in the buffer (after offset)
542  * @max_segment: Maximum size of a scatterlist element in bytes
543  * @gfp_mask:	 GFP allocation mask
544  *
545  *  Description:
546  *    Allocate and initialize an sg table from a list of pages. Contiguous
547  *    ranges of the pages are squashed into a single scatterlist node up to the
548  *    maximum size specified in @max_segment. A user may provide an offset at a
549  *    start and a size of valid data in a buffer specified by the page array.
550  *
551  *    The returned sg table is released by sg_free_table.
552  *
553  *  Returns:
554  *   0 on success, negative error on failure
555  */
int sg_alloc_table_from_pages_segment(struct sg_table *sgt, struct page **pages,
				unsigned int n_pages, unsigned int offset,
				unsigned long size, unsigned int max_segment,
				gfp_t gfp_mask)
{
	struct sg_append_table append = {};
	int err;

	/* Build the table in one go: no pages are left for a later call. */
	err = sg_alloc_append_table_from_pages(&append, pages, n_pages, offset,
					       size, max_segment, 0, gfp_mask);
	if (!err) {
		/* Hand the finished table header over to the caller. */
		memcpy(sgt, &append.sgt, sizeof(append.sgt));
		WARN_ON(append.total_nents != sgt->orig_nents);
		return 0;
	}

	/* Clean up any partial allocations before reporting the error. */
	sg_free_append_table(&append);
	return err;
}
EXPORT_SYMBOL(sg_alloc_table_from_pages_segment);
575 
576 #ifdef CONFIG_SGL_ALLOC
577 
578 /**
579  * sgl_alloc_order - allocate a scatterlist and its pages
580  * @length: Length in bytes of the scatterlist. Must be at least one
581  * @order: Second argument for alloc_pages()
582  * @chainable: Whether or not to allocate an extra element in the scatterlist
583  *	for scatterlist chaining purposes
584  * @gfp: Memory allocation flags
585  * @nent_p: [out] Number of entries in the scatterlist that have pages
586  *
587  * Returns: A pointer to an initialized scatterlist or %NULL upon failure.
588  */
struct scatterlist *sgl_alloc_order(unsigned long long length,
				    unsigned int order, bool chainable,
				    gfp_t gfp, unsigned int *nent_p)
{
	struct scatterlist *table, *cur;
	unsigned int n_data, n_total;

	/* Number of (PAGE_SIZE << order)-sized elements needed for @length. */
	n_data = round_up(length, PAGE_SIZE << order) >> (PAGE_SHIFT + order);
	/* Check for integer overflow */
	if (length > (n_data << (PAGE_SHIFT + order)))
		return NULL;

	n_total = n_data;
	if (chainable) {
		/* Check for integer overflow */
		if (n_total + 1 < n_total)
			return NULL;
		/* One extra element is reserved for chaining. */
		n_total++;
	}

	table = kmalloc_array(n_total, sizeof(struct scatterlist),
			      gfp & ~GFP_DMA);
	if (!table)
		return NULL;

	sg_init_table(table, n_total);

	/* Back each element with freshly allocated pages until @length is met. */
	for (cur = table; length; cur = sg_next(cur)) {
		u32 elem_len = min_t(u64, length, PAGE_SIZE << order);
		struct page *page = alloc_pages(gfp, order);

		if (!page) {
			/* Releases the pages attached so far and the table. */
			sgl_free_order(table, order);
			return NULL;
		}

		sg_set_page(cur, page, elem_len, 0);
		length -= elem_len;
	}

	WARN_ONCE(length, "length = %lld\n", length);
	if (nent_p)
		*nent_p = n_data;
	return table;
}
EXPORT_SYMBOL(sgl_alloc_order);
634 
635 /**
636  * sgl_alloc - allocate a scatterlist and its pages
637  * @length: Length in bytes of the scatterlist
638  * @gfp: Memory allocation flags
639  * @nent_p: [out] Number of entries in the scatterlist
640  *
641  * Returns: A pointer to an initialized scatterlist or %NULL upon failure.
642  */
struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp,
			      unsigned int *nent_p)
{
	/* Order-0 pages and no extra element for chaining. */
	const unsigned int order = 0;
	const bool chainable = false;

	return sgl_alloc_order(length, order, chainable, gfp, nent_p);
}
EXPORT_SYMBOL(sgl_alloc);
649 
650 /**
651  * sgl_free_n_order - free a scatterlist and its pages
652  * @sgl: Scatterlist with one or more elements
653  * @nents: Maximum number of elements to free
654  * @order: Second argument for __free_pages()
655  *
656  * Notes:
657  * - If several scatterlists have been chained and each chain element is
658  *   freed separately then it's essential to set nents correctly to avoid that a
659  *   page would get freed twice.
660  * - All pages in a chained scatterlist can be freed at once by setting @nents
661  *   to a high number.
662  */
sgl_free_n_order(struct scatterlist * sgl,int nents,int order)663 void sgl_free_n_order(struct scatterlist *sgl, int nents, int order)
664 {
665 	struct scatterlist *sg;
666 	struct page *page;
667 	int i;
668 
669 	for_each_sg(sgl, sg, nents, i) {
670 		if (!sg)
671 			break;
672 		page = sg_page(sg);
673 		if (page)
674 			__free_pages(page, order);
675 	}
676 	kfree(sgl);
677 }
678 EXPORT_SYMBOL(sgl_free_n_order);
679 
680 /**
681  * sgl_free_order - free a scatterlist and its pages
682  * @sgl: Scatterlist with one or more elements
683  * @order: Second argument for __free_pages()
684  */
sgl_free_order(struct scatterlist * sgl,int order)685 void sgl_free_order(struct scatterlist *sgl, int order)
686 {
687 	sgl_free_n_order(sgl, INT_MAX, order);
688 }
689 EXPORT_SYMBOL(sgl_free_order);
690 
691 /**
692  * sgl_free - free a scatterlist and its pages
693  * @sgl: Scatterlist with one or more elements
694  */
void sgl_free(struct scatterlist *sgl)
{
	/* Free with page order 0. */
	const int order = 0;

	sgl_free_order(sgl, order);
}
EXPORT_SYMBOL(sgl_free);
700 
701 #endif /* CONFIG_SGL_ALLOC */
702 
__sg_page_iter_start(struct sg_page_iter * piter,struct scatterlist * sglist,unsigned int nents,unsigned long pgoffset)703 void __sg_page_iter_start(struct sg_page_iter *piter,
704 			  struct scatterlist *sglist, unsigned int nents,
705 			  unsigned long pgoffset)
706 {
707 	piter->__pg_advance = 0;
708 	piter->__nents = nents;
709 
710 	piter->sg = sglist;
711 	piter->sg_pgoffset = pgoffset;
712 }
713 EXPORT_SYMBOL(__sg_page_iter_start);
714 
sg_page_count(struct scatterlist * sg)715 static int sg_page_count(struct scatterlist *sg)
716 {
717 	return PAGE_ALIGN(sg->offset + sg->length) >> PAGE_SHIFT;
718 }
719 
__sg_page_iter_next(struct sg_page_iter * piter)720 bool __sg_page_iter_next(struct sg_page_iter *piter)
721 {
722 	if (!piter->__nents || !piter->sg)
723 		return false;
724 
725 	piter->sg_pgoffset += piter->__pg_advance;
726 	piter->__pg_advance = 1;
727 
728 	while (piter->sg_pgoffset >= sg_page_count(piter->sg)) {
729 		piter->sg_pgoffset -= sg_page_count(piter->sg);
730 		piter->sg = sg_next(piter->sg);
731 		if (!--piter->__nents || !piter->sg)
732 			return false;
733 	}
734 
735 	return true;
736 }
737 EXPORT_SYMBOL(__sg_page_iter_next);
738 
sg_dma_page_count(struct scatterlist * sg)739 static int sg_dma_page_count(struct scatterlist *sg)
740 {
741 	return PAGE_ALIGN(sg->offset + sg_dma_len(sg)) >> PAGE_SHIFT;
742 }
743 
__sg_page_iter_dma_next(struct sg_dma_page_iter * dma_iter)744 bool __sg_page_iter_dma_next(struct sg_dma_page_iter *dma_iter)
745 {
746 	struct sg_page_iter *piter = &dma_iter->base;
747 
748 	if (!piter->__nents || !piter->sg)
749 		return false;
750 
751 	piter->sg_pgoffset += piter->__pg_advance;
752 	piter->__pg_advance = 1;
753 
754 	while (piter->sg_pgoffset >= sg_dma_page_count(piter->sg)) {
755 		piter->sg_pgoffset -= sg_dma_page_count(piter->sg);
756 		piter->sg = sg_next(piter->sg);
757 		if (!--piter->__nents || !piter->sg)
758 			return false;
759 	}
760 
761 	return true;
762 }
763 EXPORT_SYMBOL(__sg_page_iter_dma_next);
764 
765 /**
766  * sg_miter_start - start mapping iteration over a sg list
767  * @miter: sg mapping iter to be started
768  * @sgl: sg list to iterate over
769  * @nents: number of sg entries
770  * @flags: sg iterator flags
771  *
772  * Description:
773  *   Starts mapping iterator @miter.
774  *
775  * Context:
776  *   Don't care.
777  */
sg_miter_start(struct sg_mapping_iter * miter,struct scatterlist * sgl,unsigned int nents,unsigned int flags)778 void sg_miter_start(struct sg_mapping_iter *miter, struct scatterlist *sgl,
779 		    unsigned int nents, unsigned int flags)
780 {
781 	memset(miter, 0, sizeof(struct sg_mapping_iter));
782 
783 	__sg_page_iter_start(&miter->piter, sgl, nents, 0);
784 	WARN_ON(!(flags & (SG_MITER_TO_SG | SG_MITER_FROM_SG)));
785 	miter->__flags = flags;
786 }
787 EXPORT_SYMBOL(sg_miter_start);
788 
sg_miter_get_next_page(struct sg_mapping_iter * miter)789 static bool sg_miter_get_next_page(struct sg_mapping_iter *miter)
790 {
791 	if (!miter->__remaining) {
792 		struct scatterlist *sg;
793 
794 		if (!__sg_page_iter_next(&miter->piter))
795 			return false;
796 
797 		sg = miter->piter.sg;
798 
799 		miter->__offset = miter->piter.sg_pgoffset ? 0 : sg->offset;
800 		miter->piter.sg_pgoffset += miter->__offset >> PAGE_SHIFT;
801 		miter->__offset &= PAGE_SIZE - 1;
802 		miter->__remaining = sg->offset + sg->length -
803 				     (miter->piter.sg_pgoffset << PAGE_SHIFT) -
804 				     miter->__offset;
805 		miter->__remaining = min_t(unsigned long, miter->__remaining,
806 					   PAGE_SIZE - miter->__offset);
807 	}
808 
809 	return true;
810 }
811 
812 /**
813  * sg_miter_skip - reposition mapping iterator
814  * @miter: sg mapping iter to be skipped
815  * @offset: number of bytes to advance from the current location
816  *
817  * Description:
818  *   Sets the offset of @miter to its current location plus @offset bytes.
819  *   If mapping iterator @miter has been proceeded by sg_miter_next(), this
820  *   stops @miter.
821  *
822  * Context:
823  *   Don't care.
824  *
825  * Returns:
826  *   true if @miter contains the valid mapping.  false if end of sg
827  *   list is reached.
828  */
sg_miter_skip(struct sg_mapping_iter * miter,off_t offset)829 bool sg_miter_skip(struct sg_mapping_iter *miter, off_t offset)
830 {
831 	sg_miter_stop(miter);
832 
833 	while (offset) {
834 		off_t consumed;
835 
836 		if (!sg_miter_get_next_page(miter))
837 			return false;
838 
839 		consumed = min_t(off_t, offset, miter->__remaining);
840 		miter->__offset += consumed;
841 		miter->__remaining -= consumed;
842 		offset -= consumed;
843 	}
844 
845 	return true;
846 }
847 EXPORT_SYMBOL(sg_miter_skip);
848 
849 /**
850  * sg_miter_next - proceed mapping iterator to the next mapping
851  * @miter: sg mapping iter to proceed
852  *
853  * Description:
854  *   Proceeds @miter to the next mapping.  @miter should have been started
855  *   using sg_miter_start().  On successful return, @miter->page,
856  *   @miter->addr and @miter->length point to the current mapping.
857  *
858  * Context:
859  *   May sleep if !SG_MITER_ATOMIC && !SG_MITER_LOCAL.
860  *
861  * Returns:
862  *   true if @miter contains the next mapping.  false if end of sg
863  *   list is reached.
864  */
sg_miter_next(struct sg_mapping_iter * miter)865 bool sg_miter_next(struct sg_mapping_iter *miter)
866 {
867 	sg_miter_stop(miter);
868 
869 	/*
870 	 * Get to the next page if necessary.
871 	 * __remaining, __offset is adjusted by sg_miter_stop
872 	 */
873 	if (!sg_miter_get_next_page(miter))
874 		return false;
875 
876 	miter->page = sg_page_iter_page(&miter->piter);
877 	miter->consumed = miter->length = miter->__remaining;
878 
879 	if (miter->__flags & SG_MITER_ATOMIC)
880 		miter->addr = kmap_atomic(miter->page) + miter->__offset;
881 	else if (miter->__flags & SG_MITER_LOCAL)
882 		miter->addr = kmap_local_page(miter->page) + miter->__offset;
883 	else
884 		miter->addr = kmap(miter->page) + miter->__offset;
885 
886 	return true;
887 }
888 EXPORT_SYMBOL(sg_miter_next);
889 
890 /**
891  * sg_miter_stop - stop mapping iteration
892  * @miter: sg mapping iter to be stopped
893  *
894  * Description:
895  *   Stops mapping iterator @miter.  @miter should have been started
896  *   using sg_miter_start().  A stopped iteration can be resumed by
897  *   calling sg_miter_next() on it.  This is useful when resources (kmap)
898  *   need to be released during iteration.
899  *
900  * Context:
901  *   Don't care otherwise.
902  */
sg_miter_stop(struct sg_mapping_iter * miter)903 void sg_miter_stop(struct sg_mapping_iter *miter)
904 {
905 	WARN_ON(miter->consumed > miter->length);
906 
907 	/* drop resources from the last iteration */
908 	if (miter->addr) {
909 		miter->__offset += miter->consumed;
910 		miter->__remaining -= miter->consumed;
911 
912 		if (miter->__flags & SG_MITER_TO_SG)
913 			flush_dcache_page(miter->page);
914 
915 		if (miter->__flags & SG_MITER_ATOMIC) {
916 			WARN_ON_ONCE(!pagefault_disabled());
917 			kunmap_atomic(miter->addr);
918 		} else if (miter->__flags & SG_MITER_LOCAL)
919 			kunmap_local(miter->addr);
920 		else
921 			kunmap(miter->page);
922 
923 		miter->page = NULL;
924 		miter->addr = NULL;
925 		miter->length = 0;
926 		miter->consumed = 0;
927 	}
928 }
929 EXPORT_SYMBOL(sg_miter_stop);
930 
931 /**
932  * sg_copy_buffer - Copy data between a linear buffer and an SG list
933  * @sgl:		 The SG list
934  * @nents:		 Number of SG entries
935  * @buf:		 Linear buffer to copy to or from (see @to_buffer)
936  * @buflen:		 The number of bytes to copy
937  * @skip:		 Number of bytes to skip before copying
938  * @to_buffer:		 transfer direction (true == from an sg list to a
939  *			 buffer, false == from a buffer to an sg list)
940  *
941  * Returns the number of copied bytes.
942  *
943  **/
size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf,
		      size_t buflen, off_t skip, bool to_buffer)
{
	struct sg_mapping_iter miter;
	unsigned int copied = 0;
	/* The direction flag follows @to_buffer. */
	unsigned int sg_flags = SG_MITER_LOCAL |
				(to_buffer ? SG_MITER_FROM_SG : SG_MITER_TO_SG);

	sg_miter_start(&miter, sgl, nents, sg_flags);

	/* Nothing can be copied if the list ends within the skipped bytes. */
	if (!sg_miter_skip(&miter, skip))
		return 0;

	while (copied < buflen && sg_miter_next(&miter)) {
		unsigned int len = min(miter.length, buflen - copied);
		void *linear = buf + copied;

		if (to_buffer)
			memcpy(linear, miter.addr, len);
		else
			memcpy(miter.addr, linear, len);

		copied += len;
	}

	/* Drop the mapping held by the last sg_miter_next(). */
	sg_miter_stop(&miter);

	return copied;
}
EXPORT_SYMBOL(sg_copy_buffer);
979 
980 /**
981  * sg_copy_from_buffer - Copy from a linear buffer to an SG list
982  * @sgl:		 The SG list
983  * @nents:		 Number of SG entries
984  * @buf:		 Where to copy from
985  * @buflen:		 The number of bytes to copy
986  *
987  * Returns the number of copied bytes.
988  *
989  **/
sg_copy_from_buffer(struct scatterlist * sgl,unsigned int nents,const void * buf,size_t buflen)990 size_t sg_copy_from_buffer(struct scatterlist *sgl, unsigned int nents,
991 			   const void *buf, size_t buflen)
992 {
993 	return sg_copy_buffer(sgl, nents, (void *)buf, buflen, 0, false);
994 }
995 EXPORT_SYMBOL(sg_copy_from_buffer);
996 
997 /**
998  * sg_copy_to_buffer - Copy from an SG list to a linear buffer
999  * @sgl:		 The SG list
1000  * @nents:		 Number of SG entries
1001  * @buf:		 Where to copy to
1002  * @buflen:		 The number of bytes to copy
1003  *
1004  * Returns the number of copied bytes.
1005  *
1006  **/
sg_copy_to_buffer(struct scatterlist * sgl,unsigned int nents,void * buf,size_t buflen)1007 size_t sg_copy_to_buffer(struct scatterlist *sgl, unsigned int nents,
1008 			 void *buf, size_t buflen)
1009 {
1010 	return sg_copy_buffer(sgl, nents, buf, buflen, 0, true);
1011 }
1012 EXPORT_SYMBOL(sg_copy_to_buffer);
1013 
1014 /**
1015  * sg_pcopy_from_buffer - Copy from a linear buffer to an SG list
1016  * @sgl:		 The SG list
1017  * @nents:		 Number of SG entries
1018  * @buf:		 Where to copy from
1019  * @buflen:		 The number of bytes to copy
1020  * @skip:		 Number of bytes to skip before copying
1021  *
1022  * Returns the number of copied bytes.
1023  *
1024  **/
sg_pcopy_from_buffer(struct scatterlist * sgl,unsigned int nents,const void * buf,size_t buflen,off_t skip)1025 size_t sg_pcopy_from_buffer(struct scatterlist *sgl, unsigned int nents,
1026 			    const void *buf, size_t buflen, off_t skip)
1027 {
1028 	return sg_copy_buffer(sgl, nents, (void *)buf, buflen, skip, false);
1029 }
1030 EXPORT_SYMBOL(sg_pcopy_from_buffer);
1031 
1032 /**
1033  * sg_pcopy_to_buffer - Copy from an SG list to a linear buffer
1034  * @sgl:		 The SG list
1035  * @nents:		 Number of SG entries
1036  * @buf:		 Where to copy to
1037  * @buflen:		 The number of bytes to copy
1038  * @skip:		 Number of bytes to skip before copying
1039  *
1040  * Returns the number of copied bytes.
1041  *
1042  **/
sg_pcopy_to_buffer(struct scatterlist * sgl,unsigned int nents,void * buf,size_t buflen,off_t skip)1043 size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents,
1044 			  void *buf, size_t buflen, off_t skip)
1045 {
1046 	return sg_copy_buffer(sgl, nents, buf, buflen, skip, true);
1047 }
1048 EXPORT_SYMBOL(sg_pcopy_to_buffer);
1049 
1050 /**
1051  * sg_zero_buffer - Zero-out a part of a SG list
1052  * @sgl:		 The SG list
1053  * @nents:		 Number of SG entries
1054  * @buflen:		 The number of bytes to zero out
1055  * @skip:		 Number of bytes to skip before zeroing
1056  *
1057  * Returns the number of bytes zeroed.
1058  **/
sg_zero_buffer(struct scatterlist * sgl,unsigned int nents,size_t buflen,off_t skip)1059 size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents,
1060 		       size_t buflen, off_t skip)
1061 {
1062 	unsigned int offset = 0;
1063 	struct sg_mapping_iter miter;
1064 	unsigned int sg_flags = SG_MITER_LOCAL | SG_MITER_TO_SG;
1065 
1066 	sg_miter_start(&miter, sgl, nents, sg_flags);
1067 
1068 	if (!sg_miter_skip(&miter, skip))
1069 		return false;
1070 
1071 	while (offset < buflen && sg_miter_next(&miter)) {
1072 		unsigned int len;
1073 
1074 		len = min(miter.length, buflen - offset);
1075 		memset(miter.addr, 0, len);
1076 
1077 		offset += len;
1078 	}
1079 
1080 	sg_miter_stop(&miter);
1081 	return offset;
1082 }
1083 EXPORT_SYMBOL(sg_zero_buffer);
1084 
1085 /*
1086  * Extract and pin a list of up to sg_max pages from UBUF- or IOVEC-class
1087  * iterators, and add them to the scatterlist.
1088  */
extract_user_to_sg(struct iov_iter * iter,ssize_t maxsize,struct sg_table * sgtable,unsigned int sg_max,iov_iter_extraction_t extraction_flags)1089 static ssize_t extract_user_to_sg(struct iov_iter *iter,
1090 				  ssize_t maxsize,
1091 				  struct sg_table *sgtable,
1092 				  unsigned int sg_max,
1093 				  iov_iter_extraction_t extraction_flags)
1094 {
1095 	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
1096 	struct page **pages;
1097 	unsigned int npages;
1098 	ssize_t ret = 0, res;
1099 	size_t len, off;
1100 
1101 	/* We decant the page list into the tail of the scatterlist */
1102 	pages = (void *)sgtable->sgl +
1103 		array_size(sg_max, sizeof(struct scatterlist));
1104 	pages -= sg_max;
1105 
1106 	do {
1107 		res = iov_iter_extract_pages(iter, &pages, maxsize, sg_max,
1108 					     extraction_flags, &off);
1109 		if (res <= 0)
1110 			goto failed;
1111 
1112 		len = res;
1113 		maxsize -= len;
1114 		ret += len;
1115 		npages = DIV_ROUND_UP(off + len, PAGE_SIZE);
1116 		sg_max -= npages;
1117 
1118 		for (; npages > 0; npages--) {
1119 			struct page *page = *pages;
1120 			size_t seg = min_t(size_t, PAGE_SIZE - off, len);
1121 
1122 			*pages++ = NULL;
1123 			sg_set_page(sg, page, seg, off);
1124 			sgtable->nents++;
1125 			sg++;
1126 			len -= seg;
1127 			off = 0;
1128 		}
1129 	} while (maxsize > 0 && sg_max > 0);
1130 
1131 	return ret;
1132 
1133 failed:
1134 	while (sgtable->nents > sgtable->orig_nents)
1135 		unpin_user_page(sg_page(&sgtable->sgl[--sgtable->nents]));
1136 	return res;
1137 }
1138 
1139 /*
1140  * Extract up to sg_max pages from a BVEC-type iterator and add them to the
1141  * scatterlist.  The pages are not pinned.
1142  */
extract_bvec_to_sg(struct iov_iter * iter,ssize_t maxsize,struct sg_table * sgtable,unsigned int sg_max,iov_iter_extraction_t extraction_flags)1143 static ssize_t extract_bvec_to_sg(struct iov_iter *iter,
1144 				  ssize_t maxsize,
1145 				  struct sg_table *sgtable,
1146 				  unsigned int sg_max,
1147 				  iov_iter_extraction_t extraction_flags)
1148 {
1149 	const struct bio_vec *bv = iter->bvec;
1150 	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
1151 	unsigned long start = iter->iov_offset;
1152 	unsigned int i;
1153 	ssize_t ret = 0;
1154 
1155 	for (i = 0; i < iter->nr_segs; i++) {
1156 		size_t off, len;
1157 
1158 		len = bv[i].bv_len;
1159 		if (start >= len) {
1160 			start -= len;
1161 			continue;
1162 		}
1163 
1164 		len = min_t(size_t, maxsize, len - start);
1165 		off = bv[i].bv_offset + start;
1166 
1167 		sg_set_page(sg, bv[i].bv_page, len, off);
1168 		sgtable->nents++;
1169 		sg++;
1170 		sg_max--;
1171 
1172 		ret += len;
1173 		maxsize -= len;
1174 		if (maxsize <= 0 || sg_max == 0)
1175 			break;
1176 		start = 0;
1177 	}
1178 
1179 	if (ret > 0)
1180 		iov_iter_advance(iter, ret);
1181 	return ret;
1182 }
1183 
1184 /*
1185  * Extract up to sg_max pages from a KVEC-type iterator and add them to the
1186  * scatterlist.  This can deal with vmalloc'd buffers as well as kmalloc'd or
1187  * static buffers.  The pages are not pinned.
1188  */
extract_kvec_to_sg(struct iov_iter * iter,ssize_t maxsize,struct sg_table * sgtable,unsigned int sg_max,iov_iter_extraction_t extraction_flags)1189 static ssize_t extract_kvec_to_sg(struct iov_iter *iter,
1190 				  ssize_t maxsize,
1191 				  struct sg_table *sgtable,
1192 				  unsigned int sg_max,
1193 				  iov_iter_extraction_t extraction_flags)
1194 {
1195 	const struct kvec *kv = iter->kvec;
1196 	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
1197 	unsigned long start = iter->iov_offset;
1198 	unsigned int i;
1199 	ssize_t ret = 0;
1200 
1201 	for (i = 0; i < iter->nr_segs; i++) {
1202 		struct page *page;
1203 		unsigned long kaddr;
1204 		size_t off, len, seg;
1205 
1206 		len = kv[i].iov_len;
1207 		if (start >= len) {
1208 			start -= len;
1209 			continue;
1210 		}
1211 
1212 		kaddr = (unsigned long)kv[i].iov_base + start;
1213 		off = kaddr & ~PAGE_MASK;
1214 		len = min_t(size_t, maxsize, len - start);
1215 		kaddr &= PAGE_MASK;
1216 
1217 		maxsize -= len;
1218 		ret += len;
1219 		do {
1220 			seg = min_t(size_t, len, PAGE_SIZE - off);
1221 			if (is_vmalloc_or_module_addr((void *)kaddr))
1222 				page = vmalloc_to_page((void *)kaddr);
1223 			else
1224 				page = virt_to_page((void *)kaddr);
1225 
1226 			sg_set_page(sg, page, len, off);
1227 			sgtable->nents++;
1228 			sg++;
1229 			sg_max--;
1230 
1231 			len -= seg;
1232 			kaddr += PAGE_SIZE;
1233 			off = 0;
1234 		} while (len > 0 && sg_max > 0);
1235 
1236 		if (maxsize <= 0 || sg_max == 0)
1237 			break;
1238 		start = 0;
1239 	}
1240 
1241 	if (ret > 0)
1242 		iov_iter_advance(iter, ret);
1243 	return ret;
1244 }
1245 
1246 /*
1247  * Extract up to sg_max folios from an FOLIOQ-type iterator and add them to
1248  * the scatterlist.  The pages are not pinned.
1249  */
extract_folioq_to_sg(struct iov_iter * iter,ssize_t maxsize,struct sg_table * sgtable,unsigned int sg_max,iov_iter_extraction_t extraction_flags)1250 static ssize_t extract_folioq_to_sg(struct iov_iter *iter,
1251 				   ssize_t maxsize,
1252 				   struct sg_table *sgtable,
1253 				   unsigned int sg_max,
1254 				   iov_iter_extraction_t extraction_flags)
1255 {
1256 	const struct folio_queue *folioq = iter->folioq;
1257 	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
1258 	unsigned int slot = iter->folioq_slot;
1259 	ssize_t ret = 0;
1260 	size_t offset = iter->iov_offset;
1261 
1262 	BUG_ON(!folioq);
1263 
1264 	if (slot >= folioq_nr_slots(folioq)) {
1265 		folioq = folioq->next;
1266 		if (WARN_ON_ONCE(!folioq))
1267 			return 0;
1268 		slot = 0;
1269 	}
1270 
1271 	do {
1272 		struct folio *folio = folioq_folio(folioq, slot);
1273 		size_t fsize = folioq_folio_size(folioq, slot);
1274 
1275 		if (offset < fsize) {
1276 			size_t part = umin(maxsize - ret, fsize - offset);
1277 
1278 			sg_set_page(sg, folio_page(folio, 0), part, offset);
1279 			sgtable->nents++;
1280 			sg++;
1281 			sg_max--;
1282 			offset += part;
1283 			ret += part;
1284 		}
1285 
1286 		if (offset >= fsize) {
1287 			offset = 0;
1288 			slot++;
1289 			if (slot >= folioq_nr_slots(folioq)) {
1290 				if (!folioq->next) {
1291 					WARN_ON_ONCE(ret < iter->count);
1292 					break;
1293 				}
1294 				folioq = folioq->next;
1295 				slot = 0;
1296 			}
1297 		}
1298 	} while (sg_max > 0 && ret < maxsize);
1299 
1300 	iter->folioq = folioq;
1301 	iter->folioq_slot = slot;
1302 	iter->iov_offset = offset;
1303 	iter->count -= ret;
1304 	return ret;
1305 }
1306 
1307 /*
1308  * Extract up to sg_max folios from an XARRAY-type iterator and add them to
1309  * the scatterlist.  The pages are not pinned.
1310  */
extract_xarray_to_sg(struct iov_iter * iter,ssize_t maxsize,struct sg_table * sgtable,unsigned int sg_max,iov_iter_extraction_t extraction_flags)1311 static ssize_t extract_xarray_to_sg(struct iov_iter *iter,
1312 				    ssize_t maxsize,
1313 				    struct sg_table *sgtable,
1314 				    unsigned int sg_max,
1315 				    iov_iter_extraction_t extraction_flags)
1316 {
1317 	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
1318 	struct xarray *xa = iter->xarray;
1319 	struct folio *folio;
1320 	loff_t start = iter->xarray_start + iter->iov_offset;
1321 	pgoff_t index = start / PAGE_SIZE;
1322 	ssize_t ret = 0;
1323 	size_t offset, len;
1324 	XA_STATE(xas, xa, index);
1325 
1326 	rcu_read_lock();
1327 
1328 	xas_for_each(&xas, folio, ULONG_MAX) {
1329 		if (xas_retry(&xas, folio))
1330 			continue;
1331 		if (WARN_ON(xa_is_value(folio)))
1332 			break;
1333 		if (WARN_ON(folio_test_hugetlb(folio)))
1334 			break;
1335 
1336 		offset = offset_in_folio(folio, start);
1337 		len = min_t(size_t, maxsize, folio_size(folio) - offset);
1338 
1339 		sg_set_page(sg, folio_page(folio, 0), len, offset);
1340 		sgtable->nents++;
1341 		sg++;
1342 		sg_max--;
1343 
1344 		maxsize -= len;
1345 		ret += len;
1346 		if (maxsize <= 0 || sg_max == 0)
1347 			break;
1348 	}
1349 
1350 	rcu_read_unlock();
1351 	if (ret > 0)
1352 		iov_iter_advance(iter, ret);
1353 	return ret;
1354 }
1355 
1356 /**
1357  * extract_iter_to_sg - Extract pages from an iterator and add to an sglist
1358  * @iter: The iterator to extract from
1359  * @maxsize: The amount of iterator to copy
1360  * @sgtable: The scatterlist table to fill in
1361  * @sg_max: Maximum number of elements in @sgtable that may be filled
1362  * @extraction_flags: Flags to qualify the request
1363  *
1364  * Extract the page fragments from the given amount of the source iterator and
1365  * add them to a scatterlist that refers to all of those bits, to a maximum
1366  * addition of @sg_max elements.
1367  *
1368  * The pages referred to by UBUF- and IOVEC-type iterators are extracted and
1369  * pinned; BVEC-, KVEC-, FOLIOQ- and XARRAY-type are extracted but aren't
1370  * pinned; DISCARD-type is not supported.
1371  *
1372  * No end mark is placed on the scatterlist; that's left to the caller.
1373  *
1374  * @extraction_flags can have ITER_ALLOW_P2PDMA set to request peer-to-peer DMA
1375  * be allowed on the pages extracted.
1376  *
1377  * If successful, @sgtable->nents is updated to include the number of elements
1378  * added and the number of bytes added is returned.  @sgtable->orig_nents is
1379  * left unaltered.
1380  *
1381  * The iov_iter_extract_mode() function should be used to query how cleanup
1382  * should be performed.
1383  */
extract_iter_to_sg(struct iov_iter * iter,size_t maxsize,struct sg_table * sgtable,unsigned int sg_max,iov_iter_extraction_t extraction_flags)1384 ssize_t extract_iter_to_sg(struct iov_iter *iter, size_t maxsize,
1385 			   struct sg_table *sgtable, unsigned int sg_max,
1386 			   iov_iter_extraction_t extraction_flags)
1387 {
1388 	if (maxsize == 0)
1389 		return 0;
1390 
1391 	switch (iov_iter_type(iter)) {
1392 	case ITER_UBUF:
1393 	case ITER_IOVEC:
1394 		return extract_user_to_sg(iter, maxsize, sgtable, sg_max,
1395 					  extraction_flags);
1396 	case ITER_BVEC:
1397 		return extract_bvec_to_sg(iter, maxsize, sgtable, sg_max,
1398 					  extraction_flags);
1399 	case ITER_KVEC:
1400 		return extract_kvec_to_sg(iter, maxsize, sgtable, sg_max,
1401 					  extraction_flags);
1402 	case ITER_FOLIOQ:
1403 		return extract_folioq_to_sg(iter, maxsize, sgtable, sg_max,
1404 					    extraction_flags);
1405 	case ITER_XARRAY:
1406 		return extract_xarray_to_sg(iter, maxsize, sgtable, sg_max,
1407 					    extraction_flags);
1408 	default:
1409 		pr_err("%s(%u) unsupported\n", __func__, iov_iter_type(iter));
1410 		WARN_ON_ONCE(1);
1411 		return -EIO;
1412 	}
1413 }
1414 EXPORT_SYMBOL_GPL(extract_iter_to_sg);
1415