// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2007 Jens Axboe <jens.axboe@oracle.com>
 *
 * Scatterlist handling helpers.
 */
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/scatterlist.h>
#include <linux/highmem.h>
#include <linux/kmemleak.h>
#include <linux/bvec.h>
#include <linux/uio.h>
#include <linux/folio_queue.h>

/**
 * sg_nents - return total count of entries in scatterlist
 * @sg: The scatterlist
 *
 * Description:
 *   Returns the number of entries in @sg, taking chaining into
 *   account.
 *
 **/
int sg_nents(struct scatterlist *sg)
{
	int nents;
	for (nents = 0; sg; sg = sg_next(sg))
		nents++;
	return nents;
}
EXPORT_SYMBOL(sg_nents);

/**
 * sg_nents_for_len - return total count of entries in scatterlist
 *                    needed to satisfy the supplied length
 * @sg: The scatterlist
 * @len: The total required length
 *
 * Description:
 *   Determines the number of entries in @sg that are required to meet
 *   the supplied length, taking chaining into account.
 *
 * Returns:
 *   the number of sg entries needed, negative error on failure
 *
 **/
int sg_nents_for_len(struct scatterlist *sg, u64 len)
{
	int nents;
	u64 total;

	if (!len)
		return 0;

	for (nents = 0, total = 0; sg; sg = sg_next(sg)) {
		nents++;
		total += sg->length;
		if (total >= len)
			return nents;
	}

	return -EINVAL;
}
EXPORT_SYMBOL(sg_nents_for_len);

/**
 * sg_last - return the last scatterlist entry in a list
 * @sgl: First entry in the scatterlist
 * @nents: Number of entries in the scatterlist
 *
 * Description:
 *   Should only be used casually, it (currently) scans the entire list
 *   to get the last entry.
 *
 *   Note that the @sgl pointer passed in need not be the first one,
 *   the important bit is that @nents denotes the number of entries that
 *   exist from @sgl.
 *
 **/
struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents)
{
	struct scatterlist *sg, *ret = NULL;
	unsigned int i;

	for_each_sg(sgl, sg, nents, i)
		ret = sg;

	BUG_ON(!sg_is_last(ret));
	return ret;
}
EXPORT_SYMBOL(sg_last);

/**
 * sg_init_table - Initialize SG table
 * @sgl: The SG table
 * @nents: Number of entries in table
 *
 * Notes:
 *   If this is part of a chained sg table, sg_mark_end() should be
 *   used only on the last table part.
 *
 **/
void sg_init_table(struct scatterlist *sgl, unsigned int nents)
{
	memset(sgl, 0, sizeof(*sgl) * nents);
	sg_init_marker(sgl, nents);
}
EXPORT_SYMBOL(sg_init_table);

/**
 * sg_init_one - Initialize a single entry sg list
 * @sg: SG entry
 * @buf: Virtual address for IO
 * @buflen: IO length
 *
 **/
void sg_init_one(struct scatterlist *sg, const void *buf, unsigned int buflen)
{
	sg_init_table(sg, 1);
	sg_set_buf(sg, buf, buflen);
}
EXPORT_SYMBOL(sg_init_one);
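
/*
 * Usage sketch (illustrative, not part of this file): describing a single
 * kmalloc'd buffer with sg_init_one() before mapping it for DMA.  The
 * device pointer, direction and buffer size below are hypothetical.
 *
 *	struct scatterlist sg;
 *	void *buf = kmalloc(512, GFP_KERNEL);
 *
 *	if (!buf)
 *		return -ENOMEM;
 *	sg_init_one(&sg, buf, 512);
 *	if (!dma_map_sg(dev, &sg, 1, DMA_TO_DEVICE))
 *		return -EIO;
 */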

/*
 * The default behaviour of sg_alloc_table() is to use these kmalloc/kfree
 * helpers.
 */
static struct scatterlist *sg_kmalloc(unsigned int nents, gfp_t gfp_mask)
{
	if (nents == SG_MAX_SINGLE_ALLOC) {
		/*
		 * Kmemleak doesn't track page allocations as they are not
		 * commonly used (in a raw form) for kernel data structures.
		 * As we chain together a list of pages and then a normal
		 * kmalloc (tracked by kmemleak), in order for that last
		 * allocation not to become decoupled (and thus a
		 * false-positive) we need to inform kmemleak of all the
		 * intermediate allocations.
		 */
		void *ptr = (void *) __get_free_page(gfp_mask);
		kmemleak_alloc(ptr, PAGE_SIZE, 1, gfp_mask);
		return ptr;
	} else
		return kmalloc_array(nents, sizeof(struct scatterlist),
				     gfp_mask);
}

static void sg_kfree(struct scatterlist *sg, unsigned int nents)
{
	if (nents == SG_MAX_SINGLE_ALLOC) {
		kmemleak_free(sg);
		free_page((unsigned long) sg);
	} else
		kfree(sg);
}

/**
 * __sg_free_table - Free a previously mapped sg table
 * @table: The sg table header to use
 * @max_ents: The maximum number of entries per single scatterlist
 * @nents_first_chunk: Number of entries in the (preallocated) first
 *    scatterlist chunk, 0 means no such preallocated first chunk
 * @free_fn: Free function
 * @num_ents: Number of entries in the table
 *
 * Description:
 *   Free an sg table previously allocated and set up with
 *   __sg_alloc_table().  The @max_ents value must be identical to
 *   that previously used with __sg_alloc_table().
 *
 **/
void __sg_free_table(struct sg_table *table, unsigned int max_ents,
		     unsigned int nents_first_chunk, sg_free_fn *free_fn,
		     unsigned int num_ents)
{
	struct scatterlist *sgl, *next;
	unsigned curr_max_ents = nents_first_chunk ?: max_ents;

	if (unlikely(!table->sgl))
		return;

	sgl = table->sgl;
	while (num_ents) {
		unsigned int alloc_size = num_ents;
		unsigned int sg_size;

		/*
		 * If we have more than max_ents segments left,
		 * then assign 'next' to the sg table after the current one.
		 * sg_size is then one less than alloc size, since the last
		 * element is the chain pointer.
		 */
		if (alloc_size > curr_max_ents) {
			next = sg_chain_ptr(&sgl[curr_max_ents - 1]);
			alloc_size = curr_max_ents;
			sg_size = alloc_size - 1;
		} else {
			sg_size = alloc_size;
			next = NULL;
		}

		num_ents -= sg_size;
		if (nents_first_chunk)
			nents_first_chunk = 0;
		else
			free_fn(sgl, alloc_size);
		sgl = next;
		curr_max_ents = max_ents;
	}

	table->sgl = NULL;
}
EXPORT_SYMBOL(__sg_free_table);

/**
 * sg_free_append_table - Free a previously allocated append sg table.
 * @table: The mapped sg append table header
 *
 **/
void sg_free_append_table(struct sg_append_table *table)
{
	__sg_free_table(&table->sgt, SG_MAX_SINGLE_ALLOC, 0, sg_kfree,
			table->total_nents);
}
EXPORT_SYMBOL(sg_free_append_table);

/**
 * sg_free_table - Free a previously allocated sg table
 * @table: The mapped sg table header
 *
 **/
void sg_free_table(struct sg_table *table)
{
	__sg_free_table(table, SG_MAX_SINGLE_ALLOC, 0, sg_kfree,
			table->orig_nents);
}
EXPORT_SYMBOL(sg_free_table);

/**
 * __sg_alloc_table - Allocate and initialize an sg table with given allocator
 * @table: The sg table header to use
 * @nents: Number of entries in sg list
 * @max_ents: The maximum number of entries the allocator returns per call
 * @first_chunk: first SGL if preallocated (may be %NULL)
 * @nents_first_chunk: Number of entries in the (preallocated) first
 *    scatterlist chunk, 0 means no such preallocated chunk provided by user
 * @gfp_mask: GFP allocation mask
 * @alloc_fn: Allocator to use
 *
 * Description:
 *   This function returns a @table @nents long. The allocator is
 *   defined to return scatterlist chunks of maximum size @max_ents.
 *   Thus if @nents is bigger than @max_ents, the scatterlists will be
 *   chained in units of @max_ents.
 *
 * Notes:
 *   If this function returns non-0 (e.g. failure), the caller must call
 *   __sg_free_table() to clean up any leftover allocations.
 *
 **/
int __sg_alloc_table(struct sg_table *table, unsigned int nents,
		     unsigned int max_ents, struct scatterlist *first_chunk,
		     unsigned int nents_first_chunk, gfp_t gfp_mask,
		     sg_alloc_fn *alloc_fn)
{
	struct scatterlist *sg, *prv;
	unsigned int left;
	unsigned curr_max_ents = nents_first_chunk ?: max_ents;
	unsigned prv_max_ents;

	memset(table, 0, sizeof(*table));

	if (nents == 0)
		return -EINVAL;
#ifdef CONFIG_ARCH_NO_SG_CHAIN
	if (WARN_ON_ONCE(nents > max_ents))
		return -EINVAL;
#endif

	left = nents;
	prv = NULL;
	do {
		unsigned int sg_size, alloc_size = left;

		if (alloc_size > curr_max_ents) {
			alloc_size = curr_max_ents;
			sg_size = alloc_size - 1;
		} else
			sg_size = alloc_size;

		left -= sg_size;

		if (first_chunk) {
			sg = first_chunk;
			first_chunk = NULL;
		} else {
			sg = alloc_fn(alloc_size, gfp_mask);
		}
		if (unlikely(!sg)) {
			/*
			 * Adjust entry count to reflect that the last
			 * entry of the previous table won't be used for
			 * linkage.  Without this, sg_kfree() may get
			 * confused.
			 */
			if (prv)
				table->nents = ++table->orig_nents;

			return -ENOMEM;
		}

		sg_init_table(sg, alloc_size);
		table->nents = table->orig_nents += sg_size;

		/*
		 * If this is the first mapping, assign the sg table header.
		 * If this is not the first mapping, chain previous part.
		 */
		if (prv)
			sg_chain(prv, prv_max_ents, sg);
		else
			table->sgl = sg;

		/*
		 * If no more entries after this one, mark the end
		 */
		if (!left)
			sg_mark_end(&sg[sg_size - 1]);

		prv = sg;
		prv_max_ents = curr_max_ents;
		curr_max_ents = max_ents;
	} while (left);

	return 0;
}
EXPORT_SYMBOL(__sg_alloc_table);

/**
 * sg_alloc_table - Allocate and initialize an sg table
 * @table: The sg table header to use
 * @nents: Number of entries in sg list
 * @gfp_mask: GFP allocation mask
 *
 * Description:
 *   Allocate and initialize an sg table. If @nents is larger than
 *   SG_MAX_SINGLE_ALLOC a chained sg table will be set up.
 *
 **/
int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask)
{
	int ret;

	ret = __sg_alloc_table(table, nents, SG_MAX_SINGLE_ALLOC,
			       NULL, 0, gfp_mask, sg_kmalloc);
	if (unlikely(ret))
		sg_free_table(table);
	return ret;
}
EXPORT_SYMBOL(sg_alloc_table);
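
/*
 * Usage sketch (illustrative, not part of this file): allocating a table,
 * pointing each entry at a page, then mapping it for DMA.  The page array,
 * "dev", the direction and error unwinding are hypothetical/elided.
 *
 *	struct sg_table sgt;
 *	struct scatterlist *sg;
 *	int i, ret;
 *
 *	ret = sg_alloc_table(&sgt, npages, GFP_KERNEL);
 *	if (ret)
 *		return ret;
 *	for_each_sg(sgt.sgl, sg, sgt.orig_nents, i)
 *		sg_set_page(sg, pages[i], PAGE_SIZE, 0);
 *	ret = dma_map_sgtable(dev, &sgt, DMA_TO_DEVICE, 0);
 *	...
 *	dma_unmap_sgtable(dev, &sgt, DMA_TO_DEVICE, 0);
 *	sg_free_table(&sgt);
 */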

static struct scatterlist *get_next_sg(struct sg_append_table *table,
				       struct scatterlist *cur,
				       unsigned long needed_sges,
				       gfp_t gfp_mask)
{
	struct scatterlist *new_sg, *next_sg;
	unsigned int alloc_size;

	if (cur) {
		next_sg = sg_next(cur);
		/* Check if the last entry should be kept for chaining */
		if (!sg_is_last(next_sg) || needed_sges == 1)
			return next_sg;
	}

	alloc_size = min_t(unsigned long, needed_sges, SG_MAX_SINGLE_ALLOC);
	new_sg = sg_kmalloc(alloc_size, gfp_mask);
	if (!new_sg)
		return ERR_PTR(-ENOMEM);
	sg_init_table(new_sg, alloc_size);
	if (cur) {
		table->total_nents += alloc_size - 1;
		__sg_chain(next_sg, new_sg);
	} else {
		table->sgt.sgl = new_sg;
		table->total_nents = alloc_size;
	}
	return new_sg;
}

static bool pages_are_mergeable(struct page *a, struct page *b)
{
	if (page_to_pfn(a) != page_to_pfn(b) + 1)
		return false;
	if (!zone_device_pages_have_same_pgmap(a, b))
		return false;
	return true;
}

/**
 * sg_alloc_append_table_from_pages - Allocate and initialize an append sg
 *                                    table from an array of pages
 * @sgt_append: The sg append table to use
 * @pages: Pointer to an array of page pointers
 * @n_pages: Number of pages in the pages array
 * @offset: Offset from start of the first page to the start of a buffer
 * @size: Number of valid bytes in the buffer (after offset)
 * @max_segment: Maximum size of a scatterlist element in bytes
 * @left_pages: Number of pages the caller will append after this call
 * @gfp_mask: GFP allocation mask
 *
 * Description:
 *   On the first call this allocates and initializes an sg table from a
 *   list of pages; on later calls it reuses the scatterlist from
 *   @sgt_append. Contiguous ranges of the pages are squashed into a single
 *   scatterlist entry up to the maximum size specified in @max_segment. A
 *   user may provide an offset at a start and a size of valid data in a
 *   buffer specified by the page array. The returned sg table is released
 *   by sg_free_append_table().
 *
 * Returns:
 *   0 on success, negative error on failure
 *
 * Notes:
 *   If this function returns non-0 (e.g. failure), the caller must call
 *   sg_free_append_table() to clean up any leftover allocations.
 *
 *   In the first call, @sgt_append must be initialized.
 */
int sg_alloc_append_table_from_pages(struct sg_append_table *sgt_append,
		struct page **pages, unsigned int n_pages, unsigned int offset,
		unsigned long size, unsigned int max_segment,
		unsigned int left_pages, gfp_t gfp_mask)
{
	unsigned int chunks, cur_page, seg_len, i, prv_len = 0;
	unsigned int added_nents = 0;
	struct scatterlist *s = sgt_append->prv;
	struct page *last_pg;

	/*
	 * The algorithm below requires max_segment to be aligned to PAGE_SIZE
	 * otherwise it can overshoot.
	 */
	max_segment = ALIGN_DOWN(max_segment, PAGE_SIZE);
	if (WARN_ON(max_segment < PAGE_SIZE))
		return -EINVAL;

	if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && sgt_append->prv)
		return -EOPNOTSUPP;

	if (sgt_append->prv) {
		unsigned long next_pfn;

		if (WARN_ON(offset))
			return -EINVAL;

		/* Merge contiguous pages into the last SG */
		prv_len = sgt_append->prv->length;
		next_pfn = (sg_phys(sgt_append->prv) + prv_len) / PAGE_SIZE;
		if (page_to_pfn(pages[0]) == next_pfn) {
			last_pg = pfn_to_page(next_pfn - 1);
			while (n_pages && pages_are_mergeable(pages[0], last_pg)) {
				if (sgt_append->prv->length + PAGE_SIZE > max_segment)
					break;
				sgt_append->prv->length += PAGE_SIZE;
				last_pg = pages[0];
				pages++;
				n_pages--;
			}
			if (!n_pages)
				goto out;
		}
	}

	/* compute number of contiguous chunks */
	chunks = 1;
	seg_len = 0;
	for (i = 1; i < n_pages; i++) {
		seg_len += PAGE_SIZE;
		if (seg_len >= max_segment ||
		    !pages_are_mergeable(pages[i], pages[i - 1])) {
			chunks++;
			seg_len = 0;
		}
	}

	/* merging chunks and putting them into the scatterlist */
	cur_page = 0;
	for (i = 0; i < chunks; i++) {
		unsigned int j, chunk_size;

		/* look for the end of the current chunk */
		seg_len = 0;
		for (j = cur_page + 1; j < n_pages; j++) {
			seg_len += PAGE_SIZE;
			if (seg_len >= max_segment ||
			    !pages_are_mergeable(pages[j], pages[j - 1]))
				break;
		}

		/* Pass how many chunks might be left */
		s = get_next_sg(sgt_append, s, chunks - i + left_pages,
				gfp_mask);
		if (IS_ERR(s)) {
			/*
			 * Adjust entry length to be as before function was
			 * called.
			 */
			if (sgt_append->prv)
				sgt_append->prv->length = prv_len;
			return PTR_ERR(s);
		}
		chunk_size = ((j - cur_page) << PAGE_SHIFT) - offset;
		sg_set_page(s, pages[cur_page],
			    min_t(unsigned long, size, chunk_size), offset);
		added_nents++;
		size -= chunk_size;
		offset = 0;
		cur_page = j;
	}
	sgt_append->sgt.nents += added_nents;
	sgt_append->sgt.orig_nents = sgt_append->sgt.nents;
	sgt_append->prv = s;
out:
	if (!left_pages)
		sg_mark_end(s);
	return 0;
}
EXPORT_SYMBOL(sg_alloc_append_table_from_pages);
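
/*
 * Usage sketch (illustrative, not part of this file): building one table
 * from two batches of pinned pages, e.g. while walking a larger user
 * buffer.  The "first"/"second" page arrays and their counts are
 * hypothetical.
 *
 *	struct sg_append_table app = {};
 *	int ret;
 *
 *	ret = sg_alloc_append_table_from_pages(&app, first, n_first, 0,
 *					       n_first << PAGE_SHIFT,
 *					       UINT_MAX, n_second, GFP_KERNEL);
 *	if (!ret)
 *		ret = sg_alloc_append_table_from_pages(&app, second, n_second,
 *						       0, n_second << PAGE_SHIFT,
 *						       UINT_MAX, 0, GFP_KERNEL);
 *	if (ret)
 *		sg_free_append_table(&app);
 */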

/**
 * sg_alloc_table_from_pages_segment - Allocate and initialize an sg table from
 *                                     an array of pages and given maximum
 *                                     segment.
 * @sgt: The sg table header to use
 * @pages: Pointer to an array of page pointers
 * @n_pages: Number of pages in the pages array
 * @offset: Offset from start of the first page to the start of a buffer
 * @size: Number of valid bytes in the buffer (after offset)
 * @max_segment: Maximum size of a scatterlist element in bytes
 * @gfp_mask: GFP allocation mask
 *
 * Description:
 *   Allocate and initialize an sg table from a list of pages. Contiguous
 *   ranges of the pages are squashed into a single scatterlist node up to the
 *   maximum size specified in @max_segment. A user may provide an offset at a
 *   start and a size of valid data in a buffer specified by the page array.
 *
 *   The returned sg table is released by sg_free_table().
 *
 * Returns:
 *   0 on success, negative error on failure
 */
int sg_alloc_table_from_pages_segment(struct sg_table *sgt, struct page **pages,
				unsigned int n_pages, unsigned int offset,
				unsigned long size, unsigned int max_segment,
				gfp_t gfp_mask)
{
	struct sg_append_table append = {};
	int err;

	err = sg_alloc_append_table_from_pages(&append, pages, n_pages, offset,
					       size, max_segment, 0, gfp_mask);
	if (err) {
		sg_free_append_table(&append);
		return err;
	}
	memcpy(sgt, &append.sgt, sizeof(*sgt));
	WARN_ON(append.total_nents != sgt->orig_nents);
	return 0;
}
EXPORT_SYMBOL(sg_alloc_table_from_pages_segment);

#ifdef CONFIG_SGL_ALLOC

/**
 * sgl_alloc_order - allocate a scatterlist and its pages
 * @length: Length in bytes of the scatterlist. Must be at least one
 * @order: Second argument for alloc_pages()
 * @chainable: Whether or not to allocate an extra element in the scatterlist
 *    for scatterlist chaining purposes
 * @gfp: Memory allocation flags
 * @nent_p: [out] Number of entries in the scatterlist that have pages
 *
 * Returns: A pointer to an initialized scatterlist or %NULL upon failure.
 */
struct scatterlist *sgl_alloc_order(unsigned long long length,
				    unsigned int order, bool chainable,
				    gfp_t gfp, unsigned int *nent_p)
{
	struct scatterlist *sgl, *sg;
	struct page *page;
	unsigned int nent, nalloc;
	u32 elem_len;

	nent = round_up(length, PAGE_SIZE << order) >> (PAGE_SHIFT + order);
	/* Check for integer overflow */
	if (length > (nent << (PAGE_SHIFT + order)))
		return NULL;
	nalloc = nent;
	if (chainable) {
		/* Check for integer overflow */
		if (nalloc + 1 < nalloc)
			return NULL;
		nalloc++;
	}
	sgl = kmalloc_array(nalloc, sizeof(struct scatterlist),
			    gfp & ~GFP_DMA);
	if (!sgl)
		return NULL;

	sg_init_table(sgl, nalloc);
	sg = sgl;
	while (length) {
		elem_len = min_t(u64, length, PAGE_SIZE << order);
		page = alloc_pages(gfp, order);
		if (!page) {
			sgl_free_order(sgl, order);
			return NULL;
		}

		sg_set_page(sg, page, elem_len, 0);
		length -= elem_len;
		sg = sg_next(sg);
	}
	WARN_ONCE(length, "length = %lld\n", length);
	if (nent_p)
		*nent_p = nent;
	return sgl;
}
EXPORT_SYMBOL(sgl_alloc_order);
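
/*
 * Usage sketch (illustrative, not part of this file): a target-driver style
 * allocation of backing pages described by a scatterlist, later released
 * together with its pages.  The 64 KiB size is arbitrary.
 *
 *	unsigned int nents;
 *	struct scatterlist *sgl;
 *
 *	sgl = sgl_alloc(64 * 1024, GFP_KERNEL, &nents);
 *	if (!sgl)
 *		return -ENOMEM;
 *	...
 *	sgl_free(sgl);
 */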

/**
 * sgl_alloc - allocate a scatterlist and its pages
 * @length: Length in bytes of the scatterlist
 * @gfp: Memory allocation flags
 * @nent_p: [out] Number of entries in the scatterlist
 *
 * Returns: A pointer to an initialized scatterlist or %NULL upon failure.
 */
struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp,
			      unsigned int *nent_p)
{
	return sgl_alloc_order(length, 0, false, gfp, nent_p);
}
EXPORT_SYMBOL(sgl_alloc);

/**
 * sgl_free_n_order - free a scatterlist and its pages
 * @sgl: Scatterlist with one or more elements
 * @nents: Maximum number of elements to free
 * @order: Second argument for __free_pages()
 *
 * Notes:
 * - If several scatterlists have been chained and each chain element is
 *   freed separately then it's essential to set nents correctly to avoid
 *   freeing a page twice.
 * - All pages in a chained scatterlist can be freed at once by setting @nents
 *   to a high number.
 */
void sgl_free_n_order(struct scatterlist *sgl, int nents, int order)
{
	struct scatterlist *sg;
	struct page *page;
	int i;

	for_each_sg(sgl, sg, nents, i) {
		if (!sg)
			break;
		page = sg_page(sg);
		if (page)
			__free_pages(page, order);
	}
	kfree(sgl);
}
EXPORT_SYMBOL(sgl_free_n_order);

/**
 * sgl_free_order - free a scatterlist and its pages
 * @sgl: Scatterlist with one or more elements
 * @order: Second argument for __free_pages()
 */
void sgl_free_order(struct scatterlist *sgl, int order)
{
	sgl_free_n_order(sgl, INT_MAX, order);
}
EXPORT_SYMBOL(sgl_free_order);

/**
 * sgl_free - free a scatterlist and its pages
 * @sgl: Scatterlist with one or more elements
 */
void sgl_free(struct scatterlist *sgl)
{
	sgl_free_order(sgl, 0);
}
EXPORT_SYMBOL(sgl_free);

#endif /* CONFIG_SGL_ALLOC */

void __sg_page_iter_start(struct sg_page_iter *piter,
			  struct scatterlist *sglist, unsigned int nents,
			  unsigned long pgoffset)
{
	piter->__pg_advance = 0;
	piter->__nents = nents;

	piter->sg = sglist;
	piter->sg_pgoffset = pgoffset;
}
EXPORT_SYMBOL(__sg_page_iter_start);

static int sg_page_count(struct scatterlist *sg)
{
	return PAGE_ALIGN(sg->offset + sg->length) >> PAGE_SHIFT;
}

bool __sg_page_iter_next(struct sg_page_iter *piter)
{
	if (!piter->__nents || !piter->sg)
		return false;

	piter->sg_pgoffset += piter->__pg_advance;
	piter->__pg_advance = 1;

	while (piter->sg_pgoffset >= sg_page_count(piter->sg)) {
		piter->sg_pgoffset -= sg_page_count(piter->sg);
		piter->sg = sg_next(piter->sg);
		if (!--piter->__nents || !piter->sg)
			return false;
	}

	return true;
}
EXPORT_SYMBOL(__sg_page_iter_next);

static int sg_dma_page_count(struct scatterlist *sg)
{
	return PAGE_ALIGN(sg->offset + sg_dma_len(sg)) >> PAGE_SHIFT;
}

bool __sg_page_iter_dma_next(struct sg_dma_page_iter *dma_iter)
{
	struct sg_page_iter *piter = &dma_iter->base;

	if (!piter->__nents || !piter->sg)
		return false;

	piter->sg_pgoffset += piter->__pg_advance;
	piter->__pg_advance = 1;

	while (piter->sg_pgoffset >= sg_dma_page_count(piter->sg)) {
		piter->sg_pgoffset -= sg_dma_page_count(piter->sg);
		piter->sg = sg_next(piter->sg);
		if (!--piter->__nents || !piter->sg)
			return false;
	}

	return true;
}
EXPORT_SYMBOL(__sg_page_iter_dma_next);
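
/*
 * Usage sketch (illustrative, not part of this file): the iterators above
 * are normally driven through the for_each_sg_page() helper from
 * <linux/scatterlist.h>, e.g. to visit every page backing a table.  "sgt"
 * is hypothetical.
 *
 *	struct sg_page_iter piter;
 *
 *	for_each_sg_page(sgt->sgl, &piter, sgt->orig_nents, 0) {
 *		struct page *page = sg_page_iter_page(&piter);
 *
 *		set_page_dirty(page);
 *	}
 */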

/**
 * sg_miter_start - start mapping iteration over a sg list
 * @miter: sg mapping iter to be started
 * @sgl: sg list to iterate over
 * @nents: number of sg entries
 * @flags: sg iterator flags
 *
 * Description:
 *   Starts mapping iterator @miter.
 *
 * Context:
 *   Don't care.
 */
void sg_miter_start(struct sg_mapping_iter *miter, struct scatterlist *sgl,
		    unsigned int nents, unsigned int flags)
{
	memset(miter, 0, sizeof(struct sg_mapping_iter));

	__sg_page_iter_start(&miter->piter, sgl, nents, 0);
	WARN_ON(!(flags & (SG_MITER_TO_SG | SG_MITER_FROM_SG)));
	miter->__flags = flags;
}
EXPORT_SYMBOL(sg_miter_start);

static bool sg_miter_get_next_page(struct sg_mapping_iter *miter)
{
	if (!miter->__remaining) {
		struct scatterlist *sg;

		if (!__sg_page_iter_next(&miter->piter))
			return false;

		sg = miter->piter.sg;

		miter->__offset = miter->piter.sg_pgoffset ? 0 : sg->offset;
		miter->piter.sg_pgoffset += miter->__offset >> PAGE_SHIFT;
		miter->__offset &= PAGE_SIZE - 1;
		miter->__remaining = sg->offset + sg->length -
				     (miter->piter.sg_pgoffset << PAGE_SHIFT) -
				     miter->__offset;
		miter->__remaining = min_t(unsigned long, miter->__remaining,
					   PAGE_SIZE - miter->__offset);
	}

	return true;
}

/**
 * sg_miter_skip - reposition mapping iterator
 * @miter: sg mapping iter to be skipped
 * @offset: number of bytes to advance from the current location
 *
 * Description:
 *   Sets the offset of @miter to its current location plus @offset bytes.
 *   If the mapping iterator @miter has been advanced by sg_miter_next(),
 *   this stops @miter.
 *
 * Context:
 *   Don't care.
 *
 * Returns:
 *   true if @miter contains the valid mapping.  false if end of sg
 *   list is reached.
 */
bool sg_miter_skip(struct sg_mapping_iter *miter, off_t offset)
{
	sg_miter_stop(miter);

	while (offset) {
		off_t consumed;

		if (!sg_miter_get_next_page(miter))
			return false;

		consumed = min_t(off_t, offset, miter->__remaining);
		miter->__offset += consumed;
		miter->__remaining -= consumed;
		offset -= consumed;
	}

	return true;
}
EXPORT_SYMBOL(sg_miter_skip);

/**
 * sg_miter_next - proceed mapping iterator to the next mapping
 * @miter: sg mapping iter to proceed
 *
 * Description:
 *   Proceeds @miter to the next mapping. @miter should have been started
 *   using sg_miter_start().  On successful return, @miter->page,
 *   @miter->addr and @miter->length point to the current mapping.
 *
 * Context:
 *   May sleep if !SG_MITER_ATOMIC && !SG_MITER_LOCAL.
 *
 * Returns:
 *   true if @miter contains the next mapping.  false if end of sg
 *   list is reached.
 */
bool sg_miter_next(struct sg_mapping_iter *miter)
{
	sg_miter_stop(miter);

	/*
	 * Get to the next page if necessary.
	 * __remaining, __offset is adjusted by sg_miter_stop
	 */
	if (!sg_miter_get_next_page(miter))
		return false;

	miter->page = sg_page_iter_page(&miter->piter);
	miter->consumed = miter->length = miter->__remaining;

	if (miter->__flags & SG_MITER_ATOMIC)
		miter->addr = kmap_atomic(miter->page) + miter->__offset;
	else if (miter->__flags & SG_MITER_LOCAL)
		miter->addr = kmap_local_page(miter->page) + miter->__offset;
	else
		miter->addr = kmap(miter->page) + miter->__offset;

	return true;
}
EXPORT_SYMBOL(sg_miter_next);
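
/*
 * Usage sketch (illustrative, not part of this file): walking the mapped
 * data of an sg list with the mapping iterator, here to compute a simple
 * XOR checksum.  "sgl" and "nents" are hypothetical.
 *
 *	struct sg_mapping_iter miter;
 *	u8 csum = 0;
 *	size_t i;
 *
 *	sg_miter_start(&miter, sgl, nents, SG_MITER_FROM_SG | SG_MITER_LOCAL);
 *	while (sg_miter_next(&miter)) {
 *		for (i = 0; i < miter.length; i++)
 *			csum ^= ((u8 *)miter.addr)[i];
 *	}
 *	sg_miter_stop(&miter);
 */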

/**
 * sg_miter_stop - stop mapping iteration
 * @miter: sg mapping iter to be stopped
 *
 * Description:
 *   Stops mapping iterator @miter.  @miter should have been started
 *   using sg_miter_start().  A stopped iteration can be resumed by
 *   calling sg_miter_next() on it.  This is useful when resources (kmap)
 *   need to be released during iteration.
 *
 * Context:
 *   Don't care.
 */
void sg_miter_stop(struct sg_mapping_iter *miter)
{
	WARN_ON(miter->consumed > miter->length);

	/* drop resources from the last iteration */
	if (miter->addr) {
		miter->__offset += miter->consumed;
		miter->__remaining -= miter->consumed;

		if (miter->__flags & SG_MITER_TO_SG)
			flush_dcache_page(miter->page);

		if (miter->__flags & SG_MITER_ATOMIC) {
			WARN_ON_ONCE(!pagefault_disabled());
			kunmap_atomic(miter->addr);
		} else if (miter->__flags & SG_MITER_LOCAL)
			kunmap_local(miter->addr);
		else
			kunmap(miter->page);

		miter->page = NULL;
		miter->addr = NULL;
		miter->length = 0;
		miter->consumed = 0;
	}
}
EXPORT_SYMBOL(sg_miter_stop);

/**
 * sg_copy_buffer - Copy data between a linear buffer and an SG list
 * @sgl: The SG list
 * @nents: Number of SG entries
 * @buf: Where to copy from
 * @buflen: The number of bytes to copy
 * @skip: Number of bytes to skip before copying
 * @to_buffer: transfer direction (true == from an sg list to a
 *    buffer, false == from a buffer to an sg list)
 *
 * Returns the number of copied bytes.
 *
 **/
size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf,
		      size_t buflen, off_t skip, bool to_buffer)
{
	unsigned int offset = 0;
	struct sg_mapping_iter miter;
	unsigned int sg_flags = SG_MITER_LOCAL;

	if (to_buffer)
		sg_flags |= SG_MITER_FROM_SG;
	else
		sg_flags |= SG_MITER_TO_SG;

	sg_miter_start(&miter, sgl, nents, sg_flags);

	if (!sg_miter_skip(&miter, skip))
		return 0;

	while ((offset < buflen) && sg_miter_next(&miter)) {
		unsigned int len;

		len = min(miter.length, buflen - offset);

		if (to_buffer)
			memcpy(buf + offset, miter.addr, len);
		else
			memcpy(miter.addr, buf + offset, len);

		offset += len;
	}

	sg_miter_stop(&miter);

	return offset;
}
EXPORT_SYMBOL(sg_copy_buffer);

/**
 * sg_copy_from_buffer - Copy from a linear buffer to an SG list
 * @sgl: The SG list
 * @nents: Number of SG entries
 * @buf: Where to copy from
 * @buflen: The number of bytes to copy
 *
 * Returns the number of copied bytes.
 *
 **/
size_t sg_copy_from_buffer(struct scatterlist *sgl, unsigned int nents,
			   const void *buf, size_t buflen)
{
	return sg_copy_buffer(sgl, nents, (void *)buf, buflen, 0, false);
}
EXPORT_SYMBOL(sg_copy_from_buffer);
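
/*
 * Usage sketch (illustrative, not part of this file): staging a response
 * header into an sg list and reading it back, as a storage target might.
 * "sgl", "nents" and the header layout are hypothetical.
 *
 *	u8 hdr[16] = { 0xde, 0xad, 0xbe, 0xef };
 *	u8 check[16];
 *	size_t copied;
 *
 *	copied = sg_copy_from_buffer(sgl, nents, hdr, sizeof(hdr));
 *	...
 *	copied = sg_copy_to_buffer(sgl, nents, check, sizeof(check));
 */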

/**
 * sg_copy_to_buffer - Copy from an SG list to a linear buffer
 * @sgl: The SG list
 * @nents: Number of SG entries
 * @buf: Where to copy to
 * @buflen: The number of bytes to copy
 *
 * Returns the number of copied bytes.
 *
 **/
size_t sg_copy_to_buffer(struct scatterlist *sgl, unsigned int nents,
			 void *buf, size_t buflen)
{
	return sg_copy_buffer(sgl, nents, buf, buflen, 0, true);
}
EXPORT_SYMBOL(sg_copy_to_buffer);

/**
 * sg_pcopy_from_buffer - Copy from a linear buffer to an SG list
 * @sgl: The SG list
 * @nents: Number of SG entries
 * @buf: Where to copy from
 * @buflen: The number of bytes to copy
 * @skip: Number of bytes to skip before copying
 *
 * Returns the number of copied bytes.
 *
 **/
size_t sg_pcopy_from_buffer(struct scatterlist *sgl, unsigned int nents,
			    const void *buf, size_t buflen, off_t skip)
{
	return sg_copy_buffer(sgl, nents, (void *)buf, buflen, skip, false);
}
EXPORT_SYMBOL(sg_pcopy_from_buffer);

/**
 * sg_pcopy_to_buffer - Copy from an SG list to a linear buffer
 * @sgl: The SG list
 * @nents: Number of SG entries
 * @buf: Where to copy to
 * @buflen: The number of bytes to copy
 * @skip: Number of bytes to skip before copying
 *
 * Returns the number of copied bytes.
 *
 **/
size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents,
			  void *buf, size_t buflen, off_t skip)
{
	return sg_copy_buffer(sgl, nents, buf, buflen, skip, true);
}
EXPORT_SYMBOL(sg_pcopy_to_buffer);

/**
 * sg_zero_buffer - Zero-out a part of a SG list
 * @sgl: The SG list
 * @nents: Number of SG entries
 * @buflen: The number of bytes to zero out
 * @skip: Number of bytes to skip before zeroing
 *
 * Returns the number of bytes zeroed.
 **/
size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents,
		      size_t buflen, off_t skip)
{
	unsigned int offset = 0;
	struct sg_mapping_iter miter;
	unsigned int sg_flags = SG_MITER_LOCAL | SG_MITER_TO_SG;

	sg_miter_start(&miter, sgl, nents, sg_flags);

	if (!sg_miter_skip(&miter, skip))
		return false;

	while (offset < buflen && sg_miter_next(&miter)) {
		unsigned int len;

		len = min(miter.length, buflen - offset);
		memset(miter.addr, 0, len);

		offset += len;
	}

	sg_miter_stop(&miter);
	return offset;
}
EXPORT_SYMBOL(sg_zero_buffer);
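
/*
 * Usage sketch (illustrative, not part of this file): patching a few bytes
 * at a given offset inside an sg-described buffer and clearing another
 * range.  "sgl", "nents" and the offsets are hypothetical.
 *
 *	u8 patch[4] = { 1, 2, 3, 4 };
 *
 *	sg_pcopy_from_buffer(sgl, nents, patch, sizeof(patch), 128);
 *	sg_zero_buffer(sgl, nents, 64, 256);
 */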

/*
 * Extract and pin a list of up to sg_max pages from UBUF- or IOVEC-class
 * iterators, and add them to the scatterlist.
 */
static ssize_t extract_user_to_sg(struct iov_iter *iter,
				  ssize_t maxsize,
				  struct sg_table *sgtable,
				  unsigned int sg_max,
				  iov_iter_extraction_t extraction_flags)
{
	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
	struct page **pages;
	unsigned int npages;
	ssize_t ret = 0, res;
	size_t len, off;

	/* We decant the page list into the tail of the scatterlist */
	pages = (void *)sgtable->sgl +
		array_size(sg_max, sizeof(struct scatterlist));
	pages -= sg_max;

	do {
		res = iov_iter_extract_pages(iter, &pages, maxsize, sg_max,
					     extraction_flags, &off);
		if (res <= 0)
			goto failed;

		len = res;
		maxsize -= len;
		ret += len;
		npages = DIV_ROUND_UP(off + len, PAGE_SIZE);
		sg_max -= npages;

		for (; npages > 0; npages--) {
			struct page *page = *pages;
			size_t seg = min_t(size_t, PAGE_SIZE - off, len);

			*pages++ = NULL;
			sg_set_page(sg, page, seg, off);
			sgtable->nents++;
			sg++;
			len -= seg;
			off = 0;
		}
	} while (maxsize > 0 && sg_max > 0);

	return ret;

failed:
	while (sgtable->nents > sgtable->orig_nents)
		unpin_user_page(sg_page(&sgtable->sgl[--sgtable->nents]));
	return res;
}

/*
 * Extract up to sg_max pages from a BVEC-type iterator and add them to the
 * scatterlist.  The pages are not pinned.
 */
static ssize_t extract_bvec_to_sg(struct iov_iter *iter,
				  ssize_t maxsize,
				  struct sg_table *sgtable,
				  unsigned int sg_max,
				  iov_iter_extraction_t extraction_flags)
{
	const struct bio_vec *bv = iter->bvec;
	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
	unsigned long start = iter->iov_offset;
	unsigned int i;
	ssize_t ret = 0;

	for (i = 0; i < iter->nr_segs; i++) {
		size_t off, len;

		len = bv[i].bv_len;
		if (start >= len) {
			start -= len;
			continue;
		}

		len = min_t(size_t, maxsize, len - start);
		off = bv[i].bv_offset + start;

		sg_set_page(sg, bv[i].bv_page, len, off);
		sgtable->nents++;
		sg++;
		sg_max--;

		ret += len;
		maxsize -= len;
		if (maxsize <= 0 || sg_max == 0)
			break;
		start = 0;
	}

	if (ret > 0)
		iov_iter_advance(iter, ret);
	return ret;
}

/*
 * Extract up to sg_max pages from a KVEC-type iterator and add them to the
 * scatterlist.  This can deal with vmalloc'd buffers as well as kmalloc'd or
 * static buffers.  The pages are not pinned.
 */
static ssize_t extract_kvec_to_sg(struct iov_iter *iter,
				  ssize_t maxsize,
				  struct sg_table *sgtable,
				  unsigned int sg_max,
				  iov_iter_extraction_t extraction_flags)
{
	const struct kvec *kv = iter->kvec;
	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
	unsigned long start = iter->iov_offset;
	unsigned int i;
	ssize_t ret = 0;

	for (i = 0; i < iter->nr_segs; i++) {
		struct page *page;
		unsigned long kaddr;
		size_t off, len, seg;

		len = kv[i].iov_len;
		if (start >= len) {
			start -= len;
			continue;
		}

		kaddr = (unsigned long)kv[i].iov_base + start;
		off = kaddr & ~PAGE_MASK;
		len = min_t(size_t, maxsize, len - start);
		kaddr &= PAGE_MASK;

		maxsize -= len;
		ret += len;
		do {
			seg = min_t(size_t, len, PAGE_SIZE - off);
			if (is_vmalloc_or_module_addr((void *)kaddr))
				page = vmalloc_to_page((void *)kaddr);
			else
				page = virt_to_page((void *)kaddr);

			sg_set_page(sg, page, len, off);
			sgtable->nents++;
			sg++;
			sg_max--;

			len -= seg;
			kaddr += PAGE_SIZE;
			off = 0;
		} while (len > 0 && sg_max > 0);

		if (maxsize <= 0 || sg_max == 0)
			break;
		start = 0;
	}

	if (ret > 0)
		iov_iter_advance(iter, ret);
	return ret;
}

/*
 * Extract up to sg_max folios from a FOLIOQ-type iterator and add them to
 * the scatterlist.  The pages are not pinned.
 */
static ssize_t extract_folioq_to_sg(struct iov_iter *iter,
				    ssize_t maxsize,
				    struct sg_table *sgtable,
				    unsigned int sg_max,
				    iov_iter_extraction_t extraction_flags)
{
	const struct folio_queue *folioq = iter->folioq;
	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
	unsigned int slot = iter->folioq_slot;
	ssize_t ret = 0;
	size_t offset = iter->iov_offset;

	BUG_ON(!folioq);

	if (slot >= folioq_nr_slots(folioq)) {
		folioq = folioq->next;
		if (WARN_ON_ONCE(!folioq))
			return 0;
		slot = 0;
	}

	do {
		struct folio *folio = folioq_folio(folioq, slot);
		size_t fsize = folioq_folio_size(folioq, slot);

		if (offset < fsize) {
			size_t part = umin(maxsize - ret, fsize - offset);

			sg_set_page(sg, folio_page(folio, 0), part, offset);
			sgtable->nents++;
			sg++;
			sg_max--;
			offset += part;
			ret += part;
		}

		if (offset >= fsize) {
			offset = 0;
			slot++;
			if (slot >= folioq_nr_slots(folioq)) {
				if (!folioq->next) {
					WARN_ON_ONCE(ret < iter->count);
					break;
				}
				folioq = folioq->next;
				slot = 0;
			}
		}
	} while (sg_max > 0 && ret < maxsize);

	iter->folioq = folioq;
	iter->folioq_slot = slot;
	iter->iov_offset = offset;
	iter->count -= ret;
	return ret;
}

/*
 * Extract up to sg_max folios from an XARRAY-type iterator and add them to
 * the scatterlist.  The pages are not pinned.
 */
static ssize_t extract_xarray_to_sg(struct iov_iter *iter,
				    ssize_t maxsize,
				    struct sg_table *sgtable,
				    unsigned int sg_max,
				    iov_iter_extraction_t extraction_flags)
{
	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
	struct xarray *xa = iter->xarray;
	struct folio *folio;
	loff_t start = iter->xarray_start + iter->iov_offset;
	pgoff_t index = start / PAGE_SIZE;
	ssize_t ret = 0;
	size_t offset, len;
	XA_STATE(xas, xa, index);

	rcu_read_lock();

	xas_for_each(&xas, folio, ULONG_MAX) {
		if (xas_retry(&xas, folio))
			continue;
		if (WARN_ON(xa_is_value(folio)))
			break;
		if (WARN_ON(folio_test_hugetlb(folio)))
			break;

		offset = offset_in_folio(folio, start);
		len = min_t(size_t, maxsize, folio_size(folio) - offset);

		sg_set_page(sg, folio_page(folio, 0), len, offset);
		sgtable->nents++;
		sg++;
		sg_max--;

		maxsize -= len;
		ret += len;
		if (maxsize <= 0 || sg_max == 0)
			break;
	}

	rcu_read_unlock();
	if (ret > 0)
		iov_iter_advance(iter, ret);
	return ret;
}

/**
 * extract_iter_to_sg - Extract pages from an iterator and add to an sglist
 * @iter: The iterator to extract from
 * @maxsize: The amount of iterator to copy
 * @sgtable: The scatterlist table to fill in
 * @sg_max: Maximum number of elements in @sgtable that may be filled
 * @extraction_flags: Flags to qualify the request
 *
 * Extract the page fragments from the given amount of the source iterator and
 * add them to a scatterlist that refers to all of those bits, to a maximum
 * addition of @sg_max elements.
 *
 * The pages referred to by UBUF- and IOVEC-type iterators are extracted and
 * pinned; BVEC-, KVEC-, FOLIOQ- and XARRAY-type are extracted but aren't
 * pinned; DISCARD-type is not supported.
 *
 * No end mark is placed on the scatterlist; that's left to the caller.
 *
 * @extraction_flags can have ITER_ALLOW_P2PDMA set to request peer-to-peer DMA
 * be allowed on the pages extracted.
 *
 * If successful, @sgtable->nents is updated to include the number of elements
 * added and the number of bytes added is returned.  @sgtable->orig_nents is
 * left unaltered.
 *
 * The iov_iter_extract_mode() function should be used to query how cleanup
 * should be performed.
 */
ssize_t extract_iter_to_sg(struct iov_iter *iter, size_t maxsize,
			   struct sg_table *sgtable, unsigned int sg_max,
			   iov_iter_extraction_t extraction_flags)
{
	if (maxsize == 0)
		return 0;

	switch (iov_iter_type(iter)) {
	case ITER_UBUF:
	case ITER_IOVEC:
		return extract_user_to_sg(iter, maxsize, sgtable, sg_max,
					  extraction_flags);
	case ITER_BVEC:
		return extract_bvec_to_sg(iter, maxsize, sgtable, sg_max,
					  extraction_flags);
	case ITER_KVEC:
		return extract_kvec_to_sg(iter, maxsize, sgtable, sg_max,
					  extraction_flags);
	case ITER_FOLIOQ:
		return extract_folioq_to_sg(iter, maxsize, sgtable, sg_max,
					    extraction_flags);
	case ITER_XARRAY:
		return extract_xarray_to_sg(iter, maxsize, sgtable, sg_max,
					    extraction_flags);
	default:
		pr_err("%s(%u) unsupported\n", __func__, iov_iter_type(iter));
		WARN_ON_ONCE(1);
		return -EIO;
	}
}
EXPORT_SYMBOL_GPL(extract_iter_to_sg);
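
/*
 * Usage sketch (illustrative, not part of this file): filling a small,
 * caller-provided sg table from a KVEC iterator, as a network filesystem
 * might do before issuing I/O.  "buf" and "len" are hypothetical.
 *
 *	struct scatterlist sgl[16];
 *	struct sg_table sgt = { .sgl = sgl };
 *	struct kvec kv = { .iov_base = buf, .iov_len = len };
 *	struct iov_iter iter;
 *	ssize_t n;
 *
 *	sg_init_table(sgl, ARRAY_SIZE(sgl));
 *	iov_iter_kvec(&iter, ITER_SOURCE, &kv, 1, len);
 *	n = extract_iter_to_sg(&iter, len, &sgt, ARRAY_SIZE(sgl), 0);
 *	if (n > 0)
 *		sg_mark_end(&sgl[sgt.nents - 1]);
 */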