// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2007 Jens Axboe <jens.axboe@oracle.com>
 *
 * Scatterlist handling helpers.
 */
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/scatterlist.h>
#include <linux/highmem.h>
#include <linux/kmemleak.h>
#include <linux/bvec.h>
#include <linux/uio.h>
#include <linux/folio_queue.h>

/**
 * sg_next - return the next scatterlist entry in a list
 * @sg: The current sg entry
 *
 * Description:
 *   Usually the next entry will be @sg@ + 1, but if this sg element is part
 *   of a chained scatterlist, it could jump to the start of a new
 *   scatterlist array.
 *
 **/
struct scatterlist *sg_next(struct scatterlist *sg)
{
	if (sg_is_last(sg))
		return NULL;

	sg++;
	if (unlikely(sg_is_chain(sg)))
		sg = sg_chain_ptr(sg);

	return sg;
}
EXPORT_SYMBOL(sg_next);

/**
 * sg_nents - return total count of entries in scatterlist
 * @sg: The scatterlist
 *
 * Description:
 *   Returns the number of entries in @sg, taking chaining into account.
 *
 **/
int sg_nents(struct scatterlist *sg)
{
	int nents;

	for (nents = 0; sg; sg = sg_next(sg))
		nents++;
	return nents;
}
EXPORT_SYMBOL(sg_nents);

/**
 * sg_nents_for_len - return total count of entries in scatterlist
 *                    needed to satisfy the supplied length
 * @sg: The scatterlist
 * @len: The total required length
 *
 * Description:
 *   Determines the number of entries in @sg that are required to meet
 *   the supplied length, taking chaining into account.
 *
 * Returns:
 *   the number of sg entries needed, negative error on failure
 *
 **/
int sg_nents_for_len(struct scatterlist *sg, u64 len)
{
	int nents;
	u64 total;

	if (!len)
		return 0;

	for (nents = 0, total = 0; sg; sg = sg_next(sg)) {
		nents++;
		total += sg->length;
		if (total >= len)
			return nents;
	}

	return -EINVAL;
}
EXPORT_SYMBOL(sg_nents_for_len);
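
/*
 * Example (illustrative sketch, not part of the upstream file): walking a
 * possibly-chained list with sg_next() and counting it with sg_nents().
 * my_dump_sgl() is a hypothetical helper.
 *
 *	static void my_dump_sgl(struct scatterlist *sgl)
 *	{
 *		struct scatterlist *sg;
 *
 *		pr_info("%d entries\n", sg_nents(sgl));
 *		for (sg = sgl; sg; sg = sg_next(sg))
 *			pr_info("  len=%u off=%u\n", sg->length, sg->offset);
 *	}
 */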

/**
 * sg_last - return the last scatterlist entry in a list
 * @sgl: First entry in the scatterlist
 * @nents: Number of entries in the scatterlist
 *
 * Description:
 *   Should only be used casually, it (currently) scans the entire list
 *   to get the last entry.
 *
 *   Note that the @sgl@ pointer passed in need not be the first one,
 *   the important bit is that @nents@ denotes the number of entries that
 *   exist from @sgl@.
 *
 **/
struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents)
{
	struct scatterlist *sg, *ret = NULL;
	unsigned int i;

	for_each_sg(sgl, sg, nents, i)
		ret = sg;

	BUG_ON(!sg_is_last(ret));
	return ret;
}
EXPORT_SYMBOL(sg_last);

/**
 * sg_init_table - Initialize SG table
 * @sgl: The SG table
 * @nents: Number of entries in table
 *
 * Notes:
 *   If this is part of a chained sg table, sg_mark_end() should be
 *   used only on the last table part.
 *
 **/
void sg_init_table(struct scatterlist *sgl, unsigned int nents)
{
	memset(sgl, 0, sizeof(*sgl) * nents);
	sg_init_marker(sgl, nents);
}
EXPORT_SYMBOL(sg_init_table);

/**
 * sg_init_one - Initialize a single entry sg list
 * @sg: SG entry
 * @buf: Virtual address for IO
 * @buflen: IO length
 *
 **/
void sg_init_one(struct scatterlist *sg, const void *buf, unsigned int buflen)
{
	sg_init_table(sg, 1);
	sg_set_buf(sg, buf, buflen);
}
EXPORT_SYMBOL(sg_init_one);
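
/*
 * Example (illustrative sketch, not part of the upstream file): describing a
 * single lowmem buffer as a one-entry sg list, e.g. for the crypto or DMA
 * APIs.  "buf" and "len" are hypothetical.
 *
 *	struct scatterlist sg;
 *
 *	sg_init_one(&sg, buf, len);
 *
 * The single entry is both first and last, so &sg can be handed directly to
 * consumers that expect a scatterlist.
 */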

/*
 * The default behaviour of sg_alloc_table() is to use these kmalloc/kfree
 * helpers.
 */
static struct scatterlist *sg_kmalloc(unsigned int nents, gfp_t gfp_mask)
{
	if (nents == SG_MAX_SINGLE_ALLOC) {
		/*
		 * Kmemleak doesn't track page allocations as they are not
		 * commonly used (in a raw form) for kernel data structures.
		 * As we chain together a list of pages and then a normal
		 * kmalloc (tracked by kmemleak), in order for that last
		 * allocation not to become decoupled (and thus a
		 * false-positive) we need to inform kmemleak of all the
		 * intermediate allocations.
		 */
		void *ptr = (void *) __get_free_page(gfp_mask);

		kmemleak_alloc(ptr, PAGE_SIZE, 1, gfp_mask);
		return ptr;
	} else
		return kmalloc_array(nents, sizeof(struct scatterlist),
				     gfp_mask);
}

static void sg_kfree(struct scatterlist *sg, unsigned int nents)
{
	if (nents == SG_MAX_SINGLE_ALLOC) {
		kmemleak_free(sg);
		free_page((unsigned long) sg);
	} else
		kfree(sg);
}

/**
 * __sg_free_table - Free a previously mapped sg table
 * @table: The sg table header to use
 * @max_ents: The maximum number of entries per single scatterlist
 * @nents_first_chunk: Number of entries in the (preallocated) first
 *    scatterlist chunk, 0 means no such preallocated first chunk
 * @free_fn: Free function
 * @num_ents: Number of entries in the table
 *
 * Description:
 *   Free an sg table previously allocated and setup with
 *   __sg_alloc_table().  The @max_ents value must be identical to
 *   that previously used with __sg_alloc_table().
 *
 **/
void __sg_free_table(struct sg_table *table, unsigned int max_ents,
		     unsigned int nents_first_chunk, sg_free_fn *free_fn,
		     unsigned int num_ents)
{
	struct scatterlist *sgl, *next;
	unsigned curr_max_ents = nents_first_chunk ?: max_ents;

	if (unlikely(!table->sgl))
		return;

	sgl = table->sgl;
	while (num_ents) {
		unsigned int alloc_size = num_ents;
		unsigned int sg_size;

		/*
		 * If we have more than max_ents segments left,
		 * then assign 'next' to the sg table after the current one.
		 * sg_size is then one less than alloc size, since the last
		 * element is the chain pointer.
		 */
		if (alloc_size > curr_max_ents) {
			next = sg_chain_ptr(&sgl[curr_max_ents - 1]);
			alloc_size = curr_max_ents;
			sg_size = alloc_size - 1;
		} else {
			sg_size = alloc_size;
			next = NULL;
		}

		num_ents -= sg_size;
		if (nents_first_chunk)
			nents_first_chunk = 0;
		else
			free_fn(sgl, alloc_size);
		sgl = next;
		curr_max_ents = max_ents;
	}

	table->sgl = NULL;
}
EXPORT_SYMBOL(__sg_free_table);

/**
 * sg_free_append_table - Free a previously allocated append sg table.
 * @table: The mapped sg append table header
 *
 **/
void sg_free_append_table(struct sg_append_table *table)
{
	__sg_free_table(&table->sgt, SG_MAX_SINGLE_ALLOC, 0, sg_kfree,
			table->total_nents);
}
EXPORT_SYMBOL(sg_free_append_table);

/**
 * sg_free_table - Free a previously allocated sg table
 * @table: The mapped sg table header
 *
 **/
void sg_free_table(struct sg_table *table)
{
	__sg_free_table(table, SG_MAX_SINGLE_ALLOC, 0, sg_kfree,
			table->orig_nents);
}
EXPORT_SYMBOL(sg_free_table);
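
/*
 * Example (illustrative sketch, not part of the upstream file): how chaining
 * works at the lowest level.  The last slot of the first array is sacrificed
 * as a chain pointer, which is exactly the layout __sg_free_table() walks.
 *
 *	struct scatterlist first[3], second[4];
 *
 *	sg_init_table(first, 3);
 *	sg_init_table(second, 4);
 *	sg_chain(first, 3, second);
 *
 * After this, first[2] is the chain link, so iterating with sg_next() yields
 * first[0], first[1], then second[0]..second[3]; only two data entries remain
 * in "first".
 */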

/**
 * __sg_alloc_table - Allocate and initialize an sg table with given allocator
 * @table: The sg table header to use
 * @nents: Number of entries in sg list
 * @max_ents: The maximum number of entries the allocator returns per call
 * @first_chunk: first SGL if preallocated (may be %NULL)
 * @nents_first_chunk: Number of entries in the (preallocated) first
 *    scatterlist chunk, 0 means no such preallocated chunk provided by user
 * @gfp_mask: GFP allocation mask
 * @alloc_fn: Allocator to use
 *
 * Description:
 *   This function returns a @table @nents long. The allocator is
 *   defined to return scatterlist chunks of maximum size @max_ents.
 *   Thus if @nents is bigger than @max_ents, the scatterlists will be
 *   chained in units of @max_ents.
 *
 * Notes:
 *   If this function returns non-0 (e.g. failure), the caller must call
 *   __sg_free_table() to cleanup any leftover allocations.
 *
 **/
int __sg_alloc_table(struct sg_table *table, unsigned int nents,
		     unsigned int max_ents, struct scatterlist *first_chunk,
		     unsigned int nents_first_chunk, gfp_t gfp_mask,
		     sg_alloc_fn *alloc_fn)
{
	struct scatterlist *sg, *prv;
	unsigned int left;
	unsigned curr_max_ents = nents_first_chunk ?: max_ents;
	unsigned prv_max_ents;

	memset(table, 0, sizeof(*table));

	if (nents == 0)
		return -EINVAL;
#ifdef CONFIG_ARCH_NO_SG_CHAIN
	if (WARN_ON_ONCE(nents > max_ents))
		return -EINVAL;
#endif

	left = nents;
	prv = NULL;
	do {
		unsigned int sg_size, alloc_size = left;

		if (alloc_size > curr_max_ents) {
			alloc_size = curr_max_ents;
			sg_size = alloc_size - 1;
		} else
			sg_size = alloc_size;

		left -= sg_size;

		if (first_chunk) {
			sg = first_chunk;
			first_chunk = NULL;
		} else {
			sg = alloc_fn(alloc_size, gfp_mask);
		}
		if (unlikely(!sg)) {
			/*
			 * Adjust entry count to reflect that the last
			 * entry of the previous table won't be used for
			 * linkage.  Without this, sg_kfree() may get
			 * confused.
			 */
			if (prv)
				table->nents = ++table->orig_nents;

			return -ENOMEM;
		}

		sg_init_table(sg, alloc_size);
		table->nents = table->orig_nents += sg_size;

		/*
		 * If this is the first mapping, assign the sg table header.
		 * If this is not the first mapping, chain previous part.
		 */
		if (prv)
			sg_chain(prv, prv_max_ents, sg);
		else
			table->sgl = sg;

		/*
		 * If no more entries after this one, mark the end
		 */
		if (!left)
			sg_mark_end(&sg[sg_size - 1]);

		prv = sg;
		prv_max_ents = curr_max_ents;
		curr_max_ents = max_ents;
	} while (left);

	return 0;
}
EXPORT_SYMBOL(__sg_alloc_table);

/**
 * sg_alloc_table - Allocate and initialize an sg table
 * @table: The sg table header to use
 * @nents: Number of entries in sg list
 * @gfp_mask: GFP allocation mask
 *
 * Description:
 *   Allocate and initialize an sg table. If @nents@ is larger than
 *   SG_MAX_SINGLE_ALLOC a chained sg table will be setup.
 *
 **/
int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask)
{
	int ret;

	ret = __sg_alloc_table(table, nents, SG_MAX_SINGLE_ALLOC,
			       NULL, 0, gfp_mask, sg_kmalloc);
	if (unlikely(ret))
		sg_free_table(table);
	return ret;
}
EXPORT_SYMBOL(sg_alloc_table);
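
/*
 * Example (illustrative sketch, not part of the upstream file): the usual
 * allocate/fill/free cycle.  "pages" and "npages" are hypothetical; error
 * handling is reduced to the minimum.
 *
 *	struct sg_table sgt;
 *	struct scatterlist *sg;
 *	int i, err;
 *
 *	err = sg_alloc_table(&sgt, npages, GFP_KERNEL);
 *	if (err)
 *		return err;
 *	for_each_sg(sgt.sgl, sg, sgt.orig_nents, i)
 *		sg_set_page(sg, pages[i], PAGE_SIZE, 0);
 *	...
 *	sg_free_table(&sgt);
 *
 * sg_alloc_table() already marked the end, so no sg_mark_end() is needed
 * here.
 */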

static struct scatterlist *get_next_sg(struct sg_append_table *table,
				       struct scatterlist *cur,
				       unsigned long needed_sges,
				       gfp_t gfp_mask)
{
	struct scatterlist *new_sg, *next_sg;
	unsigned int alloc_size;

	if (cur) {
		next_sg = sg_next(cur);
		/* Check if the last entry should be kept for chaining */
		if (!sg_is_last(next_sg) || needed_sges == 1)
			return next_sg;
	}

	alloc_size = min_t(unsigned long, needed_sges, SG_MAX_SINGLE_ALLOC);
	new_sg = sg_kmalloc(alloc_size, gfp_mask);
	if (!new_sg)
		return ERR_PTR(-ENOMEM);
	sg_init_table(new_sg, alloc_size);
	if (cur) {
		table->total_nents += alloc_size - 1;
		__sg_chain(next_sg, new_sg);
	} else {
		table->sgt.sgl = new_sg;
		table->total_nents = alloc_size;
	}
	return new_sg;
}

static bool pages_are_mergeable(struct page *a, struct page *b)
{
	if (page_to_pfn(a) != page_to_pfn(b) + 1)
		return false;
	if (!zone_device_pages_have_same_pgmap(a, b))
		return false;
	return true;
}

/**
 * sg_alloc_append_table_from_pages - Allocate and initialize an append sg
 *                                    table from an array of pages
 * @sgt_append: The sg append table to use
 * @pages: Pointer to an array of page pointers
 * @n_pages: Number of pages in the pages array
 * @offset: Offset from start of the first page to the start of a buffer
 * @size: Number of valid bytes in the buffer (after offset)
 * @max_segment: Maximum size of a scatterlist element in bytes
 * @left_pages: Number of pages the caller will still add after this call
 * @gfp_mask: GFP allocation mask
 *
 * Description:
 *    On the first call this allocates and initializes an sg table from a
 *    list of pages; on subsequent calls it reuses and extends the
 *    scatterlist from @sgt_append.  Contiguous ranges of the pages are
 *    squashed into a single scatterlist entry up to the maximum size
 *    specified in @max_segment.  A user may provide an offset at the start
 *    and a size of valid data in the buffer specified by the page array.
 *    The returned sg table is released by sg_free_append_table().
 *
 * Returns:
 *   0 on success, negative error on failure
 *
 * Notes:
 *   If this function returns non-0 (e.g. failure), the caller must call
 *   sg_free_append_table() to cleanup any leftover allocations.
 *
 *   On the first call, @sgt_append must be zero-initialized.
 */
int sg_alloc_append_table_from_pages(struct sg_append_table *sgt_append,
		struct page **pages, unsigned int n_pages, unsigned int offset,
		unsigned long size, unsigned int max_segment,
		unsigned int left_pages, gfp_t gfp_mask)
{
	unsigned int chunks, cur_page, seg_len, i, prv_len = 0;
	unsigned int added_nents = 0;
	struct scatterlist *s = sgt_append->prv;
	struct page *last_pg;

	/*
	 * The algorithm below requires max_segment to be aligned to PAGE_SIZE
	 * otherwise it can overshoot.
	 */
	max_segment = ALIGN_DOWN(max_segment, PAGE_SIZE);
	if (WARN_ON(max_segment < PAGE_SIZE))
		return -EINVAL;

	if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && sgt_append->prv)
		return -EOPNOTSUPP;

	if (sgt_append->prv) {
		unsigned long next_pfn;

		if (WARN_ON(offset))
			return -EINVAL;

		/* Merge contiguous pages into the last SG */
		prv_len = sgt_append->prv->length;
		next_pfn = (sg_phys(sgt_append->prv) + prv_len) / PAGE_SIZE;
		if (page_to_pfn(pages[0]) == next_pfn) {
			last_pg = pfn_to_page(next_pfn - 1);
			while (n_pages && pages_are_mergeable(pages[0], last_pg)) {
				if (sgt_append->prv->length + PAGE_SIZE > max_segment)
					break;
				sgt_append->prv->length += PAGE_SIZE;
				last_pg = pages[0];
				pages++;
				n_pages--;
			}
			if (!n_pages)
				goto out;
		}
	}

	/* compute number of contiguous chunks */
	chunks = 1;
	seg_len = 0;
	for (i = 1; i < n_pages; i++) {
		seg_len += PAGE_SIZE;
		if (seg_len >= max_segment ||
		    !pages_are_mergeable(pages[i], pages[i - 1])) {
			chunks++;
			seg_len = 0;
		}
	}

	/* merging chunks and putting them into the scatterlist */
	cur_page = 0;
	for (i = 0; i < chunks; i++) {
		unsigned int j, chunk_size;

		/* look for the end of the current chunk */
		seg_len = 0;
		for (j = cur_page + 1; j < n_pages; j++) {
			seg_len += PAGE_SIZE;
			if (seg_len >= max_segment ||
			    !pages_are_mergeable(pages[j], pages[j - 1]))
				break;
		}

		/* Pass how many chunks might be left */
		s = get_next_sg(sgt_append, s, chunks - i + left_pages,
				gfp_mask);
		if (IS_ERR(s)) {
			/*
			 * Adjust entry length to be as before function was
			 * called.
			 */
			if (sgt_append->prv)
				sgt_append->prv->length = prv_len;
			return PTR_ERR(s);
		}
		chunk_size = ((j - cur_page) << PAGE_SHIFT) - offset;
		sg_set_page(s, pages[cur_page],
			    min_t(unsigned long, size, chunk_size), offset);
		added_nents++;
		size -= chunk_size;
		offset = 0;
		cur_page = j;
	}
	sgt_append->sgt.nents += added_nents;
	sgt_append->sgt.orig_nents = sgt_append->sgt.nents;
	sgt_append->prv = s;
out:
	if (!left_pages)
		sg_mark_end(s);
	return 0;
}
EXPORT_SYMBOL(sg_alloc_append_table_from_pages);
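
/*
 * Example (illustrative sketch, not part of the upstream file): building a
 * table in two batches.  "first"/"second" page arrays and "n1"/"n2" are
 * hypothetical; UINT_MAX leaves segment merging effectively unlimited.
 *
 *	struct sg_append_table app = {};
 *	int err;
 *
 *	err = sg_alloc_append_table_from_pages(&app, first, n1, 0,
 *					       n1 * PAGE_SIZE, UINT_MAX,
 *					       n2, GFP_KERNEL);
 *	if (err)
 *		return err;
 *	err = sg_alloc_append_table_from_pages(&app, second, n2, 0,
 *					       n2 * PAGE_SIZE, UINT_MAX,
 *					       0, GFP_KERNEL);
 *	if (err)
 *		sg_free_append_table(&app);
 *
 * Passing @left_pages = n2 on the first call keeps the end mark off until
 * the final batch has been appended.
 */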

/**
 * sg_alloc_table_from_pages_segment - Allocate and initialize an sg table from
 *                                     an array of pages and given maximum
 *                                     segment.
 * @sgt: The sg table header to use
 * @pages: Pointer to an array of page pointers
 * @n_pages: Number of pages in the pages array
 * @offset: Offset from start of the first page to the start of a buffer
 * @size: Number of valid bytes in the buffer (after offset)
 * @max_segment: Maximum size of a scatterlist element in bytes
 * @gfp_mask: GFP allocation mask
 *
 * Description:
 *    Allocate and initialize an sg table from a list of pages.  Contiguous
 *    ranges of the pages are squashed into a single scatterlist node up to
 *    the maximum size specified in @max_segment.  A user may provide an
 *    offset at the start and a size of valid data in a buffer specified by
 *    the page array.
 *
 *    The returned sg table is released by sg_free_table.
 *
 * Returns:
 *   0 on success, negative error on failure
 */
int sg_alloc_table_from_pages_segment(struct sg_table *sgt, struct page **pages,
				unsigned int n_pages, unsigned int offset,
				unsigned long size, unsigned int max_segment,
				gfp_t gfp_mask)
{
	struct sg_append_table append = {};
	int err;

	err = sg_alloc_append_table_from_pages(&append, pages, n_pages, offset,
					       size, max_segment, 0, gfp_mask);
	if (err) {
		sg_free_append_table(&append);
		return err;
	}
	memcpy(sgt, &append.sgt, sizeof(*sgt));
	WARN_ON(append.total_nents != sgt->orig_nents);
	return 0;
}
EXPORT_SYMBOL(sg_alloc_table_from_pages_segment);
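
/*
 * Example (illustrative sketch, not part of the upstream file): one-shot
 * construction from an array of pinned pages, capping each element at a
 * hypothetical device limit "max_seg" (in bytes, at least PAGE_SIZE).
 *
 *	struct sg_table sgt;
 *	int err;
 *
 *	err = sg_alloc_table_from_pages_segment(&sgt, pages, n_pages, 0,
 *						n_pages * PAGE_SIZE,
 *						max_seg, GFP_KERNEL);
 *	if (err)
 *		return err;
 *	...
 *	sg_free_table(&sgt);
 */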

#ifdef CONFIG_SGL_ALLOC

/**
 * sgl_alloc_order - allocate a scatterlist and its pages
 * @length: Length in bytes of the scatterlist. Must be at least one
 * @order: Second argument for alloc_pages()
 * @chainable: Whether or not to allocate an extra element in the scatterlist
 *	for scatterlist chaining purposes
 * @gfp: Memory allocation flags
 * @nent_p: [out] Number of entries in the scatterlist that have pages
 *
 * Returns: A pointer to an initialized scatterlist or %NULL upon failure.
 */
struct scatterlist *sgl_alloc_order(unsigned long long length,
				    unsigned int order, bool chainable,
				    gfp_t gfp, unsigned int *nent_p)
{
	struct scatterlist *sgl, *sg;
	struct page *page;
	unsigned int nent, nalloc;
	u32 elem_len;

	nent = round_up(length, PAGE_SIZE << order) >> (PAGE_SHIFT + order);
	/* Check for integer overflow */
	if (length > (nent << (PAGE_SHIFT + order)))
		return NULL;
	nalloc = nent;
	if (chainable) {
		/* Check for integer overflow */
		if (nalloc + 1 < nalloc)
			return NULL;
		nalloc++;
	}
	sgl = kmalloc_array(nalloc, sizeof(struct scatterlist),
			    gfp & ~GFP_DMA);
	if (!sgl)
		return NULL;

	sg_init_table(sgl, nalloc);
	sg = sgl;
	while (length) {
		elem_len = min_t(u64, length, PAGE_SIZE << order);
		page = alloc_pages(gfp, order);
		if (!page) {
			sgl_free_order(sgl, order);
			return NULL;
		}

		sg_set_page(sg, page, elem_len, 0);
		length -= elem_len;
		sg = sg_next(sg);
	}
	WARN_ONCE(length, "length = %lld\n", length);
	if (nent_p)
		*nent_p = nent;
	return sgl;
}
EXPORT_SYMBOL(sgl_alloc_order);

/**
 * sgl_alloc - allocate a scatterlist and its pages
 * @length: Length in bytes of the scatterlist
 * @gfp: Memory allocation flags
 * @nent_p: [out] Number of entries in the scatterlist
 *
 * Returns: A pointer to an initialized scatterlist or %NULL upon failure.
 */
struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp,
			      unsigned int *nent_p)
{
	return sgl_alloc_order(length, 0, false, gfp, nent_p);
}
EXPORT_SYMBOL(sgl_alloc);
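
/*
 * Example (illustrative sketch, not part of the upstream file; requires
 * CONFIG_SGL_ALLOC): allocating backing pages for an 8-page buffer and
 * releasing them again.
 *
 *	unsigned int nents;
 *	struct scatterlist *sgl;
 *
 *	sgl = sgl_alloc(8 * PAGE_SIZE, GFP_KERNEL, &nents);
 *	if (!sgl)
 *		return -ENOMEM;
 *	...
 *	sgl_free(sgl);
 */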

/**
 * sgl_free_n_order - free a scatterlist and its pages
 * @sgl: Scatterlist with one or more elements
 * @nents: Maximum number of elements to free
 * @order: Second argument for __free_pages()
 *
 * Notes:
 * - If several scatterlists have been chained and each chain element is
 *   freed separately then it's essential to set nents correctly to avoid that a
 *   page would get freed twice.
 * - All pages in a chained scatterlist can be freed at once by setting @nents
 *   to a high number.
 */
void sgl_free_n_order(struct scatterlist *sgl, int nents, int order)
{
	struct scatterlist *sg;
	struct page *page;
	int i;

	for_each_sg(sgl, sg, nents, i) {
		if (!sg)
			break;
		page = sg_page(sg);
		if (page)
			__free_pages(page, order);
	}
	kfree(sgl);
}
EXPORT_SYMBOL(sgl_free_n_order);

/**
 * sgl_free_order - free a scatterlist and its pages
 * @sgl: Scatterlist with one or more elements
 * @order: Second argument for __free_pages()
 */
void sgl_free_order(struct scatterlist *sgl, int order)
{
	sgl_free_n_order(sgl, INT_MAX, order);
}
EXPORT_SYMBOL(sgl_free_order);

/**
 * sgl_free - free a scatterlist and its pages
 * @sgl: Scatterlist with one or more elements
 */
void sgl_free(struct scatterlist *sgl)
{
	sgl_free_order(sgl, 0);
}
EXPORT_SYMBOL(sgl_free);

#endif /* CONFIG_SGL_ALLOC */

void __sg_page_iter_start(struct sg_page_iter *piter,
			  struct scatterlist *sglist, unsigned int nents,
			  unsigned long pgoffset)
{
	piter->__pg_advance = 0;
	piter->__nents = nents;

	piter->sg = sglist;
	piter->sg_pgoffset = pgoffset;
}
EXPORT_SYMBOL(__sg_page_iter_start);

static int sg_page_count(struct scatterlist *sg)
{
	return PAGE_ALIGN(sg->offset + sg->length) >> PAGE_SHIFT;
}

bool __sg_page_iter_next(struct sg_page_iter *piter)
{
	if (!piter->__nents || !piter->sg)
		return false;

	piter->sg_pgoffset += piter->__pg_advance;
	piter->__pg_advance = 1;

	while (piter->sg_pgoffset >= sg_page_count(piter->sg)) {
		piter->sg_pgoffset -= sg_page_count(piter->sg);
		piter->sg = sg_next(piter->sg);
		if (!--piter->__nents || !piter->sg)
			return false;
	}

	return true;
}
EXPORT_SYMBOL(__sg_page_iter_next);

static int sg_dma_page_count(struct scatterlist *sg)
{
	return PAGE_ALIGN(sg->offset + sg_dma_len(sg)) >> PAGE_SHIFT;
}

bool __sg_page_iter_dma_next(struct sg_dma_page_iter *dma_iter)
{
	struct sg_page_iter *piter = &dma_iter->base;

	if (!piter->__nents || !piter->sg)
		return false;

	piter->sg_pgoffset += piter->__pg_advance;
	piter->__pg_advance = 1;

	while (piter->sg_pgoffset >= sg_dma_page_count(piter->sg)) {
		piter->sg_pgoffset -= sg_dma_page_count(piter->sg);
		piter->sg = sg_next(piter->sg);
		if (!--piter->__nents || !piter->sg)
			return false;
	}

	return true;
}
EXPORT_SYMBOL(__sg_page_iter_dma_next);
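
/*
 * Example (illustrative sketch, not part of the upstream file): these
 * low-level helpers back the for_each_sg_page() macro from
 * <linux/scatterlist.h>, which is the usual way to visit every page:
 *
 *	struct sg_page_iter piter;
 *
 *	for_each_sg_page(sgt->sgl, &piter, sgt->orig_nents, 0)
 *		pr_info("pfn %lu\n",
 *			page_to_pfn(sg_page_iter_page(&piter)));
 */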

/**
 * sg_miter_start - start mapping iteration over a sg list
 * @miter: sg mapping iter to be started
 * @sgl: sg list to iterate over
 * @nents: number of sg entries
 * @flags: sg iterator flags
 *
 * Description:
 *   Starts mapping iterator @miter.
 *
 * Context:
 *   Don't care.
 */
void sg_miter_start(struct sg_mapping_iter *miter, struct scatterlist *sgl,
		    unsigned int nents, unsigned int flags)
{
	memset(miter, 0, sizeof(struct sg_mapping_iter));

	__sg_page_iter_start(&miter->piter, sgl, nents, 0);
	WARN_ON(!(flags & (SG_MITER_TO_SG | SG_MITER_FROM_SG)));
	miter->__flags = flags;
}
EXPORT_SYMBOL(sg_miter_start);

static bool sg_miter_get_next_page(struct sg_mapping_iter *miter)
{
	if (!miter->__remaining) {
		struct scatterlist *sg;

		if (!__sg_page_iter_next(&miter->piter))
			return false;

		sg = miter->piter.sg;

		miter->__offset = miter->piter.sg_pgoffset ? 0 : sg->offset;
		miter->piter.sg_pgoffset += miter->__offset >> PAGE_SHIFT;
		miter->__offset &= PAGE_SIZE - 1;
		miter->__remaining = sg->offset + sg->length -
				     (miter->piter.sg_pgoffset << PAGE_SHIFT) -
				     miter->__offset;
		miter->__remaining = min_t(unsigned long, miter->__remaining,
					   PAGE_SIZE - miter->__offset);
	}

	return true;
}

/**
 * sg_miter_skip - reposition mapping iterator
 * @miter: sg mapping iter to be skipped
 * @offset: number of bytes to advance from the current location
 *
 * Description:
 *   Sets the offset of @miter to its current location plus @offset bytes.
 *   If mapping iterator @miter has been advanced by sg_miter_next(), this
 *   stops @miter first.
 *
 * Context:
 *   Don't care.
 *
 * Returns:
 *   true if @miter contains a valid mapping.  false if end of sg
 *   list is reached.
 */
bool sg_miter_skip(struct sg_mapping_iter *miter, off_t offset)
{
	sg_miter_stop(miter);

	while (offset) {
		off_t consumed;

		if (!sg_miter_get_next_page(miter))
			return false;

		consumed = min_t(off_t, offset, miter->__remaining);
		miter->__offset += consumed;
		miter->__remaining -= consumed;
		offset -= consumed;
	}

	return true;
}
EXPORT_SYMBOL(sg_miter_skip);

/**
 * sg_miter_next - proceed mapping iterator to the next mapping
 * @miter: sg mapping iter to proceed
 *
 * Description:
 *   Proceeds @miter to the next mapping. @miter should have been started
 *   using sg_miter_start(). On successful return, @miter->page,
 *   @miter->addr and @miter->length point to the current mapping.
 *
 * Context:
 *   May sleep if !SG_MITER_ATOMIC.
 *
 * Returns:
 *   true if @miter contains the next mapping.  false if end of sg
 *   list is reached.
 */
bool sg_miter_next(struct sg_mapping_iter *miter)
{
	sg_miter_stop(miter);

	/*
	 * Get to the next page if necessary.
	 * __remaining, __offset is adjusted by sg_miter_stop
	 */
	if (!sg_miter_get_next_page(miter))
		return false;

	miter->page = sg_page_iter_page(&miter->piter);
	miter->consumed = miter->length = miter->__remaining;

	if (miter->__flags & SG_MITER_ATOMIC)
		miter->addr = kmap_atomic(miter->page) + miter->__offset;
	else
		miter->addr = kmap(miter->page) + miter->__offset;

	return true;
}
EXPORT_SYMBOL(sg_miter_next);

/**
 * sg_miter_stop - stop mapping iteration
 * @miter: sg mapping iter to be stopped
 *
 * Description:
 *   Stops mapping iterator @miter. @miter should have been started
 *   using sg_miter_start(). A stopped iteration can be resumed by
 *   calling sg_miter_next() on it. This is useful when resources (kmap)
 *   need to be released during iteration.
 *
 * Context:
 *   Don't care otherwise.
 */
void sg_miter_stop(struct sg_mapping_iter *miter)
{
	WARN_ON(miter->consumed > miter->length);

	/* drop resources from the last iteration */
	if (miter->addr) {
		miter->__offset += miter->consumed;
		miter->__remaining -= miter->consumed;

		if (miter->__flags & SG_MITER_TO_SG)
			flush_dcache_page(miter->page);

		if (miter->__flags & SG_MITER_ATOMIC) {
			WARN_ON_ONCE(!pagefault_disabled());
			kunmap_atomic(miter->addr);
		} else
			kunmap(miter->page);

		miter->page = NULL;
		miter->addr = NULL;
		miter->length = 0;
		miter->consumed = 0;
	}
}
EXPORT_SYMBOL(sg_miter_stop);
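
/*
 * Example (illustrative sketch, not part of the upstream file): reading an
 * sg list a mapped chunk at a time, mirroring what sg_copy_buffer() below
 * does internally.  "dst" and "dstlen" are hypothetical.
 *
 *	struct sg_mapping_iter miter;
 *	size_t done = 0;
 *
 *	sg_miter_start(&miter, sgl, nents,
 *		       SG_MITER_ATOMIC | SG_MITER_FROM_SG);
 *	while (done < dstlen && sg_miter_next(&miter)) {
 *		size_t len = min(miter.length, dstlen - done);
 *
 *		memcpy(dst + done, miter.addr, len);
 *		done += len;
 *	}
 *	sg_miter_stop(&miter);
 */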

/**
 * sg_copy_buffer - Copy data between a linear buffer and an SG list
 * @sgl: The SG list
 * @nents: Number of SG entries
 * @buf: Where to copy from
 * @buflen: The number of bytes to copy
 * @skip: Number of bytes to skip before copying
 * @to_buffer: transfer direction (true == from an sg list to a
 *    buffer, false == from a buffer to an sg list)
 *
 * Returns the number of copied bytes.
 *
 **/
size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf,
		      size_t buflen, off_t skip, bool to_buffer)
{
	unsigned int offset = 0;
	struct sg_mapping_iter miter;
	unsigned int sg_flags = SG_MITER_ATOMIC;

	if (to_buffer)
		sg_flags |= SG_MITER_FROM_SG;
	else
		sg_flags |= SG_MITER_TO_SG;

	sg_miter_start(&miter, sgl, nents, sg_flags);

	if (!sg_miter_skip(&miter, skip))
		return 0;

	while ((offset < buflen) && sg_miter_next(&miter)) {
		unsigned int len;

		len = min(miter.length, buflen - offset);

		if (to_buffer)
			memcpy(buf + offset, miter.addr, len);
		else
			memcpy(miter.addr, buf + offset, len);

		offset += len;
	}

	sg_miter_stop(&miter);

	return offset;
}
EXPORT_SYMBOL(sg_copy_buffer);

/**
 * sg_copy_from_buffer - Copy from a linear buffer to an SG list
 * @sgl: The SG list
 * @nents: Number of SG entries
 * @buf: Where to copy from
 * @buflen: The number of bytes to copy
 *
 * Returns the number of copied bytes.
 *
 **/
size_t sg_copy_from_buffer(struct scatterlist *sgl, unsigned int nents,
			   const void *buf, size_t buflen)
{
	return sg_copy_buffer(sgl, nents, (void *)buf, buflen, 0, false);
}
EXPORT_SYMBOL(sg_copy_from_buffer);

/**
 * sg_copy_to_buffer - Copy from an SG list to a linear buffer
 * @sgl: The SG list
 * @nents: Number of SG entries
 * @buf: Where to copy to
 * @buflen: The number of bytes to copy
 *
 * Returns the number of copied bytes.
 *
 **/
size_t sg_copy_to_buffer(struct scatterlist *sgl, unsigned int nents,
			 void *buf, size_t buflen)
{
	return sg_copy_buffer(sgl, nents, buf, buflen, 0, true);
}
EXPORT_SYMBOL(sg_copy_to_buffer);

/**
 * sg_pcopy_from_buffer - Copy from a linear buffer to an SG list
 * @sgl: The SG list
 * @nents: Number of SG entries
 * @buf: Where to copy from
 * @buflen: The number of bytes to copy
 * @skip: Number of bytes to skip before copying
 *
 * Returns the number of copied bytes.
 *
 **/
size_t sg_pcopy_from_buffer(struct scatterlist *sgl, unsigned int nents,
			    const void *buf, size_t buflen, off_t skip)
{
	return sg_copy_buffer(sgl, nents, (void *)buf, buflen, skip, false);
}
EXPORT_SYMBOL(sg_pcopy_from_buffer);

/**
 * sg_pcopy_to_buffer - Copy from an SG list to a linear buffer
 * @sgl: The SG list
 * @nents: Number of SG entries
 * @buf: Where to copy to
 * @buflen: The number of bytes to copy
 * @skip: Number of bytes to skip before copying
 *
 * Returns the number of copied bytes.
 *
 **/
size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents,
			  void *buf, size_t buflen, off_t skip)
{
	return sg_copy_buffer(sgl, nents, buf, buflen, skip, true);
}
EXPORT_SYMBOL(sg_pcopy_to_buffer);

/**
 * sg_zero_buffer - Zero-out a part of a SG list
 * @sgl: The SG list
 * @nents: Number of SG entries
 * @buflen: The number of bytes to zero out
 * @skip: Number of bytes to skip before zeroing
 *
 * Returns the number of bytes zeroed.
 **/
size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents,
		      size_t buflen, off_t skip)
{
	unsigned int offset = 0;
	struct sg_mapping_iter miter;
	unsigned int sg_flags = SG_MITER_ATOMIC | SG_MITER_TO_SG;

	sg_miter_start(&miter, sgl, nents, sg_flags);

	if (!sg_miter_skip(&miter, skip))
		return 0;

	while (offset < buflen && sg_miter_next(&miter)) {
		unsigned int len;

		len = min(miter.length, buflen - offset);
		memset(miter.addr, 0, len);

		offset += len;
	}

	sg_miter_stop(&miter);
	return offset;
}
EXPORT_SYMBOL(sg_zero_buffer);
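
/*
 * Example (illustrative sketch, not part of the upstream file): staging a
 * small header through a linear bounce buffer.  "hdr" is hypothetical.
 *
 *	u8 hdr[16];
 *	size_t n;
 *
 *	n = sg_copy_to_buffer(sgl, nents, hdr, sizeof(hdr));
 *	if (n != sizeof(hdr))
 *		return -EIO;
 */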

/*
 * Extract and pin a list of up to sg_max pages from UBUF- or IOVEC-class
 * iterators, and add them to the scatterlist.
 */
static ssize_t extract_user_to_sg(struct iov_iter *iter,
				  ssize_t maxsize,
				  struct sg_table *sgtable,
				  unsigned int sg_max,
				  iov_iter_extraction_t extraction_flags)
{
	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
	struct page **pages;
	unsigned int npages;
	ssize_t ret = 0, res;
	size_t len, off;

	/* We decant the page list into the tail of the scatterlist */
	pages = (void *)sgtable->sgl +
		array_size(sg_max, sizeof(struct scatterlist));
	pages -= sg_max;

	do {
		res = iov_iter_extract_pages(iter, &pages, maxsize, sg_max,
					     extraction_flags, &off);
		if (res <= 0)
			goto failed;

		len = res;
		maxsize -= len;
		ret += len;
		npages = DIV_ROUND_UP(off + len, PAGE_SIZE);
		sg_max -= npages;

		for (; npages > 0; npages--) {
			struct page *page = *pages;
			size_t seg = min_t(size_t, PAGE_SIZE - off, len);

			*pages++ = NULL;
			sg_set_page(sg, page, seg, off);
			sgtable->nents++;
			sg++;
			len -= seg;
			off = 0;
		}
	} while (maxsize > 0 && sg_max > 0);

	return ret;

failed:
	while (sgtable->nents > sgtable->orig_nents)
		unpin_user_page(sg_page(&sgtable->sgl[--sgtable->nents]));
	return res;
}

/*
 * Extract up to sg_max pages from a BVEC-type iterator and add them to the
 * scatterlist.  The pages are not pinned.
 */
static ssize_t extract_bvec_to_sg(struct iov_iter *iter,
				  ssize_t maxsize,
				  struct sg_table *sgtable,
				  unsigned int sg_max,
				  iov_iter_extraction_t extraction_flags)
{
	const struct bio_vec *bv = iter->bvec;
	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
	unsigned long start = iter->iov_offset;
	unsigned int i;
	ssize_t ret = 0;

	for (i = 0; i < iter->nr_segs; i++) {
		size_t off, len;

		len = bv[i].bv_len;
		if (start >= len) {
			start -= len;
			continue;
		}

		len = min_t(size_t, maxsize, len - start);
		off = bv[i].bv_offset + start;

		sg_set_page(sg, bv[i].bv_page, len, off);
		sgtable->nents++;
		sg++;
		sg_max--;

		ret += len;
		maxsize -= len;
		if (maxsize <= 0 || sg_max == 0)
			break;
		start = 0;
	}

	if (ret > 0)
		iov_iter_advance(iter, ret);
	return ret;
}

/*
 * Extract up to sg_max pages from a KVEC-type iterator and add them to the
 * scatterlist.  This can deal with vmalloc'd buffers as well as kmalloc'd or
 * static buffers.  The pages are not pinned.
 */
static ssize_t extract_kvec_to_sg(struct iov_iter *iter,
				  ssize_t maxsize,
				  struct sg_table *sgtable,
				  unsigned int sg_max,
				  iov_iter_extraction_t extraction_flags)
{
	const struct kvec *kv = iter->kvec;
	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
	unsigned long start = iter->iov_offset;
	unsigned int i;
	ssize_t ret = 0;

	for (i = 0; i < iter->nr_segs; i++) {
		struct page *page;
		unsigned long kaddr;
		size_t off, len, seg;

		len = kv[i].iov_len;
		if (start >= len) {
			start -= len;
			continue;
		}

		kaddr = (unsigned long)kv[i].iov_base + start;
		off = kaddr & ~PAGE_MASK;
		len = min_t(size_t, maxsize, len - start);
		kaddr &= PAGE_MASK;

		maxsize -= len;
		ret += len;
		do {
			seg = min_t(size_t, len, PAGE_SIZE - off);
			if (is_vmalloc_or_module_addr((void *)kaddr))
				page = vmalloc_to_page((void *)kaddr);
			else
				page = virt_to_page((void *)kaddr);

			/*
			 * Each entry covers at most one page, so use the
			 * per-page segment length, not the total remaining
			 * length.
			 */
			sg_set_page(sg, page, seg, off);
			sgtable->nents++;
			sg++;
			sg_max--;

			len -= seg;
			kaddr += PAGE_SIZE;
			off = 0;
		} while (len > 0 && sg_max > 0);

		if (maxsize <= 0 || sg_max == 0)
			break;
		start = 0;
	}

	if (ret > 0)
		iov_iter_advance(iter, ret);
	return ret;
}

/*
 * Extract up to sg_max folios from a FOLIOQ-type iterator and add them to
 * the scatterlist.  The pages are not pinned.
 */
static ssize_t extract_folioq_to_sg(struct iov_iter *iter,
				    ssize_t maxsize,
				    struct sg_table *sgtable,
				    unsigned int sg_max,
				    iov_iter_extraction_t extraction_flags)
{
	const struct folio_queue *folioq = iter->folioq;
	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
	unsigned int slot = iter->folioq_slot;
	ssize_t ret = 0;
	size_t offset = iter->iov_offset;

	BUG_ON(!folioq);

	if (slot >= folioq_nr_slots(folioq)) {
		folioq = folioq->next;
		if (WARN_ON_ONCE(!folioq))
			return 0;
		slot = 0;
	}

	do {
		struct folio *folio = folioq_folio(folioq, slot);
		size_t fsize = folioq_folio_size(folioq, slot);

		if (offset < fsize) {
			size_t part = umin(maxsize - ret, fsize - offset);

			sg_set_page(sg, folio_page(folio, 0), part, offset);
			sgtable->nents++;
			sg++;
			sg_max--;
			offset += part;
			ret += part;
		}

		if (offset >= fsize) {
			offset = 0;
			slot++;
			if (slot >= folioq_nr_slots(folioq)) {
				if (!folioq->next) {
					WARN_ON_ONCE(ret < iter->count);
					break;
				}
				folioq = folioq->next;
				slot = 0;
			}
		}
	} while (sg_max > 0 && ret < maxsize);

	iter->folioq = folioq;
	iter->folioq_slot = slot;
	iter->iov_offset = offset;
	iter->count -= ret;
	return ret;
}

/*
 * Extract up to sg_max folios from an XARRAY-type iterator and add them to
 * the scatterlist.  The pages are not pinned.
 */
static ssize_t extract_xarray_to_sg(struct iov_iter *iter,
				    ssize_t maxsize,
				    struct sg_table *sgtable,
				    unsigned int sg_max,
				    iov_iter_extraction_t extraction_flags)
{
	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
	struct xarray *xa = iter->xarray;
	struct folio *folio;
	loff_t start = iter->xarray_start + iter->iov_offset;
	pgoff_t index = start / PAGE_SIZE;
	ssize_t ret = 0;
	size_t offset, len;
	XA_STATE(xas, xa, index);

	rcu_read_lock();

	xas_for_each(&xas, folio, ULONG_MAX) {
		if (xas_retry(&xas, folio))
			continue;
		if (WARN_ON(xa_is_value(folio)))
			break;
		if (WARN_ON(folio_test_hugetlb(folio)))
			break;

		offset = offset_in_folio(folio, start);
		len = min_t(size_t, maxsize, folio_size(folio) - offset);

		sg_set_page(sg, folio_page(folio, 0), len, offset);
		sgtable->nents++;
		sg++;
		sg_max--;

		maxsize -= len;
		ret += len;
		if (maxsize <= 0 || sg_max == 0)
			break;
	}

	rcu_read_unlock();
	if (ret > 0)
		iov_iter_advance(iter, ret);
	return ret;
}

/**
 * extract_iter_to_sg - Extract pages from an iterator and add to an sglist
 * @iter: The iterator to extract from
 * @maxsize: The amount of iterator to copy
 * @sgtable: The scatterlist table to fill in
 * @sg_max: Maximum number of elements in @sgtable that may be filled
 * @extraction_flags: Flags to qualify the request
 *
 * Extract the page fragments from the given amount of the source iterator and
 * add them to a scatterlist that refers to all of those bits, to a maximum
 * addition of @sg_max elements.
 *
 * The pages referred to by UBUF- and IOVEC-type iterators are extracted and
 * pinned; BVEC-, KVEC-, FOLIOQ- and XARRAY-type are extracted but aren't
 * pinned; DISCARD-type is not supported.
 *
 * No end mark is placed on the scatterlist; that's left to the caller.
 *
 * @extraction_flags can have ITER_ALLOW_P2PDMA set to request peer-to-peer DMA
 * be allowed on the pages extracted.
 *
 * If successful, @sgtable->nents is updated to include the number of elements
 * added and the number of bytes added is returned.  @sgtable->orig_nents is
 * left unaltered.
 *
 * The iov_iter_extract_mode() function should be used to query how cleanup
 * should be performed.
 */
ssize_t extract_iter_to_sg(struct iov_iter *iter, size_t maxsize,
			   struct sg_table *sgtable, unsigned int sg_max,
			   iov_iter_extraction_t extraction_flags)
{
	if (maxsize == 0)
		return 0;

	switch (iov_iter_type(iter)) {
	case ITER_UBUF:
	case ITER_IOVEC:
		return extract_user_to_sg(iter, maxsize, sgtable, sg_max,
					  extraction_flags);
	case ITER_BVEC:
		return extract_bvec_to_sg(iter, maxsize, sgtable, sg_max,
					  extraction_flags);
	case ITER_KVEC:
		return extract_kvec_to_sg(iter, maxsize, sgtable, sg_max,
					  extraction_flags);
	case ITER_FOLIOQ:
		return extract_folioq_to_sg(iter, maxsize, sgtable, sg_max,
					    extraction_flags);
	case ITER_XARRAY:
		return extract_xarray_to_sg(iter, maxsize, sgtable, sg_max,
					    extraction_flags);
	default:
		pr_err("%s(%u) unsupported\n", __func__, iov_iter_type(iter));
		WARN_ON_ONCE(1);
		return -EIO;
	}
}
EXPORT_SYMBOL_GPL(extract_iter_to_sg);
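
/*
 * Example (illustrative sketch, not part of the upstream file): filling a
 * caller-provided table from a kernel buffer via a KVEC iterator.  "buf",
 * "len" and the 16-entry table size are hypothetical.
 *
 *	struct scatterlist sgl[16];
 *	struct sg_table sgt = { .sgl = sgl };
 *	struct kvec kv = { .iov_base = buf, .iov_len = len };
 *	struct iov_iter iter;
 *	ssize_t n;
 *
 *	sg_init_table(sgl, ARRAY_SIZE(sgl));
 *	iov_iter_kvec(&iter, ITER_SOURCE, &kv, 1, len);
 *	n = extract_iter_to_sg(&iter, len, &sgt, ARRAY_SIZE(sgl), 0);
 *	if (n < 0)
 *		return n;
 *	sg_mark_end(&sgt.sgl[sgt.nents - 1]);
 *
 * As documented above, the caller places the end mark; KVEC pages are not
 * pinned, so no unpinning is needed on teardown.
 */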