// SPDX-License-Identifier: GPL-2.0-only
#include <linux/export.h>
#include <linux/bvec.h>
#include <linux/uio.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/splice.h>
#include <net/checksum.h>
#include <linux/scatterlist.h>

#define PIPE_PARANOIA /* for now */

#define iterate_iovec(i, n, __v, __p, skip, STEP) {	\
	size_t left;	\
	size_t wanted = n;	\
	__p = i->iov;	\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {	\
		__v.iov_base = __p->iov_base + skip;	\
		left = (STEP);	\
		__v.iov_len -= left;	\
		skip += __v.iov_len;	\
		n -= __v.iov_len;	\
	} else {	\
		left = 0;	\
	}	\
	while (unlikely(!left && n)) {	\
		__p++;	\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))	\
			continue;	\
		__v.iov_base = __p->iov_base;	\
		left = (STEP);	\
		__v.iov_len -= left;	\
		skip = __v.iov_len;	\
		n -= __v.iov_len;	\
	}	\
	n = wanted - n;	\
}

#define iterate_kvec(i, n, __v, __p, skip, STEP) {	\
	size_t wanted = n;	\
	__p = i->kvec;	\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {	\
		__v.iov_base = __p->iov_base + skip;	\
		(void)(STEP);	\
		skip += __v.iov_len;	\
		n -= __v.iov_len;	\
	}	\
	while (unlikely(n)) {	\
		__p++;	\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))	\
			continue;	\
		__v.iov_base = __p->iov_base;	\
		(void)(STEP);	\
		skip = __v.iov_len;	\
		n -= __v.iov_len;	\
	}	\
	n = wanted;	\
}

#define iterate_bvec(i, n, __v, __bi, skip, STEP) {	\
	struct bvec_iter __start;	\
	__start.bi_size = n;	\
	__start.bi_bvec_done = skip;	\
	__start.bi_idx = 0;	\
	for_each_bvec(__v, i->bvec, __bi, __start) {	\
		if (!__v.bv_len)	\
			continue;	\
		(void)(STEP);	\
	}	\
}

#define iterate_all_kinds(i, n, v, I, B, K) {	\
	if (likely(n)) {	\
		size_t skip = i->iov_offset;	\
		if (unlikely(i->type & ITER_BVEC)) {	\
			struct bio_vec v;	\
			struct bvec_iter __bi;	\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;	\
			struct kvec v;	\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
		} else if (unlikely(i->type & ITER_DISCARD)) {	\
		} else {	\
			const struct iovec *iov;	\
			struct iovec v;	\
			iterate_iovec(i, n, v, iov, skip, (I))	\
		}	\
	}	\
}

#define iterate_and_advance(i, n, v, I, B, K) {	\
	if (unlikely(i->count < n))	\
		n = i->count;	\
	if (i->count) {	\
		size_t skip = i->iov_offset;	\
		if (unlikely(i->type & ITER_BVEC)) {	\
			const struct bio_vec *bvec = i->bvec;	\
			struct bio_vec v;	\
			struct bvec_iter __bi;	\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
			i->bvec = __bvec_iter_bvec(i->bvec, __bi);	\
			i->nr_segs -= i->bvec - bvec;	\
			skip = __bi.bi_bvec_done;	\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;	\
			struct kvec v;	\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
			if (skip == kvec->iov_len) {	\
				kvec++;	\
				skip = 0;	\
			}	\
			i->nr_segs -= kvec - i->kvec;	\
			i->kvec = kvec;	\
		} else if (unlikely(i->type & ITER_DISCARD)) {	\
			skip += n;	\
		} else {	\
			const struct iovec *iov;	\
			struct iovec v;	\
			iterate_iovec(i, n, v, iov, skip, (I))	\
			if (skip == iov->iov_len) {	\
				iov++;	\
				skip = 0;	\
			}	\
			i->nr_segs -= iov - i->iov;	\
			i->iov = iov;	\
		}	\
		i->count -= n;	\
		i->iov_offset = skip;	\
	}	\
}

static int copyout(void __user *to, const void *from, size_t n)
{
	if (access_ok(to, n)) {
		kasan_check_read(from, n);
		n = raw_copy_to_user(to, from, n);
	}
	return n;
}

static int copyin(void *to, const void __user *from, size_t n)
{
	if (access_ok(from, n)) {
		kasan_check_write(to, n);
		n = raw_copy_from_user(to, from, n);
	}
	return n;
}

static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *from;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	might_fault();
	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
		kaddr = kmap_atomic(page);
		from = kaddr + offset;

		/* first chunk, usually the only one */
		left = copyout(buf, from, copy);
		copy -= left;
		skip += copy;
		from += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = copyout(buf, from, copy);
			copy -= left;
			skip = copy;
			from += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = from - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	from = kaddr + offset;
	left = copyout(buf, from, copy);
	copy -= left;
	skip += copy;
	from += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = copyout(buf, from, copy);
		copy -= left;
		skip = copy;
		from += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *to;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	might_fault();
	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
		kaddr = kmap_atomic(page);
		to = kaddr + offset;

		/* first chunk, usually the only one */
		left = copyin(to, buf, copy);
		copy -= left;
		skip += copy;
		to += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = copyin(to, buf, copy);
			copy -= left;
			skip = copy;
			to += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = to - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	to = kaddr + offset;
	left = copyin(to, buf, copy);
	copy -= left;
	skip += copy;
	to += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = copyin(to, buf, copy);
		copy -= left;
		skip = copy;
		to += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

#ifdef PIPE_PARANOIA
static bool sanity(const struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_head = pipe->head;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int p_occupancy = pipe_occupancy(p_head, p_tail);
	unsigned int i_head = i->head;
	unsigned int idx;

	if (i->iov_offset) {
		struct pipe_buffer *p;
		if (unlikely(p_occupancy == 0))
			goto Bad;	// pipe must be non-empty
		if (unlikely(i_head != p_head - 1))
			goto Bad;	// must be at the last buffer...

		p = &pipe->bufs[i_head & p_mask];
		if (unlikely(p->offset + p->len != i->iov_offset))
			goto Bad;	// ... at the end of segment
	} else {
		if (i_head != p_head)
			goto Bad;	// must be right after the last buffer
	}
	return true;
Bad:
	printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset);
	printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n",
			p_head, p_tail, pipe->ring_size);
	for (idx = 0; idx < pipe->ring_size; idx++)
		printk(KERN_ERR "[%p %p %d %d]\n",
			pipe->bufs[idx].ops,
			pipe->bufs[idx].page,
			pipe->bufs[idx].offset,
			pipe->bufs[idx].len);
	WARN_ON(1);
	return false;
}
#else
#define sanity(i) true
#endif

static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	struct pipe_buffer *buf;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head = i->head;
	size_t off;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	if (!sanity(i))
		return 0;

	off = i->iov_offset;
	buf = &pipe->bufs[i_head & p_mask];
	if (off) {
		if (offset == off && buf->page == page) {
			/* merge with the last one */
			buf->len += bytes;
			i->iov_offset += bytes;
			goto out;
		}
		i_head++;
		buf = &pipe->bufs[i_head & p_mask];
	}
	if (pipe_full(i_head, p_tail, pipe->max_usage))
		return 0;

	buf->ops = &page_cache_pipe_buf_ops;
	get_page(page);
	buf->page = page;
	buf->offset = offset;
	buf->len = bytes;

	pipe->head = i_head + 1;
	i->iov_offset = offset + bytes;
	i->head = i_head;
out:
	i->count -= bytes;
	return bytes;
}

/*
 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
 * bytes.  For each iovec, fault in each page that constitutes the iovec.
 *
 * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
 * because it is an invalid address).
 */
int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
{
	size_t skip = i->iov_offset;
	const struct iovec *iov;
	int err;
	struct iovec v;

	if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
		iterate_iovec(i, bytes, v, iov, skip, ({
			err = fault_in_pages_readable(v.iov_base, v.iov_len);
			if (unlikely(err))
				return err;
		0;}))
	}
	return 0;
}
EXPORT_SYMBOL(iov_iter_fault_in_readable);
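
/*
 * Illustrative sketch (not part of this file): a typical caller faults the
 * user pages in before entering a region where taking a page fault is not
 * allowed, then copies with page faults disabled and retries on a short
 * copy.  The function and buffer names below are hypothetical placeholders.
 *
 *	ssize_t example_write(struct example_dev *dev, struct iov_iter *i)
 *	{
 *		size_t bytes = min_t(size_t, iov_iter_count(i), PAGE_SIZE);
 *		size_t copied;
 *
 *		if (iov_iter_fault_in_readable(i, bytes))
 *			return -EFAULT;
 *
 *		pagefault_disable();
 *		copied = copy_from_iter(dev->buf, bytes, i);
 *		pagefault_enable();
 *
 *		return copied ? copied : -EFAULT;
 *	}
 */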

void iov_iter_init(struct iov_iter *i, unsigned int direction,
			const struct iovec *iov, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	direction &= READ | WRITE;

	/* It will get better.  Eventually... */
	if (uaccess_kernel()) {
		i->type = ITER_KVEC | direction;
		i->kvec = (struct kvec *)iov;
	} else {
		i->type = ITER_IOVEC | direction;
		i->iov = iov;
	}
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_init);

static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
{
	char *from = kmap_atomic(page);
	memcpy(to, from + offset, len);
	kunmap_atomic(from);
}

static void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len)
{
	char *to = kmap_atomic(page);
	memcpy(to + offset, from, len);
	kunmap_atomic(to);
}

static void memzero_page(struct page *page, size_t offset, size_t len)
{
	char *addr = kmap_atomic(page);
	memset(addr + offset, 0, len);
	kunmap_atomic(addr);
}

static inline bool allocated(struct pipe_buffer *buf)
{
	return buf->ops == &default_pipe_buf_ops;
}

static inline void data_start(const struct iov_iter *i,
			      unsigned int *iter_headp, size_t *offp)
{
	unsigned int p_mask = i->pipe->ring_size - 1;
	unsigned int iter_head = i->head;
	size_t off = i->iov_offset;

	if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) ||
		    off == PAGE_SIZE)) {
		iter_head++;
		off = 0;
	}
	*iter_headp = iter_head;
	*offp = off;
}

static size_t push_pipe(struct iov_iter *i, size_t size,
			int *iter_headp, size_t *offp)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int iter_head;
	size_t off;
	ssize_t left;

	if (unlikely(size > i->count))
		size = i->count;
	if (unlikely(!size))
		return 0;

	left = size;
	data_start(i, &iter_head, &off);
	*iter_headp = iter_head;
	*offp = off;
	if (off) {
		left -= PAGE_SIZE - off;
		if (left <= 0) {
			pipe->bufs[iter_head & p_mask].len += size;
			return size;
		}
		pipe->bufs[iter_head & p_mask].len = PAGE_SIZE;
		iter_head++;
	}
	while (!pipe_full(iter_head, p_tail, pipe->max_usage)) {
		struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask];
		struct page *page = alloc_page(GFP_USER);
		if (!page)
			break;

		buf->ops = &default_pipe_buf_ops;
		buf->page = page;
		buf->offset = 0;
		buf->len = min_t(ssize_t, left, PAGE_SIZE);
		left -= buf->len;
		iter_head++;
		pipe->head = iter_head;

		if (left == 0)
			return size;
	}
	return size - left;
}

static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
				struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, off;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &off);
	if (unlikely(!n))
		return 0;
	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk);
		i->head = i_head;
		i->iov_offset = off + chunk;
		n -= chunk;
		addr += chunk;
		off = 0;
		i_head++;
	} while (n);
	i->count -= bytes;
	return bytes;
}

static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
			      __wsum sum, size_t off)
{
	__wsum next = csum_partial_copy_nocheck(from, to, len, 0);
	return csum_block_add(sum, next, off);
}

static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
				__wsum *csum, struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, r;
	size_t off = 0;
	__wsum sum = *csum;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &r);
	if (unlikely(!n))
		return 0;
	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - r);
		char *p = kmap_atomic(pipe->bufs[i_head & p_mask].page);
		sum = csum_and_memcpy(p + r, addr, chunk, sum, off);
		kunmap_atomic(p);
		i->head = i_head;
		i->iov_offset = r + chunk;
		n -= chunk;
		off += chunk;
		addr += chunk;
		r = 0;
		i_head++;
	} while (n);
	i->count -= bytes;
	*csum = sum;
	return bytes;
}

size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
	const char *from = addr;
	if (unlikely(iov_iter_is_pipe(i)))
		return copy_pipe_to_iter(addr, bytes, i);
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
		memcpy_to_page(v.bv_page, v.bv_offset,
			       (from += v.bv_len) - v.bv_len, v.bv_len),
		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_to_iter);

#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
static int copyout_mcsafe(void __user *to, const void *from, size_t n)
{
	if (access_ok(to, n)) {
		kasan_check_read(from, n);
		n = copy_to_user_mcsafe((__force void *) to, from, n);
	}
	return n;
}

static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset,
		const char *from, size_t len)
{
	unsigned long ret;
	char *to;

	to = kmap_atomic(page);
	ret = memcpy_mcsafe(to + offset, from, len);
	kunmap_atomic(to);

	return ret;
}

static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
				struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, off, xfer = 0;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &off);
	if (unlikely(!n))
		return 0;
	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		unsigned long rem;

		rem = memcpy_mcsafe_to_page(pipe->bufs[i_head & p_mask].page,
					    off, addr, chunk);
		i->head = i_head;
		i->iov_offset = off + chunk - rem;
		xfer += chunk - rem;
		if (rem)
			break;
		n -= chunk;
		addr += chunk;
		off = 0;
		i_head++;
	} while (n);
	i->count -= xfer;
	return xfer;
}

/**
 * _copy_to_iter_mcsafe - copy to user with source-read error exception handling
 * @addr: source kernel address
 * @bytes: total transfer length
 * @i: destination iterator
 *
 * The pmem driver arranges for filesystem-dax to use this facility via
 * dax_copy_to_iter() for protecting read/write to persistent memory.
 * Unless / until an architecture can guarantee identical performance
 * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a
 * performance regression to switch more users to the mcsafe version.
 *
 * The main differences between this and a typical _copy_to_iter() are:
 *
 * * Typical tail/residue handling after a fault retries the copy
 *   byte-by-byte until the fault happens again. Re-triggering machine
 *   checks is potentially fatal so the implementation uses source
 *   alignment and poison alignment assumptions to avoid re-triggering
 *   hardware exceptions.
 *
 * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
 *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
 *   a short copy.
 *
 * See MCSAFE_TEST for self-test.
 */
size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
{
	const char *from = addr;
	unsigned long rem, curr_addr, s_addr = (unsigned long) addr;

	if (unlikely(iov_iter_is_pipe(i)))
		return copy_pipe_to_iter_mcsafe(addr, bytes, i);
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
		({
		rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset,
				(from += v.bv_len) - v.bv_len, v.bv_len);
		if (rem) {
			curr_addr = (unsigned long) from;
			bytes = curr_addr - s_addr - rem;
			return bytes;
		}
		}),
		({
		rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len,
				v.iov_len);
		if (rem) {
			curr_addr = (unsigned long) from;
			bytes = curr_addr - s_addr - rem;
			return bytes;
		}
		})
	)

	return bytes;
}
EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
#endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */
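
/*
 * Illustrative sketch (not part of this file): because the mcsafe variant can
 * return a short copy for any iterator type, callers must treat the return
 * value as the number of bytes actually transferred rather than assuming an
 * all-or-nothing copy.  The surrounding function is a hypothetical example.
 *
 *	static ssize_t example_dax_read(void *kaddr, size_t len,
 *					struct iov_iter *i)
 *	{
 *		size_t done = _copy_to_iter_mcsafe(kaddr, len, i);
 *
 *		if (!done && len)
 *			return -EIO;	(poisoned right at the start)
 *		return done;		(may be < len on a poisoned source)
 *	}
 */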

size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_from_iter);

bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;

	if (iter_is_iovec(i))
		might_fault();
	iterate_all_kinds(i, bytes, v, ({
		if (copyin((to += v.iov_len) - v.iov_len,
				   v.iov_base, v.iov_len))
			return false;
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(_copy_from_iter_full);

size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
					 v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_from_iter_nocache);

#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/**
 * _copy_from_iter_flushcache - write destination through cpu cache
 * @addr: destination kernel address
 * @bytes: total transfer length
 * @i: source iterator
 *
 * The pmem driver arranges for filesystem-dax to use this facility via
 * dax_copy_from_iter() for ensuring that writes to persistent memory
 * are flushed through the CPU cache.  It is differentiated from
 * _copy_from_iter_nocache() in that it guarantees all data is flushed for
 * all iterator types.  _copy_from_iter_nocache() only attempts to
 * bypass the cache for the ITER_IOVEC case, and on some archs may use
 * instructions that strand dirty-data in the cache.
 */
size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
					 v.iov_base, v.iov_len),
		memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
			v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
#endif
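
/*
 * Illustrative sketch (not part of this file): a persistent-memory write path
 * would use the flushcache variant so that data copied from the iterator is
 * not left dirty in the CPU cache.  The function name below is a hypothetical
 * placeholder for what a dax_copy_from_iter() implementation might do.
 *
 *	static size_t example_pmem_copy_from_iter(void *pmem_addr, size_t len,
 *						  struct iov_iter *i)
 *	{
 *		return _copy_from_iter_flushcache(pmem_addr, len, i);
 *	}
 */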

bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;
	iterate_all_kinds(i, bytes, v, ({
		if (__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
					     v.iov_base, v.iov_len))
			return false;
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(_copy_from_iter_full_nocache);

static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
{
	struct page *head;
	size_t v = n + offset;

	/*
	 * The general case needs to access the page order in order
	 * to compute the page size.
	 * However, we mostly deal with order-0 pages and thus can
	 * avoid a possible cache line miss for requests that fit all
	 * page orders.
	 */
	if (n <= v && v <= PAGE_SIZE)
		return true;

	head = compound_head(page);
	v += (page - head) << PAGE_SHIFT;

	if (likely(n <= v && v <= (page_size(head))))
		return true;
	WARN_ON(1);
	return false;
}

size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (unlikely(!page_copy_sane(page, offset, bytes)))
		return 0;
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else if (unlikely(iov_iter_is_discard(i)))
		return bytes;
	else if (likely(!iov_iter_is_pipe(i)))
		return copy_page_to_iter_iovec(page, offset, bytes, i);
	else
		return copy_page_to_iter_pipe(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_to_iter);

size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (unlikely(!page_copy_sane(page, offset, bytes)))
		return 0;
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return 0;
	}
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else
		return copy_page_from_iter_iovec(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_from_iter);

static size_t pipe_zero(size_t bytes, struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, off;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &off);
	if (unlikely(!n))
		return 0;

	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memzero_page(pipe->bufs[i_head & p_mask].page, off, chunk);
		i->head = i_head;
		i->iov_offset = off + chunk;
		n -= chunk;
		off = 0;
		i_head++;
	} while (n);
	i->count -= bytes;
	return bytes;
}

size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_zero(bytes, i);
	iterate_and_advance(i, bytes, v,
		clear_user(v.iov_base, v.iov_len),
		memzero_page(v.bv_page, v.bv_offset, v.bv_len),
		memset(v.iov_base, 0, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(iov_iter_zero);

size_t iov_iter_copy_from_user_atomic(struct page *page,
		struct iov_iter *i, unsigned long offset, size_t bytes)
{
	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
	if (unlikely(!page_copy_sane(page, offset, bytes))) {
		kunmap_atomic(kaddr);
		return 0;
	}
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		kunmap_atomic(kaddr);
		WARN_ON(1);
		return 0;
	}
	iterate_all_kinds(i, bytes, v,
		copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)
	kunmap_atomic(kaddr);
	return bytes;
}
EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);

static inline void pipe_truncate(struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_tail = pipe->tail;
	unsigned int p_head = pipe->head;
	unsigned int p_mask = pipe->ring_size - 1;

	if (!pipe_empty(p_head, p_tail)) {
		struct pipe_buffer *buf;
		unsigned int i_head = i->head;
		size_t off = i->iov_offset;

		if (off) {
			buf = &pipe->bufs[i_head & p_mask];
			buf->len = off - buf->offset;
			i_head++;
		}
		while (p_head != i_head) {
			p_head--;
			pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]);
		}

		pipe->head = p_head;
	}
}

static void pipe_advance(struct iov_iter *i, size_t size)
{
	struct pipe_inode_info *pipe = i->pipe;
	if (unlikely(i->count < size))
		size = i->count;
	if (size) {
		struct pipe_buffer *buf;
		unsigned int p_mask = pipe->ring_size - 1;
		unsigned int i_head = i->head;
		size_t off = i->iov_offset, left = size;

		if (off) /* make it relative to the beginning of buffer */
			left += off - pipe->bufs[i_head & p_mask].offset;
		while (1) {
			buf = &pipe->bufs[i_head & p_mask];
			if (left <= buf->len)
				break;
			left -= buf->len;
			i_head++;
		}
		i->head = i_head;
		i->iov_offset = buf->offset + left;
	}
	i->count -= size;
	/* ... and discard everything past that point */
	pipe_truncate(i);
}

void iov_iter_advance(struct iov_iter *i, size_t size)
{
	if (unlikely(iov_iter_is_pipe(i))) {
		pipe_advance(i, size);
		return;
	}
	if (unlikely(iov_iter_is_discard(i))) {
		i->count -= size;
		return;
	}
	iterate_and_advance(i, size, v, 0, 0, 0)
}
EXPORT_SYMBOL(iov_iter_advance);

void iov_iter_revert(struct iov_iter *i, size_t unroll)
{
	if (!unroll)
		return;
	if (WARN_ON(unroll > MAX_RW_COUNT))
		return;
	i->count += unroll;
	if (unlikely(iov_iter_is_pipe(i))) {
		struct pipe_inode_info *pipe = i->pipe;
		unsigned int p_mask = pipe->ring_size - 1;
		unsigned int i_head = i->head;
		size_t off = i->iov_offset;
		while (1) {
			struct pipe_buffer *b = &pipe->bufs[i_head & p_mask];
			size_t n = off - b->offset;
			if (unroll < n) {
				off -= unroll;
				break;
			}
			unroll -= n;
			if (!unroll && i_head == i->start_head) {
				off = 0;
				break;
			}
			i_head--;
			b = &pipe->bufs[i_head & p_mask];
			off = b->offset + b->len;
		}
		i->iov_offset = off;
		i->head = i_head;
		pipe_truncate(i);
		return;
	}
	if (unlikely(iov_iter_is_discard(i)))
		return;
	if (unroll <= i->iov_offset) {
		i->iov_offset -= unroll;
		return;
	}
	unroll -= i->iov_offset;
	if (iov_iter_is_bvec(i)) {
		const struct bio_vec *bvec = i->bvec;
		while (1) {
			size_t n = (--bvec)->bv_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->bvec = bvec;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	} else { /* same logic for iovec and kvec */
		const struct iovec *iov = i->iov;
		while (1) {
			size_t n = (--iov)->iov_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->iov = iov;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	}
}
EXPORT_SYMBOL(iov_iter_revert);

/*
 * Return the count of just the current iov_iter segment.
 */
size_t iov_iter_single_seg_count(const struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i)))
		return i->count;	// it is a silly place, anyway
	if (i->nr_segs == 1)
		return i->count;
	if (unlikely(iov_iter_is_discard(i)))
		return i->count;
	else if (iov_iter_is_bvec(i))
		return min(i->count, i->bvec->bv_len - i->iov_offset);
	else
		return min(i->count, i->iov->iov_len - i->iov_offset);
}
EXPORT_SYMBOL(iov_iter_single_seg_count);

void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
			const struct kvec *kvec, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	i->type = ITER_KVEC | (direction & (READ | WRITE));
	i->kvec = kvec;
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_kvec);

void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
			const struct bio_vec *bvec, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	i->type = ITER_BVEC | (direction & (READ | WRITE));
	i->bvec = bvec;
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_bvec);

void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
			struct pipe_inode_info *pipe,
			size_t count)
{
	BUG_ON(direction != READ);
	WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size));
	i->type = ITER_PIPE | READ;
	i->pipe = pipe;
	i->head = pipe->head;
	i->iov_offset = 0;
	i->count = count;
	i->start_head = i->head;
}
EXPORT_SYMBOL(iov_iter_pipe);

/**
 * iov_iter_discard - Initialise an I/O iterator that discards data
 * @i: The iterator to initialise.
 * @direction: The direction of the transfer.
 * @count: The size of the I/O buffer in bytes.
 *
 * Set up an I/O iterator that just discards everything that's written to it.
 * It's only available as a READ iterator.
 */
void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
{
	BUG_ON(direction != READ);
	i->type = ITER_DISCARD | READ;
	i->count = count;
	i->iov_offset = 0;
}
EXPORT_SYMBOL(iov_iter_discard);
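
/*
 * Illustrative sketch (not part of this file): a discard iterator lets a
 * caller consume and throw away a known number of bytes through the normal
 * copy_to_iter() interface, e.g. to skip over part of a stream.  The
 * function name is a hypothetical placeholder.
 *
 *	static void example_skip_bytes(const void *src, size_t len)
 *	{
 *		struct iov_iter i;
 *
 *		iov_iter_discard(&i, READ, len);
 *		copy_to_iter(src, len, &i);	(data is dropped, i.count -= len)
 *	}
 */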

unsigned long iov_iter_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;

	if (unlikely(iov_iter_is_pipe(i))) {
		unsigned int p_mask = i->pipe->ring_size - 1;

		if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask]))
			return size | i->iov_offset;
		return size;
	}
	iterate_all_kinds(i, size, v,
		(res |= (unsigned long)v.iov_base | v.iov_len, 0),
		res |= v.bv_offset | v.bv_len,
		res |= (unsigned long)v.iov_base | v.iov_len
	)
	return res;
}
EXPORT_SYMBOL(iov_iter_alignment);

unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;

	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return ~0U;
	}

	iterate_all_kinds(i, size, v,
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0), 0),
		(res |= (!res ? 0 : (unsigned long)v.bv_offset) |
			(size != v.bv_len ? size : 0)),
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0))
	);
	return res;
}
EXPORT_SYMBOL(iov_iter_gap_alignment);

static inline ssize_t __pipe_get_pages(struct iov_iter *i,
				size_t maxsize,
				struct page **pages,
				int iter_head,
				size_t *start)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	ssize_t n = push_pipe(i, maxsize, &iter_head, start);
	if (!n)
		return -EFAULT;

	maxsize = n;
	n += *start;
	while (n > 0) {
		get_page(*pages++ = pipe->bufs[iter_head & p_mask].page);
		iter_head++;
		n -= PAGE_SIZE;
	}

	return maxsize;
}

static ssize_t pipe_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	unsigned int iter_head, npages;
	size_t capacity;

	if (!maxsize)
		return 0;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &iter_head, start);
	/* Amount of free space: some of this one + all after this one */
	npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
	capacity = min(npages, maxpages) * PAGE_SIZE - *start;

	return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start);
}

ssize_t iov_iter_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	if (maxsize > i->count)
		maxsize = i->count;

	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_get_pages(i, pages, maxsize, maxpages, start);
	if (unlikely(iov_iter_is_discard(i)))
		return -EFAULT;

	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		if (len > maxpages * PAGE_SIZE)
			len = maxpages * PAGE_SIZE;
		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		res = get_user_pages_fast(addr, n,
				iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0,
				pages);
		if (unlikely(res < 0))
			return res;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		get_page(*pages = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages);

static struct page **get_pages_array(size_t n)
{
	return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
}

static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;
	unsigned int iter_head, npages;
	ssize_t n;

	if (!maxsize)
		return 0;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &iter_head, start);
	/* Amount of free space: some of this one + all after this one */
	npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
	n = npages * PAGE_SIZE - *start;
	if (maxsize > n)
		maxsize = n;
	else
		npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
	p = get_pages_array(npages);
	if (!p)
		return -ENOMEM;
	n = __pipe_get_pages(i, maxsize, p, iter_head, start);
	if (n > 0)
		*pages = p;
	else
		kvfree(p);
	return n;
}

ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;

	if (maxsize > i->count)
		maxsize = i->count;

	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_get_pages_alloc(i, pages, maxsize, start);
	if (unlikely(iov_iter_is_discard(i)))
		return -EFAULT;

	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		p = get_pages_array(n);
		if (!p)
			return -ENOMEM;
		res = get_user_pages_fast(addr, n,
				iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0, p);
		if (unlikely(res < 0)) {
			kvfree(p);
			return res;
		}
		*pages = p;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		*pages = p = get_pages_array(1);
		if (!p)
			return -ENOMEM;
		get_page(*p = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages_alloc);

size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
			       struct iov_iter *i)
{
	char *to = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		int err = 0;
		next = csum_and_copy_from_user(v.iov_base,
					       (to += v.iov_len) - v.iov_len,
					       v.iov_len, 0, &err);
		if (!err) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		err ? v.iov_len : 0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
				      p + v.bv_offset, v.bv_len,
				      sum, off);
		kunmap_atomic(p);
		off += v.bv_len;
	}),({
		sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
				      v.iov_base, v.iov_len,
				      sum, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_from_iter);

bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
			       struct iov_iter *i)
{
	char *to = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;
	iterate_all_kinds(i, bytes, v, ({
		int err = 0;
		next = csum_and_copy_from_user(v.iov_base,
					       (to += v.iov_len) - v.iov_len,
					       v.iov_len, 0, &err);
		if (err)
			return false;
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
		0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
				      p + v.bv_offset, v.bv_len,
				      sum, off);
		kunmap_atomic(p);
		off += v.bv_len;
	}),({
		sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
				      v.iov_base, v.iov_len,
				      sum, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(csum_and_copy_from_iter_full);

size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump,
			     struct iov_iter *i)
{
	const char *from = addr;
	__wsum *csum = csump;
	__wsum sum, next;
	size_t off = 0;

	if (unlikely(iov_iter_is_pipe(i)))
		return csum_and_copy_to_pipe_iter(addr, bytes, csum, i);

	sum = *csum;
	if (unlikely(iov_iter_is_discard(i))) {
		WARN_ON(1);	/* for now */
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		int err = 0;
		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
					     v.iov_base,
					     v.iov_len, 0, &err);
		if (!err) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		err ? v.iov_len : 0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		sum = csum_and_memcpy(p + v.bv_offset,
				      (from += v.bv_len) - v.bv_len,
				      v.bv_len, sum, off);
		kunmap_atomic(p);
		off += v.bv_len;
	}),({
		sum = csum_and_memcpy(v.iov_base,
				      (from += v.iov_len) - v.iov_len,
				      v.iov_len, sum, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_to_iter);

size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
		struct iov_iter *i)
{
#ifdef CONFIG_CRYPTO
	struct ahash_request *hash = hashp;
	struct scatterlist sg;
	size_t copied;

	copied = copy_to_iter(addr, bytes, i);
	sg_init_one(&sg, addr, copied);
	ahash_request_set_crypt(hash, &sg, NULL, copied);
	crypto_ahash_update(hash);
	return copied;
#else
	return 0;
#endif
}
EXPORT_SYMBOL(hash_and_copy_to_iter);

int iov_iter_npages(const struct iov_iter *i, int maxpages)
{
	size_t size = i->count;
	int npages = 0;

	if (!size)
		return 0;
	if (unlikely(iov_iter_is_discard(i)))
		return 0;

	if (unlikely(iov_iter_is_pipe(i))) {
		struct pipe_inode_info *pipe = i->pipe;
		unsigned int iter_head;
		size_t off;

		if (!sanity(i))
			return 0;

		data_start(i, &iter_head, &off);
		/* some of this one + all after this one */
		npages = pipe_space_for_user(iter_head, pipe->tail, pipe);
		if (npages >= maxpages)
			return maxpages;
	} else iterate_all_kinds(i, size, v, ({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	0;}),({
		npages++;
		if (npages >= maxpages)
			return maxpages;
	}),({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	})
	)
	return npages;
}
EXPORT_SYMBOL(iov_iter_npages);

const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
{
	*new = *old;
	if (unlikely(iov_iter_is_pipe(new))) {
		WARN_ON(1);
		return NULL;
	}
	if (unlikely(iov_iter_is_discard(new)))
		return NULL;
	if (iov_iter_is_bvec(new))
		return new->bvec = kmemdup(new->bvec,
				    new->nr_segs * sizeof(struct bio_vec),
				    flags);
	else
		/* iovec and kvec have identical layout */
		return new->iov = kmemdup(new->iov,
				   new->nr_segs * sizeof(struct iovec),
				   flags);
}
EXPORT_SYMBOL(dup_iter);

/**
 * import_iovec() - Copy an array of &struct iovec from userspace
 *     into the kernel, check that it is valid, and initialize a new
 *     &struct iov_iter iterator to access it.
 *
 * @type: One of %READ or %WRITE.
 * @uvector: Pointer to the userspace array.
 * @nr_segs: Number of elements in userspace array.
 * @fast_segs: Number of elements in @iov.
 * @iov: (input and output parameter) Pointer to pointer to (usually small
 *     on-stack) kernel array.
 * @i: Pointer to iterator that will be initialized on success.
 *
 * If the array pointed to by *@iov is large enough to hold all @nr_segs,
 * then this function places %NULL in *@iov on return. Otherwise, a new
 * array will be allocated and the result placed in *@iov. This means that
 * the caller may call kfree() on *@iov regardless of whether the small
 * on-stack array was used or not (and regardless of whether this function
 * returns an error or not).
 *
 * Return: Negative error code on error, bytes imported on success
 */
ssize_t import_iovec(int type, const struct iovec __user * uvector,
		 unsigned nr_segs, unsigned fast_segs,
		 struct iovec **iov, struct iov_iter *i)
{
	ssize_t n;
	struct iovec *p;
	n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
				  *iov, &p);
	if (n < 0) {
		if (p != *iov)
			kfree(p);
		*iov = NULL;
		return n;
	}
	iov_iter_init(i, type, p, nr_segs, n);
	*iov = p == *iov ? NULL : p;
	return n;
}
EXPORT_SYMBOL(import_iovec);
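
/*
 * Illustrative sketch (not part of this file): the usual calling pattern is a
 * small on-stack array handed in via @iov, with kfree() called unconditionally
 * afterwards as described above.  The readv-style wrapper is hypothetical.
 *
 *	static ssize_t example_readv(struct file *file,
 *				     const struct iovec __user *vec,
 *				     unsigned long nr_segs)
 *	{
 *		struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
 *		struct iov_iter iter;
 *		ssize_t ret;
 *
 *		ret = import_iovec(READ, vec, nr_segs, UIO_FASTIOV, &iov, &iter);
 *		if (ret < 0)
 *			return ret;
 *
 *		ret = vfs_iter_read(file, &iter, &file->f_pos, 0);
 *		kfree(iov);
 *		return ret;
 *	}
 */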

#ifdef CONFIG_COMPAT
#include <linux/compat.h>

ssize_t compat_import_iovec(int type,
		const struct compat_iovec __user * uvector,
		unsigned nr_segs, unsigned fast_segs,
		struct iovec **iov, struct iov_iter *i)
{
	ssize_t n;
	struct iovec *p;
	n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
				  *iov, &p);
	if (n < 0) {
		if (p != *iov)
			kfree(p);
		*iov = NULL;
		return n;
	}
	iov_iter_init(i, type, p, nr_segs, n);
	*iov = p == *iov ? NULL : p;
	return n;
}
EXPORT_SYMBOL(compat_import_iovec);
#endif

int import_single_range(int rw, void __user *buf, size_t len,
		 struct iovec *iov, struct iov_iter *i)
{
	if (len > MAX_RW_COUNT)
		len = MAX_RW_COUNT;
	if (unlikely(!access_ok(buf, len)))
		return -EFAULT;

	iov->iov_base = buf;
	iov->iov_len = len;
	iov_iter_init(i, rw, iov, 1, len);
	return 0;
}
EXPORT_SYMBOL(import_single_range);

int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
			    int (*f)(struct kvec *vec, void *context),
			    void *context)
{
	struct kvec w;
	int err = -EINVAL;
	if (!bytes)
		return 0;

	iterate_all_kinds(i, bytes, v, -EINVAL, ({
		w.iov_base = kmap(v.bv_page) + v.bv_offset;
		w.iov_len = v.bv_len;
		err = f(&w, context);
		kunmap(v.bv_page);
		err;}), ({
		w = v;
		err = f(&w, context);})
	)
	return err;
}
EXPORT_SYMBOL(iov_iter_for_each_range);