// SPDX-License-Identifier: GPL-2.0-only
#include <crypto/hash.h>
#include <linux/export.h>
#include <linux/bvec.h>
#include <linux/fault-inject-usercopy.h>
#include <linux/uio.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/splice.h>
#include <linux/compat.h>
#include <net/checksum.h>
#include <linux/scatterlist.h>
#include <linux/instrumented.h>

#define PIPE_PARANOIA /* for now */

#define iterate_iovec(i, n, __v, __p, skip, STEP) {	\
	size_t left;					\
	size_t wanted = n;				\
	__p = i->iov;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	} else {					\
		left = 0;				\
	}						\
	while (unlikely(!left && n)) {			\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted - n;					\
}

#define iterate_kvec(i, n, __v, __p, skip, STEP) {	\
	size_t wanted = n;				\
	__p = i->kvec;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		(void)(STEP);				\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	while (unlikely(n)) {				\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		(void)(STEP);				\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted;					\
}

#define iterate_bvec(i, n, __v, __bi, skip, STEP) {	\
	struct bvec_iter __start;			\
	__start.bi_size = n;				\
	__start.bi_bvec_done = skip;			\
	__start.bi_idx = 0;				\
	for_each_bvec(__v, i->bvec, __bi, __start) {	\
		(void)(STEP);				\
	}						\
}

#define iterate_all_kinds(i, n, v, I, B, K) {			\
	if (likely(n)) {					\
		size_t skip = i->iov_offset;			\
		if (unlikely(i->type & ITER_BVEC)) {		\
			struct bio_vec v;			\
			struct bvec_iter __bi;			\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;		\
			struct kvec v;				\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
		} else if (unlikely(i->type & ITER_DISCARD)) {	\
		} else {					\
			const struct iovec *iov;		\
			struct iovec v;				\
			iterate_iovec(i, n, v, iov, skip, (I))	\
		}						\
	}							\
}

#define iterate_and_advance(i, n, v, I, B, K) {			\
	if (unlikely(i->count < n))				\
		n = i->count;					\
	if (i->count) {						\
		size_t skip = i->iov_offset;			\
		if (unlikely(i->type & ITER_BVEC)) {		\
			const struct bio_vec *bvec = i->bvec;	\
			struct bio_vec v;			\
			struct bvec_iter __bi;			\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
			i->bvec = __bvec_iter_bvec(i->bvec, __bi);	\
			i->nr_segs -= i->bvec - bvec;		\
			skip = __bi.bi_bvec_done;		\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;		\
			struct kvec v;				\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
			if (skip == kvec->iov_len) {		\
				kvec++;				\
				skip = 0;			\
			}					\
			i->nr_segs -= kvec - i->kvec;		\
			i->kvec = kvec;				\
		} else if (unlikely(i->type & ITER_DISCARD)) {	\
			skip += n;				\
		} else {					\
			const struct iovec *iov;		\
			struct iovec v;				\
			iterate_iovec(i, n, v, iov, skip, (I))	\
			if (skip == iov->iov_len) {		\
				iov++;				\
				skip = 0;			\
			}					\
			i->nr_segs -= iov - i->iov;		\
			i->iov = iov;				\
		}						\
		i->count -= n;					\
		i->iov_offset = skip;				\
	}							\
}
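
/*
 * Illustrative sketch (not part of the build): a user of iterate_and_advance()
 * passes one step expression per backing store - userspace iovec, page-based
 * bvec and kernel kvec - and the macro runs the matching step for every
 * segment before advancing the iterator.  The shape below mirrors
 * _copy_to_iter() further down in this file; "from" is a cursor into the
 * source buffer:
 *
 *	iterate_and_advance(i, bytes, v,
 *		copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
 *		memcpy_to_page(v.bv_page, v.bv_offset,
 *			       (from += v.bv_len) - v.bv_len, v.bv_len),
 *		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
 *	)
 */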

static int copyout(void __user *to, const void *from, size_t n)
{
	if (should_fail_usercopy())
		return n;
	if (access_ok(to, n)) {
		instrument_copy_to_user(to, from, n);
		n = raw_copy_to_user(to, from, n);
	}
	return n;
}

static int copyin(void *to, const void __user *from, size_t n)
{
	if (should_fail_usercopy())
		return n;
	if (access_ok(from, n)) {
		instrument_copy_from_user(to, from, n);
		n = raw_copy_from_user(to, from, n);
	}
	return n;
}

static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *from;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	might_fault();
	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
		kaddr = kmap_atomic(page);
		from = kaddr + offset;

		/* first chunk, usually the only one */
		left = copyout(buf, from, copy);
		copy -= left;
		skip += copy;
		from += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = copyout(buf, from, copy);
			copy -= left;
			skip = copy;
			from += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = from - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	from = kaddr + offset;
	left = copyout(buf, from, copy);
	copy -= left;
	skip += copy;
	from += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = copyout(buf, from, copy);
		copy -= left;
		skip = copy;
		from += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *to;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	might_fault();
	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
		kaddr = kmap_atomic(page);
		to = kaddr + offset;

		/* first chunk, usually the only one */
		left = copyin(to, buf, copy);
		copy -= left;
		skip += copy;
		to += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = copyin(to, buf, copy);
			copy -= left;
			skip = copy;
			to += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = to - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	to = kaddr + offset;
	left = copyin(to, buf, copy);
	copy -= left;
	skip += copy;
	to += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = copyin(to, buf, copy);
		copy -= left;
		skip = copy;
		to += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

#ifdef PIPE_PARANOIA
static bool sanity(const struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_head = pipe->head;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int p_occupancy = pipe_occupancy(p_head, p_tail);
	unsigned int i_head = i->head;
	unsigned int idx;

	if (i->iov_offset) {
		struct pipe_buffer *p;
		if (unlikely(p_occupancy == 0))
			goto Bad;	// pipe must be non-empty
		if (unlikely(i_head != p_head - 1))
			goto Bad;	// must be at the last buffer...

		p = &pipe->bufs[i_head & p_mask];
		if (unlikely(p->offset + p->len != i->iov_offset))
			goto Bad;	// ... at the end of segment
	} else {
		if (i_head != p_head)
			goto Bad;	// must be right after the last buffer
	}
	return true;
Bad:
	printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset);
	printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n",
			p_head, p_tail, pipe->ring_size);
	for (idx = 0; idx < pipe->ring_size; idx++)
		printk(KERN_ERR "[%p %p %d %d]\n",
			pipe->bufs[idx].ops,
			pipe->bufs[idx].page,
			pipe->bufs[idx].offset,
			pipe->bufs[idx].len);
	WARN_ON(1);
	return false;
}
#else
#define sanity(i) true
#endif

static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	struct pipe_buffer *buf;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head = i->head;
	size_t off;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	if (!sanity(i))
		return 0;

	off = i->iov_offset;
	buf = &pipe->bufs[i_head & p_mask];
	if (off) {
		if (offset == off && buf->page == page) {
			/* merge with the last one */
			buf->len += bytes;
			i->iov_offset += bytes;
			goto out;
		}
		i_head++;
		buf = &pipe->bufs[i_head & p_mask];
	}
	if (pipe_full(i_head, p_tail, pipe->max_usage))
		return 0;

	buf->ops = &page_cache_pipe_buf_ops;
	get_page(page);
	buf->page = page;
	buf->offset = offset;
	buf->len = bytes;

	pipe->head = i_head + 1;
	i->iov_offset = offset + bytes;
	i->head = i_head;
out:
	i->count -= bytes;
	return bytes;
}

/*
 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
 * bytes. For each iovec, fault in each page that constitutes the iovec.
 *
 * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
 * because it is an invalid address).
 */
int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
{
	size_t skip = i->iov_offset;
	const struct iovec *iov;
	int err;
	struct iovec v;

	if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
		iterate_iovec(i, bytes, v, iov, skip, ({
			err = fault_in_pages_readable(v.iov_base, v.iov_len);
			if (unlikely(err))
				return err;
		0;}))
	}
	return 0;
}
EXPORT_SYMBOL(iov_iter_fault_in_readable);

void iov_iter_init(struct iov_iter *i, unsigned int direction,
			const struct iovec *iov, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	direction &= READ | WRITE;

	/* It will get better. Eventually... */
	if (uaccess_kernel()) {
		i->type = ITER_KVEC | direction;
		i->kvec = (struct kvec *)iov;
	} else {
		i->type = ITER_IOVEC | direction;
		i->iov = iov;
	}
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_init);
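
/*
 * Illustrative sketch (assumed caller, not part of this file): a write(2)
 * style path typically wraps the user buffer in an ITER_IOVEC, pre-faults
 * the pages (as generic_perform_write() does) and then pulls the data in
 * with copy_from_iter().  "ubuf", "len" and "kbuf" are placeholders:
 *
 *	struct iovec iov = { .iov_base = ubuf, .iov_len = len };
 *	struct iov_iter iter;
 *
 *	iov_iter_init(&iter, WRITE, &iov, 1, len);
 *	if (iov_iter_fault_in_readable(&iter, len))
 *		return -EFAULT;
 *	copied = copy_from_iter(kbuf, len, &iter);
 */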

static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
{
	char *from = kmap_atomic(page);
	memcpy(to, from + offset, len);
	kunmap_atomic(from);
}

static void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len)
{
	char *to = kmap_atomic(page);
	memcpy(to + offset, from, len);
	kunmap_atomic(to);
}

static void memzero_page(struct page *page, size_t offset, size_t len)
{
	char *addr = kmap_atomic(page);
	memset(addr + offset, 0, len);
	kunmap_atomic(addr);
}

static inline bool allocated(struct pipe_buffer *buf)
{
	return buf->ops == &default_pipe_buf_ops;
}

static inline void data_start(const struct iov_iter *i,
			      unsigned int *iter_headp, size_t *offp)
{
	unsigned int p_mask = i->pipe->ring_size - 1;
	unsigned int iter_head = i->head;
	size_t off = i->iov_offset;

	if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) ||
		    off == PAGE_SIZE)) {
		iter_head++;
		off = 0;
	}
	*iter_headp = iter_head;
	*offp = off;
}

static size_t push_pipe(struct iov_iter *i, size_t size,
			int *iter_headp, size_t *offp)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int iter_head;
	size_t off;
	ssize_t left;

	if (unlikely(size > i->count))
		size = i->count;
	if (unlikely(!size))
		return 0;

	left = size;
	data_start(i, &iter_head, &off);
	*iter_headp = iter_head;
	*offp = off;
	if (off) {
		left -= PAGE_SIZE - off;
		if (left <= 0) {
			pipe->bufs[iter_head & p_mask].len += size;
			return size;
		}
		pipe->bufs[iter_head & p_mask].len = PAGE_SIZE;
		iter_head++;
	}
	while (!pipe_full(iter_head, p_tail, pipe->max_usage)) {
		struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask];
		struct page *page = alloc_page(GFP_USER);
		if (!page)
			break;

		buf->ops = &default_pipe_buf_ops;
		buf->page = page;
		buf->offset = 0;
		buf->len = min_t(ssize_t, left, PAGE_SIZE);
		left -= buf->len;
		iter_head++;
		pipe->head = iter_head;

		if (left == 0)
			return size;
	}
	return size - left;
}

static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
				struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, off;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &off);
	if (unlikely(!n))
		return 0;
	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk);
		i->head = i_head;
		i->iov_offset = off + chunk;
		n -= chunk;
		addr += chunk;
		off = 0;
		i_head++;
	} while (n);
	i->count -= bytes;
	return bytes;
}

static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
			      __wsum sum, size_t off)
{
	__wsum next = csum_partial_copy_nocheck(from, to, len);
	return csum_block_add(sum, next, off);
}

static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
					 struct csum_state *csstate,
					 struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	__wsum sum = csstate->csum;
	size_t off = csstate->off;
	unsigned int i_head;
	size_t n, r;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &r);
	if (unlikely(!n))
		return 0;
	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - r);
		char *p = kmap_atomic(pipe->bufs[i_head & p_mask].page);
		sum = csum_and_memcpy(p + r, addr, chunk, sum, off);
		kunmap_atomic(p);
		i->head = i_head;
		i->iov_offset = r + chunk;
		n -= chunk;
		off += chunk;
		addr += chunk;
		r = 0;
		i_head++;
	} while (n);
	i->count -= bytes;
	csstate->csum = sum;
	csstate->off = off;
	return bytes;
}

size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
	const char *from = addr;
	if (unlikely(iov_iter_is_pipe(i)))
		return copy_pipe_to_iter(addr, bytes, i);
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
		memcpy_to_page(v.bv_page, v.bv_offset,
			       (from += v.bv_len) - v.bv_len, v.bv_len),
		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_to_iter);
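
/*
 * Illustrative sketch (assumed kernel-internal caller): when the destination
 * is kernel memory rather than userspace, the same copy helpers are driven
 * through an ITER_KVEC; READ marks the iterator as the destination of the
 * transfer.  "kbuf", "len" and "src" are placeholders:
 *
 *	struct kvec kv = { .iov_base = kbuf, .iov_len = len };
 *	struct iov_iter iter;
 *
 *	iov_iter_kvec(&iter, READ, &kv, 1, len);
 *	n = copy_to_iter(src, len, &iter);	// n < len only on a short copy
 */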

#ifdef CONFIG_ARCH_HAS_COPY_MC
static int copyout_mc(void __user *to, const void *from, size_t n)
{
	if (access_ok(to, n)) {
		instrument_copy_to_user(to, from, n);
		n = copy_mc_to_user((__force void *) to, from, n);
	}
	return n;
}

static unsigned long copy_mc_to_page(struct page *page, size_t offset,
				     const char *from, size_t len)
{
	unsigned long ret;
	char *to;

	to = kmap_atomic(page);
	ret = copy_mc_to_kernel(to + offset, from, len);
	kunmap_atomic(to);

	return ret;
}

static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
				struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, off, xfer = 0;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &off);
	if (unlikely(!n))
		return 0;
	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		unsigned long rem;

		rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page,
				      off, addr, chunk);
		i->head = i_head;
		i->iov_offset = off + chunk - rem;
		xfer += chunk - rem;
		if (rem)
			break;
		n -= chunk;
		addr += chunk;
		off = 0;
		i_head++;
	} while (n);
	i->count -= xfer;
	return xfer;
}

/**
 * _copy_mc_to_iter - copy to iter with source memory error exception handling
 * @addr: source kernel address
 * @bytes: total transfer length
 * @i: destination iterator
 *
 * The pmem driver deploys this for the dax operation
 * (dax_copy_to_iter()) for dax reads (bypass page-cache and the
 * block-layer). Upon #MC read(2) aborts and returns EIO or the bytes
 * successfully copied.
 *
 * The main differences between this and typical _copy_to_iter() are:
 *
 * * Typical tail/residue handling after a fault retries the copy
 *   byte-by-byte until the fault happens again. Re-triggering machine
 *   checks is potentially fatal so the implementation uses source
 *   alignment and poison alignment assumptions to avoid re-triggering
 *   hardware exceptions.
 *
 * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
 *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
 *   a short copy.
 */
size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
	const char *from = addr;
	unsigned long rem, curr_addr, s_addr = (unsigned long) addr;

	if (unlikely(iov_iter_is_pipe(i)))
		return copy_mc_pipe_to_iter(addr, bytes, i);
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len,
			   v.iov_len),
		({
		rem = copy_mc_to_page(v.bv_page, v.bv_offset,
				      (from += v.bv_len) - v.bv_len, v.bv_len);
		if (rem) {
			curr_addr = (unsigned long) from;
			bytes = curr_addr - s_addr - rem;
			return bytes;
		}
		}),
		({
		rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len)
					- v.iov_len, v.iov_len);
		if (rem) {
			curr_addr = (unsigned long) from;
			bytes = curr_addr - s_addr - rem;
			return bytes;
		}
		})
	)

	return bytes;
}
EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
#endif /* CONFIG_ARCH_HAS_COPY_MC */
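
/*
 * Illustrative sketch (assumed caller, mirroring the kernel-doc above): a
 * dax read path hands the pmem mapping straight to the iterator and treats
 * a short return as the point where poisoned memory was hit:
 *
 *	copied = _copy_mc_to_iter(kaddr, len, iter);
 *	if (copied != len)
 *		return -EIO;	// machine check truncated the copy
 */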

size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_from_iter);

bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;

	if (iter_is_iovec(i))
		might_fault();
	iterate_all_kinds(i, bytes, v, ({
		if (copyin((to += v.iov_len) - v.iov_len,
				      v.iov_base, v.iov_len))
			return false;
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(_copy_from_iter_full);

size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
					 v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_from_iter_nocache);

#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/**
 * _copy_from_iter_flushcache - write destination through cpu cache
 * @addr: destination kernel address
 * @bytes: total transfer length
 * @i: source iterator
 *
 * The pmem driver arranges for filesystem-dax to use this facility via
 * dax_copy_from_iter() for ensuring that writes to persistent memory
 * are flushed through the CPU cache. It is differentiated from
 * _copy_from_iter_nocache() in that it guarantees all data is flushed for
 * all iterator types. The _copy_from_iter_nocache() only attempts to
 * bypass the cache for the ITER_IOVEC case, and on some archs may use
 * instructions that strand dirty-data in the cache.
 */
size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
					 v.iov_base, v.iov_len),
		memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
			v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
#endif
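
/*
 * Illustrative sketch (assumed caller): a filesystem-dax write path uses the
 * flushcache variant so the data is durable in persistent memory once the
 * copy returns, rather than possibly sitting dirty in the CPU cache:
 *
 *	copied = _copy_from_iter_flushcache(pmem_addr, len, iter);
 */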

bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;
	iterate_all_kinds(i, bytes, v, ({
		if (__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
					     v.iov_base, v.iov_len))
			return false;
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(_copy_from_iter_full_nocache);

static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
{
	struct page *head;
	size_t v = n + offset;

	/*
	 * The general case needs to access the page order in order
	 * to compute the page size.
	 * However, we mostly deal with order-0 pages and thus can
	 * avoid a possible cache line miss for requests that fit all
	 * page orders.
	 */
	if (n <= v && v <= PAGE_SIZE)
		return true;

	head = compound_head(page);
	v += (page - head) << PAGE_SHIFT;

	if (likely(n <= v && v <= (page_size(head))))
		return true;
	WARN_ON(1);
	return false;
}

size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (unlikely(!page_copy_sane(page, offset, bytes)))
		return 0;
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else if (unlikely(iov_iter_is_discard(i)))
		return bytes;
	else if (likely(!iov_iter_is_pipe(i)))
		return copy_page_to_iter_iovec(page, offset, bytes, i);
	else
		return copy_page_to_iter_pipe(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_to_iter);

size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (unlikely(!page_copy_sane(page, offset, bytes)))
		return 0;
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return 0;
	}
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else
		return copy_page_from_iter_iovec(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_from_iter);
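
/*
 * Illustrative sketch (assumed caller, compare the buffered read path): a
 * filesystem read copies page cache contents out to whatever the iterator
 * describes and relies on the return value to detect a partial copy:
 *
 *	copied = copy_page_to_iter(page, offset, nr, iter);
 *	if (copied < nr && iov_iter_count(iter))
 *		return -EFAULT;	// could not fault in the destination
 */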

static size_t pipe_zero(size_t bytes, struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, off;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &off);
	if (unlikely(!n))
		return 0;

	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memzero_page(pipe->bufs[i_head & p_mask].page, off, chunk);
		i->head = i_head;
		i->iov_offset = off + chunk;
		n -= chunk;
		off = 0;
		i_head++;
	} while (n);
	i->count -= bytes;
	return bytes;
}

size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_zero(bytes, i);
	iterate_and_advance(i, bytes, v,
		clear_user(v.iov_base, v.iov_len),
		memzero_page(v.bv_page, v.bv_offset, v.bv_len),
		memset(v.iov_base, 0, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(iov_iter_zero);

size_t iov_iter_copy_from_user_atomic(struct page *page,
		struct iov_iter *i, unsigned long offset, size_t bytes)
{
	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
	if (unlikely(!page_copy_sane(page, offset, bytes))) {
		kunmap_atomic(kaddr);
		return 0;
	}
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		kunmap_atomic(kaddr);
		WARN_ON(1);
		return 0;
	}
	iterate_all_kinds(i, bytes, v,
		copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)
	kunmap_atomic(kaddr);
	return bytes;
}
EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);

static inline void pipe_truncate(struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_tail = pipe->tail;
	unsigned int p_head = pipe->head;
	unsigned int p_mask = pipe->ring_size - 1;

	if (!pipe_empty(p_head, p_tail)) {
		struct pipe_buffer *buf;
		unsigned int i_head = i->head;
		size_t off = i->iov_offset;

		if (off) {
			buf = &pipe->bufs[i_head & p_mask];
			buf->len = off - buf->offset;
			i_head++;
		}
		while (p_head != i_head) {
			p_head--;
			pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]);
		}

		pipe->head = p_head;
	}
}

static void pipe_advance(struct iov_iter *i, size_t size)
{
	struct pipe_inode_info *pipe = i->pipe;
	if (unlikely(i->count < size))
		size = i->count;
	if (size) {
		struct pipe_buffer *buf;
		unsigned int p_mask = pipe->ring_size - 1;
		unsigned int i_head = i->head;
		size_t off = i->iov_offset, left = size;

		if (off) /* make it relative to the beginning of buffer */
			left += off - pipe->bufs[i_head & p_mask].offset;
		while (1) {
			buf = &pipe->bufs[i_head & p_mask];
			if (left <= buf->len)
				break;
			left -= buf->len;
			i_head++;
		}
		i->head = i_head;
		i->iov_offset = buf->offset + left;
	}
	i->count -= size;
	/* ... and discard everything past that point */
	pipe_truncate(i);
}

static void iov_iter_bvec_advance(struct iov_iter *i, size_t size)
{
	struct bvec_iter bi;

	bi.bi_size = i->count;
	bi.bi_bvec_done = i->iov_offset;
	bi.bi_idx = 0;
	bvec_iter_advance(i->bvec, &bi, size);

	i->bvec += bi.bi_idx;
	i->nr_segs -= bi.bi_idx;
	i->count = bi.bi_size;
	i->iov_offset = bi.bi_bvec_done;
}

void iov_iter_advance(struct iov_iter *i, size_t size)
{
	if (unlikely(iov_iter_is_pipe(i))) {
		pipe_advance(i, size);
		return;
	}
	if (unlikely(iov_iter_is_discard(i))) {
		i->count -= size;
		return;
	}
	if (iov_iter_is_bvec(i)) {
		iov_iter_bvec_advance(i, size);
		return;
	}
	iterate_and_advance(i, size, v, 0, 0, 0)
}
EXPORT_SYMBOL(iov_iter_advance);

void iov_iter_revert(struct iov_iter *i, size_t unroll)
{
	if (!unroll)
		return;
	if (WARN_ON(unroll > MAX_RW_COUNT))
		return;
	i->count += unroll;
	if (unlikely(iov_iter_is_pipe(i))) {
		struct pipe_inode_info *pipe = i->pipe;
		unsigned int p_mask = pipe->ring_size - 1;
		unsigned int i_head = i->head;
		size_t off = i->iov_offset;
		while (1) {
			struct pipe_buffer *b = &pipe->bufs[i_head & p_mask];
			size_t n = off - b->offset;
			if (unroll < n) {
				off -= unroll;
				break;
			}
			unroll -= n;
			if (!unroll && i_head == i->start_head) {
				off = 0;
				break;
			}
			i_head--;
			b = &pipe->bufs[i_head & p_mask];
			off = b->offset + b->len;
		}
		i->iov_offset = off;
		i->head = i_head;
		pipe_truncate(i);
		return;
	}
	if (unlikely(iov_iter_is_discard(i)))
		return;
	if (unroll <= i->iov_offset) {
		i->iov_offset -= unroll;
		return;
	}
	unroll -= i->iov_offset;
	if (iov_iter_is_bvec(i)) {
		const struct bio_vec *bvec = i->bvec;
		while (1) {
			size_t n = (--bvec)->bv_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->bvec = bvec;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	} else { /* same logics for iovec and kvec */
		const struct iovec *iov = i->iov;
		while (1) {
			size_t n = (--iov)->iov_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->iov = iov;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	}
}
EXPORT_SYMBOL(iov_iter_revert);

/*
 * Return the count of just the current iov_iter segment.
 */
size_t iov_iter_single_seg_count(const struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i)))
		return i->count;	// it is a silly place, anyway
	if (i->nr_segs == 1)
		return i->count;
	if (unlikely(iov_iter_is_discard(i)))
		return i->count;
	else if (iov_iter_is_bvec(i))
		return min(i->count, i->bvec->bv_len - i->iov_offset);
	else
		return min(i->count, i->iov->iov_len - i->iov_offset);
}
EXPORT_SYMBOL(iov_iter_single_seg_count);

void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
			const struct kvec *kvec, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	i->type = ITER_KVEC | (direction & (READ | WRITE));
	i->kvec = kvec;
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_kvec);

void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
			const struct bio_vec *bvec, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	i->type = ITER_BVEC | (direction & (READ | WRITE));
	i->bvec = bvec;
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_bvec);

void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
			struct pipe_inode_info *pipe,
			size_t count)
{
	BUG_ON(direction != READ);
	WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size));
	i->type = ITER_PIPE | READ;
	i->pipe = pipe;
	i->head = pipe->head;
	i->iov_offset = 0;
	i->count = count;
	i->start_head = i->head;
}
EXPORT_SYMBOL(iov_iter_pipe);

/**
 * iov_iter_discard - Initialise an I/O iterator that discards data
 * @i: The iterator to initialise.
 * @direction: The direction of the transfer.
 * @count: The size of the I/O buffer in bytes.
 *
 * Set up an I/O iterator that just discards everything that's written to it.
 * It's only available as a READ iterator.
 */
void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
{
	BUG_ON(direction != READ);
	i->type = ITER_DISCARD | READ;
	i->count = count;
	i->iov_offset = 0;
}
EXPORT_SYMBOL(iov_iter_discard);
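
/*
 * Illustrative sketch (assumed caller): a discard iterator is a convenient
 * way to drain bytes the caller does not want while still going through the
 * normal copy path; the data is simply dropped and only ->count moves:
 *
 *	struct iov_iter iter;
 *
 *	iov_iter_discard(&iter, READ, to_skip);
 *	copy_to_iter(src, to_skip, &iter);
 */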

unsigned long iov_iter_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;

	if (unlikely(iov_iter_is_pipe(i))) {
		unsigned int p_mask = i->pipe->ring_size - 1;

		if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask]))
			return size | i->iov_offset;
		return size;
	}
	iterate_all_kinds(i, size, v,
		(res |= (unsigned long)v.iov_base | v.iov_len, 0),
		res |= v.bv_offset | v.bv_len,
		res |= (unsigned long)v.iov_base | v.iov_len
	)
	return res;
}
EXPORT_SYMBOL(iov_iter_alignment);

unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;

	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return ~0U;
	}

	iterate_all_kinds(i, size, v,
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0), 0),
		(res |= (!res ? 0 : (unsigned long)v.bv_offset) |
			(size != v.bv_len ? size : 0)),
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0))
		);
	return res;
}
EXPORT_SYMBOL(iov_iter_gap_alignment);

static inline ssize_t __pipe_get_pages(struct iov_iter *i,
				size_t maxsize,
				struct page **pages,
				int iter_head,
				size_t *start)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	ssize_t n = push_pipe(i, maxsize, &iter_head, start);
	if (!n)
		return -EFAULT;

	maxsize = n;
	n += *start;
	while (n > 0) {
		get_page(*pages++ = pipe->bufs[iter_head & p_mask].page);
		iter_head++;
		n -= PAGE_SIZE;
	}

	return maxsize;
}

static ssize_t pipe_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	unsigned int iter_head, npages;
	size_t capacity;

	if (!maxsize)
		return 0;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &iter_head, start);
	/* Amount of free space: some of this one + all after this one */
	npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
	capacity = min(npages, maxpages) * PAGE_SIZE - *start;

	return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start);
}

ssize_t iov_iter_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	if (maxsize > i->count)
		maxsize = i->count;

	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_get_pages(i, pages, maxsize, maxpages, start);
	if (unlikely(iov_iter_is_discard(i)))
		return -EFAULT;

	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		if (len > maxpages * PAGE_SIZE)
			len = maxpages * PAGE_SIZE;
		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		res = get_user_pages_fast(addr, n,
				iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0,
				pages);
		if (unlikely(res < 0))
			return res;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		get_page(*pages = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages);

static struct page **get_pages_array(size_t n)
{
	return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
}

static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;
	unsigned int iter_head, npages;
	ssize_t n;

	if (!maxsize)
		return 0;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &iter_head, start);
	/* Amount of free space: some of this one + all after this one */
	npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
	n = npages * PAGE_SIZE - *start;
	if (maxsize > n)
		maxsize = n;
	else
		npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
	p = get_pages_array(npages);
	if (!p)
		return -ENOMEM;
	n = __pipe_get_pages(i, maxsize, p, iter_head, start);
	if (n > 0)
		*pages = p;
	else
		kvfree(p);
	return n;
}

ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;

	if (maxsize > i->count)
		maxsize = i->count;

	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_get_pages_alloc(i, pages, maxsize, start);
	if (unlikely(iov_iter_is_discard(i)))
		return -EFAULT;

	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		p = get_pages_array(n);
		if (!p)
			return -ENOMEM;
		res = get_user_pages_fast(addr, n,
				iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0, p);
		if (unlikely(res < 0)) {
			kvfree(p);
			return res;
		}
		*pages = p;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		*pages = p = get_pages_array(1);
		if (!p)
			return -ENOMEM;
		get_page(*p = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages_alloc);

size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
			       struct iov_iter *i)
{
	char *to = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		next = csum_and_copy_from_user(v.iov_base,
					       (to += v.iov_len) - v.iov_len,
					       v.iov_len);
		if (next) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		next ? 0 : v.iov_len;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
				      p + v.bv_offset, v.bv_len,
				      sum, off);
		kunmap_atomic(p);
		off += v.bv_len;
	}),({
		sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
				      v.iov_base, v.iov_len,
				      sum, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_from_iter);

bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
			       struct iov_iter *i)
{
	char *to = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;
	iterate_all_kinds(i, bytes, v, ({
		next = csum_and_copy_from_user(v.iov_base,
					       (to += v.iov_len) - v.iov_len,
					       v.iov_len);
		if (!next)
			return false;
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
		0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
				      p + v.bv_offset, v.bv_len,
				      sum, off);
		kunmap_atomic(p);
		off += v.bv_len;
	}),({
		sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
				      v.iov_base, v.iov_len,
				      sum, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(csum_and_copy_from_iter_full);

size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
			     struct iov_iter *i)
{
	struct csum_state *csstate = _csstate;
	const char *from = addr;
	__wsum sum, next;
	size_t off;

	if (unlikely(iov_iter_is_pipe(i)))
		return csum_and_copy_to_pipe_iter(addr, bytes, _csstate, i);

	sum = csstate->csum;
	off = csstate->off;
	if (unlikely(iov_iter_is_discard(i))) {
		WARN_ON(1);	/* for now */
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
					     v.iov_base,
					     v.iov_len);
		if (next) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		next ? 0 : v.iov_len;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		sum = csum_and_memcpy(p + v.bv_offset,
				      (from += v.bv_len) - v.bv_len,
				      v.bv_len, sum, off);
		kunmap_atomic(p);
		off += v.bv_len;
	}),({
		sum = csum_and_memcpy(v.iov_base,
				      (from += v.iov_len) - v.iov_len,
				      v.iov_len, sum, off);
		off += v.iov_len;
	})
	)
	csstate->csum = sum;
	csstate->off = off;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_to_iter);
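
/*
 * Illustrative sketch (assumed caller, compare the datagram code in
 * net/core): the caller keeps a struct csum_state across calls so the
 * running checksum stays in step with how many bytes have been emitted;
 * the seed and the use of the final csum are up to the caller:
 *
 *	struct csum_state csstate = { .csum = 0, .off = 0 };
 *
 *	copied = csum_and_copy_to_iter(data, len, &csstate, iter);
 *	// csstate.csum now covers the bytes actually copied
 */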

size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
		struct iov_iter *i)
{
#ifdef CONFIG_CRYPTO_HASH
	struct ahash_request *hash = hashp;
	struct scatterlist sg;
	size_t copied;

	copied = copy_to_iter(addr, bytes, i);
	sg_init_one(&sg, addr, copied);
	ahash_request_set_crypt(hash, &sg, NULL, copied);
	crypto_ahash_update(hash);
	return copied;
#else
	return 0;
#endif
}
EXPORT_SYMBOL(hash_and_copy_to_iter);

int iov_iter_npages(const struct iov_iter *i, int maxpages)
{
	size_t size = i->count;
	int npages = 0;

	if (!size)
		return 0;
	if (unlikely(iov_iter_is_discard(i)))
		return 0;

	if (unlikely(iov_iter_is_pipe(i))) {
		struct pipe_inode_info *pipe = i->pipe;
		unsigned int iter_head;
		size_t off;

		if (!sanity(i))
			return 0;

		data_start(i, &iter_head, &off);
		/* some of this one + all after this one */
		npages = pipe_space_for_user(iter_head, pipe->tail, pipe);
		if (npages >= maxpages)
			return maxpages;
	} else iterate_all_kinds(i, size, v, ({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	0;}),({
		npages++;
		if (npages >= maxpages)
			return maxpages;
	}),({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	})
	)
	return npages;
}
EXPORT_SYMBOL(iov_iter_npages);

const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
{
	*new = *old;
	if (unlikely(iov_iter_is_pipe(new))) {
		WARN_ON(1);
		return NULL;
	}
	if (unlikely(iov_iter_is_discard(new)))
		return NULL;
	if (iov_iter_is_bvec(new))
		return new->bvec = kmemdup(new->bvec,
				    new->nr_segs * sizeof(struct bio_vec),
				    flags);
	else
		/* iovec and kvec have identical layout */
		return new->iov = kmemdup(new->iov,
				   new->nr_segs * sizeof(struct iovec),
				   flags);
}
EXPORT_SYMBOL(dup_iter);

static int copy_compat_iovec_from_user(struct iovec *iov,
		const struct iovec __user *uvec, unsigned long nr_segs)
{
	const struct compat_iovec __user *uiov =
		(const struct compat_iovec __user *)uvec;
	int ret = -EFAULT, i;

	if (!user_access_begin(uiov, nr_segs * sizeof(*uiov)))
		return -EFAULT;

	for (i = 0; i < nr_segs; i++) {
		compat_uptr_t buf;
		compat_ssize_t len;

		unsafe_get_user(len, &uiov[i].iov_len, uaccess_end);
		unsafe_get_user(buf, &uiov[i].iov_base, uaccess_end);

		/* check for compat_size_t not fitting in compat_ssize_t .. */
		if (len < 0) {
			ret = -EINVAL;
			goto uaccess_end;
		}
		iov[i].iov_base = compat_ptr(buf);
		iov[i].iov_len = len;
	}

	ret = 0;
uaccess_end:
	user_access_end();
	return ret;
}

static int copy_iovec_from_user(struct iovec *iov,
		const struct iovec __user *uvec, unsigned long nr_segs)
{
	unsigned long seg;

	if (copy_from_user(iov, uvec, nr_segs * sizeof(*uvec)))
		return -EFAULT;
	for (seg = 0; seg < nr_segs; seg++) {
		if ((ssize_t)iov[seg].iov_len < 0)
			return -EINVAL;
	}

	return 0;
}

struct iovec *iovec_from_user(const struct iovec __user *uvec,
		unsigned long nr_segs, unsigned long fast_segs,
		struct iovec *fast_iov, bool compat)
{
	struct iovec *iov = fast_iov;
	int ret;

	/*
	 * SuS says "The readv() function *may* fail if the iovcnt argument was
	 * less than or equal to 0, or greater than {IOV_MAX}. Linux has
	 * traditionally returned zero for zero segments, so...
	 */
	if (nr_segs == 0)
		return iov;
	if (nr_segs > UIO_MAXIOV)
		return ERR_PTR(-EINVAL);
	if (nr_segs > fast_segs) {
		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
		if (!iov)
			return ERR_PTR(-ENOMEM);
	}

	if (compat)
		ret = copy_compat_iovec_from_user(iov, uvec, nr_segs);
	else
		ret = copy_iovec_from_user(iov, uvec, nr_segs);
	if (ret) {
		if (iov != fast_iov)
			kfree(iov);
		return ERR_PTR(ret);
	}

	return iov;
}

ssize_t __import_iovec(int type, const struct iovec __user *uvec,
		 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
		 struct iov_iter *i, bool compat)
{
	ssize_t total_len = 0;
	unsigned long seg;
	struct iovec *iov;

	iov = iovec_from_user(uvec, nr_segs, fast_segs, *iovp, compat);
	if (IS_ERR(iov)) {
		*iovp = NULL;
		return PTR_ERR(iov);
	}

	/*
	 * According to the Single Unix Specification we should return EINVAL if
	 * an element length is < 0 when cast to ssize_t or if the total length
	 * would overflow the ssize_t return value of the system call.
	 *
	 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
	 * overflow case.
	 */
	for (seg = 0; seg < nr_segs; seg++) {
		ssize_t len = (ssize_t)iov[seg].iov_len;

		if (!access_ok(iov[seg].iov_base, len)) {
			if (iov != *iovp)
				kfree(iov);
			*iovp = NULL;
			return -EFAULT;
		}

		if (len > MAX_RW_COUNT - total_len) {
			len = MAX_RW_COUNT - total_len;
			iov[seg].iov_len = len;
		}
		total_len += len;
	}

	iov_iter_init(i, type, iov, nr_segs, total_len);
	if (iov == *iovp)
		*iovp = NULL;
	else
		*iovp = iov;
	return total_len;
}

/**
 * import_iovec() - Copy an array of &struct iovec from userspace
 *     into the kernel, check that it is valid, and initialize a new
 *     &struct iov_iter iterator to access it.
 *
 * @type: One of %READ or %WRITE.
 * @uvec: Pointer to the userspace array.
 * @nr_segs: Number of elements in userspace array.
 * @fast_segs: Number of elements in *@iovp.
 * @iovp: (input and output parameter) Pointer to pointer to (usually small
 *     on-stack) kernel array.
 * @i: Pointer to iterator that will be initialized on success.
 *
 * If the array pointed to by *@iovp is large enough to hold all @nr_segs,
 * then this function places %NULL in *@iovp on return. Otherwise, a new
 * array will be allocated and the result placed in *@iovp. This means that
 * the caller may call kfree() on *@iovp regardless of whether the small
 * on-stack array was used or not (and regardless of whether this function
 * returns an error or not).
 *
 * Return: Negative error code on error, bytes imported on success
 */
ssize_t import_iovec(int type, const struct iovec __user *uvec,
		 unsigned nr_segs, unsigned fast_segs,
		 struct iovec **iovp, struct iov_iter *i)
{
	return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i,
			      in_compat_syscall());
}
EXPORT_SYMBOL(import_iovec);
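
/*
 * Illustrative sketch (assumed caller, compare do_readv() and friends): the
 * usual pattern is a small on-stack array plus an unconditional kfree() of
 * whatever comes back through @iovp, which is safe in both the fast and the
 * allocated case.  "do_the_io" is a hypothetical consumer:
 *
 *	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
 *	struct iov_iter iter;
 *	ssize_t ret;
 *
 *	ret = import_iovec(READ, uvec, nr_segs, UIO_FASTIOV, &iov, &iter);
 *	if (ret < 0)
 *		return ret;
 *	ret = do_the_io(&iter);
 *	kfree(iov);
 */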

int import_single_range(int rw, void __user *buf, size_t len,
		 struct iovec *iov, struct iov_iter *i)
{
	if (len > MAX_RW_COUNT)
		len = MAX_RW_COUNT;
	if (unlikely(!access_ok(buf, len)))
		return -EFAULT;

	iov->iov_base = buf;
	iov->iov_len = len;
	iov_iter_init(i, rw, iov, 1, len);
	return 0;
}
EXPORT_SYMBOL(import_single_range);

int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
			    int (*f)(struct kvec *vec, void *context),
			    void *context)
{
	struct kvec w;
	int err = -EINVAL;
	if (!bytes)
		return 0;

	iterate_all_kinds(i, bytes, v, -EINVAL, ({
		w.iov_base = kmap(v.bv_page) + v.bv_offset;
		w.iov_len = v.bv_len;
		err = f(&w, context);
		kunmap(v.bv_page);
		err;}), ({
		w = v;
		err = f(&w, context);})
	)
	return err;
}
EXPORT_SYMBOL(iov_iter_for_each_range);