#include <linux/export.h>
#include <linux/bvec.h>
#include <linux/uio.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/splice.h>
#include <net/checksum.h>

#define PIPE_PARANOIA /* for now */

#define iterate_iovec(i, n, __v, __p, skip, STEP) {	\
	size_t left;	\
	size_t wanted = n;	\
	__p = i->iov;	\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {	\
		__v.iov_base = __p->iov_base + skip;	\
		left = (STEP);	\
		__v.iov_len -= left;	\
		skip += __v.iov_len;	\
		n -= __v.iov_len;	\
	} else {	\
		left = 0;	\
	}	\
	while (unlikely(!left && n)) {	\
		__p++;	\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))	\
			continue;	\
		__v.iov_base = __p->iov_base;	\
		left = (STEP);	\
		__v.iov_len -= left;	\
		skip = __v.iov_len;	\
		n -= __v.iov_len;	\
	}	\
	n = wanted - n;	\
}

#define iterate_kvec(i, n, __v, __p, skip, STEP) {	\
	size_t wanted = n;	\
	__p = i->kvec;	\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {	\
		__v.iov_base = __p->iov_base + skip;	\
		(void)(STEP);	\
		skip += __v.iov_len;	\
		n -= __v.iov_len;	\
	}	\
	while (unlikely(n)) {	\
		__p++;	\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))	\
			continue;	\
		__v.iov_base = __p->iov_base;	\
		(void)(STEP);	\
		skip = __v.iov_len;	\
		n -= __v.iov_len;	\
	}	\
	n = wanted;	\
}

#define iterate_bvec(i, n, __v, __bi, skip, STEP) {	\
	struct bvec_iter __start;	\
	__start.bi_size = n;	\
	__start.bi_bvec_done = skip;	\
	__start.bi_idx = 0;	\
	for_each_bvec(__v, i->bvec, __bi, __start) {	\
		if (!__v.bv_len)	\
			continue;	\
		(void)(STEP);	\
	}	\
}

#define iterate_all_kinds(i, n, v, I, B, K) {	\
	if (likely(n)) {	\
		size_t skip = i->iov_offset;	\
		if (unlikely(i->type & ITER_BVEC)) {	\
			struct bio_vec v;	\
			struct bvec_iter __bi;	\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;	\
			struct kvec v;	\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
		} else if (unlikely(i->type & ITER_DISCARD)) {	\
		} else {	\
			const struct iovec *iov;	\
			struct iovec v;	\
			iterate_iovec(i, n, v, iov, skip, (I))	\
		}	\
	}	\
}

#define iterate_and_advance(i, n, v, I, B, K) {	\
	if (unlikely(i->count < n))	\
		n = i->count;	\
	if (i->count) {	\
		size_t skip = i->iov_offset;	\
		if (unlikely(i->type & ITER_BVEC)) {	\
			const struct bio_vec *bvec = i->bvec;	\
			struct bio_vec v;	\
			struct bvec_iter __bi;	\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
			i->bvec = __bvec_iter_bvec(i->bvec, __bi);	\
			i->nr_segs -= i->bvec - bvec;	\
			skip = __bi.bi_bvec_done;	\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;	\
			struct kvec v;	\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
			if (skip == kvec->iov_len) {	\
				kvec++;	\
				skip = 0;	\
			}	\
			i->nr_segs -= kvec - i->kvec;	\
			i->kvec = kvec;	\
		} else if (unlikely(i->type & ITER_DISCARD)) {	\
			skip += n;	\
		} else {	\
			const struct iovec *iov;	\
			struct iovec v;	\
			iterate_iovec(i, n, v, iov, skip, (I))	\
			if (skip == iov->iov_len) {	\
				iov++;	\
				skip = 0;	\
			}	\
			i->nr_segs -= iov - i->iov;	\
			i->iov = iov;	\
		}	\
		i->count -= n;	\
		i->iov_offset = skip;	\
	}	\
}
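
/*
 * The iterate_* helpers above hide the differences between the segment
 * representations an iov_iter can carry.  Callers pass one expression per
 * flavour - I for user iovecs, B for bio_vecs, K for kvecs - and the one
 * matching i->type is expanded with 'v' bound to the current segment.
 * iterate_all_kinds() only walks the segments; iterate_and_advance() also
 * consumes what was walked, updating ->iov/->bvec/->kvec, ->nr_segs,
 * ->iov_offset and ->count.
 *
 * Illustrative sketch only (not used in this file): a helper that counts
 * how many bytes the first 'bytes' of an iterator cover could be written as
 *
 *	size_t total = 0;
 *	iterate_all_kinds(i, bytes, v,
 *		({ total += v.iov_len; 0; }),
 *		total += v.bv_len,
 *		total += v.iov_len
 *	)
 *
 * where the first expression must evaluate to the number of bytes it could
 * not process (0 here), matching what the user-copy steps below return.
 */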

static int copyout(void __user *to, const void *from, size_t n)
{
	if (access_ok(VERIFY_WRITE, to, n)) {
		kasan_check_read(from, n);
		n = raw_copy_to_user(to, from, n);
	}
	return n;
}

static int copyin(void *to, const void __user *from, size_t n)
{
	if (access_ok(VERIFY_READ, from, n)) {
		kasan_check_write(to, n);
		n = raw_copy_from_user(to, from, n);
	}
	return n;
}

static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *from;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	might_fault();
	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
		kaddr = kmap_atomic(page);
		from = kaddr + offset;

		/* first chunk, usually the only one */
		left = copyout(buf, from, copy);
		copy -= left;
		skip += copy;
		from += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = copyout(buf, from, copy);
			copy -= left;
			skip = copy;
			from += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = from - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	from = kaddr + offset;
	left = copyout(buf, from, copy);
	copy -= left;
	skip += copy;
	from += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = copyout(buf, from, copy);
		copy -= left;
		skip = copy;
		from += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}
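
/*
 * Mirror image of copy_page_to_iter_iovec(): copy data from the user iovecs
 * into @page, again preferring an atomic kmap and falling back to kmap()
 * once a fault has to be taken.
 */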

static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *to;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	might_fault();
	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
		kaddr = kmap_atomic(page);
		to = kaddr + offset;

		/* first chunk, usually the only one */
		left = copyin(to, buf, copy);
		copy -= left;
		skip += copy;
		to += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = copyin(to, buf, copy);
			copy -= left;
			skip = copy;
			to += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = to - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	to = kaddr + offset;
	left = copyin(to, buf, copy);
	copy -= left;
	skip += copy;
	to += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = copyin(to, buf, copy);
		copy -= left;
		skip = copy;
		to += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

#ifdef PIPE_PARANOIA
static bool sanity(const struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	int idx = i->idx;
	int next = pipe->curbuf + pipe->nrbufs;
	if (i->iov_offset) {
		struct pipe_buffer *p;
		if (unlikely(!pipe->nrbufs))
			goto Bad;	// pipe must be non-empty
		if (unlikely(idx != ((next - 1) & (pipe->buffers - 1))))
			goto Bad;	// must be at the last buffer...

		p = &pipe->bufs[idx];
		if (unlikely(p->offset + p->len != i->iov_offset))
			goto Bad;	// ... at the end of segment
	} else {
		if (idx != (next & (pipe->buffers - 1)))
			goto Bad;	// must be right after the last buffer
	}
	return true;
Bad:
	printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset);
	printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n",
			pipe->curbuf, pipe->nrbufs, pipe->buffers);
	for (idx = 0; idx < pipe->buffers; idx++)
		printk(KERN_ERR "[%p %p %d %d]\n",
			pipe->bufs[idx].ops,
			pipe->bufs[idx].page,
			pipe->bufs[idx].offset,
			pipe->bufs[idx].len);
	WARN_ON(1);
	return false;
}
#else
#define sanity(i) true
#endif

static inline int next_idx(int idx, struct pipe_inode_info *pipe)
{
	return (idx + 1) & (pipe->buffers - 1);
}
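
/*
 * Append a page to a pipe-backed iterator: either merge into the buffer we
 * filled last time (same page, contiguous offset) or claim the next pipe
 * slot and take a reference on @page.  Returns the number of bytes
 * accounted for, or 0 if the pipe is already full.
 */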

static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	struct pipe_buffer *buf;
	size_t off;
	int idx;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	if (!sanity(i))
		return 0;

	off = i->iov_offset;
	idx = i->idx;
	buf = &pipe->bufs[idx];
	if (off) {
		if (offset == off && buf->page == page) {
			/* merge with the last one */
			buf->len += bytes;
			i->iov_offset += bytes;
			goto out;
		}
		idx = next_idx(idx, pipe);
		buf = &pipe->bufs[idx];
	}
	if (idx == pipe->curbuf && pipe->nrbufs)
		return 0;
	pipe->nrbufs++;
	buf->ops = &page_cache_pipe_buf_ops;
	get_page(buf->page = page);
	buf->offset = offset;
	buf->len = bytes;
	i->iov_offset = offset + bytes;
	i->idx = idx;
out:
	i->count -= bytes;
	return bytes;
}

/*
 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
 * bytes.  For each iovec, fault in each page that constitutes the iovec.
 *
 * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
 * because it is an invalid address).
 */
int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
{
	size_t skip = i->iov_offset;
	const struct iovec *iov;
	int err;
	struct iovec v;

	if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
		iterate_iovec(i, bytes, v, iov, skip, ({
			err = fault_in_pages_readable(v.iov_base, v.iov_len);
			if (unlikely(err))
				return err;
			0;
		}))
	}
	return 0;
}
EXPORT_SYMBOL(iov_iter_fault_in_readable);

void iov_iter_init(struct iov_iter *i, unsigned int direction,
			const struct iovec *iov, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	direction &= READ | WRITE;

	/* It will get better.  Eventually... */
	if (uaccess_kernel()) {
		i->type = ITER_KVEC | direction;
		i->kvec = (struct kvec *)iov;
	} else {
		i->type = ITER_IOVEC | direction;
		i->iov = iov;
	}
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_init);

static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
{
	char *from = kmap_atomic(page);
	memcpy(to, from + offset, len);
	kunmap_atomic(from);
}

static void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len)
{
	char *to = kmap_atomic(page);
	memcpy(to + offset, from, len);
	kunmap_atomic(to);
}

static void memzero_page(struct page *page, size_t offset, size_t len)
{
	char *addr = kmap_atomic(page);
	memset(addr + offset, 0, len);
	kunmap_atomic(addr);
}

static inline bool allocated(struct pipe_buffer *buf)
{
	return buf->ops == &default_pipe_buf_ops;
}

static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp)
{
	size_t off = i->iov_offset;
	int idx = i->idx;
	if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) {
		idx = next_idx(idx, i->pipe);
		off = 0;
	}
	*idxp = idx;
	*offp = off;
}
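
/*
 * data_start() above reports where freshly written data would start in the
 * pipe (moving on to the next slot when the current buffer is full or not
 * one we allocated), and push_pipe() below allocates enough pipe buffers to
 * take up to 'size' bytes from that point, returning how much room could
 * actually be secured.
 */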

static size_t push_pipe(struct iov_iter *i, size_t size,
			int *idxp, size_t *offp)
{
	struct pipe_inode_info *pipe = i->pipe;
	size_t off;
	int idx;
	ssize_t left;

	if (unlikely(size > i->count))
		size = i->count;
	if (unlikely(!size))
		return 0;

	left = size;
	data_start(i, &idx, &off);
	*idxp = idx;
	*offp = off;
	if (off) {
		left -= PAGE_SIZE - off;
		if (left <= 0) {
			pipe->bufs[idx].len += size;
			return size;
		}
		pipe->bufs[idx].len = PAGE_SIZE;
		idx = next_idx(idx, pipe);
	}
	while (idx != pipe->curbuf || !pipe->nrbufs) {
		struct page *page = alloc_page(GFP_USER);
		if (!page)
			break;
		pipe->nrbufs++;
		pipe->bufs[idx].ops = &default_pipe_buf_ops;
		pipe->bufs[idx].page = page;
		pipe->bufs[idx].offset = 0;
		if (left <= PAGE_SIZE) {
			pipe->bufs[idx].len = left;
			return size;
		}
		pipe->bufs[idx].len = PAGE_SIZE;
		left -= PAGE_SIZE;
		idx = next_idx(idx, pipe);
	}
	return size - left;
}

static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
				struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	size_t n, off;
	int idx;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &idx, &off);
	if (unlikely(!n))
		return 0;
	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk);
		i->idx = idx;
		i->iov_offset = off + chunk;
		n -= chunk;
		addr += chunk;
	}
	i->count -= bytes;
	return bytes;
}

static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
				__wsum *csum, struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	size_t n, r;
	size_t off = 0;
	__wsum sum = *csum, next;
	int idx;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &idx, &r);
	if (unlikely(!n))
		return 0;
	for ( ; n; idx = next_idx(idx, pipe), r = 0) {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - r);
		char *p = kmap_atomic(pipe->bufs[idx].page);
		next = csum_partial_copy_nocheck(addr, p + r, chunk, 0);
		sum = csum_block_add(sum, next, off);
		kunmap_atomic(p);
		i->idx = idx;
		i->iov_offset = r + chunk;
		n -= chunk;
		off += chunk;
		addr += chunk;
	}
	i->count -= bytes;
	*csum = sum;
	return bytes;
}

size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
	const char *from = addr;
	if (unlikely(iov_iter_is_pipe(i)))
		return copy_pipe_to_iter(addr, bytes, i);
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
		memcpy_to_page(v.bv_page, v.bv_offset,
			       (from += v.bv_len) - v.bv_len, v.bv_len),
		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_to_iter);
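
/*
 * Illustrative sketch only (not a caller from this file): a ->read_iter()
 * style method typically hands kernel data to whatever the iterator
 * describes via copy_to_iter(), e.g.
 *
 *	static ssize_t foo_read_iter(struct kiocb *iocb, struct iov_iter *to)
 *	{
 *		struct foo_dev *foo = iocb->ki_filp->private_data;
 *
 *		return copy_to_iter(foo->buf, foo->len, to);
 *	}
 *
 * foo_dev, foo->buf and foo->len are made-up names; a short copy simply
 * shows up as a smaller return value.
 */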

#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
static int copyout_mcsafe(void __user *to, const void *from, size_t n)
{
	if (access_ok(VERIFY_WRITE, to, n)) {
		kasan_check_read(from, n);
		n = copy_to_user_mcsafe((__force void *) to, from, n);
	}
	return n;
}

static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset,
		const char *from, size_t len)
{
	unsigned long ret;
	char *to;

	to = kmap_atomic(page);
	ret = memcpy_mcsafe(to + offset, from, len);
	kunmap_atomic(to);

	return ret;
}

static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
				struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	size_t n, off, xfer = 0;
	int idx;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &idx, &off);
	if (unlikely(!n))
		return 0;
	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		unsigned long rem;

		rem = memcpy_mcsafe_to_page(pipe->bufs[idx].page, off, addr,
					    chunk);
		i->idx = idx;
		i->iov_offset = off + chunk - rem;
		xfer += chunk - rem;
		if (rem)
			break;
		n -= chunk;
		addr += chunk;
	}
	i->count -= xfer;
	return xfer;
}

/**
 * _copy_to_iter_mcsafe - copy to user with source-read error exception handling
 * @addr: source kernel address
 * @bytes: total transfer length
 * @i: destination iterator
 *
 * The pmem driver arranges for filesystem-dax to use this facility via
 * dax_copy_to_iter() for protecting read/write to persistent memory.
 * Unless / until an architecture can guarantee identical performance
 * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a
 * performance regression to switch more users to the mcsafe version.
 *
 * Otherwise, the main differences between this and typical _copy_to_iter()
 * are:
 *
 * * Typical tail/residue handling after a fault retries the copy
 *   byte-by-byte until the fault happens again. Re-triggering machine
 *   checks is potentially fatal so the implementation uses source
 *   alignment and poison alignment assumptions to avoid re-triggering
 *   hardware exceptions.
 *
 * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
 *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
 *   a short copy.
 *
 * See MCSAFE_TEST for self-test.
 */
size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
{
	const char *from = addr;
	unsigned long rem, curr_addr, s_addr = (unsigned long) addr;

	if (unlikely(iov_iter_is_pipe(i)))
		return copy_pipe_to_iter_mcsafe(addr, bytes, i);
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
		({
		rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset,
				(from += v.bv_len) - v.bv_len, v.bv_len);
		if (rem) {
			curr_addr = (unsigned long) from;
			bytes = curr_addr - s_addr - rem;
			return bytes;
		}
		}),
		({
		rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len,
				v.iov_len);
		if (rem) {
			curr_addr = (unsigned long) from;
			bytes = curr_addr - s_addr - rem;
			return bytes;
		}
		})
	)

	return bytes;
}
EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
#endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */

size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_from_iter);

bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;

	if (iter_is_iovec(i))
		might_fault();
	iterate_all_kinds(i, bytes, v, ({
		if (copyin((to += v.iov_len) - v.iov_len,
			   v.iov_base, v.iov_len))
			return false;
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(_copy_from_iter_full);

size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
						  v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_from_iter_nocache);
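
/*
 * Illustrative sketch only (not a caller from this file): the write-side
 * counterpart pulls data out of the iterator with copy_from_iter(), e.g.
 *
 *	if (copy_from_iter(kbuf, len, from) != len)
 *		return -EFAULT;
 *
 * kbuf, len and from are made-up names for a kernel buffer, its size and
 * the source iterator; copy_from_iter_full() additionally leaves the
 * iterator unadvanced unless the whole amount could be copied.
 */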

#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/**
 * _copy_from_iter_flushcache - write destination through CPU cache
 * @addr: destination kernel address
 * @bytes: total transfer length
 * @i: source iterator
 *
 * The pmem driver arranges for filesystem-dax to use this facility via
 * dax_copy_from_iter() for ensuring that writes to persistent memory
 * are flushed through the CPU cache. It is differentiated from
 * _copy_from_iter_nocache() in that it guarantees all data is flushed for
 * all iterator types. _copy_from_iter_nocache() only attempts to
 * bypass the cache for the ITER_IOVEC case, and on some archs may use
 * instructions that strand dirty-data in the cache.
 */
size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
					    v.iov_base, v.iov_len),
		memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
				       v.bv_offset, v.bv_len),
		memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
				  v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
#endif

bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;
	iterate_all_kinds(i, bytes, v, ({
		if (__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
						      v.iov_base, v.iov_len))
			return false;
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(_copy_from_iter_full_nocache);

static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
{
	struct page *head = compound_head(page);
	size_t v = n + offset + page_address(page) - page_address(head);

	if (likely(n <= v && v <= (PAGE_SIZE << compound_order(head))))
		return true;
	WARN_ON(1);
	return false;
}

size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (unlikely(!page_copy_sane(page, offset, bytes)))
		return 0;
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else if (unlikely(iov_iter_is_discard(i)))
		return bytes;
	else if (likely(!iov_iter_is_pipe(i)))
		return copy_page_to_iter_iovec(page, offset, bytes, i);
	else
		return copy_page_to_iter_pipe(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_to_iter);

size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (unlikely(!page_copy_sane(page, offset, bytes)))
		return 0;
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return 0;
	}
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else
		return copy_page_from_iter_iovec(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_from_iter);
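
/*
 * Fill freshly pushed pipe buffers with zeroes; this is the ITER_PIPE
 * backend of iov_iter_zero() below.
 */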

static size_t pipe_zero(size_t bytes, struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	size_t n, off;
	int idx;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &idx, &off);
	if (unlikely(!n))
		return 0;

	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memzero_page(pipe->bufs[idx].page, off, chunk);
		i->idx = idx;
		i->iov_offset = off + chunk;
		n -= chunk;
	}
	i->count -= bytes;
	return bytes;
}

size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_zero(bytes, i);
	iterate_and_advance(i, bytes, v,
		clear_user(v.iov_base, v.iov_len),
		memzero_page(v.bv_page, v.bv_offset, v.bv_len),
		memset(v.iov_base, 0, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(iov_iter_zero);

size_t iov_iter_copy_from_user_atomic(struct page *page,
		struct iov_iter *i, unsigned long offset, size_t bytes)
{
	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
	if (unlikely(!page_copy_sane(page, offset, bytes))) {
		kunmap_atomic(kaddr);
		return 0;
	}
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		kunmap_atomic(kaddr);
		WARN_ON(1);
		return 0;
	}
	iterate_all_kinds(i, bytes, v,
		copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)
	kunmap_atomic(kaddr);
	return bytes;
}
EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
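
/*
 * Illustrative sketch only, loosely modelled on the buffered write path
 * (generic_perform_write()); ->write_begin()/->write_end() and error
 * handling are omitted:
 *
 *	if (unlikely(iov_iter_fault_in_readable(i, bytes)))
 *		break;
 *	copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
 *	flush_dcache_page(page);
 *	iov_iter_advance(i, copied);
 *
 * Note that iov_iter_copy_from_user_atomic() itself does not advance the
 * iterator; the caller does that once it knows how much really made it in.
 */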

static inline void pipe_truncate(struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	if (pipe->nrbufs) {
		size_t off = i->iov_offset;
		int idx = i->idx;
		int nrbufs = (idx - pipe->curbuf) & (pipe->buffers - 1);
		if (off) {
			pipe->bufs[idx].len = off - pipe->bufs[idx].offset;
			idx = next_idx(idx, pipe);
			nrbufs++;
		}
		while (pipe->nrbufs > nrbufs) {
			pipe_buf_release(pipe, &pipe->bufs[idx]);
			idx = next_idx(idx, pipe);
			pipe->nrbufs--;
		}
	}
}

static void pipe_advance(struct iov_iter *i, size_t size)
{
	struct pipe_inode_info *pipe = i->pipe;
	if (unlikely(i->count < size))
		size = i->count;
	if (size) {
		struct pipe_buffer *buf;
		size_t off = i->iov_offset, left = size;
		int idx = i->idx;
		if (off) /* make it relative to the beginning of buffer */
			left += off - pipe->bufs[idx].offset;
		while (1) {
			buf = &pipe->bufs[idx];
			if (left <= buf->len)
				break;
			left -= buf->len;
			idx = next_idx(idx, pipe);
		}
		i->idx = idx;
		i->iov_offset = buf->offset + left;
	}
	i->count -= size;
	/* ... and discard everything past that point */
	pipe_truncate(i);
}

void iov_iter_advance(struct iov_iter *i, size_t size)
{
	if (unlikely(iov_iter_is_pipe(i))) {
		pipe_advance(i, size);
		return;
	}
	if (unlikely(iov_iter_is_discard(i))) {
		i->count -= size;
		return;
	}
	iterate_and_advance(i, size, v, 0, 0, 0)
}
EXPORT_SYMBOL(iov_iter_advance);

void iov_iter_revert(struct iov_iter *i, size_t unroll)
{
	if (!unroll)
		return;
	if (WARN_ON(unroll > MAX_RW_COUNT))
		return;
	i->count += unroll;
	if (unlikely(iov_iter_is_pipe(i))) {
		struct pipe_inode_info *pipe = i->pipe;
		int idx = i->idx;
		size_t off = i->iov_offset;
		while (1) {
			size_t n = off - pipe->bufs[idx].offset;
			if (unroll < n) {
				off -= unroll;
				break;
			}
			unroll -= n;
			if (!unroll && idx == i->start_idx) {
				off = 0;
				break;
			}
			if (!idx--)
				idx = pipe->buffers - 1;
			off = pipe->bufs[idx].offset + pipe->bufs[idx].len;
		}
		i->iov_offset = off;
		i->idx = idx;
		pipe_truncate(i);
		return;
	}
	if (unlikely(iov_iter_is_discard(i)))
		return;
	if (unroll <= i->iov_offset) {
		i->iov_offset -= unroll;
		return;
	}
	unroll -= i->iov_offset;
	if (iov_iter_is_bvec(i)) {
		const struct bio_vec *bvec = i->bvec;
		while (1) {
			size_t n = (--bvec)->bv_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->bvec = bvec;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	} else { /* same logic for iovec and kvec */
		const struct iovec *iov = i->iov;
		while (1) {
			size_t n = (--iov)->iov_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->iov = iov;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	}
}
EXPORT_SYMBOL(iov_iter_revert);
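
/*
 * Illustrative sketch only: a caller that consumed the iterator
 * optimistically can back out the part that did not reach its destination,
 * e.g.
 *
 *	copied = copy_to_iter(buf, len, i);
 *	if (transfer_failed)
 *		iov_iter_revert(i, copied);
 *
 * buf, len and transfer_failed are made-up names standing in for the
 * caller's own state.
 */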

/*
 * Return the count of just the current iov_iter segment.
 */
size_t iov_iter_single_seg_count(const struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i)))
		return i->count;	// it is a silly place, anyway
	if (i->nr_segs == 1)
		return i->count;
	if (unlikely(iov_iter_is_discard(i)))
		return i->count;
	else if (iov_iter_is_bvec(i))
		return min(i->count, i->bvec->bv_len - i->iov_offset);
	else
		return min(i->count, i->iov->iov_len - i->iov_offset);
}
EXPORT_SYMBOL(iov_iter_single_seg_count);

void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
			const struct kvec *kvec, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	i->type = ITER_KVEC | (direction & (READ | WRITE));
	i->kvec = kvec;
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_kvec);

void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
			const struct bio_vec *bvec, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	i->type = ITER_BVEC | (direction & (READ | WRITE));
	i->bvec = bvec;
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_bvec);

void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
			struct pipe_inode_info *pipe,
			size_t count)
{
	BUG_ON(direction != READ);
	WARN_ON(pipe->nrbufs == pipe->buffers);
	i->type = ITER_PIPE | READ;
	i->pipe = pipe;
	i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
	i->iov_offset = 0;
	i->count = count;
	i->start_idx = i->idx;
}
EXPORT_SYMBOL(iov_iter_pipe);

/**
 * iov_iter_discard - Initialise an I/O iterator that discards data
 * @i: The iterator to initialise.
 * @direction: The direction of the transfer.
 * @count: The size of the I/O buffer in bytes.
 *
 * Set up an I/O iterator that just discards everything that's written to it.
 * It's only available as a READ iterator.
 */
void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
{
	BUG_ON(direction != READ);
	i->type = ITER_DISCARD | READ;
	i->count = count;
	i->iov_offset = 0;
}
EXPORT_SYMBOL(iov_iter_discard);
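
/*
 * Illustrative sketch only (not a caller from this file): a kernel user
 * that wants to feed a plain buffer through an iov_iter based interface
 * typically wraps it in a single kvec, e.g.
 *
 *	struct kvec kv = { .iov_base = buf, .iov_len = len };
 *	struct iov_iter iter;
 *
 *	iov_iter_kvec(&iter, READ, &kv, 1, len);
 *
 * buf and len are made-up names; READ marks the buffer as the destination
 * of the transfer (it will be filled, e.g. via copy_to_iter()), WRITE would
 * mark it as the source.  iov_iter_bvec() works the same way for an array
 * of struct bio_vec.
 */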

unsigned long iov_iter_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;

	if (unlikely(iov_iter_is_pipe(i))) {
		if (size && i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
			return size | i->iov_offset;
		return size;
	}
	iterate_all_kinds(i, size, v,
		(res |= (unsigned long)v.iov_base | v.iov_len, 0),
		res |= v.bv_offset | v.bv_len,
		res |= (unsigned long)v.iov_base | v.iov_len
	)
	return res;
}
EXPORT_SYMBOL(iov_iter_alignment);

unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;

	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return ~0U;
	}

	iterate_all_kinds(i, size, v,
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0), 0),
		(res |= (!res ? 0 : (unsigned long)v.bv_offset) |
			(size != v.bv_len ? size : 0)),
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0))
	);
	return res;
}
EXPORT_SYMBOL(iov_iter_gap_alignment);

static inline ssize_t __pipe_get_pages(struct iov_iter *i,
				size_t maxsize,
				struct page **pages,
				int idx,
				size_t *start)
{
	struct pipe_inode_info *pipe = i->pipe;
	ssize_t n = push_pipe(i, maxsize, &idx, start);
	if (!n)
		return -EFAULT;

	maxsize = n;
	n += *start;
	while (n > 0) {
		get_page(*pages++ = pipe->bufs[idx].page);
		idx = next_idx(idx, pipe);
		n -= PAGE_SIZE;
	}

	return maxsize;
}

static ssize_t pipe_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	unsigned npages;
	size_t capacity;
	int idx;

	if (!maxsize)
		return 0;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &idx, start);
	/* some of this one + all after this one */
	npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
	capacity = min(npages, maxpages) * PAGE_SIZE - *start;

	return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start);
}
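
/*
 * Illustrative sketch only, loosely modelled on direct-IO style callers of
 * iov_iter_get_pages() below:
 *
 *	struct page *pages[16];
 *	size_t off;
 *	ssize_t got;
 *
 *	got = iov_iter_get_pages(i, pages, maxbytes, ARRAY_SIZE(pages), &off);
 *
 * On success 'got' is the number of bytes covered by the pinned pages,
 * starting 'off' bytes into pages[0]; the iterator itself is not advanced,
 * and the caller must put_page() every page when it is done.  'maxbytes'
 * is a made-up name for the caller's byte budget.
 */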

ssize_t iov_iter_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	if (maxsize > i->count)
		maxsize = i->count;

	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_get_pages(i, pages, maxsize, maxpages, start);
	if (unlikely(iov_iter_is_discard(i)))
		return -EFAULT;

	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		if (len > maxpages * PAGE_SIZE)
			len = maxpages * PAGE_SIZE;
		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		res = get_user_pages_fast(addr, n, iov_iter_rw(i) != WRITE, pages);
		if (unlikely(res < 0))
			return res;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		get_page(*pages = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages);

static struct page **get_pages_array(size_t n)
{
	return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
}

static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;
	ssize_t n;
	int idx;
	int npages;

	if (!maxsize)
		return 0;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &idx, start);
	/* some of this one + all after this one */
	npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
	n = npages * PAGE_SIZE - *start;
	if (maxsize > n)
		maxsize = n;
	else
		npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
	p = get_pages_array(npages);
	if (!p)
		return -ENOMEM;
	n = __pipe_get_pages(i, maxsize, p, idx, start);
	if (n > 0)
		*pages = p;
	else
		kvfree(p);
	return n;
}

ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;

	if (maxsize > i->count)
		maxsize = i->count;

	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_get_pages_alloc(i, pages, maxsize, start);
	if (unlikely(iov_iter_is_discard(i)))
		return -EFAULT;

	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		p = get_pages_array(n);
		if (!p)
			return -ENOMEM;
		res = get_user_pages_fast(addr, n, iov_iter_rw(i) != WRITE, p);
		if (unlikely(res < 0)) {
			kvfree(p);
			return res;
		}
		*pages = p;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		*pages = p = get_pages_array(1);
		if (!p)
			return -ENOMEM;
		get_page(*p = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages_alloc);
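
/*
 * The csum_and_copy_* helpers below are the networking flavour of the plain
 * copy routines: they fold the copied data into a 32-bit partial checksum
 * (via csum_block_add()) while moving it, so callers such as the datagram
 * code do not have to walk the data twice.
 */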

size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
			       struct iov_iter *i)
{
	char *to = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		int err = 0;
		next = csum_and_copy_from_user(v.iov_base,
					       (to += v.iov_len) - v.iov_len,
					       v.iov_len, 0, &err);
		if (!err) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		err ? v.iov_len : 0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		next = csum_partial_copy_nocheck(p + v.bv_offset,
						 (to += v.bv_len) - v.bv_len,
						 v.bv_len, 0);
		kunmap_atomic(p);
		sum = csum_block_add(sum, next, off);
		off += v.bv_len;
	}),({
		next = csum_partial_copy_nocheck(v.iov_base,
						 (to += v.iov_len) - v.iov_len,
						 v.iov_len, 0);
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_from_iter);

bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
			       struct iov_iter *i)
{
	char *to = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;
	iterate_all_kinds(i, bytes, v, ({
		int err = 0;
		next = csum_and_copy_from_user(v.iov_base,
					       (to += v.iov_len) - v.iov_len,
					       v.iov_len, 0, &err);
		if (err)
			return false;
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
		0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		next = csum_partial_copy_nocheck(p + v.bv_offset,
						 (to += v.bv_len) - v.bv_len,
						 v.bv_len, 0);
		kunmap_atomic(p);
		sum = csum_block_add(sum, next, off);
		off += v.bv_len;
	}),({
		next = csum_partial_copy_nocheck(v.iov_base,
						 (to += v.iov_len) - v.iov_len,
						 v.iov_len, 0);
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(csum_and_copy_from_iter_full);
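
/*
 * Send-path counterpart of csum_and_copy_from_iter(): checksum while
 * copying from kernel memory into the iterator; pipe-backed iterators are
 * handled by csum_and_copy_to_pipe_iter() above.
 */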

size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
			     struct iov_iter *i)
{
	const char *from = addr;
	__wsum sum, next;
	size_t off = 0;

	if (unlikely(iov_iter_is_pipe(i)))
		return csum_and_copy_to_pipe_iter(addr, bytes, csum, i);

	sum = *csum;
	if (unlikely(iov_iter_is_discard(i))) {
		WARN_ON(1);	/* for now */
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		int err = 0;
		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
					     v.iov_base,
					     v.iov_len, 0, &err);
		if (!err) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		err ? v.iov_len : 0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		next = csum_partial_copy_nocheck((from += v.bv_len) - v.bv_len,
						 p + v.bv_offset,
						 v.bv_len, 0);
		kunmap_atomic(p);
		sum = csum_block_add(sum, next, off);
		off += v.bv_len;
	}),({
		next = csum_partial_copy_nocheck((from += v.iov_len) - v.iov_len,
						 v.iov_base,
						 v.iov_len, 0);
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_to_iter);

int iov_iter_npages(const struct iov_iter *i, int maxpages)
{
	size_t size = i->count;
	int npages = 0;

	if (!size)
		return 0;
	if (unlikely(iov_iter_is_discard(i)))
		return 0;

	if (unlikely(iov_iter_is_pipe(i))) {
		struct pipe_inode_info *pipe = i->pipe;
		size_t off;
		int idx;

		if (!sanity(i))
			return 0;

		data_start(i, &idx, &off);
		/* some of this one + all after this one */
		npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1;
		if (npages >= maxpages)
			return maxpages;
	} else iterate_all_kinds(i, size, v, ({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	0;}),({
		npages++;
		if (npages >= maxpages)
			return maxpages;
	}),({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	})
	)
	return npages;
}
EXPORT_SYMBOL(iov_iter_npages);

const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
{
	*new = *old;
	if (unlikely(iov_iter_is_pipe(new))) {
		WARN_ON(1);
		return NULL;
	}
	if (unlikely(iov_iter_is_discard(new)))
		return NULL;
	if (iov_iter_is_bvec(new))
		return new->bvec = kmemdup(new->bvec,
				    new->nr_segs * sizeof(struct bio_vec),
				    flags);
	else
		/* iovec and kvec have identical layout */
		return new->iov = kmemdup(new->iov,
				   new->nr_segs * sizeof(struct iovec),
				   flags);
}
EXPORT_SYMBOL(dup_iter);

/**
 * import_iovec() - Copy an array of &struct iovec from userspace
 *     into the kernel, check that it is valid, and initialize a new
 *     &struct iov_iter iterator to access it.
 *
 * @type: One of %READ or %WRITE.
 * @uvector: Pointer to the userspace array.
 * @nr_segs: Number of elements in userspace array.
 * @fast_segs: Number of elements in @iov.
 * @iov: (input and output parameter) Pointer to pointer to (usually small
 *     on-stack) kernel array.
 * @i: Pointer to iterator that will be initialized on success.
 *
 * If the array pointed to by *@iov is large enough to hold all @nr_segs,
 * then this function places %NULL in *@iov on return. Otherwise, a new
 * array will be allocated and the result placed in *@iov. This means that
 * the caller may call kfree() on *@iov regardless of whether the small
 * on-stack array was used or not (and regardless of whether this function
 * returns an error or not).
 *
 * Return: 0 on success or negative error code on error.
 */
int import_iovec(int type, const struct iovec __user * uvector,
		 unsigned nr_segs, unsigned fast_segs,
		 struct iovec **iov, struct iov_iter *i)
{
	ssize_t n;
	struct iovec *p;
	n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
				  *iov, &p);
	if (n < 0) {
		if (p != *iov)
			kfree(p);
		*iov = NULL;
		return n;
	}
	iov_iter_init(i, type, p, nr_segs, n);
	*iov = p == *iov ? NULL : p;
	return 0;
}
EXPORT_SYMBOL(import_iovec);
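
/*
 * Illustrative sketch only, loosely modelled on the readv()/writev()
 * syscall paths:
 *
 *	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
 *	struct iov_iter iter;
 *	int ret;
 *
 *	ret = import_iovec(READ, uvec, nr_segs, ARRAY_SIZE(iovstack),
 *			   &iov, &iter);
 *	if (ret < 0)
 *		return ret;
 *	... use the iterator ...
 *	kfree(iov);
 *
 * uvec and nr_segs are made-up names for the user pointer and segment
 * count; kfree(iov) is safe either way because *iov is set to NULL when
 * the on-stack array was used.
 */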

#ifdef CONFIG_COMPAT
#include <linux/compat.h>

int compat_import_iovec(int type, const struct compat_iovec __user * uvector,
		 unsigned nr_segs, unsigned fast_segs,
		 struct iovec **iov, struct iov_iter *i)
{
	ssize_t n;
	struct iovec *p;
	n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
				  *iov, &p);
	if (n < 0) {
		if (p != *iov)
			kfree(p);
		*iov = NULL;
		return n;
	}
	iov_iter_init(i, type, p, nr_segs, n);
	*iov = p == *iov ? NULL : p;
	return 0;
}
#endif

int import_single_range(int rw, void __user *buf, size_t len,
		 struct iovec *iov, struct iov_iter *i)
{
	if (len > MAX_RW_COUNT)
		len = MAX_RW_COUNT;
	if (unlikely(!access_ok(!rw, buf, len)))
		return -EFAULT;

	iov->iov_base = buf;
	iov->iov_len = len;
	iov_iter_init(i, rw, iov, 1, len);
	return 0;
}
EXPORT_SYMBOL(import_single_range);

int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
			    int (*f)(struct kvec *vec, void *context),
			    void *context)
{
	struct kvec w;
	int err = -EINVAL;
	if (!bytes)
		return 0;

	iterate_all_kinds(i, bytes, v, -EINVAL, ({
		w.iov_base = kmap(v.bv_page) + v.bv_offset;
		w.iov_len = v.bv_len;
		err = f(&w, context);
		kunmap(v.bv_page);
		err;}), ({
		w = v;
		err = f(&w, context);})
	)
	return err;
}
EXPORT_SYMBOL(iov_iter_for_each_range);
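
/*
 * Illustrative sketch only: iov_iter_for_each_range() above hands each
 * mapped segment to a caller-supplied callback, e.g.
 *
 *	static int count_bytes(struct kvec *vec, void *context)
 *	{
 *		*(size_t *)context += vec->iov_len;
 *		return 0;
 *	}
 *
 *	size_t total = 0;
 *	int err = iov_iter_for_each_range(i, bytes, count_bytes, &total);
 *
 * count_bytes and total are made-up names.  Only bvec- and kvec-backed
 * iterators are handled; a user-backed iterator yields -EINVAL.
 */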