#include <linux/export.h>
#include <linux/bvec.h>
#include <linux/uio.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/splice.h>
#include <net/checksum.h>

#define PIPE_PARANOIA /* for now */

#define iterate_iovec(i, n, __v, __p, skip, STEP) {	\
	size_t left;					\
	size_t wanted = n;				\
	__p = i->iov;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	} else {					\
		left = 0;				\
	}						\
	while (unlikely(!left && n)) {			\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted - n;					\
}

#define iterate_kvec(i, n, __v, __p, skip, STEP) {	\
	size_t wanted = n;				\
	__p = i->kvec;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		(void)(STEP);				\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	while (unlikely(n)) {				\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		(void)(STEP);				\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted;					\
}

#define iterate_bvec(i, n, __v, __bi, skip, STEP) {	\
	struct bvec_iter __start;			\
	__start.bi_size = n;				\
	__start.bi_bvec_done = skip;			\
	__start.bi_idx = 0;				\
	for_each_bvec(__v, i->bvec, __bi, __start) {	\
		if (!__v.bv_len)			\
			continue;			\
		(void)(STEP);				\
	}						\
}

#define iterate_all_kinds(i, n, v, I, B, K) {		\
	size_t skip = i->iov_offset;			\
	if (unlikely(i->type & ITER_BVEC)) {		\
		struct bio_vec v;			\
		struct bvec_iter __bi;			\
		iterate_bvec(i, n, v, __bi, skip, (B))	\
	} else if (unlikely(i->type & ITER_KVEC)) {	\
		const struct kvec *kvec;		\
		struct kvec v;				\
		iterate_kvec(i, n, v, kvec, skip, (K))	\
	} else {					\
		const struct iovec *iov;		\
		struct iovec v;				\
		iterate_iovec(i, n, v, iov, skip, (I))	\
	}						\
}

#define iterate_and_advance(i, n, v, I, B, K) {			\
	if (unlikely(i->count < n))				\
		n = i->count;					\
	if (i->count) {						\
		size_t skip = i->iov_offset;			\
		if (unlikely(i->type & ITER_BVEC)) {		\
			const struct bio_vec *bvec = i->bvec;	\
			struct bio_vec v;			\
			struct bvec_iter __bi;			\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
			i->bvec = __bvec_iter_bvec(i->bvec, __bi);	\
			i->nr_segs -= i->bvec - bvec;		\
			skip = __bi.bi_bvec_done;		\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;		\
			struct kvec v;				\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
			if (skip == kvec->iov_len) {		\
				kvec++;				\
				skip = 0;			\
			}					\
			i->nr_segs -= kvec - i->kvec;		\
			i->kvec = kvec;				\
		} else {					\
			const struct iovec *iov;		\
			struct iovec v;				\
			iterate_iovec(i, n, v, iov, skip, (I))	\
			if (skip == iov->iov_len) {		\
				iov++;				\
				skip = 0;			\
			}					\
			i->nr_segs -= iov - i->iov;		\
			i->iov = iov;				\
		}						\
		i->count -= n;					\
		i->iov_offset = skip;				\
	}							\
}
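
/*
 * Note on the iterate_* helpers above: iterate_all_kinds() walks up to @n
 * bytes of the iterator @i without modifying it, dispatching on i->type;
 * @I, @B and @K are expressions evaluated once per iovec, bio_vec or kvec
 * segment, with the current segment exposed as @v.  For the user-space
 * iovec case the expression's value is the number of bytes it did NOT
 * process (e.g. a short __copy_to_user()), and a non-zero value ends the
 * walk early.  iterate_and_advance() does the same walk but also consumes
 * what was processed, updating i->iov/kvec/bvec, nr_segs, iov_offset and
 * count.
 */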

static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *from;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
		kaddr = kmap_atomic(page);
		from = kaddr + offset;

		/* first chunk, usually the only one */
		left = __copy_to_user_inatomic(buf, from, copy);
		copy -= left;
		skip += copy;
		from += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = __copy_to_user_inatomic(buf, from, copy);
			copy -= left;
			skip = copy;
			from += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = from - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	from = kaddr + offset;
	left = __copy_to_user(buf, from, copy);
	copy -= left;
	skip += copy;
	from += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = __copy_to_user(buf, from, copy);
		copy -= left;
		skip = copy;
		from += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *to;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
		kaddr = kmap_atomic(page);
		to = kaddr + offset;

		/* first chunk, usually the only one */
		left = __copy_from_user_inatomic(to, buf, copy);
		copy -= left;
		skip += copy;
		to += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = __copy_from_user_inatomic(to, buf, copy);
			copy -= left;
			skip = copy;
			to += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = to - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	to = kaddr + offset;
	left = __copy_from_user(to, buf, copy);
	copy -= left;
	skip += copy;
	to += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = __copy_from_user(to, buf, copy);
		copy -= left;
		skip = copy;
		to += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

#ifdef PIPE_PARANOIA
static bool sanity(const struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	int idx = i->idx;
	int next = pipe->curbuf + pipe->nrbufs;
	if (i->iov_offset) {
		struct pipe_buffer *p;
		if (unlikely(!pipe->nrbufs))
			goto Bad;	// pipe must be non-empty
		if (unlikely(idx != ((next - 1) & (pipe->buffers - 1))))
			goto Bad;	// must be at the last buffer...

		p = &pipe->bufs[idx];
		if (unlikely(p->offset + p->len != i->iov_offset))
			goto Bad;	// ... at the end of segment
	} else {
		if (idx != (next & (pipe->buffers - 1)))
			goto Bad;	// must be right after the last buffer
	}
	return true;
Bad:
	printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset);
	printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n",
			pipe->curbuf, pipe->nrbufs, pipe->buffers);
	for (idx = 0; idx < pipe->buffers; idx++)
		printk(KERN_ERR "[%p %p %d %d]\n",
			pipe->bufs[idx].ops,
			pipe->bufs[idx].page,
			pipe->bufs[idx].offset,
			pipe->bufs[idx].len);
	WARN_ON(1);
	return false;
}
#else
#define sanity(i) true
#endif

static inline int next_idx(int idx, struct pipe_inode_info *pipe)
{
	return (idx + 1) & (pipe->buffers - 1);
}

static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	struct pipe_buffer *buf;
	size_t off;
	int idx;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	if (!sanity(i))
		return 0;

	off = i->iov_offset;
	idx = i->idx;
	buf = &pipe->bufs[idx];
	if (off) {
		if (offset == off && buf->page == page) {
			/* merge with the last one */
			buf->len += bytes;
			i->iov_offset += bytes;
			goto out;
		}
		idx = next_idx(idx, pipe);
		buf = &pipe->bufs[idx];
	}
	if (idx == pipe->curbuf && pipe->nrbufs)
		return 0;
	pipe->nrbufs++;
	buf->ops = &page_cache_pipe_buf_ops;
	get_page(buf->page = page);
	buf->offset = offset;
	buf->len = bytes;
	i->iov_offset = offset + bytes;
	i->idx = idx;
out:
	i->count -= bytes;
	return bytes;
}

/*
 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
 * bytes.  For each iovec, fault in each page that constitutes the iovec.
 *
 * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
 * because it is an invalid address).
 */
int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
{
	size_t skip = i->iov_offset;
	const struct iovec *iov;
	int err;
	struct iovec v;

	if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
		iterate_iovec(i, bytes, v, iov, skip, ({
			err = fault_in_pages_readable(v.iov_base, v.iov_len);
			if (unlikely(err))
				return err;
			0;}))
	}
	return 0;
}
EXPORT_SYMBOL(iov_iter_fault_in_readable);
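
/*
 * Typical use of iov_iter_fault_in_readable() (a sketch, not code from this
 * file): buffered write paths such as generic_perform_write() pre-fault the
 * user pages while no page lock is held, then copy with page faults disabled
 * and shrink the chunk and retry if the copy comes up short:
 *
 *	if (unlikely(iov_iter_fault_in_readable(i, bytes)))
 *		return -EFAULT;
 *	// ...lock/prepare the pagecache page...
 *	copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
 *	// ...unlock, dirty the page; if copied == 0, reduce 'bytes' and retry
 */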

void iov_iter_init(struct iov_iter *i, int direction,
			const struct iovec *iov, unsigned long nr_segs,
			size_t count)
{
	/* It will get better.  Eventually... */
	if (segment_eq(get_fs(), KERNEL_DS)) {
		direction |= ITER_KVEC;
		i->type = direction;
		i->kvec = (struct kvec *)iov;
	} else {
		i->type = direction;
		i->iov = iov;
	}
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_init);

static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
{
	char *from = kmap_atomic(page);
	memcpy(to, from + offset, len);
	kunmap_atomic(from);
}

static void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len)
{
	char *to = kmap_atomic(page);
	memcpy(to + offset, from, len);
	kunmap_atomic(to);
}

static void memzero_page(struct page *page, size_t offset, size_t len)
{
	char *addr = kmap_atomic(page);
	memset(addr + offset, 0, len);
	kunmap_atomic(addr);
}

static inline bool allocated(struct pipe_buffer *buf)
{
	return buf->ops == &default_pipe_buf_ops;
}

static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp)
{
	size_t off = i->iov_offset;
	int idx = i->idx;
	if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) {
		idx = next_idx(idx, i->pipe);
		off = 0;
	}
	*idxp = idx;
	*offp = off;
}

static size_t push_pipe(struct iov_iter *i, size_t size,
			int *idxp, size_t *offp)
{
	struct pipe_inode_info *pipe = i->pipe;
	size_t off;
	int idx;
	ssize_t left;

	if (unlikely(size > i->count))
		size = i->count;
	if (unlikely(!size))
		return 0;

	left = size;
	data_start(i, &idx, &off);
	*idxp = idx;
	*offp = off;
	if (off) {
		left -= PAGE_SIZE - off;
		if (left <= 0) {
			pipe->bufs[idx].len += size;
			return size;
		}
		pipe->bufs[idx].len = PAGE_SIZE;
		idx = next_idx(idx, pipe);
	}
	while (idx != pipe->curbuf || !pipe->nrbufs) {
		struct page *page = alloc_page(GFP_USER);
		if (!page)
			break;
		pipe->nrbufs++;
		pipe->bufs[idx].ops = &default_pipe_buf_ops;
		pipe->bufs[idx].page = page;
		pipe->bufs[idx].offset = 0;
		if (left <= PAGE_SIZE) {
			pipe->bufs[idx].len = left;
			return size;
		}
		pipe->bufs[idx].len = PAGE_SIZE;
		left -= PAGE_SIZE;
		idx = next_idx(idx, pipe);
	}
	return size - left;
}

static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
				struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	size_t n, off;
	int idx;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &idx, &off);
	if (unlikely(!n))
		return 0;
	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk);
		i->idx = idx;
		i->iov_offset = off + chunk;
		n -= chunk;
		addr += chunk;
	}
	i->count -= bytes;
	return bytes;
}

size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
	const char *from = addr;
	if (unlikely(i->type & ITER_PIPE))
		return copy_pipe_to_iter(addr, bytes, i);
	iterate_and_advance(i, bytes, v,
		__copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len,
			       v.iov_len),
		memcpy_to_page(v.bv_page, v.bv_offset,
			       (from += v.bv_len) - v.bv_len, v.bv_len),
		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(copy_to_iter);
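
/*
 * Usage sketch for copy_to_iter()/copy_from_iter() (illustrative, not code
 * from this file): a ->read_iter() implementation can hand a kernel buffer
 * to whatever the iterator describes (user iovecs, kvecs, bvecs or a pipe)
 * without caring which it is:
 *
 *	static ssize_t foo_read_iter(struct kiocb *iocb, struct iov_iter *to)
 *	{
 *		struct foo_dev *foo = iocb->ki_filp->private_data;
 *		// copy_to_iter() returns how much was actually copied and
 *		// advances 'to' by that amount; a short result means the
 *		// iterator was exhausted or a user page faulted.
 *		return copy_to_iter(foo->buf, foo->len, to);
 *	}
 *
 * 'foo_dev', 'foo->buf' and 'foo->len' are made-up names; a real driver
 * would also honour iocb->ki_pos and partial reads.
 */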

size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base,
				 v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(copy_from_iter);

bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;

	iterate_all_kinds(i, bytes, v, ({
		if (__copy_from_user((to += v.iov_len) - v.iov_len,
				     v.iov_base, v.iov_len))
			return false;
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(copy_from_iter_full);

size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user_nocache((to += v.iov_len) - v.iov_len,
					 v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(copy_from_iter_nocache);

bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;
	iterate_all_kinds(i, bytes, v, ({
		if (__copy_from_user_nocache((to += v.iov_len) - v.iov_len,
					     v.iov_base, v.iov_len))
			return false;
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(copy_from_iter_full_nocache);

size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else if (likely(!(i->type & ITER_PIPE)))
		return copy_page_to_iter_iovec(page, offset, bytes, i);
	else
		return copy_page_to_iter_pipe(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_to_iter);

size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return 0;
	}
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = copy_from_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else
		return copy_page_from_iter_iovec(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_from_iter);
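
/*
 * Note on copy_page_to_iter(): it picks one of three strategies.  For
 * ITER_BVEC/ITER_KVEC it kmaps the page and reuses copy_to_iter(); for plain
 * user iovecs it uses the open-coded, highmem-aware copy above; for
 * ITER_PIPE it does not copy at all - copy_page_to_iter_pipe() just takes a
 * reference on the page and links it into the pipe, which is what makes
 * splice() from the pagecache zero-copy.  Pagecache read paths (e.g. the
 * generic file read code) feed pages to the iterator through this helper.
 */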

static size_t pipe_zero(size_t bytes, struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	size_t n, off;
	int idx;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &idx, &off);
	if (unlikely(!n))
		return 0;

	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memzero_page(pipe->bufs[idx].page, off, chunk);
		i->idx = idx;
		i->iov_offset = off + chunk;
		n -= chunk;
	}
	i->count -= bytes;
	return bytes;
}

size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
	if (unlikely(i->type & ITER_PIPE))
		return pipe_zero(bytes, i);
	iterate_and_advance(i, bytes, v,
		__clear_user(v.iov_base, v.iov_len),
		memzero_page(v.bv_page, v.bv_offset, v.bv_len),
		memset(v.iov_base, 0, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(iov_iter_zero);

size_t iov_iter_copy_from_user_atomic(struct page *page,
		struct iov_iter *i, unsigned long offset, size_t bytes)
{
	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
	if (unlikely(i->type & ITER_PIPE)) {
		kunmap_atomic(kaddr);
		WARN_ON(1);
		return 0;
	}
	iterate_all_kinds(i, bytes, v,
		__copy_from_user_inatomic((p += v.iov_len) - v.iov_len,
					  v.iov_base, v.iov_len),
		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)
	kunmap_atomic(kaddr);
	return bytes;
}
EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);

static void pipe_advance(struct iov_iter *i, size_t size)
{
	struct pipe_inode_info *pipe = i->pipe;
	struct pipe_buffer *buf;
	int idx = i->idx;
	size_t off = i->iov_offset, orig_sz;

	if (unlikely(i->count < size))
		size = i->count;
	orig_sz = size;

	if (size) {
		if (off) /* make it relative to the beginning of buffer */
			size += off - pipe->bufs[idx].offset;
		while (1) {
			buf = &pipe->bufs[idx];
			if (size <= buf->len)
				break;
			size -= buf->len;
			idx = next_idx(idx, pipe);
		}
		buf->len = size;
		i->idx = idx;
		off = i->iov_offset = buf->offset + size;
	}
	if (off)
		idx = next_idx(idx, pipe);
	if (pipe->nrbufs) {
		int unused = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
		/* [curbuf,unused) is in use.  Free [idx,unused) */
		while (idx != unused) {
			pipe_buf_release(pipe, &pipe->bufs[idx]);
			idx = next_idx(idx, pipe);
			pipe->nrbufs--;
		}
	}
	i->count -= orig_sz;
}

void iov_iter_advance(struct iov_iter *i, size_t size)
{
	if (unlikely(i->type & ITER_PIPE)) {
		pipe_advance(i, size);
		return;
	}
	iterate_and_advance(i, size, v, 0, 0, 0)
}
EXPORT_SYMBOL(iov_iter_advance);

/*
 * Return the count of just the current iov_iter segment.
 */
size_t iov_iter_single_seg_count(const struct iov_iter *i)
{
	if (unlikely(i->type & ITER_PIPE))
		return i->count;	// it is a silly place, anyway
	if (i->nr_segs == 1)
		return i->count;
	else if (i->type & ITER_BVEC)
		return min(i->count, i->bvec->bv_len - i->iov_offset);
	else
		return min(i->count, i->iov->iov_len - i->iov_offset);
}
EXPORT_SYMBOL(iov_iter_single_seg_count);

void iov_iter_kvec(struct iov_iter *i, int direction,
			const struct kvec *kvec, unsigned long nr_segs,
			size_t count)
{
	BUG_ON(!(direction & ITER_KVEC));
	i->type = direction;
	i->kvec = kvec;
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_kvec);

void iov_iter_bvec(struct iov_iter *i, int direction,
			const struct bio_vec *bvec, unsigned long nr_segs,
			size_t count)
{
	BUG_ON(!(direction & ITER_BVEC));
	i->type = direction;
	i->bvec = bvec;
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_bvec);

void iov_iter_pipe(struct iov_iter *i, int direction,
			struct pipe_inode_info *pipe,
			size_t count)
{
	BUG_ON(direction != ITER_PIPE);
	i->type = direction;
	i->pipe = pipe;
	i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_pipe);

unsigned long iov_iter_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;

	if (!size)
		return 0;

	if (unlikely(i->type & ITER_PIPE)) {
		if (i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
			return size | i->iov_offset;
		return size;
	}
	iterate_all_kinds(i, size, v,
		(res |= (unsigned long)v.iov_base | v.iov_len, 0),
		res |= v.bv_offset | v.bv_len,
		res |= (unsigned long)v.iov_base | v.iov_len
	)
	return res;
}
EXPORT_SYMBOL(iov_iter_alignment);
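
/*
 * Usage sketch for iov_iter_alignment() (illustrative): it ORs together every
 * segment's address and length, so any bit set in the result is misaligned in
 * at least one segment.  Direct I/O code typically uses it to reject requests
 * that are not block-aligned:
 *
 *	// 'block_size' is an assumed power-of-two device or fs block size
 *	if (iov_iter_alignment(iter) & (block_size - 1))
 *		return -EINVAL;
 */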

unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;
	if (!size)
		return 0;

	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return ~0U;
	}

	iterate_all_kinds(i, size, v,
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0), 0),
		(res |= (!res ? 0 : (unsigned long)v.bv_offset) |
			(size != v.bv_len ? size : 0)),
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0))
		);
	return res;
}
EXPORT_SYMBOL(iov_iter_gap_alignment);

static inline size_t __pipe_get_pages(struct iov_iter *i,
				size_t maxsize,
				struct page **pages,
				int idx,
				size_t *start)
{
	struct pipe_inode_info *pipe = i->pipe;
	ssize_t n = push_pipe(i, maxsize, &idx, start);
	if (!n)
		return -EFAULT;

	maxsize = n;
	n += *start;
	while (n > 0) {
		get_page(*pages++ = pipe->bufs[idx].page);
		idx = next_idx(idx, pipe);
		n -= PAGE_SIZE;
	}

	return maxsize;
}

static ssize_t pipe_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	unsigned npages;
	size_t capacity;
	int idx;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &idx, start);
	/* some of this one + all after this one */
	npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
	capacity = min(npages, maxpages) * PAGE_SIZE - *start;

	return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start);
}

ssize_t iov_iter_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	if (maxsize > i->count)
		maxsize = i->count;

	if (!maxsize)
		return 0;

	if (unlikely(i->type & ITER_PIPE))
		return pipe_get_pages(i, pages, maxsize, maxpages, start);
	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		if (len > maxpages * PAGE_SIZE)
			len = maxpages * PAGE_SIZE;
		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages);
		if (unlikely(res < 0))
			return res;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		get_page(*pages = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages);

static struct page **get_pages_array(size_t n)
{
	struct page **p = kmalloc(n * sizeof(struct page *), GFP_KERNEL);
	if (!p)
		p = vmalloc(n * sizeof(struct page *));
	return p;
}

static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;
	size_t n;
	int idx;
	int npages;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &idx, start);
	/* some of this one + all after this one */
	npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
	n = npages * PAGE_SIZE - *start;
	if (maxsize > n)
		maxsize = n;
	else
		npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
	p = get_pages_array(npages);
	if (!p)
		return -ENOMEM;
	n = __pipe_get_pages(i, maxsize, p, idx, start);
	if (n > 0)
		*pages = p;
	else
		kvfree(p);
	return n;
}

ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;

	if (maxsize > i->count)
		maxsize = i->count;

	if (!maxsize)
		return 0;

	if (unlikely(i->type & ITER_PIPE))
		return pipe_get_pages_alloc(i, pages, maxsize, start);
	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		p = get_pages_array(n);
		if (!p)
			return -ENOMEM;
		res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p);
		if (unlikely(res < 0)) {
			kvfree(p);
			return res;
		}
		*pages = p;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		*pages = p = get_pages_array(1);
		if (!p)
			return -ENOMEM;
		get_page(*p = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages_alloc);
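
/*
 * Usage sketch for iov_iter_get_pages() (illustrative): callers building
 * zero-copy I/O pin the pages backing the next chunk of the iterator, use
 * them, and drop the references themselves; the iterator is not advanced
 * here, so the caller advances it separately:
 *
 *	struct page *pages[16];		// arbitrary batch size
 *	size_t offset;
 *	ssize_t bytes = iov_iter_get_pages(iter, pages, SIZE_MAX, 16, &offset);
 *	if (bytes > 0) {
 *		int n = DIV_ROUND_UP(offset + bytes, PAGE_SIZE);
 *		// ...map pages[0..n-1] into a bio or sg list; the data starts
 *		//    at 'offset' within pages[0]...
 *		while (n--)
 *			put_page(pages[n]);
 *		iov_iter_advance(iter, bytes);
 *	}
 */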

size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
			       struct iov_iter *i)
{
	char *to = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		int err = 0;
		next = csum_and_copy_from_user(v.iov_base,
					       (to += v.iov_len) - v.iov_len,
					       v.iov_len, 0, &err);
		if (!err) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		err ? v.iov_len : 0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		next = csum_partial_copy_nocheck(p + v.bv_offset,
						 (to += v.bv_len) - v.bv_len,
						 v.bv_len, 0);
		kunmap_atomic(p);
		sum = csum_block_add(sum, next, off);
		off += v.bv_len;
	}),({
		next = csum_partial_copy_nocheck(v.iov_base,
						 (to += v.iov_len) - v.iov_len,
						 v.iov_len, 0);
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_from_iter);

bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
				  struct iov_iter *i)
{
	char *to = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;
	iterate_all_kinds(i, bytes, v, ({
		int err = 0;
		next = csum_and_copy_from_user(v.iov_base,
					       (to += v.iov_len) - v.iov_len,
					       v.iov_len, 0, &err);
		if (err)
			return false;
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
		0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		next = csum_partial_copy_nocheck(p + v.bv_offset,
						 (to += v.bv_len) - v.bv_len,
						 v.bv_len, 0);
		kunmap_atomic(p);
		sum = csum_block_add(sum, next, off);
		off += v.bv_len;
	}),({
		next = csum_partial_copy_nocheck(v.iov_base,
						 (to += v.iov_len) - v.iov_len,
						 v.iov_len, 0);
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(csum_and_copy_from_iter_full);
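
/*
 * Note on the csum_and_copy_* helpers (illustrative): they fold the copy and
 * the Internet checksum into a single pass over the data, so callers such as
 * the networking datagram paths do not have to touch the payload twice.  A
 * sketch of the calling convention, with made-up names:
 *
 *	__wsum csum = 0;	// running checksum, seeded by the caller
 *	if (!csum_and_copy_from_iter_full(payload, len, &csum, from))
 *		return -EFAULT;
 *	// 'csum' now covers the copied bytes and can be combined with the
 *	// rest of the packet checksum via csum_block_add()/csum_fold().
 */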

size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
			     struct iov_iter *i)
{
	const char *from = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);	/* for now */
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		int err = 0;
		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
					     v.iov_base,
					     v.iov_len, 0, &err);
		if (!err) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		err ? v.iov_len : 0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		next = csum_partial_copy_nocheck((from += v.bv_len) - v.bv_len,
						 p + v.bv_offset,
						 v.bv_len, 0);
		kunmap_atomic(p);
		sum = csum_block_add(sum, next, off);
		off += v.bv_len;
	}),({
		next = csum_partial_copy_nocheck((from += v.iov_len) - v.iov_len,
						 v.iov_base,
						 v.iov_len, 0);
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_to_iter);

int iov_iter_npages(const struct iov_iter *i, int maxpages)
{
	size_t size = i->count;
	int npages = 0;

	if (!size)
		return 0;

	if (unlikely(i->type & ITER_PIPE)) {
		struct pipe_inode_info *pipe = i->pipe;
		size_t off;
		int idx;

		if (!sanity(i))
			return 0;

		data_start(i, &idx, &off);
		/* some of this one + all after this one */
		npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1;
		if (npages >= maxpages)
			return maxpages;
	} else iterate_all_kinds(i, size, v, ({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	0;}),({
		npages++;
		if (npages >= maxpages)
			return maxpages;
	}),({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	})
	)
	return npages;
}
EXPORT_SYMBOL(iov_iter_npages);

const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
{
	*new = *old;
	if (unlikely(new->type & ITER_PIPE)) {
		WARN_ON(1);
		return NULL;
	}
	if (new->type & ITER_BVEC)
		return new->bvec = kmemdup(new->bvec,
				    new->nr_segs * sizeof(struct bio_vec),
				    flags);
	else
		/* iovec and kvec have identical layout */
		return new->iov = kmemdup(new->iov,
				   new->nr_segs * sizeof(struct iovec),
				   flags);
}
EXPORT_SYMBOL(dup_iter);

/**
 * import_iovec() - Copy an array of &struct iovec from userspace
 *     into the kernel, check that it is valid, and initialize a new
 *     &struct iov_iter iterator to access it.
 *
 * @type: One of %READ or %WRITE.
 * @uvector: Pointer to the userspace array.
 * @nr_segs: Number of elements in userspace array.
 * @fast_segs: Number of elements in @iov.
 * @iov: (input and output parameter) Pointer to pointer to (usually small
 *     on-stack) kernel array.
 * @i: Pointer to iterator that will be initialized on success.
 *
 * If the array pointed to by *@iov is large enough to hold all @nr_segs,
 * then this function places %NULL in *@iov on return. Otherwise, a new
 * array will be allocated and the result placed in *@iov. This means that
 * the caller may call kfree() on *@iov regardless of whether the small
 * on-stack array was used or not (and regardless of whether this function
 * returns an error or not).
 *
 * Return: 0 on success or negative error code on error.
 */
int import_iovec(int type, const struct iovec __user * uvector,
		 unsigned nr_segs, unsigned fast_segs,
		 struct iovec **iov, struct iov_iter *i)
{
	ssize_t n;
	struct iovec *p;
	n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
				  *iov, &p);
	if (n < 0) {
		if (p != *iov)
			kfree(p);
		*iov = NULL;
		return n;
	}
	iov_iter_init(i, type, p, nr_segs, n);
	*iov = p == *iov ? NULL : p;
	return 0;
}
EXPORT_SYMBOL(import_iovec);

#ifdef CONFIG_COMPAT
#include <linux/compat.h>

int compat_import_iovec(int type, const struct compat_iovec __user * uvector,
		 unsigned nr_segs, unsigned fast_segs,
		 struct iovec **iov, struct iov_iter *i)
{
	ssize_t n;
	struct iovec *p;
	n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
					 *iov, &p);
	if (n < 0) {
		if (p != *iov)
			kfree(p);
		*iov = NULL;
		return n;
	}
	iov_iter_init(i, type, p, nr_segs, n);
	*iov = p == *iov ? NULL : p;
	return 0;
}
#endif

int import_single_range(int rw, void __user *buf, size_t len,
		 struct iovec *iov, struct iov_iter *i)
{
	if (len > MAX_RW_COUNT)
		len = MAX_RW_COUNT;
	if (unlikely(!access_ok(!rw, buf, len)))
		return -EFAULT;

	iov->iov_base = buf;
	iov->iov_len = len;
	iov_iter_init(i, rw, iov, 1, len);
	return 0;
}
EXPORT_SYMBOL(import_single_range);
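
/*
 * Usage sketch for import_iovec() (illustrative): a readv()/writev()-style
 * entry point usually pairs it with a small on-stack fast array and
 * unconditionally kfree()s whatever pointer it gets back, relying on the
 * NULL-on-fast-path behaviour documented above:
 *
 *	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
 *	struct iov_iter iter;
 *	int ret = import_iovec(READ, uvec, nr_segs, UIO_FASTIOV, &iov, &iter);
 *	if (ret < 0)
 *		return ret;
 *	ret = do_the_io(&iter);		// made-up helper consuming the iterator
 *	kfree(iov);
 *	return ret;
 */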