// SPDX-License-Identifier: GPL-2.0-only
#include <crypto/hash.h>
#include <linux/export.h>
#include <linux/bvec.h>
#include <linux/fault-inject-usercopy.h>
#include <linux/uio.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/splice.h>
#include <linux/compat.h>
#include <net/checksum.h>
#include <linux/scatterlist.h>
#include <linux/instrumented.h>

#define PIPE_PARANOIA /* for now */

#define iterate_iovec(i, n, __v, __p, skip, STEP) {	\
	size_t left;					\
	size_t wanted = n;				\
	__p = i->iov;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	} else {					\
		left = 0;				\
	}						\
	while (unlikely(!left && n)) {			\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted - n;					\
}

#define iterate_kvec(i, n, __v, __p, skip, STEP) {	\
	size_t wanted = n;				\
	__p = i->kvec;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		(void)(STEP);				\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	while (unlikely(n)) {				\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		(void)(STEP);				\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted;					\
}

#define iterate_bvec(i, n, __v, __bi, skip, STEP) {	\
	struct bvec_iter __start;			\
	__start.bi_size = n;				\
	__start.bi_bvec_done = skip;			\
	__start.bi_idx = 0;				\
	for_each_bvec(__v, i->bvec, __bi, __start) {	\
		(void)(STEP);				\
	}						\
}

#define iterate_xarray(i, n, __v, skip, STEP) {		\
	struct page *head = NULL;			\
	size_t wanted = n, seg, offset;			\
	loff_t start = i->xarray_start + skip;		\
	pgoff_t index = start >> PAGE_SHIFT;		\
	int j;						\
							\
	XA_STATE(xas, i->xarray, index);		\
							\
	rcu_read_lock();				\
	xas_for_each(&xas, head, ULONG_MAX) {		\
		if (xas_retry(&xas, head))		\
			continue;			\
		if (WARN_ON(xa_is_value(head)))		\
			break;				\
		if (WARN_ON(PageHuge(head)))		\
			break;				\
		for (j = (head->index < index) ? index - head->index : 0;	\
		     j < thp_nr_pages(head); j++) {	\
			__v.bv_page = head + j;		\
			offset = (i->xarray_start + skip) & ~PAGE_MASK;	\
			seg = PAGE_SIZE - offset;	\
			__v.bv_offset = offset;		\
			__v.bv_len = min(n, seg);	\
			(void)(STEP);			\
			n -= __v.bv_len;		\
			skip += __v.bv_len;		\
			if (n == 0)			\
				break;			\
		}					\
		if (n == 0)				\
			break;				\
	}						\
	rcu_read_unlock();				\
	n = wanted - n;					\
}

#define iterate_all_kinds(i, n, v, I, B, K, X) {	\
	if (likely(n)) {				\
		size_t skip = i->iov_offset;		\
		if (unlikely(i->type & ITER_BVEC)) {	\
			struct bio_vec v;		\
			struct bvec_iter __bi;		\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;	\
			struct kvec v;			\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
		} else if (unlikely(i->type & ITER_DISCARD)) {	\
		} else if (unlikely(i->type & ITER_XARRAY)) {	\
			struct bio_vec v;		\
			iterate_xarray(i, n, v, skip, (X));	\
		} else {				\
			const struct iovec *iov;	\
			struct iovec v;			\
			iterate_iovec(i, n, v, iov, skip, (I))	\
		}					\
	}						\
}

#define iterate_and_advance(i, n, v, I, B, K, X) {	\
	if (unlikely(i->count < n))			\
		n = i->count;				\
	if (i->count) {					\
		size_t skip = i->iov_offset;		\
		if (unlikely(i->type & ITER_BVEC)) {	\
			const struct bio_vec *bvec = i->bvec;	\
			struct bio_vec v;		\
			struct bvec_iter __bi;		\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
			i->bvec = __bvec_iter_bvec(i->bvec, __bi);	\
			i->nr_segs -= i->bvec - bvec;	\
			skip = __bi.bi_bvec_done;	\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;	\
			struct kvec v;			\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
			if (skip == kvec->iov_len) {	\
				kvec++;			\
				skip = 0;		\
			}				\
			i->nr_segs -= kvec - i->kvec;	\
			i->kvec = kvec;			\
		} else if (unlikely(i->type & ITER_DISCARD)) {	\
			skip += n;			\
		} else if (unlikely(i->type & ITER_XARRAY)) {	\
			struct bio_vec v;		\
			iterate_xarray(i, n, v, skip, (X))	\
		} else {				\
			const struct iovec *iov;	\
			struct iovec v;			\
			iterate_iovec(i, n, v, iov, skip, (I))	\
			if (skip == iov->iov_len) {	\
				iov++;			\
				skip = 0;		\
			}				\
			i->nr_segs -= iov - i->iov;	\
			i->iov = iov;			\
		}					\
		i->count -= n;				\
		i->iov_offset = skip;			\
	}						\
}
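
/*
 * Illustrative sketch (an assumption for clarity, modelled on the callers
 * further down this file): iterate_and_advance() is handed one step
 * expression per iterator flavour - user iovec (I), bvec (B), kvec (K) and
 * xarray (X) - and each expression sees the current segment as "v".  A
 * hypothetical helper that gathers data into a flat kernel buffer would
 * look roughly like this:
 *
 *	static size_t example_gather(void *addr, size_t bytes, struct iov_iter *i)
 *	{
 *		char *to = addr;
 *		iterate_and_advance(i, bytes, v,
 *			copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
 *			memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
 *					 v.bv_offset, v.bv_len),
 *			memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
 *			memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
 *					 v.bv_offset, v.bv_len)
 *		)
 *		return bytes;
 *	}
 */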

static int copyout(void __user *to, const void *from, size_t n)
{
	if (should_fail_usercopy())
		return n;
	if (access_ok(to, n)) {
		instrument_copy_to_user(to, from, n);
		n = raw_copy_to_user(to, from, n);
	}
	return n;
}

static int copyin(void *to, const void __user *from, size_t n)
{
	if (should_fail_usercopy())
		return n;
	if (access_ok(from, n)) {
		instrument_copy_from_user(to, from, n);
		n = raw_copy_from_user(to, from, n);
	}
	return n;
}

static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *from;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	might_fault();
	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
		kaddr = kmap_atomic(page);
		from = kaddr + offset;

		/* first chunk, usually the only one */
		left = copyout(buf, from, copy);
		copy -= left;
		skip += copy;
		from += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = copyout(buf, from, copy);
			copy -= left;
			skip = copy;
			from += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = from - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	from = kaddr + offset;
	left = copyout(buf, from, copy);
	copy -= left;
	skip += copy;
	from += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = copyout(buf, from, copy);
		copy -= left;
		skip = copy;
		from += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *to;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	might_fault();
	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
		kaddr = kmap_atomic(page);
		to = kaddr + offset;

		/* first chunk, usually the only one */
		left = copyin(to, buf, copy);
		copy -= left;
		skip += copy;
		to += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = copyin(to, buf, copy);
			copy -= left;
			skip = copy;
			to += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = to - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	to = kaddr + offset;
	left = copyin(to, buf, copy);
	copy -= left;
	skip += copy;
	to += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = copyin(to, buf, copy);
		copy -= left;
		skip = copy;
		to += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

#ifdef PIPE_PARANOIA
static bool sanity(const struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_head = pipe->head;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int p_occupancy = pipe_occupancy(p_head, p_tail);
	unsigned int i_head = i->head;
	unsigned int idx;

	if (i->iov_offset) {
		struct pipe_buffer *p;
		if (unlikely(p_occupancy == 0))
			goto Bad;	// pipe must be non-empty
		if (unlikely(i_head != p_head - 1))
			goto Bad;	// must be at the last buffer...

		p = &pipe->bufs[i_head & p_mask];
		if (unlikely(p->offset + p->len != i->iov_offset))
			goto Bad;	// ... at the end of segment
	} else {
		if (i_head != p_head)
			goto Bad;	// must be right after the last buffer
	}
	return true;
Bad:
	printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset);
	printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n",
			p_head, p_tail, pipe->ring_size);
	for (idx = 0; idx < pipe->ring_size; idx++)
		printk(KERN_ERR "[%p %p %d %d]\n",
			pipe->bufs[idx].ops,
			pipe->bufs[idx].page,
			pipe->bufs[idx].offset,
			pipe->bufs[idx].len);
	WARN_ON(1);
	return false;
}
#else
#define sanity(i) true
#endif

static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	struct pipe_buffer *buf;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head = i->head;
	size_t off;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	if (!sanity(i))
		return 0;

	off = i->iov_offset;
	buf = &pipe->bufs[i_head & p_mask];
	if (off) {
		if (offset == off && buf->page == page) {
			/* merge with the last one */
			buf->len += bytes;
			i->iov_offset += bytes;
			goto out;
		}
		i_head++;
		buf = &pipe->bufs[i_head & p_mask];
	}
	if (pipe_full(i_head, p_tail, pipe->max_usage))
		return 0;

	buf->ops = &page_cache_pipe_buf_ops;
	get_page(page);
	buf->page = page;
	buf->offset = offset;
	buf->len = bytes;

	pipe->head = i_head + 1;
	i->iov_offset = offset + bytes;
	i->head = i_head;
out:
	i->count -= bytes;
	return bytes;
}

/*
 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
 * bytes. For each iovec, fault in each page that constitutes the iovec.
 *
 * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
 * because it is an invalid address).
 */
int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
{
	size_t skip = i->iov_offset;
	const struct iovec *iov;
	int err;
	struct iovec v;

	if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
		iterate_iovec(i, bytes, v, iov, skip, ({
			err = fault_in_pages_readable(v.iov_base, v.iov_len);
			if (unlikely(err))
				return err;
		0;}))
	}
	return 0;
}
EXPORT_SYMBOL(iov_iter_fault_in_readable);

void iov_iter_init(struct iov_iter *i, unsigned int direction,
			const struct iovec *iov, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	direction &= READ | WRITE;

	/* It will get better.  Eventually... */
	if (uaccess_kernel()) {
		i->type = ITER_KVEC | direction;
		i->kvec = (struct kvec *)iov;
	} else {
		i->type = ITER_IOVEC | direction;
		i->iov = iov;
	}
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_init);
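
/*
 * Illustrative sketch (hypothetical caller, mirroring import_single_range()
 * further down): wrapping a single user buffer in an ITER_IOVEC iterator as
 * the destination of a read.
 *
 *	struct iovec iov = { .iov_base = ubuf, .iov_len = len };
 *	struct iov_iter iter;
 *
 *	// ubuf and len are assumed to have been validated with access_ok()
 *	iov_iter_init(&iter, READ, &iov, 1, len);
 */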

static void memzero_page(struct page *page, size_t offset, size_t len)
{
	char *addr = kmap_atomic(page);
	memset(addr + offset, 0, len);
	kunmap_atomic(addr);
}

static inline bool allocated(struct pipe_buffer *buf)
{
	return buf->ops == &default_pipe_buf_ops;
}

static inline void data_start(const struct iov_iter *i,
			      unsigned int *iter_headp, size_t *offp)
{
	unsigned int p_mask = i->pipe->ring_size - 1;
	unsigned int iter_head = i->head;
	size_t off = i->iov_offset;

	if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) ||
		    off == PAGE_SIZE)) {
		iter_head++;
		off = 0;
	}
	*iter_headp = iter_head;
	*offp = off;
}

static size_t push_pipe(struct iov_iter *i, size_t size,
			int *iter_headp, size_t *offp)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int iter_head;
	size_t off;
	ssize_t left;

	if (unlikely(size > i->count))
		size = i->count;
	if (unlikely(!size))
		return 0;

	left = size;
	data_start(i, &iter_head, &off);
	*iter_headp = iter_head;
	*offp = off;
	if (off) {
		left -= PAGE_SIZE - off;
		if (left <= 0) {
			pipe->bufs[iter_head & p_mask].len += size;
			return size;
		}
		pipe->bufs[iter_head & p_mask].len = PAGE_SIZE;
		iter_head++;
	}
	while (!pipe_full(iter_head, p_tail, pipe->max_usage)) {
		struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask];
		struct page *page = alloc_page(GFP_USER);
		if (!page)
			break;

		buf->ops = &default_pipe_buf_ops;
		buf->page = page;
		buf->offset = 0;
		buf->len = min_t(ssize_t, left, PAGE_SIZE);
		left -= buf->len;
		iter_head++;
		pipe->head = iter_head;

		if (left == 0)
			return size;
	}
	return size - left;
}

static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
				struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, off;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &off);
	if (unlikely(!n))
		return 0;
	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk);
		i->head = i_head;
		i->iov_offset = off + chunk;
		n -= chunk;
		addr += chunk;
		off = 0;
		i_head++;
	} while (n);
	i->count -= bytes;
	return bytes;
}

static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
			      __wsum sum, size_t off)
{
	__wsum next = csum_partial_copy_nocheck(from, to, len);
	return csum_block_add(sum, next, off);
}

static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
					 struct csum_state *csstate,
					 struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	__wsum sum = csstate->csum;
	size_t off = csstate->off;
	unsigned int i_head;
	size_t n, r;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &r);
	if (unlikely(!n))
		return 0;
	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - r);
		char *p = kmap_atomic(pipe->bufs[i_head & p_mask].page);
		sum = csum_and_memcpy(p + r, addr, chunk, sum, off);
		kunmap_atomic(p);
		i->head = i_head;
		i->iov_offset = r + chunk;
		n -= chunk;
		off += chunk;
		addr += chunk;
		r = 0;
		i_head++;
	} while (n);
	i->count -= bytes;
	csstate->csum = sum;
	csstate->off = off;
	return bytes;
}

size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
	const char *from = addr;
	if (unlikely(iov_iter_is_pipe(i)))
		return copy_pipe_to_iter(addr, bytes, i);
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
		memcpy_to_page(v.bv_page, v.bv_offset,
			       (from += v.bv_len) - v.bv_len, v.bv_len),
		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
		memcpy_to_page(v.bv_page, v.bv_offset,
			       (from += v.bv_len) - v.bv_len, v.bv_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_to_iter);
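
/*
 * Illustrative sketch (hypothetical caller): a driver answering a read
 * request typically hands a kernel buffer to copy_to_iter() (the inline
 * wrapper around _copy_to_iter() in <linux/uio.h>) and checks for a short
 * copy.
 *
 *	size_t copied = copy_to_iter(kbuf, len, to);
 *	if (copied != len)
 *		return -EFAULT;	// some callers return the partial count instead
 */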

#ifdef CONFIG_ARCH_HAS_COPY_MC
static int copyout_mc(void __user *to, const void *from, size_t n)
{
	if (access_ok(to, n)) {
		instrument_copy_to_user(to, from, n);
		n = copy_mc_to_user((__force void *) to, from, n);
	}
	return n;
}

static unsigned long copy_mc_to_page(struct page *page, size_t offset,
				     const char *from, size_t len)
{
	unsigned long ret;
	char *to;

	to = kmap_atomic(page);
	ret = copy_mc_to_kernel(to + offset, from, len);
	kunmap_atomic(to);

	return ret;
}

static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
				   struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, off, xfer = 0;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &off);
	if (unlikely(!n))
		return 0;
	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		unsigned long rem;

		rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page,
				      off, addr, chunk);
		i->head = i_head;
		i->iov_offset = off + chunk - rem;
		xfer += chunk - rem;
		if (rem)
			break;
		n -= chunk;
		addr += chunk;
		off = 0;
		i_head++;
	} while (n);
	i->count -= xfer;
	return xfer;
}

/**
 * _copy_mc_to_iter - copy to iter with source memory error exception handling
 * @addr: source kernel address
 * @bytes: total transfer length
 * @i: destination iterator
 *
 * The pmem driver deploys this for the dax operation
 * (dax_copy_to_iter()) for dax reads (bypass page-cache and the
 * block-layer). Upon #MC read(2) aborts and returns EIO or the bytes
 * successfully copied.
 *
 * The main differences between this and the typical _copy_to_iter() are:
 *
 * * Typical tail/residue handling after a fault retries the copy
 *   byte-by-byte until the fault happens again. Re-triggering machine
 *   checks is potentially fatal so the implementation uses source
 *   alignment and poison alignment assumptions to avoid re-triggering
 *   hardware exceptions.
 *
 * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
 *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
 *   a short copy.
 */
size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
	const char *from = addr;
	unsigned long rem, curr_addr, s_addr = (unsigned long) addr;

	if (unlikely(iov_iter_is_pipe(i)))
		return copy_mc_pipe_to_iter(addr, bytes, i);
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len,
			   v.iov_len),
		({
		rem = copy_mc_to_page(v.bv_page, v.bv_offset,
				      (from += v.bv_len) - v.bv_len, v.bv_len);
		if (rem) {
			curr_addr = (unsigned long) from;
			bytes = curr_addr - s_addr - rem;
			return bytes;
		}
		}),
		({
		rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len)
					- v.iov_len, v.iov_len);
		if (rem) {
			curr_addr = (unsigned long) from;
			bytes = curr_addr - s_addr - rem;
			return bytes;
		}
		}),
		({
		rem = copy_mc_to_page(v.bv_page, v.bv_offset,
				      (from += v.bv_len) - v.bv_len, v.bv_len);
		if (rem) {
			curr_addr = (unsigned long) from;
			bytes = curr_addr - s_addr - rem;
			rcu_read_unlock();
			i->iov_offset += bytes;
			i->count -= bytes;
			return bytes;
		}
		})
	)

	return bytes;
}
EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
#endif /* CONFIG_ARCH_HAS_COPY_MC */

size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_from_iter);

bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;

	if (iter_is_iovec(i))
		might_fault();
	iterate_all_kinds(i, bytes, v, ({
		if (copyin((to += v.iov_len) - v.iov_len,
			   v.iov_base, v.iov_len))
			return false;
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len)
	)

	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(_copy_from_iter_full);

size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
						  v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_from_iter_nocache);

#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/**
 * _copy_from_iter_flushcache - write destination through cpu cache
 * @addr: destination kernel address
 * @bytes: total transfer length
 * @i: source iterator
 *
 * The pmem driver arranges for filesystem-dax to use this facility via
 * dax_copy_from_iter() for ensuring that writes to persistent memory
 * are flushed through the CPU cache. It is differentiated from
 * _copy_from_iter_nocache() in that it guarantees all data is flushed for
 * all iterator types. The _copy_from_iter_nocache() only attempts to
 * bypass the cache for the ITER_IOVEC case, and on some archs may use
 * instructions that strand dirty-data in the cache.
 */
size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
					    v.iov_base, v.iov_len),
		memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
				       v.bv_offset, v.bv_len),
		memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
				  v.iov_len),
		memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
				       v.bv_offset, v.bv_len)
	)

	return bytes;
}
EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
#endif

bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;
	iterate_all_kinds(i, bytes, v, ({
		if (__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
						      v.iov_base, v.iov_len))
			return false;
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len)
	)

	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(_copy_from_iter_full_nocache);

static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
{
	struct page *head;
	size_t v = n + offset;

	/*
	 * The general case needs to access the page order in order
	 * to compute the page size.
	 * However, we mostly deal with order-0 pages and thus can
	 * avoid a possible cache line miss for requests that fit all
	 * page orders.
	 */
	if (n <= v && v <= PAGE_SIZE)
		return true;

	head = compound_head(page);
	v += (page - head) << PAGE_SHIFT;

	if (likely(n <= v && v <= (page_size(head))))
		return true;
	WARN_ON(1);
	return false;
}

size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (unlikely(!page_copy_sane(page, offset, bytes)))
		return 0;
	if (i->type & (ITER_BVEC | ITER_KVEC | ITER_XARRAY)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else if (unlikely(iov_iter_is_discard(i)))
		return bytes;
	else if (likely(!iov_iter_is_pipe(i)))
		return copy_page_to_iter_iovec(page, offset, bytes, i);
	else
		return copy_page_to_iter_pipe(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_to_iter);

size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (unlikely(!page_copy_sane(page, offset, bytes)))
		return 0;
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return 0;
	}
	if (i->type & (ITER_BVEC | ITER_KVEC | ITER_XARRAY)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else
		return copy_page_from_iter_iovec(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_from_iter);

static size_t pipe_zero(size_t bytes, struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, off;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &off);
	if (unlikely(!n))
		return 0;

	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memzero_page(pipe->bufs[i_head & p_mask].page, off, chunk);
		i->head = i_head;
		i->iov_offset = off + chunk;
		n -= chunk;
		off = 0;
		i_head++;
	} while (n);
	i->count -= bytes;
	return bytes;
}

size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_zero(bytes, i);
	iterate_and_advance(i, bytes, v,
		clear_user(v.iov_base, v.iov_len),
		memzero_page(v.bv_page, v.bv_offset, v.bv_len),
		memset(v.iov_base, 0, v.iov_len),
		memzero_page(v.bv_page, v.bv_offset, v.bv_len)
	)

	return bytes;
}
EXPORT_SYMBOL(iov_iter_zero);

size_t iov_iter_copy_from_user_atomic(struct page *page,
		struct iov_iter *i, unsigned long offset, size_t bytes)
{
	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
	if (unlikely(!page_copy_sane(page, offset, bytes))) {
		kunmap_atomic(kaddr);
		return 0;
	}
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		kunmap_atomic(kaddr);
		WARN_ON(1);
		return 0;
	}
	iterate_all_kinds(i, bytes, v,
		copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len)
	)
	kunmap_atomic(kaddr);
	return bytes;
}
EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);

static inline void pipe_truncate(struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_tail = pipe->tail;
	unsigned int p_head = pipe->head;
	unsigned int p_mask = pipe->ring_size - 1;

	if (!pipe_empty(p_head, p_tail)) {
		struct pipe_buffer *buf;
		unsigned int i_head = i->head;
		size_t off = i->iov_offset;

		if (off) {
			buf = &pipe->bufs[i_head & p_mask];
			buf->len = off - buf->offset;
			i_head++;
		}
		while (p_head != i_head) {
			p_head--;
			pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]);
		}

		pipe->head = p_head;
	}
}

static void pipe_advance(struct iov_iter *i, size_t size)
{
	struct pipe_inode_info *pipe = i->pipe;
	if (unlikely(i->count < size))
		size = i->count;
	if (size) {
		struct pipe_buffer *buf;
		unsigned int p_mask = pipe->ring_size - 1;
		unsigned int i_head = i->head;
		size_t off = i->iov_offset, left = size;

		if (off) /* make it relative to the beginning of buffer */
			left += off - pipe->bufs[i_head & p_mask].offset;
		while (1) {
			buf = &pipe->bufs[i_head & p_mask];
			if (left <= buf->len)
				break;
			left -= buf->len;
			i_head++;
		}
		i->head = i_head;
		i->iov_offset = buf->offset + left;
	}
	i->count -= size;
	/* ... and discard everything past that point */
	pipe_truncate(i);
}

static void iov_iter_bvec_advance(struct iov_iter *i, size_t size)
{
	struct bvec_iter bi;

	bi.bi_size = i->count;
	bi.bi_bvec_done = i->iov_offset;
	bi.bi_idx = 0;
	bvec_iter_advance(i->bvec, &bi, size);

	i->bvec += bi.bi_idx;
	i->nr_segs -= bi.bi_idx;
	i->count = bi.bi_size;
	i->iov_offset = bi.bi_bvec_done;
}

void iov_iter_advance(struct iov_iter *i, size_t size)
{
	if (unlikely(iov_iter_is_pipe(i))) {
		pipe_advance(i, size);
		return;
	}
	if (unlikely(iov_iter_is_discard(i))) {
		i->count -= size;
		return;
	}
	if (unlikely(iov_iter_is_xarray(i))) {
		size = min(size, i->count);
		i->iov_offset += size;
		i->count -= size;
		return;
	}
	if (iov_iter_is_bvec(i)) {
		iov_iter_bvec_advance(i, size);
		return;
	}
	iterate_and_advance(i, size, v, 0, 0, 0, 0)
}
EXPORT_SYMBOL(iov_iter_advance);
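
/*
 * Illustrative sketch (hypothetical caller): the copy helpers above advance
 * the iterator as they go, so a caller that must undo a partially completed
 * operation hands the consumed byte count back to iov_iter_revert() below:
 *
 *	size_t copied = copy_to_iter(kbuf, len, i);	// advances i by "copied"
 *	if (copied && some_later_step_failed)
 *		iov_iter_revert(i, copied);		// i looks untouched again
 */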

void iov_iter_revert(struct iov_iter *i, size_t unroll)
{
	if (!unroll)
		return;
	if (WARN_ON(unroll > MAX_RW_COUNT))
		return;
	i->count += unroll;
	if (unlikely(iov_iter_is_pipe(i))) {
		struct pipe_inode_info *pipe = i->pipe;
		unsigned int p_mask = pipe->ring_size - 1;
		unsigned int i_head = i->head;
		size_t off = i->iov_offset;
		while (1) {
			struct pipe_buffer *b = &pipe->bufs[i_head & p_mask];
			size_t n = off - b->offset;
			if (unroll < n) {
				off -= unroll;
				break;
			}
			unroll -= n;
			if (!unroll && i_head == i->start_head) {
				off = 0;
				break;
			}
			i_head--;
			b = &pipe->bufs[i_head & p_mask];
			off = b->offset + b->len;
		}
		i->iov_offset = off;
		i->head = i_head;
		pipe_truncate(i);
		return;
	}
	if (unlikely(iov_iter_is_discard(i)))
		return;
	if (unroll <= i->iov_offset) {
		i->iov_offset -= unroll;
		return;
	}
	unroll -= i->iov_offset;
	if (iov_iter_is_xarray(i)) {
		BUG(); /* We should never go beyond the start of the specified
			* range since we might then be straying into pages that
			* aren't pinned.
			*/
	} else if (iov_iter_is_bvec(i)) {
		const struct bio_vec *bvec = i->bvec;
		while (1) {
			size_t n = (--bvec)->bv_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->bvec = bvec;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	} else { /* same logics for iovec and kvec */
		const struct iovec *iov = i->iov;
		while (1) {
			size_t n = (--iov)->iov_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->iov = iov;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	}
}
EXPORT_SYMBOL(iov_iter_revert);

/*
 * Return the count of just the current iov_iter segment.
 */
size_t iov_iter_single_seg_count(const struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i)))
		return i->count;	// it is a silly place, anyway
	if (i->nr_segs == 1)
		return i->count;
	if (unlikely(iov_iter_is_discard(i) || iov_iter_is_xarray(i)))
		return i->count;
	if (iov_iter_is_bvec(i))
		return min(i->count, i->bvec->bv_len - i->iov_offset);
	else
		return min(i->count, i->iov->iov_len - i->iov_offset);
}
EXPORT_SYMBOL(iov_iter_single_seg_count);

void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
			const struct kvec *kvec, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	i->type = ITER_KVEC | (direction & (READ | WRITE));
	i->kvec = kvec;
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_kvec);

void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
			const struct bio_vec *bvec, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	i->type = ITER_BVEC | (direction & (READ | WRITE));
	i->bvec = bvec;
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_bvec);
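
/*
 * Illustrative sketch (hypothetical caller): kernel code that wants to feed
 * a single page to an iterator-based API can wrap it in a one-element
 * bio_vec and build an ITER_BVEC iterator around it.
 *
 *	struct bio_vec bv = {
 *		.bv_page	= page,
 *		.bv_offset	= 0,
 *		.bv_len		= PAGE_SIZE,
 *	};
 *	struct iov_iter iter;
 *
 *	// READ: data will be copied into the page, e.g. by copy_to_iter()
 *	iov_iter_bvec(&iter, READ, &bv, 1, PAGE_SIZE);
 */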

void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
			struct pipe_inode_info *pipe,
			size_t count)
{
	BUG_ON(direction != READ);
	WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size));
	i->type = ITER_PIPE | READ;
	i->pipe = pipe;
	i->head = pipe->head;
	i->iov_offset = 0;
	i->count = count;
	i->start_head = i->head;
}
EXPORT_SYMBOL(iov_iter_pipe);

/**
 * iov_iter_xarray - Initialise an I/O iterator to use the pages in an xarray
 * @i: The iterator to initialise.
 * @direction: The direction of the transfer.
 * @xarray: The xarray to access.
 * @start: The start file position.
 * @count: The size of the I/O buffer in bytes.
 *
 * Set up an I/O iterator to either draw data out of the pages attached to an
 * inode or to inject data into those pages.  The pages *must* be prevented
 * from evaporation, either by taking a ref on them or locking them by the
 * caller.
 */
void iov_iter_xarray(struct iov_iter *i, unsigned int direction,
		     struct xarray *xarray, loff_t start, size_t count)
{
	BUG_ON(direction & ~1);
	i->type = ITER_XARRAY | (direction & (READ | WRITE));
	i->xarray = xarray;
	i->xarray_start = start;
	i->count = count;
	i->iov_offset = 0;
}
EXPORT_SYMBOL(iov_iter_xarray);

/**
 * iov_iter_discard - Initialise an I/O iterator that discards data
 * @i: The iterator to initialise.
 * @direction: The direction of the transfer.
 * @count: The size of the I/O buffer in bytes.
 *
 * Set up an I/O iterator that just discards everything that's written to it.
 * It's only available as a READ iterator.
 */
void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
{
	BUG_ON(direction != READ);
	i->type = ITER_DISCARD | READ;
	i->count = count;
	i->iov_offset = 0;
}
EXPORT_SYMBOL(iov_iter_discard);

unsigned long iov_iter_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;

	if (unlikely(iov_iter_is_pipe(i))) {
		unsigned int p_mask = i->pipe->ring_size - 1;

		if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask]))
			return size | i->iov_offset;
		return size;
	}
	if (unlikely(iov_iter_is_xarray(i)))
		return (i->xarray_start + i->iov_offset) | i->count;
	iterate_all_kinds(i, size, v,
		(res |= (unsigned long)v.iov_base | v.iov_len, 0),
		res |= v.bv_offset | v.bv_len,
		res |= (unsigned long)v.iov_base | v.iov_len,
		res |= v.bv_offset | v.bv_len
	)
	return res;
}
EXPORT_SYMBOL(iov_iter_alignment);

unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;

	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return ~0U;
	}

	iterate_all_kinds(i, size, v,
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0), 0),
		(res |= (!res ? 0 : (unsigned long)v.bv_offset) |
			(size != v.bv_len ? size : 0)),
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0)),
		(res |= (!res ? 0 : (unsigned long)v.bv_offset) |
			(size != v.bv_len ? size : 0))
	);
	return res;
}
EXPORT_SYMBOL(iov_iter_gap_alignment);

static inline ssize_t __pipe_get_pages(struct iov_iter *i,
				size_t maxsize,
				struct page **pages,
				int iter_head,
				size_t *start)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	ssize_t n = push_pipe(i, maxsize, &iter_head, start);
	if (!n)
		return -EFAULT;

	maxsize = n;
	n += *start;
	while (n > 0) {
		get_page(*pages++ = pipe->bufs[iter_head & p_mask].page);
		iter_head++;
		n -= PAGE_SIZE;
	}

	return maxsize;
}

static ssize_t pipe_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	unsigned int iter_head, npages;
	size_t capacity;

	if (!maxsize)
		return 0;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &iter_head, start);
	/* Amount of free space: some of this one + all after this one */
	npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
	capacity = min(npages, maxpages) * PAGE_SIZE - *start;

	return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start);
}

static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa,
					  pgoff_t index, unsigned int nr_pages)
{
	XA_STATE(xas, xa, index);
	struct page *page;
	unsigned int ret = 0;

	rcu_read_lock();
	for (page = xas_load(&xas); page; page = xas_next(&xas)) {
		if (xas_retry(&xas, page))
			continue;

		/* Has the page moved or been split? */
		if (unlikely(page != xas_reload(&xas))) {
			xas_reset(&xas);
			continue;
		}

		pages[ret] = find_subpage(page, xas.xa_index);
		get_page(pages[ret]);
		if (++ret == nr_pages)
			break;
	}
	rcu_read_unlock();
	return ret;
}

static ssize_t iter_xarray_get_pages(struct iov_iter *i,
				     struct page **pages, size_t maxsize,
				     unsigned maxpages, size_t *_start_offset)
{
	unsigned nr, offset;
	pgoff_t index, count;
	size_t size = maxsize, actual;
	loff_t pos;

	if (!size || !maxpages)
		return 0;

	pos = i->xarray_start + i->iov_offset;
	index = pos >> PAGE_SHIFT;
	offset = pos & ~PAGE_MASK;
	*_start_offset = offset;

	count = 1;
	if (size > PAGE_SIZE - offset) {
		size -= PAGE_SIZE - offset;
		count += size >> PAGE_SHIFT;
		size &= ~PAGE_MASK;
		if (size)
			count++;
	}

	if (count > maxpages)
		count = maxpages;

	nr = iter_xarray_populate_pages(pages, i->xarray, index, count);
	if (nr == 0)
		return 0;

	actual = PAGE_SIZE * nr;
	actual -= offset;
	if (nr == count && size > 0) {
		unsigned last_offset = (nr > 1) ? 0 : offset;
		actual -= PAGE_SIZE - (last_offset + size);
	}
	return actual;
}

ssize_t iov_iter_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	if (maxsize > i->count)
		maxsize = i->count;

	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_get_pages(i, pages, maxsize, maxpages, start);
	if (unlikely(iov_iter_is_xarray(i)))
		return iter_xarray_get_pages(i, pages, maxsize, maxpages, start);
	if (unlikely(iov_iter_is_discard(i)))
		return -EFAULT;

	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		if (len > maxpages * PAGE_SIZE)
			len = maxpages * PAGE_SIZE;
		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		res = get_user_pages_fast(addr, n,
				iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0,
				pages);
		if (unlikely(res < 0))
			return res;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		get_page(*pages = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	}),
	0
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages);
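
/*
 * Illustrative sketch (hypothetical caller, e.g. a direct-I/O path): pin up
 * to a page-array's worth of the iterator, use the pages, then drop the
 * references iov_iter_get_pages() took.
 *
 *	struct page *pages[16];
 *	size_t offset;
 *	ssize_t got;
 *	int j;
 *
 *	got = iov_iter_get_pages(i, pages, SIZE_MAX, ARRAY_SIZE(pages), &offset);
 *	if (got <= 0)
 *		return got ? got : -EFAULT;
 *	// the data begins at byte "offset" inside pages[0]
 *	for (j = 0; j < DIV_ROUND_UP(offset + got, PAGE_SIZE); j++)
 *		put_page(pages[j]);
 */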

static struct page **get_pages_array(size_t n)
{
	return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
}

static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;
	unsigned int iter_head, npages;
	ssize_t n;

	if (!maxsize)
		return 0;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &iter_head, start);
	/* Amount of free space: some of this one + all after this one */
	npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
	n = npages * PAGE_SIZE - *start;
	if (maxsize > n)
		maxsize = n;
	else
		npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
	p = get_pages_array(npages);
	if (!p)
		return -ENOMEM;
	n = __pipe_get_pages(i, maxsize, p, iter_head, start);
	if (n > 0)
		*pages = p;
	else
		kvfree(p);
	return n;
}

static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i,
					   struct page ***pages, size_t maxsize,
					   size_t *_start_offset)
{
	struct page **p;
	unsigned nr, offset;
	pgoff_t index, count;
	size_t size = maxsize, actual;
	loff_t pos;

	if (!size)
		return 0;

	pos = i->xarray_start + i->iov_offset;
	index = pos >> PAGE_SHIFT;
	offset = pos & ~PAGE_MASK;
	*_start_offset = offset;

	count = 1;
	if (size > PAGE_SIZE - offset) {
		size -= PAGE_SIZE - offset;
		count += size >> PAGE_SHIFT;
		size &= ~PAGE_MASK;
		if (size)
			count++;
	}

	p = get_pages_array(count);
	if (!p)
		return -ENOMEM;
	*pages = p;

	nr = iter_xarray_populate_pages(p, i->xarray, index, count);
	if (nr == 0)
		return 0;

	actual = PAGE_SIZE * nr;
	actual -= offset;
	if (nr == count && size > 0) {
		unsigned last_offset = (nr > 1) ? 0 : offset;
		actual -= PAGE_SIZE - (last_offset + size);
	}
	return actual;
}

ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;

	if (maxsize > i->count)
		maxsize = i->count;

	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_get_pages_alloc(i, pages, maxsize, start);
	if (unlikely(iov_iter_is_xarray(i)))
		return iter_xarray_get_pages_alloc(i, pages, maxsize, start);
	if (unlikely(iov_iter_is_discard(i)))
		return -EFAULT;

	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		p = get_pages_array(n);
		if (!p)
			return -ENOMEM;
		res = get_user_pages_fast(addr, n,
				iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0, p);
		if (unlikely(res < 0)) {
			kvfree(p);
			return res;
		}
		*pages = p;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		*pages = p = get_pages_array(1);
		if (!p)
			return -ENOMEM;
		get_page(*p = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	}), 0
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages_alloc);

size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
			       struct iov_iter *i)
{
	char *to = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		next = csum_and_copy_from_user(v.iov_base,
					       (to += v.iov_len) - v.iov_len,
					       v.iov_len);
		if (next) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		next ? 0 : v.iov_len;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
				      p + v.bv_offset, v.bv_len,
				      sum, off);
		kunmap_atomic(p);
		off += v.bv_len;
	}),({
		sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
				      v.iov_base, v.iov_len,
				      sum, off);
		off += v.iov_len;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
				      p + v.bv_offset, v.bv_len,
				      sum, off);
		kunmap_atomic(p);
		off += v.bv_len;
	})
	)
	*csum = sum;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_from_iter);

bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
				  struct iov_iter *i)
{
	char *to = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;
	iterate_all_kinds(i, bytes, v, ({
		next = csum_and_copy_from_user(v.iov_base,
					       (to += v.iov_len) - v.iov_len,
					       v.iov_len);
		if (!next)
			return false;
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
		0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
				      p + v.bv_offset, v.bv_len,
				      sum, off);
		kunmap_atomic(p);
		off += v.bv_len;
	}),({
		sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
				      v.iov_base, v.iov_len,
				      sum, off);
		off += v.iov_len;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
				      p + v.bv_offset, v.bv_len,
				      sum, off);
		kunmap_atomic(p);
		off += v.bv_len;
	})
	)
	*csum = sum;
	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(csum_and_copy_from_iter_full);

size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
			     struct iov_iter *i)
{
	struct csum_state *csstate = _csstate;
	const char *from = addr;
	__wsum sum, next;
	size_t off;

	if (unlikely(iov_iter_is_pipe(i)))
		return csum_and_copy_to_pipe_iter(addr, bytes, _csstate, i);

	sum = csstate->csum;
	off = csstate->off;
	if (unlikely(iov_iter_is_discard(i))) {
		WARN_ON(1);	/* for now */
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
					     v.iov_base,
					     v.iov_len);
		if (next) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		next ? 0 : v.iov_len;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		sum = csum_and_memcpy(p + v.bv_offset,
				      (from += v.bv_len) - v.bv_len,
				      v.bv_len, sum, off);
		kunmap_atomic(p);
		off += v.bv_len;
	}),({
		sum = csum_and_memcpy(v.iov_base,
				      (from += v.iov_len) - v.iov_len,
				      v.iov_len, sum, off);
		off += v.iov_len;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		sum = csum_and_memcpy(p + v.bv_offset,
				      (from += v.bv_len) - v.bv_len,
				      v.bv_len, sum, off);
		kunmap_atomic(p);
		off += v.bv_len;
	})
	)
	csstate->csum = sum;
	csstate->off = off;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_to_iter);

size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
			     struct iov_iter *i)
{
#ifdef CONFIG_CRYPTO_HASH
	struct ahash_request *hash = hashp;
	struct scatterlist sg;
	size_t copied;

	copied = copy_to_iter(addr, bytes, i);
	sg_init_one(&sg, addr, copied);
	ahash_request_set_crypt(hash, &sg, NULL, copied);
	crypto_ahash_update(hash);
	return copied;
#else
	return 0;
#endif
}
EXPORT_SYMBOL(hash_and_copy_to_iter);

int iov_iter_npages(const struct iov_iter *i, int maxpages)
{
	size_t size = i->count;
	int npages = 0;

	if (!size)
		return 0;
	if (unlikely(iov_iter_is_discard(i)))
		return 0;

	if (unlikely(iov_iter_is_pipe(i))) {
		struct pipe_inode_info *pipe = i->pipe;
		unsigned int iter_head;
		size_t off;

		if (!sanity(i))
			return 0;

		data_start(i, &iter_head, &off);
		/* some of this one + all after this one */
		npages = pipe_space_for_user(iter_head, pipe->tail, pipe);
		if (npages >= maxpages)
			return maxpages;
	} else if (unlikely(iov_iter_is_xarray(i))) {
		unsigned offset;

		offset = (i->xarray_start + i->iov_offset) & ~PAGE_MASK;

		npages = 1;
		if (size > PAGE_SIZE - offset) {
			size -= PAGE_SIZE - offset;
			npages += size >> PAGE_SHIFT;
			size &= ~PAGE_MASK;
			if (size)
				npages++;
		}
		if (npages >= maxpages)
			return maxpages;
	} else iterate_all_kinds(i, size, v, ({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	0;}),({
		npages++;
		if (npages >= maxpages)
			return maxpages;
	}),({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	}),
	0
	)
	return npages;
}
EXPORT_SYMBOL(iov_iter_npages);

const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
{
	*new = *old;
	if (unlikely(iov_iter_is_pipe(new))) {
		WARN_ON(1);
		return NULL;
	}
	if (unlikely(iov_iter_is_discard(new) || iov_iter_is_xarray(new)))
		return NULL;
	if (iov_iter_is_bvec(new))
		return new->bvec = kmemdup(new->bvec,
				    new->nr_segs * sizeof(struct bio_vec),
				    flags);
	else
		/* iovec and kvec have identical layout */
		return new->iov = kmemdup(new->iov,
				   new->nr_segs * sizeof(struct iovec),
				   flags);
}
EXPORT_SYMBOL(dup_iter);

static int copy_compat_iovec_from_user(struct iovec *iov,
		const struct iovec __user *uvec, unsigned long nr_segs)
{
	const struct compat_iovec __user *uiov =
		(const struct compat_iovec __user *)uvec;
	int ret = -EFAULT, i;

	if (!user_access_begin(uiov, nr_segs * sizeof(*uiov)))
		return -EFAULT;

	for (i = 0; i < nr_segs; i++) {
		compat_uptr_t buf;
		compat_ssize_t len;

		unsafe_get_user(len, &uiov[i].iov_len, uaccess_end);
		unsafe_get_user(buf, &uiov[i].iov_base, uaccess_end);

		/* check for compat_size_t not fitting in compat_ssize_t .. */
		if (len < 0) {
			ret = -EINVAL;
			goto uaccess_end;
		}
		iov[i].iov_base = compat_ptr(buf);
		iov[i].iov_len = len;
	}

	ret = 0;
uaccess_end:
	user_access_end();
	return ret;
}

static int copy_iovec_from_user(struct iovec *iov,
		const struct iovec __user *uvec, unsigned long nr_segs)
{
	unsigned long seg;

	if (copy_from_user(iov, uvec, nr_segs * sizeof(*uvec)))
		return -EFAULT;
	for (seg = 0; seg < nr_segs; seg++) {
		if ((ssize_t)iov[seg].iov_len < 0)
			return -EINVAL;
	}

	return 0;
}

struct iovec *iovec_from_user(const struct iovec __user *uvec,
		unsigned long nr_segs, unsigned long fast_segs,
		struct iovec *fast_iov, bool compat)
{
	struct iovec *iov = fast_iov;
	int ret;

	/*
	 * SuS says "The readv() function *may* fail if the iovcnt argument was
	 * less than or equal to 0, or greater than {IOV_MAX}.  Linux has
	 * traditionally returned zero for zero segments, so...
	 */
	if (nr_segs == 0)
		return iov;
	if (nr_segs > UIO_MAXIOV)
		return ERR_PTR(-EINVAL);
	if (nr_segs > fast_segs) {
		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
		if (!iov)
			return ERR_PTR(-ENOMEM);
	}

	if (compat)
		ret = copy_compat_iovec_from_user(iov, uvec, nr_segs);
	else
		ret = copy_iovec_from_user(iov, uvec, nr_segs);
	if (ret) {
		if (iov != fast_iov)
			kfree(iov);
		return ERR_PTR(ret);
	}

	return iov;
}

ssize_t __import_iovec(int type, const struct iovec __user *uvec,
		 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
		 struct iov_iter *i, bool compat)
{
	ssize_t total_len = 0;
	unsigned long seg;
	struct iovec *iov;

	iov = iovec_from_user(uvec, nr_segs, fast_segs, *iovp, compat);
	if (IS_ERR(iov)) {
		*iovp = NULL;
		return PTR_ERR(iov);
	}

	/*
	 * According to the Single Unix Specification we should return EINVAL if
	 * an element length is < 0 when cast to ssize_t or if the total length
	 * would overflow the ssize_t return value of the system call.
	 *
	 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
	 * overflow case.
	 */
	for (seg = 0; seg < nr_segs; seg++) {
		ssize_t len = (ssize_t)iov[seg].iov_len;

		if (!access_ok(iov[seg].iov_base, len)) {
			if (iov != *iovp)
				kfree(iov);
			*iovp = NULL;
			return -EFAULT;
		}

		if (len > MAX_RW_COUNT - total_len) {
			len = MAX_RW_COUNT - total_len;
			iov[seg].iov_len = len;
		}
		total_len += len;
	}

	iov_iter_init(i, type, iov, nr_segs, total_len);
	if (iov == *iovp)
		*iovp = NULL;
	else
		*iovp = iov;
	return total_len;
}

/**
 * import_iovec() - Copy an array of &struct iovec from userspace
 *     into the kernel, check that it is valid, and initialize a new
 *     &struct iov_iter iterator to access it.
 *
 * @type: One of %READ or %WRITE.
 * @uvec: Pointer to the userspace array.
 * @nr_segs: Number of elements in userspace array.
 * @fast_segs: Number of elements in @iovp.
 * @iovp: (input and output parameter) Pointer to pointer to (usually small
 *     on-stack) kernel array.
 * @i: Pointer to iterator that will be initialized on success.
 *
 * If the array pointed to by *@iovp is large enough to hold all @nr_segs,
 * then this function places %NULL in *@iovp on return. Otherwise, a new
 * array will be allocated and the result placed in *@iovp. This means that
 * the caller may call kfree() on *@iovp regardless of whether the small
 * on-stack array was used or not (and regardless of whether this function
 * returns an error or not).
 *
 * Return: Negative error code on error, bytes imported on success
 */
ssize_t import_iovec(int type, const struct iovec __user *uvec,
		 unsigned nr_segs, unsigned fast_segs,
		 struct iovec **iovp, struct iov_iter *i)
{
	return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i,
			      in_compat_syscall());
}
EXPORT_SYMBOL(import_iovec);
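
/*
 * Illustrative sketch (hypothetical caller, mirroring the readv()/writev()
 * syscall paths): import a user iovec array with a small on-stack fast-path
 * array, use the iterator, then free whatever import_iovec() may have
 * allocated.
 *
 *	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
 *	struct iov_iter iter;
 *	ssize_t ret;
 *
 *	ret = import_iovec(READ, uvec, nr_segs, ARRAY_SIZE(iovstack),
 *			   &iov, &iter);
 *	if (ret < 0)
 *		return ret;
 *	ret = do_the_transfer(&iter);	// hypothetical helper
 *	kfree(iov);	// safe: *iovp is NULL when the stack array was used
 *	return ret;
 */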

int import_single_range(int rw, void __user *buf, size_t len,
		 struct iovec *iov, struct iov_iter *i)
{
	if (len > MAX_RW_COUNT)
		len = MAX_RW_COUNT;
	if (unlikely(!access_ok(buf, len)))
		return -EFAULT;

	iov->iov_base = buf;
	iov->iov_len = len;
	iov_iter_init(i, rw, iov, 1, len);
	return 0;
}
EXPORT_SYMBOL(import_single_range);

int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
			    int (*f)(struct kvec *vec, void *context),
			    void *context)
{
	struct kvec w;
	int err = -EINVAL;
	if (!bytes)
		return 0;

	iterate_all_kinds(i, bytes, v, -EINVAL, ({
		w.iov_base = kmap(v.bv_page) + v.bv_offset;
		w.iov_len = v.bv_len;
		err = f(&w, context);
		kunmap(v.bv_page);
		err;}), ({
		w = v;
		err = f(&w, context);}), ({
		w.iov_base = kmap(v.bv_page) + v.bv_offset;
		w.iov_len = v.bv_len;
		err = f(&w, context);
		kunmap(v.bv_page);
		err;})
	)
	return err;
}
EXPORT_SYMBOL(iov_iter_for_each_range);