// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to mapping data to requests
 */
#include <linux/kernel.h>
#include <linux/sched/task_stack.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/uio.h>

#include "blk.h"

struct bio_map_data {
	bool is_our_pages : 1;
	bool is_null_mapped : 1;
	struct iov_iter iter;
	struct iovec iov[];
};

static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
					       gfp_t gfp_mask)
{
	struct bio_map_data *bmd;

	if (data->nr_segs > UIO_MAXIOV)
		return NULL;

	bmd = kmalloc(struct_size(bmd, iov, data->nr_segs), gfp_mask);
	if (!bmd)
		return NULL;
	bmd->iter = *data;
	if (iter_is_iovec(data)) {
		memcpy(bmd->iov, iter_iov(data),
		       sizeof(struct iovec) * data->nr_segs);
		bmd->iter.__iov = bmd->iov;
	}
	return bmd;
}

/**
 * bio_copy_from_iter - copy all pages from iov_iter to bio
 * @bio: The &struct bio which describes the I/O as destination
 * @iter: iov_iter as source
 *
 * Copy all pages from iov_iter to bio.
 * Returns 0 on success, or error on failure.
 */
static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		ssize_t ret;

		ret = copy_page_from_iter(bvec->bv_page,
					  bvec->bv_offset,
					  bvec->bv_len,
					  iter);

		if (!iov_iter_count(iter))
			break;

		if (ret < bvec->bv_len)
			return -EFAULT;
	}

	return 0;
}

/**
 * bio_copy_to_iter - copy all pages from bio to iov_iter
 * @bio: The &struct bio which describes the I/O as source
 * @iter: iov_iter as destination
 *
 * Copy all pages from bio to iov_iter.
 * Returns 0 on success, or error on failure.
 */
static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		ssize_t ret;

		ret = copy_page_to_iter(bvec->bv_page,
					bvec->bv_offset,
					bvec->bv_len,
					&iter);

		if (!iov_iter_count(&iter))
			break;

		if (ret < bvec->bv_len)
			return -EFAULT;
	}

	return 0;
}

/**
 * bio_uncopy_user - finish previously mapped bio
 * @bio: bio being terminated
 *
 * Free pages allocated from bio_copy_user_iov() and write back data
 * to user space in case of a read.
 */
static int bio_uncopy_user(struct bio *bio)
{
	struct bio_map_data *bmd = bio->bi_private;
	int ret = 0;

	if (!bmd->is_null_mapped) {
		/*
		 * if we're in a workqueue, the request is orphaned, so
		 * don't copy into a random user address space, just free
		 * and return -EINTR so user space doesn't expect any data.
		 */
		if (!current->mm)
			ret = -EINTR;
		else if (bio_data_dir(bio) == READ)
			ret = bio_copy_to_iter(bio, bmd->iter);
		if (bmd->is_our_pages)
			bio_free_pages(bio);
	}
	kfree(bmd);
	return ret;
}
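/*
 * Illustration of why bio_alloc_map_data() above deep-copies the iovec
 * array: a caller may build its iovec on the stack, while the copy-back in
 * bio_uncopy_user() can run long after that stack frame is gone.  This is
 * a hedged sketch, not code from this file; "q", "rq", "ubuf" and "len"
 * are hypothetical:
 *
 *	struct iovec iov = { .iov_base = ubuf, .iov_len = len };
 *	struct iov_iter iter;
 *
 *	iov_iter_init(&iter, ITER_DEST, &iov, 1, len);
 *	blk_rq_map_user_iov(q, rq, NULL, &iter, GFP_KERNEL);
 *	// "iov" may now go out of scope: bmd->iov holds a private copy,
 *	// and bmd->iter.__iov was re-pointed at it by bio_alloc_map_data().
 */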
static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
		struct iov_iter *iter, gfp_t gfp_mask)
{
	struct bio_map_data *bmd;
	struct page *page;
	struct bio *bio;
	int i = 0, ret;
	int nr_pages;
	unsigned int len = iter->count;
	unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0;

	bmd = bio_alloc_map_data(iter, gfp_mask);
	if (!bmd)
		return -ENOMEM;

	/*
	 * We need to do a deep copy of the iov_iter including the iovecs.
	 * The caller provided iov might point to an on-stack or otherwise
	 * shortlived one.
	 */
	bmd->is_our_pages = !map_data;
	bmd->is_null_mapped = (map_data && map_data->null_mapped);

	nr_pages = bio_max_segs(DIV_ROUND_UP(offset + len, PAGE_SIZE));

	ret = -ENOMEM;
	bio = bio_kmalloc(nr_pages, gfp_mask);
	if (!bio)
		goto out_bmd;
	bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, req_op(rq));

	if (map_data) {
		nr_pages = 1U << map_data->page_order;
		i = map_data->offset / PAGE_SIZE;
	}
	while (len) {
		unsigned int bytes = PAGE_SIZE;

		bytes -= offset;

		if (bytes > len)
			bytes = len;

		if (map_data) {
			if (i == map_data->nr_entries * nr_pages) {
				ret = -ENOMEM;
				goto cleanup;
			}

			page = map_data->pages[i / nr_pages];
			page += (i % nr_pages);

			i++;
		} else {
			page = alloc_page(GFP_NOIO | gfp_mask);
			if (!page) {
				ret = -ENOMEM;
				goto cleanup;
			}
		}

		if (bio_add_page(bio, page, bytes, offset) < bytes) {
			if (!map_data)
				__free_page(page);
			break;
		}

		len -= bytes;
		offset = 0;
	}

	if (map_data)
		map_data->offset += bio->bi_iter.bi_size;

	/*
	 * success
	 */
	if (iov_iter_rw(iter) == WRITE &&
	    (!map_data || !map_data->null_mapped)) {
		ret = bio_copy_from_iter(bio, iter);
		if (ret)
			goto cleanup;
	} else if (map_data && map_data->from_user) {
		struct iov_iter iter2 = *iter;

		/* This is the copy-in part of SG_DXFER_TO_FROM_DEV. */
		iter2.data_source = ITER_SOURCE;
		ret = bio_copy_from_iter(bio, &iter2);
		if (ret)
			goto cleanup;
	} else {
		if (bmd->is_our_pages)
			zero_fill_bio(bio);
		iov_iter_advance(iter, bio->bi_iter.bi_size);
	}

	bio->bi_private = bmd;

	ret = blk_rq_append_bio(rq, bio);
	if (ret)
		goto cleanup;
	return 0;
cleanup:
	if (!map_data)
		bio_free_pages(bio);
	bio_uninit(bio);
	kfree(bio);
out_bmd:
	kfree(bmd);
	return ret;
}

static void blk_mq_map_bio_put(struct bio *bio)
{
	if (bio->bi_opf & REQ_ALLOC_CACHE) {
		bio_put(bio);
	} else {
		bio_uninit(bio);
		kfree(bio);
	}
}

static struct bio *blk_rq_map_bio_alloc(struct request *rq,
		unsigned int nr_vecs, gfp_t gfp_mask)
{
	struct bio *bio;

	if (rq->cmd_flags & REQ_ALLOC_CACHE && (nr_vecs <= BIO_INLINE_VECS)) {
		bio = bio_alloc_bioset(NULL, nr_vecs, rq->cmd_flags, gfp_mask,
					&fs_bio_set);
		if (!bio)
			return NULL;
	} else {
		bio = bio_kmalloc(nr_vecs, gfp_mask);
		if (!bio)
			return NULL;
		bio_init(bio, NULL, bio->bi_inline_vecs, nr_vecs, req_op(rq));
	}
	return bio;
}

static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
		gfp_t gfp_mask)
{
	unsigned int nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS);
	struct bio *bio;
	int ret;

	if (!iov_iter_count(iter))
		return -EINVAL;

	bio = blk_rq_map_bio_alloc(rq, nr_vecs, gfp_mask);
	if (!bio)
		return -ENOMEM;
	ret = bio_iov_iter_get_pages(bio, iter);
	if (ret)
		goto out_put;
	ret = blk_rq_append_bio(rq, bio);
	if (ret)
		goto out_release;
	return 0;

out_release:
	bio_release_pages(bio, false);
out_put:
	blk_mq_map_bio_put(bio);
	return ret;
}
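/*
 * Page-lifetime sketch for the zero-copy path above (a summary, not a
 * verbatim call chain): bio_iov_iter_get_pages() pins the user pages into
 * the bio, the device DMAs directly to or from them, and the matching
 * blk_rq_unmap_user() drops the pins via bio_release_pages(), dirtying the
 * pages when the request was a READ:
 *
 *	bio_map_user_iov()
 *	  bio_iov_iter_get_pages(bio, iter)	// pin user pages
 *	...device DMA...
 *	blk_rq_unmap_user()
 *	  bio_release_pages(bio, bio_data_dir(bio) == READ)	// unpin
 */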
static void bio_invalidate_vmalloc_pages(struct bio *bio)
{
#ifdef ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE
	if (bio->bi_private && !op_is_write(bio_op(bio))) {
		unsigned long i, len = 0;

		for (i = 0; i < bio->bi_vcnt; i++)
			len += bio->bi_io_vec[i].bv_len;
		invalidate_kernel_vmap_range(bio->bi_private, len);
	}
#endif
}

static void bio_map_kern_endio(struct bio *bio)
{
	bio_invalidate_vmalloc_pages(bio);
	bio_uninit(bio);
	kfree(bio);
}

/**
 * bio_map_kern - map kernel address into bio
 * @q: the struct request_queue for the bio
 * @data: pointer to buffer to map
 * @len: length in bytes
 * @gfp_mask: allocation flags for bio allocation
 *
 * Map the kernel address into a bio suitable for io to a block
 * device. Returns an error pointer in case of error.
 */
static struct bio *bio_map_kern(struct request_queue *q, void *data,
		unsigned int len, gfp_t gfp_mask)
{
	unsigned long kaddr = (unsigned long)data;
	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	unsigned long start = kaddr >> PAGE_SHIFT;
	const int nr_pages = end - start;
	bool is_vmalloc = is_vmalloc_addr(data);
	struct page *page;
	int offset, i;
	struct bio *bio;

	bio = bio_kmalloc(nr_pages, gfp_mask);
	if (!bio)
		return ERR_PTR(-ENOMEM);
	bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, 0);

	if (is_vmalloc) {
		flush_kernel_vmap_range(data, len);
		bio->bi_private = data;
	}

	offset = offset_in_page(kaddr);
	for (i = 0; i < nr_pages; i++) {
		unsigned int bytes = PAGE_SIZE - offset;

		if (len <= 0)
			break;

		if (bytes > len)
			bytes = len;

		if (!is_vmalloc)
			page = virt_to_page(data);
		else
			page = vmalloc_to_page(data);
		if (bio_add_page(bio, page, bytes, offset) < bytes) {
			/* we don't support partial mappings */
			bio_uninit(bio);
			kfree(bio);
			return ERR_PTR(-EINVAL);
		}

		data += bytes;
		len -= bytes;
		offset = 0;
	}

	bio->bi_end_io = bio_map_kern_endio;
	return bio;
}

static void bio_copy_kern_endio(struct bio *bio)
{
	bio_free_pages(bio);
	bio_uninit(bio);
	kfree(bio);
}

static void bio_copy_kern_endio_read(struct bio *bio)
{
	char *p = bio->bi_private;
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		memcpy_from_bvec(p, bvec);
		p += bvec->bv_len;
	}

	bio_copy_kern_endio(bio);
}
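/*
 * Cache-maintenance sketch for vmalloc buffers fed to bio_map_kern() above,
 * only relevant on architectures that define
 * ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE; "buf" and "len" are hypothetical:
 *
 *	buf = vmalloc(len);			// aliased kernel mapping
 *	bio = bio_map_kern(q, buf, len, ...);	// flush_kernel_vmap_range()
 *	...device DMA...
 *	bio_map_kern_endio(bio);		// invalidate_kernel_vmap_range()
 *						// before the CPU reads the data
 */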
/**
 * bio_copy_kern - copy kernel address into bio
 * @q: the struct request_queue for the bio
 * @data: pointer to buffer to copy
 * @len: length in bytes
 * @gfp_mask: allocation flags for bio and page allocation
 * @reading: data direction is READ
 *
 * copy the kernel address into a bio suitable for io to a block
 * device. Returns an error pointer in case of error.
 */
static struct bio *bio_copy_kern(struct request_queue *q, void *data,
		unsigned int len, gfp_t gfp_mask, int reading)
{
	unsigned long kaddr = (unsigned long)data;
	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	unsigned long start = kaddr >> PAGE_SHIFT;
	struct bio *bio;
	void *p = data;
	int nr_pages = 0;

	/*
	 * Overflow, abort
	 */
	if (end < start)
		return ERR_PTR(-EINVAL);

	nr_pages = end - start;
	bio = bio_kmalloc(nr_pages, gfp_mask);
	if (!bio)
		return ERR_PTR(-ENOMEM);
	bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, 0);

	while (len) {
		struct page *page;
		unsigned int bytes = PAGE_SIZE;

		if (bytes > len)
			bytes = len;

		page = alloc_page(GFP_NOIO | __GFP_ZERO | gfp_mask);
		if (!page)
			goto cleanup;

		if (!reading)
			memcpy(page_address(page), p, bytes);

		if (bio_add_page(bio, page, bytes, 0) < bytes)
			break;

		len -= bytes;
		p += bytes;
	}

	if (reading) {
		bio->bi_end_io = bio_copy_kern_endio_read;
		bio->bi_private = data;
	} else {
		bio->bi_end_io = bio_copy_kern_endio;
	}

	return bio;

cleanup:
	bio_free_pages(bio);
	bio_uninit(bio);
	kfree(bio);
	return ERR_PTR(-ENOMEM);
}

/*
 * Append a bio to a passthrough request.  Only works if the bio can be merged
 * into the request based on the driver constraints.
 */
int blk_rq_append_bio(struct request *rq, struct bio *bio)
{
	const struct queue_limits *lim = &rq->q->limits;
	unsigned int max_bytes = lim->max_hw_sectors << SECTOR_SHIFT;
	unsigned int nr_segs = 0;
	int ret;

	/* check that the data layout matches the hardware restrictions */
	ret = bio_split_rw_at(bio, lim, &nr_segs, max_bytes);
	if (ret) {
		/* if we would have to split the bio, copy instead */
		if (ret > 0)
			ret = -EREMOTEIO;
		return ret;
	}

	if (rq->bio) {
		if (!ll_back_merge_fn(rq, bio, nr_segs))
			return -EINVAL;
		rq->biotail->bi_next = bio;
		rq->biotail = bio;
		rq->__data_len += bio->bi_iter.bi_size;
		bio_crypt_free_ctx(bio);
		return 0;
	}

	rq->nr_phys_segments = nr_segs;
	rq->bio = rq->biotail = bio;
	rq->__data_len = bio->bi_iter.bi_size;
	return 0;
}
EXPORT_SYMBOL(blk_rq_append_bio);

/* Prepare bio for passthrough IO given ITER_BVEC iter */
static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
{
	unsigned int max_bytes = rq->q->limits.max_hw_sectors << SECTOR_SHIFT;
	struct bio *bio;
	int ret;

	if (!iov_iter_count(iter) || iov_iter_count(iter) > max_bytes)
		return -EINVAL;

	/* reuse the bvecs from the iterator instead of allocating new ones */
	bio = blk_rq_map_bio_alloc(rq, 0, GFP_KERNEL);
	if (!bio)
		return -ENOMEM;
	bio_iov_bvec_set(bio, iter);

	ret = blk_rq_append_bio(rq, bio);
	if (ret)
		blk_mq_map_bio_put(bio);
	return ret;
}
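/*
 * Hedged sketch of the ITER_BVEC fast path handled by blk_rq_map_user_bvec()
 * above.  The single-entry bvec here is hypothetical; in practice it would
 * come from a pre-registered buffer (e.g. io_uring fixed buffers):
 *
 *	struct bio_vec bv = {
 *		.bv_page	= page,
 *		.bv_len		= len,
 *		.bv_offset	= 0,
 *	};
 *	struct iov_iter iter;
 *
 *	iov_iter_bvec(&iter, ITER_SOURCE, &bv, 1, len);
 *	blk_rq_map_user_iov(q, rq, NULL, &iter, GFP_KERNEL);
 *	// bio_iov_bvec_set() reuses "bv" rather than copying it, so the
 *	// bvec array must stay alive until the request completes.
 */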
/**
 * blk_rq_map_user_iov - map user data to a request, for passthrough requests
 * @q: request queue where request should be inserted
 * @rq: request to map data to
 * @map_data: pointer to the rq_map_data holding pages (if necessary)
 * @iter: iovec iterator
 * @gfp_mask: memory allocation flags
 *
 * Description:
 *    Data will be mapped directly for zero copy I/O, if possible. Otherwise
 *    a kernel bounce buffer is used.
 *
 *    A matching blk_rq_unmap_user() must be issued at the end of I/O, while
 *    still in process context.
 */
int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
			struct rq_map_data *map_data,
			const struct iov_iter *iter, gfp_t gfp_mask)
{
	bool copy = false, map_bvec = false;
	unsigned long align = blk_lim_dma_alignment_and_pad(&q->limits);
	struct bio *bio = NULL;
	struct iov_iter i;
	int ret = -EINVAL;

	if (map_data)
		copy = true;
	else if (blk_queue_may_bounce(q))
		copy = true;
	else if (iov_iter_alignment(iter) & align)
		copy = true;
	else if (iov_iter_is_bvec(iter))
		map_bvec = true;
	else if (!user_backed_iter(iter))
		copy = true;
	else if (queue_virt_boundary(q))
		copy = queue_virt_boundary(q) & iov_iter_gap_alignment(iter);

	if (map_bvec) {
		ret = blk_rq_map_user_bvec(rq, iter);
		if (!ret)
			return 0;
		if (ret != -EREMOTEIO)
			goto fail;
		/* fall back to copying the data on limits mismatches */
		copy = true;
	}

	i = *iter;
	do {
		if (copy)
			ret = bio_copy_user_iov(rq, map_data, &i, gfp_mask);
		else
			ret = bio_map_user_iov(rq, &i, gfp_mask);
		if (ret) {
			if (ret == -EREMOTEIO)
				ret = -EINVAL;
			goto unmap_rq;
		}
		if (!bio)
			bio = rq->bio;
	} while (iov_iter_count(&i));

	return 0;

unmap_rq:
	blk_rq_unmap_user(bio);
fail:
	rq->bio = NULL;
	return ret;
}
EXPORT_SYMBOL(blk_rq_map_user_iov);

int blk_rq_map_user(struct request_queue *q, struct request *rq,
		    struct rq_map_data *map_data, void __user *ubuf,
		    unsigned long len, gfp_t gfp_mask)
{
	struct iov_iter i;
	int ret = import_ubuf(rq_data_dir(rq), ubuf, len, &i);

	if (unlikely(ret < 0))
		return ret;

	return blk_rq_map_user_iov(q, rq, map_data, &i, gfp_mask);
}
EXPORT_SYMBOL(blk_rq_map_user);

int blk_rq_map_user_io(struct request *req, struct rq_map_data *map_data,
		void __user *ubuf, unsigned long buf_len, gfp_t gfp_mask,
		bool vec, int iov_count, bool check_iter_count, int rw)
{
	int ret = 0;

	if (vec) {
		struct iovec fast_iov[UIO_FASTIOV];
		struct iovec *iov = fast_iov;
		struct iov_iter iter;

		ret = import_iovec(rw, ubuf, iov_count ? iov_count : buf_len,
				UIO_FASTIOV, &iov, &iter);
		if (ret < 0)
			return ret;

		if (iov_count) {
			/* SG_IO howto says that the shorter of the two wins */
			iov_iter_truncate(&iter, buf_len);
			if (check_iter_count && !iov_iter_count(&iter)) {
				kfree(iov);
				return -EINVAL;
			}
		}

		ret = blk_rq_map_user_iov(req->q, req, map_data, &iter,
				gfp_mask);
		kfree(iov);
	} else if (buf_len) {
		ret = blk_rq_map_user(req->q, req, map_data, ubuf, buf_len,
				gfp_mask);
	}
	return ret;
}
EXPORT_SYMBOL(blk_rq_map_user_io);
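/*
 * Hedged end-to-end usage sketch for the exported mapping helpers above, in
 * the style of SG_IO-like passthrough users; error handling is trimmed and
 * "q", "ubuf" and "len" are hypothetical:
 *
 *	struct request *rq = blk_mq_alloc_request(q, REQ_OP_DRV_IN, 0);
 *	struct bio *bio;
 *	int ret;
 *
 *	ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
 *	if (ret)
 *		goto out_free;
 *	bio = rq->bio;			// save: completion may change rq->bio
 *	blk_execute_rq(rq, false);
 *	ret = blk_rq_unmap_user(bio);	// must run in process context
 * out_free:
 *	blk_mq_free_request(rq);
 */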
/**
 * blk_rq_unmap_user - unmap a request with user data
 * @bio: start of bio list
 *
 * Description:
 *    Unmap a rq previously mapped by blk_rq_map_user(). The caller must
 *    supply the original rq->bio from the blk_rq_map_user() return, since
 *    the I/O completion may have changed rq->bio.
 */
int blk_rq_unmap_user(struct bio *bio)
{
	struct bio *next_bio;
	int ret = 0, ret2;

	while (bio) {
		if (bio->bi_private) {
			ret2 = bio_uncopy_user(bio);
			if (ret2 && !ret)
				ret = ret2;
		} else {
			bio_release_pages(bio, bio_data_dir(bio) == READ);
		}

		if (bio_integrity(bio))
			bio_integrity_unmap_user(bio);

		next_bio = bio;
		bio = bio->bi_next;
		blk_mq_map_bio_put(next_bio);
	}

	return ret;
}
EXPORT_SYMBOL(blk_rq_unmap_user);

/**
 * blk_rq_map_kern - map kernel data to a request, for passthrough requests
 * @q: request queue where request should be inserted
 * @rq: request to fill
 * @kbuf: the kernel buffer
 * @len: length of user data
 * @gfp_mask: memory allocation flags
 *
 * Description:
 *    Data will be mapped directly if possible. Otherwise a bounce
 *    buffer is used. Can be called multiple times to append multiple
 *    buffers.
 */
int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
		    unsigned int len, gfp_t gfp_mask)
{
	int reading = rq_data_dir(rq) == READ;
	unsigned long addr = (unsigned long) kbuf;
	struct bio *bio;
	int ret;

	if (len > (queue_max_hw_sectors(q) << 9))
		return -EINVAL;
	if (!len || !kbuf)
		return -EINVAL;

	if (!blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf) ||
	    blk_queue_may_bounce(q))
		bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);
	else
		bio = bio_map_kern(q, kbuf, len, gfp_mask);

	if (IS_ERR(bio))
		return PTR_ERR(bio);

	bio->bi_opf &= ~REQ_OP_MASK;
	bio->bi_opf |= req_op(rq);

	ret = blk_rq_append_bio(rq, bio);
	if (unlikely(ret)) {
		bio_uninit(bio);
		kfree(bio);
	}
	return ret;
}
EXPORT_SYMBOL(blk_rq_map_kern);
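/*
 * Hedged usage sketch for blk_rq_map_kern(): a kmalloc()ed buffer that
 * satisfies blk_rq_aligned() maps zero-copy via bio_map_kern(), while an
 * on-stack buffer (object_is_on_stack()) is always bounced through
 * bio_copy_kern().  "cmd_len" and the buffer are hypothetical:
 *
 *	void *cmd_buf = kmalloc(cmd_len, GFP_KERNEL);
 *	int ret = blk_rq_map_kern(q, rq, cmd_buf, cmd_len, GFP_KERNEL);
 *
 *	if (!ret)
 *		blk_execute_rq(rq, false);
 *	// no explicit unmap step here: the bio's end_io handler frees any
 *	// bounce pages when the request completes.
 *	kfree(cmd_buf);
 */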