// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to mapping data to requests
 */
#include <linux/kernel.h>
#include <linux/sched/task_stack.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/uio.h>

#include "blk.h"

struct bio_map_data {
        bool is_our_pages : 1;
        bool is_null_mapped : 1;
        struct iov_iter iter;
        struct iovec iov[];
};

static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
                                               gfp_t gfp_mask)
{
        struct bio_map_data *bmd;

        if (data->nr_segs > UIO_MAXIOV)
                return NULL;

        bmd = kmalloc(struct_size(bmd, iov, data->nr_segs), gfp_mask);
        if (!bmd)
                return NULL;
        bmd->iter = *data;
        if (iter_is_iovec(data)) {
                memcpy(bmd->iov, iter_iov(data),
                       sizeof(struct iovec) * data->nr_segs);
                bmd->iter.__iov = bmd->iov;
        }
        return bmd;
}

static inline void blk_mq_map_bio_put(struct bio *bio)
{
        bio_put(bio);
}

static struct bio *blk_rq_map_bio_alloc(struct request *rq,
                unsigned int nr_vecs, gfp_t gfp_mask)
{
        struct block_device *bdev = rq->q->disk ? rq->q->disk->part0 : NULL;
        struct bio *bio;

        bio = bio_alloc_bioset(bdev, nr_vecs, rq->cmd_flags, gfp_mask,
                               &fs_bio_set);
        if (!bio)
                return NULL;

        return bio;
}

/**
 * bio_copy_from_iter - copy all pages from iov_iter to bio
 * @bio: The &struct bio which describes the I/O as destination
 * @iter: iov_iter as source
 *
 * Copy all pages from iov_iter to bio.
 * Returns 0 on success, or error on failure.
 */
static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
{
        struct bio_vec *bvec;
        struct bvec_iter_all iter_all;

        bio_for_each_segment_all(bvec, bio, iter_all) {
                ssize_t ret;

                ret = copy_page_from_iter(bvec->bv_page,
                                          bvec->bv_offset,
                                          bvec->bv_len,
                                          iter);

                if (!iov_iter_count(iter))
                        break;

                if (ret < bvec->bv_len)
                        return -EFAULT;
        }

        return 0;
}

/**
 * bio_copy_to_iter - copy all pages from bio to iov_iter
 * @bio: The &struct bio which describes the I/O as source
 * @iter: iov_iter as destination
 *
 * Copy all pages from bio to iov_iter.
 * Returns 0 on success, or error on failure.
 */
static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
{
        struct bio_vec *bvec;
        struct bvec_iter_all iter_all;

        bio_for_each_segment_all(bvec, bio, iter_all) {
                ssize_t ret;

                ret = copy_page_to_iter(bvec->bv_page,
                                        bvec->bv_offset,
                                        bvec->bv_len,
                                        &iter);

                if (!iov_iter_count(&iter))
                        break;

                if (ret < bvec->bv_len)
                        return -EFAULT;
        }

        return 0;
}
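/*
 * bio_copy_from_iter() and bio_copy_to_iter() are the two data directions of
 * the bounce-buffer path: the former fills the bio's pages from the iterator
 * at mapping time (writes), the latter copies them back to user space at
 * unmap time (reads, via bio_uncopy_user() below).  bio_copy_to_iter() takes
 * its iov_iter by value, so it consumes a private copy of the iterator
 * snapshot stored in struct bio_map_data rather than the stored one.
 */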
/**
 * bio_uncopy_user - finish previously mapped bio
 * @bio: bio being terminated
 *
 * Free pages allocated from bio_copy_user_iov() and write back data
 * to user space in case of a read.
 */
static int bio_uncopy_user(struct bio *bio)
{
        struct bio_map_data *bmd = bio->bi_private;
        int ret = 0;

        if (!bmd->is_null_mapped) {
                /*
                 * if we're in a workqueue, the request is orphaned, so
                 * don't copy into a random user address space, just free
                 * and return -EINTR so user space doesn't expect any data.
                 */
                if (!current->mm)
                        ret = -EINTR;
                else if (bio_data_dir(bio) == READ)
                        ret = bio_copy_to_iter(bio, bmd->iter);
                if (bmd->is_our_pages)
                        bio_free_pages(bio);
        }
        kfree(bmd);
        return ret;
}

static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
                struct iov_iter *iter, gfp_t gfp_mask)
{
        struct bio_map_data *bmd;
        struct page *page;
        struct bio *bio;
        int i = 0, ret;
        int nr_pages;
        unsigned int len = iter->count;
        unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0;

        bmd = bio_alloc_map_data(iter, gfp_mask);
        if (!bmd)
                return -ENOMEM;

        /*
         * We need to do a deep copy of the iov_iter including the iovecs.
         * The caller provided iov might point to an on-stack or otherwise
         * shortlived one.
         */
        bmd->is_our_pages = !map_data;
        bmd->is_null_mapped = (map_data && map_data->null_mapped);

        nr_pages = bio_max_segs(DIV_ROUND_UP(offset + len, PAGE_SIZE));

        ret = -ENOMEM;
        bio = blk_rq_map_bio_alloc(rq, nr_pages, gfp_mask);
        if (!bio)
                goto out_bmd;

        if (map_data) {
                nr_pages = 1U << map_data->page_order;
                i = map_data->offset / PAGE_SIZE;
        }
        while (len) {
                unsigned int bytes = PAGE_SIZE;

                bytes -= offset;

                if (bytes > len)
                        bytes = len;

                if (map_data) {
                        if (i == map_data->nr_entries * nr_pages) {
                                ret = -ENOMEM;
                                goto cleanup;
                        }

                        page = map_data->pages[i / nr_pages];
                        page += (i % nr_pages);

                        i++;
                } else {
                        page = alloc_page(GFP_NOIO | gfp_mask);
                        if (!page) {
                                ret = -ENOMEM;
                                goto cleanup;
                        }
                }

                if (bio_add_page(bio, page, bytes, offset) < bytes) {
                        if (!map_data)
                                __free_page(page);
                        break;
                }

                len -= bytes;
                offset = 0;
        }

        if (map_data)
                map_data->offset += bio->bi_iter.bi_size;

        /*
         * success
         */
        if (iov_iter_rw(iter) == WRITE &&
            (!map_data || !map_data->null_mapped)) {
                ret = bio_copy_from_iter(bio, iter);
                if (ret)
                        goto cleanup;
        } else if (map_data && map_data->from_user) {
                struct iov_iter iter2 = *iter;

                /* This is the copy-in part of SG_DXFER_TO_FROM_DEV. */
                iter2.data_source = ITER_SOURCE;
                ret = bio_copy_from_iter(bio, &iter2);
                if (ret)
                        goto cleanup;
        } else {
                if (bmd->is_our_pages)
                        zero_fill_bio(bio);
                iov_iter_advance(iter, bio->bi_iter.bi_size);
        }

        bio->bi_private = bmd;

        ret = blk_rq_append_bio(rq, bio);
        if (ret)
                goto cleanup;
        return 0;
cleanup:
        if (!map_data)
                bio_free_pages(bio);
        blk_mq_map_bio_put(bio);
out_bmd:
        kfree(bmd);
        return ret;
}

static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
                gfp_t gfp_mask)
{
        unsigned int nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS);
        struct bio *bio;
        int ret;

        if (!iov_iter_count(iter))
                return -EINVAL;

        bio = blk_rq_map_bio_alloc(rq, nr_vecs, gfp_mask);
        if (!bio)
                return -ENOMEM;
        /*
         * No alignment requirements on our part to support arbitrary
         * passthrough commands.
         */
        ret = bio_iov_iter_get_pages(bio, iter, 0);
        if (ret)
                goto out_put;
        ret = blk_rq_append_bio(rq, bio);
        if (ret)
                goto out_release;
        return 0;

out_release:
        bio_release_pages(bio, false);
out_put:
        blk_mq_map_bio_put(bio);
        return ret;
}
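/*
 * The two helpers above are the building blocks of blk_rq_map_user_iov():
 * bio_copy_user_iov() bounces the data through kernel pages (either freshly
 * allocated ones or pages supplied via struct rq_map_data), while
 * bio_map_user_iov() pins the user pages and maps them directly for
 * zero-copy I/O.  Which path is taken depends on the alignment, gap and
 * mapping constraints evaluated in blk_rq_map_user_iov() further down.
 */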
static void bio_invalidate_vmalloc_pages(struct bio *bio)
{
#ifdef ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE
        if (bio->bi_private && !op_is_write(bio_op(bio))) {
                unsigned long i, len = 0;

                for (i = 0; i < bio->bi_vcnt; i++)
                        len += bio->bi_io_vec[i].bv_len;
                invalidate_kernel_vmap_range(bio->bi_private, len);
        }
#endif
}

static void bio_map_kern_endio(struct bio *bio)
{
        bio_invalidate_vmalloc_pages(bio);
        blk_mq_map_bio_put(bio);
}

static struct bio *bio_map_kern(struct request *rq, void *data,
                unsigned int len, gfp_t gfp_mask)
{
        unsigned int nr_vecs = bio_add_max_vecs(data, len);
        struct bio *bio;

        bio = blk_rq_map_bio_alloc(rq, nr_vecs, gfp_mask);
        if (!bio)
                return ERR_PTR(-ENOMEM);

        if (is_vmalloc_addr(data)) {
                bio->bi_private = data;
                if (!bio_add_vmalloc(bio, data, len)) {
                        blk_mq_map_bio_put(bio);
                        return ERR_PTR(-EINVAL);
                }
        } else {
                bio_add_virt_nofail(bio, data, len);
        }
        bio->bi_end_io = bio_map_kern_endio;
        return bio;
}
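/*
 * For vmalloc'ed buffers bio_map_kern() stashes the original address in
 * bi_private so that bio_map_kern_endio() can invalidate the kernel vmap
 * range once a read completes, on architectures that define
 * ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE.
 */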
static void bio_copy_kern_endio(struct bio *bio)
{
        bio_free_pages(bio);
        blk_mq_map_bio_put(bio);
}

static void bio_copy_kern_endio_read(struct bio *bio)
{
        char *p = bio->bi_private;
        struct bio_vec *bvec;
        struct bvec_iter_all iter_all;

        bio_for_each_segment_all(bvec, bio, iter_all) {
                memcpy_from_bvec(p, bvec);
                p += bvec->bv_len;
        }

        bio_copy_kern_endio(bio);
}

/**
 * bio_copy_kern - copy kernel address into bio
 * @rq: request to fill
 * @data: pointer to buffer to copy
 * @len: length in bytes
 * @gfp_mask: allocation flags for bio and page allocation
 *
 * Copy the kernel address into a bio suitable for I/O to a block
 * device. Returns an error pointer in case of error.
 */
static struct bio *bio_copy_kern(struct request *rq, void *data,
                unsigned int len, gfp_t gfp_mask)
{
        enum req_op op = req_op(rq);
        unsigned long kaddr = (unsigned long)data;
        unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
        unsigned long start = kaddr >> PAGE_SHIFT;
        struct bio *bio;
        void *p = data;
        int nr_pages = 0;

        /*
         * Overflow, abort
         */
        if (end < start)
                return ERR_PTR(-EINVAL);

        nr_pages = end - start;
        bio = blk_rq_map_bio_alloc(rq, nr_pages, gfp_mask);
        if (!bio)
                return ERR_PTR(-ENOMEM);

        while (len) {
                struct page *page;
                unsigned int bytes = PAGE_SIZE;

                if (bytes > len)
                        bytes = len;

                page = alloc_page(GFP_NOIO | __GFP_ZERO | gfp_mask);
                if (!page)
                        goto cleanup;

                if (op_is_write(op))
                        memcpy(page_address(page), p, bytes);

                if (bio_add_page(bio, page, bytes, 0) < bytes)
                        break;

                len -= bytes;
                p += bytes;
        }

        if (op_is_write(op)) {
                bio->bi_end_io = bio_copy_kern_endio;
        } else {
                bio->bi_end_io = bio_copy_kern_endio_read;
                bio->bi_private = data;
        }

        return bio;

cleanup:
        bio_free_pages(bio);
        blk_mq_map_bio_put(bio);
        return ERR_PTR(-ENOMEM);
}

/*
 * Append a bio to a passthrough request.  Only works if the bio can be merged
 * into the request based on the driver constraints.
 */
int blk_rq_append_bio(struct request *rq, struct bio *bio)
{
        const struct queue_limits *lim = &rq->q->limits;
        unsigned int max_bytes = lim->max_hw_sectors << SECTOR_SHIFT;
        unsigned int nr_segs = 0;
        int ret;

        /* check that the data layout matches the hardware restrictions */
        ret = bio_split_io_at(bio, lim, &nr_segs, max_bytes, 0);
        if (ret) {
                /* if we would have to split the bio, copy instead */
                if (ret > 0)
                        ret = -EREMOTEIO;
                return ret;
        }

        if (rq->bio) {
                if (!ll_back_merge_fn(rq, bio, nr_segs))
                        return -EINVAL;
                rq->phys_gap_bit = bio_seg_gap(rq->q, rq->biotail, bio,
                                               rq->phys_gap_bit);
                rq->biotail->bi_next = bio;
                rq->biotail = bio;
                rq->__data_len += bio->bi_iter.bi_size;
                bio_crypt_free_ctx(bio);
                return 0;
        }

        rq->nr_phys_segments = nr_segs;
        rq->bio = rq->biotail = bio;
        rq->__data_len = bio->bi_iter.bi_size;
        rq->phys_gap_bit = bio->bi_bvec_gap_bit;
        return 0;
}
EXPORT_SYMBOL(blk_rq_append_bio);
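/*
 * Illustrative sketch only (it would live in a caller, not in this file): one
 * way a driver that already owns its data pages might attach them to a
 * passthrough request with blk_rq_append_bio().  The example_* name is
 * hypothetical, error handling is abbreviated, and <linux/blk-mq.h> is
 * assumed for blk_mq_alloc_request()/blk_execute_rq().
 */
static int __maybe_unused example_append_pages(struct request_queue *q,
                struct page **pages, unsigned int nr_pages)
{
        struct request *rq;
        struct bio *bio;
        unsigned int i;
        int ret;

        rq = blk_mq_alloc_request(q, REQ_OP_DRV_IN, 0);
        if (IS_ERR(rq))
                return PTR_ERR(rq);

        /* the bio has to carry the same operation as the request */
        bio = bio_alloc(NULL, nr_pages, rq->cmd_flags, GFP_KERNEL);
        if (!bio) {
                ret = -ENOMEM;
                goto out_free_rq;
        }

        for (i = 0; i < nr_pages; i++) {
                if (!bio_add_page(bio, pages[i], PAGE_SIZE, 0)) {
                        ret = -EINVAL;
                        goto out_put_bio;
                }
        }

        /* fails with -EREMOTEIO if the layout would require splitting */
        ret = blk_rq_append_bio(rq, bio);
        if (ret)
                goto out_put_bio;

        ret = blk_status_to_errno(blk_execute_rq(rq, false));

out_put_bio:
        /* the caller still owns the bio reference it allocated */
        bio_put(bio);
out_free_rq:
        blk_mq_free_request(rq);
        return ret;
}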
/* Prepare bio for passthrough IO given ITER_BVEC iter */
static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
{
        unsigned int max_bytes = rq->q->limits.max_hw_sectors << SECTOR_SHIFT;
        struct bio *bio;
        int ret;

        if (!iov_iter_count(iter) || iov_iter_count(iter) > max_bytes)
                return -EINVAL;

        /* reuse the bvecs from the iterator instead of allocating new ones */
        bio = blk_rq_map_bio_alloc(rq, 0, GFP_KERNEL);
        if (!bio)
                return -ENOMEM;
        bio_iov_bvec_set(bio, iter);

        ret = blk_rq_append_bio(rq, bio);
        if (ret)
                blk_mq_map_bio_put(bio);
        return ret;
}

/**
 * blk_rq_map_user_iov - map user data to a request, for passthrough requests
 * @q: request queue where request should be inserted
 * @rq: request to map data to
 * @map_data: pointer to the rq_map_data holding pages (if necessary)
 * @iter: iovec iterator
 * @gfp_mask: memory allocation flags
 *
 * Description:
 *    Data will be mapped directly for zero copy I/O, if possible. Otherwise
 *    a kernel bounce buffer is used.
 *
 *    A matching blk_rq_unmap_user() must be issued at the end of I/O, while
 *    still in process context.
 */
int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
                        struct rq_map_data *map_data,
                        const struct iov_iter *iter, gfp_t gfp_mask)
{
        bool copy = false, map_bvec = false;
        unsigned long align = blk_lim_dma_alignment_and_pad(&q->limits);
        struct bio *bio = NULL;
        struct iov_iter i;
        int ret = -EINVAL;

        if (map_data)
                copy = true;
        else if (iov_iter_alignment(iter) & align)
                copy = true;
        else if (iov_iter_is_bvec(iter))
                map_bvec = true;
        else if (!user_backed_iter(iter))
                copy = true;
        else if (queue_virt_boundary(q))
                copy = queue_virt_boundary(q) & iov_iter_gap_alignment(iter);

        if (map_bvec) {
                ret = blk_rq_map_user_bvec(rq, iter);
                if (!ret)
                        return 0;
                if (ret != -EREMOTEIO)
                        goto fail;
                /* fall back to copying the data on limits mismatches */
                copy = true;
        }

        i = *iter;
        do {
                if (copy)
                        ret = bio_copy_user_iov(rq, map_data, &i, gfp_mask);
                else
                        ret = bio_map_user_iov(rq, &i, gfp_mask);
                if (ret) {
                        if (ret == -EREMOTEIO)
                                ret = -EINVAL;
                        goto unmap_rq;
                }
                if (!bio)
                        bio = rq->bio;
        } while (iov_iter_count(&i));

        return 0;

unmap_rq:
        blk_rq_unmap_user(bio);
fail:
        rq->bio = NULL;
        return ret;
}
EXPORT_SYMBOL(blk_rq_map_user_iov);

int blk_rq_map_user(struct request_queue *q, struct request *rq,
                    struct rq_map_data *map_data, void __user *ubuf,
                    unsigned long len, gfp_t gfp_mask)
{
        struct iov_iter i;
        int ret = import_ubuf(rq_data_dir(rq), ubuf, len, &i);

        if (unlikely(ret < 0))
                return ret;

        return blk_rq_map_user_iov(q, rq, map_data, &i, gfp_mask);
}
EXPORT_SYMBOL(blk_rq_map_user);

int blk_rq_map_user_io(struct request *req, struct rq_map_data *map_data,
                       void __user *ubuf, unsigned long buf_len,
                       gfp_t gfp_mask, bool vec, int iov_count,
                       bool check_iter_count, int rw)
{
        int ret = 0;

        if (vec) {
                struct iovec fast_iov[UIO_FASTIOV];
                struct iovec *iov = fast_iov;
                struct iov_iter iter;

                ret = import_iovec(rw, ubuf, iov_count ? iov_count : buf_len,
                                   UIO_FASTIOV, &iov, &iter);
                if (ret < 0)
                        return ret;

                if (iov_count) {
                        /* SG_IO howto says that the shorter of the two wins */
                        iov_iter_truncate(&iter, buf_len);
                        if (check_iter_count && !iov_iter_count(&iter)) {
                                kfree(iov);
                                return -EINVAL;
                        }
                }

                ret = blk_rq_map_user_iov(req->q, req, map_data, &iter,
                                          gfp_mask);
                kfree(iov);
        } else if (buf_len) {
                ret = blk_rq_map_user(req->q, req, map_data, ubuf, buf_len,
                                      gfp_mask);
        }
        return ret;
}
EXPORT_SYMBOL(blk_rq_map_user_io);

/**
 * blk_rq_unmap_user - unmap a request with user data
 * @bio: start of bio list
 *
 * Description:
 *    Unmap a rq previously mapped by blk_rq_map_user(). The caller must
 *    supply the original rq->bio from the blk_rq_map_user() return, since
 *    the I/O completion may have changed rq->bio.
 */
int blk_rq_unmap_user(struct bio *bio)
{
        struct bio *next_bio;
        int ret = 0, ret2;

        while (bio) {
                if (bio->bi_private) {
                        ret2 = bio_uncopy_user(bio);
                        if (ret2 && !ret)
                                ret = ret2;
                } else {
                        bio_release_pages(bio, bio_data_dir(bio) == READ);
                }

                if (bio_integrity(bio))
                        bio_integrity_unmap_user(bio);

                next_bio = bio;
                bio = bio->bi_next;
                blk_mq_map_bio_put(next_bio);
        }

        return ret;
}
EXPORT_SYMBOL(blk_rq_unmap_user);
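/*
 * Illustrative sketch only (hypothetical example_* helper, abbreviated error
 * handling, <linux/blk-mq.h> assumed): the usual calling sequence for user
 * memory is map, execute, then unmap with the bio pointer saved right after
 * mapping, because completion may have changed rq->bio by the time the
 * request finishes.
 */
static int __maybe_unused example_read_to_user(struct request_queue *q,
                void __user *ubuf, unsigned long len)
{
        struct request *rq;
        struct bio *bio;
        int ret, ret2;

        rq = blk_mq_alloc_request(q, REQ_OP_DRV_IN, 0);
        if (IS_ERR(rq))
                return PTR_ERR(rq);

        ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
        if (ret)
                goto out_free_rq;

        /* remember the original bio, rq->bio is not stable across completion */
        bio = rq->bio;

        ret = blk_status_to_errno(blk_execute_rq(rq, false));

        /* copies bounced data back and/or releases pinned pages */
        ret2 = blk_rq_unmap_user(bio);
        if (ret2 && !ret)
                ret = ret2;
out_free_rq:
        blk_mq_free_request(rq);
        return ret;
}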
/**
 * blk_rq_map_kern - map kernel data to a request, for passthrough requests
 * @rq: request to fill
 * @kbuf: the kernel buffer
 * @len: length of kernel data
 * @gfp_mask: memory allocation flags
 *
 * Description:
 *    Data will be mapped directly if possible. Otherwise a bounce
 *    buffer is used. Can be called multiple times to append multiple
 *    buffers.
 */
int blk_rq_map_kern(struct request *rq, void *kbuf, unsigned int len,
                    gfp_t gfp_mask)
{
        unsigned long addr = (unsigned long) kbuf;
        struct bio *bio;
        int ret;

        if (len > (queue_max_hw_sectors(rq->q) << SECTOR_SHIFT))
                return -EINVAL;
        if (!len || !kbuf)
                return -EINVAL;

        if (!blk_rq_aligned(rq->q, addr, len) || object_is_on_stack(kbuf))
                bio = bio_copy_kern(rq, kbuf, len, gfp_mask);
        else
                bio = bio_map_kern(rq, kbuf, len, gfp_mask);

        if (IS_ERR(bio))
                return PTR_ERR(bio);

        ret = blk_rq_append_bio(rq, bio);
        if (unlikely(ret))
                blk_mq_map_bio_put(bio);
        return ret;
}
EXPORT_SYMBOL(blk_rq_map_kern);
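/*
 * Illustrative sketch only (hypothetical example_* helper, <linux/blk-mq.h>
 * assumed): mapping a kernel buffer is simpler than the user-space case
 * because there is nothing to unmap afterwards - the bio's end_io handler
 * (bio_map_kern_endio() or bio_copy_kern_endio()) frees it on completion.
 */
static int __maybe_unused example_write_from_kernel(struct request_queue *q,
                void *buf, unsigned int len)
{
        struct request *rq;
        int ret;

        rq = blk_mq_alloc_request(q, REQ_OP_DRV_OUT, 0);
        if (IS_ERR(rq))
                return PTR_ERR(rq);

        /*
         * An unaligned or on-stack buffer is transparently bounced through
         * bio_copy_kern(); a suitably aligned buffer is mapped directly.
         */
        ret = blk_rq_map_kern(rq, buf, len, GFP_KERNEL);
        if (!ret)
                ret = blk_status_to_errno(blk_execute_rq(rq, false));

        blk_mq_free_request(rq);
        return ret;
}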