xref: /linux/block/blk-map.c (revision b615879dbfea6cf1236acbc3f2fb25ae84e07071)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Functions related to mapping data to requests
4  */
5 #include <linux/kernel.h>
6 #include <linux/sched/task_stack.h>
7 #include <linux/module.h>
8 #include <linux/bio.h>
9 #include <linux/blkdev.h>
10 #include <linux/uio.h>
11 
12 #include "blk.h"
13 
/*
 * Per-bio bookkeeping for the bounce-buffer (copy) path. It is stored in
 * bio->bi_private by bio_copy_user_iov() and consumed (and freed) by
 * bio_uncopy_user().
 */
struct bio_map_data {
	/* pages were allocated by bio_copy_user_iov() (no rq_map_data given) */
	bool is_our_pages : 1;
	/* copied from rq_map_data->null_mapped; skips the copy-back on unmap */
	bool is_null_mapped : 1;
	/* private snapshot of the caller's iterator, taken at allocation time */
	struct iov_iter iter;
	/* copy of the caller's iovec array; only filled for iovec iterators */
	struct iovec iov[];
};
20 
21 static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
22 					       gfp_t gfp_mask)
23 {
24 	struct bio_map_data *bmd;
25 
26 	if (data->nr_segs > UIO_MAXIOV)
27 		return NULL;
28 
29 	bmd = kmalloc(struct_size(bmd, iov, data->nr_segs), gfp_mask);
30 	if (!bmd)
31 		return NULL;
32 	bmd->iter = *data;
33 	if (iter_is_iovec(data)) {
34 		memcpy(bmd->iov, iter_iov(data), sizeof(struct iovec) * data->nr_segs);
35 		bmd->iter.__iov = bmd->iov;
36 	}
37 	return bmd;
38 }
39 
40 /**
41  * bio_copy_from_iter - copy all pages from iov_iter to bio
42  * @bio: The &struct bio which describes the I/O as destination
43  * @iter: iov_iter as source
44  *
45  * Copy all pages from iov_iter to bio.
46  * Returns 0 on success, or error on failure.
47  */
48 static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
49 {
50 	struct bio_vec *bvec;
51 	struct bvec_iter_all iter_all;
52 
53 	bio_for_each_segment_all(bvec, bio, iter_all) {
54 		ssize_t ret;
55 
56 		ret = copy_page_from_iter(bvec->bv_page,
57 					  bvec->bv_offset,
58 					  bvec->bv_len,
59 					  iter);
60 
61 		if (!iov_iter_count(iter))
62 			break;
63 
64 		if (ret < bvec->bv_len)
65 			return -EFAULT;
66 	}
67 
68 	return 0;
69 }
70 
71 /**
72  * bio_copy_to_iter - copy all pages from bio to iov_iter
73  * @bio: The &struct bio which describes the I/O as source
74  * @iter: iov_iter as destination
75  *
76  * Copy all pages from bio to iov_iter.
77  * Returns 0 on success, or error on failure.
78  */
79 static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
80 {
81 	struct bio_vec *bvec;
82 	struct bvec_iter_all iter_all;
83 
84 	bio_for_each_segment_all(bvec, bio, iter_all) {
85 		ssize_t ret;
86 
87 		ret = copy_page_to_iter(bvec->bv_page,
88 					bvec->bv_offset,
89 					bvec->bv_len,
90 					&iter);
91 
92 		if (!iov_iter_count(&iter))
93 			break;
94 
95 		if (ret < bvec->bv_len)
96 			return -EFAULT;
97 	}
98 
99 	return 0;
100 }
101 
102 /**
103  *	bio_uncopy_user	-	finish previously mapped bio
104  *	@bio: bio being terminated
105  *
106  *	Free pages allocated from bio_copy_user_iov() and write back data
107  *	to user space in case of a read.
108  */
109 static int bio_uncopy_user(struct bio *bio)
110 {
111 	struct bio_map_data *bmd = bio->bi_private;
112 	int ret = 0;
113 
114 	if (!bmd->is_null_mapped) {
115 		/*
116 		 * if we're in a workqueue, the request is orphaned, so
117 		 * don't copy into a random user address space, just free
118 		 * and return -EINTR so user space doesn't expect any data.
119 		 */
120 		if (!current->mm)
121 			ret = -EINTR;
122 		else if (bio_data_dir(bio) == READ)
123 			ret = bio_copy_to_iter(bio, bmd->iter);
124 		if (bmd->is_our_pages)
125 			bio_free_pages(bio);
126 	}
127 	kfree(bmd);
128 	return ret;
129 }
130 
/*
 * Build one bounce-buffer bio for (part of) @iter and append it to @rq.
 *
 * Pages come either from @map_data (caller-supplied page array) or are
 * allocated here one at a time.  For writes the data is copied in from @iter
 * right away; for reads the copy-out happens later in bio_uncopy_user().
 * @iter is advanced by the amount that ended up in the bio, so the caller
 * can loop until it is drained.
 *
 * Returns 0 on success or a negative errno; on failure everything allocated
 * here is released again.
 */
static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
		struct iov_iter *iter, gfp_t gfp_mask)
{
	struct bio_map_data *bmd;
	struct page *page;
	struct bio *bio;
	int i = 0, ret;
	int nr_pages;
	unsigned int len = iter->count;
	unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0;

	/*
	 * We need to do a deep copy of the iov_iter including the iovecs.
	 * The caller provided iov might point to an on-stack or otherwise
	 * shortlived one.
	 */
	bmd = bio_alloc_map_data(iter, gfp_mask);
	if (!bmd)
		return -ENOMEM;

	/* without map_data the pages below are allocated (and owned) by us */
	bmd->is_our_pages = !map_data;
	bmd->is_null_mapped = (map_data && map_data->null_mapped);

	/* size the bio for the whole range, capped by bio_max_segs() */
	nr_pages = bio_max_segs(DIV_ROUND_UP(offset + len, PAGE_SIZE));

	ret = -ENOMEM;
	bio = bio_kmalloc(nr_pages, gfp_mask);
	if (!bio)
		goto out_bmd;
	bio_init_inline(bio, NULL, nr_pages, req_op(rq));

	if (map_data) {
		/*
		 * From here on nr_pages is reused as "pages per map_data
		 * entry" and i as the running page index into map_data.
		 */
		nr_pages = 1U << map_data->page_order;
		i = map_data->offset / PAGE_SIZE;
	}
	while (len) {
		unsigned int bytes = PAGE_SIZE;

		/* only the first page may start at a non-zero offset */
		bytes -= offset;

		if (bytes > len)
			bytes = len;

		if (map_data) {
			/* ran past the last page the caller provided */
			if (i == map_data->nr_entries * nr_pages) {
				ret = -ENOMEM;
				goto cleanup;
			}

			page = map_data->pages[i / nr_pages];
			page += (i % nr_pages);

			i++;
		} else {
			page = alloc_page(GFP_NOIO | gfp_mask);
			if (!page) {
				ret = -ENOMEM;
				goto cleanup;
			}
		}

		/*
		 * The bio is full: drop the page that did not fit (if it is
		 * ours) and go on with what has been added so far.
		 */
		if (bio_add_page(bio, page, bytes, offset) < bytes) {
			if (!map_data)
				__free_page(page);
			break;
		}

		len -= bytes;
		offset = 0;
	}

	/* remember how far into the caller's pages this bio reached */
	if (map_data)
		map_data->offset += bio->bi_iter.bi_size;

	/*
	 * The bio is populated with pages; now fill in the data (or skip
	 * past it in the iterator when there is nothing to copy in).
	 */
	if (iov_iter_rw(iter) == WRITE &&
	     (!map_data || !map_data->null_mapped)) {
		ret = bio_copy_from_iter(bio, iter);
		if (ret)
			goto cleanup;
	} else if (map_data && map_data->from_user) {
		/* work on a copy so the caller's iterator is left untouched */
		struct iov_iter iter2 = *iter;

		/* This is the copy-in part of SG_DXFER_TO_FROM_DEV. */
		iter2.data_source = ITER_SOURCE;
		ret = bio_copy_from_iter(bio, &iter2);
		if (ret)
			goto cleanup;
	} else {
		/* zero pages we allocated ourselves before handing them out */
		if (bmd->is_our_pages)
			zero_fill_bio(bio);
		iov_iter_advance(iter, bio->bi_iter.bi_size);
	}

	/* bio_uncopy_user() picks the map data up from here */
	bio->bi_private = bmd;

	ret = blk_rq_append_bio(rq, bio);
	if (ret)
		goto cleanup;
	return 0;
cleanup:
	/* pages taken from map_data belong to the caller; free only our own */
	if (!map_data)
		bio_free_pages(bio);
	bio_uninit(bio);
	kfree(bio);
out_bmd:
	kfree(bmd);
	return ret;
}
242 
243 static void blk_mq_map_bio_put(struct bio *bio)
244 {
245 	if (bio->bi_opf & REQ_ALLOC_CACHE) {
246 		bio_put(bio);
247 	} else {
248 		bio_uninit(bio);
249 		kfree(bio);
250 	}
251 }
252 
253 static struct bio *blk_rq_map_bio_alloc(struct request *rq,
254 		unsigned int nr_vecs, gfp_t gfp_mask)
255 {
256 	struct block_device *bdev = rq->q->disk ? rq->q->disk->part0 : NULL;
257 	struct bio *bio;
258 
259 	if (rq->cmd_flags & REQ_ALLOC_CACHE && (nr_vecs <= BIO_INLINE_VECS)) {
260 		bio = bio_alloc_bioset(bdev, nr_vecs, rq->cmd_flags, gfp_mask,
261 					&fs_bio_set);
262 		if (!bio)
263 			return NULL;
264 	} else {
265 		bio = bio_kmalloc(nr_vecs, gfp_mask);
266 		if (!bio)
267 			return NULL;
268 		bio_init_inline(bio, bdev, nr_vecs, req_op(rq));
269 	}
270 	return bio;
271 }
272 
273 static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
274 		gfp_t gfp_mask)
275 {
276 	unsigned int nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS);
277 	struct bio *bio;
278 	int ret;
279 
280 	if (!iov_iter_count(iter))
281 		return -EINVAL;
282 
283 	bio = blk_rq_map_bio_alloc(rq, nr_vecs, gfp_mask);
284 	if (!bio)
285 		return -ENOMEM;
286 	ret = bio_iov_iter_get_pages(bio, iter);
287 	if (ret)
288 		goto out_put;
289 	ret = blk_rq_append_bio(rq, bio);
290 	if (ret)
291 		goto out_release;
292 	return 0;
293 
294 out_release:
295 	bio_release_pages(bio, false);
296 out_put:
297 	blk_mq_map_bio_put(bio);
298 	return ret;
299 }
300 
/*
 * For a non-write bio that carries an address in bi_private (set by
 * bio_map_kern() for vmalloc buffers), invalidate the kernel vmap range
 * covering all of the bio's vectors.  Compiles to nothing unless the
 * architecture defines ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE.
 */
static void bio_invalidate_vmalloc_pages(struct bio *bio)
{
#ifdef ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE
	unsigned long total = 0;
	unsigned long idx;

	if (!bio->bi_private || op_is_write(bio_op(bio)))
		return;

	for (idx = 0; idx < bio->bi_vcnt; idx++)
		total += bio->bi_io_vec[idx].bv_len;

	invalidate_kernel_vmap_range(bio->bi_private, total);
#endif
}
313 
/*
 * Completion handler installed by bio_map_kern(): invalidate the vmap range
 * if the buffer was a vmalloc address, then tear down and free the
 * kmalloc'ed bio.  The data buffer itself belongs to the caller and is not
 * touched.
 */
static void bio_map_kern_endio(struct bio *bio)
{
	bio_invalidate_vmalloc_pages(bio);
	bio_uninit(bio);
	kfree(bio);
}
320 
321 static struct bio *bio_map_kern(void *data, unsigned int len, enum req_op op,
322 		gfp_t gfp_mask)
323 {
324 	unsigned int nr_vecs = bio_add_max_vecs(data, len);
325 	struct bio *bio;
326 
327 	bio = bio_kmalloc(nr_vecs, gfp_mask);
328 	if (!bio)
329 		return ERR_PTR(-ENOMEM);
330 	bio_init_inline(bio, NULL, nr_vecs, op);
331 	if (is_vmalloc_addr(data)) {
332 		bio->bi_private = data;
333 		if (!bio_add_vmalloc(bio, data, len)) {
334 			bio_uninit(bio);
335 			kfree(bio);
336 			return ERR_PTR(-EINVAL);
337 		}
338 	} else {
339 		bio_add_virt_nofail(bio, data, len);
340 	}
341 	bio->bi_end_io = bio_map_kern_endio;
342 	return bio;
343 }
344 
/*
 * Completion handler for bounce bios built by bio_copy_kern(): free the
 * bounce pages attached to the bio, then tear down and free the kmalloc'ed
 * bio itself.
 */
static void bio_copy_kern_endio(struct bio *bio)
{
	bio_free_pages(bio);
	bio_uninit(bio);
	kfree(bio);
}
351 
352 static void bio_copy_kern_endio_read(struct bio *bio)
353 {
354 	char *p = bio->bi_private;
355 	struct bio_vec *bvec;
356 	struct bvec_iter_all iter_all;
357 
358 	bio_for_each_segment_all(bvec, bio, iter_all) {
359 		memcpy_from_bvec(p, bvec);
360 		p += bvec->bv_len;
361 	}
362 
363 	bio_copy_kern_endio(bio);
364 }
365 
366 /**
367  *	bio_copy_kern	-	copy kernel address into bio
368  *	@data: pointer to buffer to copy
369  *	@len: length in bytes
370  *	@op: bio/request operation
371  *	@gfp_mask: allocation flags for bio and page allocation
372  *
373  *	copy the kernel address into a bio suitable for io to a block
374  *	device. Returns an error pointer in case of error.
375  */
376 static struct bio *bio_copy_kern(void *data, unsigned int len, enum req_op op,
377 		gfp_t gfp_mask)
378 {
379 	unsigned long kaddr = (unsigned long)data;
380 	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
381 	unsigned long start = kaddr >> PAGE_SHIFT;
382 	struct bio *bio;
383 	void *p = data;
384 	int nr_pages = 0;
385 
386 	/*
387 	 * Overflow, abort
388 	 */
389 	if (end < start)
390 		return ERR_PTR(-EINVAL);
391 
392 	nr_pages = end - start;
393 	bio = bio_kmalloc(nr_pages, gfp_mask);
394 	if (!bio)
395 		return ERR_PTR(-ENOMEM);
396 	bio_init_inline(bio, NULL, nr_pages, op);
397 
398 	while (len) {
399 		struct page *page;
400 		unsigned int bytes = PAGE_SIZE;
401 
402 		if (bytes > len)
403 			bytes = len;
404 
405 		page = alloc_page(GFP_NOIO | __GFP_ZERO | gfp_mask);
406 		if (!page)
407 			goto cleanup;
408 
409 		if (op_is_write(op))
410 			memcpy(page_address(page), p, bytes);
411 
412 		if (bio_add_page(bio, page, bytes, 0) < bytes)
413 			break;
414 
415 		len -= bytes;
416 		p += bytes;
417 	}
418 
419 	if (op_is_write(op)) {
420 		bio->bi_end_io = bio_copy_kern_endio;
421 	} else {
422 		bio->bi_end_io = bio_copy_kern_endio_read;
423 		bio->bi_private = data;
424 	}
425 
426 	return bio;
427 
428 cleanup:
429 	bio_free_pages(bio);
430 	bio_uninit(bio);
431 	kfree(bio);
432 	return ERR_PTR(-ENOMEM);
433 }
434 
435 /*
436  * Append a bio to a passthrough request.  Only works if the bio can be merged
437  * into the request based on the driver constraints.
438  */
439 int blk_rq_append_bio(struct request *rq, struct bio *bio)
440 {
441 	const struct queue_limits *lim = &rq->q->limits;
442 	unsigned int max_bytes = lim->max_hw_sectors << SECTOR_SHIFT;
443 	unsigned int nr_segs = 0;
444 	int ret;
445 
446 	/* check that the data layout matches the hardware restrictions */
447 	ret = bio_split_io_at(bio, lim, &nr_segs, max_bytes, 0);
448 	if (ret) {
449 		/* if we would have to split the bio, copy instead */
450 		if (ret > 0)
451 			ret = -EREMOTEIO;
452 		return ret;
453 	}
454 
455 	if (rq->bio) {
456 		if (!ll_back_merge_fn(rq, bio, nr_segs))
457 			return -EINVAL;
458 		rq->biotail->bi_next = bio;
459 		rq->biotail = bio;
460 		rq->__data_len += bio->bi_iter.bi_size;
461 		bio_crypt_free_ctx(bio);
462 		return 0;
463 	}
464 
465 	rq->nr_phys_segments = nr_segs;
466 	rq->bio = rq->biotail = bio;
467 	rq->__data_len = bio->bi_iter.bi_size;
468 	return 0;
469 }
470 EXPORT_SYMBOL(blk_rq_append_bio);
471 
472 /* Prepare bio for passthrough IO given ITER_BVEC iter */
473 static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
474 {
475 	unsigned int max_bytes = rq->q->limits.max_hw_sectors << SECTOR_SHIFT;
476 	struct bio *bio;
477 	int ret;
478 
479 	if (!iov_iter_count(iter) || iov_iter_count(iter) > max_bytes)
480 		return -EINVAL;
481 
482 	/* reuse the bvecs from the iterator instead of allocating new ones */
483 	bio = blk_rq_map_bio_alloc(rq, 0, GFP_KERNEL);
484 	if (!bio)
485 		return -ENOMEM;
486 	bio_iov_bvec_set(bio, iter);
487 
488 	ret = blk_rq_append_bio(rq, bio);
489 	if (ret)
490 		blk_mq_map_bio_put(bio);
491 	return ret;
492 }
493 
/**
 * blk_rq_map_user_iov - map user data to a request, for passthrough requests
 * @q:		request queue where request should be inserted
 * @rq:		request to map data to
 * @map_data:   pointer to the rq_map_data holding pages (if necessary)
 * @iter:	iovec iterator
 * @gfp_mask:	memory allocation flags
 *
 * Description:
 *    Data will be mapped directly for zero copy I/O, if possible. Otherwise
 *    a kernel bounce buffer is used.
 *
 *    A matching blk_rq_unmap_user() must be issued at the end of I/O, while
 *    still in process context.
 *
 * Return: 0 on success or a negative errno.  On failure every bio mapped by
 * this call is unmapped again and rq->bio is reset to NULL.
 */
int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
			struct rq_map_data *map_data,
			const struct iov_iter *iter, gfp_t gfp_mask)
{
	bool copy = false, map_bvec = false;
	unsigned long align = blk_lim_dma_alignment_and_pad(&q->limits);
	struct bio *bio = NULL;
	struct iov_iter i;
	int ret = -EINVAL;

	/*
	 * Decide between bouncing and direct mapping.  The order matters:
	 * caller-supplied pages and misalignment force a copy before the
	 * bvec fast path is even considered.
	 */
	if (map_data)
		copy = true;
	else if (iov_iter_alignment(iter) & align)
		copy = true;
	else if (iov_iter_is_bvec(iter))
		map_bvec = true;
	else if (!user_backed_iter(iter))
		copy = true;
	else if (queue_virt_boundary(q))
		copy = queue_virt_boundary(q) & iov_iter_gap_alignment(iter);

	if (map_bvec) {
		ret = blk_rq_map_user_bvec(rq, iter);
		if (!ret)
			return 0;
		if (ret != -EREMOTEIO)
			goto fail;
		/* fall back to copying the data on limits mismatches */
		copy = true;
	}

	/* work on a private copy; the caller's iterator is const */
	i = *iter;
	do {
		/* each pass maps one bio and advances i past what it covered */
		if (copy)
			ret = bio_copy_user_iov(rq, map_data, &i, gfp_mask);
		else
			ret = bio_map_user_iov(rq, &i, gfp_mask);
		if (ret) {
			/* -EREMOTEIO is internal; report it as -EINVAL */
			if (ret == -EREMOTEIO)
				ret = -EINVAL;
			goto unmap_rq;
		}
		/* remember the list head to unwind from if a later pass fails */
		if (!bio)
			bio = rq->bio;
	} while (iov_iter_count(&i));

	return 0;

unmap_rq:
	/* bio is still NULL if the very first pass failed; then this is a no-op */
	blk_rq_unmap_user(bio);
fail:
	rq->bio = NULL;
	return ret;
}
EXPORT_SYMBOL(blk_rq_map_user_iov);
564 
565 int blk_rq_map_user(struct request_queue *q, struct request *rq,
566 		    struct rq_map_data *map_data, void __user *ubuf,
567 		    unsigned long len, gfp_t gfp_mask)
568 {
569 	struct iov_iter i;
570 	int ret = import_ubuf(rq_data_dir(rq), ubuf, len, &i);
571 
572 	if (unlikely(ret < 0))
573 		return ret;
574 
575 	return blk_rq_map_user_iov(q, rq, map_data, &i, gfp_mask);
576 }
577 EXPORT_SYMBOL(blk_rq_map_user);
578 
579 int blk_rq_map_user_io(struct request *req, struct rq_map_data *map_data,
580 		void __user *ubuf, unsigned long buf_len, gfp_t gfp_mask,
581 		bool vec, int iov_count, bool check_iter_count, int rw)
582 {
583 	int ret = 0;
584 
585 	if (vec) {
586 		struct iovec fast_iov[UIO_FASTIOV];
587 		struct iovec *iov = fast_iov;
588 		struct iov_iter iter;
589 
590 		ret = import_iovec(rw, ubuf, iov_count ? iov_count : buf_len,
591 				UIO_FASTIOV, &iov, &iter);
592 		if (ret < 0)
593 			return ret;
594 
595 		if (iov_count) {
596 			/* SG_IO howto says that the shorter of the two wins */
597 			iov_iter_truncate(&iter, buf_len);
598 			if (check_iter_count && !iov_iter_count(&iter)) {
599 				kfree(iov);
600 				return -EINVAL;
601 			}
602 		}
603 
604 		ret = blk_rq_map_user_iov(req->q, req, map_data, &iter,
605 				gfp_mask);
606 		kfree(iov);
607 	} else if (buf_len) {
608 		ret = blk_rq_map_user(req->q, req, map_data, ubuf, buf_len,
609 				gfp_mask);
610 	}
611 	return ret;
612 }
613 EXPORT_SYMBOL(blk_rq_map_user_io);
614 
615 /**
616  * blk_rq_unmap_user - unmap a request with user data
617  * @bio:	       start of bio list
618  *
619  * Description:
620  *    Unmap a rq previously mapped by blk_rq_map_user(). The caller must
621  *    supply the original rq->bio from the blk_rq_map_user() return, since
622  *    the I/O completion may have changed rq->bio.
623  */
624 int blk_rq_unmap_user(struct bio *bio)
625 {
626 	struct bio *next_bio;
627 	int ret = 0, ret2;
628 
629 	while (bio) {
630 		if (bio->bi_private) {
631 			ret2 = bio_uncopy_user(bio);
632 			if (ret2 && !ret)
633 				ret = ret2;
634 		} else {
635 			bio_release_pages(bio, bio_data_dir(bio) == READ);
636 		}
637 
638 		if (bio_integrity(bio))
639 			bio_integrity_unmap_user(bio);
640 
641 		next_bio = bio;
642 		bio = bio->bi_next;
643 		blk_mq_map_bio_put(next_bio);
644 	}
645 
646 	return ret;
647 }
648 EXPORT_SYMBOL(blk_rq_unmap_user);
649 
650 /**
651  * blk_rq_map_kern - map kernel data to a request, for passthrough requests
652  * @rq:		request to fill
653  * @kbuf:	the kernel buffer
654  * @len:	length of user data
655  * @gfp_mask:	memory allocation flags
656  *
657  * Description:
658  *    Data will be mapped directly if possible. Otherwise a bounce
659  *    buffer is used. Can be called multiple times to append multiple
660  *    buffers.
661  */
662 int blk_rq_map_kern(struct request *rq, void *kbuf, unsigned int len,
663 		gfp_t gfp_mask)
664 {
665 	unsigned long addr = (unsigned long) kbuf;
666 	struct bio *bio;
667 	int ret;
668 
669 	if (len > (queue_max_hw_sectors(rq->q) << SECTOR_SHIFT))
670 		return -EINVAL;
671 	if (!len || !kbuf)
672 		return -EINVAL;
673 
674 	if (!blk_rq_aligned(rq->q, addr, len) || object_is_on_stack(kbuf))
675 		bio = bio_copy_kern(kbuf, len, req_op(rq), gfp_mask);
676 	else
677 		bio = bio_map_kern(kbuf, len, req_op(rq), gfp_mask);
678 
679 	if (IS_ERR(bio))
680 		return PTR_ERR(bio);
681 
682 	ret = blk_rq_append_bio(rq, bio);
683 	if (unlikely(ret)) {
684 		bio_uninit(bio);
685 		kfree(bio);
686 	}
687 	return ret;
688 }
689 EXPORT_SYMBOL(blk_rq_map_kern);
690