xref: /linux/block/blk-map.c (revision 345dfaaf9f8b8dd0fc563e6f48586922b38ed11c)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Functions related to mapping data to requests
4  */
5 #include <linux/kernel.h>
6 #include <linux/sched/task_stack.h>
7 #include <linux/module.h>
8 #include <linux/bio.h>
9 #include <linux/blkdev.h>
10 #include <linux/uio.h>
11 
12 #include "blk.h"
13 
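/*
 * Private bookkeeping for a copied (bounce-buffered) mapping: a deep copy
 * of the iov_iter and its iovecs so read data can be copied back to user
 * space at completion, plus flags recording whether the pages were
 * allocated here and whether this is a data-less "null" mapping.
 */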
14 struct bio_map_data {
15 	bool is_our_pages : 1;
16 	bool is_null_mapped : 1;
17 	struct iov_iter iter;
18 	struct iovec iov[];
19 };
20 
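/*
 * Allocate a bio_map_data with room for the iterator's iovec array and
 * deep-copy @data, since the caller's iovecs may be on-stack or otherwise
 * short-lived.  Fails for iterators with more than UIO_MAXIOV segments.
 */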
21 static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
22 					       gfp_t gfp_mask)
23 {
24 	struct bio_map_data *bmd;
25 
26 	if (data->nr_segs > UIO_MAXIOV)
27 		return NULL;
28 
29 	bmd = kmalloc_flex(*bmd, iov, data->nr_segs, gfp_mask);
30 	if (!bmd)
31 		return NULL;
32 	bmd->iter = *data;
33 	if (iter_is_iovec(data)) {
34 		memcpy(bmd->iov, iter_iov(data), sizeof(struct iovec) * data->nr_segs);
35 		bmd->iter.__iov = bmd->iov;
36 	}
37 	return bmd;
38 }
39 
40 static inline void blk_mq_map_bio_put(struct bio *bio)
41 {
42 	bio_put(bio);
43 }
44 
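/*
 * Allocate a bio for a passthrough request from fs_bio_set, inheriting the
 * request's operation flags and pointing it at the whole-device bdev when
 * the queue has a disk attached.
 */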
45 static struct bio *blk_rq_map_bio_alloc(struct request *rq,
46 		unsigned int nr_vecs, gfp_t gfp_mask)
47 {
48 	struct block_device *bdev = rq->q->disk ? rq->q->disk->part0 : NULL;
49 	struct bio *bio;
50 
51 	bio = bio_alloc_bioset(bdev, nr_vecs, rq->cmd_flags, gfp_mask,
52 				&fs_bio_set);
53 	if (!bio)
54 		return NULL;
55 
56 	return bio;
57 }
58 
59 /**
60  * bio_copy_from_iter - copy all pages from iov_iter to bio
61  * @bio: The &struct bio which describes the I/O as destination
62  * @iter: iov_iter as source
63  *
64  * Copy all pages from iov_iter to bio.
65  * Returns 0 on success, or error on failure.
66  */
67 static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
68 {
69 	struct bio_vec *bvec;
70 	struct bvec_iter_all iter_all;
71 
72 	bio_for_each_segment_all(bvec, bio, iter_all) {
73 		ssize_t ret;
74 
75 		ret = copy_page_from_iter(bvec->bv_page,
76 					  bvec->bv_offset,
77 					  bvec->bv_len,
78 					  iter);
79 
80 		if (!iov_iter_count(iter))
81 			break;
82 
83 		if (ret < bvec->bv_len)
84 			return -EFAULT;
85 	}
86 
87 	return 0;
88 }
89 
90 /**
91  * bio_copy_to_iter - copy all pages from bio to iov_iter
92  * @bio: The &struct bio which describes the I/O as source
93  * @iter: iov_iter as destination
94  *
95  * Copy all pages from bio to iov_iter.
96  * Returns 0 on success, or error on failure.
97  */
98 static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
99 {
100 	struct bio_vec *bvec;
101 	struct bvec_iter_all iter_all;
102 
103 	bio_for_each_segment_all(bvec, bio, iter_all) {
104 		ssize_t ret;
105 
106 		ret = copy_page_to_iter(bvec->bv_page,
107 					bvec->bv_offset,
108 					bvec->bv_len,
109 					&iter);
110 
111 		if (!iov_iter_count(&iter))
112 			break;
113 
114 		if (ret < bvec->bv_len)
115 			return -EFAULT;
116 	}
117 
118 	return 0;
119 }
120 
121 /**
122  *	bio_uncopy_user	-	finish previously mapped bio
123  *	@bio: bio being terminated
124  *
125  *	Free pages allocated from bio_copy_user_iov() and write back data
126  *	to user space in case of a read.
127  */
128 static int bio_uncopy_user(struct bio *bio)
129 {
130 	struct bio_map_data *bmd = bio->bi_private;
131 	int ret = 0;
132 
133 	if (!bmd->is_null_mapped) {
134 		/*
135 		 * if we're in a workqueue, the request is orphaned, so
136 		 * don't copy into a random user address space, just free
137 		 * and return -EINTR so user space doesn't expect any data.
138 		 */
139 		if (!current->mm)
140 			ret = -EINTR;
141 		else if (bio_data_dir(bio) == READ)
142 			ret = bio_copy_to_iter(bio, bmd->iter);
143 		if (bmd->is_our_pages)
144 			bio_free_pages(bio);
145 	}
146 	kfree(bmd);
147 	return ret;
148 }
149 
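/*
 * Bounce-buffer path: build the bio from freshly allocated pages (or the
 * caller's preallocated map_data pages), copy the data in for writes, and
 * stash the bio_map_data in bi_private so bio_uncopy_user() can copy read
 * data back out and free everything at unmap time.
 */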
150 static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
151 		struct iov_iter *iter, gfp_t gfp_mask)
152 {
153 	struct bio_map_data *bmd;
154 	struct page *page;
155 	struct bio *bio;
156 	int i = 0, ret;
157 	int nr_pages;
158 	unsigned int len = iter->count;
159 	unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0;
160 
161 	bmd = bio_alloc_map_data(iter, gfp_mask);
162 	if (!bmd)
163 		return -ENOMEM;
164 
165 	/*
166 	 * We need to do a deep copy of the iov_iter including the iovecs.
167 	 * The caller-provided iov might point to an on-stack or otherwise
168 	 * short-lived one.
169 	 */
170 	bmd->is_our_pages = !map_data;
171 	bmd->is_null_mapped = (map_data && map_data->null_mapped);
172 
173 	nr_pages = bio_max_segs(DIV_ROUND_UP(offset + len, PAGE_SIZE));
174 
175 	ret = -ENOMEM;
176 	bio = blk_rq_map_bio_alloc(rq, nr_pages, gfp_mask);
177 	if (!bio)
178 		goto out_bmd;
179 
180 	if (map_data) {
181 		nr_pages = 1U << map_data->page_order;
182 		i = map_data->offset / PAGE_SIZE;
183 	}
184 	while (len) {
185 		unsigned int bytes = PAGE_SIZE;
186 
187 		bytes -= offset;
188 
189 		if (bytes > len)
190 			bytes = len;
191 
192 		if (map_data) {
193 			if (i == map_data->nr_entries * nr_pages) {
194 				ret = -ENOMEM;
195 				goto cleanup;
196 			}
197 
198 			page = map_data->pages[i / nr_pages];
199 			page += (i % nr_pages);
200 
201 			i++;
202 		} else {
203 			page = alloc_page(GFP_NOIO | gfp_mask);
204 			if (!page) {
205 				ret = -ENOMEM;
206 				goto cleanup;
207 			}
208 		}
209 
210 		if (bio_add_page(bio, page, bytes, offset) < bytes) {
211 			if (!map_data)
212 				__free_page(page);
213 			break;
214 		}
215 
216 		len -= bytes;
217 		offset = 0;
218 	}
219 
220 	if (map_data)
221 		map_data->offset += bio->bi_iter.bi_size;
222 
223 	/*
224 	 * Success: pages are in place, now populate or zero-fill them.
225 	 */
226 	if (iov_iter_rw(iter) == WRITE &&
227 	     (!map_data || !map_data->null_mapped)) {
228 		ret = bio_copy_from_iter(bio, iter);
229 		if (ret)
230 			goto cleanup;
231 	} else if (map_data && map_data->from_user) {
232 		struct iov_iter iter2 = *iter;
233 
234 		/* This is the copy-in part of SG_DXFER_TO_FROM_DEV. */
235 		iter2.data_source = ITER_SOURCE;
236 		ret = bio_copy_from_iter(bio, &iter2);
237 		if (ret)
238 			goto cleanup;
239 	} else {
240 		if (bmd->is_our_pages)
241 			zero_fill_bio(bio);
242 		iov_iter_advance(iter, bio->bi_iter.bi_size);
243 	}
244 
245 	bio->bi_private = bmd;
246 
247 	ret = blk_rq_append_bio(rq, bio);
248 	if (ret)
249 		goto cleanup;
250 	return 0;
251 cleanup:
252 	if (!map_data)
253 		bio_free_pages(bio);
254 	blk_mq_map_bio_put(bio);
255 out_bmd:
256 	kfree(bmd);
257 	return ret;
258 }
259 
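/*
 * Zero-copy path: pin the user pages described by @iter directly into a
 * bio and append it to the request; the pages are released again by
 * blk_rq_unmap_user() via bio_release_pages().
 */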
260 static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
261 		gfp_t gfp_mask)
262 {
263 	unsigned int nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS);
264 	struct bio *bio;
265 	int ret;
266 
267 	if (!iov_iter_count(iter))
268 		return -EINVAL;
269 
270 	bio = blk_rq_map_bio_alloc(rq, nr_vecs, gfp_mask);
271 	if (!bio)
272 		return -ENOMEM;
273 	/*
274 	 * No alignment requirements on our part to support arbitrary
275 	 * passthrough commands.
276 	 */
277 	ret = bio_iov_iter_get_pages(bio, iter, 0);
278 	if (ret)
279 		goto out_put;
280 	ret = blk_rq_append_bio(rq, bio);
281 	if (ret)
282 		goto out_release;
283 	return 0;
284 
285 out_release:
286 	bio_release_pages(bio, false);
287 out_put:
288 	blk_mq_map_bio_put(bio);
289 	return ret;
290 }
291 
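/*
 * For reads into a vmalloc buffer, invalidate the kernel vmap alias on
 * architectures that need it so the CPU does not see stale cache lines
 * after the device has written the data.
 */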
292 static void bio_invalidate_vmalloc_pages(struct bio *bio)
293 {
294 #ifdef ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE
295 	if (bio->bi_private && !op_is_write(bio_op(bio))) {
296 		unsigned long i, len = 0;
297 
298 		for (i = 0; i < bio->bi_vcnt; i++)
299 			len += bio->bi_io_vec[i].bv_len;
300 		invalidate_kernel_vmap_range(bio->bi_private, len);
301 	}
302 #endif
303 }
304 
305 static void bio_map_kern_endio(struct bio *bio)
306 {
307 	bio_invalidate_vmalloc_pages(bio);
308 	blk_mq_map_bio_put(bio);
309 }
310 
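/*
 * Map a kernel buffer into a bio without copying: vmalloc memory is added
 * page by page (and remembered in bi_private for cache invalidation on
 * completion), while linearly mapped memory is added as a single virtual
 * range.
 */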
311 static struct bio *bio_map_kern(struct request *rq, void *data, unsigned int len,
312 		gfp_t gfp_mask)
313 {
314 	unsigned int nr_vecs = bio_add_max_vecs(data, len);
315 	struct bio *bio;
316 
317 	bio = blk_rq_map_bio_alloc(rq, nr_vecs, gfp_mask);
318 	if (!bio)
319 		return ERR_PTR(-ENOMEM);
320 
321 	if (is_vmalloc_addr(data)) {
322 		bio->bi_private = data;
323 		if (!bio_add_vmalloc(bio, data, len)) {
324 			blk_mq_map_bio_put(bio);
325 			return ERR_PTR(-EINVAL);
326 		}
327 	} else {
328 		bio_add_virt_nofail(bio, data, len);
329 	}
330 	bio->bi_end_io = bio_map_kern_endio;
331 	return bio;
332 }
333 
334 static void bio_copy_kern_endio(struct bio *bio)
335 {
336 	bio_free_pages(bio);
337 	blk_mq_map_bio_put(bio);
338 }
339 
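/*
 * Completion handler for bounce-buffered reads: copy the data from the
 * bounce pages back into the caller's buffer (saved in bi_private) before
 * freeing the pages and the bio.
 */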
340 static void bio_copy_kern_endio_read(struct bio *bio)
341 {
342 	char *p = bio->bi_private;
343 	struct bio_vec *bvec;
344 	struct bvec_iter_all iter_all;
345 
346 	bio_for_each_segment_all(bvec, bio, iter_all) {
347 		memcpy_from_bvec(p, bvec);
348 		p += bvec->bv_len;
349 	}
350 
351 	bio_copy_kern_endio(bio);
352 }
353 
354 /**
355  *	bio_copy_kern	-	copy kernel address into bio
356  *	@rq: request to fill
357  *	@data: pointer to buffer to copy
358  *	@len: length in bytes
360  *	@gfp_mask: allocation flags for bio and page allocation
361  *
362  *	Copy the kernel address into a bio suitable for I/O to a block
363  *	device. Returns an error pointer in case of error.
364  */
365 static struct bio *bio_copy_kern(struct request *rq, void *data, unsigned int len,
366 		gfp_t gfp_mask)
367 {
368 	enum req_op op = req_op(rq);
369 	unsigned long kaddr = (unsigned long)data;
370 	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
371 	unsigned long start = kaddr >> PAGE_SHIFT;
372 	struct bio *bio;
373 	void *p = data;
374 	int nr_pages = 0;
375 
376 	/*
377 	 * Overflow, abort
378 	 */
379 	if (end < start)
380 		return ERR_PTR(-EINVAL);
381 
382 	nr_pages = end - start;
383 	bio = blk_rq_map_bio_alloc(rq, nr_pages, gfp_mask);
384 	if (!bio)
385 		return ERR_PTR(-ENOMEM);
386 
387 	while (len) {
388 		struct page *page;
389 		unsigned int bytes = PAGE_SIZE;
390 
391 		if (bytes > len)
392 			bytes = len;
393 
394 		page = alloc_page(GFP_NOIO | __GFP_ZERO | gfp_mask);
395 		if (!page)
396 			goto cleanup;
397 
398 		if (op_is_write(op))
399 			memcpy(page_address(page), p, bytes);
400 
401 		__bio_add_page(bio, page, bytes, 0);
402 
403 		len -= bytes;
404 		p += bytes;
405 	}
406 
407 	if (op_is_write(op)) {
408 		bio->bi_end_io = bio_copy_kern_endio;
409 	} else {
410 		bio->bi_end_io = bio_copy_kern_endio_read;
411 		bio->bi_private = data;
412 	}
413 
414 	return bio;
415 
416 cleanup:
417 	bio_free_pages(bio);
418 	blk_mq_map_bio_put(bio);
419 	return ERR_PTR(-ENOMEM);
420 }
421 
422 /*
423  * Append a bio to a passthrough request.  Only works if the bio can be merged
424  * into the request based on the driver constraints.
425  */
426 int blk_rq_append_bio(struct request *rq, struct bio *bio)
427 {
428 	const struct queue_limits *lim = &rq->q->limits;
429 	unsigned int max_bytes = lim->max_hw_sectors << SECTOR_SHIFT;
430 	unsigned int nr_segs = 0;
431 	int ret;
432 
433 	/* check that the data layout matches the hardware restrictions */
434 	ret = bio_split_io_at(bio, lim, &nr_segs, max_bytes, 0);
435 	if (ret) {
436 		/* if we would have to split the bio, copy instead */
437 		if (ret > 0)
438 			ret = -EREMOTEIO;
439 		return ret;
440 	}
441 
442 	if (rq->bio) {
443 		if (!ll_back_merge_fn(rq, bio, nr_segs))
444 			return -EINVAL;
445 		rq->phys_gap_bit = bio_seg_gap(rq->q, rq->biotail, bio,
446 					       rq->phys_gap_bit);
447 		rq->biotail->bi_next = bio;
448 		rq->biotail = bio;
449 		rq->__data_len += bio->bi_iter.bi_size;
450 		bio_crypt_free_ctx(bio);
451 		return 0;
452 	}
453 
454 	rq->nr_phys_segments = nr_segs;
455 	rq->bio = rq->biotail = bio;
456 	rq->__data_len = bio->bi_iter.bi_size;
457 	rq->phys_gap_bit = bio->bi_bvec_gap_bit;
458 	return 0;
459 }
460 EXPORT_SYMBOL(blk_rq_append_bio);
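/*
 * Illustrative sketch (not part of this file): a typical external caller
 * builds a bio itself and then hands it to blk_rq_append_bio(), dropping
 * its own reference if the append is rejected.  "page" and "len" are
 * placeholders:
 *
 *	bio = bio_alloc(NULL, 1, rq->cmd_flags, GFP_KERNEL);
 *	if (!bio)
 *		return -ENOMEM;
 *	if (bio_add_page(bio, page, len, 0) < len ||
 *	    blk_rq_append_bio(rq, bio)) {
 *		bio_put(bio);
 *		return -EINVAL;
 *	}
 */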
461 
462 /* Prepare a bio for passthrough I/O given an ITER_BVEC iter */
463 static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
464 {
465 	unsigned int max_bytes = rq->q->limits.max_hw_sectors << SECTOR_SHIFT;
466 	struct bio *bio;
467 	int ret;
468 
469 	if (!iov_iter_count(iter) || iov_iter_count(iter) > max_bytes)
470 		return -EINVAL;
471 
472 	/* reuse the bvecs from the iterator instead of allocating new ones */
473 	bio = blk_rq_map_bio_alloc(rq, 0, GFP_KERNEL);
474 	if (!bio)
475 		return -ENOMEM;
476 	bio_iov_bvec_set(bio, iter);
477 
478 	ret = blk_rq_append_bio(rq, bio);
479 	if (ret)
480 		blk_mq_map_bio_put(bio);
481 	return ret;
482 }
483 
484 /**
485  * blk_rq_map_user_iov - map user data to a request, for passthrough requests
486  * @q:		request queue where request should be inserted
487  * @rq:		request to map data to
488  * @map_data:   pointer to the rq_map_data holding pages (if necessary)
489  * @iter:	iovec iterator
490  * @gfp_mask:	memory allocation flags
491  *
492  * Description:
493  *    Data will be mapped directly for zero copy I/O, if possible. Otherwise
494  *    a kernel bounce buffer is used.
495  *
496  *    A matching blk_rq_unmap_user() must be issued at the end of I/O, while
497  *    still in process context.
498  */
499 int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
500 			struct rq_map_data *map_data,
501 			const struct iov_iter *iter, gfp_t gfp_mask)
502 {
503 	bool copy = false, map_bvec = false;
504 	unsigned long align = blk_lim_dma_alignment_and_pad(&q->limits);
505 	struct bio *bio = NULL;
506 	struct iov_iter i;
507 	int ret = -EINVAL;
508 
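	/*
	 * Decide whether the data can be mapped zero-copy or has to be
	 * bounced: preallocated map_data pages, misaligned iterators,
	 * kernel-backed iterators and virt-boundary gaps all force a copy,
	 * while a bvec iterator can reuse its pages directly.
	 */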
509 	if (map_data)
510 		copy = true;
511 	else if (iov_iter_alignment(iter) & align)
512 		copy = true;
513 	else if (iov_iter_is_bvec(iter))
514 		map_bvec = true;
515 	else if (!user_backed_iter(iter))
516 		copy = true;
517 	else if (queue_virt_boundary(q))
518 		copy = queue_virt_boundary(q) & iov_iter_gap_alignment(iter);
519 
520 	if (map_bvec) {
521 		ret = blk_rq_map_user_bvec(rq, iter);
522 		if (!ret)
523 			return 0;
524 		if (ret != -EREMOTEIO)
525 			goto fail;
526 		/* fall back to copying the data on limits mismatches */
527 		copy = true;
528 	}
529 
530 	i = *iter;
531 	do {
532 		if (copy)
533 			ret = bio_copy_user_iov(rq, map_data, &i, gfp_mask);
534 		else
535 			ret = bio_map_user_iov(rq, &i, gfp_mask);
536 		if (ret) {
537 			if (ret == -EREMOTEIO)
538 				ret = -EINVAL;
539 			goto unmap_rq;
540 		}
541 		if (!bio)
542 			bio = rq->bio;
543 	} while (iov_iter_count(&i));
544 
545 	return 0;
546 
547 unmap_rq:
548 	blk_rq_unmap_user(bio);
549 fail:
550 	rq->bio = NULL;
551 	return ret;
552 }
553 EXPORT_SYMBOL(blk_rq_map_user_iov);
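/*
 * Illustrative sketch (not part of this file): mapping a user iovec that
 * was imported with import_iovec(), mirroring blk_rq_map_user_io() below.
 * "uvec" and "nr_segs" are placeholders:
 *
 *	struct iovec fast_iov[UIO_FASTIOV], *iov = fast_iov;
 *	struct iov_iter iter;
 *
 *	ret = import_iovec(ITER_DEST, uvec, nr_segs, UIO_FASTIOV, &iov, &iter);
 *	if (ret < 0)
 *		return ret;
 *	ret = blk_rq_map_user_iov(q, rq, NULL, &iter, GFP_KERNEL);
 *	kfree(iov);
 */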
554 
555 int blk_rq_map_user(struct request_queue *q, struct request *rq,
556 		    struct rq_map_data *map_data, void __user *ubuf,
557 		    unsigned long len, gfp_t gfp_mask)
558 {
559 	struct iov_iter i;
560 	int ret = import_ubuf(rq_data_dir(rq), ubuf, len, &i);
561 
562 	if (unlikely(ret < 0))
563 		return ret;
564 
565 	return blk_rq_map_user_iov(q, rq, map_data, &i, gfp_mask);
566 }
567 EXPORT_SYMBOL(blk_rq_map_user);
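/*
 * Illustrative sketch (not part of this file) of the simple single-buffer
 * lifecycle; "q", "ubuf" and "len" are placeholders.  Note that rq->bio is
 * saved before execution, as required by blk_rq_unmap_user():
 *
 *	rq = blk_mq_alloc_request(q, REQ_OP_DRV_IN, 0);
 *	if (IS_ERR(rq))
 *		return PTR_ERR(rq);
 *	ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
 *	if (!ret) {
 *		struct bio *bio = rq->bio;
 *
 *		blk_execute_rq(rq, false);
 *		ret = blk_rq_unmap_user(bio);
 *	}
 *	blk_mq_free_request(rq);
 */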
568 
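/*
 * Convenience wrapper for SG_IO-style interfaces: map either a flat user
 * buffer or a user iovec into @req, truncating an iovec mapping to the
 * shorter of the iovec and @buf_len as the SG_IO howto requires.
 */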
569 int blk_rq_map_user_io(struct request *req, struct rq_map_data *map_data,
570 		void __user *ubuf, unsigned long buf_len, gfp_t gfp_mask,
571 		bool vec, int iov_count, bool check_iter_count, int rw)
572 {
573 	int ret = 0;
574 
575 	if (vec) {
576 		struct iovec fast_iov[UIO_FASTIOV];
577 		struct iovec *iov = fast_iov;
578 		struct iov_iter iter;
579 
580 		ret = import_iovec(rw, ubuf, iov_count ? iov_count : buf_len,
581 				UIO_FASTIOV, &iov, &iter);
582 		if (ret < 0)
583 			return ret;
584 
585 		if (iov_count) {
586 			/* SG_IO howto says that the shorter of the two wins */
587 			iov_iter_truncate(&iter, buf_len);
588 			if (check_iter_count && !iov_iter_count(&iter)) {
589 				kfree(iov);
590 				return -EINVAL;
591 			}
592 		}
593 
594 		ret = blk_rq_map_user_iov(req->q, req, map_data, &iter,
595 				gfp_mask);
596 		kfree(iov);
597 	} else if (buf_len) {
598 		ret = blk_rq_map_user(req->q, req, map_data, ubuf, buf_len,
599 				gfp_mask);
600 	}
601 	return ret;
602 }
603 EXPORT_SYMBOL(blk_rq_map_user_io);
604 
605 /**
606  * blk_rq_unmap_user - unmap a request with user data
607  * @bio:	       start of bio list
608  *
609  * Description:
610  *    Unmap a request previously mapped by blk_rq_map_user(). The caller must
611  *    supply the original rq->bio from the blk_rq_map_user() return, since
612  *    the I/O completion may have changed rq->bio.
613  */
614 int blk_rq_unmap_user(struct bio *bio)
615 {
616 	struct bio *next_bio;
617 	int ret = 0, ret2;
618 
619 	while (bio) {
620 		if (bio->bi_private) {
621 			ret2 = bio_uncopy_user(bio);
622 			if (ret2 && !ret)
623 				ret = ret2;
624 		} else {
625 			bio_release_pages(bio, bio_data_dir(bio) == READ);
626 		}
627 
628 		if (bio_integrity(bio))
629 			bio_integrity_unmap_user(bio);
630 
631 		next_bio = bio;
632 		bio = bio->bi_next;
633 		blk_mq_map_bio_put(next_bio);
634 	}
635 
636 	return ret;
637 }
638 EXPORT_SYMBOL(blk_rq_unmap_user);
639 
640 /**
641  * blk_rq_map_kern - map kernel data to a request, for passthrough requests
642  * @rq:		request to fill
643  * @kbuf:	the kernel buffer
644  * @len:	length of the kernel data
645  * @gfp_mask:	memory allocation flags
646  *
647  * Description:
648  *    Data will be mapped directly if possible. Otherwise a bounce
649  *    buffer is used. Can be called multiple times to append multiple
650  *    buffers.
651  */
652 int blk_rq_map_kern(struct request *rq, void *kbuf, unsigned int len,
653 		gfp_t gfp_mask)
654 {
655 	unsigned long addr = (unsigned long) kbuf;
656 	struct bio *bio;
657 	int ret;
658 
659 	if (len > (queue_max_hw_sectors(rq->q) << SECTOR_SHIFT))
660 		return -EINVAL;
661 	if (!len || !kbuf)
662 		return -EINVAL;
663 
664 	if (!blk_rq_aligned(rq->q, addr, len) || object_is_on_stack(kbuf))
665 		bio = bio_copy_kern(rq, kbuf, len, gfp_mask);
666 	else
667 		bio = bio_map_kern(rq, kbuf, len, gfp_mask);
668 
669 	if (IS_ERR(bio))
670 		return PTR_ERR(bio);
671 
672 	ret = blk_rq_append_bio(rq, bio);
673 	if (unlikely(ret))
674 		blk_mq_map_bio_put(bio);
675 	return ret;
676 }
677 EXPORT_SYMBOL(blk_rq_map_kern);
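/*
 * Illustrative sketch (not part of this file): mapping a kmalloc'ed buffer
 * for a driver-internal passthrough command; "buf" and "len" are
 * placeholders.  A suitably aligned heap buffer is mapped directly, while
 * stack or unaligned buffers are transparently bounced by bio_copy_kern():
 *
 *	ret = blk_rq_map_kern(rq, buf, len, GFP_KERNEL);
 *	if (ret)
 *		return ret;
 *	blk_execute_rq(rq, false);
 */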
678