1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * bio-integrity.c - bio data integrity extensions
4 *
5 * Copyright (C) 2007, 2008, 2009 Oracle Corporation
6 * Written by: Martin K. Petersen <martin.petersen@oracle.com>
7 */
8
9 #include <linux/blk-integrity.h>
10 #include <linux/mempool.h>
11 #include <linux/export.h>
12 #include <linux/bio.h>
13 #include <linux/workqueue.h>
14 #include <linux/slab.h>
15 #include "blk.h"
16
/* Slab cache handed to each bio_set's integrity payload mempool. */
static struct kmem_cache *bip_slab;
/* Workqueue on which deferred integrity verification work is queued. */
static struct workqueue_struct *kintegrityd_wq;
19
blk_flush_integrity(void)20 void blk_flush_integrity(void)
21 {
22 flush_workqueue(kintegrityd_wq);
23 }
24
25 /**
26 * bio_integrity_free - Free bio integrity payload
27 * @bio: bio containing bip to be freed
28 *
29 * Description: Free the integrity portion of a bio.
30 */
bio_integrity_free(struct bio * bio)31 void bio_integrity_free(struct bio *bio)
32 {
33 struct bio_integrity_payload *bip = bio_integrity(bio);
34 struct bio_set *bs = bio->bi_pool;
35
36 if (bs && mempool_initialized(&bs->bio_integrity_pool)) {
37 if (bip->bip_vec)
38 bvec_free(&bs->bvec_integrity_pool, bip->bip_vec,
39 bip->bip_max_vcnt);
40 mempool_free(bip, &bs->bio_integrity_pool);
41 } else {
42 kfree(bip);
43 }
44 bio->bi_integrity = NULL;
45 bio->bi_opf &= ~REQ_INTEGRITY;
46 }
47
48 /**
49 * bio_integrity_alloc - Allocate integrity payload and attach it to bio
50 * @bio: bio to attach integrity metadata to
51 * @gfp_mask: Memory allocation mask
52 * @nr_vecs: Number of integrity metadata scatter-gather elements
53 *
54 * Description: This function prepares a bio for attaching integrity
55 * metadata. nr_vecs specifies the maximum number of pages containing
56 * integrity metadata that can be attached.
57 */
bio_integrity_alloc(struct bio * bio,gfp_t gfp_mask,unsigned int nr_vecs)58 struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
59 gfp_t gfp_mask,
60 unsigned int nr_vecs)
61 {
62 struct bio_integrity_payload *bip;
63 struct bio_set *bs = bio->bi_pool;
64 unsigned inline_vecs;
65
66 if (WARN_ON_ONCE(bio_has_crypt_ctx(bio)))
67 return ERR_PTR(-EOPNOTSUPP);
68
69 if (!bs || !mempool_initialized(&bs->bio_integrity_pool)) {
70 bip = kmalloc(struct_size(bip, bip_inline_vecs, nr_vecs), gfp_mask);
71 inline_vecs = nr_vecs;
72 } else {
73 bip = mempool_alloc(&bs->bio_integrity_pool, gfp_mask);
74 inline_vecs = BIO_INLINE_VECS;
75 }
76
77 if (unlikely(!bip))
78 return ERR_PTR(-ENOMEM);
79
80 memset(bip, 0, sizeof(*bip));
81
82 /* always report as many vecs as asked explicitly, not inline vecs */
83 bip->bip_max_vcnt = nr_vecs;
84 if (nr_vecs > inline_vecs) {
85 bip->bip_vec = bvec_alloc(&bs->bvec_integrity_pool,
86 &bip->bip_max_vcnt, gfp_mask);
87 if (!bip->bip_vec)
88 goto err;
89 } else if (nr_vecs) {
90 bip->bip_vec = bip->bip_inline_vecs;
91 }
92
93 bip->bip_bio = bio;
94 bio->bi_integrity = bip;
95 bio->bi_opf |= REQ_INTEGRITY;
96
97 return bip;
98 err:
99 if (bs && mempool_initialized(&bs->bio_integrity_pool))
100 mempool_free(bip, &bs->bio_integrity_pool);
101 else
102 kfree(bip);
103 return ERR_PTR(-ENOMEM);
104 }
105 EXPORT_SYMBOL(bio_integrity_alloc);
106
/*
 * Unpin the page of each of the first @nr_vecs entries of @bv.  When
 * @dirty is set, non-compound pages are marked dirty before being unpinned.
 */
static void bio_integrity_unpin_bvec(struct bio_vec *bv, int nr_vecs,
				     bool dirty)
{
	int idx;

	for (idx = 0; idx < nr_vecs; idx++) {
		struct page *page = bv[idx].bv_page;

		if (dirty && !PageCompound(page))
			set_page_dirty_lock(page);
		unpin_user_page(page);
	}
}
118
bio_integrity_uncopy_user(struct bio_integrity_payload * bip)119 static void bio_integrity_uncopy_user(struct bio_integrity_payload *bip)
120 {
121 unsigned short nr_vecs = bip->bip_max_vcnt - 1;
122 struct bio_vec *copy = &bip->bip_vec[1];
123 size_t bytes = bip->bip_iter.bi_size;
124 struct iov_iter iter;
125 int ret;
126
127 iov_iter_bvec(&iter, ITER_DEST, copy, nr_vecs, bytes);
128 ret = copy_to_iter(bvec_virt(bip->bip_vec), bytes, &iter);
129 WARN_ON_ONCE(ret != bytes);
130
131 bio_integrity_unpin_bvec(copy, nr_vecs, true);
132 }
133
134 /**
135 * bio_integrity_unmap_user - Unmap user integrity payload
136 * @bio: bio containing bip to be unmapped
137 *
138 * Unmap the user mapped integrity portion of a bio.
139 */
bio_integrity_unmap_user(struct bio * bio)140 void bio_integrity_unmap_user(struct bio *bio)
141 {
142 struct bio_integrity_payload *bip = bio_integrity(bio);
143
144 if (bip->bip_flags & BIP_COPY_USER) {
145 if (bio_data_dir(bio) == READ)
146 bio_integrity_uncopy_user(bip);
147 kfree(bvec_virt(bip->bip_vec));
148 return;
149 }
150
151 bio_integrity_unpin_bvec(bip->bip_vec, bip->bip_max_vcnt,
152 bio_data_dir(bio) == READ);
153 }
154
155 /**
156 * bio_integrity_add_page - Attach integrity metadata
157 * @bio: bio to update
158 * @page: page containing integrity metadata
159 * @len: number of bytes of integrity metadata in page
160 * @offset: start offset within page
161 *
162 * Description: Attach a page containing integrity metadata to bio.
163 */
bio_integrity_add_page(struct bio * bio,struct page * page,unsigned int len,unsigned int offset)164 int bio_integrity_add_page(struct bio *bio, struct page *page,
165 unsigned int len, unsigned int offset)
166 {
167 struct request_queue *q = bdev_get_queue(bio->bi_bdev);
168 struct bio_integrity_payload *bip = bio_integrity(bio);
169
170 if (bip->bip_vcnt > 0) {
171 struct bio_vec *bv = &bip->bip_vec[bip->bip_vcnt - 1];
172 bool same_page = false;
173
174 if (bvec_try_merge_hw_page(q, bv, page, len, offset,
175 &same_page)) {
176 bip->bip_iter.bi_size += len;
177 return len;
178 }
179
180 if (bip->bip_vcnt >=
181 min(bip->bip_max_vcnt, queue_max_integrity_segments(q)))
182 return 0;
183
184 /*
185 * If the queue doesn't support SG gaps and adding this segment
186 * would create a gap, disallow it.
187 */
188 if (bvec_gap_to_prev(&q->limits, bv, offset))
189 return 0;
190 }
191
192 bvec_set_page(&bip->bip_vec[bip->bip_vcnt], page, len, offset);
193 bip->bip_vcnt++;
194 bip->bip_iter.bi_size += len;
195
196 return len;
197 }
198 EXPORT_SYMBOL(bio_integrity_add_page);
199
bio_integrity_copy_user(struct bio * bio,struct bio_vec * bvec,int nr_vecs,unsigned int len,unsigned int direction)200 static int bio_integrity_copy_user(struct bio *bio, struct bio_vec *bvec,
201 int nr_vecs, unsigned int len,
202 unsigned int direction)
203 {
204 bool write = direction == ITER_SOURCE;
205 struct bio_integrity_payload *bip;
206 struct iov_iter iter;
207 void *buf;
208 int ret;
209
210 buf = kmalloc(len, GFP_KERNEL);
211 if (!buf)
212 return -ENOMEM;
213
214 if (write) {
215 iov_iter_bvec(&iter, direction, bvec, nr_vecs, len);
216 if (!copy_from_iter_full(buf, len, &iter)) {
217 ret = -EFAULT;
218 goto free_buf;
219 }
220
221 bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
222 } else {
223 memset(buf, 0, len);
224
225 /*
226 * We need to preserve the original bvec and the number of vecs
227 * in it for completion handling
228 */
229 bip = bio_integrity_alloc(bio, GFP_KERNEL, nr_vecs + 1);
230 }
231
232 if (IS_ERR(bip)) {
233 ret = PTR_ERR(bip);
234 goto free_buf;
235 }
236
237 if (write)
238 bio_integrity_unpin_bvec(bvec, nr_vecs, false);
239 else
240 memcpy(&bip->bip_vec[1], bvec, nr_vecs * sizeof(*bvec));
241
242 ret = bio_integrity_add_page(bio, virt_to_page(buf), len,
243 offset_in_page(buf));
244 if (ret != len) {
245 ret = -ENOMEM;
246 goto free_bip;
247 }
248
249 bip->bip_flags |= BIP_COPY_USER;
250 bip->bip_vcnt = nr_vecs;
251 return 0;
252 free_bip:
253 bio_integrity_free(bio);
254 free_buf:
255 kfree(buf);
256 return ret;
257 }
258
bio_integrity_init_user(struct bio * bio,struct bio_vec * bvec,int nr_vecs,unsigned int len)259 static int bio_integrity_init_user(struct bio *bio, struct bio_vec *bvec,
260 int nr_vecs, unsigned int len)
261 {
262 struct bio_integrity_payload *bip;
263
264 bip = bio_integrity_alloc(bio, GFP_KERNEL, nr_vecs);
265 if (IS_ERR(bip))
266 return PTR_ERR(bip);
267
268 memcpy(bip->bip_vec, bvec, nr_vecs * sizeof(*bvec));
269 bip->bip_iter.bi_size = len;
270 bip->bip_vcnt = nr_vecs;
271 return 0;
272 }
273
/*
 * Build bvecs from an array of @nr_vecs pages covering @bytes bytes that
 * start @offset bytes into the first page.
 *
 * Consecutive pages that belong to the same folio and are adjacent in the
 * page array are folded into one bvec; each page folded in this way is
 * unpinned here, so only the first page of every bvec is left for the
 * caller to unpin.
 *
 * Returns the number of bvecs written to @bvec.
 */
static unsigned int bvec_from_pages(struct bio_vec *bvec, struct page **pages,
				    int nr_vecs, ssize_t bytes, ssize_t offset)
{
	unsigned int nr_bvecs = 0;
	int first = 0;

	while (first < nr_vecs) {
		struct folio *folio = page_folio(pages[first]);
		size_t seg_len = min_t(size_t, bytes, PAGE_SIZE - offset);
		int next = first + 1;

		bytes -= seg_len;

		/* Extend the segment while the pages stay contiguous. */
		while (next < nr_vecs &&
		       page_folio(pages[next]) == folio &&
		       pages[next] == pages[next - 1] + 1) {
			size_t chunk = min_t(size_t, PAGE_SIZE, bytes);

			unpin_user_page(pages[next]);
			seg_len += chunk;
			bytes -= chunk;
			next++;
		}

		bvec_set_page(&bvec[nr_bvecs], pages[first], seg_len, offset);
		nr_bvecs++;

		/* Only the very first page can start at a non-zero offset. */
		offset = 0;
		first = next;
	}

	return nr_bvecs;
}
303
/**
 * bio_integrity_map_user - Map user integrity metadata into a bio
 * @bio:	bio to attach the metadata to
 * @ubuf:	user buffer holding (or receiving) the metadata
 * @bytes:	size of the user buffer
 *
 * Description: Pins the pages backing @ubuf and attaches them to @bio as
 * its integrity payload.  A kernel bounce buffer is used instead when the
 * buffer does not meet the queue's DMA alignment/padding requirements or
 * needs more segments than the queue allows for integrity data.
 *
 * Return: 0 on success, -EINVAL if @bio already has an integrity payload,
 * -E2BIG if the buffer is too large, or another negative errno.
 */
int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t bytes)
{
	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
	unsigned int align = blk_lim_dma_alignment_and_pad(&q->limits);
	struct page *stack_pages[UIO_FASTIOV], **pages = stack_pages;
	struct bio_vec stack_vec[UIO_FASTIOV], *bvec = stack_vec;
	unsigned int direction, nr_bvecs;
	struct iov_iter iter;
	int ret, nr_vecs;
	size_t offset;
	bool copy;

	if (bio_integrity(bio))
		return -EINVAL;
	if (bytes >> SECTOR_SHIFT > queue_max_hw_sectors(q))
		return -E2BIG;

	direction = bio_data_dir(bio) == READ ? ITER_DEST : ITER_SOURCE;

	iov_iter_ubuf(&iter, direction, ubuf, bytes);
	nr_vecs = iov_iter_npages(&iter, BIO_MAX_VECS + 1);
	if (nr_vecs > BIO_MAX_VECS)
		return -E2BIG;

	if (nr_vecs > UIO_FASTIOV) {
		/* Too many pages for the on-stack arrays. */
		bvec = kcalloc(nr_vecs, sizeof(*bvec), GFP_KERNEL);
		if (!bvec)
			return -ENOMEM;
		pages = NULL;
	}

	copy = !iov_iter_is_aligned(&iter, align, align);
	ret = iov_iter_extract_pages(&iter, &pages, bytes, nr_vecs, 0, &offset);
	if (unlikely(ret < 0))
		goto free_bvec;

	nr_bvecs = bvec_from_pages(bvec, pages, nr_vecs, bytes, offset);
	if (pages != stack_pages)
		kvfree(pages);

	/* Too many segments for the queue: fall back to bouncing. */
	if (nr_bvecs > queue_max_integrity_segments(q))
		copy = true;

	if (copy)
		ret = bio_integrity_copy_user(bio, bvec, nr_bvecs, bytes,
					      direction);
	else
		ret = bio_integrity_init_user(bio, bvec, nr_bvecs, bytes);

	/* On failure nothing holds on to the pages: drop the pins. */
	if (ret)
		bio_integrity_unpin_bvec(bvec, nr_bvecs, false);

free_bvec:
	if (bvec != stack_vec)
		kfree(bvec);
	return ret;
}
367
368 /**
369 * bio_integrity_prep - Prepare bio for integrity I/O
370 * @bio: bio to prepare
371 *
372 * Description: Checks if the bio already has an integrity payload attached.
373 * If it does, the payload has been generated by another kernel subsystem,
374 * and we just pass it through. Otherwise allocates integrity payload.
375 * The bio must have data direction, target device and start sector set priot
376 * to calling. In the WRITE case, integrity metadata will be generated using
377 * the block device's integrity function. In the READ case, the buffer
378 * will be prepared for DMA and a suitable end_io handler set up.
379 */
bio_integrity_prep(struct bio * bio)380 bool bio_integrity_prep(struct bio *bio)
381 {
382 struct bio_integrity_payload *bip;
383 struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
384 unsigned int len;
385 void *buf;
386 gfp_t gfp = GFP_NOIO;
387
388 if (!bi)
389 return true;
390
391 if (!bio_sectors(bio))
392 return true;
393
394 /* Already protected? */
395 if (bio_integrity(bio))
396 return true;
397
398 switch (bio_op(bio)) {
399 case REQ_OP_READ:
400 if (bi->flags & BLK_INTEGRITY_NOVERIFY)
401 return true;
402 break;
403 case REQ_OP_WRITE:
404 if (bi->flags & BLK_INTEGRITY_NOGENERATE)
405 return true;
406
407 /*
408 * Zero the memory allocated to not leak uninitialized kernel
409 * memory to disk for non-integrity metadata where nothing else
410 * initializes the memory.
411 */
412 if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE)
413 gfp |= __GFP_ZERO;
414 break;
415 default:
416 return true;
417 }
418
419 /* Allocate kernel buffer for protection data */
420 len = bio_integrity_bytes(bi, bio_sectors(bio));
421 buf = kmalloc(len, gfp);
422 if (unlikely(buf == NULL)) {
423 goto err_end_io;
424 }
425
426 bip = bio_integrity_alloc(bio, GFP_NOIO, 1);
427 if (IS_ERR(bip)) {
428 kfree(buf);
429 goto err_end_io;
430 }
431
432 bip->bip_flags |= BIP_BLOCK_INTEGRITY;
433 bip_set_seed(bip, bio->bi_iter.bi_sector);
434
435 if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
436 bip->bip_flags |= BIP_IP_CHECKSUM;
437
438 if (bio_integrity_add_page(bio, virt_to_page(buf), len,
439 offset_in_page(buf)) < len) {
440 printk(KERN_ERR "could not attach integrity payload\n");
441 goto err_end_io;
442 }
443
444 /* Auto-generate integrity metadata if this is a write */
445 if (bio_data_dir(bio) == WRITE)
446 blk_integrity_generate(bio);
447 else
448 bip->bio_iter = bio->bi_iter;
449 return true;
450
451 err_end_io:
452 bio->bi_status = BLK_STS_RESOURCE;
453 bio_endio(bio);
454 return false;
455 }
456 EXPORT_SYMBOL(bio_integrity_prep);
457
458 /**
459 * bio_integrity_verify_fn - Integrity I/O completion worker
460 * @work: Work struct stored in bio to be verified
461 *
462 * Description: This workqueue function is called to complete a READ
463 * request. The function verifies the transferred integrity metadata
464 * and then calls the original bio end_io function.
465 */
bio_integrity_verify_fn(struct work_struct * work)466 static void bio_integrity_verify_fn(struct work_struct *work)
467 {
468 struct bio_integrity_payload *bip =
469 container_of(work, struct bio_integrity_payload, bip_work);
470 struct bio *bio = bip->bip_bio;
471
472 blk_integrity_verify(bio);
473
474 kfree(bvec_virt(bip->bip_vec));
475 bio_integrity_free(bio);
476 bio_endio(bio);
477 }
478
479 /**
480 * __bio_integrity_endio - Integrity I/O completion function
481 * @bio: Protected bio
482 *
483 * Description: Completion for integrity I/O
484 *
485 * Normally I/O completion is done in interrupt context. However,
486 * verifying I/O integrity is a time-consuming task which must be run
487 * in process context. This function postpones completion
488 * accordingly.
489 */
__bio_integrity_endio(struct bio * bio)490 bool __bio_integrity_endio(struct bio *bio)
491 {
492 struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
493 struct bio_integrity_payload *bip = bio_integrity(bio);
494
495 if (bio_op(bio) == REQ_OP_READ && !bio->bi_status && bi->csum_type) {
496 INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
497 queue_work(kintegrityd_wq, &bip->bip_work);
498 return false;
499 }
500
501 kfree(bvec_virt(bip->bip_vec));
502 bio_integrity_free(bio);
503 return true;
504 }
505
506 /**
507 * bio_integrity_advance - Advance integrity vector
508 * @bio: bio whose integrity vector to update
509 * @bytes_done: number of data bytes that have been completed
510 *
511 * Description: This function calculates how many integrity bytes the
512 * number of completed data bytes correspond to and advances the
513 * integrity vector accordingly.
514 */
bio_integrity_advance(struct bio * bio,unsigned int bytes_done)515 void bio_integrity_advance(struct bio *bio, unsigned int bytes_done)
516 {
517 struct bio_integrity_payload *bip = bio_integrity(bio);
518 struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
519 unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9);
520
521 bip->bip_iter.bi_sector += bio_integrity_intervals(bi, bytes_done >> 9);
522 bvec_iter_advance(bip->bip_vec, &bip->bip_iter, bytes);
523 }
524
525 /**
526 * bio_integrity_trim - Trim integrity vector
527 * @bio: bio whose integrity vector to update
528 *
529 * Description: Used to trim the integrity vector in a cloned bio.
530 */
bio_integrity_trim(struct bio * bio)531 void bio_integrity_trim(struct bio *bio)
532 {
533 struct bio_integrity_payload *bip = bio_integrity(bio);
534 struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
535
536 bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
537 }
538 EXPORT_SYMBOL(bio_integrity_trim);
539
540 /**
541 * bio_integrity_clone - Callback for cloning bios with integrity metadata
542 * @bio: New bio
543 * @bio_src: Original bio
544 * @gfp_mask: Memory allocation mask
545 *
546 * Description: Called to allocate a bip when cloning a bio
547 */
bio_integrity_clone(struct bio * bio,struct bio * bio_src,gfp_t gfp_mask)548 int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
549 gfp_t gfp_mask)
550 {
551 struct bio_integrity_payload *bip_src = bio_integrity(bio_src);
552 struct bio_integrity_payload *bip;
553
554 BUG_ON(bip_src == NULL);
555
556 bip = bio_integrity_alloc(bio, gfp_mask, 0);
557 if (IS_ERR(bip))
558 return PTR_ERR(bip);
559
560 bip->bip_vec = bip_src->bip_vec;
561 bip->bip_iter = bip_src->bip_iter;
562 bip->bip_flags = bip_src->bip_flags & ~BIP_BLOCK_INTEGRITY;
563
564 return 0;
565 }
566
bioset_integrity_create(struct bio_set * bs,int pool_size)567 int bioset_integrity_create(struct bio_set *bs, int pool_size)
568 {
569 if (mempool_initialized(&bs->bio_integrity_pool))
570 return 0;
571
572 if (mempool_init_slab_pool(&bs->bio_integrity_pool,
573 pool_size, bip_slab))
574 return -1;
575
576 if (biovec_init_pool(&bs->bvec_integrity_pool, pool_size)) {
577 mempool_exit(&bs->bio_integrity_pool);
578 return -1;
579 }
580
581 return 0;
582 }
583 EXPORT_SYMBOL(bioset_integrity_create);
584
bioset_integrity_free(struct bio_set * bs)585 void bioset_integrity_free(struct bio_set *bs)
586 {
587 mempool_exit(&bs->bio_integrity_pool);
588 mempool_exit(&bs->bvec_integrity_pool);
589 }
590
bio_integrity_init(void)591 void __init bio_integrity_init(void)
592 {
593 /*
594 * kintegrityd won't block much but may burn a lot of CPU cycles.
595 * Make it highpri CPU intensive wq with max concurrency of 1.
596 */
597 kintegrityd_wq = alloc_workqueue("kintegrityd", WQ_MEM_RECLAIM |
598 WQ_HIGHPRI | WQ_CPU_INTENSIVE, 1);
599 if (!kintegrityd_wq)
600 panic("Failed to create kintegrityd\n");
601
602 bip_slab = kmem_cache_create("bio_integrity_payload",
603 sizeof(struct bio_integrity_payload) +
604 sizeof(struct bio_vec) * BIO_INLINE_VECS,
605 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
606 }
607