1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * bio-integrity.c - bio data integrity extensions
4 *
5 * Copyright (C) 2007, 2008, 2009 Oracle Corporation
6 * Written by: Martin K. Petersen <martin.petersen@oracle.com>
7 */
8
9 #include <linux/blk-integrity.h>
10 #include <linux/mempool.h>
11 #include <linux/export.h>
12 #include <linux/bio.h>
13 #include <linux/workqueue.h>
14 #include <linux/slab.h>
15 #include "blk.h"
16
/* Slab cache handed to each bio_set's integrity payload mempool. */
static struct kmem_cache *bip_slab;
/* Workqueue on which deferred integrity verification work is queued. */
static struct workqueue_struct *kintegrityd_wq;
19
blk_flush_integrity(void)20 void blk_flush_integrity(void)
21 {
22 flush_workqueue(kintegrityd_wq);
23 }
24
25 /**
26 * bio_integrity_free - Free bio integrity payload
27 * @bio: bio containing bip to be freed
28 *
29 * Description: Free the integrity portion of a bio.
30 */
bio_integrity_free(struct bio * bio)31 void bio_integrity_free(struct bio *bio)
32 {
33 struct bio_integrity_payload *bip = bio_integrity(bio);
34 struct bio_set *bs = bio->bi_pool;
35
36 if (bs && mempool_initialized(&bs->bio_integrity_pool)) {
37 if (bip->bip_vec)
38 bvec_free(&bs->bvec_integrity_pool, bip->bip_vec,
39 bip->bip_max_vcnt);
40 mempool_free(bip, &bs->bio_integrity_pool);
41 } else {
42 kfree(bip);
43 }
44 bio->bi_integrity = NULL;
45 bio->bi_opf &= ~REQ_INTEGRITY;
46 }
47
48 /**
49 * bio_integrity_alloc - Allocate integrity payload and attach it to bio
50 * @bio: bio to attach integrity metadata to
51 * @gfp_mask: Memory allocation mask
52 * @nr_vecs: Number of integrity metadata scatter-gather elements
53 *
54 * Description: This function prepares a bio for attaching integrity
55 * metadata. nr_vecs specifies the maximum number of pages containing
56 * integrity metadata that can be attached.
57 */
bio_integrity_alloc(struct bio * bio,gfp_t gfp_mask,unsigned int nr_vecs)58 struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
59 gfp_t gfp_mask,
60 unsigned int nr_vecs)
61 {
62 struct bio_integrity_payload *bip;
63 struct bio_set *bs = bio->bi_pool;
64 unsigned inline_vecs;
65
66 if (WARN_ON_ONCE(bio_has_crypt_ctx(bio)))
67 return ERR_PTR(-EOPNOTSUPP);
68
69 if (!bs || !mempool_initialized(&bs->bio_integrity_pool)) {
70 bip = kmalloc(struct_size(bip, bip_inline_vecs, nr_vecs), gfp_mask);
71 inline_vecs = nr_vecs;
72 } else {
73 bip = mempool_alloc(&bs->bio_integrity_pool, gfp_mask);
74 inline_vecs = BIO_INLINE_VECS;
75 }
76
77 if (unlikely(!bip))
78 return ERR_PTR(-ENOMEM);
79
80 memset(bip, 0, sizeof(*bip));
81
82 /* always report as many vecs as asked explicitly, not inline vecs */
83 bip->bip_max_vcnt = nr_vecs;
84 if (nr_vecs > inline_vecs) {
85 bip->bip_vec = bvec_alloc(&bs->bvec_integrity_pool,
86 &bip->bip_max_vcnt, gfp_mask);
87 if (!bip->bip_vec)
88 goto err;
89 } else if (nr_vecs) {
90 bip->bip_vec = bip->bip_inline_vecs;
91 }
92
93 bip->bip_bio = bio;
94 bio->bi_integrity = bip;
95 bio->bi_opf |= REQ_INTEGRITY;
96
97 return bip;
98 err:
99 if (bs && mempool_initialized(&bs->bio_integrity_pool))
100 mempool_free(bip, &bs->bio_integrity_pool);
101 else
102 kfree(bip);
103 return ERR_PTR(-ENOMEM);
104 }
105 EXPORT_SYMBOL(bio_integrity_alloc);
106
/*
 * Unpin the page behind each of the first @nr_vecs entries of @bv.  When
 * @dirty is set, pages that are not compound pages are passed to
 * set_page_dirty_lock() before being unpinned.
 */
static void bio_integrity_unpin_bvec(struct bio_vec *bv, int nr_vecs,
				     bool dirty)
{
	int idx;

	for (idx = 0; idx < nr_vecs; idx++) {
		struct page *page = bv[idx].bv_page;

		if (dirty && !PageCompound(page))
			set_page_dirty_lock(page);
		unpin_user_page(page);
	}
}
118
bio_integrity_uncopy_user(struct bio_integrity_payload * bip)119 static void bio_integrity_uncopy_user(struct bio_integrity_payload *bip)
120 {
121 unsigned short nr_vecs = bip->bip_max_vcnt - 1;
122 struct bio_vec *copy = &bip->bip_vec[1];
123 size_t bytes = bip->bip_iter.bi_size;
124 struct iov_iter iter;
125 int ret;
126
127 iov_iter_bvec(&iter, ITER_DEST, copy, nr_vecs, bytes);
128 ret = copy_to_iter(bvec_virt(bip->bip_vec), bytes, &iter);
129 WARN_ON_ONCE(ret != bytes);
130
131 bio_integrity_unpin_bvec(copy, nr_vecs, true);
132 }
133
134 /**
135 * bio_integrity_unmap_user - Unmap user integrity payload
136 * @bio: bio containing bip to be unmapped
137 *
138 * Unmap the user mapped integrity portion of a bio.
139 */
bio_integrity_unmap_user(struct bio * bio)140 void bio_integrity_unmap_user(struct bio *bio)
141 {
142 struct bio_integrity_payload *bip = bio_integrity(bio);
143
144 if (bip->bip_flags & BIP_COPY_USER) {
145 if (bio_data_dir(bio) == READ)
146 bio_integrity_uncopy_user(bip);
147 kfree(bvec_virt(bip->bip_vec));
148 return;
149 }
150
151 bio_integrity_unpin_bvec(bip->bip_vec, bip->bip_max_vcnt,
152 bio_data_dir(bio) == READ);
153 }
154
155 /**
156 * bio_integrity_add_page - Attach integrity metadata
157 * @bio: bio to update
158 * @page: page containing integrity metadata
159 * @len: number of bytes of integrity metadata in page
160 * @offset: start offset within page
161 *
162 * Description: Attach a page containing integrity metadata to bio.
163 */
bio_integrity_add_page(struct bio * bio,struct page * page,unsigned int len,unsigned int offset)164 int bio_integrity_add_page(struct bio *bio, struct page *page,
165 unsigned int len, unsigned int offset)
166 {
167 struct request_queue *q = bdev_get_queue(bio->bi_bdev);
168 struct bio_integrity_payload *bip = bio_integrity(bio);
169
170 if (bip->bip_vcnt > 0) {
171 struct bio_vec *bv = &bip->bip_vec[bip->bip_vcnt - 1];
172 bool same_page = false;
173
174 if (bvec_try_merge_hw_page(q, bv, page, len, offset,
175 &same_page)) {
176 bip->bip_iter.bi_size += len;
177 return len;
178 }
179
180 if (bip->bip_vcnt >=
181 min(bip->bip_max_vcnt, queue_max_integrity_segments(q)))
182 return 0;
183
184 /*
185 * If the queue doesn't support SG gaps and adding this segment
186 * would create a gap, disallow it.
187 */
188 if (bvec_gap_to_prev(&q->limits, bv, offset))
189 return 0;
190 }
191
192 bvec_set_page(&bip->bip_vec[bip->bip_vcnt], page, len, offset);
193 bip->bip_vcnt++;
194 bip->bip_iter.bi_size += len;
195
196 return len;
197 }
198 EXPORT_SYMBOL(bio_integrity_add_page);
199
/*
 * Build an integrity payload for @bio that goes through a kernel bounce
 * buffer of @len bytes instead of using the pinned user pages in @bvec
 * directly.
 *
 * Writes (@direction == ITER_SOURCE): the user data is copied into the bounce
 * buffer up front and the user pages are unpinned.
 * Reads: the bounce buffer is zeroed and the user vecs are stashed in the
 * payload behind slot 0 so completion can copy the data back.
 *
 * Returns 0 on success or a negative errno; on failure the bounce buffer
 * (and the payload, if it was attached) is freed.
 */
static int bio_integrity_copy_user(struct bio *bio, struct bio_vec *bvec,
				   int nr_vecs, unsigned int len,
				   unsigned int direction, u32 seed)
{
	bool write = direction == ITER_SOURCE;
	struct bio_integrity_payload *bip;
	struct iov_iter iter;
	void *buf;
	int ret;

	buf = kmalloc(len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (!write) {
		memset(buf, 0, len);
	} else {
		iov_iter_bvec(&iter, direction, bvec, nr_vecs, len);
		if (!copy_from_iter_full(buf, len, &iter)) {
			ret = -EFAULT;
			goto free_buf;
		}
	}

	/*
	 * Writes only need the bounce vec.  Reads additionally keep the
	 * original bvec and its entry count for completion handling.
	 */
	bip = bio_integrity_alloc(bio, GFP_KERNEL, write ? 1 : nr_vecs + 1);
	if (IS_ERR(bip)) {
		ret = PTR_ERR(bip);
		goto free_buf;
	}

	/*
	 * NOTE(review): on the write path the user pages are unpinned here,
	 * before bio_integrity_add_page(); if that call ever failed, the
	 * caller's error path would unpin them again - confirm it cannot.
	 */
	if (write)
		bio_integrity_unpin_bvec(bvec, nr_vecs, false);
	else
		memcpy(&bip->bip_vec[1], bvec, nr_vecs * sizeof(*bvec));

	ret = bio_integrity_add_page(bio, virt_to_page(buf), len,
				     offset_in_page(buf));
	if (ret != len) {
		ret = -ENOMEM;
		goto free_bip;
	}

	bip->bip_iter.bi_sector = seed;
	bip->bip_flags |= BIP_COPY_USER;
	bip->bip_vcnt = nr_vecs;
	return 0;

free_bip:
	bio_integrity_free(bio);
free_buf:
	kfree(buf);
	return ret;
}
259
/*
 * Build an integrity payload for @bio that uses the pinned user pages
 * directly: allocate a payload with @nr_vecs entries, copy @bvec into it and
 * initialise its iterator with @seed and @len.
 *
 * Returns 0 on success or the error from bio_integrity_alloc().
 */
static int bio_integrity_init_user(struct bio *bio, struct bio_vec *bvec,
				   int nr_vecs, unsigned int len, u32 seed)
{
	struct bio_integrity_payload *bip =
		bio_integrity_alloc(bio, GFP_KERNEL, nr_vecs);

	if (IS_ERR(bip))
		return PTR_ERR(bip);

	memcpy(bip->bip_vec, bvec, nr_vecs * sizeof(*bvec));
	bip->bip_vcnt = nr_vecs;
	bip->bip_iter.bi_size = len;
	bip->bip_iter.bi_sector = seed;
	return 0;
}
275
/*
 * Convert an array of @nr_vecs pinned pages covering @bytes bytes (starting
 * @offset into the first page) into bio_vecs.  Consecutive pages that are
 * adjacent in the page array *and* belong to the same folio are merged into
 * one bio_vec; every merged-in page is unpinned, so only the first page of
 * each resulting vec is still pinned afterwards.
 *
 * Returns the number of bio_vecs written to @bvec.
 */
static unsigned int bvec_from_pages(struct bio_vec *bvec, struct page **pages,
				    int nr_vecs, ssize_t bytes, ssize_t offset)
{
	unsigned int nr_bvecs = 0;
	int first = 0;

	while (first < nr_vecs) {
		struct folio *folio = page_folio(pages[first]);
		size_t seg_len = min_t(size_t, bytes, PAGE_SIZE - offset);
		int next = first + 1;

		bytes -= seg_len;

		/* Extend the segment while the pages stay contiguous. */
		while (next < nr_vecs &&
		       page_folio(pages[next]) == folio &&
		       pages[next] == pages[next - 1] + 1) {
			size_t chunk = min_t(size_t, PAGE_SIZE, bytes);

			unpin_user_page(pages[next]);
			seg_len += chunk;
			bytes -= chunk;
			next++;
		}

		bvec_set_page(&bvec[nr_bvecs], pages[first], seg_len, offset);
		nr_bvecs++;

		/* Only the very first page starts at a non-zero offset. */
		offset = 0;
		first = next;
	}

	return nr_bvecs;
}
305
/**
 * bio_integrity_map_user - Attach a user-space integrity buffer to a bio
 * @bio:	bio to attach the payload to; must not already have one
 * @ubuf:	user-space address of the integrity buffer
 * @bytes:	length of the integrity buffer in bytes
 * @seed:	value stored as the payload's starting bip_iter.bi_sector
 *
 * Description: Pins the user pages behind @ubuf and builds an integrity
 * payload from them.  The data is bounced through a kernel buffer instead
 * when the buffer does not meet the alignment reported by
 * blk_lim_dma_alignment_and_pad() or needs more segments than
 * queue_max_integrity_segments() allows.
 *
 * Return: 0 on success; -EINVAL if @bio already has an integrity payload,
 * -E2BIG if the buffer is too large, -ENOMEM on allocation failure, or the
 * error returned by page extraction or payload setup.
 */
int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t bytes,
			   u32 seed)
{
	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
	unsigned int align = blk_lim_dma_alignment_and_pad(&q->limits);
	struct page *stack_pages[UIO_FASTIOV], **pages = stack_pages;
	struct bio_vec stack_vec[UIO_FASTIOV], *bvec = stack_vec;
	unsigned int direction, nr_bvecs;
	struct iov_iter iter;
	int ret, nr_vecs;
	size_t offset;
	bool copy;

	if (bio_integrity(bio))
		return -EINVAL;
	if (bytes >> SECTOR_SHIFT > queue_max_hw_sectors(q))
		return -E2BIG;

	if (bio_data_dir(bio) == READ)
		direction = ITER_DEST;
	else
		direction = ITER_SOURCE;

	iov_iter_ubuf(&iter, direction, ubuf, bytes);
	nr_vecs = iov_iter_npages(&iter, BIO_MAX_VECS + 1);
	if (nr_vecs > BIO_MAX_VECS)
		return -E2BIG;
	if (nr_vecs > UIO_FASTIOV) {
		bvec = kcalloc(nr_vecs, sizeof(*bvec), GFP_KERNEL);
		if (!bvec)
			return -ENOMEM;
		/* Too big for the stack array: let extraction allocate one. */
		pages = NULL;
	}

	copy = !iov_iter_is_aligned(&iter, align, align);
	ret = iov_iter_extract_pages(&iter, &pages, bytes, nr_vecs, 0, &offset);
	if (unlikely(ret < 0)) {
		/*
		 * When we passed in a NULL array the callee may already have
		 * allocated one before failing; release it so the error path
		 * does not leak it.  kvfree(NULL) is a no-op.
		 */
		if (pages != stack_pages)
			kvfree(pages);
		goto free_bvec;
	}

	nr_bvecs = bvec_from_pages(bvec, pages, nr_vecs, bytes, offset);
	if (pages != stack_pages)
		kvfree(pages);
	if (nr_bvecs > queue_max_integrity_segments(q))
		copy = true;

	if (copy)
		ret = bio_integrity_copy_user(bio, bvec, nr_bvecs, bytes,
					      direction, seed);
	else
		ret = bio_integrity_init_user(bio, bvec, nr_bvecs, bytes, seed);

	/* Payload setup failed: drop the pins still held on the user pages. */
	if (ret)
		bio_integrity_unpin_bvec(bvec, nr_bvecs, false);

	/* The payload holds its own copy of the vecs; ours is scratch. */
free_bvec:
	if (bvec != stack_vec)
		kfree(bvec);
	return ret;
}
370
371 /**
372 * bio_integrity_prep - Prepare bio for integrity I/O
373 * @bio: bio to prepare
374 *
375 * Description: Checks if the bio already has an integrity payload attached.
376 * If it does, the payload has been generated by another kernel subsystem,
377 * and we just pass it through. Otherwise allocates integrity payload.
378 * The bio must have data direction, target device and start sector set priot
379 * to calling. In the WRITE case, integrity metadata will be generated using
380 * the block device's integrity function. In the READ case, the buffer
381 * will be prepared for DMA and a suitable end_io handler set up.
382 */
bio_integrity_prep(struct bio * bio)383 bool bio_integrity_prep(struct bio *bio)
384 {
385 struct bio_integrity_payload *bip;
386 struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
387 unsigned int len;
388 void *buf;
389 gfp_t gfp = GFP_NOIO;
390
391 if (!bi)
392 return true;
393
394 if (!bio_sectors(bio))
395 return true;
396
397 /* Already protected? */
398 if (bio_integrity(bio))
399 return true;
400
401 switch (bio_op(bio)) {
402 case REQ_OP_READ:
403 if (bi->flags & BLK_INTEGRITY_NOVERIFY)
404 return true;
405 break;
406 case REQ_OP_WRITE:
407 if (bi->flags & BLK_INTEGRITY_NOGENERATE)
408 return true;
409
410 /*
411 * Zero the memory allocated to not leak uninitialized kernel
412 * memory to disk for non-integrity metadata where nothing else
413 * initializes the memory.
414 */
415 if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE)
416 gfp |= __GFP_ZERO;
417 break;
418 default:
419 return true;
420 }
421
422 /* Allocate kernel buffer for protection data */
423 len = bio_integrity_bytes(bi, bio_sectors(bio));
424 buf = kmalloc(len, gfp);
425 if (unlikely(buf == NULL)) {
426 goto err_end_io;
427 }
428
429 bip = bio_integrity_alloc(bio, GFP_NOIO, 1);
430 if (IS_ERR(bip)) {
431 kfree(buf);
432 goto err_end_io;
433 }
434
435 bip->bip_flags |= BIP_BLOCK_INTEGRITY;
436 bip_set_seed(bip, bio->bi_iter.bi_sector);
437
438 if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
439 bip->bip_flags |= BIP_IP_CHECKSUM;
440
441 if (bio_integrity_add_page(bio, virt_to_page(buf), len,
442 offset_in_page(buf)) < len) {
443 printk(KERN_ERR "could not attach integrity payload\n");
444 goto err_end_io;
445 }
446
447 /* Auto-generate integrity metadata if this is a write */
448 if (bio_data_dir(bio) == WRITE)
449 blk_integrity_generate(bio);
450 else
451 bip->bio_iter = bio->bi_iter;
452 return true;
453
454 err_end_io:
455 bio->bi_status = BLK_STS_RESOURCE;
456 bio_endio(bio);
457 return false;
458 }
459 EXPORT_SYMBOL(bio_integrity_prep);
460
461 /**
462 * bio_integrity_verify_fn - Integrity I/O completion worker
463 * @work: Work struct stored in bio to be verified
464 *
465 * Description: This workqueue function is called to complete a READ
466 * request. The function verifies the transferred integrity metadata
467 * and then calls the original bio end_io function.
468 */
bio_integrity_verify_fn(struct work_struct * work)469 static void bio_integrity_verify_fn(struct work_struct *work)
470 {
471 struct bio_integrity_payload *bip =
472 container_of(work, struct bio_integrity_payload, bip_work);
473 struct bio *bio = bip->bip_bio;
474
475 blk_integrity_verify(bio);
476
477 kfree(bvec_virt(bip->bip_vec));
478 bio_integrity_free(bio);
479 bio_endio(bio);
480 }
481
482 /**
483 * __bio_integrity_endio - Integrity I/O completion function
484 * @bio: Protected bio
485 *
486 * Description: Completion for integrity I/O
487 *
488 * Normally I/O completion is done in interrupt context. However,
489 * verifying I/O integrity is a time-consuming task which must be run
490 * in process context. This function postpones completion
491 * accordingly.
492 */
__bio_integrity_endio(struct bio * bio)493 bool __bio_integrity_endio(struct bio *bio)
494 {
495 struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
496 struct bio_integrity_payload *bip = bio_integrity(bio);
497
498 if (bio_op(bio) == REQ_OP_READ && !bio->bi_status && bi->csum_type) {
499 INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
500 queue_work(kintegrityd_wq, &bip->bip_work);
501 return false;
502 }
503
504 kfree(bvec_virt(bip->bip_vec));
505 bio_integrity_free(bio);
506 return true;
507 }
508
509 /**
510 * bio_integrity_advance - Advance integrity vector
511 * @bio: bio whose integrity vector to update
512 * @bytes_done: number of data bytes that have been completed
513 *
514 * Description: This function calculates how many integrity bytes the
515 * number of completed data bytes correspond to and advances the
516 * integrity vector accordingly.
517 */
bio_integrity_advance(struct bio * bio,unsigned int bytes_done)518 void bio_integrity_advance(struct bio *bio, unsigned int bytes_done)
519 {
520 struct bio_integrity_payload *bip = bio_integrity(bio);
521 struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
522 unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9);
523
524 bip->bip_iter.bi_sector += bio_integrity_intervals(bi, bytes_done >> 9);
525 bvec_iter_advance(bip->bip_vec, &bip->bip_iter, bytes);
526 }
527
528 /**
529 * bio_integrity_trim - Trim integrity vector
530 * @bio: bio whose integrity vector to update
531 *
532 * Description: Used to trim the integrity vector in a cloned bio.
533 */
bio_integrity_trim(struct bio * bio)534 void bio_integrity_trim(struct bio *bio)
535 {
536 struct bio_integrity_payload *bip = bio_integrity(bio);
537 struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
538
539 bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
540 }
541 EXPORT_SYMBOL(bio_integrity_trim);
542
543 /**
544 * bio_integrity_clone - Callback for cloning bios with integrity metadata
545 * @bio: New bio
546 * @bio_src: Original bio
547 * @gfp_mask: Memory allocation mask
548 *
549 * Description: Called to allocate a bip when cloning a bio
550 */
bio_integrity_clone(struct bio * bio,struct bio * bio_src,gfp_t gfp_mask)551 int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
552 gfp_t gfp_mask)
553 {
554 struct bio_integrity_payload *bip_src = bio_integrity(bio_src);
555 struct bio_integrity_payload *bip;
556
557 BUG_ON(bip_src == NULL);
558
559 bip = bio_integrity_alloc(bio, gfp_mask, 0);
560 if (IS_ERR(bip))
561 return PTR_ERR(bip);
562
563 bip->bip_vec = bip_src->bip_vec;
564 bip->bip_iter = bip_src->bip_iter;
565 bip->bip_flags = bip_src->bip_flags & ~BIP_BLOCK_INTEGRITY;
566
567 return 0;
568 }
569
bioset_integrity_create(struct bio_set * bs,int pool_size)570 int bioset_integrity_create(struct bio_set *bs, int pool_size)
571 {
572 if (mempool_initialized(&bs->bio_integrity_pool))
573 return 0;
574
575 if (mempool_init_slab_pool(&bs->bio_integrity_pool,
576 pool_size, bip_slab))
577 return -1;
578
579 if (biovec_init_pool(&bs->bvec_integrity_pool, pool_size)) {
580 mempool_exit(&bs->bio_integrity_pool);
581 return -1;
582 }
583
584 return 0;
585 }
586 EXPORT_SYMBOL(bioset_integrity_create);
587
bioset_integrity_free(struct bio_set * bs)588 void bioset_integrity_free(struct bio_set *bs)
589 {
590 mempool_exit(&bs->bio_integrity_pool);
591 mempool_exit(&bs->bvec_integrity_pool);
592 }
593
bio_integrity_init(void)594 void __init bio_integrity_init(void)
595 {
596 /*
597 * kintegrityd won't block much but may burn a lot of CPU cycles.
598 * Make it highpri CPU intensive wq with max concurrency of 1.
599 */
600 kintegrityd_wq = alloc_workqueue("kintegrityd", WQ_MEM_RECLAIM |
601 WQ_HIGHPRI | WQ_CPU_INTENSIVE, 1);
602 if (!kintegrityd_wq)
603 panic("Failed to create kintegrityd\n");
604
605 bip_slab = kmem_cache_create("bio_integrity_payload",
606 sizeof(struct bio_integrity_payload) +
607 sizeof(struct bio_vec) * BIO_INLINE_VECS,
608 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
609 }
610