1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * bio-integrity.c - bio data integrity extensions 4 * 5 * Copyright (C) 2007, 2008, 2009 Oracle Corporation 6 * Written by: Martin K. Petersen <martin.petersen@oracle.com> 7 */ 8 9 #include <linux/blk-integrity.h> 10 #include <linux/t10-pi.h> 11 #include "blk.h" 12 13 struct bio_integrity_alloc { 14 struct bio_integrity_payload bip; 15 struct bio_vec bvecs[]; 16 }; 17 18 static mempool_t integrity_buf_pool; 19 20 static bool bi_offload_capable(struct blk_integrity *bi) 21 { 22 return bi->metadata_size == bi->pi_tuple_size; 23 } 24 25 unsigned int __bio_integrity_action(struct bio *bio) 26 { 27 struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); 28 29 if (WARN_ON_ONCE(bio_has_crypt_ctx(bio))) 30 return 0; 31 32 switch (bio_op(bio)) { 33 case REQ_OP_READ: 34 if (bi->flags & BLK_INTEGRITY_NOVERIFY) { 35 if (bi_offload_capable(bi)) 36 return 0; 37 return BI_ACT_BUFFER; 38 } 39 return BI_ACT_BUFFER | BI_ACT_CHECK; 40 case REQ_OP_WRITE: 41 /* 42 * Flush masquerading as write? 43 */ 44 if (!bio_sectors(bio)) 45 return 0; 46 47 /* 48 * Zero the memory allocated to not leak uninitialized kernel 49 * memory to disk for non-integrity metadata where nothing else 50 * initializes the memory. 51 */ 52 if (bi->flags & BLK_INTEGRITY_NOGENERATE) { 53 if (bi_offload_capable(bi)) 54 return 0; 55 return BI_ACT_BUFFER | BI_ACT_ZERO; 56 } 57 58 if (bi->metadata_size > bi->pi_tuple_size) 59 return BI_ACT_BUFFER | BI_ACT_CHECK | BI_ACT_ZERO; 60 return BI_ACT_BUFFER | BI_ACT_CHECK; 61 default: 62 return 0; 63 } 64 } 65 EXPORT_SYMBOL_GPL(__bio_integrity_action); 66 67 void bio_integrity_alloc_buf(struct bio *bio, bool zero_buffer) 68 { 69 struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); 70 struct bio_integrity_payload *bip = bio_integrity(bio); 71 unsigned int len = bio_integrity_bytes(bi, bio_sectors(bio)); 72 gfp_t gfp = GFP_NOIO | (zero_buffer ? __GFP_ZERO : 0); 73 void *buf; 74 75 buf = kmalloc(len, (gfp & ~__GFP_DIRECT_RECLAIM) | 76 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN); 77 if (unlikely(!buf)) { 78 struct page *page; 79 80 page = mempool_alloc(&integrity_buf_pool, GFP_NOFS); 81 if (zero_buffer) 82 memset(page_address(page), 0, len); 83 bvec_set_page(&bip->bip_vec[0], page, len, 0); 84 bip->bip_flags |= BIP_MEMPOOL; 85 } else { 86 bvec_set_page(&bip->bip_vec[0], virt_to_page(buf), len, 87 offset_in_page(buf)); 88 } 89 90 bip->bip_vcnt = 1; 91 bip->bip_iter.bi_size = len; 92 } 93 94 void bio_integrity_free_buf(struct bio_integrity_payload *bip) 95 { 96 struct bio_vec *bv = &bip->bip_vec[0]; 97 98 if (bip->bip_flags & BIP_MEMPOOL) 99 mempool_free(bv->bv_page, &integrity_buf_pool); 100 else 101 kfree(bvec_virt(bv)); 102 } 103 104 void bio_integrity_setup_default(struct bio *bio) 105 { 106 struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); 107 struct bio_integrity_payload *bip = bio_integrity(bio); 108 109 bip_set_seed(bip, bio->bi_iter.bi_sector); 110 111 if (bi->csum_type) { 112 bip->bip_flags |= BIP_CHECK_GUARD; 113 if (bi->csum_type == BLK_INTEGRITY_CSUM_IP) 114 bip->bip_flags |= BIP_IP_CHECKSUM; 115 } 116 if (bi->flags & BLK_INTEGRITY_REF_TAG) 117 bip->bip_flags |= BIP_CHECK_REFTAG; 118 } 119 120 /** 121 * bio_integrity_free - Free bio integrity payload 122 * @bio: bio containing bip to be freed 123 * 124 * Description: Free the integrity portion of a bio. 125 */ 126 void bio_integrity_free(struct bio *bio) 127 { 128 kfree(bio_integrity(bio)); 129 bio->bi_integrity = NULL; 130 bio->bi_opf &= ~REQ_INTEGRITY; 131 } 132 133 void bio_integrity_init(struct bio *bio, struct bio_integrity_payload *bip, 134 struct bio_vec *bvecs, unsigned int nr_vecs) 135 { 136 memset(bip, 0, sizeof(*bip)); 137 bip->bip_max_vcnt = nr_vecs; 138 if (nr_vecs) 139 bip->bip_vec = bvecs; 140 141 bio->bi_integrity = bip; 142 bio->bi_opf |= REQ_INTEGRITY; 143 } 144 145 /** 146 * bio_integrity_alloc - Allocate integrity payload and attach it to bio 147 * @bio: bio to attach integrity metadata to 148 * @gfp_mask: Memory allocation mask 149 * @nr_vecs: Number of integrity metadata scatter-gather elements 150 * 151 * Description: This function prepares a bio for attaching integrity 152 * metadata. nr_vecs specifies the maximum number of pages containing 153 * integrity metadata that can be attached. 154 */ 155 struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, 156 gfp_t gfp_mask, 157 unsigned int nr_vecs) 158 { 159 struct bio_integrity_alloc *bia; 160 161 if (WARN_ON_ONCE(bio_has_crypt_ctx(bio))) 162 return ERR_PTR(-EOPNOTSUPP); 163 164 bia = kmalloc_flex(*bia, bvecs, nr_vecs, gfp_mask); 165 if (unlikely(!bia)) 166 return ERR_PTR(-ENOMEM); 167 bio_integrity_init(bio, &bia->bip, bia->bvecs, nr_vecs); 168 return &bia->bip; 169 } 170 EXPORT_SYMBOL(bio_integrity_alloc); 171 172 static void bio_integrity_unpin_bvec(struct bio_vec *bv, int nr_vecs) 173 { 174 int i; 175 176 for (i = 0; i < nr_vecs; i++) 177 unpin_user_page(bv[i].bv_page); 178 } 179 180 static void bio_integrity_uncopy_user(struct bio_integrity_payload *bip) 181 { 182 unsigned short orig_nr_vecs = bip->bip_max_vcnt - 1; 183 struct bio_vec *orig_bvecs = &bip->bip_vec[1]; 184 struct bio_vec *bounce_bvec = &bip->bip_vec[0]; 185 size_t bytes = bounce_bvec->bv_len; 186 struct iov_iter orig_iter; 187 int ret; 188 189 iov_iter_bvec(&orig_iter, ITER_DEST, orig_bvecs, orig_nr_vecs, bytes); 190 ret = copy_to_iter(bvec_virt(bounce_bvec), bytes, &orig_iter); 191 WARN_ON_ONCE(ret != bytes); 192 193 bio_integrity_unpin_bvec(orig_bvecs, orig_nr_vecs); 194 } 195 196 /** 197 * bio_integrity_unmap_user - Unmap user integrity payload 198 * @bio: bio containing bip to be unmapped 199 * 200 * Unmap the user mapped integrity portion of a bio. 201 */ 202 void bio_integrity_unmap_user(struct bio *bio) 203 { 204 struct bio_integrity_payload *bip = bio_integrity(bio); 205 206 if (bip->bip_flags & BIP_COPY_USER) { 207 if (bio_data_dir(bio) == READ) 208 bio_integrity_uncopy_user(bip); 209 kfree(bvec_virt(bip->bip_vec)); 210 return; 211 } 212 213 bio_integrity_unpin_bvec(bip->bip_vec, bip->bip_max_vcnt); 214 } 215 216 /** 217 * bio_integrity_add_page - Attach integrity metadata 218 * @bio: bio to update 219 * @page: page containing integrity metadata 220 * @len: number of bytes of integrity metadata in page 221 * @offset: start offset within page 222 * 223 * Description: Attach a page containing integrity metadata to bio. 224 */ 225 int bio_integrity_add_page(struct bio *bio, struct page *page, 226 unsigned int len, unsigned int offset) 227 { 228 struct request_queue *q = bdev_get_queue(bio->bi_bdev); 229 struct bio_integrity_payload *bip = bio_integrity(bio); 230 231 if (bip->bip_vcnt > 0) { 232 struct bio_vec *bv = &bip->bip_vec[bip->bip_vcnt - 1]; 233 234 if (!zone_device_pages_compatible(bv->bv_page, page)) 235 return 0; 236 if (zone_device_pages_have_same_pgmap(bv->bv_page, page) && 237 bvec_try_merge_hw_page(q, bv, page, len, offset)) { 238 bip->bip_iter.bi_size += len; 239 return len; 240 } 241 242 if (bip->bip_vcnt >= 243 min(bip->bip_max_vcnt, queue_max_integrity_segments(q))) 244 return 0; 245 246 /* 247 * If the queue doesn't support SG gaps and adding this segment 248 * would create a gap, disallow it. 249 */ 250 if (bvec_gap_to_prev(&q->limits, bv, offset)) 251 return 0; 252 } 253 254 bvec_set_page(&bip->bip_vec[bip->bip_vcnt], page, len, offset); 255 bip->bip_vcnt++; 256 bip->bip_iter.bi_size += len; 257 258 return len; 259 } 260 EXPORT_SYMBOL(bio_integrity_add_page); 261 262 static int bio_integrity_copy_user(struct bio *bio, struct bio_vec *bvec, 263 int nr_vecs, unsigned int len) 264 { 265 bool write = op_is_write(bio_op(bio)); 266 struct bio_integrity_payload *bip; 267 struct iov_iter iter; 268 void *buf; 269 int ret; 270 271 buf = kmalloc(len, GFP_KERNEL); 272 if (!buf) 273 return -ENOMEM; 274 275 if (write) { 276 iov_iter_bvec(&iter, ITER_SOURCE, bvec, nr_vecs, len); 277 if (!copy_from_iter_full(buf, len, &iter)) { 278 ret = -EFAULT; 279 goto free_buf; 280 } 281 282 bip = bio_integrity_alloc(bio, GFP_KERNEL, 1); 283 } else { 284 memset(buf, 0, len); 285 286 /* 287 * We need to preserve the original bvec and the number of vecs 288 * in it for completion handling 289 */ 290 bip = bio_integrity_alloc(bio, GFP_KERNEL, nr_vecs + 1); 291 } 292 293 if (IS_ERR(bip)) { 294 ret = PTR_ERR(bip); 295 goto free_buf; 296 } 297 298 if (write) 299 bio_integrity_unpin_bvec(bvec, nr_vecs); 300 else 301 memcpy(&bip->bip_vec[1], bvec, nr_vecs * sizeof(*bvec)); 302 303 ret = bio_integrity_add_page(bio, virt_to_page(buf), len, 304 offset_in_page(buf)); 305 if (ret != len) { 306 ret = -ENOMEM; 307 goto free_bip; 308 } 309 310 bip->bip_flags |= BIP_COPY_USER; 311 return 0; 312 free_bip: 313 bio_integrity_free(bio); 314 free_buf: 315 kfree(buf); 316 return ret; 317 } 318 319 static int bio_integrity_init_user(struct bio *bio, struct bio_vec *bvec, 320 int nr_vecs, unsigned int len) 321 { 322 struct bio_integrity_payload *bip; 323 324 bip = bio_integrity_alloc(bio, GFP_KERNEL, nr_vecs); 325 if (IS_ERR(bip)) 326 return PTR_ERR(bip); 327 328 memcpy(bip->bip_vec, bvec, nr_vecs * sizeof(*bvec)); 329 bip->bip_iter.bi_size = len; 330 bip->bip_vcnt = nr_vecs; 331 return 0; 332 } 333 334 static unsigned int bvec_from_pages(struct bio_vec *bvec, struct page **pages, 335 int nr_vecs, ssize_t bytes, ssize_t offset, 336 bool *is_p2p) 337 { 338 unsigned int nr_bvecs = 0; 339 int i, j; 340 341 for (i = 0; i < nr_vecs; i = j) { 342 size_t size = min_t(size_t, bytes, PAGE_SIZE - offset); 343 struct folio *folio = page_folio(pages[i]); 344 345 bytes -= size; 346 for (j = i + 1; j < nr_vecs; j++) { 347 size_t next = min_t(size_t, PAGE_SIZE, bytes); 348 349 if (page_folio(pages[j]) != folio || 350 pages[j] != pages[j - 1] + 1) 351 break; 352 unpin_user_page(pages[j]); 353 size += next; 354 bytes -= next; 355 } 356 357 if (is_pci_p2pdma_page(pages[i])) 358 *is_p2p = true; 359 360 bvec_set_page(&bvec[nr_bvecs], pages[i], size, offset); 361 offset = 0; 362 nr_bvecs++; 363 } 364 365 return nr_bvecs; 366 } 367 368 int bio_integrity_map_user(struct bio *bio, struct iov_iter *iter) 369 { 370 struct request_queue *q = bdev_get_queue(bio->bi_bdev); 371 struct page *stack_pages[UIO_FASTIOV], **pages = stack_pages; 372 struct bio_vec stack_vec[UIO_FASTIOV], *bvec = stack_vec; 373 iov_iter_extraction_t extraction_flags = 0; 374 size_t offset, bytes = iter->count; 375 bool copy, is_p2p = false; 376 unsigned int nr_bvecs; 377 int ret, nr_vecs; 378 379 if (bio_integrity(bio)) 380 return -EINVAL; 381 if (bytes >> SECTOR_SHIFT > queue_max_hw_sectors(q)) 382 return -E2BIG; 383 384 nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS + 1); 385 if (nr_vecs > BIO_MAX_VECS) 386 return -E2BIG; 387 if (nr_vecs > UIO_FASTIOV) { 388 bvec = kzalloc_objs(*bvec, nr_vecs); 389 if (!bvec) 390 return -ENOMEM; 391 pages = NULL; 392 } 393 394 copy = iov_iter_alignment(iter) & 395 blk_lim_dma_alignment_and_pad(&q->limits); 396 397 if (blk_queue_pci_p2pdma(q)) 398 extraction_flags |= ITER_ALLOW_P2PDMA; 399 400 ret = iov_iter_extract_pages(iter, &pages, bytes, nr_vecs, 401 extraction_flags, &offset); 402 if (unlikely(ret < 0)) 403 goto free_bvec; 404 405 /* 406 * Handle partial pinning. This can happen when pin_user_pages_fast() 407 * returns fewer pages than requested. 408 */ 409 if (user_backed_iter(iter) && unlikely(ret != bytes)) { 410 if (ret > 0) { 411 int npinned = DIV_ROUND_UP(offset + ret, PAGE_SIZE); 412 int i; 413 414 for (i = 0; i < npinned; i++) 415 unpin_user_page(pages[i]); 416 } 417 if (pages != stack_pages) 418 kvfree(pages); 419 ret = -EFAULT; 420 goto free_bvec; 421 } 422 423 nr_bvecs = bvec_from_pages(bvec, pages, nr_vecs, bytes, offset, 424 &is_p2p); 425 if (pages != stack_pages) 426 kvfree(pages); 427 if (nr_bvecs > queue_max_integrity_segments(q)) 428 copy = true; 429 if (is_p2p) 430 bio->bi_opf |= REQ_NOMERGE; 431 432 if (copy) 433 ret = bio_integrity_copy_user(bio, bvec, nr_bvecs, bytes); 434 else 435 ret = bio_integrity_init_user(bio, bvec, nr_bvecs, bytes); 436 if (ret) 437 goto release_pages; 438 if (bvec != stack_vec) 439 kfree(bvec); 440 441 return 0; 442 443 release_pages: 444 bio_integrity_unpin_bvec(bvec, nr_bvecs); 445 free_bvec: 446 if (bvec != stack_vec) 447 kfree(bvec); 448 return ret; 449 } 450 451 static void bio_uio_meta_to_bip(struct bio *bio, struct uio_meta *meta) 452 { 453 struct bio_integrity_payload *bip = bio_integrity(bio); 454 455 if (meta->flags & IO_INTEGRITY_CHK_GUARD) 456 bip->bip_flags |= BIP_CHECK_GUARD; 457 if (meta->flags & IO_INTEGRITY_CHK_APPTAG) 458 bip->bip_flags |= BIP_CHECK_APPTAG; 459 if (meta->flags & IO_INTEGRITY_CHK_REFTAG) 460 bip->bip_flags |= BIP_CHECK_REFTAG; 461 462 bip->app_tag = meta->app_tag; 463 } 464 465 int bio_integrity_map_iter(struct bio *bio, struct uio_meta *meta) 466 { 467 struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); 468 unsigned int integrity_bytes; 469 int ret; 470 struct iov_iter it; 471 472 if (!bi) 473 return -EINVAL; 474 /* 475 * original meta iterator can be bigger. 476 * process integrity info corresponding to current data buffer only. 477 */ 478 it = meta->iter; 479 integrity_bytes = bio_integrity_bytes(bi, bio_sectors(bio)); 480 if (it.count < integrity_bytes) 481 return -EINVAL; 482 483 /* should fit into two bytes */ 484 BUILD_BUG_ON(IO_INTEGRITY_VALID_FLAGS >= (1 << 16)); 485 486 if (meta->flags && (meta->flags & ~IO_INTEGRITY_VALID_FLAGS)) 487 return -EINVAL; 488 489 it.count = integrity_bytes; 490 ret = bio_integrity_map_user(bio, &it); 491 if (!ret) { 492 bio_uio_meta_to_bip(bio, meta); 493 bip_set_seed(bio_integrity(bio), meta->seed); 494 iov_iter_advance(&meta->iter, integrity_bytes); 495 meta->seed += bio_integrity_intervals(bi, bio_sectors(bio)); 496 } 497 return ret; 498 } 499 500 /** 501 * bio_integrity_advance - Advance integrity vector 502 * @bio: bio whose integrity vector to update 503 * @bytes_done: number of data bytes that have been completed 504 * 505 * Description: This function calculates how many integrity bytes the 506 * number of completed data bytes correspond to and advances the 507 * integrity vector accordingly. 508 */ 509 void bio_integrity_advance(struct bio *bio, unsigned int bytes_done) 510 { 511 struct bio_integrity_payload *bip = bio_integrity(bio); 512 struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); 513 unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9); 514 515 bip->bip_iter.bi_sector += bio_integrity_intervals(bi, bytes_done >> 9); 516 bvec_iter_advance(bip->bip_vec, &bip->bip_iter, bytes); 517 } 518 519 /** 520 * bio_integrity_trim - Trim integrity vector 521 * @bio: bio whose integrity vector to update 522 * 523 * Description: Used to trim the integrity vector in a cloned bio. 524 */ 525 void bio_integrity_trim(struct bio *bio) 526 { 527 struct bio_integrity_payload *bip = bio_integrity(bio); 528 struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); 529 530 bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio)); 531 } 532 EXPORT_SYMBOL(bio_integrity_trim); 533 534 /** 535 * bio_integrity_clone - Callback for cloning bios with integrity metadata 536 * @bio: New bio 537 * @bio_src: Original bio 538 * @gfp_mask: Memory allocation mask 539 * 540 * Description: Called to allocate a bip when cloning a bio 541 */ 542 int bio_integrity_clone(struct bio *bio, struct bio *bio_src, 543 gfp_t gfp_mask) 544 { 545 struct bio_integrity_payload *bip_src = bio_integrity(bio_src); 546 struct bio_integrity_payload *bip; 547 548 BUG_ON(bip_src == NULL); 549 550 bip = bio_integrity_alloc(bio, gfp_mask, 0); 551 if (IS_ERR(bip)) 552 return PTR_ERR(bip); 553 554 bip->bip_vec = bip_src->bip_vec; 555 bip->bip_iter = bip_src->bip_iter; 556 bip->bip_flags = bip_src->bip_flags & BIP_CLONE_FLAGS; 557 bip->app_tag = bip_src->app_tag; 558 559 return 0; 560 } 561 562 static int __init bio_integrity_initfn(void) 563 { 564 if (mempool_init_page_pool(&integrity_buf_pool, BIO_POOL_SIZE, 565 get_order(BLK_INTEGRITY_MAX_SIZE))) 566 panic("bio: can't create integrity buf pool\n"); 567 return 0; 568 } 569 subsys_initcall(bio_integrity_initfn); 570