1 /* 2 * Copyright (C) 2016-2017 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2016-2017 Milan Broz 4 * Copyright (C) 2016-2017 Mikulas Patocka 5 * 6 * This file is released under the GPL. 7 */ 8 9 #include <linux/module.h> 10 #include <linux/device-mapper.h> 11 #include <linux/dm-io.h> 12 #include <linux/vmalloc.h> 13 #include <linux/sort.h> 14 #include <linux/rbtree.h> 15 #include <linux/delay.h> 16 #include <linux/random.h> 17 #include <crypto/hash.h> 18 #include <crypto/skcipher.h> 19 #include <linux/async_tx.h> 20 #include "dm-bufio.h" 21 22 #define DM_MSG_PREFIX "integrity" 23 24 #define DEFAULT_INTERLEAVE_SECTORS 32768 25 #define DEFAULT_JOURNAL_SIZE_FACTOR 7 26 #define DEFAULT_BUFFER_SECTORS 128 27 #define DEFAULT_JOURNAL_WATERMARK 50 28 #define DEFAULT_SYNC_MSEC 10000 29 #define DEFAULT_MAX_JOURNAL_SECTORS 131072 30 #define MIN_LOG2_INTERLEAVE_SECTORS 3 31 #define MAX_LOG2_INTERLEAVE_SECTORS 31 32 #define METADATA_WORKQUEUE_MAX_ACTIVE 16 33 34 /* 35 * Warning - DEBUG_PRINT prints security-sensitive data to the log, 36 * so it should not be enabled in the official kernel 37 */ 38 //#define DEBUG_PRINT 39 //#define INTERNAL_VERIFY 40 41 /* 42 * On disk structures 43 */ 44 45 #define SB_MAGIC "integrt" 46 #define SB_VERSION 1 47 #define SB_SECTORS 8 48 #define MAX_SECTORS_PER_BLOCK 8 49 50 struct superblock { 51 __u8 magic[8]; 52 __u8 version; 53 __u8 log2_interleave_sectors; 54 __u16 integrity_tag_size; 55 __u32 journal_sections; 56 __u64 provided_data_sectors; /* userspace uses this value */ 57 __u32 flags; 58 __u8 log2_sectors_per_block; 59 }; 60 61 #define SB_FLAG_HAVE_JOURNAL_MAC 0x1 62 63 #define JOURNAL_ENTRY_ROUNDUP 8 64 65 typedef __u64 commit_id_t; 66 #define JOURNAL_MAC_PER_SECTOR 8 67 68 struct journal_entry { 69 union { 70 struct { 71 __u32 sector_lo; 72 __u32 sector_hi; 73 } s; 74 __u64 sector; 75 } u; 76 commit_id_t last_bytes[0]; 77 /* __u8 tag[0]; */ 78 }; 79 80 #define journal_entry_tag(ic, je) ((__u8 *)&(je)->last_bytes[(ic)->sectors_per_block]) 81 82 #if BITS_PER_LONG == 64 83 #define journal_entry_set_sector(je, x) do { smp_wmb(); ACCESS_ONCE((je)->u.sector) = cpu_to_le64(x); } while (0) 84 #define journal_entry_get_sector(je) le64_to_cpu((je)->u.sector) 85 #elif defined(CONFIG_LBDAF) 86 #define journal_entry_set_sector(je, x) do { (je)->u.s.sector_lo = cpu_to_le32(x); smp_wmb(); ACCESS_ONCE((je)->u.s.sector_hi) = cpu_to_le32((x) >> 32); } while (0) 87 #define journal_entry_get_sector(je) le64_to_cpu((je)->u.sector) 88 #else 89 #define journal_entry_set_sector(je, x) do { (je)->u.s.sector_lo = cpu_to_le32(x); smp_wmb(); ACCESS_ONCE((je)->u.s.sector_hi) = cpu_to_le32(0); } while (0) 90 #define journal_entry_get_sector(je) le32_to_cpu((je)->u.s.sector_lo) 91 #endif 92 #define journal_entry_is_unused(je) ((je)->u.s.sector_hi == cpu_to_le32(-1)) 93 #define journal_entry_set_unused(je) do { ((je)->u.s.sector_hi = cpu_to_le32(-1)); } while (0) 94 #define journal_entry_is_inprogress(je) ((je)->u.s.sector_hi == cpu_to_le32(-2)) 95 #define journal_entry_set_inprogress(je) do { ((je)->u.s.sector_hi = cpu_to_le32(-2)); } while (0) 96 97 #define JOURNAL_BLOCK_SECTORS 8 98 #define JOURNAL_SECTOR_DATA ((1 << SECTOR_SHIFT) - sizeof(commit_id_t)) 99 #define JOURNAL_MAC_SIZE (JOURNAL_MAC_PER_SECTOR * JOURNAL_BLOCK_SECTORS) 100 101 struct journal_sector { 102 __u8 entries[JOURNAL_SECTOR_DATA - JOURNAL_MAC_PER_SECTOR]; 103 __u8 mac[JOURNAL_MAC_PER_SECTOR]; 104 commit_id_t commit_id; 105 }; 106 107 #define MAX_TAG_SIZE (JOURNAL_SECTOR_DATA - JOURNAL_MAC_PER_SECTOR - offsetof(struct journal_entry, last_bytes[MAX_SECTORS_PER_BLOCK])) 108 109 #define METADATA_PADDING_SECTORS 8 110 111 #define N_COMMIT_IDS 4 112 113 static unsigned char prev_commit_seq(unsigned char seq) 114 { 115 return (seq + N_COMMIT_IDS - 1) % N_COMMIT_IDS; 116 } 117 118 static unsigned char next_commit_seq(unsigned char seq) 119 { 120 return (seq + 1) % N_COMMIT_IDS; 121 } 122 123 /* 124 * In-memory structures 125 */ 126 127 struct journal_node { 128 struct rb_node node; 129 sector_t sector; 130 }; 131 132 struct alg_spec { 133 char *alg_string; 134 char *key_string; 135 __u8 *key; 136 unsigned key_size; 137 }; 138 139 struct dm_integrity_c { 140 struct dm_dev *dev; 141 unsigned tag_size; 142 __s8 log2_tag_size; 143 sector_t start; 144 mempool_t *journal_io_mempool; 145 struct dm_io_client *io; 146 struct dm_bufio_client *bufio; 147 struct workqueue_struct *metadata_wq; 148 struct superblock *sb; 149 unsigned journal_pages; 150 struct page_list *journal; 151 struct page_list *journal_io; 152 struct page_list *journal_xor; 153 154 struct crypto_skcipher *journal_crypt; 155 struct scatterlist **journal_scatterlist; 156 struct scatterlist **journal_io_scatterlist; 157 struct skcipher_request **sk_requests; 158 159 struct crypto_shash *journal_mac; 160 161 struct journal_node *journal_tree; 162 struct rb_root journal_tree_root; 163 164 sector_t provided_data_sectors; 165 166 unsigned short journal_entry_size; 167 unsigned char journal_entries_per_sector; 168 unsigned char journal_section_entries; 169 unsigned short journal_section_sectors; 170 unsigned journal_sections; 171 unsigned journal_entries; 172 sector_t device_sectors; 173 unsigned initial_sectors; 174 unsigned metadata_run; 175 __s8 log2_metadata_run; 176 __u8 log2_buffer_sectors; 177 __u8 sectors_per_block; 178 179 unsigned char mode; 180 bool suspending; 181 182 int failed; 183 184 struct crypto_shash *internal_hash; 185 186 /* these variables are locked with endio_wait.lock */ 187 struct rb_root in_progress; 188 wait_queue_head_t endio_wait; 189 struct workqueue_struct *wait_wq; 190 191 unsigned char commit_seq; 192 commit_id_t commit_ids[N_COMMIT_IDS]; 193 194 unsigned committed_section; 195 unsigned n_committed_sections; 196 197 unsigned uncommitted_section; 198 unsigned n_uncommitted_sections; 199 200 unsigned free_section; 201 unsigned char free_section_entry; 202 unsigned free_sectors; 203 204 unsigned free_sectors_threshold; 205 206 struct workqueue_struct *commit_wq; 207 struct work_struct commit_work; 208 209 struct workqueue_struct *writer_wq; 210 struct work_struct writer_work; 211 212 struct bio_list flush_bio_list; 213 214 unsigned long autocommit_jiffies; 215 struct timer_list autocommit_timer; 216 unsigned autocommit_msec; 217 218 wait_queue_head_t copy_to_journal_wait; 219 220 struct completion crypto_backoff; 221 222 bool journal_uptodate; 223 bool just_formatted; 224 225 struct alg_spec internal_hash_alg; 226 struct alg_spec journal_crypt_alg; 227 struct alg_spec journal_mac_alg; 228 }; 229 230 struct dm_integrity_range { 231 sector_t logical_sector; 232 unsigned n_sectors; 233 struct rb_node node; 234 }; 235 236 struct dm_integrity_io { 237 struct work_struct work; 238 239 struct dm_integrity_c *ic; 240 bool write; 241 bool fua; 242 243 struct dm_integrity_range range; 244 245 sector_t metadata_block; 246 unsigned metadata_offset; 247 248 atomic_t in_flight; 249 blk_status_t bi_status; 250 251 struct completion *completion; 252 253 struct gendisk *orig_bi_disk; 254 u8 orig_bi_partno; 255 bio_end_io_t *orig_bi_end_io; 256 struct bio_integrity_payload *orig_bi_integrity; 257 struct bvec_iter orig_bi_iter; 258 }; 259 260 struct journal_completion { 261 struct dm_integrity_c *ic; 262 atomic_t in_flight; 263 struct completion comp; 264 }; 265 266 struct journal_io { 267 struct dm_integrity_range range; 268 struct journal_completion *comp; 269 }; 270 271 static struct kmem_cache *journal_io_cache; 272 273 #define JOURNAL_IO_MEMPOOL 32 274 275 #ifdef DEBUG_PRINT 276 #define DEBUG_print(x, ...) printk(KERN_DEBUG x, ##__VA_ARGS__) 277 static void __DEBUG_bytes(__u8 *bytes, size_t len, const char *msg, ...) 278 { 279 va_list args; 280 va_start(args, msg); 281 vprintk(msg, args); 282 va_end(args); 283 if (len) 284 pr_cont(":"); 285 while (len) { 286 pr_cont(" %02x", *bytes); 287 bytes++; 288 len--; 289 } 290 pr_cont("\n"); 291 } 292 #define DEBUG_bytes(bytes, len, msg, ...) __DEBUG_bytes(bytes, len, KERN_DEBUG msg, ##__VA_ARGS__) 293 #else 294 #define DEBUG_print(x, ...) do { } while (0) 295 #define DEBUG_bytes(bytes, len, msg, ...) do { } while (0) 296 #endif 297 298 /* 299 * DM Integrity profile, protection is performed layer above (dm-crypt) 300 */ 301 static struct blk_integrity_profile dm_integrity_profile = { 302 .name = "DM-DIF-EXT-TAG", 303 .generate_fn = NULL, 304 .verify_fn = NULL, 305 }; 306 307 static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map); 308 static void integrity_bio_wait(struct work_struct *w); 309 static void dm_integrity_dtr(struct dm_target *ti); 310 311 static void dm_integrity_io_error(struct dm_integrity_c *ic, const char *msg, int err) 312 { 313 if (!cmpxchg(&ic->failed, 0, err)) 314 DMERR("Error on %s: %d", msg, err); 315 } 316 317 static int dm_integrity_failed(struct dm_integrity_c *ic) 318 { 319 return ACCESS_ONCE(ic->failed); 320 } 321 322 static commit_id_t dm_integrity_commit_id(struct dm_integrity_c *ic, unsigned i, 323 unsigned j, unsigned char seq) 324 { 325 /* 326 * Xor the number with section and sector, so that if a piece of 327 * journal is written at wrong place, it is detected. 328 */ 329 return ic->commit_ids[seq] ^ cpu_to_le64(((__u64)i << 32) ^ j); 330 } 331 332 static void get_area_and_offset(struct dm_integrity_c *ic, sector_t data_sector, 333 sector_t *area, sector_t *offset) 334 { 335 __u8 log2_interleave_sectors = ic->sb->log2_interleave_sectors; 336 337 *area = data_sector >> log2_interleave_sectors; 338 *offset = (unsigned)data_sector & ((1U << log2_interleave_sectors) - 1); 339 } 340 341 #define sector_to_block(ic, n) \ 342 do { \ 343 BUG_ON((n) & (unsigned)((ic)->sectors_per_block - 1)); \ 344 (n) >>= (ic)->sb->log2_sectors_per_block; \ 345 } while (0) 346 347 static __u64 get_metadata_sector_and_offset(struct dm_integrity_c *ic, sector_t area, 348 sector_t offset, unsigned *metadata_offset) 349 { 350 __u64 ms; 351 unsigned mo; 352 353 ms = area << ic->sb->log2_interleave_sectors; 354 if (likely(ic->log2_metadata_run >= 0)) 355 ms += area << ic->log2_metadata_run; 356 else 357 ms += area * ic->metadata_run; 358 ms >>= ic->log2_buffer_sectors; 359 360 sector_to_block(ic, offset); 361 362 if (likely(ic->log2_tag_size >= 0)) { 363 ms += offset >> (SECTOR_SHIFT + ic->log2_buffer_sectors - ic->log2_tag_size); 364 mo = (offset << ic->log2_tag_size) & ((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - 1); 365 } else { 366 ms += (__u64)offset * ic->tag_size >> (SECTOR_SHIFT + ic->log2_buffer_sectors); 367 mo = (offset * ic->tag_size) & ((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - 1); 368 } 369 *metadata_offset = mo; 370 return ms; 371 } 372 373 static sector_t get_data_sector(struct dm_integrity_c *ic, sector_t area, sector_t offset) 374 { 375 sector_t result; 376 377 result = area << ic->sb->log2_interleave_sectors; 378 if (likely(ic->log2_metadata_run >= 0)) 379 result += (area + 1) << ic->log2_metadata_run; 380 else 381 result += (area + 1) * ic->metadata_run; 382 383 result += (sector_t)ic->initial_sectors + offset; 384 return result; 385 } 386 387 static void wraparound_section(struct dm_integrity_c *ic, unsigned *sec_ptr) 388 { 389 if (unlikely(*sec_ptr >= ic->journal_sections)) 390 *sec_ptr -= ic->journal_sections; 391 } 392 393 static int sync_rw_sb(struct dm_integrity_c *ic, int op, int op_flags) 394 { 395 struct dm_io_request io_req; 396 struct dm_io_region io_loc; 397 398 io_req.bi_op = op; 399 io_req.bi_op_flags = op_flags; 400 io_req.mem.type = DM_IO_KMEM; 401 io_req.mem.ptr.addr = ic->sb; 402 io_req.notify.fn = NULL; 403 io_req.client = ic->io; 404 io_loc.bdev = ic->dev->bdev; 405 io_loc.sector = ic->start; 406 io_loc.count = SB_SECTORS; 407 408 return dm_io(&io_req, 1, &io_loc, NULL); 409 } 410 411 static void access_journal_check(struct dm_integrity_c *ic, unsigned section, unsigned offset, 412 bool e, const char *function) 413 { 414 #if defined(CONFIG_DM_DEBUG) || defined(INTERNAL_VERIFY) 415 unsigned limit = e ? ic->journal_section_entries : ic->journal_section_sectors; 416 417 if (unlikely(section >= ic->journal_sections) || 418 unlikely(offset >= limit)) { 419 printk(KERN_CRIT "%s: invalid access at (%u,%u), limit (%u,%u)\n", 420 function, section, offset, ic->journal_sections, limit); 421 BUG(); 422 } 423 #endif 424 } 425 426 static void page_list_location(struct dm_integrity_c *ic, unsigned section, unsigned offset, 427 unsigned *pl_index, unsigned *pl_offset) 428 { 429 unsigned sector; 430 431 access_journal_check(ic, section, offset, false, "page_list_location"); 432 433 sector = section * ic->journal_section_sectors + offset; 434 435 *pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT); 436 *pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1); 437 } 438 439 static struct journal_sector *access_page_list(struct dm_integrity_c *ic, struct page_list *pl, 440 unsigned section, unsigned offset, unsigned *n_sectors) 441 { 442 unsigned pl_index, pl_offset; 443 char *va; 444 445 page_list_location(ic, section, offset, &pl_index, &pl_offset); 446 447 if (n_sectors) 448 *n_sectors = (PAGE_SIZE - pl_offset) >> SECTOR_SHIFT; 449 450 va = lowmem_page_address(pl[pl_index].page); 451 452 return (struct journal_sector *)(va + pl_offset); 453 } 454 455 static struct journal_sector *access_journal(struct dm_integrity_c *ic, unsigned section, unsigned offset) 456 { 457 return access_page_list(ic, ic->journal, section, offset, NULL); 458 } 459 460 static struct journal_entry *access_journal_entry(struct dm_integrity_c *ic, unsigned section, unsigned n) 461 { 462 unsigned rel_sector, offset; 463 struct journal_sector *js; 464 465 access_journal_check(ic, section, n, true, "access_journal_entry"); 466 467 rel_sector = n % JOURNAL_BLOCK_SECTORS; 468 offset = n / JOURNAL_BLOCK_SECTORS; 469 470 js = access_journal(ic, section, rel_sector); 471 return (struct journal_entry *)((char *)js + offset * ic->journal_entry_size); 472 } 473 474 static struct journal_sector *access_journal_data(struct dm_integrity_c *ic, unsigned section, unsigned n) 475 { 476 n <<= ic->sb->log2_sectors_per_block; 477 478 n += JOURNAL_BLOCK_SECTORS; 479 480 access_journal_check(ic, section, n, false, "access_journal_data"); 481 482 return access_journal(ic, section, n); 483 } 484 485 static void section_mac(struct dm_integrity_c *ic, unsigned section, __u8 result[JOURNAL_MAC_SIZE]) 486 { 487 SHASH_DESC_ON_STACK(desc, ic->journal_mac); 488 int r; 489 unsigned j, size; 490 491 desc->tfm = ic->journal_mac; 492 desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; 493 494 r = crypto_shash_init(desc); 495 if (unlikely(r)) { 496 dm_integrity_io_error(ic, "crypto_shash_init", r); 497 goto err; 498 } 499 500 for (j = 0; j < ic->journal_section_entries; j++) { 501 struct journal_entry *je = access_journal_entry(ic, section, j); 502 r = crypto_shash_update(desc, (__u8 *)&je->u.sector, sizeof je->u.sector); 503 if (unlikely(r)) { 504 dm_integrity_io_error(ic, "crypto_shash_update", r); 505 goto err; 506 } 507 } 508 509 size = crypto_shash_digestsize(ic->journal_mac); 510 511 if (likely(size <= JOURNAL_MAC_SIZE)) { 512 r = crypto_shash_final(desc, result); 513 if (unlikely(r)) { 514 dm_integrity_io_error(ic, "crypto_shash_final", r); 515 goto err; 516 } 517 memset(result + size, 0, JOURNAL_MAC_SIZE - size); 518 } else { 519 __u8 digest[size]; 520 r = crypto_shash_final(desc, digest); 521 if (unlikely(r)) { 522 dm_integrity_io_error(ic, "crypto_shash_final", r); 523 goto err; 524 } 525 memcpy(result, digest, JOURNAL_MAC_SIZE); 526 } 527 528 return; 529 err: 530 memset(result, 0, JOURNAL_MAC_SIZE); 531 } 532 533 static void rw_section_mac(struct dm_integrity_c *ic, unsigned section, bool wr) 534 { 535 __u8 result[JOURNAL_MAC_SIZE]; 536 unsigned j; 537 538 if (!ic->journal_mac) 539 return; 540 541 section_mac(ic, section, result); 542 543 for (j = 0; j < JOURNAL_BLOCK_SECTORS; j++) { 544 struct journal_sector *js = access_journal(ic, section, j); 545 546 if (likely(wr)) 547 memcpy(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR); 548 else { 549 if (memcmp(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR)) 550 dm_integrity_io_error(ic, "journal mac", -EILSEQ); 551 } 552 } 553 } 554 555 static void complete_journal_op(void *context) 556 { 557 struct journal_completion *comp = context; 558 BUG_ON(!atomic_read(&comp->in_flight)); 559 if (likely(atomic_dec_and_test(&comp->in_flight))) 560 complete(&comp->comp); 561 } 562 563 static void xor_journal(struct dm_integrity_c *ic, bool encrypt, unsigned section, 564 unsigned n_sections, struct journal_completion *comp) 565 { 566 struct async_submit_ctl submit; 567 size_t n_bytes = (size_t)(n_sections * ic->journal_section_sectors) << SECTOR_SHIFT; 568 unsigned pl_index, pl_offset, section_index; 569 struct page_list *source_pl, *target_pl; 570 571 if (likely(encrypt)) { 572 source_pl = ic->journal; 573 target_pl = ic->journal_io; 574 } else { 575 source_pl = ic->journal_io; 576 target_pl = ic->journal; 577 } 578 579 page_list_location(ic, section, 0, &pl_index, &pl_offset); 580 581 atomic_add(roundup(pl_offset + n_bytes, PAGE_SIZE) >> PAGE_SHIFT, &comp->in_flight); 582 583 init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, complete_journal_op, comp, NULL); 584 585 section_index = pl_index; 586 587 do { 588 size_t this_step; 589 struct page *src_pages[2]; 590 struct page *dst_page; 591 592 while (unlikely(pl_index == section_index)) { 593 unsigned dummy; 594 if (likely(encrypt)) 595 rw_section_mac(ic, section, true); 596 section++; 597 n_sections--; 598 if (!n_sections) 599 break; 600 page_list_location(ic, section, 0, §ion_index, &dummy); 601 } 602 603 this_step = min(n_bytes, (size_t)PAGE_SIZE - pl_offset); 604 dst_page = target_pl[pl_index].page; 605 src_pages[0] = source_pl[pl_index].page; 606 src_pages[1] = ic->journal_xor[pl_index].page; 607 608 async_xor(dst_page, src_pages, pl_offset, 2, this_step, &submit); 609 610 pl_index++; 611 pl_offset = 0; 612 n_bytes -= this_step; 613 } while (n_bytes); 614 615 BUG_ON(n_sections); 616 617 async_tx_issue_pending_all(); 618 } 619 620 static void complete_journal_encrypt(struct crypto_async_request *req, int err) 621 { 622 struct journal_completion *comp = req->data; 623 if (unlikely(err)) { 624 if (likely(err == -EINPROGRESS)) { 625 complete(&comp->ic->crypto_backoff); 626 return; 627 } 628 dm_integrity_io_error(comp->ic, "asynchronous encrypt", err); 629 } 630 complete_journal_op(comp); 631 } 632 633 static bool do_crypt(bool encrypt, struct skcipher_request *req, struct journal_completion *comp) 634 { 635 int r; 636 skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, 637 complete_journal_encrypt, comp); 638 if (likely(encrypt)) 639 r = crypto_skcipher_encrypt(req); 640 else 641 r = crypto_skcipher_decrypt(req); 642 if (likely(!r)) 643 return false; 644 if (likely(r == -EINPROGRESS)) 645 return true; 646 if (likely(r == -EBUSY)) { 647 wait_for_completion(&comp->ic->crypto_backoff); 648 reinit_completion(&comp->ic->crypto_backoff); 649 return true; 650 } 651 dm_integrity_io_error(comp->ic, "encrypt", r); 652 return false; 653 } 654 655 static void crypt_journal(struct dm_integrity_c *ic, bool encrypt, unsigned section, 656 unsigned n_sections, struct journal_completion *comp) 657 { 658 struct scatterlist **source_sg; 659 struct scatterlist **target_sg; 660 661 atomic_add(2, &comp->in_flight); 662 663 if (likely(encrypt)) { 664 source_sg = ic->journal_scatterlist; 665 target_sg = ic->journal_io_scatterlist; 666 } else { 667 source_sg = ic->journal_io_scatterlist; 668 target_sg = ic->journal_scatterlist; 669 } 670 671 do { 672 struct skcipher_request *req; 673 unsigned ivsize; 674 char *iv; 675 676 if (likely(encrypt)) 677 rw_section_mac(ic, section, true); 678 679 req = ic->sk_requests[section]; 680 ivsize = crypto_skcipher_ivsize(ic->journal_crypt); 681 iv = req->iv; 682 683 memcpy(iv, iv + ivsize, ivsize); 684 685 req->src = source_sg[section]; 686 req->dst = target_sg[section]; 687 688 if (unlikely(do_crypt(encrypt, req, comp))) 689 atomic_inc(&comp->in_flight); 690 691 section++; 692 n_sections--; 693 } while (n_sections); 694 695 atomic_dec(&comp->in_flight); 696 complete_journal_op(comp); 697 } 698 699 static void encrypt_journal(struct dm_integrity_c *ic, bool encrypt, unsigned section, 700 unsigned n_sections, struct journal_completion *comp) 701 { 702 if (ic->journal_xor) 703 return xor_journal(ic, encrypt, section, n_sections, comp); 704 else 705 return crypt_journal(ic, encrypt, section, n_sections, comp); 706 } 707 708 static void complete_journal_io(unsigned long error, void *context) 709 { 710 struct journal_completion *comp = context; 711 if (unlikely(error != 0)) 712 dm_integrity_io_error(comp->ic, "writing journal", -EIO); 713 complete_journal_op(comp); 714 } 715 716 static void rw_journal(struct dm_integrity_c *ic, int op, int op_flags, unsigned section, 717 unsigned n_sections, struct journal_completion *comp) 718 { 719 struct dm_io_request io_req; 720 struct dm_io_region io_loc; 721 unsigned sector, n_sectors, pl_index, pl_offset; 722 int r; 723 724 if (unlikely(dm_integrity_failed(ic))) { 725 if (comp) 726 complete_journal_io(-1UL, comp); 727 return; 728 } 729 730 sector = section * ic->journal_section_sectors; 731 n_sectors = n_sections * ic->journal_section_sectors; 732 733 pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT); 734 pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1); 735 736 io_req.bi_op = op; 737 io_req.bi_op_flags = op_flags; 738 io_req.mem.type = DM_IO_PAGE_LIST; 739 if (ic->journal_io) 740 io_req.mem.ptr.pl = &ic->journal_io[pl_index]; 741 else 742 io_req.mem.ptr.pl = &ic->journal[pl_index]; 743 io_req.mem.offset = pl_offset; 744 if (likely(comp != NULL)) { 745 io_req.notify.fn = complete_journal_io; 746 io_req.notify.context = comp; 747 } else { 748 io_req.notify.fn = NULL; 749 } 750 io_req.client = ic->io; 751 io_loc.bdev = ic->dev->bdev; 752 io_loc.sector = ic->start + SB_SECTORS + sector; 753 io_loc.count = n_sectors; 754 755 r = dm_io(&io_req, 1, &io_loc, NULL); 756 if (unlikely(r)) { 757 dm_integrity_io_error(ic, op == REQ_OP_READ ? "reading journal" : "writing journal", r); 758 if (comp) { 759 WARN_ONCE(1, "asynchronous dm_io failed: %d", r); 760 complete_journal_io(-1UL, comp); 761 } 762 } 763 } 764 765 static void write_journal(struct dm_integrity_c *ic, unsigned commit_start, unsigned commit_sections) 766 { 767 struct journal_completion io_comp; 768 struct journal_completion crypt_comp_1; 769 struct journal_completion crypt_comp_2; 770 unsigned i; 771 772 io_comp.ic = ic; 773 io_comp.comp = COMPLETION_INITIALIZER_ONSTACK(io_comp.comp); 774 775 if (commit_start + commit_sections <= ic->journal_sections) { 776 io_comp.in_flight = (atomic_t)ATOMIC_INIT(1); 777 if (ic->journal_io) { 778 crypt_comp_1.ic = ic; 779 crypt_comp_1.comp = COMPLETION_INITIALIZER_ONSTACK(crypt_comp_1.comp); 780 crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0); 781 encrypt_journal(ic, true, commit_start, commit_sections, &crypt_comp_1); 782 wait_for_completion_io(&crypt_comp_1.comp); 783 } else { 784 for (i = 0; i < commit_sections; i++) 785 rw_section_mac(ic, commit_start + i, true); 786 } 787 rw_journal(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, commit_start, 788 commit_sections, &io_comp); 789 } else { 790 unsigned to_end; 791 io_comp.in_flight = (atomic_t)ATOMIC_INIT(2); 792 to_end = ic->journal_sections - commit_start; 793 if (ic->journal_io) { 794 crypt_comp_1.ic = ic; 795 crypt_comp_1.comp = COMPLETION_INITIALIZER_ONSTACK(crypt_comp_1.comp); 796 crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0); 797 encrypt_journal(ic, true, commit_start, to_end, &crypt_comp_1); 798 if (try_wait_for_completion(&crypt_comp_1.comp)) { 799 rw_journal(ic, REQ_OP_WRITE, REQ_FUA, commit_start, to_end, &io_comp); 800 crypt_comp_1.comp = COMPLETION_INITIALIZER_ONSTACK(crypt_comp_1.comp); 801 crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0); 802 encrypt_journal(ic, true, 0, commit_sections - to_end, &crypt_comp_1); 803 wait_for_completion_io(&crypt_comp_1.comp); 804 } else { 805 crypt_comp_2.ic = ic; 806 crypt_comp_2.comp = COMPLETION_INITIALIZER_ONSTACK(crypt_comp_2.comp); 807 crypt_comp_2.in_flight = (atomic_t)ATOMIC_INIT(0); 808 encrypt_journal(ic, true, 0, commit_sections - to_end, &crypt_comp_2); 809 wait_for_completion_io(&crypt_comp_1.comp); 810 rw_journal(ic, REQ_OP_WRITE, REQ_FUA, commit_start, to_end, &io_comp); 811 wait_for_completion_io(&crypt_comp_2.comp); 812 } 813 } else { 814 for (i = 0; i < to_end; i++) 815 rw_section_mac(ic, commit_start + i, true); 816 rw_journal(ic, REQ_OP_WRITE, REQ_FUA, commit_start, to_end, &io_comp); 817 for (i = 0; i < commit_sections - to_end; i++) 818 rw_section_mac(ic, i, true); 819 } 820 rw_journal(ic, REQ_OP_WRITE, REQ_FUA, 0, commit_sections - to_end, &io_comp); 821 } 822 823 wait_for_completion_io(&io_comp.comp); 824 } 825 826 static void copy_from_journal(struct dm_integrity_c *ic, unsigned section, unsigned offset, 827 unsigned n_sectors, sector_t target, io_notify_fn fn, void *data) 828 { 829 struct dm_io_request io_req; 830 struct dm_io_region io_loc; 831 int r; 832 unsigned sector, pl_index, pl_offset; 833 834 BUG_ON((target | n_sectors | offset) & (unsigned)(ic->sectors_per_block - 1)); 835 836 if (unlikely(dm_integrity_failed(ic))) { 837 fn(-1UL, data); 838 return; 839 } 840 841 sector = section * ic->journal_section_sectors + JOURNAL_BLOCK_SECTORS + offset; 842 843 pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT); 844 pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1); 845 846 io_req.bi_op = REQ_OP_WRITE; 847 io_req.bi_op_flags = 0; 848 io_req.mem.type = DM_IO_PAGE_LIST; 849 io_req.mem.ptr.pl = &ic->journal[pl_index]; 850 io_req.mem.offset = pl_offset; 851 io_req.notify.fn = fn; 852 io_req.notify.context = data; 853 io_req.client = ic->io; 854 io_loc.bdev = ic->dev->bdev; 855 io_loc.sector = ic->start + target; 856 io_loc.count = n_sectors; 857 858 r = dm_io(&io_req, 1, &io_loc, NULL); 859 if (unlikely(r)) { 860 WARN_ONCE(1, "asynchronous dm_io failed: %d", r); 861 fn(-1UL, data); 862 } 863 } 864 865 static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range) 866 { 867 struct rb_node **n = &ic->in_progress.rb_node; 868 struct rb_node *parent; 869 870 BUG_ON((new_range->logical_sector | new_range->n_sectors) & (unsigned)(ic->sectors_per_block - 1)); 871 872 parent = NULL; 873 874 while (*n) { 875 struct dm_integrity_range *range = container_of(*n, struct dm_integrity_range, node); 876 877 parent = *n; 878 if (new_range->logical_sector + new_range->n_sectors <= range->logical_sector) { 879 n = &range->node.rb_left; 880 } else if (new_range->logical_sector >= range->logical_sector + range->n_sectors) { 881 n = &range->node.rb_right; 882 } else { 883 return false; 884 } 885 } 886 887 rb_link_node(&new_range->node, parent, n); 888 rb_insert_color(&new_range->node, &ic->in_progress); 889 890 return true; 891 } 892 893 static void remove_range_unlocked(struct dm_integrity_c *ic, struct dm_integrity_range *range) 894 { 895 rb_erase(&range->node, &ic->in_progress); 896 wake_up_locked(&ic->endio_wait); 897 } 898 899 static void remove_range(struct dm_integrity_c *ic, struct dm_integrity_range *range) 900 { 901 unsigned long flags; 902 903 spin_lock_irqsave(&ic->endio_wait.lock, flags); 904 remove_range_unlocked(ic, range); 905 spin_unlock_irqrestore(&ic->endio_wait.lock, flags); 906 } 907 908 static void init_journal_node(struct journal_node *node) 909 { 910 RB_CLEAR_NODE(&node->node); 911 node->sector = (sector_t)-1; 912 } 913 914 static void add_journal_node(struct dm_integrity_c *ic, struct journal_node *node, sector_t sector) 915 { 916 struct rb_node **link; 917 struct rb_node *parent; 918 919 node->sector = sector; 920 BUG_ON(!RB_EMPTY_NODE(&node->node)); 921 922 link = &ic->journal_tree_root.rb_node; 923 parent = NULL; 924 925 while (*link) { 926 struct journal_node *j; 927 parent = *link; 928 j = container_of(parent, struct journal_node, node); 929 if (sector < j->sector) 930 link = &j->node.rb_left; 931 else 932 link = &j->node.rb_right; 933 } 934 935 rb_link_node(&node->node, parent, link); 936 rb_insert_color(&node->node, &ic->journal_tree_root); 937 } 938 939 static void remove_journal_node(struct dm_integrity_c *ic, struct journal_node *node) 940 { 941 BUG_ON(RB_EMPTY_NODE(&node->node)); 942 rb_erase(&node->node, &ic->journal_tree_root); 943 init_journal_node(node); 944 } 945 946 #define NOT_FOUND (-1U) 947 948 static unsigned find_journal_node(struct dm_integrity_c *ic, sector_t sector, sector_t *next_sector) 949 { 950 struct rb_node *n = ic->journal_tree_root.rb_node; 951 unsigned found = NOT_FOUND; 952 *next_sector = (sector_t)-1; 953 while (n) { 954 struct journal_node *j = container_of(n, struct journal_node, node); 955 if (sector == j->sector) { 956 found = j - ic->journal_tree; 957 } 958 if (sector < j->sector) { 959 *next_sector = j->sector; 960 n = j->node.rb_left; 961 } else { 962 n = j->node.rb_right; 963 } 964 } 965 966 return found; 967 } 968 969 static bool test_journal_node(struct dm_integrity_c *ic, unsigned pos, sector_t sector) 970 { 971 struct journal_node *node, *next_node; 972 struct rb_node *next; 973 974 if (unlikely(pos >= ic->journal_entries)) 975 return false; 976 node = &ic->journal_tree[pos]; 977 if (unlikely(RB_EMPTY_NODE(&node->node))) 978 return false; 979 if (unlikely(node->sector != sector)) 980 return false; 981 982 next = rb_next(&node->node); 983 if (unlikely(!next)) 984 return true; 985 986 next_node = container_of(next, struct journal_node, node); 987 return next_node->sector != sector; 988 } 989 990 static bool find_newer_committed_node(struct dm_integrity_c *ic, struct journal_node *node) 991 { 992 struct rb_node *next; 993 struct journal_node *next_node; 994 unsigned next_section; 995 996 BUG_ON(RB_EMPTY_NODE(&node->node)); 997 998 next = rb_next(&node->node); 999 if (unlikely(!next)) 1000 return false; 1001 1002 next_node = container_of(next, struct journal_node, node); 1003 1004 if (next_node->sector != node->sector) 1005 return false; 1006 1007 next_section = (unsigned)(next_node - ic->journal_tree) / ic->journal_section_entries; 1008 if (next_section >= ic->committed_section && 1009 next_section < ic->committed_section + ic->n_committed_sections) 1010 return true; 1011 if (next_section + ic->journal_sections < ic->committed_section + ic->n_committed_sections) 1012 return true; 1013 1014 return false; 1015 } 1016 1017 #define TAG_READ 0 1018 #define TAG_WRITE 1 1019 #define TAG_CMP 2 1020 1021 static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, sector_t *metadata_block, 1022 unsigned *metadata_offset, unsigned total_size, int op) 1023 { 1024 do { 1025 unsigned char *data, *dp; 1026 struct dm_buffer *b; 1027 unsigned to_copy; 1028 int r; 1029 1030 r = dm_integrity_failed(ic); 1031 if (unlikely(r)) 1032 return r; 1033 1034 data = dm_bufio_read(ic->bufio, *metadata_block, &b); 1035 if (unlikely(IS_ERR(data))) 1036 return PTR_ERR(data); 1037 1038 to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size); 1039 dp = data + *metadata_offset; 1040 if (op == TAG_READ) { 1041 memcpy(tag, dp, to_copy); 1042 } else if (op == TAG_WRITE) { 1043 memcpy(dp, tag, to_copy); 1044 dm_bufio_mark_buffer_dirty(b); 1045 } else { 1046 /* e.g.: op == TAG_CMP */ 1047 if (unlikely(memcmp(dp, tag, to_copy))) { 1048 unsigned i; 1049 1050 for (i = 0; i < to_copy; i++) { 1051 if (dp[i] != tag[i]) 1052 break; 1053 total_size--; 1054 } 1055 dm_bufio_release(b); 1056 return total_size; 1057 } 1058 } 1059 dm_bufio_release(b); 1060 1061 tag += to_copy; 1062 *metadata_offset += to_copy; 1063 if (unlikely(*metadata_offset == 1U << SECTOR_SHIFT << ic->log2_buffer_sectors)) { 1064 (*metadata_block)++; 1065 *metadata_offset = 0; 1066 } 1067 total_size -= to_copy; 1068 } while (unlikely(total_size)); 1069 1070 return 0; 1071 } 1072 1073 static void dm_integrity_flush_buffers(struct dm_integrity_c *ic) 1074 { 1075 int r; 1076 r = dm_bufio_write_dirty_buffers(ic->bufio); 1077 if (unlikely(r)) 1078 dm_integrity_io_error(ic, "writing tags", r); 1079 } 1080 1081 static void sleep_on_endio_wait(struct dm_integrity_c *ic) 1082 { 1083 DECLARE_WAITQUEUE(wait, current); 1084 __add_wait_queue(&ic->endio_wait, &wait); 1085 __set_current_state(TASK_UNINTERRUPTIBLE); 1086 spin_unlock_irq(&ic->endio_wait.lock); 1087 io_schedule(); 1088 spin_lock_irq(&ic->endio_wait.lock); 1089 __remove_wait_queue(&ic->endio_wait, &wait); 1090 } 1091 1092 static void autocommit_fn(unsigned long data) 1093 { 1094 struct dm_integrity_c *ic = (struct dm_integrity_c *)data; 1095 1096 if (likely(!dm_integrity_failed(ic))) 1097 queue_work(ic->commit_wq, &ic->commit_work); 1098 } 1099 1100 static void schedule_autocommit(struct dm_integrity_c *ic) 1101 { 1102 if (!timer_pending(&ic->autocommit_timer)) 1103 mod_timer(&ic->autocommit_timer, jiffies + ic->autocommit_jiffies); 1104 } 1105 1106 static void submit_flush_bio(struct dm_integrity_c *ic, struct dm_integrity_io *dio) 1107 { 1108 struct bio *bio; 1109 unsigned long flags; 1110 1111 spin_lock_irqsave(&ic->endio_wait.lock, flags); 1112 bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); 1113 bio_list_add(&ic->flush_bio_list, bio); 1114 spin_unlock_irqrestore(&ic->endio_wait.lock, flags); 1115 1116 queue_work(ic->commit_wq, &ic->commit_work); 1117 } 1118 1119 static void do_endio(struct dm_integrity_c *ic, struct bio *bio) 1120 { 1121 int r = dm_integrity_failed(ic); 1122 if (unlikely(r) && !bio->bi_status) 1123 bio->bi_status = errno_to_blk_status(r); 1124 bio_endio(bio); 1125 } 1126 1127 static void do_endio_flush(struct dm_integrity_c *ic, struct dm_integrity_io *dio) 1128 { 1129 struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); 1130 1131 if (unlikely(dio->fua) && likely(!bio->bi_status) && likely(!dm_integrity_failed(ic))) 1132 submit_flush_bio(ic, dio); 1133 else 1134 do_endio(ic, bio); 1135 } 1136 1137 static void dec_in_flight(struct dm_integrity_io *dio) 1138 { 1139 if (atomic_dec_and_test(&dio->in_flight)) { 1140 struct dm_integrity_c *ic = dio->ic; 1141 struct bio *bio; 1142 1143 remove_range(ic, &dio->range); 1144 1145 if (unlikely(dio->write)) 1146 schedule_autocommit(ic); 1147 1148 bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); 1149 1150 if (unlikely(dio->bi_status) && !bio->bi_status) 1151 bio->bi_status = dio->bi_status; 1152 if (likely(!bio->bi_status) && unlikely(bio_sectors(bio) != dio->range.n_sectors)) { 1153 dio->range.logical_sector += dio->range.n_sectors; 1154 bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT); 1155 INIT_WORK(&dio->work, integrity_bio_wait); 1156 queue_work(ic->wait_wq, &dio->work); 1157 return; 1158 } 1159 do_endio_flush(ic, dio); 1160 } 1161 } 1162 1163 static void integrity_end_io(struct bio *bio) 1164 { 1165 struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io)); 1166 1167 bio->bi_iter = dio->orig_bi_iter; 1168 bio->bi_disk = dio->orig_bi_disk; 1169 bio->bi_partno = dio->orig_bi_partno; 1170 if (dio->orig_bi_integrity) { 1171 bio->bi_integrity = dio->orig_bi_integrity; 1172 bio->bi_opf |= REQ_INTEGRITY; 1173 } 1174 bio->bi_end_io = dio->orig_bi_end_io; 1175 1176 if (dio->completion) 1177 complete(dio->completion); 1178 1179 dec_in_flight(dio); 1180 } 1181 1182 static void integrity_sector_checksum(struct dm_integrity_c *ic, sector_t sector, 1183 const char *data, char *result) 1184 { 1185 __u64 sector_le = cpu_to_le64(sector); 1186 SHASH_DESC_ON_STACK(req, ic->internal_hash); 1187 int r; 1188 unsigned digest_size; 1189 1190 req->tfm = ic->internal_hash; 1191 req->flags = 0; 1192 1193 r = crypto_shash_init(req); 1194 if (unlikely(r < 0)) { 1195 dm_integrity_io_error(ic, "crypto_shash_init", r); 1196 goto failed; 1197 } 1198 1199 r = crypto_shash_update(req, (const __u8 *)§or_le, sizeof sector_le); 1200 if (unlikely(r < 0)) { 1201 dm_integrity_io_error(ic, "crypto_shash_update", r); 1202 goto failed; 1203 } 1204 1205 r = crypto_shash_update(req, data, ic->sectors_per_block << SECTOR_SHIFT); 1206 if (unlikely(r < 0)) { 1207 dm_integrity_io_error(ic, "crypto_shash_update", r); 1208 goto failed; 1209 } 1210 1211 r = crypto_shash_final(req, result); 1212 if (unlikely(r < 0)) { 1213 dm_integrity_io_error(ic, "crypto_shash_final", r); 1214 goto failed; 1215 } 1216 1217 digest_size = crypto_shash_digestsize(ic->internal_hash); 1218 if (unlikely(digest_size < ic->tag_size)) 1219 memset(result + digest_size, 0, ic->tag_size - digest_size); 1220 1221 return; 1222 1223 failed: 1224 /* this shouldn't happen anyway, the hash functions have no reason to fail */ 1225 get_random_bytes(result, ic->tag_size); 1226 } 1227 1228 static void integrity_metadata(struct work_struct *w) 1229 { 1230 struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work); 1231 struct dm_integrity_c *ic = dio->ic; 1232 1233 int r; 1234 1235 if (ic->internal_hash) { 1236 struct bvec_iter iter; 1237 struct bio_vec bv; 1238 unsigned digest_size = crypto_shash_digestsize(ic->internal_hash); 1239 struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); 1240 char *checksums; 1241 unsigned extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0; 1242 char checksums_onstack[ic->tag_size + extra_space]; 1243 unsigned sectors_to_process = dio->range.n_sectors; 1244 sector_t sector = dio->range.logical_sector; 1245 1246 if (unlikely(ic->mode == 'R')) 1247 goto skip_io; 1248 1249 checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space, 1250 GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN); 1251 if (!checksums) 1252 checksums = checksums_onstack; 1253 1254 __bio_for_each_segment(bv, bio, iter, dio->orig_bi_iter) { 1255 unsigned pos; 1256 char *mem, *checksums_ptr; 1257 1258 again: 1259 mem = (char *)kmap_atomic(bv.bv_page) + bv.bv_offset; 1260 pos = 0; 1261 checksums_ptr = checksums; 1262 do { 1263 integrity_sector_checksum(ic, sector, mem + pos, checksums_ptr); 1264 checksums_ptr += ic->tag_size; 1265 sectors_to_process -= ic->sectors_per_block; 1266 pos += ic->sectors_per_block << SECTOR_SHIFT; 1267 sector += ic->sectors_per_block; 1268 } while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack); 1269 kunmap_atomic(mem); 1270 1271 r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset, 1272 checksums_ptr - checksums, !dio->write ? TAG_CMP : TAG_WRITE); 1273 if (unlikely(r)) { 1274 if (r > 0) { 1275 DMERR("Checksum failed at sector 0x%llx", 1276 (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size))); 1277 r = -EILSEQ; 1278 } 1279 if (likely(checksums != checksums_onstack)) 1280 kfree(checksums); 1281 goto error; 1282 } 1283 1284 if (!sectors_to_process) 1285 break; 1286 1287 if (unlikely(pos < bv.bv_len)) { 1288 bv.bv_offset += pos; 1289 bv.bv_len -= pos; 1290 goto again; 1291 } 1292 } 1293 1294 if (likely(checksums != checksums_onstack)) 1295 kfree(checksums); 1296 } else { 1297 struct bio_integrity_payload *bip = dio->orig_bi_integrity; 1298 1299 if (bip) { 1300 struct bio_vec biv; 1301 struct bvec_iter iter; 1302 unsigned data_to_process = dio->range.n_sectors; 1303 sector_to_block(ic, data_to_process); 1304 data_to_process *= ic->tag_size; 1305 1306 bip_for_each_vec(biv, bip, iter) { 1307 unsigned char *tag; 1308 unsigned this_len; 1309 1310 BUG_ON(PageHighMem(biv.bv_page)); 1311 tag = lowmem_page_address(biv.bv_page) + biv.bv_offset; 1312 this_len = min(biv.bv_len, data_to_process); 1313 r = dm_integrity_rw_tag(ic, tag, &dio->metadata_block, &dio->metadata_offset, 1314 this_len, !dio->write ? TAG_READ : TAG_WRITE); 1315 if (unlikely(r)) 1316 goto error; 1317 data_to_process -= this_len; 1318 if (!data_to_process) 1319 break; 1320 } 1321 } 1322 } 1323 skip_io: 1324 dec_in_flight(dio); 1325 return; 1326 error: 1327 dio->bi_status = errno_to_blk_status(r); 1328 dec_in_flight(dio); 1329 } 1330 1331 static int dm_integrity_map(struct dm_target *ti, struct bio *bio) 1332 { 1333 struct dm_integrity_c *ic = ti->private; 1334 struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io)); 1335 struct bio_integrity_payload *bip; 1336 1337 sector_t area, offset; 1338 1339 dio->ic = ic; 1340 dio->bi_status = 0; 1341 1342 if (unlikely(bio->bi_opf & REQ_PREFLUSH)) { 1343 submit_flush_bio(ic, dio); 1344 return DM_MAPIO_SUBMITTED; 1345 } 1346 1347 dio->range.logical_sector = dm_target_offset(ti, bio->bi_iter.bi_sector); 1348 dio->write = bio_op(bio) == REQ_OP_WRITE; 1349 dio->fua = dio->write && bio->bi_opf & REQ_FUA; 1350 if (unlikely(dio->fua)) { 1351 /* 1352 * Don't pass down the FUA flag because we have to flush 1353 * disk cache anyway. 1354 */ 1355 bio->bi_opf &= ~REQ_FUA; 1356 } 1357 if (unlikely(dio->range.logical_sector + bio_sectors(bio) > ic->provided_data_sectors)) { 1358 DMERR("Too big sector number: 0x%llx + 0x%x > 0x%llx", 1359 (unsigned long long)dio->range.logical_sector, bio_sectors(bio), 1360 (unsigned long long)ic->provided_data_sectors); 1361 return DM_MAPIO_KILL; 1362 } 1363 if (unlikely((dio->range.logical_sector | bio_sectors(bio)) & (unsigned)(ic->sectors_per_block - 1))) { 1364 DMERR("Bio not aligned on %u sectors: 0x%llx, 0x%x", 1365 ic->sectors_per_block, 1366 (unsigned long long)dio->range.logical_sector, bio_sectors(bio)); 1367 return DM_MAPIO_KILL; 1368 } 1369 1370 if (ic->sectors_per_block > 1) { 1371 struct bvec_iter iter; 1372 struct bio_vec bv; 1373 bio_for_each_segment(bv, bio, iter) { 1374 if (unlikely((bv.bv_offset | bv.bv_len) & ((ic->sectors_per_block << SECTOR_SHIFT) - 1))) { 1375 DMERR("Bio vector (%u,%u) is not aligned on %u-sector boundary", 1376 bv.bv_offset, bv.bv_len, ic->sectors_per_block); 1377 return DM_MAPIO_KILL; 1378 } 1379 } 1380 } 1381 1382 bip = bio_integrity(bio); 1383 if (!ic->internal_hash) { 1384 if (bip) { 1385 unsigned wanted_tag_size = bio_sectors(bio) >> ic->sb->log2_sectors_per_block; 1386 if (ic->log2_tag_size >= 0) 1387 wanted_tag_size <<= ic->log2_tag_size; 1388 else 1389 wanted_tag_size *= ic->tag_size; 1390 if (unlikely(wanted_tag_size != bip->bip_iter.bi_size)) { 1391 DMERR("Invalid integrity data size %u, expected %u", bip->bip_iter.bi_size, wanted_tag_size); 1392 return DM_MAPIO_KILL; 1393 } 1394 } 1395 } else { 1396 if (unlikely(bip != NULL)) { 1397 DMERR("Unexpected integrity data when using internal hash"); 1398 return DM_MAPIO_KILL; 1399 } 1400 } 1401 1402 if (unlikely(ic->mode == 'R') && unlikely(dio->write)) 1403 return DM_MAPIO_KILL; 1404 1405 get_area_and_offset(ic, dio->range.logical_sector, &area, &offset); 1406 dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, &dio->metadata_offset); 1407 bio->bi_iter.bi_sector = get_data_sector(ic, area, offset); 1408 1409 dm_integrity_map_continue(dio, true); 1410 return DM_MAPIO_SUBMITTED; 1411 } 1412 1413 static bool __journal_read_write(struct dm_integrity_io *dio, struct bio *bio, 1414 unsigned journal_section, unsigned journal_entry) 1415 { 1416 struct dm_integrity_c *ic = dio->ic; 1417 sector_t logical_sector; 1418 unsigned n_sectors; 1419 1420 logical_sector = dio->range.logical_sector; 1421 n_sectors = dio->range.n_sectors; 1422 do { 1423 struct bio_vec bv = bio_iovec(bio); 1424 char *mem; 1425 1426 if (unlikely(bv.bv_len >> SECTOR_SHIFT > n_sectors)) 1427 bv.bv_len = n_sectors << SECTOR_SHIFT; 1428 n_sectors -= bv.bv_len >> SECTOR_SHIFT; 1429 bio_advance_iter(bio, &bio->bi_iter, bv.bv_len); 1430 retry_kmap: 1431 mem = kmap_atomic(bv.bv_page); 1432 if (likely(dio->write)) 1433 flush_dcache_page(bv.bv_page); 1434 1435 do { 1436 struct journal_entry *je = access_journal_entry(ic, journal_section, journal_entry); 1437 1438 if (unlikely(!dio->write)) { 1439 struct journal_sector *js; 1440 char *mem_ptr; 1441 unsigned s; 1442 1443 if (unlikely(journal_entry_is_inprogress(je))) { 1444 flush_dcache_page(bv.bv_page); 1445 kunmap_atomic(mem); 1446 1447 __io_wait_event(ic->copy_to_journal_wait, !journal_entry_is_inprogress(je)); 1448 goto retry_kmap; 1449 } 1450 smp_rmb(); 1451 BUG_ON(journal_entry_get_sector(je) != logical_sector); 1452 js = access_journal_data(ic, journal_section, journal_entry); 1453 mem_ptr = mem + bv.bv_offset; 1454 s = 0; 1455 do { 1456 memcpy(mem_ptr, js, JOURNAL_SECTOR_DATA); 1457 *(commit_id_t *)(mem_ptr + JOURNAL_SECTOR_DATA) = je->last_bytes[s]; 1458 js++; 1459 mem_ptr += 1 << SECTOR_SHIFT; 1460 } while (++s < ic->sectors_per_block); 1461 #ifdef INTERNAL_VERIFY 1462 if (ic->internal_hash) { 1463 char checksums_onstack[max(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)]; 1464 1465 integrity_sector_checksum(ic, logical_sector, mem + bv.bv_offset, checksums_onstack); 1466 if (unlikely(memcmp(checksums_onstack, journal_entry_tag(ic, je), ic->tag_size))) { 1467 DMERR("Checksum failed when reading from journal, at sector 0x%llx", 1468 (unsigned long long)logical_sector); 1469 } 1470 } 1471 #endif 1472 } 1473 1474 if (!ic->internal_hash) { 1475 struct bio_integrity_payload *bip = bio_integrity(bio); 1476 unsigned tag_todo = ic->tag_size; 1477 char *tag_ptr = journal_entry_tag(ic, je); 1478 1479 if (bip) do { 1480 struct bio_vec biv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter); 1481 unsigned tag_now = min(biv.bv_len, tag_todo); 1482 char *tag_addr; 1483 BUG_ON(PageHighMem(biv.bv_page)); 1484 tag_addr = lowmem_page_address(biv.bv_page) + biv.bv_offset; 1485 if (likely(dio->write)) 1486 memcpy(tag_ptr, tag_addr, tag_now); 1487 else 1488 memcpy(tag_addr, tag_ptr, tag_now); 1489 bvec_iter_advance(bip->bip_vec, &bip->bip_iter, tag_now); 1490 tag_ptr += tag_now; 1491 tag_todo -= tag_now; 1492 } while (unlikely(tag_todo)); else { 1493 if (likely(dio->write)) 1494 memset(tag_ptr, 0, tag_todo); 1495 } 1496 } 1497 1498 if (likely(dio->write)) { 1499 struct journal_sector *js; 1500 unsigned s; 1501 1502 js = access_journal_data(ic, journal_section, journal_entry); 1503 memcpy(js, mem + bv.bv_offset, ic->sectors_per_block << SECTOR_SHIFT); 1504 1505 s = 0; 1506 do { 1507 je->last_bytes[s] = js[s].commit_id; 1508 } while (++s < ic->sectors_per_block); 1509 1510 if (ic->internal_hash) { 1511 unsigned digest_size = crypto_shash_digestsize(ic->internal_hash); 1512 if (unlikely(digest_size > ic->tag_size)) { 1513 char checksums_onstack[digest_size]; 1514 integrity_sector_checksum(ic, logical_sector, (char *)js, checksums_onstack); 1515 memcpy(journal_entry_tag(ic, je), checksums_onstack, ic->tag_size); 1516 } else 1517 integrity_sector_checksum(ic, logical_sector, (char *)js, journal_entry_tag(ic, je)); 1518 } 1519 1520 journal_entry_set_sector(je, logical_sector); 1521 } 1522 logical_sector += ic->sectors_per_block; 1523 1524 journal_entry++; 1525 if (unlikely(journal_entry == ic->journal_section_entries)) { 1526 journal_entry = 0; 1527 journal_section++; 1528 wraparound_section(ic, &journal_section); 1529 } 1530 1531 bv.bv_offset += ic->sectors_per_block << SECTOR_SHIFT; 1532 } while (bv.bv_len -= ic->sectors_per_block << SECTOR_SHIFT); 1533 1534 if (unlikely(!dio->write)) 1535 flush_dcache_page(bv.bv_page); 1536 kunmap_atomic(mem); 1537 } while (n_sectors); 1538 1539 if (likely(dio->write)) { 1540 smp_mb(); 1541 if (unlikely(waitqueue_active(&ic->copy_to_journal_wait))) 1542 wake_up(&ic->copy_to_journal_wait); 1543 if (ACCESS_ONCE(ic->free_sectors) <= ic->free_sectors_threshold) { 1544 queue_work(ic->commit_wq, &ic->commit_work); 1545 } else { 1546 schedule_autocommit(ic); 1547 } 1548 } else { 1549 remove_range(ic, &dio->range); 1550 } 1551 1552 if (unlikely(bio->bi_iter.bi_size)) { 1553 sector_t area, offset; 1554 1555 dio->range.logical_sector = logical_sector; 1556 get_area_and_offset(ic, dio->range.logical_sector, &area, &offset); 1557 dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, &dio->metadata_offset); 1558 return true; 1559 } 1560 1561 return false; 1562 } 1563 1564 static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map) 1565 { 1566 struct dm_integrity_c *ic = dio->ic; 1567 struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); 1568 unsigned journal_section, journal_entry; 1569 unsigned journal_read_pos; 1570 struct completion read_comp; 1571 bool need_sync_io = ic->internal_hash && !dio->write; 1572 1573 if (need_sync_io && from_map) { 1574 INIT_WORK(&dio->work, integrity_bio_wait); 1575 queue_work(ic->metadata_wq, &dio->work); 1576 return; 1577 } 1578 1579 lock_retry: 1580 spin_lock_irq(&ic->endio_wait.lock); 1581 retry: 1582 if (unlikely(dm_integrity_failed(ic))) { 1583 spin_unlock_irq(&ic->endio_wait.lock); 1584 do_endio(ic, bio); 1585 return; 1586 } 1587 dio->range.n_sectors = bio_sectors(bio); 1588 journal_read_pos = NOT_FOUND; 1589 if (likely(ic->mode == 'J')) { 1590 if (dio->write) { 1591 unsigned next_entry, i, pos; 1592 unsigned ws, we, range_sectors; 1593 1594 dio->range.n_sectors = min(dio->range.n_sectors, 1595 ic->free_sectors << ic->sb->log2_sectors_per_block); 1596 if (unlikely(!dio->range.n_sectors)) 1597 goto sleep; 1598 range_sectors = dio->range.n_sectors >> ic->sb->log2_sectors_per_block; 1599 ic->free_sectors -= range_sectors; 1600 journal_section = ic->free_section; 1601 journal_entry = ic->free_section_entry; 1602 1603 next_entry = ic->free_section_entry + range_sectors; 1604 ic->free_section_entry = next_entry % ic->journal_section_entries; 1605 ic->free_section += next_entry / ic->journal_section_entries; 1606 ic->n_uncommitted_sections += next_entry / ic->journal_section_entries; 1607 wraparound_section(ic, &ic->free_section); 1608 1609 pos = journal_section * ic->journal_section_entries + journal_entry; 1610 ws = journal_section; 1611 we = journal_entry; 1612 i = 0; 1613 do { 1614 struct journal_entry *je; 1615 1616 add_journal_node(ic, &ic->journal_tree[pos], dio->range.logical_sector + i); 1617 pos++; 1618 if (unlikely(pos >= ic->journal_entries)) 1619 pos = 0; 1620 1621 je = access_journal_entry(ic, ws, we); 1622 BUG_ON(!journal_entry_is_unused(je)); 1623 journal_entry_set_inprogress(je); 1624 we++; 1625 if (unlikely(we == ic->journal_section_entries)) { 1626 we = 0; 1627 ws++; 1628 wraparound_section(ic, &ws); 1629 } 1630 } while ((i += ic->sectors_per_block) < dio->range.n_sectors); 1631 1632 spin_unlock_irq(&ic->endio_wait.lock); 1633 goto journal_read_write; 1634 } else { 1635 sector_t next_sector; 1636 journal_read_pos = find_journal_node(ic, dio->range.logical_sector, &next_sector); 1637 if (likely(journal_read_pos == NOT_FOUND)) { 1638 if (unlikely(dio->range.n_sectors > next_sector - dio->range.logical_sector)) 1639 dio->range.n_sectors = next_sector - dio->range.logical_sector; 1640 } else { 1641 unsigned i; 1642 unsigned jp = journal_read_pos + 1; 1643 for (i = ic->sectors_per_block; i < dio->range.n_sectors; i += ic->sectors_per_block, jp++) { 1644 if (!test_journal_node(ic, jp, dio->range.logical_sector + i)) 1645 break; 1646 } 1647 dio->range.n_sectors = i; 1648 } 1649 } 1650 } 1651 if (unlikely(!add_new_range(ic, &dio->range))) { 1652 /* 1653 * We must not sleep in the request routine because it could 1654 * stall bios on current->bio_list. 1655 * So, we offload the bio to a workqueue if we have to sleep. 1656 */ 1657 sleep: 1658 if (from_map) { 1659 spin_unlock_irq(&ic->endio_wait.lock); 1660 INIT_WORK(&dio->work, integrity_bio_wait); 1661 queue_work(ic->wait_wq, &dio->work); 1662 return; 1663 } else { 1664 sleep_on_endio_wait(ic); 1665 goto retry; 1666 } 1667 } 1668 spin_unlock_irq(&ic->endio_wait.lock); 1669 1670 if (unlikely(journal_read_pos != NOT_FOUND)) { 1671 journal_section = journal_read_pos / ic->journal_section_entries; 1672 journal_entry = journal_read_pos % ic->journal_section_entries; 1673 goto journal_read_write; 1674 } 1675 1676 dio->in_flight = (atomic_t)ATOMIC_INIT(2); 1677 1678 if (need_sync_io) { 1679 read_comp = COMPLETION_INITIALIZER_ONSTACK(read_comp); 1680 dio->completion = &read_comp; 1681 } else 1682 dio->completion = NULL; 1683 1684 dio->orig_bi_iter = bio->bi_iter; 1685 1686 dio->orig_bi_disk = bio->bi_disk; 1687 dio->orig_bi_partno = bio->bi_partno; 1688 bio_set_dev(bio, ic->dev->bdev); 1689 1690 dio->orig_bi_integrity = bio_integrity(bio); 1691 bio->bi_integrity = NULL; 1692 bio->bi_opf &= ~REQ_INTEGRITY; 1693 1694 dio->orig_bi_end_io = bio->bi_end_io; 1695 bio->bi_end_io = integrity_end_io; 1696 1697 bio->bi_iter.bi_size = dio->range.n_sectors << SECTOR_SHIFT; 1698 bio->bi_iter.bi_sector += ic->start; 1699 generic_make_request(bio); 1700 1701 if (need_sync_io) { 1702 wait_for_completion_io(&read_comp); 1703 integrity_metadata(&dio->work); 1704 } else { 1705 INIT_WORK(&dio->work, integrity_metadata); 1706 queue_work(ic->metadata_wq, &dio->work); 1707 } 1708 1709 return; 1710 1711 journal_read_write: 1712 if (unlikely(__journal_read_write(dio, bio, journal_section, journal_entry))) 1713 goto lock_retry; 1714 1715 do_endio_flush(ic, dio); 1716 } 1717 1718 1719 static void integrity_bio_wait(struct work_struct *w) 1720 { 1721 struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work); 1722 1723 dm_integrity_map_continue(dio, false); 1724 } 1725 1726 static void pad_uncommitted(struct dm_integrity_c *ic) 1727 { 1728 if (ic->free_section_entry) { 1729 ic->free_sectors -= ic->journal_section_entries - ic->free_section_entry; 1730 ic->free_section_entry = 0; 1731 ic->free_section++; 1732 wraparound_section(ic, &ic->free_section); 1733 ic->n_uncommitted_sections++; 1734 } 1735 WARN_ON(ic->journal_sections * ic->journal_section_entries != 1736 (ic->n_uncommitted_sections + ic->n_committed_sections) * ic->journal_section_entries + ic->free_sectors); 1737 } 1738 1739 static void integrity_commit(struct work_struct *w) 1740 { 1741 struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, commit_work); 1742 unsigned commit_start, commit_sections; 1743 unsigned i, j, n; 1744 struct bio *flushes; 1745 1746 del_timer(&ic->autocommit_timer); 1747 1748 spin_lock_irq(&ic->endio_wait.lock); 1749 flushes = bio_list_get(&ic->flush_bio_list); 1750 if (unlikely(ic->mode != 'J')) { 1751 spin_unlock_irq(&ic->endio_wait.lock); 1752 dm_integrity_flush_buffers(ic); 1753 goto release_flush_bios; 1754 } 1755 1756 pad_uncommitted(ic); 1757 commit_start = ic->uncommitted_section; 1758 commit_sections = ic->n_uncommitted_sections; 1759 spin_unlock_irq(&ic->endio_wait.lock); 1760 1761 if (!commit_sections) 1762 goto release_flush_bios; 1763 1764 i = commit_start; 1765 for (n = 0; n < commit_sections; n++) { 1766 for (j = 0; j < ic->journal_section_entries; j++) { 1767 struct journal_entry *je; 1768 je = access_journal_entry(ic, i, j); 1769 io_wait_event(ic->copy_to_journal_wait, !journal_entry_is_inprogress(je)); 1770 } 1771 for (j = 0; j < ic->journal_section_sectors; j++) { 1772 struct journal_sector *js; 1773 js = access_journal(ic, i, j); 1774 js->commit_id = dm_integrity_commit_id(ic, i, j, ic->commit_seq); 1775 } 1776 i++; 1777 if (unlikely(i >= ic->journal_sections)) 1778 ic->commit_seq = next_commit_seq(ic->commit_seq); 1779 wraparound_section(ic, &i); 1780 } 1781 smp_rmb(); 1782 1783 write_journal(ic, commit_start, commit_sections); 1784 1785 spin_lock_irq(&ic->endio_wait.lock); 1786 ic->uncommitted_section += commit_sections; 1787 wraparound_section(ic, &ic->uncommitted_section); 1788 ic->n_uncommitted_sections -= commit_sections; 1789 ic->n_committed_sections += commit_sections; 1790 spin_unlock_irq(&ic->endio_wait.lock); 1791 1792 if (ACCESS_ONCE(ic->free_sectors) <= ic->free_sectors_threshold) 1793 queue_work(ic->writer_wq, &ic->writer_work); 1794 1795 release_flush_bios: 1796 while (flushes) { 1797 struct bio *next = flushes->bi_next; 1798 flushes->bi_next = NULL; 1799 do_endio(ic, flushes); 1800 flushes = next; 1801 } 1802 } 1803 1804 static void complete_copy_from_journal(unsigned long error, void *context) 1805 { 1806 struct journal_io *io = context; 1807 struct journal_completion *comp = io->comp; 1808 struct dm_integrity_c *ic = comp->ic; 1809 remove_range(ic, &io->range); 1810 mempool_free(io, ic->journal_io_mempool); 1811 if (unlikely(error != 0)) 1812 dm_integrity_io_error(ic, "copying from journal", -EIO); 1813 complete_journal_op(comp); 1814 } 1815 1816 static void restore_last_bytes(struct dm_integrity_c *ic, struct journal_sector *js, 1817 struct journal_entry *je) 1818 { 1819 unsigned s = 0; 1820 do { 1821 js->commit_id = je->last_bytes[s]; 1822 js++; 1823 } while (++s < ic->sectors_per_block); 1824 } 1825 1826 static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start, 1827 unsigned write_sections, bool from_replay) 1828 { 1829 unsigned i, j, n; 1830 struct journal_completion comp; 1831 struct blk_plug plug; 1832 1833 blk_start_plug(&plug); 1834 1835 comp.ic = ic; 1836 comp.in_flight = (atomic_t)ATOMIC_INIT(1); 1837 comp.comp = COMPLETION_INITIALIZER_ONSTACK(comp.comp); 1838 1839 i = write_start; 1840 for (n = 0; n < write_sections; n++, i++, wraparound_section(ic, &i)) { 1841 #ifndef INTERNAL_VERIFY 1842 if (unlikely(from_replay)) 1843 #endif 1844 rw_section_mac(ic, i, false); 1845 for (j = 0; j < ic->journal_section_entries; j++) { 1846 struct journal_entry *je = access_journal_entry(ic, i, j); 1847 sector_t sec, area, offset; 1848 unsigned k, l, next_loop; 1849 sector_t metadata_block; 1850 unsigned metadata_offset; 1851 struct journal_io *io; 1852 1853 if (journal_entry_is_unused(je)) 1854 continue; 1855 BUG_ON(unlikely(journal_entry_is_inprogress(je)) && !from_replay); 1856 sec = journal_entry_get_sector(je); 1857 if (unlikely(from_replay)) { 1858 if (unlikely(sec & (unsigned)(ic->sectors_per_block - 1))) { 1859 dm_integrity_io_error(ic, "invalid sector in journal", -EIO); 1860 sec &= ~(sector_t)(ic->sectors_per_block - 1); 1861 } 1862 } 1863 get_area_and_offset(ic, sec, &area, &offset); 1864 restore_last_bytes(ic, access_journal_data(ic, i, j), je); 1865 for (k = j + 1; k < ic->journal_section_entries; k++) { 1866 struct journal_entry *je2 = access_journal_entry(ic, i, k); 1867 sector_t sec2, area2, offset2; 1868 if (journal_entry_is_unused(je2)) 1869 break; 1870 BUG_ON(unlikely(journal_entry_is_inprogress(je2)) && !from_replay); 1871 sec2 = journal_entry_get_sector(je2); 1872 get_area_and_offset(ic, sec2, &area2, &offset2); 1873 if (area2 != area || offset2 != offset + ((k - j) << ic->sb->log2_sectors_per_block)) 1874 break; 1875 restore_last_bytes(ic, access_journal_data(ic, i, k), je2); 1876 } 1877 next_loop = k - 1; 1878 1879 io = mempool_alloc(ic->journal_io_mempool, GFP_NOIO); 1880 io->comp = ∁ 1881 io->range.logical_sector = sec; 1882 io->range.n_sectors = (k - j) << ic->sb->log2_sectors_per_block; 1883 1884 spin_lock_irq(&ic->endio_wait.lock); 1885 while (unlikely(!add_new_range(ic, &io->range))) 1886 sleep_on_endio_wait(ic); 1887 1888 if (likely(!from_replay)) { 1889 struct journal_node *section_node = &ic->journal_tree[i * ic->journal_section_entries]; 1890 1891 /* don't write if there is newer committed sector */ 1892 while (j < k && find_newer_committed_node(ic, §ion_node[j])) { 1893 struct journal_entry *je2 = access_journal_entry(ic, i, j); 1894 1895 journal_entry_set_unused(je2); 1896 remove_journal_node(ic, §ion_node[j]); 1897 j++; 1898 sec += ic->sectors_per_block; 1899 offset += ic->sectors_per_block; 1900 } 1901 while (j < k && find_newer_committed_node(ic, §ion_node[k - 1])) { 1902 struct journal_entry *je2 = access_journal_entry(ic, i, k - 1); 1903 1904 journal_entry_set_unused(je2); 1905 remove_journal_node(ic, §ion_node[k - 1]); 1906 k--; 1907 } 1908 if (j == k) { 1909 remove_range_unlocked(ic, &io->range); 1910 spin_unlock_irq(&ic->endio_wait.lock); 1911 mempool_free(io, ic->journal_io_mempool); 1912 goto skip_io; 1913 } 1914 for (l = j; l < k; l++) { 1915 remove_journal_node(ic, §ion_node[l]); 1916 } 1917 } 1918 spin_unlock_irq(&ic->endio_wait.lock); 1919 1920 metadata_block = get_metadata_sector_and_offset(ic, area, offset, &metadata_offset); 1921 for (l = j; l < k; l++) { 1922 int r; 1923 struct journal_entry *je2 = access_journal_entry(ic, i, l); 1924 1925 if ( 1926 #ifndef INTERNAL_VERIFY 1927 unlikely(from_replay) && 1928 #endif 1929 ic->internal_hash) { 1930 char test_tag[max(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)]; 1931 1932 integrity_sector_checksum(ic, sec + ((l - j) << ic->sb->log2_sectors_per_block), 1933 (char *)access_journal_data(ic, i, l), test_tag); 1934 if (unlikely(memcmp(test_tag, journal_entry_tag(ic, je2), ic->tag_size))) 1935 dm_integrity_io_error(ic, "tag mismatch when replaying journal", -EILSEQ); 1936 } 1937 1938 journal_entry_set_unused(je2); 1939 r = dm_integrity_rw_tag(ic, journal_entry_tag(ic, je2), &metadata_block, &metadata_offset, 1940 ic->tag_size, TAG_WRITE); 1941 if (unlikely(r)) { 1942 dm_integrity_io_error(ic, "reading tags", r); 1943 } 1944 } 1945 1946 atomic_inc(&comp.in_flight); 1947 copy_from_journal(ic, i, j << ic->sb->log2_sectors_per_block, 1948 (k - j) << ic->sb->log2_sectors_per_block, 1949 get_data_sector(ic, area, offset), 1950 complete_copy_from_journal, io); 1951 skip_io: 1952 j = next_loop; 1953 } 1954 } 1955 1956 dm_bufio_write_dirty_buffers_async(ic->bufio); 1957 1958 blk_finish_plug(&plug); 1959 1960 complete_journal_op(&comp); 1961 wait_for_completion_io(&comp.comp); 1962 1963 dm_integrity_flush_buffers(ic); 1964 } 1965 1966 static void integrity_writer(struct work_struct *w) 1967 { 1968 struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, writer_work); 1969 unsigned write_start, write_sections; 1970 1971 unsigned prev_free_sectors; 1972 1973 /* the following test is not needed, but it tests the replay code */ 1974 if (ACCESS_ONCE(ic->suspending)) 1975 return; 1976 1977 spin_lock_irq(&ic->endio_wait.lock); 1978 write_start = ic->committed_section; 1979 write_sections = ic->n_committed_sections; 1980 spin_unlock_irq(&ic->endio_wait.lock); 1981 1982 if (!write_sections) 1983 return; 1984 1985 do_journal_write(ic, write_start, write_sections, false); 1986 1987 spin_lock_irq(&ic->endio_wait.lock); 1988 1989 ic->committed_section += write_sections; 1990 wraparound_section(ic, &ic->committed_section); 1991 ic->n_committed_sections -= write_sections; 1992 1993 prev_free_sectors = ic->free_sectors; 1994 ic->free_sectors += write_sections * ic->journal_section_entries; 1995 if (unlikely(!prev_free_sectors)) 1996 wake_up_locked(&ic->endio_wait); 1997 1998 spin_unlock_irq(&ic->endio_wait.lock); 1999 } 2000 2001 static void init_journal(struct dm_integrity_c *ic, unsigned start_section, 2002 unsigned n_sections, unsigned char commit_seq) 2003 { 2004 unsigned i, j, n; 2005 2006 if (!n_sections) 2007 return; 2008 2009 for (n = 0; n < n_sections; n++) { 2010 i = start_section + n; 2011 wraparound_section(ic, &i); 2012 for (j = 0; j < ic->journal_section_sectors; j++) { 2013 struct journal_sector *js = access_journal(ic, i, j); 2014 memset(&js->entries, 0, JOURNAL_SECTOR_DATA); 2015 js->commit_id = dm_integrity_commit_id(ic, i, j, commit_seq); 2016 } 2017 for (j = 0; j < ic->journal_section_entries; j++) { 2018 struct journal_entry *je = access_journal_entry(ic, i, j); 2019 journal_entry_set_unused(je); 2020 } 2021 } 2022 2023 write_journal(ic, start_section, n_sections); 2024 } 2025 2026 static int find_commit_seq(struct dm_integrity_c *ic, unsigned i, unsigned j, commit_id_t id) 2027 { 2028 unsigned char k; 2029 for (k = 0; k < N_COMMIT_IDS; k++) { 2030 if (dm_integrity_commit_id(ic, i, j, k) == id) 2031 return k; 2032 } 2033 dm_integrity_io_error(ic, "journal commit id", -EIO); 2034 return -EIO; 2035 } 2036 2037 static void replay_journal(struct dm_integrity_c *ic) 2038 { 2039 unsigned i, j; 2040 bool used_commit_ids[N_COMMIT_IDS]; 2041 unsigned max_commit_id_sections[N_COMMIT_IDS]; 2042 unsigned write_start, write_sections; 2043 unsigned continue_section; 2044 bool journal_empty; 2045 unsigned char unused, last_used, want_commit_seq; 2046 2047 if (ic->mode == 'R') 2048 return; 2049 2050 if (ic->journal_uptodate) 2051 return; 2052 2053 last_used = 0; 2054 write_start = 0; 2055 2056 if (!ic->just_formatted) { 2057 DEBUG_print("reading journal\n"); 2058 rw_journal(ic, REQ_OP_READ, 0, 0, ic->journal_sections, NULL); 2059 if (ic->journal_io) 2060 DEBUG_bytes(lowmem_page_address(ic->journal_io[0].page), 64, "read journal"); 2061 if (ic->journal_io) { 2062 struct journal_completion crypt_comp; 2063 crypt_comp.ic = ic; 2064 crypt_comp.comp = COMPLETION_INITIALIZER_ONSTACK(crypt_comp.comp); 2065 crypt_comp.in_flight = (atomic_t)ATOMIC_INIT(0); 2066 encrypt_journal(ic, false, 0, ic->journal_sections, &crypt_comp); 2067 wait_for_completion(&crypt_comp.comp); 2068 } 2069 DEBUG_bytes(lowmem_page_address(ic->journal[0].page), 64, "decrypted journal"); 2070 } 2071 2072 if (dm_integrity_failed(ic)) 2073 goto clear_journal; 2074 2075 journal_empty = true; 2076 memset(used_commit_ids, 0, sizeof used_commit_ids); 2077 memset(max_commit_id_sections, 0, sizeof max_commit_id_sections); 2078 for (i = 0; i < ic->journal_sections; i++) { 2079 for (j = 0; j < ic->journal_section_sectors; j++) { 2080 int k; 2081 struct journal_sector *js = access_journal(ic, i, j); 2082 k = find_commit_seq(ic, i, j, js->commit_id); 2083 if (k < 0) 2084 goto clear_journal; 2085 used_commit_ids[k] = true; 2086 max_commit_id_sections[k] = i; 2087 } 2088 if (journal_empty) { 2089 for (j = 0; j < ic->journal_section_entries; j++) { 2090 struct journal_entry *je = access_journal_entry(ic, i, j); 2091 if (!journal_entry_is_unused(je)) { 2092 journal_empty = false; 2093 break; 2094 } 2095 } 2096 } 2097 } 2098 2099 if (!used_commit_ids[N_COMMIT_IDS - 1]) { 2100 unused = N_COMMIT_IDS - 1; 2101 while (unused && !used_commit_ids[unused - 1]) 2102 unused--; 2103 } else { 2104 for (unused = 0; unused < N_COMMIT_IDS; unused++) 2105 if (!used_commit_ids[unused]) 2106 break; 2107 if (unused == N_COMMIT_IDS) { 2108 dm_integrity_io_error(ic, "journal commit ids", -EIO); 2109 goto clear_journal; 2110 } 2111 } 2112 DEBUG_print("first unused commit seq %d [%d,%d,%d,%d]\n", 2113 unused, used_commit_ids[0], used_commit_ids[1], 2114 used_commit_ids[2], used_commit_ids[3]); 2115 2116 last_used = prev_commit_seq(unused); 2117 want_commit_seq = prev_commit_seq(last_used); 2118 2119 if (!used_commit_ids[want_commit_seq] && used_commit_ids[prev_commit_seq(want_commit_seq)]) 2120 journal_empty = true; 2121 2122 write_start = max_commit_id_sections[last_used] + 1; 2123 if (unlikely(write_start >= ic->journal_sections)) 2124 want_commit_seq = next_commit_seq(want_commit_seq); 2125 wraparound_section(ic, &write_start); 2126 2127 i = write_start; 2128 for (write_sections = 0; write_sections < ic->journal_sections; write_sections++) { 2129 for (j = 0; j < ic->journal_section_sectors; j++) { 2130 struct journal_sector *js = access_journal(ic, i, j); 2131 2132 if (js->commit_id != dm_integrity_commit_id(ic, i, j, want_commit_seq)) { 2133 /* 2134 * This could be caused by crash during writing. 2135 * We won't replay the inconsistent part of the 2136 * journal. 2137 */ 2138 DEBUG_print("commit id mismatch at position (%u, %u): %d != %d\n", 2139 i, j, find_commit_seq(ic, i, j, js->commit_id), want_commit_seq); 2140 goto brk; 2141 } 2142 } 2143 i++; 2144 if (unlikely(i >= ic->journal_sections)) 2145 want_commit_seq = next_commit_seq(want_commit_seq); 2146 wraparound_section(ic, &i); 2147 } 2148 brk: 2149 2150 if (!journal_empty) { 2151 DEBUG_print("replaying %u sections, starting at %u, commit seq %d\n", 2152 write_sections, write_start, want_commit_seq); 2153 do_journal_write(ic, write_start, write_sections, true); 2154 } 2155 2156 if (write_sections == ic->journal_sections && (ic->mode == 'J' || journal_empty)) { 2157 continue_section = write_start; 2158 ic->commit_seq = want_commit_seq; 2159 DEBUG_print("continuing from section %u, commit seq %d\n", write_start, ic->commit_seq); 2160 } else { 2161 unsigned s; 2162 unsigned char erase_seq; 2163 clear_journal: 2164 DEBUG_print("clearing journal\n"); 2165 2166 erase_seq = prev_commit_seq(prev_commit_seq(last_used)); 2167 s = write_start; 2168 init_journal(ic, s, 1, erase_seq); 2169 s++; 2170 wraparound_section(ic, &s); 2171 if (ic->journal_sections >= 2) { 2172 init_journal(ic, s, ic->journal_sections - 2, erase_seq); 2173 s += ic->journal_sections - 2; 2174 wraparound_section(ic, &s); 2175 init_journal(ic, s, 1, erase_seq); 2176 } 2177 2178 continue_section = 0; 2179 ic->commit_seq = next_commit_seq(erase_seq); 2180 } 2181 2182 ic->committed_section = continue_section; 2183 ic->n_committed_sections = 0; 2184 2185 ic->uncommitted_section = continue_section; 2186 ic->n_uncommitted_sections = 0; 2187 2188 ic->free_section = continue_section; 2189 ic->free_section_entry = 0; 2190 ic->free_sectors = ic->journal_entries; 2191 2192 ic->journal_tree_root = RB_ROOT; 2193 for (i = 0; i < ic->journal_entries; i++) 2194 init_journal_node(&ic->journal_tree[i]); 2195 } 2196 2197 static void dm_integrity_postsuspend(struct dm_target *ti) 2198 { 2199 struct dm_integrity_c *ic = (struct dm_integrity_c *)ti->private; 2200 2201 del_timer_sync(&ic->autocommit_timer); 2202 2203 ic->suspending = true; 2204 2205 queue_work(ic->commit_wq, &ic->commit_work); 2206 drain_workqueue(ic->commit_wq); 2207 2208 if (ic->mode == 'J') { 2209 drain_workqueue(ic->writer_wq); 2210 dm_integrity_flush_buffers(ic); 2211 } 2212 2213 ic->suspending = false; 2214 2215 BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress)); 2216 2217 ic->journal_uptodate = true; 2218 } 2219 2220 static void dm_integrity_resume(struct dm_target *ti) 2221 { 2222 struct dm_integrity_c *ic = (struct dm_integrity_c *)ti->private; 2223 2224 replay_journal(ic); 2225 } 2226 2227 static void dm_integrity_status(struct dm_target *ti, status_type_t type, 2228 unsigned status_flags, char *result, unsigned maxlen) 2229 { 2230 struct dm_integrity_c *ic = (struct dm_integrity_c *)ti->private; 2231 unsigned arg_count; 2232 size_t sz = 0; 2233 2234 switch (type) { 2235 case STATUSTYPE_INFO: 2236 result[0] = '\0'; 2237 break; 2238 2239 case STATUSTYPE_TABLE: { 2240 __u64 watermark_percentage = (__u64)(ic->journal_entries - ic->free_sectors_threshold) * 100; 2241 watermark_percentage += ic->journal_entries / 2; 2242 do_div(watermark_percentage, ic->journal_entries); 2243 arg_count = 5; 2244 arg_count += ic->sectors_per_block != 1; 2245 arg_count += !!ic->internal_hash_alg.alg_string; 2246 arg_count += !!ic->journal_crypt_alg.alg_string; 2247 arg_count += !!ic->journal_mac_alg.alg_string; 2248 DMEMIT("%s %llu %u %c %u", ic->dev->name, (unsigned long long)ic->start, 2249 ic->tag_size, ic->mode, arg_count); 2250 DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS); 2251 DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors); 2252 DMEMIT(" buffer_sectors:%u", 1U << ic->log2_buffer_sectors); 2253 DMEMIT(" journal_watermark:%u", (unsigned)watermark_percentage); 2254 DMEMIT(" commit_time:%u", ic->autocommit_msec); 2255 if (ic->sectors_per_block != 1) 2256 DMEMIT(" block_size:%u", ic->sectors_per_block << SECTOR_SHIFT); 2257 2258 #define EMIT_ALG(a, n) \ 2259 do { \ 2260 if (ic->a.alg_string) { \ 2261 DMEMIT(" %s:%s", n, ic->a.alg_string); \ 2262 if (ic->a.key_string) \ 2263 DMEMIT(":%s", ic->a.key_string);\ 2264 } \ 2265 } while (0) 2266 EMIT_ALG(internal_hash_alg, "internal_hash"); 2267 EMIT_ALG(journal_crypt_alg, "journal_crypt"); 2268 EMIT_ALG(journal_mac_alg, "journal_mac"); 2269 break; 2270 } 2271 } 2272 } 2273 2274 static int dm_integrity_iterate_devices(struct dm_target *ti, 2275 iterate_devices_callout_fn fn, void *data) 2276 { 2277 struct dm_integrity_c *ic = ti->private; 2278 2279 return fn(ti, ic->dev, ic->start + ic->initial_sectors + ic->metadata_run, ti->len, data); 2280 } 2281 2282 static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *limits) 2283 { 2284 struct dm_integrity_c *ic = ti->private; 2285 2286 if (ic->sectors_per_block > 1) { 2287 limits->logical_block_size = ic->sectors_per_block << SECTOR_SHIFT; 2288 limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT; 2289 blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT); 2290 } 2291 } 2292 2293 static void calculate_journal_section_size(struct dm_integrity_c *ic) 2294 { 2295 unsigned sector_space = JOURNAL_SECTOR_DATA; 2296 2297 ic->journal_sections = le32_to_cpu(ic->sb->journal_sections); 2298 ic->journal_entry_size = roundup(offsetof(struct journal_entry, last_bytes[ic->sectors_per_block]) + ic->tag_size, 2299 JOURNAL_ENTRY_ROUNDUP); 2300 2301 if (ic->sb->flags & cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC)) 2302 sector_space -= JOURNAL_MAC_PER_SECTOR; 2303 ic->journal_entries_per_sector = sector_space / ic->journal_entry_size; 2304 ic->journal_section_entries = ic->journal_entries_per_sector * JOURNAL_BLOCK_SECTORS; 2305 ic->journal_section_sectors = (ic->journal_section_entries << ic->sb->log2_sectors_per_block) + JOURNAL_BLOCK_SECTORS; 2306 ic->journal_entries = ic->journal_section_entries * ic->journal_sections; 2307 } 2308 2309 static int calculate_device_limits(struct dm_integrity_c *ic) 2310 { 2311 __u64 initial_sectors; 2312 sector_t last_sector, last_area, last_offset; 2313 2314 calculate_journal_section_size(ic); 2315 initial_sectors = SB_SECTORS + (__u64)ic->journal_section_sectors * ic->journal_sections; 2316 if (initial_sectors + METADATA_PADDING_SECTORS >= ic->device_sectors || initial_sectors > UINT_MAX) 2317 return -EINVAL; 2318 ic->initial_sectors = initial_sectors; 2319 2320 ic->metadata_run = roundup((__u64)ic->tag_size << (ic->sb->log2_interleave_sectors - ic->sb->log2_sectors_per_block), 2321 (__u64)(1 << SECTOR_SHIFT << METADATA_PADDING_SECTORS)) >> SECTOR_SHIFT; 2322 if (!(ic->metadata_run & (ic->metadata_run - 1))) 2323 ic->log2_metadata_run = __ffs(ic->metadata_run); 2324 else 2325 ic->log2_metadata_run = -1; 2326 2327 get_area_and_offset(ic, ic->provided_data_sectors - 1, &last_area, &last_offset); 2328 last_sector = get_data_sector(ic, last_area, last_offset); 2329 2330 if (ic->start + last_sector < last_sector || ic->start + last_sector >= ic->device_sectors) 2331 return -EINVAL; 2332 2333 return 0; 2334 } 2335 2336 static int initialize_superblock(struct dm_integrity_c *ic, unsigned journal_sectors, unsigned interleave_sectors) 2337 { 2338 unsigned journal_sections; 2339 int test_bit; 2340 2341 memset(ic->sb, 0, SB_SECTORS << SECTOR_SHIFT); 2342 memcpy(ic->sb->magic, SB_MAGIC, 8); 2343 ic->sb->version = SB_VERSION; 2344 ic->sb->integrity_tag_size = cpu_to_le16(ic->tag_size); 2345 ic->sb->log2_sectors_per_block = __ffs(ic->sectors_per_block); 2346 if (ic->journal_mac_alg.alg_string) 2347 ic->sb->flags |= cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC); 2348 2349 calculate_journal_section_size(ic); 2350 journal_sections = journal_sectors / ic->journal_section_sectors; 2351 if (!journal_sections) 2352 journal_sections = 1; 2353 ic->sb->journal_sections = cpu_to_le32(journal_sections); 2354 2355 if (!interleave_sectors) 2356 interleave_sectors = DEFAULT_INTERLEAVE_SECTORS; 2357 ic->sb->log2_interleave_sectors = __fls(interleave_sectors); 2358 ic->sb->log2_interleave_sectors = max((__u8)MIN_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors); 2359 ic->sb->log2_interleave_sectors = min((__u8)MAX_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors); 2360 2361 ic->provided_data_sectors = 0; 2362 for (test_bit = fls64(ic->device_sectors) - 1; test_bit >= 3; test_bit--) { 2363 __u64 prev_data_sectors = ic->provided_data_sectors; 2364 2365 ic->provided_data_sectors |= (sector_t)1 << test_bit; 2366 if (calculate_device_limits(ic)) 2367 ic->provided_data_sectors = prev_data_sectors; 2368 } 2369 2370 if (!ic->provided_data_sectors) 2371 return -EINVAL; 2372 2373 ic->sb->provided_data_sectors = cpu_to_le64(ic->provided_data_sectors); 2374 2375 return 0; 2376 } 2377 2378 static void dm_integrity_set(struct dm_target *ti, struct dm_integrity_c *ic) 2379 { 2380 struct gendisk *disk = dm_disk(dm_table_get_md(ti->table)); 2381 struct blk_integrity bi; 2382 2383 memset(&bi, 0, sizeof(bi)); 2384 bi.profile = &dm_integrity_profile; 2385 bi.tuple_size = ic->tag_size; 2386 bi.tag_size = bi.tuple_size; 2387 bi.interval_exp = ic->sb->log2_sectors_per_block + SECTOR_SHIFT; 2388 2389 blk_integrity_register(disk, &bi); 2390 blk_queue_max_integrity_segments(disk->queue, UINT_MAX); 2391 } 2392 2393 static void dm_integrity_free_page_list(struct dm_integrity_c *ic, struct page_list *pl) 2394 { 2395 unsigned i; 2396 2397 if (!pl) 2398 return; 2399 for (i = 0; i < ic->journal_pages; i++) 2400 if (pl[i].page) 2401 __free_page(pl[i].page); 2402 kvfree(pl); 2403 } 2404 2405 static struct page_list *dm_integrity_alloc_page_list(struct dm_integrity_c *ic) 2406 { 2407 size_t page_list_desc_size = ic->journal_pages * sizeof(struct page_list); 2408 struct page_list *pl; 2409 unsigned i; 2410 2411 pl = kvmalloc(page_list_desc_size, GFP_KERNEL | __GFP_ZERO); 2412 if (!pl) 2413 return NULL; 2414 2415 for (i = 0; i < ic->journal_pages; i++) { 2416 pl[i].page = alloc_page(GFP_KERNEL); 2417 if (!pl[i].page) { 2418 dm_integrity_free_page_list(ic, pl); 2419 return NULL; 2420 } 2421 if (i) 2422 pl[i - 1].next = &pl[i]; 2423 } 2424 2425 return pl; 2426 } 2427 2428 static void dm_integrity_free_journal_scatterlist(struct dm_integrity_c *ic, struct scatterlist **sl) 2429 { 2430 unsigned i; 2431 for (i = 0; i < ic->journal_sections; i++) 2432 kvfree(sl[i]); 2433 kfree(sl); 2434 } 2435 2436 static struct scatterlist **dm_integrity_alloc_journal_scatterlist(struct dm_integrity_c *ic, struct page_list *pl) 2437 { 2438 struct scatterlist **sl; 2439 unsigned i; 2440 2441 sl = kvmalloc(ic->journal_sections * sizeof(struct scatterlist *), GFP_KERNEL | __GFP_ZERO); 2442 if (!sl) 2443 return NULL; 2444 2445 for (i = 0; i < ic->journal_sections; i++) { 2446 struct scatterlist *s; 2447 unsigned start_index, start_offset; 2448 unsigned end_index, end_offset; 2449 unsigned n_pages; 2450 unsigned idx; 2451 2452 page_list_location(ic, i, 0, &start_index, &start_offset); 2453 page_list_location(ic, i, ic->journal_section_sectors - 1, &end_index, &end_offset); 2454 2455 n_pages = (end_index - start_index + 1); 2456 2457 s = kvmalloc(n_pages * sizeof(struct scatterlist), GFP_KERNEL); 2458 if (!s) { 2459 dm_integrity_free_journal_scatterlist(ic, sl); 2460 return NULL; 2461 } 2462 2463 sg_init_table(s, n_pages); 2464 for (idx = start_index; idx <= end_index; idx++) { 2465 char *va = lowmem_page_address(pl[idx].page); 2466 unsigned start = 0, end = PAGE_SIZE; 2467 if (idx == start_index) 2468 start = start_offset; 2469 if (idx == end_index) 2470 end = end_offset + (1 << SECTOR_SHIFT); 2471 sg_set_buf(&s[idx - start_index], va + start, end - start); 2472 } 2473 2474 sl[i] = s; 2475 } 2476 2477 return sl; 2478 } 2479 2480 static void free_alg(struct alg_spec *a) 2481 { 2482 kzfree(a->alg_string); 2483 kzfree(a->key); 2484 memset(a, 0, sizeof *a); 2485 } 2486 2487 static int get_alg_and_key(const char *arg, struct alg_spec *a, char **error, char *error_inval) 2488 { 2489 char *k; 2490 2491 free_alg(a); 2492 2493 a->alg_string = kstrdup(strchr(arg, ':') + 1, GFP_KERNEL); 2494 if (!a->alg_string) 2495 goto nomem; 2496 2497 k = strchr(a->alg_string, ':'); 2498 if (k) { 2499 *k = 0; 2500 a->key_string = k + 1; 2501 if (strlen(a->key_string) & 1) 2502 goto inval; 2503 2504 a->key_size = strlen(a->key_string) / 2; 2505 a->key = kmalloc(a->key_size, GFP_KERNEL); 2506 if (!a->key) 2507 goto nomem; 2508 if (hex2bin(a->key, a->key_string, a->key_size)) 2509 goto inval; 2510 } 2511 2512 return 0; 2513 inval: 2514 *error = error_inval; 2515 return -EINVAL; 2516 nomem: 2517 *error = "Out of memory for an argument"; 2518 return -ENOMEM; 2519 } 2520 2521 static int get_mac(struct crypto_shash **hash, struct alg_spec *a, char **error, 2522 char *error_alg, char *error_key) 2523 { 2524 int r; 2525 2526 if (a->alg_string) { 2527 *hash = crypto_alloc_shash(a->alg_string, 0, CRYPTO_ALG_ASYNC); 2528 if (IS_ERR(*hash)) { 2529 *error = error_alg; 2530 r = PTR_ERR(*hash); 2531 *hash = NULL; 2532 return r; 2533 } 2534 2535 if (a->key) { 2536 r = crypto_shash_setkey(*hash, a->key, a->key_size); 2537 if (r) { 2538 *error = error_key; 2539 return r; 2540 } 2541 } 2542 } 2543 2544 return 0; 2545 } 2546 2547 static int create_journal(struct dm_integrity_c *ic, char **error) 2548 { 2549 int r = 0; 2550 unsigned i; 2551 __u64 journal_pages, journal_desc_size, journal_tree_size; 2552 unsigned char *crypt_data = NULL; 2553 2554 ic->commit_ids[0] = cpu_to_le64(0x1111111111111111ULL); 2555 ic->commit_ids[1] = cpu_to_le64(0x2222222222222222ULL); 2556 ic->commit_ids[2] = cpu_to_le64(0x3333333333333333ULL); 2557 ic->commit_ids[3] = cpu_to_le64(0x4444444444444444ULL); 2558 2559 journal_pages = roundup((__u64)ic->journal_sections * ic->journal_section_sectors, 2560 PAGE_SIZE >> SECTOR_SHIFT) >> (PAGE_SHIFT - SECTOR_SHIFT); 2561 journal_desc_size = journal_pages * sizeof(struct page_list); 2562 if (journal_pages >= totalram_pages - totalhigh_pages || journal_desc_size > ULONG_MAX) { 2563 *error = "Journal doesn't fit into memory"; 2564 r = -ENOMEM; 2565 goto bad; 2566 } 2567 ic->journal_pages = journal_pages; 2568 2569 ic->journal = dm_integrity_alloc_page_list(ic); 2570 if (!ic->journal) { 2571 *error = "Could not allocate memory for journal"; 2572 r = -ENOMEM; 2573 goto bad; 2574 } 2575 if (ic->journal_crypt_alg.alg_string) { 2576 unsigned ivsize, blocksize; 2577 struct journal_completion comp; 2578 2579 comp.ic = ic; 2580 ic->journal_crypt = crypto_alloc_skcipher(ic->journal_crypt_alg.alg_string, 0, 0); 2581 if (IS_ERR(ic->journal_crypt)) { 2582 *error = "Invalid journal cipher"; 2583 r = PTR_ERR(ic->journal_crypt); 2584 ic->journal_crypt = NULL; 2585 goto bad; 2586 } 2587 ivsize = crypto_skcipher_ivsize(ic->journal_crypt); 2588 blocksize = crypto_skcipher_blocksize(ic->journal_crypt); 2589 2590 if (ic->journal_crypt_alg.key) { 2591 r = crypto_skcipher_setkey(ic->journal_crypt, ic->journal_crypt_alg.key, 2592 ic->journal_crypt_alg.key_size); 2593 if (r) { 2594 *error = "Error setting encryption key"; 2595 goto bad; 2596 } 2597 } 2598 DEBUG_print("cipher %s, block size %u iv size %u\n", 2599 ic->journal_crypt_alg.alg_string, blocksize, ivsize); 2600 2601 ic->journal_io = dm_integrity_alloc_page_list(ic); 2602 if (!ic->journal_io) { 2603 *error = "Could not allocate memory for journal io"; 2604 r = -ENOMEM; 2605 goto bad; 2606 } 2607 2608 if (blocksize == 1) { 2609 struct scatterlist *sg; 2610 SKCIPHER_REQUEST_ON_STACK(req, ic->journal_crypt); 2611 unsigned char iv[ivsize]; 2612 skcipher_request_set_tfm(req, ic->journal_crypt); 2613 2614 ic->journal_xor = dm_integrity_alloc_page_list(ic); 2615 if (!ic->journal_xor) { 2616 *error = "Could not allocate memory for journal xor"; 2617 r = -ENOMEM; 2618 goto bad; 2619 } 2620 2621 sg = kvmalloc((ic->journal_pages + 1) * sizeof(struct scatterlist), GFP_KERNEL); 2622 if (!sg) { 2623 *error = "Unable to allocate sg list"; 2624 r = -ENOMEM; 2625 goto bad; 2626 } 2627 sg_init_table(sg, ic->journal_pages + 1); 2628 for (i = 0; i < ic->journal_pages; i++) { 2629 char *va = lowmem_page_address(ic->journal_xor[i].page); 2630 clear_page(va); 2631 sg_set_buf(&sg[i], va, PAGE_SIZE); 2632 } 2633 sg_set_buf(&sg[i], &ic->commit_ids, sizeof ic->commit_ids); 2634 memset(iv, 0x00, ivsize); 2635 2636 skcipher_request_set_crypt(req, sg, sg, PAGE_SIZE * ic->journal_pages + sizeof ic->commit_ids, iv); 2637 comp.comp = COMPLETION_INITIALIZER_ONSTACK(comp.comp); 2638 comp.in_flight = (atomic_t)ATOMIC_INIT(1); 2639 if (do_crypt(true, req, &comp)) 2640 wait_for_completion(&comp.comp); 2641 kvfree(sg); 2642 r = dm_integrity_failed(ic); 2643 if (r) { 2644 *error = "Unable to encrypt journal"; 2645 goto bad; 2646 } 2647 DEBUG_bytes(lowmem_page_address(ic->journal_xor[0].page), 64, "xor data"); 2648 2649 crypto_free_skcipher(ic->journal_crypt); 2650 ic->journal_crypt = NULL; 2651 } else { 2652 SKCIPHER_REQUEST_ON_STACK(req, ic->journal_crypt); 2653 unsigned char iv[ivsize]; 2654 unsigned crypt_len = roundup(ivsize, blocksize); 2655 2656 crypt_data = kmalloc(crypt_len, GFP_KERNEL); 2657 if (!crypt_data) { 2658 *error = "Unable to allocate crypt data"; 2659 r = -ENOMEM; 2660 goto bad; 2661 } 2662 2663 skcipher_request_set_tfm(req, ic->journal_crypt); 2664 2665 ic->journal_scatterlist = dm_integrity_alloc_journal_scatterlist(ic, ic->journal); 2666 if (!ic->journal_scatterlist) { 2667 *error = "Unable to allocate sg list"; 2668 r = -ENOMEM; 2669 goto bad; 2670 } 2671 ic->journal_io_scatterlist = dm_integrity_alloc_journal_scatterlist(ic, ic->journal_io); 2672 if (!ic->journal_io_scatterlist) { 2673 *error = "Unable to allocate sg list"; 2674 r = -ENOMEM; 2675 goto bad; 2676 } 2677 ic->sk_requests = kvmalloc(ic->journal_sections * sizeof(struct skcipher_request *), GFP_KERNEL | __GFP_ZERO); 2678 if (!ic->sk_requests) { 2679 *error = "Unable to allocate sk requests"; 2680 r = -ENOMEM; 2681 goto bad; 2682 } 2683 for (i = 0; i < ic->journal_sections; i++) { 2684 struct scatterlist sg; 2685 struct skcipher_request *section_req; 2686 __u32 section_le = cpu_to_le32(i); 2687 2688 memset(iv, 0x00, ivsize); 2689 memset(crypt_data, 0x00, crypt_len); 2690 memcpy(crypt_data, §ion_le, min((size_t)crypt_len, sizeof(section_le))); 2691 2692 sg_init_one(&sg, crypt_data, crypt_len); 2693 skcipher_request_set_crypt(req, &sg, &sg, crypt_len, iv); 2694 comp.comp = COMPLETION_INITIALIZER_ONSTACK(comp.comp); 2695 comp.in_flight = (atomic_t)ATOMIC_INIT(1); 2696 if (do_crypt(true, req, &comp)) 2697 wait_for_completion(&comp.comp); 2698 2699 r = dm_integrity_failed(ic); 2700 if (r) { 2701 *error = "Unable to generate iv"; 2702 goto bad; 2703 } 2704 2705 section_req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL); 2706 if (!section_req) { 2707 *error = "Unable to allocate crypt request"; 2708 r = -ENOMEM; 2709 goto bad; 2710 } 2711 section_req->iv = kmalloc(ivsize * 2, GFP_KERNEL); 2712 if (!section_req->iv) { 2713 skcipher_request_free(section_req); 2714 *error = "Unable to allocate iv"; 2715 r = -ENOMEM; 2716 goto bad; 2717 } 2718 memcpy(section_req->iv + ivsize, crypt_data, ivsize); 2719 section_req->cryptlen = (size_t)ic->journal_section_sectors << SECTOR_SHIFT; 2720 ic->sk_requests[i] = section_req; 2721 DEBUG_bytes(crypt_data, ivsize, "iv(%u)", i); 2722 } 2723 } 2724 } 2725 2726 for (i = 0; i < N_COMMIT_IDS; i++) { 2727 unsigned j; 2728 retest_commit_id: 2729 for (j = 0; j < i; j++) { 2730 if (ic->commit_ids[j] == ic->commit_ids[i]) { 2731 ic->commit_ids[i] = cpu_to_le64(le64_to_cpu(ic->commit_ids[i]) + 1); 2732 goto retest_commit_id; 2733 } 2734 } 2735 DEBUG_print("commit id %u: %016llx\n", i, ic->commit_ids[i]); 2736 } 2737 2738 journal_tree_size = (__u64)ic->journal_entries * sizeof(struct journal_node); 2739 if (journal_tree_size > ULONG_MAX) { 2740 *error = "Journal doesn't fit into memory"; 2741 r = -ENOMEM; 2742 goto bad; 2743 } 2744 ic->journal_tree = kvmalloc(journal_tree_size, GFP_KERNEL); 2745 if (!ic->journal_tree) { 2746 *error = "Could not allocate memory for journal tree"; 2747 r = -ENOMEM; 2748 } 2749 bad: 2750 kfree(crypt_data); 2751 return r; 2752 } 2753 2754 /* 2755 * Construct a integrity mapping 2756 * 2757 * Arguments: 2758 * device 2759 * offset from the start of the device 2760 * tag size 2761 * D - direct writes, J - journal writes, R - recovery mode 2762 * number of optional arguments 2763 * optional arguments: 2764 * journal_sectors 2765 * interleave_sectors 2766 * buffer_sectors 2767 * journal_watermark 2768 * commit_time 2769 * internal_hash 2770 * journal_crypt 2771 * journal_mac 2772 * block_size 2773 */ 2774 static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) 2775 { 2776 struct dm_integrity_c *ic; 2777 char dummy; 2778 int r; 2779 unsigned extra_args; 2780 struct dm_arg_set as; 2781 static struct dm_arg _args[] = { 2782 {0, 9, "Invalid number of feature args"}, 2783 }; 2784 unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec; 2785 bool should_write_sb; 2786 __u64 threshold; 2787 unsigned long long start; 2788 2789 #define DIRECT_ARGUMENTS 4 2790 2791 if (argc <= DIRECT_ARGUMENTS) { 2792 ti->error = "Invalid argument count"; 2793 return -EINVAL; 2794 } 2795 2796 ic = kzalloc(sizeof(struct dm_integrity_c), GFP_KERNEL); 2797 if (!ic) { 2798 ti->error = "Cannot allocate integrity context"; 2799 return -ENOMEM; 2800 } 2801 ti->private = ic; 2802 ti->per_io_data_size = sizeof(struct dm_integrity_io); 2803 2804 ic->in_progress = RB_ROOT; 2805 init_waitqueue_head(&ic->endio_wait); 2806 bio_list_init(&ic->flush_bio_list); 2807 init_waitqueue_head(&ic->copy_to_journal_wait); 2808 init_completion(&ic->crypto_backoff); 2809 2810 r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &ic->dev); 2811 if (r) { 2812 ti->error = "Device lookup failed"; 2813 goto bad; 2814 } 2815 2816 if (sscanf(argv[1], "%llu%c", &start, &dummy) != 1 || start != (sector_t)start) { 2817 ti->error = "Invalid starting offset"; 2818 r = -EINVAL; 2819 goto bad; 2820 } 2821 ic->start = start; 2822 2823 if (strcmp(argv[2], "-")) { 2824 if (sscanf(argv[2], "%u%c", &ic->tag_size, &dummy) != 1 || !ic->tag_size) { 2825 ti->error = "Invalid tag size"; 2826 r = -EINVAL; 2827 goto bad; 2828 } 2829 } 2830 2831 if (!strcmp(argv[3], "J") || !strcmp(argv[3], "D") || !strcmp(argv[3], "R")) 2832 ic->mode = argv[3][0]; 2833 else { 2834 ti->error = "Invalid mode (expecting J, D, R)"; 2835 r = -EINVAL; 2836 goto bad; 2837 } 2838 2839 ic->device_sectors = i_size_read(ic->dev->bdev->bd_inode) >> SECTOR_SHIFT; 2840 journal_sectors = min((sector_t)DEFAULT_MAX_JOURNAL_SECTORS, 2841 ic->device_sectors >> DEFAULT_JOURNAL_SIZE_FACTOR); 2842 interleave_sectors = DEFAULT_INTERLEAVE_SECTORS; 2843 buffer_sectors = DEFAULT_BUFFER_SECTORS; 2844 journal_watermark = DEFAULT_JOURNAL_WATERMARK; 2845 sync_msec = DEFAULT_SYNC_MSEC; 2846 ic->sectors_per_block = 1; 2847 2848 as.argc = argc - DIRECT_ARGUMENTS; 2849 as.argv = argv + DIRECT_ARGUMENTS; 2850 r = dm_read_arg_group(_args, &as, &extra_args, &ti->error); 2851 if (r) 2852 goto bad; 2853 2854 while (extra_args--) { 2855 const char *opt_string; 2856 unsigned val; 2857 opt_string = dm_shift_arg(&as); 2858 if (!opt_string) { 2859 r = -EINVAL; 2860 ti->error = "Not enough feature arguments"; 2861 goto bad; 2862 } 2863 if (sscanf(opt_string, "journal_sectors:%u%c", &val, &dummy) == 1) 2864 journal_sectors = val; 2865 else if (sscanf(opt_string, "interleave_sectors:%u%c", &val, &dummy) == 1) 2866 interleave_sectors = val; 2867 else if (sscanf(opt_string, "buffer_sectors:%u%c", &val, &dummy) == 1) 2868 buffer_sectors = val; 2869 else if (sscanf(opt_string, "journal_watermark:%u%c", &val, &dummy) == 1 && val <= 100) 2870 journal_watermark = val; 2871 else if (sscanf(opt_string, "commit_time:%u%c", &val, &dummy) == 1) 2872 sync_msec = val; 2873 else if (sscanf(opt_string, "block_size:%u%c", &val, &dummy) == 1) { 2874 if (val < 1 << SECTOR_SHIFT || 2875 val > MAX_SECTORS_PER_BLOCK << SECTOR_SHIFT || 2876 (val & (val -1))) { 2877 r = -EINVAL; 2878 ti->error = "Invalid block_size argument"; 2879 goto bad; 2880 } 2881 ic->sectors_per_block = val >> SECTOR_SHIFT; 2882 } else if (!memcmp(opt_string, "internal_hash:", strlen("internal_hash:"))) { 2883 r = get_alg_and_key(opt_string, &ic->internal_hash_alg, &ti->error, 2884 "Invalid internal_hash argument"); 2885 if (r) 2886 goto bad; 2887 } else if (!memcmp(opt_string, "journal_crypt:", strlen("journal_crypt:"))) { 2888 r = get_alg_and_key(opt_string, &ic->journal_crypt_alg, &ti->error, 2889 "Invalid journal_crypt argument"); 2890 if (r) 2891 goto bad; 2892 } else if (!memcmp(opt_string, "journal_mac:", strlen("journal_mac:"))) { 2893 r = get_alg_and_key(opt_string, &ic->journal_mac_alg, &ti->error, 2894 "Invalid journal_mac argument"); 2895 if (r) 2896 goto bad; 2897 } else { 2898 r = -EINVAL; 2899 ti->error = "Invalid argument"; 2900 goto bad; 2901 } 2902 } 2903 2904 r = get_mac(&ic->internal_hash, &ic->internal_hash_alg, &ti->error, 2905 "Invalid internal hash", "Error setting internal hash key"); 2906 if (r) 2907 goto bad; 2908 2909 r = get_mac(&ic->journal_mac, &ic->journal_mac_alg, &ti->error, 2910 "Invalid journal mac", "Error setting journal mac key"); 2911 if (r) 2912 goto bad; 2913 2914 if (!ic->tag_size) { 2915 if (!ic->internal_hash) { 2916 ti->error = "Unknown tag size"; 2917 r = -EINVAL; 2918 goto bad; 2919 } 2920 ic->tag_size = crypto_shash_digestsize(ic->internal_hash); 2921 } 2922 if (ic->tag_size > MAX_TAG_SIZE) { 2923 ti->error = "Too big tag size"; 2924 r = -EINVAL; 2925 goto bad; 2926 } 2927 if (!(ic->tag_size & (ic->tag_size - 1))) 2928 ic->log2_tag_size = __ffs(ic->tag_size); 2929 else 2930 ic->log2_tag_size = -1; 2931 2932 ic->autocommit_jiffies = msecs_to_jiffies(sync_msec); 2933 ic->autocommit_msec = sync_msec; 2934 setup_timer(&ic->autocommit_timer, autocommit_fn, (unsigned long)ic); 2935 2936 ic->io = dm_io_client_create(); 2937 if (IS_ERR(ic->io)) { 2938 r = PTR_ERR(ic->io); 2939 ic->io = NULL; 2940 ti->error = "Cannot allocate dm io"; 2941 goto bad; 2942 } 2943 2944 ic->journal_io_mempool = mempool_create_slab_pool(JOURNAL_IO_MEMPOOL, journal_io_cache); 2945 if (!ic->journal_io_mempool) { 2946 r = -ENOMEM; 2947 ti->error = "Cannot allocate mempool"; 2948 goto bad; 2949 } 2950 2951 ic->metadata_wq = alloc_workqueue("dm-integrity-metadata", 2952 WQ_MEM_RECLAIM, METADATA_WORKQUEUE_MAX_ACTIVE); 2953 if (!ic->metadata_wq) { 2954 ti->error = "Cannot allocate workqueue"; 2955 r = -ENOMEM; 2956 goto bad; 2957 } 2958 2959 /* 2960 * If this workqueue were percpu, it would cause bio reordering 2961 * and reduced performance. 2962 */ 2963 ic->wait_wq = alloc_workqueue("dm-integrity-wait", WQ_MEM_RECLAIM | WQ_UNBOUND, 1); 2964 if (!ic->wait_wq) { 2965 ti->error = "Cannot allocate workqueue"; 2966 r = -ENOMEM; 2967 goto bad; 2968 } 2969 2970 ic->commit_wq = alloc_workqueue("dm-integrity-commit", WQ_MEM_RECLAIM, 1); 2971 if (!ic->commit_wq) { 2972 ti->error = "Cannot allocate workqueue"; 2973 r = -ENOMEM; 2974 goto bad; 2975 } 2976 INIT_WORK(&ic->commit_work, integrity_commit); 2977 2978 if (ic->mode == 'J') { 2979 ic->writer_wq = alloc_workqueue("dm-integrity-writer", WQ_MEM_RECLAIM, 1); 2980 if (!ic->writer_wq) { 2981 ti->error = "Cannot allocate workqueue"; 2982 r = -ENOMEM; 2983 goto bad; 2984 } 2985 INIT_WORK(&ic->writer_work, integrity_writer); 2986 } 2987 2988 ic->sb = alloc_pages_exact(SB_SECTORS << SECTOR_SHIFT, GFP_KERNEL); 2989 if (!ic->sb) { 2990 r = -ENOMEM; 2991 ti->error = "Cannot allocate superblock area"; 2992 goto bad; 2993 } 2994 2995 r = sync_rw_sb(ic, REQ_OP_READ, 0); 2996 if (r) { 2997 ti->error = "Error reading superblock"; 2998 goto bad; 2999 } 3000 should_write_sb = false; 3001 if (memcmp(ic->sb->magic, SB_MAGIC, 8)) { 3002 if (ic->mode != 'R') { 3003 if (memchr_inv(ic->sb, 0, SB_SECTORS << SECTOR_SHIFT)) { 3004 r = -EINVAL; 3005 ti->error = "The device is not initialized"; 3006 goto bad; 3007 } 3008 } 3009 3010 r = initialize_superblock(ic, journal_sectors, interleave_sectors); 3011 if (r) { 3012 ti->error = "Could not initialize superblock"; 3013 goto bad; 3014 } 3015 if (ic->mode != 'R') 3016 should_write_sb = true; 3017 } 3018 3019 if (ic->sb->version != SB_VERSION) { 3020 r = -EINVAL; 3021 ti->error = "Unknown version"; 3022 goto bad; 3023 } 3024 if (le16_to_cpu(ic->sb->integrity_tag_size) != ic->tag_size) { 3025 r = -EINVAL; 3026 ti->error = "Tag size doesn't match the information in superblock"; 3027 goto bad; 3028 } 3029 if (ic->sb->log2_sectors_per_block != __ffs(ic->sectors_per_block)) { 3030 r = -EINVAL; 3031 ti->error = "Block size doesn't match the information in superblock"; 3032 goto bad; 3033 } 3034 if (!le32_to_cpu(ic->sb->journal_sections)) { 3035 r = -EINVAL; 3036 ti->error = "Corrupted superblock, journal_sections is 0"; 3037 goto bad; 3038 } 3039 /* make sure that ti->max_io_len doesn't overflow */ 3040 if (ic->sb->log2_interleave_sectors < MIN_LOG2_INTERLEAVE_SECTORS || 3041 ic->sb->log2_interleave_sectors > MAX_LOG2_INTERLEAVE_SECTORS) { 3042 r = -EINVAL; 3043 ti->error = "Invalid interleave_sectors in the superblock"; 3044 goto bad; 3045 } 3046 ic->provided_data_sectors = le64_to_cpu(ic->sb->provided_data_sectors); 3047 if (ic->provided_data_sectors != le64_to_cpu(ic->sb->provided_data_sectors)) { 3048 /* test for overflow */ 3049 r = -EINVAL; 3050 ti->error = "The superblock has 64-bit device size, but the kernel was compiled with 32-bit sectors"; 3051 goto bad; 3052 } 3053 if (!!(ic->sb->flags & cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC)) != !!ic->journal_mac_alg.alg_string) { 3054 r = -EINVAL; 3055 ti->error = "Journal mac mismatch"; 3056 goto bad; 3057 } 3058 r = calculate_device_limits(ic); 3059 if (r) { 3060 ti->error = "The device is too small"; 3061 goto bad; 3062 } 3063 if (ti->len > ic->provided_data_sectors) { 3064 r = -EINVAL; 3065 ti->error = "Not enough provided sectors for requested mapping size"; 3066 goto bad; 3067 } 3068 3069 if (!buffer_sectors) 3070 buffer_sectors = 1; 3071 ic->log2_buffer_sectors = min3((int)__fls(buffer_sectors), (int)__ffs(ic->metadata_run), 31 - SECTOR_SHIFT); 3072 3073 threshold = (__u64)ic->journal_entries * (100 - journal_watermark); 3074 threshold += 50; 3075 do_div(threshold, 100); 3076 ic->free_sectors_threshold = threshold; 3077 3078 DEBUG_print("initialized:\n"); 3079 DEBUG_print(" integrity_tag_size %u\n", le16_to_cpu(ic->sb->integrity_tag_size)); 3080 DEBUG_print(" journal_entry_size %u\n", ic->journal_entry_size); 3081 DEBUG_print(" journal_entries_per_sector %u\n", ic->journal_entries_per_sector); 3082 DEBUG_print(" journal_section_entries %u\n", ic->journal_section_entries); 3083 DEBUG_print(" journal_section_sectors %u\n", ic->journal_section_sectors); 3084 DEBUG_print(" journal_sections %u\n", (unsigned)le32_to_cpu(ic->sb->journal_sections)); 3085 DEBUG_print(" journal_entries %u\n", ic->journal_entries); 3086 DEBUG_print(" log2_interleave_sectors %d\n", ic->sb->log2_interleave_sectors); 3087 DEBUG_print(" device_sectors 0x%llx\n", (unsigned long long)ic->device_sectors); 3088 DEBUG_print(" initial_sectors 0x%x\n", ic->initial_sectors); 3089 DEBUG_print(" metadata_run 0x%x\n", ic->metadata_run); 3090 DEBUG_print(" log2_metadata_run %d\n", ic->log2_metadata_run); 3091 DEBUG_print(" provided_data_sectors 0x%llx (%llu)\n", (unsigned long long)ic->provided_data_sectors, 3092 (unsigned long long)ic->provided_data_sectors); 3093 DEBUG_print(" log2_buffer_sectors %u\n", ic->log2_buffer_sectors); 3094 3095 ic->bufio = dm_bufio_client_create(ic->dev->bdev, 1U << (SECTOR_SHIFT + ic->log2_buffer_sectors), 3096 1, 0, NULL, NULL); 3097 if (IS_ERR(ic->bufio)) { 3098 r = PTR_ERR(ic->bufio); 3099 ti->error = "Cannot initialize dm-bufio"; 3100 ic->bufio = NULL; 3101 goto bad; 3102 } 3103 dm_bufio_set_sector_offset(ic->bufio, ic->start + ic->initial_sectors); 3104 3105 if (ic->mode != 'R') { 3106 r = create_journal(ic, &ti->error); 3107 if (r) 3108 goto bad; 3109 } 3110 3111 if (should_write_sb) { 3112 int r; 3113 3114 init_journal(ic, 0, ic->journal_sections, 0); 3115 r = dm_integrity_failed(ic); 3116 if (unlikely(r)) { 3117 ti->error = "Error initializing journal"; 3118 goto bad; 3119 } 3120 r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA); 3121 if (r) { 3122 ti->error = "Error initializing superblock"; 3123 goto bad; 3124 } 3125 ic->just_formatted = true; 3126 } 3127 3128 r = dm_set_target_max_io_len(ti, 1U << ic->sb->log2_interleave_sectors); 3129 if (r) 3130 goto bad; 3131 3132 if (!ic->internal_hash) 3133 dm_integrity_set(ti, ic); 3134 3135 ti->num_flush_bios = 1; 3136 ti->flush_supported = true; 3137 3138 return 0; 3139 bad: 3140 dm_integrity_dtr(ti); 3141 return r; 3142 } 3143 3144 static void dm_integrity_dtr(struct dm_target *ti) 3145 { 3146 struct dm_integrity_c *ic = ti->private; 3147 3148 BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress)); 3149 3150 if (ic->metadata_wq) 3151 destroy_workqueue(ic->metadata_wq); 3152 if (ic->wait_wq) 3153 destroy_workqueue(ic->wait_wq); 3154 if (ic->commit_wq) 3155 destroy_workqueue(ic->commit_wq); 3156 if (ic->writer_wq) 3157 destroy_workqueue(ic->writer_wq); 3158 if (ic->bufio) 3159 dm_bufio_client_destroy(ic->bufio); 3160 mempool_destroy(ic->journal_io_mempool); 3161 if (ic->io) 3162 dm_io_client_destroy(ic->io); 3163 if (ic->dev) 3164 dm_put_device(ti, ic->dev); 3165 dm_integrity_free_page_list(ic, ic->journal); 3166 dm_integrity_free_page_list(ic, ic->journal_io); 3167 dm_integrity_free_page_list(ic, ic->journal_xor); 3168 if (ic->journal_scatterlist) 3169 dm_integrity_free_journal_scatterlist(ic, ic->journal_scatterlist); 3170 if (ic->journal_io_scatterlist) 3171 dm_integrity_free_journal_scatterlist(ic, ic->journal_io_scatterlist); 3172 if (ic->sk_requests) { 3173 unsigned i; 3174 3175 for (i = 0; i < ic->journal_sections; i++) { 3176 struct skcipher_request *req = ic->sk_requests[i]; 3177 if (req) { 3178 kzfree(req->iv); 3179 skcipher_request_free(req); 3180 } 3181 } 3182 kvfree(ic->sk_requests); 3183 } 3184 kvfree(ic->journal_tree); 3185 if (ic->sb) 3186 free_pages_exact(ic->sb, SB_SECTORS << SECTOR_SHIFT); 3187 3188 if (ic->internal_hash) 3189 crypto_free_shash(ic->internal_hash); 3190 free_alg(&ic->internal_hash_alg); 3191 3192 if (ic->journal_crypt) 3193 crypto_free_skcipher(ic->journal_crypt); 3194 free_alg(&ic->journal_crypt_alg); 3195 3196 if (ic->journal_mac) 3197 crypto_free_shash(ic->journal_mac); 3198 free_alg(&ic->journal_mac_alg); 3199 3200 kfree(ic); 3201 } 3202 3203 static struct target_type integrity_target = { 3204 .name = "integrity", 3205 .version = {1, 0, 0}, 3206 .module = THIS_MODULE, 3207 .features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY, 3208 .ctr = dm_integrity_ctr, 3209 .dtr = dm_integrity_dtr, 3210 .map = dm_integrity_map, 3211 .postsuspend = dm_integrity_postsuspend, 3212 .resume = dm_integrity_resume, 3213 .status = dm_integrity_status, 3214 .iterate_devices = dm_integrity_iterate_devices, 3215 .io_hints = dm_integrity_io_hints, 3216 }; 3217 3218 int __init dm_integrity_init(void) 3219 { 3220 int r; 3221 3222 journal_io_cache = kmem_cache_create("integrity_journal_io", 3223 sizeof(struct journal_io), 0, 0, NULL); 3224 if (!journal_io_cache) { 3225 DMERR("can't allocate journal io cache"); 3226 return -ENOMEM; 3227 } 3228 3229 r = dm_register_target(&integrity_target); 3230 3231 if (r < 0) 3232 DMERR("register failed %d", r); 3233 3234 return r; 3235 } 3236 3237 void dm_integrity_exit(void) 3238 { 3239 dm_unregister_target(&integrity_target); 3240 kmem_cache_destroy(journal_io_cache); 3241 } 3242 3243 module_init(dm_integrity_init); 3244 module_exit(dm_integrity_exit); 3245 3246 MODULE_AUTHOR("Milan Broz"); 3247 MODULE_AUTHOR("Mikulas Patocka"); 3248 MODULE_DESCRIPTION(DM_NAME " target for integrity tags extension"); 3249 MODULE_LICENSE("GPL"); 3250