1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2016-2017 Red Hat, Inc. All rights reserved. 4 * Copyright (C) 2016-2017 Milan Broz 5 * Copyright (C) 2016-2017 Mikulas Patocka 6 * 7 * This file is released under the GPL. 8 */ 9 10 #include "dm-bio-record.h" 11 12 #include <linux/compiler.h> 13 #include <linux/module.h> 14 #include <linux/device-mapper.h> 15 #include <linux/dm-io.h> 16 #include <linux/vmalloc.h> 17 #include <linux/sort.h> 18 #include <linux/rbtree.h> 19 #include <linux/delay.h> 20 #include <linux/random.h> 21 #include <linux/reboot.h> 22 #include <crypto/hash.h> 23 #include <crypto/skcipher.h> 24 #include <linux/async_tx.h> 25 #include <linux/dm-bufio.h> 26 27 #include "dm-audit.h" 28 29 #define DM_MSG_PREFIX "integrity" 30 31 #define DEFAULT_INTERLEAVE_SECTORS 32768 32 #define DEFAULT_JOURNAL_SIZE_FACTOR 7 33 #define DEFAULT_SECTORS_PER_BITMAP_BIT 32768 34 #define DEFAULT_BUFFER_SECTORS 128 35 #define DEFAULT_JOURNAL_WATERMARK 50 36 #define DEFAULT_SYNC_MSEC 10000 37 #define DEFAULT_MAX_JOURNAL_SECTORS 131072 38 #define MIN_LOG2_INTERLEAVE_SECTORS 3 39 #define MAX_LOG2_INTERLEAVE_SECTORS 31 40 #define METADATA_WORKQUEUE_MAX_ACTIVE 16 41 #define RECALC_SECTORS 32768 42 #define RECALC_WRITE_SUPER 16 43 #define BITMAP_BLOCK_SIZE 4096 /* don't change it */ 44 #define BITMAP_FLUSH_INTERVAL (10 * HZ) 45 #define DISCARD_FILLER 0xf6 46 #define SALT_SIZE 16 47 48 /* 49 * Warning - DEBUG_PRINT prints security-sensitive data to the log, 50 * so it should not be enabled in the official kernel 51 */ 52 //#define DEBUG_PRINT 53 //#define INTERNAL_VERIFY 54 55 /* 56 * On disk structures 57 */ 58 59 #define SB_MAGIC "integrt" 60 #define SB_VERSION_1 1 61 #define SB_VERSION_2 2 62 #define SB_VERSION_3 3 63 #define SB_VERSION_4 4 64 #define SB_VERSION_5 5 65 #define SB_SECTORS 8 66 #define MAX_SECTORS_PER_BLOCK 8 67 68 struct superblock { 69 __u8 magic[8]; 70 __u8 version; 71 __u8 log2_interleave_sectors; 72 __le16 integrity_tag_size; 73 __le32 journal_sections; 74 __le64 provided_data_sectors; /* userspace uses this value */ 75 __le32 flags; 76 __u8 log2_sectors_per_block; 77 __u8 log2_blocks_per_bitmap_bit; 78 __u8 pad[2]; 79 __le64 recalc_sector; 80 __u8 pad2[8]; 81 __u8 salt[SALT_SIZE]; 82 }; 83 84 #define SB_FLAG_HAVE_JOURNAL_MAC 0x1 85 #define SB_FLAG_RECALCULATING 0x2 86 #define SB_FLAG_DIRTY_BITMAP 0x4 87 #define SB_FLAG_FIXED_PADDING 0x8 88 #define SB_FLAG_FIXED_HMAC 0x10 89 90 #define JOURNAL_ENTRY_ROUNDUP 8 91 92 typedef __le64 commit_id_t; 93 #define JOURNAL_MAC_PER_SECTOR 8 94 95 struct journal_entry { 96 union { 97 struct { 98 __le32 sector_lo; 99 __le32 sector_hi; 100 } s; 101 __le64 sector; 102 } u; 103 commit_id_t last_bytes[]; 104 /* __u8 tag[0]; */ 105 }; 106 107 #define journal_entry_tag(ic, je) ((__u8 *)&(je)->last_bytes[(ic)->sectors_per_block]) 108 109 #if BITS_PER_LONG == 64 110 #define journal_entry_set_sector(je, x) do { smp_wmb(); WRITE_ONCE((je)->u.sector, cpu_to_le64(x)); } while (0) 111 #else 112 #define journal_entry_set_sector(je, x) do { (je)->u.s.sector_lo = cpu_to_le32(x); smp_wmb(); WRITE_ONCE((je)->u.s.sector_hi, cpu_to_le32((x) >> 32)); } while (0) 113 #endif 114 #define journal_entry_get_sector(je) le64_to_cpu((je)->u.sector) 115 #define journal_entry_is_unused(je) ((je)->u.s.sector_hi == cpu_to_le32(-1)) 116 #define journal_entry_set_unused(je) ((je)->u.s.sector_hi = cpu_to_le32(-1)) 117 #define journal_entry_is_inprogress(je) ((je)->u.s.sector_hi == cpu_to_le32(-2)) 118 #define journal_entry_set_inprogress(je) ((je)->u.s.sector_hi = 
cpu_to_le32(-2)) 119 120 #define JOURNAL_BLOCK_SECTORS 8 121 #define JOURNAL_SECTOR_DATA ((1 << SECTOR_SHIFT) - sizeof(commit_id_t)) 122 #define JOURNAL_MAC_SIZE (JOURNAL_MAC_PER_SECTOR * JOURNAL_BLOCK_SECTORS) 123 124 struct journal_sector { 125 struct_group(sectors, 126 __u8 entries[JOURNAL_SECTOR_DATA - JOURNAL_MAC_PER_SECTOR]; 127 __u8 mac[JOURNAL_MAC_PER_SECTOR]; 128 ); 129 commit_id_t commit_id; 130 }; 131 132 #define MAX_TAG_SIZE (JOURNAL_SECTOR_DATA - JOURNAL_MAC_PER_SECTOR - offsetof(struct journal_entry, last_bytes[MAX_SECTORS_PER_BLOCK])) 133 134 #define METADATA_PADDING_SECTORS 8 135 136 #define N_COMMIT_IDS 4 137 138 static unsigned char prev_commit_seq(unsigned char seq) 139 { 140 return (seq + N_COMMIT_IDS - 1) % N_COMMIT_IDS; 141 } 142 143 static unsigned char next_commit_seq(unsigned char seq) 144 { 145 return (seq + 1) % N_COMMIT_IDS; 146 } 147 148 /* 149 * In-memory structures 150 */ 151 152 struct journal_node { 153 struct rb_node node; 154 sector_t sector; 155 }; 156 157 struct alg_spec { 158 char *alg_string; 159 char *key_string; 160 __u8 *key; 161 unsigned int key_size; 162 }; 163 164 struct dm_integrity_c { 165 struct dm_dev *dev; 166 struct dm_dev *meta_dev; 167 unsigned int tag_size; 168 __s8 log2_tag_size; 169 sector_t start; 170 mempool_t journal_io_mempool; 171 struct dm_io_client *io; 172 struct dm_bufio_client *bufio; 173 struct workqueue_struct *metadata_wq; 174 struct superblock *sb; 175 unsigned int journal_pages; 176 unsigned int n_bitmap_blocks; 177 178 struct page_list *journal; 179 struct page_list *journal_io; 180 struct page_list *journal_xor; 181 struct page_list *recalc_bitmap; 182 struct page_list *may_write_bitmap; 183 struct bitmap_block_status *bbs; 184 unsigned int bitmap_flush_interval; 185 int synchronous_mode; 186 struct bio_list synchronous_bios; 187 struct delayed_work bitmap_flush_work; 188 189 struct crypto_skcipher *journal_crypt; 190 struct scatterlist **journal_scatterlist; 191 struct scatterlist **journal_io_scatterlist; 192 struct skcipher_request **sk_requests; 193 194 struct crypto_shash *journal_mac; 195 196 struct journal_node *journal_tree; 197 struct rb_root journal_tree_root; 198 199 sector_t provided_data_sectors; 200 201 unsigned short journal_entry_size; 202 unsigned char journal_entries_per_sector; 203 unsigned char journal_section_entries; 204 unsigned short journal_section_sectors; 205 unsigned int journal_sections; 206 unsigned int journal_entries; 207 sector_t data_device_sectors; 208 sector_t meta_device_sectors; 209 unsigned int initial_sectors; 210 unsigned int metadata_run; 211 __s8 log2_metadata_run; 212 __u8 log2_buffer_sectors; 213 __u8 sectors_per_block; 214 __u8 log2_blocks_per_bitmap_bit; 215 216 unsigned char mode; 217 218 int failed; 219 220 struct crypto_shash *internal_hash; 221 222 struct dm_target *ti; 223 224 /* these variables are locked with endio_wait.lock */ 225 struct rb_root in_progress; 226 struct list_head wait_list; 227 wait_queue_head_t endio_wait; 228 struct workqueue_struct *wait_wq; 229 struct workqueue_struct *offload_wq; 230 231 unsigned char commit_seq; 232 commit_id_t commit_ids[N_COMMIT_IDS]; 233 234 unsigned int committed_section; 235 unsigned int n_committed_sections; 236 237 unsigned int uncommitted_section; 238 unsigned int n_uncommitted_sections; 239 240 unsigned int free_section; 241 unsigned char free_section_entry; 242 unsigned int free_sectors; 243 244 unsigned int free_sectors_threshold; 245 246 struct workqueue_struct *commit_wq; 247 struct work_struct commit_work; 248 
249 struct workqueue_struct *writer_wq; 250 struct work_struct writer_work; 251 252 struct workqueue_struct *recalc_wq; 253 struct work_struct recalc_work; 254 u8 *recalc_buffer; 255 u8 *recalc_tags; 256 257 struct bio_list flush_bio_list; 258 259 unsigned long autocommit_jiffies; 260 struct timer_list autocommit_timer; 261 unsigned int autocommit_msec; 262 263 wait_queue_head_t copy_to_journal_wait; 264 265 struct completion crypto_backoff; 266 267 bool wrote_to_journal; 268 bool journal_uptodate; 269 bool just_formatted; 270 bool recalculate_flag; 271 bool reset_recalculate_flag; 272 bool discard; 273 bool fix_padding; 274 bool fix_hmac; 275 bool legacy_recalculate; 276 277 struct alg_spec internal_hash_alg; 278 struct alg_spec journal_crypt_alg; 279 struct alg_spec journal_mac_alg; 280 281 atomic64_t number_of_mismatches; 282 283 struct notifier_block reboot_notifier; 284 }; 285 286 struct dm_integrity_range { 287 sector_t logical_sector; 288 sector_t n_sectors; 289 bool waiting; 290 union { 291 struct rb_node node; 292 struct { 293 struct task_struct *task; 294 struct list_head wait_entry; 295 }; 296 }; 297 }; 298 299 struct dm_integrity_io { 300 struct work_struct work; 301 302 struct dm_integrity_c *ic; 303 enum req_op op; 304 bool fua; 305 306 struct dm_integrity_range range; 307 308 sector_t metadata_block; 309 unsigned int metadata_offset; 310 311 atomic_t in_flight; 312 blk_status_t bi_status; 313 314 struct completion *completion; 315 316 struct dm_bio_details bio_details; 317 }; 318 319 struct journal_completion { 320 struct dm_integrity_c *ic; 321 atomic_t in_flight; 322 struct completion comp; 323 }; 324 325 struct journal_io { 326 struct dm_integrity_range range; 327 struct journal_completion *comp; 328 }; 329 330 struct bitmap_block_status { 331 struct work_struct work; 332 struct dm_integrity_c *ic; 333 unsigned int idx; 334 unsigned long *bitmap; 335 struct bio_list bio_queue; 336 spinlock_t bio_queue_lock; 337 338 }; 339 340 static struct kmem_cache *journal_io_cache; 341 342 #define JOURNAL_IO_MEMPOOL 32 343 344 #ifdef DEBUG_PRINT 345 #define DEBUG_print(x, ...) printk(KERN_DEBUG x, ##__VA_ARGS__) 346 static void __DEBUG_bytes(__u8 *bytes, size_t len, const char *msg, ...) 347 { 348 va_list args; 349 350 va_start(args, msg); 351 vprintk(msg, args); 352 va_end(args); 353 if (len) 354 pr_cont(":"); 355 while (len) { 356 pr_cont(" %02x", *bytes); 357 bytes++; 358 len--; 359 } 360 pr_cont("\n"); 361 } 362 #define DEBUG_bytes(bytes, len, msg, ...) __DEBUG_bytes(bytes, len, KERN_DEBUG msg, ##__VA_ARGS__) 363 #else 364 #define DEBUG_print(x, ...) do { } while (0) 365 #define DEBUG_bytes(bytes, len, msg, ...) 
do { } while (0) 366 #endif 367 368 static void dm_integrity_prepare(struct request *rq) 369 { 370 } 371 372 static void dm_integrity_complete(struct request *rq, unsigned int nr_bytes) 373 { 374 } 375 376 /* 377 * DM Integrity profile, protection is performed layer above (dm-crypt) 378 */ 379 static const struct blk_integrity_profile dm_integrity_profile = { 380 .name = "DM-DIF-EXT-TAG", 381 .generate_fn = NULL, 382 .verify_fn = NULL, 383 .prepare_fn = dm_integrity_prepare, 384 .complete_fn = dm_integrity_complete, 385 }; 386 387 static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map); 388 static void integrity_bio_wait(struct work_struct *w); 389 static void dm_integrity_dtr(struct dm_target *ti); 390 391 static void dm_integrity_io_error(struct dm_integrity_c *ic, const char *msg, int err) 392 { 393 if (err == -EILSEQ) 394 atomic64_inc(&ic->number_of_mismatches); 395 if (!cmpxchg(&ic->failed, 0, err)) 396 DMERR("Error on %s: %d", msg, err); 397 } 398 399 static int dm_integrity_failed(struct dm_integrity_c *ic) 400 { 401 return READ_ONCE(ic->failed); 402 } 403 404 static bool dm_integrity_disable_recalculate(struct dm_integrity_c *ic) 405 { 406 if (ic->legacy_recalculate) 407 return false; 408 if (!(ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) ? 409 ic->internal_hash_alg.key || ic->journal_mac_alg.key : 410 ic->internal_hash_alg.key && !ic->journal_mac_alg.key) 411 return true; 412 return false; 413 } 414 415 static commit_id_t dm_integrity_commit_id(struct dm_integrity_c *ic, unsigned int i, 416 unsigned int j, unsigned char seq) 417 { 418 /* 419 * Xor the number with section and sector, so that if a piece of 420 * journal is written at wrong place, it is detected. 421 */ 422 return ic->commit_ids[seq] ^ cpu_to_le64(((__u64)i << 32) ^ j); 423 } 424 425 static void get_area_and_offset(struct dm_integrity_c *ic, sector_t data_sector, 426 sector_t *area, sector_t *offset) 427 { 428 if (!ic->meta_dev) { 429 __u8 log2_interleave_sectors = ic->sb->log2_interleave_sectors; 430 *area = data_sector >> log2_interleave_sectors; 431 *offset = (unsigned int)data_sector & ((1U << log2_interleave_sectors) - 1); 432 } else { 433 *area = 0; 434 *offset = data_sector; 435 } 436 } 437 438 #define sector_to_block(ic, n) \ 439 do { \ 440 BUG_ON((n) & (unsigned int)((ic)->sectors_per_block - 1)); \ 441 (n) >>= (ic)->sb->log2_sectors_per_block; \ 442 } while (0) 443 444 static __u64 get_metadata_sector_and_offset(struct dm_integrity_c *ic, sector_t area, 445 sector_t offset, unsigned int *metadata_offset) 446 { 447 __u64 ms; 448 unsigned int mo; 449 450 ms = area << ic->sb->log2_interleave_sectors; 451 if (likely(ic->log2_metadata_run >= 0)) 452 ms += area << ic->log2_metadata_run; 453 else 454 ms += area * ic->metadata_run; 455 ms >>= ic->log2_buffer_sectors; 456 457 sector_to_block(ic, offset); 458 459 if (likely(ic->log2_tag_size >= 0)) { 460 ms += offset >> (SECTOR_SHIFT + ic->log2_buffer_sectors - ic->log2_tag_size); 461 mo = (offset << ic->log2_tag_size) & ((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - 1); 462 } else { 463 ms += (__u64)offset * ic->tag_size >> (SECTOR_SHIFT + ic->log2_buffer_sectors); 464 mo = (offset * ic->tag_size) & ((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - 1); 465 } 466 *metadata_offset = mo; 467 return ms; 468 } 469 470 static sector_t get_data_sector(struct dm_integrity_c *ic, sector_t area, sector_t offset) 471 { 472 sector_t result; 473 474 if (ic->meta_dev) 475 return offset; 476 477 result = area << ic->sb->log2_interleave_sectors; 
478 if (likely(ic->log2_metadata_run >= 0)) 479 result += (area + 1) << ic->log2_metadata_run; 480 else 481 result += (area + 1) * ic->metadata_run; 482 483 result += (sector_t)ic->initial_sectors + offset; 484 result += ic->start; 485 486 return result; 487 } 488 489 static void wraparound_section(struct dm_integrity_c *ic, unsigned int *sec_ptr) 490 { 491 if (unlikely(*sec_ptr >= ic->journal_sections)) 492 *sec_ptr -= ic->journal_sections; 493 } 494 495 static void sb_set_version(struct dm_integrity_c *ic) 496 { 497 if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) 498 ic->sb->version = SB_VERSION_5; 499 else if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) 500 ic->sb->version = SB_VERSION_4; 501 else if (ic->mode == 'B' || ic->sb->flags & cpu_to_le32(SB_FLAG_DIRTY_BITMAP)) 502 ic->sb->version = SB_VERSION_3; 503 else if (ic->meta_dev || ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) 504 ic->sb->version = SB_VERSION_2; 505 else 506 ic->sb->version = SB_VERSION_1; 507 } 508 509 static int sb_mac(struct dm_integrity_c *ic, bool wr) 510 { 511 SHASH_DESC_ON_STACK(desc, ic->journal_mac); 512 int r; 513 unsigned int size = crypto_shash_digestsize(ic->journal_mac); 514 515 if (sizeof(struct superblock) + size > 1 << SECTOR_SHIFT) { 516 dm_integrity_io_error(ic, "digest is too long", -EINVAL); 517 return -EINVAL; 518 } 519 520 desc->tfm = ic->journal_mac; 521 522 r = crypto_shash_init(desc); 523 if (unlikely(r < 0)) { 524 dm_integrity_io_error(ic, "crypto_shash_init", r); 525 return r; 526 } 527 528 r = crypto_shash_update(desc, (__u8 *)ic->sb, (1 << SECTOR_SHIFT) - size); 529 if (unlikely(r < 0)) { 530 dm_integrity_io_error(ic, "crypto_shash_update", r); 531 return r; 532 } 533 534 if (likely(wr)) { 535 r = crypto_shash_final(desc, (__u8 *)ic->sb + (1 << SECTOR_SHIFT) - size); 536 if (unlikely(r < 0)) { 537 dm_integrity_io_error(ic, "crypto_shash_final", r); 538 return r; 539 } 540 } else { 541 __u8 result[HASH_MAX_DIGESTSIZE]; 542 543 r = crypto_shash_final(desc, result); 544 if (unlikely(r < 0)) { 545 dm_integrity_io_error(ic, "crypto_shash_final", r); 546 return r; 547 } 548 if (memcmp((__u8 *)ic->sb + (1 << SECTOR_SHIFT) - size, result, size)) { 549 dm_integrity_io_error(ic, "superblock mac", -EILSEQ); 550 dm_audit_log_target(DM_MSG_PREFIX, "mac-superblock", ic->ti, 0); 551 return -EILSEQ; 552 } 553 } 554 555 return 0; 556 } 557 558 static int sync_rw_sb(struct dm_integrity_c *ic, blk_opf_t opf) 559 { 560 struct dm_io_request io_req; 561 struct dm_io_region io_loc; 562 const enum req_op op = opf & REQ_OP_MASK; 563 int r; 564 565 io_req.bi_opf = opf; 566 io_req.mem.type = DM_IO_KMEM; 567 io_req.mem.ptr.addr = ic->sb; 568 io_req.notify.fn = NULL; 569 io_req.client = ic->io; 570 io_loc.bdev = ic->meta_dev ? 
ic->meta_dev->bdev : ic->dev->bdev; 571 io_loc.sector = ic->start; 572 io_loc.count = SB_SECTORS; 573 574 if (op == REQ_OP_WRITE) { 575 sb_set_version(ic); 576 if (ic->journal_mac && ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) { 577 r = sb_mac(ic, true); 578 if (unlikely(r)) 579 return r; 580 } 581 } 582 583 r = dm_io(&io_req, 1, &io_loc, NULL); 584 if (unlikely(r)) 585 return r; 586 587 if (op == REQ_OP_READ) { 588 if (ic->mode != 'R' && ic->journal_mac && ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) { 589 r = sb_mac(ic, false); 590 if (unlikely(r)) 591 return r; 592 } 593 } 594 595 return 0; 596 } 597 598 #define BITMAP_OP_TEST_ALL_SET 0 599 #define BITMAP_OP_TEST_ALL_CLEAR 1 600 #define BITMAP_OP_SET 2 601 #define BITMAP_OP_CLEAR 3 602 603 static bool block_bitmap_op(struct dm_integrity_c *ic, struct page_list *bitmap, 604 sector_t sector, sector_t n_sectors, int mode) 605 { 606 unsigned long bit, end_bit, this_end_bit, page, end_page; 607 unsigned long *data; 608 609 if (unlikely(((sector | n_sectors) & ((1 << ic->sb->log2_sectors_per_block) - 1)) != 0)) { 610 DMCRIT("invalid bitmap access (%llx,%llx,%d,%d,%d)", 611 sector, 612 n_sectors, 613 ic->sb->log2_sectors_per_block, 614 ic->log2_blocks_per_bitmap_bit, 615 mode); 616 BUG(); 617 } 618 619 if (unlikely(!n_sectors)) 620 return true; 621 622 bit = sector >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit); 623 end_bit = (sector + n_sectors - 1) >> 624 (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit); 625 626 page = bit / (PAGE_SIZE * 8); 627 bit %= PAGE_SIZE * 8; 628 629 end_page = end_bit / (PAGE_SIZE * 8); 630 end_bit %= PAGE_SIZE * 8; 631 632 repeat: 633 if (page < end_page) 634 this_end_bit = PAGE_SIZE * 8 - 1; 635 else 636 this_end_bit = end_bit; 637 638 data = lowmem_page_address(bitmap[page].page); 639 640 if (mode == BITMAP_OP_TEST_ALL_SET) { 641 while (bit <= this_end_bit) { 642 if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) { 643 do { 644 if (data[bit / BITS_PER_LONG] != -1) 645 return false; 646 bit += BITS_PER_LONG; 647 } while (this_end_bit >= bit + BITS_PER_LONG - 1); 648 continue; 649 } 650 if (!test_bit(bit, data)) 651 return false; 652 bit++; 653 } 654 } else if (mode == BITMAP_OP_TEST_ALL_CLEAR) { 655 while (bit <= this_end_bit) { 656 if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) { 657 do { 658 if (data[bit / BITS_PER_LONG] != 0) 659 return false; 660 bit += BITS_PER_LONG; 661 } while (this_end_bit >= bit + BITS_PER_LONG - 1); 662 continue; 663 } 664 if (test_bit(bit, data)) 665 return false; 666 bit++; 667 } 668 } else if (mode == BITMAP_OP_SET) { 669 while (bit <= this_end_bit) { 670 if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) { 671 do { 672 data[bit / BITS_PER_LONG] = -1; 673 bit += BITS_PER_LONG; 674 } while (this_end_bit >= bit + BITS_PER_LONG - 1); 675 continue; 676 } 677 __set_bit(bit, data); 678 bit++; 679 } 680 } else if (mode == BITMAP_OP_CLEAR) { 681 if (!bit && this_end_bit == PAGE_SIZE * 8 - 1) 682 clear_page(data); 683 else { 684 while (bit <= this_end_bit) { 685 if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) { 686 do { 687 data[bit / BITS_PER_LONG] = 0; 688 bit += BITS_PER_LONG; 689 } while (this_end_bit >= bit + BITS_PER_LONG - 1); 690 continue; 691 } 692 __clear_bit(bit, data); 693 bit++; 694 } 695 } 696 } else { 697 BUG(); 698 } 699 700 if (unlikely(page < end_page)) { 701 bit = 0; 702 page++; 703 goto repeat; 704 } 705 706 return true; 707 } 708 709 static 
void block_bitmap_copy(struct dm_integrity_c *ic, struct page_list *dst, struct page_list *src) 710 { 711 unsigned int n_bitmap_pages = DIV_ROUND_UP(ic->n_bitmap_blocks, PAGE_SIZE / BITMAP_BLOCK_SIZE); 712 unsigned int i; 713 714 for (i = 0; i < n_bitmap_pages; i++) { 715 unsigned long *dst_data = lowmem_page_address(dst[i].page); 716 unsigned long *src_data = lowmem_page_address(src[i].page); 717 718 copy_page(dst_data, src_data); 719 } 720 } 721 722 static struct bitmap_block_status *sector_to_bitmap_block(struct dm_integrity_c *ic, sector_t sector) 723 { 724 unsigned int bit = sector >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit); 725 unsigned int bitmap_block = bit / (BITMAP_BLOCK_SIZE * 8); 726 727 BUG_ON(bitmap_block >= ic->n_bitmap_blocks); 728 return &ic->bbs[bitmap_block]; 729 } 730 731 static void access_journal_check(struct dm_integrity_c *ic, unsigned int section, unsigned int offset, 732 bool e, const char *function) 733 { 734 #if defined(CONFIG_DM_DEBUG) || defined(INTERNAL_VERIFY) 735 unsigned int limit = e ? ic->journal_section_entries : ic->journal_section_sectors; 736 737 if (unlikely(section >= ic->journal_sections) || 738 unlikely(offset >= limit)) { 739 DMCRIT("%s: invalid access at (%u,%u), limit (%u,%u)", 740 function, section, offset, ic->journal_sections, limit); 741 BUG(); 742 } 743 #endif 744 } 745 746 static void page_list_location(struct dm_integrity_c *ic, unsigned int section, unsigned int offset, 747 unsigned int *pl_index, unsigned int *pl_offset) 748 { 749 unsigned int sector; 750 751 access_journal_check(ic, section, offset, false, "page_list_location"); 752 753 sector = section * ic->journal_section_sectors + offset; 754 755 *pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT); 756 *pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1); 757 } 758 759 static struct journal_sector *access_page_list(struct dm_integrity_c *ic, struct page_list *pl, 760 unsigned int section, unsigned int offset, unsigned int *n_sectors) 761 { 762 unsigned int pl_index, pl_offset; 763 char *va; 764 765 page_list_location(ic, section, offset, &pl_index, &pl_offset); 766 767 if (n_sectors) 768 *n_sectors = (PAGE_SIZE - pl_offset) >> SECTOR_SHIFT; 769 770 va = lowmem_page_address(pl[pl_index].page); 771 772 return (struct journal_sector *)(va + pl_offset); 773 } 774 775 static struct journal_sector *access_journal(struct dm_integrity_c *ic, unsigned int section, unsigned int offset) 776 { 777 return access_page_list(ic, ic->journal, section, offset, NULL); 778 } 779 780 static struct journal_entry *access_journal_entry(struct dm_integrity_c *ic, unsigned int section, unsigned int n) 781 { 782 unsigned int rel_sector, offset; 783 struct journal_sector *js; 784 785 access_journal_check(ic, section, n, true, "access_journal_entry"); 786 787 rel_sector = n % JOURNAL_BLOCK_SECTORS; 788 offset = n / JOURNAL_BLOCK_SECTORS; 789 790 js = access_journal(ic, section, rel_sector); 791 return (struct journal_entry *)((char *)js + offset * ic->journal_entry_size); 792 } 793 794 static struct journal_sector *access_journal_data(struct dm_integrity_c *ic, unsigned int section, unsigned int n) 795 { 796 n <<= ic->sb->log2_sectors_per_block; 797 798 n += JOURNAL_BLOCK_SECTORS; 799 800 access_journal_check(ic, section, n, false, "access_journal_data"); 801 802 return access_journal(ic, section, n); 803 } 804 805 static void section_mac(struct dm_integrity_c *ic, unsigned int section, __u8 result[JOURNAL_MAC_SIZE]) 806 { 807 SHASH_DESC_ON_STACK(desc, ic->journal_mac); 808 int r; 
809 unsigned int j, size; 810 811 desc->tfm = ic->journal_mac; 812 813 r = crypto_shash_init(desc); 814 if (unlikely(r < 0)) { 815 dm_integrity_io_error(ic, "crypto_shash_init", r); 816 goto err; 817 } 818 819 if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) { 820 __le64 section_le; 821 822 r = crypto_shash_update(desc, (__u8 *)&ic->sb->salt, SALT_SIZE); 823 if (unlikely(r < 0)) { 824 dm_integrity_io_error(ic, "crypto_shash_update", r); 825 goto err; 826 } 827 828 section_le = cpu_to_le64(section); 829 r = crypto_shash_update(desc, (__u8 *)§ion_le, sizeof(section_le)); 830 if (unlikely(r < 0)) { 831 dm_integrity_io_error(ic, "crypto_shash_update", r); 832 goto err; 833 } 834 } 835 836 for (j = 0; j < ic->journal_section_entries; j++) { 837 struct journal_entry *je = access_journal_entry(ic, section, j); 838 839 r = crypto_shash_update(desc, (__u8 *)&je->u.sector, sizeof(je->u.sector)); 840 if (unlikely(r < 0)) { 841 dm_integrity_io_error(ic, "crypto_shash_update", r); 842 goto err; 843 } 844 } 845 846 size = crypto_shash_digestsize(ic->journal_mac); 847 848 if (likely(size <= JOURNAL_MAC_SIZE)) { 849 r = crypto_shash_final(desc, result); 850 if (unlikely(r < 0)) { 851 dm_integrity_io_error(ic, "crypto_shash_final", r); 852 goto err; 853 } 854 memset(result + size, 0, JOURNAL_MAC_SIZE - size); 855 } else { 856 __u8 digest[HASH_MAX_DIGESTSIZE]; 857 858 if (WARN_ON(size > sizeof(digest))) { 859 dm_integrity_io_error(ic, "digest_size", -EINVAL); 860 goto err; 861 } 862 r = crypto_shash_final(desc, digest); 863 if (unlikely(r < 0)) { 864 dm_integrity_io_error(ic, "crypto_shash_final", r); 865 goto err; 866 } 867 memcpy(result, digest, JOURNAL_MAC_SIZE); 868 } 869 870 return; 871 err: 872 memset(result, 0, JOURNAL_MAC_SIZE); 873 } 874 875 static void rw_section_mac(struct dm_integrity_c *ic, unsigned int section, bool wr) 876 { 877 __u8 result[JOURNAL_MAC_SIZE]; 878 unsigned int j; 879 880 if (!ic->journal_mac) 881 return; 882 883 section_mac(ic, section, result); 884 885 for (j = 0; j < JOURNAL_BLOCK_SECTORS; j++) { 886 struct journal_sector *js = access_journal(ic, section, j); 887 888 if (likely(wr)) 889 memcpy(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR); 890 else { 891 if (memcmp(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR)) { 892 dm_integrity_io_error(ic, "journal mac", -EILSEQ); 893 dm_audit_log_target(DM_MSG_PREFIX, "mac-journal", ic->ti, 0); 894 } 895 } 896 } 897 } 898 899 static void complete_journal_op(void *context) 900 { 901 struct journal_completion *comp = context; 902 903 BUG_ON(!atomic_read(&comp->in_flight)); 904 if (likely(atomic_dec_and_test(&comp->in_flight))) 905 complete(&comp->comp); 906 } 907 908 static void xor_journal(struct dm_integrity_c *ic, bool encrypt, unsigned int section, 909 unsigned int n_sections, struct journal_completion *comp) 910 { 911 struct async_submit_ctl submit; 912 size_t n_bytes = (size_t)(n_sections * ic->journal_section_sectors) << SECTOR_SHIFT; 913 unsigned int pl_index, pl_offset, section_index; 914 struct page_list *source_pl, *target_pl; 915 916 if (likely(encrypt)) { 917 source_pl = ic->journal; 918 target_pl = ic->journal_io; 919 } else { 920 source_pl = ic->journal_io; 921 target_pl = ic->journal; 922 } 923 924 page_list_location(ic, section, 0, &pl_index, &pl_offset); 925 926 atomic_add(roundup(pl_offset + n_bytes, PAGE_SIZE) >> PAGE_SHIFT, &comp->in_flight); 927 928 init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, complete_journal_op, comp, NULL); 929 930 section_index = 
pl_index; 931 932 do { 933 size_t this_step; 934 struct page *src_pages[2]; 935 struct page *dst_page; 936 937 while (unlikely(pl_index == section_index)) { 938 unsigned int dummy; 939 940 if (likely(encrypt)) 941 rw_section_mac(ic, section, true); 942 section++; 943 n_sections--; 944 if (!n_sections) 945 break; 946 page_list_location(ic, section, 0, §ion_index, &dummy); 947 } 948 949 this_step = min(n_bytes, (size_t)PAGE_SIZE - pl_offset); 950 dst_page = target_pl[pl_index].page; 951 src_pages[0] = source_pl[pl_index].page; 952 src_pages[1] = ic->journal_xor[pl_index].page; 953 954 async_xor(dst_page, src_pages, pl_offset, 2, this_step, &submit); 955 956 pl_index++; 957 pl_offset = 0; 958 n_bytes -= this_step; 959 } while (n_bytes); 960 961 BUG_ON(n_sections); 962 963 async_tx_issue_pending_all(); 964 } 965 966 static void complete_journal_encrypt(void *data, int err) 967 { 968 struct journal_completion *comp = data; 969 970 if (unlikely(err)) { 971 if (likely(err == -EINPROGRESS)) { 972 complete(&comp->ic->crypto_backoff); 973 return; 974 } 975 dm_integrity_io_error(comp->ic, "asynchronous encrypt", err); 976 } 977 complete_journal_op(comp); 978 } 979 980 static bool do_crypt(bool encrypt, struct skcipher_request *req, struct journal_completion *comp) 981 { 982 int r; 983 984 skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, 985 complete_journal_encrypt, comp); 986 if (likely(encrypt)) 987 r = crypto_skcipher_encrypt(req); 988 else 989 r = crypto_skcipher_decrypt(req); 990 if (likely(!r)) 991 return false; 992 if (likely(r == -EINPROGRESS)) 993 return true; 994 if (likely(r == -EBUSY)) { 995 wait_for_completion(&comp->ic->crypto_backoff); 996 reinit_completion(&comp->ic->crypto_backoff); 997 return true; 998 } 999 dm_integrity_io_error(comp->ic, "encrypt", r); 1000 return false; 1001 } 1002 1003 static void crypt_journal(struct dm_integrity_c *ic, bool encrypt, unsigned int section, 1004 unsigned int n_sections, struct journal_completion *comp) 1005 { 1006 struct scatterlist **source_sg; 1007 struct scatterlist **target_sg; 1008 1009 atomic_add(2, &comp->in_flight); 1010 1011 if (likely(encrypt)) { 1012 source_sg = ic->journal_scatterlist; 1013 target_sg = ic->journal_io_scatterlist; 1014 } else { 1015 source_sg = ic->journal_io_scatterlist; 1016 target_sg = ic->journal_scatterlist; 1017 } 1018 1019 do { 1020 struct skcipher_request *req; 1021 unsigned int ivsize; 1022 char *iv; 1023 1024 if (likely(encrypt)) 1025 rw_section_mac(ic, section, true); 1026 1027 req = ic->sk_requests[section]; 1028 ivsize = crypto_skcipher_ivsize(ic->journal_crypt); 1029 iv = req->iv; 1030 1031 memcpy(iv, iv + ivsize, ivsize); 1032 1033 req->src = source_sg[section]; 1034 req->dst = target_sg[section]; 1035 1036 if (unlikely(do_crypt(encrypt, req, comp))) 1037 atomic_inc(&comp->in_flight); 1038 1039 section++; 1040 n_sections--; 1041 } while (n_sections); 1042 1043 atomic_dec(&comp->in_flight); 1044 complete_journal_op(comp); 1045 } 1046 1047 static void encrypt_journal(struct dm_integrity_c *ic, bool encrypt, unsigned int section, 1048 unsigned int n_sections, struct journal_completion *comp) 1049 { 1050 if (ic->journal_xor) 1051 return xor_journal(ic, encrypt, section, n_sections, comp); 1052 else 1053 return crypt_journal(ic, encrypt, section, n_sections, comp); 1054 } 1055 1056 static void complete_journal_io(unsigned long error, void *context) 1057 { 1058 struct journal_completion *comp = context; 1059 1060 if (unlikely(error != 0)) 1061 dm_integrity_io_error(comp->ic, "writing journal", 
-EIO); 1062 complete_journal_op(comp); 1063 } 1064 1065 static void rw_journal_sectors(struct dm_integrity_c *ic, blk_opf_t opf, 1066 unsigned int sector, unsigned int n_sectors, 1067 struct journal_completion *comp) 1068 { 1069 struct dm_io_request io_req; 1070 struct dm_io_region io_loc; 1071 unsigned int pl_index, pl_offset; 1072 int r; 1073 1074 if (unlikely(dm_integrity_failed(ic))) { 1075 if (comp) 1076 complete_journal_io(-1UL, comp); 1077 return; 1078 } 1079 1080 pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT); 1081 pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1); 1082 1083 io_req.bi_opf = opf; 1084 io_req.mem.type = DM_IO_PAGE_LIST; 1085 if (ic->journal_io) 1086 io_req.mem.ptr.pl = &ic->journal_io[pl_index]; 1087 else 1088 io_req.mem.ptr.pl = &ic->journal[pl_index]; 1089 io_req.mem.offset = pl_offset; 1090 if (likely(comp != NULL)) { 1091 io_req.notify.fn = complete_journal_io; 1092 io_req.notify.context = comp; 1093 } else { 1094 io_req.notify.fn = NULL; 1095 } 1096 io_req.client = ic->io; 1097 io_loc.bdev = ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev; 1098 io_loc.sector = ic->start + SB_SECTORS + sector; 1099 io_loc.count = n_sectors; 1100 1101 r = dm_io(&io_req, 1, &io_loc, NULL); 1102 if (unlikely(r)) { 1103 dm_integrity_io_error(ic, (opf & REQ_OP_MASK) == REQ_OP_READ ? 1104 "reading journal" : "writing journal", r); 1105 if (comp) { 1106 WARN_ONCE(1, "asynchronous dm_io failed: %d", r); 1107 complete_journal_io(-1UL, comp); 1108 } 1109 } 1110 } 1111 1112 static void rw_journal(struct dm_integrity_c *ic, blk_opf_t opf, 1113 unsigned int section, unsigned int n_sections, 1114 struct journal_completion *comp) 1115 { 1116 unsigned int sector, n_sectors; 1117 1118 sector = section * ic->journal_section_sectors; 1119 n_sectors = n_sections * ic->journal_section_sectors; 1120 1121 rw_journal_sectors(ic, opf, sector, n_sectors, comp); 1122 } 1123 1124 static void write_journal(struct dm_integrity_c *ic, unsigned int commit_start, unsigned int commit_sections) 1125 { 1126 struct journal_completion io_comp; 1127 struct journal_completion crypt_comp_1; 1128 struct journal_completion crypt_comp_2; 1129 unsigned int i; 1130 1131 io_comp.ic = ic; 1132 init_completion(&io_comp.comp); 1133 1134 if (commit_start + commit_sections <= ic->journal_sections) { 1135 io_comp.in_flight = (atomic_t)ATOMIC_INIT(1); 1136 if (ic->journal_io) { 1137 crypt_comp_1.ic = ic; 1138 init_completion(&crypt_comp_1.comp); 1139 crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0); 1140 encrypt_journal(ic, true, commit_start, commit_sections, &crypt_comp_1); 1141 wait_for_completion_io(&crypt_comp_1.comp); 1142 } else { 1143 for (i = 0; i < commit_sections; i++) 1144 rw_section_mac(ic, commit_start + i, true); 1145 } 1146 rw_journal(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, commit_start, 1147 commit_sections, &io_comp); 1148 } else { 1149 unsigned int to_end; 1150 1151 io_comp.in_flight = (atomic_t)ATOMIC_INIT(2); 1152 to_end = ic->journal_sections - commit_start; 1153 if (ic->journal_io) { 1154 crypt_comp_1.ic = ic; 1155 init_completion(&crypt_comp_1.comp); 1156 crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0); 1157 encrypt_journal(ic, true, commit_start, to_end, &crypt_comp_1); 1158 if (try_wait_for_completion(&crypt_comp_1.comp)) { 1159 rw_journal(ic, REQ_OP_WRITE | REQ_FUA, 1160 commit_start, to_end, &io_comp); 1161 reinit_completion(&crypt_comp_1.comp); 1162 crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0); 1163 encrypt_journal(ic, true, 0, commit_sections - to_end, &crypt_comp_1); 1164 
wait_for_completion_io(&crypt_comp_1.comp); 1165 } else { 1166 crypt_comp_2.ic = ic; 1167 init_completion(&crypt_comp_2.comp); 1168 crypt_comp_2.in_flight = (atomic_t)ATOMIC_INIT(0); 1169 encrypt_journal(ic, true, 0, commit_sections - to_end, &crypt_comp_2); 1170 wait_for_completion_io(&crypt_comp_1.comp); 1171 rw_journal(ic, REQ_OP_WRITE | REQ_FUA, commit_start, to_end, &io_comp); 1172 wait_for_completion_io(&crypt_comp_2.comp); 1173 } 1174 } else { 1175 for (i = 0; i < to_end; i++) 1176 rw_section_mac(ic, commit_start + i, true); 1177 rw_journal(ic, REQ_OP_WRITE | REQ_FUA, commit_start, to_end, &io_comp); 1178 for (i = 0; i < commit_sections - to_end; i++) 1179 rw_section_mac(ic, i, true); 1180 } 1181 rw_journal(ic, REQ_OP_WRITE | REQ_FUA, 0, commit_sections - to_end, &io_comp); 1182 } 1183 1184 wait_for_completion_io(&io_comp.comp); 1185 } 1186 1187 static void copy_from_journal(struct dm_integrity_c *ic, unsigned int section, unsigned int offset, 1188 unsigned int n_sectors, sector_t target, io_notify_fn fn, void *data) 1189 { 1190 struct dm_io_request io_req; 1191 struct dm_io_region io_loc; 1192 int r; 1193 unsigned int sector, pl_index, pl_offset; 1194 1195 BUG_ON((target | n_sectors | offset) & (unsigned int)(ic->sectors_per_block - 1)); 1196 1197 if (unlikely(dm_integrity_failed(ic))) { 1198 fn(-1UL, data); 1199 return; 1200 } 1201 1202 sector = section * ic->journal_section_sectors + JOURNAL_BLOCK_SECTORS + offset; 1203 1204 pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT); 1205 pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1); 1206 1207 io_req.bi_opf = REQ_OP_WRITE; 1208 io_req.mem.type = DM_IO_PAGE_LIST; 1209 io_req.mem.ptr.pl = &ic->journal[pl_index]; 1210 io_req.mem.offset = pl_offset; 1211 io_req.notify.fn = fn; 1212 io_req.notify.context = data; 1213 io_req.client = ic->io; 1214 io_loc.bdev = ic->dev->bdev; 1215 io_loc.sector = target; 1216 io_loc.count = n_sectors; 1217 1218 r = dm_io(&io_req, 1, &io_loc, NULL); 1219 if (unlikely(r)) { 1220 WARN_ONCE(1, "asynchronous dm_io failed: %d", r); 1221 fn(-1UL, data); 1222 } 1223 } 1224 1225 static bool ranges_overlap(struct dm_integrity_range *range1, struct dm_integrity_range *range2) 1226 { 1227 return range1->logical_sector < range2->logical_sector + range2->n_sectors && 1228 range1->logical_sector + range1->n_sectors > range2->logical_sector; 1229 } 1230 1231 static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range, bool check_waiting) 1232 { 1233 struct rb_node **n = &ic->in_progress.rb_node; 1234 struct rb_node *parent; 1235 1236 BUG_ON((new_range->logical_sector | new_range->n_sectors) & (unsigned int)(ic->sectors_per_block - 1)); 1237 1238 if (likely(check_waiting)) { 1239 struct dm_integrity_range *range; 1240 1241 list_for_each_entry(range, &ic->wait_list, wait_entry) { 1242 if (unlikely(ranges_overlap(range, new_range))) 1243 return false; 1244 } 1245 } 1246 1247 parent = NULL; 1248 1249 while (*n) { 1250 struct dm_integrity_range *range = container_of(*n, struct dm_integrity_range, node); 1251 1252 parent = *n; 1253 if (new_range->logical_sector + new_range->n_sectors <= range->logical_sector) 1254 n = &range->node.rb_left; 1255 else if (new_range->logical_sector >= range->logical_sector + range->n_sectors) 1256 n = &range->node.rb_right; 1257 else 1258 return false; 1259 } 1260 1261 rb_link_node(&new_range->node, parent, n); 1262 rb_insert_color(&new_range->node, &ic->in_progress); 1263 1264 return true; 1265 } 1266 1267 static void remove_range_unlocked(struct dm_integrity_c 
*ic, struct dm_integrity_range *range) 1268 { 1269 rb_erase(&range->node, &ic->in_progress); 1270 while (unlikely(!list_empty(&ic->wait_list))) { 1271 struct dm_integrity_range *last_range = 1272 list_first_entry(&ic->wait_list, struct dm_integrity_range, wait_entry); 1273 struct task_struct *last_range_task; 1274 1275 last_range_task = last_range->task; 1276 list_del(&last_range->wait_entry); 1277 if (!add_new_range(ic, last_range, false)) { 1278 last_range->task = last_range_task; 1279 list_add(&last_range->wait_entry, &ic->wait_list); 1280 break; 1281 } 1282 last_range->waiting = false; 1283 wake_up_process(last_range_task); 1284 } 1285 } 1286 1287 static void remove_range(struct dm_integrity_c *ic, struct dm_integrity_range *range) 1288 { 1289 unsigned long flags; 1290 1291 spin_lock_irqsave(&ic->endio_wait.lock, flags); 1292 remove_range_unlocked(ic, range); 1293 spin_unlock_irqrestore(&ic->endio_wait.lock, flags); 1294 } 1295 1296 static void wait_and_add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range) 1297 { 1298 new_range->waiting = true; 1299 list_add_tail(&new_range->wait_entry, &ic->wait_list); 1300 new_range->task = current; 1301 do { 1302 __set_current_state(TASK_UNINTERRUPTIBLE); 1303 spin_unlock_irq(&ic->endio_wait.lock); 1304 io_schedule(); 1305 spin_lock_irq(&ic->endio_wait.lock); 1306 } while (unlikely(new_range->waiting)); 1307 } 1308 1309 static void add_new_range_and_wait(struct dm_integrity_c *ic, struct dm_integrity_range *new_range) 1310 { 1311 if (unlikely(!add_new_range(ic, new_range, true))) 1312 wait_and_add_new_range(ic, new_range); 1313 } 1314 1315 static void init_journal_node(struct journal_node *node) 1316 { 1317 RB_CLEAR_NODE(&node->node); 1318 node->sector = (sector_t)-1; 1319 } 1320 1321 static void add_journal_node(struct dm_integrity_c *ic, struct journal_node *node, sector_t sector) 1322 { 1323 struct rb_node **link; 1324 struct rb_node *parent; 1325 1326 node->sector = sector; 1327 BUG_ON(!RB_EMPTY_NODE(&node->node)); 1328 1329 link = &ic->journal_tree_root.rb_node; 1330 parent = NULL; 1331 1332 while (*link) { 1333 struct journal_node *j; 1334 1335 parent = *link; 1336 j = container_of(parent, struct journal_node, node); 1337 if (sector < j->sector) 1338 link = &j->node.rb_left; 1339 else 1340 link = &j->node.rb_right; 1341 } 1342 1343 rb_link_node(&node->node, parent, link); 1344 rb_insert_color(&node->node, &ic->journal_tree_root); 1345 } 1346 1347 static void remove_journal_node(struct dm_integrity_c *ic, struct journal_node *node) 1348 { 1349 BUG_ON(RB_EMPTY_NODE(&node->node)); 1350 rb_erase(&node->node, &ic->journal_tree_root); 1351 init_journal_node(node); 1352 } 1353 1354 #define NOT_FOUND (-1U) 1355 1356 static unsigned int find_journal_node(struct dm_integrity_c *ic, sector_t sector, sector_t *next_sector) 1357 { 1358 struct rb_node *n = ic->journal_tree_root.rb_node; 1359 unsigned int found = NOT_FOUND; 1360 1361 *next_sector = (sector_t)-1; 1362 while (n) { 1363 struct journal_node *j = container_of(n, struct journal_node, node); 1364 1365 if (sector == j->sector) 1366 found = j - ic->journal_tree; 1367 1368 if (sector < j->sector) { 1369 *next_sector = j->sector; 1370 n = j->node.rb_left; 1371 } else 1372 n = j->node.rb_right; 1373 } 1374 1375 return found; 1376 } 1377 1378 static bool test_journal_node(struct dm_integrity_c *ic, unsigned int pos, sector_t sector) 1379 { 1380 struct journal_node *node, *next_node; 1381 struct rb_node *next; 1382 1383 if (unlikely(pos >= ic->journal_entries)) 1384 return false; 
1385 node = &ic->journal_tree[pos]; 1386 if (unlikely(RB_EMPTY_NODE(&node->node))) 1387 return false; 1388 if (unlikely(node->sector != sector)) 1389 return false; 1390 1391 next = rb_next(&node->node); 1392 if (unlikely(!next)) 1393 return true; 1394 1395 next_node = container_of(next, struct journal_node, node); 1396 return next_node->sector != sector; 1397 } 1398 1399 static bool find_newer_committed_node(struct dm_integrity_c *ic, struct journal_node *node) 1400 { 1401 struct rb_node *next; 1402 struct journal_node *next_node; 1403 unsigned int next_section; 1404 1405 BUG_ON(RB_EMPTY_NODE(&node->node)); 1406 1407 next = rb_next(&node->node); 1408 if (unlikely(!next)) 1409 return false; 1410 1411 next_node = container_of(next, struct journal_node, node); 1412 1413 if (next_node->sector != node->sector) 1414 return false; 1415 1416 next_section = (unsigned int)(next_node - ic->journal_tree) / ic->journal_section_entries; 1417 if (next_section >= ic->committed_section && 1418 next_section < ic->committed_section + ic->n_committed_sections) 1419 return true; 1420 if (next_section + ic->journal_sections < ic->committed_section + ic->n_committed_sections) 1421 return true; 1422 1423 return false; 1424 } 1425 1426 #define TAG_READ 0 1427 #define TAG_WRITE 1 1428 #define TAG_CMP 2 1429 1430 static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, sector_t *metadata_block, 1431 unsigned int *metadata_offset, unsigned int total_size, int op) 1432 { 1433 #define MAY_BE_FILLER 1 1434 #define MAY_BE_HASH 2 1435 unsigned int hash_offset = 0; 1436 unsigned int may_be = MAY_BE_HASH | (ic->discard ? MAY_BE_FILLER : 0); 1437 1438 do { 1439 unsigned char *data, *dp; 1440 struct dm_buffer *b; 1441 unsigned int to_copy; 1442 int r; 1443 1444 r = dm_integrity_failed(ic); 1445 if (unlikely(r)) 1446 return r; 1447 1448 data = dm_bufio_read(ic->bufio, *metadata_block, &b); 1449 if (IS_ERR(data)) 1450 return PTR_ERR(data); 1451 1452 to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size); 1453 dp = data + *metadata_offset; 1454 if (op == TAG_READ) { 1455 memcpy(tag, dp, to_copy); 1456 } else if (op == TAG_WRITE) { 1457 if (memcmp(dp, tag, to_copy)) { 1458 memcpy(dp, tag, to_copy); 1459 dm_bufio_mark_partial_buffer_dirty(b, *metadata_offset, *metadata_offset + to_copy); 1460 } 1461 } else { 1462 /* e.g.: op == TAG_CMP */ 1463 1464 if (likely(is_power_of_2(ic->tag_size))) { 1465 if (unlikely(memcmp(dp, tag, to_copy))) 1466 if (unlikely(!ic->discard) || 1467 unlikely(memchr_inv(dp, DISCARD_FILLER, to_copy) != NULL)) { 1468 goto thorough_test; 1469 } 1470 } else { 1471 unsigned int i, ts; 1472 thorough_test: 1473 ts = total_size; 1474 1475 for (i = 0; i < to_copy; i++, ts--) { 1476 if (unlikely(dp[i] != tag[i])) 1477 may_be &= ~MAY_BE_HASH; 1478 if (likely(dp[i] != DISCARD_FILLER)) 1479 may_be &= ~MAY_BE_FILLER; 1480 hash_offset++; 1481 if (unlikely(hash_offset == ic->tag_size)) { 1482 if (unlikely(!may_be)) { 1483 dm_bufio_release(b); 1484 return ts; 1485 } 1486 hash_offset = 0; 1487 may_be = MAY_BE_HASH | (ic->discard ? 
MAY_BE_FILLER : 0); 1488 } 1489 } 1490 } 1491 } 1492 dm_bufio_release(b); 1493 1494 tag += to_copy; 1495 *metadata_offset += to_copy; 1496 if (unlikely(*metadata_offset == 1U << SECTOR_SHIFT << ic->log2_buffer_sectors)) { 1497 (*metadata_block)++; 1498 *metadata_offset = 0; 1499 } 1500 1501 if (unlikely(!is_power_of_2(ic->tag_size))) 1502 hash_offset = (hash_offset + to_copy) % ic->tag_size; 1503 1504 total_size -= to_copy; 1505 } while (unlikely(total_size)); 1506 1507 return 0; 1508 #undef MAY_BE_FILLER 1509 #undef MAY_BE_HASH 1510 } 1511 1512 struct flush_request { 1513 struct dm_io_request io_req; 1514 struct dm_io_region io_reg; 1515 struct dm_integrity_c *ic; 1516 struct completion comp; 1517 }; 1518 1519 static void flush_notify(unsigned long error, void *fr_) 1520 { 1521 struct flush_request *fr = fr_; 1522 1523 if (unlikely(error != 0)) 1524 dm_integrity_io_error(fr->ic, "flushing disk cache", -EIO); 1525 complete(&fr->comp); 1526 } 1527 1528 static void dm_integrity_flush_buffers(struct dm_integrity_c *ic, bool flush_data) 1529 { 1530 int r; 1531 struct flush_request fr; 1532 1533 if (!ic->meta_dev) 1534 flush_data = false; 1535 if (flush_data) { 1536 fr.io_req.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC, 1537 fr.io_req.mem.type = DM_IO_KMEM, 1538 fr.io_req.mem.ptr.addr = NULL, 1539 fr.io_req.notify.fn = flush_notify, 1540 fr.io_req.notify.context = &fr; 1541 fr.io_req.client = dm_bufio_get_dm_io_client(ic->bufio), 1542 fr.io_reg.bdev = ic->dev->bdev, 1543 fr.io_reg.sector = 0, 1544 fr.io_reg.count = 0, 1545 fr.ic = ic; 1546 init_completion(&fr.comp); 1547 r = dm_io(&fr.io_req, 1, &fr.io_reg, NULL); 1548 BUG_ON(r); 1549 } 1550 1551 r = dm_bufio_write_dirty_buffers(ic->bufio); 1552 if (unlikely(r)) 1553 dm_integrity_io_error(ic, "writing tags", r); 1554 1555 if (flush_data) 1556 wait_for_completion(&fr.comp); 1557 } 1558 1559 static void sleep_on_endio_wait(struct dm_integrity_c *ic) 1560 { 1561 DECLARE_WAITQUEUE(wait, current); 1562 1563 __add_wait_queue(&ic->endio_wait, &wait); 1564 __set_current_state(TASK_UNINTERRUPTIBLE); 1565 spin_unlock_irq(&ic->endio_wait.lock); 1566 io_schedule(); 1567 spin_lock_irq(&ic->endio_wait.lock); 1568 __remove_wait_queue(&ic->endio_wait, &wait); 1569 } 1570 1571 static void autocommit_fn(struct timer_list *t) 1572 { 1573 struct dm_integrity_c *ic = from_timer(ic, t, autocommit_timer); 1574 1575 if (likely(!dm_integrity_failed(ic))) 1576 queue_work(ic->commit_wq, &ic->commit_work); 1577 } 1578 1579 static void schedule_autocommit(struct dm_integrity_c *ic) 1580 { 1581 if (!timer_pending(&ic->autocommit_timer)) 1582 mod_timer(&ic->autocommit_timer, jiffies + ic->autocommit_jiffies); 1583 } 1584 1585 static void submit_flush_bio(struct dm_integrity_c *ic, struct dm_integrity_io *dio) 1586 { 1587 struct bio *bio; 1588 unsigned long flags; 1589 1590 spin_lock_irqsave(&ic->endio_wait.lock, flags); 1591 bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); 1592 bio_list_add(&ic->flush_bio_list, bio); 1593 spin_unlock_irqrestore(&ic->endio_wait.lock, flags); 1594 1595 queue_work(ic->commit_wq, &ic->commit_work); 1596 } 1597 1598 static void do_endio(struct dm_integrity_c *ic, struct bio *bio) 1599 { 1600 int r; 1601 1602 r = dm_integrity_failed(ic); 1603 if (unlikely(r) && !bio->bi_status) 1604 bio->bi_status = errno_to_blk_status(r); 1605 if (unlikely(ic->synchronous_mode) && bio_op(bio) == REQ_OP_WRITE) { 1606 unsigned long flags; 1607 1608 spin_lock_irqsave(&ic->endio_wait.lock, flags); 1609 bio_list_add(&ic->synchronous_bios, bio); 
1610 queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, 0); 1611 spin_unlock_irqrestore(&ic->endio_wait.lock, flags); 1612 return; 1613 } 1614 bio_endio(bio); 1615 } 1616 1617 static void do_endio_flush(struct dm_integrity_c *ic, struct dm_integrity_io *dio) 1618 { 1619 struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); 1620 1621 if (unlikely(dio->fua) && likely(!bio->bi_status) && likely(!dm_integrity_failed(ic))) 1622 submit_flush_bio(ic, dio); 1623 else 1624 do_endio(ic, bio); 1625 } 1626 1627 static void dec_in_flight(struct dm_integrity_io *dio) 1628 { 1629 if (atomic_dec_and_test(&dio->in_flight)) { 1630 struct dm_integrity_c *ic = dio->ic; 1631 struct bio *bio; 1632 1633 remove_range(ic, &dio->range); 1634 1635 if (dio->op == REQ_OP_WRITE || unlikely(dio->op == REQ_OP_DISCARD)) 1636 schedule_autocommit(ic); 1637 1638 bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); 1639 if (unlikely(dio->bi_status) && !bio->bi_status) 1640 bio->bi_status = dio->bi_status; 1641 if (likely(!bio->bi_status) && unlikely(bio_sectors(bio) != dio->range.n_sectors)) { 1642 dio->range.logical_sector += dio->range.n_sectors; 1643 bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT); 1644 INIT_WORK(&dio->work, integrity_bio_wait); 1645 queue_work(ic->offload_wq, &dio->work); 1646 return; 1647 } 1648 do_endio_flush(ic, dio); 1649 } 1650 } 1651 1652 static void integrity_end_io(struct bio *bio) 1653 { 1654 struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io)); 1655 1656 dm_bio_restore(&dio->bio_details, bio); 1657 if (bio->bi_integrity) 1658 bio->bi_opf |= REQ_INTEGRITY; 1659 1660 if (dio->completion) 1661 complete(dio->completion); 1662 1663 dec_in_flight(dio); 1664 } 1665 1666 static void integrity_sector_checksum(struct dm_integrity_c *ic, sector_t sector, 1667 const char *data, char *result) 1668 { 1669 __le64 sector_le = cpu_to_le64(sector); 1670 SHASH_DESC_ON_STACK(req, ic->internal_hash); 1671 int r; 1672 unsigned int digest_size; 1673 1674 req->tfm = ic->internal_hash; 1675 1676 r = crypto_shash_init(req); 1677 if (unlikely(r < 0)) { 1678 dm_integrity_io_error(ic, "crypto_shash_init", r); 1679 goto failed; 1680 } 1681 1682 if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) { 1683 r = crypto_shash_update(req, (__u8 *)&ic->sb->salt, SALT_SIZE); 1684 if (unlikely(r < 0)) { 1685 dm_integrity_io_error(ic, "crypto_shash_update", r); 1686 goto failed; 1687 } 1688 } 1689 1690 r = crypto_shash_update(req, (const __u8 *)§or_le, sizeof(sector_le)); 1691 if (unlikely(r < 0)) { 1692 dm_integrity_io_error(ic, "crypto_shash_update", r); 1693 goto failed; 1694 } 1695 1696 r = crypto_shash_update(req, data, ic->sectors_per_block << SECTOR_SHIFT); 1697 if (unlikely(r < 0)) { 1698 dm_integrity_io_error(ic, "crypto_shash_update", r); 1699 goto failed; 1700 } 1701 1702 r = crypto_shash_final(req, result); 1703 if (unlikely(r < 0)) { 1704 dm_integrity_io_error(ic, "crypto_shash_final", r); 1705 goto failed; 1706 } 1707 1708 digest_size = crypto_shash_digestsize(ic->internal_hash); 1709 if (unlikely(digest_size < ic->tag_size)) 1710 memset(result + digest_size, 0, ic->tag_size - digest_size); 1711 1712 return; 1713 1714 failed: 1715 /* this shouldn't happen anyway, the hash functions have no reason to fail */ 1716 get_random_bytes(result, ic->tag_size); 1717 } 1718 1719 static void integrity_metadata(struct work_struct *w) 1720 { 1721 struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work); 1722 struct dm_integrity_c 
*ic = dio->ic; 1723 1724 int r; 1725 1726 if (ic->internal_hash) { 1727 struct bvec_iter iter; 1728 struct bio_vec bv; 1729 unsigned int digest_size = crypto_shash_digestsize(ic->internal_hash); 1730 struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); 1731 char *checksums; 1732 unsigned int extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0; 1733 char checksums_onstack[max_t(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)]; 1734 sector_t sector; 1735 unsigned int sectors_to_process; 1736 1737 if (unlikely(ic->mode == 'R')) 1738 goto skip_io; 1739 1740 if (likely(dio->op != REQ_OP_DISCARD)) 1741 checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space, 1742 GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN); 1743 else 1744 checksums = kmalloc(PAGE_SIZE, GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN); 1745 if (!checksums) { 1746 checksums = checksums_onstack; 1747 if (WARN_ON(extra_space && 1748 digest_size > sizeof(checksums_onstack))) { 1749 r = -EINVAL; 1750 goto error; 1751 } 1752 } 1753 1754 if (unlikely(dio->op == REQ_OP_DISCARD)) { 1755 unsigned int bi_size = dio->bio_details.bi_iter.bi_size; 1756 unsigned int max_size = likely(checksums != checksums_onstack) ? PAGE_SIZE : HASH_MAX_DIGESTSIZE; 1757 unsigned int max_blocks = max_size / ic->tag_size; 1758 1759 memset(checksums, DISCARD_FILLER, max_size); 1760 1761 while (bi_size) { 1762 unsigned int this_step_blocks = bi_size >> (SECTOR_SHIFT + ic->sb->log2_sectors_per_block); 1763 1764 this_step_blocks = min(this_step_blocks, max_blocks); 1765 r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset, 1766 this_step_blocks * ic->tag_size, TAG_WRITE); 1767 if (unlikely(r)) { 1768 if (likely(checksums != checksums_onstack)) 1769 kfree(checksums); 1770 goto error; 1771 } 1772 1773 bi_size -= this_step_blocks << (SECTOR_SHIFT + ic->sb->log2_sectors_per_block); 1774 } 1775 1776 if (likely(checksums != checksums_onstack)) 1777 kfree(checksums); 1778 goto skip_io; 1779 } 1780 1781 sector = dio->range.logical_sector; 1782 sectors_to_process = dio->range.n_sectors; 1783 1784 __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) { 1785 unsigned int pos; 1786 char *mem, *checksums_ptr; 1787 1788 again: 1789 mem = bvec_kmap_local(&bv); 1790 pos = 0; 1791 checksums_ptr = checksums; 1792 do { 1793 integrity_sector_checksum(ic, sector, mem + pos, checksums_ptr); 1794 checksums_ptr += ic->tag_size; 1795 sectors_to_process -= ic->sectors_per_block; 1796 pos += ic->sectors_per_block << SECTOR_SHIFT; 1797 sector += ic->sectors_per_block; 1798 } while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack); 1799 kunmap_local(mem); 1800 1801 r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset, 1802 checksums_ptr - checksums, dio->op == REQ_OP_READ ? 
TAG_CMP : TAG_WRITE); 1803 if (unlikely(r)) { 1804 if (r > 0) { 1805 sector_t s; 1806 1807 s = sector - ((r + ic->tag_size - 1) / ic->tag_size); 1808 DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx", 1809 bio->bi_bdev, s); 1810 r = -EILSEQ; 1811 atomic64_inc(&ic->number_of_mismatches); 1812 dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum", 1813 bio, s, 0); 1814 } 1815 if (likely(checksums != checksums_onstack)) 1816 kfree(checksums); 1817 goto error; 1818 } 1819 1820 if (!sectors_to_process) 1821 break; 1822 1823 if (unlikely(pos < bv.bv_len)) { 1824 bv.bv_offset += pos; 1825 bv.bv_len -= pos; 1826 goto again; 1827 } 1828 } 1829 1830 if (likely(checksums != checksums_onstack)) 1831 kfree(checksums); 1832 } else { 1833 struct bio_integrity_payload *bip = dio->bio_details.bi_integrity; 1834 1835 if (bip) { 1836 struct bio_vec biv; 1837 struct bvec_iter iter; 1838 unsigned int data_to_process = dio->range.n_sectors; 1839 1840 sector_to_block(ic, data_to_process); 1841 data_to_process *= ic->tag_size; 1842 1843 bip_for_each_vec(biv, bip, iter) { 1844 unsigned char *tag; 1845 unsigned int this_len; 1846 1847 BUG_ON(PageHighMem(biv.bv_page)); 1848 tag = bvec_virt(&biv); 1849 this_len = min(biv.bv_len, data_to_process); 1850 r = dm_integrity_rw_tag(ic, tag, &dio->metadata_block, &dio->metadata_offset, 1851 this_len, dio->op == REQ_OP_READ ? TAG_READ : TAG_WRITE); 1852 if (unlikely(r)) 1853 goto error; 1854 data_to_process -= this_len; 1855 if (!data_to_process) 1856 break; 1857 } 1858 } 1859 } 1860 skip_io: 1861 dec_in_flight(dio); 1862 return; 1863 error: 1864 dio->bi_status = errno_to_blk_status(r); 1865 dec_in_flight(dio); 1866 } 1867 1868 static int dm_integrity_map(struct dm_target *ti, struct bio *bio) 1869 { 1870 struct dm_integrity_c *ic = ti->private; 1871 struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io)); 1872 struct bio_integrity_payload *bip; 1873 1874 sector_t area, offset; 1875 1876 dio->ic = ic; 1877 dio->bi_status = 0; 1878 dio->op = bio_op(bio); 1879 1880 if (unlikely(dio->op == REQ_OP_DISCARD)) { 1881 if (ti->max_io_len) { 1882 sector_t sec = dm_target_offset(ti, bio->bi_iter.bi_sector); 1883 unsigned int log2_max_io_len = __fls(ti->max_io_len); 1884 sector_t start_boundary = sec >> log2_max_io_len; 1885 sector_t end_boundary = (sec + bio_sectors(bio) - 1) >> log2_max_io_len; 1886 1887 if (start_boundary < end_boundary) { 1888 sector_t len = ti->max_io_len - (sec & (ti->max_io_len - 1)); 1889 1890 dm_accept_partial_bio(bio, len); 1891 } 1892 } 1893 } 1894 1895 if (unlikely(bio->bi_opf & REQ_PREFLUSH)) { 1896 submit_flush_bio(ic, dio); 1897 return DM_MAPIO_SUBMITTED; 1898 } 1899 1900 dio->range.logical_sector = dm_target_offset(ti, bio->bi_iter.bi_sector); 1901 dio->fua = dio->op == REQ_OP_WRITE && bio->bi_opf & REQ_FUA; 1902 if (unlikely(dio->fua)) { 1903 /* 1904 * Don't pass down the FUA flag because we have to flush 1905 * disk cache anyway. 
1906 */ 1907 bio->bi_opf &= ~REQ_FUA; 1908 } 1909 if (unlikely(dio->range.logical_sector + bio_sectors(bio) > ic->provided_data_sectors)) { 1910 DMERR("Too big sector number: 0x%llx + 0x%x > 0x%llx", 1911 dio->range.logical_sector, bio_sectors(bio), 1912 ic->provided_data_sectors); 1913 return DM_MAPIO_KILL; 1914 } 1915 if (unlikely((dio->range.logical_sector | bio_sectors(bio)) & (unsigned int)(ic->sectors_per_block - 1))) { 1916 DMERR("Bio not aligned on %u sectors: 0x%llx, 0x%x", 1917 ic->sectors_per_block, 1918 dio->range.logical_sector, bio_sectors(bio)); 1919 return DM_MAPIO_KILL; 1920 } 1921 1922 if (ic->sectors_per_block > 1 && likely(dio->op != REQ_OP_DISCARD)) { 1923 struct bvec_iter iter; 1924 struct bio_vec bv; 1925 1926 bio_for_each_segment(bv, bio, iter) { 1927 if (unlikely(bv.bv_len & ((ic->sectors_per_block << SECTOR_SHIFT) - 1))) { 1928 DMERR("Bio vector (%u,%u) is not aligned on %u-sector boundary", 1929 bv.bv_offset, bv.bv_len, ic->sectors_per_block); 1930 return DM_MAPIO_KILL; 1931 } 1932 } 1933 } 1934 1935 bip = bio_integrity(bio); 1936 if (!ic->internal_hash) { 1937 if (bip) { 1938 unsigned int wanted_tag_size = bio_sectors(bio) >> ic->sb->log2_sectors_per_block; 1939 1940 if (ic->log2_tag_size >= 0) 1941 wanted_tag_size <<= ic->log2_tag_size; 1942 else 1943 wanted_tag_size *= ic->tag_size; 1944 if (unlikely(wanted_tag_size != bip->bip_iter.bi_size)) { 1945 DMERR("Invalid integrity data size %u, expected %u", 1946 bip->bip_iter.bi_size, wanted_tag_size); 1947 return DM_MAPIO_KILL; 1948 } 1949 } 1950 } else { 1951 if (unlikely(bip != NULL)) { 1952 DMERR("Unexpected integrity data when using internal hash"); 1953 return DM_MAPIO_KILL; 1954 } 1955 } 1956 1957 if (unlikely(ic->mode == 'R') && unlikely(dio->op != REQ_OP_READ)) 1958 return DM_MAPIO_KILL; 1959 1960 get_area_and_offset(ic, dio->range.logical_sector, &area, &offset); 1961 dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, &dio->metadata_offset); 1962 bio->bi_iter.bi_sector = get_data_sector(ic, area, offset); 1963 1964 dm_integrity_map_continue(dio, true); 1965 return DM_MAPIO_SUBMITTED; 1966 } 1967 1968 static bool __journal_read_write(struct dm_integrity_io *dio, struct bio *bio, 1969 unsigned int journal_section, unsigned int journal_entry) 1970 { 1971 struct dm_integrity_c *ic = dio->ic; 1972 sector_t logical_sector; 1973 unsigned int n_sectors; 1974 1975 logical_sector = dio->range.logical_sector; 1976 n_sectors = dio->range.n_sectors; 1977 do { 1978 struct bio_vec bv = bio_iovec(bio); 1979 char *mem; 1980 1981 if (unlikely(bv.bv_len >> SECTOR_SHIFT > n_sectors)) 1982 bv.bv_len = n_sectors << SECTOR_SHIFT; 1983 n_sectors -= bv.bv_len >> SECTOR_SHIFT; 1984 bio_advance_iter(bio, &bio->bi_iter, bv.bv_len); 1985 retry_kmap: 1986 mem = kmap_local_page(bv.bv_page); 1987 if (likely(dio->op == REQ_OP_WRITE)) 1988 flush_dcache_page(bv.bv_page); 1989 1990 do { 1991 struct journal_entry *je = access_journal_entry(ic, journal_section, journal_entry); 1992 1993 if (unlikely(dio->op == REQ_OP_READ)) { 1994 struct journal_sector *js; 1995 char *mem_ptr; 1996 unsigned int s; 1997 1998 if (unlikely(journal_entry_is_inprogress(je))) { 1999 flush_dcache_page(bv.bv_page); 2000 kunmap_local(mem); 2001 2002 __io_wait_event(ic->copy_to_journal_wait, !journal_entry_is_inprogress(je)); 2003 goto retry_kmap; 2004 } 2005 smp_rmb(); 2006 BUG_ON(journal_entry_get_sector(je) != logical_sector); 2007 js = access_journal_data(ic, journal_section, journal_entry); 2008 mem_ptr = mem + bv.bv_offset; 2009 s = 0; 2010 do 
{ 2011 memcpy(mem_ptr, js, JOURNAL_SECTOR_DATA); 2012 *(commit_id_t *)(mem_ptr + JOURNAL_SECTOR_DATA) = je->last_bytes[s]; 2013 js++; 2014 mem_ptr += 1 << SECTOR_SHIFT; 2015 } while (++s < ic->sectors_per_block); 2016 #ifdef INTERNAL_VERIFY 2017 if (ic->internal_hash) { 2018 char checksums_onstack[max_t(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)]; 2019 2020 integrity_sector_checksum(ic, logical_sector, mem + bv.bv_offset, checksums_onstack); 2021 if (unlikely(memcmp(checksums_onstack, journal_entry_tag(ic, je), ic->tag_size))) { 2022 DMERR_LIMIT("Checksum failed when reading from journal, at sector 0x%llx", 2023 logical_sector); 2024 dm_audit_log_bio(DM_MSG_PREFIX, "journal-checksum", 2025 bio, logical_sector, 0); 2026 } 2027 } 2028 #endif 2029 } 2030 2031 if (!ic->internal_hash) { 2032 struct bio_integrity_payload *bip = bio_integrity(bio); 2033 unsigned int tag_todo = ic->tag_size; 2034 char *tag_ptr = journal_entry_tag(ic, je); 2035 2036 if (bip) { 2037 do { 2038 struct bio_vec biv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter); 2039 unsigned int tag_now = min(biv.bv_len, tag_todo); 2040 char *tag_addr; 2041 2042 BUG_ON(PageHighMem(biv.bv_page)); 2043 tag_addr = bvec_virt(&biv); 2044 if (likely(dio->op == REQ_OP_WRITE)) 2045 memcpy(tag_ptr, tag_addr, tag_now); 2046 else 2047 memcpy(tag_addr, tag_ptr, tag_now); 2048 bvec_iter_advance(bip->bip_vec, &bip->bip_iter, tag_now); 2049 tag_ptr += tag_now; 2050 tag_todo -= tag_now; 2051 } while (unlikely(tag_todo)); 2052 } else if (likely(dio->op == REQ_OP_WRITE)) 2053 memset(tag_ptr, 0, tag_todo); 2054 } 2055 2056 if (likely(dio->op == REQ_OP_WRITE)) { 2057 struct journal_sector *js; 2058 unsigned int s; 2059 2060 js = access_journal_data(ic, journal_section, journal_entry); 2061 memcpy(js, mem + bv.bv_offset, ic->sectors_per_block << SECTOR_SHIFT); 2062 2063 s = 0; 2064 do { 2065 je->last_bytes[s] = js[s].commit_id; 2066 } while (++s < ic->sectors_per_block); 2067 2068 if (ic->internal_hash) { 2069 unsigned int digest_size = crypto_shash_digestsize(ic->internal_hash); 2070 2071 if (unlikely(digest_size > ic->tag_size)) { 2072 char checksums_onstack[HASH_MAX_DIGESTSIZE]; 2073 2074 integrity_sector_checksum(ic, logical_sector, (char *)js, checksums_onstack); 2075 memcpy(journal_entry_tag(ic, je), checksums_onstack, ic->tag_size); 2076 } else 2077 integrity_sector_checksum(ic, logical_sector, (char *)js, journal_entry_tag(ic, je)); 2078 } 2079 2080 journal_entry_set_sector(je, logical_sector); 2081 } 2082 logical_sector += ic->sectors_per_block; 2083 2084 journal_entry++; 2085 if (unlikely(journal_entry == ic->journal_section_entries)) { 2086 journal_entry = 0; 2087 journal_section++; 2088 wraparound_section(ic, &journal_section); 2089 } 2090 2091 bv.bv_offset += ic->sectors_per_block << SECTOR_SHIFT; 2092 } while (bv.bv_len -= ic->sectors_per_block << SECTOR_SHIFT); 2093 2094 if (unlikely(dio->op == REQ_OP_READ)) 2095 flush_dcache_page(bv.bv_page); 2096 kunmap_local(mem); 2097 } while (n_sectors); 2098 2099 if (likely(dio->op == REQ_OP_WRITE)) { 2100 smp_mb(); 2101 if (unlikely(waitqueue_active(&ic->copy_to_journal_wait))) 2102 wake_up(&ic->copy_to_journal_wait); 2103 if (READ_ONCE(ic->free_sectors) <= ic->free_sectors_threshold) 2104 queue_work(ic->commit_wq, &ic->commit_work); 2105 else 2106 schedule_autocommit(ic); 2107 } else 2108 remove_range(ic, &dio->range); 2109 2110 if (unlikely(bio->bi_iter.bi_size)) { 2111 sector_t area, offset; 2112 2113 dio->range.logical_sector = logical_sector; 2114 get_area_and_offset(ic, dio->range.logical_sector, 
&area, &offset); 2115 dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, &dio->metadata_offset); 2116 return true; 2117 } 2118 2119 return false; 2120 } 2121 2122 static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map) 2123 { 2124 struct dm_integrity_c *ic = dio->ic; 2125 struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); 2126 unsigned int journal_section, journal_entry; 2127 unsigned int journal_read_pos; 2128 struct completion read_comp; 2129 bool discard_retried = false; 2130 bool need_sync_io = ic->internal_hash && dio->op == REQ_OP_READ; 2131 2132 if (unlikely(dio->op == REQ_OP_DISCARD) && ic->mode != 'D') 2133 need_sync_io = true; 2134 2135 if (need_sync_io && from_map) { 2136 INIT_WORK(&dio->work, integrity_bio_wait); 2137 queue_work(ic->offload_wq, &dio->work); 2138 return; 2139 } 2140 2141 lock_retry: 2142 spin_lock_irq(&ic->endio_wait.lock); 2143 retry: 2144 if (unlikely(dm_integrity_failed(ic))) { 2145 spin_unlock_irq(&ic->endio_wait.lock); 2146 do_endio(ic, bio); 2147 return; 2148 } 2149 dio->range.n_sectors = bio_sectors(bio); 2150 journal_read_pos = NOT_FOUND; 2151 if (ic->mode == 'J' && likely(dio->op != REQ_OP_DISCARD)) { 2152 if (dio->op == REQ_OP_WRITE) { 2153 unsigned int next_entry, i, pos; 2154 unsigned int ws, we, range_sectors; 2155 2156 dio->range.n_sectors = min(dio->range.n_sectors, 2157 (sector_t)ic->free_sectors << ic->sb->log2_sectors_per_block); 2158 if (unlikely(!dio->range.n_sectors)) { 2159 if (from_map) 2160 goto offload_to_thread; 2161 sleep_on_endio_wait(ic); 2162 goto retry; 2163 } 2164 range_sectors = dio->range.n_sectors >> ic->sb->log2_sectors_per_block; 2165 ic->free_sectors -= range_sectors; 2166 journal_section = ic->free_section; 2167 journal_entry = ic->free_section_entry; 2168 2169 next_entry = ic->free_section_entry + range_sectors; 2170 ic->free_section_entry = next_entry % ic->journal_section_entries; 2171 ic->free_section += next_entry / ic->journal_section_entries; 2172 ic->n_uncommitted_sections += next_entry / ic->journal_section_entries; 2173 wraparound_section(ic, &ic->free_section); 2174 2175 pos = journal_section * ic->journal_section_entries + journal_entry; 2176 ws = journal_section; 2177 we = journal_entry; 2178 i = 0; 2179 do { 2180 struct journal_entry *je; 2181 2182 add_journal_node(ic, &ic->journal_tree[pos], dio->range.logical_sector + i); 2183 pos++; 2184 if (unlikely(pos >= ic->journal_entries)) 2185 pos = 0; 2186 2187 je = access_journal_entry(ic, ws, we); 2188 BUG_ON(!journal_entry_is_unused(je)); 2189 journal_entry_set_inprogress(je); 2190 we++; 2191 if (unlikely(we == ic->journal_section_entries)) { 2192 we = 0; 2193 ws++; 2194 wraparound_section(ic, &ws); 2195 } 2196 } while ((i += ic->sectors_per_block) < dio->range.n_sectors); 2197 2198 spin_unlock_irq(&ic->endio_wait.lock); 2199 goto journal_read_write; 2200 } else { 2201 sector_t next_sector; 2202 2203 journal_read_pos = find_journal_node(ic, dio->range.logical_sector, &next_sector); 2204 if (likely(journal_read_pos == NOT_FOUND)) { 2205 if (unlikely(dio->range.n_sectors > next_sector - dio->range.logical_sector)) 2206 dio->range.n_sectors = next_sector - dio->range.logical_sector; 2207 } else { 2208 unsigned int i; 2209 unsigned int jp = journal_read_pos + 1; 2210 2211 for (i = ic->sectors_per_block; i < dio->range.n_sectors; i += ic->sectors_per_block, jp++) { 2212 if (!test_journal_node(ic, jp, dio->range.logical_sector + i)) 2213 break; 2214 } 2215 dio->range.n_sectors = i; 2216 } 2217 
} 2218 } 2219 if (unlikely(!add_new_range(ic, &dio->range, true))) { 2220 /* 2221 * We must not sleep in the request routine because it could 2222 * stall bios on current->bio_list. 2223 * So, we offload the bio to a workqueue if we have to sleep. 2224 */ 2225 if (from_map) { 2226 offload_to_thread: 2227 spin_unlock_irq(&ic->endio_wait.lock); 2228 INIT_WORK(&dio->work, integrity_bio_wait); 2229 queue_work(ic->wait_wq, &dio->work); 2230 return; 2231 } 2232 if (journal_read_pos != NOT_FOUND) 2233 dio->range.n_sectors = ic->sectors_per_block; 2234 wait_and_add_new_range(ic, &dio->range); 2235 /* 2236 * wait_and_add_new_range drops the spinlock, so the journal 2237 * may have been changed arbitrarily. We need to recheck. 2238 * To simplify the code, we restrict I/O size to just one block. 2239 */ 2240 if (journal_read_pos != NOT_FOUND) { 2241 sector_t next_sector; 2242 unsigned int new_pos; 2243 2244 new_pos = find_journal_node(ic, dio->range.logical_sector, &next_sector); 2245 if (unlikely(new_pos != journal_read_pos)) { 2246 remove_range_unlocked(ic, &dio->range); 2247 goto retry; 2248 } 2249 } 2250 } 2251 if (ic->mode == 'J' && likely(dio->op == REQ_OP_DISCARD) && !discard_retried) { 2252 sector_t next_sector; 2253 unsigned int new_pos; 2254 2255 new_pos = find_journal_node(ic, dio->range.logical_sector, &next_sector); 2256 if (unlikely(new_pos != NOT_FOUND) || 2257 unlikely(next_sector < dio->range.logical_sector - dio->range.n_sectors)) { 2258 remove_range_unlocked(ic, &dio->range); 2259 spin_unlock_irq(&ic->endio_wait.lock); 2260 queue_work(ic->commit_wq, &ic->commit_work); 2261 flush_workqueue(ic->commit_wq); 2262 queue_work(ic->writer_wq, &ic->writer_work); 2263 flush_workqueue(ic->writer_wq); 2264 discard_retried = true; 2265 goto lock_retry; 2266 } 2267 } 2268 spin_unlock_irq(&ic->endio_wait.lock); 2269 2270 if (unlikely(journal_read_pos != NOT_FOUND)) { 2271 journal_section = journal_read_pos / ic->journal_section_entries; 2272 journal_entry = journal_read_pos % ic->journal_section_entries; 2273 goto journal_read_write; 2274 } 2275 2276 if (ic->mode == 'B' && (dio->op == REQ_OP_WRITE || unlikely(dio->op == REQ_OP_DISCARD))) { 2277 if (!block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector, 2278 dio->range.n_sectors, BITMAP_OP_TEST_ALL_SET)) { 2279 struct bitmap_block_status *bbs; 2280 2281 bbs = sector_to_bitmap_block(ic, dio->range.logical_sector); 2282 spin_lock(&bbs->bio_queue_lock); 2283 bio_list_add(&bbs->bio_queue, bio); 2284 spin_unlock(&bbs->bio_queue_lock); 2285 queue_work(ic->writer_wq, &bbs->work); 2286 return; 2287 } 2288 } 2289 2290 dio->in_flight = (atomic_t)ATOMIC_INIT(2); 2291 2292 if (need_sync_io) { 2293 init_completion(&read_comp); 2294 dio->completion = &read_comp; 2295 } else 2296 dio->completion = NULL; 2297 2298 dm_bio_record(&dio->bio_details, bio); 2299 bio_set_dev(bio, ic->dev->bdev); 2300 bio->bi_integrity = NULL; 2301 bio->bi_opf &= ~REQ_INTEGRITY; 2302 bio->bi_end_io = integrity_end_io; 2303 bio->bi_iter.bi_size = dio->range.n_sectors << SECTOR_SHIFT; 2304 2305 if (unlikely(dio->op == REQ_OP_DISCARD) && likely(ic->mode != 'D')) { 2306 integrity_metadata(&dio->work); 2307 dm_integrity_flush_buffers(ic, false); 2308 2309 dio->in_flight = (atomic_t)ATOMIC_INIT(1); 2310 dio->completion = NULL; 2311 2312 submit_bio_noacct(bio); 2313 2314 return; 2315 } 2316 2317 submit_bio_noacct(bio); 2318 2319 if (need_sync_io) { 2320 wait_for_completion_io(&read_comp); 2321 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) && 2322 
dio->range.logical_sector + dio->range.n_sectors > le64_to_cpu(ic->sb->recalc_sector)) 2323 goto skip_check; 2324 if (ic->mode == 'B') { 2325 if (!block_bitmap_op(ic, ic->recalc_bitmap, dio->range.logical_sector, 2326 dio->range.n_sectors, BITMAP_OP_TEST_ALL_CLEAR)) 2327 goto skip_check; 2328 } 2329 2330 if (likely(!bio->bi_status)) 2331 integrity_metadata(&dio->work); 2332 else 2333 skip_check: 2334 dec_in_flight(dio); 2335 } else { 2336 INIT_WORK(&dio->work, integrity_metadata); 2337 queue_work(ic->metadata_wq, &dio->work); 2338 } 2339 2340 return; 2341 2342 journal_read_write: 2343 if (unlikely(__journal_read_write(dio, bio, journal_section, journal_entry))) 2344 goto lock_retry; 2345 2346 do_endio_flush(ic, dio); 2347 } 2348 2349 2350 static void integrity_bio_wait(struct work_struct *w) 2351 { 2352 struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work); 2353 2354 dm_integrity_map_continue(dio, false); 2355 } 2356 2357 static void pad_uncommitted(struct dm_integrity_c *ic) 2358 { 2359 if (ic->free_section_entry) { 2360 ic->free_sectors -= ic->journal_section_entries - ic->free_section_entry; 2361 ic->free_section_entry = 0; 2362 ic->free_section++; 2363 wraparound_section(ic, &ic->free_section); 2364 ic->n_uncommitted_sections++; 2365 } 2366 if (WARN_ON(ic->journal_sections * ic->journal_section_entries != 2367 (ic->n_uncommitted_sections + ic->n_committed_sections) * 2368 ic->journal_section_entries + ic->free_sectors)) { 2369 DMCRIT("journal_sections %u, journal_section_entries %u, " 2370 "n_uncommitted_sections %u, n_committed_sections %u, " 2371 "journal_section_entries %u, free_sectors %u", 2372 ic->journal_sections, ic->journal_section_entries, 2373 ic->n_uncommitted_sections, ic->n_committed_sections, 2374 ic->journal_section_entries, ic->free_sectors); 2375 } 2376 } 2377 2378 static void integrity_commit(struct work_struct *w) 2379 { 2380 struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, commit_work); 2381 unsigned int commit_start, commit_sections; 2382 unsigned int i, j, n; 2383 struct bio *flushes; 2384 2385 del_timer(&ic->autocommit_timer); 2386 2387 spin_lock_irq(&ic->endio_wait.lock); 2388 flushes = bio_list_get(&ic->flush_bio_list); 2389 if (unlikely(ic->mode != 'J')) { 2390 spin_unlock_irq(&ic->endio_wait.lock); 2391 dm_integrity_flush_buffers(ic, true); 2392 goto release_flush_bios; 2393 } 2394 2395 pad_uncommitted(ic); 2396 commit_start = ic->uncommitted_section; 2397 commit_sections = ic->n_uncommitted_sections; 2398 spin_unlock_irq(&ic->endio_wait.lock); 2399 2400 if (!commit_sections) 2401 goto release_flush_bios; 2402 2403 ic->wrote_to_journal = true; 2404 2405 i = commit_start; 2406 for (n = 0; n < commit_sections; n++) { 2407 for (j = 0; j < ic->journal_section_entries; j++) { 2408 struct journal_entry *je; 2409 2410 je = access_journal_entry(ic, i, j); 2411 io_wait_event(ic->copy_to_journal_wait, !journal_entry_is_inprogress(je)); 2412 } 2413 for (j = 0; j < ic->journal_section_sectors; j++) { 2414 struct journal_sector *js; 2415 2416 js = access_journal(ic, i, j); 2417 js->commit_id = dm_integrity_commit_id(ic, i, j, ic->commit_seq); 2418 } 2419 i++; 2420 if (unlikely(i >= ic->journal_sections)) 2421 ic->commit_seq = next_commit_seq(ic->commit_seq); 2422 wraparound_section(ic, &i); 2423 } 2424 smp_rmb(); 2425 2426 write_journal(ic, commit_start, commit_sections); 2427 2428 spin_lock_irq(&ic->endio_wait.lock); 2429 ic->uncommitted_section += commit_sections; 2430 wraparound_section(ic, &ic->uncommitted_section); 2431 
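	/* the sections just written to the journal move from the uncommitted to the committed state */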
	ic->n_uncommitted_sections -= commit_sections;
	ic->n_committed_sections += commit_sections;
	spin_unlock_irq(&ic->endio_wait.lock);

	if (READ_ONCE(ic->free_sectors) <= ic->free_sectors_threshold)
		queue_work(ic->writer_wq, &ic->writer_work);

release_flush_bios:
	while (flushes) {
		struct bio *next = flushes->bi_next;

		flushes->bi_next = NULL;
		do_endio(ic, flushes);
		flushes = next;
	}
}

static void complete_copy_from_journal(unsigned long error, void *context)
{
	struct journal_io *io = context;
	struct journal_completion *comp = io->comp;
	struct dm_integrity_c *ic = comp->ic;

	remove_range(ic, &io->range);
	mempool_free(io, &ic->journal_io_mempool);
	if (unlikely(error != 0))
		dm_integrity_io_error(ic, "copying from journal", -EIO);
	complete_journal_op(comp);
}

static void restore_last_bytes(struct dm_integrity_c *ic, struct journal_sector *js,
			       struct journal_entry *je)
{
	unsigned int s = 0;

	do {
		js->commit_id = je->last_bytes[s];
		js++;
	} while (++s < ic->sectors_per_block);
}

static void do_journal_write(struct dm_integrity_c *ic, unsigned int write_start,
			     unsigned int write_sections, bool from_replay)
{
	unsigned int i, j, n;
	struct journal_completion comp;
	struct blk_plug plug;

	blk_start_plug(&plug);

	comp.ic = ic;
	comp.in_flight = (atomic_t)ATOMIC_INIT(1);
	init_completion(&comp.comp);

	i = write_start;
	for (n = 0; n < write_sections; n++, i++, wraparound_section(ic, &i)) {
#ifndef INTERNAL_VERIFY
		if (unlikely(from_replay))
#endif
			rw_section_mac(ic, i, false);
		for (j = 0; j < ic->journal_section_entries; j++) {
			struct journal_entry *je = access_journal_entry(ic, i, j);
			sector_t sec, area, offset;
			unsigned int k, l, next_loop;
			sector_t metadata_block;
			unsigned int metadata_offset;
			struct journal_io *io;

			if (journal_entry_is_unused(je))
				continue;
			BUG_ON(unlikely(journal_entry_is_inprogress(je)) && !from_replay);
			sec = journal_entry_get_sector(je);
			if (unlikely(from_replay)) {
				if (unlikely(sec & (unsigned int)(ic->sectors_per_block - 1))) {
					dm_integrity_io_error(ic, "invalid sector in journal", -EIO);
					sec &= ~(sector_t)(ic->sectors_per_block - 1);
				}
				if (unlikely(sec >= ic->provided_data_sectors)) {
					journal_entry_set_unused(je);
					continue;
				}
			}
			get_area_and_offset(ic, sec, &area, &offset);
			restore_last_bytes(ic, access_journal_data(ic, i, j), je);
			for (k = j + 1; k < ic->journal_section_entries; k++) {
				struct journal_entry *je2 = access_journal_entry(ic, i, k);
				sector_t sec2, area2, offset2;

				if (journal_entry_is_unused(je2))
					break;
				BUG_ON(unlikely(journal_entry_is_inprogress(je2)) && !from_replay);
				sec2 = journal_entry_get_sector(je2);
				if (unlikely(sec2 >= ic->provided_data_sectors))
					break;
				get_area_and_offset(ic, sec2, &area2, &offset2);
				if (area2 != area || offset2 != offset + ((k - j) << ic->sb->log2_sectors_per_block))
					break;
				restore_last_bytes(ic, access_journal_data(ic, i, k), je2);
			}
			next_loop = k - 1;

			io = mempool_alloc(&ic->journal_io_mempool, GFP_NOIO);
			io->comp = &comp;
			io->range.logical_sector = sec;
			io->range.n_sectors = (k - j) << ic->sb->log2_sectors_per_block;

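			/*
			 * Take the range lock for this run of journal entries before
			 * copying it to its final location on the data device.  Entries
			 * that have a newer committed copy elsewhere in the journal are
			 * trimmed from both ends of the run and their journal nodes are
			 * dropped, so only the most recently committed data is written
			 * back.
			 */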
			spin_lock_irq(&ic->endio_wait.lock);
			add_new_range_and_wait(ic, &io->range);

			if (likely(!from_replay)) {
				struct journal_node *section_node = &ic->journal_tree[i * ic->journal_section_entries];

				/* don't write if there is newer committed sector */
				while (j < k && find_newer_committed_node(ic, &section_node[j])) {
					struct journal_entry *je2 = access_journal_entry(ic, i, j);

					journal_entry_set_unused(je2);
					remove_journal_node(ic, &section_node[j]);
					j++;
					sec += ic->sectors_per_block;
					offset += ic->sectors_per_block;
				}
				while (j < k && find_newer_committed_node(ic, &section_node[k - 1])) {
					struct journal_entry *je2 = access_journal_entry(ic, i, k - 1);

					journal_entry_set_unused(je2);
					remove_journal_node(ic, &section_node[k - 1]);
					k--;
				}
				if (j == k) {
					remove_range_unlocked(ic, &io->range);
					spin_unlock_irq(&ic->endio_wait.lock);
					mempool_free(io, &ic->journal_io_mempool);
					goto skip_io;
				}
				for (l = j; l < k; l++)
					remove_journal_node(ic, &section_node[l]);
			}
			spin_unlock_irq(&ic->endio_wait.lock);

			metadata_block = get_metadata_sector_and_offset(ic, area, offset, &metadata_offset);
			for (l = j; l < k; l++) {
				int r;
				struct journal_entry *je2 = access_journal_entry(ic, i, l);

				if (
#ifndef INTERNAL_VERIFY
				    unlikely(from_replay) &&
#endif
				    ic->internal_hash) {
					char test_tag[max_t(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)];

					integrity_sector_checksum(ic, sec + ((l - j) << ic->sb->log2_sectors_per_block),
								  (char *)access_journal_data(ic, i, l), test_tag);
					if (unlikely(memcmp(test_tag, journal_entry_tag(ic, je2), ic->tag_size))) {
						dm_integrity_io_error(ic, "tag mismatch when replaying journal", -EILSEQ);
						dm_audit_log_target(DM_MSG_PREFIX, "integrity-replay-journal", ic->ti, 0);
					}
				}

				journal_entry_set_unused(je2);
				r = dm_integrity_rw_tag(ic, journal_entry_tag(ic, je2), &metadata_block, &metadata_offset,
							ic->tag_size, TAG_WRITE);
				if (unlikely(r))
					dm_integrity_io_error(ic, "reading tags", r);
			}

			atomic_inc(&comp.in_flight);
			copy_from_journal(ic, i, j << ic->sb->log2_sectors_per_block,
					  (k - j) << ic->sb->log2_sectors_per_block,
					  get_data_sector(ic, area, offset),
					  complete_copy_from_journal, io);
skip_io:
			j = next_loop;
		}
	}

	dm_bufio_write_dirty_buffers_async(ic->bufio);

	blk_finish_plug(&plug);

	complete_journal_op(&comp);
	wait_for_completion_io(&comp.comp);

	dm_integrity_flush_buffers(ic, true);
}

static void integrity_writer(struct work_struct *w)
{
	struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, writer_work);
	unsigned int write_start, write_sections;
	unsigned int prev_free_sectors;

	spin_lock_irq(&ic->endio_wait.lock);
	write_start = ic->committed_section;
	write_sections = ic->n_committed_sections;
	spin_unlock_irq(&ic->endio_wait.lock);

	if (!write_sections)
		return;

	do_journal_write(ic, write_start, write_sections, false);

	spin_lock_irq(&ic->endio_wait.lock);

	ic->committed_section += write_sections;
	wraparound_section(ic, &ic->committed_section);
	ic->n_committed_sections -= write_sections;

	prev_free_sectors = ic->free_sectors;
	ic->free_sectors += write_sections * ic->journal_section_entries;
	if
(unlikely(!prev_free_sectors)) 2643 wake_up_locked(&ic->endio_wait); 2644 2645 spin_unlock_irq(&ic->endio_wait.lock); 2646 } 2647 2648 static void recalc_write_super(struct dm_integrity_c *ic) 2649 { 2650 int r; 2651 2652 dm_integrity_flush_buffers(ic, false); 2653 if (dm_integrity_failed(ic)) 2654 return; 2655 2656 r = sync_rw_sb(ic, REQ_OP_WRITE); 2657 if (unlikely(r)) 2658 dm_integrity_io_error(ic, "writing superblock", r); 2659 } 2660 2661 static void integrity_recalc(struct work_struct *w) 2662 { 2663 struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, recalc_work); 2664 struct dm_integrity_range range; 2665 struct dm_io_request io_req; 2666 struct dm_io_region io_loc; 2667 sector_t area, offset; 2668 sector_t metadata_block; 2669 unsigned int metadata_offset; 2670 sector_t logical_sector, n_sectors; 2671 __u8 *t; 2672 unsigned int i; 2673 int r; 2674 unsigned int super_counter = 0; 2675 2676 DEBUG_print("start recalculation... (position %llx)\n", le64_to_cpu(ic->sb->recalc_sector)); 2677 2678 spin_lock_irq(&ic->endio_wait.lock); 2679 2680 next_chunk: 2681 2682 if (unlikely(dm_post_suspending(ic->ti))) 2683 goto unlock_ret; 2684 2685 range.logical_sector = le64_to_cpu(ic->sb->recalc_sector); 2686 if (unlikely(range.logical_sector >= ic->provided_data_sectors)) { 2687 if (ic->mode == 'B') { 2688 block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR); 2689 DEBUG_print("queue_delayed_work: bitmap_flush_work\n"); 2690 queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, 0); 2691 } 2692 goto unlock_ret; 2693 } 2694 2695 get_area_and_offset(ic, range.logical_sector, &area, &offset); 2696 range.n_sectors = min((sector_t)RECALC_SECTORS, ic->provided_data_sectors - range.logical_sector); 2697 if (!ic->meta_dev) 2698 range.n_sectors = min(range.n_sectors, ((sector_t)1U << ic->sb->log2_interleave_sectors) - (unsigned int)offset); 2699 2700 add_new_range_and_wait(ic, &range); 2701 spin_unlock_irq(&ic->endio_wait.lock); 2702 logical_sector = range.logical_sector; 2703 n_sectors = range.n_sectors; 2704 2705 if (ic->mode == 'B') { 2706 if (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector, n_sectors, BITMAP_OP_TEST_ALL_CLEAR)) 2707 goto advance_and_next; 2708 2709 while (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector, 2710 ic->sectors_per_block, BITMAP_OP_TEST_ALL_CLEAR)) { 2711 logical_sector += ic->sectors_per_block; 2712 n_sectors -= ic->sectors_per_block; 2713 cond_resched(); 2714 } 2715 while (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector + n_sectors - ic->sectors_per_block, 2716 ic->sectors_per_block, BITMAP_OP_TEST_ALL_CLEAR)) { 2717 n_sectors -= ic->sectors_per_block; 2718 cond_resched(); 2719 } 2720 get_area_and_offset(ic, logical_sector, &area, &offset); 2721 } 2722 2723 DEBUG_print("recalculating: %llx, %llx\n", logical_sector, n_sectors); 2724 2725 if (unlikely(++super_counter == RECALC_WRITE_SUPER)) { 2726 recalc_write_super(ic); 2727 if (ic->mode == 'B') 2728 queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, ic->bitmap_flush_interval); 2729 2730 super_counter = 0; 2731 } 2732 2733 if (unlikely(dm_integrity_failed(ic))) 2734 goto err; 2735 2736 io_req.bi_opf = REQ_OP_READ; 2737 io_req.mem.type = DM_IO_VMA; 2738 io_req.mem.ptr.addr = ic->recalc_buffer; 2739 io_req.notify.fn = NULL; 2740 io_req.client = ic->io; 2741 io_loc.bdev = ic->dev->bdev; 2742 io_loc.sector = get_data_sector(ic, area, offset); 2743 io_loc.count = n_sectors; 2744 2745 r = dm_io(&io_req, 1, &io_loc, NULL); 2746 if (unlikely(r)) { 2747 
dm_integrity_io_error(ic, "reading data", r); 2748 goto err; 2749 } 2750 2751 t = ic->recalc_tags; 2752 for (i = 0; i < n_sectors; i += ic->sectors_per_block) { 2753 integrity_sector_checksum(ic, logical_sector + i, ic->recalc_buffer + (i << SECTOR_SHIFT), t); 2754 t += ic->tag_size; 2755 } 2756 2757 metadata_block = get_metadata_sector_and_offset(ic, area, offset, &metadata_offset); 2758 2759 r = dm_integrity_rw_tag(ic, ic->recalc_tags, &metadata_block, &metadata_offset, t - ic->recalc_tags, TAG_WRITE); 2760 if (unlikely(r)) { 2761 dm_integrity_io_error(ic, "writing tags", r); 2762 goto err; 2763 } 2764 2765 if (ic->mode == 'B') { 2766 sector_t start, end; 2767 2768 start = (range.logical_sector >> 2769 (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit)) << 2770 (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit); 2771 end = ((range.logical_sector + range.n_sectors) >> 2772 (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit)) << 2773 (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit); 2774 block_bitmap_op(ic, ic->recalc_bitmap, start, end - start, BITMAP_OP_CLEAR); 2775 } 2776 2777 advance_and_next: 2778 cond_resched(); 2779 2780 spin_lock_irq(&ic->endio_wait.lock); 2781 remove_range_unlocked(ic, &range); 2782 ic->sb->recalc_sector = cpu_to_le64(range.logical_sector + range.n_sectors); 2783 goto next_chunk; 2784 2785 err: 2786 remove_range(ic, &range); 2787 return; 2788 2789 unlock_ret: 2790 spin_unlock_irq(&ic->endio_wait.lock); 2791 2792 recalc_write_super(ic); 2793 } 2794 2795 static void bitmap_block_work(struct work_struct *w) 2796 { 2797 struct bitmap_block_status *bbs = container_of(w, struct bitmap_block_status, work); 2798 struct dm_integrity_c *ic = bbs->ic; 2799 struct bio *bio; 2800 struct bio_list bio_queue; 2801 struct bio_list waiting; 2802 2803 bio_list_init(&waiting); 2804 2805 spin_lock(&bbs->bio_queue_lock); 2806 bio_queue = bbs->bio_queue; 2807 bio_list_init(&bbs->bio_queue); 2808 spin_unlock(&bbs->bio_queue_lock); 2809 2810 while ((bio = bio_list_pop(&bio_queue))) { 2811 struct dm_integrity_io *dio; 2812 2813 dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io)); 2814 2815 if (block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector, 2816 dio->range.n_sectors, BITMAP_OP_TEST_ALL_SET)) { 2817 remove_range(ic, &dio->range); 2818 INIT_WORK(&dio->work, integrity_bio_wait); 2819 queue_work(ic->offload_wq, &dio->work); 2820 } else { 2821 block_bitmap_op(ic, ic->journal, dio->range.logical_sector, 2822 dio->range.n_sectors, BITMAP_OP_SET); 2823 bio_list_add(&waiting, bio); 2824 } 2825 } 2826 2827 if (bio_list_empty(&waiting)) 2828 return; 2829 2830 rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 2831 bbs->idx * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), 2832 BITMAP_BLOCK_SIZE >> SECTOR_SHIFT, NULL); 2833 2834 while ((bio = bio_list_pop(&waiting))) { 2835 struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io)); 2836 2837 block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector, 2838 dio->range.n_sectors, BITMAP_OP_SET); 2839 2840 remove_range(ic, &dio->range); 2841 INIT_WORK(&dio->work, integrity_bio_wait); 2842 queue_work(ic->offload_wq, &dio->work); 2843 } 2844 2845 queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, ic->bitmap_flush_interval); 2846 } 2847 2848 static void bitmap_flush_work(struct work_struct *work) 2849 { 2850 struct dm_integrity_c *ic = container_of(work, struct dm_integrity_c, bitmap_flush_work.work); 2851 struct dm_integrity_range 
range; 2852 unsigned long limit; 2853 struct bio *bio; 2854 2855 dm_integrity_flush_buffers(ic, false); 2856 2857 range.logical_sector = 0; 2858 range.n_sectors = ic->provided_data_sectors; 2859 2860 spin_lock_irq(&ic->endio_wait.lock); 2861 add_new_range_and_wait(ic, &range); 2862 spin_unlock_irq(&ic->endio_wait.lock); 2863 2864 dm_integrity_flush_buffers(ic, true); 2865 2866 limit = ic->provided_data_sectors; 2867 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) { 2868 limit = le64_to_cpu(ic->sb->recalc_sector) 2869 >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit) 2870 << (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit); 2871 } 2872 /*DEBUG_print("zeroing journal\n");*/ 2873 block_bitmap_op(ic, ic->journal, 0, limit, BITMAP_OP_CLEAR); 2874 block_bitmap_op(ic, ic->may_write_bitmap, 0, limit, BITMAP_OP_CLEAR); 2875 2876 rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0, 2877 ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); 2878 2879 spin_lock_irq(&ic->endio_wait.lock); 2880 remove_range_unlocked(ic, &range); 2881 while (unlikely((bio = bio_list_pop(&ic->synchronous_bios)) != NULL)) { 2882 bio_endio(bio); 2883 spin_unlock_irq(&ic->endio_wait.lock); 2884 spin_lock_irq(&ic->endio_wait.lock); 2885 } 2886 spin_unlock_irq(&ic->endio_wait.lock); 2887 } 2888 2889 2890 static void init_journal(struct dm_integrity_c *ic, unsigned int start_section, 2891 unsigned int n_sections, unsigned char commit_seq) 2892 { 2893 unsigned int i, j, n; 2894 2895 if (!n_sections) 2896 return; 2897 2898 for (n = 0; n < n_sections; n++) { 2899 i = start_section + n; 2900 wraparound_section(ic, &i); 2901 for (j = 0; j < ic->journal_section_sectors; j++) { 2902 struct journal_sector *js = access_journal(ic, i, j); 2903 2904 BUILD_BUG_ON(sizeof(js->sectors) != JOURNAL_SECTOR_DATA); 2905 memset(&js->sectors, 0, sizeof(js->sectors)); 2906 js->commit_id = dm_integrity_commit_id(ic, i, j, commit_seq); 2907 } 2908 for (j = 0; j < ic->journal_section_entries; j++) { 2909 struct journal_entry *je = access_journal_entry(ic, i, j); 2910 2911 journal_entry_set_unused(je); 2912 } 2913 } 2914 2915 write_journal(ic, start_section, n_sections); 2916 } 2917 2918 static int find_commit_seq(struct dm_integrity_c *ic, unsigned int i, unsigned int j, commit_id_t id) 2919 { 2920 unsigned char k; 2921 2922 for (k = 0; k < N_COMMIT_IDS; k++) { 2923 if (dm_integrity_commit_id(ic, i, j, k) == id) 2924 return k; 2925 } 2926 dm_integrity_io_error(ic, "journal commit id", -EIO); 2927 return -EIO; 2928 } 2929 2930 static void replay_journal(struct dm_integrity_c *ic) 2931 { 2932 unsigned int i, j; 2933 bool used_commit_ids[N_COMMIT_IDS]; 2934 unsigned int max_commit_id_sections[N_COMMIT_IDS]; 2935 unsigned int write_start, write_sections; 2936 unsigned int continue_section; 2937 bool journal_empty; 2938 unsigned char unused, last_used, want_commit_seq; 2939 2940 if (ic->mode == 'R') 2941 return; 2942 2943 if (ic->journal_uptodate) 2944 return; 2945 2946 last_used = 0; 2947 write_start = 0; 2948 2949 if (!ic->just_formatted) { 2950 DEBUG_print("reading journal\n"); 2951 rw_journal(ic, REQ_OP_READ, 0, ic->journal_sections, NULL); 2952 if (ic->journal_io) 2953 DEBUG_bytes(lowmem_page_address(ic->journal_io[0].page), 64, "read journal"); 2954 if (ic->journal_io) { 2955 struct journal_completion crypt_comp; 2956 2957 crypt_comp.ic = ic; 2958 init_completion(&crypt_comp.comp); 2959 crypt_comp.in_flight = (atomic_t)ATOMIC_INIT(0); 2960 encrypt_journal(ic, false, 0, 
ic->journal_sections, &crypt_comp); 2961 wait_for_completion(&crypt_comp.comp); 2962 } 2963 DEBUG_bytes(lowmem_page_address(ic->journal[0].page), 64, "decrypted journal"); 2964 } 2965 2966 if (dm_integrity_failed(ic)) 2967 goto clear_journal; 2968 2969 journal_empty = true; 2970 memset(used_commit_ids, 0, sizeof(used_commit_ids)); 2971 memset(max_commit_id_sections, 0, sizeof(max_commit_id_sections)); 2972 for (i = 0; i < ic->journal_sections; i++) { 2973 for (j = 0; j < ic->journal_section_sectors; j++) { 2974 int k; 2975 struct journal_sector *js = access_journal(ic, i, j); 2976 2977 k = find_commit_seq(ic, i, j, js->commit_id); 2978 if (k < 0) 2979 goto clear_journal; 2980 used_commit_ids[k] = true; 2981 max_commit_id_sections[k] = i; 2982 } 2983 if (journal_empty) { 2984 for (j = 0; j < ic->journal_section_entries; j++) { 2985 struct journal_entry *je = access_journal_entry(ic, i, j); 2986 2987 if (!journal_entry_is_unused(je)) { 2988 journal_empty = false; 2989 break; 2990 } 2991 } 2992 } 2993 } 2994 2995 if (!used_commit_ids[N_COMMIT_IDS - 1]) { 2996 unused = N_COMMIT_IDS - 1; 2997 while (unused && !used_commit_ids[unused - 1]) 2998 unused--; 2999 } else { 3000 for (unused = 0; unused < N_COMMIT_IDS; unused++) 3001 if (!used_commit_ids[unused]) 3002 break; 3003 if (unused == N_COMMIT_IDS) { 3004 dm_integrity_io_error(ic, "journal commit ids", -EIO); 3005 goto clear_journal; 3006 } 3007 } 3008 DEBUG_print("first unused commit seq %d [%d,%d,%d,%d]\n", 3009 unused, used_commit_ids[0], used_commit_ids[1], 3010 used_commit_ids[2], used_commit_ids[3]); 3011 3012 last_used = prev_commit_seq(unused); 3013 want_commit_seq = prev_commit_seq(last_used); 3014 3015 if (!used_commit_ids[want_commit_seq] && used_commit_ids[prev_commit_seq(want_commit_seq)]) 3016 journal_empty = true; 3017 3018 write_start = max_commit_id_sections[last_used] + 1; 3019 if (unlikely(write_start >= ic->journal_sections)) 3020 want_commit_seq = next_commit_seq(want_commit_seq); 3021 wraparound_section(ic, &write_start); 3022 3023 i = write_start; 3024 for (write_sections = 0; write_sections < ic->journal_sections; write_sections++) { 3025 for (j = 0; j < ic->journal_section_sectors; j++) { 3026 struct journal_sector *js = access_journal(ic, i, j); 3027 3028 if (js->commit_id != dm_integrity_commit_id(ic, i, j, want_commit_seq)) { 3029 /* 3030 * This could be caused by crash during writing. 3031 * We won't replay the inconsistent part of the 3032 * journal. 
3033 */ 3034 DEBUG_print("commit id mismatch at position (%u, %u): %d != %d\n", 3035 i, j, find_commit_seq(ic, i, j, js->commit_id), want_commit_seq); 3036 goto brk; 3037 } 3038 } 3039 i++; 3040 if (unlikely(i >= ic->journal_sections)) 3041 want_commit_seq = next_commit_seq(want_commit_seq); 3042 wraparound_section(ic, &i); 3043 } 3044 brk: 3045 3046 if (!journal_empty) { 3047 DEBUG_print("replaying %u sections, starting at %u, commit seq %d\n", 3048 write_sections, write_start, want_commit_seq); 3049 do_journal_write(ic, write_start, write_sections, true); 3050 } 3051 3052 if (write_sections == ic->journal_sections && (ic->mode == 'J' || journal_empty)) { 3053 continue_section = write_start; 3054 ic->commit_seq = want_commit_seq; 3055 DEBUG_print("continuing from section %u, commit seq %d\n", write_start, ic->commit_seq); 3056 } else { 3057 unsigned int s; 3058 unsigned char erase_seq; 3059 3060 clear_journal: 3061 DEBUG_print("clearing journal\n"); 3062 3063 erase_seq = prev_commit_seq(prev_commit_seq(last_used)); 3064 s = write_start; 3065 init_journal(ic, s, 1, erase_seq); 3066 s++; 3067 wraparound_section(ic, &s); 3068 if (ic->journal_sections >= 2) { 3069 init_journal(ic, s, ic->journal_sections - 2, erase_seq); 3070 s += ic->journal_sections - 2; 3071 wraparound_section(ic, &s); 3072 init_journal(ic, s, 1, erase_seq); 3073 } 3074 3075 continue_section = 0; 3076 ic->commit_seq = next_commit_seq(erase_seq); 3077 } 3078 3079 ic->committed_section = continue_section; 3080 ic->n_committed_sections = 0; 3081 3082 ic->uncommitted_section = continue_section; 3083 ic->n_uncommitted_sections = 0; 3084 3085 ic->free_section = continue_section; 3086 ic->free_section_entry = 0; 3087 ic->free_sectors = ic->journal_entries; 3088 3089 ic->journal_tree_root = RB_ROOT; 3090 for (i = 0; i < ic->journal_entries; i++) 3091 init_journal_node(&ic->journal_tree[i]); 3092 } 3093 3094 static void dm_integrity_enter_synchronous_mode(struct dm_integrity_c *ic) 3095 { 3096 DEBUG_print("%s\n", __func__); 3097 3098 if (ic->mode == 'B') { 3099 ic->bitmap_flush_interval = msecs_to_jiffies(10) + 1; 3100 ic->synchronous_mode = 1; 3101 3102 cancel_delayed_work_sync(&ic->bitmap_flush_work); 3103 queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, 0); 3104 flush_workqueue(ic->commit_wq); 3105 } 3106 } 3107 3108 static int dm_integrity_reboot(struct notifier_block *n, unsigned long code, void *x) 3109 { 3110 struct dm_integrity_c *ic = container_of(n, struct dm_integrity_c, reboot_notifier); 3111 3112 DEBUG_print("%s\n", __func__); 3113 3114 dm_integrity_enter_synchronous_mode(ic); 3115 3116 return NOTIFY_DONE; 3117 } 3118 3119 static void dm_integrity_postsuspend(struct dm_target *ti) 3120 { 3121 struct dm_integrity_c *ic = ti->private; 3122 int r; 3123 3124 WARN_ON(unregister_reboot_notifier(&ic->reboot_notifier)); 3125 3126 del_timer_sync(&ic->autocommit_timer); 3127 3128 if (ic->recalc_wq) 3129 drain_workqueue(ic->recalc_wq); 3130 3131 if (ic->mode == 'B') 3132 cancel_delayed_work_sync(&ic->bitmap_flush_work); 3133 3134 queue_work(ic->commit_wq, &ic->commit_work); 3135 drain_workqueue(ic->commit_wq); 3136 3137 if (ic->mode == 'J') { 3138 queue_work(ic->writer_wq, &ic->writer_work); 3139 drain_workqueue(ic->writer_wq); 3140 dm_integrity_flush_buffers(ic, true); 3141 if (ic->wrote_to_journal) { 3142 init_journal(ic, ic->free_section, 3143 ic->journal_sections - ic->free_section, ic->commit_seq); 3144 if (ic->free_section) { 3145 init_journal(ic, 0, ic->free_section, 3146 next_commit_seq(ic->commit_seq)); 3147 } 
3148 } 3149 } 3150 3151 if (ic->mode == 'B') { 3152 dm_integrity_flush_buffers(ic, true); 3153 #if 1 3154 /* set to 0 to test bitmap replay code */ 3155 init_journal(ic, 0, ic->journal_sections, 0); 3156 ic->sb->flags &= ~cpu_to_le32(SB_FLAG_DIRTY_BITMAP); 3157 r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA); 3158 if (unlikely(r)) 3159 dm_integrity_io_error(ic, "writing superblock", r); 3160 #endif 3161 } 3162 3163 BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress)); 3164 3165 ic->journal_uptodate = true; 3166 } 3167 3168 static void dm_integrity_resume(struct dm_target *ti) 3169 { 3170 struct dm_integrity_c *ic = ti->private; 3171 __u64 old_provided_data_sectors = le64_to_cpu(ic->sb->provided_data_sectors); 3172 int r; 3173 3174 DEBUG_print("resume\n"); 3175 3176 ic->wrote_to_journal = false; 3177 3178 if (ic->provided_data_sectors != old_provided_data_sectors) { 3179 if (ic->provided_data_sectors > old_provided_data_sectors && 3180 ic->mode == 'B' && 3181 ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit) { 3182 rw_journal_sectors(ic, REQ_OP_READ, 0, 3183 ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); 3184 block_bitmap_op(ic, ic->journal, old_provided_data_sectors, 3185 ic->provided_data_sectors - old_provided_data_sectors, BITMAP_OP_SET); 3186 rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0, 3187 ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); 3188 } 3189 3190 ic->sb->provided_data_sectors = cpu_to_le64(ic->provided_data_sectors); 3191 r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA); 3192 if (unlikely(r)) 3193 dm_integrity_io_error(ic, "writing superblock", r); 3194 } 3195 3196 if (ic->sb->flags & cpu_to_le32(SB_FLAG_DIRTY_BITMAP)) { 3197 DEBUG_print("resume dirty_bitmap\n"); 3198 rw_journal_sectors(ic, REQ_OP_READ, 0, 3199 ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); 3200 if (ic->mode == 'B') { 3201 if (ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit && 3202 !ic->reset_recalculate_flag) { 3203 block_bitmap_copy(ic, ic->recalc_bitmap, ic->journal); 3204 block_bitmap_copy(ic, ic->may_write_bitmap, ic->journal); 3205 if (!block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, 3206 BITMAP_OP_TEST_ALL_CLEAR)) { 3207 ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING); 3208 ic->sb->recalc_sector = cpu_to_le64(0); 3209 } 3210 } else { 3211 DEBUG_print("non-matching blocks_per_bitmap_bit: %u, %u\n", 3212 ic->sb->log2_blocks_per_bitmap_bit, ic->log2_blocks_per_bitmap_bit); 3213 ic->sb->log2_blocks_per_bitmap_bit = ic->log2_blocks_per_bitmap_bit; 3214 block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_SET); 3215 block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_SET); 3216 block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_SET); 3217 rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0, 3218 ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); 3219 ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING); 3220 ic->sb->recalc_sector = cpu_to_le64(0); 3221 } 3222 } else { 3223 if (!(ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit && 3224 block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_TEST_ALL_CLEAR)) || 3225 ic->reset_recalculate_flag) { 3226 ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING); 3227 ic->sb->recalc_sector = cpu_to_le64(0); 3228 } 3229 init_journal(ic, 0, ic->journal_sections, 0); 3230 replay_journal(ic); 3231 ic->sb->flags &= 
~cpu_to_le32(SB_FLAG_DIRTY_BITMAP); 3232 } 3233 r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA); 3234 if (unlikely(r)) 3235 dm_integrity_io_error(ic, "writing superblock", r); 3236 } else { 3237 replay_journal(ic); 3238 if (ic->reset_recalculate_flag) { 3239 ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING); 3240 ic->sb->recalc_sector = cpu_to_le64(0); 3241 } 3242 if (ic->mode == 'B') { 3243 ic->sb->flags |= cpu_to_le32(SB_FLAG_DIRTY_BITMAP); 3244 ic->sb->log2_blocks_per_bitmap_bit = ic->log2_blocks_per_bitmap_bit; 3245 r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA); 3246 if (unlikely(r)) 3247 dm_integrity_io_error(ic, "writing superblock", r); 3248 3249 block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR); 3250 block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR); 3251 block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR); 3252 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) && 3253 le64_to_cpu(ic->sb->recalc_sector) < ic->provided_data_sectors) { 3254 block_bitmap_op(ic, ic->journal, le64_to_cpu(ic->sb->recalc_sector), 3255 ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET); 3256 block_bitmap_op(ic, ic->recalc_bitmap, le64_to_cpu(ic->sb->recalc_sector), 3257 ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET); 3258 block_bitmap_op(ic, ic->may_write_bitmap, le64_to_cpu(ic->sb->recalc_sector), 3259 ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET); 3260 } 3261 rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0, 3262 ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); 3263 } 3264 } 3265 3266 DEBUG_print("testing recalc: %x\n", ic->sb->flags); 3267 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) { 3268 __u64 recalc_pos = le64_to_cpu(ic->sb->recalc_sector); 3269 3270 DEBUG_print("recalc pos: %llx / %llx\n", recalc_pos, ic->provided_data_sectors); 3271 if (recalc_pos < ic->provided_data_sectors) { 3272 queue_work(ic->recalc_wq, &ic->recalc_work); 3273 } else if (recalc_pos > ic->provided_data_sectors) { 3274 ic->sb->recalc_sector = cpu_to_le64(ic->provided_data_sectors); 3275 recalc_write_super(ic); 3276 } 3277 } 3278 3279 ic->reboot_notifier.notifier_call = dm_integrity_reboot; 3280 ic->reboot_notifier.next = NULL; 3281 ic->reboot_notifier.priority = INT_MAX - 1; /* be notified after md and before hardware drivers */ 3282 WARN_ON(register_reboot_notifier(&ic->reboot_notifier)); 3283 3284 #if 0 3285 /* set to 1 to stress test synchronous mode */ 3286 dm_integrity_enter_synchronous_mode(ic); 3287 #endif 3288 } 3289 3290 static void dm_integrity_status(struct dm_target *ti, status_type_t type, 3291 unsigned int status_flags, char *result, unsigned int maxlen) 3292 { 3293 struct dm_integrity_c *ic = ti->private; 3294 unsigned int arg_count; 3295 size_t sz = 0; 3296 3297 switch (type) { 3298 case STATUSTYPE_INFO: 3299 DMEMIT("%llu %llu", 3300 (unsigned long long)atomic64_read(&ic->number_of_mismatches), 3301 ic->provided_data_sectors); 3302 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) 3303 DMEMIT(" %llu", le64_to_cpu(ic->sb->recalc_sector)); 3304 else 3305 DMEMIT(" -"); 3306 break; 3307 3308 case STATUSTYPE_TABLE: { 3309 __u64 watermark_percentage = (__u64)(ic->journal_entries - ic->free_sectors_threshold) * 100; 3310 3311 watermark_percentage += ic->journal_entries / 2; 3312 do_div(watermark_percentage, ic->journal_entries); 3313 arg_count = 3; 3314 arg_count += !!ic->meta_dev; 
3315 arg_count += ic->sectors_per_block != 1; 3316 arg_count += !!(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)); 3317 arg_count += ic->reset_recalculate_flag; 3318 arg_count += ic->discard; 3319 arg_count += ic->mode == 'J'; 3320 arg_count += ic->mode == 'J'; 3321 arg_count += ic->mode == 'B'; 3322 arg_count += ic->mode == 'B'; 3323 arg_count += !!ic->internal_hash_alg.alg_string; 3324 arg_count += !!ic->journal_crypt_alg.alg_string; 3325 arg_count += !!ic->journal_mac_alg.alg_string; 3326 arg_count += (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0; 3327 arg_count += (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) != 0; 3328 arg_count += ic->legacy_recalculate; 3329 DMEMIT("%s %llu %u %c %u", ic->dev->name, ic->start, 3330 ic->tag_size, ic->mode, arg_count); 3331 if (ic->meta_dev) 3332 DMEMIT(" meta_device:%s", ic->meta_dev->name); 3333 if (ic->sectors_per_block != 1) 3334 DMEMIT(" block_size:%u", ic->sectors_per_block << SECTOR_SHIFT); 3335 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) 3336 DMEMIT(" recalculate"); 3337 if (ic->reset_recalculate_flag) 3338 DMEMIT(" reset_recalculate"); 3339 if (ic->discard) 3340 DMEMIT(" allow_discards"); 3341 DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS); 3342 DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors); 3343 DMEMIT(" buffer_sectors:%u", 1U << ic->log2_buffer_sectors); 3344 if (ic->mode == 'J') { 3345 DMEMIT(" journal_watermark:%u", (unsigned int)watermark_percentage); 3346 DMEMIT(" commit_time:%u", ic->autocommit_msec); 3347 } 3348 if (ic->mode == 'B') { 3349 DMEMIT(" sectors_per_bit:%llu", (sector_t)ic->sectors_per_block << ic->log2_blocks_per_bitmap_bit); 3350 DMEMIT(" bitmap_flush_interval:%u", jiffies_to_msecs(ic->bitmap_flush_interval)); 3351 } 3352 if ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0) 3353 DMEMIT(" fix_padding"); 3354 if ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) != 0) 3355 DMEMIT(" fix_hmac"); 3356 if (ic->legacy_recalculate) 3357 DMEMIT(" legacy_recalculate"); 3358 3359 #define EMIT_ALG(a, n) \ 3360 do { \ 3361 if (ic->a.alg_string) { \ 3362 DMEMIT(" %s:%s", n, ic->a.alg_string); \ 3363 if (ic->a.key_string) \ 3364 DMEMIT(":%s", ic->a.key_string);\ 3365 } \ 3366 } while (0) 3367 EMIT_ALG(internal_hash_alg, "internal_hash"); 3368 EMIT_ALG(journal_crypt_alg, "journal_crypt"); 3369 EMIT_ALG(journal_mac_alg, "journal_mac"); 3370 break; 3371 } 3372 case STATUSTYPE_IMA: 3373 DMEMIT_TARGET_NAME_VERSION(ti->type); 3374 DMEMIT(",dev_name=%s,start=%llu,tag_size=%u,mode=%c", 3375 ic->dev->name, ic->start, ic->tag_size, ic->mode); 3376 3377 if (ic->meta_dev) 3378 DMEMIT(",meta_device=%s", ic->meta_dev->name); 3379 if (ic->sectors_per_block != 1) 3380 DMEMIT(",block_size=%u", ic->sectors_per_block << SECTOR_SHIFT); 3381 3382 DMEMIT(",recalculate=%c", (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) ? 3383 'y' : 'n'); 3384 DMEMIT(",allow_discards=%c", ic->discard ? 'y' : 'n'); 3385 DMEMIT(",fix_padding=%c", 3386 ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0) ? 'y' : 'n'); 3387 DMEMIT(",fix_hmac=%c", 3388 ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) != 0) ? 'y' : 'n'); 3389 DMEMIT(",legacy_recalculate=%c", ic->legacy_recalculate ? 
'y' : 'n'); 3390 3391 DMEMIT(",journal_sectors=%u", ic->initial_sectors - SB_SECTORS); 3392 DMEMIT(",interleave_sectors=%u", 1U << ic->sb->log2_interleave_sectors); 3393 DMEMIT(",buffer_sectors=%u", 1U << ic->log2_buffer_sectors); 3394 DMEMIT(";"); 3395 break; 3396 } 3397 } 3398 3399 static int dm_integrity_iterate_devices(struct dm_target *ti, 3400 iterate_devices_callout_fn fn, void *data) 3401 { 3402 struct dm_integrity_c *ic = ti->private; 3403 3404 if (!ic->meta_dev) 3405 return fn(ti, ic->dev, ic->start + ic->initial_sectors + ic->metadata_run, ti->len, data); 3406 else 3407 return fn(ti, ic->dev, 0, ti->len, data); 3408 } 3409 3410 static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *limits) 3411 { 3412 struct dm_integrity_c *ic = ti->private; 3413 3414 if (ic->sectors_per_block > 1) { 3415 limits->logical_block_size = ic->sectors_per_block << SECTOR_SHIFT; 3416 limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT; 3417 blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT); 3418 limits->dma_alignment = limits->logical_block_size - 1; 3419 } 3420 } 3421 3422 static void calculate_journal_section_size(struct dm_integrity_c *ic) 3423 { 3424 unsigned int sector_space = JOURNAL_SECTOR_DATA; 3425 3426 ic->journal_sections = le32_to_cpu(ic->sb->journal_sections); 3427 ic->journal_entry_size = roundup(offsetof(struct journal_entry, last_bytes[ic->sectors_per_block]) + ic->tag_size, 3428 JOURNAL_ENTRY_ROUNDUP); 3429 3430 if (ic->sb->flags & cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC)) 3431 sector_space -= JOURNAL_MAC_PER_SECTOR; 3432 ic->journal_entries_per_sector = sector_space / ic->journal_entry_size; 3433 ic->journal_section_entries = ic->journal_entries_per_sector * JOURNAL_BLOCK_SECTORS; 3434 ic->journal_section_sectors = (ic->journal_section_entries << ic->sb->log2_sectors_per_block) + JOURNAL_BLOCK_SECTORS; 3435 ic->journal_entries = ic->journal_section_entries * ic->journal_sections; 3436 } 3437 3438 static int calculate_device_limits(struct dm_integrity_c *ic) 3439 { 3440 __u64 initial_sectors; 3441 3442 calculate_journal_section_size(ic); 3443 initial_sectors = SB_SECTORS + (__u64)ic->journal_section_sectors * ic->journal_sections; 3444 if (initial_sectors + METADATA_PADDING_SECTORS >= ic->meta_device_sectors || initial_sectors > UINT_MAX) 3445 return -EINVAL; 3446 ic->initial_sectors = initial_sectors; 3447 3448 if (!ic->meta_dev) { 3449 sector_t last_sector, last_area, last_offset; 3450 3451 /* we have to maintain excessive padding for compatibility with existing volumes */ 3452 __u64 metadata_run_padding = 3453 ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING) ? 
3454 (__u64)(METADATA_PADDING_SECTORS << SECTOR_SHIFT) : 3455 (__u64)(1 << SECTOR_SHIFT << METADATA_PADDING_SECTORS); 3456 3457 ic->metadata_run = round_up((__u64)ic->tag_size << (ic->sb->log2_interleave_sectors - ic->sb->log2_sectors_per_block), 3458 metadata_run_padding) >> SECTOR_SHIFT; 3459 if (!(ic->metadata_run & (ic->metadata_run - 1))) 3460 ic->log2_metadata_run = __ffs(ic->metadata_run); 3461 else 3462 ic->log2_metadata_run = -1; 3463 3464 get_area_and_offset(ic, ic->provided_data_sectors - 1, &last_area, &last_offset); 3465 last_sector = get_data_sector(ic, last_area, last_offset); 3466 if (last_sector < ic->start || last_sector >= ic->meta_device_sectors) 3467 return -EINVAL; 3468 } else { 3469 __u64 meta_size = (ic->provided_data_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size; 3470 3471 meta_size = (meta_size + ((1U << (ic->log2_buffer_sectors + SECTOR_SHIFT)) - 1)) 3472 >> (ic->log2_buffer_sectors + SECTOR_SHIFT); 3473 meta_size <<= ic->log2_buffer_sectors; 3474 if (ic->initial_sectors + meta_size < ic->initial_sectors || 3475 ic->initial_sectors + meta_size > ic->meta_device_sectors) 3476 return -EINVAL; 3477 ic->metadata_run = 1; 3478 ic->log2_metadata_run = 0; 3479 } 3480 3481 return 0; 3482 } 3483 3484 static void get_provided_data_sectors(struct dm_integrity_c *ic) 3485 { 3486 if (!ic->meta_dev) { 3487 int test_bit; 3488 3489 ic->provided_data_sectors = 0; 3490 for (test_bit = fls64(ic->meta_device_sectors) - 1; test_bit >= 3; test_bit--) { 3491 __u64 prev_data_sectors = ic->provided_data_sectors; 3492 3493 ic->provided_data_sectors |= (sector_t)1 << test_bit; 3494 if (calculate_device_limits(ic)) 3495 ic->provided_data_sectors = prev_data_sectors; 3496 } 3497 } else { 3498 ic->provided_data_sectors = ic->data_device_sectors; 3499 ic->provided_data_sectors &= ~(sector_t)(ic->sectors_per_block - 1); 3500 } 3501 } 3502 3503 static int initialize_superblock(struct dm_integrity_c *ic, 3504 unsigned int journal_sectors, unsigned int interleave_sectors) 3505 { 3506 unsigned int journal_sections; 3507 int test_bit; 3508 3509 memset(ic->sb, 0, SB_SECTORS << SECTOR_SHIFT); 3510 memcpy(ic->sb->magic, SB_MAGIC, 8); 3511 ic->sb->integrity_tag_size = cpu_to_le16(ic->tag_size); 3512 ic->sb->log2_sectors_per_block = __ffs(ic->sectors_per_block); 3513 if (ic->journal_mac_alg.alg_string) 3514 ic->sb->flags |= cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC); 3515 3516 calculate_journal_section_size(ic); 3517 journal_sections = journal_sectors / ic->journal_section_sectors; 3518 if (!journal_sections) 3519 journal_sections = 1; 3520 3521 if (ic->fix_hmac && (ic->internal_hash_alg.alg_string || ic->journal_mac_alg.alg_string)) { 3522 ic->sb->flags |= cpu_to_le32(SB_FLAG_FIXED_HMAC); 3523 get_random_bytes(ic->sb->salt, SALT_SIZE); 3524 } 3525 3526 if (!ic->meta_dev) { 3527 if (ic->fix_padding) 3528 ic->sb->flags |= cpu_to_le32(SB_FLAG_FIXED_PADDING); 3529 ic->sb->journal_sections = cpu_to_le32(journal_sections); 3530 if (!interleave_sectors) 3531 interleave_sectors = DEFAULT_INTERLEAVE_SECTORS; 3532 ic->sb->log2_interleave_sectors = __fls(interleave_sectors); 3533 ic->sb->log2_interleave_sectors = max_t(__u8, MIN_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors); 3534 ic->sb->log2_interleave_sectors = min_t(__u8, MAX_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors); 3535 3536 get_provided_data_sectors(ic); 3537 if (!ic->provided_data_sectors) 3538 return -EINVAL; 3539 } else { 3540 ic->sb->log2_interleave_sectors = 0; 3541 3542 get_provided_data_sectors(ic); 3543 if 
(!ic->provided_data_sectors) 3544 return -EINVAL; 3545 3546 try_smaller_buffer: 3547 ic->sb->journal_sections = cpu_to_le32(0); 3548 for (test_bit = fls(journal_sections) - 1; test_bit >= 0; test_bit--) { 3549 __u32 prev_journal_sections = le32_to_cpu(ic->sb->journal_sections); 3550 __u32 test_journal_sections = prev_journal_sections | (1U << test_bit); 3551 3552 if (test_journal_sections > journal_sections) 3553 continue; 3554 ic->sb->journal_sections = cpu_to_le32(test_journal_sections); 3555 if (calculate_device_limits(ic)) 3556 ic->sb->journal_sections = cpu_to_le32(prev_journal_sections); 3557 3558 } 3559 if (!le32_to_cpu(ic->sb->journal_sections)) { 3560 if (ic->log2_buffer_sectors > 3) { 3561 ic->log2_buffer_sectors--; 3562 goto try_smaller_buffer; 3563 } 3564 return -EINVAL; 3565 } 3566 } 3567 3568 ic->sb->provided_data_sectors = cpu_to_le64(ic->provided_data_sectors); 3569 3570 sb_set_version(ic); 3571 3572 return 0; 3573 } 3574 3575 static void dm_integrity_set(struct dm_target *ti, struct dm_integrity_c *ic) 3576 { 3577 struct gendisk *disk = dm_disk(dm_table_get_md(ti->table)); 3578 struct blk_integrity bi; 3579 3580 memset(&bi, 0, sizeof(bi)); 3581 bi.profile = &dm_integrity_profile; 3582 bi.tuple_size = ic->tag_size; 3583 bi.tag_size = bi.tuple_size; 3584 bi.interval_exp = ic->sb->log2_sectors_per_block + SECTOR_SHIFT; 3585 3586 blk_integrity_register(disk, &bi); 3587 blk_queue_max_integrity_segments(disk->queue, UINT_MAX); 3588 } 3589 3590 static void dm_integrity_free_page_list(struct page_list *pl) 3591 { 3592 unsigned int i; 3593 3594 if (!pl) 3595 return; 3596 for (i = 0; pl[i].page; i++) 3597 __free_page(pl[i].page); 3598 kvfree(pl); 3599 } 3600 3601 static struct page_list *dm_integrity_alloc_page_list(unsigned int n_pages) 3602 { 3603 struct page_list *pl; 3604 unsigned int i; 3605 3606 pl = kvmalloc_array(n_pages + 1, sizeof(struct page_list), GFP_KERNEL | __GFP_ZERO); 3607 if (!pl) 3608 return NULL; 3609 3610 for (i = 0; i < n_pages; i++) { 3611 pl[i].page = alloc_page(GFP_KERNEL); 3612 if (!pl[i].page) { 3613 dm_integrity_free_page_list(pl); 3614 return NULL; 3615 } 3616 if (i) 3617 pl[i - 1].next = &pl[i]; 3618 } 3619 pl[i].page = NULL; 3620 pl[i].next = NULL; 3621 3622 return pl; 3623 } 3624 3625 static void dm_integrity_free_journal_scatterlist(struct dm_integrity_c *ic, struct scatterlist **sl) 3626 { 3627 unsigned int i; 3628 3629 for (i = 0; i < ic->journal_sections; i++) 3630 kvfree(sl[i]); 3631 kvfree(sl); 3632 } 3633 3634 static struct scatterlist **dm_integrity_alloc_journal_scatterlist(struct dm_integrity_c *ic, 3635 struct page_list *pl) 3636 { 3637 struct scatterlist **sl; 3638 unsigned int i; 3639 3640 sl = kvmalloc_array(ic->journal_sections, 3641 sizeof(struct scatterlist *), 3642 GFP_KERNEL | __GFP_ZERO); 3643 if (!sl) 3644 return NULL; 3645 3646 for (i = 0; i < ic->journal_sections; i++) { 3647 struct scatterlist *s; 3648 unsigned int start_index, start_offset; 3649 unsigned int end_index, end_offset; 3650 unsigned int n_pages; 3651 unsigned int idx; 3652 3653 page_list_location(ic, i, 0, &start_index, &start_offset); 3654 page_list_location(ic, i, ic->journal_section_sectors - 1, 3655 &end_index, &end_offset); 3656 3657 n_pages = (end_index - start_index + 1); 3658 3659 s = kvmalloc_array(n_pages, sizeof(struct scatterlist), 3660 GFP_KERNEL); 3661 if (!s) { 3662 dm_integrity_free_journal_scatterlist(ic, sl); 3663 return NULL; 3664 } 3665 3666 sg_init_table(s, n_pages); 3667 for (idx = start_index; idx <= end_index; idx++) { 3668 char *va = 
lowmem_page_address(pl[idx].page); 3669 unsigned int start = 0, end = PAGE_SIZE; 3670 3671 if (idx == start_index) 3672 start = start_offset; 3673 if (idx == end_index) 3674 end = end_offset + (1 << SECTOR_SHIFT); 3675 sg_set_buf(&s[idx - start_index], va + start, end - start); 3676 } 3677 3678 sl[i] = s; 3679 } 3680 3681 return sl; 3682 } 3683 3684 static void free_alg(struct alg_spec *a) 3685 { 3686 kfree_sensitive(a->alg_string); 3687 kfree_sensitive(a->key); 3688 memset(a, 0, sizeof(*a)); 3689 } 3690 3691 static int get_alg_and_key(const char *arg, struct alg_spec *a, char **error, char *error_inval) 3692 { 3693 char *k; 3694 3695 free_alg(a); 3696 3697 a->alg_string = kstrdup(strchr(arg, ':') + 1, GFP_KERNEL); 3698 if (!a->alg_string) 3699 goto nomem; 3700 3701 k = strchr(a->alg_string, ':'); 3702 if (k) { 3703 *k = 0; 3704 a->key_string = k + 1; 3705 if (strlen(a->key_string) & 1) 3706 goto inval; 3707 3708 a->key_size = strlen(a->key_string) / 2; 3709 a->key = kmalloc(a->key_size, GFP_KERNEL); 3710 if (!a->key) 3711 goto nomem; 3712 if (hex2bin(a->key, a->key_string, a->key_size)) 3713 goto inval; 3714 } 3715 3716 return 0; 3717 inval: 3718 *error = error_inval; 3719 return -EINVAL; 3720 nomem: 3721 *error = "Out of memory for an argument"; 3722 return -ENOMEM; 3723 } 3724 3725 static int get_mac(struct crypto_shash **hash, struct alg_spec *a, char **error, 3726 char *error_alg, char *error_key) 3727 { 3728 int r; 3729 3730 if (a->alg_string) { 3731 *hash = crypto_alloc_shash(a->alg_string, 0, CRYPTO_ALG_ALLOCATES_MEMORY); 3732 if (IS_ERR(*hash)) { 3733 *error = error_alg; 3734 r = PTR_ERR(*hash); 3735 *hash = NULL; 3736 return r; 3737 } 3738 3739 if (a->key) { 3740 r = crypto_shash_setkey(*hash, a->key, a->key_size); 3741 if (r) { 3742 *error = error_key; 3743 return r; 3744 } 3745 } else if (crypto_shash_get_flags(*hash) & CRYPTO_TFM_NEED_KEY) { 3746 *error = error_key; 3747 return -ENOKEY; 3748 } 3749 } 3750 3751 return 0; 3752 } 3753 3754 static int create_journal(struct dm_integrity_c *ic, char **error) 3755 { 3756 int r = 0; 3757 unsigned int i; 3758 __u64 journal_pages, journal_desc_size, journal_tree_size; 3759 unsigned char *crypt_data = NULL, *crypt_iv = NULL; 3760 struct skcipher_request *req = NULL; 3761 3762 ic->commit_ids[0] = cpu_to_le64(0x1111111111111111ULL); 3763 ic->commit_ids[1] = cpu_to_le64(0x2222222222222222ULL); 3764 ic->commit_ids[2] = cpu_to_le64(0x3333333333333333ULL); 3765 ic->commit_ids[3] = cpu_to_le64(0x4444444444444444ULL); 3766 3767 journal_pages = roundup((__u64)ic->journal_sections * ic->journal_section_sectors, 3768 PAGE_SIZE >> SECTOR_SHIFT) >> (PAGE_SHIFT - SECTOR_SHIFT); 3769 journal_desc_size = journal_pages * sizeof(struct page_list); 3770 if (journal_pages >= totalram_pages() - totalhigh_pages() || journal_desc_size > ULONG_MAX) { 3771 *error = "Journal doesn't fit into memory"; 3772 r = -ENOMEM; 3773 goto bad; 3774 } 3775 ic->journal_pages = journal_pages; 3776 3777 ic->journal = dm_integrity_alloc_page_list(ic->journal_pages); 3778 if (!ic->journal) { 3779 *error = "Could not allocate memory for journal"; 3780 r = -ENOMEM; 3781 goto bad; 3782 } 3783 if (ic->journal_crypt_alg.alg_string) { 3784 unsigned int ivsize, blocksize; 3785 struct journal_completion comp; 3786 3787 comp.ic = ic; 3788 ic->journal_crypt = crypto_alloc_skcipher(ic->journal_crypt_alg.alg_string, 0, CRYPTO_ALG_ALLOCATES_MEMORY); 3789 if (IS_ERR(ic->journal_crypt)) { 3790 *error = "Invalid journal cipher"; 3791 r = PTR_ERR(ic->journal_crypt); 3792 ic->journal_crypt = NULL; 
3793 goto bad; 3794 } 3795 ivsize = crypto_skcipher_ivsize(ic->journal_crypt); 3796 blocksize = crypto_skcipher_blocksize(ic->journal_crypt); 3797 3798 if (ic->journal_crypt_alg.key) { 3799 r = crypto_skcipher_setkey(ic->journal_crypt, ic->journal_crypt_alg.key, 3800 ic->journal_crypt_alg.key_size); 3801 if (r) { 3802 *error = "Error setting encryption key"; 3803 goto bad; 3804 } 3805 } 3806 DEBUG_print("cipher %s, block size %u iv size %u\n", 3807 ic->journal_crypt_alg.alg_string, blocksize, ivsize); 3808 3809 ic->journal_io = dm_integrity_alloc_page_list(ic->journal_pages); 3810 if (!ic->journal_io) { 3811 *error = "Could not allocate memory for journal io"; 3812 r = -ENOMEM; 3813 goto bad; 3814 } 3815 3816 if (blocksize == 1) { 3817 struct scatterlist *sg; 3818 3819 req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL); 3820 if (!req) { 3821 *error = "Could not allocate crypt request"; 3822 r = -ENOMEM; 3823 goto bad; 3824 } 3825 3826 crypt_iv = kzalloc(ivsize, GFP_KERNEL); 3827 if (!crypt_iv) { 3828 *error = "Could not allocate iv"; 3829 r = -ENOMEM; 3830 goto bad; 3831 } 3832 3833 ic->journal_xor = dm_integrity_alloc_page_list(ic->journal_pages); 3834 if (!ic->journal_xor) { 3835 *error = "Could not allocate memory for journal xor"; 3836 r = -ENOMEM; 3837 goto bad; 3838 } 3839 3840 sg = kvmalloc_array(ic->journal_pages + 1, 3841 sizeof(struct scatterlist), 3842 GFP_KERNEL); 3843 if (!sg) { 3844 *error = "Unable to allocate sg list"; 3845 r = -ENOMEM; 3846 goto bad; 3847 } 3848 sg_init_table(sg, ic->journal_pages + 1); 3849 for (i = 0; i < ic->journal_pages; i++) { 3850 char *va = lowmem_page_address(ic->journal_xor[i].page); 3851 3852 clear_page(va); 3853 sg_set_buf(&sg[i], va, PAGE_SIZE); 3854 } 3855 sg_set_buf(&sg[i], &ic->commit_ids, sizeof(ic->commit_ids)); 3856 3857 skcipher_request_set_crypt(req, sg, sg, 3858 PAGE_SIZE * ic->journal_pages + sizeof(ic->commit_ids), crypt_iv); 3859 init_completion(&comp.comp); 3860 comp.in_flight = (atomic_t)ATOMIC_INIT(1); 3861 if (do_crypt(true, req, &comp)) 3862 wait_for_completion(&comp.comp); 3863 kvfree(sg); 3864 r = dm_integrity_failed(ic); 3865 if (r) { 3866 *error = "Unable to encrypt journal"; 3867 goto bad; 3868 } 3869 DEBUG_bytes(lowmem_page_address(ic->journal_xor[0].page), 64, "xor data"); 3870 3871 crypto_free_skcipher(ic->journal_crypt); 3872 ic->journal_crypt = NULL; 3873 } else { 3874 unsigned int crypt_len = roundup(ivsize, blocksize); 3875 3876 req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL); 3877 if (!req) { 3878 *error = "Could not allocate crypt request"; 3879 r = -ENOMEM; 3880 goto bad; 3881 } 3882 3883 crypt_iv = kmalloc(ivsize, GFP_KERNEL); 3884 if (!crypt_iv) { 3885 *error = "Could not allocate iv"; 3886 r = -ENOMEM; 3887 goto bad; 3888 } 3889 3890 crypt_data = kmalloc(crypt_len, GFP_KERNEL); 3891 if (!crypt_data) { 3892 *error = "Unable to allocate crypt data"; 3893 r = -ENOMEM; 3894 goto bad; 3895 } 3896 3897 ic->journal_scatterlist = dm_integrity_alloc_journal_scatterlist(ic, ic->journal); 3898 if (!ic->journal_scatterlist) { 3899 *error = "Unable to allocate sg list"; 3900 r = -ENOMEM; 3901 goto bad; 3902 } 3903 ic->journal_io_scatterlist = dm_integrity_alloc_journal_scatterlist(ic, ic->journal_io); 3904 if (!ic->journal_io_scatterlist) { 3905 *error = "Unable to allocate sg list"; 3906 r = -ENOMEM; 3907 goto bad; 3908 } 3909 ic->sk_requests = kvmalloc_array(ic->journal_sections, 3910 sizeof(struct skcipher_request *), 3911 GFP_KERNEL | __GFP_ZERO); 3912 if (!ic->sk_requests) { 3913 *error = "Unable 
to allocate sk requests"; 3914 r = -ENOMEM; 3915 goto bad; 3916 } 3917 for (i = 0; i < ic->journal_sections; i++) { 3918 struct scatterlist sg; 3919 struct skcipher_request *section_req; 3920 __le32 section_le = cpu_to_le32(i); 3921 3922 memset(crypt_iv, 0x00, ivsize); 3923 memset(crypt_data, 0x00, crypt_len); 3924 memcpy(crypt_data, &section_le, min_t(size_t, crypt_len, sizeof(section_le))); 3925 3926 sg_init_one(&sg, crypt_data, crypt_len); 3927 skcipher_request_set_crypt(req, &sg, &sg, crypt_len, crypt_iv); 3928 init_completion(&comp.comp); 3929 comp.in_flight = (atomic_t)ATOMIC_INIT(1); 3930 if (do_crypt(true, req, &comp)) 3931 wait_for_completion(&comp.comp); 3932 3933 r = dm_integrity_failed(ic); 3934 if (r) { 3935 *error = "Unable to generate iv"; 3936 goto bad; 3937 } 3938 3939 section_req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL); 3940 if (!section_req) { 3941 *error = "Unable to allocate crypt request"; 3942 r = -ENOMEM; 3943 goto bad; 3944 } 3945 section_req->iv = kmalloc_array(ivsize, 2, 3946 GFP_KERNEL); 3947 if (!section_req->iv) { 3948 skcipher_request_free(section_req); 3949 *error = "Unable to allocate iv"; 3950 r = -ENOMEM; 3951 goto bad; 3952 } 3953 memcpy(section_req->iv + ivsize, crypt_data, ivsize); 3954 section_req->cryptlen = (size_t)ic->journal_section_sectors << SECTOR_SHIFT; 3955 ic->sk_requests[i] = section_req; 3956 DEBUG_bytes(crypt_data, ivsize, "iv(%u)", i); 3957 } 3958 } 3959 } 3960 3961 for (i = 0; i < N_COMMIT_IDS; i++) { 3962 unsigned int j; 3963 3964 retest_commit_id: 3965 for (j = 0; j < i; j++) { 3966 if (ic->commit_ids[j] == ic->commit_ids[i]) { 3967 ic->commit_ids[i] = cpu_to_le64(le64_to_cpu(ic->commit_ids[i]) + 1); 3968 goto retest_commit_id; 3969 } 3970 } 3971 DEBUG_print("commit id %u: %016llx\n", i, ic->commit_ids[i]); 3972 } 3973 3974 journal_tree_size = (__u64)ic->journal_entries * sizeof(struct journal_node); 3975 if (journal_tree_size > ULONG_MAX) { 3976 *error = "Journal doesn't fit into memory"; 3977 r = -ENOMEM; 3978 goto bad; 3979 } 3980 ic->journal_tree = kvmalloc(journal_tree_size, GFP_KERNEL); 3981 if (!ic->journal_tree) { 3982 *error = "Could not allocate memory for journal tree"; 3983 r = -ENOMEM; 3984 } 3985 bad: 3986 kfree(crypt_data); 3987 kfree(crypt_iv); 3988 skcipher_request_free(req); 3989 3990 return r; 3991 } 3992 3993 /* 3994 * Construct an integrity mapping 3995 * 3996 * Arguments: 3997 * device 3998 * offset from the start of the device 3999 * tag size 4000 * D - direct writes, J - journal writes, B - bitmap mode, R - recovery mode 4001 * number of optional arguments 4002 * optional arguments: 4003 * journal_sectors 4004 * interleave_sectors 4005 * buffer_sectors 4006 * journal_watermark 4007 * commit_time 4008 * meta_device 4009 * block_size 4010 * sectors_per_bit 4011 * bitmap_flush_interval 4012 * internal_hash 4013 * journal_crypt 4014 * journal_mac 4015 * recalculate 4016 */ 4017 static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv) 4018 { 4019 struct dm_integrity_c *ic; 4020 char dummy; 4021 int r; 4022 unsigned int extra_args; 4023 struct dm_arg_set as; 4024 static const struct dm_arg _args[] = { 4025 {0, 18, "Invalid number of feature args"}, 4026 }; 4027 unsigned int journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec; 4028 bool should_write_sb; 4029 __u64 threshold; 4030 unsigned long long start; 4031 __s8 log2_sectors_per_bitmap_bit = -1; 4032 __s8 log2_blocks_per_bitmap_bit; 4033 __u64 bits_in_journal; 4034 __u64 n_bitmap_bits; 4035 4036 
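	/*
	 * Illustrative example only (not taken from this source): a possible
	 * dm table line matching the argument list documented above. The
	 * device path, sizes and option values below are hypothetical:
	 *
	 *   0 1638400 integrity /dev/sdb 0 4 J 2 journal_sectors:1024 internal_hash:crc32c
	 *
	 * i.e. map 1638400 sectors of /dev/sdb starting at offset 0, with
	 * 4-byte tags (the crc32c digest size), journaled writes ('J') and
	 * two optional arguments.
	 */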
#define DIRECT_ARGUMENTS 4 4037 4038 if (argc <= DIRECT_ARGUMENTS) { 4039 ti->error = "Invalid argument count"; 4040 return -EINVAL; 4041 } 4042 4043 ic = kzalloc(sizeof(struct dm_integrity_c), GFP_KERNEL); 4044 if (!ic) { 4045 ti->error = "Cannot allocate integrity context"; 4046 return -ENOMEM; 4047 } 4048 ti->private = ic; 4049 ti->per_io_data_size = sizeof(struct dm_integrity_io); 4050 ic->ti = ti; 4051 4052 ic->in_progress = RB_ROOT; 4053 INIT_LIST_HEAD(&ic->wait_list); 4054 init_waitqueue_head(&ic->endio_wait); 4055 bio_list_init(&ic->flush_bio_list); 4056 init_waitqueue_head(&ic->copy_to_journal_wait); 4057 init_completion(&ic->crypto_backoff); 4058 atomic64_set(&ic->number_of_mismatches, 0); 4059 ic->bitmap_flush_interval = BITMAP_FLUSH_INTERVAL; 4060 4061 r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &ic->dev); 4062 if (r) { 4063 ti->error = "Device lookup failed"; 4064 goto bad; 4065 } 4066 4067 if (sscanf(argv[1], "%llu%c", &start, &dummy) != 1 || start != (sector_t)start) { 4068 ti->error = "Invalid starting offset"; 4069 r = -EINVAL; 4070 goto bad; 4071 } 4072 ic->start = start; 4073 4074 if (strcmp(argv[2], "-")) { 4075 if (sscanf(argv[2], "%u%c", &ic->tag_size, &dummy) != 1 || !ic->tag_size) { 4076 ti->error = "Invalid tag size"; 4077 r = -EINVAL; 4078 goto bad; 4079 } 4080 } 4081 4082 if (!strcmp(argv[3], "J") || !strcmp(argv[3], "B") || 4083 !strcmp(argv[3], "D") || !strcmp(argv[3], "R")) { 4084 ic->mode = argv[3][0]; 4085 } else { 4086 ti->error = "Invalid mode (expecting J, B, D, R)"; 4087 r = -EINVAL; 4088 goto bad; 4089 } 4090 4091 journal_sectors = 0; 4092 interleave_sectors = DEFAULT_INTERLEAVE_SECTORS; 4093 buffer_sectors = DEFAULT_BUFFER_SECTORS; 4094 journal_watermark = DEFAULT_JOURNAL_WATERMARK; 4095 sync_msec = DEFAULT_SYNC_MSEC; 4096 ic->sectors_per_block = 1; 4097 4098 as.argc = argc - DIRECT_ARGUMENTS; 4099 as.argv = argv + DIRECT_ARGUMENTS; 4100 r = dm_read_arg_group(_args, &as, &extra_args, &ti->error); 4101 if (r) 4102 goto bad; 4103 4104 while (extra_args--) { 4105 const char *opt_string; 4106 unsigned int val; 4107 unsigned long long llval; 4108 4109 opt_string = dm_shift_arg(&as); 4110 if (!opt_string) { 4111 r = -EINVAL; 4112 ti->error = "Not enough feature arguments"; 4113 goto bad; 4114 } 4115 if (sscanf(opt_string, "journal_sectors:%u%c", &val, &dummy) == 1) 4116 journal_sectors = val ? 
val : 1; 4117 else if (sscanf(opt_string, "interleave_sectors:%u%c", &val, &dummy) == 1) 4118 interleave_sectors = val; 4119 else if (sscanf(opt_string, "buffer_sectors:%u%c", &val, &dummy) == 1) 4120 buffer_sectors = val; 4121 else if (sscanf(opt_string, "journal_watermark:%u%c", &val, &dummy) == 1 && val <= 100) 4122 journal_watermark = val; 4123 else if (sscanf(opt_string, "commit_time:%u%c", &val, &dummy) == 1) 4124 sync_msec = val; 4125 else if (!strncmp(opt_string, "meta_device:", strlen("meta_device:"))) { 4126 if (ic->meta_dev) { 4127 dm_put_device(ti, ic->meta_dev); 4128 ic->meta_dev = NULL; 4129 } 4130 r = dm_get_device(ti, strchr(opt_string, ':') + 1, 4131 dm_table_get_mode(ti->table), &ic->meta_dev); 4132 if (r) { 4133 ti->error = "Device lookup failed"; 4134 goto bad; 4135 } 4136 } else if (sscanf(opt_string, "block_size:%u%c", &val, &dummy) == 1) { 4137 if (val < 1 << SECTOR_SHIFT || 4138 val > MAX_SECTORS_PER_BLOCK << SECTOR_SHIFT || 4139 (val & (val - 1))) { 4140 r = -EINVAL; 4141 ti->error = "Invalid block_size argument"; 4142 goto bad; 4143 } 4144 ic->sectors_per_block = val >> SECTOR_SHIFT; 4145 } else if (sscanf(opt_string, "sectors_per_bit:%llu%c", &llval, &dummy) == 1) { 4146 log2_sectors_per_bitmap_bit = !llval ? 0 : __ilog2_u64(llval); 4147 } else if (sscanf(opt_string, "bitmap_flush_interval:%u%c", &val, &dummy) == 1) { 4148 if (val >= (uint64_t)UINT_MAX * 1000 / HZ) { 4149 r = -EINVAL; 4150 ti->error = "Invalid bitmap_flush_interval argument"; 4151 goto bad; 4152 } 4153 ic->bitmap_flush_interval = msecs_to_jiffies(val); 4154 } else if (!strncmp(opt_string, "internal_hash:", strlen("internal_hash:"))) { 4155 r = get_alg_and_key(opt_string, &ic->internal_hash_alg, &ti->error, 4156 "Invalid internal_hash argument"); 4157 if (r) 4158 goto bad; 4159 } else if (!strncmp(opt_string, "journal_crypt:", strlen("journal_crypt:"))) { 4160 r = get_alg_and_key(opt_string, &ic->journal_crypt_alg, &ti->error, 4161 "Invalid journal_crypt argument"); 4162 if (r) 4163 goto bad; 4164 } else if (!strncmp(opt_string, "journal_mac:", strlen("journal_mac:"))) { 4165 r = get_alg_and_key(opt_string, &ic->journal_mac_alg, &ti->error, 4166 "Invalid journal_mac argument"); 4167 if (r) 4168 goto bad; 4169 } else if (!strcmp(opt_string, "recalculate")) { 4170 ic->recalculate_flag = true; 4171 } else if (!strcmp(opt_string, "reset_recalculate")) { 4172 ic->recalculate_flag = true; 4173 ic->reset_recalculate_flag = true; 4174 } else if (!strcmp(opt_string, "allow_discards")) { 4175 ic->discard = true; 4176 } else if (!strcmp(opt_string, "fix_padding")) { 4177 ic->fix_padding = true; 4178 } else if (!strcmp(opt_string, "fix_hmac")) { 4179 ic->fix_hmac = true; 4180 } else if (!strcmp(opt_string, "legacy_recalculate")) { 4181 ic->legacy_recalculate = true; 4182 } else { 4183 r = -EINVAL; 4184 ti->error = "Invalid argument"; 4185 goto bad; 4186 } 4187 } 4188 4189 ic->data_device_sectors = bdev_nr_sectors(ic->dev->bdev); 4190 if (!ic->meta_dev) 4191 ic->meta_device_sectors = ic->data_device_sectors; 4192 else 4193 ic->meta_device_sectors = bdev_nr_sectors(ic->meta_dev->bdev); 4194 4195 if (!journal_sectors) { 4196 journal_sectors = min((sector_t)DEFAULT_MAX_JOURNAL_SECTORS, 4197 ic->data_device_sectors >> DEFAULT_JOURNAL_SIZE_FACTOR); 4198 } 4199 4200 if (!buffer_sectors) 4201 buffer_sectors = 1; 4202 ic->log2_buffer_sectors = min((int)__fls(buffer_sectors), 31 - SECTOR_SHIFT); 4203 4204 r = get_mac(&ic->internal_hash, &ic->internal_hash_alg, &ti->error, 4205 "Invalid internal hash", "Error setting 
internal hash key"); 4206 if (r) 4207 goto bad; 4208 4209 r = get_mac(&ic->journal_mac, &ic->journal_mac_alg, &ti->error, 4210 "Invalid journal mac", "Error setting journal mac key"); 4211 if (r) 4212 goto bad; 4213 4214 if (!ic->tag_size) { 4215 if (!ic->internal_hash) { 4216 ti->error = "Unknown tag size"; 4217 r = -EINVAL; 4218 goto bad; 4219 } 4220 ic->tag_size = crypto_shash_digestsize(ic->internal_hash); 4221 } 4222 if (ic->tag_size > MAX_TAG_SIZE) { 4223 ti->error = "Too big tag size"; 4224 r = -EINVAL; 4225 goto bad; 4226 } 4227 if (!(ic->tag_size & (ic->tag_size - 1))) 4228 ic->log2_tag_size = __ffs(ic->tag_size); 4229 else 4230 ic->log2_tag_size = -1; 4231 4232 if (ic->mode == 'B' && !ic->internal_hash) { 4233 r = -EINVAL; 4234 ti->error = "Bitmap mode can be only used with internal hash"; 4235 goto bad; 4236 } 4237 4238 if (ic->discard && !ic->internal_hash) { 4239 r = -EINVAL; 4240 ti->error = "Discard can be only used with internal hash"; 4241 goto bad; 4242 } 4243 4244 ic->autocommit_jiffies = msecs_to_jiffies(sync_msec); 4245 ic->autocommit_msec = sync_msec; 4246 timer_setup(&ic->autocommit_timer, autocommit_fn, 0); 4247 4248 ic->io = dm_io_client_create(); 4249 if (IS_ERR(ic->io)) { 4250 r = PTR_ERR(ic->io); 4251 ic->io = NULL; 4252 ti->error = "Cannot allocate dm io"; 4253 goto bad; 4254 } 4255 4256 r = mempool_init_slab_pool(&ic->journal_io_mempool, JOURNAL_IO_MEMPOOL, journal_io_cache); 4257 if (r) { 4258 ti->error = "Cannot allocate mempool"; 4259 goto bad; 4260 } 4261 4262 ic->metadata_wq = alloc_workqueue("dm-integrity-metadata", 4263 WQ_MEM_RECLAIM, METADATA_WORKQUEUE_MAX_ACTIVE); 4264 if (!ic->metadata_wq) { 4265 ti->error = "Cannot allocate workqueue"; 4266 r = -ENOMEM; 4267 goto bad; 4268 } 4269 4270 /* 4271 * If this workqueue were percpu, it would cause bio reordering 4272 * and reduced performance. 
4273 */ 4274 ic->wait_wq = alloc_workqueue("dm-integrity-wait", WQ_MEM_RECLAIM | WQ_UNBOUND, 1); 4275 if (!ic->wait_wq) { 4276 ti->error = "Cannot allocate workqueue"; 4277 r = -ENOMEM; 4278 goto bad; 4279 } 4280 4281 ic->offload_wq = alloc_workqueue("dm-integrity-offload", WQ_MEM_RECLAIM, 4282 METADATA_WORKQUEUE_MAX_ACTIVE); 4283 if (!ic->offload_wq) { 4284 ti->error = "Cannot allocate workqueue"; 4285 r = -ENOMEM; 4286 goto bad; 4287 } 4288 4289 ic->commit_wq = alloc_workqueue("dm-integrity-commit", WQ_MEM_RECLAIM, 1); 4290 if (!ic->commit_wq) { 4291 ti->error = "Cannot allocate workqueue"; 4292 r = -ENOMEM; 4293 goto bad; 4294 } 4295 INIT_WORK(&ic->commit_work, integrity_commit); 4296 4297 if (ic->mode == 'J' || ic->mode == 'B') { 4298 ic->writer_wq = alloc_workqueue("dm-integrity-writer", WQ_MEM_RECLAIM, 1); 4299 if (!ic->writer_wq) { 4300 ti->error = "Cannot allocate workqueue"; 4301 r = -ENOMEM; 4302 goto bad; 4303 } 4304 INIT_WORK(&ic->writer_work, integrity_writer); 4305 } 4306 4307 ic->sb = alloc_pages_exact(SB_SECTORS << SECTOR_SHIFT, GFP_KERNEL); 4308 if (!ic->sb) { 4309 r = -ENOMEM; 4310 ti->error = "Cannot allocate superblock area"; 4311 goto bad; 4312 } 4313 4314 r = sync_rw_sb(ic, REQ_OP_READ); 4315 if (r) { 4316 ti->error = "Error reading superblock"; 4317 goto bad; 4318 } 4319 should_write_sb = false; 4320 if (memcmp(ic->sb->magic, SB_MAGIC, 8)) { 4321 if (ic->mode != 'R') { 4322 if (memchr_inv(ic->sb, 0, SB_SECTORS << SECTOR_SHIFT)) { 4323 r = -EINVAL; 4324 ti->error = "The device is not initialized"; 4325 goto bad; 4326 } 4327 } 4328 4329 r = initialize_superblock(ic, journal_sectors, interleave_sectors); 4330 if (r) { 4331 ti->error = "Could not initialize superblock"; 4332 goto bad; 4333 } 4334 if (ic->mode != 'R') 4335 should_write_sb = true; 4336 } 4337 4338 if (!ic->sb->version || ic->sb->version > SB_VERSION_5) { 4339 r = -EINVAL; 4340 ti->error = "Unknown version"; 4341 goto bad; 4342 } 4343 if (le16_to_cpu(ic->sb->integrity_tag_size) != ic->tag_size) { 4344 r = -EINVAL; 4345 ti->error = "Tag size doesn't match the information in superblock"; 4346 goto bad; 4347 } 4348 if (ic->sb->log2_sectors_per_block != __ffs(ic->sectors_per_block)) { 4349 r = -EINVAL; 4350 ti->error = "Block size doesn't match the information in superblock"; 4351 goto bad; 4352 } 4353 if (!le32_to_cpu(ic->sb->journal_sections)) { 4354 r = -EINVAL; 4355 ti->error = "Corrupted superblock, journal_sections is 0"; 4356 goto bad; 4357 } 4358 /* make sure that ti->max_io_len doesn't overflow */ 4359 if (!ic->meta_dev) { 4360 if (ic->sb->log2_interleave_sectors < MIN_LOG2_INTERLEAVE_SECTORS || 4361 ic->sb->log2_interleave_sectors > MAX_LOG2_INTERLEAVE_SECTORS) { 4362 r = -EINVAL; 4363 ti->error = "Invalid interleave_sectors in the superblock"; 4364 goto bad; 4365 } 4366 } else { 4367 if (ic->sb->log2_interleave_sectors) { 4368 r = -EINVAL; 4369 ti->error = "Invalid interleave_sectors in the superblock"; 4370 goto bad; 4371 } 4372 } 4373 if (!!(ic->sb->flags & cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC)) != !!ic->journal_mac_alg.alg_string) { 4374 r = -EINVAL; 4375 ti->error = "Journal mac mismatch"; 4376 goto bad; 4377 } 4378 4379 get_provided_data_sectors(ic); 4380 if (!ic->provided_data_sectors) { 4381 r = -EINVAL; 4382 ti->error = "The device is too small"; 4383 goto bad; 4384 } 4385 4386 try_smaller_buffer: 4387 r = calculate_device_limits(ic); 4388 if (r) { 4389 if (ic->meta_dev) { 4390 if (ic->log2_buffer_sectors > 3) { 4391 ic->log2_buffer_sectors--; 4392 goto try_smaller_buffer; 4393 } 4394 } 4395 
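		/*
		 * calculate_device_limits() still fails: either there is no
		 * separate metadata device whose buffer could be shrunk, or the
		 * buffer is already at its minimum of 1 << 3 sectors, so the
		 * requested layout does not fit on the device.
		 */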
ti->error = "The device is too small"; 4396 goto bad; 4397 } 4398 4399 if (log2_sectors_per_bitmap_bit < 0) 4400 log2_sectors_per_bitmap_bit = __fls(DEFAULT_SECTORS_PER_BITMAP_BIT); 4401 if (log2_sectors_per_bitmap_bit < ic->sb->log2_sectors_per_block) 4402 log2_sectors_per_bitmap_bit = ic->sb->log2_sectors_per_block; 4403 4404 bits_in_journal = ((__u64)ic->journal_section_sectors * ic->journal_sections) << (SECTOR_SHIFT + 3); 4405 if (bits_in_journal > UINT_MAX) 4406 bits_in_journal = UINT_MAX; 4407 while (bits_in_journal < (ic->provided_data_sectors + ((sector_t)1 << log2_sectors_per_bitmap_bit) - 1) >> log2_sectors_per_bitmap_bit) 4408 log2_sectors_per_bitmap_bit++; 4409 4410 log2_blocks_per_bitmap_bit = log2_sectors_per_bitmap_bit - ic->sb->log2_sectors_per_block; 4411 ic->log2_blocks_per_bitmap_bit = log2_blocks_per_bitmap_bit; 4412 if (should_write_sb) 4413 ic->sb->log2_blocks_per_bitmap_bit = log2_blocks_per_bitmap_bit; 4414 4415 n_bitmap_bits = ((ic->provided_data_sectors >> ic->sb->log2_sectors_per_block) 4416 + (((sector_t)1 << log2_blocks_per_bitmap_bit) - 1)) >> log2_blocks_per_bitmap_bit; 4417 ic->n_bitmap_blocks = DIV_ROUND_UP(n_bitmap_bits, BITMAP_BLOCK_SIZE * 8); 4418 4419 if (!ic->meta_dev) 4420 ic->log2_buffer_sectors = min(ic->log2_buffer_sectors, (__u8)__ffs(ic->metadata_run)); 4421 4422 if (ti->len > ic->provided_data_sectors) { 4423 r = -EINVAL; 4424 ti->error = "Not enough provided sectors for requested mapping size"; 4425 goto bad; 4426 } 4427 4428 4429 threshold = (__u64)ic->journal_entries * (100 - journal_watermark); 4430 threshold += 50; 4431 do_div(threshold, 100); 4432 ic->free_sectors_threshold = threshold; 4433 4434 DEBUG_print("initialized:\n"); 4435 DEBUG_print(" integrity_tag_size %u\n", le16_to_cpu(ic->sb->integrity_tag_size)); 4436 DEBUG_print(" journal_entry_size %u\n", ic->journal_entry_size); 4437 DEBUG_print(" journal_entries_per_sector %u\n", ic->journal_entries_per_sector); 4438 DEBUG_print(" journal_section_entries %u\n", ic->journal_section_entries); 4439 DEBUG_print(" journal_section_sectors %u\n", ic->journal_section_sectors); 4440 DEBUG_print(" journal_sections %u\n", (unsigned int)le32_to_cpu(ic->sb->journal_sections)); 4441 DEBUG_print(" journal_entries %u\n", ic->journal_entries); 4442 DEBUG_print(" log2_interleave_sectors %d\n", ic->sb->log2_interleave_sectors); 4443 DEBUG_print(" data_device_sectors 0x%llx\n", bdev_nr_sectors(ic->dev->bdev)); 4444 DEBUG_print(" initial_sectors 0x%x\n", ic->initial_sectors); 4445 DEBUG_print(" metadata_run 0x%x\n", ic->metadata_run); 4446 DEBUG_print(" log2_metadata_run %d\n", ic->log2_metadata_run); 4447 DEBUG_print(" provided_data_sectors 0x%llx (%llu)\n", ic->provided_data_sectors, ic->provided_data_sectors); 4448 DEBUG_print(" log2_buffer_sectors %u\n", ic->log2_buffer_sectors); 4449 DEBUG_print(" bits_in_journal %llu\n", bits_in_journal); 4450 4451 if (ic->recalculate_flag && !(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))) { 4452 ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING); 4453 ic->sb->recalc_sector = cpu_to_le64(0); 4454 } 4455 4456 if (ic->internal_hash) { 4457 size_t recalc_tags_size; 4458 4459 ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1); 4460 if (!ic->recalc_wq) { 4461 ti->error = "Cannot allocate workqueue"; 4462 r = -ENOMEM; 4463 goto bad; 4464 } 4465 INIT_WORK(&ic->recalc_work, integrity_recalc); 4466 ic->recalc_buffer = vmalloc(RECALC_SECTORS << SECTOR_SHIFT); 4467 if (!ic->recalc_buffer) { 4468 ti->error = "Cannot allocate buffer for 
recalculating"; 4469 r = -ENOMEM; 4470 goto bad; 4471 } 4472 recalc_tags_size = (RECALC_SECTORS >> ic->sb->log2_sectors_per_block) * ic->tag_size; 4473 if (crypto_shash_digestsize(ic->internal_hash) > ic->tag_size) 4474 recalc_tags_size += crypto_shash_digestsize(ic->internal_hash) - ic->tag_size; 4475 ic->recalc_tags = kvmalloc(recalc_tags_size, GFP_KERNEL); 4476 if (!ic->recalc_tags) { 4477 ti->error = "Cannot allocate tags for recalculating"; 4478 r = -ENOMEM; 4479 goto bad; 4480 } 4481 } else { 4482 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) { 4483 ti->error = "Recalculate can only be specified with internal_hash"; 4484 r = -EINVAL; 4485 goto bad; 4486 } 4487 } 4488 4489 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) && 4490 le64_to_cpu(ic->sb->recalc_sector) < ic->provided_data_sectors && 4491 dm_integrity_disable_recalculate(ic)) { 4492 ti->error = "Recalculating with HMAC is disabled for security reasons - if you really need it, use the argument \"legacy_recalculate\""; 4493 r = -EOPNOTSUPP; 4494 goto bad; 4495 } 4496 4497 ic->bufio = dm_bufio_client_create(ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev, 4498 1U << (SECTOR_SHIFT + ic->log2_buffer_sectors), 1, 0, NULL, NULL, 0); 4499 if (IS_ERR(ic->bufio)) { 4500 r = PTR_ERR(ic->bufio); 4501 ti->error = "Cannot initialize dm-bufio"; 4502 ic->bufio = NULL; 4503 goto bad; 4504 } 4505 dm_bufio_set_sector_offset(ic->bufio, ic->start + ic->initial_sectors); 4506 4507 if (ic->mode != 'R') { 4508 r = create_journal(ic, &ti->error); 4509 if (r) 4510 goto bad; 4511 4512 } 4513 4514 if (ic->mode == 'B') { 4515 unsigned int i; 4516 unsigned int n_bitmap_pages = DIV_ROUND_UP(ic->n_bitmap_blocks, PAGE_SIZE / BITMAP_BLOCK_SIZE); 4517 4518 ic->recalc_bitmap = dm_integrity_alloc_page_list(n_bitmap_pages); 4519 if (!ic->recalc_bitmap) { 4520 r = -ENOMEM; 4521 goto bad; 4522 } 4523 ic->may_write_bitmap = dm_integrity_alloc_page_list(n_bitmap_pages); 4524 if (!ic->may_write_bitmap) { 4525 r = -ENOMEM; 4526 goto bad; 4527 } 4528 ic->bbs = kvmalloc_array(ic->n_bitmap_blocks, sizeof(struct bitmap_block_status), GFP_KERNEL); 4529 if (!ic->bbs) { 4530 r = -ENOMEM; 4531 goto bad; 4532 } 4533 INIT_DELAYED_WORK(&ic->bitmap_flush_work, bitmap_flush_work); 4534 for (i = 0; i < ic->n_bitmap_blocks; i++) { 4535 struct bitmap_block_status *bbs = &ic->bbs[i]; 4536 unsigned int sector, pl_index, pl_offset; 4537 4538 INIT_WORK(&bbs->work, bitmap_block_work); 4539 bbs->ic = ic; 4540 bbs->idx = i; 4541 bio_list_init(&bbs->bio_queue); 4542 spin_lock_init(&bbs->bio_queue_lock); 4543 4544 sector = i * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT); 4545 pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT); 4546 pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1); 4547 4548 bbs->bitmap = lowmem_page_address(ic->journal[pl_index].page) + pl_offset; 4549 } 4550 } 4551 4552 if (should_write_sb) { 4553 init_journal(ic, 0, ic->journal_sections, 0); 4554 r = dm_integrity_failed(ic); 4555 if (unlikely(r)) { 4556 ti->error = "Error initializing journal"; 4557 goto bad; 4558 } 4559 r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA); 4560 if (r) { 4561 ti->error = "Error initializing superblock"; 4562 goto bad; 4563 } 4564 ic->just_formatted = true; 4565 } 4566 4567 if (!ic->meta_dev) { 4568 r = dm_set_target_max_io_len(ti, 1U << ic->sb->log2_interleave_sectors); 4569 if (r) 4570 goto bad; 4571 } 4572 if (ic->mode == 'B') { 4573 unsigned int max_io_len; 4574 4575 max_io_len = ((sector_t)ic->sectors_per_block << ic->log2_blocks_per_bitmap_bit) * (BITMAP_BLOCK_SIZE * 8); 4576 if 
(!max_io_len) 4577 max_io_len = 1U << 31; 4578 DEBUG_print("max_io_len: old %u, new %u\n", ti->max_io_len, max_io_len); 4579 if (!ti->max_io_len || ti->max_io_len > max_io_len) { 4580 r = dm_set_target_max_io_len(ti, max_io_len); 4581 if (r) 4582 goto bad; 4583 } 4584 } 4585 4586 if (!ic->internal_hash) 4587 dm_integrity_set(ti, ic); 4588 4589 ti->num_flush_bios = 1; 4590 ti->flush_supported = true; 4591 if (ic->discard) 4592 ti->num_discard_bios = 1; 4593 4594 dm_audit_log_ctr(DM_MSG_PREFIX, ti, 1); 4595 return 0; 4596 4597 bad: 4598 dm_audit_log_ctr(DM_MSG_PREFIX, ti, 0); 4599 dm_integrity_dtr(ti); 4600 return r; 4601 } 4602 4603 static void dm_integrity_dtr(struct dm_target *ti) 4604 { 4605 struct dm_integrity_c *ic = ti->private; 4606 4607 BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress)); 4608 BUG_ON(!list_empty(&ic->wait_list)); 4609 4610 if (ic->mode == 'B') 4611 cancel_delayed_work_sync(&ic->bitmap_flush_work); 4612 if (ic->metadata_wq) 4613 destroy_workqueue(ic->metadata_wq); 4614 if (ic->wait_wq) 4615 destroy_workqueue(ic->wait_wq); 4616 if (ic->offload_wq) 4617 destroy_workqueue(ic->offload_wq); 4618 if (ic->commit_wq) 4619 destroy_workqueue(ic->commit_wq); 4620 if (ic->writer_wq) 4621 destroy_workqueue(ic->writer_wq); 4622 if (ic->recalc_wq) 4623 destroy_workqueue(ic->recalc_wq); 4624 vfree(ic->recalc_buffer); 4625 kvfree(ic->recalc_tags); 4626 kvfree(ic->bbs); 4627 if (ic->bufio) 4628 dm_bufio_client_destroy(ic->bufio); 4629 mempool_exit(&ic->journal_io_mempool); 4630 if (ic->io) 4631 dm_io_client_destroy(ic->io); 4632 if (ic->dev) 4633 dm_put_device(ti, ic->dev); 4634 if (ic->meta_dev) 4635 dm_put_device(ti, ic->meta_dev); 4636 dm_integrity_free_page_list(ic->journal); 4637 dm_integrity_free_page_list(ic->journal_io); 4638 dm_integrity_free_page_list(ic->journal_xor); 4639 dm_integrity_free_page_list(ic->recalc_bitmap); 4640 dm_integrity_free_page_list(ic->may_write_bitmap); 4641 if (ic->journal_scatterlist) 4642 dm_integrity_free_journal_scatterlist(ic, ic->journal_scatterlist); 4643 if (ic->journal_io_scatterlist) 4644 dm_integrity_free_journal_scatterlist(ic, ic->journal_io_scatterlist); 4645 if (ic->sk_requests) { 4646 unsigned int i; 4647 4648 for (i = 0; i < ic->journal_sections; i++) { 4649 struct skcipher_request *req; 4650 4651 req = ic->sk_requests[i]; 4652 if (req) { 4653 kfree_sensitive(req->iv); 4654 skcipher_request_free(req); 4655 } 4656 } 4657 kvfree(ic->sk_requests); 4658 } 4659 kvfree(ic->journal_tree); 4660 if (ic->sb) 4661 free_pages_exact(ic->sb, SB_SECTORS << SECTOR_SHIFT); 4662 4663 if (ic->internal_hash) 4664 crypto_free_shash(ic->internal_hash); 4665 free_alg(&ic->internal_hash_alg); 4666 4667 if (ic->journal_crypt) 4668 crypto_free_skcipher(ic->journal_crypt); 4669 free_alg(&ic->journal_crypt_alg); 4670 4671 if (ic->journal_mac) 4672 crypto_free_shash(ic->journal_mac); 4673 free_alg(&ic->journal_mac_alg); 4674 4675 kfree(ic); 4676 dm_audit_log_dtr(DM_MSG_PREFIX, ti, 1); 4677 } 4678 4679 static struct target_type integrity_target = { 4680 .name = "integrity", 4681 .version = {1, 10, 0}, 4682 .module = THIS_MODULE, 4683 .features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY, 4684 .ctr = dm_integrity_ctr, 4685 .dtr = dm_integrity_dtr, 4686 .map = dm_integrity_map, 4687 .postsuspend = dm_integrity_postsuspend, 4688 .resume = dm_integrity_resume, 4689 .status = dm_integrity_status, 4690 .iterate_devices = dm_integrity_iterate_devices, 4691 .io_hints = dm_integrity_io_hints, 4692 }; 4693 4694 static int __init dm_integrity_init(void) 4695 { 4696 int r; 4697 4698 
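	/* slab cache backing ic->journal_io_mempool (struct journal_io objects) */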
journal_io_cache = kmem_cache_create("integrity_journal_io", 4699 sizeof(struct journal_io), 0, 0, NULL); 4700 if (!journal_io_cache) { 4701 DMERR("can't allocate journal io cache"); 4702 return -ENOMEM; 4703 } 4704 4705 r = dm_register_target(&integrity_target); 4706 if (r < 0) { 4707 kmem_cache_destroy(journal_io_cache); 4708 return r; 4709 } 4710 4711 return 0; 4712 } 4713 4714 static void __exit dm_integrity_exit(void) 4715 { 4716 dm_unregister_target(&integrity_target); 4717 kmem_cache_destroy(journal_io_cache); 4718 } 4719 4720 module_init(dm_integrity_init); 4721 module_exit(dm_integrity_exit); 4722 4723 MODULE_AUTHOR("Milan Broz"); 4724 MODULE_AUTHOR("Mikulas Patocka"); 4725 MODULE_DESCRIPTION(DM_NAME " target for integrity tags extension"); 4726 MODULE_LICENSE("GPL"); 4727