1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2016-2017 Red Hat, Inc. All rights reserved. 4 * Copyright (C) 2016-2017 Milan Broz 5 * Copyright (C) 2016-2017 Mikulas Patocka 6 * 7 * This file is released under the GPL. 8 */ 9 10 #include "dm-bio-record.h" 11 12 #include <linux/compiler.h> 13 #include <linux/module.h> 14 #include <linux/device-mapper.h> 15 #include <linux/dm-io.h> 16 #include <linux/vmalloc.h> 17 #include <linux/sort.h> 18 #include <linux/rbtree.h> 19 #include <linux/delay.h> 20 #include <linux/random.h> 21 #include <linux/reboot.h> 22 #include <crypto/hash.h> 23 #include <crypto/skcipher.h> 24 #include <linux/async_tx.h> 25 #include <linux/dm-bufio.h> 26 27 #include "dm-audit.h" 28 29 #define DM_MSG_PREFIX "integrity" 30 31 #define DEFAULT_INTERLEAVE_SECTORS 32768 32 #define DEFAULT_JOURNAL_SIZE_FACTOR 7 33 #define DEFAULT_SECTORS_PER_BITMAP_BIT 32768 34 #define DEFAULT_BUFFER_SECTORS 128 35 #define DEFAULT_JOURNAL_WATERMARK 50 36 #define DEFAULT_SYNC_MSEC 10000 37 #define DEFAULT_MAX_JOURNAL_SECTORS (IS_ENABLED(CONFIG_64BIT) ? 131072 : 8192) 38 #define MIN_LOG2_INTERLEAVE_SECTORS 3 39 #define MAX_LOG2_INTERLEAVE_SECTORS 31 40 #define METADATA_WORKQUEUE_MAX_ACTIVE 16 41 #define RECALC_SECTORS (IS_ENABLED(CONFIG_64BIT) ? 32768 : 2048) 42 #define RECALC_WRITE_SUPER 16 43 #define BITMAP_BLOCK_SIZE 4096 /* don't change it */ 44 #define BITMAP_FLUSH_INTERVAL (10 * HZ) 45 #define DISCARD_FILLER 0xf6 46 #define SALT_SIZE 16 47 48 /* 49 * Warning - DEBUG_PRINT prints security-sensitive data to the log, 50 * so it should not be enabled in the official kernel 51 */ 52 //#define DEBUG_PRINT 53 //#define INTERNAL_VERIFY 54 55 /* 56 * On disk structures 57 */ 58 59 #define SB_MAGIC "integrt" 60 #define SB_VERSION_1 1 61 #define SB_VERSION_2 2 62 #define SB_VERSION_3 3 63 #define SB_VERSION_4 4 64 #define SB_VERSION_5 5 65 #define SB_SECTORS 8 66 #define MAX_SECTORS_PER_BLOCK 8 67 68 struct superblock { 69 __u8 magic[8]; 70 __u8 version; 71 __u8 log2_interleave_sectors; 72 __le16 integrity_tag_size; 73 __le32 journal_sections; 74 __le64 provided_data_sectors; /* userspace uses this value */ 75 __le32 flags; 76 __u8 log2_sectors_per_block; 77 __u8 log2_blocks_per_bitmap_bit; 78 __u8 pad[2]; 79 __le64 recalc_sector; 80 __u8 pad2[8]; 81 __u8 salt[SALT_SIZE]; 82 }; 83 84 #define SB_FLAG_HAVE_JOURNAL_MAC 0x1 85 #define SB_FLAG_RECALCULATING 0x2 86 #define SB_FLAG_DIRTY_BITMAP 0x4 87 #define SB_FLAG_FIXED_PADDING 0x8 88 #define SB_FLAG_FIXED_HMAC 0x10 89 90 #define JOURNAL_ENTRY_ROUNDUP 8 91 92 typedef __le64 commit_id_t; 93 #define JOURNAL_MAC_PER_SECTOR 8 94 95 struct journal_entry { 96 union { 97 struct { 98 __le32 sector_lo; 99 __le32 sector_hi; 100 } s; 101 __le64 sector; 102 } u; 103 commit_id_t last_bytes[]; 104 /* __u8 tag[0]; */ 105 }; 106 107 #define journal_entry_tag(ic, je) ((__u8 *)&(je)->last_bytes[(ic)->sectors_per_block]) 108 109 #if BITS_PER_LONG == 64 110 #define journal_entry_set_sector(je, x) do { smp_wmb(); WRITE_ONCE((je)->u.sector, cpu_to_le64(x)); } while (0) 111 #else 112 #define journal_entry_set_sector(je, x) do { (je)->u.s.sector_lo = cpu_to_le32(x); smp_wmb(); WRITE_ONCE((je)->u.s.sector_hi, cpu_to_le32((x) >> 32)); } while (0) 113 #endif 114 #define journal_entry_get_sector(je) le64_to_cpu((je)->u.sector) 115 #define journal_entry_is_unused(je) ((je)->u.s.sector_hi == cpu_to_le32(-1)) 116 #define journal_entry_set_unused(je) ((je)->u.s.sector_hi = cpu_to_le32(-1)) 117 #define journal_entry_is_inprogress(je) ((je)->u.s.sector_hi == 
cpu_to_le32(-2)) 118 #define journal_entry_set_inprogress(je) ((je)->u.s.sector_hi = cpu_to_le32(-2)) 119 120 #define JOURNAL_BLOCK_SECTORS 8 121 #define JOURNAL_SECTOR_DATA ((1 << SECTOR_SHIFT) - sizeof(commit_id_t)) 122 #define JOURNAL_MAC_SIZE (JOURNAL_MAC_PER_SECTOR * JOURNAL_BLOCK_SECTORS) 123 124 struct journal_sector { 125 struct_group(sectors, 126 __u8 entries[JOURNAL_SECTOR_DATA - JOURNAL_MAC_PER_SECTOR]; 127 __u8 mac[JOURNAL_MAC_PER_SECTOR]; 128 ); 129 commit_id_t commit_id; 130 }; 131 132 #define MAX_TAG_SIZE (JOURNAL_SECTOR_DATA - JOURNAL_MAC_PER_SECTOR - offsetof(struct journal_entry, last_bytes[MAX_SECTORS_PER_BLOCK])) 133 134 #define METADATA_PADDING_SECTORS 8 135 136 #define N_COMMIT_IDS 4 137 138 static unsigned char prev_commit_seq(unsigned char seq) 139 { 140 return (seq + N_COMMIT_IDS - 1) % N_COMMIT_IDS; 141 } 142 143 static unsigned char next_commit_seq(unsigned char seq) 144 { 145 return (seq + 1) % N_COMMIT_IDS; 146 } 147 148 /* 149 * In-memory structures 150 */ 151 152 struct journal_node { 153 struct rb_node node; 154 sector_t sector; 155 }; 156 157 struct alg_spec { 158 char *alg_string; 159 char *key_string; 160 __u8 *key; 161 unsigned int key_size; 162 }; 163 164 struct dm_integrity_c { 165 struct dm_dev *dev; 166 struct dm_dev *meta_dev; 167 unsigned int tag_size; 168 __s8 log2_tag_size; 169 sector_t start; 170 mempool_t journal_io_mempool; 171 struct dm_io_client *io; 172 struct dm_bufio_client *bufio; 173 struct workqueue_struct *metadata_wq; 174 struct superblock *sb; 175 unsigned int journal_pages; 176 unsigned int n_bitmap_blocks; 177 178 struct page_list *journal; 179 struct page_list *journal_io; 180 struct page_list *journal_xor; 181 struct page_list *recalc_bitmap; 182 struct page_list *may_write_bitmap; 183 struct bitmap_block_status *bbs; 184 unsigned int bitmap_flush_interval; 185 int synchronous_mode; 186 struct bio_list synchronous_bios; 187 struct delayed_work bitmap_flush_work; 188 189 struct crypto_skcipher *journal_crypt; 190 struct scatterlist **journal_scatterlist; 191 struct scatterlist **journal_io_scatterlist; 192 struct skcipher_request **sk_requests; 193 194 struct crypto_shash *journal_mac; 195 196 struct journal_node *journal_tree; 197 struct rb_root journal_tree_root; 198 199 sector_t provided_data_sectors; 200 201 unsigned short journal_entry_size; 202 unsigned char journal_entries_per_sector; 203 unsigned char journal_section_entries; 204 unsigned short journal_section_sectors; 205 unsigned int journal_sections; 206 unsigned int journal_entries; 207 sector_t data_device_sectors; 208 sector_t meta_device_sectors; 209 unsigned int initial_sectors; 210 unsigned int metadata_run; 211 __s8 log2_metadata_run; 212 __u8 log2_buffer_sectors; 213 __u8 sectors_per_block; 214 __u8 log2_blocks_per_bitmap_bit; 215 216 unsigned char mode; 217 218 int failed; 219 220 struct crypto_shash *internal_hash; 221 222 struct dm_target *ti; 223 224 /* these variables are locked with endio_wait.lock */ 225 struct rb_root in_progress; 226 struct list_head wait_list; 227 wait_queue_head_t endio_wait; 228 struct workqueue_struct *wait_wq; 229 struct workqueue_struct *offload_wq; 230 231 unsigned char commit_seq; 232 commit_id_t commit_ids[N_COMMIT_IDS]; 233 234 unsigned int committed_section; 235 unsigned int n_committed_sections; 236 237 unsigned int uncommitted_section; 238 unsigned int n_uncommitted_sections; 239 240 unsigned int free_section; 241 unsigned char free_section_entry; 242 unsigned int free_sectors; 243 244 unsigned int free_sectors_threshold; 
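	/*
	 * Deferred work (a descriptive note; see the handlers in this file):
	 * commit_work is queued on commit_wq by the autocommit timer, by
	 * flush bios and when free journal space runs low; writer_work and
	 * recalc_work, declared below, drive journal write-back and tag
	 * recalculation.
	 */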
245 246 struct workqueue_struct *commit_wq; 247 struct work_struct commit_work; 248 249 struct workqueue_struct *writer_wq; 250 struct work_struct writer_work; 251 252 struct workqueue_struct *recalc_wq; 253 struct work_struct recalc_work; 254 255 struct bio_list flush_bio_list; 256 257 unsigned long autocommit_jiffies; 258 struct timer_list autocommit_timer; 259 unsigned int autocommit_msec; 260 261 wait_queue_head_t copy_to_journal_wait; 262 263 struct completion crypto_backoff; 264 265 bool wrote_to_journal; 266 bool journal_uptodate; 267 bool just_formatted; 268 bool recalculate_flag; 269 bool reset_recalculate_flag; 270 bool discard; 271 bool fix_padding; 272 bool fix_hmac; 273 bool legacy_recalculate; 274 275 struct alg_spec internal_hash_alg; 276 struct alg_spec journal_crypt_alg; 277 struct alg_spec journal_mac_alg; 278 279 atomic64_t number_of_mismatches; 280 281 struct notifier_block reboot_notifier; 282 }; 283 284 struct dm_integrity_range { 285 sector_t logical_sector; 286 sector_t n_sectors; 287 bool waiting; 288 union { 289 struct rb_node node; 290 struct { 291 struct task_struct *task; 292 struct list_head wait_entry; 293 }; 294 }; 295 }; 296 297 struct dm_integrity_io { 298 struct work_struct work; 299 300 struct dm_integrity_c *ic; 301 enum req_op op; 302 bool fua; 303 304 struct dm_integrity_range range; 305 306 sector_t metadata_block; 307 unsigned int metadata_offset; 308 309 atomic_t in_flight; 310 blk_status_t bi_status; 311 312 struct completion *completion; 313 314 struct dm_bio_details bio_details; 315 }; 316 317 struct journal_completion { 318 struct dm_integrity_c *ic; 319 atomic_t in_flight; 320 struct completion comp; 321 }; 322 323 struct journal_io { 324 struct dm_integrity_range range; 325 struct journal_completion *comp; 326 }; 327 328 struct bitmap_block_status { 329 struct work_struct work; 330 struct dm_integrity_c *ic; 331 unsigned int idx; 332 unsigned long *bitmap; 333 struct bio_list bio_queue; 334 spinlock_t bio_queue_lock; 335 336 }; 337 338 static struct kmem_cache *journal_io_cache; 339 340 #define JOURNAL_IO_MEMPOOL 32 341 342 #ifdef DEBUG_PRINT 343 #define DEBUG_print(x, ...) printk(KERN_DEBUG x, ##__VA_ARGS__) 344 #define DEBUG_bytes(bytes, len, msg, ...) printk(KERN_DEBUG msg "%s%*ph\n", ##__VA_ARGS__, \ 345 len ? ": " : "", len, bytes) 346 #else 347 #define DEBUG_print(x, ...) do { } while (0) 348 #define DEBUG_bytes(bytes, len, msg, ...) 
do { } while (0) 349 #endif 350 351 static void dm_integrity_prepare(struct request *rq) 352 { 353 } 354 355 static void dm_integrity_complete(struct request *rq, unsigned int nr_bytes) 356 { 357 } 358 359 /* 360 * DM Integrity profile, protection is performed layer above (dm-crypt) 361 */ 362 static const struct blk_integrity_profile dm_integrity_profile = { 363 .name = "DM-DIF-EXT-TAG", 364 .generate_fn = NULL, 365 .verify_fn = NULL, 366 .prepare_fn = dm_integrity_prepare, 367 .complete_fn = dm_integrity_complete, 368 }; 369 370 static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map); 371 static void integrity_bio_wait(struct work_struct *w); 372 static void dm_integrity_dtr(struct dm_target *ti); 373 374 static void dm_integrity_io_error(struct dm_integrity_c *ic, const char *msg, int err) 375 { 376 if (err == -EILSEQ) 377 atomic64_inc(&ic->number_of_mismatches); 378 if (!cmpxchg(&ic->failed, 0, err)) 379 DMERR("Error on %s: %d", msg, err); 380 } 381 382 static int dm_integrity_failed(struct dm_integrity_c *ic) 383 { 384 return READ_ONCE(ic->failed); 385 } 386 387 static bool dm_integrity_disable_recalculate(struct dm_integrity_c *ic) 388 { 389 if (ic->legacy_recalculate) 390 return false; 391 if (!(ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) ? 392 ic->internal_hash_alg.key || ic->journal_mac_alg.key : 393 ic->internal_hash_alg.key && !ic->journal_mac_alg.key) 394 return true; 395 return false; 396 } 397 398 static commit_id_t dm_integrity_commit_id(struct dm_integrity_c *ic, unsigned int i, 399 unsigned int j, unsigned char seq) 400 { 401 /* 402 * Xor the number with section and sector, so that if a piece of 403 * journal is written at wrong place, it is detected. 404 */ 405 return ic->commit_ids[seq] ^ cpu_to_le64(((__u64)i << 32) ^ j); 406 } 407 408 static void get_area_and_offset(struct dm_integrity_c *ic, sector_t data_sector, 409 sector_t *area, sector_t *offset) 410 { 411 if (!ic->meta_dev) { 412 __u8 log2_interleave_sectors = ic->sb->log2_interleave_sectors; 413 *area = data_sector >> log2_interleave_sectors; 414 *offset = (unsigned int)data_sector & ((1U << log2_interleave_sectors) - 1); 415 } else { 416 *area = 0; 417 *offset = data_sector; 418 } 419 } 420 421 #define sector_to_block(ic, n) \ 422 do { \ 423 BUG_ON((n) & (unsigned int)((ic)->sectors_per_block - 1)); \ 424 (n) >>= (ic)->sb->log2_sectors_per_block; \ 425 } while (0) 426 427 static __u64 get_metadata_sector_and_offset(struct dm_integrity_c *ic, sector_t area, 428 sector_t offset, unsigned int *metadata_offset) 429 { 430 __u64 ms; 431 unsigned int mo; 432 433 ms = area << ic->sb->log2_interleave_sectors; 434 if (likely(ic->log2_metadata_run >= 0)) 435 ms += area << ic->log2_metadata_run; 436 else 437 ms += area * ic->metadata_run; 438 ms >>= ic->log2_buffer_sectors; 439 440 sector_to_block(ic, offset); 441 442 if (likely(ic->log2_tag_size >= 0)) { 443 ms += offset >> (SECTOR_SHIFT + ic->log2_buffer_sectors - ic->log2_tag_size); 444 mo = (offset << ic->log2_tag_size) & ((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - 1); 445 } else { 446 ms += (__u64)offset * ic->tag_size >> (SECTOR_SHIFT + ic->log2_buffer_sectors); 447 mo = (offset * ic->tag_size) & ((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - 1); 448 } 449 *metadata_offset = mo; 450 return ms; 451 } 452 453 static sector_t get_data_sector(struct dm_integrity_c *ic, sector_t area, sector_t offset) 454 { 455 sector_t result; 456 457 if (ic->meta_dev) 458 return offset; 459 460 result = area << ic->sb->log2_interleave_sectors; 
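	/*
	 * Interleaved layout (no separate meta_dev): past ic->start and the
	 * initial sectors (superblock and journal), the device alternates
	 * metadata runs and data areas, M0 D0 M1 D1 ..., each data area
	 * spanning 2^log2_interleave_sectors sectors.  Skipping "area" data
	 * areas plus (area + 1) metadata runs below yields the physical data
	 * sector; e.g. with the default 2^15-sector interleave, logical
	 * sector 40000 falls into area 1 at offset 7232.
	 */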
461 if (likely(ic->log2_metadata_run >= 0)) 462 result += (area + 1) << ic->log2_metadata_run; 463 else 464 result += (area + 1) * ic->metadata_run; 465 466 result += (sector_t)ic->initial_sectors + offset; 467 result += ic->start; 468 469 return result; 470 } 471 472 static void wraparound_section(struct dm_integrity_c *ic, unsigned int *sec_ptr) 473 { 474 if (unlikely(*sec_ptr >= ic->journal_sections)) 475 *sec_ptr -= ic->journal_sections; 476 } 477 478 static void sb_set_version(struct dm_integrity_c *ic) 479 { 480 if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) 481 ic->sb->version = SB_VERSION_5; 482 else if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) 483 ic->sb->version = SB_VERSION_4; 484 else if (ic->mode == 'B' || ic->sb->flags & cpu_to_le32(SB_FLAG_DIRTY_BITMAP)) 485 ic->sb->version = SB_VERSION_3; 486 else if (ic->meta_dev || ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) 487 ic->sb->version = SB_VERSION_2; 488 else 489 ic->sb->version = SB_VERSION_1; 490 } 491 492 static int sb_mac(struct dm_integrity_c *ic, bool wr) 493 { 494 SHASH_DESC_ON_STACK(desc, ic->journal_mac); 495 int r; 496 unsigned int mac_size = crypto_shash_digestsize(ic->journal_mac); 497 __u8 *sb = (__u8 *)ic->sb; 498 __u8 *mac = sb + (1 << SECTOR_SHIFT) - mac_size; 499 500 if (sizeof(struct superblock) + mac_size > 1 << SECTOR_SHIFT) { 501 dm_integrity_io_error(ic, "digest is too long", -EINVAL); 502 return -EINVAL; 503 } 504 505 desc->tfm = ic->journal_mac; 506 507 if (likely(wr)) { 508 r = crypto_shash_digest(desc, sb, mac - sb, mac); 509 if (unlikely(r < 0)) { 510 dm_integrity_io_error(ic, "crypto_shash_digest", r); 511 return r; 512 } 513 } else { 514 __u8 actual_mac[HASH_MAX_DIGESTSIZE]; 515 516 r = crypto_shash_digest(desc, sb, mac - sb, actual_mac); 517 if (unlikely(r < 0)) { 518 dm_integrity_io_error(ic, "crypto_shash_digest", r); 519 return r; 520 } 521 if (memcmp(mac, actual_mac, mac_size)) { 522 dm_integrity_io_error(ic, "superblock mac", -EILSEQ); 523 dm_audit_log_target(DM_MSG_PREFIX, "mac-superblock", ic->ti, 0); 524 return -EILSEQ; 525 } 526 } 527 528 return 0; 529 } 530 531 static int sync_rw_sb(struct dm_integrity_c *ic, blk_opf_t opf) 532 { 533 struct dm_io_request io_req; 534 struct dm_io_region io_loc; 535 const enum req_op op = opf & REQ_OP_MASK; 536 int r; 537 538 io_req.bi_opf = opf; 539 io_req.mem.type = DM_IO_KMEM; 540 io_req.mem.ptr.addr = ic->sb; 541 io_req.notify.fn = NULL; 542 io_req.client = ic->io; 543 io_loc.bdev = ic->meta_dev ? 
ic->meta_dev->bdev : ic->dev->bdev; 544 io_loc.sector = ic->start; 545 io_loc.count = SB_SECTORS; 546 547 if (op == REQ_OP_WRITE) { 548 sb_set_version(ic); 549 if (ic->journal_mac && ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) { 550 r = sb_mac(ic, true); 551 if (unlikely(r)) 552 return r; 553 } 554 } 555 556 r = dm_io(&io_req, 1, &io_loc, NULL); 557 if (unlikely(r)) 558 return r; 559 560 if (op == REQ_OP_READ) { 561 if (ic->mode != 'R' && ic->journal_mac && ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) { 562 r = sb_mac(ic, false); 563 if (unlikely(r)) 564 return r; 565 } 566 } 567 568 return 0; 569 } 570 571 #define BITMAP_OP_TEST_ALL_SET 0 572 #define BITMAP_OP_TEST_ALL_CLEAR 1 573 #define BITMAP_OP_SET 2 574 #define BITMAP_OP_CLEAR 3 575 576 static bool block_bitmap_op(struct dm_integrity_c *ic, struct page_list *bitmap, 577 sector_t sector, sector_t n_sectors, int mode) 578 { 579 unsigned long bit, end_bit, this_end_bit, page, end_page; 580 unsigned long *data; 581 582 if (unlikely(((sector | n_sectors) & ((1 << ic->sb->log2_sectors_per_block) - 1)) != 0)) { 583 DMCRIT("invalid bitmap access (%llx,%llx,%d,%d,%d)", 584 sector, 585 n_sectors, 586 ic->sb->log2_sectors_per_block, 587 ic->log2_blocks_per_bitmap_bit, 588 mode); 589 BUG(); 590 } 591 592 if (unlikely(!n_sectors)) 593 return true; 594 595 bit = sector >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit); 596 end_bit = (sector + n_sectors - 1) >> 597 (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit); 598 599 page = bit / (PAGE_SIZE * 8); 600 bit %= PAGE_SIZE * 8; 601 602 end_page = end_bit / (PAGE_SIZE * 8); 603 end_bit %= PAGE_SIZE * 8; 604 605 repeat: 606 if (page < end_page) 607 this_end_bit = PAGE_SIZE * 8 - 1; 608 else 609 this_end_bit = end_bit; 610 611 data = lowmem_page_address(bitmap[page].page); 612 613 if (mode == BITMAP_OP_TEST_ALL_SET) { 614 while (bit <= this_end_bit) { 615 if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) { 616 do { 617 if (data[bit / BITS_PER_LONG] != -1) 618 return false; 619 bit += BITS_PER_LONG; 620 } while (this_end_bit >= bit + BITS_PER_LONG - 1); 621 continue; 622 } 623 if (!test_bit(bit, data)) 624 return false; 625 bit++; 626 } 627 } else if (mode == BITMAP_OP_TEST_ALL_CLEAR) { 628 while (bit <= this_end_bit) { 629 if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) { 630 do { 631 if (data[bit / BITS_PER_LONG] != 0) 632 return false; 633 bit += BITS_PER_LONG; 634 } while (this_end_bit >= bit + BITS_PER_LONG - 1); 635 continue; 636 } 637 if (test_bit(bit, data)) 638 return false; 639 bit++; 640 } 641 } else if (mode == BITMAP_OP_SET) { 642 while (bit <= this_end_bit) { 643 if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) { 644 do { 645 data[bit / BITS_PER_LONG] = -1; 646 bit += BITS_PER_LONG; 647 } while (this_end_bit >= bit + BITS_PER_LONG - 1); 648 continue; 649 } 650 __set_bit(bit, data); 651 bit++; 652 } 653 } else if (mode == BITMAP_OP_CLEAR) { 654 if (!bit && this_end_bit == PAGE_SIZE * 8 - 1) 655 clear_page(data); 656 else { 657 while (bit <= this_end_bit) { 658 if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) { 659 do { 660 data[bit / BITS_PER_LONG] = 0; 661 bit += BITS_PER_LONG; 662 } while (this_end_bit >= bit + BITS_PER_LONG - 1); 663 continue; 664 } 665 __clear_bit(bit, data); 666 bit++; 667 } 668 } 669 } else { 670 BUG(); 671 } 672 673 if (unlikely(page < end_page)) { 674 bit = 0; 675 page++; 676 goto repeat; 677 } 678 679 return true; 680 } 681 682 static 
void block_bitmap_copy(struct dm_integrity_c *ic, struct page_list *dst, struct page_list *src) 683 { 684 unsigned int n_bitmap_pages = DIV_ROUND_UP(ic->n_bitmap_blocks, PAGE_SIZE / BITMAP_BLOCK_SIZE); 685 unsigned int i; 686 687 for (i = 0; i < n_bitmap_pages; i++) { 688 unsigned long *dst_data = lowmem_page_address(dst[i].page); 689 unsigned long *src_data = lowmem_page_address(src[i].page); 690 691 copy_page(dst_data, src_data); 692 } 693 } 694 695 static struct bitmap_block_status *sector_to_bitmap_block(struct dm_integrity_c *ic, sector_t sector) 696 { 697 unsigned int bit = sector >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit); 698 unsigned int bitmap_block = bit / (BITMAP_BLOCK_SIZE * 8); 699 700 BUG_ON(bitmap_block >= ic->n_bitmap_blocks); 701 return &ic->bbs[bitmap_block]; 702 } 703 704 static void access_journal_check(struct dm_integrity_c *ic, unsigned int section, unsigned int offset, 705 bool e, const char *function) 706 { 707 #if defined(CONFIG_DM_DEBUG) || defined(INTERNAL_VERIFY) 708 unsigned int limit = e ? ic->journal_section_entries : ic->journal_section_sectors; 709 710 if (unlikely(section >= ic->journal_sections) || 711 unlikely(offset >= limit)) { 712 DMCRIT("%s: invalid access at (%u,%u), limit (%u,%u)", 713 function, section, offset, ic->journal_sections, limit); 714 BUG(); 715 } 716 #endif 717 } 718 719 static void page_list_location(struct dm_integrity_c *ic, unsigned int section, unsigned int offset, 720 unsigned int *pl_index, unsigned int *pl_offset) 721 { 722 unsigned int sector; 723 724 access_journal_check(ic, section, offset, false, "page_list_location"); 725 726 sector = section * ic->journal_section_sectors + offset; 727 728 *pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT); 729 *pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1); 730 } 731 732 static struct journal_sector *access_page_list(struct dm_integrity_c *ic, struct page_list *pl, 733 unsigned int section, unsigned int offset, unsigned int *n_sectors) 734 { 735 unsigned int pl_index, pl_offset; 736 char *va; 737 738 page_list_location(ic, section, offset, &pl_index, &pl_offset); 739 740 if (n_sectors) 741 *n_sectors = (PAGE_SIZE - pl_offset) >> SECTOR_SHIFT; 742 743 va = lowmem_page_address(pl[pl_index].page); 744 745 return (struct journal_sector *)(va + pl_offset); 746 } 747 748 static struct journal_sector *access_journal(struct dm_integrity_c *ic, unsigned int section, unsigned int offset) 749 { 750 return access_page_list(ic, ic->journal, section, offset, NULL); 751 } 752 753 static struct journal_entry *access_journal_entry(struct dm_integrity_c *ic, unsigned int section, unsigned int n) 754 { 755 unsigned int rel_sector, offset; 756 struct journal_sector *js; 757 758 access_journal_check(ic, section, n, true, "access_journal_entry"); 759 760 rel_sector = n % JOURNAL_BLOCK_SECTORS; 761 offset = n / JOURNAL_BLOCK_SECTORS; 762 763 js = access_journal(ic, section, rel_sector); 764 return (struct journal_entry *)((char *)js + offset * ic->journal_entry_size); 765 } 766 767 static struct journal_sector *access_journal_data(struct dm_integrity_c *ic, unsigned int section, unsigned int n) 768 { 769 n <<= ic->sb->log2_sectors_per_block; 770 771 n += JOURNAL_BLOCK_SECTORS; 772 773 access_journal_check(ic, section, n, false, "access_journal_data"); 774 775 return access_journal(ic, section, n); 776 } 777 778 static void section_mac(struct dm_integrity_c *ic, unsigned int section, __u8 result[JOURNAL_MAC_SIZE]) 779 { 780 SHASH_DESC_ON_STACK(desc, ic->journal_mac); 781 int r; 
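	/*
	 * Compute the MAC that authenticates one journal section: in fixed
	 * HMAC mode the superblock salt and the section number are hashed
	 * first, followed by the sector number of every journal entry in the
	 * section; the digest is then zero-padded or truncated to
	 * JOURNAL_MAC_SIZE bytes.
	 */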
	unsigned int j, size;

	desc->tfm = ic->journal_mac;

	r = crypto_shash_init(desc);
	if (unlikely(r < 0)) {
		dm_integrity_io_error(ic, "crypto_shash_init", r);
		goto err;
	}

	if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) {
		__le64 section_le;

		r = crypto_shash_update(desc, (__u8 *)&ic->sb->salt, SALT_SIZE);
		if (unlikely(r < 0)) {
			dm_integrity_io_error(ic, "crypto_shash_update", r);
			goto err;
		}

		section_le = cpu_to_le64(section);
		r = crypto_shash_update(desc, (__u8 *)&section_le, sizeof(section_le));
		if (unlikely(r < 0)) {
			dm_integrity_io_error(ic, "crypto_shash_update", r);
			goto err;
		}
	}

	for (j = 0; j < ic->journal_section_entries; j++) {
		struct journal_entry *je = access_journal_entry(ic, section, j);

		r = crypto_shash_update(desc, (__u8 *)&je->u.sector, sizeof(je->u.sector));
		if (unlikely(r < 0)) {
			dm_integrity_io_error(ic, "crypto_shash_update", r);
			goto err;
		}
	}

	size = crypto_shash_digestsize(ic->journal_mac);

	if (likely(size <= JOURNAL_MAC_SIZE)) {
		r = crypto_shash_final(desc, result);
		if (unlikely(r < 0)) {
			dm_integrity_io_error(ic, "crypto_shash_final", r);
			goto err;
		}
		memset(result + size, 0, JOURNAL_MAC_SIZE - size);
	} else {
		__u8 digest[HASH_MAX_DIGESTSIZE];

		if (WARN_ON(size > sizeof(digest))) {
			dm_integrity_io_error(ic, "digest_size", -EINVAL);
			goto err;
		}
		r = crypto_shash_final(desc, digest);
		if (unlikely(r < 0)) {
			dm_integrity_io_error(ic, "crypto_shash_final", r);
			goto err;
		}
		memcpy(result, digest, JOURNAL_MAC_SIZE);
	}

	return;
err:
	memset(result, 0, JOURNAL_MAC_SIZE);
}

static void rw_section_mac(struct dm_integrity_c *ic, unsigned int section, bool wr)
{
	__u8 result[JOURNAL_MAC_SIZE];
	unsigned int j;

	if (!ic->journal_mac)
		return;

	section_mac(ic, section, result);

	for (j = 0; j < JOURNAL_BLOCK_SECTORS; j++) {
		struct journal_sector *js = access_journal(ic, section, j);

		if (likely(wr))
			memcpy(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR);
		else {
			if (memcmp(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR)) {
				dm_integrity_io_error(ic, "journal mac", -EILSEQ);
				dm_audit_log_target(DM_MSG_PREFIX, "mac-journal", ic->ti, 0);
			}
		}
	}
}

static void complete_journal_op(void *context)
{
	struct journal_completion *comp = context;

	BUG_ON(!atomic_read(&comp->in_flight));
	if (likely(atomic_dec_and_test(&comp->in_flight)))
		complete(&comp->comp);
}

static void xor_journal(struct dm_integrity_c *ic, bool encrypt, unsigned int section,
			unsigned int n_sections, struct journal_completion *comp)
{
	struct async_submit_ctl submit;
	size_t n_bytes = (size_t)(n_sections * ic->journal_section_sectors) << SECTOR_SHIFT;
	unsigned int pl_index, pl_offset, section_index;
	struct page_list *source_pl, *target_pl;

	if (likely(encrypt)) {
		source_pl = ic->journal;
		target_pl = ic->journal_io;
	} else {
		source_pl = ic->journal_io;
		target_pl = ic->journal;
	}

	page_list_location(ic, section, 0, &pl_index, &pl_offset);

	atomic_add(roundup(pl_offset + n_bytes, PAGE_SIZE) >> PAGE_SHIFT, &comp->in_flight);

	init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, complete_journal_op, comp, NULL);

	section_index = pl_index;

	do {
		size_t this_step;
		struct page *src_pages[2];
		struct page *dst_page;

		while (unlikely(pl_index == section_index)) {
			unsigned int dummy;

			if (likely(encrypt))
				rw_section_mac(ic, section, true);
			section++;
			n_sections--;
			if (!n_sections)
				break;
			page_list_location(ic, section, 0, &section_index, &dummy);
		}

		this_step = min(n_bytes, (size_t)PAGE_SIZE - pl_offset);
		dst_page = target_pl[pl_index].page;
		src_pages[0] = source_pl[pl_index].page;
		src_pages[1] = ic->journal_xor[pl_index].page;

		async_xor(dst_page, src_pages, pl_offset, 2, this_step, &submit);

		pl_index++;
		pl_offset = 0;
		n_bytes -= this_step;
	} while (n_bytes);

	BUG_ON(n_sections);

	async_tx_issue_pending_all();
}

static void complete_journal_encrypt(void *data, int err)
{
	struct journal_completion *comp = data;

	if (unlikely(err)) {
		if (likely(err == -EINPROGRESS)) {
			complete(&comp->ic->crypto_backoff);
			return;
		}
		dm_integrity_io_error(comp->ic, "asynchronous encrypt", err);
	}
	complete_journal_op(comp);
}

static bool do_crypt(bool encrypt, struct skcipher_request *req, struct journal_completion *comp)
{
	int r;

	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				      complete_journal_encrypt, comp);
	if (likely(encrypt))
		r = crypto_skcipher_encrypt(req);
	else
		r = crypto_skcipher_decrypt(req);
	if (likely(!r))
		return false;
	if (likely(r == -EINPROGRESS))
		return true;
	if (likely(r == -EBUSY)) {
		wait_for_completion(&comp->ic->crypto_backoff);
		reinit_completion(&comp->ic->crypto_backoff);
		return true;
	}
	dm_integrity_io_error(comp->ic, "encrypt", r);
	return false;
}

static void crypt_journal(struct dm_integrity_c *ic, bool encrypt, unsigned int section,
			  unsigned int n_sections, struct journal_completion *comp)
{
	struct scatterlist **source_sg;
	struct scatterlist **target_sg;

	atomic_add(2, &comp->in_flight);

	if (likely(encrypt)) {
		source_sg = ic->journal_scatterlist;
		target_sg = ic->journal_io_scatterlist;
	} else {
		source_sg = ic->journal_io_scatterlist;
		target_sg = ic->journal_scatterlist;
	}

	do {
		struct skcipher_request *req;
		unsigned int ivsize;
		char *iv;

		if (likely(encrypt))
			rw_section_mac(ic, section, true);

		req = ic->sk_requests[section];
		ivsize = crypto_skcipher_ivsize(ic->journal_crypt);
		iv = req->iv;

		memcpy(iv, iv + ivsize, ivsize);

		req->src = source_sg[section];
		req->dst = target_sg[section];

		if (unlikely(do_crypt(encrypt, req, comp)))
			atomic_inc(&comp->in_flight);

		section++;
		n_sections--;
	} while (n_sections);

	atomic_dec(&comp->in_flight);
	complete_journal_op(comp);
}

static void encrypt_journal(struct dm_integrity_c *ic, bool encrypt, unsigned int section,
			    unsigned int n_sections, struct journal_completion *comp)
{
	if (ic->journal_xor)
		return xor_journal(ic, encrypt, section, n_sections, comp);
	else
		return crypt_journal(ic, encrypt, section, n_sections, comp);
}

static void complete_journal_io(unsigned long error, void *context)
{
	struct journal_completion *comp = context;

	if (unlikely(error != 0))
		dm_integrity_io_error(comp->ic, "writing journal", -EIO);
complete_journal_op(comp); 1036 } 1037 1038 static void rw_journal_sectors(struct dm_integrity_c *ic, blk_opf_t opf, 1039 unsigned int sector, unsigned int n_sectors, 1040 struct journal_completion *comp) 1041 { 1042 struct dm_io_request io_req; 1043 struct dm_io_region io_loc; 1044 unsigned int pl_index, pl_offset; 1045 int r; 1046 1047 if (unlikely(dm_integrity_failed(ic))) { 1048 if (comp) 1049 complete_journal_io(-1UL, comp); 1050 return; 1051 } 1052 1053 pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT); 1054 pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1); 1055 1056 io_req.bi_opf = opf; 1057 io_req.mem.type = DM_IO_PAGE_LIST; 1058 if (ic->journal_io) 1059 io_req.mem.ptr.pl = &ic->journal_io[pl_index]; 1060 else 1061 io_req.mem.ptr.pl = &ic->journal[pl_index]; 1062 io_req.mem.offset = pl_offset; 1063 if (likely(comp != NULL)) { 1064 io_req.notify.fn = complete_journal_io; 1065 io_req.notify.context = comp; 1066 } else { 1067 io_req.notify.fn = NULL; 1068 } 1069 io_req.client = ic->io; 1070 io_loc.bdev = ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev; 1071 io_loc.sector = ic->start + SB_SECTORS + sector; 1072 io_loc.count = n_sectors; 1073 1074 r = dm_io(&io_req, 1, &io_loc, NULL); 1075 if (unlikely(r)) { 1076 dm_integrity_io_error(ic, (opf & REQ_OP_MASK) == REQ_OP_READ ? 1077 "reading journal" : "writing journal", r); 1078 if (comp) { 1079 WARN_ONCE(1, "asynchronous dm_io failed: %d", r); 1080 complete_journal_io(-1UL, comp); 1081 } 1082 } 1083 } 1084 1085 static void rw_journal(struct dm_integrity_c *ic, blk_opf_t opf, 1086 unsigned int section, unsigned int n_sections, 1087 struct journal_completion *comp) 1088 { 1089 unsigned int sector, n_sectors; 1090 1091 sector = section * ic->journal_section_sectors; 1092 n_sectors = n_sections * ic->journal_section_sectors; 1093 1094 rw_journal_sectors(ic, opf, sector, n_sectors, comp); 1095 } 1096 1097 static void write_journal(struct dm_integrity_c *ic, unsigned int commit_start, unsigned int commit_sections) 1098 { 1099 struct journal_completion io_comp; 1100 struct journal_completion crypt_comp_1; 1101 struct journal_completion crypt_comp_2; 1102 unsigned int i; 1103 1104 io_comp.ic = ic; 1105 init_completion(&io_comp.comp); 1106 1107 if (commit_start + commit_sections <= ic->journal_sections) { 1108 io_comp.in_flight = (atomic_t)ATOMIC_INIT(1); 1109 if (ic->journal_io) { 1110 crypt_comp_1.ic = ic; 1111 init_completion(&crypt_comp_1.comp); 1112 crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0); 1113 encrypt_journal(ic, true, commit_start, commit_sections, &crypt_comp_1); 1114 wait_for_completion_io(&crypt_comp_1.comp); 1115 } else { 1116 for (i = 0; i < commit_sections; i++) 1117 rw_section_mac(ic, commit_start + i, true); 1118 } 1119 rw_journal(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, commit_start, 1120 commit_sections, &io_comp); 1121 } else { 1122 unsigned int to_end; 1123 1124 io_comp.in_flight = (atomic_t)ATOMIC_INIT(2); 1125 to_end = ic->journal_sections - commit_start; 1126 if (ic->journal_io) { 1127 crypt_comp_1.ic = ic; 1128 init_completion(&crypt_comp_1.comp); 1129 crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0); 1130 encrypt_journal(ic, true, commit_start, to_end, &crypt_comp_1); 1131 if (try_wait_for_completion(&crypt_comp_1.comp)) { 1132 rw_journal(ic, REQ_OP_WRITE | REQ_FUA, 1133 commit_start, to_end, &io_comp); 1134 reinit_completion(&crypt_comp_1.comp); 1135 crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0); 1136 encrypt_journal(ic, true, 0, commit_sections - to_end, &crypt_comp_1); 1137 
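				/*
				 * Wrapped commit: the section range crosses the end of the
				 * journal, so the tail [commit_start, journal_sections) and
				 * the head [0, commit_sections - to_end) are encrypted and
				 * written as two separate I/Os; io_comp.in_flight was primed
				 * to 2 above so the final wait covers both writes.
				 */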
wait_for_completion_io(&crypt_comp_1.comp); 1138 } else { 1139 crypt_comp_2.ic = ic; 1140 init_completion(&crypt_comp_2.comp); 1141 crypt_comp_2.in_flight = (atomic_t)ATOMIC_INIT(0); 1142 encrypt_journal(ic, true, 0, commit_sections - to_end, &crypt_comp_2); 1143 wait_for_completion_io(&crypt_comp_1.comp); 1144 rw_journal(ic, REQ_OP_WRITE | REQ_FUA, commit_start, to_end, &io_comp); 1145 wait_for_completion_io(&crypt_comp_2.comp); 1146 } 1147 } else { 1148 for (i = 0; i < to_end; i++) 1149 rw_section_mac(ic, commit_start + i, true); 1150 rw_journal(ic, REQ_OP_WRITE | REQ_FUA, commit_start, to_end, &io_comp); 1151 for (i = 0; i < commit_sections - to_end; i++) 1152 rw_section_mac(ic, i, true); 1153 } 1154 rw_journal(ic, REQ_OP_WRITE | REQ_FUA, 0, commit_sections - to_end, &io_comp); 1155 } 1156 1157 wait_for_completion_io(&io_comp.comp); 1158 } 1159 1160 static void copy_from_journal(struct dm_integrity_c *ic, unsigned int section, unsigned int offset, 1161 unsigned int n_sectors, sector_t target, io_notify_fn fn, void *data) 1162 { 1163 struct dm_io_request io_req; 1164 struct dm_io_region io_loc; 1165 int r; 1166 unsigned int sector, pl_index, pl_offset; 1167 1168 BUG_ON((target | n_sectors | offset) & (unsigned int)(ic->sectors_per_block - 1)); 1169 1170 if (unlikely(dm_integrity_failed(ic))) { 1171 fn(-1UL, data); 1172 return; 1173 } 1174 1175 sector = section * ic->journal_section_sectors + JOURNAL_BLOCK_SECTORS + offset; 1176 1177 pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT); 1178 pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1); 1179 1180 io_req.bi_opf = REQ_OP_WRITE; 1181 io_req.mem.type = DM_IO_PAGE_LIST; 1182 io_req.mem.ptr.pl = &ic->journal[pl_index]; 1183 io_req.mem.offset = pl_offset; 1184 io_req.notify.fn = fn; 1185 io_req.notify.context = data; 1186 io_req.client = ic->io; 1187 io_loc.bdev = ic->dev->bdev; 1188 io_loc.sector = target; 1189 io_loc.count = n_sectors; 1190 1191 r = dm_io(&io_req, 1, &io_loc, NULL); 1192 if (unlikely(r)) { 1193 WARN_ONCE(1, "asynchronous dm_io failed: %d", r); 1194 fn(-1UL, data); 1195 } 1196 } 1197 1198 static bool ranges_overlap(struct dm_integrity_range *range1, struct dm_integrity_range *range2) 1199 { 1200 return range1->logical_sector < range2->logical_sector + range2->n_sectors && 1201 range1->logical_sector + range1->n_sectors > range2->logical_sector; 1202 } 1203 1204 static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range, bool check_waiting) 1205 { 1206 struct rb_node **n = &ic->in_progress.rb_node; 1207 struct rb_node *parent; 1208 1209 BUG_ON((new_range->logical_sector | new_range->n_sectors) & (unsigned int)(ic->sectors_per_block - 1)); 1210 1211 if (likely(check_waiting)) { 1212 struct dm_integrity_range *range; 1213 1214 list_for_each_entry(range, &ic->wait_list, wait_entry) { 1215 if (unlikely(ranges_overlap(range, new_range))) 1216 return false; 1217 } 1218 } 1219 1220 parent = NULL; 1221 1222 while (*n) { 1223 struct dm_integrity_range *range = container_of(*n, struct dm_integrity_range, node); 1224 1225 parent = *n; 1226 if (new_range->logical_sector + new_range->n_sectors <= range->logical_sector) 1227 n = &range->node.rb_left; 1228 else if (new_range->logical_sector >= range->logical_sector + range->n_sectors) 1229 n = &range->node.rb_right; 1230 else 1231 return false; 1232 } 1233 1234 rb_link_node(&new_range->node, parent, n); 1235 rb_insert_color(&new_range->node, &ic->in_progress); 1236 1237 return true; 1238 } 1239 1240 static void remove_range_unlocked(struct dm_integrity_c 
*ic, struct dm_integrity_range *range) 1241 { 1242 rb_erase(&range->node, &ic->in_progress); 1243 while (unlikely(!list_empty(&ic->wait_list))) { 1244 struct dm_integrity_range *last_range = 1245 list_first_entry(&ic->wait_list, struct dm_integrity_range, wait_entry); 1246 struct task_struct *last_range_task; 1247 1248 last_range_task = last_range->task; 1249 list_del(&last_range->wait_entry); 1250 if (!add_new_range(ic, last_range, false)) { 1251 last_range->task = last_range_task; 1252 list_add(&last_range->wait_entry, &ic->wait_list); 1253 break; 1254 } 1255 last_range->waiting = false; 1256 wake_up_process(last_range_task); 1257 } 1258 } 1259 1260 static void remove_range(struct dm_integrity_c *ic, struct dm_integrity_range *range) 1261 { 1262 unsigned long flags; 1263 1264 spin_lock_irqsave(&ic->endio_wait.lock, flags); 1265 remove_range_unlocked(ic, range); 1266 spin_unlock_irqrestore(&ic->endio_wait.lock, flags); 1267 } 1268 1269 static void wait_and_add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range) 1270 { 1271 new_range->waiting = true; 1272 list_add_tail(&new_range->wait_entry, &ic->wait_list); 1273 new_range->task = current; 1274 do { 1275 __set_current_state(TASK_UNINTERRUPTIBLE); 1276 spin_unlock_irq(&ic->endio_wait.lock); 1277 io_schedule(); 1278 spin_lock_irq(&ic->endio_wait.lock); 1279 } while (unlikely(new_range->waiting)); 1280 } 1281 1282 static void add_new_range_and_wait(struct dm_integrity_c *ic, struct dm_integrity_range *new_range) 1283 { 1284 if (unlikely(!add_new_range(ic, new_range, true))) 1285 wait_and_add_new_range(ic, new_range); 1286 } 1287 1288 static void init_journal_node(struct journal_node *node) 1289 { 1290 RB_CLEAR_NODE(&node->node); 1291 node->sector = (sector_t)-1; 1292 } 1293 1294 static void add_journal_node(struct dm_integrity_c *ic, struct journal_node *node, sector_t sector) 1295 { 1296 struct rb_node **link; 1297 struct rb_node *parent; 1298 1299 node->sector = sector; 1300 BUG_ON(!RB_EMPTY_NODE(&node->node)); 1301 1302 link = &ic->journal_tree_root.rb_node; 1303 parent = NULL; 1304 1305 while (*link) { 1306 struct journal_node *j; 1307 1308 parent = *link; 1309 j = container_of(parent, struct journal_node, node); 1310 if (sector < j->sector) 1311 link = &j->node.rb_left; 1312 else 1313 link = &j->node.rb_right; 1314 } 1315 1316 rb_link_node(&node->node, parent, link); 1317 rb_insert_color(&node->node, &ic->journal_tree_root); 1318 } 1319 1320 static void remove_journal_node(struct dm_integrity_c *ic, struct journal_node *node) 1321 { 1322 BUG_ON(RB_EMPTY_NODE(&node->node)); 1323 rb_erase(&node->node, &ic->journal_tree_root); 1324 init_journal_node(node); 1325 } 1326 1327 #define NOT_FOUND (-1U) 1328 1329 static unsigned int find_journal_node(struct dm_integrity_c *ic, sector_t sector, sector_t *next_sector) 1330 { 1331 struct rb_node *n = ic->journal_tree_root.rb_node; 1332 unsigned int found = NOT_FOUND; 1333 1334 *next_sector = (sector_t)-1; 1335 while (n) { 1336 struct journal_node *j = container_of(n, struct journal_node, node); 1337 1338 if (sector == j->sector) 1339 found = j - ic->journal_tree; 1340 1341 if (sector < j->sector) { 1342 *next_sector = j->sector; 1343 n = j->node.rb_left; 1344 } else 1345 n = j->node.rb_right; 1346 } 1347 1348 return found; 1349 } 1350 1351 static bool test_journal_node(struct dm_integrity_c *ic, unsigned int pos, sector_t sector) 1352 { 1353 struct journal_node *node, *next_node; 1354 struct rb_node *next; 1355 1356 if (unlikely(pos >= ic->journal_entries)) 1357 return false; 
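	/*
	 * ic->journal_tree mirrors the in-memory journal entries in an rbtree
	 * keyed by logical sector; the checks below verify that entry "pos"
	 * still describes "sector" and is not followed by another entry for
	 * the same sector.
	 */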
1358 node = &ic->journal_tree[pos]; 1359 if (unlikely(RB_EMPTY_NODE(&node->node))) 1360 return false; 1361 if (unlikely(node->sector != sector)) 1362 return false; 1363 1364 next = rb_next(&node->node); 1365 if (unlikely(!next)) 1366 return true; 1367 1368 next_node = container_of(next, struct journal_node, node); 1369 return next_node->sector != sector; 1370 } 1371 1372 static bool find_newer_committed_node(struct dm_integrity_c *ic, struct journal_node *node) 1373 { 1374 struct rb_node *next; 1375 struct journal_node *next_node; 1376 unsigned int next_section; 1377 1378 BUG_ON(RB_EMPTY_NODE(&node->node)); 1379 1380 next = rb_next(&node->node); 1381 if (unlikely(!next)) 1382 return false; 1383 1384 next_node = container_of(next, struct journal_node, node); 1385 1386 if (next_node->sector != node->sector) 1387 return false; 1388 1389 next_section = (unsigned int)(next_node - ic->journal_tree) / ic->journal_section_entries; 1390 if (next_section >= ic->committed_section && 1391 next_section < ic->committed_section + ic->n_committed_sections) 1392 return true; 1393 if (next_section + ic->journal_sections < ic->committed_section + ic->n_committed_sections) 1394 return true; 1395 1396 return false; 1397 } 1398 1399 #define TAG_READ 0 1400 #define TAG_WRITE 1 1401 #define TAG_CMP 2 1402 1403 static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, sector_t *metadata_block, 1404 unsigned int *metadata_offset, unsigned int total_size, int op) 1405 { 1406 #define MAY_BE_FILLER 1 1407 #define MAY_BE_HASH 2 1408 unsigned int hash_offset = 0; 1409 unsigned int may_be = MAY_BE_HASH | (ic->discard ? MAY_BE_FILLER : 0); 1410 1411 do { 1412 unsigned char *data, *dp; 1413 struct dm_buffer *b; 1414 unsigned int to_copy; 1415 int r; 1416 1417 r = dm_integrity_failed(ic); 1418 if (unlikely(r)) 1419 return r; 1420 1421 data = dm_bufio_read(ic->bufio, *metadata_block, &b); 1422 if (IS_ERR(data)) 1423 return PTR_ERR(data); 1424 1425 to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size); 1426 dp = data + *metadata_offset; 1427 if (op == TAG_READ) { 1428 memcpy(tag, dp, to_copy); 1429 } else if (op == TAG_WRITE) { 1430 if (memcmp(dp, tag, to_copy)) { 1431 memcpy(dp, tag, to_copy); 1432 dm_bufio_mark_partial_buffer_dirty(b, *metadata_offset, *metadata_offset + to_copy); 1433 } 1434 } else { 1435 /* e.g.: op == TAG_CMP */ 1436 1437 if (likely(is_power_of_2(ic->tag_size))) { 1438 if (unlikely(memcmp(dp, tag, to_copy))) 1439 if (unlikely(!ic->discard) || 1440 unlikely(memchr_inv(dp, DISCARD_FILLER, to_copy) != NULL)) { 1441 goto thorough_test; 1442 } 1443 } else { 1444 unsigned int i, ts; 1445 thorough_test: 1446 ts = total_size; 1447 1448 for (i = 0; i < to_copy; i++, ts--) { 1449 if (unlikely(dp[i] != tag[i])) 1450 may_be &= ~MAY_BE_HASH; 1451 if (likely(dp[i] != DISCARD_FILLER)) 1452 may_be &= ~MAY_BE_FILLER; 1453 hash_offset++; 1454 if (unlikely(hash_offset == ic->tag_size)) { 1455 if (unlikely(!may_be)) { 1456 dm_bufio_release(b); 1457 return ts; 1458 } 1459 hash_offset = 0; 1460 may_be = MAY_BE_HASH | (ic->discard ? 
MAY_BE_FILLER : 0); 1461 } 1462 } 1463 } 1464 } 1465 dm_bufio_release(b); 1466 1467 tag += to_copy; 1468 *metadata_offset += to_copy; 1469 if (unlikely(*metadata_offset == 1U << SECTOR_SHIFT << ic->log2_buffer_sectors)) { 1470 (*metadata_block)++; 1471 *metadata_offset = 0; 1472 } 1473 1474 if (unlikely(!is_power_of_2(ic->tag_size))) 1475 hash_offset = (hash_offset + to_copy) % ic->tag_size; 1476 1477 total_size -= to_copy; 1478 } while (unlikely(total_size)); 1479 1480 return 0; 1481 #undef MAY_BE_FILLER 1482 #undef MAY_BE_HASH 1483 } 1484 1485 struct flush_request { 1486 struct dm_io_request io_req; 1487 struct dm_io_region io_reg; 1488 struct dm_integrity_c *ic; 1489 struct completion comp; 1490 }; 1491 1492 static void flush_notify(unsigned long error, void *fr_) 1493 { 1494 struct flush_request *fr = fr_; 1495 1496 if (unlikely(error != 0)) 1497 dm_integrity_io_error(fr->ic, "flushing disk cache", -EIO); 1498 complete(&fr->comp); 1499 } 1500 1501 static void dm_integrity_flush_buffers(struct dm_integrity_c *ic, bool flush_data) 1502 { 1503 int r; 1504 struct flush_request fr; 1505 1506 if (!ic->meta_dev) 1507 flush_data = false; 1508 if (flush_data) { 1509 fr.io_req.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC, 1510 fr.io_req.mem.type = DM_IO_KMEM, 1511 fr.io_req.mem.ptr.addr = NULL, 1512 fr.io_req.notify.fn = flush_notify, 1513 fr.io_req.notify.context = &fr; 1514 fr.io_req.client = dm_bufio_get_dm_io_client(ic->bufio), 1515 fr.io_reg.bdev = ic->dev->bdev, 1516 fr.io_reg.sector = 0, 1517 fr.io_reg.count = 0, 1518 fr.ic = ic; 1519 init_completion(&fr.comp); 1520 r = dm_io(&fr.io_req, 1, &fr.io_reg, NULL); 1521 BUG_ON(r); 1522 } 1523 1524 r = dm_bufio_write_dirty_buffers(ic->bufio); 1525 if (unlikely(r)) 1526 dm_integrity_io_error(ic, "writing tags", r); 1527 1528 if (flush_data) 1529 wait_for_completion(&fr.comp); 1530 } 1531 1532 static void sleep_on_endio_wait(struct dm_integrity_c *ic) 1533 { 1534 DECLARE_WAITQUEUE(wait, current); 1535 1536 __add_wait_queue(&ic->endio_wait, &wait); 1537 __set_current_state(TASK_UNINTERRUPTIBLE); 1538 spin_unlock_irq(&ic->endio_wait.lock); 1539 io_schedule(); 1540 spin_lock_irq(&ic->endio_wait.lock); 1541 __remove_wait_queue(&ic->endio_wait, &wait); 1542 } 1543 1544 static void autocommit_fn(struct timer_list *t) 1545 { 1546 struct dm_integrity_c *ic = from_timer(ic, t, autocommit_timer); 1547 1548 if (likely(!dm_integrity_failed(ic))) 1549 queue_work(ic->commit_wq, &ic->commit_work); 1550 } 1551 1552 static void schedule_autocommit(struct dm_integrity_c *ic) 1553 { 1554 if (!timer_pending(&ic->autocommit_timer)) 1555 mod_timer(&ic->autocommit_timer, jiffies + ic->autocommit_jiffies); 1556 } 1557 1558 static void submit_flush_bio(struct dm_integrity_c *ic, struct dm_integrity_io *dio) 1559 { 1560 struct bio *bio; 1561 unsigned long flags; 1562 1563 spin_lock_irqsave(&ic->endio_wait.lock, flags); 1564 bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); 1565 bio_list_add(&ic->flush_bio_list, bio); 1566 spin_unlock_irqrestore(&ic->endio_wait.lock, flags); 1567 1568 queue_work(ic->commit_wq, &ic->commit_work); 1569 } 1570 1571 static void do_endio(struct dm_integrity_c *ic, struct bio *bio) 1572 { 1573 int r; 1574 1575 r = dm_integrity_failed(ic); 1576 if (unlikely(r) && !bio->bi_status) 1577 bio->bi_status = errno_to_blk_status(r); 1578 if (unlikely(ic->synchronous_mode) && bio_op(bio) == REQ_OP_WRITE) { 1579 unsigned long flags; 1580 1581 spin_lock_irqsave(&ic->endio_wait.lock, flags); 1582 bio_list_add(&ic->synchronous_bios, bio); 
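		/*
		 * In synchronous mode, finished write bios are not ended here;
		 * they are parked on ic->synchronous_bios and bitmap_flush_work
		 * is kicked below with zero delay to flush the bitmap before
		 * they complete.
		 */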
		queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, 0);
		spin_unlock_irqrestore(&ic->endio_wait.lock, flags);
		return;
	}
	bio_endio(bio);
}

static void do_endio_flush(struct dm_integrity_c *ic, struct dm_integrity_io *dio)
{
	struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));

	if (unlikely(dio->fua) && likely(!bio->bi_status) && likely(!dm_integrity_failed(ic)))
		submit_flush_bio(ic, dio);
	else
		do_endio(ic, bio);
}

static void dec_in_flight(struct dm_integrity_io *dio)
{
	if (atomic_dec_and_test(&dio->in_flight)) {
		struct dm_integrity_c *ic = dio->ic;
		struct bio *bio;

		remove_range(ic, &dio->range);

		if (dio->op == REQ_OP_WRITE || unlikely(dio->op == REQ_OP_DISCARD))
			schedule_autocommit(ic);

		bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
		if (unlikely(dio->bi_status) && !bio->bi_status)
			bio->bi_status = dio->bi_status;
		if (likely(!bio->bi_status) && unlikely(bio_sectors(bio) != dio->range.n_sectors)) {
			dio->range.logical_sector += dio->range.n_sectors;
			bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT);
			INIT_WORK(&dio->work, integrity_bio_wait);
			queue_work(ic->offload_wq, &dio->work);
			return;
		}
		do_endio_flush(ic, dio);
	}
}

static void integrity_end_io(struct bio *bio)
{
	struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));

	dm_bio_restore(&dio->bio_details, bio);
	if (bio->bi_integrity)
		bio->bi_opf |= REQ_INTEGRITY;

	if (dio->completion)
		complete(dio->completion);

	dec_in_flight(dio);
}

static void integrity_sector_checksum(struct dm_integrity_c *ic, sector_t sector,
				      const char *data, char *result)
{
	__le64 sector_le = cpu_to_le64(sector);
	SHASH_DESC_ON_STACK(req, ic->internal_hash);
	int r;
	unsigned int digest_size;

	req->tfm = ic->internal_hash;

	r = crypto_shash_init(req);
	if (unlikely(r < 0)) {
		dm_integrity_io_error(ic, "crypto_shash_init", r);
		goto failed;
	}

	if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) {
		r = crypto_shash_update(req, (__u8 *)&ic->sb->salt, SALT_SIZE);
		if (unlikely(r < 0)) {
			dm_integrity_io_error(ic, "crypto_shash_update", r);
			goto failed;
		}
	}

	r = crypto_shash_update(req, (const __u8 *)&sector_le, sizeof(sector_le));
	if (unlikely(r < 0)) {
		dm_integrity_io_error(ic, "crypto_shash_update", r);
		goto failed;
	}

	r = crypto_shash_update(req, data, ic->sectors_per_block << SECTOR_SHIFT);
	if (unlikely(r < 0)) {
		dm_integrity_io_error(ic, "crypto_shash_update", r);
		goto failed;
	}

	r = crypto_shash_final(req, result);
	if (unlikely(r < 0)) {
		dm_integrity_io_error(ic, "crypto_shash_final", r);
		goto failed;
	}

	digest_size = crypto_shash_digestsize(ic->internal_hash);
	if (unlikely(digest_size < ic->tag_size))
		memset(result + digest_size, 0, ic->tag_size - digest_size);

	return;

failed:
	/* this shouldn't happen anyway, the hash functions have no reason to fail */
	get_random_bytes(result, ic->tag_size);
}

static void integrity_metadata(struct work_struct *w)
{
	struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
	struct dm_integrity_c
*ic = dio->ic; 1696 1697 int r; 1698 1699 if (ic->internal_hash) { 1700 struct bvec_iter iter; 1701 struct bio_vec bv; 1702 unsigned int digest_size = crypto_shash_digestsize(ic->internal_hash); 1703 struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); 1704 char *checksums; 1705 unsigned int extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0; 1706 char checksums_onstack[max_t(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)]; 1707 sector_t sector; 1708 unsigned int sectors_to_process; 1709 1710 if (unlikely(ic->mode == 'R')) 1711 goto skip_io; 1712 1713 if (likely(dio->op != REQ_OP_DISCARD)) 1714 checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space, 1715 GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN); 1716 else 1717 checksums = kmalloc(PAGE_SIZE, GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN); 1718 if (!checksums) { 1719 checksums = checksums_onstack; 1720 if (WARN_ON(extra_space && 1721 digest_size > sizeof(checksums_onstack))) { 1722 r = -EINVAL; 1723 goto error; 1724 } 1725 } 1726 1727 if (unlikely(dio->op == REQ_OP_DISCARD)) { 1728 unsigned int bi_size = dio->bio_details.bi_iter.bi_size; 1729 unsigned int max_size = likely(checksums != checksums_onstack) ? PAGE_SIZE : HASH_MAX_DIGESTSIZE; 1730 unsigned int max_blocks = max_size / ic->tag_size; 1731 1732 memset(checksums, DISCARD_FILLER, max_size); 1733 1734 while (bi_size) { 1735 unsigned int this_step_blocks = bi_size >> (SECTOR_SHIFT + ic->sb->log2_sectors_per_block); 1736 1737 this_step_blocks = min(this_step_blocks, max_blocks); 1738 r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset, 1739 this_step_blocks * ic->tag_size, TAG_WRITE); 1740 if (unlikely(r)) { 1741 if (likely(checksums != checksums_onstack)) 1742 kfree(checksums); 1743 goto error; 1744 } 1745 1746 bi_size -= this_step_blocks << (SECTOR_SHIFT + ic->sb->log2_sectors_per_block); 1747 } 1748 1749 if (likely(checksums != checksums_onstack)) 1750 kfree(checksums); 1751 goto skip_io; 1752 } 1753 1754 sector = dio->range.logical_sector; 1755 sectors_to_process = dio->range.n_sectors; 1756 1757 __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) { 1758 unsigned int pos; 1759 char *mem, *checksums_ptr; 1760 1761 again: 1762 mem = bvec_kmap_local(&bv); 1763 pos = 0; 1764 checksums_ptr = checksums; 1765 do { 1766 integrity_sector_checksum(ic, sector, mem + pos, checksums_ptr); 1767 checksums_ptr += ic->tag_size; 1768 sectors_to_process -= ic->sectors_per_block; 1769 pos += ic->sectors_per_block << SECTOR_SHIFT; 1770 sector += ic->sectors_per_block; 1771 } while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack); 1772 kunmap_local(mem); 1773 1774 r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset, 1775 checksums_ptr - checksums, dio->op == REQ_OP_READ ? 
TAG_CMP : TAG_WRITE); 1776 if (unlikely(r)) { 1777 if (r > 0) { 1778 sector_t s; 1779 1780 s = sector - ((r + ic->tag_size - 1) / ic->tag_size); 1781 DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx", 1782 bio->bi_bdev, s); 1783 r = -EILSEQ; 1784 atomic64_inc(&ic->number_of_mismatches); 1785 dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum", 1786 bio, s, 0); 1787 } 1788 if (likely(checksums != checksums_onstack)) 1789 kfree(checksums); 1790 goto error; 1791 } 1792 1793 if (!sectors_to_process) 1794 break; 1795 1796 if (unlikely(pos < bv.bv_len)) { 1797 bv.bv_offset += pos; 1798 bv.bv_len -= pos; 1799 goto again; 1800 } 1801 } 1802 1803 if (likely(checksums != checksums_onstack)) 1804 kfree(checksums); 1805 } else { 1806 struct bio_integrity_payload *bip = dio->bio_details.bi_integrity; 1807 1808 if (bip) { 1809 struct bio_vec biv; 1810 struct bvec_iter iter; 1811 unsigned int data_to_process = dio->range.n_sectors; 1812 1813 sector_to_block(ic, data_to_process); 1814 data_to_process *= ic->tag_size; 1815 1816 bip_for_each_vec(biv, bip, iter) { 1817 unsigned char *tag; 1818 unsigned int this_len; 1819 1820 BUG_ON(PageHighMem(biv.bv_page)); 1821 tag = bvec_virt(&biv); 1822 this_len = min(biv.bv_len, data_to_process); 1823 r = dm_integrity_rw_tag(ic, tag, &dio->metadata_block, &dio->metadata_offset, 1824 this_len, dio->op == REQ_OP_READ ? TAG_READ : TAG_WRITE); 1825 if (unlikely(r)) 1826 goto error; 1827 data_to_process -= this_len; 1828 if (!data_to_process) 1829 break; 1830 } 1831 } 1832 } 1833 skip_io: 1834 dec_in_flight(dio); 1835 return; 1836 error: 1837 dio->bi_status = errno_to_blk_status(r); 1838 dec_in_flight(dio); 1839 } 1840 1841 static int dm_integrity_map(struct dm_target *ti, struct bio *bio) 1842 { 1843 struct dm_integrity_c *ic = ti->private; 1844 struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io)); 1845 struct bio_integrity_payload *bip; 1846 1847 sector_t area, offset; 1848 1849 dio->ic = ic; 1850 dio->bi_status = 0; 1851 dio->op = bio_op(bio); 1852 1853 if (unlikely(dio->op == REQ_OP_DISCARD)) { 1854 if (ti->max_io_len) { 1855 sector_t sec = dm_target_offset(ti, bio->bi_iter.bi_sector); 1856 unsigned int log2_max_io_len = __fls(ti->max_io_len); 1857 sector_t start_boundary = sec >> log2_max_io_len; 1858 sector_t end_boundary = (sec + bio_sectors(bio) - 1) >> log2_max_io_len; 1859 1860 if (start_boundary < end_boundary) { 1861 sector_t len = ti->max_io_len - (sec & (ti->max_io_len - 1)); 1862 1863 dm_accept_partial_bio(bio, len); 1864 } 1865 } 1866 } 1867 1868 if (unlikely(bio->bi_opf & REQ_PREFLUSH)) { 1869 submit_flush_bio(ic, dio); 1870 return DM_MAPIO_SUBMITTED; 1871 } 1872 1873 dio->range.logical_sector = dm_target_offset(ti, bio->bi_iter.bi_sector); 1874 dio->fua = dio->op == REQ_OP_WRITE && bio->bi_opf & REQ_FUA; 1875 if (unlikely(dio->fua)) { 1876 /* 1877 * Don't pass down the FUA flag because we have to flush 1878 * disk cache anyway. 
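		 * The flush still happens: do_endio_flush() routes completed
		 * FUA writes through submit_flush_bio(), so the cache flush is
		 * performed from the commit path.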
1879 */ 1880 bio->bi_opf &= ~REQ_FUA; 1881 } 1882 if (unlikely(dio->range.logical_sector + bio_sectors(bio) > ic->provided_data_sectors)) { 1883 DMERR("Too big sector number: 0x%llx + 0x%x > 0x%llx", 1884 dio->range.logical_sector, bio_sectors(bio), 1885 ic->provided_data_sectors); 1886 return DM_MAPIO_KILL; 1887 } 1888 if (unlikely((dio->range.logical_sector | bio_sectors(bio)) & (unsigned int)(ic->sectors_per_block - 1))) { 1889 DMERR("Bio not aligned on %u sectors: 0x%llx, 0x%x", 1890 ic->sectors_per_block, 1891 dio->range.logical_sector, bio_sectors(bio)); 1892 return DM_MAPIO_KILL; 1893 } 1894 1895 if (ic->sectors_per_block > 1 && likely(dio->op != REQ_OP_DISCARD)) { 1896 struct bvec_iter iter; 1897 struct bio_vec bv; 1898 1899 bio_for_each_segment(bv, bio, iter) { 1900 if (unlikely(bv.bv_len & ((ic->sectors_per_block << SECTOR_SHIFT) - 1))) { 1901 DMERR("Bio vector (%u,%u) is not aligned on %u-sector boundary", 1902 bv.bv_offset, bv.bv_len, ic->sectors_per_block); 1903 return DM_MAPIO_KILL; 1904 } 1905 } 1906 } 1907 1908 bip = bio_integrity(bio); 1909 if (!ic->internal_hash) { 1910 if (bip) { 1911 unsigned int wanted_tag_size = bio_sectors(bio) >> ic->sb->log2_sectors_per_block; 1912 1913 if (ic->log2_tag_size >= 0) 1914 wanted_tag_size <<= ic->log2_tag_size; 1915 else 1916 wanted_tag_size *= ic->tag_size; 1917 if (unlikely(wanted_tag_size != bip->bip_iter.bi_size)) { 1918 DMERR("Invalid integrity data size %u, expected %u", 1919 bip->bip_iter.bi_size, wanted_tag_size); 1920 return DM_MAPIO_KILL; 1921 } 1922 } 1923 } else { 1924 if (unlikely(bip != NULL)) { 1925 DMERR("Unexpected integrity data when using internal hash"); 1926 return DM_MAPIO_KILL; 1927 } 1928 } 1929 1930 if (unlikely(ic->mode == 'R') && unlikely(dio->op != REQ_OP_READ)) 1931 return DM_MAPIO_KILL; 1932 1933 get_area_and_offset(ic, dio->range.logical_sector, &area, &offset); 1934 dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, &dio->metadata_offset); 1935 bio->bi_iter.bi_sector = get_data_sector(ic, area, offset); 1936 1937 dm_integrity_map_continue(dio, true); 1938 return DM_MAPIO_SUBMITTED; 1939 } 1940 1941 static bool __journal_read_write(struct dm_integrity_io *dio, struct bio *bio, 1942 unsigned int journal_section, unsigned int journal_entry) 1943 { 1944 struct dm_integrity_c *ic = dio->ic; 1945 sector_t logical_sector; 1946 unsigned int n_sectors; 1947 1948 logical_sector = dio->range.logical_sector; 1949 n_sectors = dio->range.n_sectors; 1950 do { 1951 struct bio_vec bv = bio_iovec(bio); 1952 char *mem; 1953 1954 if (unlikely(bv.bv_len >> SECTOR_SHIFT > n_sectors)) 1955 bv.bv_len = n_sectors << SECTOR_SHIFT; 1956 n_sectors -= bv.bv_len >> SECTOR_SHIFT; 1957 bio_advance_iter(bio, &bio->bi_iter, bv.bv_len); 1958 retry_kmap: 1959 mem = kmap_local_page(bv.bv_page); 1960 if (likely(dio->op == REQ_OP_WRITE)) 1961 flush_dcache_page(bv.bv_page); 1962 1963 do { 1964 struct journal_entry *je = access_journal_entry(ic, journal_section, journal_entry); 1965 1966 if (unlikely(dio->op == REQ_OP_READ)) { 1967 struct journal_sector *js; 1968 char *mem_ptr; 1969 unsigned int s; 1970 1971 if (unlikely(journal_entry_is_inprogress(je))) { 1972 flush_dcache_page(bv.bv_page); 1973 kunmap_local(mem); 1974 1975 __io_wait_event(ic->copy_to_journal_wait, !journal_entry_is_inprogress(je)); 1976 goto retry_kmap; 1977 } 1978 smp_rmb(); 1979 BUG_ON(journal_entry_get_sector(je) != logical_sector); 1980 js = access_journal_data(ic, journal_section, journal_entry); 1981 mem_ptr = mem + bv.bv_offset; 1982 s = 0; 1983 do 
{ 1984 memcpy(mem_ptr, js, JOURNAL_SECTOR_DATA); 1985 *(commit_id_t *)(mem_ptr + JOURNAL_SECTOR_DATA) = je->last_bytes[s]; 1986 js++; 1987 mem_ptr += 1 << SECTOR_SHIFT; 1988 } while (++s < ic->sectors_per_block); 1989 #ifdef INTERNAL_VERIFY 1990 if (ic->internal_hash) { 1991 char checksums_onstack[max_t(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)]; 1992 1993 integrity_sector_checksum(ic, logical_sector, mem + bv.bv_offset, checksums_onstack); 1994 if (unlikely(memcmp(checksums_onstack, journal_entry_tag(ic, je), ic->tag_size))) { 1995 DMERR_LIMIT("Checksum failed when reading from journal, at sector 0x%llx", 1996 logical_sector); 1997 dm_audit_log_bio(DM_MSG_PREFIX, "journal-checksum", 1998 bio, logical_sector, 0); 1999 } 2000 } 2001 #endif 2002 } 2003 2004 if (!ic->internal_hash) { 2005 struct bio_integrity_payload *bip = bio_integrity(bio); 2006 unsigned int tag_todo = ic->tag_size; 2007 char *tag_ptr = journal_entry_tag(ic, je); 2008 2009 if (bip) { 2010 do { 2011 struct bio_vec biv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter); 2012 unsigned int tag_now = min(biv.bv_len, tag_todo); 2013 char *tag_addr; 2014 2015 BUG_ON(PageHighMem(biv.bv_page)); 2016 tag_addr = bvec_virt(&biv); 2017 if (likely(dio->op == REQ_OP_WRITE)) 2018 memcpy(tag_ptr, tag_addr, tag_now); 2019 else 2020 memcpy(tag_addr, tag_ptr, tag_now); 2021 bvec_iter_advance(bip->bip_vec, &bip->bip_iter, tag_now); 2022 tag_ptr += tag_now; 2023 tag_todo -= tag_now; 2024 } while (unlikely(tag_todo)); 2025 } else if (likely(dio->op == REQ_OP_WRITE)) 2026 memset(tag_ptr, 0, tag_todo); 2027 } 2028 2029 if (likely(dio->op == REQ_OP_WRITE)) { 2030 struct journal_sector *js; 2031 unsigned int s; 2032 2033 js = access_journal_data(ic, journal_section, journal_entry); 2034 memcpy(js, mem + bv.bv_offset, ic->sectors_per_block << SECTOR_SHIFT); 2035 2036 s = 0; 2037 do { 2038 je->last_bytes[s] = js[s].commit_id; 2039 } while (++s < ic->sectors_per_block); 2040 2041 if (ic->internal_hash) { 2042 unsigned int digest_size = crypto_shash_digestsize(ic->internal_hash); 2043 2044 if (unlikely(digest_size > ic->tag_size)) { 2045 char checksums_onstack[HASH_MAX_DIGESTSIZE]; 2046 2047 integrity_sector_checksum(ic, logical_sector, (char *)js, checksums_onstack); 2048 memcpy(journal_entry_tag(ic, je), checksums_onstack, ic->tag_size); 2049 } else 2050 integrity_sector_checksum(ic, logical_sector, (char *)js, journal_entry_tag(ic, je)); 2051 } 2052 2053 journal_entry_set_sector(je, logical_sector); 2054 } 2055 logical_sector += ic->sectors_per_block; 2056 2057 journal_entry++; 2058 if (unlikely(journal_entry == ic->journal_section_entries)) { 2059 journal_entry = 0; 2060 journal_section++; 2061 wraparound_section(ic, &journal_section); 2062 } 2063 2064 bv.bv_offset += ic->sectors_per_block << SECTOR_SHIFT; 2065 } while (bv.bv_len -= ic->sectors_per_block << SECTOR_SHIFT); 2066 2067 if (unlikely(dio->op == REQ_OP_READ)) 2068 flush_dcache_page(bv.bv_page); 2069 kunmap_local(mem); 2070 } while (n_sectors); 2071 2072 if (likely(dio->op == REQ_OP_WRITE)) { 2073 smp_mb(); 2074 if (unlikely(waitqueue_active(&ic->copy_to_journal_wait))) 2075 wake_up(&ic->copy_to_journal_wait); 2076 if (READ_ONCE(ic->free_sectors) <= ic->free_sectors_threshold) 2077 queue_work(ic->commit_wq, &ic->commit_work); 2078 else 2079 schedule_autocommit(ic); 2080 } else 2081 remove_range(ic, &dio->range); 2082 2083 if (unlikely(bio->bi_iter.bi_size)) { 2084 sector_t area, offset; 2085 2086 dio->range.logical_sector = logical_sector; 2087 get_area_and_offset(ic, dio->range.logical_sector, 
&area, &offset); 2088 dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, &dio->metadata_offset); 2089 return true; 2090 } 2091 2092 return false; 2093 } 2094 2095 static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map) 2096 { 2097 struct dm_integrity_c *ic = dio->ic; 2098 struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); 2099 unsigned int journal_section, journal_entry; 2100 unsigned int journal_read_pos; 2101 struct completion read_comp; 2102 bool discard_retried = false; 2103 bool need_sync_io = ic->internal_hash && dio->op == REQ_OP_READ; 2104 2105 if (unlikely(dio->op == REQ_OP_DISCARD) && ic->mode != 'D') 2106 need_sync_io = true; 2107 2108 if (need_sync_io && from_map) { 2109 INIT_WORK(&dio->work, integrity_bio_wait); 2110 queue_work(ic->offload_wq, &dio->work); 2111 return; 2112 } 2113 2114 lock_retry: 2115 spin_lock_irq(&ic->endio_wait.lock); 2116 retry: 2117 if (unlikely(dm_integrity_failed(ic))) { 2118 spin_unlock_irq(&ic->endio_wait.lock); 2119 do_endio(ic, bio); 2120 return; 2121 } 2122 dio->range.n_sectors = bio_sectors(bio); 2123 journal_read_pos = NOT_FOUND; 2124 if (ic->mode == 'J' && likely(dio->op != REQ_OP_DISCARD)) { 2125 if (dio->op == REQ_OP_WRITE) { 2126 unsigned int next_entry, i, pos; 2127 unsigned int ws, we, range_sectors; 2128 2129 dio->range.n_sectors = min(dio->range.n_sectors, 2130 (sector_t)ic->free_sectors << ic->sb->log2_sectors_per_block); 2131 if (unlikely(!dio->range.n_sectors)) { 2132 if (from_map) 2133 goto offload_to_thread; 2134 sleep_on_endio_wait(ic); 2135 goto retry; 2136 } 2137 range_sectors = dio->range.n_sectors >> ic->sb->log2_sectors_per_block; 2138 ic->free_sectors -= range_sectors; 2139 journal_section = ic->free_section; 2140 journal_entry = ic->free_section_entry; 2141 2142 next_entry = ic->free_section_entry + range_sectors; 2143 ic->free_section_entry = next_entry % ic->journal_section_entries; 2144 ic->free_section += next_entry / ic->journal_section_entries; 2145 ic->n_uncommitted_sections += next_entry / ic->journal_section_entries; 2146 wraparound_section(ic, &ic->free_section); 2147 2148 pos = journal_section * ic->journal_section_entries + journal_entry; 2149 ws = journal_section; 2150 we = journal_entry; 2151 i = 0; 2152 do { 2153 struct journal_entry *je; 2154 2155 add_journal_node(ic, &ic->journal_tree[pos], dio->range.logical_sector + i); 2156 pos++; 2157 if (unlikely(pos >= ic->journal_entries)) 2158 pos = 0; 2159 2160 je = access_journal_entry(ic, ws, we); 2161 BUG_ON(!journal_entry_is_unused(je)); 2162 journal_entry_set_inprogress(je); 2163 we++; 2164 if (unlikely(we == ic->journal_section_entries)) { 2165 we = 0; 2166 ws++; 2167 wraparound_section(ic, &ws); 2168 } 2169 } while ((i += ic->sectors_per_block) < dio->range.n_sectors); 2170 2171 spin_unlock_irq(&ic->endio_wait.lock); 2172 goto journal_read_write; 2173 } else { 2174 sector_t next_sector; 2175 2176 journal_read_pos = find_journal_node(ic, dio->range.logical_sector, &next_sector); 2177 if (likely(journal_read_pos == NOT_FOUND)) { 2178 if (unlikely(dio->range.n_sectors > next_sector - dio->range.logical_sector)) 2179 dio->range.n_sectors = next_sector - dio->range.logical_sector; 2180 } else { 2181 unsigned int i; 2182 unsigned int jp = journal_read_pos + 1; 2183 2184 for (i = ic->sectors_per_block; i < dio->range.n_sectors; i += ic->sectors_per_block, jp++) { 2185 if (!test_journal_node(ic, jp, dio->range.logical_sector + i)) 2186 break; 2187 } 2188 dio->range.n_sectors = i; 2189 } 2190 
} 2191 } 2192 if (unlikely(!add_new_range(ic, &dio->range, true))) { 2193 /* 2194 * We must not sleep in the request routine because it could 2195 * stall bios on current->bio_list. 2196 * So, we offload the bio to a workqueue if we have to sleep. 2197 */ 2198 if (from_map) { 2199 offload_to_thread: 2200 spin_unlock_irq(&ic->endio_wait.lock); 2201 INIT_WORK(&dio->work, integrity_bio_wait); 2202 queue_work(ic->wait_wq, &dio->work); 2203 return; 2204 } 2205 if (journal_read_pos != NOT_FOUND) 2206 dio->range.n_sectors = ic->sectors_per_block; 2207 wait_and_add_new_range(ic, &dio->range); 2208 /* 2209 * wait_and_add_new_range drops the spinlock, so the journal 2210 * may have been changed arbitrarily. We need to recheck. 2211 * To simplify the code, we restrict I/O size to just one block. 2212 */ 2213 if (journal_read_pos != NOT_FOUND) { 2214 sector_t next_sector; 2215 unsigned int new_pos; 2216 2217 new_pos = find_journal_node(ic, dio->range.logical_sector, &next_sector); 2218 if (unlikely(new_pos != journal_read_pos)) { 2219 remove_range_unlocked(ic, &dio->range); 2220 goto retry; 2221 } 2222 } 2223 } 2224 if (ic->mode == 'J' && likely(dio->op == REQ_OP_DISCARD) && !discard_retried) { 2225 sector_t next_sector; 2226 unsigned int new_pos; 2227 2228 new_pos = find_journal_node(ic, dio->range.logical_sector, &next_sector); 2229 if (unlikely(new_pos != NOT_FOUND) || 2230 unlikely(next_sector < dio->range.logical_sector - dio->range.n_sectors)) { 2231 remove_range_unlocked(ic, &dio->range); 2232 spin_unlock_irq(&ic->endio_wait.lock); 2233 queue_work(ic->commit_wq, &ic->commit_work); 2234 flush_workqueue(ic->commit_wq); 2235 queue_work(ic->writer_wq, &ic->writer_work); 2236 flush_workqueue(ic->writer_wq); 2237 discard_retried = true; 2238 goto lock_retry; 2239 } 2240 } 2241 spin_unlock_irq(&ic->endio_wait.lock); 2242 2243 if (unlikely(journal_read_pos != NOT_FOUND)) { 2244 journal_section = journal_read_pos / ic->journal_section_entries; 2245 journal_entry = journal_read_pos % ic->journal_section_entries; 2246 goto journal_read_write; 2247 } 2248 2249 if (ic->mode == 'B' && (dio->op == REQ_OP_WRITE || unlikely(dio->op == REQ_OP_DISCARD))) { 2250 if (!block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector, 2251 dio->range.n_sectors, BITMAP_OP_TEST_ALL_SET)) { 2252 struct bitmap_block_status *bbs; 2253 2254 bbs = sector_to_bitmap_block(ic, dio->range.logical_sector); 2255 spin_lock(&bbs->bio_queue_lock); 2256 bio_list_add(&bbs->bio_queue, bio); 2257 spin_unlock(&bbs->bio_queue_lock); 2258 queue_work(ic->writer_wq, &bbs->work); 2259 return; 2260 } 2261 } 2262 2263 dio->in_flight = (atomic_t)ATOMIC_INIT(2); 2264 2265 if (need_sync_io) { 2266 init_completion(&read_comp); 2267 dio->completion = &read_comp; 2268 } else 2269 dio->completion = NULL; 2270 2271 dm_bio_record(&dio->bio_details, bio); 2272 bio_set_dev(bio, ic->dev->bdev); 2273 bio->bi_integrity = NULL; 2274 bio->bi_opf &= ~REQ_INTEGRITY; 2275 bio->bi_end_io = integrity_end_io; 2276 bio->bi_iter.bi_size = dio->range.n_sectors << SECTOR_SHIFT; 2277 2278 if (unlikely(dio->op == REQ_OP_DISCARD) && likely(ic->mode != 'D')) { 2279 integrity_metadata(&dio->work); 2280 dm_integrity_flush_buffers(ic, false); 2281 2282 dio->in_flight = (atomic_t)ATOMIC_INIT(1); 2283 dio->completion = NULL; 2284 2285 submit_bio_noacct(bio); 2286 2287 return; 2288 } 2289 2290 submit_bio_noacct(bio); 2291 2292 if (need_sync_io) { 2293 wait_for_completion_io(&read_comp); 2294 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) && 2295 
dio->range.logical_sector + dio->range.n_sectors > le64_to_cpu(ic->sb->recalc_sector)) 2296 goto skip_check; 2297 if (ic->mode == 'B') { 2298 if (!block_bitmap_op(ic, ic->recalc_bitmap, dio->range.logical_sector, 2299 dio->range.n_sectors, BITMAP_OP_TEST_ALL_CLEAR)) 2300 goto skip_check; 2301 } 2302 2303 if (likely(!bio->bi_status)) 2304 integrity_metadata(&dio->work); 2305 else 2306 skip_check: 2307 dec_in_flight(dio); 2308 } else { 2309 INIT_WORK(&dio->work, integrity_metadata); 2310 queue_work(ic->metadata_wq, &dio->work); 2311 } 2312 2313 return; 2314 2315 journal_read_write: 2316 if (unlikely(__journal_read_write(dio, bio, journal_section, journal_entry))) 2317 goto lock_retry; 2318 2319 do_endio_flush(ic, dio); 2320 } 2321 2322 2323 static void integrity_bio_wait(struct work_struct *w) 2324 { 2325 struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work); 2326 2327 dm_integrity_map_continue(dio, false); 2328 } 2329 2330 static void pad_uncommitted(struct dm_integrity_c *ic) 2331 { 2332 if (ic->free_section_entry) { 2333 ic->free_sectors -= ic->journal_section_entries - ic->free_section_entry; 2334 ic->free_section_entry = 0; 2335 ic->free_section++; 2336 wraparound_section(ic, &ic->free_section); 2337 ic->n_uncommitted_sections++; 2338 } 2339 if (WARN_ON(ic->journal_sections * ic->journal_section_entries != 2340 (ic->n_uncommitted_sections + ic->n_committed_sections) * 2341 ic->journal_section_entries + ic->free_sectors)) { 2342 DMCRIT("journal_sections %u, journal_section_entries %u, " 2343 "n_uncommitted_sections %u, n_committed_sections %u, " 2344 "journal_section_entries %u, free_sectors %u", 2345 ic->journal_sections, ic->journal_section_entries, 2346 ic->n_uncommitted_sections, ic->n_committed_sections, 2347 ic->journal_section_entries, ic->free_sectors); 2348 } 2349 } 2350 2351 static void integrity_commit(struct work_struct *w) 2352 { 2353 struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, commit_work); 2354 unsigned int commit_start, commit_sections; 2355 unsigned int i, j, n; 2356 struct bio *flushes; 2357 2358 del_timer(&ic->autocommit_timer); 2359 2360 spin_lock_irq(&ic->endio_wait.lock); 2361 flushes = bio_list_get(&ic->flush_bio_list); 2362 if (unlikely(ic->mode != 'J')) { 2363 spin_unlock_irq(&ic->endio_wait.lock); 2364 dm_integrity_flush_buffers(ic, true); 2365 goto release_flush_bios; 2366 } 2367 2368 pad_uncommitted(ic); 2369 commit_start = ic->uncommitted_section; 2370 commit_sections = ic->n_uncommitted_sections; 2371 spin_unlock_irq(&ic->endio_wait.lock); 2372 2373 if (!commit_sections) 2374 goto release_flush_bios; 2375 2376 ic->wrote_to_journal = true; 2377 2378 i = commit_start; 2379 for (n = 0; n < commit_sections; n++) { 2380 for (j = 0; j < ic->journal_section_entries; j++) { 2381 struct journal_entry *je; 2382 2383 je = access_journal_entry(ic, i, j); 2384 io_wait_event(ic->copy_to_journal_wait, !journal_entry_is_inprogress(je)); 2385 } 2386 for (j = 0; j < ic->journal_section_sectors; j++) { 2387 struct journal_sector *js; 2388 2389 js = access_journal(ic, i, j); 2390 js->commit_id = dm_integrity_commit_id(ic, i, j, ic->commit_seq); 2391 } 2392 i++; 2393 if (unlikely(i >= ic->journal_sections)) 2394 ic->commit_seq = next_commit_seq(ic->commit_seq); 2395 wraparound_section(ic, &i); 2396 } 2397 smp_rmb(); 2398 2399 write_journal(ic, commit_start, commit_sections); 2400 2401 spin_lock_irq(&ic->endio_wait.lock); 2402 ic->uncommitted_section += commit_sections; 2403 wraparound_section(ic, &ic->uncommitted_section); 2404 
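/*
 * The sections just written to the journal move from the "uncommitted"
 * to the "committed" accounting below; integrity_writer will later copy
 * them to their final location on the data device.
 */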
ic->n_uncommitted_sections -= commit_sections; 2405 ic->n_committed_sections += commit_sections; 2406 spin_unlock_irq(&ic->endio_wait.lock); 2407 2408 if (READ_ONCE(ic->free_sectors) <= ic->free_sectors_threshold) 2409 queue_work(ic->writer_wq, &ic->writer_work); 2410 2411 release_flush_bios: 2412 while (flushes) { 2413 struct bio *next = flushes->bi_next; 2414 2415 flushes->bi_next = NULL; 2416 do_endio(ic, flushes); 2417 flushes = next; 2418 } 2419 } 2420 2421 static void complete_copy_from_journal(unsigned long error, void *context) 2422 { 2423 struct journal_io *io = context; 2424 struct journal_completion *comp = io->comp; 2425 struct dm_integrity_c *ic = comp->ic; 2426 2427 remove_range(ic, &io->range); 2428 mempool_free(io, &ic->journal_io_mempool); 2429 if (unlikely(error != 0)) 2430 dm_integrity_io_error(ic, "copying from journal", -EIO); 2431 complete_journal_op(comp); 2432 } 2433 2434 static void restore_last_bytes(struct dm_integrity_c *ic, struct journal_sector *js, 2435 struct journal_entry *je) 2436 { 2437 unsigned int s = 0; 2438 2439 do { 2440 js->commit_id = je->last_bytes[s]; 2441 js++; 2442 } while (++s < ic->sectors_per_block); 2443 } 2444 2445 static void do_journal_write(struct dm_integrity_c *ic, unsigned int write_start, 2446 unsigned int write_sections, bool from_replay) 2447 { 2448 unsigned int i, j, n; 2449 struct journal_completion comp; 2450 struct blk_plug plug; 2451 2452 blk_start_plug(&plug); 2453 2454 comp.ic = ic; 2455 comp.in_flight = (atomic_t)ATOMIC_INIT(1); 2456 init_completion(&comp.comp); 2457 2458 i = write_start; 2459 for (n = 0; n < write_sections; n++, i++, wraparound_section(ic, &i)) { 2460 #ifndef INTERNAL_VERIFY 2461 if (unlikely(from_replay)) 2462 #endif 2463 rw_section_mac(ic, i, false); 2464 for (j = 0; j < ic->journal_section_entries; j++) { 2465 struct journal_entry *je = access_journal_entry(ic, i, j); 2466 sector_t sec, area, offset; 2467 unsigned int k, l, next_loop; 2468 sector_t metadata_block; 2469 unsigned int metadata_offset; 2470 struct journal_io *io; 2471 2472 if (journal_entry_is_unused(je)) 2473 continue; 2474 BUG_ON(unlikely(journal_entry_is_inprogress(je)) && !from_replay); 2475 sec = journal_entry_get_sector(je); 2476 if (unlikely(from_replay)) { 2477 if (unlikely(sec & (unsigned int)(ic->sectors_per_block - 1))) { 2478 dm_integrity_io_error(ic, "invalid sector in journal", -EIO); 2479 sec &= ~(sector_t)(ic->sectors_per_block - 1); 2480 } 2481 if (unlikely(sec >= ic->provided_data_sectors)) { 2482 journal_entry_set_unused(je); 2483 continue; 2484 } 2485 } 2486 get_area_and_offset(ic, sec, &area, &offset); 2487 restore_last_bytes(ic, access_journal_data(ic, i, j), je); 2488 for (k = j + 1; k < ic->journal_section_entries; k++) { 2489 struct journal_entry *je2 = access_journal_entry(ic, i, k); 2490 sector_t sec2, area2, offset2; 2491 2492 if (journal_entry_is_unused(je2)) 2493 break; 2494 BUG_ON(unlikely(journal_entry_is_inprogress(je2)) && !from_replay); 2495 sec2 = journal_entry_get_sector(je2); 2496 if (unlikely(sec2 >= ic->provided_data_sectors)) 2497 break; 2498 get_area_and_offset(ic, sec2, &area2, &offset2); 2499 if (area2 != area || offset2 != offset + ((k - j) << ic->sb->log2_sectors_per_block)) 2500 break; 2501 restore_last_bytes(ic, access_journal_data(ic, i, k), je2); 2502 } 2503 next_loop = k - 1; 2504 2505 io = mempool_alloc(&ic->journal_io_mempool, GFP_NOIO); 2506 io->comp = &comp; 2507 io->range.logical_sector = sec; 2508 io->range.n_sectors = (k - j) << ic->sb->log2_sectors_per_block; 2509 2510
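/*
 * Lock the target range so that application I/O to these sectors waits
 * until the journal entries have been copied back to the data device
 * (the range is released in complete_copy_from_journal).
 */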
spin_lock_irq(&ic->endio_wait.lock); 2511 add_new_range_and_wait(ic, &io->range); 2512 2513 if (likely(!from_replay)) { 2514 struct journal_node *section_node = &ic->journal_tree[i * ic->journal_section_entries]; 2515 2516 /* don't write if there is newer committed sector */ 2517 while (j < k && find_newer_committed_node(ic, &section_node[j])) { 2518 struct journal_entry *je2 = access_journal_entry(ic, i, j); 2519 2520 journal_entry_set_unused(je2); 2521 remove_journal_node(ic, &section_node[j]); 2522 j++; 2523 sec += ic->sectors_per_block; 2524 offset += ic->sectors_per_block; 2525 } 2526 while (j < k && find_newer_committed_node(ic, &section_node[k - 1])) { 2527 struct journal_entry *je2 = access_journal_entry(ic, i, k - 1); 2528 2529 journal_entry_set_unused(je2); 2530 remove_journal_node(ic, &section_node[k - 1]); 2531 k--; 2532 } 2533 if (j == k) { 2534 remove_range_unlocked(ic, &io->range); 2535 spin_unlock_irq(&ic->endio_wait.lock); 2536 mempool_free(io, &ic->journal_io_mempool); 2537 goto skip_io; 2538 } 2539 for (l = j; l < k; l++) 2540 remove_journal_node(ic, &section_node[l]); 2541 } 2542 spin_unlock_irq(&ic->endio_wait.lock); 2543 2544 metadata_block = get_metadata_sector_and_offset(ic, area, offset, &metadata_offset); 2545 for (l = j; l < k; l++) { 2546 int r; 2547 struct journal_entry *je2 = access_journal_entry(ic, i, l); 2548 2549 if ( 2550 #ifndef INTERNAL_VERIFY 2551 unlikely(from_replay) && 2552 #endif 2553 ic->internal_hash) { 2554 char test_tag[max_t(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)]; 2555 2556 integrity_sector_checksum(ic, sec + ((l - j) << ic->sb->log2_sectors_per_block), 2557 (char *)access_journal_data(ic, i, l), test_tag); 2558 if (unlikely(memcmp(test_tag, journal_entry_tag(ic, je2), ic->tag_size))) { 2559 dm_integrity_io_error(ic, "tag mismatch when replaying journal", -EILSEQ); 2560 dm_audit_log_target(DM_MSG_PREFIX, "integrity-replay-journal", ic->ti, 0); 2561 } 2562 } 2563 2564 journal_entry_set_unused(je2); 2565 r = dm_integrity_rw_tag(ic, journal_entry_tag(ic, je2), &metadata_block, &metadata_offset, 2566 ic->tag_size, TAG_WRITE); 2567 if (unlikely(r)) 2568 dm_integrity_io_error(ic, "writing tags", r); 2569 } 2570 2571 atomic_inc(&comp.in_flight); 2572 copy_from_journal(ic, i, j << ic->sb->log2_sectors_per_block, 2573 (k - j) << ic->sb->log2_sectors_per_block, 2574 get_data_sector(ic, area, offset), 2575 complete_copy_from_journal, io); 2576 skip_io: 2577 j = next_loop; 2578 } 2579 } 2580 2581 dm_bufio_write_dirty_buffers_async(ic->bufio); 2582 2583 blk_finish_plug(&plug); 2584 2585 complete_journal_op(&comp); 2586 wait_for_completion_io(&comp.comp); 2587 2588 dm_integrity_flush_buffers(ic, true); 2589 } 2590 2591 static void integrity_writer(struct work_struct *w) 2592 { 2593 struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, writer_work); 2594 unsigned int write_start, write_sections; 2595 unsigned int prev_free_sectors; 2596 2597 spin_lock_irq(&ic->endio_wait.lock); 2598 write_start = ic->committed_section; 2599 write_sections = ic->n_committed_sections; 2600 spin_unlock_irq(&ic->endio_wait.lock); 2601 2602 if (!write_sections) 2603 return; 2604 2605 do_journal_write(ic, write_start, write_sections, false); 2606 2607 spin_lock_irq(&ic->endio_wait.lock); 2608 2609 ic->committed_section += write_sections; 2610 wraparound_section(ic, &ic->committed_section); 2611 ic->n_committed_sections -= write_sections; 2612 2613 prev_free_sectors = ic->free_sectors; 2614 ic->free_sectors += write_sections * ic->journal_section_entries; 2615 if
(unlikely(!prev_free_sectors)) 2616 wake_up_locked(&ic->endio_wait); 2617 2618 spin_unlock_irq(&ic->endio_wait.lock); 2619 } 2620 2621 static void recalc_write_super(struct dm_integrity_c *ic) 2622 { 2623 int r; 2624 2625 dm_integrity_flush_buffers(ic, false); 2626 if (dm_integrity_failed(ic)) 2627 return; 2628 2629 r = sync_rw_sb(ic, REQ_OP_WRITE); 2630 if (unlikely(r)) 2631 dm_integrity_io_error(ic, "writing superblock", r); 2632 } 2633 2634 static void integrity_recalc(struct work_struct *w) 2635 { 2636 struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, recalc_work); 2637 size_t recalc_tags_size; 2638 u8 *recalc_buffer = NULL; 2639 u8 *recalc_tags = NULL; 2640 struct dm_integrity_range range; 2641 struct dm_io_request io_req; 2642 struct dm_io_region io_loc; 2643 sector_t area, offset; 2644 sector_t metadata_block; 2645 unsigned int metadata_offset; 2646 sector_t logical_sector, n_sectors; 2647 __u8 *t; 2648 unsigned int i; 2649 int r; 2650 unsigned int super_counter = 0; 2651 unsigned recalc_sectors = RECALC_SECTORS; 2652 2653 retry: 2654 recalc_buffer = __vmalloc(recalc_sectors << SECTOR_SHIFT, GFP_NOIO); 2655 if (!recalc_buffer) { 2656 oom: 2657 recalc_sectors >>= 1; 2658 if (recalc_sectors >= 1U << ic->sb->log2_sectors_per_block) 2659 goto retry; 2660 DMCRIT("out of memory for recalculate buffer - recalculation disabled"); 2661 goto free_ret; 2662 } 2663 recalc_tags_size = (recalc_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size; 2664 if (crypto_shash_digestsize(ic->internal_hash) > ic->tag_size) 2665 recalc_tags_size += crypto_shash_digestsize(ic->internal_hash) - ic->tag_size; 2666 recalc_tags = kvmalloc(recalc_tags_size, GFP_NOIO); 2667 if (!recalc_tags) { 2668 vfree(recalc_buffer); 2669 recalc_buffer = NULL; 2670 goto oom; 2671 } 2672 2673 DEBUG_print("start recalculation... 
(position %llx)\n", le64_to_cpu(ic->sb->recalc_sector)); 2674 2675 spin_lock_irq(&ic->endio_wait.lock); 2676 2677 next_chunk: 2678 2679 if (unlikely(dm_post_suspending(ic->ti))) 2680 goto unlock_ret; 2681 2682 range.logical_sector = le64_to_cpu(ic->sb->recalc_sector); 2683 if (unlikely(range.logical_sector >= ic->provided_data_sectors)) { 2684 if (ic->mode == 'B') { 2685 block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR); 2686 DEBUG_print("queue_delayed_work: bitmap_flush_work\n"); 2687 queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, 0); 2688 } 2689 goto unlock_ret; 2690 } 2691 2692 get_area_and_offset(ic, range.logical_sector, &area, &offset); 2693 range.n_sectors = min((sector_t)recalc_sectors, ic->provided_data_sectors - range.logical_sector); 2694 if (!ic->meta_dev) 2695 range.n_sectors = min(range.n_sectors, ((sector_t)1U << ic->sb->log2_interleave_sectors) - (unsigned int)offset); 2696 2697 add_new_range_and_wait(ic, &range); 2698 spin_unlock_irq(&ic->endio_wait.lock); 2699 logical_sector = range.logical_sector; 2700 n_sectors = range.n_sectors; 2701 2702 if (ic->mode == 'B') { 2703 if (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector, n_sectors, BITMAP_OP_TEST_ALL_CLEAR)) 2704 goto advance_and_next; 2705 2706 while (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector, 2707 ic->sectors_per_block, BITMAP_OP_TEST_ALL_CLEAR)) { 2708 logical_sector += ic->sectors_per_block; 2709 n_sectors -= ic->sectors_per_block; 2710 cond_resched(); 2711 } 2712 while (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector + n_sectors - ic->sectors_per_block, 2713 ic->sectors_per_block, BITMAP_OP_TEST_ALL_CLEAR)) { 2714 n_sectors -= ic->sectors_per_block; 2715 cond_resched(); 2716 } 2717 get_area_and_offset(ic, logical_sector, &area, &offset); 2718 } 2719 2720 DEBUG_print("recalculating: %llx, %llx\n", logical_sector, n_sectors); 2721 2722 if (unlikely(++super_counter == RECALC_WRITE_SUPER)) { 2723 recalc_write_super(ic); 2724 if (ic->mode == 'B') 2725 queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, ic->bitmap_flush_interval); 2726 2727 super_counter = 0; 2728 } 2729 2730 if (unlikely(dm_integrity_failed(ic))) 2731 goto err; 2732 2733 io_req.bi_opf = REQ_OP_READ; 2734 io_req.mem.type = DM_IO_VMA; 2735 io_req.mem.ptr.addr = recalc_buffer; 2736 io_req.notify.fn = NULL; 2737 io_req.client = ic->io; 2738 io_loc.bdev = ic->dev->bdev; 2739 io_loc.sector = get_data_sector(ic, area, offset); 2740 io_loc.count = n_sectors; 2741 2742 r = dm_io(&io_req, 1, &io_loc, NULL); 2743 if (unlikely(r)) { 2744 dm_integrity_io_error(ic, "reading data", r); 2745 goto err; 2746 } 2747 2748 t = recalc_tags; 2749 for (i = 0; i < n_sectors; i += ic->sectors_per_block) { 2750 integrity_sector_checksum(ic, logical_sector + i, recalc_buffer + (i << SECTOR_SHIFT), t); 2751 t += ic->tag_size; 2752 } 2753 2754 metadata_block = get_metadata_sector_and_offset(ic, area, offset, &metadata_offset); 2755 2756 r = dm_integrity_rw_tag(ic, recalc_tags, &metadata_block, &metadata_offset, t - recalc_tags, TAG_WRITE); 2757 if (unlikely(r)) { 2758 dm_integrity_io_error(ic, "writing tags", r); 2759 goto err; 2760 } 2761 2762 if (ic->mode == 'B') { 2763 sector_t start, end; 2764 2765 start = (range.logical_sector >> 2766 (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit)) << 2767 (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit); 2768 end = ((range.logical_sector + range.n_sectors) >> 2769 (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit)) << 
2770 (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit); 2771 block_bitmap_op(ic, ic->recalc_bitmap, start, end - start, BITMAP_OP_CLEAR); 2772 } 2773 2774 advance_and_next: 2775 cond_resched(); 2776 2777 spin_lock_irq(&ic->endio_wait.lock); 2778 remove_range_unlocked(ic, &range); 2779 ic->sb->recalc_sector = cpu_to_le64(range.logical_sector + range.n_sectors); 2780 goto next_chunk; 2781 2782 err: 2783 remove_range(ic, &range); 2784 goto free_ret; 2785 2786 unlock_ret: 2787 spin_unlock_irq(&ic->endio_wait.lock); 2788 2789 recalc_write_super(ic); 2790 2791 free_ret: 2792 vfree(recalc_buffer); 2793 kvfree(recalc_tags); 2794 } 2795 2796 static void bitmap_block_work(struct work_struct *w) 2797 { 2798 struct bitmap_block_status *bbs = container_of(w, struct bitmap_block_status, work); 2799 struct dm_integrity_c *ic = bbs->ic; 2800 struct bio *bio; 2801 struct bio_list bio_queue; 2802 struct bio_list waiting; 2803 2804 bio_list_init(&waiting); 2805 2806 spin_lock(&bbs->bio_queue_lock); 2807 bio_queue = bbs->bio_queue; 2808 bio_list_init(&bbs->bio_queue); 2809 spin_unlock(&bbs->bio_queue_lock); 2810 2811 while ((bio = bio_list_pop(&bio_queue))) { 2812 struct dm_integrity_io *dio; 2813 2814 dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io)); 2815 2816 if (block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector, 2817 dio->range.n_sectors, BITMAP_OP_TEST_ALL_SET)) { 2818 remove_range(ic, &dio->range); 2819 INIT_WORK(&dio->work, integrity_bio_wait); 2820 queue_work(ic->offload_wq, &dio->work); 2821 } else { 2822 block_bitmap_op(ic, ic->journal, dio->range.logical_sector, 2823 dio->range.n_sectors, BITMAP_OP_SET); 2824 bio_list_add(&waiting, bio); 2825 } 2826 } 2827 2828 if (bio_list_empty(&waiting)) 2829 return; 2830 2831 rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 2832 bbs->idx * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), 2833 BITMAP_BLOCK_SIZE >> SECTOR_SHIFT, NULL); 2834 2835 while ((bio = bio_list_pop(&waiting))) { 2836 struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io)); 2837 2838 block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector, 2839 dio->range.n_sectors, BITMAP_OP_SET); 2840 2841 remove_range(ic, &dio->range); 2842 INIT_WORK(&dio->work, integrity_bio_wait); 2843 queue_work(ic->offload_wq, &dio->work); 2844 } 2845 2846 queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, ic->bitmap_flush_interval); 2847 } 2848 2849 static void bitmap_flush_work(struct work_struct *work) 2850 { 2851 struct dm_integrity_c *ic = container_of(work, struct dm_integrity_c, bitmap_flush_work.work); 2852 struct dm_integrity_range range; 2853 unsigned long limit; 2854 struct bio *bio; 2855 2856 dm_integrity_flush_buffers(ic, false); 2857 2858 range.logical_sector = 0; 2859 range.n_sectors = ic->provided_data_sectors; 2860 2861 spin_lock_irq(&ic->endio_wait.lock); 2862 add_new_range_and_wait(ic, &range); 2863 spin_unlock_irq(&ic->endio_wait.lock); 2864 2865 dm_integrity_flush_buffers(ic, true); 2866 2867 limit = ic->provided_data_sectors; 2868 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) { 2869 limit = le64_to_cpu(ic->sb->recalc_sector) 2870 >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit) 2871 << (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit); 2872 } 2873 /*DEBUG_print("zeroing journal\n");*/ 2874 block_bitmap_op(ic, ic->journal, 0, limit, BITMAP_OP_CLEAR); 2875 block_bitmap_op(ic, ic->may_write_bitmap, 0, limit, BITMAP_OP_CLEAR); 2876 2877 rw_journal_sectors(ic, 
REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0, 2878 ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); 2879 2880 spin_lock_irq(&ic->endio_wait.lock); 2881 remove_range_unlocked(ic, &range); 2882 while (unlikely((bio = bio_list_pop(&ic->synchronous_bios)) != NULL)) { 2883 bio_endio(bio); 2884 spin_unlock_irq(&ic->endio_wait.lock); 2885 spin_lock_irq(&ic->endio_wait.lock); 2886 } 2887 spin_unlock_irq(&ic->endio_wait.lock); 2888 } 2889 2890 2891 static void init_journal(struct dm_integrity_c *ic, unsigned int start_section, 2892 unsigned int n_sections, unsigned char commit_seq) 2893 { 2894 unsigned int i, j, n; 2895 2896 if (!n_sections) 2897 return; 2898 2899 for (n = 0; n < n_sections; n++) { 2900 i = start_section + n; 2901 wraparound_section(ic, &i); 2902 for (j = 0; j < ic->journal_section_sectors; j++) { 2903 struct journal_sector *js = access_journal(ic, i, j); 2904 2905 BUILD_BUG_ON(sizeof(js->sectors) != JOURNAL_SECTOR_DATA); 2906 memset(&js->sectors, 0, sizeof(js->sectors)); 2907 js->commit_id = dm_integrity_commit_id(ic, i, j, commit_seq); 2908 } 2909 for (j = 0; j < ic->journal_section_entries; j++) { 2910 struct journal_entry *je = access_journal_entry(ic, i, j); 2911 2912 journal_entry_set_unused(je); 2913 } 2914 } 2915 2916 write_journal(ic, start_section, n_sections); 2917 } 2918 2919 static int find_commit_seq(struct dm_integrity_c *ic, unsigned int i, unsigned int j, commit_id_t id) 2920 { 2921 unsigned char k; 2922 2923 for (k = 0; k < N_COMMIT_IDS; k++) { 2924 if (dm_integrity_commit_id(ic, i, j, k) == id) 2925 return k; 2926 } 2927 dm_integrity_io_error(ic, "journal commit id", -EIO); 2928 return -EIO; 2929 } 2930 2931 static void replay_journal(struct dm_integrity_c *ic) 2932 { 2933 unsigned int i, j; 2934 bool used_commit_ids[N_COMMIT_IDS]; 2935 unsigned int max_commit_id_sections[N_COMMIT_IDS]; 2936 unsigned int write_start, write_sections; 2937 unsigned int continue_section; 2938 bool journal_empty; 2939 unsigned char unused, last_used, want_commit_seq; 2940 2941 if (ic->mode == 'R') 2942 return; 2943 2944 if (ic->journal_uptodate) 2945 return; 2946 2947 last_used = 0; 2948 write_start = 0; 2949 2950 if (!ic->just_formatted) { 2951 DEBUG_print("reading journal\n"); 2952 rw_journal(ic, REQ_OP_READ, 0, ic->journal_sections, NULL); 2953 if (ic->journal_io) 2954 DEBUG_bytes(lowmem_page_address(ic->journal_io[0].page), 64, "read journal"); 2955 if (ic->journal_io) { 2956 struct journal_completion crypt_comp; 2957 2958 crypt_comp.ic = ic; 2959 init_completion(&crypt_comp.comp); 2960 crypt_comp.in_flight = (atomic_t)ATOMIC_INIT(0); 2961 encrypt_journal(ic, false, 0, ic->journal_sections, &crypt_comp); 2962 wait_for_completion(&crypt_comp.comp); 2963 } 2964 DEBUG_bytes(lowmem_page_address(ic->journal[0].page), 64, "decrypted journal"); 2965 } 2966 2967 if (dm_integrity_failed(ic)) 2968 goto clear_journal; 2969 2970 journal_empty = true; 2971 memset(used_commit_ids, 0, sizeof(used_commit_ids)); 2972 memset(max_commit_id_sections, 0, sizeof(max_commit_id_sections)); 2973 for (i = 0; i < ic->journal_sections; i++) { 2974 for (j = 0; j < ic->journal_section_sectors; j++) { 2975 int k; 2976 struct journal_sector *js = access_journal(ic, i, j); 2977 2978 k = find_commit_seq(ic, i, j, js->commit_id); 2979 if (k < 0) 2980 goto clear_journal; 2981 used_commit_ids[k] = true; 2982 max_commit_id_sections[k] = i; 2983 } 2984 if (journal_empty) { 2985 for (j = 0; j < ic->journal_section_entries; j++) { 2986 struct journal_entry *je = access_journal_entry(ic, i, j); 2987 2988 if 
(!journal_entry_is_unused(je)) { 2989 journal_empty = false; 2990 break; 2991 } 2992 } 2993 } 2994 } 2995 2996 if (!used_commit_ids[N_COMMIT_IDS - 1]) { 2997 unused = N_COMMIT_IDS - 1; 2998 while (unused && !used_commit_ids[unused - 1]) 2999 unused--; 3000 } else { 3001 for (unused = 0; unused < N_COMMIT_IDS; unused++) 3002 if (!used_commit_ids[unused]) 3003 break; 3004 if (unused == N_COMMIT_IDS) { 3005 dm_integrity_io_error(ic, "journal commit ids", -EIO); 3006 goto clear_journal; 3007 } 3008 } 3009 DEBUG_print("first unused commit seq %d [%d,%d,%d,%d]\n", 3010 unused, used_commit_ids[0], used_commit_ids[1], 3011 used_commit_ids[2], used_commit_ids[3]); 3012 3013 last_used = prev_commit_seq(unused); 3014 want_commit_seq = prev_commit_seq(last_used); 3015 3016 if (!used_commit_ids[want_commit_seq] && used_commit_ids[prev_commit_seq(want_commit_seq)]) 3017 journal_empty = true; 3018 3019 write_start = max_commit_id_sections[last_used] + 1; 3020 if (unlikely(write_start >= ic->journal_sections)) 3021 want_commit_seq = next_commit_seq(want_commit_seq); 3022 wraparound_section(ic, &write_start); 3023 3024 i = write_start; 3025 for (write_sections = 0; write_sections < ic->journal_sections; write_sections++) { 3026 for (j = 0; j < ic->journal_section_sectors; j++) { 3027 struct journal_sector *js = access_journal(ic, i, j); 3028 3029 if (js->commit_id != dm_integrity_commit_id(ic, i, j, want_commit_seq)) { 3030 /* 3031 * This could be caused by crash during writing. 3032 * We won't replay the inconsistent part of the 3033 * journal. 3034 */ 3035 DEBUG_print("commit id mismatch at position (%u, %u): %d != %d\n", 3036 i, j, find_commit_seq(ic, i, j, js->commit_id), want_commit_seq); 3037 goto brk; 3038 } 3039 } 3040 i++; 3041 if (unlikely(i >= ic->journal_sections)) 3042 want_commit_seq = next_commit_seq(want_commit_seq); 3043 wraparound_section(ic, &i); 3044 } 3045 brk: 3046 3047 if (!journal_empty) { 3048 DEBUG_print("replaying %u sections, starting at %u, commit seq %d\n", 3049 write_sections, write_start, want_commit_seq); 3050 do_journal_write(ic, write_start, write_sections, true); 3051 } 3052 3053 if (write_sections == ic->journal_sections && (ic->mode == 'J' || journal_empty)) { 3054 continue_section = write_start; 3055 ic->commit_seq = want_commit_seq; 3056 DEBUG_print("continuing from section %u, commit seq %d\n", write_start, ic->commit_seq); 3057 } else { 3058 unsigned int s; 3059 unsigned char erase_seq; 3060 3061 clear_journal: 3062 DEBUG_print("clearing journal\n"); 3063 3064 erase_seq = prev_commit_seq(prev_commit_seq(last_used)); 3065 s = write_start; 3066 init_journal(ic, s, 1, erase_seq); 3067 s++; 3068 wraparound_section(ic, &s); 3069 if (ic->journal_sections >= 2) { 3070 init_journal(ic, s, ic->journal_sections - 2, erase_seq); 3071 s += ic->journal_sections - 2; 3072 wraparound_section(ic, &s); 3073 init_journal(ic, s, 1, erase_seq); 3074 } 3075 3076 continue_section = 0; 3077 ic->commit_seq = next_commit_seq(erase_seq); 3078 } 3079 3080 ic->committed_section = continue_section; 3081 ic->n_committed_sections = 0; 3082 3083 ic->uncommitted_section = continue_section; 3084 ic->n_uncommitted_sections = 0; 3085 3086 ic->free_section = continue_section; 3087 ic->free_section_entry = 0; 3088 ic->free_sectors = ic->journal_entries; 3089 3090 ic->journal_tree_root = RB_ROOT; 3091 for (i = 0; i < ic->journal_entries; i++) 3092 init_journal_node(&ic->journal_tree[i]); 3093 } 3094 3095 static void dm_integrity_enter_synchronous_mode(struct dm_integrity_c *ic) 3096 { 3097 
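/*
 * In bitmap mode, shorten the flush interval and flush the dirty bitmap
 * immediately, so that an imminent shutdown leaves as few bits dirty as
 * possible.
 */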
DEBUG_print("%s\n", __func__); 3098 3099 if (ic->mode == 'B') { 3100 ic->bitmap_flush_interval = msecs_to_jiffies(10) + 1; 3101 ic->synchronous_mode = 1; 3102 3103 cancel_delayed_work_sync(&ic->bitmap_flush_work); 3104 queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, 0); 3105 flush_workqueue(ic->commit_wq); 3106 } 3107 } 3108 3109 static int dm_integrity_reboot(struct notifier_block *n, unsigned long code, void *x) 3110 { 3111 struct dm_integrity_c *ic = container_of(n, struct dm_integrity_c, reboot_notifier); 3112 3113 DEBUG_print("%s\n", __func__); 3114 3115 dm_integrity_enter_synchronous_mode(ic); 3116 3117 return NOTIFY_DONE; 3118 } 3119 3120 static void dm_integrity_postsuspend(struct dm_target *ti) 3121 { 3122 struct dm_integrity_c *ic = ti->private; 3123 int r; 3124 3125 WARN_ON(unregister_reboot_notifier(&ic->reboot_notifier)); 3126 3127 del_timer_sync(&ic->autocommit_timer); 3128 3129 if (ic->recalc_wq) 3130 drain_workqueue(ic->recalc_wq); 3131 3132 if (ic->mode == 'B') 3133 cancel_delayed_work_sync(&ic->bitmap_flush_work); 3134 3135 queue_work(ic->commit_wq, &ic->commit_work); 3136 drain_workqueue(ic->commit_wq); 3137 3138 if (ic->mode == 'J') { 3139 queue_work(ic->writer_wq, &ic->writer_work); 3140 drain_workqueue(ic->writer_wq); 3141 dm_integrity_flush_buffers(ic, true); 3142 if (ic->wrote_to_journal) { 3143 init_journal(ic, ic->free_section, 3144 ic->journal_sections - ic->free_section, ic->commit_seq); 3145 if (ic->free_section) { 3146 init_journal(ic, 0, ic->free_section, 3147 next_commit_seq(ic->commit_seq)); 3148 } 3149 } 3150 } 3151 3152 if (ic->mode == 'B') { 3153 dm_integrity_flush_buffers(ic, true); 3154 #if 1 3155 /* set to 0 to test bitmap replay code */ 3156 init_journal(ic, 0, ic->journal_sections, 0); 3157 ic->sb->flags &= ~cpu_to_le32(SB_FLAG_DIRTY_BITMAP); 3158 r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA); 3159 if (unlikely(r)) 3160 dm_integrity_io_error(ic, "writing superblock", r); 3161 #endif 3162 } 3163 3164 BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress)); 3165 3166 ic->journal_uptodate = true; 3167 } 3168 3169 static void dm_integrity_resume(struct dm_target *ti) 3170 { 3171 struct dm_integrity_c *ic = ti->private; 3172 __u64 old_provided_data_sectors = le64_to_cpu(ic->sb->provided_data_sectors); 3173 int r; 3174 3175 DEBUG_print("resume\n"); 3176 3177 ic->wrote_to_journal = false; 3178 3179 if (ic->provided_data_sectors != old_provided_data_sectors) { 3180 if (ic->provided_data_sectors > old_provided_data_sectors && 3181 ic->mode == 'B' && 3182 ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit) { 3183 rw_journal_sectors(ic, REQ_OP_READ, 0, 3184 ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); 3185 block_bitmap_op(ic, ic->journal, old_provided_data_sectors, 3186 ic->provided_data_sectors - old_provided_data_sectors, BITMAP_OP_SET); 3187 rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0, 3188 ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); 3189 } 3190 3191 ic->sb->provided_data_sectors = cpu_to_le64(ic->provided_data_sectors); 3192 r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA); 3193 if (unlikely(r)) 3194 dm_integrity_io_error(ic, "writing superblock", r); 3195 } 3196 3197 if (ic->sb->flags & cpu_to_le32(SB_FLAG_DIRTY_BITMAP)) { 3198 DEBUG_print("resume dirty_bitmap\n"); 3199 rw_journal_sectors(ic, REQ_OP_READ, 0, 3200 ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); 3201 if (ic->mode == 'B') { 3202 if (ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit && 3203 
!ic->reset_recalculate_flag) { 3204 block_bitmap_copy(ic, ic->recalc_bitmap, ic->journal); 3205 block_bitmap_copy(ic, ic->may_write_bitmap, ic->journal); 3206 if (!block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, 3207 BITMAP_OP_TEST_ALL_CLEAR)) { 3208 ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING); 3209 ic->sb->recalc_sector = cpu_to_le64(0); 3210 } 3211 } else { 3212 DEBUG_print("non-matching blocks_per_bitmap_bit: %u, %u\n", 3213 ic->sb->log2_blocks_per_bitmap_bit, ic->log2_blocks_per_bitmap_bit); 3214 ic->sb->log2_blocks_per_bitmap_bit = ic->log2_blocks_per_bitmap_bit; 3215 block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_SET); 3216 block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_SET); 3217 block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_SET); 3218 rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0, 3219 ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); 3220 ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING); 3221 ic->sb->recalc_sector = cpu_to_le64(0); 3222 } 3223 } else { 3224 if (!(ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit && 3225 block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_TEST_ALL_CLEAR)) || 3226 ic->reset_recalculate_flag) { 3227 ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING); 3228 ic->sb->recalc_sector = cpu_to_le64(0); 3229 } 3230 init_journal(ic, 0, ic->journal_sections, 0); 3231 replay_journal(ic); 3232 ic->sb->flags &= ~cpu_to_le32(SB_FLAG_DIRTY_BITMAP); 3233 } 3234 r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA); 3235 if (unlikely(r)) 3236 dm_integrity_io_error(ic, "writing superblock", r); 3237 } else { 3238 replay_journal(ic); 3239 if (ic->reset_recalculate_flag) { 3240 ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING); 3241 ic->sb->recalc_sector = cpu_to_le64(0); 3242 } 3243 if (ic->mode == 'B') { 3244 ic->sb->flags |= cpu_to_le32(SB_FLAG_DIRTY_BITMAP); 3245 ic->sb->log2_blocks_per_bitmap_bit = ic->log2_blocks_per_bitmap_bit; 3246 r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA); 3247 if (unlikely(r)) 3248 dm_integrity_io_error(ic, "writing superblock", r); 3249 3250 block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR); 3251 block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR); 3252 block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR); 3253 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) && 3254 le64_to_cpu(ic->sb->recalc_sector) < ic->provided_data_sectors) { 3255 block_bitmap_op(ic, ic->journal, le64_to_cpu(ic->sb->recalc_sector), 3256 ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET); 3257 block_bitmap_op(ic, ic->recalc_bitmap, le64_to_cpu(ic->sb->recalc_sector), 3258 ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET); 3259 block_bitmap_op(ic, ic->may_write_bitmap, le64_to_cpu(ic->sb->recalc_sector), 3260 ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET); 3261 } 3262 rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0, 3263 ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); 3264 } 3265 } 3266 3267 DEBUG_print("testing recalc: %x\n", ic->sb->flags); 3268 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) { 3269 __u64 recalc_pos = le64_to_cpu(ic->sb->recalc_sector); 3270 3271 DEBUG_print("recalc pos: %llx / %llx\n", recalc_pos, ic->provided_data_sectors); 3272 if (recalc_pos 
< ic->provided_data_sectors) { 3273 queue_work(ic->recalc_wq, &ic->recalc_work); 3274 } else if (recalc_pos > ic->provided_data_sectors) { 3275 ic->sb->recalc_sector = cpu_to_le64(ic->provided_data_sectors); 3276 recalc_write_super(ic); 3277 } 3278 } 3279 3280 ic->reboot_notifier.notifier_call = dm_integrity_reboot; 3281 ic->reboot_notifier.next = NULL; 3282 ic->reboot_notifier.priority = INT_MAX - 1; /* be notified after md and before hardware drivers */ 3283 WARN_ON(register_reboot_notifier(&ic->reboot_notifier)); 3284 3285 #if 0 3286 /* set to 1 to stress test synchronous mode */ 3287 dm_integrity_enter_synchronous_mode(ic); 3288 #endif 3289 } 3290 3291 static void dm_integrity_status(struct dm_target *ti, status_type_t type, 3292 unsigned int status_flags, char *result, unsigned int maxlen) 3293 { 3294 struct dm_integrity_c *ic = ti->private; 3295 unsigned int arg_count; 3296 size_t sz = 0; 3297 3298 switch (type) { 3299 case STATUSTYPE_INFO: 3300 DMEMIT("%llu %llu", 3301 (unsigned long long)atomic64_read(&ic->number_of_mismatches), 3302 ic->provided_data_sectors); 3303 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) 3304 DMEMIT(" %llu", le64_to_cpu(ic->sb->recalc_sector)); 3305 else 3306 DMEMIT(" -"); 3307 break; 3308 3309 case STATUSTYPE_TABLE: { 3310 __u64 watermark_percentage = (__u64)(ic->journal_entries - ic->free_sectors_threshold) * 100; 3311 3312 watermark_percentage += ic->journal_entries / 2; 3313 do_div(watermark_percentage, ic->journal_entries); 3314 arg_count = 3; 3315 arg_count += !!ic->meta_dev; 3316 arg_count += ic->sectors_per_block != 1; 3317 arg_count += !!(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)); 3318 arg_count += ic->reset_recalculate_flag; 3319 arg_count += ic->discard; 3320 arg_count += ic->mode == 'J'; 3321 arg_count += ic->mode == 'J'; 3322 arg_count += ic->mode == 'B'; 3323 arg_count += ic->mode == 'B'; 3324 arg_count += !!ic->internal_hash_alg.alg_string; 3325 arg_count += !!ic->journal_crypt_alg.alg_string; 3326 arg_count += !!ic->journal_mac_alg.alg_string; 3327 arg_count += (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0; 3328 arg_count += (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) != 0; 3329 arg_count += ic->legacy_recalculate; 3330 DMEMIT("%s %llu %u %c %u", ic->dev->name, ic->start, 3331 ic->tag_size, ic->mode, arg_count); 3332 if (ic->meta_dev) 3333 DMEMIT(" meta_device:%s", ic->meta_dev->name); 3334 if (ic->sectors_per_block != 1) 3335 DMEMIT(" block_size:%u", ic->sectors_per_block << SECTOR_SHIFT); 3336 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) 3337 DMEMIT(" recalculate"); 3338 if (ic->reset_recalculate_flag) 3339 DMEMIT(" reset_recalculate"); 3340 if (ic->discard) 3341 DMEMIT(" allow_discards"); 3342 DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS); 3343 DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors); 3344 DMEMIT(" buffer_sectors:%u", 1U << ic->log2_buffer_sectors); 3345 if (ic->mode == 'J') { 3346 DMEMIT(" journal_watermark:%u", (unsigned int)watermark_percentage); 3347 DMEMIT(" commit_time:%u", ic->autocommit_msec); 3348 } 3349 if (ic->mode == 'B') { 3350 DMEMIT(" sectors_per_bit:%llu", (sector_t)ic->sectors_per_block << ic->log2_blocks_per_bitmap_bit); 3351 DMEMIT(" bitmap_flush_interval:%u", jiffies_to_msecs(ic->bitmap_flush_interval)); 3352 } 3353 if ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0) 3354 DMEMIT(" fix_padding"); 3355 if ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) != 0) 3356 DMEMIT(" fix_hmac"); 3357 if 
(ic->legacy_recalculate) 3358 DMEMIT(" legacy_recalculate"); 3359 3360 #define EMIT_ALG(a, n) \ 3361 do { \ 3362 if (ic->a.alg_string) { \ 3363 DMEMIT(" %s:%s", n, ic->a.alg_string); \ 3364 if (ic->a.key_string) \ 3365 DMEMIT(":%s", ic->a.key_string);\ 3366 } \ 3367 } while (0) 3368 EMIT_ALG(internal_hash_alg, "internal_hash"); 3369 EMIT_ALG(journal_crypt_alg, "journal_crypt"); 3370 EMIT_ALG(journal_mac_alg, "journal_mac"); 3371 break; 3372 } 3373 case STATUSTYPE_IMA: 3374 DMEMIT_TARGET_NAME_VERSION(ti->type); 3375 DMEMIT(",dev_name=%s,start=%llu,tag_size=%u,mode=%c", 3376 ic->dev->name, ic->start, ic->tag_size, ic->mode); 3377 3378 if (ic->meta_dev) 3379 DMEMIT(",meta_device=%s", ic->meta_dev->name); 3380 if (ic->sectors_per_block != 1) 3381 DMEMIT(",block_size=%u", ic->sectors_per_block << SECTOR_SHIFT); 3382 3383 DMEMIT(",recalculate=%c", (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) ? 3384 'y' : 'n'); 3385 DMEMIT(",allow_discards=%c", ic->discard ? 'y' : 'n'); 3386 DMEMIT(",fix_padding=%c", 3387 ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0) ? 'y' : 'n'); 3388 DMEMIT(",fix_hmac=%c", 3389 ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) != 0) ? 'y' : 'n'); 3390 DMEMIT(",legacy_recalculate=%c", ic->legacy_recalculate ? 'y' : 'n'); 3391 3392 DMEMIT(",journal_sectors=%u", ic->initial_sectors - SB_SECTORS); 3393 DMEMIT(",interleave_sectors=%u", 1U << ic->sb->log2_interleave_sectors); 3394 DMEMIT(",buffer_sectors=%u", 1U << ic->log2_buffer_sectors); 3395 DMEMIT(";"); 3396 break; 3397 } 3398 } 3399 3400 static int dm_integrity_iterate_devices(struct dm_target *ti, 3401 iterate_devices_callout_fn fn, void *data) 3402 { 3403 struct dm_integrity_c *ic = ti->private; 3404 3405 if (!ic->meta_dev) 3406 return fn(ti, ic->dev, ic->start + ic->initial_sectors + ic->metadata_run, ti->len, data); 3407 else 3408 return fn(ti, ic->dev, 0, ti->len, data); 3409 } 3410 3411 static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *limits) 3412 { 3413 struct dm_integrity_c *ic = ti->private; 3414 3415 if (ic->sectors_per_block > 1) { 3416 limits->logical_block_size = ic->sectors_per_block << SECTOR_SHIFT; 3417 limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT; 3418 blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT); 3419 limits->dma_alignment = limits->logical_block_size - 1; 3420 } 3421 } 3422 3423 static void calculate_journal_section_size(struct dm_integrity_c *ic) 3424 { 3425 unsigned int sector_space = JOURNAL_SECTOR_DATA; 3426 3427 ic->journal_sections = le32_to_cpu(ic->sb->journal_sections); 3428 ic->journal_entry_size = roundup(offsetof(struct journal_entry, last_bytes[ic->sectors_per_block]) + ic->tag_size, 3429 JOURNAL_ENTRY_ROUNDUP); 3430 3431 if (ic->sb->flags & cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC)) 3432 sector_space -= JOURNAL_MAC_PER_SECTOR; 3433 ic->journal_entries_per_sector = sector_space / ic->journal_entry_size; 3434 ic->journal_section_entries = ic->journal_entries_per_sector * JOURNAL_BLOCK_SECTORS; 3435 ic->journal_section_sectors = (ic->journal_section_entries << ic->sb->log2_sectors_per_block) + JOURNAL_BLOCK_SECTORS; 3436 ic->journal_entries = ic->journal_section_entries * ic->journal_sections; 3437 } 3438 3439 static int calculate_device_limits(struct dm_integrity_c *ic) 3440 { 3441 __u64 initial_sectors; 3442 3443 calculate_journal_section_size(ic); 3444 initial_sectors = SB_SECTORS + (__u64)ic->journal_section_sectors * ic->journal_sections; 3445 if (initial_sectors + METADATA_PADDING_SECTORS >= 
ic->meta_device_sectors || initial_sectors > UINT_MAX) 3446 return -EINVAL; 3447 ic->initial_sectors = initial_sectors; 3448 3449 if (!ic->meta_dev) { 3450 sector_t last_sector, last_area, last_offset; 3451 3452 /* we have to maintain excessive padding for compatibility with existing volumes */ 3453 __u64 metadata_run_padding = 3454 ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING) ? 3455 (__u64)(METADATA_PADDING_SECTORS << SECTOR_SHIFT) : 3456 (__u64)(1 << SECTOR_SHIFT << METADATA_PADDING_SECTORS); 3457 3458 ic->metadata_run = round_up((__u64)ic->tag_size << (ic->sb->log2_interleave_sectors - ic->sb->log2_sectors_per_block), 3459 metadata_run_padding) >> SECTOR_SHIFT; 3460 if (!(ic->metadata_run & (ic->metadata_run - 1))) 3461 ic->log2_metadata_run = __ffs(ic->metadata_run); 3462 else 3463 ic->log2_metadata_run = -1; 3464 3465 get_area_and_offset(ic, ic->provided_data_sectors - 1, &last_area, &last_offset); 3466 last_sector = get_data_sector(ic, last_area, last_offset); 3467 if (last_sector < ic->start || last_sector >= ic->meta_device_sectors) 3468 return -EINVAL; 3469 } else { 3470 __u64 meta_size = (ic->provided_data_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size; 3471 3472 meta_size = (meta_size + ((1U << (ic->log2_buffer_sectors + SECTOR_SHIFT)) - 1)) 3473 >> (ic->log2_buffer_sectors + SECTOR_SHIFT); 3474 meta_size <<= ic->log2_buffer_sectors; 3475 if (ic->initial_sectors + meta_size < ic->initial_sectors || 3476 ic->initial_sectors + meta_size > ic->meta_device_sectors) 3477 return -EINVAL; 3478 ic->metadata_run = 1; 3479 ic->log2_metadata_run = 0; 3480 } 3481 3482 return 0; 3483 } 3484 3485 static void get_provided_data_sectors(struct dm_integrity_c *ic) 3486 { 3487 if (!ic->meta_dev) { 3488 int test_bit; 3489 3490 ic->provided_data_sectors = 0; 3491 for (test_bit = fls64(ic->meta_device_sectors) - 1; test_bit >= 3; test_bit--) { 3492 __u64 prev_data_sectors = ic->provided_data_sectors; 3493 3494 ic->provided_data_sectors |= (sector_t)1 << test_bit; 3495 if (calculate_device_limits(ic)) 3496 ic->provided_data_sectors = prev_data_sectors; 3497 } 3498 } else { 3499 ic->provided_data_sectors = ic->data_device_sectors; 3500 ic->provided_data_sectors &= ~(sector_t)(ic->sectors_per_block - 1); 3501 } 3502 } 3503 3504 static int initialize_superblock(struct dm_integrity_c *ic, 3505 unsigned int journal_sectors, unsigned int interleave_sectors) 3506 { 3507 unsigned int journal_sections; 3508 int test_bit; 3509 3510 memset(ic->sb, 0, SB_SECTORS << SECTOR_SHIFT); 3511 memcpy(ic->sb->magic, SB_MAGIC, 8); 3512 ic->sb->integrity_tag_size = cpu_to_le16(ic->tag_size); 3513 ic->sb->log2_sectors_per_block = __ffs(ic->sectors_per_block); 3514 if (ic->journal_mac_alg.alg_string) 3515 ic->sb->flags |= cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC); 3516 3517 calculate_journal_section_size(ic); 3518 journal_sections = journal_sectors / ic->journal_section_sectors; 3519 if (!journal_sections) 3520 journal_sections = 1; 3521 3522 if (ic->fix_hmac && (ic->internal_hash_alg.alg_string || ic->journal_mac_alg.alg_string)) { 3523 ic->sb->flags |= cpu_to_le32(SB_FLAG_FIXED_HMAC); 3524 get_random_bytes(ic->sb->salt, SALT_SIZE); 3525 } 3526 3527 if (!ic->meta_dev) { 3528 if (ic->fix_padding) 3529 ic->sb->flags |= cpu_to_le32(SB_FLAG_FIXED_PADDING); 3530 ic->sb->journal_sections = cpu_to_le32(journal_sections); 3531 if (!interleave_sectors) 3532 interleave_sectors = DEFAULT_INTERLEAVE_SECTORS; 3533 ic->sb->log2_interleave_sectors = __fls(interleave_sectors); 3534 ic->sb->log2_interleave_sectors = max_t(__u8, 
MIN_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors); 3535 ic->sb->log2_interleave_sectors = min_t(__u8, MAX_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors); 3536 3537 get_provided_data_sectors(ic); 3538 if (!ic->provided_data_sectors) 3539 return -EINVAL; 3540 } else { 3541 ic->sb->log2_interleave_sectors = 0; 3542 3543 get_provided_data_sectors(ic); 3544 if (!ic->provided_data_sectors) 3545 return -EINVAL; 3546 3547 try_smaller_buffer: 3548 ic->sb->journal_sections = cpu_to_le32(0); 3549 for (test_bit = fls(journal_sections) - 1; test_bit >= 0; test_bit--) { 3550 __u32 prev_journal_sections = le32_to_cpu(ic->sb->journal_sections); 3551 __u32 test_journal_sections = prev_journal_sections | (1U << test_bit); 3552 3553 if (test_journal_sections > journal_sections) 3554 continue; 3555 ic->sb->journal_sections = cpu_to_le32(test_journal_sections); 3556 if (calculate_device_limits(ic)) 3557 ic->sb->journal_sections = cpu_to_le32(prev_journal_sections); 3558 3559 } 3560 if (!le32_to_cpu(ic->sb->journal_sections)) { 3561 if (ic->log2_buffer_sectors > 3) { 3562 ic->log2_buffer_sectors--; 3563 goto try_smaller_buffer; 3564 } 3565 return -EINVAL; 3566 } 3567 } 3568 3569 ic->sb->provided_data_sectors = cpu_to_le64(ic->provided_data_sectors); 3570 3571 sb_set_version(ic); 3572 3573 return 0; 3574 } 3575 3576 static void dm_integrity_set(struct dm_target *ti, struct dm_integrity_c *ic) 3577 { 3578 struct gendisk *disk = dm_disk(dm_table_get_md(ti->table)); 3579 struct blk_integrity bi; 3580 3581 memset(&bi, 0, sizeof(bi)); 3582 bi.profile = &dm_integrity_profile; 3583 bi.tuple_size = ic->tag_size; 3584 bi.tag_size = bi.tuple_size; 3585 bi.interval_exp = ic->sb->log2_sectors_per_block + SECTOR_SHIFT; 3586 3587 blk_integrity_register(disk, &bi); 3588 blk_queue_max_integrity_segments(disk->queue, UINT_MAX); 3589 } 3590 3591 static void dm_integrity_free_page_list(struct page_list *pl) 3592 { 3593 unsigned int i; 3594 3595 if (!pl) 3596 return; 3597 for (i = 0; pl[i].page; i++) 3598 __free_page(pl[i].page); 3599 kvfree(pl); 3600 } 3601 3602 static struct page_list *dm_integrity_alloc_page_list(unsigned int n_pages) 3603 { 3604 struct page_list *pl; 3605 unsigned int i; 3606 3607 pl = kvmalloc_array(n_pages + 1, sizeof(struct page_list), GFP_KERNEL | __GFP_ZERO); 3608 if (!pl) 3609 return NULL; 3610 3611 for (i = 0; i < n_pages; i++) { 3612 pl[i].page = alloc_page(GFP_KERNEL); 3613 if (!pl[i].page) { 3614 dm_integrity_free_page_list(pl); 3615 return NULL; 3616 } 3617 if (i) 3618 pl[i - 1].next = &pl[i]; 3619 } 3620 pl[i].page = NULL; 3621 pl[i].next = NULL; 3622 3623 return pl; 3624 } 3625 3626 static void dm_integrity_free_journal_scatterlist(struct dm_integrity_c *ic, struct scatterlist **sl) 3627 { 3628 unsigned int i; 3629 3630 for (i = 0; i < ic->journal_sections; i++) 3631 kvfree(sl[i]); 3632 kvfree(sl); 3633 } 3634 3635 static struct scatterlist **dm_integrity_alloc_journal_scatterlist(struct dm_integrity_c *ic, 3636 struct page_list *pl) 3637 { 3638 struct scatterlist **sl; 3639 unsigned int i; 3640 3641 sl = kvmalloc_array(ic->journal_sections, 3642 sizeof(struct scatterlist *), 3643 GFP_KERNEL | __GFP_ZERO); 3644 if (!sl) 3645 return NULL; 3646 3647 for (i = 0; i < ic->journal_sections; i++) { 3648 struct scatterlist *s; 3649 unsigned int start_index, start_offset; 3650 unsigned int end_index, end_offset; 3651 unsigned int n_pages; 3652 unsigned int idx; 3653 3654 page_list_location(ic, i, 0, &start_index, &start_offset); 3655 page_list_location(ic, i, 
ic->journal_section_sectors - 1, 3656 &end_index, &end_offset); 3657 3658 n_pages = (end_index - start_index + 1); 3659 3660 s = kvmalloc_array(n_pages, sizeof(struct scatterlist), 3661 GFP_KERNEL); 3662 if (!s) { 3663 dm_integrity_free_journal_scatterlist(ic, sl); 3664 return NULL; 3665 } 3666 3667 sg_init_table(s, n_pages); 3668 for (idx = start_index; idx <= end_index; idx++) { 3669 char *va = lowmem_page_address(pl[idx].page); 3670 unsigned int start = 0, end = PAGE_SIZE; 3671 3672 if (idx == start_index) 3673 start = start_offset; 3674 if (idx == end_index) 3675 end = end_offset + (1 << SECTOR_SHIFT); 3676 sg_set_buf(&s[idx - start_index], va + start, end - start); 3677 } 3678 3679 sl[i] = s; 3680 } 3681 3682 return sl; 3683 } 3684 3685 static void free_alg(struct alg_spec *a) 3686 { 3687 kfree_sensitive(a->alg_string); 3688 kfree_sensitive(a->key); 3689 memset(a, 0, sizeof(*a)); 3690 } 3691 3692 static int get_alg_and_key(const char *arg, struct alg_spec *a, char **error, char *error_inval) 3693 { 3694 char *k; 3695 3696 free_alg(a); 3697 3698 a->alg_string = kstrdup(strchr(arg, ':') + 1, GFP_KERNEL); 3699 if (!a->alg_string) 3700 goto nomem; 3701 3702 k = strchr(a->alg_string, ':'); 3703 if (k) { 3704 *k = 0; 3705 a->key_string = k + 1; 3706 if (strlen(a->key_string) & 1) 3707 goto inval; 3708 3709 a->key_size = strlen(a->key_string) / 2; 3710 a->key = kmalloc(a->key_size, GFP_KERNEL); 3711 if (!a->key) 3712 goto nomem; 3713 if (hex2bin(a->key, a->key_string, a->key_size)) 3714 goto inval; 3715 } 3716 3717 return 0; 3718 inval: 3719 *error = error_inval; 3720 return -EINVAL; 3721 nomem: 3722 *error = "Out of memory for an argument"; 3723 return -ENOMEM; 3724 } 3725 3726 static int get_mac(struct crypto_shash **hash, struct alg_spec *a, char **error, 3727 char *error_alg, char *error_key) 3728 { 3729 int r; 3730 3731 if (a->alg_string) { 3732 *hash = crypto_alloc_shash(a->alg_string, 0, CRYPTO_ALG_ALLOCATES_MEMORY); 3733 if (IS_ERR(*hash)) { 3734 *error = error_alg; 3735 r = PTR_ERR(*hash); 3736 *hash = NULL; 3737 return r; 3738 } 3739 3740 if (a->key) { 3741 r = crypto_shash_setkey(*hash, a->key, a->key_size); 3742 if (r) { 3743 *error = error_key; 3744 return r; 3745 } 3746 } else if (crypto_shash_get_flags(*hash) & CRYPTO_TFM_NEED_KEY) { 3747 *error = error_key; 3748 return -ENOKEY; 3749 } 3750 } 3751 3752 return 0; 3753 } 3754 3755 static int create_journal(struct dm_integrity_c *ic, char **error) 3756 { 3757 int r = 0; 3758 unsigned int i; 3759 __u64 journal_pages, journal_desc_size, journal_tree_size; 3760 unsigned char *crypt_data = NULL, *crypt_iv = NULL; 3761 struct skcipher_request *req = NULL; 3762 3763 ic->commit_ids[0] = cpu_to_le64(0x1111111111111111ULL); 3764 ic->commit_ids[1] = cpu_to_le64(0x2222222222222222ULL); 3765 ic->commit_ids[2] = cpu_to_le64(0x3333333333333333ULL); 3766 ic->commit_ids[3] = cpu_to_le64(0x4444444444444444ULL); 3767 3768 journal_pages = roundup((__u64)ic->journal_sections * ic->journal_section_sectors, 3769 PAGE_SIZE >> SECTOR_SHIFT) >> (PAGE_SHIFT - SECTOR_SHIFT); 3770 journal_desc_size = journal_pages * sizeof(struct page_list); 3771 if (journal_pages >= totalram_pages() - totalhigh_pages() || journal_desc_size > ULONG_MAX) { 3772 *error = "Journal doesn't fit into memory"; 3773 r = -ENOMEM; 3774 goto bad; 3775 } 3776 ic->journal_pages = journal_pages; 3777 3778 ic->journal = dm_integrity_alloc_page_list(ic->journal_pages); 3779 if (!ic->journal) { 3780 *error = "Could not allocate memory for journal"; 3781 r = -ENOMEM; 3782 goto bad; 3783 } 
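/*
 * Optional journal encryption set-up follows. If the selected cipher's block
 * size is 1 (a stream cipher), the keystream (together with the encrypted
 * commit ids) is precomputed into ic->journal_xor and the transform is then
 * freed; otherwise an IV is derived from each section number and a
 * per-section skcipher request is prepared in ic->sk_requests[].
 */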
3784 if (ic->journal_crypt_alg.alg_string) { 3785 unsigned int ivsize, blocksize; 3786 struct journal_completion comp; 3787 3788 comp.ic = ic; 3789 ic->journal_crypt = crypto_alloc_skcipher(ic->journal_crypt_alg.alg_string, 0, CRYPTO_ALG_ALLOCATES_MEMORY); 3790 if (IS_ERR(ic->journal_crypt)) { 3791 *error = "Invalid journal cipher"; 3792 r = PTR_ERR(ic->journal_crypt); 3793 ic->journal_crypt = NULL; 3794 goto bad; 3795 } 3796 ivsize = crypto_skcipher_ivsize(ic->journal_crypt); 3797 blocksize = crypto_skcipher_blocksize(ic->journal_crypt); 3798 3799 if (ic->journal_crypt_alg.key) { 3800 r = crypto_skcipher_setkey(ic->journal_crypt, ic->journal_crypt_alg.key, 3801 ic->journal_crypt_alg.key_size); 3802 if (r) { 3803 *error = "Error setting encryption key"; 3804 goto bad; 3805 } 3806 } 3807 DEBUG_print("cipher %s, block size %u iv size %u\n", 3808 ic->journal_crypt_alg.alg_string, blocksize, ivsize); 3809 3810 ic->journal_io = dm_integrity_alloc_page_list(ic->journal_pages); 3811 if (!ic->journal_io) { 3812 *error = "Could not allocate memory for journal io"; 3813 r = -ENOMEM; 3814 goto bad; 3815 } 3816 3817 if (blocksize == 1) { 3818 struct scatterlist *sg; 3819 3820 req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL); 3821 if (!req) { 3822 *error = "Could not allocate crypt request"; 3823 r = -ENOMEM; 3824 goto bad; 3825 } 3826 3827 crypt_iv = kzalloc(ivsize, GFP_KERNEL); 3828 if (!crypt_iv) { 3829 *error = "Could not allocate iv"; 3830 r = -ENOMEM; 3831 goto bad; 3832 } 3833 3834 ic->journal_xor = dm_integrity_alloc_page_list(ic->journal_pages); 3835 if (!ic->journal_xor) { 3836 *error = "Could not allocate memory for journal xor"; 3837 r = -ENOMEM; 3838 goto bad; 3839 } 3840 3841 sg = kvmalloc_array(ic->journal_pages + 1, 3842 sizeof(struct scatterlist), 3843 GFP_KERNEL); 3844 if (!sg) { 3845 *error = "Unable to allocate sg list"; 3846 r = -ENOMEM; 3847 goto bad; 3848 } 3849 sg_init_table(sg, ic->journal_pages + 1); 3850 for (i = 0; i < ic->journal_pages; i++) { 3851 char *va = lowmem_page_address(ic->journal_xor[i].page); 3852 3853 clear_page(va); 3854 sg_set_buf(&sg[i], va, PAGE_SIZE); 3855 } 3856 sg_set_buf(&sg[i], &ic->commit_ids, sizeof(ic->commit_ids)); 3857 3858 skcipher_request_set_crypt(req, sg, sg, 3859 PAGE_SIZE * ic->journal_pages + sizeof(ic->commit_ids), crypt_iv); 3860 init_completion(&comp.comp); 3861 comp.in_flight = (atomic_t)ATOMIC_INIT(1); 3862 if (do_crypt(true, req, &comp)) 3863 wait_for_completion(&comp.comp); 3864 kvfree(sg); 3865 r = dm_integrity_failed(ic); 3866 if (r) { 3867 *error = "Unable to encrypt journal"; 3868 goto bad; 3869 } 3870 DEBUG_bytes(lowmem_page_address(ic->journal_xor[0].page), 64, "xor data"); 3871 3872 crypto_free_skcipher(ic->journal_crypt); 3873 ic->journal_crypt = NULL; 3874 } else { 3875 unsigned int crypt_len = roundup(ivsize, blocksize); 3876 3877 req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL); 3878 if (!req) { 3879 *error = "Could not allocate crypt request"; 3880 r = -ENOMEM; 3881 goto bad; 3882 } 3883 3884 crypt_iv = kmalloc(ivsize, GFP_KERNEL); 3885 if (!crypt_iv) { 3886 *error = "Could not allocate iv"; 3887 r = -ENOMEM; 3888 goto bad; 3889 } 3890 3891 crypt_data = kmalloc(crypt_len, GFP_KERNEL); 3892 if (!crypt_data) { 3893 *error = "Unable to allocate crypt data"; 3894 r = -ENOMEM; 3895 goto bad; 3896 } 3897 3898 ic->journal_scatterlist = dm_integrity_alloc_journal_scatterlist(ic, ic->journal); 3899 if (!ic->journal_scatterlist) { 3900 *error = "Unable to allocate sg list"; 3901 r = -ENOMEM; 3902 goto bad; 
3903 } 3904 ic->journal_io_scatterlist = dm_integrity_alloc_journal_scatterlist(ic, ic->journal_io); 3905 if (!ic->journal_io_scatterlist) { 3906 *error = "Unable to allocate sg list"; 3907 r = -ENOMEM; 3908 goto bad; 3909 } 3910 ic->sk_requests = kvmalloc_array(ic->journal_sections, 3911 sizeof(struct skcipher_request *), 3912 GFP_KERNEL | __GFP_ZERO); 3913 if (!ic->sk_requests) { 3914 *error = "Unable to allocate sk requests"; 3915 r = -ENOMEM; 3916 goto bad; 3917 } 3918 for (i = 0; i < ic->journal_sections; i++) { 3919 struct scatterlist sg; 3920 struct skcipher_request *section_req; 3921 __le32 section_le = cpu_to_le32(i); 3922 3923 memset(crypt_iv, 0x00, ivsize); 3924 memset(crypt_data, 0x00, crypt_len); 3925 memcpy(crypt_data, &section_le, min_t(size_t, crypt_len, sizeof(section_le))); 3926 3927 sg_init_one(&sg, crypt_data, crypt_len); 3928 skcipher_request_set_crypt(req, &sg, &sg, crypt_len, crypt_iv); 3929 init_completion(&comp.comp); 3930 comp.in_flight = (atomic_t)ATOMIC_INIT(1); 3931 if (do_crypt(true, req, &comp)) 3932 wait_for_completion(&comp.comp); 3933 3934 r = dm_integrity_failed(ic); 3935 if (r) { 3936 *error = "Unable to generate iv"; 3937 goto bad; 3938 } 3939 3940 section_req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL); 3941 if (!section_req) { 3942 *error = "Unable to allocate crypt request"; 3943 r = -ENOMEM; 3944 goto bad; 3945 } 3946 section_req->iv = kmalloc_array(ivsize, 2, 3947 GFP_KERNEL); 3948 if (!section_req->iv) { 3949 skcipher_request_free(section_req); 3950 *error = "Unable to allocate iv"; 3951 r = -ENOMEM; 3952 goto bad; 3953 } 3954 memcpy(section_req->iv + ivsize, crypt_data, ivsize); 3955 section_req->cryptlen = (size_t)ic->journal_section_sectors << SECTOR_SHIFT; 3956 ic->sk_requests[i] = section_req; 3957 DEBUG_bytes(crypt_data, ivsize, "iv(%u)", i); 3958 } 3959 } 3960 } 3961 3962 for (i = 0; i < N_COMMIT_IDS; i++) { 3963 unsigned int j; 3964 3965 retest_commit_id: 3966 for (j = 0; j < i; j++) { 3967 if (ic->commit_ids[j] == ic->commit_ids[i]) { 3968 ic->commit_ids[i] = cpu_to_le64(le64_to_cpu(ic->commit_ids[i]) + 1); 3969 goto retest_commit_id; 3970 } 3971 } 3972 DEBUG_print("commit id %u: %016llx\n", i, ic->commit_ids[i]); 3973 } 3974 3975 journal_tree_size = (__u64)ic->journal_entries * sizeof(struct journal_node); 3976 if (journal_tree_size > ULONG_MAX) { 3977 *error = "Journal doesn't fit into memory"; 3978 r = -ENOMEM; 3979 goto bad; 3980 } 3981 ic->journal_tree = kvmalloc(journal_tree_size, GFP_KERNEL); 3982 if (!ic->journal_tree) { 3983 *error = "Could not allocate memory for journal tree"; 3984 r = -ENOMEM; 3985 } 3986 bad: 3987 kfree(crypt_data); 3988 kfree(crypt_iv); 3989 skcipher_request_free(req); 3990 3991 return r; 3992 } 3993 3994 /* 3995 * Construct an integrity mapping 3996 * 3997 * Arguments: 3998 * device 3999 * offset from the start of the device 4000 * tag size 4001 * D - direct writes, J - journal writes, B - bitmap mode, R - recovery mode 4002 * number of optional arguments 4003 * optional arguments: 4004 * journal_sectors 4005 * interleave_sectors 4006 * buffer_sectors 4007 * journal_watermark 4008 * commit_time 4009 * meta_device 4010 * block_size 4011 * sectors_per_bit 4012 * bitmap_flush_interval 4013 * internal_hash 4014 * journal_crypt 4015 * journal_mac 4016 * recalculate 4017 */ 4018 static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv) 4019 { 4020 struct dm_integrity_c *ic; 4021 char dummy; 4022 int r; 4023 unsigned int extra_args; 4024 struct dm_arg_set as; 4025 static const struct
dm_arg _args[] = { 4026 {0, 18, "Invalid number of feature args"}, 4027 }; 4028 unsigned int journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec; 4029 bool should_write_sb; 4030 __u64 threshold; 4031 unsigned long long start; 4032 __s8 log2_sectors_per_bitmap_bit = -1; 4033 __s8 log2_blocks_per_bitmap_bit; 4034 __u64 bits_in_journal; 4035 __u64 n_bitmap_bits; 4036 4037 #define DIRECT_ARGUMENTS 4 4038 4039 if (argc <= DIRECT_ARGUMENTS) { 4040 ti->error = "Invalid argument count"; 4041 return -EINVAL; 4042 } 4043 4044 ic = kzalloc(sizeof(struct dm_integrity_c), GFP_KERNEL); 4045 if (!ic) { 4046 ti->error = "Cannot allocate integrity context"; 4047 return -ENOMEM; 4048 } 4049 ti->private = ic; 4050 ti->per_io_data_size = sizeof(struct dm_integrity_io); 4051 ic->ti = ti; 4052 4053 ic->in_progress = RB_ROOT; 4054 INIT_LIST_HEAD(&ic->wait_list); 4055 init_waitqueue_head(&ic->endio_wait); 4056 bio_list_init(&ic->flush_bio_list); 4057 init_waitqueue_head(&ic->copy_to_journal_wait); 4058 init_completion(&ic->crypto_backoff); 4059 atomic64_set(&ic->number_of_mismatches, 0); 4060 ic->bitmap_flush_interval = BITMAP_FLUSH_INTERVAL; 4061 4062 r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &ic->dev); 4063 if (r) { 4064 ti->error = "Device lookup failed"; 4065 goto bad; 4066 } 4067 4068 if (sscanf(argv[1], "%llu%c", &start, &dummy) != 1 || start != (sector_t)start) { 4069 ti->error = "Invalid starting offset"; 4070 r = -EINVAL; 4071 goto bad; 4072 } 4073 ic->start = start; 4074 4075 if (strcmp(argv[2], "-")) { 4076 if (sscanf(argv[2], "%u%c", &ic->tag_size, &dummy) != 1 || !ic->tag_size) { 4077 ti->error = "Invalid tag size"; 4078 r = -EINVAL; 4079 goto bad; 4080 } 4081 } 4082 4083 if (!strcmp(argv[3], "J") || !strcmp(argv[3], "B") || 4084 !strcmp(argv[3], "D") || !strcmp(argv[3], "R")) { 4085 ic->mode = argv[3][0]; 4086 } else { 4087 ti->error = "Invalid mode (expecting J, B, D, R)"; 4088 r = -EINVAL; 4089 goto bad; 4090 } 4091 4092 journal_sectors = 0; 4093 interleave_sectors = DEFAULT_INTERLEAVE_SECTORS; 4094 buffer_sectors = DEFAULT_BUFFER_SECTORS; 4095 journal_watermark = DEFAULT_JOURNAL_WATERMARK; 4096 sync_msec = DEFAULT_SYNC_MSEC; 4097 ic->sectors_per_block = 1; 4098 4099 as.argc = argc - DIRECT_ARGUMENTS; 4100 as.argv = argv + DIRECT_ARGUMENTS; 4101 r = dm_read_arg_group(_args, &as, &extra_args, &ti->error); 4102 if (r) 4103 goto bad; 4104 4105 while (extra_args--) { 4106 const char *opt_string; 4107 unsigned int val; 4108 unsigned long long llval; 4109 4110 opt_string = dm_shift_arg(&as); 4111 if (!opt_string) { 4112 r = -EINVAL; 4113 ti->error = "Not enough feature arguments"; 4114 goto bad; 4115 } 4116 if (sscanf(opt_string, "journal_sectors:%u%c", &val, &dummy) == 1) 4117 journal_sectors = val ? 
val : 1; 4118 else if (sscanf(opt_string, "interleave_sectors:%u%c", &val, &dummy) == 1) 4119 interleave_sectors = val; 4120 else if (sscanf(opt_string, "buffer_sectors:%u%c", &val, &dummy) == 1) 4121 buffer_sectors = val; 4122 else if (sscanf(opt_string, "journal_watermark:%u%c", &val, &dummy) == 1 && val <= 100) 4123 journal_watermark = val; 4124 else if (sscanf(opt_string, "commit_time:%u%c", &val, &dummy) == 1) 4125 sync_msec = val; 4126 else if (!strncmp(opt_string, "meta_device:", strlen("meta_device:"))) { 4127 if (ic->meta_dev) { 4128 dm_put_device(ti, ic->meta_dev); 4129 ic->meta_dev = NULL; 4130 } 4131 r = dm_get_device(ti, strchr(opt_string, ':') + 1, 4132 dm_table_get_mode(ti->table), &ic->meta_dev); 4133 if (r) { 4134 ti->error = "Device lookup failed"; 4135 goto bad; 4136 } 4137 } else if (sscanf(opt_string, "block_size:%u%c", &val, &dummy) == 1) { 4138 if (val < 1 << SECTOR_SHIFT || 4139 val > MAX_SECTORS_PER_BLOCK << SECTOR_SHIFT || 4140 (val & (val - 1))) { 4141 r = -EINVAL; 4142 ti->error = "Invalid block_size argument"; 4143 goto bad; 4144 } 4145 ic->sectors_per_block = val >> SECTOR_SHIFT; 4146 } else if (sscanf(opt_string, "sectors_per_bit:%llu%c", &llval, &dummy) == 1) { 4147 log2_sectors_per_bitmap_bit = !llval ? 0 : __ilog2_u64(llval); 4148 } else if (sscanf(opt_string, "bitmap_flush_interval:%u%c", &val, &dummy) == 1) { 4149 if (val >= (uint64_t)UINT_MAX * 1000 / HZ) { 4150 r = -EINVAL; 4151 ti->error = "Invalid bitmap_flush_interval argument"; 4152 goto bad; 4153 } 4154 ic->bitmap_flush_interval = msecs_to_jiffies(val); 4155 } else if (!strncmp(opt_string, "internal_hash:", strlen("internal_hash:"))) { 4156 r = get_alg_and_key(opt_string, &ic->internal_hash_alg, &ti->error, 4157 "Invalid internal_hash argument"); 4158 if (r) 4159 goto bad; 4160 } else if (!strncmp(opt_string, "journal_crypt:", strlen("journal_crypt:"))) { 4161 r = get_alg_and_key(opt_string, &ic->journal_crypt_alg, &ti->error, 4162 "Invalid journal_crypt argument"); 4163 if (r) 4164 goto bad; 4165 } else if (!strncmp(opt_string, "journal_mac:", strlen("journal_mac:"))) { 4166 r = get_alg_and_key(opt_string, &ic->journal_mac_alg, &ti->error, 4167 "Invalid journal_mac argument"); 4168 if (r) 4169 goto bad; 4170 } else if (!strcmp(opt_string, "recalculate")) { 4171 ic->recalculate_flag = true; 4172 } else if (!strcmp(opt_string, "reset_recalculate")) { 4173 ic->recalculate_flag = true; 4174 ic->reset_recalculate_flag = true; 4175 } else if (!strcmp(opt_string, "allow_discards")) { 4176 ic->discard = true; 4177 } else if (!strcmp(opt_string, "fix_padding")) { 4178 ic->fix_padding = true; 4179 } else if (!strcmp(opt_string, "fix_hmac")) { 4180 ic->fix_hmac = true; 4181 } else if (!strcmp(opt_string, "legacy_recalculate")) { 4182 ic->legacy_recalculate = true; 4183 } else { 4184 r = -EINVAL; 4185 ti->error = "Invalid argument"; 4186 goto bad; 4187 } 4188 } 4189 4190 ic->data_device_sectors = bdev_nr_sectors(ic->dev->bdev); 4191 if (!ic->meta_dev) 4192 ic->meta_device_sectors = ic->data_device_sectors; 4193 else 4194 ic->meta_device_sectors = bdev_nr_sectors(ic->meta_dev->bdev); 4195 4196 if (!journal_sectors) { 4197 journal_sectors = min((sector_t)DEFAULT_MAX_JOURNAL_SECTORS, 4198 ic->data_device_sectors >> DEFAULT_JOURNAL_SIZE_FACTOR); 4199 } 4200 4201 if (!buffer_sectors) 4202 buffer_sectors = 1; 4203 ic->log2_buffer_sectors = min((int)__fls(buffer_sectors), 31 - SECTOR_SHIFT); 4204 4205 r = get_mac(&ic->internal_hash, &ic->internal_hash_alg, &ti->error, 4206 "Invalid internal hash", "Error setting 
internal hash key"); 4207 if (r) 4208 goto bad; 4209 4210 r = get_mac(&ic->journal_mac, &ic->journal_mac_alg, &ti->error, 4211 "Invalid journal mac", "Error setting journal mac key"); 4212 if (r) 4213 goto bad; 4214 4215 if (!ic->tag_size) { 4216 if (!ic->internal_hash) { 4217 ti->error = "Unknown tag size"; 4218 r = -EINVAL; 4219 goto bad; 4220 } 4221 ic->tag_size = crypto_shash_digestsize(ic->internal_hash); 4222 } 4223 if (ic->tag_size > MAX_TAG_SIZE) { 4224 ti->error = "Too big tag size"; 4225 r = -EINVAL; 4226 goto bad; 4227 } 4228 if (!(ic->tag_size & (ic->tag_size - 1))) 4229 ic->log2_tag_size = __ffs(ic->tag_size); 4230 else 4231 ic->log2_tag_size = -1; 4232 4233 if (ic->mode == 'B' && !ic->internal_hash) { 4234 r = -EINVAL; 4235 ti->error = "Bitmap mode can be only used with internal hash"; 4236 goto bad; 4237 } 4238 4239 if (ic->discard && !ic->internal_hash) { 4240 r = -EINVAL; 4241 ti->error = "Discard can be only used with internal hash"; 4242 goto bad; 4243 } 4244 4245 ic->autocommit_jiffies = msecs_to_jiffies(sync_msec); 4246 ic->autocommit_msec = sync_msec; 4247 timer_setup(&ic->autocommit_timer, autocommit_fn, 0); 4248 4249 ic->io = dm_io_client_create(); 4250 if (IS_ERR(ic->io)) { 4251 r = PTR_ERR(ic->io); 4252 ic->io = NULL; 4253 ti->error = "Cannot allocate dm io"; 4254 goto bad; 4255 } 4256 4257 r = mempool_init_slab_pool(&ic->journal_io_mempool, JOURNAL_IO_MEMPOOL, journal_io_cache); 4258 if (r) { 4259 ti->error = "Cannot allocate mempool"; 4260 goto bad; 4261 } 4262 4263 ic->metadata_wq = alloc_workqueue("dm-integrity-metadata", 4264 WQ_MEM_RECLAIM, METADATA_WORKQUEUE_MAX_ACTIVE); 4265 if (!ic->metadata_wq) { 4266 ti->error = "Cannot allocate workqueue"; 4267 r = -ENOMEM; 4268 goto bad; 4269 } 4270 4271 /* 4272 * If this workqueue weren't ordered, it would cause bio reordering 4273 * and reduced performance. 
4274 */ 4275 ic->wait_wq = alloc_ordered_workqueue("dm-integrity-wait", WQ_MEM_RECLAIM); 4276 if (!ic->wait_wq) { 4277 ti->error = "Cannot allocate workqueue"; 4278 r = -ENOMEM; 4279 goto bad; 4280 } 4281 4282 ic->offload_wq = alloc_workqueue("dm-integrity-offload", WQ_MEM_RECLAIM, 4283 METADATA_WORKQUEUE_MAX_ACTIVE); 4284 if (!ic->offload_wq) { 4285 ti->error = "Cannot allocate workqueue"; 4286 r = -ENOMEM; 4287 goto bad; 4288 } 4289 4290 ic->commit_wq = alloc_workqueue("dm-integrity-commit", WQ_MEM_RECLAIM, 1); 4291 if (!ic->commit_wq) { 4292 ti->error = "Cannot allocate workqueue"; 4293 r = -ENOMEM; 4294 goto bad; 4295 } 4296 INIT_WORK(&ic->commit_work, integrity_commit); 4297 4298 if (ic->mode == 'J' || ic->mode == 'B') { 4299 ic->writer_wq = alloc_workqueue("dm-integrity-writer", WQ_MEM_RECLAIM, 1); 4300 if (!ic->writer_wq) { 4301 ti->error = "Cannot allocate workqueue"; 4302 r = -ENOMEM; 4303 goto bad; 4304 } 4305 INIT_WORK(&ic->writer_work, integrity_writer); 4306 } 4307 4308 ic->sb = alloc_pages_exact(SB_SECTORS << SECTOR_SHIFT, GFP_KERNEL); 4309 if (!ic->sb) { 4310 r = -ENOMEM; 4311 ti->error = "Cannot allocate superblock area"; 4312 goto bad; 4313 } 4314 4315 r = sync_rw_sb(ic, REQ_OP_READ); 4316 if (r) { 4317 ti->error = "Error reading superblock"; 4318 goto bad; 4319 } 4320 should_write_sb = false; 4321 if (memcmp(ic->sb->magic, SB_MAGIC, 8)) { 4322 if (ic->mode != 'R') { 4323 if (memchr_inv(ic->sb, 0, SB_SECTORS << SECTOR_SHIFT)) { 4324 r = -EINVAL; 4325 ti->error = "The device is not initialized"; 4326 goto bad; 4327 } 4328 } 4329 4330 r = initialize_superblock(ic, journal_sectors, interleave_sectors); 4331 if (r) { 4332 ti->error = "Could not initialize superblock"; 4333 goto bad; 4334 } 4335 if (ic->mode != 'R') 4336 should_write_sb = true; 4337 } 4338 4339 if (!ic->sb->version || ic->sb->version > SB_VERSION_5) { 4340 r = -EINVAL; 4341 ti->error = "Unknown version"; 4342 goto bad; 4343 } 4344 if (le16_to_cpu(ic->sb->integrity_tag_size) != ic->tag_size) { 4345 r = -EINVAL; 4346 ti->error = "Tag size doesn't match the information in superblock"; 4347 goto bad; 4348 } 4349 if (ic->sb->log2_sectors_per_block != __ffs(ic->sectors_per_block)) { 4350 r = -EINVAL; 4351 ti->error = "Block size doesn't match the information in superblock"; 4352 goto bad; 4353 } 4354 if (!le32_to_cpu(ic->sb->journal_sections)) { 4355 r = -EINVAL; 4356 ti->error = "Corrupted superblock, journal_sections is 0"; 4357 goto bad; 4358 } 4359 /* make sure that ti->max_io_len doesn't overflow */ 4360 if (!ic->meta_dev) { 4361 if (ic->sb->log2_interleave_sectors < MIN_LOG2_INTERLEAVE_SECTORS || 4362 ic->sb->log2_interleave_sectors > MAX_LOG2_INTERLEAVE_SECTORS) { 4363 r = -EINVAL; 4364 ti->error = "Invalid interleave_sectors in the superblock"; 4365 goto bad; 4366 } 4367 } else { 4368 if (ic->sb->log2_interleave_sectors) { 4369 r = -EINVAL; 4370 ti->error = "Invalid interleave_sectors in the superblock"; 4371 goto bad; 4372 } 4373 } 4374 if (!!(ic->sb->flags & cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC)) != !!ic->journal_mac_alg.alg_string) { 4375 r = -EINVAL; 4376 ti->error = "Journal mac mismatch"; 4377 goto bad; 4378 } 4379 4380 get_provided_data_sectors(ic); 4381 if (!ic->provided_data_sectors) { 4382 r = -EINVAL; 4383 ti->error = "The device is too small"; 4384 goto bad; 4385 } 4386 4387 try_smaller_buffer: 4388 r = calculate_device_limits(ic); 4389 if (r) { 4390 if (ic->meta_dev) { 4391 if (ic->log2_buffer_sectors > 3) { 4392 ic->log2_buffer_sectors--; 4393 goto try_smaller_buffer; 4394 } 4395 } 4396 
ti->error = "The device is too small"; 4397 goto bad; 4398 } 4399 4400 if (log2_sectors_per_bitmap_bit < 0) 4401 log2_sectors_per_bitmap_bit = __fls(DEFAULT_SECTORS_PER_BITMAP_BIT); 4402 if (log2_sectors_per_bitmap_bit < ic->sb->log2_sectors_per_block) 4403 log2_sectors_per_bitmap_bit = ic->sb->log2_sectors_per_block; 4404 4405 bits_in_journal = ((__u64)ic->journal_section_sectors * ic->journal_sections) << (SECTOR_SHIFT + 3); 4406 if (bits_in_journal > UINT_MAX) 4407 bits_in_journal = UINT_MAX; 4408 while (bits_in_journal < (ic->provided_data_sectors + ((sector_t)1 << log2_sectors_per_bitmap_bit) - 1) >> log2_sectors_per_bitmap_bit) 4409 log2_sectors_per_bitmap_bit++; 4410 4411 log2_blocks_per_bitmap_bit = log2_sectors_per_bitmap_bit - ic->sb->log2_sectors_per_block; 4412 ic->log2_blocks_per_bitmap_bit = log2_blocks_per_bitmap_bit; 4413 if (should_write_sb) 4414 ic->sb->log2_blocks_per_bitmap_bit = log2_blocks_per_bitmap_bit; 4415 4416 n_bitmap_bits = ((ic->provided_data_sectors >> ic->sb->log2_sectors_per_block) 4417 + (((sector_t)1 << log2_blocks_per_bitmap_bit) - 1)) >> log2_blocks_per_bitmap_bit; 4418 ic->n_bitmap_blocks = DIV_ROUND_UP(n_bitmap_bits, BITMAP_BLOCK_SIZE * 8); 4419 4420 if (!ic->meta_dev) 4421 ic->log2_buffer_sectors = min(ic->log2_buffer_sectors, (__u8)__ffs(ic->metadata_run)); 4422 4423 if (ti->len > ic->provided_data_sectors) { 4424 r = -EINVAL; 4425 ti->error = "Not enough provided sectors for requested mapping size"; 4426 goto bad; 4427 } 4428 4429 4430 threshold = (__u64)ic->journal_entries * (100 - journal_watermark); 4431 threshold += 50; 4432 do_div(threshold, 100); 4433 ic->free_sectors_threshold = threshold; 4434 4435 DEBUG_print("initialized:\n"); 4436 DEBUG_print(" integrity_tag_size %u\n", le16_to_cpu(ic->sb->integrity_tag_size)); 4437 DEBUG_print(" journal_entry_size %u\n", ic->journal_entry_size); 4438 DEBUG_print(" journal_entries_per_sector %u\n", ic->journal_entries_per_sector); 4439 DEBUG_print(" journal_section_entries %u\n", ic->journal_section_entries); 4440 DEBUG_print(" journal_section_sectors %u\n", ic->journal_section_sectors); 4441 DEBUG_print(" journal_sections %u\n", (unsigned int)le32_to_cpu(ic->sb->journal_sections)); 4442 DEBUG_print(" journal_entries %u\n", ic->journal_entries); 4443 DEBUG_print(" log2_interleave_sectors %d\n", ic->sb->log2_interleave_sectors); 4444 DEBUG_print(" data_device_sectors 0x%llx\n", bdev_nr_sectors(ic->dev->bdev)); 4445 DEBUG_print(" initial_sectors 0x%x\n", ic->initial_sectors); 4446 DEBUG_print(" metadata_run 0x%x\n", ic->metadata_run); 4447 DEBUG_print(" log2_metadata_run %d\n", ic->log2_metadata_run); 4448 DEBUG_print(" provided_data_sectors 0x%llx (%llu)\n", ic->provided_data_sectors, ic->provided_data_sectors); 4449 DEBUG_print(" log2_buffer_sectors %u\n", ic->log2_buffer_sectors); 4450 DEBUG_print(" bits_in_journal %llu\n", bits_in_journal); 4451 4452 if (ic->recalculate_flag && !(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))) { 4453 ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING); 4454 ic->sb->recalc_sector = cpu_to_le64(0); 4455 } 4456 4457 if (ic->internal_hash) { 4458 ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1); 4459 if (!ic->recalc_wq) { 4460 ti->error = "Cannot allocate workqueue"; 4461 r = -ENOMEM; 4462 goto bad; 4463 } 4464 INIT_WORK(&ic->recalc_work, integrity_recalc); 4465 } else { 4466 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) { 4467 ti->error = "Recalculate can only be specified with internal_hash"; 4468 r = -EINVAL; 4469 goto bad; 4470 } 
4471 } 4472 4473 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) && 4474 le64_to_cpu(ic->sb->recalc_sector) < ic->provided_data_sectors && 4475 dm_integrity_disable_recalculate(ic)) { 4476 ti->error = "Recalculating with HMAC is disabled for security reasons - if you really need it, use the argument \"legacy_recalculate\""; 4477 r = -EOPNOTSUPP; 4478 goto bad; 4479 } 4480 4481 ic->bufio = dm_bufio_client_create(ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev, 4482 1U << (SECTOR_SHIFT + ic->log2_buffer_sectors), 1, 0, NULL, NULL, 0); 4483 if (IS_ERR(ic->bufio)) { 4484 r = PTR_ERR(ic->bufio); 4485 ti->error = "Cannot initialize dm-bufio"; 4486 ic->bufio = NULL; 4487 goto bad; 4488 } 4489 dm_bufio_set_sector_offset(ic->bufio, ic->start + ic->initial_sectors); 4490 4491 if (ic->mode != 'R') { 4492 r = create_journal(ic, &ti->error); 4493 if (r) 4494 goto bad; 4495 4496 } 4497 4498 if (ic->mode == 'B') { 4499 unsigned int i; 4500 unsigned int n_bitmap_pages = DIV_ROUND_UP(ic->n_bitmap_blocks, PAGE_SIZE / BITMAP_BLOCK_SIZE); 4501 4502 ic->recalc_bitmap = dm_integrity_alloc_page_list(n_bitmap_pages); 4503 if (!ic->recalc_bitmap) { 4504 r = -ENOMEM; 4505 goto bad; 4506 } 4507 ic->may_write_bitmap = dm_integrity_alloc_page_list(n_bitmap_pages); 4508 if (!ic->may_write_bitmap) { 4509 r = -ENOMEM; 4510 goto bad; 4511 } 4512 ic->bbs = kvmalloc_array(ic->n_bitmap_blocks, sizeof(struct bitmap_block_status), GFP_KERNEL); 4513 if (!ic->bbs) { 4514 r = -ENOMEM; 4515 goto bad; 4516 } 4517 INIT_DELAYED_WORK(&ic->bitmap_flush_work, bitmap_flush_work); 4518 for (i = 0; i < ic->n_bitmap_blocks; i++) { 4519 struct bitmap_block_status *bbs = &ic->bbs[i]; 4520 unsigned int sector, pl_index, pl_offset; 4521 4522 INIT_WORK(&bbs->work, bitmap_block_work); 4523 bbs->ic = ic; 4524 bbs->idx = i; 4525 bio_list_init(&bbs->bio_queue); 4526 spin_lock_init(&bbs->bio_queue_lock); 4527 4528 sector = i * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT); 4529 pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT); 4530 pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1); 4531 4532 bbs->bitmap = lowmem_page_address(ic->journal[pl_index].page) + pl_offset; 4533 } 4534 } 4535 4536 if (should_write_sb) { 4537 init_journal(ic, 0, ic->journal_sections, 0); 4538 r = dm_integrity_failed(ic); 4539 if (unlikely(r)) { 4540 ti->error = "Error initializing journal"; 4541 goto bad; 4542 } 4543 r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA); 4544 if (r) { 4545 ti->error = "Error initializing superblock"; 4546 goto bad; 4547 } 4548 ic->just_formatted = true; 4549 } 4550 4551 if (!ic->meta_dev) { 4552 r = dm_set_target_max_io_len(ti, 1U << ic->sb->log2_interleave_sectors); 4553 if (r) 4554 goto bad; 4555 } 4556 if (ic->mode == 'B') { 4557 unsigned int max_io_len; 4558 4559 max_io_len = ((sector_t)ic->sectors_per_block << ic->log2_blocks_per_bitmap_bit) * (BITMAP_BLOCK_SIZE * 8); 4560 if (!max_io_len) 4561 max_io_len = 1U << 31; 4562 DEBUG_print("max_io_len: old %u, new %u\n", ti->max_io_len, max_io_len); 4563 if (!ti->max_io_len || ti->max_io_len > max_io_len) { 4564 r = dm_set_target_max_io_len(ti, max_io_len); 4565 if (r) 4566 goto bad; 4567 } 4568 } 4569 4570 if (!ic->internal_hash) 4571 dm_integrity_set(ti, ic); 4572 4573 ti->num_flush_bios = 1; 4574 ti->flush_supported = true; 4575 if (ic->discard) 4576 ti->num_discard_bios = 1; 4577 4578 dm_audit_log_ctr(DM_MSG_PREFIX, ti, 1); 4579 return 0; 4580 4581 bad: 4582 dm_audit_log_ctr(DM_MSG_PREFIX, ti, 0); 4583 dm_integrity_dtr(ti); 4584 return r; 4585 } 4586 4587 static void dm_integrity_dtr(struct 
dm_target *ti) 4588 { 4589 struct dm_integrity_c *ic = ti->private; 4590 4591 BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress)); 4592 BUG_ON(!list_empty(&ic->wait_list)); 4593 4594 if (ic->mode == 'B') 4595 cancel_delayed_work_sync(&ic->bitmap_flush_work); 4596 if (ic->metadata_wq) 4597 destroy_workqueue(ic->metadata_wq); 4598 if (ic->wait_wq) 4599 destroy_workqueue(ic->wait_wq); 4600 if (ic->offload_wq) 4601 destroy_workqueue(ic->offload_wq); 4602 if (ic->commit_wq) 4603 destroy_workqueue(ic->commit_wq); 4604 if (ic->writer_wq) 4605 destroy_workqueue(ic->writer_wq); 4606 if (ic->recalc_wq) 4607 destroy_workqueue(ic->recalc_wq); 4608 kvfree(ic->bbs); 4609 if (ic->bufio) 4610 dm_bufio_client_destroy(ic->bufio); 4611 mempool_exit(&ic->journal_io_mempool); 4612 if (ic->io) 4613 dm_io_client_destroy(ic->io); 4614 if (ic->dev) 4615 dm_put_device(ti, ic->dev); 4616 if (ic->meta_dev) 4617 dm_put_device(ti, ic->meta_dev); 4618 dm_integrity_free_page_list(ic->journal); 4619 dm_integrity_free_page_list(ic->journal_io); 4620 dm_integrity_free_page_list(ic->journal_xor); 4621 dm_integrity_free_page_list(ic->recalc_bitmap); 4622 dm_integrity_free_page_list(ic->may_write_bitmap); 4623 if (ic->journal_scatterlist) 4624 dm_integrity_free_journal_scatterlist(ic, ic->journal_scatterlist); 4625 if (ic->journal_io_scatterlist) 4626 dm_integrity_free_journal_scatterlist(ic, ic->journal_io_scatterlist); 4627 if (ic->sk_requests) { 4628 unsigned int i; 4629 4630 for (i = 0; i < ic->journal_sections; i++) { 4631 struct skcipher_request *req; 4632 4633 req = ic->sk_requests[i]; 4634 if (req) { 4635 kfree_sensitive(req->iv); 4636 skcipher_request_free(req); 4637 } 4638 } 4639 kvfree(ic->sk_requests); 4640 } 4641 kvfree(ic->journal_tree); 4642 if (ic->sb) 4643 free_pages_exact(ic->sb, SB_SECTORS << SECTOR_SHIFT); 4644 4645 if (ic->internal_hash) 4646 crypto_free_shash(ic->internal_hash); 4647 free_alg(&ic->internal_hash_alg); 4648 4649 if (ic->journal_crypt) 4650 crypto_free_skcipher(ic->journal_crypt); 4651 free_alg(&ic->journal_crypt_alg); 4652 4653 if (ic->journal_mac) 4654 crypto_free_shash(ic->journal_mac); 4655 free_alg(&ic->journal_mac_alg); 4656 4657 kfree(ic); 4658 dm_audit_log_dtr(DM_MSG_PREFIX, ti, 1); 4659 } 4660 4661 static struct target_type integrity_target = { 4662 .name = "integrity", 4663 .version = {1, 10, 0}, 4664 .module = THIS_MODULE, 4665 .features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY, 4666 .ctr = dm_integrity_ctr, 4667 .dtr = dm_integrity_dtr, 4668 .map = dm_integrity_map, 4669 .postsuspend = dm_integrity_postsuspend, 4670 .resume = dm_integrity_resume, 4671 .status = dm_integrity_status, 4672 .iterate_devices = dm_integrity_iterate_devices, 4673 .io_hints = dm_integrity_io_hints, 4674 }; 4675 4676 static int __init dm_integrity_init(void) 4677 { 4678 int r; 4679 4680 journal_io_cache = kmem_cache_create("integrity_journal_io", 4681 sizeof(struct journal_io), 0, 0, NULL); 4682 if (!journal_io_cache) { 4683 DMERR("can't allocate journal io cache"); 4684 return -ENOMEM; 4685 } 4686 4687 r = dm_register_target(&integrity_target); 4688 if (r < 0) { 4689 kmem_cache_destroy(journal_io_cache); 4690 return r; 4691 } 4692 4693 return 0; 4694 } 4695 4696 static void __exit dm_integrity_exit(void) 4697 { 4698 dm_unregister_target(&integrity_target); 4699 kmem_cache_destroy(journal_io_cache); 4700 } 4701 4702 module_init(dm_integrity_init); 4703 module_exit(dm_integrity_exit); 4704 4705 MODULE_AUTHOR("Milan Broz"); 4706 MODULE_AUTHOR("Mikulas Patocka"); 4707 MODULE_DESCRIPTION(DM_NAME " target for 
integrity tags extension"); 4708 MODULE_LICENSE("GPL"); 4709
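/*
 * Illustrative example (a sketch, not taken from this file): per the
 * constructor comment in dm_integrity_ctr() above, a target table line has
 * the form "<start> <length> integrity <device> <offset> <tag size> <mode>
 * <#opt args> [opt args]". For instance, with placeholder numbers and a
 * placeholder device:
 *
 *   0 1000000 integrity /dev/sdb 0 - J 1 internal_hash:crc32c
 *
 * A tag size of "-" lets the tag size be taken from the internal hash digest
 * size, and the mapping length must not exceed the provided_data_sectors
 * computed by the constructor, otherwise it fails with "Not enough provided
 * sectors for requested mapping size".
 */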