// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2016-2017 Red Hat, Inc. All rights reserved.
 * Copyright (C) 2016-2017 Milan Broz
 * Copyright (C) 2016-2017 Mikulas Patocka
 *
 * This file is released under the GPL.
 */

#include "dm-bio-record.h"

#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/device-mapper.h>
#include <linux/dm-io.h>
#include <linux/vmalloc.h>
#include <linux/sort.h>
#include <linux/rbtree.h>
#include <linux/delay.h>
#include <linux/random.h>
#include <linux/reboot.h>
#include <crypto/hash.h>
#include <crypto/skcipher.h>
#include <crypto/utils.h>
#include <linux/async_tx.h>
#include <linux/dm-bufio.h>

#include "dm-audit.h"

#define DM_MSG_PREFIX "integrity"

#define DEFAULT_INTERLEAVE_SECTORS 32768
#define DEFAULT_JOURNAL_SIZE_FACTOR 7
#define DEFAULT_SECTORS_PER_BITMAP_BIT 32768
#define DEFAULT_BUFFER_SECTORS 128
#define DEFAULT_JOURNAL_WATERMARK 50
#define DEFAULT_SYNC_MSEC 10000
#define DEFAULT_MAX_JOURNAL_SECTORS (IS_ENABLED(CONFIG_64BIT) ? 131072 : 8192)
#define MIN_LOG2_INTERLEAVE_SECTORS 3
#define MAX_LOG2_INTERLEAVE_SECTORS 31
#define METADATA_WORKQUEUE_MAX_ACTIVE 16
#define RECALC_SECTORS (IS_ENABLED(CONFIG_64BIT) ? 32768 : 2048)
#define RECALC_WRITE_SUPER 16
#define BITMAP_BLOCK_SIZE 4096 /* don't change it */
#define BITMAP_FLUSH_INTERVAL (10 * HZ)
#define DISCARD_FILLER 0xf6
#define SALT_SIZE 16
#define RECHECK_POOL_SIZE 256

/*
 * Warning - DEBUG_PRINT prints security-sensitive data to the log,
 * so it should not be enabled in the official kernel
 */
//#define DEBUG_PRINT
//#define INTERNAL_VERIFY

/*
 * On disk structures
 */

#define SB_MAGIC "integrt"
#define SB_VERSION_1 1
#define SB_VERSION_2 2
#define SB_VERSION_3 3
#define SB_VERSION_4 4
#define SB_VERSION_5 5
#define SB_VERSION_6 6
#define SB_SECTORS 8
#define MAX_SECTORS_PER_BLOCK 8

struct superblock {
	__u8 magic[8];
	__u8 version;
	__u8 log2_interleave_sectors;
	__le16 integrity_tag_size;
	__le32 journal_sections;
	__le64 provided_data_sectors; /* userspace uses this value */
	__le32 flags;
	__u8 log2_sectors_per_block;
	__u8 log2_blocks_per_bitmap_bit;
	__u8 pad[2];
	__le64 recalc_sector;
	__u8 pad2[8];
	__u8 salt[SALT_SIZE];
};

#define SB_FLAG_HAVE_JOURNAL_MAC 0x1
#define SB_FLAG_RECALCULATING 0x2
#define SB_FLAG_DIRTY_BITMAP 0x4
#define SB_FLAG_FIXED_PADDING 0x8
#define SB_FLAG_FIXED_HMAC 0x10
#define SB_FLAG_INLINE 0x20

#define JOURNAL_ENTRY_ROUNDUP 8

typedef __le64 commit_id_t;
#define JOURNAL_MAC_PER_SECTOR 8

struct journal_entry {
	union {
		struct {
			__le32 sector_lo;
			__le32 sector_hi;
		} s;
		__le64 sector;
	} u;
	commit_id_t last_bytes[];
	/* __u8 tag[0]; */
};

#define journal_entry_tag(ic, je) ((__u8 *)&(je)->last_bytes[(ic)->sectors_per_block])

#if BITS_PER_LONG == 64
#define journal_entry_set_sector(je, x) do { smp_wmb(); WRITE_ONCE((je)->u.sector, cpu_to_le64(x)); } while (0)
#else
#define journal_entry_set_sector(je, x) do { (je)->u.s.sector_lo = cpu_to_le32(x); smp_wmb(); WRITE_ONCE((je)->u.s.sector_hi, cpu_to_le32((x) >> 32)); } while (0)
#endif
#define journal_entry_get_sector(je) le64_to_cpu((je)->u.sector)
#define journal_entry_is_unused(je) ((je)->u.s.sector_hi == cpu_to_le32(-1))
#define journal_entry_set_unused(je) ((je)->u.s.sector_hi = cpu_to_le32(-1))
#define journal_entry_is_inprogress(je) ((je)->u.s.sector_hi == cpu_to_le32(-2))
#define journal_entry_set_inprogress(je) ((je)->u.s.sector_hi = cpu_to_le32(-2))

#define JOURNAL_BLOCK_SECTORS 8
#define JOURNAL_SECTOR_DATA ((1 << SECTOR_SHIFT) - sizeof(commit_id_t))
#define JOURNAL_MAC_SIZE (JOURNAL_MAC_PER_SECTOR * JOURNAL_BLOCK_SECTORS)

struct journal_sector {
	struct_group(sectors,
		__u8 entries[JOURNAL_SECTOR_DATA - JOURNAL_MAC_PER_SECTOR];
		__u8 mac[JOURNAL_MAC_PER_SECTOR];
	);
	commit_id_t commit_id;
};

#define MAX_TAG_SIZE 255

#define METADATA_PADDING_SECTORS 8

#define N_COMMIT_IDS 4

static unsigned char prev_commit_seq(unsigned char seq)
{
	return (seq + N_COMMIT_IDS - 1) % N_COMMIT_IDS;
}

static unsigned char next_commit_seq(unsigned char seq)
{
	return (seq + 1) % N_COMMIT_IDS;
}

/*
 * In-memory structures
 */

struct journal_node {
	struct rb_node node;
	sector_t sector;
};

struct alg_spec {
	char *alg_string;
	char *key_string;
	__u8 *key;
	unsigned int key_size;
};

struct dm_integrity_c {
	struct dm_dev *dev;
	struct dm_dev *meta_dev;
	unsigned int tag_size;
	__s8 log2_tag_size;
	unsigned int tuple_size;
	sector_t start;
	mempool_t journal_io_mempool;
	struct dm_io_client *io;
	struct dm_bufio_client *bufio;
	struct workqueue_struct *metadata_wq;
	struct superblock *sb;
	unsigned int journal_pages;
	unsigned int n_bitmap_blocks;

	struct page_list *journal;
	struct page_list *journal_io;
	struct page_list *journal_xor;
	struct page_list *recalc_bitmap;
	struct page_list *may_write_bitmap;
	struct bitmap_block_status *bbs;
	unsigned int bitmap_flush_interval;
	int synchronous_mode;
	struct bio_list synchronous_bios;
	struct delayed_work bitmap_flush_work;

	struct crypto_skcipher *journal_crypt;
	struct scatterlist **journal_scatterlist;
	struct scatterlist **journal_io_scatterlist;
	struct skcipher_request **sk_requests;

	struct crypto_shash *journal_mac;

	struct journal_node *journal_tree;
	struct rb_root journal_tree_root;

	sector_t provided_data_sectors;

	unsigned short journal_entry_size;
	unsigned char journal_entries_per_sector;
	unsigned char journal_section_entries;
	unsigned short journal_section_sectors;
	unsigned int journal_sections;
	unsigned int journal_entries;
	sector_t data_device_sectors;
	sector_t meta_device_sectors;
	unsigned int initial_sectors;
	unsigned int metadata_run;
	__s8 log2_metadata_run;
	__u8 log2_buffer_sectors;
	__u8 sectors_per_block;
	__u8 log2_blocks_per_bitmap_bit;

	unsigned char mode;
	bool internal_hash;

	int failed;

	struct crypto_shash *internal_shash;
	struct crypto_ahash *internal_ahash;
	unsigned int internal_hash_digestsize;

	struct dm_target *ti;

	/* these variables are locked with endio_wait.lock */
	struct rb_root in_progress;
	struct list_head wait_list;
	wait_queue_head_t endio_wait;
	struct workqueue_struct *wait_wq;
	struct workqueue_struct *offload_wq;

	unsigned char commit_seq;
	commit_id_t commit_ids[N_COMMIT_IDS];

	unsigned int committed_section;
	unsigned int n_committed_sections;

	unsigned int uncommitted_section;
	unsigned int n_uncommitted_sections;
	unsigned int free_section;
	unsigned char free_section_entry;
	unsigned int free_sectors;

	unsigned int free_sectors_threshold;

	struct workqueue_struct *commit_wq;
	struct work_struct commit_work;

	struct workqueue_struct *writer_wq;
	struct work_struct writer_work;

	struct workqueue_struct *recalc_wq;
	struct work_struct recalc_work;

	struct bio_list flush_bio_list;

	unsigned long autocommit_jiffies;
	struct timer_list autocommit_timer;
	unsigned int autocommit_msec;

	wait_queue_head_t copy_to_journal_wait;

	struct completion crypto_backoff;

	bool wrote_to_journal;
	bool journal_uptodate;
	bool just_formatted;
	bool recalculate_flag;
	bool reset_recalculate_flag;
	bool discard;
	bool fix_padding;
	bool fix_hmac;
	bool legacy_recalculate;

	mempool_t ahash_req_pool;
	struct ahash_request *journal_ahash_req;

	struct alg_spec internal_hash_alg;
	struct alg_spec journal_crypt_alg;
	struct alg_spec journal_mac_alg;

	atomic64_t number_of_mismatches;

	mempool_t recheck_pool;
	struct bio_set recheck_bios;
	struct bio_set recalc_bios;

	struct notifier_block reboot_notifier;
};

struct dm_integrity_range {
	sector_t logical_sector;
	sector_t n_sectors;
	bool waiting;
	union {
		struct rb_node node;
		struct {
			struct task_struct *task;
			struct list_head wait_entry;
		};
	};
};

struct dm_integrity_io {
	struct work_struct work;

	struct dm_integrity_c *ic;
	enum req_op op;
	bool fua;

	struct dm_integrity_range range;

	sector_t metadata_block;
	unsigned int metadata_offset;

	atomic_t in_flight;
	blk_status_t bi_status;

	struct completion *completion;

	struct dm_bio_details bio_details;

	char *integrity_payload;
	unsigned payload_len;
	bool integrity_payload_from_mempool;
	bool integrity_range_locked;

	struct ahash_request *ahash_req;
};

struct journal_completion {
	struct dm_integrity_c *ic;
	atomic_t in_flight;
	struct completion comp;
};

struct journal_io {
	struct dm_integrity_range range;
	struct journal_completion *comp;
};

struct bitmap_block_status {
	struct work_struct work;
	struct dm_integrity_c *ic;
	unsigned int idx;
	unsigned long *bitmap;
	struct bio_list bio_queue;
	spinlock_t bio_queue_lock;

};

static struct kmem_cache *journal_io_cache;

#define JOURNAL_IO_MEMPOOL 32
#define AHASH_MEMPOOL 32

#ifdef DEBUG_PRINT
#define DEBUG_print(x, ...) printk(KERN_DEBUG x, ##__VA_ARGS__)
#define DEBUG_bytes(bytes, len, msg, ...) printk(KERN_DEBUG msg "%s%*ph\n", ##__VA_ARGS__, \
						 len ? ": " : "", len, bytes)
#else
#define DEBUG_print(x, ...) do { } while (0)
#define DEBUG_bytes(bytes, len, msg, ...) do { } while (0)
#endif

static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map);
static int dm_integrity_map_inline(struct dm_integrity_io *dio, bool from_map);
static void integrity_bio_wait(struct work_struct *w);
static void dm_integrity_dtr(struct dm_target *ti);

static void dm_integrity_io_error(struct dm_integrity_c *ic, const char *msg, int err)
{
	if (err == -EILSEQ)
		atomic64_inc(&ic->number_of_mismatches);
	if (!cmpxchg(&ic->failed, 0, err))
		DMERR("Error on %s: %d", msg, err);
}

static int dm_integrity_failed(struct dm_integrity_c *ic)
{
	return READ_ONCE(ic->failed);
}

static bool dm_integrity_disable_recalculate(struct dm_integrity_c *ic)
{
	if (ic->legacy_recalculate)
		return false;
	if (!(ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) ?
	    ic->internal_hash_alg.key || ic->journal_mac_alg.key :
	    ic->internal_hash_alg.key && !ic->journal_mac_alg.key)
		return true;
	return false;
}

static commit_id_t dm_integrity_commit_id(struct dm_integrity_c *ic, unsigned int i,
					  unsigned int j, unsigned char seq)
{
	/*
	 * Xor the number with section and sector, so that if a piece of
	 * journal is written at wrong place, it is detected.
	 */
	return ic->commit_ids[seq] ^ cpu_to_le64(((__u64)i << 32) ^ j);
}

static void get_area_and_offset(struct dm_integrity_c *ic, sector_t data_sector,
				sector_t *area, sector_t *offset)
{
	if (!ic->meta_dev) {
		__u8 log2_interleave_sectors = ic->sb->log2_interleave_sectors;
		*area = data_sector >> log2_interleave_sectors;
		*offset = (unsigned int)data_sector & ((1U << log2_interleave_sectors) - 1);
	} else {
		*area = 0;
		*offset = data_sector;
	}
}

#define sector_to_block(ic, n) \
do { \
	BUG_ON((n) & (unsigned int)((ic)->sectors_per_block - 1)); \
	(n) >>= (ic)->sb->log2_sectors_per_block; \
} while (0)

static __u64 get_metadata_sector_and_offset(struct dm_integrity_c *ic, sector_t area,
					    sector_t offset, unsigned int *metadata_offset)
{
	__u64 ms;
	unsigned int mo;

	ms = area << ic->sb->log2_interleave_sectors;
	if (likely(ic->log2_metadata_run >= 0))
		ms += area << ic->log2_metadata_run;
	else
		ms += area * ic->metadata_run;
	ms >>= ic->log2_buffer_sectors;

	sector_to_block(ic, offset);

	if (likely(ic->log2_tag_size >= 0)) {
		ms += offset >> (SECTOR_SHIFT + ic->log2_buffer_sectors - ic->log2_tag_size);
		mo = (offset << ic->log2_tag_size) & ((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - 1);
	} else {
		ms += (__u64)offset * ic->tag_size >> (SECTOR_SHIFT + ic->log2_buffer_sectors);
		mo = (offset * ic->tag_size) & ((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - 1);
	}
	*metadata_offset = mo;
	return ms;
}

static sector_t get_data_sector(struct dm_integrity_c *ic, sector_t area, sector_t offset)
{
	sector_t result;

	if (ic->meta_dev)
		return offset;

	result = area << ic->sb->log2_interleave_sectors;
	if (likely(ic->log2_metadata_run >= 0))
		result += (area + 1) << ic->log2_metadata_run;
	else
		result += (area + 1) * ic->metadata_run;

	result += (sector_t)ic->initial_sectors + offset;
	result += ic->start;

	return result;
}

static void wraparound_section(struct dm_integrity_c *ic, unsigned int *sec_ptr)
{
	if (unlikely(*sec_ptr >= ic->journal_sections))
		*sec_ptr -= ic->journal_sections;
}

static void sb_set_version(struct dm_integrity_c *ic)
{
	if (ic->sb->flags & cpu_to_le32(SB_FLAG_INLINE))
		ic->sb->version = SB_VERSION_6;
	else if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC))
		ic->sb->version = SB_VERSION_5;
	else if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING))
		ic->sb->version = SB_VERSION_4;
	else if (ic->mode == 'B' || ic->sb->flags & cpu_to_le32(SB_FLAG_DIRTY_BITMAP))
		ic->sb->version = SB_VERSION_3;
	else if (ic->meta_dev || ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
		ic->sb->version = SB_VERSION_2;
	else
		ic->sb->version = SB_VERSION_1;
}

static int sb_mac(struct dm_integrity_c *ic, bool wr)
{
	SHASH_DESC_ON_STACK(desc, ic->journal_mac);
	int r;
	unsigned int mac_size = crypto_shash_digestsize(ic->journal_mac);
	__u8 *sb = (__u8 *)ic->sb;
	__u8 *mac = sb + (1 << SECTOR_SHIFT) - mac_size;

	if (sizeof(struct superblock) + mac_size > 1 << SECTOR_SHIFT ||
	    mac_size > HASH_MAX_DIGESTSIZE) {
		dm_integrity_io_error(ic, "digest is too long", -EINVAL);
		return -EINVAL;
	}

	desc->tfm = ic->journal_mac;

	if (likely(wr)) {
		r = crypto_shash_digest(desc, sb, mac - sb, mac);
		if (unlikely(r < 0)) {
			dm_integrity_io_error(ic, "crypto_shash_digest", r);
			return r;
		}
	} else {
		__u8 actual_mac[HASH_MAX_DIGESTSIZE];

		r = crypto_shash_digest(desc, sb, mac - sb, actual_mac);
		if (unlikely(r < 0)) {
			dm_integrity_io_error(ic, "crypto_shash_digest", r);
			return r;
		}
		if (crypto_memneq(mac, actual_mac, mac_size)) {
			dm_integrity_io_error(ic, "superblock mac", -EILSEQ);
			dm_audit_log_target(DM_MSG_PREFIX, "mac-superblock", ic->ti, 0);
			return -EILSEQ;
		}
	}

	return 0;
}

static int sync_rw_sb(struct dm_integrity_c *ic, blk_opf_t opf)
{
	struct dm_io_request io_req;
	struct dm_io_region io_loc;
	const enum req_op op = opf & REQ_OP_MASK;
	int r;

	io_req.bi_opf = opf;
	io_req.mem.type = DM_IO_KMEM;
	io_req.mem.ptr.addr = ic->sb;
	io_req.notify.fn = NULL;
	io_req.client = ic->io;
	io_loc.bdev = ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev;
	io_loc.sector = ic->start;
	io_loc.count = SB_SECTORS;

	if (op == REQ_OP_WRITE) {
		sb_set_version(ic);
		if (ic->journal_mac && ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) {
			r = sb_mac(ic, true);
			if (unlikely(r))
				return r;
		}
	}

	r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT);
	if (unlikely(r))
		return r;

	if (op == REQ_OP_READ) {
		if (ic->mode != 'R' && ic->journal_mac && ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) {
			r = sb_mac(ic, false);
			if (unlikely(r))
				return r;
		}
	}

	return 0;
}

#define BITMAP_OP_TEST_ALL_SET 0
#define BITMAP_OP_TEST_ALL_CLEAR 1
#define BITMAP_OP_SET 2
#define BITMAP_OP_CLEAR 3

static bool block_bitmap_op(struct dm_integrity_c *ic, struct page_list *bitmap,
			    sector_t sector, sector_t n_sectors, int mode)
{
	unsigned long bit, end_bit, this_end_bit, page, end_page;
	unsigned long *data;

	if (unlikely(((sector | n_sectors) & ((1 << ic->sb->log2_sectors_per_block) - 1)) != 0)) {
		DMCRIT("invalid bitmap access (%llx,%llx,%d,%d,%d)",
		       sector,
		       n_sectors,
		       ic->sb->log2_sectors_per_block,
		       ic->log2_blocks_per_bitmap_bit,
		       mode);
		BUG();
	}

	if (unlikely(!n_sectors))
		return true;

	bit = sector >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
	end_bit = (sector + n_sectors - 1) >>
		(ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);

	page = bit / (PAGE_SIZE * 8);
	bit %= PAGE_SIZE * 8;

	end_page = end_bit / (PAGE_SIZE * 8);
	end_bit %= PAGE_SIZE * 8;

repeat:
	if (page < end_page)
		this_end_bit = PAGE_SIZE * 8 - 1;
	else
		this_end_bit = end_bit;

	data = lowmem_page_address(bitmap[page].page);

	if (mode == BITMAP_OP_TEST_ALL_SET) {
		while (bit <= this_end_bit) {
			if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) {
				do {
					if (data[bit / BITS_PER_LONG] != -1)
						return false;
					bit += BITS_PER_LONG;
				} while (this_end_bit >= bit + BITS_PER_LONG - 1);
				continue;
			}
			if (!test_bit(bit, data))
				return false;
			bit++;
		}
	} else if (mode == BITMAP_OP_TEST_ALL_CLEAR) {
		while (bit <= this_end_bit) {
			if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) {
				do {
					if (data[bit / BITS_PER_LONG] != 0)
						return false;
					bit += BITS_PER_LONG;
				} while (this_end_bit >= bit + BITS_PER_LONG - 1);
				continue;
			}
			if (test_bit(bit, data))
				return false;
			bit++;
		}
	} else if (mode == BITMAP_OP_SET) {
		while (bit <= this_end_bit) {
			if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) {
				do {
					data[bit / BITS_PER_LONG] = -1;
					bit += BITS_PER_LONG;
				} while (this_end_bit >= bit + BITS_PER_LONG - 1);
				continue;
			}
			__set_bit(bit, data);
			bit++;
		}
	} else if (mode == BITMAP_OP_CLEAR) {
		if (!bit && this_end_bit == PAGE_SIZE * 8 - 1)
			clear_page(data);
		else {
			while (bit <= this_end_bit) {
				if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) {
					do {
						data[bit / BITS_PER_LONG] = 0;
						bit += BITS_PER_LONG;
					} while (this_end_bit >= bit + BITS_PER_LONG - 1);
					continue;
				}
				__clear_bit(bit, data);
				bit++;
			}
		}
	} else {
		BUG();
	}

	if (unlikely(page < end_page)) {
		bit = 0;
		page++;
		goto repeat;
	}

	return true;
}

static void block_bitmap_copy(struct dm_integrity_c *ic, struct page_list *dst, struct page_list *src)
{
	unsigned int n_bitmap_pages = DIV_ROUND_UP(ic->n_bitmap_blocks, PAGE_SIZE / BITMAP_BLOCK_SIZE);
	unsigned int i;

	for (i = 0; i < n_bitmap_pages; i++) {
		unsigned long *dst_data = lowmem_page_address(dst[i].page);
		unsigned long *src_data = lowmem_page_address(src[i].page);

		copy_page(dst_data, src_data);
	}
}

static struct bitmap_block_status *sector_to_bitmap_block(struct dm_integrity_c *ic, sector_t sector)
{
	unsigned int bit = sector >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
	unsigned int bitmap_block = bit / (BITMAP_BLOCK_SIZE * 8);

	BUG_ON(bitmap_block >= ic->n_bitmap_blocks);
	return &ic->bbs[bitmap_block];
}

static void access_journal_check(struct dm_integrity_c *ic, unsigned int section, unsigned int offset,
				 bool e, const char *function)
{
#if defined(CONFIG_DM_DEBUG) || defined(INTERNAL_VERIFY)
	unsigned int limit = e ? ic->journal_section_entries : ic->journal_section_sectors;

	if (unlikely(section >= ic->journal_sections) ||
	    unlikely(offset >= limit)) {
		DMCRIT("%s: invalid access at (%u,%u), limit (%u,%u)",
		       function, section, offset, ic->journal_sections, limit);
		BUG();
	}
#endif
}

static void page_list_location(struct dm_integrity_c *ic, unsigned int section, unsigned int offset,
			       unsigned int *pl_index, unsigned int *pl_offset)
{
	unsigned int sector;

	access_journal_check(ic, section, offset, false, "page_list_location");

	sector = section * ic->journal_section_sectors + offset;

	*pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT);
	*pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1);
}

static struct journal_sector *access_page_list(struct dm_integrity_c *ic, struct page_list *pl,
					       unsigned int section, unsigned int offset, unsigned int *n_sectors)
{
	unsigned int pl_index, pl_offset;
	char *va;

	page_list_location(ic, section, offset, &pl_index, &pl_offset);

	if (n_sectors)
		*n_sectors = (PAGE_SIZE - pl_offset) >> SECTOR_SHIFT;

	va = lowmem_page_address(pl[pl_index].page);

	return (struct journal_sector *)(va + pl_offset);
}

static struct journal_sector *access_journal(struct dm_integrity_c *ic, unsigned int section, unsigned int offset)
{
	return access_page_list(ic, ic->journal, section, offset, NULL);
}

static struct journal_entry *access_journal_entry(struct dm_integrity_c *ic, unsigned int section, unsigned int n)
{
	unsigned int rel_sector, offset;
	struct journal_sector *js;

	access_journal_check(ic, section, n, true, "access_journal_entry");

	rel_sector = n % JOURNAL_BLOCK_SECTORS;
	offset = n / JOURNAL_BLOCK_SECTORS;

	js = access_journal(ic, section, rel_sector);
	return (struct journal_entry *)((char *)js + offset * ic->journal_entry_size);
}

static struct journal_sector *access_journal_data(struct dm_integrity_c *ic, unsigned int section, unsigned int n)
{
	n <<= ic->sb->log2_sectors_per_block;

	n += JOURNAL_BLOCK_SECTORS;

	access_journal_check(ic, section, n, false, "access_journal_data");

	return access_journal(ic, section, n);
}

static void section_mac(struct dm_integrity_c *ic, unsigned int section, __u8 result[JOURNAL_MAC_SIZE])
{
	SHASH_DESC_ON_STACK(desc, ic->journal_mac);
	int r;
	unsigned int j, size;

	desc->tfm = ic->journal_mac;

	r = crypto_shash_init(desc);
	if (unlikely(r < 0)) {
		dm_integrity_io_error(ic, "crypto_shash_init", r);
		goto err;
	}

	if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) {
		__le64 section_le;

		r = crypto_shash_update(desc, (__u8 *)&ic->sb->salt, SALT_SIZE);
		if (unlikely(r < 0)) {
			dm_integrity_io_error(ic, "crypto_shash_update", r);
			goto err;
		}

		section_le = cpu_to_le64(section);
		r = crypto_shash_update(desc, (__u8 *)&section_le, sizeof(section_le));
		if (unlikely(r < 0)) {
			dm_integrity_io_error(ic, "crypto_shash_update", r);
			goto err;
		}
	}

	for (j = 0; j < ic->journal_section_entries; j++) {
		struct journal_entry *je = access_journal_entry(ic, section, j);

		r = crypto_shash_update(desc, (__u8 *)&je->u.sector, sizeof(je->u.sector));
		if (unlikely(r < 0)) {
			dm_integrity_io_error(ic, "crypto_shash_update", r);
			goto err;
		}
	}

	size = crypto_shash_digestsize(ic->journal_mac);

	if (likely(size <= JOURNAL_MAC_SIZE)) {
		r = crypto_shash_final(desc, result);
		if (unlikely(r < 0)) {
			dm_integrity_io_error(ic, "crypto_shash_final", r);
			goto err;
		}
		memset(result + size, 0, JOURNAL_MAC_SIZE - size);
	} else {
		__u8 digest[HASH_MAX_DIGESTSIZE];

		if (WARN_ON(size > sizeof(digest))) {
			dm_integrity_io_error(ic, "digest_size", -EINVAL);
			goto err;
		}
		r = crypto_shash_final(desc, digest);
		if (unlikely(r < 0)) {
			dm_integrity_io_error(ic, "crypto_shash_final", r);
			goto err;
		}
		memcpy(result, digest, JOURNAL_MAC_SIZE);
	}

	return;
err:
	memset(result, 0, JOURNAL_MAC_SIZE);
}

static void rw_section_mac(struct dm_integrity_c *ic, unsigned int section, bool wr)
{
	__u8 result[JOURNAL_MAC_SIZE];
	unsigned int j;

	if (!ic->journal_mac)
		return;

	section_mac(ic, section, result);

	for (j = 0; j < JOURNAL_BLOCK_SECTORS; j++) {
		struct journal_sector *js = access_journal(ic, section, j);

		if (likely(wr))
			memcpy(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR);
		else {
			if (crypto_memneq(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR)) {
				dm_integrity_io_error(ic, "journal mac", -EILSEQ);
				dm_audit_log_target(DM_MSG_PREFIX, "mac-journal", ic->ti, 0);
			}
		}
	}
}

static void complete_journal_op(void *context)
{
	struct journal_completion *comp = context;

	BUG_ON(!atomic_read(&comp->in_flight));
	if (likely(atomic_dec_and_test(&comp->in_flight)))
		complete(&comp->comp);
}

static void xor_journal(struct dm_integrity_c *ic, bool encrypt, unsigned int section,
			unsigned int n_sections, struct journal_completion *comp)
{
	struct async_submit_ctl submit;
	size_t n_bytes = (size_t)(n_sections * ic->journal_section_sectors) << SECTOR_SHIFT;
	unsigned int pl_index, pl_offset, section_index;
	struct page_list *source_pl, *target_pl;

	if (likely(encrypt)) {
		source_pl = ic->journal;
		target_pl = ic->journal_io;
	} else {
		source_pl = ic->journal_io;
		target_pl = ic->journal;
	}

	page_list_location(ic, section, 0, &pl_index, &pl_offset);

	atomic_add(roundup(pl_offset + n_bytes, PAGE_SIZE) >> PAGE_SHIFT, &comp->in_flight);

	init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, complete_journal_op,
			  comp, NULL);
	section_index = pl_index;

	do {
		size_t this_step;
		struct page *src_pages[2];
		struct page *dst_page;

		while (unlikely(pl_index == section_index)) {
			unsigned int dummy;

			if (likely(encrypt))
				rw_section_mac(ic, section, true);
			section++;
			n_sections--;
			if (!n_sections)
				break;
			page_list_location(ic, section, 0, &section_index, &dummy);
		}

		this_step = min(n_bytes, (size_t)PAGE_SIZE - pl_offset);
		dst_page = target_pl[pl_index].page;
		src_pages[0] = source_pl[pl_index].page;
		src_pages[1] = ic->journal_xor[pl_index].page;

		async_xor(dst_page, src_pages, pl_offset, 2, this_step, &submit);

		pl_index++;
		pl_offset = 0;
		n_bytes -= this_step;
	} while (n_bytes);

	BUG_ON(n_sections);

	async_tx_issue_pending_all();
}

static void complete_journal_encrypt(void *data, int err)
{
	struct journal_completion *comp = data;

	if (unlikely(err)) {
		if (likely(err == -EINPROGRESS)) {
			complete(&comp->ic->crypto_backoff);
			return;
		}
		dm_integrity_io_error(comp->ic, "asynchronous encrypt", err);
	}
	complete_journal_op(comp);
}

static bool do_crypt(bool encrypt, struct skcipher_request *req, struct journal_completion *comp)
{
	int r;

	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				      complete_journal_encrypt, comp);
	if (likely(encrypt))
		r = crypto_skcipher_encrypt(req);
	else
		r = crypto_skcipher_decrypt(req);
	if (likely(!r))
		return false;
	if (likely(r == -EINPROGRESS))
		return true;
	if (likely(r == -EBUSY)) {
		wait_for_completion(&comp->ic->crypto_backoff);
		reinit_completion(&comp->ic->crypto_backoff);
		return true;
	}
	dm_integrity_io_error(comp->ic, "encrypt", r);
	return false;
}

static void crypt_journal(struct dm_integrity_c *ic, bool encrypt, unsigned int section,
			  unsigned int n_sections, struct journal_completion *comp)
{
	struct scatterlist **source_sg;
	struct scatterlist **target_sg;

	atomic_add(2, &comp->in_flight);

	if (likely(encrypt)) {
		source_sg = ic->journal_scatterlist;
		target_sg = ic->journal_io_scatterlist;
	} else {
		source_sg = ic->journal_io_scatterlist;
		target_sg = ic->journal_scatterlist;
	}

	do {
		struct skcipher_request *req;
		unsigned int ivsize;
		char *iv;

		if (likely(encrypt))
			rw_section_mac(ic, section, true);

		req = ic->sk_requests[section];
		ivsize = crypto_skcipher_ivsize(ic->journal_crypt);
		iv = req->iv;

		memcpy(iv, iv + ivsize, ivsize);

		req->src = source_sg[section];
		req->dst = target_sg[section];

		if (unlikely(do_crypt(encrypt, req, comp)))
			atomic_inc(&comp->in_flight);

		section++;
		n_sections--;
	} while (n_sections);

	atomic_dec(&comp->in_flight);
	complete_journal_op(comp);
}

static void encrypt_journal(struct dm_integrity_c *ic, bool encrypt, unsigned int section,
			    unsigned int n_sections, struct journal_completion *comp)
{
	if (ic->journal_xor)
		return xor_journal(ic, encrypt, section, n_sections, comp);
	else
		return crypt_journal(ic, encrypt, section, n_sections, comp);
}

static void complete_journal_io(unsigned long error, void *context)
{
	struct journal_completion *comp = context;

	if (unlikely(error != 0))
		dm_integrity_io_error(comp->ic, "writing journal", -EIO);
"writing journal", -EIO); 1043 complete_journal_op(comp); 1044 } 1045 1046 static void rw_journal_sectors(struct dm_integrity_c *ic, blk_opf_t opf, 1047 unsigned int sector, unsigned int n_sectors, 1048 struct journal_completion *comp) 1049 { 1050 struct dm_io_request io_req; 1051 struct dm_io_region io_loc; 1052 unsigned int pl_index, pl_offset; 1053 int r; 1054 1055 if (unlikely(dm_integrity_failed(ic))) { 1056 if (comp) 1057 complete_journal_io(-1UL, comp); 1058 return; 1059 } 1060 1061 pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT); 1062 pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1); 1063 1064 io_req.bi_opf = opf; 1065 io_req.mem.type = DM_IO_PAGE_LIST; 1066 if (ic->journal_io) 1067 io_req.mem.ptr.pl = &ic->journal_io[pl_index]; 1068 else 1069 io_req.mem.ptr.pl = &ic->journal[pl_index]; 1070 io_req.mem.offset = pl_offset; 1071 if (likely(comp != NULL)) { 1072 io_req.notify.fn = complete_journal_io; 1073 io_req.notify.context = comp; 1074 } else { 1075 io_req.notify.fn = NULL; 1076 } 1077 io_req.client = ic->io; 1078 io_loc.bdev = ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev; 1079 io_loc.sector = ic->start + SB_SECTORS + sector; 1080 io_loc.count = n_sectors; 1081 1082 r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT); 1083 if (unlikely(r)) { 1084 dm_integrity_io_error(ic, (opf & REQ_OP_MASK) == REQ_OP_READ ? 1085 "reading journal" : "writing journal", r); 1086 if (comp) { 1087 WARN_ONCE(1, "asynchronous dm_io failed: %d", r); 1088 complete_journal_io(-1UL, comp); 1089 } 1090 } 1091 } 1092 1093 static void rw_journal(struct dm_integrity_c *ic, blk_opf_t opf, 1094 unsigned int section, unsigned int n_sections, 1095 struct journal_completion *comp) 1096 { 1097 unsigned int sector, n_sectors; 1098 1099 sector = section * ic->journal_section_sectors; 1100 n_sectors = n_sections * ic->journal_section_sectors; 1101 1102 rw_journal_sectors(ic, opf, sector, n_sectors, comp); 1103 } 1104 1105 static void write_journal(struct dm_integrity_c *ic, unsigned int commit_start, unsigned int commit_sections) 1106 { 1107 struct journal_completion io_comp; 1108 struct journal_completion crypt_comp_1; 1109 struct journal_completion crypt_comp_2; 1110 unsigned int i; 1111 1112 io_comp.ic = ic; 1113 init_completion(&io_comp.comp); 1114 1115 if (commit_start + commit_sections <= ic->journal_sections) { 1116 io_comp.in_flight = (atomic_t)ATOMIC_INIT(1); 1117 if (ic->journal_io) { 1118 crypt_comp_1.ic = ic; 1119 init_completion(&crypt_comp_1.comp); 1120 crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0); 1121 encrypt_journal(ic, true, commit_start, commit_sections, &crypt_comp_1); 1122 wait_for_completion_io(&crypt_comp_1.comp); 1123 } else { 1124 for (i = 0; i < commit_sections; i++) 1125 rw_section_mac(ic, commit_start + i, true); 1126 } 1127 rw_journal(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, commit_start, 1128 commit_sections, &io_comp); 1129 } else { 1130 unsigned int to_end; 1131 1132 io_comp.in_flight = (atomic_t)ATOMIC_INIT(2); 1133 to_end = ic->journal_sections - commit_start; 1134 if (ic->journal_io) { 1135 crypt_comp_1.ic = ic; 1136 init_completion(&crypt_comp_1.comp); 1137 crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0); 1138 encrypt_journal(ic, true, commit_start, to_end, &crypt_comp_1); 1139 if (try_wait_for_completion(&crypt_comp_1.comp)) { 1140 rw_journal(ic, REQ_OP_WRITE | REQ_FUA, 1141 commit_start, to_end, &io_comp); 1142 reinit_completion(&crypt_comp_1.comp); 1143 crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0); 1144 encrypt_journal(ic, true, 0, commit_sections - to_end, 
				wait_for_completion_io(&crypt_comp_1.comp);
			} else {
				crypt_comp_2.ic = ic;
				init_completion(&crypt_comp_2.comp);
				crypt_comp_2.in_flight = (atomic_t)ATOMIC_INIT(0);
				encrypt_journal(ic, true, 0, commit_sections - to_end, &crypt_comp_2);
				wait_for_completion_io(&crypt_comp_1.comp);
				rw_journal(ic, REQ_OP_WRITE | REQ_FUA, commit_start, to_end, &io_comp);
				wait_for_completion_io(&crypt_comp_2.comp);
			}
		} else {
			for (i = 0; i < to_end; i++)
				rw_section_mac(ic, commit_start + i, true);
			rw_journal(ic, REQ_OP_WRITE | REQ_FUA, commit_start, to_end, &io_comp);
			for (i = 0; i < commit_sections - to_end; i++)
				rw_section_mac(ic, i, true);
		}
		rw_journal(ic, REQ_OP_WRITE | REQ_FUA, 0, commit_sections - to_end, &io_comp);
	}

	wait_for_completion_io(&io_comp.comp);
}

static void copy_from_journal(struct dm_integrity_c *ic, unsigned int section, unsigned int offset,
			      unsigned int n_sectors, sector_t target, io_notify_fn fn, void *data)
{
	struct dm_io_request io_req;
	struct dm_io_region io_loc;
	int r;
	unsigned int sector, pl_index, pl_offset;

	BUG_ON((target | n_sectors | offset) & (unsigned int)(ic->sectors_per_block - 1));

	if (unlikely(dm_integrity_failed(ic))) {
		fn(-1UL, data);
		return;
	}

	sector = section * ic->journal_section_sectors + JOURNAL_BLOCK_SECTORS + offset;

	pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT);
	pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1);

	io_req.bi_opf = REQ_OP_WRITE;
	io_req.mem.type = DM_IO_PAGE_LIST;
	io_req.mem.ptr.pl = &ic->journal[pl_index];
	io_req.mem.offset = pl_offset;
	io_req.notify.fn = fn;
	io_req.notify.context = data;
	io_req.client = ic->io;
	io_loc.bdev = ic->dev->bdev;
	io_loc.sector = target;
	io_loc.count = n_sectors;

	r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT);
	if (unlikely(r)) {
		WARN_ONCE(1, "asynchronous dm_io failed: %d", r);
		fn(-1UL, data);
	}
}

static bool ranges_overlap(struct dm_integrity_range *range1, struct dm_integrity_range *range2)
{
	return range1->logical_sector < range2->logical_sector + range2->n_sectors &&
	       range1->logical_sector + range1->n_sectors > range2->logical_sector;
}

static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range, bool check_waiting)
{
	struct rb_node **n = &ic->in_progress.rb_node;
	struct rb_node *parent;

	BUG_ON((new_range->logical_sector | new_range->n_sectors) & (unsigned int)(ic->sectors_per_block - 1));

	if (likely(check_waiting)) {
		struct dm_integrity_range *range;

		list_for_each_entry(range, &ic->wait_list, wait_entry) {
			if (unlikely(ranges_overlap(range, new_range)))
				return false;
		}
	}

	parent = NULL;

	while (*n) {
		struct dm_integrity_range *range = container_of(*n, struct dm_integrity_range, node);

		parent = *n;
		if (new_range->logical_sector + new_range->n_sectors <= range->logical_sector)
			n = &range->node.rb_left;
		else if (new_range->logical_sector >= range->logical_sector + range->n_sectors)
			n = &range->node.rb_right;
		else
			return false;
	}

	rb_link_node(&new_range->node, parent, n);
	rb_insert_color(&new_range->node, &ic->in_progress);

	return true;
}

static void remove_range_unlocked(struct dm_integrity_c *ic, struct dm_integrity_range *range)
{
	rb_erase(&range->node, &ic->in_progress);
	while (unlikely(!list_empty(&ic->wait_list))) {
		struct dm_integrity_range *last_range =
			list_first_entry(&ic->wait_list, struct dm_integrity_range, wait_entry);
		struct task_struct *last_range_task;

		last_range_task = last_range->task;
		list_del(&last_range->wait_entry);
		if (!add_new_range(ic, last_range, false)) {
			last_range->task = last_range_task;
			list_add(&last_range->wait_entry, &ic->wait_list);
			break;
		}
		last_range->waiting = false;
		wake_up_process(last_range_task);
	}
}

static void remove_range(struct dm_integrity_c *ic, struct dm_integrity_range *range)
{
	unsigned long flags;

	spin_lock_irqsave(&ic->endio_wait.lock, flags);
	remove_range_unlocked(ic, range);
	spin_unlock_irqrestore(&ic->endio_wait.lock, flags);
}

static void wait_and_add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range)
{
	new_range->waiting = true;
	list_add_tail(&new_range->wait_entry, &ic->wait_list);
	new_range->task = current;
	do {
		__set_current_state(TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&ic->endio_wait.lock);
		io_schedule();
		spin_lock_irq(&ic->endio_wait.lock);
	} while (unlikely(new_range->waiting));
}

static void add_new_range_and_wait(struct dm_integrity_c *ic, struct dm_integrity_range *new_range)
{
	if (unlikely(!add_new_range(ic, new_range, true)))
		wait_and_add_new_range(ic, new_range);
}

static void init_journal_node(struct journal_node *node)
{
	RB_CLEAR_NODE(&node->node);
	node->sector = (sector_t)-1;
}

static void add_journal_node(struct dm_integrity_c *ic, struct journal_node *node, sector_t sector)
{
	struct rb_node **link;
	struct rb_node *parent;

	node->sector = sector;
	BUG_ON(!RB_EMPTY_NODE(&node->node));

	link = &ic->journal_tree_root.rb_node;
	parent = NULL;

	while (*link) {
		struct journal_node *j;

		parent = *link;
		j = container_of(parent, struct journal_node, node);
		if (sector < j->sector)
			link = &j->node.rb_left;
		else
			link = &j->node.rb_right;
	}

	rb_link_node(&node->node, parent, link);
	rb_insert_color(&node->node, &ic->journal_tree_root);
}

static void remove_journal_node(struct dm_integrity_c *ic, struct journal_node *node)
{
	BUG_ON(RB_EMPTY_NODE(&node->node));
	rb_erase(&node->node, &ic->journal_tree_root);
	init_journal_node(node);
}

#define NOT_FOUND (-1U)

static unsigned int find_journal_node(struct dm_integrity_c *ic, sector_t sector, sector_t *next_sector)
{
	struct rb_node *n = ic->journal_tree_root.rb_node;
	unsigned int found = NOT_FOUND;

	*next_sector = (sector_t)-1;
	while (n) {
		struct journal_node *j = container_of(n, struct journal_node, node);

		if (sector == j->sector)
			found = j - ic->journal_tree;

		if (sector < j->sector) {
			*next_sector = j->sector;
			n = j->node.rb_left;
		} else
			n = j->node.rb_right;
	}

	return found;
}

static bool test_journal_node(struct dm_integrity_c *ic, unsigned int pos, sector_t sector)
{
	struct journal_node *node, *next_node;
	struct rb_node *next;

	if (unlikely(pos >= ic->journal_entries))
		return false;
	node = &ic->journal_tree[pos];
	if (unlikely(RB_EMPTY_NODE(&node->node)))
		return false;
	if (unlikely(node->sector != sector))
		return false;

	next = rb_next(&node->node);
	if (unlikely(!next))
		return true;

	next_node = container_of(next, struct journal_node, node);
	return next_node->sector != sector;
}

static bool find_newer_committed_node(struct dm_integrity_c *ic, struct journal_node *node)
{
	struct rb_node *next;
	struct journal_node *next_node;
	unsigned int next_section;

	BUG_ON(RB_EMPTY_NODE(&node->node));

	next = rb_next(&node->node);
	if (unlikely(!next))
		return false;

	next_node = container_of(next, struct journal_node, node);

	if (next_node->sector != node->sector)
		return false;

	next_section = (unsigned int)(next_node - ic->journal_tree) / ic->journal_section_entries;
	if (next_section >= ic->committed_section &&
	    next_section < ic->committed_section + ic->n_committed_sections)
		return true;
	if (next_section + ic->journal_sections < ic->committed_section + ic->n_committed_sections)
		return true;

	return false;
}

#define TAG_READ 0
#define TAG_WRITE 1
#define TAG_CMP 2

static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, sector_t *metadata_block,
			       unsigned int *metadata_offset, unsigned int total_size, int op)
{
	unsigned int hash_offset = 0;
	unsigned char mismatch_hash = 0;
	unsigned char mismatch_filler = !ic->discard;

	do {
		unsigned char *data, *dp;
		struct dm_buffer *b;
		unsigned int to_copy;
		int r;

		r = dm_integrity_failed(ic);
		if (unlikely(r))
			return r;

		data = dm_bufio_read(ic->bufio, *metadata_block, &b);
		if (IS_ERR(data))
			return PTR_ERR(data);

		to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size);
		dp = data + *metadata_offset;
		if (op == TAG_READ) {
			memcpy(tag, dp, to_copy);
		} else if (op == TAG_WRITE) {
			if (crypto_memneq(dp, tag, to_copy)) {
				memcpy(dp, tag, to_copy);
				dm_bufio_mark_partial_buffer_dirty(b, *metadata_offset, *metadata_offset + to_copy);
			}
		} else {
			/* e.g.: op == TAG_CMP */

			if (likely(is_power_of_2(ic->tag_size))) {
				if (unlikely(crypto_memneq(dp, tag, to_copy)))
					goto thorough_test;
			} else {
				unsigned int i, ts;
thorough_test:
				ts = total_size;

				for (i = 0; i < to_copy; i++, ts--) {
					/*
					 * Warning: the control flow must not be
					 * dependent on match/mismatch of
					 * individual bytes.
					 */
					mismatch_hash |= dp[i] ^ tag[i];
					mismatch_filler |= dp[i] ^ DISCARD_FILLER;
					hash_offset++;
					if (unlikely(hash_offset == ic->tag_size)) {
						if (unlikely(mismatch_hash) && unlikely(mismatch_filler)) {
							dm_bufio_release(b);
							return ts;
						}
						hash_offset = 0;
						mismatch_hash = 0;
						mismatch_filler = !ic->discard;
					}
				}
			}
		}
		dm_bufio_release(b);

		tag += to_copy;
		*metadata_offset += to_copy;
		if (unlikely(*metadata_offset == 1U << SECTOR_SHIFT << ic->log2_buffer_sectors)) {
			(*metadata_block)++;
			*metadata_offset = 0;
		}

		if (unlikely(!is_power_of_2(ic->tag_size)))
			hash_offset = (hash_offset + to_copy) % ic->tag_size;

		total_size -= to_copy;
	} while (unlikely(total_size));

	return 0;
}

struct flush_request {
	struct dm_io_request io_req;
	struct dm_io_region io_reg;
	struct dm_integrity_c *ic;
	struct completion comp;
};

static void flush_notify(unsigned long error, void *fr_)
{
	struct flush_request *fr = fr_;

	if (unlikely(error != 0))
		dm_integrity_io_error(fr->ic, "flushing disk cache", -EIO);
	complete(&fr->comp);
}

static void dm_integrity_flush_buffers(struct dm_integrity_c *ic, bool flush_data)
{
	int r;
	struct flush_request fr;

	if (!ic->meta_dev)
		flush_data = false;
	if (flush_data) {
		fr.io_req.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
		fr.io_req.mem.type = DM_IO_KMEM;
		fr.io_req.mem.ptr.addr = NULL;
		fr.io_req.notify.fn = flush_notify;
		fr.io_req.notify.context = &fr;
		fr.io_req.client = dm_bufio_get_dm_io_client(ic->bufio);
		fr.io_reg.bdev = ic->dev->bdev;
		fr.io_reg.sector = 0;
		fr.io_reg.count = 0;
		fr.ic = ic;
		init_completion(&fr.comp);
		r = dm_io(&fr.io_req, 1, &fr.io_reg, NULL, IOPRIO_DEFAULT);
		BUG_ON(r);
	}

	r = dm_bufio_write_dirty_buffers(ic->bufio);
	if (unlikely(r))
		dm_integrity_io_error(ic, "writing tags", r);

	if (flush_data)
		wait_for_completion(&fr.comp);
}

static void sleep_on_endio_wait(struct dm_integrity_c *ic)
{
	DECLARE_WAITQUEUE(wait, current);

	__add_wait_queue(&ic->endio_wait, &wait);
	__set_current_state(TASK_UNINTERRUPTIBLE);
	spin_unlock_irq(&ic->endio_wait.lock);
	io_schedule();
	spin_lock_irq(&ic->endio_wait.lock);
	__remove_wait_queue(&ic->endio_wait, &wait);
}

static void autocommit_fn(struct timer_list *t)
{
	struct dm_integrity_c *ic = timer_container_of(ic, t,
						       autocommit_timer);

	if (likely(!dm_integrity_failed(ic)))
		queue_work(ic->commit_wq, &ic->commit_work);
}

static void schedule_autocommit(struct dm_integrity_c *ic)
{
	if (!timer_pending(&ic->autocommit_timer))
		mod_timer(&ic->autocommit_timer, jiffies + ic->autocommit_jiffies);
}

static void submit_flush_bio(struct dm_integrity_c *ic, struct dm_integrity_io *dio)
{
	struct bio *bio;
	unsigned long flags;

	spin_lock_irqsave(&ic->endio_wait.lock, flags);
	bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
	bio_list_add(&ic->flush_bio_list, bio);
	spin_unlock_irqrestore(&ic->endio_wait.lock, flags);

	queue_work(ic->commit_wq, &ic->commit_work);
}

static void do_endio(struct dm_integrity_c *ic, struct bio *bio)
{
	int r;

	r = dm_integrity_failed(ic);
	if (unlikely(r) && !bio->bi_status)
		bio->bi_status = errno_to_blk_status(r);
	if (unlikely(ic->synchronous_mode) && bio_op(bio) == REQ_OP_WRITE) {
		unsigned long flags;

		spin_lock_irqsave(&ic->endio_wait.lock, flags);
		bio_list_add(&ic->synchronous_bios, bio);
		queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, 0);
		spin_unlock_irqrestore(&ic->endio_wait.lock, flags);
		return;
	}
	bio_endio(bio);
}

static void do_endio_flush(struct dm_integrity_c *ic, struct dm_integrity_io *dio)
{
	struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));

	if (unlikely(dio->fua) && likely(!bio->bi_status) && likely(!dm_integrity_failed(ic)))
		submit_flush_bio(ic, dio);
	else
		do_endio(ic, bio);
}

static void dec_in_flight(struct dm_integrity_io *dio)
{
	if (atomic_dec_and_test(&dio->in_flight)) {
		struct dm_integrity_c *ic = dio->ic;
		struct bio *bio;

		remove_range(ic, &dio->range);

		if (dio->op == REQ_OP_WRITE || unlikely(dio->op == REQ_OP_DISCARD))
			schedule_autocommit(ic);

		bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
		if (unlikely(dio->bi_status) && !bio->bi_status)
			bio->bi_status = dio->bi_status;
		if (likely(!bio->bi_status) && unlikely(bio_sectors(bio) != dio->range.n_sectors)) {
			dio->range.logical_sector += dio->range.n_sectors;
			bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT);
			INIT_WORK(&dio->work, integrity_bio_wait);
			queue_work(ic->offload_wq, &dio->work);
			return;
		}
		do_endio_flush(ic, dio);
	}
}

static void integrity_end_io(struct bio *bio)
{
	struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));

	dm_bio_restore(&dio->bio_details, bio);
	if (bio->bi_integrity)
		bio->bi_opf |= REQ_INTEGRITY;

	if (dio->completion)
		complete(dio->completion);

	dec_in_flight(dio);
}

static void integrity_sector_checksum_shash(struct dm_integrity_c *ic, sector_t sector,
					    const char *data, unsigned offset, char *result)
{
	__le64 sector_le = cpu_to_le64(sector);
	SHASH_DESC_ON_STACK(req, ic->internal_shash);
	int r;
	unsigned int digest_size;

	req->tfm = ic->internal_shash;

	r = crypto_shash_init(req);
	if (unlikely(r < 0)) {
		dm_integrity_io_error(ic, "crypto_shash_init", r);
		goto failed;
	}

	if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) {
		r = crypto_shash_update(req, (__u8 *)&ic->sb->salt, SALT_SIZE);
		if (unlikely(r < 0)) {
			dm_integrity_io_error(ic, "crypto_shash_update", r);
			goto failed;
		}
	}

	r = crypto_shash_update(req, (const __u8 *)&sector_le, sizeof(sector_le));
	if (unlikely(r < 0)) {
		dm_integrity_io_error(ic, "crypto_shash_update", r);
		goto failed;
	}

	r = crypto_shash_update(req, data + offset, ic->sectors_per_block << SECTOR_SHIFT);
	if (unlikely(r < 0)) {
		dm_integrity_io_error(ic, "crypto_shash_update", r);
		goto failed;
	}

	r = crypto_shash_final(req, result);
	if (unlikely(r < 0)) {
		dm_integrity_io_error(ic, "crypto_shash_final", r);
		goto failed;
	}

	digest_size = ic->internal_hash_digestsize;
	if (unlikely(digest_size < ic->tag_size))
		memset(result + digest_size, 0, ic->tag_size - digest_size);

	return;

failed:
	/* this shouldn't happen anyway, the hash functions have no reason to fail */
	get_random_bytes(result, ic->tag_size);
}

static void integrity_sector_checksum_ahash(struct dm_integrity_c *ic, struct ahash_request **ahash_req,
					    sector_t sector, struct page *page, unsigned offset, char *result)
{
	__le64 sector_le = cpu_to_le64(sector);
	struct ahash_request *req;
	DECLARE_CRYPTO_WAIT(wait);
	struct scatterlist sg[3], *s = sg;
	int r;
	unsigned int digest_size;
	unsigned int nbytes = 0;

	might_sleep();

	req = *ahash_req;
	if (unlikely(!req)) {
		req = mempool_alloc(&ic->ahash_req_pool, GFP_NOIO);
		*ahash_req = req;
	}

	ahash_request_set_tfm(req, ic->internal_ahash);
	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, crypto_req_done, &wait);

	if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) {
		sg_init_table(sg, 3);
		sg_set_buf(s, (const __u8 *)&ic->sb->salt, SALT_SIZE);
		nbytes += SALT_SIZE;
		s++;
	} else {
		sg_init_table(sg, 2);
	}

	if (likely(!is_vmalloc_addr(&sector_le))) {
		sg_set_buf(s, &sector_le, sizeof(sector_le));
	} else {
		struct page *sec_page = vmalloc_to_page(&sector_le);
		unsigned int sec_off = offset_in_page(&sector_le);
		sg_set_page(s, sec_page, sizeof(sector_le), sec_off);
	}
	nbytes += sizeof(sector_le);
	s++;

	sg_set_page(s, page, ic->sectors_per_block << SECTOR_SHIFT, offset);
	nbytes += ic->sectors_per_block << SECTOR_SHIFT;

	ahash_request_set_crypt(req, sg, result, nbytes);

	r = crypto_wait_req(crypto_ahash_digest(req), &wait);
	if (unlikely(r)) {
		dm_integrity_io_error(ic, "crypto_ahash_digest", r);
		goto failed;
	}

	digest_size = ic->internal_hash_digestsize;
	if (unlikely(digest_size < ic->tag_size))
		memset(result + digest_size, 0, ic->tag_size - digest_size);

	return;

failed:
	/* this shouldn't happen anyway, the hash functions have no reason to fail */
	get_random_bytes(result, ic->tag_size);
}

static void integrity_sector_checksum(struct dm_integrity_c *ic, struct ahash_request **ahash_req,
				      sector_t sector, const char *data, unsigned offset, char *result)
{
	if (likely(ic->internal_shash != NULL))
		integrity_sector_checksum_shash(ic, sector, data, offset, result);
	else
		integrity_sector_checksum_ahash(ic, ahash_req, sector, (struct page *)data, offset, result);
}

static void *integrity_kmap(struct dm_integrity_c *ic, struct page *p)
{
	if (likely(ic->internal_shash != NULL))
		return kmap_local_page(p);
	else
		return p;
}

static void integrity_kunmap(struct dm_integrity_c *ic, const void *ptr)
{
	if (likely(ic->internal_shash != NULL))
		kunmap_local(ptr);
}

static void *integrity_identity(struct dm_integrity_c *ic, void *data)
{
#ifdef CONFIG_DEBUG_SG
	BUG_ON(offset_in_page(data));
	BUG_ON(!virt_addr_valid(data));
#endif
	if (likely(ic->internal_shash != NULL))
		return data;
	else
		return virt_to_page(data);
}

static noinline void integrity_recheck(struct dm_integrity_io *dio, char *checksum)
{
	struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
	struct dm_integrity_c *ic = dio->ic;
	struct bvec_iter iter;
	struct bio_vec bv;
	sector_t sector, logical_sector, area, offset;
	struct page *page;

	get_area_and_offset(ic, dio->range.logical_sector, &area, &offset);
	dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset,
							     &dio->metadata_offset);
	sector = get_data_sector(ic, area, offset);
	logical_sector = dio->range.logical_sector;

	page = mempool_alloc(&ic->recheck_pool, GFP_NOIO);

	__bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) {
		unsigned pos = 0;

		do {
			sector_t alignment;
			char *mem;
			char *buffer = page_to_virt(page);
			unsigned int buffer_offset;
			int r;
			struct dm_io_request io_req;
			struct dm_io_region io_loc;
			io_req.bi_opf = REQ_OP_READ;
			io_req.mem.type = DM_IO_KMEM;
			io_req.mem.ptr.addr = buffer;
			io_req.notify.fn = NULL;
			io_req.client = ic->io;
			io_loc.bdev = ic->dev->bdev;
			io_loc.sector = sector;
			io_loc.count = ic->sectors_per_block;

			/* Align the bio to logical block size */
			alignment = dio->range.logical_sector | bio_sectors(bio) | (PAGE_SIZE >> SECTOR_SHIFT);
			alignment &= -alignment;
			io_loc.sector = round_down(io_loc.sector, alignment);
			io_loc.count += sector - io_loc.sector;
			buffer_offset = (sector - io_loc.sector) << SECTOR_SHIFT;
			io_loc.count = round_up(io_loc.count, alignment);

			r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT);
			if (unlikely(r)) {
				dio->bi_status = errno_to_blk_status(r);
				goto free_ret;
			}

			integrity_sector_checksum(ic, &dio->ahash_req, logical_sector, integrity_identity(ic, buffer), buffer_offset, checksum);
			r = dm_integrity_rw_tag(ic, checksum, &dio->metadata_block,
						&dio->metadata_offset, ic->tag_size, TAG_CMP);
			if (r) {
				if (r > 0) {
					DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx",
						    bio->bi_bdev, logical_sector);
					atomic64_inc(&ic->number_of_mismatches);
					dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum",
							 bio, logical_sector, 0);
					r = -EILSEQ;
				}
				dio->bi_status = errno_to_blk_status(r);
				goto free_ret;
			}

			mem = bvec_kmap_local(&bv);
			memcpy(mem + pos, buffer + buffer_offset, ic->sectors_per_block << SECTOR_SHIFT);
			kunmap_local(mem);

			pos += ic->sectors_per_block << SECTOR_SHIFT;
			sector += ic->sectors_per_block;
			logical_sector += ic->sectors_per_block;
		} while (pos < bv.bv_len);
	}
free_ret:
	mempool_free(page, &ic->recheck_pool);
}

static void integrity_metadata(struct work_struct *w)
{
	struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
	struct dm_integrity_c *ic = dio->ic;

	int r;

	if (ic->internal_hash) {
		struct bvec_iter iter;
		struct bio_vec bv;
		unsigned int digest_size = ic->internal_hash_digestsize;
		struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
		char *checksums;
		unsigned int extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
digest_size - ic->tag_size : 0; 1891 char checksums_onstack[MAX_T(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)]; 1892 sector_t sector; 1893 unsigned int sectors_to_process; 1894 1895 if (unlikely(ic->mode == 'R')) 1896 goto skip_io; 1897 1898 if (likely(dio->op != REQ_OP_DISCARD)) 1899 checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space, 1900 GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN); 1901 else 1902 checksums = kmalloc(PAGE_SIZE, GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN); 1903 if (!checksums) { 1904 checksums = checksums_onstack; 1905 if (WARN_ON(extra_space && 1906 digest_size > sizeof(checksums_onstack))) { 1907 r = -EINVAL; 1908 goto error; 1909 } 1910 } 1911 1912 if (unlikely(dio->op == REQ_OP_DISCARD)) { 1913 unsigned int bi_size = dio->bio_details.bi_iter.bi_size; 1914 unsigned int max_size = likely(checksums != checksums_onstack) ? PAGE_SIZE : HASH_MAX_DIGESTSIZE; 1915 unsigned int max_blocks = max_size / ic->tag_size; 1916 1917 memset(checksums, DISCARD_FILLER, max_size); 1918 1919 while (bi_size) { 1920 unsigned int this_step_blocks = bi_size >> (SECTOR_SHIFT + ic->sb->log2_sectors_per_block); 1921 1922 this_step_blocks = min(this_step_blocks, max_blocks); 1923 r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset, 1924 this_step_blocks * ic->tag_size, TAG_WRITE); 1925 if (unlikely(r)) { 1926 if (likely(checksums != checksums_onstack)) 1927 kfree(checksums); 1928 goto error; 1929 } 1930 1931 bi_size -= this_step_blocks << (SECTOR_SHIFT + ic->sb->log2_sectors_per_block); 1932 } 1933 1934 if (likely(checksums != checksums_onstack)) 1935 kfree(checksums); 1936 goto skip_io; 1937 } 1938 1939 sector = dio->range.logical_sector; 1940 sectors_to_process = dio->range.n_sectors; 1941 1942 __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) { 1943 struct bio_vec bv_copy = bv; 1944 unsigned int pos; 1945 char *mem, *checksums_ptr; 1946 1947 again: 1948 mem = integrity_kmap(ic, bv_copy.bv_page); 1949 pos = 0; 1950 checksums_ptr = checksums; 1951 do { 1952 integrity_sector_checksum(ic, &dio->ahash_req, sector, mem, bv_copy.bv_offset + pos, checksums_ptr); 1953 checksums_ptr += ic->tag_size; 1954 sectors_to_process -= ic->sectors_per_block; 1955 pos += ic->sectors_per_block << SECTOR_SHIFT; 1956 sector += ic->sectors_per_block; 1957 } while (pos < bv_copy.bv_len && sectors_to_process && checksums != checksums_onstack); 1958 integrity_kunmap(ic, mem); 1959 1960 r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset, 1961 checksums_ptr - checksums, dio->op == REQ_OP_READ ? 
TAG_CMP : TAG_WRITE); 1962 if (unlikely(r)) { 1963 if (likely(checksums != checksums_onstack)) 1964 kfree(checksums); 1965 if (r > 0) { 1966 integrity_recheck(dio, checksums_onstack); 1967 goto skip_io; 1968 } 1969 goto error; 1970 } 1971 1972 if (!sectors_to_process) 1973 break; 1974 1975 if (unlikely(pos < bv_copy.bv_len)) { 1976 bv_copy.bv_offset += pos; 1977 bv_copy.bv_len -= pos; 1978 goto again; 1979 } 1980 } 1981 1982 if (likely(checksums != checksums_onstack)) 1983 kfree(checksums); 1984 } else { 1985 struct bio_integrity_payload *bip = dio->bio_details.bi_integrity; 1986 1987 if (bip) { 1988 struct bio_vec biv; 1989 struct bvec_iter iter; 1990 unsigned int data_to_process = dio->range.n_sectors; 1991 1992 sector_to_block(ic, data_to_process); 1993 data_to_process *= ic->tag_size; 1994 1995 bip_for_each_vec(biv, bip, iter) { 1996 unsigned char *tag; 1997 unsigned int this_len; 1998 1999 BUG_ON(PageHighMem(biv.bv_page)); 2000 tag = bvec_virt(&biv); 2001 this_len = min(biv.bv_len, data_to_process); 2002 r = dm_integrity_rw_tag(ic, tag, &dio->metadata_block, &dio->metadata_offset, 2003 this_len, dio->op == REQ_OP_READ ? TAG_READ : TAG_WRITE); 2004 if (unlikely(r)) 2005 goto error; 2006 data_to_process -= this_len; 2007 if (!data_to_process) 2008 break; 2009 } 2010 } 2011 } 2012 skip_io: 2013 dec_in_flight(dio); 2014 return; 2015 error: 2016 dio->bi_status = errno_to_blk_status(r); 2017 dec_in_flight(dio); 2018 } 2019 2020 static inline bool dm_integrity_check_limits(struct dm_integrity_c *ic, sector_t logical_sector, struct bio *bio) 2021 { 2022 if (unlikely(logical_sector + bio_sectors(bio) > ic->provided_data_sectors)) { 2023 DMERR("Too big sector number: 0x%llx + 0x%x > 0x%llx", 2024 logical_sector, bio_sectors(bio), 2025 ic->provided_data_sectors); 2026 return false; 2027 } 2028 if (unlikely((logical_sector | bio_sectors(bio)) & (unsigned int)(ic->sectors_per_block - 1))) { 2029 DMERR("Bio not aligned on %u sectors: 0x%llx, 0x%x", 2030 ic->sectors_per_block, 2031 logical_sector, bio_sectors(bio)); 2032 return false; 2033 } 2034 if (ic->sectors_per_block > 1 && likely(bio_op(bio) != REQ_OP_DISCARD)) { 2035 struct bvec_iter iter; 2036 struct bio_vec bv; 2037 2038 bio_for_each_segment(bv, bio, iter) { 2039 if (unlikely(bv.bv_len & ((ic->sectors_per_block << SECTOR_SHIFT) - 1))) { 2040 DMERR("Bio vector (%u,%u) is not aligned on %u-sector boundary", 2041 bv.bv_offset, bv.bv_len, ic->sectors_per_block); 2042 return false; 2043 } 2044 } 2045 } 2046 return true; 2047 } 2048 2049 static int dm_integrity_map(struct dm_target *ti, struct bio *bio) 2050 { 2051 struct dm_integrity_c *ic = ti->private; 2052 struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io)); 2053 struct bio_integrity_payload *bip; 2054 2055 sector_t area, offset; 2056 2057 dio->ic = ic; 2058 dio->bi_status = 0; 2059 dio->op = bio_op(bio); 2060 dio->ahash_req = NULL; 2061 2062 if (ic->mode == 'I') { 2063 bio->bi_iter.bi_sector = dm_target_offset(ic->ti, bio->bi_iter.bi_sector); 2064 dio->integrity_payload = NULL; 2065 dio->integrity_payload_from_mempool = false; 2066 dio->integrity_range_locked = false; 2067 return dm_integrity_map_inline(dio, true); 2068 } 2069 2070 if (unlikely(dio->op == REQ_OP_DISCARD)) { 2071 if (ti->max_io_len) { 2072 sector_t sec = dm_target_offset(ti, bio->bi_iter.bi_sector); 2073 unsigned int log2_max_io_len = __fls(ti->max_io_len); 2074 sector_t start_boundary = sec >> log2_max_io_len; 2075 sector_t end_boundary = (sec + bio_sectors(bio) - 1) >> log2_max_io_len; 2076 
2077 if (start_boundary < end_boundary) { 2078 sector_t len = ti->max_io_len - (sec & (ti->max_io_len - 1)); 2079 2080 dm_accept_partial_bio(bio, len); 2081 } 2082 } 2083 } 2084 2085 if (unlikely(bio->bi_opf & REQ_PREFLUSH)) { 2086 submit_flush_bio(ic, dio); 2087 return DM_MAPIO_SUBMITTED; 2088 } 2089 2090 dio->range.logical_sector = dm_target_offset(ti, bio->bi_iter.bi_sector); 2091 dio->fua = dio->op == REQ_OP_WRITE && bio->bi_opf & REQ_FUA; 2092 if (unlikely(dio->fua)) { 2093 /* 2094 * Don't pass down the FUA flag because we have to flush 2095 * disk cache anyway. 2096 */ 2097 bio->bi_opf &= ~REQ_FUA; 2098 } 2099 if (unlikely(!dm_integrity_check_limits(ic, dio->range.logical_sector, bio))) 2100 return DM_MAPIO_KILL; 2101 2102 bip = bio_integrity(bio); 2103 if (!ic->internal_hash) { 2104 if (bip) { 2105 unsigned int wanted_tag_size = bio_sectors(bio) >> ic->sb->log2_sectors_per_block; 2106 2107 if (ic->log2_tag_size >= 0) 2108 wanted_tag_size <<= ic->log2_tag_size; 2109 else 2110 wanted_tag_size *= ic->tag_size; 2111 if (unlikely(wanted_tag_size != bip->bip_iter.bi_size)) { 2112 DMERR("Invalid integrity data size %u, expected %u", 2113 bip->bip_iter.bi_size, wanted_tag_size); 2114 return DM_MAPIO_KILL; 2115 } 2116 } 2117 } else { 2118 if (unlikely(bip != NULL)) { 2119 DMERR("Unexpected integrity data when using internal hash"); 2120 return DM_MAPIO_KILL; 2121 } 2122 } 2123 2124 if (unlikely(ic->mode == 'R') && unlikely(dio->op != REQ_OP_READ)) 2125 return DM_MAPIO_KILL; 2126 2127 get_area_and_offset(ic, dio->range.logical_sector, &area, &offset); 2128 dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, &dio->metadata_offset); 2129 bio->bi_iter.bi_sector = get_data_sector(ic, area, offset); 2130 2131 dm_integrity_map_continue(dio, true); 2132 return DM_MAPIO_SUBMITTED; 2133 } 2134 2135 static bool __journal_read_write(struct dm_integrity_io *dio, struct bio *bio, 2136 unsigned int journal_section, unsigned int journal_entry) 2137 { 2138 struct dm_integrity_c *ic = dio->ic; 2139 sector_t logical_sector; 2140 unsigned int n_sectors; 2141 2142 logical_sector = dio->range.logical_sector; 2143 n_sectors = dio->range.n_sectors; 2144 do { 2145 struct bio_vec bv = bio_iovec(bio); 2146 char *mem; 2147 2148 if (unlikely(bv.bv_len >> SECTOR_SHIFT > n_sectors)) 2149 bv.bv_len = n_sectors << SECTOR_SHIFT; 2150 n_sectors -= bv.bv_len >> SECTOR_SHIFT; 2151 bio_advance_iter(bio, &bio->bi_iter, bv.bv_len); 2152 retry_kmap: 2153 mem = kmap_local_page(bv.bv_page); 2154 if (likely(dio->op == REQ_OP_WRITE)) 2155 flush_dcache_page(bv.bv_page); 2156 2157 do { 2158 struct journal_entry *je = access_journal_entry(ic, journal_section, journal_entry); 2159 2160 if (unlikely(dio->op == REQ_OP_READ)) { 2161 struct journal_sector *js; 2162 char *mem_ptr; 2163 unsigned int s; 2164 2165 if (unlikely(journal_entry_is_inprogress(je))) { 2166 flush_dcache_page(bv.bv_page); 2167 kunmap_local(mem); 2168 2169 __io_wait_event(ic->copy_to_journal_wait, !journal_entry_is_inprogress(je)); 2170 goto retry_kmap; 2171 } 2172 smp_rmb(); 2173 BUG_ON(journal_entry_get_sector(je) != logical_sector); 2174 js = access_journal_data(ic, journal_section, journal_entry); 2175 mem_ptr = mem + bv.bv_offset; 2176 s = 0; 2177 do { 2178 memcpy(mem_ptr, js, JOURNAL_SECTOR_DATA); 2179 *(commit_id_t *)(mem_ptr + JOURNAL_SECTOR_DATA) = je->last_bytes[s]; 2180 js++; 2181 mem_ptr += 1 << SECTOR_SHIFT; 2182 } while (++s < ic->sectors_per_block); 2183 } 2184 2185 if (!ic->internal_hash) { 2186 struct bio_integrity_payload *bip = 
bio_integrity(bio); 2187 unsigned int tag_todo = ic->tag_size; 2188 char *tag_ptr = journal_entry_tag(ic, je); 2189 2190 if (bip) { 2191 do { 2192 struct bio_vec biv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter); 2193 unsigned int tag_now = min(biv.bv_len, tag_todo); 2194 char *tag_addr; 2195 2196 BUG_ON(PageHighMem(biv.bv_page)); 2197 tag_addr = bvec_virt(&biv); 2198 if (likely(dio->op == REQ_OP_WRITE)) 2199 memcpy(tag_ptr, tag_addr, tag_now); 2200 else 2201 memcpy(tag_addr, tag_ptr, tag_now); 2202 bvec_iter_advance(bip->bip_vec, &bip->bip_iter, tag_now); 2203 tag_ptr += tag_now; 2204 tag_todo -= tag_now; 2205 } while (unlikely(tag_todo)); 2206 } else if (likely(dio->op == REQ_OP_WRITE)) 2207 memset(tag_ptr, 0, tag_todo); 2208 } 2209 2210 if (likely(dio->op == REQ_OP_WRITE)) { 2211 struct journal_sector *js; 2212 unsigned int s; 2213 2214 js = access_journal_data(ic, journal_section, journal_entry); 2215 memcpy(js, mem + bv.bv_offset, ic->sectors_per_block << SECTOR_SHIFT); 2216 2217 s = 0; 2218 do { 2219 je->last_bytes[s] = js[s].commit_id; 2220 } while (++s < ic->sectors_per_block); 2221 2222 if (ic->internal_hash) { 2223 unsigned int digest_size = ic->internal_hash_digestsize; 2224 void *js_page = integrity_identity(ic, (char *)js - offset_in_page(js)); 2225 unsigned js_offset = offset_in_page(js); 2226 2227 if (unlikely(digest_size > ic->tag_size)) { 2228 char checksums_onstack[HASH_MAX_DIGESTSIZE]; 2229 2230 integrity_sector_checksum(ic, &dio->ahash_req, logical_sector, js_page, js_offset, checksums_onstack); 2231 memcpy(journal_entry_tag(ic, je), checksums_onstack, ic->tag_size); 2232 } else 2233 integrity_sector_checksum(ic, &dio->ahash_req, logical_sector, js_page, js_offset, journal_entry_tag(ic, je)); 2234 } 2235 2236 journal_entry_set_sector(je, logical_sector); 2237 } 2238 logical_sector += ic->sectors_per_block; 2239 2240 journal_entry++; 2241 if (unlikely(journal_entry == ic->journal_section_entries)) { 2242 journal_entry = 0; 2243 journal_section++; 2244 wraparound_section(ic, &journal_section); 2245 } 2246 2247 bv.bv_offset += ic->sectors_per_block << SECTOR_SHIFT; 2248 } while (bv.bv_len -= ic->sectors_per_block << SECTOR_SHIFT); 2249 2250 if (unlikely(dio->op == REQ_OP_READ)) 2251 flush_dcache_page(bv.bv_page); 2252 kunmap_local(mem); 2253 } while (n_sectors); 2254 2255 if (likely(dio->op == REQ_OP_WRITE)) { 2256 smp_mb(); 2257 if (unlikely(waitqueue_active(&ic->copy_to_journal_wait))) 2258 wake_up(&ic->copy_to_journal_wait); 2259 if (READ_ONCE(ic->free_sectors) <= ic->free_sectors_threshold) 2260 queue_work(ic->commit_wq, &ic->commit_work); 2261 else 2262 schedule_autocommit(ic); 2263 } else 2264 remove_range(ic, &dio->range); 2265 2266 if (unlikely(bio->bi_iter.bi_size)) { 2267 sector_t area, offset; 2268 2269 dio->range.logical_sector = logical_sector; 2270 get_area_and_offset(ic, dio->range.logical_sector, &area, &offset); 2271 dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, &dio->metadata_offset); 2272 return true; 2273 } 2274 2275 return false; 2276 } 2277 2278 static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map) 2279 { 2280 struct dm_integrity_c *ic = dio->ic; 2281 struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); 2282 unsigned int journal_section, journal_entry; 2283 unsigned int journal_read_pos; 2284 sector_t recalc_sector; 2285 struct completion read_comp; 2286 bool discard_retried = false; 2287 bool need_sync_io = ic->internal_hash && dio->op == REQ_OP_READ; 2288 2289 if 
(unlikely(dio->op == REQ_OP_DISCARD) && ic->mode != 'D') 2290 need_sync_io = true; 2291 2292 if (need_sync_io && from_map) { 2293 INIT_WORK(&dio->work, integrity_bio_wait); 2294 queue_work(ic->offload_wq, &dio->work); 2295 return; 2296 } 2297 2298 lock_retry: 2299 spin_lock_irq(&ic->endio_wait.lock); 2300 retry: 2301 if (unlikely(dm_integrity_failed(ic))) { 2302 spin_unlock_irq(&ic->endio_wait.lock); 2303 do_endio(ic, bio); 2304 return; 2305 } 2306 dio->range.n_sectors = bio_sectors(bio); 2307 journal_read_pos = NOT_FOUND; 2308 if (ic->mode == 'J' && likely(dio->op != REQ_OP_DISCARD)) { 2309 if (dio->op == REQ_OP_WRITE) { 2310 unsigned int next_entry, i, pos; 2311 unsigned int ws, we, range_sectors; 2312 2313 dio->range.n_sectors = min(dio->range.n_sectors, 2314 (sector_t)ic->free_sectors << ic->sb->log2_sectors_per_block); 2315 if (unlikely(!dio->range.n_sectors)) { 2316 if (from_map) 2317 goto offload_to_thread; 2318 sleep_on_endio_wait(ic); 2319 goto retry; 2320 } 2321 range_sectors = dio->range.n_sectors >> ic->sb->log2_sectors_per_block; 2322 ic->free_sectors -= range_sectors; 2323 journal_section = ic->free_section; 2324 journal_entry = ic->free_section_entry; 2325 2326 next_entry = ic->free_section_entry + range_sectors; 2327 ic->free_section_entry = next_entry % ic->journal_section_entries; 2328 ic->free_section += next_entry / ic->journal_section_entries; 2329 ic->n_uncommitted_sections += next_entry / ic->journal_section_entries; 2330 wraparound_section(ic, &ic->free_section); 2331 2332 pos = journal_section * ic->journal_section_entries + journal_entry; 2333 ws = journal_section; 2334 we = journal_entry; 2335 i = 0; 2336 do { 2337 struct journal_entry *je; 2338 2339 add_journal_node(ic, &ic->journal_tree[pos], dio->range.logical_sector + i); 2340 pos++; 2341 if (unlikely(pos >= ic->journal_entries)) 2342 pos = 0; 2343 2344 je = access_journal_entry(ic, ws, we); 2345 BUG_ON(!journal_entry_is_unused(je)); 2346 journal_entry_set_inprogress(je); 2347 we++; 2348 if (unlikely(we == ic->journal_section_entries)) { 2349 we = 0; 2350 ws++; 2351 wraparound_section(ic, &ws); 2352 } 2353 } while ((i += ic->sectors_per_block) < dio->range.n_sectors); 2354 2355 spin_unlock_irq(&ic->endio_wait.lock); 2356 goto journal_read_write; 2357 } else { 2358 sector_t next_sector; 2359 2360 journal_read_pos = find_journal_node(ic, dio->range.logical_sector, &next_sector); 2361 if (likely(journal_read_pos == NOT_FOUND)) { 2362 if (unlikely(dio->range.n_sectors > next_sector - dio->range.logical_sector)) 2363 dio->range.n_sectors = next_sector - dio->range.logical_sector; 2364 } else { 2365 unsigned int i; 2366 unsigned int jp = journal_read_pos + 1; 2367 2368 for (i = ic->sectors_per_block; i < dio->range.n_sectors; i += ic->sectors_per_block, jp++) { 2369 if (!test_journal_node(ic, jp, dio->range.logical_sector + i)) 2370 break; 2371 } 2372 dio->range.n_sectors = i; 2373 } 2374 } 2375 } 2376 if (unlikely(!add_new_range(ic, &dio->range, true))) { 2377 /* 2378 * We must not sleep in the request routine because it could 2379 * stall bios on current->bio_list. 2380 * So, we offload the bio to a workqueue if we have to sleep. 
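	 * (Bios forwarded with submit_bio_noacct() from inside a ->map routine
	 * sit on current->bio_list until that routine returns, so sleeping here
	 * while waiting for one of them to complete could deadlock.)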
2381 */ 2382 if (from_map) { 2383 offload_to_thread: 2384 spin_unlock_irq(&ic->endio_wait.lock); 2385 INIT_WORK(&dio->work, integrity_bio_wait); 2386 queue_work(ic->wait_wq, &dio->work); 2387 return; 2388 } 2389 if (journal_read_pos != NOT_FOUND) 2390 dio->range.n_sectors = ic->sectors_per_block; 2391 wait_and_add_new_range(ic, &dio->range); 2392 /* 2393 * wait_and_add_new_range drops the spinlock, so the journal 2394 * may have been changed arbitrarily. We need to recheck. 2395 * To simplify the code, we restrict I/O size to just one block. 2396 */ 2397 if (journal_read_pos != NOT_FOUND) { 2398 sector_t next_sector; 2399 unsigned int new_pos; 2400 2401 new_pos = find_journal_node(ic, dio->range.logical_sector, &next_sector); 2402 if (unlikely(new_pos != journal_read_pos)) { 2403 remove_range_unlocked(ic, &dio->range); 2404 goto retry; 2405 } 2406 } 2407 } 2408 if (ic->mode == 'J' && likely(dio->op == REQ_OP_DISCARD) && !discard_retried) { 2409 sector_t next_sector; 2410 unsigned int new_pos; 2411 2412 new_pos = find_journal_node(ic, dio->range.logical_sector, &next_sector); 2413 if (unlikely(new_pos != NOT_FOUND) || 2414 unlikely(next_sector < dio->range.logical_sector - dio->range.n_sectors)) { 2415 remove_range_unlocked(ic, &dio->range); 2416 spin_unlock_irq(&ic->endio_wait.lock); 2417 queue_work(ic->commit_wq, &ic->commit_work); 2418 flush_workqueue(ic->commit_wq); 2419 queue_work(ic->writer_wq, &ic->writer_work); 2420 flush_workqueue(ic->writer_wq); 2421 discard_retried = true; 2422 goto lock_retry; 2423 } 2424 } 2425 recalc_sector = le64_to_cpu(ic->sb->recalc_sector); 2426 spin_unlock_irq(&ic->endio_wait.lock); 2427 2428 if (unlikely(journal_read_pos != NOT_FOUND)) { 2429 journal_section = journal_read_pos / ic->journal_section_entries; 2430 journal_entry = journal_read_pos % ic->journal_section_entries; 2431 goto journal_read_write; 2432 } 2433 2434 if (ic->mode == 'B' && (dio->op == REQ_OP_WRITE || unlikely(dio->op == REQ_OP_DISCARD))) { 2435 if (!block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector, 2436 dio->range.n_sectors, BITMAP_OP_TEST_ALL_SET)) { 2437 struct bitmap_block_status *bbs; 2438 2439 bbs = sector_to_bitmap_block(ic, dio->range.logical_sector); 2440 spin_lock(&bbs->bio_queue_lock); 2441 bio_list_add(&bbs->bio_queue, bio); 2442 spin_unlock(&bbs->bio_queue_lock); 2443 queue_work(ic->writer_wq, &bbs->work); 2444 return; 2445 } 2446 } 2447 2448 dio->in_flight = (atomic_t)ATOMIC_INIT(2); 2449 2450 if (need_sync_io) { 2451 init_completion(&read_comp); 2452 dio->completion = &read_comp; 2453 } else 2454 dio->completion = NULL; 2455 2456 dm_bio_record(&dio->bio_details, bio); 2457 bio_set_dev(bio, ic->dev->bdev); 2458 bio->bi_integrity = NULL; 2459 bio->bi_opf &= ~REQ_INTEGRITY; 2460 bio->bi_end_io = integrity_end_io; 2461 bio->bi_iter.bi_size = dio->range.n_sectors << SECTOR_SHIFT; 2462 2463 if (unlikely(dio->op == REQ_OP_DISCARD) && likely(ic->mode != 'D')) { 2464 integrity_metadata(&dio->work); 2465 dm_integrity_flush_buffers(ic, false); 2466 2467 dio->in_flight = (atomic_t)ATOMIC_INIT(1); 2468 dio->completion = NULL; 2469 2470 submit_bio_noacct(bio); 2471 2472 return; 2473 } 2474 2475 submit_bio_noacct(bio); 2476 2477 if (need_sync_io) { 2478 wait_for_completion_io(&read_comp); 2479 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) && 2480 dio->range.logical_sector + dio->range.n_sectors > recalc_sector) 2481 goto skip_check; 2482 if (ic->mode == 'B') { 2483 if (!block_bitmap_op(ic, ic->recalc_bitmap, dio->range.logical_sector, 2484 
dio->range.n_sectors, BITMAP_OP_TEST_ALL_CLEAR)) 2485 goto skip_check; 2486 } 2487 2488 if (likely(!bio->bi_status)) 2489 integrity_metadata(&dio->work); 2490 else 2491 skip_check: 2492 dec_in_flight(dio); 2493 } else { 2494 INIT_WORK(&dio->work, integrity_metadata); 2495 queue_work(ic->metadata_wq, &dio->work); 2496 } 2497 2498 return; 2499 2500 journal_read_write: 2501 if (unlikely(__journal_read_write(dio, bio, journal_section, journal_entry))) 2502 goto lock_retry; 2503 2504 do_endio_flush(ic, dio); 2505 } 2506 2507 static int dm_integrity_map_inline(struct dm_integrity_io *dio, bool from_map) 2508 { 2509 struct dm_integrity_c *ic = dio->ic; 2510 struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); 2511 struct bio_integrity_payload *bip; 2512 unsigned ret; 2513 sector_t recalc_sector; 2514 2515 if (unlikely(bio_integrity(bio))) { 2516 bio->bi_status = BLK_STS_NOTSUPP; 2517 bio_endio(bio); 2518 return DM_MAPIO_SUBMITTED; 2519 } 2520 2521 bio_set_dev(bio, ic->dev->bdev); 2522 if (unlikely((bio->bi_opf & REQ_PREFLUSH) != 0)) 2523 return DM_MAPIO_REMAPPED; 2524 2525 retry: 2526 if (!dio->integrity_payload) { 2527 unsigned digest_size, extra_size; 2528 dio->payload_len = ic->tuple_size * (bio_sectors(bio) >> ic->sb->log2_sectors_per_block); 2529 digest_size = ic->internal_hash_digestsize; 2530 extra_size = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0; 2531 dio->payload_len += extra_size; 2532 dio->integrity_payload = kmalloc(dio->payload_len, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); 2533 if (unlikely(!dio->integrity_payload)) { 2534 const unsigned x_size = PAGE_SIZE << 1; 2535 if (dio->payload_len > x_size) { 2536 unsigned sectors = ((x_size - extra_size) / ic->tuple_size) << ic->sb->log2_sectors_per_block; 2537 if (WARN_ON(!sectors || sectors >= bio_sectors(bio))) { 2538 bio->bi_status = BLK_STS_NOTSUPP; 2539 bio_endio(bio); 2540 return DM_MAPIO_SUBMITTED; 2541 } 2542 dm_accept_partial_bio(bio, sectors); 2543 goto retry; 2544 } 2545 } 2546 } 2547 2548 dio->range.logical_sector = bio->bi_iter.bi_sector; 2549 dio->range.n_sectors = bio_sectors(bio); 2550 2551 if (!(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))) 2552 goto skip_spinlock; 2553 #ifdef CONFIG_64BIT 2554 /* 2555 * On 64-bit CPUs we can optimize the lock away (so that it won't cause 2556 * cache line bouncing) and use acquire/release barriers instead. 2557 * 2558 * Paired with smp_store_release in integrity_recalc_inline. 
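	 * A minimal sketch of that pairing (simplified names, illustration only):
	 *
	 *	recalc thread:	write valid tags for sectors < new_end;
	 *			smp_store_release(&sb->recalc_sector, new_end);
	 *	map path:	end = smp_load_acquire(&sb->recalc_sector);
	 *			sectors below 'end' are known to have valid tags.
	 *
	 * (A 64-bit load is not a single atomic access on 32-bit targets, which
	 * is why this fast path is compiled only for CONFIG_64BIT.)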
2559 */ 2560 recalc_sector = le64_to_cpu(smp_load_acquire(&ic->sb->recalc_sector)); 2561 if (likely(dio->range.logical_sector + dio->range.n_sectors <= recalc_sector)) 2562 goto skip_spinlock; 2563 #endif 2564 spin_lock_irq(&ic->endio_wait.lock); 2565 recalc_sector = le64_to_cpu(ic->sb->recalc_sector); 2566 if (dio->range.logical_sector + dio->range.n_sectors <= recalc_sector) 2567 goto skip_unlock; 2568 if (unlikely(!add_new_range(ic, &dio->range, true))) { 2569 if (from_map) { 2570 spin_unlock_irq(&ic->endio_wait.lock); 2571 INIT_WORK(&dio->work, integrity_bio_wait); 2572 queue_work(ic->wait_wq, &dio->work); 2573 return DM_MAPIO_SUBMITTED; 2574 } 2575 wait_and_add_new_range(ic, &dio->range); 2576 } 2577 dio->integrity_range_locked = true; 2578 skip_unlock: 2579 spin_unlock_irq(&ic->endio_wait.lock); 2580 skip_spinlock: 2581 2582 if (unlikely(!dio->integrity_payload)) { 2583 dio->integrity_payload = page_to_virt((struct page *)mempool_alloc(&ic->recheck_pool, GFP_NOIO)); 2584 dio->integrity_payload_from_mempool = true; 2585 } 2586 2587 dio->bio_details.bi_iter = bio->bi_iter; 2588 2589 if (unlikely(!dm_integrity_check_limits(ic, bio->bi_iter.bi_sector, bio))) { 2590 return DM_MAPIO_KILL; 2591 } 2592 2593 bio->bi_iter.bi_sector += ic->start + SB_SECTORS; 2594 2595 bip = bio_integrity_alloc(bio, GFP_NOIO, 1); 2596 if (IS_ERR(bip)) { 2597 bio->bi_status = errno_to_blk_status(PTR_ERR(bip)); 2598 bio_endio(bio); 2599 return DM_MAPIO_SUBMITTED; 2600 } 2601 2602 if (dio->op == REQ_OP_WRITE) { 2603 unsigned pos = 0; 2604 while (dio->bio_details.bi_iter.bi_size) { 2605 struct bio_vec bv = bio_iter_iovec(bio, dio->bio_details.bi_iter); 2606 const char *mem = integrity_kmap(ic, bv.bv_page); 2607 if (ic->tag_size < ic->tuple_size) 2608 memset(dio->integrity_payload + pos + ic->tag_size, 0, ic->tuple_size - ic->tag_size); 2609 integrity_sector_checksum(ic, &dio->ahash_req, dio->bio_details.bi_iter.bi_sector, mem, bv.bv_offset, dio->integrity_payload + pos); 2610 integrity_kunmap(ic, mem); 2611 pos += ic->tuple_size; 2612 bio_advance_iter_single(bio, &dio->bio_details.bi_iter, ic->sectors_per_block << SECTOR_SHIFT); 2613 } 2614 } 2615 2616 ret = bio_integrity_add_page(bio, virt_to_page(dio->integrity_payload), 2617 dio->payload_len, offset_in_page(dio->integrity_payload)); 2618 if (unlikely(ret != dio->payload_len)) { 2619 bio->bi_status = BLK_STS_RESOURCE; 2620 bio_endio(bio); 2621 return DM_MAPIO_SUBMITTED; 2622 } 2623 2624 return DM_MAPIO_REMAPPED; 2625 } 2626 2627 static inline void dm_integrity_free_payload(struct dm_integrity_io *dio) 2628 { 2629 struct dm_integrity_c *ic = dio->ic; 2630 if (unlikely(dio->integrity_payload_from_mempool)) 2631 mempool_free(virt_to_page(dio->integrity_payload), &ic->recheck_pool); 2632 else 2633 kfree(dio->integrity_payload); 2634 dio->integrity_payload = NULL; 2635 dio->integrity_payload_from_mempool = false; 2636 } 2637 2638 static void dm_integrity_inline_recheck(struct work_struct *w) 2639 { 2640 struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work); 2641 struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); 2642 struct dm_integrity_c *ic = dio->ic; 2643 struct bio *outgoing_bio; 2644 void *outgoing_data; 2645 2646 dio->integrity_payload = page_to_virt((struct page *)mempool_alloc(&ic->recheck_pool, GFP_NOIO)); 2647 dio->integrity_payload_from_mempool = true; 2648 2649 outgoing_data = dio->integrity_payload + PAGE_SIZE; 2650 2651 while (dio->bio_details.bi_iter.bi_size) { 2652 char digest[HASH_MAX_DIGESTSIZE];
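		/*
		 * Re-read and re-verify one block at a time: the original vectored
		 * read may have raced with a concurrent writer, so only a mismatch
		 * that persists on this synchronous re-read is reported as an error.
		 */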
2653 int r; 2654 struct bio_integrity_payload *bip; 2655 struct bio_vec bv; 2656 char *mem; 2657 2658 outgoing_bio = bio_alloc_bioset(ic->dev->bdev, 1, REQ_OP_READ, GFP_NOIO, &ic->recheck_bios); 2659 bio_add_virt_nofail(outgoing_bio, outgoing_data, 2660 ic->sectors_per_block << SECTOR_SHIFT); 2661 2662 bip = bio_integrity_alloc(outgoing_bio, GFP_NOIO, 1); 2663 if (IS_ERR(bip)) { 2664 bio_put(outgoing_bio); 2665 bio->bi_status = errno_to_blk_status(PTR_ERR(bip)); 2666 bio_endio(bio); 2667 return; 2668 } 2669 2670 r = bio_integrity_add_page(outgoing_bio, virt_to_page(dio->integrity_payload), ic->tuple_size, 0); 2671 if (unlikely(r != ic->tuple_size)) { 2672 bio_put(outgoing_bio); 2673 bio->bi_status = BLK_STS_RESOURCE; 2674 bio_endio(bio); 2675 return; 2676 } 2677 2678 outgoing_bio->bi_iter.bi_sector = dio->bio_details.bi_iter.bi_sector + ic->start + SB_SECTORS; 2679 2680 r = submit_bio_wait(outgoing_bio); 2681 if (unlikely(r != 0)) { 2682 bio_put(outgoing_bio); 2683 bio->bi_status = errno_to_blk_status(r); 2684 bio_endio(bio); 2685 return; 2686 } 2687 bio_put(outgoing_bio); 2688 2689 integrity_sector_checksum(ic, &dio->ahash_req, dio->bio_details.bi_iter.bi_sector, integrity_identity(ic, outgoing_data), 0, digest); 2690 if (unlikely(crypto_memneq(digest, dio->integrity_payload, min(ic->internal_hash_digestsize, ic->tag_size)))) { 2691 DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx", 2692 ic->dev->bdev, dio->bio_details.bi_iter.bi_sector); 2693 atomic64_inc(&ic->number_of_mismatches); 2694 dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum", 2695 bio, dio->bio_details.bi_iter.bi_sector, 0); 2696 2697 bio->bi_status = BLK_STS_PROTECTION; 2698 bio_endio(bio); 2699 return; 2700 } 2701 2702 bv = bio_iter_iovec(bio, dio->bio_details.bi_iter); 2703 mem = bvec_kmap_local(&bv); 2704 memcpy(mem, outgoing_data, ic->sectors_per_block << SECTOR_SHIFT); 2705 kunmap_local(mem); 2706 2707 bio_advance_iter_single(bio, &dio->bio_details.bi_iter, ic->sectors_per_block << SECTOR_SHIFT); 2708 } 2709 2710 bio_endio(bio); 2711 } 2712 2713 static inline bool dm_integrity_check(struct dm_integrity_c *ic, struct dm_integrity_io *dio) 2714 { 2715 struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); 2716 unsigned pos = 0; 2717 2718 while (dio->bio_details.bi_iter.bi_size) { 2719 char digest[HASH_MAX_DIGESTSIZE]; 2720 struct bio_vec bv = bio_iter_iovec(bio, dio->bio_details.bi_iter); 2721 char *mem = integrity_kmap(ic, bv.bv_page); 2722 integrity_sector_checksum(ic, &dio->ahash_req, dio->bio_details.bi_iter.bi_sector, mem, bv.bv_offset, digest); 2723 if (unlikely(crypto_memneq(digest, dio->integrity_payload + pos, 2724 min(ic->internal_hash_digestsize, ic->tag_size)))) { 2725 integrity_kunmap(ic, mem); 2726 dm_integrity_free_payload(dio); 2727 INIT_WORK(&dio->work, dm_integrity_inline_recheck); 2728 queue_work(ic->offload_wq, &dio->work); 2729 return false; 2730 } 2731 integrity_kunmap(ic, mem); 2732 pos += ic->tuple_size; 2733 bio_advance_iter_single(bio, &dio->bio_details.bi_iter, ic->sectors_per_block << SECTOR_SHIFT); 2734 } 2735 2736 return true; 2737 } 2738 2739 static void dm_integrity_inline_async_check(struct work_struct *w) 2740 { 2741 struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work); 2742 struct dm_integrity_c *ic = dio->ic; 2743 struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); 2744 2745 if (likely(dm_integrity_check(ic, dio))) 2746 bio_endio(bio); 2747 } 2748 2749 static int dm_integrity_end_io(struct dm_target *ti, 
struct bio *bio, blk_status_t *status) 2750 { 2751 struct dm_integrity_c *ic = ti->private; 2752 struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io)); 2753 if (ic->mode == 'I') { 2754 if (dio->op == REQ_OP_READ && likely(*status == BLK_STS_OK) && likely(dio->bio_details.bi_iter.bi_size != 0)) { 2755 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) && 2756 unlikely(dio->integrity_range_locked)) 2757 goto skip_check; 2758 if (likely(ic->internal_shash != NULL)) { 2759 if (unlikely(!dm_integrity_check(ic, dio))) 2760 return DM_ENDIO_INCOMPLETE; 2761 } else { 2762 INIT_WORK(&dio->work, dm_integrity_inline_async_check); 2763 queue_work(ic->offload_wq, &dio->work); 2764 return DM_ENDIO_INCOMPLETE; 2765 } 2766 } 2767 skip_check: 2768 dm_integrity_free_payload(dio); 2769 if (unlikely(dio->integrity_range_locked)) 2770 remove_range(ic, &dio->range); 2771 } 2772 if (unlikely(dio->ahash_req)) 2773 mempool_free(dio->ahash_req, &ic->ahash_req_pool); 2774 return DM_ENDIO_DONE; 2775 } 2776 2777 static void integrity_bio_wait(struct work_struct *w) 2778 { 2779 struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work); 2780 struct dm_integrity_c *ic = dio->ic; 2781 2782 if (ic->mode == 'I') { 2783 struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); 2784 int r = dm_integrity_map_inline(dio, false); 2785 switch (r) { 2786 case DM_MAPIO_KILL: 2787 bio->bi_status = BLK_STS_IOERR; 2788 fallthrough; 2789 case DM_MAPIO_REMAPPED: 2790 submit_bio_noacct(bio); 2791 fallthrough; 2792 case DM_MAPIO_SUBMITTED: 2793 return; 2794 default: 2795 BUG(); 2796 } 2797 } else { 2798 dm_integrity_map_continue(dio, false); 2799 } 2800 } 2801 2802 static void pad_uncommitted(struct dm_integrity_c *ic) 2803 { 2804 if (ic->free_section_entry) { 2805 ic->free_sectors -= ic->journal_section_entries - ic->free_section_entry; 2806 ic->free_section_entry = 0; 2807 ic->free_section++; 2808 wraparound_section(ic, &ic->free_section); 2809 ic->n_uncommitted_sections++; 2810 } 2811 if (WARN_ON(ic->journal_sections * ic->journal_section_entries != 2812 (ic->n_uncommitted_sections + ic->n_committed_sections) * 2813 ic->journal_section_entries + ic->free_sectors)) { 2814 DMCRIT("journal_sections %u, journal_section_entries %u, " 2815 "n_uncommitted_sections %u, n_committed_sections %u, " 2816 "journal_section_entries %u, free_sectors %u", 2817 ic->journal_sections, ic->journal_section_entries, 2818 ic->n_uncommitted_sections, ic->n_committed_sections, 2819 ic->journal_section_entries, ic->free_sectors); 2820 } 2821 } 2822 2823 static void integrity_commit(struct work_struct *w) 2824 { 2825 struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, commit_work); 2826 unsigned int commit_start, commit_sections; 2827 unsigned int i, j, n; 2828 struct bio *flushes; 2829 2830 timer_delete(&ic->autocommit_timer); 2831 2832 if (ic->mode == 'I') 2833 return; 2834 2835 spin_lock_irq(&ic->endio_wait.lock); 2836 flushes = bio_list_get(&ic->flush_bio_list); 2837 if (unlikely(ic->mode != 'J')) { 2838 spin_unlock_irq(&ic->endio_wait.lock); 2839 dm_integrity_flush_buffers(ic, true); 2840 goto release_flush_bios; 2841 } 2842 2843 pad_uncommitted(ic); 2844 commit_start = ic->uncommitted_section; 2845 commit_sections = ic->n_uncommitted_sections; 2846 spin_unlock_irq(&ic->endio_wait.lock); 2847 2848 if (!commit_sections) 2849 goto release_flush_bios; 2850 2851 ic->wrote_to_journal = true; 2852 2853 i = commit_start; 2854 for (n = 0; n < commit_sections; n++) { 2855 for (j = 
0; j < ic->journal_section_entries; j++) { 2856 struct journal_entry *je; 2857 2858 je = access_journal_entry(ic, i, j); 2859 io_wait_event(ic->copy_to_journal_wait, !journal_entry_is_inprogress(je)); 2860 } 2861 for (j = 0; j < ic->journal_section_sectors; j++) { 2862 struct journal_sector *js; 2863 2864 js = access_journal(ic, i, j); 2865 js->commit_id = dm_integrity_commit_id(ic, i, j, ic->commit_seq); 2866 } 2867 i++; 2868 if (unlikely(i >= ic->journal_sections)) 2869 ic->commit_seq = next_commit_seq(ic->commit_seq); 2870 wraparound_section(ic, &i); 2871 } 2872 smp_rmb(); 2873 2874 write_journal(ic, commit_start, commit_sections); 2875 2876 spin_lock_irq(&ic->endio_wait.lock); 2877 ic->uncommitted_section += commit_sections; 2878 wraparound_section(ic, &ic->uncommitted_section); 2879 ic->n_uncommitted_sections -= commit_sections; 2880 ic->n_committed_sections += commit_sections; 2881 spin_unlock_irq(&ic->endio_wait.lock); 2882 2883 if (READ_ONCE(ic->free_sectors) <= ic->free_sectors_threshold) 2884 queue_work(ic->writer_wq, &ic->writer_work); 2885 2886 release_flush_bios: 2887 while (flushes) { 2888 struct bio *next = flushes->bi_next; 2889 2890 flushes->bi_next = NULL; 2891 do_endio(ic, flushes); 2892 flushes = next; 2893 } 2894 } 2895 2896 static void complete_copy_from_journal(unsigned long error, void *context) 2897 { 2898 struct journal_io *io = context; 2899 struct journal_completion *comp = io->comp; 2900 struct dm_integrity_c *ic = comp->ic; 2901 2902 remove_range(ic, &io->range); 2903 mempool_free(io, &ic->journal_io_mempool); 2904 if (unlikely(error != 0)) 2905 dm_integrity_io_error(ic, "copying from journal", -EIO); 2906 complete_journal_op(comp); 2907 } 2908 2909 static void restore_last_bytes(struct dm_integrity_c *ic, struct journal_sector *js, 2910 struct journal_entry *je) 2911 { 2912 unsigned int s = 0; 2913 2914 do { 2915 js->commit_id = je->last_bytes[s]; 2916 js++; 2917 } while (++s < ic->sectors_per_block); 2918 } 2919 2920 static void do_journal_write(struct dm_integrity_c *ic, unsigned int write_start, 2921 unsigned int write_sections, bool from_replay) 2922 { 2923 unsigned int i, j, n; 2924 struct journal_completion comp; 2925 struct blk_plug plug; 2926 2927 blk_start_plug(&plug); 2928 2929 comp.ic = ic; 2930 comp.in_flight = (atomic_t)ATOMIC_INIT(1); 2931 init_completion(&comp.comp); 2932 2933 i = write_start; 2934 for (n = 0; n < write_sections; n++, i++, wraparound_section(ic, &i)) { 2935 #ifndef INTERNAL_VERIFY 2936 if (unlikely(from_replay)) 2937 #endif 2938 rw_section_mac(ic, i, false); 2939 for (j = 0; j < ic->journal_section_entries; j++) { 2940 struct journal_entry *je = access_journal_entry(ic, i, j); 2941 sector_t sec, area, offset; 2942 unsigned int k, l, next_loop; 2943 sector_t metadata_block; 2944 unsigned int metadata_offset; 2945 struct journal_io *io; 2946 2947 if (journal_entry_is_unused(je)) 2948 continue; 2949 BUG_ON(unlikely(journal_entry_is_inprogress(je)) && !from_replay); 2950 sec = journal_entry_get_sector(je); 2951 if (unlikely(from_replay)) { 2952 if (unlikely(sec & (unsigned int)(ic->sectors_per_block - 1))) { 2953 dm_integrity_io_error(ic, "invalid sector in journal", -EIO); 2954 sec &= ~(sector_t)(ic->sectors_per_block - 1); 2955 } 2956 if (unlikely(sec >= ic->provided_data_sectors)) { 2957 journal_entry_set_unused(je); 2958 continue; 2959 } 2960 } 2961 get_area_and_offset(ic, sec, &area, &offset); 2962 restore_last_bytes(ic, access_journal_data(ic, i, j), je); 2963 for (k = j + 1; k < ic->journal_section_entries; k++) { 2964 
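				/*
				 * Look ahead for journal entries that describe consecutive
				 * data sectors in the same area, so the run j..k-1 can be
				 * written back with a single copy operation.
				 */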
struct journal_entry *je2 = access_journal_entry(ic, i, k); 2965 sector_t sec2, area2, offset2; 2966 2967 if (journal_entry_is_unused(je2)) 2968 break; 2969 BUG_ON(unlikely(journal_entry_is_inprogress(je2)) && !from_replay); 2970 sec2 = journal_entry_get_sector(je2); 2971 if (unlikely(sec2 >= ic->provided_data_sectors)) 2972 break; 2973 get_area_and_offset(ic, sec2, &area2, &offset2); 2974 if (area2 != area || offset2 != offset + ((k - j) << ic->sb->log2_sectors_per_block)) 2975 break; 2976 restore_last_bytes(ic, access_journal_data(ic, i, k), je2); 2977 } 2978 next_loop = k - 1; 2979 2980 io = mempool_alloc(&ic->journal_io_mempool, GFP_NOIO); 2981 io->comp = &comp; 2982 io->range.logical_sector = sec; 2983 io->range.n_sectors = (k - j) << ic->sb->log2_sectors_per_block; 2984 2985 spin_lock_irq(&ic->endio_wait.lock); 2986 add_new_range_and_wait(ic, &io->range); 2987 2988 if (likely(!from_replay)) { 2989 struct journal_node *section_node = &ic->journal_tree[i * ic->journal_section_entries]; 2990 2991 /* don't write if there is newer committed sector */ 2992 while (j < k && find_newer_committed_node(ic, &section_node[j])) { 2993 struct journal_entry *je2 = access_journal_entry(ic, i, j); 2994 2995 journal_entry_set_unused(je2); 2996 remove_journal_node(ic, &section_node[j]); 2997 j++; 2998 sec += ic->sectors_per_block; 2999 offset += ic->sectors_per_block; 3000 } 3001 while (j < k && find_newer_committed_node(ic, &section_node[k - 1])) { 3002 struct journal_entry *je2 = access_journal_entry(ic, i, k - 1); 3003 3004 journal_entry_set_unused(je2); 3005 remove_journal_node(ic, &section_node[k - 1]); 3006 k--; 3007 } 3008 if (j == k) { 3009 remove_range_unlocked(ic, &io->range); 3010 spin_unlock_irq(&ic->endio_wait.lock); 3011 mempool_free(io, &ic->journal_io_mempool); 3012 goto skip_io; 3013 } 3014 for (l = j; l < k; l++) 3015 remove_journal_node(ic, &section_node[l]); 3016 } 3017 spin_unlock_irq(&ic->endio_wait.lock); 3018 3019 metadata_block = get_metadata_sector_and_offset(ic, area, offset, &metadata_offset); 3020 for (l = j; l < k; l++) { 3021 int r; 3022 struct journal_entry *je2 = access_journal_entry(ic, i, l); 3023 3024 if ( 3025 #ifndef INTERNAL_VERIFY 3026 unlikely(from_replay) && 3027 #endif 3028 ic->internal_hash) { 3029 char test_tag[MAX_T(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)]; 3030 struct journal_sector *js = access_journal_data(ic, i, l); 3031 void *js_page = integrity_identity(ic, (char *)js - offset_in_page(js)); 3032 unsigned js_offset = offset_in_page(js); 3033 3034 integrity_sector_checksum(ic, &ic->journal_ahash_req, sec + ((l - j) << ic->sb->log2_sectors_per_block), 3035 js_page, js_offset, test_tag); 3036 if (unlikely(crypto_memneq(test_tag, journal_entry_tag(ic, je2), ic->tag_size))) { 3037 dm_integrity_io_error(ic, "tag mismatch when replaying journal", -EILSEQ); 3038 dm_audit_log_target(DM_MSG_PREFIX, "integrity-replay-journal", ic->ti, 0); 3039 } 3040 } 3041 3042 journal_entry_set_unused(je2); 3043 r = dm_integrity_rw_tag(ic, journal_entry_tag(ic, je2), &metadata_block, &metadata_offset, 3044 ic->tag_size, TAG_WRITE); 3045 if (unlikely(r)) 3046 dm_integrity_io_error(ic, "reading tags", r); 3047 } 3048 3049 atomic_inc(&comp.in_flight); 3050 copy_from_journal(ic, i, j << ic->sb->log2_sectors_per_block, 3051 (k - j) << ic->sb->log2_sectors_per_block, 3052 get_data_sector(ic, area, offset), 3053 complete_copy_from_journal, io); 3054 skip_io: 3055 j = next_loop; 3056 } 3057 } 3058 3059 dm_bufio_write_dirty_buffers_async(ic->bufio); 3060 3061 blk_finish_plug(&plug); 3062 3063
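	/*
	 * comp.in_flight started at 1 and was incremented once per submitted
	 * copy_from_journal(); dropping the initial reference here means the
	 * completion below fires only after every asynchronous copy has called
	 * complete_copy_from_journal().
	 */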
complete_journal_op(&comp); 3064 wait_for_completion_io(&comp.comp); 3065 3066 dm_integrity_flush_buffers(ic, true); 3067 } 3068 3069 static void integrity_writer(struct work_struct *w) 3070 { 3071 struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, writer_work); 3072 unsigned int write_start, write_sections; 3073 unsigned int prev_free_sectors; 3074 3075 spin_lock_irq(&ic->endio_wait.lock); 3076 write_start = ic->committed_section; 3077 write_sections = ic->n_committed_sections; 3078 spin_unlock_irq(&ic->endio_wait.lock); 3079 3080 if (!write_sections) 3081 return; 3082 3083 do_journal_write(ic, write_start, write_sections, false); 3084 3085 spin_lock_irq(&ic->endio_wait.lock); 3086 3087 ic->committed_section += write_sections; 3088 wraparound_section(ic, &ic->committed_section); 3089 ic->n_committed_sections -= write_sections; 3090 3091 prev_free_sectors = ic->free_sectors; 3092 ic->free_sectors += write_sections * ic->journal_section_entries; 3093 if (unlikely(!prev_free_sectors)) 3094 wake_up_locked(&ic->endio_wait); 3095 3096 spin_unlock_irq(&ic->endio_wait.lock); 3097 } 3098 3099 static void recalc_write_super(struct dm_integrity_c *ic) 3100 { 3101 int r; 3102 3103 dm_integrity_flush_buffers(ic, false); 3104 if (dm_integrity_failed(ic)) 3105 return; 3106 3107 r = sync_rw_sb(ic, REQ_OP_WRITE); 3108 if (unlikely(r)) 3109 dm_integrity_io_error(ic, "writing superblock", r); 3110 } 3111 3112 static void integrity_recalc(struct work_struct *w) 3113 { 3114 struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, recalc_work); 3115 size_t recalc_tags_size; 3116 u8 *recalc_buffer = NULL; 3117 u8 *recalc_tags = NULL; 3118 struct ahash_request *ahash_req = NULL; 3119 struct dm_integrity_range range; 3120 struct dm_io_request io_req; 3121 struct dm_io_region io_loc; 3122 sector_t area, offset; 3123 sector_t metadata_block; 3124 unsigned int metadata_offset; 3125 sector_t logical_sector, n_sectors; 3126 __u8 *t; 3127 unsigned int i; 3128 int r; 3129 unsigned int super_counter = 0; 3130 unsigned recalc_sectors = RECALC_SECTORS; 3131 3132 retry: 3133 recalc_buffer = kmalloc(recalc_sectors << SECTOR_SHIFT, GFP_NOIO | __GFP_NOWARN); 3134 if (!recalc_buffer) { 3135 oom: 3136 recalc_sectors >>= 1; 3137 if (recalc_sectors >= 1U << ic->sb->log2_sectors_per_block) 3138 goto retry; 3139 DMCRIT("out of memory for recalculate buffer - recalculation disabled"); 3140 goto free_ret; 3141 } 3142 recalc_tags_size = (recalc_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size; 3143 if (ic->internal_hash_digestsize > ic->tag_size) 3144 recalc_tags_size += ic->internal_hash_digestsize - ic->tag_size; 3145 recalc_tags = kvmalloc(recalc_tags_size, GFP_NOIO); 3146 if (!recalc_tags) { 3147 kfree(recalc_buffer); 3148 recalc_buffer = NULL; 3149 goto oom; 3150 } 3151 3152 DEBUG_print("start recalculation... 
(position %llx)\n", le64_to_cpu(ic->sb->recalc_sector)); 3153 3154 spin_lock_irq(&ic->endio_wait.lock); 3155 3156 next_chunk: 3157 3158 if (unlikely(dm_post_suspending(ic->ti))) 3159 goto unlock_ret; 3160 3161 range.logical_sector = le64_to_cpu(ic->sb->recalc_sector); 3162 if (unlikely(range.logical_sector >= ic->provided_data_sectors)) { 3163 if (ic->mode == 'B') { 3164 block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR); 3165 DEBUG_print("queue_delayed_work: bitmap_flush_work\n"); 3166 queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, 0); 3167 } 3168 goto unlock_ret; 3169 } 3170 3171 get_area_and_offset(ic, range.logical_sector, &area, &offset); 3172 range.n_sectors = min((sector_t)recalc_sectors, ic->provided_data_sectors - range.logical_sector); 3173 if (!ic->meta_dev) 3174 range.n_sectors = min(range.n_sectors, ((sector_t)1U << ic->sb->log2_interleave_sectors) - (unsigned int)offset); 3175 3176 add_new_range_and_wait(ic, &range); 3177 spin_unlock_irq(&ic->endio_wait.lock); 3178 logical_sector = range.logical_sector; 3179 n_sectors = range.n_sectors; 3180 3181 if (ic->mode == 'B') { 3182 if (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector, n_sectors, BITMAP_OP_TEST_ALL_CLEAR)) 3183 goto advance_and_next; 3184 3185 while (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector, 3186 ic->sectors_per_block, BITMAP_OP_TEST_ALL_CLEAR)) { 3187 logical_sector += ic->sectors_per_block; 3188 n_sectors -= ic->sectors_per_block; 3189 cond_resched(); 3190 } 3191 while (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector + n_sectors - ic->sectors_per_block, 3192 ic->sectors_per_block, BITMAP_OP_TEST_ALL_CLEAR)) { 3193 n_sectors -= ic->sectors_per_block; 3194 cond_resched(); 3195 } 3196 get_area_and_offset(ic, logical_sector, &area, &offset); 3197 } 3198 3199 DEBUG_print("recalculating: %llx, %llx\n", logical_sector, n_sectors); 3200 3201 if (unlikely(++super_counter == RECALC_WRITE_SUPER)) { 3202 recalc_write_super(ic); 3203 if (ic->mode == 'B') 3204 queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, ic->bitmap_flush_interval); 3205 3206 super_counter = 0; 3207 } 3208 3209 if (unlikely(dm_integrity_failed(ic))) 3210 goto err; 3211 3212 io_req.bi_opf = REQ_OP_READ; 3213 io_req.mem.type = DM_IO_KMEM; 3214 io_req.mem.ptr.addr = recalc_buffer; 3215 io_req.notify.fn = NULL; 3216 io_req.client = ic->io; 3217 io_loc.bdev = ic->dev->bdev; 3218 io_loc.sector = get_data_sector(ic, area, offset); 3219 io_loc.count = n_sectors; 3220 3221 r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT); 3222 if (unlikely(r)) { 3223 dm_integrity_io_error(ic, "reading data", r); 3224 goto err; 3225 } 3226 3227 t = recalc_tags; 3228 for (i = 0; i < n_sectors; i += ic->sectors_per_block) { 3229 void *ptr = recalc_buffer + (i << SECTOR_SHIFT); 3230 void *ptr_page = integrity_identity(ic, (char *)ptr - offset_in_page(ptr)); 3231 unsigned ptr_offset = offset_in_page(ptr); 3232 integrity_sector_checksum(ic, &ahash_req, logical_sector + i, ptr_page, ptr_offset, t); 3233 t += ic->tag_size; 3234 } 3235 3236 metadata_block = get_metadata_sector_and_offset(ic, area, offset, &metadata_offset); 3237 3238 r = dm_integrity_rw_tag(ic, recalc_tags, &metadata_block, &metadata_offset, t - recalc_tags, TAG_WRITE); 3239 if (unlikely(r)) { 3240 dm_integrity_io_error(ic, "writing tags", r); 3241 goto err; 3242 } 3243 3244 if (ic->mode == 'B') { 3245 sector_t start, end; 3246 3247 start = (range.logical_sector >> 3248 (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit)) << 3249 
(ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit); 3250 end = ((range.logical_sector + range.n_sectors) >> 3251 (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit)) << 3252 (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit); 3253 block_bitmap_op(ic, ic->recalc_bitmap, start, end - start, BITMAP_OP_CLEAR); 3254 } 3255 3256 advance_and_next: 3257 cond_resched(); 3258 3259 spin_lock_irq(&ic->endio_wait.lock); 3260 remove_range_unlocked(ic, &range); 3261 ic->sb->recalc_sector = cpu_to_le64(range.logical_sector + range.n_sectors); 3262 goto next_chunk; 3263 3264 err: 3265 remove_range(ic, &range); 3266 goto free_ret; 3267 3268 unlock_ret: 3269 spin_unlock_irq(&ic->endio_wait.lock); 3270 3271 recalc_write_super(ic); 3272 3273 free_ret: 3274 kfree(recalc_buffer); 3275 kvfree(recalc_tags); 3276 mempool_free(ahash_req, &ic->ahash_req_pool); 3277 } 3278 3279 static void integrity_recalc_inline(struct work_struct *w) 3280 { 3281 struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, recalc_work); 3282 size_t recalc_tags_size; 3283 u8 *recalc_buffer = NULL; 3284 u8 *recalc_tags = NULL; 3285 struct ahash_request *ahash_req = NULL; 3286 struct dm_integrity_range range; 3287 struct bio *bio; 3288 struct bio_integrity_payload *bip; 3289 __u8 *t; 3290 unsigned int i; 3291 int r; 3292 unsigned ret; 3293 unsigned int super_counter = 0; 3294 unsigned recalc_sectors = RECALC_SECTORS; 3295 3296 retry: 3297 recalc_buffer = kmalloc(recalc_sectors << SECTOR_SHIFT, GFP_NOIO | __GFP_NOWARN); 3298 if (!recalc_buffer) { 3299 oom: 3300 recalc_sectors >>= 1; 3301 if (recalc_sectors >= 1U << ic->sb->log2_sectors_per_block) 3302 goto retry; 3303 DMCRIT("out of memory for recalculate buffer - recalculation disabled"); 3304 goto free_ret; 3305 } 3306 3307 recalc_tags_size = (recalc_sectors >> ic->sb->log2_sectors_per_block) * ic->tuple_size; 3308 if (ic->internal_hash_digestsize > ic->tuple_size) 3309 recalc_tags_size += ic->internal_hash_digestsize - ic->tuple_size; 3310 recalc_tags = kmalloc(recalc_tags_size, GFP_NOIO | __GFP_NOWARN); 3311 if (!recalc_tags) { 3312 kfree(recalc_buffer); 3313 recalc_buffer = NULL; 3314 goto oom; 3315 } 3316 3317 spin_lock_irq(&ic->endio_wait.lock); 3318 3319 next_chunk: 3320 if (unlikely(dm_post_suspending(ic->ti))) 3321 goto unlock_ret; 3322 3323 range.logical_sector = le64_to_cpu(ic->sb->recalc_sector); 3324 if (unlikely(range.logical_sector >= ic->provided_data_sectors)) 3325 goto unlock_ret; 3326 range.n_sectors = min((sector_t)recalc_sectors, ic->provided_data_sectors - range.logical_sector); 3327 3328 add_new_range_and_wait(ic, &range); 3329 spin_unlock_irq(&ic->endio_wait.lock); 3330 3331 if (unlikely(++super_counter == RECALC_WRITE_SUPER)) { 3332 recalc_write_super(ic); 3333 super_counter = 0; 3334 } 3335 3336 if (unlikely(dm_integrity_failed(ic))) 3337 goto err; 3338 3339 DEBUG_print("recalculating: %llx - %llx\n", range.logical_sector, range.n_sectors); 3340 3341 bio = bio_alloc_bioset(ic->dev->bdev, 1, REQ_OP_READ, GFP_NOIO, &ic->recalc_bios); 3342 bio->bi_iter.bi_sector = ic->start + SB_SECTORS + range.logical_sector; 3343 bio_add_virt_nofail(bio, recalc_buffer, 3344 range.n_sectors << SECTOR_SHIFT); 3345 r = submit_bio_wait(bio); 3346 bio_put(bio); 3347 if (unlikely(r)) { 3348 dm_integrity_io_error(ic, "reading data", r); 3349 goto err; 3350 } 3351 3352 t = recalc_tags; 3353 for (i = 0; i < range.n_sectors; i += ic->sectors_per_block) { 3354 void *ptr = recalc_buffer + (i << SECTOR_SHIFT); 3355 void *ptr_page = 
integrity_identity(ic, (char *)ptr - offset_in_page(ptr)); 3356 unsigned ptr_offset = offset_in_page(ptr); 3357 memset(t, 0, ic->tuple_size); 3358 integrity_sector_checksum(ic, &ahash_req, range.logical_sector + i, ptr_page, ptr_offset, t); 3359 t += ic->tuple_size; 3360 } 3361 3362 bio = bio_alloc_bioset(ic->dev->bdev, 1, REQ_OP_WRITE, GFP_NOIO, &ic->recalc_bios); 3363 bio->bi_iter.bi_sector = ic->start + SB_SECTORS + range.logical_sector; 3364 bio_add_virt_nofail(bio, recalc_buffer, 3365 range.n_sectors << SECTOR_SHIFT); 3366 3367 bip = bio_integrity_alloc(bio, GFP_NOIO, 1); 3368 if (unlikely(IS_ERR(bip))) { 3369 bio_put(bio); 3370 DMCRIT("out of memory for bio integrity payload - recalculation disabled"); 3371 goto err; 3372 } 3373 ret = bio_integrity_add_page(bio, virt_to_page(recalc_tags), t - recalc_tags, offset_in_page(recalc_tags)); 3374 if (unlikely(ret != t - recalc_tags)) { 3375 bio_put(bio); 3376 dm_integrity_io_error(ic, "attaching integrity tags", -ENOMEM); 3377 goto err; 3378 } 3379 3380 r = submit_bio_wait(bio); 3381 bio_put(bio); 3382 if (unlikely(r)) { 3383 dm_integrity_io_error(ic, "writing data", r); 3384 goto err; 3385 } 3386 3387 cond_resched(); 3388 spin_lock_irq(&ic->endio_wait.lock); 3389 remove_range_unlocked(ic, &range); 3390 #ifdef CONFIG_64BIT 3391 /* Paired with smp_load_acquire in dm_integrity_map_inline. */ 3392 smp_store_release(&ic->sb->recalc_sector, cpu_to_le64(range.logical_sector + range.n_sectors)); 3393 #else 3394 ic->sb->recalc_sector = cpu_to_le64(range.logical_sector + range.n_sectors); 3395 #endif 3396 goto next_chunk; 3397 3398 err: 3399 remove_range(ic, &range); 3400 goto free_ret; 3401 3402 unlock_ret: 3403 spin_unlock_irq(&ic->endio_wait.lock); 3404 3405 recalc_write_super(ic); 3406 3407 free_ret: 3408 kfree(recalc_buffer); 3409 kfree(recalc_tags); 3410 mempool_free(ahash_req, &ic->ahash_req_pool); 3411 } 3412 3413 static void bitmap_block_work(struct work_struct *w) 3414 { 3415 struct bitmap_block_status *bbs = container_of(w, struct bitmap_block_status, work); 3416 struct dm_integrity_c *ic = bbs->ic; 3417 struct bio *bio; 3418 struct bio_list bio_queue; 3419 struct bio_list waiting; 3420 3421 bio_list_init(&waiting); 3422 3423 spin_lock(&bbs->bio_queue_lock); 3424 bio_queue = bbs->bio_queue; 3425 bio_list_init(&bbs->bio_queue); 3426 spin_unlock(&bbs->bio_queue_lock); 3427 3428 while ((bio = bio_list_pop(&bio_queue))) { 3429 struct dm_integrity_io *dio; 3430 3431 dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io)); 3432 3433 if (block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector, 3434 dio->range.n_sectors, BITMAP_OP_TEST_ALL_SET)) { 3435 remove_range(ic, &dio->range); 3436 INIT_WORK(&dio->work, integrity_bio_wait); 3437 queue_work(ic->offload_wq, &dio->work); 3438 } else { 3439 block_bitmap_op(ic, ic->journal, dio->range.logical_sector, 3440 dio->range.n_sectors, BITMAP_OP_SET); 3441 bio_list_add(&waiting, bio); 3442 } 3443 } 3444 3445 if (bio_list_empty(&waiting)) 3446 return; 3447 3448 rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 3449 bbs->idx * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), 3450 BITMAP_BLOCK_SIZE >> SECTOR_SHIFT, NULL); 3451 3452 while ((bio = bio_list_pop(&waiting))) { 3453 struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io)); 3454 3455 block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector, 3456 dio->range.n_sectors, BITMAP_OP_SET); 3457 3458 remove_range(ic, &dio->range); 3459 INIT_WORK(&dio->work, integrity_bio_wait); 3460 
queue_work(ic->offload_wq, &dio->work); 3461 } 3462 3463 queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, ic->bitmap_flush_interval); 3464 } 3465 3466 static void bitmap_flush_work(struct work_struct *work) 3467 { 3468 struct dm_integrity_c *ic = container_of(work, struct dm_integrity_c, bitmap_flush_work.work); 3469 struct dm_integrity_range range; 3470 unsigned long limit; 3471 struct bio *bio; 3472 3473 dm_integrity_flush_buffers(ic, false); 3474 3475 range.logical_sector = 0; 3476 range.n_sectors = ic->provided_data_sectors; 3477 3478 spin_lock_irq(&ic->endio_wait.lock); 3479 add_new_range_and_wait(ic, &range); 3480 spin_unlock_irq(&ic->endio_wait.lock); 3481 3482 dm_integrity_flush_buffers(ic, true); 3483 3484 limit = ic->provided_data_sectors; 3485 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) { 3486 limit = le64_to_cpu(ic->sb->recalc_sector) 3487 >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit) 3488 << (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit); 3489 } 3490 /*DEBUG_print("zeroing journal\n");*/ 3491 block_bitmap_op(ic, ic->journal, 0, limit, BITMAP_OP_CLEAR); 3492 block_bitmap_op(ic, ic->may_write_bitmap, 0, limit, BITMAP_OP_CLEAR); 3493 3494 rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0, 3495 ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); 3496 3497 spin_lock_irq(&ic->endio_wait.lock); 3498 remove_range_unlocked(ic, &range); 3499 while (unlikely((bio = bio_list_pop(&ic->synchronous_bios)) != NULL)) { 3500 bio_endio(bio); 3501 spin_unlock_irq(&ic->endio_wait.lock); 3502 spin_lock_irq(&ic->endio_wait.lock); 3503 } 3504 spin_unlock_irq(&ic->endio_wait.lock); 3505 } 3506 3507 3508 static void init_journal(struct dm_integrity_c *ic, unsigned int start_section, 3509 unsigned int n_sections, unsigned char commit_seq) 3510 { 3511 unsigned int i, j, n; 3512 3513 if (!n_sections) 3514 return; 3515 3516 for (n = 0; n < n_sections; n++) { 3517 i = start_section + n; 3518 wraparound_section(ic, &i); 3519 for (j = 0; j < ic->journal_section_sectors; j++) { 3520 struct journal_sector *js = access_journal(ic, i, j); 3521 3522 BUILD_BUG_ON(sizeof(js->sectors) != JOURNAL_SECTOR_DATA); 3523 memset(&js->sectors, 0, sizeof(js->sectors)); 3524 js->commit_id = dm_integrity_commit_id(ic, i, j, commit_seq); 3525 } 3526 for (j = 0; j < ic->journal_section_entries; j++) { 3527 struct journal_entry *je = access_journal_entry(ic, i, j); 3528 3529 journal_entry_set_unused(je); 3530 } 3531 } 3532 3533 write_journal(ic, start_section, n_sections); 3534 } 3535 3536 static int find_commit_seq(struct dm_integrity_c *ic, unsigned int i, unsigned int j, commit_id_t id) 3537 { 3538 unsigned char k; 3539 3540 for (k = 0; k < N_COMMIT_IDS; k++) { 3541 if (dm_integrity_commit_id(ic, i, j, k) == id) 3542 return k; 3543 } 3544 dm_integrity_io_error(ic, "journal commit id", -EIO); 3545 return -EIO; 3546 } 3547 3548 static void replay_journal(struct dm_integrity_c *ic) 3549 { 3550 unsigned int i, j; 3551 bool used_commit_ids[N_COMMIT_IDS]; 3552 unsigned int max_commit_id_sections[N_COMMIT_IDS]; 3553 unsigned int write_start, write_sections; 3554 unsigned int continue_section; 3555 bool journal_empty; 3556 unsigned char unused, last_used, want_commit_seq; 3557 3558 if (ic->mode == 'R') 3559 return; 3560 3561 if (ic->journal_uptodate) 3562 return; 3563 3564 last_used = 0; 3565 write_start = 0; 3566 3567 if (!ic->just_formatted) { 3568 DEBUG_print("reading journal\n"); 3569 rw_journal(ic, REQ_OP_READ, 0, ic->journal_sections, NULL); 
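		/*
		 * If the journal is encrypted, journal_io holds the raw pages just
		 * read from disk; they are decrypted into ic->journal below before
		 * the commit ids are scanned.
		 */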
3570 if (ic->journal_io) 3571 DEBUG_bytes(lowmem_page_address(ic->journal_io[0].page), 64, "read journal"); 3572 if (ic->journal_io) { 3573 struct journal_completion crypt_comp; 3574 3575 crypt_comp.ic = ic; 3576 init_completion(&crypt_comp.comp); 3577 crypt_comp.in_flight = (atomic_t)ATOMIC_INIT(0); 3578 encrypt_journal(ic, false, 0, ic->journal_sections, &crypt_comp); 3579 wait_for_completion(&crypt_comp.comp); 3580 } 3581 DEBUG_bytes(lowmem_page_address(ic->journal[0].page), 64, "decrypted journal"); 3582 } 3583 3584 if (dm_integrity_failed(ic)) 3585 goto clear_journal; 3586 3587 journal_empty = true; 3588 memset(used_commit_ids, 0, sizeof(used_commit_ids)); 3589 memset(max_commit_id_sections, 0, sizeof(max_commit_id_sections)); 3590 for (i = 0; i < ic->journal_sections; i++) { 3591 for (j = 0; j < ic->journal_section_sectors; j++) { 3592 int k; 3593 struct journal_sector *js = access_journal(ic, i, j); 3594 3595 k = find_commit_seq(ic, i, j, js->commit_id); 3596 if (k < 0) 3597 goto clear_journal; 3598 used_commit_ids[k] = true; 3599 max_commit_id_sections[k] = i; 3600 } 3601 if (journal_empty) { 3602 for (j = 0; j < ic->journal_section_entries; j++) { 3603 struct journal_entry *je = access_journal_entry(ic, i, j); 3604 3605 if (!journal_entry_is_unused(je)) { 3606 journal_empty = false; 3607 break; 3608 } 3609 } 3610 } 3611 } 3612 3613 if (!used_commit_ids[N_COMMIT_IDS - 1]) { 3614 unused = N_COMMIT_IDS - 1; 3615 while (unused && !used_commit_ids[unused - 1]) 3616 unused--; 3617 } else { 3618 for (unused = 0; unused < N_COMMIT_IDS; unused++) 3619 if (!used_commit_ids[unused]) 3620 break; 3621 if (unused == N_COMMIT_IDS) { 3622 dm_integrity_io_error(ic, "journal commit ids", -EIO); 3623 goto clear_journal; 3624 } 3625 } 3626 DEBUG_print("first unused commit seq %d [%d,%d,%d,%d]\n", 3627 unused, used_commit_ids[0], used_commit_ids[1], 3628 used_commit_ids[2], used_commit_ids[3]); 3629 3630 last_used = prev_commit_seq(unused); 3631 want_commit_seq = prev_commit_seq(last_used); 3632 3633 if (!used_commit_ids[want_commit_seq] && used_commit_ids[prev_commit_seq(want_commit_seq)]) 3634 journal_empty = true; 3635 3636 write_start = max_commit_id_sections[last_used] + 1; 3637 if (unlikely(write_start >= ic->journal_sections)) 3638 want_commit_seq = next_commit_seq(want_commit_seq); 3639 wraparound_section(ic, &write_start); 3640 3641 i = write_start; 3642 for (write_sections = 0; write_sections < ic->journal_sections; write_sections++) { 3643 for (j = 0; j < ic->journal_section_sectors; j++) { 3644 struct journal_sector *js = access_journal(ic, i, j); 3645 3646 if (js->commit_id != dm_integrity_commit_id(ic, i, j, want_commit_seq)) { 3647 /* 3648 * This could be caused by crash during writing. 3649 * We won't replay the inconsistent part of the 3650 * journal. 
3651 */ 3652 DEBUG_print("commit id mismatch at position (%u, %u): %d != %d\n", 3653 i, j, find_commit_seq(ic, i, j, js->commit_id), want_commit_seq); 3654 goto brk; 3655 } 3656 } 3657 i++; 3658 if (unlikely(i >= ic->journal_sections)) 3659 want_commit_seq = next_commit_seq(want_commit_seq); 3660 wraparound_section(ic, &i); 3661 } 3662 brk: 3663 3664 if (!journal_empty) { 3665 DEBUG_print("replaying %u sections, starting at %u, commit seq %d\n", 3666 write_sections, write_start, want_commit_seq); 3667 do_journal_write(ic, write_start, write_sections, true); 3668 } 3669 3670 if (write_sections == ic->journal_sections && (ic->mode == 'J' || journal_empty)) { 3671 continue_section = write_start; 3672 ic->commit_seq = want_commit_seq; 3673 DEBUG_print("continuing from section %u, commit seq %d\n", write_start, ic->commit_seq); 3674 } else { 3675 unsigned int s; 3676 unsigned char erase_seq; 3677 3678 clear_journal: 3679 DEBUG_print("clearing journal\n"); 3680 3681 erase_seq = prev_commit_seq(prev_commit_seq(last_used)); 3682 s = write_start; 3683 init_journal(ic, s, 1, erase_seq); 3684 s++; 3685 wraparound_section(ic, &s); 3686 if (ic->journal_sections >= 2) { 3687 init_journal(ic, s, ic->journal_sections - 2, erase_seq); 3688 s += ic->journal_sections - 2; 3689 wraparound_section(ic, &s); 3690 init_journal(ic, s, 1, erase_seq); 3691 } 3692 3693 continue_section = 0; 3694 ic->commit_seq = next_commit_seq(erase_seq); 3695 } 3696 3697 ic->committed_section = continue_section; 3698 ic->n_committed_sections = 0; 3699 3700 ic->uncommitted_section = continue_section; 3701 ic->n_uncommitted_sections = 0; 3702 3703 ic->free_section = continue_section; 3704 ic->free_section_entry = 0; 3705 ic->free_sectors = ic->journal_entries; 3706 3707 ic->journal_tree_root = RB_ROOT; 3708 for (i = 0; i < ic->journal_entries; i++) 3709 init_journal_node(&ic->journal_tree[i]); 3710 } 3711 3712 static void dm_integrity_enter_synchronous_mode(struct dm_integrity_c *ic) 3713 { 3714 DEBUG_print("%s\n", __func__); 3715 3716 if (ic->mode == 'B') { 3717 ic->bitmap_flush_interval = msecs_to_jiffies(10) + 1; 3718 ic->synchronous_mode = 1; 3719 3720 cancel_delayed_work_sync(&ic->bitmap_flush_work); 3721 queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, 0); 3722 flush_workqueue(ic->commit_wq); 3723 } 3724 } 3725 3726 static int dm_integrity_reboot(struct notifier_block *n, unsigned long code, void *x) 3727 { 3728 struct dm_integrity_c *ic = container_of(n, struct dm_integrity_c, reboot_notifier); 3729 3730 DEBUG_print("%s\n", __func__); 3731 3732 dm_integrity_enter_synchronous_mode(ic); 3733 3734 return NOTIFY_DONE; 3735 } 3736 3737 static void dm_integrity_postsuspend(struct dm_target *ti) 3738 { 3739 struct dm_integrity_c *ic = ti->private; 3740 int r; 3741 3742 WARN_ON(unregister_reboot_notifier(&ic->reboot_notifier)); 3743 3744 timer_delete_sync(&ic->autocommit_timer); 3745 3746 if (ic->recalc_wq) 3747 drain_workqueue(ic->recalc_wq); 3748 3749 if (ic->mode == 'B') 3750 cancel_delayed_work_sync(&ic->bitmap_flush_work); 3751 3752 queue_work(ic->commit_wq, &ic->commit_work); 3753 drain_workqueue(ic->commit_wq); 3754 3755 if (ic->mode == 'J') { 3756 queue_work(ic->writer_wq, &ic->writer_work); 3757 drain_workqueue(ic->writer_wq); 3758 dm_integrity_flush_buffers(ic, true); 3759 if (ic->wrote_to_journal) { 3760 init_journal(ic, ic->free_section, 3761 ic->journal_sections - ic->free_section, ic->commit_seq); 3762 if (ic->free_section) { 3763 init_journal(ic, 0, ic->free_section, 3764 next_commit_seq(ic->commit_seq)); 3765 
} 3766 } 3767 } 3768 3769 if (ic->mode == 'B') { 3770 dm_integrity_flush_buffers(ic, true); 3771 #if 1 3772 /* set to 0 to test bitmap replay code */ 3773 init_journal(ic, 0, ic->journal_sections, 0); 3774 ic->sb->flags &= ~cpu_to_le32(SB_FLAG_DIRTY_BITMAP); 3775 r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA); 3776 if (unlikely(r)) 3777 dm_integrity_io_error(ic, "writing superblock", r); 3778 #endif 3779 } 3780 3781 BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress)); 3782 3783 ic->journal_uptodate = true; 3784 } 3785 3786 static void dm_integrity_resume(struct dm_target *ti) 3787 { 3788 struct dm_integrity_c *ic = ti->private; 3789 __u64 old_provided_data_sectors = le64_to_cpu(ic->sb->provided_data_sectors); 3790 int r; 3791 3792 DEBUG_print("resume\n"); 3793 3794 ic->wrote_to_journal = false; 3795 3796 if (ic->provided_data_sectors != old_provided_data_sectors) { 3797 if (ic->provided_data_sectors > old_provided_data_sectors && 3798 ic->mode == 'B' && 3799 ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit) { 3800 rw_journal_sectors(ic, REQ_OP_READ, 0, 3801 ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); 3802 block_bitmap_op(ic, ic->journal, old_provided_data_sectors, 3803 ic->provided_data_sectors - old_provided_data_sectors, BITMAP_OP_SET); 3804 rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0, 3805 ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); 3806 } 3807 3808 ic->sb->provided_data_sectors = cpu_to_le64(ic->provided_data_sectors); 3809 r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA); 3810 if (unlikely(r)) 3811 dm_integrity_io_error(ic, "writing superblock", r); 3812 } 3813 3814 if (ic->sb->flags & cpu_to_le32(SB_FLAG_DIRTY_BITMAP)) { 3815 DEBUG_print("resume dirty_bitmap\n"); 3816 rw_journal_sectors(ic, REQ_OP_READ, 0, 3817 ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); 3818 if (ic->mode == 'B') { 3819 if (ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit && 3820 !ic->reset_recalculate_flag) { 3821 block_bitmap_copy(ic, ic->recalc_bitmap, ic->journal); 3822 block_bitmap_copy(ic, ic->may_write_bitmap, ic->journal); 3823 if (!block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, 3824 BITMAP_OP_TEST_ALL_CLEAR)) { 3825 ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING); 3826 ic->sb->recalc_sector = cpu_to_le64(0); 3827 } 3828 } else { 3829 DEBUG_print("non-matching blocks_per_bitmap_bit: %u, %u\n", 3830 ic->sb->log2_blocks_per_bitmap_bit, ic->log2_blocks_per_bitmap_bit); 3831 ic->sb->log2_blocks_per_bitmap_bit = ic->log2_blocks_per_bitmap_bit; 3832 block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_SET); 3833 block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_SET); 3834 block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_SET); 3835 rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0, 3836 ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); 3837 ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING); 3838 ic->sb->recalc_sector = cpu_to_le64(0); 3839 } 3840 } else { 3841 if (!(ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit && 3842 block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_TEST_ALL_CLEAR)) || 3843 ic->reset_recalculate_flag) { 3844 ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING); 3845 ic->sb->recalc_sector = cpu_to_le64(0); 3846 } 3847 init_journal(ic, 0, ic->journal_sections, 0); 3848 replay_journal(ic); 3849 ic->sb->flags &= 
~cpu_to_le32(SB_FLAG_DIRTY_BITMAP); 3850 } 3851 r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA); 3852 if (unlikely(r)) 3853 dm_integrity_io_error(ic, "writing superblock", r); 3854 } else { 3855 replay_journal(ic); 3856 if (ic->reset_recalculate_flag) { 3857 ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING); 3858 ic->sb->recalc_sector = cpu_to_le64(0); 3859 } 3860 if (ic->mode == 'B') { 3861 ic->sb->flags |= cpu_to_le32(SB_FLAG_DIRTY_BITMAP); 3862 ic->sb->log2_blocks_per_bitmap_bit = ic->log2_blocks_per_bitmap_bit; 3863 r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA); 3864 if (unlikely(r)) 3865 dm_integrity_io_error(ic, "writing superblock", r); 3866 3867 block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR); 3868 block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR); 3869 block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR); 3870 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) && 3871 le64_to_cpu(ic->sb->recalc_sector) < ic->provided_data_sectors) { 3872 block_bitmap_op(ic, ic->journal, le64_to_cpu(ic->sb->recalc_sector), 3873 ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET); 3874 block_bitmap_op(ic, ic->recalc_bitmap, le64_to_cpu(ic->sb->recalc_sector), 3875 ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET); 3876 block_bitmap_op(ic, ic->may_write_bitmap, le64_to_cpu(ic->sb->recalc_sector), 3877 ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET); 3878 } 3879 rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0, 3880 ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); 3881 } 3882 } 3883 3884 DEBUG_print("testing recalc: %x\n", ic->sb->flags); 3885 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) { 3886 __u64 recalc_pos = le64_to_cpu(ic->sb->recalc_sector); 3887 3888 DEBUG_print("recalc pos: %llx / %llx\n", recalc_pos, ic->provided_data_sectors); 3889 if (recalc_pos < ic->provided_data_sectors) { 3890 queue_work(ic->recalc_wq, &ic->recalc_work); 3891 } else if (recalc_pos > ic->provided_data_sectors) { 3892 ic->sb->recalc_sector = cpu_to_le64(ic->provided_data_sectors); 3893 recalc_write_super(ic); 3894 } 3895 } 3896 3897 ic->reboot_notifier.notifier_call = dm_integrity_reboot; 3898 ic->reboot_notifier.next = NULL; 3899 ic->reboot_notifier.priority = INT_MAX - 1; /* be notified after md and before hardware drivers */ 3900 WARN_ON(register_reboot_notifier(&ic->reboot_notifier)); 3901 3902 #if 0 3903 /* set to 1 to stress test synchronous mode */ 3904 dm_integrity_enter_synchronous_mode(ic); 3905 #endif 3906 } 3907 3908 static void dm_integrity_status(struct dm_target *ti, status_type_t type, 3909 unsigned int status_flags, char *result, unsigned int maxlen) 3910 { 3911 struct dm_integrity_c *ic = ti->private; 3912 unsigned int arg_count; 3913 size_t sz = 0; 3914 3915 switch (type) { 3916 case STATUSTYPE_INFO: 3917 DMEMIT("%llu %llu", 3918 (unsigned long long)atomic64_read(&ic->number_of_mismatches), 3919 ic->provided_data_sectors); 3920 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) 3921 DMEMIT(" %llu", le64_to_cpu(ic->sb->recalc_sector)); 3922 else 3923 DMEMIT(" -"); 3924 break; 3925 3926 case STATUSTYPE_TABLE: { 3927 arg_count = 1; /* buffer_sectors */ 3928 arg_count += !!ic->meta_dev; 3929 arg_count += ic->sectors_per_block != 1; 3930 arg_count += !!(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)); 3931 arg_count += ic->reset_recalculate_flag; 3932 arg_count += 
ic->discard; 3933 arg_count += ic->mode != 'I'; /* interleave_sectors */ 3934 arg_count += ic->mode == 'J'; /* journal_sectors */ 3935 arg_count += ic->mode == 'J'; /* journal_watermark */ 3936 arg_count += ic->mode == 'J'; /* commit_time */ 3937 arg_count += ic->mode == 'B'; /* sectors_per_bit */ 3938 arg_count += ic->mode == 'B'; /* bitmap_flush_interval */ 3939 arg_count += !!ic->internal_hash_alg.alg_string; 3940 arg_count += !!ic->journal_crypt_alg.alg_string; 3941 arg_count += !!ic->journal_mac_alg.alg_string; 3942 arg_count += (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0; 3943 arg_count += (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) != 0; 3944 arg_count += ic->legacy_recalculate; 3945 DMEMIT("%s %llu %u %c %u", ic->dev->name, ic->start, 3946 ic->tag_size, ic->mode, arg_count); 3947 if (ic->meta_dev) 3948 DMEMIT(" meta_device:%s", ic->meta_dev->name); 3949 if (ic->sectors_per_block != 1) 3950 DMEMIT(" block_size:%u", ic->sectors_per_block << SECTOR_SHIFT); 3951 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) 3952 DMEMIT(" recalculate"); 3953 if (ic->reset_recalculate_flag) 3954 DMEMIT(" reset_recalculate"); 3955 if (ic->discard) 3956 DMEMIT(" allow_discards"); 3957 if (ic->mode != 'I') 3958 DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors); 3959 DMEMIT(" buffer_sectors:%u", 1U << ic->log2_buffer_sectors); 3960 if (ic->mode == 'J') { 3961 __u64 watermark_percentage = (__u64)(ic->journal_entries - ic->free_sectors_threshold) * 100; 3962 3963 watermark_percentage += ic->journal_entries / 2; 3964 do_div(watermark_percentage, ic->journal_entries); 3965 DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS); 3966 DMEMIT(" journal_watermark:%u", (unsigned int)watermark_percentage); 3967 DMEMIT(" commit_time:%u", ic->autocommit_msec); 3968 } 3969 if (ic->mode == 'B') { 3970 DMEMIT(" sectors_per_bit:%llu", (sector_t)ic->sectors_per_block << ic->log2_blocks_per_bitmap_bit); 3971 DMEMIT(" bitmap_flush_interval:%u", jiffies_to_msecs(ic->bitmap_flush_interval)); 3972 } 3973 if ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0) 3974 DMEMIT(" fix_padding"); 3975 if ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) != 0) 3976 DMEMIT(" fix_hmac"); 3977 if (ic->legacy_recalculate) 3978 DMEMIT(" legacy_recalculate"); 3979 3980 #define EMIT_ALG(a, n) \ 3981 do { \ 3982 if (ic->a.alg_string) { \ 3983 DMEMIT(" %s:%s", n, ic->a.alg_string); \ 3984 if (ic->a.key_string) \ 3985 DMEMIT(":%s", ic->a.key_string);\ 3986 } \ 3987 } while (0) 3988 EMIT_ALG(internal_hash_alg, "internal_hash"); 3989 EMIT_ALG(journal_crypt_alg, "journal_crypt"); 3990 EMIT_ALG(journal_mac_alg, "journal_mac"); 3991 break; 3992 } 3993 case STATUSTYPE_IMA: 3994 DMEMIT_TARGET_NAME_VERSION(ti->type); 3995 DMEMIT(",dev_name=%s,start=%llu,tag_size=%u,mode=%c", 3996 ic->dev->name, ic->start, ic->tag_size, ic->mode); 3997 3998 if (ic->meta_dev) 3999 DMEMIT(",meta_device=%s", ic->meta_dev->name); 4000 if (ic->sectors_per_block != 1) 4001 DMEMIT(",block_size=%u", ic->sectors_per_block << SECTOR_SHIFT); 4002 4003 DMEMIT(",recalculate=%c", (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) ? 4004 'y' : 'n'); 4005 DMEMIT(",allow_discards=%c", ic->discard ? 'y' : 'n'); 4006 DMEMIT(",fix_padding=%c", 4007 ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0) ? 'y' : 'n'); 4008 DMEMIT(",fix_hmac=%c", 4009 ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) != 0) ? 'y' : 'n'); 4010 DMEMIT(",legacy_recalculate=%c", ic->legacy_recalculate ? 
'y' : 'n'); 4011 4012 DMEMIT(",journal_sectors=%u", ic->initial_sectors - SB_SECTORS); 4013 DMEMIT(",interleave_sectors=%u", 1U << ic->sb->log2_interleave_sectors); 4014 DMEMIT(",buffer_sectors=%u", 1U << ic->log2_buffer_sectors); 4015 DMEMIT(";"); 4016 break; 4017 } 4018 } 4019 4020 static int dm_integrity_iterate_devices(struct dm_target *ti, 4021 iterate_devices_callout_fn fn, void *data) 4022 { 4023 struct dm_integrity_c *ic = ti->private; 4024 4025 if (!ic->meta_dev) 4026 return fn(ti, ic->dev, ic->start + ic->initial_sectors + ic->metadata_run, ti->len, data); 4027 else 4028 return fn(ti, ic->dev, 0, ti->len, data); 4029 } 4030 4031 static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *limits) 4032 { 4033 struct dm_integrity_c *ic = ti->private; 4034 4035 if (ic->sectors_per_block > 1) { 4036 limits->logical_block_size = ic->sectors_per_block << SECTOR_SHIFT; 4037 limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT; 4038 limits->io_min = ic->sectors_per_block << SECTOR_SHIFT; 4039 limits->dma_alignment = limits->logical_block_size - 1; 4040 limits->discard_granularity = ic->sectors_per_block << SECTOR_SHIFT; 4041 } 4042 4043 if (!ic->internal_hash) { 4044 struct blk_integrity *bi = &limits->integrity; 4045 4046 memset(bi, 0, sizeof(*bi)); 4047 bi->metadata_size = ic->tag_size; 4048 bi->tag_size = bi->metadata_size; 4049 bi->interval_exp = 4050 ic->sb->log2_sectors_per_block + SECTOR_SHIFT; 4051 } 4052 4053 limits->max_integrity_segments = USHRT_MAX; 4054 } 4055 4056 static void calculate_journal_section_size(struct dm_integrity_c *ic) 4057 { 4058 unsigned int sector_space = JOURNAL_SECTOR_DATA; 4059 4060 ic->journal_sections = le32_to_cpu(ic->sb->journal_sections); 4061 ic->journal_entry_size = roundup(offsetof(struct journal_entry, last_bytes[ic->sectors_per_block]) + ic->tag_size, 4062 JOURNAL_ENTRY_ROUNDUP); 4063 4064 if (ic->sb->flags & cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC)) 4065 sector_space -= JOURNAL_MAC_PER_SECTOR; 4066 ic->journal_entries_per_sector = sector_space / ic->journal_entry_size; 4067 ic->journal_section_entries = ic->journal_entries_per_sector * JOURNAL_BLOCK_SECTORS; 4068 ic->journal_section_sectors = (ic->journal_section_entries << ic->sb->log2_sectors_per_block) + JOURNAL_BLOCK_SECTORS; 4069 ic->journal_entries = ic->journal_section_entries * ic->journal_sections; 4070 } 4071 4072 static int calculate_device_limits(struct dm_integrity_c *ic) 4073 { 4074 __u64 initial_sectors; 4075 4076 calculate_journal_section_size(ic); 4077 initial_sectors = SB_SECTORS + (__u64)ic->journal_section_sectors * ic->journal_sections; 4078 if (initial_sectors + METADATA_PADDING_SECTORS >= ic->meta_device_sectors || initial_sectors > UINT_MAX) 4079 return -EINVAL; 4080 ic->initial_sectors = initial_sectors; 4081 4082 if (ic->mode == 'I') { 4083 if (ic->initial_sectors + ic->provided_data_sectors > ic->meta_device_sectors) 4084 return -EINVAL; 4085 } else if (!ic->meta_dev) { 4086 sector_t last_sector, last_area, last_offset; 4087 4088 /* we have to maintain excessive padding for compatibility with existing volumes */ 4089 __u64 metadata_run_padding = 4090 ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING) ? 
4091 (__u64)(METADATA_PADDING_SECTORS << SECTOR_SHIFT) : 4092 (__u64)(1 << SECTOR_SHIFT << METADATA_PADDING_SECTORS); 4093 4094 ic->metadata_run = round_up((__u64)ic->tag_size << (ic->sb->log2_interleave_sectors - ic->sb->log2_sectors_per_block), 4095 metadata_run_padding) >> SECTOR_SHIFT; 4096 if (!(ic->metadata_run & (ic->metadata_run - 1))) 4097 ic->log2_metadata_run = __ffs(ic->metadata_run); 4098 else 4099 ic->log2_metadata_run = -1; 4100 4101 get_area_and_offset(ic, ic->provided_data_sectors - 1, &last_area, &last_offset); 4102 last_sector = get_data_sector(ic, last_area, last_offset); 4103 if (last_sector < ic->start || last_sector >= ic->meta_device_sectors) 4104 return -EINVAL; 4105 } else { 4106 __u64 meta_size = (ic->provided_data_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size; 4107 4108 meta_size = (meta_size + ((1U << (ic->log2_buffer_sectors + SECTOR_SHIFT)) - 1)) 4109 >> (ic->log2_buffer_sectors + SECTOR_SHIFT); 4110 meta_size <<= ic->log2_buffer_sectors; 4111 if (ic->initial_sectors + meta_size < ic->initial_sectors || 4112 ic->initial_sectors + meta_size > ic->meta_device_sectors) 4113 return -EINVAL; 4114 ic->metadata_run = 1; 4115 ic->log2_metadata_run = 0; 4116 } 4117 4118 return 0; 4119 } 4120 4121 static void get_provided_data_sectors(struct dm_integrity_c *ic) 4122 { 4123 if (!ic->meta_dev) { 4124 int test_bit; 4125 4126 ic->provided_data_sectors = 0; 4127 for (test_bit = fls64(ic->meta_device_sectors) - 1; test_bit >= 3; test_bit--) { 4128 __u64 prev_data_sectors = ic->provided_data_sectors; 4129 4130 ic->provided_data_sectors |= (sector_t)1 << test_bit; 4131 if (calculate_device_limits(ic)) 4132 ic->provided_data_sectors = prev_data_sectors; 4133 } 4134 } else { 4135 ic->provided_data_sectors = ic->data_device_sectors; 4136 ic->provided_data_sectors &= ~(sector_t)(ic->sectors_per_block - 1); 4137 } 4138 } 4139 4140 static int initialize_superblock(struct dm_integrity_c *ic, 4141 unsigned int journal_sectors, unsigned int interleave_sectors) 4142 { 4143 unsigned int journal_sections; 4144 int test_bit; 4145 4146 memset(ic->sb, 0, SB_SECTORS << SECTOR_SHIFT); 4147 memcpy(ic->sb->magic, SB_MAGIC, 8); 4148 if (ic->mode == 'I') 4149 ic->sb->flags |= cpu_to_le32(SB_FLAG_INLINE); 4150 ic->sb->integrity_tag_size = cpu_to_le16(ic->tag_size); 4151 ic->sb->log2_sectors_per_block = __ffs(ic->sectors_per_block); 4152 if (ic->journal_mac_alg.alg_string) 4153 ic->sb->flags |= cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC); 4154 4155 calculate_journal_section_size(ic); 4156 journal_sections = journal_sectors / ic->journal_section_sectors; 4157 if (!journal_sections) 4158 journal_sections = 1; 4159 if (ic->mode == 'I') 4160 journal_sections = 0; 4161 4162 if (ic->fix_hmac && (ic->internal_hash_alg.alg_string || ic->journal_mac_alg.alg_string)) { 4163 ic->sb->flags |= cpu_to_le32(SB_FLAG_FIXED_HMAC); 4164 get_random_bytes(ic->sb->salt, SALT_SIZE); 4165 } 4166 4167 if (!ic->meta_dev) { 4168 if (ic->fix_padding) 4169 ic->sb->flags |= cpu_to_le32(SB_FLAG_FIXED_PADDING); 4170 ic->sb->journal_sections = cpu_to_le32(journal_sections); 4171 if (!interleave_sectors) 4172 interleave_sectors = DEFAULT_INTERLEAVE_SECTORS; 4173 ic->sb->log2_interleave_sectors = __fls(interleave_sectors); 4174 ic->sb->log2_interleave_sectors = max_t(__u8, MIN_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors); 4175 ic->sb->log2_interleave_sectors = min_t(__u8, MAX_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors); 4176 4177 get_provided_data_sectors(ic); 4178 if (!ic->provided_data_sectors) 4179 
return -EINVAL; 4180 } else { 4181 ic->sb->log2_interleave_sectors = 0; 4182 4183 get_provided_data_sectors(ic); 4184 if (!ic->provided_data_sectors) 4185 return -EINVAL; 4186 4187 try_smaller_buffer: 4188 ic->sb->journal_sections = cpu_to_le32(0); 4189 for (test_bit = fls(journal_sections) - 1; test_bit >= 0; test_bit--) { 4190 __u32 prev_journal_sections = le32_to_cpu(ic->sb->journal_sections); 4191 __u32 test_journal_sections = prev_journal_sections | (1U << test_bit); 4192 4193 if (test_journal_sections > journal_sections) 4194 continue; 4195 ic->sb->journal_sections = cpu_to_le32(test_journal_sections); 4196 if (calculate_device_limits(ic)) 4197 ic->sb->journal_sections = cpu_to_le32(prev_journal_sections); 4198 4199 } 4200 if (!le32_to_cpu(ic->sb->journal_sections)) { 4201 if (ic->log2_buffer_sectors > 3) { 4202 ic->log2_buffer_sectors--; 4203 goto try_smaller_buffer; 4204 } 4205 return -EINVAL; 4206 } 4207 } 4208 4209 ic->sb->provided_data_sectors = cpu_to_le64(ic->provided_data_sectors); 4210 4211 sb_set_version(ic); 4212 4213 return 0; 4214 } 4215 4216 static void dm_integrity_free_page_list(struct page_list *pl) 4217 { 4218 unsigned int i; 4219 4220 if (!pl) 4221 return; 4222 for (i = 0; pl[i].page; i++) 4223 __free_page(pl[i].page); 4224 kvfree(pl); 4225 } 4226 4227 static struct page_list *dm_integrity_alloc_page_list(unsigned int n_pages) 4228 { 4229 struct page_list *pl; 4230 unsigned int i; 4231 4232 pl = kvmalloc_array(n_pages + 1, sizeof(struct page_list), GFP_KERNEL | __GFP_ZERO); 4233 if (!pl) 4234 return NULL; 4235 4236 for (i = 0; i < n_pages; i++) { 4237 pl[i].page = alloc_page(GFP_KERNEL); 4238 if (!pl[i].page) { 4239 dm_integrity_free_page_list(pl); 4240 return NULL; 4241 } 4242 if (i) 4243 pl[i - 1].next = &pl[i]; 4244 } 4245 pl[i].page = NULL; 4246 pl[i].next = NULL; 4247 4248 return pl; 4249 } 4250 4251 static void dm_integrity_free_journal_scatterlist(struct dm_integrity_c *ic, struct scatterlist **sl) 4252 { 4253 unsigned int i; 4254 4255 for (i = 0; i < ic->journal_sections; i++) 4256 kvfree(sl[i]); 4257 kvfree(sl); 4258 } 4259 4260 static struct scatterlist **dm_integrity_alloc_journal_scatterlist(struct dm_integrity_c *ic, 4261 struct page_list *pl) 4262 { 4263 struct scatterlist **sl; 4264 unsigned int i; 4265 4266 sl = kvmalloc_array(ic->journal_sections, 4267 sizeof(struct scatterlist *), 4268 GFP_KERNEL | __GFP_ZERO); 4269 if (!sl) 4270 return NULL; 4271 4272 for (i = 0; i < ic->journal_sections; i++) { 4273 struct scatterlist *s; 4274 unsigned int start_index, start_offset; 4275 unsigned int end_index, end_offset; 4276 unsigned int n_pages; 4277 unsigned int idx; 4278 4279 page_list_location(ic, i, 0, &start_index, &start_offset); 4280 page_list_location(ic, i, ic->journal_section_sectors - 1, 4281 &end_index, &end_offset); 4282 4283 n_pages = (end_index - start_index + 1); 4284 4285 s = kvmalloc_array(n_pages, sizeof(struct scatterlist), 4286 GFP_KERNEL); 4287 if (!s) { 4288 dm_integrity_free_journal_scatterlist(ic, sl); 4289 return NULL; 4290 } 4291 4292 sg_init_table(s, n_pages); 4293 for (idx = start_index; idx <= end_index; idx++) { 4294 char *va = lowmem_page_address(pl[idx].page); 4295 unsigned int start = 0, end = PAGE_SIZE; 4296 4297 if (idx == start_index) 4298 start = start_offset; 4299 if (idx == end_index) 4300 end = end_offset + (1 << SECTOR_SHIFT); 4301 sg_set_buf(&s[idx - start_index], va + start, end - start); 4302 } 4303 4304 sl[i] = s; 4305 } 4306 4307 return sl; 4308 } 4309 4310 static void free_alg(struct alg_spec *a) 4311 { 4312 
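	/*
	 * a->key_string points into the a->alg_string allocation (see
	 * get_alg_and_key() below), so it is not freed separately; both
	 * alg_string and key may hold key material and are therefore zeroed
	 * when freed.
	 */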
kfree_sensitive(a->alg_string); 4313 kfree_sensitive(a->key); 4314 memset(a, 0, sizeof(*a)); 4315 } 4316 4317 static int get_alg_and_key(const char *arg, struct alg_spec *a, char **error, char *error_inval) 4318 { 4319 char *k; 4320 4321 free_alg(a); 4322 4323 a->alg_string = kstrdup(strchr(arg, ':') + 1, GFP_KERNEL); 4324 if (!a->alg_string) 4325 goto nomem; 4326 4327 k = strchr(a->alg_string, ':'); 4328 if (k) { 4329 *k = 0; 4330 a->key_string = k + 1; 4331 if (strlen(a->key_string) & 1) 4332 goto inval; 4333 4334 a->key_size = strlen(a->key_string) / 2; 4335 a->key = kmalloc(a->key_size, GFP_KERNEL); 4336 if (!a->key) 4337 goto nomem; 4338 if (hex2bin(a->key, a->key_string, a->key_size)) 4339 goto inval; 4340 } 4341 4342 return 0; 4343 inval: 4344 *error = error_inval; 4345 return -EINVAL; 4346 nomem: 4347 *error = "Out of memory for an argument"; 4348 return -ENOMEM; 4349 } 4350 4351 static int get_mac(struct crypto_shash **shash, struct crypto_ahash **ahash, 4352 struct alg_spec *a, char **error, char *error_alg, char *error_key) 4353 { 4354 int r; 4355 4356 if (a->alg_string) { 4357 if (shash) { 4358 *shash = crypto_alloc_shash(a->alg_string, 0, CRYPTO_ALG_ALLOCATES_MEMORY); 4359 if (IS_ERR(*shash)) { 4360 *shash = NULL; 4361 goto try_ahash; 4362 } 4363 if (a->key) { 4364 r = crypto_shash_setkey(*shash, a->key, a->key_size); 4365 if (r) { 4366 *error = error_key; 4367 return r; 4368 } 4369 } else if (crypto_shash_get_flags(*shash) & CRYPTO_TFM_NEED_KEY) { 4370 *error = error_key; 4371 return -ENOKEY; 4372 } 4373 return 0; 4374 } 4375 try_ahash: 4376 if (ahash) { 4377 *ahash = crypto_alloc_ahash(a->alg_string, 0, CRYPTO_ALG_ALLOCATES_MEMORY); 4378 if (IS_ERR(*ahash)) { 4379 *error = error_alg; 4380 r = PTR_ERR(*ahash); 4381 *ahash = NULL; 4382 return r; 4383 } 4384 if (a->key) { 4385 r = crypto_ahash_setkey(*ahash, a->key, a->key_size); 4386 if (r) { 4387 *error = error_key; 4388 return r; 4389 } 4390 } else if (crypto_ahash_get_flags(*ahash) & CRYPTO_TFM_NEED_KEY) { 4391 *error = error_key; 4392 return -ENOKEY; 4393 } 4394 return 0; 4395 } 4396 *error = error_alg; 4397 return -ENOENT; 4398 } 4399 4400 return 0; 4401 } 4402 4403 static int create_journal(struct dm_integrity_c *ic, char **error) 4404 { 4405 int r = 0; 4406 unsigned int i; 4407 __u64 journal_pages, journal_desc_size, journal_tree_size; 4408 unsigned char *crypt_data = NULL, *crypt_iv = NULL; 4409 struct skcipher_request *req = NULL; 4410 4411 ic->commit_ids[0] = cpu_to_le64(0x1111111111111111ULL); 4412 ic->commit_ids[1] = cpu_to_le64(0x2222222222222222ULL); 4413 ic->commit_ids[2] = cpu_to_le64(0x3333333333333333ULL); 4414 ic->commit_ids[3] = cpu_to_le64(0x4444444444444444ULL); 4415 4416 journal_pages = roundup((__u64)ic->journal_sections * ic->journal_section_sectors, 4417 PAGE_SIZE >> SECTOR_SHIFT) >> (PAGE_SHIFT - SECTOR_SHIFT); 4418 journal_desc_size = journal_pages * sizeof(struct page_list); 4419 if (journal_pages >= totalram_pages() - totalhigh_pages() || journal_desc_size > ULONG_MAX) { 4420 *error = "Journal doesn't fit into memory"; 4421 r = -ENOMEM; 4422 goto bad; 4423 } 4424 ic->journal_pages = journal_pages; 4425 4426 ic->journal = dm_integrity_alloc_page_list(ic->journal_pages); 4427 if (!ic->journal) { 4428 *error = "Could not allocate memory for journal"; 4429 r = -ENOMEM; 4430 goto bad; 4431 } 4432 if (ic->journal_crypt_alg.alg_string) { 4433 unsigned int ivsize, blocksize; 4434 struct journal_completion comp; 4435 4436 comp.ic = ic; 4437 ic->journal_crypt = 
crypto_alloc_skcipher(ic->journal_crypt_alg.alg_string, 0, CRYPTO_ALG_ALLOCATES_MEMORY); 4438 if (IS_ERR(ic->journal_crypt)) { 4439 *error = "Invalid journal cipher"; 4440 r = PTR_ERR(ic->journal_crypt); 4441 ic->journal_crypt = NULL; 4442 goto bad; 4443 } 4444 ivsize = crypto_skcipher_ivsize(ic->journal_crypt); 4445 blocksize = crypto_skcipher_blocksize(ic->journal_crypt); 4446 4447 if (ic->journal_crypt_alg.key) { 4448 r = crypto_skcipher_setkey(ic->journal_crypt, ic->journal_crypt_alg.key, 4449 ic->journal_crypt_alg.key_size); 4450 if (r) { 4451 *error = "Error setting encryption key"; 4452 goto bad; 4453 } 4454 } 4455 DEBUG_print("cipher %s, block size %u iv size %u\n", 4456 ic->journal_crypt_alg.alg_string, blocksize, ivsize); 4457 4458 ic->journal_io = dm_integrity_alloc_page_list(ic->journal_pages); 4459 if (!ic->journal_io) { 4460 *error = "Could not allocate memory for journal io"; 4461 r = -ENOMEM; 4462 goto bad; 4463 } 4464 4465 if (blocksize == 1) { 4466 struct scatterlist *sg; 4467 4468 req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL); 4469 if (!req) { 4470 *error = "Could not allocate crypt request"; 4471 r = -ENOMEM; 4472 goto bad; 4473 } 4474 4475 crypt_iv = kzalloc(ivsize, GFP_KERNEL); 4476 if (!crypt_iv) { 4477 *error = "Could not allocate iv"; 4478 r = -ENOMEM; 4479 goto bad; 4480 } 4481 4482 ic->journal_xor = dm_integrity_alloc_page_list(ic->journal_pages); 4483 if (!ic->journal_xor) { 4484 *error = "Could not allocate memory for journal xor"; 4485 r = -ENOMEM; 4486 goto bad; 4487 } 4488 4489 sg = kvmalloc_array(ic->journal_pages + 1, 4490 sizeof(struct scatterlist), 4491 GFP_KERNEL); 4492 if (!sg) { 4493 *error = "Unable to allocate sg list"; 4494 r = -ENOMEM; 4495 goto bad; 4496 } 4497 sg_init_table(sg, ic->journal_pages + 1); 4498 for (i = 0; i < ic->journal_pages; i++) { 4499 char *va = lowmem_page_address(ic->journal_xor[i].page); 4500 4501 clear_page(va); 4502 sg_set_buf(&sg[i], va, PAGE_SIZE); 4503 } 4504 sg_set_buf(&sg[i], &ic->commit_ids, sizeof(ic->commit_ids)); 4505 4506 skcipher_request_set_crypt(req, sg, sg, 4507 PAGE_SIZE * ic->journal_pages + sizeof(ic->commit_ids), crypt_iv); 4508 init_completion(&comp.comp); 4509 comp.in_flight = (atomic_t)ATOMIC_INIT(1); 4510 if (do_crypt(true, req, &comp)) 4511 wait_for_completion(&comp.comp); 4512 kvfree(sg); 4513 r = dm_integrity_failed(ic); 4514 if (r) { 4515 *error = "Unable to encrypt journal"; 4516 goto bad; 4517 } 4518 DEBUG_bytes(lowmem_page_address(ic->journal_xor[0].page), 64, "xor data"); 4519 4520 crypto_free_skcipher(ic->journal_crypt); 4521 ic->journal_crypt = NULL; 4522 } else { 4523 unsigned int crypt_len = roundup(ivsize, blocksize); 4524 4525 req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL); 4526 if (!req) { 4527 *error = "Could not allocate crypt request"; 4528 r = -ENOMEM; 4529 goto bad; 4530 } 4531 4532 crypt_iv = kmalloc(ivsize, GFP_KERNEL); 4533 if (!crypt_iv) { 4534 *error = "Could not allocate iv"; 4535 r = -ENOMEM; 4536 goto bad; 4537 } 4538 4539 crypt_data = kmalloc(crypt_len, GFP_KERNEL); 4540 if (!crypt_data) { 4541 *error = "Unable to allocate crypt data"; 4542 r = -ENOMEM; 4543 goto bad; 4544 } 4545 4546 ic->journal_scatterlist = dm_integrity_alloc_journal_scatterlist(ic, ic->journal); 4547 if (!ic->journal_scatterlist) { 4548 *error = "Unable to allocate sg list"; 4549 r = -ENOMEM; 4550 goto bad; 4551 } 4552 ic->journal_io_scatterlist = dm_integrity_alloc_journal_scatterlist(ic, ic->journal_io); 4553 if (!ic->journal_io_scatterlist) { 4554 *error = "Unable to 
allocate sg list"; 4555 r = -ENOMEM; 4556 goto bad; 4557 } 4558 ic->sk_requests = kvmalloc_array(ic->journal_sections, 4559 sizeof(struct skcipher_request *), 4560 GFP_KERNEL | __GFP_ZERO); 4561 if (!ic->sk_requests) { 4562 *error = "Unable to allocate sk requests"; 4563 r = -ENOMEM; 4564 goto bad; 4565 } 4566 for (i = 0; i < ic->journal_sections; i++) { 4567 struct scatterlist sg; 4568 struct skcipher_request *section_req; 4569 __le32 section_le = cpu_to_le32(i); 4570 4571 memset(crypt_iv, 0x00, ivsize); 4572 memset(crypt_data, 0x00, crypt_len); 4573 memcpy(crypt_data, §ion_le, min_t(size_t, crypt_len, sizeof(section_le))); 4574 4575 sg_init_one(&sg, crypt_data, crypt_len); 4576 skcipher_request_set_crypt(req, &sg, &sg, crypt_len, crypt_iv); 4577 init_completion(&comp.comp); 4578 comp.in_flight = (atomic_t)ATOMIC_INIT(1); 4579 if (do_crypt(true, req, &comp)) 4580 wait_for_completion(&comp.comp); 4581 4582 r = dm_integrity_failed(ic); 4583 if (r) { 4584 *error = "Unable to generate iv"; 4585 goto bad; 4586 } 4587 4588 section_req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL); 4589 if (!section_req) { 4590 *error = "Unable to allocate crypt request"; 4591 r = -ENOMEM; 4592 goto bad; 4593 } 4594 section_req->iv = kmalloc_array(ivsize, 2, 4595 GFP_KERNEL); 4596 if (!section_req->iv) { 4597 skcipher_request_free(section_req); 4598 *error = "Unable to allocate iv"; 4599 r = -ENOMEM; 4600 goto bad; 4601 } 4602 memcpy(section_req->iv + ivsize, crypt_data, ivsize); 4603 section_req->cryptlen = (size_t)ic->journal_section_sectors << SECTOR_SHIFT; 4604 ic->sk_requests[i] = section_req; 4605 DEBUG_bytes(crypt_data, ivsize, "iv(%u)", i); 4606 } 4607 } 4608 } 4609 4610 for (i = 0; i < N_COMMIT_IDS; i++) { 4611 unsigned int j; 4612 4613 retest_commit_id: 4614 for (j = 0; j < i; j++) { 4615 if (ic->commit_ids[j] == ic->commit_ids[i]) { 4616 ic->commit_ids[i] = cpu_to_le64(le64_to_cpu(ic->commit_ids[i]) + 1); 4617 goto retest_commit_id; 4618 } 4619 } 4620 DEBUG_print("commit id %u: %016llx\n", i, ic->commit_ids[i]); 4621 } 4622 4623 journal_tree_size = (__u64)ic->journal_entries * sizeof(struct journal_node); 4624 if (journal_tree_size > ULONG_MAX) { 4625 *error = "Journal doesn't fit into memory"; 4626 r = -ENOMEM; 4627 goto bad; 4628 } 4629 ic->journal_tree = kvmalloc(journal_tree_size, GFP_KERNEL); 4630 if (!ic->journal_tree) { 4631 *error = "Could not allocate memory for journal tree"; 4632 r = -ENOMEM; 4633 } 4634 bad: 4635 kfree(crypt_data); 4636 kfree(crypt_iv); 4637 skcipher_request_free(req); 4638 4639 return r; 4640 } 4641 4642 /* 4643 * Construct a integrity mapping 4644 * 4645 * Arguments: 4646 * device 4647 * offset from the start of the device 4648 * tag size 4649 * D - direct writes, J - journal writes, B - bitmap mode, R - recovery mode 4650 * number of optional arguments 4651 * optional arguments: 4652 * journal_sectors 4653 * interleave_sectors 4654 * buffer_sectors 4655 * journal_watermark 4656 * commit_time 4657 * meta_device 4658 * block_size 4659 * sectors_per_bit 4660 * bitmap_flush_interval 4661 * internal_hash 4662 * journal_crypt 4663 * journal_mac 4664 * recalculate 4665 */ 4666 static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv) 4667 { 4668 struct dm_integrity_c *ic; 4669 char dummy; 4670 int r; 4671 unsigned int extra_args; 4672 struct dm_arg_set as; 4673 static const struct dm_arg _args[] = { 4674 {0, 18, "Invalid number of feature args"}, 4675 }; 4676 unsigned int journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, 
sync_msec; 4677 bool should_write_sb; 4678 __u64 threshold; 4679 unsigned long long start; 4680 __s8 log2_sectors_per_bitmap_bit = -1; 4681 __s8 log2_blocks_per_bitmap_bit; 4682 __u64 bits_in_journal; 4683 __u64 n_bitmap_bits; 4684 4685 #define DIRECT_ARGUMENTS 4 4686 4687 if (argc <= DIRECT_ARGUMENTS) { 4688 ti->error = "Invalid argument count"; 4689 return -EINVAL; 4690 } 4691 4692 ic = kzalloc(sizeof(struct dm_integrity_c), GFP_KERNEL); 4693 if (!ic) { 4694 ti->error = "Cannot allocate integrity context"; 4695 return -ENOMEM; 4696 } 4697 ti->private = ic; 4698 ti->per_io_data_size = sizeof(struct dm_integrity_io); 4699 ic->ti = ti; 4700 4701 ic->in_progress = RB_ROOT; 4702 INIT_LIST_HEAD(&ic->wait_list); 4703 init_waitqueue_head(&ic->endio_wait); 4704 bio_list_init(&ic->flush_bio_list); 4705 init_waitqueue_head(&ic->copy_to_journal_wait); 4706 init_completion(&ic->crypto_backoff); 4707 atomic64_set(&ic->number_of_mismatches, 0); 4708 ic->bitmap_flush_interval = BITMAP_FLUSH_INTERVAL; 4709 4710 r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &ic->dev); 4711 if (r) { 4712 ti->error = "Device lookup failed"; 4713 goto bad; 4714 } 4715 4716 if (sscanf(argv[1], "%llu%c", &start, &dummy) != 1 || start != (sector_t)start) { 4717 ti->error = "Invalid starting offset"; 4718 r = -EINVAL; 4719 goto bad; 4720 } 4721 ic->start = start; 4722 4723 if (strcmp(argv[2], "-")) { 4724 if (sscanf(argv[2], "%u%c", &ic->tag_size, &dummy) != 1 || !ic->tag_size) { 4725 ti->error = "Invalid tag size"; 4726 r = -EINVAL; 4727 goto bad; 4728 } 4729 } 4730 4731 if (!strcmp(argv[3], "J") || !strcmp(argv[3], "B") || 4732 !strcmp(argv[3], "D") || !strcmp(argv[3], "R") || 4733 !strcmp(argv[3], "I")) { 4734 ic->mode = argv[3][0]; 4735 } else { 4736 ti->error = "Invalid mode (expecting J, B, D, R, I)"; 4737 r = -EINVAL; 4738 goto bad; 4739 } 4740 4741 journal_sectors = 0; 4742 interleave_sectors = DEFAULT_INTERLEAVE_SECTORS; 4743 buffer_sectors = DEFAULT_BUFFER_SECTORS; 4744 journal_watermark = DEFAULT_JOURNAL_WATERMARK; 4745 sync_msec = DEFAULT_SYNC_MSEC; 4746 ic->sectors_per_block = 1; 4747 4748 as.argc = argc - DIRECT_ARGUMENTS; 4749 as.argv = argv + DIRECT_ARGUMENTS; 4750 r = dm_read_arg_group(_args, &as, &extra_args, &ti->error); 4751 if (r) 4752 goto bad; 4753 4754 while (extra_args--) { 4755 const char *opt_string; 4756 unsigned int val; 4757 unsigned long long llval; 4758 4759 opt_string = dm_shift_arg(&as); 4760 if (!opt_string) { 4761 r = -EINVAL; 4762 ti->error = "Not enough feature arguments"; 4763 goto bad; 4764 } 4765 if (sscanf(opt_string, "journal_sectors:%u%c", &val, &dummy) == 1) 4766 journal_sectors = val ? 
val : 1; 4767 else if (sscanf(opt_string, "interleave_sectors:%u%c", &val, &dummy) == 1) 4768 interleave_sectors = val; 4769 else if (sscanf(opt_string, "buffer_sectors:%u%c", &val, &dummy) == 1) 4770 buffer_sectors = val; 4771 else if (sscanf(opt_string, "journal_watermark:%u%c", &val, &dummy) == 1 && val <= 100) 4772 journal_watermark = val; 4773 else if (sscanf(opt_string, "commit_time:%u%c", &val, &dummy) == 1) 4774 sync_msec = val; 4775 else if (!strncmp(opt_string, "meta_device:", strlen("meta_device:"))) { 4776 if (ic->meta_dev) { 4777 dm_put_device(ti, ic->meta_dev); 4778 ic->meta_dev = NULL; 4779 } 4780 r = dm_get_device(ti, strchr(opt_string, ':') + 1, 4781 dm_table_get_mode(ti->table), &ic->meta_dev); 4782 if (r) { 4783 ti->error = "Device lookup failed"; 4784 goto bad; 4785 } 4786 } else if (sscanf(opt_string, "block_size:%u%c", &val, &dummy) == 1) { 4787 if (val < 1 << SECTOR_SHIFT || 4788 val > MAX_SECTORS_PER_BLOCK << SECTOR_SHIFT || 4789 (val & (val - 1))) { 4790 r = -EINVAL; 4791 ti->error = "Invalid block_size argument"; 4792 goto bad; 4793 } 4794 ic->sectors_per_block = val >> SECTOR_SHIFT; 4795 } else if (sscanf(opt_string, "sectors_per_bit:%llu%c", &llval, &dummy) == 1) { 4796 log2_sectors_per_bitmap_bit = !llval ? 0 : __ilog2_u64(llval); 4797 } else if (sscanf(opt_string, "bitmap_flush_interval:%u%c", &val, &dummy) == 1) { 4798 if ((uint64_t)val >= (uint64_t)UINT_MAX * 1000 / HZ) { 4799 r = -EINVAL; 4800 ti->error = "Invalid bitmap_flush_interval argument"; 4801 goto bad; 4802 } 4803 ic->bitmap_flush_interval = msecs_to_jiffies(val); 4804 } else if (!strncmp(opt_string, "internal_hash:", strlen("internal_hash:"))) { 4805 r = get_alg_and_key(opt_string, &ic->internal_hash_alg, &ti->error, 4806 "Invalid internal_hash argument"); 4807 if (r) 4808 goto bad; 4809 } else if (!strncmp(opt_string, "journal_crypt:", strlen("journal_crypt:"))) { 4810 r = get_alg_and_key(opt_string, &ic->journal_crypt_alg, &ti->error, 4811 "Invalid journal_crypt argument"); 4812 if (r) 4813 goto bad; 4814 } else if (!strncmp(opt_string, "journal_mac:", strlen("journal_mac:"))) { 4815 r = get_alg_and_key(opt_string, &ic->journal_mac_alg, &ti->error, 4816 "Invalid journal_mac argument"); 4817 if (r) 4818 goto bad; 4819 } else if (!strcmp(opt_string, "recalculate")) { 4820 ic->recalculate_flag = true; 4821 } else if (!strcmp(opt_string, "reset_recalculate")) { 4822 ic->recalculate_flag = true; 4823 ic->reset_recalculate_flag = true; 4824 } else if (!strcmp(opt_string, "allow_discards")) { 4825 ic->discard = true; 4826 } else if (!strcmp(opt_string, "fix_padding")) { 4827 ic->fix_padding = true; 4828 } else if (!strcmp(opt_string, "fix_hmac")) { 4829 ic->fix_hmac = true; 4830 } else if (!strcmp(opt_string, "legacy_recalculate")) { 4831 ic->legacy_recalculate = true; 4832 } else { 4833 r = -EINVAL; 4834 ti->error = "Invalid argument"; 4835 goto bad; 4836 } 4837 } 4838 4839 ic->data_device_sectors = bdev_nr_sectors(ic->dev->bdev); 4840 if (!ic->meta_dev) 4841 ic->meta_device_sectors = ic->data_device_sectors; 4842 else 4843 ic->meta_device_sectors = bdev_nr_sectors(ic->meta_dev->bdev); 4844 4845 if (!journal_sectors) { 4846 journal_sectors = min((sector_t)DEFAULT_MAX_JOURNAL_SECTORS, 4847 ic->data_device_sectors >> DEFAULT_JOURNAL_SIZE_FACTOR); 4848 } 4849 4850 if (!buffer_sectors) 4851 buffer_sectors = 1; 4852 ic->log2_buffer_sectors = min((int)__fls(buffer_sectors), 31 - SECTOR_SHIFT); 4853 4854 r = get_mac(&ic->internal_shash, &ic->internal_ahash, &ic->internal_hash_alg, &ti->error, 4855 "Invalid 
internal hash", "Error setting internal hash key"); 4856 if (r) 4857 goto bad; 4858 if (ic->internal_shash) { 4859 ic->internal_hash = true; 4860 ic->internal_hash_digestsize = crypto_shash_digestsize(ic->internal_shash); 4861 } 4862 if (ic->internal_ahash) { 4863 ic->internal_hash = true; 4864 ic->internal_hash_digestsize = crypto_ahash_digestsize(ic->internal_ahash); 4865 r = mempool_init_kmalloc_pool(&ic->ahash_req_pool, AHASH_MEMPOOL, 4866 sizeof(struct ahash_request) + crypto_ahash_reqsize(ic->internal_ahash)); 4867 if (r) { 4868 ti->error = "Cannot allocate mempool"; 4869 goto bad; 4870 } 4871 } 4872 4873 r = get_mac(&ic->journal_mac, NULL, &ic->journal_mac_alg, &ti->error, 4874 "Invalid journal mac", "Error setting journal mac key"); 4875 if (r) 4876 goto bad; 4877 4878 if (!ic->tag_size) { 4879 if (!ic->internal_hash) { 4880 ti->error = "Unknown tag size"; 4881 r = -EINVAL; 4882 goto bad; 4883 } 4884 ic->tag_size = ic->internal_hash_digestsize; 4885 } 4886 if (ic->tag_size > MAX_TAG_SIZE) { 4887 ti->error = "Too big tag size"; 4888 r = -EINVAL; 4889 goto bad; 4890 } 4891 if (!(ic->tag_size & (ic->tag_size - 1))) 4892 ic->log2_tag_size = __ffs(ic->tag_size); 4893 else 4894 ic->log2_tag_size = -1; 4895 4896 if (ic->mode == 'I') { 4897 struct blk_integrity *bi; 4898 if (ic->meta_dev) { 4899 r = -EINVAL; 4900 ti->error = "Metadata device not supported in inline mode"; 4901 goto bad; 4902 } 4903 if (!ic->internal_hash_alg.alg_string) { 4904 r = -EINVAL; 4905 ti->error = "Internal hash not set in inline mode"; 4906 goto bad; 4907 } 4908 if (ic->journal_crypt_alg.alg_string || ic->journal_mac_alg.alg_string) { 4909 r = -EINVAL; 4910 ti->error = "Journal crypt not supported in inline mode"; 4911 goto bad; 4912 } 4913 if (ic->discard) { 4914 r = -EINVAL; 4915 ti->error = "Discards not supported in inline mode"; 4916 goto bad; 4917 } 4918 bi = blk_get_integrity(ic->dev->bdev->bd_disk); 4919 if (!bi || bi->csum_type != BLK_INTEGRITY_CSUM_NONE) { 4920 r = -EINVAL; 4921 ti->error = "Integrity profile not supported"; 4922 goto bad; 4923 } 4924 /*printk("tag_size: %u, metadata_size: %u\n", bi->tag_size, bi->metadata_size);*/ 4925 if (bi->metadata_size < ic->tag_size) { 4926 r = -EINVAL; 4927 ti->error = "The integrity profile is smaller than tag size"; 4928 goto bad; 4929 } 4930 if ((unsigned long)bi->metadata_size > PAGE_SIZE / 2) { 4931 r = -EINVAL; 4932 ti->error = "Too big tuple size"; 4933 goto bad; 4934 } 4935 ic->tuple_size = bi->metadata_size; 4936 if (1 << bi->interval_exp != ic->sectors_per_block << SECTOR_SHIFT) { 4937 r = -EINVAL; 4938 ti->error = "Integrity profile sector size mismatch"; 4939 goto bad; 4940 } 4941 } 4942 4943 if (ic->mode == 'B' && !ic->internal_hash) { 4944 r = -EINVAL; 4945 ti->error = "Bitmap mode can be only used with internal hash"; 4946 goto bad; 4947 } 4948 4949 if (ic->discard && !ic->internal_hash) { 4950 r = -EINVAL; 4951 ti->error = "Discard can be only used with internal hash"; 4952 goto bad; 4953 } 4954 4955 ic->autocommit_jiffies = msecs_to_jiffies(sync_msec); 4956 ic->autocommit_msec = sync_msec; 4957 timer_setup(&ic->autocommit_timer, autocommit_fn, 0); 4958 4959 ic->io = dm_io_client_create(); 4960 if (IS_ERR(ic->io)) { 4961 r = PTR_ERR(ic->io); 4962 ic->io = NULL; 4963 ti->error = "Cannot allocate dm io"; 4964 goto bad; 4965 } 4966 4967 r = mempool_init_slab_pool(&ic->journal_io_mempool, JOURNAL_IO_MEMPOOL, journal_io_cache); 4968 if (r) { 4969 ti->error = "Cannot allocate mempool"; 4970 goto bad; 4971 } 4972 4973 r = 
mempool_init_page_pool(&ic->recheck_pool, 1, ic->mode == 'I' ? 1 : 0); 4974 if (r) { 4975 ti->error = "Cannot allocate mempool"; 4976 goto bad; 4977 } 4978 4979 if (ic->mode == 'I') { 4980 r = bioset_init(&ic->recheck_bios, RECHECK_POOL_SIZE, 0, BIOSET_NEED_BVECS); 4981 if (r) { 4982 ti->error = "Cannot allocate bio set"; 4983 goto bad; 4984 } 4985 r = bioset_init(&ic->recalc_bios, 1, 0, BIOSET_NEED_BVECS); 4986 if (r) { 4987 ti->error = "Cannot allocate bio set"; 4988 goto bad; 4989 } 4990 } 4991 4992 ic->metadata_wq = alloc_workqueue("dm-integrity-metadata", 4993 WQ_MEM_RECLAIM, METADATA_WORKQUEUE_MAX_ACTIVE); 4994 if (!ic->metadata_wq) { 4995 ti->error = "Cannot allocate workqueue"; 4996 r = -ENOMEM; 4997 goto bad; 4998 } 4999 5000 /* 5001 * If this workqueue weren't ordered, it would cause bio reordering 5002 * and reduced performance. 5003 */ 5004 ic->wait_wq = alloc_ordered_workqueue("dm-integrity-wait", WQ_MEM_RECLAIM); 5005 if (!ic->wait_wq) { 5006 ti->error = "Cannot allocate workqueue"; 5007 r = -ENOMEM; 5008 goto bad; 5009 } 5010 5011 ic->offload_wq = alloc_workqueue("dm-integrity-offload", WQ_MEM_RECLAIM, 5012 METADATA_WORKQUEUE_MAX_ACTIVE); 5013 if (!ic->offload_wq) { 5014 ti->error = "Cannot allocate workqueue"; 5015 r = -ENOMEM; 5016 goto bad; 5017 } 5018 5019 ic->commit_wq = alloc_workqueue("dm-integrity-commit", WQ_MEM_RECLAIM, 1); 5020 if (!ic->commit_wq) { 5021 ti->error = "Cannot allocate workqueue"; 5022 r = -ENOMEM; 5023 goto bad; 5024 } 5025 INIT_WORK(&ic->commit_work, integrity_commit); 5026 5027 if (ic->mode == 'J' || ic->mode == 'B') { 5028 ic->writer_wq = alloc_workqueue("dm-integrity-writer", WQ_MEM_RECLAIM, 1); 5029 if (!ic->writer_wq) { 5030 ti->error = "Cannot allocate workqueue"; 5031 r = -ENOMEM; 5032 goto bad; 5033 } 5034 INIT_WORK(&ic->writer_work, integrity_writer); 5035 } 5036 5037 ic->sb = alloc_pages_exact(SB_SECTORS << SECTOR_SHIFT, GFP_KERNEL); 5038 if (!ic->sb) { 5039 r = -ENOMEM; 5040 ti->error = "Cannot allocate superblock area"; 5041 goto bad; 5042 } 5043 5044 r = sync_rw_sb(ic, REQ_OP_READ); 5045 if (r) { 5046 ti->error = "Error reading superblock"; 5047 goto bad; 5048 } 5049 should_write_sb = false; 5050 if (memcmp(ic->sb->magic, SB_MAGIC, 8)) { 5051 if (ic->mode != 'R') { 5052 if (memchr_inv(ic->sb, 0, SB_SECTORS << SECTOR_SHIFT)) { 5053 r = -EINVAL; 5054 ti->error = "The device is not initialized"; 5055 goto bad; 5056 } 5057 } 5058 5059 r = initialize_superblock(ic, journal_sectors, interleave_sectors); 5060 if (r) { 5061 ti->error = "Could not initialize superblock"; 5062 goto bad; 5063 } 5064 if (ic->mode != 'R') 5065 should_write_sb = true; 5066 } 5067 5068 if (!ic->sb->version || ic->sb->version > SB_VERSION_6) { 5069 r = -EINVAL; 5070 ti->error = "Unknown version"; 5071 goto bad; 5072 } 5073 if (!!(ic->sb->flags & cpu_to_le32(SB_FLAG_INLINE)) != (ic->mode == 'I')) { 5074 r = -EINVAL; 5075 ti->error = "Inline flag mismatch"; 5076 goto bad; 5077 } 5078 if (le16_to_cpu(ic->sb->integrity_tag_size) != ic->tag_size) { 5079 r = -EINVAL; 5080 ti->error = "Tag size doesn't match the information in superblock"; 5081 goto bad; 5082 } 5083 if (ic->sb->log2_sectors_per_block != __ffs(ic->sectors_per_block)) { 5084 r = -EINVAL; 5085 ti->error = "Block size doesn't match the information in superblock"; 5086 goto bad; 5087 } 5088 if (ic->mode != 'I') { 5089 if (!le32_to_cpu(ic->sb->journal_sections)) { 5090 r = -EINVAL; 5091 ti->error = "Corrupted superblock, journal_sections is 0"; 5092 goto bad; 5093 } 5094 } else { 5095 if 
(le32_to_cpu(ic->sb->journal_sections)) { 5096 r = -EINVAL; 5097 ti->error = "Corrupted superblock, journal_sections is not 0"; 5098 goto bad; 5099 } 5100 } 5101 /* make sure that ti->max_io_len doesn't overflow */ 5102 if (!ic->meta_dev) { 5103 if (ic->sb->log2_interleave_sectors < MIN_LOG2_INTERLEAVE_SECTORS || 5104 ic->sb->log2_interleave_sectors > MAX_LOG2_INTERLEAVE_SECTORS) { 5105 r = -EINVAL; 5106 ti->error = "Invalid interleave_sectors in the superblock"; 5107 goto bad; 5108 } 5109 } else { 5110 if (ic->sb->log2_interleave_sectors) { 5111 r = -EINVAL; 5112 ti->error = "Invalid interleave_sectors in the superblock"; 5113 goto bad; 5114 } 5115 } 5116 if (!!(ic->sb->flags & cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC)) != !!ic->journal_mac_alg.alg_string) { 5117 r = -EINVAL; 5118 ti->error = "Journal mac mismatch"; 5119 goto bad; 5120 } 5121 5122 get_provided_data_sectors(ic); 5123 if (!ic->provided_data_sectors) { 5124 r = -EINVAL; 5125 ti->error = "The device is too small"; 5126 goto bad; 5127 } 5128 5129 try_smaller_buffer: 5130 r = calculate_device_limits(ic); 5131 if (r) { 5132 if (ic->meta_dev) { 5133 if (ic->log2_buffer_sectors > 3) { 5134 ic->log2_buffer_sectors--; 5135 goto try_smaller_buffer; 5136 } 5137 } 5138 ti->error = "The device is too small"; 5139 goto bad; 5140 } 5141 5142 if (log2_sectors_per_bitmap_bit < 0) 5143 log2_sectors_per_bitmap_bit = __fls(DEFAULT_SECTORS_PER_BITMAP_BIT); 5144 if (log2_sectors_per_bitmap_bit < ic->sb->log2_sectors_per_block) 5145 log2_sectors_per_bitmap_bit = ic->sb->log2_sectors_per_block; 5146 5147 bits_in_journal = ((__u64)ic->journal_section_sectors * ic->journal_sections) << (SECTOR_SHIFT + 3); 5148 if (bits_in_journal > UINT_MAX) 5149 bits_in_journal = UINT_MAX; 5150 if (bits_in_journal) 5151 while (bits_in_journal < (ic->provided_data_sectors + ((sector_t)1 << log2_sectors_per_bitmap_bit) - 1) >> log2_sectors_per_bitmap_bit) 5152 log2_sectors_per_bitmap_bit++; 5153 5154 log2_blocks_per_bitmap_bit = log2_sectors_per_bitmap_bit - ic->sb->log2_sectors_per_block; 5155 ic->log2_blocks_per_bitmap_bit = log2_blocks_per_bitmap_bit; 5156 if (should_write_sb) 5157 ic->sb->log2_blocks_per_bitmap_bit = log2_blocks_per_bitmap_bit; 5158 5159 n_bitmap_bits = ((ic->provided_data_sectors >> ic->sb->log2_sectors_per_block) 5160 + (((sector_t)1 << log2_blocks_per_bitmap_bit) - 1)) >> log2_blocks_per_bitmap_bit; 5161 ic->n_bitmap_blocks = DIV_ROUND_UP(n_bitmap_bits, BITMAP_BLOCK_SIZE * 8); 5162 5163 if (!ic->meta_dev) 5164 ic->log2_buffer_sectors = min(ic->log2_buffer_sectors, (__u8)__ffs(ic->metadata_run)); 5165 5166 if (ti->len > ic->provided_data_sectors) { 5167 r = -EINVAL; 5168 ti->error = "Not enough provided sectors for requested mapping size"; 5169 goto bad; 5170 } 5171 5172 threshold = (__u64)ic->journal_entries * (100 - journal_watermark); 5173 threshold += 50; 5174 do_div(threshold, 100); 5175 ic->free_sectors_threshold = threshold; 5176 5177 DEBUG_print("initialized:\n"); 5178 DEBUG_print(" integrity_tag_size %u\n", le16_to_cpu(ic->sb->integrity_tag_size)); 5179 DEBUG_print(" journal_entry_size %u\n", ic->journal_entry_size); 5180 DEBUG_print(" journal_entries_per_sector %u\n", ic->journal_entries_per_sector); 5181 DEBUG_print(" journal_section_entries %u\n", ic->journal_section_entries); 5182 DEBUG_print(" journal_section_sectors %u\n", ic->journal_section_sectors); 5183 DEBUG_print(" journal_sections %u\n", (unsigned int)le32_to_cpu(ic->sb->journal_sections)); 5184 DEBUG_print(" journal_entries %u\n", ic->journal_entries); 5185 DEBUG_print(" 
log2_interleave_sectors %d\n", ic->sb->log2_interleave_sectors); 5186 DEBUG_print(" data_device_sectors 0x%llx\n", bdev_nr_sectors(ic->dev->bdev)); 5187 DEBUG_print(" initial_sectors 0x%x\n", ic->initial_sectors); 5188 DEBUG_print(" metadata_run 0x%x\n", ic->metadata_run); 5189 DEBUG_print(" log2_metadata_run %d\n", ic->log2_metadata_run); 5190 DEBUG_print(" provided_data_sectors 0x%llx (%llu)\n", ic->provided_data_sectors, ic->provided_data_sectors); 5191 DEBUG_print(" log2_buffer_sectors %u\n", ic->log2_buffer_sectors); 5192 DEBUG_print(" bits_in_journal %llu\n", bits_in_journal); 5193 5194 if (ic->recalculate_flag && !(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))) { 5195 ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING); 5196 ic->sb->recalc_sector = cpu_to_le64(0); 5197 } 5198 5199 if (ic->internal_hash) { 5200 ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1); 5201 if (!ic->recalc_wq) { 5202 ti->error = "Cannot allocate workqueue"; 5203 r = -ENOMEM; 5204 goto bad; 5205 } 5206 INIT_WORK(&ic->recalc_work, ic->mode == 'I' ? integrity_recalc_inline : integrity_recalc); 5207 } else { 5208 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) { 5209 ti->error = "Recalculate can only be specified with internal_hash"; 5210 r = -EINVAL; 5211 goto bad; 5212 } 5213 } 5214 5215 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) && 5216 le64_to_cpu(ic->sb->recalc_sector) < ic->provided_data_sectors && 5217 dm_integrity_disable_recalculate(ic)) { 5218 ti->error = "Recalculating with HMAC is disabled for security reasons - if you really need it, use the argument \"legacy_recalculate\""; 5219 r = -EOPNOTSUPP; 5220 goto bad; 5221 } 5222 5223 ic->bufio = dm_bufio_client_create(ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev, 5224 1U << (SECTOR_SHIFT + ic->log2_buffer_sectors), 1, 0, NULL, NULL, 0); 5225 if (IS_ERR(ic->bufio)) { 5226 r = PTR_ERR(ic->bufio); 5227 ti->error = "Cannot initialize dm-bufio"; 5228 ic->bufio = NULL; 5229 goto bad; 5230 } 5231 dm_bufio_set_sector_offset(ic->bufio, ic->start + ic->initial_sectors); 5232 5233 if (ic->mode != 'R' && ic->mode != 'I') { 5234 r = create_journal(ic, &ti->error); 5235 if (r) 5236 goto bad; 5237 5238 } 5239 5240 if (ic->mode == 'B') { 5241 unsigned int i; 5242 unsigned int n_bitmap_pages = DIV_ROUND_UP(ic->n_bitmap_blocks, PAGE_SIZE / BITMAP_BLOCK_SIZE); 5243 5244 ic->recalc_bitmap = dm_integrity_alloc_page_list(n_bitmap_pages); 5245 if (!ic->recalc_bitmap) { 5246 ti->error = "Could not allocate memory for bitmap"; 5247 r = -ENOMEM; 5248 goto bad; 5249 } 5250 ic->may_write_bitmap = dm_integrity_alloc_page_list(n_bitmap_pages); 5251 if (!ic->may_write_bitmap) { 5252 ti->error = "Could not allocate memory for bitmap"; 5253 r = -ENOMEM; 5254 goto bad; 5255 } 5256 ic->bbs = kvmalloc_array(ic->n_bitmap_blocks, sizeof(struct bitmap_block_status), GFP_KERNEL); 5257 if (!ic->bbs) { 5258 ti->error = "Could not allocate memory for bitmap"; 5259 r = -ENOMEM; 5260 goto bad; 5261 } 5262 INIT_DELAYED_WORK(&ic->bitmap_flush_work, bitmap_flush_work); 5263 for (i = 0; i < ic->n_bitmap_blocks; i++) { 5264 struct bitmap_block_status *bbs = &ic->bbs[i]; 5265 unsigned int sector, pl_index, pl_offset; 5266 5267 INIT_WORK(&bbs->work, bitmap_block_work); 5268 bbs->ic = ic; 5269 bbs->idx = i; 5270 bio_list_init(&bbs->bio_queue); 5271 spin_lock_init(&bbs->bio_queue_lock); 5272 5273 sector = i * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT); 5274 pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT); 5275 pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE 
			bbs->bitmap = lowmem_page_address(ic->journal[pl_index].page) + pl_offset;
		}
	}

	if (should_write_sb) {
		init_journal(ic, 0, ic->journal_sections, 0);
		r = dm_integrity_failed(ic);
		if (unlikely(r)) {
			ti->error = "Error initializing journal";
			goto bad;
		}
		r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA);
		if (r) {
			ti->error = "Error initializing superblock";
			goto bad;
		}
		ic->just_formatted = true;
	}

	if (!ic->meta_dev && ic->mode != 'I') {
		r = dm_set_target_max_io_len(ti, 1U << ic->sb->log2_interleave_sectors);
		if (r)
			goto bad;
	}
	if (ic->mode == 'B') {
		unsigned int max_io_len;

		max_io_len = ((sector_t)ic->sectors_per_block << ic->log2_blocks_per_bitmap_bit) * (BITMAP_BLOCK_SIZE * 8);
		if (!max_io_len)
			max_io_len = 1U << 31;
		DEBUG_print("max_io_len: old %u, new %u\n", ti->max_io_len, max_io_len);
		if (!ti->max_io_len || ti->max_io_len > max_io_len) {
			r = dm_set_target_max_io_len(ti, max_io_len);
			if (r)
				goto bad;
		}
	}

	ti->num_flush_bios = 1;
	ti->flush_supported = true;
	if (ic->discard)
		ti->num_discard_bios = 1;

	if (ic->mode == 'I')
		ti->mempool_needs_integrity = true;

	dm_audit_log_ctr(DM_MSG_PREFIX, ti, 1);
	return 0;

bad:
	dm_audit_log_ctr(DM_MSG_PREFIX, ti, 0);
	dm_integrity_dtr(ti);
	return r;
}
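
/*
 * Destructor: also reached from the constructor's "bad:" error path, so it
 * must tolerate a partially initialized context; pointers are tested before
 * the explicit destroy calls below.
 */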
static void dm_integrity_dtr(struct dm_target *ti)
{
	struct dm_integrity_c *ic = ti->private;

	BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
	BUG_ON(!list_empty(&ic->wait_list));

	if (ic->mode == 'B' && ic->bitmap_flush_work.work.func)
		cancel_delayed_work_sync(&ic->bitmap_flush_work);
	if (ic->metadata_wq)
		destroy_workqueue(ic->metadata_wq);
	if (ic->wait_wq)
		destroy_workqueue(ic->wait_wq);
	if (ic->offload_wq)
		destroy_workqueue(ic->offload_wq);
	if (ic->commit_wq)
		destroy_workqueue(ic->commit_wq);
	if (ic->writer_wq)
		destroy_workqueue(ic->writer_wq);
	if (ic->recalc_wq)
		destroy_workqueue(ic->recalc_wq);
	kvfree(ic->bbs);
	if (ic->bufio)
		dm_bufio_client_destroy(ic->bufio);
	mempool_free(ic->journal_ahash_req, &ic->ahash_req_pool);
	mempool_exit(&ic->ahash_req_pool);
	bioset_exit(&ic->recalc_bios);
	bioset_exit(&ic->recheck_bios);
	mempool_exit(&ic->recheck_pool);
	mempool_exit(&ic->journal_io_mempool);
	if (ic->io)
		dm_io_client_destroy(ic->io);
	if (ic->dev)
		dm_put_device(ti, ic->dev);
	if (ic->meta_dev)
		dm_put_device(ti, ic->meta_dev);
	dm_integrity_free_page_list(ic->journal);
	dm_integrity_free_page_list(ic->journal_io);
	dm_integrity_free_page_list(ic->journal_xor);
	dm_integrity_free_page_list(ic->recalc_bitmap);
	dm_integrity_free_page_list(ic->may_write_bitmap);
	if (ic->journal_scatterlist)
		dm_integrity_free_journal_scatterlist(ic, ic->journal_scatterlist);
	if (ic->journal_io_scatterlist)
		dm_integrity_free_journal_scatterlist(ic, ic->journal_io_scatterlist);
	if (ic->sk_requests) {
		unsigned int i;

		for (i = 0; i < ic->journal_sections; i++) {
			struct skcipher_request *req;

			req = ic->sk_requests[i];
			if (req) {
				kfree_sensitive(req->iv);
				skcipher_request_free(req);
			}
		}
		kvfree(ic->sk_requests);
	}
	kvfree(ic->journal_tree);
	if (ic->sb)
		free_pages_exact(ic->sb, SB_SECTORS << SECTOR_SHIFT);

	if (ic->internal_shash)
		crypto_free_shash(ic->internal_shash);
	if (ic->internal_ahash)
		crypto_free_ahash(ic->internal_ahash);
	free_alg(&ic->internal_hash_alg);

	if (ic->journal_crypt)
		crypto_free_skcipher(ic->journal_crypt);
	free_alg(&ic->journal_crypt_alg);

	if (ic->journal_mac)
		crypto_free_shash(ic->journal_mac);
	free_alg(&ic->journal_mac_alg);

	kfree(ic);
	dm_audit_log_dtr(DM_MSG_PREFIX, ti, 1);
}

static struct target_type integrity_target = {
	.name = "integrity",
	.version = {1, 14, 0},
	.module = THIS_MODULE,
	.features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY,
	.ctr = dm_integrity_ctr,
	.dtr = dm_integrity_dtr,
	.map = dm_integrity_map,
	.end_io = dm_integrity_end_io,
	.postsuspend = dm_integrity_postsuspend,
	.resume = dm_integrity_resume,
	.status = dm_integrity_status,
	.iterate_devices = dm_integrity_iterate_devices,
	.io_hints = dm_integrity_io_hints,
};

static int __init dm_integrity_init(void)
{
	int r;

	journal_io_cache = kmem_cache_create("integrity_journal_io",
					     sizeof(struct journal_io), 0, 0, NULL);
	if (!journal_io_cache) {
		DMERR("can't allocate journal io cache");
		return -ENOMEM;
	}

	r = dm_register_target(&integrity_target);
	if (r < 0) {
		kmem_cache_destroy(journal_io_cache);
		return r;
	}

	return 0;
}

static void __exit dm_integrity_exit(void)
{
	dm_unregister_target(&integrity_target);
	kmem_cache_destroy(journal_io_cache);
}

module_init(dm_integrity_init);
module_exit(dm_integrity_exit);

MODULE_AUTHOR("Milan Broz");
MODULE_AUTHOR("Mikulas Patocka");
MODULE_DESCRIPTION(DM_NAME " target for integrity tags extension");
MODULE_LICENSE("GPL");