1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (C) 2016-2017 Red Hat, Inc. All rights reserved.
4 * Copyright (C) 2016-2017 Milan Broz
5 * Copyright (C) 2016-2017 Mikulas Patocka
6 *
7 * This file is released under the GPL.
8 */
9
10 #include "dm-bio-record.h"
11
12 #include <linux/compiler.h>
13 #include <linux/module.h>
14 #include <linux/device-mapper.h>
15 #include <linux/dm-io.h>
16 #include <linux/vmalloc.h>
17 #include <linux/sort.h>
18 #include <linux/rbtree.h>
19 #include <linux/delay.h>
20 #include <linux/random.h>
21 #include <linux/reboot.h>
22 #include <crypto/hash.h>
23 #include <crypto/skcipher.h>
24 #include <crypto/utils.h>
25 #include <linux/async_tx.h>
26 #include <linux/dm-bufio.h>
27
28 #include "dm-audit.h"
29
30 #define DM_MSG_PREFIX "integrity"
31
32 #define DEFAULT_INTERLEAVE_SECTORS 32768
33 #define DEFAULT_JOURNAL_SIZE_FACTOR 7
34 #define DEFAULT_SECTORS_PER_BITMAP_BIT 32768
35 #define DEFAULT_BUFFER_SECTORS 128
36 #define DEFAULT_JOURNAL_WATERMARK 50
37 #define DEFAULT_SYNC_MSEC 10000
38 #define DEFAULT_MAX_JOURNAL_SECTORS (IS_ENABLED(CONFIG_64BIT) ? 131072 : 8192)
39 #define MIN_LOG2_INTERLEAVE_SECTORS 3
40 #define MAX_LOG2_INTERLEAVE_SECTORS 31
41 #define METADATA_WORKQUEUE_MAX_ACTIVE 16
42 #define RECALC_SECTORS (IS_ENABLED(CONFIG_64BIT) ? 32768 : 2048)
43 #define RECALC_WRITE_SUPER 16
44 #define BITMAP_BLOCK_SIZE 4096 /* don't change it */
45 #define BITMAP_FLUSH_INTERVAL (10 * HZ)
46 #define DISCARD_FILLER 0xf6
47 #define SALT_SIZE 16
48 #define RECHECK_POOL_SIZE 256
49
50 /*
51 * Warning - DEBUG_PRINT prints security-sensitive data to the log,
52 * so it should not be enabled in the official kernel
53 */
54 //#define DEBUG_PRINT
55 //#define INTERNAL_VERIFY
56
57 /*
58 * On disk structures
59 */
60
61 #define SB_MAGIC "integrt"
62 #define SB_VERSION_1 1
63 #define SB_VERSION_2 2
64 #define SB_VERSION_3 3
65 #define SB_VERSION_4 4
66 #define SB_VERSION_5 5
67 #define SB_VERSION_6 6
68 #define SB_SECTORS 8
69 #define MAX_SECTORS_PER_BLOCK 8
70
71 struct superblock {
72 __u8 magic[8];
73 __u8 version;
74 __u8 log2_interleave_sectors;
75 __le16 integrity_tag_size;
76 __le32 journal_sections;
77 __le64 provided_data_sectors; /* userspace uses this value */
78 __le32 flags;
79 __u8 log2_sectors_per_block;
80 __u8 log2_blocks_per_bitmap_bit;
81 __u8 pad[2];
82 __le64 recalc_sector;
83 __u8 pad2[8];
84 __u8 salt[SALT_SIZE];
85 };
86
87 #define SB_FLAG_HAVE_JOURNAL_MAC 0x1
88 #define SB_FLAG_RECALCULATING 0x2
89 #define SB_FLAG_DIRTY_BITMAP 0x4
90 #define SB_FLAG_FIXED_PADDING 0x8
91 #define SB_FLAG_FIXED_HMAC 0x10
92 #define SB_FLAG_INLINE 0x20
93
94 #define JOURNAL_ENTRY_ROUNDUP 8
95
96 typedef __le64 commit_id_t;
97 #define JOURNAL_MAC_PER_SECTOR 8
98
99 struct journal_entry {
100 union {
101 struct {
102 __le32 sector_lo;
103 __le32 sector_hi;
104 } s;
105 __le64 sector;
106 } u;
107 commit_id_t last_bytes[];
108 /* __u8 tag[0]; */
109 };
110
111 #define journal_entry_tag(ic, je) ((__u8 *)&(je)->last_bytes[(ic)->sectors_per_block])
112
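/*
 * Accessors for the journal entry sector field. On 64-bit hosts the sector
 * is published with a single 64-bit write; on 32-bit hosts the low half is
 * written first and the entry is published by the write of the high half.
 * A sector_hi value of -1 marks an unused entry, -2 an in-progress one.
 */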
113 #if BITS_PER_LONG == 64
114 #define journal_entry_set_sector(je, x) do { smp_wmb(); WRITE_ONCE((je)->u.sector, cpu_to_le64(x)); } while (0)
115 #else
116 #define journal_entry_set_sector(je, x) do { (je)->u.s.sector_lo = cpu_to_le32(x); smp_wmb(); WRITE_ONCE((je)->u.s.sector_hi, cpu_to_le32((x) >> 32)); } while (0)
117 #endif
118 #define journal_entry_get_sector(je) le64_to_cpu((je)->u.sector)
119 #define journal_entry_is_unused(je) ((je)->u.s.sector_hi == cpu_to_le32(-1))
120 #define journal_entry_set_unused(je) ((je)->u.s.sector_hi = cpu_to_le32(-1))
121 #define journal_entry_is_inprogress(je) ((je)->u.s.sector_hi == cpu_to_le32(-2))
122 #define journal_entry_set_inprogress(je) ((je)->u.s.sector_hi = cpu_to_le32(-2))
123
124 #define JOURNAL_BLOCK_SECTORS 8
125 #define JOURNAL_SECTOR_DATA ((1 << SECTOR_SHIFT) - sizeof(commit_id_t))
126 #define JOURNAL_MAC_SIZE (JOURNAL_MAC_PER_SECTOR * JOURNAL_BLOCK_SECTORS)
127
128 struct journal_sector {
129 struct_group(sectors,
130 __u8 entries[JOURNAL_SECTOR_DATA - JOURNAL_MAC_PER_SECTOR];
131 __u8 mac[JOURNAL_MAC_PER_SECTOR];
132 );
133 commit_id_t commit_id;
134 };
135
136 #define MAX_TAG_SIZE 255
137
138 #define METADATA_PADDING_SECTORS 8
139
140 #define N_COMMIT_IDS 4
141
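/* Commit sequence numbers cycle through N_COMMIT_IDS values. */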
static unsigned char prev_commit_seq(unsigned char seq)
143 {
144 return (seq + N_COMMIT_IDS - 1) % N_COMMIT_IDS;
145 }
146
static unsigned char next_commit_seq(unsigned char seq)
148 {
149 return (seq + 1) % N_COMMIT_IDS;
150 }
151
152 /*
153 * In-memory structures
154 */
155
156 struct journal_node {
157 struct rb_node node;
158 sector_t sector;
159 };
160
161 struct alg_spec {
162 char *alg_string;
163 char *key_string;
164 __u8 *key;
165 unsigned int key_size;
166 };
167
168 struct dm_integrity_c {
169 struct dm_dev *dev;
170 struct dm_dev *meta_dev;
171 unsigned int tag_size;
172 __s8 log2_tag_size;
173 unsigned int tuple_size;
174 sector_t start;
175 mempool_t journal_io_mempool;
176 struct dm_io_client *io;
177 struct dm_bufio_client *bufio;
178 struct workqueue_struct *metadata_wq;
179 struct superblock *sb;
180 unsigned int journal_pages;
181 unsigned int n_bitmap_blocks;
182
183 struct page_list *journal;
184 struct page_list *journal_io;
185 struct page_list *journal_xor;
186 struct page_list *recalc_bitmap;
187 struct page_list *may_write_bitmap;
188 struct bitmap_block_status *bbs;
189 unsigned int bitmap_flush_interval;
190 int synchronous_mode;
191 struct bio_list synchronous_bios;
192 struct delayed_work bitmap_flush_work;
193
194 struct crypto_skcipher *journal_crypt;
195 struct scatterlist **journal_scatterlist;
196 struct scatterlist **journal_io_scatterlist;
197 struct skcipher_request **sk_requests;
198
199 struct crypto_shash *journal_mac;
200
201 struct journal_node *journal_tree;
202 struct rb_root journal_tree_root;
203
204 sector_t provided_data_sectors;
205
206 unsigned short journal_entry_size;
207 unsigned char journal_entries_per_sector;
208 unsigned char journal_section_entries;
209 unsigned short journal_section_sectors;
210 unsigned int journal_sections;
211 unsigned int journal_entries;
212 sector_t data_device_sectors;
213 sector_t meta_device_sectors;
214 unsigned int initial_sectors;
215 unsigned int metadata_run;
216 __s8 log2_metadata_run;
217 __u8 log2_buffer_sectors;
218 __u8 sectors_per_block;
219 __u8 log2_blocks_per_bitmap_bit;
220
221 unsigned char mode;
222 bool internal_hash;
223
224 int failed;
225
226 struct crypto_shash *internal_shash;
227 struct crypto_ahash *internal_ahash;
228 unsigned int internal_hash_digestsize;
229
230 struct dm_target *ti;
231
232 /* these variables are locked with endio_wait.lock */
233 struct rb_root in_progress;
234 struct list_head wait_list;
235 wait_queue_head_t endio_wait;
236 struct workqueue_struct *wait_wq;
237 struct workqueue_struct *offload_wq;
238
239 unsigned char commit_seq;
240 commit_id_t commit_ids[N_COMMIT_IDS];
241
242 unsigned int committed_section;
243 unsigned int n_committed_sections;
244
245 unsigned int uncommitted_section;
246 unsigned int n_uncommitted_sections;
247
248 unsigned int free_section;
249 unsigned char free_section_entry;
250 unsigned int free_sectors;
251
252 unsigned int free_sectors_threshold;
253
254 struct workqueue_struct *commit_wq;
255 struct work_struct commit_work;
256
257 struct workqueue_struct *writer_wq;
258 struct work_struct writer_work;
259
260 struct workqueue_struct *recalc_wq;
261 struct work_struct recalc_work;
262
263 struct bio_list flush_bio_list;
264
265 unsigned long autocommit_jiffies;
266 struct timer_list autocommit_timer;
267 unsigned int autocommit_msec;
268
269 wait_queue_head_t copy_to_journal_wait;
270
271 struct completion crypto_backoff;
272
273 bool wrote_to_journal;
274 bool journal_uptodate;
275 bool just_formatted;
276 bool recalculate_flag;
277 bool reset_recalculate_flag;
278 bool discard;
279 bool fix_padding;
280 bool fix_hmac;
281 bool legacy_recalculate;
282
283 mempool_t ahash_req_pool;
284 struct ahash_request *journal_ahash_req;
285
286 struct alg_spec internal_hash_alg;
287 struct alg_spec journal_crypt_alg;
288 struct alg_spec journal_mac_alg;
289
290 atomic64_t number_of_mismatches;
291
292 mempool_t recheck_pool;
293 struct bio_set recheck_bios;
294 struct bio_set recalc_bios;
295
296 struct notifier_block reboot_notifier;
297 };
298
299 struct dm_integrity_range {
300 sector_t logical_sector;
301 sector_t n_sectors;
302 bool waiting;
303 union {
304 struct rb_node node;
305 struct {
306 struct task_struct *task;
307 struct list_head wait_entry;
308 };
309 };
310 };
311
312 struct dm_integrity_io {
313 struct work_struct work;
314
315 struct dm_integrity_c *ic;
316 enum req_op op;
317 bool fua;
318
319 struct dm_integrity_range range;
320
321 sector_t metadata_block;
322 unsigned int metadata_offset;
323
324 atomic_t in_flight;
325 blk_status_t bi_status;
326
327 struct completion *completion;
328
329 struct dm_bio_details bio_details;
330
331 char *integrity_payload;
332 unsigned payload_len;
333 bool integrity_payload_from_mempool;
334 bool integrity_range_locked;
335
336 struct ahash_request *ahash_req;
337 };
338
339 struct journal_completion {
340 struct dm_integrity_c *ic;
341 atomic_t in_flight;
342 struct completion comp;
343 };
344
345 struct journal_io {
346 struct dm_integrity_range range;
347 struct journal_completion *comp;
348 };
349
350 struct bitmap_block_status {
351 struct work_struct work;
352 struct dm_integrity_c *ic;
353 unsigned int idx;
354 unsigned long *bitmap;
355 struct bio_list bio_queue;
356 spinlock_t bio_queue_lock;
357
358 };
359
360 static struct kmem_cache *journal_io_cache;
361
362 #define JOURNAL_IO_MEMPOOL 32
363 #define AHASH_MEMPOOL 32
364
365 #ifdef DEBUG_PRINT
366 #define DEBUG_print(x, ...) printk(KERN_DEBUG x, ##__VA_ARGS__)
367 #define DEBUG_bytes(bytes, len, msg, ...) printk(KERN_DEBUG msg "%s%*ph\n", ##__VA_ARGS__, \
368 len ? ": " : "", len, bytes)
369 #else
370 #define DEBUG_print(x, ...) do { } while (0)
371 #define DEBUG_bytes(bytes, len, msg, ...) do { } while (0)
372 #endif
373
374 static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map);
375 static int dm_integrity_map_inline(struct dm_integrity_io *dio, bool from_map);
376 static void integrity_bio_wait(struct work_struct *w);
377 static void dm_integrity_dtr(struct dm_target *ti);
378
static void dm_integrity_io_error(struct dm_integrity_c *ic, const char *msg, int err)
380 {
381 if (err == -EILSEQ)
382 atomic64_inc(&ic->number_of_mismatches);
383 if (!cmpxchg(&ic->failed, 0, err))
384 DMERR("Error on %s: %d", msg, err);
385 }
386
static int dm_integrity_failed(struct dm_integrity_c *ic)
388 {
389 return READ_ONCE(ic->failed);
390 }
391
static bool dm_integrity_disable_recalculate(struct dm_integrity_c *ic)
393 {
394 if (ic->legacy_recalculate)
395 return false;
396 if (!(ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) ?
397 ic->internal_hash_alg.key || ic->journal_mac_alg.key :
398 ic->internal_hash_alg.key && !ic->journal_mac_alg.key)
399 return true;
400 return false;
401 }
402
static commit_id_t dm_integrity_commit_id(struct dm_integrity_c *ic, unsigned int i,
					  unsigned int j, unsigned char seq)
405 {
	/*
	 * XOR the commit ID with the section and sector numbers, so that a
	 * piece of journal written to the wrong place is detected.
	 */
410 return ic->commit_ids[seq] ^ cpu_to_le64(((__u64)i << 32) ^ j);
411 }
412
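/*
 * Split a data sector into an interleave "area" and an offset within that
 * area. With a separate metadata device there is no interleaving: the area
 * is always 0 and the offset is the data sector itself.
 */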
static void get_area_and_offset(struct dm_integrity_c *ic, sector_t data_sector,
				sector_t *area, sector_t *offset)
415 {
416 if (!ic->meta_dev) {
417 __u8 log2_interleave_sectors = ic->sb->log2_interleave_sectors;
418 *area = data_sector >> log2_interleave_sectors;
419 *offset = (unsigned int)data_sector & ((1U << log2_interleave_sectors) - 1);
420 } else {
421 *area = 0;
422 *offset = data_sector;
423 }
424 }
425
426 #define sector_to_block(ic, n) \
427 do { \
428 BUG_ON((n) & (unsigned int)((ic)->sectors_per_block - 1)); \
429 (n) >>= (ic)->sb->log2_sectors_per_block; \
430 } while (0)
431
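/*
 * Compute the metadata (bufio) block that holds the tag for the given
 * (area, offset) pair and the byte offset of that tag within the block.
 * Power-of-two tag sizes take the shift-based fast path, other sizes use
 * multiplication.
 */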
static __u64 get_metadata_sector_and_offset(struct dm_integrity_c *ic, sector_t area,
					    sector_t offset, unsigned int *metadata_offset)
434 {
435 __u64 ms;
436 unsigned int mo;
437
438 ms = area << ic->sb->log2_interleave_sectors;
439 if (likely(ic->log2_metadata_run >= 0))
440 ms += area << ic->log2_metadata_run;
441 else
442 ms += area * ic->metadata_run;
443 ms >>= ic->log2_buffer_sectors;
444
445 sector_to_block(ic, offset);
446
447 if (likely(ic->log2_tag_size >= 0)) {
448 ms += offset >> (SECTOR_SHIFT + ic->log2_buffer_sectors - ic->log2_tag_size);
449 mo = (offset << ic->log2_tag_size) & ((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - 1);
450 } else {
451 ms += (__u64)offset * ic->tag_size >> (SECTOR_SHIFT + ic->log2_buffer_sectors);
452 mo = (offset * ic->tag_size) & ((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - 1);
453 }
454 *metadata_offset = mo;
455 return ms;
456 }
457
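/*
 * Translate an (area, offset) pair to a sector on the data device. With a
 * separate metadata device the data device is used linearly, so the offset
 * is returned unchanged; otherwise the result accounts for the interleaved
 * metadata runs, the initial sectors and ic->start.
 */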
static sector_t get_data_sector(struct dm_integrity_c *ic, sector_t area, sector_t offset)
459 {
460 sector_t result;
461
462 if (ic->meta_dev)
463 return offset;
464
465 result = area << ic->sb->log2_interleave_sectors;
466 if (likely(ic->log2_metadata_run >= 0))
467 result += (area + 1) << ic->log2_metadata_run;
468 else
469 result += (area + 1) * ic->metadata_run;
470
471 result += (sector_t)ic->initial_sectors + offset;
472 result += ic->start;
473
474 return result;
475 }
476
static void wraparound_section(struct dm_integrity_c *ic, unsigned int *sec_ptr)
478 {
479 if (unlikely(*sec_ptr >= ic->journal_sections))
480 *sec_ptr -= ic->journal_sections;
481 }
482
static void sb_set_version(struct dm_integrity_c *ic)
484 {
485 if (ic->sb->flags & cpu_to_le32(SB_FLAG_INLINE))
486 ic->sb->version = SB_VERSION_6;
487 else if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC))
488 ic->sb->version = SB_VERSION_5;
489 else if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING))
490 ic->sb->version = SB_VERSION_4;
491 else if (ic->mode == 'B' || ic->sb->flags & cpu_to_le32(SB_FLAG_DIRTY_BITMAP))
492 ic->sb->version = SB_VERSION_3;
493 else if (ic->meta_dev || ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
494 ic->sb->version = SB_VERSION_2;
495 else
496 ic->sb->version = SB_VERSION_1;
497 }
498
static int sb_mac(struct dm_integrity_c *ic, bool wr)
500 {
501 SHASH_DESC_ON_STACK(desc, ic->journal_mac);
502 int r;
503 unsigned int mac_size = crypto_shash_digestsize(ic->journal_mac);
504 __u8 *sb = (__u8 *)ic->sb;
505 __u8 *mac = sb + (1 << SECTOR_SHIFT) - mac_size;
506
507 if (sizeof(struct superblock) + mac_size > 1 << SECTOR_SHIFT ||
508 mac_size > HASH_MAX_DIGESTSIZE) {
509 dm_integrity_io_error(ic, "digest is too long", -EINVAL);
510 return -EINVAL;
511 }
512
513 desc->tfm = ic->journal_mac;
514
515 if (likely(wr)) {
516 r = crypto_shash_digest(desc, sb, mac - sb, mac);
517 if (unlikely(r < 0)) {
518 dm_integrity_io_error(ic, "crypto_shash_digest", r);
519 return r;
520 }
521 } else {
522 __u8 actual_mac[HASH_MAX_DIGESTSIZE];
523
524 r = crypto_shash_digest(desc, sb, mac - sb, actual_mac);
525 if (unlikely(r < 0)) {
526 dm_integrity_io_error(ic, "crypto_shash_digest", r);
527 return r;
528 }
529 if (crypto_memneq(mac, actual_mac, mac_size)) {
530 dm_integrity_io_error(ic, "superblock mac", -EILSEQ);
531 dm_audit_log_target(DM_MSG_PREFIX, "mac-superblock", ic->ti, 0);
532 return -EILSEQ;
533 }
534 }
535
536 return 0;
537 }
538
static int sync_rw_sb(struct dm_integrity_c *ic, blk_opf_t opf)
540 {
541 struct dm_io_request io_req;
542 struct dm_io_region io_loc;
543 const enum req_op op = opf & REQ_OP_MASK;
544 int r;
545
546 io_req.bi_opf = opf;
547 io_req.mem.type = DM_IO_KMEM;
548 io_req.mem.ptr.addr = ic->sb;
549 io_req.notify.fn = NULL;
550 io_req.client = ic->io;
551 io_loc.bdev = ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev;
552 io_loc.sector = ic->start;
553 io_loc.count = SB_SECTORS;
554
555 if (op == REQ_OP_WRITE) {
556 sb_set_version(ic);
557 if (ic->journal_mac && ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) {
558 r = sb_mac(ic, true);
559 if (unlikely(r))
560 return r;
561 }
562 }
563
564 r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT);
565 if (unlikely(r))
566 return r;
567
568 if (op == REQ_OP_READ) {
569 if (ic->mode != 'R' && ic->journal_mac && ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) {
570 r = sb_mac(ic, false);
571 if (unlikely(r))
572 return r;
573 }
574 }
575
576 return 0;
577 }
578
579 #define BITMAP_OP_TEST_ALL_SET 0
580 #define BITMAP_OP_TEST_ALL_CLEAR 1
581 #define BITMAP_OP_SET 2
582 #define BITMAP_OP_CLEAR 3
583
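/*
 * Test, set or clear the bitmap bits that cover a range of sectors. Runs
 * that span whole words are handled a word at a time; the remainder is
 * processed bit by bit. The bitmap may span multiple pages.
 */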
static bool block_bitmap_op(struct dm_integrity_c *ic, struct page_list *bitmap,
			    sector_t sector, sector_t n_sectors, int mode)
586 {
587 unsigned long bit, end_bit, this_end_bit, page, end_page;
588 unsigned long *data;
589
590 if (unlikely(((sector | n_sectors) & ((1 << ic->sb->log2_sectors_per_block) - 1)) != 0)) {
591 DMCRIT("invalid bitmap access (%llx,%llx,%d,%d,%d)",
592 sector,
593 n_sectors,
594 ic->sb->log2_sectors_per_block,
595 ic->log2_blocks_per_bitmap_bit,
596 mode);
597 BUG();
598 }
599
600 if (unlikely(!n_sectors))
601 return true;
602
603 bit = sector >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
604 end_bit = (sector + n_sectors - 1) >>
605 (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
606
607 page = bit / (PAGE_SIZE * 8);
608 bit %= PAGE_SIZE * 8;
609
610 end_page = end_bit / (PAGE_SIZE * 8);
611 end_bit %= PAGE_SIZE * 8;
612
613 repeat:
614 if (page < end_page)
615 this_end_bit = PAGE_SIZE * 8 - 1;
616 else
617 this_end_bit = end_bit;
618
619 data = lowmem_page_address(bitmap[page].page);
620
621 if (mode == BITMAP_OP_TEST_ALL_SET) {
622 while (bit <= this_end_bit) {
623 if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) {
624 do {
625 if (data[bit / BITS_PER_LONG] != -1)
626 return false;
627 bit += BITS_PER_LONG;
628 } while (this_end_bit >= bit + BITS_PER_LONG - 1);
629 continue;
630 }
631 if (!test_bit(bit, data))
632 return false;
633 bit++;
634 }
635 } else if (mode == BITMAP_OP_TEST_ALL_CLEAR) {
636 while (bit <= this_end_bit) {
637 if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) {
638 do {
639 if (data[bit / BITS_PER_LONG] != 0)
640 return false;
641 bit += BITS_PER_LONG;
642 } while (this_end_bit >= bit + BITS_PER_LONG - 1);
643 continue;
644 }
645 if (test_bit(bit, data))
646 return false;
647 bit++;
648 }
649 } else if (mode == BITMAP_OP_SET) {
650 while (bit <= this_end_bit) {
651 if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) {
652 do {
653 data[bit / BITS_PER_LONG] = -1;
654 bit += BITS_PER_LONG;
655 } while (this_end_bit >= bit + BITS_PER_LONG - 1);
656 continue;
657 }
658 __set_bit(bit, data);
659 bit++;
660 }
661 } else if (mode == BITMAP_OP_CLEAR) {
662 if (!bit && this_end_bit == PAGE_SIZE * 8 - 1)
663 clear_page(data);
664 else {
665 while (bit <= this_end_bit) {
666 if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) {
667 do {
668 data[bit / BITS_PER_LONG] = 0;
669 bit += BITS_PER_LONG;
670 } while (this_end_bit >= bit + BITS_PER_LONG - 1);
671 continue;
672 }
673 __clear_bit(bit, data);
674 bit++;
675 }
676 }
677 } else {
678 BUG();
679 }
680
681 if (unlikely(page < end_page)) {
682 bit = 0;
683 page++;
684 goto repeat;
685 }
686
687 return true;
688 }
689
static void block_bitmap_copy(struct dm_integrity_c *ic, struct page_list *dst, struct page_list *src)
691 {
692 unsigned int n_bitmap_pages = DIV_ROUND_UP(ic->n_bitmap_blocks, PAGE_SIZE / BITMAP_BLOCK_SIZE);
693 unsigned int i;
694
695 for (i = 0; i < n_bitmap_pages; i++) {
696 unsigned long *dst_data = lowmem_page_address(dst[i].page);
697 unsigned long *src_data = lowmem_page_address(src[i].page);
698
699 copy_page(dst_data, src_data);
700 }
701 }
702
static struct bitmap_block_status *sector_to_bitmap_block(struct dm_integrity_c *ic, sector_t sector)
704 {
705 unsigned int bit = sector >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
706 unsigned int bitmap_block = bit / (BITMAP_BLOCK_SIZE * 8);
707
708 BUG_ON(bitmap_block >= ic->n_bitmap_blocks);
709 return &ic->bbs[bitmap_block];
710 }
711
static void access_journal_check(struct dm_integrity_c *ic, unsigned int section, unsigned int offset,
				 bool e, const char *function)
714 {
715 #if defined(CONFIG_DM_DEBUG) || defined(INTERNAL_VERIFY)
716 unsigned int limit = e ? ic->journal_section_entries : ic->journal_section_sectors;
717
718 if (unlikely(section >= ic->journal_sections) ||
719 unlikely(offset >= limit)) {
720 DMCRIT("%s: invalid access at (%u,%u), limit (%u,%u)",
721 function, section, offset, ic->journal_sections, limit);
722 BUG();
723 }
724 #endif
725 }
726
static void page_list_location(struct dm_integrity_c *ic, unsigned int section, unsigned int offset,
			       unsigned int *pl_index, unsigned int *pl_offset)
729 {
730 unsigned int sector;
731
732 access_journal_check(ic, section, offset, false, "page_list_location");
733
734 sector = section * ic->journal_section_sectors + offset;
735
736 *pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT);
737 *pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1);
738 }
739
static struct journal_sector *access_page_list(struct dm_integrity_c *ic, struct page_list *pl,
					       unsigned int section, unsigned int offset, unsigned int *n_sectors)
742 {
743 unsigned int pl_index, pl_offset;
744 char *va;
745
746 page_list_location(ic, section, offset, &pl_index, &pl_offset);
747
748 if (n_sectors)
749 *n_sectors = (PAGE_SIZE - pl_offset) >> SECTOR_SHIFT;
750
751 va = lowmem_page_address(pl[pl_index].page);
752
753 return (struct journal_sector *)(va + pl_offset);
754 }
755
static struct journal_sector *access_journal(struct dm_integrity_c *ic, unsigned int section, unsigned int offset)
757 {
758 return access_page_list(ic, ic->journal, section, offset, NULL);
759 }
760
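/*
 * Each journal section starts with JOURNAL_BLOCK_SECTORS sectors that hold
 * the journal entries (entry n lives in sector n % JOURNAL_BLOCK_SECTORS,
 * slot n / JOURNAL_BLOCK_SECTORS), followed by the journaled data blocks.
 */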
static struct journal_entry *access_journal_entry(struct dm_integrity_c *ic, unsigned int section, unsigned int n)
762 {
763 unsigned int rel_sector, offset;
764 struct journal_sector *js;
765
766 access_journal_check(ic, section, n, true, "access_journal_entry");
767
768 rel_sector = n % JOURNAL_BLOCK_SECTORS;
769 offset = n / JOURNAL_BLOCK_SECTORS;
770
771 js = access_journal(ic, section, rel_sector);
772 return (struct journal_entry *)((char *)js + offset * ic->journal_entry_size);
773 }
774
static struct journal_sector *access_journal_data(struct dm_integrity_c *ic, unsigned int section, unsigned int n)
776 {
777 n <<= ic->sb->log2_sectors_per_block;
778
779 n += JOURNAL_BLOCK_SECTORS;
780
781 access_journal_check(ic, section, n, false, "access_journal_data");
782
783 return access_journal(ic, section, n);
784 }
785
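/*
 * Compute the MAC over the sector numbers of all journal entries in a
 * section (prefixed with the superblock salt and the section number when
 * SB_FLAG_FIXED_HMAC is set). The digest is zero-padded or truncated to
 * JOURNAL_MAC_SIZE.
 */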
static void section_mac(struct dm_integrity_c *ic, unsigned int section, __u8 result[JOURNAL_MAC_SIZE])
787 {
788 SHASH_DESC_ON_STACK(desc, ic->journal_mac);
789 int r;
790 unsigned int j, size;
791
792 desc->tfm = ic->journal_mac;
793
794 r = crypto_shash_init(desc);
795 if (unlikely(r < 0)) {
796 dm_integrity_io_error(ic, "crypto_shash_init", r);
797 goto err;
798 }
799
800 if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) {
801 __le64 section_le;
802
803 r = crypto_shash_update(desc, (__u8 *)&ic->sb->salt, SALT_SIZE);
804 if (unlikely(r < 0)) {
805 dm_integrity_io_error(ic, "crypto_shash_update", r);
806 goto err;
807 }
808
809 section_le = cpu_to_le64(section);
		r = crypto_shash_update(desc, (__u8 *)&section_le, sizeof(section_le));
811 if (unlikely(r < 0)) {
812 dm_integrity_io_error(ic, "crypto_shash_update", r);
813 goto err;
814 }
815 }
816
817 for (j = 0; j < ic->journal_section_entries; j++) {
818 struct journal_entry *je = access_journal_entry(ic, section, j);
819
820 r = crypto_shash_update(desc, (__u8 *)&je->u.sector, sizeof(je->u.sector));
821 if (unlikely(r < 0)) {
822 dm_integrity_io_error(ic, "crypto_shash_update", r);
823 goto err;
824 }
825 }
826
827 size = crypto_shash_digestsize(ic->journal_mac);
828
829 if (likely(size <= JOURNAL_MAC_SIZE)) {
830 r = crypto_shash_final(desc, result);
831 if (unlikely(r < 0)) {
832 dm_integrity_io_error(ic, "crypto_shash_final", r);
833 goto err;
834 }
835 memset(result + size, 0, JOURNAL_MAC_SIZE - size);
836 } else {
837 __u8 digest[HASH_MAX_DIGESTSIZE];
838
839 if (WARN_ON(size > sizeof(digest))) {
840 dm_integrity_io_error(ic, "digest_size", -EINVAL);
841 goto err;
842 }
843 r = crypto_shash_final(desc, digest);
844 if (unlikely(r < 0)) {
845 dm_integrity_io_error(ic, "crypto_shash_final", r);
846 goto err;
847 }
848 memcpy(result, digest, JOURNAL_MAC_SIZE);
849 }
850
851 return;
852 err:
853 memset(result, 0, JOURNAL_MAC_SIZE);
854 }
855
static void rw_section_mac(struct dm_integrity_c *ic, unsigned int section, bool wr)
857 {
858 __u8 result[JOURNAL_MAC_SIZE];
859 unsigned int j;
860
861 if (!ic->journal_mac)
862 return;
863
864 section_mac(ic, section, result);
865
866 for (j = 0; j < JOURNAL_BLOCK_SECTORS; j++) {
867 struct journal_sector *js = access_journal(ic, section, j);
868
869 if (likely(wr))
870 memcpy(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR);
871 else {
872 if (crypto_memneq(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR)) {
873 dm_integrity_io_error(ic, "journal mac", -EILSEQ);
874 dm_audit_log_target(DM_MSG_PREFIX, "mac-journal", ic->ti, 0);
875 }
876 }
877 }
878 }
879
static void complete_journal_op(void *context)
881 {
882 struct journal_completion *comp = context;
883
884 BUG_ON(!atomic_read(&comp->in_flight));
885 if (likely(atomic_dec_and_test(&comp->in_flight)))
886 complete(&comp->comp);
887 }
888
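/*
 * Encrypt or decrypt the journal by XOR-ing the journal pages with the
 * pages in ic->journal_xor (a keystream prepared elsewhere), using the
 * async_tx API. Section MACs are written as each section boundary is
 * crossed on the encrypt path.
 */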
static void xor_journal(struct dm_integrity_c *ic, bool encrypt, unsigned int section,
			unsigned int n_sections, struct journal_completion *comp)
891 {
892 struct async_submit_ctl submit;
893 size_t n_bytes = (size_t)(n_sections * ic->journal_section_sectors) << SECTOR_SHIFT;
894 unsigned int pl_index, pl_offset, section_index;
895 struct page_list *source_pl, *target_pl;
896
897 if (likely(encrypt)) {
898 source_pl = ic->journal;
899 target_pl = ic->journal_io;
900 } else {
901 source_pl = ic->journal_io;
902 target_pl = ic->journal;
903 }
904
905 page_list_location(ic, section, 0, &pl_index, &pl_offset);
906
907 atomic_add(roundup(pl_offset + n_bytes, PAGE_SIZE) >> PAGE_SHIFT, &comp->in_flight);
908
909 init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, complete_journal_op, comp, NULL);
910
911 section_index = pl_index;
912
913 do {
914 size_t this_step;
915 struct page *src_pages[2];
916 struct page *dst_page;
917
918 while (unlikely(pl_index == section_index)) {
919 unsigned int dummy;
920
921 if (likely(encrypt))
922 rw_section_mac(ic, section, true);
923 section++;
924 n_sections--;
925 if (!n_sections)
926 break;
			page_list_location(ic, section, 0, &section_index, &dummy);
928 }
929
930 this_step = min(n_bytes, (size_t)PAGE_SIZE - pl_offset);
931 dst_page = target_pl[pl_index].page;
932 src_pages[0] = source_pl[pl_index].page;
933 src_pages[1] = ic->journal_xor[pl_index].page;
934
935 async_xor(dst_page, src_pages, pl_offset, 2, this_step, &submit);
936
937 pl_index++;
938 pl_offset = 0;
939 n_bytes -= this_step;
940 } while (n_bytes);
941
942 BUG_ON(n_sections);
943
944 async_tx_issue_pending_all();
945 }
946
static void complete_journal_encrypt(void *data, int err)
948 {
949 struct journal_completion *comp = data;
950
951 if (unlikely(err)) {
952 if (likely(err == -EINPROGRESS)) {
953 complete(&comp->ic->crypto_backoff);
954 return;
955 }
956 dm_integrity_io_error(comp->ic, "asynchronous encrypt", err);
957 }
958 complete_journal_op(comp);
959 }
960
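/*
 * Submit one skcipher request. Returns true if the request will complete
 * asynchronously via complete_journal_encrypt(), false if it finished (or
 * failed) synchronously. -EBUSY means the request was backlogged, so wait
 * for the backoff completion before returning.
 */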
static bool do_crypt(bool encrypt, struct skcipher_request *req, struct journal_completion *comp)
962 {
963 int r;
964
965 skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
966 complete_journal_encrypt, comp);
967 if (likely(encrypt))
968 r = crypto_skcipher_encrypt(req);
969 else
970 r = crypto_skcipher_decrypt(req);
971 if (likely(!r))
972 return false;
973 if (likely(r == -EINPROGRESS))
974 return true;
975 if (likely(r == -EBUSY)) {
976 wait_for_completion(&comp->ic->crypto_backoff);
977 reinit_completion(&comp->ic->crypto_backoff);
978 return true;
979 }
980 dm_integrity_io_error(comp->ic, "encrypt", r);
981 return false;
982 }
983
static void crypt_journal(struct dm_integrity_c *ic, bool encrypt, unsigned int section,
			  unsigned int n_sections, struct journal_completion *comp)
986 {
987 struct scatterlist **source_sg;
988 struct scatterlist **target_sg;
989
990 atomic_add(2, &comp->in_flight);
991
992 if (likely(encrypt)) {
993 source_sg = ic->journal_scatterlist;
994 target_sg = ic->journal_io_scatterlist;
995 } else {
996 source_sg = ic->journal_io_scatterlist;
997 target_sg = ic->journal_scatterlist;
998 }
999
1000 do {
1001 struct skcipher_request *req;
1002 unsigned int ivsize;
1003 char *iv;
1004
1005 if (likely(encrypt))
1006 rw_section_mac(ic, section, true);
1007
1008 req = ic->sk_requests[section];
1009 ivsize = crypto_skcipher_ivsize(ic->journal_crypt);
1010 iv = req->iv;
1011
1012 memcpy(iv, iv + ivsize, ivsize);
1013
1014 req->src = source_sg[section];
1015 req->dst = target_sg[section];
1016
1017 if (unlikely(do_crypt(encrypt, req, comp)))
1018 atomic_inc(&comp->in_flight);
1019
1020 section++;
1021 n_sections--;
1022 } while (n_sections);
1023
1024 atomic_dec(&comp->in_flight);
1025 complete_journal_op(comp);
1026 }
1027
static void encrypt_journal(struct dm_integrity_c *ic, bool encrypt, unsigned int section,
			    unsigned int n_sections, struct journal_completion *comp)
1030 {
1031 if (ic->journal_xor)
1032 return xor_journal(ic, encrypt, section, n_sections, comp);
1033 else
1034 return crypt_journal(ic, encrypt, section, n_sections, comp);
1035 }
1036
static void complete_journal_io(unsigned long error, void *context)
1038 {
1039 struct journal_completion *comp = context;
1040
1041 if (unlikely(error != 0))
1042 dm_integrity_io_error(comp->ic, "writing journal", -EIO);
1043 complete_journal_op(comp);
1044 }
1045
static void rw_journal_sectors(struct dm_integrity_c *ic, blk_opf_t opf,
			       unsigned int sector, unsigned int n_sectors,
			       struct journal_completion *comp)
1049 {
1050 struct dm_io_request io_req;
1051 struct dm_io_region io_loc;
1052 unsigned int pl_index, pl_offset;
1053 int r;
1054
1055 if (unlikely(dm_integrity_failed(ic))) {
1056 if (comp)
1057 complete_journal_io(-1UL, comp);
1058 return;
1059 }
1060
1061 pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT);
1062 pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1);
1063
1064 io_req.bi_opf = opf;
1065 io_req.mem.type = DM_IO_PAGE_LIST;
1066 if (ic->journal_io)
1067 io_req.mem.ptr.pl = &ic->journal_io[pl_index];
1068 else
1069 io_req.mem.ptr.pl = &ic->journal[pl_index];
1070 io_req.mem.offset = pl_offset;
1071 if (likely(comp != NULL)) {
1072 io_req.notify.fn = complete_journal_io;
1073 io_req.notify.context = comp;
1074 } else {
1075 io_req.notify.fn = NULL;
1076 }
1077 io_req.client = ic->io;
1078 io_loc.bdev = ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev;
1079 io_loc.sector = ic->start + SB_SECTORS + sector;
1080 io_loc.count = n_sectors;
1081
1082 r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT);
1083 if (unlikely(r)) {
1084 dm_integrity_io_error(ic, (opf & REQ_OP_MASK) == REQ_OP_READ ?
1085 "reading journal" : "writing journal", r);
1086 if (comp) {
1087 WARN_ONCE(1, "asynchronous dm_io failed: %d", r);
1088 complete_journal_io(-1UL, comp);
1089 }
1090 }
1091 }
1092
static void rw_journal(struct dm_integrity_c *ic, blk_opf_t opf,
		       unsigned int section, unsigned int n_sections,
		       struct journal_completion *comp)
1096 {
1097 unsigned int sector, n_sectors;
1098
1099 sector = section * ic->journal_section_sectors;
1100 n_sectors = n_sections * ic->journal_section_sectors;
1101
1102 rw_journal_sectors(ic, opf, sector, n_sectors, comp);
1103 }
1104
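/*
 * Write the given journal sections to disk, encrypting or MAC-ing them
 * first as configured. If the section range wraps around the end of the
 * journal, the write is split into two I/Os.
 */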
static void write_journal(struct dm_integrity_c *ic, unsigned int commit_start, unsigned int commit_sections)
1106 {
1107 struct journal_completion io_comp;
1108 struct journal_completion crypt_comp_1;
1109 struct journal_completion crypt_comp_2;
1110 unsigned int i;
1111
1112 io_comp.ic = ic;
1113 init_completion(&io_comp.comp);
1114
1115 if (commit_start + commit_sections <= ic->journal_sections) {
1116 io_comp.in_flight = (atomic_t)ATOMIC_INIT(1);
1117 if (ic->journal_io) {
1118 crypt_comp_1.ic = ic;
1119 init_completion(&crypt_comp_1.comp);
1120 crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0);
1121 encrypt_journal(ic, true, commit_start, commit_sections, &crypt_comp_1);
1122 wait_for_completion_io(&crypt_comp_1.comp);
1123 } else {
1124 for (i = 0; i < commit_sections; i++)
1125 rw_section_mac(ic, commit_start + i, true);
1126 }
1127 rw_journal(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, commit_start,
1128 commit_sections, &io_comp);
1129 } else {
1130 unsigned int to_end;
1131
1132 io_comp.in_flight = (atomic_t)ATOMIC_INIT(2);
1133 to_end = ic->journal_sections - commit_start;
1134 if (ic->journal_io) {
1135 crypt_comp_1.ic = ic;
1136 init_completion(&crypt_comp_1.comp);
1137 crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0);
1138 encrypt_journal(ic, true, commit_start, to_end, &crypt_comp_1);
1139 if (try_wait_for_completion(&crypt_comp_1.comp)) {
1140 rw_journal(ic, REQ_OP_WRITE | REQ_FUA,
1141 commit_start, to_end, &io_comp);
1142 reinit_completion(&crypt_comp_1.comp);
1143 crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0);
1144 encrypt_journal(ic, true, 0, commit_sections - to_end, &crypt_comp_1);
1145 wait_for_completion_io(&crypt_comp_1.comp);
1146 } else {
1147 crypt_comp_2.ic = ic;
1148 init_completion(&crypt_comp_2.comp);
1149 crypt_comp_2.in_flight = (atomic_t)ATOMIC_INIT(0);
1150 encrypt_journal(ic, true, 0, commit_sections - to_end, &crypt_comp_2);
1151 wait_for_completion_io(&crypt_comp_1.comp);
1152 rw_journal(ic, REQ_OP_WRITE | REQ_FUA, commit_start, to_end, &io_comp);
1153 wait_for_completion_io(&crypt_comp_2.comp);
1154 }
1155 } else {
1156 for (i = 0; i < to_end; i++)
1157 rw_section_mac(ic, commit_start + i, true);
1158 rw_journal(ic, REQ_OP_WRITE | REQ_FUA, commit_start, to_end, &io_comp);
1159 for (i = 0; i < commit_sections - to_end; i++)
1160 rw_section_mac(ic, i, true);
1161 }
1162 rw_journal(ic, REQ_OP_WRITE | REQ_FUA, 0, commit_sections - to_end, &io_comp);
1163 }
1164
1165 wait_for_completion_io(&io_comp.comp);
1166 }
1167
static void copy_from_journal(struct dm_integrity_c *ic, unsigned int section, unsigned int offset,
			      unsigned int n_sectors, sector_t target, io_notify_fn fn, void *data)
1170 {
1171 struct dm_io_request io_req;
1172 struct dm_io_region io_loc;
1173 int r;
1174 unsigned int sector, pl_index, pl_offset;
1175
1176 BUG_ON((target | n_sectors | offset) & (unsigned int)(ic->sectors_per_block - 1));
1177
1178 if (unlikely(dm_integrity_failed(ic))) {
1179 fn(-1UL, data);
1180 return;
1181 }
1182
1183 sector = section * ic->journal_section_sectors + JOURNAL_BLOCK_SECTORS + offset;
1184
1185 pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT);
1186 pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1);
1187
1188 io_req.bi_opf = REQ_OP_WRITE;
1189 io_req.mem.type = DM_IO_PAGE_LIST;
1190 io_req.mem.ptr.pl = &ic->journal[pl_index];
1191 io_req.mem.offset = pl_offset;
1192 io_req.notify.fn = fn;
1193 io_req.notify.context = data;
1194 io_req.client = ic->io;
1195 io_loc.bdev = ic->dev->bdev;
1196 io_loc.sector = target;
1197 io_loc.count = n_sectors;
1198
1199 r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT);
1200 if (unlikely(r)) {
1201 WARN_ONCE(1, "asynchronous dm_io failed: %d", r);
1202 fn(-1UL, data);
1203 }
1204 }
1205
static bool ranges_overlap(struct dm_integrity_range *range1, struct dm_integrity_range *range2)
1207 {
1208 return range1->logical_sector < range2->logical_sector + range2->n_sectors &&
1209 range1->logical_sector + range1->n_sectors > range2->logical_sector;
1210 }
1211
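/*
 * Try to insert a sector range into the tree of ranges currently being
 * processed. Returns false if it overlaps an in-flight (or, optionally,
 * waiting) range, in which case the caller queues itself on wait_list.
 */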
static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range, bool check_waiting)
1213 {
1214 struct rb_node **n = &ic->in_progress.rb_node;
1215 struct rb_node *parent;
1216
1217 BUG_ON((new_range->logical_sector | new_range->n_sectors) & (unsigned int)(ic->sectors_per_block - 1));
1218
1219 if (likely(check_waiting)) {
1220 struct dm_integrity_range *range;
1221
1222 list_for_each_entry(range, &ic->wait_list, wait_entry) {
1223 if (unlikely(ranges_overlap(range, new_range)))
1224 return false;
1225 }
1226 }
1227
1228 parent = NULL;
1229
1230 while (*n) {
1231 struct dm_integrity_range *range = container_of(*n, struct dm_integrity_range, node);
1232
1233 parent = *n;
1234 if (new_range->logical_sector + new_range->n_sectors <= range->logical_sector)
1235 n = &range->node.rb_left;
1236 else if (new_range->logical_sector >= range->logical_sector + range->n_sectors)
1237 n = &range->node.rb_right;
1238 else
1239 return false;
1240 }
1241
1242 rb_link_node(&new_range->node, parent, n);
1243 rb_insert_color(&new_range->node, &ic->in_progress);
1244
1245 return true;
1246 }
1247
static void remove_range_unlocked(struct dm_integrity_c *ic, struct dm_integrity_range *range)
1249 {
1250 rb_erase(&range->node, &ic->in_progress);
1251 while (unlikely(!list_empty(&ic->wait_list))) {
1252 struct dm_integrity_range *last_range =
1253 list_first_entry(&ic->wait_list, struct dm_integrity_range, wait_entry);
1254 struct task_struct *last_range_task;
1255
1256 last_range_task = last_range->task;
1257 list_del(&last_range->wait_entry);
1258 if (!add_new_range(ic, last_range, false)) {
1259 last_range->task = last_range_task;
1260 list_add(&last_range->wait_entry, &ic->wait_list);
1261 break;
1262 }
1263 last_range->waiting = false;
1264 wake_up_process(last_range_task);
1265 }
1266 }
1267
static void remove_range(struct dm_integrity_c *ic, struct dm_integrity_range *range)
1269 {
1270 unsigned long flags;
1271
1272 spin_lock_irqsave(&ic->endio_wait.lock, flags);
1273 remove_range_unlocked(ic, range);
1274 spin_unlock_irqrestore(&ic->endio_wait.lock, flags);
1275 }
1276
static void wait_and_add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range)
1278 {
1279 new_range->waiting = true;
1280 list_add_tail(&new_range->wait_entry, &ic->wait_list);
1281 new_range->task = current;
1282 do {
1283 __set_current_state(TASK_UNINTERRUPTIBLE);
1284 spin_unlock_irq(&ic->endio_wait.lock);
1285 io_schedule();
1286 spin_lock_irq(&ic->endio_wait.lock);
1287 } while (unlikely(new_range->waiting));
1288 }
1289
static void add_new_range_and_wait(struct dm_integrity_c *ic, struct dm_integrity_range *new_range)
1291 {
1292 if (unlikely(!add_new_range(ic, new_range, true)))
1293 wait_and_add_new_range(ic, new_range);
1294 }
1295
static void init_journal_node(struct journal_node *node)
1297 {
1298 RB_CLEAR_NODE(&node->node);
1299 node->sector = (sector_t)-1;
1300 }
1301
static void add_journal_node(struct dm_integrity_c *ic, struct journal_node *node, sector_t sector)
1303 {
1304 struct rb_node **link;
1305 struct rb_node *parent;
1306
1307 node->sector = sector;
1308 BUG_ON(!RB_EMPTY_NODE(&node->node));
1309
1310 link = &ic->journal_tree_root.rb_node;
1311 parent = NULL;
1312
1313 while (*link) {
1314 struct journal_node *j;
1315
1316 parent = *link;
1317 j = container_of(parent, struct journal_node, node);
1318 if (sector < j->sector)
1319 link = &j->node.rb_left;
1320 else
1321 link = &j->node.rb_right;
1322 }
1323
1324 rb_link_node(&node->node, parent, link);
1325 rb_insert_color(&node->node, &ic->journal_tree_root);
1326 }
1327
static void remove_journal_node(struct dm_integrity_c *ic, struct journal_node *node)
1329 {
1330 BUG_ON(RB_EMPTY_NODE(&node->node));
1331 rb_erase(&node->node, &ic->journal_tree_root);
1332 init_journal_node(node);
1333 }
1334
1335 #define NOT_FOUND (-1U)
1336
static unsigned int find_journal_node(struct dm_integrity_c *ic, sector_t sector, sector_t *next_sector)
1338 {
1339 struct rb_node *n = ic->journal_tree_root.rb_node;
1340 unsigned int found = NOT_FOUND;
1341
1342 *next_sector = (sector_t)-1;
1343 while (n) {
1344 struct journal_node *j = container_of(n, struct journal_node, node);
1345
1346 if (sector == j->sector)
1347 found = j - ic->journal_tree;
1348
1349 if (sector < j->sector) {
1350 *next_sector = j->sector;
1351 n = j->node.rb_left;
1352 } else
1353 n = j->node.rb_right;
1354 }
1355
1356 return found;
1357 }
1358
static bool test_journal_node(struct dm_integrity_c *ic, unsigned int pos, sector_t sector)
1360 {
1361 struct journal_node *node, *next_node;
1362 struct rb_node *next;
1363
1364 if (unlikely(pos >= ic->journal_entries))
1365 return false;
1366 node = &ic->journal_tree[pos];
1367 if (unlikely(RB_EMPTY_NODE(&node->node)))
1368 return false;
1369 if (unlikely(node->sector != sector))
1370 return false;
1371
1372 next = rb_next(&node->node);
1373 if (unlikely(!next))
1374 return true;
1375
1376 next_node = container_of(next, struct journal_node, node);
1377 return next_node->sector != sector;
1378 }
1379
static bool find_newer_committed_node(struct dm_integrity_c *ic, struct journal_node *node)
1381 {
1382 struct rb_node *next;
1383 struct journal_node *next_node;
1384 unsigned int next_section;
1385
1386 BUG_ON(RB_EMPTY_NODE(&node->node));
1387
1388 next = rb_next(&node->node);
1389 if (unlikely(!next))
1390 return false;
1391
1392 next_node = container_of(next, struct journal_node, node);
1393
1394 if (next_node->sector != node->sector)
1395 return false;
1396
1397 next_section = (unsigned int)(next_node - ic->journal_tree) / ic->journal_section_entries;
1398 if (next_section >= ic->committed_section &&
1399 next_section < ic->committed_section + ic->n_committed_sections)
1400 return true;
1401 if (next_section + ic->journal_sections < ic->committed_section + ic->n_committed_sections)
1402 return true;
1403
1404 return false;
1405 }
1406
1407 #define TAG_READ 0
1408 #define TAG_WRITE 1
1409 #define TAG_CMP 2
1410
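/*
 * Read, write or compare tags in the metadata area. For TAG_CMP the
 * comparison avoids control flow that depends on individual bytes; a block
 * whose stored tag matches neither the expected tag nor the discard filler
 * is reported by returning a nonzero remaining-byte count, 0 means all
 * tags matched.
 */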
static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, sector_t *metadata_block,
			       unsigned int *metadata_offset, unsigned int total_size, int op)
1413 {
1414 unsigned int hash_offset = 0;
1415 unsigned char mismatch_hash = 0;
1416 unsigned char mismatch_filler = !ic->discard;
1417
1418 do {
1419 unsigned char *data, *dp;
1420 struct dm_buffer *b;
1421 unsigned int to_copy;
1422 int r;
1423
1424 r = dm_integrity_failed(ic);
1425 if (unlikely(r))
1426 return r;
1427
1428 data = dm_bufio_read(ic->bufio, *metadata_block, &b);
1429 if (IS_ERR(data))
1430 return PTR_ERR(data);
1431
1432 to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size);
1433 dp = data + *metadata_offset;
1434 if (op == TAG_READ) {
1435 memcpy(tag, dp, to_copy);
1436 } else if (op == TAG_WRITE) {
1437 if (crypto_memneq(dp, tag, to_copy)) {
1438 memcpy(dp, tag, to_copy);
1439 dm_bufio_mark_partial_buffer_dirty(b, *metadata_offset, *metadata_offset + to_copy);
1440 }
1441 } else {
1442 /* e.g.: op == TAG_CMP */
1443
1444 if (likely(is_power_of_2(ic->tag_size))) {
1445 if (unlikely(crypto_memneq(dp, tag, to_copy)))
1446 goto thorough_test;
1447 } else {
1448 unsigned int i, ts;
1449 thorough_test:
1450 ts = total_size;
1451
1452 for (i = 0; i < to_copy; i++, ts--) {
1453 /*
1454 * Warning: the control flow must not be
1455 * dependent on match/mismatch of
1456 * individual bytes.
1457 */
1458 mismatch_hash |= dp[i] ^ tag[i];
1459 mismatch_filler |= dp[i] ^ DISCARD_FILLER;
1460 hash_offset++;
1461 if (unlikely(hash_offset == ic->tag_size)) {
1462 if (unlikely(mismatch_hash) && unlikely(mismatch_filler)) {
1463 dm_bufio_release(b);
1464 return ts;
1465 }
1466 hash_offset = 0;
1467 mismatch_hash = 0;
1468 mismatch_filler = !ic->discard;
1469 }
1470 }
1471 }
1472 }
1473 dm_bufio_release(b);
1474
1475 tag += to_copy;
1476 *metadata_offset += to_copy;
1477 if (unlikely(*metadata_offset == 1U << SECTOR_SHIFT << ic->log2_buffer_sectors)) {
1478 (*metadata_block)++;
1479 *metadata_offset = 0;
1480 }
1481
1482 if (unlikely(!is_power_of_2(ic->tag_size)))
1483 hash_offset = (hash_offset + to_copy) % ic->tag_size;
1484
1485 total_size -= to_copy;
1486 } while (unlikely(total_size));
1487
1488 return 0;
1489 }
1490
1491 struct flush_request {
1492 struct dm_io_request io_req;
1493 struct dm_io_region io_reg;
1494 struct dm_integrity_c *ic;
1495 struct completion comp;
1496 };
1497
static void flush_notify(unsigned long error, void *fr_)
1499 {
1500 struct flush_request *fr = fr_;
1501
1502 if (unlikely(error != 0))
1503 dm_integrity_io_error(fr->ic, "flushing disk cache", -EIO);
1504 complete(&fr->comp);
1505 }
1506
static void dm_integrity_flush_buffers(struct dm_integrity_c *ic, bool flush_data)
1508 {
1509 int r;
1510 struct flush_request fr;
1511
1512 if (!ic->meta_dev)
1513 flush_data = false;
1514 if (flush_data) {
1515 fr.io_req.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
1516 fr.io_req.mem.type = DM_IO_KMEM;
1517 fr.io_req.mem.ptr.addr = NULL;
1518 fr.io_req.notify.fn = flush_notify;
1519 fr.io_req.notify.context = &fr;
1520 fr.io_req.client = dm_bufio_get_dm_io_client(ic->bufio);
1521 fr.io_reg.bdev = ic->dev->bdev;
1522 fr.io_reg.sector = 0;
1523 fr.io_reg.count = 0;
1524 fr.ic = ic;
1525 init_completion(&fr.comp);
1526 r = dm_io(&fr.io_req, 1, &fr.io_reg, NULL, IOPRIO_DEFAULT);
1527 BUG_ON(r);
1528 }
1529
1530 r = dm_bufio_write_dirty_buffers(ic->bufio);
1531 if (unlikely(r))
1532 dm_integrity_io_error(ic, "writing tags", r);
1533
1534 if (flush_data)
1535 wait_for_completion(&fr.comp);
1536 }
1537
static void sleep_on_endio_wait(struct dm_integrity_c *ic)
1539 {
1540 DECLARE_WAITQUEUE(wait, current);
1541
1542 __add_wait_queue(&ic->endio_wait, &wait);
1543 __set_current_state(TASK_UNINTERRUPTIBLE);
1544 spin_unlock_irq(&ic->endio_wait.lock);
1545 io_schedule();
1546 spin_lock_irq(&ic->endio_wait.lock);
1547 __remove_wait_queue(&ic->endio_wait, &wait);
1548 }
1549
static void autocommit_fn(struct timer_list *t)
1551 {
1552 struct dm_integrity_c *ic = timer_container_of(ic, t,
1553 autocommit_timer);
1554
1555 if (likely(!dm_integrity_failed(ic)))
1556 queue_work(ic->commit_wq, &ic->commit_work);
1557 }
1558
static void schedule_autocommit(struct dm_integrity_c *ic)
1560 {
1561 if (!timer_pending(&ic->autocommit_timer))
1562 mod_timer(&ic->autocommit_timer, jiffies + ic->autocommit_jiffies);
1563 }
1564
static void submit_flush_bio(struct dm_integrity_c *ic, struct dm_integrity_io *dio)
1566 {
1567 struct bio *bio;
1568 unsigned long flags;
1569
1570 spin_lock_irqsave(&ic->endio_wait.lock, flags);
1571 bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
1572 bio_list_add(&ic->flush_bio_list, bio);
1573 spin_unlock_irqrestore(&ic->endio_wait.lock, flags);
1574
1575 queue_work(ic->commit_wq, &ic->commit_work);
1576 }
1577
static void do_endio(struct dm_integrity_c *ic, struct bio *bio)
1579 {
1580 int r;
1581
1582 r = dm_integrity_failed(ic);
1583 if (unlikely(r) && !bio->bi_status)
1584 bio->bi_status = errno_to_blk_status(r);
1585 if (unlikely(ic->synchronous_mode) && bio_op(bio) == REQ_OP_WRITE) {
1586 unsigned long flags;
1587
1588 spin_lock_irqsave(&ic->endio_wait.lock, flags);
1589 bio_list_add(&ic->synchronous_bios, bio);
1590 queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, 0);
1591 spin_unlock_irqrestore(&ic->endio_wait.lock, flags);
1592 return;
1593 }
1594 bio_endio(bio);
1595 }
1596
static void do_endio_flush(struct dm_integrity_c *ic, struct dm_integrity_io *dio)
1598 {
1599 struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
1600
1601 if (unlikely(dio->fua) && likely(!bio->bi_status) && likely(!dm_integrity_failed(ic)))
1602 submit_flush_bio(ic, dio);
1603 else
1604 do_endio(ic, bio);
1605 }
1606
static void dec_in_flight(struct dm_integrity_io *dio)
1608 {
1609 if (atomic_dec_and_test(&dio->in_flight)) {
1610 struct dm_integrity_c *ic = dio->ic;
1611 struct bio *bio;
1612
1613 remove_range(ic, &dio->range);
1614
1615 if (dio->op == REQ_OP_WRITE || unlikely(dio->op == REQ_OP_DISCARD))
1616 schedule_autocommit(ic);
1617
1618 bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
1619 if (unlikely(dio->bi_status) && !bio->bi_status)
1620 bio->bi_status = dio->bi_status;
1621 if (likely(!bio->bi_status) && unlikely(bio_sectors(bio) != dio->range.n_sectors)) {
1622 dio->range.logical_sector += dio->range.n_sectors;
1623 bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT);
1624 INIT_WORK(&dio->work, integrity_bio_wait);
1625 queue_work(ic->offload_wq, &dio->work);
1626 return;
1627 }
1628 do_endio_flush(ic, dio);
1629 }
1630 }
1631
static void integrity_end_io(struct bio *bio)
1633 {
1634 struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
1635
1636 dm_bio_restore(&dio->bio_details, bio);
1637 if (bio->bi_integrity)
1638 bio->bi_opf |= REQ_INTEGRITY;
1639
1640 if (dio->completion)
1641 complete(dio->completion);
1642
1643 dec_in_flight(dio);
1644 }
1645
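/*
 * Compute the tag for one block using the synchronous shash interface: the
 * hash covers the optional superblock salt, the little-endian sector number
 * and the block data. A digest shorter than the tag size is zero-padded.
 */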
static void integrity_sector_checksum_shash(struct dm_integrity_c *ic, sector_t sector,
					    const char *data, unsigned offset, char *result)
1648 {
1649 __le64 sector_le = cpu_to_le64(sector);
1650 SHASH_DESC_ON_STACK(req, ic->internal_shash);
1651 int r;
1652 unsigned int digest_size;
1653
1654 req->tfm = ic->internal_shash;
1655
1656 r = crypto_shash_init(req);
1657 if (unlikely(r < 0)) {
1658 dm_integrity_io_error(ic, "crypto_shash_init", r);
1659 goto failed;
1660 }
1661
1662 if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) {
1663 r = crypto_shash_update(req, (__u8 *)&ic->sb->salt, SALT_SIZE);
1664 if (unlikely(r < 0)) {
1665 dm_integrity_io_error(ic, "crypto_shash_update", r);
1666 goto failed;
1667 }
1668 }
1669
	r = crypto_shash_update(req, (const __u8 *)&sector_le, sizeof(sector_le));
1671 if (unlikely(r < 0)) {
1672 dm_integrity_io_error(ic, "crypto_shash_update", r);
1673 goto failed;
1674 }
1675
1676 r = crypto_shash_update(req, data + offset, ic->sectors_per_block << SECTOR_SHIFT);
1677 if (unlikely(r < 0)) {
1678 dm_integrity_io_error(ic, "crypto_shash_update", r);
1679 goto failed;
1680 }
1681
1682 r = crypto_shash_final(req, result);
1683 if (unlikely(r < 0)) {
1684 dm_integrity_io_error(ic, "crypto_shash_final", r);
1685 goto failed;
1686 }
1687
1688 digest_size = ic->internal_hash_digestsize;
1689 if (unlikely(digest_size < ic->tag_size))
1690 memset(result + digest_size, 0, ic->tag_size - digest_size);
1691
1692 return;
1693
1694 failed:
1695 /* this shouldn't happen anyway, the hash functions have no reason to fail */
1696 get_random_bytes(result, ic->tag_size);
1697 }
1698
static void integrity_sector_checksum_ahash(struct dm_integrity_c *ic, struct ahash_request **ahash_req,
					    sector_t sector, struct page *page, unsigned offset, char *result)
1701 {
1702 __le64 sector_le = cpu_to_le64(sector);
1703 struct ahash_request *req;
1704 DECLARE_CRYPTO_WAIT(wait);
1705 struct scatterlist sg[3], *s = sg;
1706 int r;
1707 unsigned int digest_size;
1708 unsigned int nbytes = 0;
1709
1710 might_sleep();
1711
1712 req = *ahash_req;
1713 if (unlikely(!req)) {
1714 req = mempool_alloc(&ic->ahash_req_pool, GFP_NOIO);
1715 *ahash_req = req;
1716 }
1717
1718 ahash_request_set_tfm(req, ic->internal_ahash);
1719 ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, crypto_req_done, &wait);
1720
1721 if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) {
1722 sg_init_table(sg, 3);
1723 sg_set_buf(s, (const __u8 *)&ic->sb->salt, SALT_SIZE);
1724 nbytes += SALT_SIZE;
1725 s++;
1726 } else {
1727 sg_init_table(sg, 2);
1728 }
1729
	if (likely(!is_vmalloc_addr(&sector_le))) {
		sg_set_buf(s, &sector_le, sizeof(sector_le));
	} else {
		struct page *sec_page = vmalloc_to_page(&sector_le);
		unsigned int sec_off = offset_in_page(&sector_le);

		sg_set_page(s, sec_page, sizeof(sector_le), sec_off);
	}
1737 nbytes += sizeof(sector_le);
1738 s++;
1739
1740 sg_set_page(s, page, ic->sectors_per_block << SECTOR_SHIFT, offset);
1741 nbytes += ic->sectors_per_block << SECTOR_SHIFT;
1742
1743 ahash_request_set_crypt(req, sg, result, nbytes);
1744
1745 r = crypto_wait_req(crypto_ahash_digest(req), &wait);
1746 if (unlikely(r)) {
1747 dm_integrity_io_error(ic, "crypto_ahash_digest", r);
1748 goto failed;
1749 }
1750
1751 digest_size = ic->internal_hash_digestsize;
1752 if (unlikely(digest_size < ic->tag_size))
1753 memset(result + digest_size, 0, ic->tag_size - digest_size);
1754
1755 return;
1756
1757 failed:
1758 /* this shouldn't happen anyway, the hash functions have no reason to fail */
1759 get_random_bytes(result, ic->tag_size);
1760 }
1761
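/*
 * Dispatch to the shash or ahash implementation. In the ahash case, "data"
 * is actually a struct page pointer produced by integrity_kmap() /
 * integrity_identity() rather than a mapped address.
 */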
1762 static void integrity_sector_checksum(struct dm_integrity_c *ic, struct ahash_request **ahash_req,
1763 sector_t sector, const char *data, unsigned offset, char *result)
1764 {
1765 if (likely(ic->internal_shash != NULL))
1766 integrity_sector_checksum_shash(ic, sector, data, offset, result);
1767 else
1768 integrity_sector_checksum_ahash(ic, ahash_req, sector, (struct page *)data, offset, result);
1769 }
1770
1771 static void *integrity_kmap(struct dm_integrity_c *ic, struct page *p)
1772 {
1773 if (likely(ic->internal_shash != NULL))
1774 return kmap_local_page(p);
1775 else
1776 return p;
1777 }
1778
1779 static void integrity_kunmap(struct dm_integrity_c *ic, const void *ptr)
1780 {
1781 if (likely(ic->internal_shash != NULL))
1782 kunmap_local(ptr);
1783 }
1784
1785 static void *integrity_identity(struct dm_integrity_c *ic, void *data)
1786 {
1787 #ifdef CONFIG_DEBUG_SG
1788 BUG_ON(offset_in_page(data));
1789 BUG_ON(!virt_addr_valid(data));
1790 #endif
1791 if (likely(ic->internal_shash != NULL))
1792 return data;
1793 else
1794 return virt_to_page(data);
1795 }
1796
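/*
 * Slow path used when tag verification of a read fails: re-read the affected
 * sectors synchronously with dm_io into a page from the recheck mempool,
 * verify them again and only report -EILSEQ if the mismatch persists;
 * otherwise copy the re-read data into the original bio.
 */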
1797 static noinline void integrity_recheck(struct dm_integrity_io *dio, char *checksum)
1798 {
1799 struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
1800 struct dm_integrity_c *ic = dio->ic;
1801 struct bvec_iter iter;
1802 struct bio_vec bv;
1803 sector_t sector, logical_sector, area, offset;
1804 struct page *page;
1805
1806 get_area_and_offset(ic, dio->range.logical_sector, &area, &offset);
1807 dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset,
1808 &dio->metadata_offset);
1809 sector = get_data_sector(ic, area, offset);
1810 logical_sector = dio->range.logical_sector;
1811
1812 page = mempool_alloc(&ic->recheck_pool, GFP_NOIO);
1813
1814 __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) {
1815 unsigned pos = 0;
1816
1817 do {
1818 sector_t alignment;
1819 char *mem;
1820 char *buffer = page_to_virt(page);
1821 unsigned int buffer_offset;
1822 int r;
1823 struct dm_io_request io_req;
1824 struct dm_io_region io_loc;
1825 io_req.bi_opf = REQ_OP_READ;
1826 io_req.mem.type = DM_IO_KMEM;
1827 io_req.mem.ptr.addr = buffer;
1828 io_req.notify.fn = NULL;
1829 io_req.client = ic->io;
1830 io_loc.bdev = ic->dev->bdev;
1831 io_loc.sector = sector;
1832 io_loc.count = ic->sectors_per_block;
1833
1834 /* Align the bio to logical block size */
1835 alignment = dio->range.logical_sector | bio_sectors(bio) | (PAGE_SIZE >> SECTOR_SHIFT);
1836 alignment &= -alignment;
1837 io_loc.sector = round_down(io_loc.sector, alignment);
1838 io_loc.count += sector - io_loc.sector;
1839 buffer_offset = (sector - io_loc.sector) << SECTOR_SHIFT;
1840 io_loc.count = round_up(io_loc.count, alignment);
1841
1842 r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT);
1843 if (unlikely(r)) {
1844 dio->bi_status = errno_to_blk_status(r);
1845 goto free_ret;
1846 }
1847
1848 integrity_sector_checksum(ic, &dio->ahash_req, logical_sector, integrity_identity(ic, buffer), buffer_offset, checksum);
1849 r = dm_integrity_rw_tag(ic, checksum, &dio->metadata_block,
1850 &dio->metadata_offset, ic->tag_size, TAG_CMP);
1851 if (r) {
1852 if (r > 0) {
1853 DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx",
1854 bio->bi_bdev, logical_sector);
1855 atomic64_inc(&ic->number_of_mismatches);
1856 dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum",
1857 bio, logical_sector, 0);
1858 r = -EILSEQ;
1859 }
1860 dio->bi_status = errno_to_blk_status(r);
1861 goto free_ret;
1862 }
1863
1864 mem = bvec_kmap_local(&bv);
1865 memcpy(mem + pos, buffer + buffer_offset, ic->sectors_per_block << SECTOR_SHIFT);
1866 kunmap_local(mem);
1867
1868 pos += ic->sectors_per_block << SECTOR_SHIFT;
1869 sector += ic->sectors_per_block;
1870 logical_sector += ic->sectors_per_block;
1871 } while (pos < bv.bv_len);
1872 }
1873 free_ret:
1874 mempool_free(page, &ic->recheck_pool);
1875 }
1876
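/*
 * Work item that processes the tag area for one bio. With an internal hash,
 * checksums are computed over the bio data and written (for writes) or
 * compared (for reads, falling back to integrity_recheck on mismatch);
 * discards store DISCARD_FILLER tags. Without an internal hash, the tags
 * carried in the bio integrity payload are copied to or from the tag area.
 */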
1877 static void integrity_metadata(struct work_struct *w)
1878 {
1879 struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
1880 struct dm_integrity_c *ic = dio->ic;
1881
1882 int r;
1883
1884 if (ic->internal_hash) {
1885 struct bvec_iter iter;
1886 struct bio_vec bv;
1887 unsigned int digest_size = ic->internal_hash_digestsize;
1888 struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
1889 char *checksums;
1890 unsigned int extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
1891 char checksums_onstack[MAX_T(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)];
1892 sector_t sector;
1893 unsigned int sectors_to_process;
1894
1895 if (unlikely(ic->mode == 'R'))
1896 goto skip_io;
1897
1898 if (likely(dio->op != REQ_OP_DISCARD))
1899 checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
1900 GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
1901 else
1902 checksums = kmalloc(PAGE_SIZE, GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
1903 if (!checksums) {
1904 checksums = checksums_onstack;
1905 if (WARN_ON(extra_space &&
1906 digest_size > sizeof(checksums_onstack))) {
1907 r = -EINVAL;
1908 goto error;
1909 }
1910 }
1911
1912 if (unlikely(dio->op == REQ_OP_DISCARD)) {
1913 unsigned int bi_size = dio->bio_details.bi_iter.bi_size;
1914 unsigned int max_size = likely(checksums != checksums_onstack) ? PAGE_SIZE : HASH_MAX_DIGESTSIZE;
1915 unsigned int max_blocks = max_size / ic->tag_size;
1916
1917 memset(checksums, DISCARD_FILLER, max_size);
1918
1919 while (bi_size) {
1920 unsigned int this_step_blocks = bi_size >> (SECTOR_SHIFT + ic->sb->log2_sectors_per_block);
1921
1922 this_step_blocks = min(this_step_blocks, max_blocks);
1923 r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
1924 this_step_blocks * ic->tag_size, TAG_WRITE);
1925 if (unlikely(r)) {
1926 if (likely(checksums != checksums_onstack))
1927 kfree(checksums);
1928 goto error;
1929 }
1930
1931 bi_size -= this_step_blocks << (SECTOR_SHIFT + ic->sb->log2_sectors_per_block);
1932 }
1933
1934 if (likely(checksums != checksums_onstack))
1935 kfree(checksums);
1936 goto skip_io;
1937 }
1938
1939 sector = dio->range.logical_sector;
1940 sectors_to_process = dio->range.n_sectors;
1941
1942 __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) {
1943 struct bio_vec bv_copy = bv;
1944 unsigned int pos;
1945 char *mem, *checksums_ptr;
1946
1947 again:
1948 mem = integrity_kmap(ic, bv_copy.bv_page);
1949 pos = 0;
1950 checksums_ptr = checksums;
1951 do {
1952 integrity_sector_checksum(ic, &dio->ahash_req, sector, mem, bv_copy.bv_offset + pos, checksums_ptr);
1953 checksums_ptr += ic->tag_size;
1954 sectors_to_process -= ic->sectors_per_block;
1955 pos += ic->sectors_per_block << SECTOR_SHIFT;
1956 sector += ic->sectors_per_block;
1957 } while (pos < bv_copy.bv_len && sectors_to_process && checksums != checksums_onstack);
1958 integrity_kunmap(ic, mem);
1959
1960 r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
1961 checksums_ptr - checksums, dio->op == REQ_OP_READ ? TAG_CMP : TAG_WRITE);
1962 if (unlikely(r)) {
1963 if (likely(checksums != checksums_onstack))
1964 kfree(checksums);
1965 if (r > 0) {
1966 integrity_recheck(dio, checksums_onstack);
1967 goto skip_io;
1968 }
1969 goto error;
1970 }
1971
1972 if (!sectors_to_process)
1973 break;
1974
1975 if (unlikely(pos < bv_copy.bv_len)) {
1976 bv_copy.bv_offset += pos;
1977 bv_copy.bv_len -= pos;
1978 goto again;
1979 }
1980 }
1981
1982 if (likely(checksums != checksums_onstack))
1983 kfree(checksums);
1984 } else {
1985 struct bio_integrity_payload *bip = dio->bio_details.bi_integrity;
1986
1987 if (bip) {
1988 struct bio_vec biv;
1989 struct bvec_iter iter;
1990 unsigned int data_to_process = dio->range.n_sectors;
1991
1992 sector_to_block(ic, data_to_process);
1993 data_to_process *= ic->tag_size;
1994
1995 bip_for_each_vec(biv, bip, iter) {
1996 unsigned char *tag;
1997 unsigned int this_len;
1998
1999 BUG_ON(PageHighMem(biv.bv_page));
2000 tag = bvec_virt(&biv);
2001 this_len = min(biv.bv_len, data_to_process);
2002 r = dm_integrity_rw_tag(ic, tag, &dio->metadata_block, &dio->metadata_offset,
2003 this_len, dio->op == REQ_OP_READ ? TAG_READ : TAG_WRITE);
2004 if (unlikely(r))
2005 goto error;
2006 data_to_process -= this_len;
2007 if (!data_to_process)
2008 break;
2009 }
2010 }
2011 }
2012 skip_io:
2013 dec_in_flight(dio);
2014 return;
2015 error:
2016 dio->bi_status = errno_to_blk_status(r);
2017 dec_in_flight(dio);
2018 }
2019
2020 static inline bool dm_integrity_check_limits(struct dm_integrity_c *ic, sector_t logical_sector, struct bio *bio)
2021 {
2022 if (unlikely(logical_sector + bio_sectors(bio) > ic->provided_data_sectors)) {
2023 DMERR("Too big sector number: 0x%llx + 0x%x > 0x%llx",
2024 logical_sector, bio_sectors(bio),
2025 ic->provided_data_sectors);
2026 return false;
2027 }
2028 if (unlikely((logical_sector | bio_sectors(bio)) & (unsigned int)(ic->sectors_per_block - 1))) {
2029 DMERR("Bio not aligned on %u sectors: 0x%llx, 0x%x",
2030 ic->sectors_per_block,
2031 logical_sector, bio_sectors(bio));
2032 return false;
2033 }
2034 if (ic->sectors_per_block > 1 && likely(bio_op(bio) != REQ_OP_DISCARD)) {
2035 struct bvec_iter iter;
2036 struct bio_vec bv;
2037
2038 bio_for_each_segment(bv, bio, iter) {
2039 if (unlikely(bv.bv_len & ((ic->sectors_per_block << SECTOR_SHIFT) - 1))) {
2040 DMERR("Bio vector (%u,%u) is not aligned on %u-sector boundary",
2041 bv.bv_offset, bv.bv_len, ic->sectors_per_block);
2042 return false;
2043 }
2044 }
2045 }
2046 return true;
2047 }
2048
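/*
 * The map callback: initialize per-bio state, handle flushes and discard
 * splitting, check size and alignment limits, translate the logical sector
 * to the data area and continue either on the inline path (mode 'I') or in
 * dm_integrity_map_continue().
 */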
2049 static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
2050 {
2051 struct dm_integrity_c *ic = ti->private;
2052 struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
2053 struct bio_integrity_payload *bip;
2054
2055 sector_t area, offset;
2056
2057 dio->ic = ic;
2058 dio->bi_status = 0;
2059 dio->op = bio_op(bio);
2060 dio->ahash_req = NULL;
2061
2062 if (ic->mode == 'I') {
2063 bio->bi_iter.bi_sector = dm_target_offset(ic->ti, bio->bi_iter.bi_sector);
2064 dio->integrity_payload = NULL;
2065 dio->integrity_payload_from_mempool = false;
2066 dio->integrity_range_locked = false;
2067 return dm_integrity_map_inline(dio, true);
2068 }
2069
2070 if (unlikely(dio->op == REQ_OP_DISCARD)) {
2071 if (ti->max_io_len) {
2072 sector_t sec = dm_target_offset(ti, bio->bi_iter.bi_sector);
2073 unsigned int log2_max_io_len = __fls(ti->max_io_len);
2074 sector_t start_boundary = sec >> log2_max_io_len;
2075 sector_t end_boundary = (sec + bio_sectors(bio) - 1) >> log2_max_io_len;
2076
2077 if (start_boundary < end_boundary) {
2078 sector_t len = ti->max_io_len - (sec & (ti->max_io_len - 1));
2079
2080 dm_accept_partial_bio(bio, len);
2081 }
2082 }
2083 }
2084
2085 if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
2086 submit_flush_bio(ic, dio);
2087 return DM_MAPIO_SUBMITTED;
2088 }
2089
2090 dio->range.logical_sector = dm_target_offset(ti, bio->bi_iter.bi_sector);
2091 dio->fua = dio->op == REQ_OP_WRITE && bio->bi_opf & REQ_FUA;
2092 if (unlikely(dio->fua)) {
2093 /*
2094 * Don't pass down the FUA flag because we have to flush
2095 * disk cache anyway.
2096 */
2097 bio->bi_opf &= ~REQ_FUA;
2098 }
2099 if (unlikely(!dm_integrity_check_limits(ic, dio->range.logical_sector, bio)))
2100 return DM_MAPIO_KILL;
2101
2102 bip = bio_integrity(bio);
2103 if (!ic->internal_hash) {
2104 if (bip) {
2105 unsigned int wanted_tag_size = bio_sectors(bio) >> ic->sb->log2_sectors_per_block;
2106
2107 if (ic->log2_tag_size >= 0)
2108 wanted_tag_size <<= ic->log2_tag_size;
2109 else
2110 wanted_tag_size *= ic->tag_size;
2111 if (unlikely(wanted_tag_size != bip->bip_iter.bi_size)) {
2112 DMERR("Invalid integrity data size %u, expected %u",
2113 bip->bip_iter.bi_size, wanted_tag_size);
2114 return DM_MAPIO_KILL;
2115 }
2116 }
2117 } else {
2118 if (unlikely(bip != NULL)) {
2119 DMERR("Unexpected integrity data when using internal hash");
2120 return DM_MAPIO_KILL;
2121 }
2122 }
2123
2124 if (unlikely(ic->mode == 'R') && unlikely(dio->op != REQ_OP_READ))
2125 return DM_MAPIO_KILL;
2126
2127 get_area_and_offset(ic, dio->range.logical_sector, &area, &offset);
2128 dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, &dio->metadata_offset);
2129 bio->bi_iter.bi_sector = get_data_sector(ic, area, offset);
2130
2131 dm_integrity_map_continue(dio, true);
2132 return DM_MAPIO_SUBMITTED;
2133 }
2134
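/*
 * Copy bio data to (writes) or from (reads) the in-memory journal entries
 * that were allocated or looked up by the caller. Returns true if the bio
 * does not fit into the given entries and the caller must retake the lock
 * and process the rest of the bio.
 */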
2135 static bool __journal_read_write(struct dm_integrity_io *dio, struct bio *bio,
2136 unsigned int journal_section, unsigned int journal_entry)
2137 {
2138 struct dm_integrity_c *ic = dio->ic;
2139 sector_t logical_sector;
2140 unsigned int n_sectors;
2141
2142 logical_sector = dio->range.logical_sector;
2143 n_sectors = dio->range.n_sectors;
2144 do {
2145 struct bio_vec bv = bio_iovec(bio);
2146 char *mem;
2147
2148 if (unlikely(bv.bv_len >> SECTOR_SHIFT > n_sectors))
2149 bv.bv_len = n_sectors << SECTOR_SHIFT;
2150 n_sectors -= bv.bv_len >> SECTOR_SHIFT;
2151 bio_advance_iter(bio, &bio->bi_iter, bv.bv_len);
2152 retry_kmap:
2153 mem = kmap_local_page(bv.bv_page);
2154 if (likely(dio->op == REQ_OP_WRITE))
2155 flush_dcache_page(bv.bv_page);
2156
2157 do {
2158 struct journal_entry *je = access_journal_entry(ic, journal_section, journal_entry);
2159
2160 if (unlikely(dio->op == REQ_OP_READ)) {
2161 struct journal_sector *js;
2162 char *mem_ptr;
2163 unsigned int s;
2164
2165 if (unlikely(journal_entry_is_inprogress(je))) {
2166 flush_dcache_page(bv.bv_page);
2167 kunmap_local(mem);
2168
2169 __io_wait_event(ic->copy_to_journal_wait, !journal_entry_is_inprogress(je));
2170 goto retry_kmap;
2171 }
2172 smp_rmb();
2173 BUG_ON(journal_entry_get_sector(je) != logical_sector);
2174 js = access_journal_data(ic, journal_section, journal_entry);
2175 mem_ptr = mem + bv.bv_offset;
2176 s = 0;
2177 do {
2178 memcpy(mem_ptr, js, JOURNAL_SECTOR_DATA);
2179 *(commit_id_t *)(mem_ptr + JOURNAL_SECTOR_DATA) = je->last_bytes[s];
2180 js++;
2181 mem_ptr += 1 << SECTOR_SHIFT;
2182 } while (++s < ic->sectors_per_block);
2183 }
2184
2185 if (!ic->internal_hash) {
2186 struct bio_integrity_payload *bip = bio_integrity(bio);
2187 unsigned int tag_todo = ic->tag_size;
2188 char *tag_ptr = journal_entry_tag(ic, je);
2189
2190 if (bip) {
2191 do {
2192 struct bio_vec biv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter);
2193 unsigned int tag_now = min(biv.bv_len, tag_todo);
2194 char *tag_addr;
2195
2196 BUG_ON(PageHighMem(biv.bv_page));
2197 tag_addr = bvec_virt(&biv);
2198 if (likely(dio->op == REQ_OP_WRITE))
2199 memcpy(tag_ptr, tag_addr, tag_now);
2200 else
2201 memcpy(tag_addr, tag_ptr, tag_now);
2202 bvec_iter_advance(bip->bip_vec, &bip->bip_iter, tag_now);
2203 tag_ptr += tag_now;
2204 tag_todo -= tag_now;
2205 } while (unlikely(tag_todo));
2206 } else if (likely(dio->op == REQ_OP_WRITE))
2207 memset(tag_ptr, 0, tag_todo);
2208 }
2209
2210 if (likely(dio->op == REQ_OP_WRITE)) {
2211 struct journal_sector *js;
2212 unsigned int s;
2213
2214 js = access_journal_data(ic, journal_section, journal_entry);
2215 memcpy(js, mem + bv.bv_offset, ic->sectors_per_block << SECTOR_SHIFT);
2216
2217 s = 0;
2218 do {
2219 je->last_bytes[s] = js[s].commit_id;
2220 } while (++s < ic->sectors_per_block);
2221
2222 if (ic->internal_hash) {
2223 unsigned int digest_size = ic->internal_hash_digestsize;
2224 void *js_page = integrity_identity(ic, (char *)js - offset_in_page(js));
2225 unsigned js_offset = offset_in_page(js);
2226
2227 if (unlikely(digest_size > ic->tag_size)) {
2228 char checksums_onstack[HASH_MAX_DIGESTSIZE];
2229
2230 integrity_sector_checksum(ic, &dio->ahash_req, logical_sector, js_page, js_offset, checksums_onstack);
2231 memcpy(journal_entry_tag(ic, je), checksums_onstack, ic->tag_size);
2232 } else
2233 integrity_sector_checksum(ic, &dio->ahash_req, logical_sector, js_page, js_offset, journal_entry_tag(ic, je));
2234 }
2235
2236 journal_entry_set_sector(je, logical_sector);
2237 }
2238 logical_sector += ic->sectors_per_block;
2239
2240 journal_entry++;
2241 if (unlikely(journal_entry == ic->journal_section_entries)) {
2242 journal_entry = 0;
2243 journal_section++;
2244 wraparound_section(ic, &journal_section);
2245 }
2246
2247 bv.bv_offset += ic->sectors_per_block << SECTOR_SHIFT;
2248 } while (bv.bv_len -= ic->sectors_per_block << SECTOR_SHIFT);
2249
2250 if (unlikely(dio->op == REQ_OP_READ))
2251 flush_dcache_page(bv.bv_page);
2252 kunmap_local(mem);
2253 } while (n_sectors);
2254
2255 if (likely(dio->op == REQ_OP_WRITE)) {
2256 smp_mb();
2257 if (unlikely(waitqueue_active(&ic->copy_to_journal_wait)))
2258 wake_up(&ic->copy_to_journal_wait);
2259 if (READ_ONCE(ic->free_sectors) <= ic->free_sectors_threshold)
2260 queue_work(ic->commit_wq, &ic->commit_work);
2261 else
2262 schedule_autocommit(ic);
2263 } else
2264 remove_range(ic, &dio->range);
2265
2266 if (unlikely(bio->bi_iter.bi_size)) {
2267 sector_t area, offset;
2268
2269 dio->range.logical_sector = logical_sector;
2270 get_area_and_offset(ic, dio->range.logical_sector, &area, &offset);
2271 dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, &dio->metadata_offset);
2272 return true;
2273 }
2274
2275 return false;
2276 }
2277
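/*
 * The main I/O path for the journal and bitmap modes: allocate journal
 * entries for writes in mode 'J', look reads up in the journal, lock the
 * range, and either service the bio from the journal or remap it to the
 * data device. Reads verified with an internal hash wait for completion so
 * that the checksums can be checked before the bio is ended.
 */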
2278 static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map)
2279 {
2280 struct dm_integrity_c *ic = dio->ic;
2281 struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
2282 unsigned int journal_section, journal_entry;
2283 unsigned int journal_read_pos;
2284 sector_t recalc_sector;
2285 struct completion read_comp;
2286 bool discard_retried = false;
2287 bool need_sync_io = ic->internal_hash && dio->op == REQ_OP_READ;
2288
2289 if (unlikely(dio->op == REQ_OP_DISCARD) && ic->mode != 'D')
2290 need_sync_io = true;
2291
2292 if (need_sync_io && from_map) {
2293 INIT_WORK(&dio->work, integrity_bio_wait);
2294 queue_work(ic->offload_wq, &dio->work);
2295 return;
2296 }
2297
2298 lock_retry:
2299 spin_lock_irq(&ic->endio_wait.lock);
2300 retry:
2301 if (unlikely(dm_integrity_failed(ic))) {
2302 spin_unlock_irq(&ic->endio_wait.lock);
2303 do_endio(ic, bio);
2304 return;
2305 }
2306 dio->range.n_sectors = bio_sectors(bio);
2307 journal_read_pos = NOT_FOUND;
2308 if (ic->mode == 'J' && likely(dio->op != REQ_OP_DISCARD)) {
2309 if (dio->op == REQ_OP_WRITE) {
2310 unsigned int next_entry, i, pos;
2311 unsigned int ws, we, range_sectors;
2312
2313 dio->range.n_sectors = min(dio->range.n_sectors,
2314 (sector_t)ic->free_sectors << ic->sb->log2_sectors_per_block);
2315 if (unlikely(!dio->range.n_sectors)) {
2316 if (from_map)
2317 goto offload_to_thread;
2318 sleep_on_endio_wait(ic);
2319 goto retry;
2320 }
2321 range_sectors = dio->range.n_sectors >> ic->sb->log2_sectors_per_block;
2322 ic->free_sectors -= range_sectors;
2323 journal_section = ic->free_section;
2324 journal_entry = ic->free_section_entry;
2325
2326 next_entry = ic->free_section_entry + range_sectors;
2327 ic->free_section_entry = next_entry % ic->journal_section_entries;
2328 ic->free_section += next_entry / ic->journal_section_entries;
2329 ic->n_uncommitted_sections += next_entry / ic->journal_section_entries;
2330 wraparound_section(ic, &ic->free_section);
2331
2332 pos = journal_section * ic->journal_section_entries + journal_entry;
2333 ws = journal_section;
2334 we = journal_entry;
2335 i = 0;
2336 do {
2337 struct journal_entry *je;
2338
2339 add_journal_node(ic, &ic->journal_tree[pos], dio->range.logical_sector + i);
2340 pos++;
2341 if (unlikely(pos >= ic->journal_entries))
2342 pos = 0;
2343
2344 je = access_journal_entry(ic, ws, we);
2345 BUG_ON(!journal_entry_is_unused(je));
2346 journal_entry_set_inprogress(je);
2347 we++;
2348 if (unlikely(we == ic->journal_section_entries)) {
2349 we = 0;
2350 ws++;
2351 wraparound_section(ic, &ws);
2352 }
2353 } while ((i += ic->sectors_per_block) < dio->range.n_sectors);
2354
2355 spin_unlock_irq(&ic->endio_wait.lock);
2356 goto journal_read_write;
2357 } else {
2358 sector_t next_sector;
2359
2360 journal_read_pos = find_journal_node(ic, dio->range.logical_sector, &next_sector);
2361 if (likely(journal_read_pos == NOT_FOUND)) {
2362 if (unlikely(dio->range.n_sectors > next_sector - dio->range.logical_sector))
2363 dio->range.n_sectors = next_sector - dio->range.logical_sector;
2364 } else {
2365 unsigned int i;
2366 unsigned int jp = journal_read_pos + 1;
2367
2368 for (i = ic->sectors_per_block; i < dio->range.n_sectors; i += ic->sectors_per_block, jp++) {
2369 if (!test_journal_node(ic, jp, dio->range.logical_sector + i))
2370 break;
2371 }
2372 dio->range.n_sectors = i;
2373 }
2374 }
2375 }
2376 if (unlikely(!add_new_range(ic, &dio->range, true))) {
2377 /*
2378 * We must not sleep in the request routine because it could
2379 * stall bios on current->bio_list.
2380 * So, we offload the bio to a workqueue if we have to sleep.
2381 */
2382 if (from_map) {
2383 offload_to_thread:
2384 spin_unlock_irq(&ic->endio_wait.lock);
2385 INIT_WORK(&dio->work, integrity_bio_wait);
2386 queue_work(ic->wait_wq, &dio->work);
2387 return;
2388 }
2389 if (journal_read_pos != NOT_FOUND)
2390 dio->range.n_sectors = ic->sectors_per_block;
2391 wait_and_add_new_range(ic, &dio->range);
2392 /*
2393 * wait_and_add_new_range drops the spinlock, so the journal
2394 * may have been changed arbitrarily. We need to recheck.
2395 * To simplify the code, we restrict I/O size to just one block.
2396 */
2397 if (journal_read_pos != NOT_FOUND) {
2398 sector_t next_sector;
2399 unsigned int new_pos;
2400
2401 new_pos = find_journal_node(ic, dio->range.logical_sector, &next_sector);
2402 if (unlikely(new_pos != journal_read_pos)) {
2403 remove_range_unlocked(ic, &dio->range);
2404 goto retry;
2405 }
2406 }
2407 }
2408 if (ic->mode == 'J' && likely(dio->op == REQ_OP_DISCARD) && !discard_retried) {
2409 sector_t next_sector;
2410 unsigned int new_pos;
2411
2412 new_pos = find_journal_node(ic, dio->range.logical_sector, &next_sector);
2413 if (unlikely(new_pos != NOT_FOUND) ||
2414 unlikely(next_sector < dio->range.logical_sector - dio->range.n_sectors)) {
2415 remove_range_unlocked(ic, &dio->range);
2416 spin_unlock_irq(&ic->endio_wait.lock);
2417 queue_work(ic->commit_wq, &ic->commit_work);
2418 flush_workqueue(ic->commit_wq);
2419 queue_work(ic->writer_wq, &ic->writer_work);
2420 flush_workqueue(ic->writer_wq);
2421 discard_retried = true;
2422 goto lock_retry;
2423 }
2424 }
2425 recalc_sector = le64_to_cpu(ic->sb->recalc_sector);
2426 spin_unlock_irq(&ic->endio_wait.lock);
2427
2428 if (unlikely(journal_read_pos != NOT_FOUND)) {
2429 journal_section = journal_read_pos / ic->journal_section_entries;
2430 journal_entry = journal_read_pos % ic->journal_section_entries;
2431 goto journal_read_write;
2432 }
2433
2434 if (ic->mode == 'B' && (dio->op == REQ_OP_WRITE || unlikely(dio->op == REQ_OP_DISCARD))) {
2435 if (!block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector,
2436 dio->range.n_sectors, BITMAP_OP_TEST_ALL_SET)) {
2437 struct bitmap_block_status *bbs;
2438
2439 bbs = sector_to_bitmap_block(ic, dio->range.logical_sector);
2440 spin_lock(&bbs->bio_queue_lock);
2441 bio_list_add(&bbs->bio_queue, bio);
2442 spin_unlock(&bbs->bio_queue_lock);
2443 queue_work(ic->writer_wq, &bbs->work);
2444 return;
2445 }
2446 }
2447
2448 dio->in_flight = (atomic_t)ATOMIC_INIT(2);
2449
2450 if (need_sync_io) {
2451 init_completion(&read_comp);
2452 dio->completion = &read_comp;
2453 } else
2454 dio->completion = NULL;
2455
2456 dm_bio_record(&dio->bio_details, bio);
2457 bio_set_dev(bio, ic->dev->bdev);
2458 bio->bi_integrity = NULL;
2459 bio->bi_opf &= ~REQ_INTEGRITY;
2460 bio->bi_end_io = integrity_end_io;
2461 bio->bi_iter.bi_size = dio->range.n_sectors << SECTOR_SHIFT;
2462
2463 if (unlikely(dio->op == REQ_OP_DISCARD) && likely(ic->mode != 'D')) {
2464 integrity_metadata(&dio->work);
2465 dm_integrity_flush_buffers(ic, false);
2466
2467 dio->in_flight = (atomic_t)ATOMIC_INIT(1);
2468 dio->completion = NULL;
2469
2470 submit_bio_noacct(bio);
2471
2472 return;
2473 }
2474
2475 submit_bio_noacct(bio);
2476
2477 if (need_sync_io) {
2478 wait_for_completion_io(&read_comp);
2479 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) &&
2480 dio->range.logical_sector + dio->range.n_sectors > recalc_sector)
2481 goto skip_check;
2482 if (ic->mode == 'B') {
2483 if (!block_bitmap_op(ic, ic->recalc_bitmap, dio->range.logical_sector,
2484 dio->range.n_sectors, BITMAP_OP_TEST_ALL_CLEAR))
2485 goto skip_check;
2486 }
2487
2488 if (likely(!bio->bi_status))
2489 integrity_metadata(&dio->work);
2490 else
2491 skip_check:
2492 dec_in_flight(dio);
2493 } else {
2494 INIT_WORK(&dio->work, integrity_metadata);
2495 queue_work(ic->metadata_wq, &dio->work);
2496 }
2497
2498 return;
2499
2500 journal_read_write:
2501 if (unlikely(__journal_read_write(dio, bio, journal_section, journal_entry)))
2502 goto lock_retry;
2503
2504 do_endio_flush(ic, dio);
2505 }
2506
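/*
 * Map path for mode 'I': allocate a buffer for the integrity tuples,
 * compute them for writes, and attach the buffer to the bio as a
 * bio_integrity payload before remapping the bio to the data device.
 */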
2507 static int dm_integrity_map_inline(struct dm_integrity_io *dio, bool from_map)
2508 {
2509 struct dm_integrity_c *ic = dio->ic;
2510 struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
2511 struct bio_integrity_payload *bip;
2512 unsigned ret;
2513 sector_t recalc_sector;
2514
2515 if (unlikely(bio_integrity(bio))) {
2516 bio->bi_status = BLK_STS_NOTSUPP;
2517 bio_endio(bio);
2518 return DM_MAPIO_SUBMITTED;
2519 }
2520
2521 bio_set_dev(bio, ic->dev->bdev);
2522 if (unlikely((bio->bi_opf & REQ_PREFLUSH) != 0))
2523 return DM_MAPIO_REMAPPED;
2524
2525 retry:
2526 if (!dio->integrity_payload) {
2527 unsigned digest_size, extra_size;
2528 dio->payload_len = ic->tuple_size * (bio_sectors(bio) >> ic->sb->log2_sectors_per_block);
2529 digest_size = ic->internal_hash_digestsize;
2530 extra_size = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
2531 dio->payload_len += extra_size;
2532 dio->integrity_payload = kmalloc(dio->payload_len, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
2533 if (unlikely(!dio->integrity_payload)) {
2534 const unsigned x_size = PAGE_SIZE << 1;
2535 if (dio->payload_len > x_size) {
2536 unsigned sectors = ((x_size - extra_size) / ic->tuple_size) << ic->sb->log2_sectors_per_block;
2537 if (WARN_ON(!sectors || sectors >= bio_sectors(bio))) {
2538 bio->bi_status = BLK_STS_NOTSUPP;
2539 bio_endio(bio);
2540 return DM_MAPIO_SUBMITTED;
2541 }
2542 dm_accept_partial_bio(bio, sectors);
2543 goto retry;
2544 }
2545 }
2546 }
2547
2548 dio->range.logical_sector = bio->bi_iter.bi_sector;
2549 dio->range.n_sectors = bio_sectors(bio);
2550
2551 if (!(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)))
2552 goto skip_spinlock;
2553 #ifdef CONFIG_64BIT
2554 /*
2555 * On 64-bit CPUs we can optimize the lock away (so that it won't cause
2556 * cache line bouncing) and use acquire/release barriers instead.
2557 *
2558 * Paired with smp_store_release in integrity_recalc_inline.
2559 */
2560 recalc_sector = le64_to_cpu(smp_load_acquire(&ic->sb->recalc_sector));
2561 if (likely(dio->range.logical_sector + dio->range.n_sectors <= recalc_sector))
2562 goto skip_spinlock;
2563 #endif
2564 spin_lock_irq(&ic->endio_wait.lock);
2565 recalc_sector = le64_to_cpu(ic->sb->recalc_sector);
2566 if (dio->range.logical_sector + dio->range.n_sectors <= recalc_sector)
2567 goto skip_unlock;
2568 if (unlikely(!add_new_range(ic, &dio->range, true))) {
2569 if (from_map) {
2570 spin_unlock_irq(&ic->endio_wait.lock);
2571 INIT_WORK(&dio->work, integrity_bio_wait);
2572 queue_work(ic->wait_wq, &dio->work);
2573 return DM_MAPIO_SUBMITTED;
2574 }
2575 wait_and_add_new_range(ic, &dio->range);
2576 }
2577 dio->integrity_range_locked = true;
2578 skip_unlock:
2579 spin_unlock_irq(&ic->endio_wait.lock);
2580 skip_spinlock:
2581
2582 if (unlikely(!dio->integrity_payload)) {
2583 dio->integrity_payload = page_to_virt((struct page *)mempool_alloc(&ic->recheck_pool, GFP_NOIO));
2584 dio->integrity_payload_from_mempool = true;
2585 }
2586
2587 dio->bio_details.bi_iter = bio->bi_iter;
2588
2589 if (unlikely(!dm_integrity_check_limits(ic, bio->bi_iter.bi_sector, bio))) {
2590 return DM_MAPIO_KILL;
2591 }
2592
2593 bio->bi_iter.bi_sector += ic->start + SB_SECTORS;
2594
2595 bip = bio_integrity_alloc(bio, GFP_NOIO, 1);
2596 if (IS_ERR(bip)) {
2597 bio->bi_status = errno_to_blk_status(PTR_ERR(bip));
2598 bio_endio(bio);
2599 return DM_MAPIO_SUBMITTED;
2600 }
2601
2602 if (dio->op == REQ_OP_WRITE) {
2603 unsigned pos = 0;
2604 while (dio->bio_details.bi_iter.bi_size) {
2605 struct bio_vec bv = bio_iter_iovec(bio, dio->bio_details.bi_iter);
2606 const char *mem = integrity_kmap(ic, bv.bv_page);
2607 if (ic->tag_size < ic->tuple_size)
2608 memset(dio->integrity_payload + pos + ic->tag_size, 0, ic->tuple_size - ic->tag_size);
2609 integrity_sector_checksum(ic, &dio->ahash_req, dio->bio_details.bi_iter.bi_sector, mem, bv.bv_offset, dio->integrity_payload + pos);
2610 integrity_kunmap(ic, mem);
2611 pos += ic->tuple_size;
2612 bio_advance_iter_single(bio, &dio->bio_details.bi_iter, ic->sectors_per_block << SECTOR_SHIFT);
2613 }
2614 }
2615
2616 ret = bio_integrity_add_page(bio, virt_to_page(dio->integrity_payload),
2617 dio->payload_len, offset_in_page(dio->integrity_payload));
2618 if (unlikely(ret != dio->payload_len)) {
2619 bio->bi_status = BLK_STS_RESOURCE;
2620 bio_endio(bio);
2621 return DM_MAPIO_SUBMITTED;
2622 }
2623
2624 return DM_MAPIO_REMAPPED;
2625 }
2626
2627 static inline void dm_integrity_free_payload(struct dm_integrity_io *dio)
2628 {
2629 struct dm_integrity_c *ic = dio->ic;
2630 if (unlikely(dio->integrity_payload_from_mempool))
2631 mempool_free(virt_to_page(dio->integrity_payload), &ic->recheck_pool);
2632 else
2633 kfree(dio->integrity_payload);
2634 dio->integrity_payload = NULL;
2635 dio->integrity_payload_from_mempool = false;
2636 }
2637
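/*
 * Recheck path for mode 'I': re-read each block together with its tuple
 * using a private bio, recompute the checksum and either fail the original
 * bio with BLK_STS_PROTECTION or copy the re-read data back into it.
 */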
2638 static void dm_integrity_inline_recheck(struct work_struct *w)
2639 {
2640 struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
2641 struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
2642 struct dm_integrity_c *ic = dio->ic;
2643 struct bio *outgoing_bio;
2644 void *outgoing_data;
2645
2646 dio->integrity_payload = page_to_virt((struct page *)mempool_alloc(&ic->recheck_pool, GFP_NOIO));
2647 dio->integrity_payload_from_mempool = true;
2648
2649 outgoing_data = dio->integrity_payload + PAGE_SIZE;
2650
2651 while (dio->bio_details.bi_iter.bi_size) {
2652 char digest[HASH_MAX_DIGESTSIZE];
2653 int r;
2654 struct bio_integrity_payload *bip;
2655 struct bio_vec bv;
2656 char *mem;
2657
2658 outgoing_bio = bio_alloc_bioset(ic->dev->bdev, 1, REQ_OP_READ, GFP_NOIO, &ic->recheck_bios);
2659 bio_add_virt_nofail(outgoing_bio, outgoing_data,
2660 ic->sectors_per_block << SECTOR_SHIFT);
2661
2662 bip = bio_integrity_alloc(outgoing_bio, GFP_NOIO, 1);
2663 if (IS_ERR(bip)) {
2664 bio_put(outgoing_bio);
2665 bio->bi_status = errno_to_blk_status(PTR_ERR(bip));
2666 bio_endio(bio);
2667 return;
2668 }
2669
2670 r = bio_integrity_add_page(outgoing_bio, virt_to_page(dio->integrity_payload), ic->tuple_size, 0);
2671 if (unlikely(r != ic->tuple_size)) {
2672 bio_put(outgoing_bio);
2673 bio->bi_status = BLK_STS_RESOURCE;
2674 bio_endio(bio);
2675 return;
2676 }
2677
2678 outgoing_bio->bi_iter.bi_sector = dio->bio_details.bi_iter.bi_sector + ic->start + SB_SECTORS;
2679
2680 r = submit_bio_wait(outgoing_bio);
2681 if (unlikely(r != 0)) {
2682 bio_put(outgoing_bio);
2683 bio->bi_status = errno_to_blk_status(r);
2684 bio_endio(bio);
2685 return;
2686 }
2687 bio_put(outgoing_bio);
2688
2689 integrity_sector_checksum(ic, &dio->ahash_req, dio->bio_details.bi_iter.bi_sector, integrity_identity(ic, outgoing_data), 0, digest);
2690 if (unlikely(crypto_memneq(digest, dio->integrity_payload, min(ic->internal_hash_digestsize, ic->tag_size)))) {
2691 DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx",
2692 ic->dev->bdev, dio->bio_details.bi_iter.bi_sector);
2693 atomic64_inc(&ic->number_of_mismatches);
2694 dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum",
2695 bio, dio->bio_details.bi_iter.bi_sector, 0);
2696
2697 bio->bi_status = BLK_STS_PROTECTION;
2698 bio_endio(bio);
2699 return;
2700 }
2701
2702 bv = bio_iter_iovec(bio, dio->bio_details.bi_iter);
2703 mem = bvec_kmap_local(&bv);
2704 memcpy(mem, outgoing_data, ic->sectors_per_block << SECTOR_SHIFT);
2705 kunmap_local(mem);
2706
2707 bio_advance_iter_single(bio, &dio->bio_details.bi_iter, ic->sectors_per_block << SECTOR_SHIFT);
2708 }
2709
2710 bio_endio(bio);
2711 }
2712
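/*
 * Verify a completed inline read: compare freshly computed checksums against
 * the tuples returned with the bio. On a mismatch, offload the bio to
 * dm_integrity_inline_recheck and return false so that it is not completed
 * yet.
 */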
2713 static inline bool dm_integrity_check(struct dm_integrity_c *ic, struct dm_integrity_io *dio)
2714 {
2715 struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
2716 unsigned pos = 0;
2717
2718 while (dio->bio_details.bi_iter.bi_size) {
2719 char digest[HASH_MAX_DIGESTSIZE];
2720 struct bio_vec bv = bio_iter_iovec(bio, dio->bio_details.bi_iter);
2721 char *mem = integrity_kmap(ic, bv.bv_page);
2722 integrity_sector_checksum(ic, &dio->ahash_req, dio->bio_details.bi_iter.bi_sector, mem, bv.bv_offset, digest);
2723 if (unlikely(crypto_memneq(digest, dio->integrity_payload + pos,
2724 min(ic->internal_hash_digestsize, ic->tag_size)))) {
2725 integrity_kunmap(ic, mem);
2726 dm_integrity_free_payload(dio);
2727 INIT_WORK(&dio->work, dm_integrity_inline_recheck);
2728 queue_work(ic->offload_wq, &dio->work);
2729 return false;
2730 }
2731 integrity_kunmap(ic, mem);
2732 pos += ic->tuple_size;
2733 bio_advance_iter_single(bio, &dio->bio_details.bi_iter, ic->sectors_per_block << SECTOR_SHIFT);
2734 }
2735
2736 return true;
2737 }
2738
2739 static void dm_integrity_inline_async_check(struct work_struct *w)
2740 {
2741 struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
2742 struct dm_integrity_c *ic = dio->ic;
2743 struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
2744
2745 if (likely(dm_integrity_check(ic, dio)))
2746 bio_endio(bio);
2747 }
2748
2749 static int dm_integrity_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *status)
2750 {
2751 struct dm_integrity_c *ic = ti->private;
2752 struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
2753 if (ic->mode == 'I') {
2754 if (dio->op == REQ_OP_READ && likely(*status == BLK_STS_OK) && likely(dio->bio_details.bi_iter.bi_size != 0)) {
2755 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) &&
2756 unlikely(dio->integrity_range_locked))
2757 goto skip_check;
2758 if (likely(ic->internal_shash != NULL)) {
2759 if (unlikely(!dm_integrity_check(ic, dio)))
2760 return DM_ENDIO_INCOMPLETE;
2761 } else {
2762 INIT_WORK(&dio->work, dm_integrity_inline_async_check);
2763 queue_work(ic->offload_wq, &dio->work);
2764 return DM_ENDIO_INCOMPLETE;
2765 }
2766 }
2767 skip_check:
2768 dm_integrity_free_payload(dio);
2769 if (unlikely(dio->integrity_range_locked))
2770 remove_range(ic, &dio->range);
2771 }
2772 if (unlikely(dio->ahash_req))
2773 mempool_free(dio->ahash_req, &ic->ahash_req_pool);
2774 return DM_ENDIO_DONE;
2775 }
2776
2777 static void integrity_bio_wait(struct work_struct *w)
2778 {
2779 struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
2780 struct dm_integrity_c *ic = dio->ic;
2781
2782 if (ic->mode == 'I') {
2783 struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
2784 int r = dm_integrity_map_inline(dio, false);
2785 switch (r) {
2786 case DM_MAPIO_KILL:
2787 bio->bi_status = BLK_STS_IOERR;
2788 fallthrough;
2789 case DM_MAPIO_REMAPPED:
2790 submit_bio_noacct(bio);
2791 fallthrough;
2792 case DM_MAPIO_SUBMITTED:
2793 return;
2794 default:
2795 BUG();
2796 }
2797 } else {
2798 dm_integrity_map_continue(dio, false);
2799 }
2800 }
2801
2802 static void pad_uncommitted(struct dm_integrity_c *ic)
2803 {
2804 if (ic->free_section_entry) {
2805 ic->free_sectors -= ic->journal_section_entries - ic->free_section_entry;
2806 ic->free_section_entry = 0;
2807 ic->free_section++;
2808 wraparound_section(ic, &ic->free_section);
2809 ic->n_uncommitted_sections++;
2810 }
2811 if (WARN_ON(ic->journal_sections * ic->journal_section_entries !=
2812 (ic->n_uncommitted_sections + ic->n_committed_sections) *
2813 ic->journal_section_entries + ic->free_sectors)) {
2814 DMCRIT("journal_sections %u, journal_section_entries %u, "
2815 "n_uncommitted_sections %u, n_committed_sections %u, "
2816 "journal_section_entries %u, free_sectors %u",
2817 ic->journal_sections, ic->journal_section_entries,
2818 ic->n_uncommitted_sections, ic->n_committed_sections,
2819 ic->journal_section_entries, ic->free_sectors);
2820 }
2821 }
2822
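/*
 * Commit work: for mode 'J', wait for in-progress journal entries, stamp
 * each journal sector with the current commit id and write the journal out;
 * mode 'I' returns immediately and other non-journal modes only flush the
 * buffers. Queued flush bios are completed at the end.
 */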
2823 static void integrity_commit(struct work_struct *w)
2824 {
2825 struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, commit_work);
2826 unsigned int commit_start, commit_sections;
2827 unsigned int i, j, n;
2828 struct bio *flushes;
2829
2830 timer_delete(&ic->autocommit_timer);
2831
2832 if (ic->mode == 'I')
2833 return;
2834
2835 spin_lock_irq(&ic->endio_wait.lock);
2836 flushes = bio_list_get(&ic->flush_bio_list);
2837 if (unlikely(ic->mode != 'J')) {
2838 spin_unlock_irq(&ic->endio_wait.lock);
2839 dm_integrity_flush_buffers(ic, true);
2840 goto release_flush_bios;
2841 }
2842
2843 pad_uncommitted(ic);
2844 commit_start = ic->uncommitted_section;
2845 commit_sections = ic->n_uncommitted_sections;
2846 spin_unlock_irq(&ic->endio_wait.lock);
2847
2848 if (!commit_sections)
2849 goto release_flush_bios;
2850
2851 ic->wrote_to_journal = true;
2852
2853 i = commit_start;
2854 for (n = 0; n < commit_sections; n++) {
2855 for (j = 0; j < ic->journal_section_entries; j++) {
2856 struct journal_entry *je;
2857
2858 je = access_journal_entry(ic, i, j);
2859 io_wait_event(ic->copy_to_journal_wait, !journal_entry_is_inprogress(je));
2860 }
2861 for (j = 0; j < ic->journal_section_sectors; j++) {
2862 struct journal_sector *js;
2863
2864 js = access_journal(ic, i, j);
2865 js->commit_id = dm_integrity_commit_id(ic, i, j, ic->commit_seq);
2866 }
2867 i++;
2868 if (unlikely(i >= ic->journal_sections))
2869 ic->commit_seq = next_commit_seq(ic->commit_seq);
2870 wraparound_section(ic, &i);
2871 }
2872 smp_rmb();
2873
2874 write_journal(ic, commit_start, commit_sections);
2875
2876 spin_lock_irq(&ic->endio_wait.lock);
2877 ic->uncommitted_section += commit_sections;
2878 wraparound_section(ic, &ic->uncommitted_section);
2879 ic->n_uncommitted_sections -= commit_sections;
2880 ic->n_committed_sections += commit_sections;
2881 spin_unlock_irq(&ic->endio_wait.lock);
2882
2883 if (READ_ONCE(ic->free_sectors) <= ic->free_sectors_threshold)
2884 queue_work(ic->writer_wq, &ic->writer_work);
2885
2886 release_flush_bios:
2887 while (flushes) {
2888 struct bio *next = flushes->bi_next;
2889
2890 flushes->bi_next = NULL;
2891 do_endio(ic, flushes);
2892 flushes = next;
2893 }
2894 }
2895
2896 static void complete_copy_from_journal(unsigned long error, void *context)
2897 {
2898 struct journal_io *io = context;
2899 struct journal_completion *comp = io->comp;
2900 struct dm_integrity_c *ic = comp->ic;
2901
2902 remove_range(ic, &io->range);
2903 mempool_free(io, &ic->journal_io_mempool);
2904 if (unlikely(error != 0))
2905 dm_integrity_io_error(ic, "copying from journal", -EIO);
2906 complete_journal_op(comp);
2907 }
2908
2909 static void restore_last_bytes(struct dm_integrity_c *ic, struct journal_sector *js,
2910 struct journal_entry *je)
2911 {
2912 unsigned int s = 0;
2913
2914 do {
2915 js->commit_id = je->last_bytes[s];
2916 js++;
2917 } while (++s < ic->sectors_per_block);
2918 }
2919
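/*
 * Write committed journal sections back to the data device: merge runs of
 * consecutive entries, drop entries superseded by newer committed ones,
 * re-verify tags when replaying with an internal hash, write the tags to
 * the metadata area and copy the data out of the journal under a range lock.
 */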
2920 static void do_journal_write(struct dm_integrity_c *ic, unsigned int write_start,
2921 unsigned int write_sections, bool from_replay)
2922 {
2923 unsigned int i, j, n;
2924 struct journal_completion comp;
2925 struct blk_plug plug;
2926
2927 blk_start_plug(&plug);
2928
2929 comp.ic = ic;
2930 comp.in_flight = (atomic_t)ATOMIC_INIT(1);
2931 init_completion(&comp.comp);
2932
2933 i = write_start;
2934 for (n = 0; n < write_sections; n++, i++, wraparound_section(ic, &i)) {
2935 #ifndef INTERNAL_VERIFY
2936 if (unlikely(from_replay))
2937 #endif
2938 rw_section_mac(ic, i, false);
2939 for (j = 0; j < ic->journal_section_entries; j++) {
2940 struct journal_entry *je = access_journal_entry(ic, i, j);
2941 sector_t sec, area, offset;
2942 unsigned int k, l, next_loop;
2943 sector_t metadata_block;
2944 unsigned int metadata_offset;
2945 struct journal_io *io;
2946
2947 if (journal_entry_is_unused(je))
2948 continue;
2949 BUG_ON(unlikely(journal_entry_is_inprogress(je)) && !from_replay);
2950 sec = journal_entry_get_sector(je);
2951 if (unlikely(from_replay)) {
2952 if (unlikely(sec & (unsigned int)(ic->sectors_per_block - 1))) {
2953 dm_integrity_io_error(ic, "invalid sector in journal", -EIO);
2954 sec &= ~(sector_t)(ic->sectors_per_block - 1);
2955 }
2956 if (unlikely(sec >= ic->provided_data_sectors)) {
2957 journal_entry_set_unused(je);
2958 continue;
2959 }
2960 }
2961 get_area_and_offset(ic, sec, &area, &offset);
2962 restore_last_bytes(ic, access_journal_data(ic, i, j), je);
2963 for (k = j + 1; k < ic->journal_section_entries; k++) {
2964 struct journal_entry *je2 = access_journal_entry(ic, i, k);
2965 sector_t sec2, area2, offset2;
2966
2967 if (journal_entry_is_unused(je2))
2968 break;
2969 BUG_ON(unlikely(journal_entry_is_inprogress(je2)) && !from_replay);
2970 sec2 = journal_entry_get_sector(je2);
2971 if (unlikely(sec2 >= ic->provided_data_sectors))
2972 break;
2973 get_area_and_offset(ic, sec2, &area2, &offset2);
2974 if (area2 != area || offset2 != offset + ((k - j) << ic->sb->log2_sectors_per_block))
2975 break;
2976 restore_last_bytes(ic, access_journal_data(ic, i, k), je2);
2977 }
2978 next_loop = k - 1;
2979
2980 io = mempool_alloc(&ic->journal_io_mempool, GFP_NOIO);
2981 io->comp = &comp;
2982 io->range.logical_sector = sec;
2983 io->range.n_sectors = (k - j) << ic->sb->log2_sectors_per_block;
2984
2985 spin_lock_irq(&ic->endio_wait.lock);
2986 add_new_range_and_wait(ic, &io->range);
2987
2988 if (likely(!from_replay)) {
2989 struct journal_node *section_node = &ic->journal_tree[i * ic->journal_section_entries];
2990
2991 /* don't write if there is a newer committed sector */
2992 while (j < k && find_newer_committed_node(ic, &section_node[j])) {
2993 struct journal_entry *je2 = access_journal_entry(ic, i, j);
2994
2995 journal_entry_set_unused(je2);
2996 remove_journal_node(ic, &section_node[j]);
2997 j++;
2998 sec += ic->sectors_per_block;
2999 offset += ic->sectors_per_block;
3000 }
3001 while (j < k && find_newer_committed_node(ic, &section_node[k - 1])) {
3002 struct journal_entry *je2 = access_journal_entry(ic, i, k - 1);
3003
3004 journal_entry_set_unused(je2);
3005 remove_journal_node(ic, &section_node[k - 1]);
3006 k--;
3007 }
3008 if (j == k) {
3009 remove_range_unlocked(ic, &io->range);
3010 spin_unlock_irq(&ic->endio_wait.lock);
3011 mempool_free(io, &ic->journal_io_mempool);
3012 goto skip_io;
3013 }
3014 for (l = j; l < k; l++)
3015 remove_journal_node(ic, &section_node[l]);
3016 }
3017 spin_unlock_irq(&ic->endio_wait.lock);
3018
3019 metadata_block = get_metadata_sector_and_offset(ic, area, offset, &metadata_offset);
3020 for (l = j; l < k; l++) {
3021 int r;
3022 struct journal_entry *je2 = access_journal_entry(ic, i, l);
3023
3024 if (
3025 #ifndef INTERNAL_VERIFY
3026 unlikely(from_replay) &&
3027 #endif
3028 ic->internal_hash) {
3029 char test_tag[MAX_T(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)];
3030 struct journal_sector *js = access_journal_data(ic, i, l);
3031 void *js_page = integrity_identity(ic, (char *)js - offset_in_page(js));
3032 unsigned js_offset = offset_in_page(js);
3033
3034 integrity_sector_checksum(ic, &ic->journal_ahash_req, sec + ((l - j) << ic->sb->log2_sectors_per_block),
3035 js_page, js_offset, test_tag);
3036 if (unlikely(crypto_memneq(test_tag, journal_entry_tag(ic, je2), ic->tag_size))) {
3037 dm_integrity_io_error(ic, "tag mismatch when replaying journal", -EILSEQ);
3038 dm_audit_log_target(DM_MSG_PREFIX, "integrity-replay-journal", ic->ti, 0);
3039 }
3040 }
3041
3042 journal_entry_set_unused(je2);
3043 r = dm_integrity_rw_tag(ic, journal_entry_tag(ic, je2), &metadata_block, &metadata_offset,
3044 ic->tag_size, TAG_WRITE);
3045 if (unlikely(r))
3046 dm_integrity_io_error(ic, "reading tags", r);
3047 }
3048
3049 atomic_inc(&comp.in_flight);
3050 copy_from_journal(ic, i, j << ic->sb->log2_sectors_per_block,
3051 (k - j) << ic->sb->log2_sectors_per_block,
3052 get_data_sector(ic, area, offset),
3053 complete_copy_from_journal, io);
3054 skip_io:
3055 j = next_loop;
3056 }
3057 }
3058
3059 dm_bufio_write_dirty_buffers_async(ic->bufio);
3060
3061 blk_finish_plug(&plug);
3062
3063 complete_journal_op(&comp);
3064 wait_for_completion_io(&comp.comp);
3065
3066 dm_integrity_flush_buffers(ic, true);
3067 }
3068
3069 static void integrity_writer(struct work_struct *w)
3070 {
3071 struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, writer_work);
3072 unsigned int write_start, write_sections;
3073 unsigned int prev_free_sectors;
3074
3075 spin_lock_irq(&ic->endio_wait.lock);
3076 write_start = ic->committed_section;
3077 write_sections = ic->n_committed_sections;
3078 spin_unlock_irq(&ic->endio_wait.lock);
3079
3080 if (!write_sections)
3081 return;
3082
3083 do_journal_write(ic, write_start, write_sections, false);
3084
3085 spin_lock_irq(&ic->endio_wait.lock);
3086
3087 ic->committed_section += write_sections;
3088 wraparound_section(ic, &ic->committed_section);
3089 ic->n_committed_sections -= write_sections;
3090
3091 prev_free_sectors = ic->free_sectors;
3092 ic->free_sectors += write_sections * ic->journal_section_entries;
3093 if (unlikely(!prev_free_sectors))
3094 wake_up_locked(&ic->endio_wait);
3095
3096 spin_unlock_irq(&ic->endio_wait.lock);
3097 }
3098
3099 static void recalc_write_super(struct dm_integrity_c *ic)
3100 {
3101 int r;
3102
3103 dm_integrity_flush_buffers(ic, false);
3104 if (dm_integrity_failed(ic))
3105 return;
3106
3107 r = sync_rw_sb(ic, REQ_OP_WRITE);
3108 if (unlikely(r))
3109 dm_integrity_io_error(ic, "writing superblock", r);
3110 }
3111
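/*
 * Background recalculation for the journal and bitmap modes: walk the device
 * in chunks of at most RECALC_SECTORS, read the data, compute the tags into
 * a temporary buffer, write them to the metadata area and advance
 * sb->recalc_sector, writing the superblock every RECALC_WRITE_SUPER chunks.
 */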
3112 static void integrity_recalc(struct work_struct *w)
3113 {
3114 struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, recalc_work);
3115 size_t recalc_tags_size;
3116 u8 *recalc_buffer = NULL;
3117 u8 *recalc_tags = NULL;
3118 struct ahash_request *ahash_req = NULL;
3119 struct dm_integrity_range range;
3120 struct dm_io_request io_req;
3121 struct dm_io_region io_loc;
3122 sector_t area, offset;
3123 sector_t metadata_block;
3124 unsigned int metadata_offset;
3125 sector_t logical_sector, n_sectors;
3126 __u8 *t;
3127 unsigned int i;
3128 int r;
3129 unsigned int super_counter = 0;
3130 unsigned recalc_sectors = RECALC_SECTORS;
3131
3132 retry:
3133 recalc_buffer = kmalloc(recalc_sectors << SECTOR_SHIFT, GFP_NOIO | __GFP_NOWARN);
3134 if (!recalc_buffer) {
3135 oom:
3136 recalc_sectors >>= 1;
3137 if (recalc_sectors >= 1U << ic->sb->log2_sectors_per_block)
3138 goto retry;
3139 DMCRIT("out of memory for recalculate buffer - recalculation disabled");
3140 goto free_ret;
3141 }
3142 recalc_tags_size = (recalc_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size;
3143 if (ic->internal_hash_digestsize > ic->tag_size)
3144 recalc_tags_size += ic->internal_hash_digestsize - ic->tag_size;
3145 recalc_tags = kvmalloc(recalc_tags_size, GFP_NOIO);
3146 if (!recalc_tags) {
3147 kfree(recalc_buffer);
3148 recalc_buffer = NULL;
3149 goto oom;
3150 }
3151
3152 DEBUG_print("start recalculation... (position %llx)\n", le64_to_cpu(ic->sb->recalc_sector));
3153
3154 spin_lock_irq(&ic->endio_wait.lock);
3155
3156 next_chunk:
3157
3158 if (unlikely(dm_post_suspending(ic->ti)))
3159 goto unlock_ret;
3160
3161 range.logical_sector = le64_to_cpu(ic->sb->recalc_sector);
3162 if (unlikely(range.logical_sector >= ic->provided_data_sectors)) {
3163 if (ic->mode == 'B') {
3164 block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR);
3165 DEBUG_print("queue_delayed_work: bitmap_flush_work\n");
3166 queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, 0);
3167 }
3168 goto unlock_ret;
3169 }
3170
3171 get_area_and_offset(ic, range.logical_sector, &area, &offset);
3172 range.n_sectors = min((sector_t)recalc_sectors, ic->provided_data_sectors - range.logical_sector);
3173 if (!ic->meta_dev)
3174 range.n_sectors = min(range.n_sectors, ((sector_t)1U << ic->sb->log2_interleave_sectors) - (unsigned int)offset);
3175
3176 add_new_range_and_wait(ic, &range);
3177 spin_unlock_irq(&ic->endio_wait.lock);
3178 logical_sector = range.logical_sector;
3179 n_sectors = range.n_sectors;
3180
3181 if (ic->mode == 'B') {
3182 if (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector, n_sectors, BITMAP_OP_TEST_ALL_CLEAR))
3183 goto advance_and_next;
3184
3185 while (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector,
3186 ic->sectors_per_block, BITMAP_OP_TEST_ALL_CLEAR)) {
3187 logical_sector += ic->sectors_per_block;
3188 n_sectors -= ic->sectors_per_block;
3189 cond_resched();
3190 }
3191 while (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector + n_sectors - ic->sectors_per_block,
3192 ic->sectors_per_block, BITMAP_OP_TEST_ALL_CLEAR)) {
3193 n_sectors -= ic->sectors_per_block;
3194 cond_resched();
3195 }
3196 get_area_and_offset(ic, logical_sector, &area, &offset);
3197 }
3198
3199 DEBUG_print("recalculating: %llx, %llx\n", logical_sector, n_sectors);
3200
3201 if (unlikely(++super_counter == RECALC_WRITE_SUPER)) {
3202 recalc_write_super(ic);
3203 if (ic->mode == 'B')
3204 queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, ic->bitmap_flush_interval);
3205
3206 super_counter = 0;
3207 }
3208
3209 if (unlikely(dm_integrity_failed(ic)))
3210 goto err;
3211
3212 io_req.bi_opf = REQ_OP_READ;
3213 io_req.mem.type = DM_IO_KMEM;
3214 io_req.mem.ptr.addr = recalc_buffer;
3215 io_req.notify.fn = NULL;
3216 io_req.client = ic->io;
3217 io_loc.bdev = ic->dev->bdev;
3218 io_loc.sector = get_data_sector(ic, area, offset);
3219 io_loc.count = n_sectors;
3220
3221 r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT);
3222 if (unlikely(r)) {
3223 dm_integrity_io_error(ic, "reading data", r);
3224 goto err;
3225 }
3226
3227 t = recalc_tags;
3228 for (i = 0; i < n_sectors; i += ic->sectors_per_block) {
3229 void *ptr = recalc_buffer + (i << SECTOR_SHIFT);
3230 void *ptr_page = integrity_identity(ic, (char *)ptr - offset_in_page(ptr));
3231 unsigned ptr_offset = offset_in_page(ptr);
3232 integrity_sector_checksum(ic, &ahash_req, logical_sector + i, ptr_page, ptr_offset, t);
3233 t += ic->tag_size;
3234 }
3235
3236 metadata_block = get_metadata_sector_and_offset(ic, area, offset, &metadata_offset);
3237
3238 r = dm_integrity_rw_tag(ic, recalc_tags, &metadata_block, &metadata_offset, t - recalc_tags, TAG_WRITE);
3239 if (unlikely(r)) {
3240 dm_integrity_io_error(ic, "writing tags", r);
3241 goto err;
3242 }
3243
3244 if (ic->mode == 'B') {
3245 sector_t start, end;
3246
3247 start = (range.logical_sector >>
3248 (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit)) <<
3249 (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
3250 end = ((range.logical_sector + range.n_sectors) >>
3251 (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit)) <<
3252 (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
3253 block_bitmap_op(ic, ic->recalc_bitmap, start, end - start, BITMAP_OP_CLEAR);
3254 }
3255
3256 advance_and_next:
3257 cond_resched();
3258
3259 spin_lock_irq(&ic->endio_wait.lock);
3260 remove_range_unlocked(ic, &range);
3261 ic->sb->recalc_sector = cpu_to_le64(range.logical_sector + range.n_sectors);
3262 goto next_chunk;
3263
3264 err:
3265 remove_range(ic, &range);
3266 goto free_ret;
3267
3268 unlock_ret:
3269 spin_unlock_irq(&ic->endio_wait.lock);
3270
3271 recalc_write_super(ic);
3272
3273 free_ret:
3274 kfree(recalc_buffer);
3275 kvfree(recalc_tags);
3276 mempool_free(ahash_req, &ic->ahash_req_pool);
3277 }
3278
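/*
 * Recalculation for mode 'I': read each chunk, compute the tuples and write
 * the data back with a bio_integrity payload attached. The new recalc_sector
 * is published with smp_store_release() on 64-bit so that
 * dm_integrity_map_inline() can read it without the spinlock.
 */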
3279 static void integrity_recalc_inline(struct work_struct *w)
3280 {
3281 struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, recalc_work);
3282 size_t recalc_tags_size;
3283 u8 *recalc_buffer = NULL;
3284 u8 *recalc_tags = NULL;
3285 struct ahash_request *ahash_req = NULL;
3286 struct dm_integrity_range range;
3287 struct bio *bio;
3288 struct bio_integrity_payload *bip;
3289 __u8 *t;
3290 unsigned int i;
3291 int r;
3292 unsigned ret;
3293 unsigned int super_counter = 0;
3294 unsigned recalc_sectors = RECALC_SECTORS;
3295
3296 retry:
3297 recalc_buffer = kmalloc(recalc_sectors << SECTOR_SHIFT, GFP_NOIO | __GFP_NOWARN);
3298 if (!recalc_buffer) {
3299 oom:
3300 recalc_sectors >>= 1;
3301 if (recalc_sectors >= 1U << ic->sb->log2_sectors_per_block)
3302 goto retry;
3303 DMCRIT("out of memory for recalculate buffer - recalculation disabled");
3304 goto free_ret;
3305 }
3306
3307 recalc_tags_size = (recalc_sectors >> ic->sb->log2_sectors_per_block) * ic->tuple_size;
3308 if (ic->internal_hash_digestsize > ic->tuple_size)
3309 recalc_tags_size += ic->internal_hash_digestsize - ic->tuple_size;
3310 recalc_tags = kmalloc(recalc_tags_size, GFP_NOIO | __GFP_NOWARN);
3311 if (!recalc_tags) {
3312 kfree(recalc_buffer);
3313 recalc_buffer = NULL;
3314 goto oom;
3315 }
3316
3317 spin_lock_irq(&ic->endio_wait.lock);
3318
3319 next_chunk:
3320 if (unlikely(dm_post_suspending(ic->ti)))
3321 goto unlock_ret;
3322
3323 range.logical_sector = le64_to_cpu(ic->sb->recalc_sector);
3324 if (unlikely(range.logical_sector >= ic->provided_data_sectors))
3325 goto unlock_ret;
3326 range.n_sectors = min((sector_t)recalc_sectors, ic->provided_data_sectors - range.logical_sector);
3327
3328 add_new_range_and_wait(ic, &range);
3329 spin_unlock_irq(&ic->endio_wait.lock);
3330
3331 if (unlikely(++super_counter == RECALC_WRITE_SUPER)) {
3332 recalc_write_super(ic);
3333 super_counter = 0;
3334 }
3335
3336 if (unlikely(dm_integrity_failed(ic)))
3337 goto err;
3338
3339 DEBUG_print("recalculating: %llx - %llx\n", range.logical_sector, range.n_sectors);
3340
3341 bio = bio_alloc_bioset(ic->dev->bdev, 1, REQ_OP_READ, GFP_NOIO, &ic->recalc_bios);
3342 bio->bi_iter.bi_sector = ic->start + SB_SECTORS + range.logical_sector;
3343 bio_add_virt_nofail(bio, recalc_buffer,
3344 range.n_sectors << SECTOR_SHIFT);
3345 r = submit_bio_wait(bio);
3346 bio_put(bio);
3347 if (unlikely(r)) {
3348 dm_integrity_io_error(ic, "reading data", r);
3349 goto err;
3350 }
3351
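/*
 * Checksum every block that was just read; the tags are packed back to
 * back into recalc_tags and attached below as the integrity payload of
 * the write bio.
 */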
3352 t = recalc_tags;
3353 for (i = 0; i < range.n_sectors; i += ic->sectors_per_block) {
3354 void *ptr = recalc_buffer + (i << SECTOR_SHIFT);
3355 void *ptr_page = integrity_identity(ic, (char *)ptr - offset_in_page(ptr));
3356 unsigned ptr_offset = offset_in_page(ptr);
3357 memset(t, 0, ic->tuple_size);
3358 integrity_sector_checksum(ic, &ahash_req, range.logical_sector + i, ptr_page, ptr_offset, t);
3359 t += ic->tuple_size;
3360 }
3361
3362 bio = bio_alloc_bioset(ic->dev->bdev, 1, REQ_OP_WRITE, GFP_NOIO, &ic->recalc_bios);
3363 bio->bi_iter.bi_sector = ic->start + SB_SECTORS + range.logical_sector;
3364 bio_add_virt_nofail(bio, recalc_buffer,
3365 range.n_sectors << SECTOR_SHIFT);
3366
3367 bip = bio_integrity_alloc(bio, GFP_NOIO, 1);
3368 if (unlikely(IS_ERR(bip))) {
3369 bio_put(bio);
3370 DMCRIT("out of memory for bio integrity payload - recalculation disabled");
3371 goto err;
3372 }
3373 ret = bio_integrity_add_page(bio, virt_to_page(recalc_tags), t - recalc_tags, offset_in_page(recalc_tags));
3374 if (unlikely(ret != t - recalc_tags)) {
3375 bio_put(bio);
3376 dm_integrity_io_error(ic, "attaching integrity tags", -ENOMEM);
3377 goto err;
3378 }
3379
3380 r = submit_bio_wait(bio);
3381 bio_put(bio);
3382 if (unlikely(r)) {
3383 dm_integrity_io_error(ic, "writing data", r);
3384 goto err;
3385 }
3386
3387 cond_resched();
3388 spin_lock_irq(&ic->endio_wait.lock);
3389 remove_range_unlocked(ic, &range);
3390 #ifdef CONFIG_64BIT
3391 /* Paired with smp_load_acquire in dm_integrity_map_inline. */
3392 smp_store_release(&ic->sb->recalc_sector, cpu_to_le64(range.logical_sector + range.n_sectors));
3393 #else
3394 ic->sb->recalc_sector = cpu_to_le64(range.logical_sector + range.n_sectors);
3395 #endif
3396 goto next_chunk;
3397
3398 err:
3399 remove_range(ic, &range);
3400 goto free_ret;
3401
3402 unlock_ret:
3403 spin_unlock_irq(&ic->endio_wait.lock);
3404
3405 recalc_write_super(ic);
3406
3407 free_ret:
3408 kfree(recalc_buffer);
3409 kfree(recalc_tags);
3410 mempool_free(ahash_req, &ic->ahash_req_pool);
3411 }
3412
3413 static void bitmap_block_work(struct work_struct *w)
3414 {
3415 struct bitmap_block_status *bbs = container_of(w, struct bitmap_block_status, work);
3416 struct dm_integrity_c *ic = bbs->ic;
3417 struct bio *bio;
3418 struct bio_list bio_queue;
3419 struct bio_list waiting;
3420
3421 bio_list_init(&waiting);
3422
3423 spin_lock(&bbs->bio_queue_lock);
3424 bio_queue = bbs->bio_queue;
3425 bio_list_init(&bbs->bio_queue);
3426 spin_unlock(&bbs->bio_queue_lock);
3427
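/*
 * Bios whose range is already covered by may_write_bitmap can be
 * dispatched right away. For the others, set the corresponding bits in
 * the in-memory bitmap (held in the journal pages in bitmap mode) and
 * make the bio wait until that bitmap block is written out with FUA
 * below.
 */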
3428 while ((bio = bio_list_pop(&bio_queue))) {
3429 struct dm_integrity_io *dio;
3430
3431 dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
3432
3433 if (block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector,
3434 dio->range.n_sectors, BITMAP_OP_TEST_ALL_SET)) {
3435 remove_range(ic, &dio->range);
3436 INIT_WORK(&dio->work, integrity_bio_wait);
3437 queue_work(ic->offload_wq, &dio->work);
3438 } else {
3439 block_bitmap_op(ic, ic->journal, dio->range.logical_sector,
3440 dio->range.n_sectors, BITMAP_OP_SET);
3441 bio_list_add(&waiting, bio);
3442 }
3443 }
3444
3445 if (bio_list_empty(&waiting))
3446 return;
3447
3448 rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC,
3449 bbs->idx * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT),
3450 BITMAP_BLOCK_SIZE >> SECTOR_SHIFT, NULL);
3451
3452 while ((bio = bio_list_pop(&waiting))) {
3453 struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
3454
3455 block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector,
3456 dio->range.n_sectors, BITMAP_OP_SET);
3457
3458 remove_range(ic, &dio->range);
3459 INIT_WORK(&dio->work, integrity_bio_wait);
3460 queue_work(ic->offload_wq, &dio->work);
3461 }
3462
3463 queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, ic->bitmap_flush_interval);
3464 }
3465
3466 static void bitmap_flush_work(struct work_struct *work)
3467 {
3468 struct dm_integrity_c *ic = container_of(work, struct dm_integrity_c, bitmap_flush_work.work);
3469 struct dm_integrity_range range;
3470 unsigned long limit;
3471 struct bio *bio;
3472
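/*
 * Periodic bitmap flush: quiesce I/O by claiming the whole device range,
 * flush dirty metadata buffers, clear the in-memory and may-write
 * bitmaps up to the recalculation limit and write the cleared bitmap
 * blocks back with FUA.
 */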
3473 dm_integrity_flush_buffers(ic, false);
3474
3475 range.logical_sector = 0;
3476 range.n_sectors = ic->provided_data_sectors;
3477
3478 spin_lock_irq(&ic->endio_wait.lock);
3479 add_new_range_and_wait(ic, &range);
3480 spin_unlock_irq(&ic->endio_wait.lock);
3481
3482 dm_integrity_flush_buffers(ic, true);
3483
3484 limit = ic->provided_data_sectors;
3485 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
3486 limit = le64_to_cpu(ic->sb->recalc_sector)
3487 >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit)
3488 << (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
3489 }
3490 /*DEBUG_print("zeroing journal\n");*/
3491 block_bitmap_op(ic, ic->journal, 0, limit, BITMAP_OP_CLEAR);
3492 block_bitmap_op(ic, ic->may_write_bitmap, 0, limit, BITMAP_OP_CLEAR);
3493
3494 rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0,
3495 ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
3496
3497 spin_lock_irq(&ic->endio_wait.lock);
3498 remove_range_unlocked(ic, &range);
3499 while (unlikely((bio = bio_list_pop(&ic->synchronous_bios)) != NULL)) {
3500 bio_endio(bio);
3501 spin_unlock_irq(&ic->endio_wait.lock);
3502 spin_lock_irq(&ic->endio_wait.lock);
3503 }
3504 spin_unlock_irq(&ic->endio_wait.lock);
3505 }
3506
3507
3508 static void init_journal(struct dm_integrity_c *ic, unsigned int start_section,
3509 unsigned int n_sections, unsigned char commit_seq)
3510 {
3511 unsigned int i, j, n;
3512
3513 if (!n_sections)
3514 return;
3515
3516 for (n = 0; n < n_sections; n++) {
3517 i = start_section + n;
3518 wraparound_section(ic, &i);
3519 for (j = 0; j < ic->journal_section_sectors; j++) {
3520 struct journal_sector *js = access_journal(ic, i, j);
3521
3522 BUILD_BUG_ON(sizeof(js->sectors) != JOURNAL_SECTOR_DATA);
3523 memset(&js->sectors, 0, sizeof(js->sectors));
3524 js->commit_id = dm_integrity_commit_id(ic, i, j, commit_seq);
3525 }
3526 for (j = 0; j < ic->journal_section_entries; j++) {
3527 struct journal_entry *je = access_journal_entry(ic, i, j);
3528
3529 journal_entry_set_unused(je);
3530 }
3531 }
3532
3533 write_journal(ic, start_section, n_sections);
3534 }
3535
3536 static int find_commit_seq(struct dm_integrity_c *ic, unsigned int i, unsigned int j, commit_id_t id)
3537 {
3538 unsigned char k;
3539
3540 for (k = 0; k < N_COMMIT_IDS; k++) {
3541 if (dm_integrity_commit_id(ic, i, j, k) == id)
3542 return k;
3543 }
3544 dm_integrity_io_error(ic, "journal commit id", -EIO);
3545 return -EIO;
3546 }
3547
3548 static void replay_journal(struct dm_integrity_c *ic)
3549 {
3550 unsigned int i, j;
3551 bool used_commit_ids[N_COMMIT_IDS];
3552 unsigned int max_commit_id_sections[N_COMMIT_IDS];
3553 unsigned int write_start, write_sections;
3554 unsigned int continue_section;
3555 bool journal_empty;
3556 unsigned char unused, last_used, want_commit_seq;
3557
3558 if (ic->mode == 'R')
3559 return;
3560
3561 if (ic->journal_uptodate)
3562 return;
3563
3564 last_used = 0;
3565 write_start = 0;
3566
3567 if (!ic->just_formatted) {
3568 DEBUG_print("reading journal\n");
3569 rw_journal(ic, REQ_OP_READ, 0, ic->journal_sections, NULL);
3570 if (ic->journal_io)
3571 DEBUG_bytes(lowmem_page_address(ic->journal_io[0].page), 64, "read journal");
3572 if (ic->journal_io) {
3573 struct journal_completion crypt_comp;
3574
3575 crypt_comp.ic = ic;
3576 init_completion(&crypt_comp.comp);
3577 crypt_comp.in_flight = (atomic_t)ATOMIC_INIT(0);
3578 encrypt_journal(ic, false, 0, ic->journal_sections, &crypt_comp);
3579 wait_for_completion(&crypt_comp.comp);
3580 }
3581 DEBUG_bytes(lowmem_page_address(ic->journal[0].page), 64, "decrypted journal");
3582 }
3583
3584 if (dm_integrity_failed(ic))
3585 goto clear_journal;
3586
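/*
 * Scan all journal sectors to record which commit seqs occur (and the
 * last section where each occurs) and to find out whether the journal
 * contains any used entries at all.
 */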
3587 journal_empty = true;
3588 memset(used_commit_ids, 0, sizeof(used_commit_ids));
3589 memset(max_commit_id_sections, 0, sizeof(max_commit_id_sections));
3590 for (i = 0; i < ic->journal_sections; i++) {
3591 for (j = 0; j < ic->journal_section_sectors; j++) {
3592 int k;
3593 struct journal_sector *js = access_journal(ic, i, j);
3594
3595 k = find_commit_seq(ic, i, j, js->commit_id);
3596 if (k < 0)
3597 goto clear_journal;
3598 used_commit_ids[k] = true;
3599 max_commit_id_sections[k] = i;
3600 }
3601 if (journal_empty) {
3602 for (j = 0; j < ic->journal_section_entries; j++) {
3603 struct journal_entry *je = access_journal_entry(ic, i, j);
3604
3605 if (!journal_entry_is_unused(je)) {
3606 journal_empty = false;
3607 break;
3608 }
3609 }
3610 }
3611 }
3612
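/*
 * Commit seqs are assigned cyclically, so locate an unused seq; its
 * predecessor is the last seq that was written, and the seq before that
 * (want_commit_seq) marks the sections eligible for replay.
 */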
3613 if (!used_commit_ids[N_COMMIT_IDS - 1]) {
3614 unused = N_COMMIT_IDS - 1;
3615 while (unused && !used_commit_ids[unused - 1])
3616 unused--;
3617 } else {
3618 for (unused = 0; unused < N_COMMIT_IDS; unused++)
3619 if (!used_commit_ids[unused])
3620 break;
3621 if (unused == N_COMMIT_IDS) {
3622 dm_integrity_io_error(ic, "journal commit ids", -EIO);
3623 goto clear_journal;
3624 }
3625 }
3626 DEBUG_print("first unused commit seq %d [%d,%d,%d,%d]\n",
3627 unused, used_commit_ids[0], used_commit_ids[1],
3628 used_commit_ids[2], used_commit_ids[3]);
3629
3630 last_used = prev_commit_seq(unused);
3631 want_commit_seq = prev_commit_seq(last_used);
3632
3633 if (!used_commit_ids[want_commit_seq] && used_commit_ids[prev_commit_seq(want_commit_seq)])
3634 journal_empty = true;
3635
3636 write_start = max_commit_id_sections[last_used] + 1;
3637 if (unlikely(write_start >= ic->journal_sections))
3638 want_commit_seq = next_commit_seq(want_commit_seq);
3639 wraparound_section(ic, &write_start);
3640
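/*
 * Walk forward from write_start and count the consecutive sections that
 * carry the expected commit id; a mismatch indicates where a crash
 * interrupted journal writing, and nothing beyond it is replayed.
 */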
3641 i = write_start;
3642 for (write_sections = 0; write_sections < ic->journal_sections; write_sections++) {
3643 for (j = 0; j < ic->journal_section_sectors; j++) {
3644 struct journal_sector *js = access_journal(ic, i, j);
3645
3646 if (js->commit_id != dm_integrity_commit_id(ic, i, j, want_commit_seq)) {
3647 /*
3648 * This could be caused by a crash during writing.
3649 * We won't replay the inconsistent part of the
3650 * journal.
3651 */
3652 DEBUG_print("commit id mismatch at position (%u, %u): %d != %d\n",
3653 i, j, find_commit_seq(ic, i, j, js->commit_id), want_commit_seq);
3654 goto brk;
3655 }
3656 }
3657 i++;
3658 if (unlikely(i >= ic->journal_sections))
3659 want_commit_seq = next_commit_seq(want_commit_seq);
3660 wraparound_section(ic, &i);
3661 }
3662 brk:
3663
3664 if (!journal_empty) {
3665 DEBUG_print("replaying %u sections, starting at %u, commit seq %d\n",
3666 write_sections, write_start, want_commit_seq);
3667 do_journal_write(ic, write_start, write_sections, true);
3668 }
3669
3670 if (write_sections == ic->journal_sections && (ic->mode == 'J' || journal_empty)) {
3671 continue_section = write_start;
3672 ic->commit_seq = want_commit_seq;
3673 DEBUG_print("continuing from section %u, commit seq %d\n", write_start, ic->commit_seq);
3674 } else {
3675 unsigned int s;
3676 unsigned char erase_seq;
3677
3678 clear_journal:
3679 DEBUG_print("clearing journal\n");
3680
3681 erase_seq = prev_commit_seq(prev_commit_seq(last_used));
3682 s = write_start;
3683 init_journal(ic, s, 1, erase_seq);
3684 s++;
3685 wraparound_section(ic, &s);
3686 if (ic->journal_sections >= 2) {
3687 init_journal(ic, s, ic->journal_sections - 2, erase_seq);
3688 s += ic->journal_sections - 2;
3689 wraparound_section(ic, &s);
3690 init_journal(ic, s, 1, erase_seq);
3691 }
3692
3693 continue_section = 0;
3694 ic->commit_seq = next_commit_seq(erase_seq);
3695 }
3696
3697 ic->committed_section = continue_section;
3698 ic->n_committed_sections = 0;
3699
3700 ic->uncommitted_section = continue_section;
3701 ic->n_uncommitted_sections = 0;
3702
3703 ic->free_section = continue_section;
3704 ic->free_section_entry = 0;
3705 ic->free_sectors = ic->journal_entries;
3706
3707 ic->journal_tree_root = RB_ROOT;
3708 for (i = 0; i < ic->journal_entries; i++)
3709 init_journal_node(&ic->journal_tree[i]);
3710 }
3711
3712 static void dm_integrity_enter_synchronous_mode(struct dm_integrity_c *ic)
3713 {
3714 DEBUG_print("%s\n", __func__);
3715
3716 if (ic->mode == 'B') {
3717 ic->bitmap_flush_interval = msecs_to_jiffies(10) + 1;
3718 ic->synchronous_mode = 1;
3719
3720 cancel_delayed_work_sync(&ic->bitmap_flush_work);
3721 queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, 0);
3722 flush_workqueue(ic->commit_wq);
3723 }
3724 }
3725
3726 static int dm_integrity_reboot(struct notifier_block *n, unsigned long code, void *x)
3727 {
3728 struct dm_integrity_c *ic = container_of(n, struct dm_integrity_c, reboot_notifier);
3729
3730 DEBUG_print("%s\n", __func__);
3731
3732 dm_integrity_enter_synchronous_mode(ic);
3733
3734 return NOTIFY_DONE;
3735 }
3736
3737 static void dm_integrity_postsuspend(struct dm_target *ti)
3738 {
3739 struct dm_integrity_c *ic = ti->private;
3740 int r;
3741
3742 WARN_ON(unregister_reboot_notifier(&ic->reboot_notifier));
3743
3744 timer_delete_sync(&ic->autocommit_timer);
3745
3746 if (ic->recalc_wq)
3747 drain_workqueue(ic->recalc_wq);
3748
3749 if (ic->mode == 'B')
3750 cancel_delayed_work_sync(&ic->bitmap_flush_work);
3751
3752 queue_work(ic->commit_wq, &ic->commit_work);
3753 drain_workqueue(ic->commit_wq);
3754
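/*
 * In journal mode, flush all committed data and, if the journal was
 * written to, reinitialize it (in two parts with consecutive commit
 * seqs) so that a later activation does not replay the now-stale
 * entries.
 */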
3755 if (ic->mode == 'J') {
3756 queue_work(ic->writer_wq, &ic->writer_work);
3757 drain_workqueue(ic->writer_wq);
3758 dm_integrity_flush_buffers(ic, true);
3759 if (ic->wrote_to_journal) {
3760 init_journal(ic, ic->free_section,
3761 ic->journal_sections - ic->free_section, ic->commit_seq);
3762 if (ic->free_section) {
3763 init_journal(ic, 0, ic->free_section,
3764 next_commit_seq(ic->commit_seq));
3765 }
3766 }
3767 }
3768
3769 if (ic->mode == 'B') {
3770 dm_integrity_flush_buffers(ic, true);
3771 #if 1
3772 /* set to 0 to test bitmap replay code */
3773 init_journal(ic, 0, ic->journal_sections, 0);
3774 ic->sb->flags &= ~cpu_to_le32(SB_FLAG_DIRTY_BITMAP);
3775 r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA);
3776 if (unlikely(r))
3777 dm_integrity_io_error(ic, "writing superblock", r);
3778 #endif
3779 }
3780
3781 BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
3782
3783 ic->journal_uptodate = true;
3784 }
3785
3786 static void dm_integrity_resume(struct dm_target *ti)
3787 {
3788 struct dm_integrity_c *ic = ti->private;
3789 __u64 old_provided_data_sectors = le64_to_cpu(ic->sb->provided_data_sectors);
3790 int r;
3791
3792 DEBUG_print("resume\n");
3793
3794 ic->wrote_to_journal = false;
3795
3796 if (ic->provided_data_sectors != old_provided_data_sectors) {
3797 if (ic->provided_data_sectors > old_provided_data_sectors &&
3798 ic->mode == 'B' &&
3799 ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit) {
3800 rw_journal_sectors(ic, REQ_OP_READ, 0,
3801 ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
3802 block_bitmap_op(ic, ic->journal, old_provided_data_sectors,
3803 ic->provided_data_sectors - old_provided_data_sectors, BITMAP_OP_SET);
3804 rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0,
3805 ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
3806 }
3807
3808 ic->sb->provided_data_sectors = cpu_to_le64(ic->provided_data_sectors);
3809 r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA);
3810 if (unlikely(r))
3811 dm_integrity_io_error(ic, "writing superblock", r);
3812 }
3813
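/*
 * A dirty bitmap means the previous instance did not shut down cleanly:
 * regions marked in the on-disk bitmap may have been written without
 * their tags being updated, so they are scheduled for recalculation. If
 * the bitmap granularity or the mode changed, fall back to recalculating
 * from sector 0.
 */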
3814 if (ic->sb->flags & cpu_to_le32(SB_FLAG_DIRTY_BITMAP)) {
3815 DEBUG_print("resume dirty_bitmap\n");
3816 rw_journal_sectors(ic, REQ_OP_READ, 0,
3817 ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
3818 if (ic->mode == 'B') {
3819 if (ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit &&
3820 !ic->reset_recalculate_flag) {
3821 block_bitmap_copy(ic, ic->recalc_bitmap, ic->journal);
3822 block_bitmap_copy(ic, ic->may_write_bitmap, ic->journal);
3823 if (!block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors,
3824 BITMAP_OP_TEST_ALL_CLEAR)) {
3825 ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
3826 ic->sb->recalc_sector = cpu_to_le64(0);
3827 }
3828 } else {
3829 DEBUG_print("non-matching blocks_per_bitmap_bit: %u, %u\n",
3830 ic->sb->log2_blocks_per_bitmap_bit, ic->log2_blocks_per_bitmap_bit);
3831 ic->sb->log2_blocks_per_bitmap_bit = ic->log2_blocks_per_bitmap_bit;
3832 block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_SET);
3833 block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_SET);
3834 block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_SET);
3835 rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0,
3836 ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
3837 ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
3838 ic->sb->recalc_sector = cpu_to_le64(0);
3839 }
3840 } else {
3841 if (!(ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit &&
3842 block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_TEST_ALL_CLEAR)) ||
3843 ic->reset_recalculate_flag) {
3844 ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
3845 ic->sb->recalc_sector = cpu_to_le64(0);
3846 }
3847 init_journal(ic, 0, ic->journal_sections, 0);
3848 replay_journal(ic);
3849 ic->sb->flags &= ~cpu_to_le32(SB_FLAG_DIRTY_BITMAP);
3850 }
3851 r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA);
3852 if (unlikely(r))
3853 dm_integrity_io_error(ic, "writing superblock", r);
3854 } else {
3855 replay_journal(ic);
3856 if (ic->reset_recalculate_flag) {
3857 ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
3858 ic->sb->recalc_sector = cpu_to_le64(0);
3859 }
3860 if (ic->mode == 'B') {
3861 ic->sb->flags |= cpu_to_le32(SB_FLAG_DIRTY_BITMAP);
3862 ic->sb->log2_blocks_per_bitmap_bit = ic->log2_blocks_per_bitmap_bit;
3863 r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA);
3864 if (unlikely(r))
3865 dm_integrity_io_error(ic, "writing superblock", r);
3866
3867 block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR);
3868 block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR);
3869 block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR);
3870 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) &&
3871 le64_to_cpu(ic->sb->recalc_sector) < ic->provided_data_sectors) {
3872 block_bitmap_op(ic, ic->journal, le64_to_cpu(ic->sb->recalc_sector),
3873 ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET);
3874 block_bitmap_op(ic, ic->recalc_bitmap, le64_to_cpu(ic->sb->recalc_sector),
3875 ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET);
3876 block_bitmap_op(ic, ic->may_write_bitmap, le64_to_cpu(ic->sb->recalc_sector),
3877 ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET);
3878 }
3879 rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0,
3880 ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
3881 }
3882 }
3883
3884 DEBUG_print("testing recalc: %x\n", ic->sb->flags);
3885 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
3886 __u64 recalc_pos = le64_to_cpu(ic->sb->recalc_sector);
3887
3888 DEBUG_print("recalc pos: %llx / %llx\n", recalc_pos, ic->provided_data_sectors);
3889 if (recalc_pos < ic->provided_data_sectors) {
3890 queue_work(ic->recalc_wq, &ic->recalc_work);
3891 } else if (recalc_pos > ic->provided_data_sectors) {
3892 ic->sb->recalc_sector = cpu_to_le64(ic->provided_data_sectors);
3893 recalc_write_super(ic);
3894 }
3895 }
3896
3897 ic->reboot_notifier.notifier_call = dm_integrity_reboot;
3898 ic->reboot_notifier.next = NULL;
3899 ic->reboot_notifier.priority = INT_MAX - 1; /* be notified after md and before hardware drivers */
3900 WARN_ON(register_reboot_notifier(&ic->reboot_notifier));
3901
3902 #if 0
3903 /* set to 1 to stress test synchronous mode */
3904 dm_integrity_enter_synchronous_mode(ic);
3905 #endif
3906 }
3907
3908 static void dm_integrity_status(struct dm_target *ti, status_type_t type,
3909 unsigned int status_flags, char *result, unsigned int maxlen)
3910 {
3911 struct dm_integrity_c *ic = ti->private;
3912 unsigned int arg_count;
3913 size_t sz = 0;
3914
3915 switch (type) {
3916 case STATUSTYPE_INFO:
3917 DMEMIT("%llu %llu",
3918 (unsigned long long)atomic64_read(&ic->number_of_mismatches),
3919 ic->provided_data_sectors);
3920 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
3921 DMEMIT(" %llu", le64_to_cpu(ic->sb->recalc_sector));
3922 else
3923 DMEMIT(" -");
3924 break;
3925
3926 case STATUSTYPE_TABLE: {
3927 arg_count = 1; /* buffer_sectors */
3928 arg_count += !!ic->meta_dev;
3929 arg_count += ic->sectors_per_block != 1;
3930 arg_count += !!(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING));
3931 arg_count += ic->reset_recalculate_flag;
3932 arg_count += ic->discard;
3933 arg_count += ic->mode != 'I'; /* interleave_sectors */
3934 arg_count += ic->mode == 'J'; /* journal_sectors */
3935 arg_count += ic->mode == 'J'; /* journal_watermark */
3936 arg_count += ic->mode == 'J'; /* commit_time */
3937 arg_count += ic->mode == 'B'; /* sectors_per_bit */
3938 arg_count += ic->mode == 'B'; /* bitmap_flush_interval */
3939 arg_count += !!ic->internal_hash_alg.alg_string;
3940 arg_count += !!ic->journal_crypt_alg.alg_string;
3941 arg_count += !!ic->journal_mac_alg.alg_string;
3942 arg_count += (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0;
3943 arg_count += (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) != 0;
3944 arg_count += ic->legacy_recalculate;
3945 DMEMIT("%s %llu %u %c %u", ic->dev->name, ic->start,
3946 ic->tag_size, ic->mode, arg_count);
3947 if (ic->meta_dev)
3948 DMEMIT(" meta_device:%s", ic->meta_dev->name);
3949 if (ic->sectors_per_block != 1)
3950 DMEMIT(" block_size:%u", ic->sectors_per_block << SECTOR_SHIFT);
3951 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
3952 DMEMIT(" recalculate");
3953 if (ic->reset_recalculate_flag)
3954 DMEMIT(" reset_recalculate");
3955 if (ic->discard)
3956 DMEMIT(" allow_discards");
3957 if (ic->mode != 'I')
3958 DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors);
3959 DMEMIT(" buffer_sectors:%u", 1U << ic->log2_buffer_sectors);
3960 if (ic->mode == 'J') {
3961 __u64 watermark_percentage = (__u64)(ic->journal_entries - ic->free_sectors_threshold) * 100;
3962
3963 watermark_percentage += ic->journal_entries / 2;
3964 do_div(watermark_percentage, ic->journal_entries);
3965 DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS);
3966 DMEMIT(" journal_watermark:%u", (unsigned int)watermark_percentage);
3967 DMEMIT(" commit_time:%u", ic->autocommit_msec);
3968 }
3969 if (ic->mode == 'B') {
3970 DMEMIT(" sectors_per_bit:%llu", (sector_t)ic->sectors_per_block << ic->log2_blocks_per_bitmap_bit);
3971 DMEMIT(" bitmap_flush_interval:%u", jiffies_to_msecs(ic->bitmap_flush_interval));
3972 }
3973 if ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0)
3974 DMEMIT(" fix_padding");
3975 if ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) != 0)
3976 DMEMIT(" fix_hmac");
3977 if (ic->legacy_recalculate)
3978 DMEMIT(" legacy_recalculate");
3979
3980 #define EMIT_ALG(a, n) \
3981 do { \
3982 if (ic->a.alg_string) { \
3983 DMEMIT(" %s:%s", n, ic->a.alg_string); \
3984 if (ic->a.key_string) \
3985 DMEMIT(":%s", ic->a.key_string);\
3986 } \
3987 } while (0)
3988 EMIT_ALG(internal_hash_alg, "internal_hash");
3989 EMIT_ALG(journal_crypt_alg, "journal_crypt");
3990 EMIT_ALG(journal_mac_alg, "journal_mac");
3991 break;
3992 }
3993 case STATUSTYPE_IMA:
3994 DMEMIT_TARGET_NAME_VERSION(ti->type);
3995 DMEMIT(",dev_name=%s,start=%llu,tag_size=%u,mode=%c",
3996 ic->dev->name, ic->start, ic->tag_size, ic->mode);
3997
3998 if (ic->meta_dev)
3999 DMEMIT(",meta_device=%s", ic->meta_dev->name);
4000 if (ic->sectors_per_block != 1)
4001 DMEMIT(",block_size=%u", ic->sectors_per_block << SECTOR_SHIFT);
4002
4003 DMEMIT(",recalculate=%c", (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) ?
4004 'y' : 'n');
4005 DMEMIT(",allow_discards=%c", ic->discard ? 'y' : 'n');
4006 DMEMIT(",fix_padding=%c",
4007 ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0) ? 'y' : 'n');
4008 DMEMIT(",fix_hmac=%c",
4009 ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) != 0) ? 'y' : 'n');
4010 DMEMIT(",legacy_recalculate=%c", ic->legacy_recalculate ? 'y' : 'n');
4011
4012 DMEMIT(",journal_sectors=%u", ic->initial_sectors - SB_SECTORS);
4013 DMEMIT(",interleave_sectors=%u", 1U << ic->sb->log2_interleave_sectors);
4014 DMEMIT(",buffer_sectors=%u", 1U << ic->log2_buffer_sectors);
4015 DMEMIT(";");
4016 break;
4017 }
4018 }
4019
4020 static int dm_integrity_iterate_devices(struct dm_target *ti,
4021 iterate_devices_callout_fn fn, void *data)
4022 {
4023 struct dm_integrity_c *ic = ti->private;
4024
4025 if (!ic->meta_dev)
4026 return fn(ti, ic->dev, ic->start + ic->initial_sectors + ic->metadata_run, ti->len, data);
4027 else
4028 return fn(ti, ic->dev, 0, ti->len, data);
4029 }
4030
4031 static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *limits)
4032 {
4033 struct dm_integrity_c *ic = ti->private;
4034
4035 if (ic->sectors_per_block > 1) {
4036 limits->logical_block_size = ic->sectors_per_block << SECTOR_SHIFT;
4037 limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT;
4038 limits->io_min = ic->sectors_per_block << SECTOR_SHIFT;
4039 limits->dma_alignment = limits->logical_block_size - 1;
4040 limits->discard_granularity = ic->sectors_per_block << SECTOR_SHIFT;
4041 }
4042
4043 if (!ic->internal_hash) {
4044 struct blk_integrity *bi = &limits->integrity;
4045
4046 memset(bi, 0, sizeof(*bi));
4047 bi->metadata_size = ic->tag_size;
4048 bi->tag_size = bi->metadata_size;
4049 bi->interval_exp =
4050 ic->sb->log2_sectors_per_block + SECTOR_SHIFT;
4051 }
4052
4053 limits->max_integrity_segments = USHRT_MAX;
4054 }
4055
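/*
 * A journal section consists of JOURNAL_BLOCK_SECTORS sectors of entry
 * metadata (each optionally reserving JOURNAL_MAC_PER_SECTOR bytes for a
 * per-sector MAC) followed by one data block per journal entry.
 */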
4056 static void calculate_journal_section_size(struct dm_integrity_c *ic)
4057 {
4058 unsigned int sector_space = JOURNAL_SECTOR_DATA;
4059
4060 ic->journal_sections = le32_to_cpu(ic->sb->journal_sections);
4061 ic->journal_entry_size = roundup(offsetof(struct journal_entry, last_bytes[ic->sectors_per_block]) + ic->tag_size,
4062 JOURNAL_ENTRY_ROUNDUP);
4063
4064 if (ic->sb->flags & cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC))
4065 sector_space -= JOURNAL_MAC_PER_SECTOR;
4066 ic->journal_entries_per_sector = sector_space / ic->journal_entry_size;
4067 ic->journal_section_entries = ic->journal_entries_per_sector * JOURNAL_BLOCK_SECTORS;
4068 ic->journal_section_sectors = (ic->journal_section_entries << ic->sb->log2_sectors_per_block) + JOURNAL_BLOCK_SECTORS;
4069 ic->journal_entries = ic->journal_section_entries * ic->journal_sections;
4070 }
4071
4072 static int calculate_device_limits(struct dm_integrity_c *ic)
4073 {
4074 __u64 initial_sectors;
4075
4076 calculate_journal_section_size(ic);
4077 initial_sectors = SB_SECTORS + (__u64)ic->journal_section_sectors * ic->journal_sections;
4078 if (initial_sectors + METADATA_PADDING_SECTORS >= ic->meta_device_sectors || initial_sectors > UINT_MAX)
4079 return -EINVAL;
4080 ic->initial_sectors = initial_sectors;
4081
4082 if (ic->mode == 'I') {
4083 if (ic->initial_sectors + ic->provided_data_sectors > ic->meta_device_sectors)
4084 return -EINVAL;
4085 } else if (!ic->meta_dev) {
4086 sector_t last_sector, last_area, last_offset;
4087
4088 /* we have to maintain excessive padding for compatibility with existing volumes */
4089 __u64 metadata_run_padding =
4090 ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING) ?
4091 (__u64)(METADATA_PADDING_SECTORS << SECTOR_SHIFT) :
4092 (__u64)(1 << SECTOR_SHIFT << METADATA_PADDING_SECTORS);
4093
4094 ic->metadata_run = round_up((__u64)ic->tag_size << (ic->sb->log2_interleave_sectors - ic->sb->log2_sectors_per_block),
4095 metadata_run_padding) >> SECTOR_SHIFT;
4096 if (!(ic->metadata_run & (ic->metadata_run - 1)))
4097 ic->log2_metadata_run = __ffs(ic->metadata_run);
4098 else
4099 ic->log2_metadata_run = -1;
4100
4101 get_area_and_offset(ic, ic->provided_data_sectors - 1, &last_area, &last_offset);
4102 last_sector = get_data_sector(ic, last_area, last_offset);
4103 if (last_sector < ic->start || last_sector >= ic->meta_device_sectors)
4104 return -EINVAL;
4105 } else {
4106 __u64 meta_size = (ic->provided_data_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size;
4107
4108 meta_size = (meta_size + ((1U << (ic->log2_buffer_sectors + SECTOR_SHIFT)) - 1))
4109 >> (ic->log2_buffer_sectors + SECTOR_SHIFT);
4110 meta_size <<= ic->log2_buffer_sectors;
4111 if (ic->initial_sectors + meta_size < ic->initial_sectors ||
4112 ic->initial_sectors + meta_size > ic->meta_device_sectors)
4113 return -EINVAL;
4114 ic->metadata_run = 1;
4115 ic->log2_metadata_run = 0;
4116 }
4117
4118 return 0;
4119 }
4120
4121 static void get_provided_data_sectors(struct dm_integrity_c *ic)
4122 {
4123 if (!ic->meta_dev) {
4124 int test_bit;
4125
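/*
 * Find the largest provided_data_sectors that still fits on the device:
 * try setting each bit from the top down and keep it only if
 * calculate_device_limits() still succeeds with that size.
 */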
4126 ic->provided_data_sectors = 0;
4127 for (test_bit = fls64(ic->meta_device_sectors) - 1; test_bit >= 3; test_bit--) {
4128 __u64 prev_data_sectors = ic->provided_data_sectors;
4129
4130 ic->provided_data_sectors |= (sector_t)1 << test_bit;
4131 if (calculate_device_limits(ic))
4132 ic->provided_data_sectors = prev_data_sectors;
4133 }
4134 } else {
4135 ic->provided_data_sectors = ic->data_device_sectors;
4136 ic->provided_data_sectors &= ~(sector_t)(ic->sectors_per_block - 1);
4137 }
4138 }
4139
4140 static int initialize_superblock(struct dm_integrity_c *ic,
4141 unsigned int journal_sectors, unsigned int interleave_sectors)
4142 {
4143 unsigned int journal_sections;
4144 int test_bit;
4145
4146 memset(ic->sb, 0, SB_SECTORS << SECTOR_SHIFT);
4147 memcpy(ic->sb->magic, SB_MAGIC, 8);
4148 if (ic->mode == 'I')
4149 ic->sb->flags |= cpu_to_le32(SB_FLAG_INLINE);
4150 ic->sb->integrity_tag_size = cpu_to_le16(ic->tag_size);
4151 ic->sb->log2_sectors_per_block = __ffs(ic->sectors_per_block);
4152 if (ic->journal_mac_alg.alg_string)
4153 ic->sb->flags |= cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC);
4154
4155 calculate_journal_section_size(ic);
4156 journal_sections = journal_sectors / ic->journal_section_sectors;
4157 if (!journal_sections)
4158 journal_sections = 1;
4159 if (ic->mode == 'I')
4160 journal_sections = 0;
4161
4162 if (ic->fix_hmac && (ic->internal_hash_alg.alg_string || ic->journal_mac_alg.alg_string)) {
4163 ic->sb->flags |= cpu_to_le32(SB_FLAG_FIXED_HMAC);
4164 get_random_bytes(ic->sb->salt, SALT_SIZE);
4165 }
4166
4167 if (!ic->meta_dev) {
4168 if (ic->fix_padding)
4169 ic->sb->flags |= cpu_to_le32(SB_FLAG_FIXED_PADDING);
4170 ic->sb->journal_sections = cpu_to_le32(journal_sections);
4171 if (!interleave_sectors)
4172 interleave_sectors = DEFAULT_INTERLEAVE_SECTORS;
4173 ic->sb->log2_interleave_sectors = __fls(interleave_sectors);
4174 ic->sb->log2_interleave_sectors = max_t(__u8, MIN_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors);
4175 ic->sb->log2_interleave_sectors = min_t(__u8, MAX_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors);
4176
4177 get_provided_data_sectors(ic);
4178 if (!ic->provided_data_sectors)
4179 return -EINVAL;
4180 } else {
4181 ic->sb->log2_interleave_sectors = 0;
4182
4183 get_provided_data_sectors(ic);
4184 if (!ic->provided_data_sectors)
4185 return -EINVAL;
4186
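/*
 * With a separate metadata device the journal and the tag area share the
 * same space: greedily pick the largest journal_sections value (bit by
 * bit from the top) that still passes calculate_device_limits(); if not
 * even one section fits, shrink the buffer size and try again.
 */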
4187 try_smaller_buffer:
4188 ic->sb->journal_sections = cpu_to_le32(0);
4189 for (test_bit = fls(journal_sections) - 1; test_bit >= 0; test_bit--) {
4190 __u32 prev_journal_sections = le32_to_cpu(ic->sb->journal_sections);
4191 __u32 test_journal_sections = prev_journal_sections | (1U << test_bit);
4192
4193 if (test_journal_sections > journal_sections)
4194 continue;
4195 ic->sb->journal_sections = cpu_to_le32(test_journal_sections);
4196 if (calculate_device_limits(ic))
4197 ic->sb->journal_sections = cpu_to_le32(prev_journal_sections);
4198
4199 }
4200 if (!le32_to_cpu(ic->sb->journal_sections)) {
4201 if (ic->log2_buffer_sectors > 3) {
4202 ic->log2_buffer_sectors--;
4203 goto try_smaller_buffer;
4204 }
4205 return -EINVAL;
4206 }
4207 }
4208
4209 ic->sb->provided_data_sectors = cpu_to_le64(ic->provided_data_sectors);
4210
4211 sb_set_version(ic);
4212
4213 return 0;
4214 }
4215
4216 static void dm_integrity_free_page_list(struct page_list *pl)
4217 {
4218 unsigned int i;
4219
4220 if (!pl)
4221 return;
4222 for (i = 0; pl[i].page; i++)
4223 __free_page(pl[i].page);
4224 kvfree(pl);
4225 }
4226
4227 static struct page_list *dm_integrity_alloc_page_list(unsigned int n_pages)
4228 {
4229 struct page_list *pl;
4230 unsigned int i;
4231
4232 pl = kvmalloc_array(n_pages + 1, sizeof(struct page_list), GFP_KERNEL | __GFP_ZERO);
4233 if (!pl)
4234 return NULL;
4235
4236 for (i = 0; i < n_pages; i++) {
4237 pl[i].page = alloc_page(GFP_KERNEL);
4238 if (!pl[i].page) {
4239 dm_integrity_free_page_list(pl);
4240 return NULL;
4241 }
4242 if (i)
4243 pl[i - 1].next = &pl[i];
4244 }
4245 pl[i].page = NULL;
4246 pl[i].next = NULL;
4247
4248 return pl;
4249 }
4250
4251 static void dm_integrity_free_journal_scatterlist(struct dm_integrity_c *ic, struct scatterlist **sl)
4252 {
4253 unsigned int i;
4254
4255 for (i = 0; i < ic->journal_sections; i++)
4256 kvfree(sl[i]);
4257 kvfree(sl);
4258 }
4259
4260 static struct scatterlist **dm_integrity_alloc_journal_scatterlist(struct dm_integrity_c *ic,
4261 struct page_list *pl)
4262 {
4263 struct scatterlist **sl;
4264 unsigned int i;
4265
4266 sl = kvmalloc_array(ic->journal_sections,
4267 sizeof(struct scatterlist *),
4268 GFP_KERNEL | __GFP_ZERO);
4269 if (!sl)
4270 return NULL;
4271
4272 for (i = 0; i < ic->journal_sections; i++) {
4273 struct scatterlist *s;
4274 unsigned int start_index, start_offset;
4275 unsigned int end_index, end_offset;
4276 unsigned int n_pages;
4277 unsigned int idx;
4278
4279 page_list_location(ic, i, 0, &start_index, &start_offset);
4280 page_list_location(ic, i, ic->journal_section_sectors - 1,
4281 &end_index, &end_offset);
4282
4283 n_pages = (end_index - start_index + 1);
4284
4285 s = kvmalloc_array(n_pages, sizeof(struct scatterlist),
4286 GFP_KERNEL);
4287 if (!s) {
4288 dm_integrity_free_journal_scatterlist(ic, sl);
4289 return NULL;
4290 }
4291
4292 sg_init_table(s, n_pages);
4293 for (idx = start_index; idx <= end_index; idx++) {
4294 char *va = lowmem_page_address(pl[idx].page);
4295 unsigned int start = 0, end = PAGE_SIZE;
4296
4297 if (idx == start_index)
4298 start = start_offset;
4299 if (idx == end_index)
4300 end = end_offset + (1 << SECTOR_SHIFT);
4301 sg_set_buf(&s[idx - start_index], va + start, end - start);
4302 }
4303
4304 sl[i] = s;
4305 }
4306
4307 return sl;
4308 }
4309
4310 static void free_alg(struct alg_spec *a)
4311 {
4312 kfree_sensitive(a->alg_string);
4313 kfree_sensitive(a->key);
4314 memset(a, 0, sizeof(*a));
4315 }
4316
4317 static int get_alg_and_key(const char *arg, struct alg_spec *a, char **error, char *error_inval)
4318 {
4319 char *k;
4320
4321 free_alg(a);
4322
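/*
 * The argument has the form "option:algorithm[:key]"; the optional key
 * is a hex string that is decoded into a->key.
 */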
4323 a->alg_string = kstrdup(strchr(arg, ':') + 1, GFP_KERNEL);
4324 if (!a->alg_string)
4325 goto nomem;
4326
4327 k = strchr(a->alg_string, ':');
4328 if (k) {
4329 *k = 0;
4330 a->key_string = k + 1;
4331 if (strlen(a->key_string) & 1)
4332 goto inval;
4333
4334 a->key_size = strlen(a->key_string) / 2;
4335 a->key = kmalloc(a->key_size, GFP_KERNEL);
4336 if (!a->key)
4337 goto nomem;
4338 if (hex2bin(a->key, a->key_string, a->key_size))
4339 goto inval;
4340 }
4341
4342 return 0;
4343 inval:
4344 *error = error_inval;
4345 return -EINVAL;
4346 nomem:
4347 *error = "Out of memory for an argument";
4348 return -ENOMEM;
4349 }
4350
4351 static int get_mac(struct crypto_shash **shash, struct crypto_ahash **ahash,
4352 struct alg_spec *a, char **error, char *error_alg, char *error_key)
4353 {
4354 int r;
4355
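/*
 * Prefer a synchronous shash implementation; if it cannot be allocated
 * and the caller also accepts ahash, fall back to the asynchronous
 * interface.
 */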
4356 if (a->alg_string) {
4357 if (shash) {
4358 *shash = crypto_alloc_shash(a->alg_string, 0, CRYPTO_ALG_ALLOCATES_MEMORY);
4359 if (IS_ERR(*shash)) {
4360 *shash = NULL;
4361 goto try_ahash;
4362 }
4363 if (a->key) {
4364 r = crypto_shash_setkey(*shash, a->key, a->key_size);
4365 if (r) {
4366 *error = error_key;
4367 return r;
4368 }
4369 } else if (crypto_shash_get_flags(*shash) & CRYPTO_TFM_NEED_KEY) {
4370 *error = error_key;
4371 return -ENOKEY;
4372 }
4373 return 0;
4374 }
4375 try_ahash:
4376 if (ahash) {
4377 *ahash = crypto_alloc_ahash(a->alg_string, 0, CRYPTO_ALG_ALLOCATES_MEMORY);
4378 if (IS_ERR(*ahash)) {
4379 *error = error_alg;
4380 r = PTR_ERR(*ahash);
4381 *ahash = NULL;
4382 return r;
4383 }
4384 if (a->key) {
4385 r = crypto_ahash_setkey(*ahash, a->key, a->key_size);
4386 if (r) {
4387 *error = error_key;
4388 return r;
4389 }
4390 } else if (crypto_ahash_get_flags(*ahash) & CRYPTO_TFM_NEED_KEY) {
4391 *error = error_key;
4392 return -ENOKEY;
4393 }
4394 return 0;
4395 }
4396 *error = error_alg;
4397 return -ENOENT;
4398 }
4399
4400 return 0;
4401 }
4402
4403 static int create_journal(struct dm_integrity_c *ic, char **error)
4404 {
4405 int r = 0;
4406 unsigned int i;
4407 __u64 journal_pages, journal_desc_size, journal_tree_size;
4408 unsigned char *crypt_data = NULL, *crypt_iv = NULL;
4409 struct skcipher_request *req = NULL;
4410
4411 ic->commit_ids[0] = cpu_to_le64(0x1111111111111111ULL);
4412 ic->commit_ids[1] = cpu_to_le64(0x2222222222222222ULL);
4413 ic->commit_ids[2] = cpu_to_le64(0x3333333333333333ULL);
4414 ic->commit_ids[3] = cpu_to_le64(0x4444444444444444ULL);
4415
4416 journal_pages = roundup((__u64)ic->journal_sections * ic->journal_section_sectors,
4417 PAGE_SIZE >> SECTOR_SHIFT) >> (PAGE_SHIFT - SECTOR_SHIFT);
4418 journal_desc_size = journal_pages * sizeof(struct page_list);
4419 if (journal_pages >= totalram_pages() - totalhigh_pages() || journal_desc_size > ULONG_MAX) {
4420 *error = "Journal doesn't fit into memory";
4421 r = -ENOMEM;
4422 goto bad;
4423 }
4424 ic->journal_pages = journal_pages;
4425
4426 ic->journal = dm_integrity_alloc_page_list(ic->journal_pages);
4427 if (!ic->journal) {
4428 *error = "Could not allocate memory for journal";
4429 r = -ENOMEM;
4430 goto bad;
4431 }
4432 if (ic->journal_crypt_alg.alg_string) {
4433 unsigned int ivsize, blocksize;
4434 struct journal_completion comp;
4435
4436 comp.ic = ic;
4437 ic->journal_crypt = crypto_alloc_skcipher(ic->journal_crypt_alg.alg_string, 0, CRYPTO_ALG_ALLOCATES_MEMORY);
4438 if (IS_ERR(ic->journal_crypt)) {
4439 *error = "Invalid journal cipher";
4440 r = PTR_ERR(ic->journal_crypt);
4441 ic->journal_crypt = NULL;
4442 goto bad;
4443 }
4444 ivsize = crypto_skcipher_ivsize(ic->journal_crypt);
4445 blocksize = crypto_skcipher_blocksize(ic->journal_crypt);
4446
4447 if (ic->journal_crypt_alg.key) {
4448 r = crypto_skcipher_setkey(ic->journal_crypt, ic->journal_crypt_alg.key,
4449 ic->journal_crypt_alg.key_size);
4450 if (r) {
4451 *error = "Error setting encryption key";
4452 goto bad;
4453 }
4454 }
4455 DEBUG_print("cipher %s, block size %u iv size %u\n",
4456 ic->journal_crypt_alg.alg_string, blocksize, ivsize);
4457
4458 ic->journal_io = dm_integrity_alloc_page_list(ic->journal_pages);
4459 if (!ic->journal_io) {
4460 *error = "Could not allocate memory for journal io";
4461 r = -ENOMEM;
4462 goto bad;
4463 }
4464
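/*
 * Two journal encryption schemes: for ciphers with block size 1 (stream
 * ciphers) a keystream is generated once into the journal_xor pages and
 * the cipher handle is then freed; for block ciphers a per-section
 * skcipher request with an IV derived from the section number is
 * prepared and the sections are encrypted on the fly.
 */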
4465 if (blocksize == 1) {
4466 struct scatterlist *sg;
4467
4468 req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL);
4469 if (!req) {
4470 *error = "Could not allocate crypt request";
4471 r = -ENOMEM;
4472 goto bad;
4473 }
4474
4475 crypt_iv = kzalloc(ivsize, GFP_KERNEL);
4476 if (!crypt_iv) {
4477 *error = "Could not allocate iv";
4478 r = -ENOMEM;
4479 goto bad;
4480 }
4481
4482 ic->journal_xor = dm_integrity_alloc_page_list(ic->journal_pages);
4483 if (!ic->journal_xor) {
4484 *error = "Could not allocate memory for journal xor";
4485 r = -ENOMEM;
4486 goto bad;
4487 }
4488
4489 sg = kvmalloc_array(ic->journal_pages + 1,
4490 sizeof(struct scatterlist),
4491 GFP_KERNEL);
4492 if (!sg) {
4493 *error = "Unable to allocate sg list";
4494 r = -ENOMEM;
4495 goto bad;
4496 }
4497 sg_init_table(sg, ic->journal_pages + 1);
4498 for (i = 0; i < ic->journal_pages; i++) {
4499 char *va = lowmem_page_address(ic->journal_xor[i].page);
4500
4501 clear_page(va);
4502 sg_set_buf(&sg[i], va, PAGE_SIZE);
4503 }
4504 sg_set_buf(&sg[i], &ic->commit_ids, sizeof(ic->commit_ids));
4505
4506 skcipher_request_set_crypt(req, sg, sg,
4507 PAGE_SIZE * ic->journal_pages + sizeof(ic->commit_ids), crypt_iv);
4508 init_completion(&comp.comp);
4509 comp.in_flight = (atomic_t)ATOMIC_INIT(1);
4510 if (do_crypt(true, req, &comp))
4511 wait_for_completion(&comp.comp);
4512 kvfree(sg);
4513 r = dm_integrity_failed(ic);
4514 if (r) {
4515 *error = "Unable to encrypt journal";
4516 goto bad;
4517 }
4518 DEBUG_bytes(lowmem_page_address(ic->journal_xor[0].page), 64, "xor data");
4519
4520 crypto_free_skcipher(ic->journal_crypt);
4521 ic->journal_crypt = NULL;
4522 } else {
4523 unsigned int crypt_len = roundup(ivsize, blocksize);
4524
4525 req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL);
4526 if (!req) {
4527 *error = "Could not allocate crypt request";
4528 r = -ENOMEM;
4529 goto bad;
4530 }
4531
4532 crypt_iv = kmalloc(ivsize, GFP_KERNEL);
4533 if (!crypt_iv) {
4534 *error = "Could not allocate iv";
4535 r = -ENOMEM;
4536 goto bad;
4537 }
4538
4539 crypt_data = kmalloc(crypt_len, GFP_KERNEL);
4540 if (!crypt_data) {
4541 *error = "Unable to allocate crypt data";
4542 r = -ENOMEM;
4543 goto bad;
4544 }
4545
4546 ic->journal_scatterlist = dm_integrity_alloc_journal_scatterlist(ic, ic->journal);
4547 if (!ic->journal_scatterlist) {
4548 *error = "Unable to allocate sg list";
4549 r = -ENOMEM;
4550 goto bad;
4551 }
4552 ic->journal_io_scatterlist = dm_integrity_alloc_journal_scatterlist(ic, ic->journal_io);
4553 if (!ic->journal_io_scatterlist) {
4554 *error = "Unable to allocate sg list";
4555 r = -ENOMEM;
4556 goto bad;
4557 }
4558 ic->sk_requests = kvmalloc_array(ic->journal_sections,
4559 sizeof(struct skcipher_request *),
4560 GFP_KERNEL | __GFP_ZERO);
4561 if (!ic->sk_requests) {
4562 *error = "Unable to allocate sk requests";
4563 r = -ENOMEM;
4564 goto bad;
4565 }
4566 for (i = 0; i < ic->journal_sections; i++) {
4567 struct scatterlist sg;
4568 struct skcipher_request *section_req;
4569 __le32 section_le = cpu_to_le32(i);
4570
4571 memset(crypt_iv, 0x00, ivsize);
4572 memset(crypt_data, 0x00, crypt_len);
4573 memcpy(crypt_data, &section_le, min_t(size_t, crypt_len, sizeof(section_le)));
4574
4575 sg_init_one(&sg, crypt_data, crypt_len);
4576 skcipher_request_set_crypt(req, &sg, &sg, crypt_len, crypt_iv);
4577 init_completion(&comp.comp);
4578 comp.in_flight = (atomic_t)ATOMIC_INIT(1);
4579 if (do_crypt(true, req, &comp))
4580 wait_for_completion(&comp.comp);
4581
4582 r = dm_integrity_failed(ic);
4583 if (r) {
4584 *error = "Unable to generate iv";
4585 goto bad;
4586 }
4587
4588 section_req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL);
4589 if (!section_req) {
4590 *error = "Unable to allocate crypt request";
4591 r = -ENOMEM;
4592 goto bad;
4593 }
4594 section_req->iv = kmalloc_array(ivsize, 2,
4595 GFP_KERNEL);
4596 if (!section_req->iv) {
4597 skcipher_request_free(section_req);
4598 *error = "Unable to allocate iv";
4599 r = -ENOMEM;
4600 goto bad;
4601 }
4602 memcpy(section_req->iv + ivsize, crypt_data, ivsize);
4603 section_req->cryptlen = (size_t)ic->journal_section_sectors << SECTOR_SHIFT;
4604 ic->sk_requests[i] = section_req;
4605 DEBUG_bytes(crypt_data, ivsize, "iv(%u)", i);
4606 }
4607 }
4608 }
4609
4610 for (i = 0; i < N_COMMIT_IDS; i++) {
4611 unsigned int j;
4612
4613 retest_commit_id:
4614 for (j = 0; j < i; j++) {
4615 if (ic->commit_ids[j] == ic->commit_ids[i]) {
4616 ic->commit_ids[i] = cpu_to_le64(le64_to_cpu(ic->commit_ids[i]) + 1);
4617 goto retest_commit_id;
4618 }
4619 }
4620 DEBUG_print("commit id %u: %016llx\n", i, ic->commit_ids[i]);
4621 }
4622
4623 journal_tree_size = (__u64)ic->journal_entries * sizeof(struct journal_node);
4624 if (journal_tree_size > ULONG_MAX) {
4625 *error = "Journal doesn't fit into memory";
4626 r = -ENOMEM;
4627 goto bad;
4628 }
4629 ic->journal_tree = kvmalloc(journal_tree_size, GFP_KERNEL);
4630 if (!ic->journal_tree) {
4631 *error = "Could not allocate memory for journal tree";
4632 r = -ENOMEM;
4633 }
4634 bad:
4635 kfree(crypt_data);
4636 kfree(crypt_iv);
4637 skcipher_request_free(req);
4638
4639 return r;
4640 }
4641
4642 /*
4643 * Construct an integrity mapping
4644 *
4645 * Arguments:
4646 * device
4647 * offset from the start of the device
4648 * tag size
4649 * D - direct writes, J - journal writes, B - bitmap mode, R - recovery mode, I - inline mode
4650 * number of optional arguments
4651 * optional arguments:
4652 * journal_sectors
4653 * interleave_sectors
4654 * buffer_sectors
4655 * journal_watermark
4656 * commit_time
4657 * meta_device
4658 * block_size
4659 * sectors_per_bit
4660 * bitmap_flush_interval
4661 * internal_hash
4662 * journal_crypt
4663 * journal_mac
4664 * recalculate
4665 */
4666 static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv)
4667 {
4668 struct dm_integrity_c *ic;
4669 char dummy;
4670 int r;
4671 unsigned int extra_args;
4672 struct dm_arg_set as;
4673 static const struct dm_arg _args[] = {
4674 {0, 18, "Invalid number of feature args"},
4675 };
4676 unsigned int journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec;
4677 bool should_write_sb;
4678 __u64 threshold;
4679 unsigned long long start;
4680 __s8 log2_sectors_per_bitmap_bit = -1;
4681 __s8 log2_blocks_per_bitmap_bit;
4682 __u64 bits_in_journal;
4683 __u64 n_bitmap_bits;
4684
4685 #define DIRECT_ARGUMENTS 4
4686
4687 if (argc <= DIRECT_ARGUMENTS) {
4688 ti->error = "Invalid argument count";
4689 return -EINVAL;
4690 }
4691
4692 ic = kzalloc(sizeof(struct dm_integrity_c), GFP_KERNEL);
4693 if (!ic) {
4694 ti->error = "Cannot allocate integrity context";
4695 return -ENOMEM;
4696 }
4697 ti->private = ic;
4698 ti->per_io_data_size = sizeof(struct dm_integrity_io);
4699 ic->ti = ti;
4700
4701 ic->in_progress = RB_ROOT;
4702 INIT_LIST_HEAD(&ic->wait_list);
4703 init_waitqueue_head(&ic->endio_wait);
4704 bio_list_init(&ic->flush_bio_list);
4705 init_waitqueue_head(&ic->copy_to_journal_wait);
4706 init_completion(&ic->crypto_backoff);
4707 atomic64_set(&ic->number_of_mismatches, 0);
4708 ic->bitmap_flush_interval = BITMAP_FLUSH_INTERVAL;
4709
4710 r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &ic->dev);
4711 if (r) {
4712 ti->error = "Device lookup failed";
4713 goto bad;
4714 }
4715
4716 if (sscanf(argv[1], "%llu%c", &start, &dummy) != 1 || start != (sector_t)start) {
4717 ti->error = "Invalid starting offset";
4718 r = -EINVAL;
4719 goto bad;
4720 }
4721 ic->start = start;
4722
4723 if (strcmp(argv[2], "-")) {
4724 if (sscanf(argv[2], "%u%c", &ic->tag_size, &dummy) != 1 || !ic->tag_size) {
4725 ti->error = "Invalid tag size";
4726 r = -EINVAL;
4727 goto bad;
4728 }
4729 }
4730
4731 if (!strcmp(argv[3], "J") || !strcmp(argv[3], "B") ||
4732 !strcmp(argv[3], "D") || !strcmp(argv[3], "R") ||
4733 !strcmp(argv[3], "I")) {
4734 ic->mode = argv[3][0];
4735 } else {
4736 ti->error = "Invalid mode (expecting J, B, D, R, I)";
4737 r = -EINVAL;
4738 goto bad;
4739 }
4740
4741 journal_sectors = 0;
4742 interleave_sectors = DEFAULT_INTERLEAVE_SECTORS;
4743 buffer_sectors = DEFAULT_BUFFER_SECTORS;
4744 journal_watermark = DEFAULT_JOURNAL_WATERMARK;
4745 sync_msec = DEFAULT_SYNC_MSEC;
4746 ic->sectors_per_block = 1;
4747
4748 as.argc = argc - DIRECT_ARGUMENTS;
4749 as.argv = argv + DIRECT_ARGUMENTS;
4750 r = dm_read_arg_group(_args, &as, &extra_args, &ti->error);
4751 if (r)
4752 goto bad;
4753
4754 while (extra_args--) {
4755 const char *opt_string;
4756 unsigned int val;
4757 unsigned long long llval;
4758
4759 opt_string = dm_shift_arg(&as);
4760 if (!opt_string) {
4761 r = -EINVAL;
4762 ti->error = "Not enough feature arguments";
4763 goto bad;
4764 }
4765 if (sscanf(opt_string, "journal_sectors:%u%c", &val, &dummy) == 1)
4766 journal_sectors = val ? val : 1;
4767 else if (sscanf(opt_string, "interleave_sectors:%u%c", &val, &dummy) == 1)
4768 interleave_sectors = val;
4769 else if (sscanf(opt_string, "buffer_sectors:%u%c", &val, &dummy) == 1)
4770 buffer_sectors = val;
4771 else if (sscanf(opt_string, "journal_watermark:%u%c", &val, &dummy) == 1 && val <= 100)
4772 journal_watermark = val;
4773 else if (sscanf(opt_string, "commit_time:%u%c", &val, &dummy) == 1)
4774 sync_msec = val;
4775 else if (!strncmp(opt_string, "meta_device:", strlen("meta_device:"))) {
4776 if (ic->meta_dev) {
4777 dm_put_device(ti, ic->meta_dev);
4778 ic->meta_dev = NULL;
4779 }
4780 r = dm_get_device(ti, strchr(opt_string, ':') + 1,
4781 dm_table_get_mode(ti->table), &ic->meta_dev);
4782 if (r) {
4783 ti->error = "Device lookup failed";
4784 goto bad;
4785 }
4786 } else if (sscanf(opt_string, "block_size:%u%c", &val, &dummy) == 1) {
4787 if (val < 1 << SECTOR_SHIFT ||
4788 val > MAX_SECTORS_PER_BLOCK << SECTOR_SHIFT ||
4789 (val & (val - 1))) {
4790 r = -EINVAL;
4791 ti->error = "Invalid block_size argument";
4792 goto bad;
4793 }
4794 ic->sectors_per_block = val >> SECTOR_SHIFT;
4795 } else if (sscanf(opt_string, "sectors_per_bit:%llu%c", &llval, &dummy) == 1) {
4796 log2_sectors_per_bitmap_bit = !llval ? 0 : __ilog2_u64(llval);
4797 } else if (sscanf(opt_string, "bitmap_flush_interval:%u%c", &val, &dummy) == 1) {
4798 if ((uint64_t)val >= (uint64_t)UINT_MAX * 1000 / HZ) {
4799 r = -EINVAL;
4800 ti->error = "Invalid bitmap_flush_interval argument";
4801 goto bad;
4802 }
4803 ic->bitmap_flush_interval = msecs_to_jiffies(val);
4804 } else if (!strncmp(opt_string, "internal_hash:", strlen("internal_hash:"))) {
4805 r = get_alg_and_key(opt_string, &ic->internal_hash_alg, &ti->error,
4806 "Invalid internal_hash argument");
4807 if (r)
4808 goto bad;
4809 } else if (!strncmp(opt_string, "journal_crypt:", strlen("journal_crypt:"))) {
4810 r = get_alg_and_key(opt_string, &ic->journal_crypt_alg, &ti->error,
4811 "Invalid journal_crypt argument");
4812 if (r)
4813 goto bad;
4814 } else if (!strncmp(opt_string, "journal_mac:", strlen("journal_mac:"))) {
4815 r = get_alg_and_key(opt_string, &ic->journal_mac_alg, &ti->error,
4816 "Invalid journal_mac argument");
4817 if (r)
4818 goto bad;
4819 } else if (!strcmp(opt_string, "recalculate")) {
4820 ic->recalculate_flag = true;
4821 } else if (!strcmp(opt_string, "reset_recalculate")) {
4822 ic->recalculate_flag = true;
4823 ic->reset_recalculate_flag = true;
4824 } else if (!strcmp(opt_string, "allow_discards")) {
4825 ic->discard = true;
4826 } else if (!strcmp(opt_string, "fix_padding")) {
4827 ic->fix_padding = true;
4828 } else if (!strcmp(opt_string, "fix_hmac")) {
4829 ic->fix_hmac = true;
4830 } else if (!strcmp(opt_string, "legacy_recalculate")) {
4831 ic->legacy_recalculate = true;
4832 } else {
4833 r = -EINVAL;
4834 ti->error = "Invalid argument";
4835 goto bad;
4836 }
4837 }
4838
4839 ic->data_device_sectors = bdev_nr_sectors(ic->dev->bdev);
4840 if (!ic->meta_dev)
4841 ic->meta_device_sectors = ic->data_device_sectors;
4842 else
4843 ic->meta_device_sectors = bdev_nr_sectors(ic->meta_dev->bdev);
4844
4845 if (!journal_sectors) {
4846 journal_sectors = min((sector_t)DEFAULT_MAX_JOURNAL_SECTORS,
4847 ic->data_device_sectors >> DEFAULT_JOURNAL_SIZE_FACTOR);
4848 }
4849
4850 if (!buffer_sectors)
4851 buffer_sectors = 1;
4852 ic->log2_buffer_sectors = min((int)__fls(buffer_sectors), 31 - SECTOR_SHIFT);
4853
4854 r = get_mac(&ic->internal_shash, &ic->internal_ahash, &ic->internal_hash_alg, &ti->error,
4855 "Invalid internal hash", "Error setting internal hash key");
4856 if (r)
4857 goto bad;
4858 if (ic->internal_shash) {
4859 ic->internal_hash = true;
4860 ic->internal_hash_digestsize = crypto_shash_digestsize(ic->internal_shash);
4861 }
4862 if (ic->internal_ahash) {
4863 ic->internal_hash = true;
4864 ic->internal_hash_digestsize = crypto_ahash_digestsize(ic->internal_ahash);
4865 r = mempool_init_kmalloc_pool(&ic->ahash_req_pool, AHASH_MEMPOOL,
4866 sizeof(struct ahash_request) + crypto_ahash_reqsize(ic->internal_ahash));
4867 if (r) {
4868 ti->error = "Cannot allocate mempool";
4869 goto bad;
4870 }
4871 }
4872
4873 r = get_mac(&ic->journal_mac, NULL, &ic->journal_mac_alg, &ti->error,
4874 "Invalid journal mac", "Error setting journal mac key");
4875 if (r)
4876 goto bad;
4877
4878 if (!ic->tag_size) {
4879 if (!ic->internal_hash) {
4880 ti->error = "Unknown tag size";
4881 r = -EINVAL;
4882 goto bad;
4883 }
4884 ic->tag_size = ic->internal_hash_digestsize;
4885 }
4886 if (ic->tag_size > MAX_TAG_SIZE) {
4887 ti->error = "Too big tag size";
4888 r = -EINVAL;
4889 goto bad;
4890 }
4891 if (!(ic->tag_size & (ic->tag_size - 1)))
4892 ic->log2_tag_size = __ffs(ic->tag_size);
4893 else
4894 ic->log2_tag_size = -1;
4895
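/*
 * Inline mode keeps the tags in the device's own per-sector integrity
 * metadata, so it requires an internal hash, cannot use a separate
 * metadata device, journal crypt/mac or discards, and the underlying
 * disk must expose a nop integrity profile large enough for the tag.
 */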
4896 if (ic->mode == 'I') {
4897 struct blk_integrity *bi;
4898 if (ic->meta_dev) {
4899 r = -EINVAL;
4900 ti->error = "Metadata device not supported in inline mode";
4901 goto bad;
4902 }
4903 if (!ic->internal_hash_alg.alg_string) {
4904 r = -EINVAL;
4905 ti->error = "Internal hash not set in inline mode";
4906 goto bad;
4907 }
4908 if (ic->journal_crypt_alg.alg_string || ic->journal_mac_alg.alg_string) {
4909 r = -EINVAL;
4910 ti->error = "Journal crypt not supported in inline mode";
4911 goto bad;
4912 }
4913 if (ic->discard) {
4914 r = -EINVAL;
4915 ti->error = "Discards not supported in inline mode";
4916 goto bad;
4917 }
4918 bi = blk_get_integrity(ic->dev->bdev->bd_disk);
4919 if (!bi || bi->csum_type != BLK_INTEGRITY_CSUM_NONE) {
4920 r = -EINVAL;
4921 ti->error = "Integrity profile not supported";
4922 goto bad;
4923 }
4924 /*printk("tag_size: %u, metadata_size: %u\n", bi->tag_size, bi->metadata_size);*/
4925 if (bi->metadata_size < ic->tag_size) {
4926 r = -EINVAL;
4927 ti->error = "The integrity profile is smaller than tag size";
4928 goto bad;
4929 }
4930 if ((unsigned long)bi->metadata_size > PAGE_SIZE / 2) {
4931 r = -EINVAL;
4932 ti->error = "Too big tuple size";
4933 goto bad;
4934 }
4935 ic->tuple_size = bi->metadata_size;
4936 if (1 << bi->interval_exp != ic->sectors_per_block << SECTOR_SHIFT) {
4937 r = -EINVAL;
4938 ti->error = "Integrity profile sector size mismatch";
4939 goto bad;
4940 }
4941 }
4942
4943 if (ic->mode == 'B' && !ic->internal_hash) {
4944 r = -EINVAL;
4945 ti->error = "Bitmap mode can be only used with internal hash";
4946 goto bad;
4947 }
4948
4949 if (ic->discard && !ic->internal_hash) {
4950 r = -EINVAL;
4951 ti->error = "Discard can be only used with internal hash";
4952 goto bad;
4953 }
4954
4955 ic->autocommit_jiffies = msecs_to_jiffies(sync_msec);
4956 ic->autocommit_msec = sync_msec;
4957 timer_setup(&ic->autocommit_timer, autocommit_fn, 0);
4958
4959 ic->io = dm_io_client_create();
4960 if (IS_ERR(ic->io)) {
4961 r = PTR_ERR(ic->io);
4962 ic->io = NULL;
4963 ti->error = "Cannot allocate dm io";
4964 goto bad;
4965 }
4966
4967 r = mempool_init_slab_pool(&ic->journal_io_mempool, JOURNAL_IO_MEMPOOL, journal_io_cache);
4968 if (r) {
4969 ti->error = "Cannot allocate mempool";
4970 goto bad;
4971 }
4972
4973 r = mempool_init_page_pool(&ic->recheck_pool, 1, ic->mode == 'I' ? 1 : 0);
4974 if (r) {
4975 ti->error = "Cannot allocate mempool";
4976 goto bad;
4977 }
4978
4979 if (ic->mode == 'I') {
4980 r = bioset_init(&ic->recheck_bios, RECHECK_POOL_SIZE, 0, BIOSET_NEED_BVECS);
4981 if (r) {
4982 ti->error = "Cannot allocate bio set";
4983 goto bad;
4984 }
4985 r = bioset_init(&ic->recalc_bios, 1, 0, BIOSET_NEED_BVECS);
4986 if (r) {
4987 ti->error = "Cannot allocate bio set";
4988 goto bad;
4989 }
4990 }
4991
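/*
 * Allocate the workqueues: metadata_wq and offload_wq allow several
 * concurrent items, commit_wq and writer_wq are single threaded, and
 * wait_wq below must be ordered.
 */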
4992 ic->metadata_wq = alloc_workqueue("dm-integrity-metadata",
4993 WQ_MEM_RECLAIM, METADATA_WORKQUEUE_MAX_ACTIVE);
4994 if (!ic->metadata_wq) {
4995 ti->error = "Cannot allocate workqueue";
4996 r = -ENOMEM;
4997 goto bad;
4998 }
4999
5000 /*
5001 * If this workqueue weren't ordered, it would cause bio reordering
5002 * and reduced performance.
5003 */
5004 ic->wait_wq = alloc_ordered_workqueue("dm-integrity-wait", WQ_MEM_RECLAIM);
5005 if (!ic->wait_wq) {
5006 ti->error = "Cannot allocate workqueue";
5007 r = -ENOMEM;
5008 goto bad;
5009 }
5010
5011 ic->offload_wq = alloc_workqueue("dm-integrity-offload", WQ_MEM_RECLAIM,
5012 METADATA_WORKQUEUE_MAX_ACTIVE);
5013 if (!ic->offload_wq) {
5014 ti->error = "Cannot allocate workqueue";
5015 r = -ENOMEM;
5016 goto bad;
5017 }
5018
5019 ic->commit_wq = alloc_workqueue("dm-integrity-commit", WQ_MEM_RECLAIM, 1);
5020 if (!ic->commit_wq) {
5021 ti->error = "Cannot allocate workqueue";
5022 r = -ENOMEM;
5023 goto bad;
5024 }
5025 INIT_WORK(&ic->commit_work, integrity_commit);
5026
5027 if (ic->mode == 'J' || ic->mode == 'B') {
5028 ic->writer_wq = alloc_workqueue("dm-integrity-writer", WQ_MEM_RECLAIM, 1);
5029 if (!ic->writer_wq) {
5030 ti->error = "Cannot allocate workqueue";
5031 r = -ENOMEM;
5032 goto bad;
5033 }
5034 INIT_WORK(&ic->writer_work, integrity_writer);
5035 }
5036
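/*
 * Read the on-disk superblock. If the magic is missing, format a fresh
 * superblock (the area must still be zeroed unless running in read-only
 * 'R' mode); it is written out later for writable modes.
 */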
5037 ic->sb = alloc_pages_exact(SB_SECTORS << SECTOR_SHIFT, GFP_KERNEL);
5038 if (!ic->sb) {
5039 r = -ENOMEM;
5040 ti->error = "Cannot allocate superblock area";
5041 goto bad;
5042 }
5043
5044 r = sync_rw_sb(ic, REQ_OP_READ);
5045 if (r) {
5046 ti->error = "Error reading superblock";
5047 goto bad;
5048 }
5049 should_write_sb = false;
5050 if (memcmp(ic->sb->magic, SB_MAGIC, 8)) {
5051 if (ic->mode != 'R') {
5052 if (memchr_inv(ic->sb, 0, SB_SECTORS << SECTOR_SHIFT)) {
5053 r = -EINVAL;
5054 ti->error = "The device is not initialized";
5055 goto bad;
5056 }
5057 }
5058
5059 r = initialize_superblock(ic, journal_sectors, interleave_sectors);
5060 if (r) {
5061 ti->error = "Could not initialize superblock";
5062 goto bad;
5063 }
5064 if (ic->mode != 'R')
5065 should_write_sb = true;
5066 }
5067
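/*
 * Validate the superblock against the target parameters: version,
 * inline flag, tag size, block size, journal size, interleaving and
 * the journal MAC flag must all be consistent.
 */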
5068 if (!ic->sb->version || ic->sb->version > SB_VERSION_6) {
5069 r = -EINVAL;
5070 ti->error = "Unknown version";
5071 goto bad;
5072 }
5073 if (!!(ic->sb->flags & cpu_to_le32(SB_FLAG_INLINE)) != (ic->mode == 'I')) {
5074 r = -EINVAL;
5075 ti->error = "Inline flag mismatch";
5076 goto bad;
5077 }
5078 if (le16_to_cpu(ic->sb->integrity_tag_size) != ic->tag_size) {
5079 r = -EINVAL;
5080 ti->error = "Tag size doesn't match the information in superblock";
5081 goto bad;
5082 }
5083 if (ic->sb->log2_sectors_per_block != __ffs(ic->sectors_per_block)) {
5084 r = -EINVAL;
5085 ti->error = "Block size doesn't match the information in superblock";
5086 goto bad;
5087 }
5088 if (ic->mode != 'I') {
5089 if (!le32_to_cpu(ic->sb->journal_sections)) {
5090 r = -EINVAL;
5091 ti->error = "Corrupted superblock, journal_sections is 0";
5092 goto bad;
5093 }
5094 } else {
5095 if (le32_to_cpu(ic->sb->journal_sections)) {
5096 r = -EINVAL;
5097 ti->error = "Corrupted superblock, journal_sections is not 0";
5098 goto bad;
5099 }
5100 }
5101 /* make sure that ti->max_io_len doesn't overflow */
5102 if (!ic->meta_dev) {
5103 if (ic->sb->log2_interleave_sectors < MIN_LOG2_INTERLEAVE_SECTORS ||
5104 ic->sb->log2_interleave_sectors > MAX_LOG2_INTERLEAVE_SECTORS) {
5105 r = -EINVAL;
5106 ti->error = "Invalid interleave_sectors in the superblock";
5107 goto bad;
5108 }
5109 } else {
5110 if (ic->sb->log2_interleave_sectors) {
5111 r = -EINVAL;
5112 ti->error = "Invalid interleave_sectors in the superblock";
5113 goto bad;
5114 }
5115 }
5116 if (!!(ic->sb->flags & cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC)) != !!ic->journal_mac_alg.alg_string) {
5117 r = -EINVAL;
5118 ti->error = "Journal mac mismatch";
5119 goto bad;
5120 }
5121
5122 get_provided_data_sectors(ic);
5123 if (!ic->provided_data_sectors) {
5124 r = -EINVAL;
5125 ti->error = "The device is too small";
5126 goto bad;
5127 }
5128
5129 try_smaller_buffer:
5130 r = calculate_device_limits(ic);
5131 if (r) {
5132 if (ic->meta_dev) {
5133 if (ic->log2_buffer_sectors > 3) {
5134 ic->log2_buffer_sectors--;
5135 goto try_smaller_buffer;
5136 }
5137 }
5138 ti->error = "The device is too small";
5139 goto bad;
5140 }
5141
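/*
 * Choose the bitmap granularity: start from the default (or the
 * user-supplied value) and coarsen it until one bit per region fits
 * into the space occupied by the journal.
 */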
5142 if (log2_sectors_per_bitmap_bit < 0)
5143 log2_sectors_per_bitmap_bit = __fls(DEFAULT_SECTORS_PER_BITMAP_BIT);
5144 if (log2_sectors_per_bitmap_bit < ic->sb->log2_sectors_per_block)
5145 log2_sectors_per_bitmap_bit = ic->sb->log2_sectors_per_block;
5146
5147 bits_in_journal = ((__u64)ic->journal_section_sectors * ic->journal_sections) << (SECTOR_SHIFT + 3);
5148 if (bits_in_journal > UINT_MAX)
5149 bits_in_journal = UINT_MAX;
5150 if (bits_in_journal)
5151 while (bits_in_journal < (ic->provided_data_sectors + ((sector_t)1 << log2_sectors_per_bitmap_bit) - 1) >> log2_sectors_per_bitmap_bit)
5152 log2_sectors_per_bitmap_bit++;
5153
5154 log2_blocks_per_bitmap_bit = log2_sectors_per_bitmap_bit - ic->sb->log2_sectors_per_block;
5155 ic->log2_blocks_per_bitmap_bit = log2_blocks_per_bitmap_bit;
5156 if (should_write_sb)
5157 ic->sb->log2_blocks_per_bitmap_bit = log2_blocks_per_bitmap_bit;
5158
5159 n_bitmap_bits = ((ic->provided_data_sectors >> ic->sb->log2_sectors_per_block)
5160 + (((sector_t)1 << log2_blocks_per_bitmap_bit) - 1)) >> log2_blocks_per_bitmap_bit;
5161 ic->n_bitmap_blocks = DIV_ROUND_UP(n_bitmap_bits, BITMAP_BLOCK_SIZE * 8);
5162
5163 if (!ic->meta_dev)
5164 ic->log2_buffer_sectors = min(ic->log2_buffer_sectors, (__u8)__ffs(ic->metadata_run));
5165
5166 if (ti->len > ic->provided_data_sectors) {
5167 r = -EINVAL;
5168 ti->error = "Not enough provided sectors for requested mapping size";
5169 goto bad;
5170 }
5171
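/*
 * Convert the journal watermark percentage into an absolute number of
 * free journal entries, rounded to the nearest entry:
 * threshold = journal_entries * (100 - watermark) / 100.
 */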
5172 threshold = (__u64)ic->journal_entries * (100 - journal_watermark);
5173 threshold += 50;
5174 do_div(threshold, 100);
5175 ic->free_sectors_threshold = threshold;
5176
5177 DEBUG_print("initialized:\n");
5178 DEBUG_print(" integrity_tag_size %u\n", le16_to_cpu(ic->sb->integrity_tag_size));
5179 DEBUG_print(" journal_entry_size %u\n", ic->journal_entry_size);
5180 DEBUG_print(" journal_entries_per_sector %u\n", ic->journal_entries_per_sector);
5181 DEBUG_print(" journal_section_entries %u\n", ic->journal_section_entries);
5182 DEBUG_print(" journal_section_sectors %u\n", ic->journal_section_sectors);
5183 DEBUG_print(" journal_sections %u\n", (unsigned int)le32_to_cpu(ic->sb->journal_sections));
5184 DEBUG_print(" journal_entries %u\n", ic->journal_entries);
5185 DEBUG_print(" log2_interleave_sectors %d\n", ic->sb->log2_interleave_sectors);
5186 DEBUG_print(" data_device_sectors 0x%llx\n", bdev_nr_sectors(ic->dev->bdev));
5187 DEBUG_print(" initial_sectors 0x%x\n", ic->initial_sectors);
5188 DEBUG_print(" metadata_run 0x%x\n", ic->metadata_run);
5189 DEBUG_print(" log2_metadata_run %d\n", ic->log2_metadata_run);
5190 DEBUG_print(" provided_data_sectors 0x%llx (%llu)\n", ic->provided_data_sectors, ic->provided_data_sectors);
5191 DEBUG_print(" log2_buffer_sectors %u\n", ic->log2_buffer_sectors);
5192 DEBUG_print(" bits_in_journal %llu\n", bits_in_journal);
5193
5194 if (ic->recalculate_flag && !(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))) {
5195 ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
5196 ic->sb->recalc_sector = cpu_to_le64(0);
5197 }
5198
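/*
 * Background recalculation of tags requires the internal hash: either
 * allocate its workqueue, or reject a superblock that is flagged as
 * recalculating when no internal hash is configured.
 */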
5199 if (ic->internal_hash) {
5200 ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1);
5201 if (!ic->recalc_wq) {
5202 ti->error = "Cannot allocate workqueue";
5203 r = -ENOMEM;
5204 goto bad;
5205 }
5206 INIT_WORK(&ic->recalc_work, ic->mode == 'I' ? integrity_recalc_inline : integrity_recalc);
5207 } else {
5208 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
5209 ti->error = "Recalculate can only be specified with internal_hash";
5210 r = -EINVAL;
5211 goto bad;
5212 }
5213 }
5214
5215 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) &&
5216 le64_to_cpu(ic->sb->recalc_sector) < ic->provided_data_sectors &&
5217 dm_integrity_disable_recalculate(ic)) {
5218 ti->error = "Recalculating with HMAC is disabled for security reasons - if you really need it, use the argument \"legacy_recalculate\"";
5219 r = -EOPNOTSUPP;
5220 goto bad;
5221 }
5222
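/* Create the dm-bufio client that is used to access the metadata (tag) area. */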
5223 ic->bufio = dm_bufio_client_create(ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev,
5224 1U << (SECTOR_SHIFT + ic->log2_buffer_sectors), 1, 0, NULL, NULL, 0);
5225 if (IS_ERR(ic->bufio)) {
5226 r = PTR_ERR(ic->bufio);
5227 ti->error = "Cannot initialize dm-bufio";
5228 ic->bufio = NULL;
5229 goto bad;
5230 }
5231 dm_bufio_set_sector_offset(ic->bufio, ic->start + ic->initial_sectors);
5232
5233 if (ic->mode != 'R' && ic->mode != 'I') {
5234 r = create_journal(ic, &ti->error);
5235 if (r)
5236 goto bad;
5237
5238 }
5239
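/*
 * Bitmap mode: allocate the recalc and may-write copies of the bitmap
 * plus a status structure per bitmap block, and point each block at
 * its backing page inside the journal area.
 */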
5240 if (ic->mode == 'B') {
5241 unsigned int i;
5242 unsigned int n_bitmap_pages = DIV_ROUND_UP(ic->n_bitmap_blocks, PAGE_SIZE / BITMAP_BLOCK_SIZE);
5243
5244 ic->recalc_bitmap = dm_integrity_alloc_page_list(n_bitmap_pages);
5245 if (!ic->recalc_bitmap) {
5246 ti->error = "Could not allocate memory for bitmap";
5247 r = -ENOMEM;
5248 goto bad;
5249 }
5250 ic->may_write_bitmap = dm_integrity_alloc_page_list(n_bitmap_pages);
5251 if (!ic->may_write_bitmap) {
5252 ti->error = "Could not allocate memory for bitmap";
5253 r = -ENOMEM;
5254 goto bad;
5255 }
5256 ic->bbs = kvmalloc_array(ic->n_bitmap_blocks, sizeof(struct bitmap_block_status), GFP_KERNEL);
5257 if (!ic->bbs) {
5258 ti->error = "Could not allocate memory for bitmap";
5259 r = -ENOMEM;
5260 goto bad;
5261 }
5262 INIT_DELAYED_WORK(&ic->bitmap_flush_work, bitmap_flush_work);
5263 for (i = 0; i < ic->n_bitmap_blocks; i++) {
5264 struct bitmap_block_status *bbs = &ic->bbs[i];
5265 unsigned int sector, pl_index, pl_offset;
5266
5267 INIT_WORK(&bbs->work, bitmap_block_work);
5268 bbs->ic = ic;
5269 bbs->idx = i;
5270 bio_list_init(&bbs->bio_queue);
5271 spin_lock_init(&bbs->bio_queue_lock);
5272
5273 sector = i * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT);
5274 pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT);
5275 pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1);
5276
5277 bbs->bitmap = lowmem_page_address(ic->journal[pl_index].page) + pl_offset;
5278 }
5279 }
5280
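/* A freshly formatted device: initialize the journal and persist the new superblock. */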
5281 if (should_write_sb) {
5282 init_journal(ic, 0, ic->journal_sections, 0);
5283 r = dm_integrity_failed(ic);
5284 if (unlikely(r)) {
5285 ti->error = "Error initializing journal";
5286 goto bad;
5287 }
5288 r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA);
5289 if (r) {
5290 ti->error = "Error initializing superblock";
5291 goto bad;
5292 }
5293 ic->just_formatted = true;
5294 }
5295
5296 if (!ic->meta_dev && ic->mode != 'I') {
5297 r = dm_set_target_max_io_len(ti, 1U << ic->sb->log2_interleave_sectors);
5298 if (r)
5299 goto bad;
5300 }
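/*
 * In bitmap mode, limit max_io_len to the data range covered by a
 * single bitmap block so that a bio maps to one bitmap_block_status.
 */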
5301 if (ic->mode == 'B') {
5302 unsigned int max_io_len;
5303
5304 max_io_len = ((sector_t)ic->sectors_per_block << ic->log2_blocks_per_bitmap_bit) * (BITMAP_BLOCK_SIZE * 8);
5305 if (!max_io_len)
5306 max_io_len = 1U << 31;
5307 DEBUG_print("max_io_len: old %u, new %u\n", ti->max_io_len, max_io_len);
5308 if (!ti->max_io_len || ti->max_io_len > max_io_len) {
5309 r = dm_set_target_max_io_len(ti, max_io_len);
5310 if (r)
5311 goto bad;
5312 }
5313 }
5314
5315 ti->num_flush_bios = 1;
5316 ti->flush_supported = true;
5317 if (ic->discard)
5318 ti->num_discard_bios = 1;
5319
5320 if (ic->mode == 'I')
5321 ti->mempool_needs_integrity = true;
5322
5323 dm_audit_log_ctr(DM_MSG_PREFIX, ti, 1);
5324 return 0;
5325
5326 bad:
5327 dm_audit_log_ctr(DM_MSG_PREFIX, ti, 0);
5328 dm_integrity_dtr(ti);
5329 return r;
5330 }
5331
5332 static void dm_integrity_dtr(struct dm_target *ti)
5333 {
5334 struct dm_integrity_c *ic = ti->private;
5335
5336 BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
5337 BUG_ON(!list_empty(&ic->wait_list));
5338
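/*
 * Tear down in roughly the reverse order of construction: stop the
 * deferred bitmap flush and the workqueues first, then release the
 * pools, buffers, journal memory and crypto transforms.
 */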
5339 if (ic->mode == 'B' && ic->bitmap_flush_work.work.func)
5340 cancel_delayed_work_sync(&ic->bitmap_flush_work);
5341 if (ic->metadata_wq)
5342 destroy_workqueue(ic->metadata_wq);
5343 if (ic->wait_wq)
5344 destroy_workqueue(ic->wait_wq);
5345 if (ic->offload_wq)
5346 destroy_workqueue(ic->offload_wq);
5347 if (ic->commit_wq)
5348 destroy_workqueue(ic->commit_wq);
5349 if (ic->writer_wq)
5350 destroy_workqueue(ic->writer_wq);
5351 if (ic->recalc_wq)
5352 destroy_workqueue(ic->recalc_wq);
5353 kvfree(ic->bbs);
5354 if (ic->bufio)
5355 dm_bufio_client_destroy(ic->bufio);
5356 mempool_free(ic->journal_ahash_req, &ic->ahash_req_pool);
5357 mempool_exit(&ic->ahash_req_pool);
5358 bioset_exit(&ic->recalc_bios);
5359 bioset_exit(&ic->recheck_bios);
5360 mempool_exit(&ic->recheck_pool);
5361 mempool_exit(&ic->journal_io_mempool);
5362 if (ic->io)
5363 dm_io_client_destroy(ic->io);
5364 if (ic->dev)
5365 dm_put_device(ti, ic->dev);
5366 if (ic->meta_dev)
5367 dm_put_device(ti, ic->meta_dev);
5368 dm_integrity_free_page_list(ic->journal);
5369 dm_integrity_free_page_list(ic->journal_io);
5370 dm_integrity_free_page_list(ic->journal_xor);
5371 dm_integrity_free_page_list(ic->recalc_bitmap);
5372 dm_integrity_free_page_list(ic->may_write_bitmap);
5373 if (ic->journal_scatterlist)
5374 dm_integrity_free_journal_scatterlist(ic, ic->journal_scatterlist);
5375 if (ic->journal_io_scatterlist)
5376 dm_integrity_free_journal_scatterlist(ic, ic->journal_io_scatterlist);
5377 if (ic->sk_requests) {
5378 unsigned int i;
5379
5380 for (i = 0; i < ic->journal_sections; i++) {
5381 struct skcipher_request *req;
5382
5383 req = ic->sk_requests[i];
5384 if (req) {
5385 kfree_sensitive(req->iv);
5386 skcipher_request_free(req);
5387 }
5388 }
5389 kvfree(ic->sk_requests);
5390 }
5391 kvfree(ic->journal_tree);
5392 if (ic->sb)
5393 free_pages_exact(ic->sb, SB_SECTORS << SECTOR_SHIFT);
5394
5395 if (ic->internal_shash)
5396 crypto_free_shash(ic->internal_shash);
5397 if (ic->internal_ahash)
5398 crypto_free_ahash(ic->internal_ahash);
5399 free_alg(&ic->internal_hash_alg);
5400
5401 if (ic->journal_crypt)
5402 crypto_free_skcipher(ic->journal_crypt);
5403 free_alg(&ic->journal_crypt_alg);
5404
5405 if (ic->journal_mac)
5406 crypto_free_shash(ic->journal_mac);
5407 free_alg(&ic->journal_mac_alg);
5408
5409 kfree(ic);
5410 dm_audit_log_dtr(DM_MSG_PREFIX, ti, 1);
5411 }
5412
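/*
 * Usage sketch (illustrative only, not part of this file): the target
 * is typically activated with a table line such as
 *
 *   dmsetup create integ --table \
 *     "0 <provided_data_sectors> integrity <data_dev> 0 <tag_size> J 1 internal_hash:crc32c"
 *
 * where the device name and sizes are placeholders; see
 * Documentation/admin-guide/device-mapper/dm-integrity.rst for the
 * authoritative argument list.
 */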
5413 static struct target_type integrity_target = {
5414 .name = "integrity",
5415 .version = {1, 14, 0},
5416 .module = THIS_MODULE,
5417 .features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY,
5418 .ctr = dm_integrity_ctr,
5419 .dtr = dm_integrity_dtr,
5420 .map = dm_integrity_map,
5421 .end_io = dm_integrity_end_io,
5422 .postsuspend = dm_integrity_postsuspend,
5423 .resume = dm_integrity_resume,
5424 .status = dm_integrity_status,
5425 .iterate_devices = dm_integrity_iterate_devices,
5426 .io_hints = dm_integrity_io_hints,
5427 };
5428
5429 static int __init dm_integrity_init(void)
5430 {
5431 int r;
5432
5433 journal_io_cache = kmem_cache_create("integrity_journal_io",
5434 sizeof(struct journal_io), 0, 0, NULL);
5435 if (!journal_io_cache) {
5436 DMERR("can't allocate journal io cache");
5437 return -ENOMEM;
5438 }
5439
5440 r = dm_register_target(&integrity_target);
5441 if (r < 0) {
5442 kmem_cache_destroy(journal_io_cache);
5443 return r;
5444 }
5445
5446 return 0;
5447 }
5448
5449 static void __exit dm_integrity_exit(void)
5450 {
5451 dm_unregister_target(&integrity_target);
5452 kmem_cache_destroy(journal_io_cache);
5453 }
5454
5455 module_init(dm_integrity_init);
5456 module_exit(dm_integrity_exit);
5457
5458 MODULE_AUTHOR("Milan Broz");
5459 MODULE_AUTHOR("Mikulas Patocka");
5460 MODULE_DESCRIPTION(DM_NAME " target for integrity tags extension");
5461 MODULE_LICENSE("GPL");
5462