1*0e9cebe7SJosef Bacik /* 2*0e9cebe7SJosef Bacik * Copyright (C) 2014 Facebook. All rights reserved. 3*0e9cebe7SJosef Bacik * 4*0e9cebe7SJosef Bacik * This file is released under the GPL. 5*0e9cebe7SJosef Bacik */ 6*0e9cebe7SJosef Bacik 7*0e9cebe7SJosef Bacik #include <linux/device-mapper.h> 8*0e9cebe7SJosef Bacik 9*0e9cebe7SJosef Bacik #include <linux/module.h> 10*0e9cebe7SJosef Bacik #include <linux/init.h> 11*0e9cebe7SJosef Bacik #include <linux/blkdev.h> 12*0e9cebe7SJosef Bacik #include <linux/bio.h> 13*0e9cebe7SJosef Bacik #include <linux/slab.h> 14*0e9cebe7SJosef Bacik #include <linux/kthread.h> 15*0e9cebe7SJosef Bacik #include <linux/freezer.h> 16*0e9cebe7SJosef Bacik 17*0e9cebe7SJosef Bacik #define DM_MSG_PREFIX "log-writes" 18*0e9cebe7SJosef Bacik 19*0e9cebe7SJosef Bacik /* 20*0e9cebe7SJosef Bacik * This target will sequentially log all writes to the target device onto the 21*0e9cebe7SJosef Bacik * log device. This is helpful for replaying writes to check for fs consistency 22*0e9cebe7SJosef Bacik * at all times. This target provides a mechanism to mark specific events to 23*0e9cebe7SJosef Bacik * check data at a later time. So for example you would: 24*0e9cebe7SJosef Bacik * 25*0e9cebe7SJosef Bacik * write data 26*0e9cebe7SJosef Bacik * fsync 27*0e9cebe7SJosef Bacik * dmsetup message /dev/whatever mark mymark 28*0e9cebe7SJosef Bacik * unmount /mnt/test 29*0e9cebe7SJosef Bacik * 30*0e9cebe7SJosef Bacik * Then replay the log up to mymark and check the contents of the replay to 31*0e9cebe7SJosef Bacik * verify it matches what was written. 32*0e9cebe7SJosef Bacik * 33*0e9cebe7SJosef Bacik * We log writes only after they have been flushed, this makes the log describe 34*0e9cebe7SJosef Bacik * close to the order in which the data hits the actual disk, not its cache. 
So 35*0e9cebe7SJosef Bacik * for example the following sequence (W means write, C means complete) 36*0e9cebe7SJosef Bacik * 37*0e9cebe7SJosef Bacik * Wa,Wb,Wc,Cc,Ca,FLUSH,FUAd,Cb,CFLUSH,CFUAd 38*0e9cebe7SJosef Bacik * 39*0e9cebe7SJosef Bacik * Would result in the log looking like this: 40*0e9cebe7SJosef Bacik * 41*0e9cebe7SJosef Bacik * c,a,flush,fuad,b,<other writes>,<next flush> 42*0e9cebe7SJosef Bacik * 43*0e9cebe7SJosef Bacik * This is meant to help expose problems where file systems do not properly wait 44*0e9cebe7SJosef Bacik * on data being written before invoking a FLUSH. FUA bypasses cache so once it 45*0e9cebe7SJosef Bacik * completes it is added to the log as it should be on disk. 46*0e9cebe7SJosef Bacik * 47*0e9cebe7SJosef Bacik * We treat DISCARDs as if they don't bypass cache so that they are logged in 48*0e9cebe7SJosef Bacik * order of completion along with the normal writes. If we didn't do it this 49*0e9cebe7SJosef Bacik * way we would process all the discards first and then write all the data, when 50*0e9cebe7SJosef Bacik * in fact we want to do the data and the discard in the order that they 51*0e9cebe7SJosef Bacik * completed. 52*0e9cebe7SJosef Bacik */ 53*0e9cebe7SJosef Bacik #define LOG_FLUSH_FLAG (1 << 0) 54*0e9cebe7SJosef Bacik #define LOG_FUA_FLAG (1 << 1) 55*0e9cebe7SJosef Bacik #define LOG_DISCARD_FLAG (1 << 2) 56*0e9cebe7SJosef Bacik #define LOG_MARK_FLAG (1 << 3) 57*0e9cebe7SJosef Bacik 58*0e9cebe7SJosef Bacik #define WRITE_LOG_VERSION 1 59*0e9cebe7SJosef Bacik #define WRITE_LOG_MAGIC 0x6a736677736872 60*0e9cebe7SJosef Bacik 61*0e9cebe7SJosef Bacik /* 62*0e9cebe7SJosef Bacik * The disk format for this is braindead simple. 
63*0e9cebe7SJosef Bacik * 64*0e9cebe7SJosef Bacik * At byte 0 we have our super, followed by the following sequence for 65*0e9cebe7SJosef Bacik * nr_entries: 66*0e9cebe7SJosef Bacik * 67*0e9cebe7SJosef Bacik * [ 1 sector ][ entry->nr_sectors ] 68*0e9cebe7SJosef Bacik * [log_write_entry][ data written ] 69*0e9cebe7SJosef Bacik * 70*0e9cebe7SJosef Bacik * The log_write_entry takes up a full sector so we can have arbitrary length 71*0e9cebe7SJosef Bacik * marks and it leaves us room for extra content in the future. 72*0e9cebe7SJosef Bacik */ 73*0e9cebe7SJosef Bacik 74*0e9cebe7SJosef Bacik /* 75*0e9cebe7SJosef Bacik * Basic info about the log for userspace. 76*0e9cebe7SJosef Bacik */ 77*0e9cebe7SJosef Bacik struct log_write_super { 78*0e9cebe7SJosef Bacik __le64 magic; 79*0e9cebe7SJosef Bacik __le64 version; 80*0e9cebe7SJosef Bacik __le64 nr_entries; 81*0e9cebe7SJosef Bacik __le32 sectorsize; 82*0e9cebe7SJosef Bacik }; 83*0e9cebe7SJosef Bacik 84*0e9cebe7SJosef Bacik /* 85*0e9cebe7SJosef Bacik * sector - the sector we wrote. 86*0e9cebe7SJosef Bacik * nr_sectors - the number of sectors we wrote. 87*0e9cebe7SJosef Bacik * flags - flags for this log entry. 88*0e9cebe7SJosef Bacik * data_len - the size of the data in this log entry, this is for private log 89*0e9cebe7SJosef Bacik * entry stuff, the MARK data provided by userspace for example. 
90*0e9cebe7SJosef Bacik */ 91*0e9cebe7SJosef Bacik struct log_write_entry { 92*0e9cebe7SJosef Bacik __le64 sector; 93*0e9cebe7SJosef Bacik __le64 nr_sectors; 94*0e9cebe7SJosef Bacik __le64 flags; 95*0e9cebe7SJosef Bacik __le64 data_len; 96*0e9cebe7SJosef Bacik }; 97*0e9cebe7SJosef Bacik 98*0e9cebe7SJosef Bacik struct log_writes_c { 99*0e9cebe7SJosef Bacik struct dm_dev *dev; 100*0e9cebe7SJosef Bacik struct dm_dev *logdev; 101*0e9cebe7SJosef Bacik u64 logged_entries; 102*0e9cebe7SJosef Bacik u32 sectorsize; 103*0e9cebe7SJosef Bacik atomic_t io_blocks; 104*0e9cebe7SJosef Bacik atomic_t pending_blocks; 105*0e9cebe7SJosef Bacik sector_t next_sector; 106*0e9cebe7SJosef Bacik sector_t end_sector; 107*0e9cebe7SJosef Bacik bool logging_enabled; 108*0e9cebe7SJosef Bacik bool device_supports_discard; 109*0e9cebe7SJosef Bacik spinlock_t blocks_lock; 110*0e9cebe7SJosef Bacik struct list_head unflushed_blocks; 111*0e9cebe7SJosef Bacik struct list_head logging_blocks; 112*0e9cebe7SJosef Bacik wait_queue_head_t wait; 113*0e9cebe7SJosef Bacik struct task_struct *log_kthread; 114*0e9cebe7SJosef Bacik }; 115*0e9cebe7SJosef Bacik 116*0e9cebe7SJosef Bacik struct pending_block { 117*0e9cebe7SJosef Bacik int vec_cnt; 118*0e9cebe7SJosef Bacik u64 flags; 119*0e9cebe7SJosef Bacik sector_t sector; 120*0e9cebe7SJosef Bacik sector_t nr_sectors; 121*0e9cebe7SJosef Bacik char *data; 122*0e9cebe7SJosef Bacik u32 datalen; 123*0e9cebe7SJosef Bacik struct list_head list; 124*0e9cebe7SJosef Bacik struct bio_vec vecs[0]; 125*0e9cebe7SJosef Bacik }; 126*0e9cebe7SJosef Bacik 127*0e9cebe7SJosef Bacik struct per_bio_data { 128*0e9cebe7SJosef Bacik struct pending_block *block; 129*0e9cebe7SJosef Bacik }; 130*0e9cebe7SJosef Bacik 131*0e9cebe7SJosef Bacik static void put_pending_block(struct log_writes_c *lc) 132*0e9cebe7SJosef Bacik { 133*0e9cebe7SJosef Bacik if (atomic_dec_and_test(&lc->pending_blocks)) { 134*0e9cebe7SJosef Bacik smp_mb__after_atomic(); 135*0e9cebe7SJosef Bacik if 
(waitqueue_active(&lc->wait)) 136*0e9cebe7SJosef Bacik wake_up(&lc->wait); 137*0e9cebe7SJosef Bacik } 138*0e9cebe7SJosef Bacik } 139*0e9cebe7SJosef Bacik 140*0e9cebe7SJosef Bacik static void put_io_block(struct log_writes_c *lc) 141*0e9cebe7SJosef Bacik { 142*0e9cebe7SJosef Bacik if (atomic_dec_and_test(&lc->io_blocks)) { 143*0e9cebe7SJosef Bacik smp_mb__after_atomic(); 144*0e9cebe7SJosef Bacik if (waitqueue_active(&lc->wait)) 145*0e9cebe7SJosef Bacik wake_up(&lc->wait); 146*0e9cebe7SJosef Bacik } 147*0e9cebe7SJosef Bacik } 148*0e9cebe7SJosef Bacik 149*0e9cebe7SJosef Bacik static void log_end_io(struct bio *bio, int err) 150*0e9cebe7SJosef Bacik { 151*0e9cebe7SJosef Bacik struct log_writes_c *lc = bio->bi_private; 152*0e9cebe7SJosef Bacik struct bio_vec *bvec; 153*0e9cebe7SJosef Bacik int i; 154*0e9cebe7SJosef Bacik 155*0e9cebe7SJosef Bacik if (err) { 156*0e9cebe7SJosef Bacik unsigned long flags; 157*0e9cebe7SJosef Bacik 158*0e9cebe7SJosef Bacik DMERR("Error writing log block, error=%d", err); 159*0e9cebe7SJosef Bacik spin_lock_irqsave(&lc->blocks_lock, flags); 160*0e9cebe7SJosef Bacik lc->logging_enabled = false; 161*0e9cebe7SJosef Bacik spin_unlock_irqrestore(&lc->blocks_lock, flags); 162*0e9cebe7SJosef Bacik } 163*0e9cebe7SJosef Bacik 164*0e9cebe7SJosef Bacik bio_for_each_segment_all(bvec, bio, i) 165*0e9cebe7SJosef Bacik __free_page(bvec->bv_page); 166*0e9cebe7SJosef Bacik 167*0e9cebe7SJosef Bacik put_io_block(lc); 168*0e9cebe7SJosef Bacik bio_put(bio); 169*0e9cebe7SJosef Bacik } 170*0e9cebe7SJosef Bacik 171*0e9cebe7SJosef Bacik /* 172*0e9cebe7SJosef Bacik * Meant to be called if there is an error, it will free all the pages 173*0e9cebe7SJosef Bacik * associated with the block. 
174*0e9cebe7SJosef Bacik */ 175*0e9cebe7SJosef Bacik static void free_pending_block(struct log_writes_c *lc, 176*0e9cebe7SJosef Bacik struct pending_block *block) 177*0e9cebe7SJosef Bacik { 178*0e9cebe7SJosef Bacik int i; 179*0e9cebe7SJosef Bacik 180*0e9cebe7SJosef Bacik for (i = 0; i < block->vec_cnt; i++) { 181*0e9cebe7SJosef Bacik if (block->vecs[i].bv_page) 182*0e9cebe7SJosef Bacik __free_page(block->vecs[i].bv_page); 183*0e9cebe7SJosef Bacik } 184*0e9cebe7SJosef Bacik kfree(block->data); 185*0e9cebe7SJosef Bacik kfree(block); 186*0e9cebe7SJosef Bacik put_pending_block(lc); 187*0e9cebe7SJosef Bacik } 188*0e9cebe7SJosef Bacik 189*0e9cebe7SJosef Bacik static int write_metadata(struct log_writes_c *lc, void *entry, 190*0e9cebe7SJosef Bacik size_t entrylen, void *data, size_t datalen, 191*0e9cebe7SJosef Bacik sector_t sector) 192*0e9cebe7SJosef Bacik { 193*0e9cebe7SJosef Bacik struct bio *bio; 194*0e9cebe7SJosef Bacik struct page *page; 195*0e9cebe7SJosef Bacik void *ptr; 196*0e9cebe7SJosef Bacik size_t ret; 197*0e9cebe7SJosef Bacik 198*0e9cebe7SJosef Bacik bio = bio_alloc(GFP_KERNEL, 1); 199*0e9cebe7SJosef Bacik if (!bio) { 200*0e9cebe7SJosef Bacik DMERR("Couldn't alloc log bio"); 201*0e9cebe7SJosef Bacik goto error; 202*0e9cebe7SJosef Bacik } 203*0e9cebe7SJosef Bacik bio->bi_iter.bi_size = 0; 204*0e9cebe7SJosef Bacik bio->bi_iter.bi_sector = sector; 205*0e9cebe7SJosef Bacik bio->bi_bdev = lc->logdev->bdev; 206*0e9cebe7SJosef Bacik bio->bi_end_io = log_end_io; 207*0e9cebe7SJosef Bacik bio->bi_private = lc; 208*0e9cebe7SJosef Bacik set_bit(BIO_UPTODATE, &bio->bi_flags); 209*0e9cebe7SJosef Bacik 210*0e9cebe7SJosef Bacik page = alloc_page(GFP_KERNEL); 211*0e9cebe7SJosef Bacik if (!page) { 212*0e9cebe7SJosef Bacik DMERR("Couldn't alloc log page"); 213*0e9cebe7SJosef Bacik bio_put(bio); 214*0e9cebe7SJosef Bacik goto error; 215*0e9cebe7SJosef Bacik } 216*0e9cebe7SJosef Bacik 217*0e9cebe7SJosef Bacik ptr = kmap_atomic(page); 218*0e9cebe7SJosef Bacik memcpy(ptr, entry, 
entrylen); 219*0e9cebe7SJosef Bacik if (datalen) 220*0e9cebe7SJosef Bacik memcpy(ptr + entrylen, data, datalen); 221*0e9cebe7SJosef Bacik memset(ptr + entrylen + datalen, 0, 222*0e9cebe7SJosef Bacik lc->sectorsize - entrylen - datalen); 223*0e9cebe7SJosef Bacik kunmap_atomic(ptr); 224*0e9cebe7SJosef Bacik 225*0e9cebe7SJosef Bacik ret = bio_add_page(bio, page, lc->sectorsize, 0); 226*0e9cebe7SJosef Bacik if (ret != lc->sectorsize) { 227*0e9cebe7SJosef Bacik DMERR("Couldn't add page to the log block"); 228*0e9cebe7SJosef Bacik goto error_bio; 229*0e9cebe7SJosef Bacik } 230*0e9cebe7SJosef Bacik submit_bio(WRITE, bio); 231*0e9cebe7SJosef Bacik return 0; 232*0e9cebe7SJosef Bacik error_bio: 233*0e9cebe7SJosef Bacik bio_put(bio); 234*0e9cebe7SJosef Bacik __free_page(page); 235*0e9cebe7SJosef Bacik error: 236*0e9cebe7SJosef Bacik put_io_block(lc); 237*0e9cebe7SJosef Bacik return -1; 238*0e9cebe7SJosef Bacik } 239*0e9cebe7SJosef Bacik 240*0e9cebe7SJosef Bacik static int log_one_block(struct log_writes_c *lc, 241*0e9cebe7SJosef Bacik struct pending_block *block, sector_t sector) 242*0e9cebe7SJosef Bacik { 243*0e9cebe7SJosef Bacik struct bio *bio; 244*0e9cebe7SJosef Bacik struct log_write_entry entry; 245*0e9cebe7SJosef Bacik size_t ret; 246*0e9cebe7SJosef Bacik int i; 247*0e9cebe7SJosef Bacik 248*0e9cebe7SJosef Bacik entry.sector = cpu_to_le64(block->sector); 249*0e9cebe7SJosef Bacik entry.nr_sectors = cpu_to_le64(block->nr_sectors); 250*0e9cebe7SJosef Bacik entry.flags = cpu_to_le64(block->flags); 251*0e9cebe7SJosef Bacik entry.data_len = cpu_to_le64(block->datalen); 252*0e9cebe7SJosef Bacik if (write_metadata(lc, &entry, sizeof(entry), block->data, 253*0e9cebe7SJosef Bacik block->datalen, sector)) { 254*0e9cebe7SJosef Bacik free_pending_block(lc, block); 255*0e9cebe7SJosef Bacik return -1; 256*0e9cebe7SJosef Bacik } 257*0e9cebe7SJosef Bacik 258*0e9cebe7SJosef Bacik if (!block->vec_cnt) 259*0e9cebe7SJosef Bacik goto out; 260*0e9cebe7SJosef Bacik sector++; 261*0e9cebe7SJosef 
Bacik 262*0e9cebe7SJosef Bacik bio = bio_alloc(GFP_KERNEL, block->vec_cnt); 263*0e9cebe7SJosef Bacik if (!bio) { 264*0e9cebe7SJosef Bacik DMERR("Couldn't alloc log bio"); 265*0e9cebe7SJosef Bacik goto error; 266*0e9cebe7SJosef Bacik } 267*0e9cebe7SJosef Bacik atomic_inc(&lc->io_blocks); 268*0e9cebe7SJosef Bacik bio->bi_iter.bi_size = 0; 269*0e9cebe7SJosef Bacik bio->bi_iter.bi_sector = sector; 270*0e9cebe7SJosef Bacik bio->bi_bdev = lc->logdev->bdev; 271*0e9cebe7SJosef Bacik bio->bi_end_io = log_end_io; 272*0e9cebe7SJosef Bacik bio->bi_private = lc; 273*0e9cebe7SJosef Bacik set_bit(BIO_UPTODATE, &bio->bi_flags); 274*0e9cebe7SJosef Bacik 275*0e9cebe7SJosef Bacik for (i = 0; i < block->vec_cnt; i++) { 276*0e9cebe7SJosef Bacik /* 277*0e9cebe7SJosef Bacik * The page offset is always 0 because we allocate a new page 278*0e9cebe7SJosef Bacik * for every bvec in the original bio for simplicity sake. 279*0e9cebe7SJosef Bacik */ 280*0e9cebe7SJosef Bacik ret = bio_add_page(bio, block->vecs[i].bv_page, 281*0e9cebe7SJosef Bacik block->vecs[i].bv_len, 0); 282*0e9cebe7SJosef Bacik if (ret != block->vecs[i].bv_len) { 283*0e9cebe7SJosef Bacik atomic_inc(&lc->io_blocks); 284*0e9cebe7SJosef Bacik submit_bio(WRITE, bio); 285*0e9cebe7SJosef Bacik bio = bio_alloc(GFP_KERNEL, block->vec_cnt - i); 286*0e9cebe7SJosef Bacik if (!bio) { 287*0e9cebe7SJosef Bacik DMERR("Couldn't alloc log bio"); 288*0e9cebe7SJosef Bacik goto error; 289*0e9cebe7SJosef Bacik } 290*0e9cebe7SJosef Bacik bio->bi_iter.bi_size = 0; 291*0e9cebe7SJosef Bacik bio->bi_iter.bi_sector = sector; 292*0e9cebe7SJosef Bacik bio->bi_bdev = lc->logdev->bdev; 293*0e9cebe7SJosef Bacik bio->bi_end_io = log_end_io; 294*0e9cebe7SJosef Bacik bio->bi_private = lc; 295*0e9cebe7SJosef Bacik set_bit(BIO_UPTODATE, &bio->bi_flags); 296*0e9cebe7SJosef Bacik 297*0e9cebe7SJosef Bacik ret = bio_add_page(bio, block->vecs[i].bv_page, 298*0e9cebe7SJosef Bacik block->vecs[i].bv_len, 0); 299*0e9cebe7SJosef Bacik if (ret != block->vecs[i].bv_len) { 
300*0e9cebe7SJosef Bacik DMERR("Couldn't add page on new bio?"); 301*0e9cebe7SJosef Bacik bio_put(bio); 302*0e9cebe7SJosef Bacik goto error; 303*0e9cebe7SJosef Bacik } 304*0e9cebe7SJosef Bacik } 305*0e9cebe7SJosef Bacik sector += block->vecs[i].bv_len >> SECTOR_SHIFT; 306*0e9cebe7SJosef Bacik } 307*0e9cebe7SJosef Bacik submit_bio(WRITE, bio); 308*0e9cebe7SJosef Bacik out: 309*0e9cebe7SJosef Bacik kfree(block->data); 310*0e9cebe7SJosef Bacik kfree(block); 311*0e9cebe7SJosef Bacik put_pending_block(lc); 312*0e9cebe7SJosef Bacik return 0; 313*0e9cebe7SJosef Bacik error: 314*0e9cebe7SJosef Bacik free_pending_block(lc, block); 315*0e9cebe7SJosef Bacik put_io_block(lc); 316*0e9cebe7SJosef Bacik return -1; 317*0e9cebe7SJosef Bacik } 318*0e9cebe7SJosef Bacik 319*0e9cebe7SJosef Bacik static int log_super(struct log_writes_c *lc) 320*0e9cebe7SJosef Bacik { 321*0e9cebe7SJosef Bacik struct log_write_super super; 322*0e9cebe7SJosef Bacik 323*0e9cebe7SJosef Bacik super.magic = cpu_to_le64(WRITE_LOG_MAGIC); 324*0e9cebe7SJosef Bacik super.version = cpu_to_le64(WRITE_LOG_VERSION); 325*0e9cebe7SJosef Bacik super.nr_entries = cpu_to_le64(lc->logged_entries); 326*0e9cebe7SJosef Bacik super.sectorsize = cpu_to_le32(lc->sectorsize); 327*0e9cebe7SJosef Bacik 328*0e9cebe7SJosef Bacik if (write_metadata(lc, &super, sizeof(super), NULL, 0, 0)) { 329*0e9cebe7SJosef Bacik DMERR("Couldn't write super"); 330*0e9cebe7SJosef Bacik return -1; 331*0e9cebe7SJosef Bacik } 332*0e9cebe7SJosef Bacik 333*0e9cebe7SJosef Bacik return 0; 334*0e9cebe7SJosef Bacik } 335*0e9cebe7SJosef Bacik 336*0e9cebe7SJosef Bacik static inline sector_t logdev_last_sector(struct log_writes_c *lc) 337*0e9cebe7SJosef Bacik { 338*0e9cebe7SJosef Bacik return i_size_read(lc->logdev->bdev->bd_inode) >> SECTOR_SHIFT; 339*0e9cebe7SJosef Bacik } 340*0e9cebe7SJosef Bacik 341*0e9cebe7SJosef Bacik static int log_writes_kthread(void *arg) 342*0e9cebe7SJosef Bacik { 343*0e9cebe7SJosef Bacik struct log_writes_c *lc = (struct log_writes_c 
*)arg; 344*0e9cebe7SJosef Bacik sector_t sector = 0; 345*0e9cebe7SJosef Bacik 346*0e9cebe7SJosef Bacik while (!kthread_should_stop()) { 347*0e9cebe7SJosef Bacik bool super = false; 348*0e9cebe7SJosef Bacik bool logging_enabled; 349*0e9cebe7SJosef Bacik struct pending_block *block = NULL; 350*0e9cebe7SJosef Bacik int ret; 351*0e9cebe7SJosef Bacik 352*0e9cebe7SJosef Bacik spin_lock_irq(&lc->blocks_lock); 353*0e9cebe7SJosef Bacik if (!list_empty(&lc->logging_blocks)) { 354*0e9cebe7SJosef Bacik block = list_first_entry(&lc->logging_blocks, 355*0e9cebe7SJosef Bacik struct pending_block, list); 356*0e9cebe7SJosef Bacik list_del_init(&block->list); 357*0e9cebe7SJosef Bacik if (!lc->logging_enabled) 358*0e9cebe7SJosef Bacik goto next; 359*0e9cebe7SJosef Bacik 360*0e9cebe7SJosef Bacik sector = lc->next_sector; 361*0e9cebe7SJosef Bacik if (block->flags & LOG_DISCARD_FLAG) 362*0e9cebe7SJosef Bacik lc->next_sector++; 363*0e9cebe7SJosef Bacik else 364*0e9cebe7SJosef Bacik lc->next_sector += block->nr_sectors + 1; 365*0e9cebe7SJosef Bacik 366*0e9cebe7SJosef Bacik /* 367*0e9cebe7SJosef Bacik * Apparently the size of the device may not be known 368*0e9cebe7SJosef Bacik * right away, so handle this properly. 
369*0e9cebe7SJosef Bacik */ 370*0e9cebe7SJosef Bacik if (!lc->end_sector) 371*0e9cebe7SJosef Bacik lc->end_sector = logdev_last_sector(lc); 372*0e9cebe7SJosef Bacik if (lc->end_sector && 373*0e9cebe7SJosef Bacik lc->next_sector >= lc->end_sector) { 374*0e9cebe7SJosef Bacik DMERR("Ran out of space on the logdev"); 375*0e9cebe7SJosef Bacik lc->logging_enabled = false; 376*0e9cebe7SJosef Bacik goto next; 377*0e9cebe7SJosef Bacik } 378*0e9cebe7SJosef Bacik lc->logged_entries++; 379*0e9cebe7SJosef Bacik atomic_inc(&lc->io_blocks); 380*0e9cebe7SJosef Bacik 381*0e9cebe7SJosef Bacik super = (block->flags & (LOG_FUA_FLAG | LOG_MARK_FLAG)); 382*0e9cebe7SJosef Bacik if (super) 383*0e9cebe7SJosef Bacik atomic_inc(&lc->io_blocks); 384*0e9cebe7SJosef Bacik } 385*0e9cebe7SJosef Bacik next: 386*0e9cebe7SJosef Bacik logging_enabled = lc->logging_enabled; 387*0e9cebe7SJosef Bacik spin_unlock_irq(&lc->blocks_lock); 388*0e9cebe7SJosef Bacik if (block) { 389*0e9cebe7SJosef Bacik if (logging_enabled) { 390*0e9cebe7SJosef Bacik ret = log_one_block(lc, block, sector); 391*0e9cebe7SJosef Bacik if (!ret && super) 392*0e9cebe7SJosef Bacik ret = log_super(lc); 393*0e9cebe7SJosef Bacik if (ret) { 394*0e9cebe7SJosef Bacik spin_lock_irq(&lc->blocks_lock); 395*0e9cebe7SJosef Bacik lc->logging_enabled = false; 396*0e9cebe7SJosef Bacik spin_unlock_irq(&lc->blocks_lock); 397*0e9cebe7SJosef Bacik } 398*0e9cebe7SJosef Bacik } else 399*0e9cebe7SJosef Bacik free_pending_block(lc, block); 400*0e9cebe7SJosef Bacik continue; 401*0e9cebe7SJosef Bacik } 402*0e9cebe7SJosef Bacik 403*0e9cebe7SJosef Bacik if (!try_to_freeze()) { 404*0e9cebe7SJosef Bacik set_current_state(TASK_INTERRUPTIBLE); 405*0e9cebe7SJosef Bacik if (!kthread_should_stop() && 406*0e9cebe7SJosef Bacik !atomic_read(&lc->pending_blocks)) 407*0e9cebe7SJosef Bacik schedule(); 408*0e9cebe7SJosef Bacik __set_current_state(TASK_RUNNING); 409*0e9cebe7SJosef Bacik } 410*0e9cebe7SJosef Bacik } 411*0e9cebe7SJosef Bacik return 0; 412*0e9cebe7SJosef Bacik 
} 413*0e9cebe7SJosef Bacik 414*0e9cebe7SJosef Bacik /* 415*0e9cebe7SJosef Bacik * Construct a log-writes mapping: 416*0e9cebe7SJosef Bacik * log-writes <dev_path> <log_dev_path> 417*0e9cebe7SJosef Bacik */ 418*0e9cebe7SJosef Bacik static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv) 419*0e9cebe7SJosef Bacik { 420*0e9cebe7SJosef Bacik struct log_writes_c *lc; 421*0e9cebe7SJosef Bacik struct dm_arg_set as; 422*0e9cebe7SJosef Bacik const char *devname, *logdevname; 423*0e9cebe7SJosef Bacik 424*0e9cebe7SJosef Bacik as.argc = argc; 425*0e9cebe7SJosef Bacik as.argv = argv; 426*0e9cebe7SJosef Bacik 427*0e9cebe7SJosef Bacik if (argc < 2) { 428*0e9cebe7SJosef Bacik ti->error = "Invalid argument count"; 429*0e9cebe7SJosef Bacik return -EINVAL; 430*0e9cebe7SJosef Bacik } 431*0e9cebe7SJosef Bacik 432*0e9cebe7SJosef Bacik lc = kzalloc(sizeof(struct log_writes_c), GFP_KERNEL); 433*0e9cebe7SJosef Bacik if (!lc) { 434*0e9cebe7SJosef Bacik ti->error = "Cannot allocate context"; 435*0e9cebe7SJosef Bacik return -ENOMEM; 436*0e9cebe7SJosef Bacik } 437*0e9cebe7SJosef Bacik spin_lock_init(&lc->blocks_lock); 438*0e9cebe7SJosef Bacik INIT_LIST_HEAD(&lc->unflushed_blocks); 439*0e9cebe7SJosef Bacik INIT_LIST_HEAD(&lc->logging_blocks); 440*0e9cebe7SJosef Bacik init_waitqueue_head(&lc->wait); 441*0e9cebe7SJosef Bacik lc->sectorsize = 1 << SECTOR_SHIFT; 442*0e9cebe7SJosef Bacik atomic_set(&lc->io_blocks, 0); 443*0e9cebe7SJosef Bacik atomic_set(&lc->pending_blocks, 0); 444*0e9cebe7SJosef Bacik 445*0e9cebe7SJosef Bacik devname = dm_shift_arg(&as); 446*0e9cebe7SJosef Bacik if (dm_get_device(ti, devname, dm_table_get_mode(ti->table), &lc->dev)) { 447*0e9cebe7SJosef Bacik ti->error = "Device lookup failed"; 448*0e9cebe7SJosef Bacik goto bad; 449*0e9cebe7SJosef Bacik } 450*0e9cebe7SJosef Bacik 451*0e9cebe7SJosef Bacik logdevname = dm_shift_arg(&as); 452*0e9cebe7SJosef Bacik if (dm_get_device(ti, logdevname, dm_table_get_mode(ti->table), &lc->logdev)) { 453*0e9cebe7SJosef 
Bacik ti->error = "Log device lookup failed"; 454*0e9cebe7SJosef Bacik dm_put_device(ti, lc->dev); 455*0e9cebe7SJosef Bacik goto bad; 456*0e9cebe7SJosef Bacik } 457*0e9cebe7SJosef Bacik 458*0e9cebe7SJosef Bacik lc->log_kthread = kthread_run(log_writes_kthread, lc, "log-write"); 459*0e9cebe7SJosef Bacik if (!lc->log_kthread) { 460*0e9cebe7SJosef Bacik ti->error = "Couldn't alloc kthread"; 461*0e9cebe7SJosef Bacik dm_put_device(ti, lc->dev); 462*0e9cebe7SJosef Bacik dm_put_device(ti, lc->logdev); 463*0e9cebe7SJosef Bacik goto bad; 464*0e9cebe7SJosef Bacik } 465*0e9cebe7SJosef Bacik 466*0e9cebe7SJosef Bacik /* We put the super at sector 0, start logging at sector 1 */ 467*0e9cebe7SJosef Bacik lc->next_sector = 1; 468*0e9cebe7SJosef Bacik lc->logging_enabled = true; 469*0e9cebe7SJosef Bacik lc->end_sector = logdev_last_sector(lc); 470*0e9cebe7SJosef Bacik lc->device_supports_discard = true; 471*0e9cebe7SJosef Bacik 472*0e9cebe7SJosef Bacik ti->num_flush_bios = 1; 473*0e9cebe7SJosef Bacik ti->flush_supported = true; 474*0e9cebe7SJosef Bacik ti->num_discard_bios = 1; 475*0e9cebe7SJosef Bacik ti->discards_supported = true; 476*0e9cebe7SJosef Bacik ti->per_bio_data_size = sizeof(struct per_bio_data); 477*0e9cebe7SJosef Bacik ti->private = lc; 478*0e9cebe7SJosef Bacik return 0; 479*0e9cebe7SJosef Bacik 480*0e9cebe7SJosef Bacik bad: 481*0e9cebe7SJosef Bacik kfree(lc); 482*0e9cebe7SJosef Bacik return -EINVAL; 483*0e9cebe7SJosef Bacik } 484*0e9cebe7SJosef Bacik 485*0e9cebe7SJosef Bacik static int log_mark(struct log_writes_c *lc, char *data) 486*0e9cebe7SJosef Bacik { 487*0e9cebe7SJosef Bacik struct pending_block *block; 488*0e9cebe7SJosef Bacik size_t maxsize = lc->sectorsize - sizeof(struct log_write_entry); 489*0e9cebe7SJosef Bacik 490*0e9cebe7SJosef Bacik block = kzalloc(sizeof(struct pending_block), GFP_KERNEL); 491*0e9cebe7SJosef Bacik if (!block) { 492*0e9cebe7SJosef Bacik DMERR("Error allocating pending block"); 493*0e9cebe7SJosef Bacik return -ENOMEM; 
494*0e9cebe7SJosef Bacik } 495*0e9cebe7SJosef Bacik 496*0e9cebe7SJosef Bacik block->data = kstrndup(data, maxsize, GFP_KERNEL); 497*0e9cebe7SJosef Bacik if (!block->data) { 498*0e9cebe7SJosef Bacik DMERR("Error copying mark data"); 499*0e9cebe7SJosef Bacik kfree(block); 500*0e9cebe7SJosef Bacik return -ENOMEM; 501*0e9cebe7SJosef Bacik } 502*0e9cebe7SJosef Bacik atomic_inc(&lc->pending_blocks); 503*0e9cebe7SJosef Bacik block->datalen = strlen(block->data); 504*0e9cebe7SJosef Bacik block->flags |= LOG_MARK_FLAG; 505*0e9cebe7SJosef Bacik spin_lock_irq(&lc->blocks_lock); 506*0e9cebe7SJosef Bacik list_add_tail(&block->list, &lc->logging_blocks); 507*0e9cebe7SJosef Bacik spin_unlock_irq(&lc->blocks_lock); 508*0e9cebe7SJosef Bacik wake_up_process(lc->log_kthread); 509*0e9cebe7SJosef Bacik return 0; 510*0e9cebe7SJosef Bacik } 511*0e9cebe7SJosef Bacik 512*0e9cebe7SJosef Bacik static void log_writes_dtr(struct dm_target *ti) 513*0e9cebe7SJosef Bacik { 514*0e9cebe7SJosef Bacik struct log_writes_c *lc = ti->private; 515*0e9cebe7SJosef Bacik 516*0e9cebe7SJosef Bacik spin_lock_irq(&lc->blocks_lock); 517*0e9cebe7SJosef Bacik list_splice_init(&lc->unflushed_blocks, &lc->logging_blocks); 518*0e9cebe7SJosef Bacik spin_unlock_irq(&lc->blocks_lock); 519*0e9cebe7SJosef Bacik 520*0e9cebe7SJosef Bacik /* 521*0e9cebe7SJosef Bacik * This is just nice to have since it'll update the super to include the 522*0e9cebe7SJosef Bacik * unflushed blocks, if it fails we don't really care. 
523*0e9cebe7SJosef Bacik */ 524*0e9cebe7SJosef Bacik log_mark(lc, "dm-log-writes-end"); 525*0e9cebe7SJosef Bacik wake_up_process(lc->log_kthread); 526*0e9cebe7SJosef Bacik wait_event(lc->wait, !atomic_read(&lc->io_blocks) && 527*0e9cebe7SJosef Bacik !atomic_read(&lc->pending_blocks)); 528*0e9cebe7SJosef Bacik kthread_stop(lc->log_kthread); 529*0e9cebe7SJosef Bacik 530*0e9cebe7SJosef Bacik WARN_ON(!list_empty(&lc->logging_blocks)); 531*0e9cebe7SJosef Bacik WARN_ON(!list_empty(&lc->unflushed_blocks)); 532*0e9cebe7SJosef Bacik dm_put_device(ti, lc->dev); 533*0e9cebe7SJosef Bacik dm_put_device(ti, lc->logdev); 534*0e9cebe7SJosef Bacik kfree(lc); 535*0e9cebe7SJosef Bacik } 536*0e9cebe7SJosef Bacik 537*0e9cebe7SJosef Bacik static void normal_map_bio(struct dm_target *ti, struct bio *bio) 538*0e9cebe7SJosef Bacik { 539*0e9cebe7SJosef Bacik struct log_writes_c *lc = ti->private; 540*0e9cebe7SJosef Bacik 541*0e9cebe7SJosef Bacik bio->bi_bdev = lc->dev->bdev; 542*0e9cebe7SJosef Bacik } 543*0e9cebe7SJosef Bacik 544*0e9cebe7SJosef Bacik static int log_writes_map(struct dm_target *ti, struct bio *bio) 545*0e9cebe7SJosef Bacik { 546*0e9cebe7SJosef Bacik struct log_writes_c *lc = ti->private; 547*0e9cebe7SJosef Bacik struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); 548*0e9cebe7SJosef Bacik struct pending_block *block; 549*0e9cebe7SJosef Bacik struct bvec_iter iter; 550*0e9cebe7SJosef Bacik struct bio_vec bv; 551*0e9cebe7SJosef Bacik size_t alloc_size; 552*0e9cebe7SJosef Bacik int i = 0; 553*0e9cebe7SJosef Bacik bool flush_bio = (bio->bi_rw & REQ_FLUSH); 554*0e9cebe7SJosef Bacik bool fua_bio = (bio->bi_rw & REQ_FUA); 555*0e9cebe7SJosef Bacik bool discard_bio = (bio->bi_rw & REQ_DISCARD); 556*0e9cebe7SJosef Bacik 557*0e9cebe7SJosef Bacik pb->block = NULL; 558*0e9cebe7SJosef Bacik 559*0e9cebe7SJosef Bacik /* Don't bother doing anything if logging has been disabled */ 560*0e9cebe7SJosef Bacik if (!lc->logging_enabled) 561*0e9cebe7SJosef Bacik goto map_bio; 
562*0e9cebe7SJosef Bacik 563*0e9cebe7SJosef Bacik /* 564*0e9cebe7SJosef Bacik * Map reads as normal. 565*0e9cebe7SJosef Bacik */ 566*0e9cebe7SJosef Bacik if (bio_data_dir(bio) == READ) 567*0e9cebe7SJosef Bacik goto map_bio; 568*0e9cebe7SJosef Bacik 569*0e9cebe7SJosef Bacik /* No sectors and not a flush? Don't care */ 570*0e9cebe7SJosef Bacik if (!bio_sectors(bio) && !flush_bio) 571*0e9cebe7SJosef Bacik goto map_bio; 572*0e9cebe7SJosef Bacik 573*0e9cebe7SJosef Bacik /* 574*0e9cebe7SJosef Bacik * Discards will have bi_size set but there's no actual data, so just 575*0e9cebe7SJosef Bacik * allocate the size of the pending block. 576*0e9cebe7SJosef Bacik */ 577*0e9cebe7SJosef Bacik if (discard_bio) 578*0e9cebe7SJosef Bacik alloc_size = sizeof(struct pending_block); 579*0e9cebe7SJosef Bacik else 580*0e9cebe7SJosef Bacik alloc_size = sizeof(struct pending_block) + sizeof(struct bio_vec) * bio_segments(bio); 581*0e9cebe7SJosef Bacik 582*0e9cebe7SJosef Bacik block = kzalloc(alloc_size, GFP_NOIO); 583*0e9cebe7SJosef Bacik if (!block) { 584*0e9cebe7SJosef Bacik DMERR("Error allocating pending block"); 585*0e9cebe7SJosef Bacik spin_lock_irq(&lc->blocks_lock); 586*0e9cebe7SJosef Bacik lc->logging_enabled = false; 587*0e9cebe7SJosef Bacik spin_unlock_irq(&lc->blocks_lock); 588*0e9cebe7SJosef Bacik return -ENOMEM; 589*0e9cebe7SJosef Bacik } 590*0e9cebe7SJosef Bacik INIT_LIST_HEAD(&block->list); 591*0e9cebe7SJosef Bacik pb->block = block; 592*0e9cebe7SJosef Bacik atomic_inc(&lc->pending_blocks); 593*0e9cebe7SJosef Bacik 594*0e9cebe7SJosef Bacik if (flush_bio) 595*0e9cebe7SJosef Bacik block->flags |= LOG_FLUSH_FLAG; 596*0e9cebe7SJosef Bacik if (fua_bio) 597*0e9cebe7SJosef Bacik block->flags |= LOG_FUA_FLAG; 598*0e9cebe7SJosef Bacik if (discard_bio) 599*0e9cebe7SJosef Bacik block->flags |= LOG_DISCARD_FLAG; 600*0e9cebe7SJosef Bacik 601*0e9cebe7SJosef Bacik block->sector = bio->bi_iter.bi_sector; 602*0e9cebe7SJosef Bacik block->nr_sectors = bio_sectors(bio); 603*0e9cebe7SJosef Bacik 
	/*
	 * DISCARD: we don't need to copy any data, just submit it (the log
	 * entry for a discard carries no payload).
	 */
	if (discard_bio) {
		/* A bio should not be a discard and a flush/FUA at once. */
		WARN_ON(flush_bio || fua_bio);
		if (lc->device_supports_discard)
			goto map_bio;
		/*
		 * The underlying device can't do discards; complete the bio
		 * here so it is still logged but never reaches the device.
		 */
		bio_endio(bio, 0);
		return DM_MAPIO_SUBMITTED;
	}

	/*
	 * Empty flush bio: splice all not-yet-flushed blocks onto this
	 * block's list (under blocks_lock) and submit.  When the flush
	 * completes, normal_end_io() moves the whole batch to the log.
	 */
	if (flush_bio && !bio_sectors(bio)) {
		spin_lock_irq(&lc->blocks_lock);
		list_splice_init(&lc->unflushed_blocks, &block->list);
		spin_unlock_irq(&lc->blocks_lock);
		goto map_bio;
	}

	/*
	 * We will write this bio somewhere else way later so we need to copy
	 * the actual contents into new pages so we know the data will always be
	 * there.
	 *
	 * We do this because this could be a bio from O_DIRECT in which case we
	 * can't just hold onto the page until some later point, we have to
	 * manually copy the contents.
	 */
	bio_for_each_segment(bv, bio, iter) {
		struct page *page;
		void *src, *dst;

		/* GFP_NOIO: we are on the I/O path, must not recurse into I/O. */
		page = alloc_page(GFP_NOIO);
		if (!page) {
			DMERR("Error allocing page");
			free_pending_block(lc, block);
			/*
			 * Disable logging entirely rather than continue with
			 * an incomplete log; status will report
			 * "logging_disabled".
			 */
			spin_lock_irq(&lc->blocks_lock);
			lc->logging_enabled = false;
			spin_unlock_irq(&lc->blocks_lock);
			/*
			 * NOTE(review): a negative return from .map is
			 * presumably treated as an I/O error by the dm core —
			 * confirm against __map_bio() for this kernel version.
			 */
			return -ENOMEM;
		}

		/* Copy the segment's payload into our private page. */
		src = kmap_atomic(bv.bv_page);
		dst = kmap_atomic(page);
		memcpy(dst, src + bv.bv_offset, bv.bv_len);
		kunmap_atomic(dst);	/* unmap in reverse order of mapping */
		kunmap_atomic(src);
		block->vecs[i].bv_page = page;
		block->vecs[i].bv_len = bv.bv_len;
		block->vec_cnt++;
		i++;
	}

	/* Had a flush with data in it, weird */
	if (flush_bio) {
		spin_lock_irq(&lc->blocks_lock);
		list_splice_init(&lc->unflushed_blocks, &block->list);
		spin_unlock_irq(&lc->blocks_lock);
	}
map_bio:
	normal_map_bio(ti, bio);
	return DM_MAPIO_REMAPPED;
}

/*
 * Write completion on the real device: queue the pending block for the log
 * kthread.  FLUSH/FUA blocks are logged immediately; plain writes wait on
 * lc->unflushed_blocks until the next flush completes (see the file header
 * comment for why logging happens only after flush).
 */
static int normal_end_io(struct dm_target *ti, struct bio *bio, int error)
{
	struct log_writes_c *lc = ti->private;
	struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));

	if (bio_data_dir(bio) == WRITE && pb->block) {
		struct pending_block *block = pb->block;
		unsigned long flags;

		/* irqsave: end_io may run in interrupt context. */
		spin_lock_irqsave(&lc->blocks_lock, flags);
		if (block->flags & LOG_FLUSH_FLAG) {
			/*
			 * First move the unflushed blocks that map() spliced
			 * onto this block's list into the logging queue, then
			 * queue the flush block itself behind them, preserving
			 * completion order.
			 */
			list_splice_tail_init(&block->list, &lc->logging_blocks);
			list_add_tail(&block->list, &lc->logging_blocks);
			wake_up_process(lc->log_kthread);
		} else if (block->flags & LOG_FUA_FLAG) {
			/* FUA bypassed the cache, so it is loggable right away. */
			list_add_tail(&block->list, &lc->logging_blocks);
			wake_up_process(lc->log_kthread);
		} else
			/* Plain write: hold it back until the next flush. */
			list_add_tail(&block->list, &lc->unflushed_blocks);
		spin_unlock_irqrestore(&lc->blocks_lock, flags);
	}

	return error;
}

/*
 * INFO format: <logged entries> <highest allocated sector>
 */
static void log_writes_status(struct dm_target *ti, status_type_t type,
			      unsigned status_flags, char *result,
			      unsigned maxlen)
{
	unsigned sz = 0;
	struct log_writes_c *lc = ti->private;

	switch (type) {
	case STATUSTYPE_INFO:
		DMEMIT("%llu %llu", lc->logged_entries,
		       (unsigned long long)lc->next_sector - 1);
		/* Flag that an allocation failure stopped logging (see map). */
		if (!lc->logging_enabled)
			DMEMIT(" logging_disabled");
		break;

	case STATUSTYPE_TABLE:
		/* TABLE format: <data dev> <log dev> */
		DMEMIT("%s %s", lc->dev->name, lc->logdev->name);
		break;
	}
}

/*
 * Pass ioctls through to the underlying data device.
 */
static int log_writes_ioctl(struct dm_target *ti, unsigned int cmd,
			    unsigned long arg)
{
	struct log_writes_c *lc = ti->private;
	struct dm_dev *dev = lc->dev;
	int r = 0;

	/*
	 * Only pass ioctls through if the device sizes match exactly.
	 * Otherwise restrict to the generic block ioctls that are safe on a
	 * partial view of the device.
	 */
	if (ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT)
		r = scsi_verify_blk_ioctl(NULL, cmd);

	return r ? : __blkdev_driver_ioctl(dev->bdev, dev->mode, cmd, arg);
}

/*
 * Delegate bvec-merge decisions to the underlying data device's queue so we
 * never build a bio the device couldn't accept.
 */
static int log_writes_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
			    struct bio_vec *biovec, int max_size)
{
	struct log_writes_c *lc = ti->private;
	struct request_queue *q = bdev_get_queue(lc->dev->bdev);

	/* No merge constraint from the device: allow the full size. */
	if (!q->merge_bvec_fn)
		return max_size;

	/* Re-point the query at the underlying device and remapped sector. */
	bvm->bi_bdev = lc->dev->bdev;
	bvm->bi_sector = dm_target_offset(ti, bvm->bi_sector);

	return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
}

/*
 * Report only the data device to the iterator; the log device is internal
 * to this target and presumably not part of the mapped address space.
 */
static int log_writes_iterate_devices(struct dm_target *ti,
				      iterate_devices_callout_fn fn,
				      void *data)
{
	struct log_writes_c *lc = ti->private;

	return fn(ti, lc->dev, 0, ti->len, data);
}

/*
 * Messages supported:
 *   mark <mark data> - specify the marked data.
 */
static int log_writes_message(struct dm_target *ti, unsigned argc, char **argv)
{
	int r = -EINVAL;
	struct log_writes_c *lc = ti->private;

	if (argc != 2) {
		DMWARN("Invalid log-writes message arguments, expect 2 arguments, got %d", argc);
		return r;
	}

	if (!strcasecmp(argv[0], "mark"))
		r = log_mark(lc, argv[1]);
	else
		DMWARN("Unrecognised log writes target message received: %s", argv[0]);

	return r;
}

/*
 * If the underlying device can't discard, advertise discard support anyway
 * (one-sector granularity) so discards still reach this target and get
 * logged; map() completes them without forwarding when
 * device_supports_discard is false.
 */
static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
	struct log_writes_c *lc = ti->private;
	struct request_queue *q = bdev_get_queue(lc->dev->bdev);

	if (!q || !blk_queue_discard(q)) {
		lc->device_supports_discard = false;
		limits->discard_granularity = 1 << SECTOR_SHIFT;
		limits->max_discard_sectors = (UINT_MAX >> SECTOR_SHIFT);
	}
}

static struct target_type log_writes_target = {
790*0e9cebe7SJosef Bacik .name = "log-writes", 791*0e9cebe7SJosef Bacik .version = {1, 0, 0}, 792*0e9cebe7SJosef Bacik .module = THIS_MODULE, 793*0e9cebe7SJosef Bacik .ctr = log_writes_ctr, 794*0e9cebe7SJosef Bacik .dtr = log_writes_dtr, 795*0e9cebe7SJosef Bacik .map = log_writes_map, 796*0e9cebe7SJosef Bacik .end_io = normal_end_io, 797*0e9cebe7SJosef Bacik .status = log_writes_status, 798*0e9cebe7SJosef Bacik .ioctl = log_writes_ioctl, 799*0e9cebe7SJosef Bacik .merge = log_writes_merge, 800*0e9cebe7SJosef Bacik .message = log_writes_message, 801*0e9cebe7SJosef Bacik .iterate_devices = log_writes_iterate_devices, 802*0e9cebe7SJosef Bacik .io_hints = log_writes_io_hints, 803*0e9cebe7SJosef Bacik }; 804*0e9cebe7SJosef Bacik 805*0e9cebe7SJosef Bacik static int __init dm_log_writes_init(void) 806*0e9cebe7SJosef Bacik { 807*0e9cebe7SJosef Bacik int r = dm_register_target(&log_writes_target); 808*0e9cebe7SJosef Bacik 809*0e9cebe7SJosef Bacik if (r < 0) 810*0e9cebe7SJosef Bacik DMERR("register failed %d", r); 811*0e9cebe7SJosef Bacik 812*0e9cebe7SJosef Bacik return r; 813*0e9cebe7SJosef Bacik } 814*0e9cebe7SJosef Bacik 815*0e9cebe7SJosef Bacik static void __exit dm_log_writes_exit(void) 816*0e9cebe7SJosef Bacik { 817*0e9cebe7SJosef Bacik dm_unregister_target(&log_writes_target); 818*0e9cebe7SJosef Bacik } 819*0e9cebe7SJosef Bacik 820*0e9cebe7SJosef Bacik module_init(dm_log_writes_init); 821*0e9cebe7SJosef Bacik module_exit(dm_log_writes_exit); 822*0e9cebe7SJosef Bacik 823*0e9cebe7SJosef Bacik MODULE_DESCRIPTION(DM_NAME " log writes target"); 824*0e9cebe7SJosef Bacik MODULE_AUTHOR("Josef Bacik <jbacik@fb.com>"); 825*0e9cebe7SJosef Bacik MODULE_LICENSE("GPL"); 826