1 /* 2 * Copyright (C) 2003 Sistina Software 3 * 4 * This file is released under the LGPL. 5 */ 6 7 #include <linux/init.h> 8 #include <linux/slab.h> 9 #include <linux/module.h> 10 #include <linux/vmalloc.h> 11 12 #include "dm-log.h" 13 #include "dm-io.h" 14 15 static LIST_HEAD(_log_types); 16 static DEFINE_SPINLOCK(_lock); 17 18 int dm_register_dirty_log_type(struct dirty_log_type *type) 19 { 20 spin_lock(&_lock); 21 type->use_count = 0; 22 list_add(&type->list, &_log_types); 23 spin_unlock(&_lock); 24 25 return 0; 26 } 27 28 int dm_unregister_dirty_log_type(struct dirty_log_type *type) 29 { 30 spin_lock(&_lock); 31 32 if (type->use_count) 33 DMWARN("Attempt to unregister a log type that is still in use"); 34 else 35 list_del(&type->list); 36 37 spin_unlock(&_lock); 38 39 return 0; 40 } 41 42 static struct dirty_log_type *get_type(const char *type_name) 43 { 44 struct dirty_log_type *type; 45 46 spin_lock(&_lock); 47 list_for_each_entry (type, &_log_types, list) 48 if (!strcmp(type_name, type->name)) { 49 if (!type->use_count && !try_module_get(type->module)){ 50 spin_unlock(&_lock); 51 return NULL; 52 } 53 type->use_count++; 54 spin_unlock(&_lock); 55 return type; 56 } 57 58 spin_unlock(&_lock); 59 return NULL; 60 } 61 62 static void put_type(struct dirty_log_type *type) 63 { 64 spin_lock(&_lock); 65 if (!--type->use_count) 66 module_put(type->module); 67 spin_unlock(&_lock); 68 } 69 70 struct dirty_log *dm_create_dirty_log(const char *type_name, struct dm_target *ti, 71 unsigned int argc, char **argv) 72 { 73 struct dirty_log_type *type; 74 struct dirty_log *log; 75 76 log = kmalloc(sizeof(*log), GFP_KERNEL); 77 if (!log) 78 return NULL; 79 80 type = get_type(type_name); 81 if (!type) { 82 kfree(log); 83 return NULL; 84 } 85 86 log->type = type; 87 if (type->ctr(log, ti, argc, argv)) { 88 kfree(log); 89 put_type(type); 90 return NULL; 91 } 92 93 return log; 94 } 95 96 void dm_destroy_dirty_log(struct dirty_log *log) 97 { 98 log->type->dtr(log); 99 put_type(log->type); 100 kfree(log); 101 } 102 103 /*----------------------------------------------------------------- 104 * Persistent and core logs share a lot of their implementation. 105 * FIXME: need a reload method to be called from a resume 106 *---------------------------------------------------------------*/ 107 /* 108 * Magic for persistent mirrors: "MiRr" 109 */ 110 #define MIRROR_MAGIC 0x4D695272 111 112 /* 113 * The on-disk version of the metadata. 114 */ 115 #define MIRROR_DISK_VERSION 2 116 #define LOG_OFFSET 2 117 118 struct log_header { 119 uint32_t magic; 120 121 /* 122 * Simple, incrementing version. no backward 123 * compatibility. 124 */ 125 uint32_t version; 126 sector_t nr_regions; 127 }; 128 129 struct log_c { 130 struct dm_target *ti; 131 int touched; 132 uint32_t region_size; 133 unsigned int region_count; 134 region_t sync_count; 135 136 unsigned bitset_uint32_count; 137 uint32_t *clean_bits; 138 uint32_t *sync_bits; 139 uint32_t *recovering_bits; /* FIXME: this seems excessive */ 140 141 int sync_search; 142 143 /* Resync flag */ 144 enum sync { 145 DEFAULTSYNC, /* Synchronize if necessary */ 146 NOSYNC, /* Devices known to be already in sync */ 147 FORCESYNC, /* Force a sync to happen */ 148 } sync; 149 150 /* 151 * Disk log fields 152 */ 153 struct dm_dev *log_dev; 154 struct log_header header; 155 156 struct io_region header_location; 157 struct log_header *disk_header; 158 159 struct io_region bits_location; 160 }; 161 162 /* 163 * The touched member needs to be updated every time we access 164 * one of the bitsets. 165 */ 166 static inline int log_test_bit(uint32_t *bs, unsigned bit) 167 { 168 return ext2_test_bit(bit, (unsigned long *) bs) ? 1 : 0; 169 } 170 171 static inline void log_set_bit(struct log_c *l, 172 uint32_t *bs, unsigned bit) 173 { 174 ext2_set_bit(bit, (unsigned long *) bs); 175 l->touched = 1; 176 } 177 178 static inline void log_clear_bit(struct log_c *l, 179 uint32_t *bs, unsigned bit) 180 { 181 ext2_clear_bit(bit, (unsigned long *) bs); 182 l->touched = 1; 183 } 184 185 /*---------------------------------------------------------------- 186 * Header IO 187 *--------------------------------------------------------------*/ 188 static void header_to_disk(struct log_header *core, struct log_header *disk) 189 { 190 disk->magic = cpu_to_le32(core->magic); 191 disk->version = cpu_to_le32(core->version); 192 disk->nr_regions = cpu_to_le64(core->nr_regions); 193 } 194 195 static void header_from_disk(struct log_header *core, struct log_header *disk) 196 { 197 core->magic = le32_to_cpu(disk->magic); 198 core->version = le32_to_cpu(disk->version); 199 core->nr_regions = le64_to_cpu(disk->nr_regions); 200 } 201 202 static int read_header(struct log_c *log) 203 { 204 int r; 205 unsigned long ebits; 206 207 r = dm_io_sync_vm(1, &log->header_location, READ, 208 log->disk_header, &ebits); 209 if (r) 210 return r; 211 212 header_from_disk(&log->header, log->disk_header); 213 214 /* New log required? */ 215 if (log->sync != DEFAULTSYNC || log->header.magic != MIRROR_MAGIC) { 216 log->header.magic = MIRROR_MAGIC; 217 log->header.version = MIRROR_DISK_VERSION; 218 log->header.nr_regions = 0; 219 } 220 221 #ifdef __LITTLE_ENDIAN 222 if (log->header.version == 1) 223 log->header.version = 2; 224 #endif 225 226 if (log->header.version != MIRROR_DISK_VERSION) { 227 DMWARN("incompatible disk log version"); 228 return -EINVAL; 229 } 230 231 return 0; 232 } 233 234 static inline int write_header(struct log_c *log) 235 { 236 unsigned long ebits; 237 238 header_to_disk(&log->header, log->disk_header); 239 return dm_io_sync_vm(1, &log->header_location, WRITE, 240 log->disk_header, &ebits); 241 } 242 243 /*---------------------------------------------------------------- 244 * Bits IO 245 *--------------------------------------------------------------*/ 246 static int read_bits(struct log_c *log) 247 { 248 int r; 249 unsigned long ebits; 250 251 r = dm_io_sync_vm(1, &log->bits_location, READ, 252 log->clean_bits, &ebits); 253 if (r) 254 return r; 255 256 return 0; 257 } 258 259 static int write_bits(struct log_c *log) 260 { 261 unsigned long ebits; 262 return dm_io_sync_vm(1, &log->bits_location, WRITE, 263 log->clean_bits, &ebits); 264 } 265 266 /*---------------------------------------------------------------- 267 * core log constructor/destructor 268 * 269 * argv contains region_size followed optionally by [no]sync 270 *--------------------------------------------------------------*/ 271 #define BYTE_SHIFT 3 272 static int core_ctr(struct dirty_log *log, struct dm_target *ti, 273 unsigned int argc, char **argv) 274 { 275 enum sync sync = DEFAULTSYNC; 276 277 struct log_c *lc; 278 uint32_t region_size; 279 unsigned int region_count; 280 size_t bitset_size; 281 282 if (argc < 1 || argc > 2) { 283 DMWARN("wrong number of arguments to mirror log"); 284 return -EINVAL; 285 } 286 287 if (argc > 1) { 288 if (!strcmp(argv[1], "sync")) 289 sync = FORCESYNC; 290 else if (!strcmp(argv[1], "nosync")) 291 sync = NOSYNC; 292 else { 293 DMWARN("unrecognised sync argument to mirror log: %s", 294 argv[1]); 295 return -EINVAL; 296 } 297 } 298 299 if (sscanf(argv[0], "%u", ®ion_size) != 1) { 300 DMWARN("invalid region size string"); 301 return -EINVAL; 302 } 303 304 region_count = dm_sector_div_up(ti->len, region_size); 305 306 lc = kmalloc(sizeof(*lc), GFP_KERNEL); 307 if (!lc) { 308 DMWARN("couldn't allocate core log"); 309 return -ENOMEM; 310 } 311 312 lc->ti = ti; 313 lc->touched = 0; 314 lc->region_size = region_size; 315 lc->region_count = region_count; 316 lc->sync = sync; 317 318 /* 319 * Work out how many "unsigned long"s we need to hold the bitset. 320 */ 321 bitset_size = dm_round_up(region_count, 322 sizeof(unsigned long) << BYTE_SHIFT); 323 bitset_size >>= BYTE_SHIFT; 324 325 lc->bitset_uint32_count = bitset_size / 4; 326 lc->clean_bits = vmalloc(bitset_size); 327 if (!lc->clean_bits) { 328 DMWARN("couldn't allocate clean bitset"); 329 kfree(lc); 330 return -ENOMEM; 331 } 332 memset(lc->clean_bits, -1, bitset_size); 333 334 lc->sync_bits = vmalloc(bitset_size); 335 if (!lc->sync_bits) { 336 DMWARN("couldn't allocate sync bitset"); 337 vfree(lc->clean_bits); 338 kfree(lc); 339 return -ENOMEM; 340 } 341 memset(lc->sync_bits, (sync == NOSYNC) ? -1 : 0, bitset_size); 342 lc->sync_count = (sync == NOSYNC) ? region_count : 0; 343 344 lc->recovering_bits = vmalloc(bitset_size); 345 if (!lc->recovering_bits) { 346 DMWARN("couldn't allocate sync bitset"); 347 vfree(lc->sync_bits); 348 vfree(lc->clean_bits); 349 kfree(lc); 350 return -ENOMEM; 351 } 352 memset(lc->recovering_bits, 0, bitset_size); 353 lc->sync_search = 0; 354 log->context = lc; 355 return 0; 356 } 357 358 static void core_dtr(struct dirty_log *log) 359 { 360 struct log_c *lc = (struct log_c *) log->context; 361 vfree(lc->clean_bits); 362 vfree(lc->sync_bits); 363 vfree(lc->recovering_bits); 364 kfree(lc); 365 } 366 367 /*---------------------------------------------------------------- 368 * disk log constructor/destructor 369 * 370 * argv contains log_device region_size followed optionally by [no]sync 371 *--------------------------------------------------------------*/ 372 static int disk_ctr(struct dirty_log *log, struct dm_target *ti, 373 unsigned int argc, char **argv) 374 { 375 int r; 376 size_t size; 377 struct log_c *lc; 378 struct dm_dev *dev; 379 380 if (argc < 2 || argc > 3) { 381 DMWARN("wrong number of arguments to disk mirror log"); 382 return -EINVAL; 383 } 384 385 r = dm_get_device(ti, argv[0], 0, 0 /* FIXME */, 386 FMODE_READ | FMODE_WRITE, &dev); 387 if (r) 388 return r; 389 390 r = core_ctr(log, ti, argc - 1, argv + 1); 391 if (r) { 392 dm_put_device(ti, dev); 393 return r; 394 } 395 396 lc = (struct log_c *) log->context; 397 lc->log_dev = dev; 398 399 /* setup the disk header fields */ 400 lc->header_location.bdev = lc->log_dev->bdev; 401 lc->header_location.sector = 0; 402 lc->header_location.count = 1; 403 404 /* 405 * We can't read less than this amount, even though we'll 406 * not be using most of this space. 407 */ 408 lc->disk_header = vmalloc(1 << SECTOR_SHIFT); 409 if (!lc->disk_header) 410 goto bad; 411 412 /* setup the disk bitset fields */ 413 lc->bits_location.bdev = lc->log_dev->bdev; 414 lc->bits_location.sector = LOG_OFFSET; 415 416 size = dm_round_up(lc->bitset_uint32_count * sizeof(uint32_t), 417 1 << SECTOR_SHIFT); 418 lc->bits_location.count = size >> SECTOR_SHIFT; 419 return 0; 420 421 bad: 422 dm_put_device(ti, lc->log_dev); 423 core_dtr(log); 424 return -ENOMEM; 425 } 426 427 static void disk_dtr(struct dirty_log *log) 428 { 429 struct log_c *lc = (struct log_c *) log->context; 430 dm_put_device(lc->ti, lc->log_dev); 431 vfree(lc->disk_header); 432 core_dtr(log); 433 } 434 435 static int count_bits32(uint32_t *addr, unsigned size) 436 { 437 int count = 0, i; 438 439 for (i = 0; i < size; i++) { 440 count += hweight32(*(addr+i)); 441 } 442 return count; 443 } 444 445 static int disk_resume(struct dirty_log *log) 446 { 447 int r; 448 unsigned i; 449 struct log_c *lc = (struct log_c *) log->context; 450 size_t size = lc->bitset_uint32_count * sizeof(uint32_t); 451 452 /* read the disk header */ 453 r = read_header(lc); 454 if (r) 455 return r; 456 457 /* read the bits */ 458 r = read_bits(lc); 459 if (r) 460 return r; 461 462 /* set or clear any new bits */ 463 if (lc->sync == NOSYNC) 464 for (i = lc->header.nr_regions; i < lc->region_count; i++) 465 /* FIXME: amazingly inefficient */ 466 log_set_bit(lc, lc->clean_bits, i); 467 else 468 for (i = lc->header.nr_regions; i < lc->region_count; i++) 469 /* FIXME: amazingly inefficient */ 470 log_clear_bit(lc, lc->clean_bits, i); 471 472 /* copy clean across to sync */ 473 memcpy(lc->sync_bits, lc->clean_bits, size); 474 lc->sync_count = count_bits32(lc->clean_bits, lc->bitset_uint32_count); 475 476 /* write the bits */ 477 r = write_bits(lc); 478 if (r) 479 return r; 480 481 /* set the correct number of regions in the header */ 482 lc->header.nr_regions = lc->region_count; 483 484 /* write the new header */ 485 return write_header(lc); 486 } 487 488 static uint32_t core_get_region_size(struct dirty_log *log) 489 { 490 struct log_c *lc = (struct log_c *) log->context; 491 return lc->region_size; 492 } 493 494 static int core_is_clean(struct dirty_log *log, region_t region) 495 { 496 struct log_c *lc = (struct log_c *) log->context; 497 return log_test_bit(lc->clean_bits, region); 498 } 499 500 static int core_in_sync(struct dirty_log *log, region_t region, int block) 501 { 502 struct log_c *lc = (struct log_c *) log->context; 503 return log_test_bit(lc->sync_bits, region); 504 } 505 506 static int core_flush(struct dirty_log *log) 507 { 508 /* no op */ 509 return 0; 510 } 511 512 static int disk_flush(struct dirty_log *log) 513 { 514 int r; 515 struct log_c *lc = (struct log_c *) log->context; 516 517 /* only write if the log has changed */ 518 if (!lc->touched) 519 return 0; 520 521 r = write_bits(lc); 522 if (!r) 523 lc->touched = 0; 524 525 return r; 526 } 527 528 static void core_mark_region(struct dirty_log *log, region_t region) 529 { 530 struct log_c *lc = (struct log_c *) log->context; 531 log_clear_bit(lc, lc->clean_bits, region); 532 } 533 534 static void core_clear_region(struct dirty_log *log, region_t region) 535 { 536 struct log_c *lc = (struct log_c *) log->context; 537 log_set_bit(lc, lc->clean_bits, region); 538 } 539 540 static int core_get_resync_work(struct dirty_log *log, region_t *region) 541 { 542 struct log_c *lc = (struct log_c *) log->context; 543 544 if (lc->sync_search >= lc->region_count) 545 return 0; 546 547 do { 548 *region = ext2_find_next_zero_bit( 549 (unsigned long *) lc->sync_bits, 550 lc->region_count, 551 lc->sync_search); 552 lc->sync_search = *region + 1; 553 554 if (*region >= lc->region_count) 555 return 0; 556 557 } while (log_test_bit(lc->recovering_bits, *region)); 558 559 log_set_bit(lc, lc->recovering_bits, *region); 560 return 1; 561 } 562 563 static void core_complete_resync_work(struct dirty_log *log, region_t region, 564 int success) 565 { 566 struct log_c *lc = (struct log_c *) log->context; 567 568 log_clear_bit(lc, lc->recovering_bits, region); 569 if (success) { 570 log_set_bit(lc, lc->sync_bits, region); 571 lc->sync_count++; 572 } 573 } 574 575 static region_t core_get_sync_count(struct dirty_log *log) 576 { 577 struct log_c *lc = (struct log_c *) log->context; 578 579 return lc->sync_count; 580 } 581 582 #define DMEMIT_SYNC \ 583 if (lc->sync != DEFAULTSYNC) \ 584 DMEMIT("%ssync ", lc->sync == NOSYNC ? "no" : "") 585 586 static int core_status(struct dirty_log *log, status_type_t status, 587 char *result, unsigned int maxlen) 588 { 589 int sz = 0; 590 struct log_c *lc = log->context; 591 592 switch(status) { 593 case STATUSTYPE_INFO: 594 break; 595 596 case STATUSTYPE_TABLE: 597 DMEMIT("%s %u %u ", log->type->name, 598 lc->sync == DEFAULTSYNC ? 1 : 2, lc->region_size); 599 DMEMIT_SYNC; 600 } 601 602 return sz; 603 } 604 605 static int disk_status(struct dirty_log *log, status_type_t status, 606 char *result, unsigned int maxlen) 607 { 608 int sz = 0; 609 char buffer[16]; 610 struct log_c *lc = log->context; 611 612 switch(status) { 613 case STATUSTYPE_INFO: 614 break; 615 616 case STATUSTYPE_TABLE: 617 format_dev_t(buffer, lc->log_dev->bdev->bd_dev); 618 DMEMIT("%s %u %s %u ", log->type->name, 619 lc->sync == DEFAULTSYNC ? 2 : 3, buffer, 620 lc->region_size); 621 DMEMIT_SYNC; 622 } 623 624 return sz; 625 } 626 627 static struct dirty_log_type _core_type = { 628 .name = "core", 629 .module = THIS_MODULE, 630 .ctr = core_ctr, 631 .dtr = core_dtr, 632 .get_region_size = core_get_region_size, 633 .is_clean = core_is_clean, 634 .in_sync = core_in_sync, 635 .flush = core_flush, 636 .mark_region = core_mark_region, 637 .clear_region = core_clear_region, 638 .get_resync_work = core_get_resync_work, 639 .complete_resync_work = core_complete_resync_work, 640 .get_sync_count = core_get_sync_count, 641 .status = core_status, 642 }; 643 644 static struct dirty_log_type _disk_type = { 645 .name = "disk", 646 .module = THIS_MODULE, 647 .ctr = disk_ctr, 648 .dtr = disk_dtr, 649 .suspend = disk_flush, 650 .resume = disk_resume, 651 .get_region_size = core_get_region_size, 652 .is_clean = core_is_clean, 653 .in_sync = core_in_sync, 654 .flush = disk_flush, 655 .mark_region = core_mark_region, 656 .clear_region = core_clear_region, 657 .get_resync_work = core_get_resync_work, 658 .complete_resync_work = core_complete_resync_work, 659 .get_sync_count = core_get_sync_count, 660 .status = disk_status, 661 }; 662 663 int __init dm_dirty_log_init(void) 664 { 665 int r; 666 667 r = dm_register_dirty_log_type(&_core_type); 668 if (r) 669 DMWARN("couldn't register core log"); 670 671 r = dm_register_dirty_log_type(&_disk_type); 672 if (r) { 673 DMWARN("couldn't register disk type"); 674 dm_unregister_dirty_log_type(&_core_type); 675 } 676 677 return r; 678 } 679 680 void dm_dirty_log_exit(void) 681 { 682 dm_unregister_dirty_log_type(&_disk_type); 683 dm_unregister_dirty_log_type(&_core_type); 684 } 685 686 EXPORT_SYMBOL(dm_register_dirty_log_type); 687 EXPORT_SYMBOL(dm_unregister_dirty_log_type); 688 EXPORT_SYMBOL(dm_create_dirty_log); 689 EXPORT_SYMBOL(dm_destroy_dirty_log); 690