1 /*- 2 * Copyright (c) 2009-2010 The FreeBSD Foundation 3 * All rights reserved. 4 * 5 * This software was developed by Pawel Jakub Dawidek under sponsorship from 6 * the FreeBSD Foundation. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/param.h> /* powerof2() */ 34 #include <sys/queue.h> 35 36 #include <assert.h> 37 #include <bitstring.h> 38 #include <errno.h> 39 #include <stdint.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 #include <string.h> 43 44 #include <activemap.h> 45 46 #define ACTIVEMAP_MAGIC 0xac71e4 47 struct activemap { 48 int am_magic; /* Magic value. */ 49 off_t am_mediasize; /* Media size in bytes. */ 50 uint32_t am_extentsize; /* Extent size in bytes, 51 must be power of 2. */ 52 uint8_t am_extentshift;/* 2 ^ extentbits == extentsize */ 53 int am_nextents; /* Number of extents. */ 54 size_t am_mapsize; /* Bitmap size in bytes. */ 55 uint16_t *am_memtab; /* An array that holds number of pending 56 writes per extent. */ 57 bitstr_t *am_diskmap; /* On-disk bitmap of dirty extents. */ 58 bitstr_t *am_memmap; /* In-memory bitmap of dirty extents. */ 59 size_t am_diskmapsize; /* Map size rounded up to sector size. */ 60 uint64_t am_ndirty; /* Number of dirty regions. */ 61 bitstr_t *am_syncmap; /* Bitmap of extents to sync. */ 62 off_t am_syncoff; /* Next synchronization offset. */ 63 TAILQ_HEAD(skeepdirty, keepdirty) am_keepdirty; /* List of extents that 64 we keep dirty to reduce bitmap 65 updates. */ 66 int am_nkeepdirty; /* Number of am_keepdirty elements. */ 67 int am_nkeepdirty_limit; /* Maximum number of am_keepdirty 68 elements. */ 69 }; 70 71 struct keepdirty { 72 int kd_extent; 73 TAILQ_ENTRY(keepdirty) kd_next; 74 }; 75 76 /* 77 * Helper function taken from sys/systm.h to calculate extentshift. 78 */ 79 static uint32_t 80 bitcount32(uint32_t x) 81 { 82 83 x = (x & 0x55555555) + ((x & 0xaaaaaaaa) >> 1); 84 x = (x & 0x33333333) + ((x & 0xcccccccc) >> 2); 85 x = (x + (x >> 4)) & 0x0f0f0f0f; 86 x = (x + (x >> 8)); 87 x = (x + (x >> 16)) & 0x000000ff; 88 return (x); 89 } 90 91 static __inline int 92 off2ext(const struct activemap *amp, off_t offset) 93 { 94 int extent; 95 96 assert(offset >= 0 && offset < amp->am_mediasize); 97 extent = (offset >> amp->am_extentshift); 98 assert(extent >= 0 && extent < amp->am_nextents); 99 return (extent); 100 } 101 102 static __inline off_t 103 ext2off(const struct activemap *amp, int extent) 104 { 105 off_t offset; 106 107 assert(extent >= 0 && extent < amp->am_nextents); 108 offset = ((off_t)extent << amp->am_extentshift); 109 assert(offset >= 0 && offset < amp->am_mediasize); 110 return (offset); 111 } 112 113 /* 114 * Function calculates number of requests needed to synchronize the given 115 * extent. 116 */ 117 static __inline int 118 ext2reqs(const struct activemap *amp, int ext) 119 { 120 off_t left; 121 122 if (ext < amp->am_nextents - 1) 123 return (((amp->am_extentsize - 1) / MAXPHYS) + 1); 124 125 assert(ext == amp->am_nextents - 1); 126 left = amp->am_mediasize % amp->am_extentsize; 127 if (left == 0) 128 left = amp->am_extentsize; 129 return (((left - 1) / MAXPHYS) + 1); 130 } 131 132 /* 133 * Initialize activemap structure and allocate memory for internal needs. 134 * Function returns 0 on success and -1 if any of the allocations failed. 135 */ 136 int 137 activemap_init(struct activemap **ampp, uint64_t mediasize, uint32_t extentsize, 138 uint32_t sectorsize, uint32_t keepdirty) 139 { 140 struct activemap *amp; 141 142 assert(ampp != NULL); 143 assert(mediasize > 0); 144 assert(extentsize > 0); 145 assert(powerof2(extentsize)); 146 assert(sectorsize > 0); 147 assert(powerof2(sectorsize)); 148 assert(keepdirty > 0); 149 150 amp = malloc(sizeof(*amp)); 151 if (amp == NULL) 152 return (-1); 153 154 amp->am_mediasize = mediasize; 155 amp->am_nkeepdirty_limit = keepdirty; 156 amp->am_extentsize = extentsize; 157 amp->am_extentshift = bitcount32(extentsize - 1); 158 amp->am_nextents = ((mediasize - 1) / extentsize) + 1; 159 amp->am_mapsize = sizeof(bitstr_t) * bitstr_size(amp->am_nextents); 160 amp->am_diskmapsize = roundup2(amp->am_mapsize, sectorsize); 161 amp->am_ndirty = 0; 162 amp->am_syncoff = -2; 163 TAILQ_INIT(&->am_keepdirty); 164 amp->am_nkeepdirty = 0; 165 166 amp->am_memtab = calloc(amp->am_nextents, sizeof(amp->am_memtab[0])); 167 amp->am_diskmap = calloc(1, amp->am_diskmapsize); 168 amp->am_memmap = bit_alloc(amp->am_nextents); 169 amp->am_syncmap = bit_alloc(amp->am_nextents); 170 171 /* 172 * Check to see if any of the allocations above failed. 173 */ 174 if (amp->am_memtab == NULL || amp->am_diskmap == NULL || 175 amp->am_memmap == NULL || amp->am_syncmap == NULL) { 176 if (amp->am_memtab != NULL) 177 free(amp->am_memtab); 178 if (amp->am_diskmap != NULL) 179 free(amp->am_diskmap); 180 if (amp->am_memmap != NULL) 181 free(amp->am_memmap); 182 if (amp->am_syncmap != NULL) 183 free(amp->am_syncmap); 184 amp->am_magic = 0; 185 free(amp); 186 errno = ENOMEM; 187 return (-1); 188 } 189 190 amp->am_magic = ACTIVEMAP_MAGIC; 191 *ampp = amp; 192 193 return (0); 194 } 195 196 static struct keepdirty * 197 keepdirty_find(struct activemap *amp, int extent) 198 { 199 struct keepdirty *kd; 200 201 TAILQ_FOREACH(kd, &->am_keepdirty, kd_next) { 202 if (kd->kd_extent == extent) 203 break; 204 } 205 return (kd); 206 } 207 208 static void 209 keepdirty_add(struct activemap *amp, int extent) 210 { 211 struct keepdirty *kd; 212 213 kd = keepdirty_find(amp, extent); 214 if (kd != NULL) { 215 /* 216 * Only move element at the begining. 217 */ 218 TAILQ_REMOVE(&->am_keepdirty, kd, kd_next); 219 TAILQ_INSERT_HEAD(&->am_keepdirty, kd, kd_next); 220 return; 221 } 222 /* 223 * Add new element, but first remove the most unused one if 224 * we have too many. 225 */ 226 if (amp->am_nkeepdirty >= amp->am_nkeepdirty_limit) { 227 kd = TAILQ_LAST(&->am_keepdirty, skeepdirty); 228 assert(kd != NULL); 229 TAILQ_REMOVE(&->am_keepdirty, kd, kd_next); 230 amp->am_nkeepdirty--; 231 assert(amp->am_nkeepdirty > 0); 232 } 233 if (kd == NULL) 234 kd = malloc(sizeof(*kd)); 235 /* We can ignore allocation failure. */ 236 if (kd != NULL) { 237 kd->kd_extent = extent; 238 amp->am_nkeepdirty++; 239 TAILQ_INSERT_HEAD(&->am_keepdirty, kd, kd_next); 240 } 241 } 242 243 static void 244 keepdirty_fill(struct activemap *amp) 245 { 246 struct keepdirty *kd; 247 248 TAILQ_FOREACH(kd, &->am_keepdirty, kd_next) 249 bit_set(amp->am_diskmap, kd->kd_extent); 250 } 251 252 static void 253 keepdirty_free(struct activemap *amp) 254 { 255 struct keepdirty *kd; 256 257 while ((kd = TAILQ_FIRST(&->am_keepdirty)) != NULL) { 258 TAILQ_REMOVE(&->am_keepdirty, kd, kd_next); 259 amp->am_nkeepdirty--; 260 free(kd); 261 } 262 assert(amp->am_nkeepdirty == 0); 263 } 264 265 /* 266 * Function frees resources allocated by activemap_init() function. 267 */ 268 void 269 activemap_free(struct activemap *amp) 270 { 271 272 assert(amp->am_magic == ACTIVEMAP_MAGIC); 273 274 amp->am_magic = 0; 275 276 keepdirty_free(amp); 277 free(amp->am_memtab); 278 free(amp->am_diskmap); 279 free(amp->am_memmap); 280 free(amp->am_syncmap); 281 } 282 283 /* 284 * Function should be called before we handle write requests. It updates 285 * internal structures and returns true if on-disk metadata should be updated. 286 */ 287 bool 288 activemap_write_start(struct activemap *amp, off_t offset, off_t length) 289 { 290 bool modified; 291 off_t end; 292 int ext; 293 294 assert(amp->am_magic == ACTIVEMAP_MAGIC); 295 assert(length > 0); 296 297 modified = false; 298 end = offset + length - 1; 299 300 for (ext = off2ext(amp, offset); ext <= off2ext(amp, end); ext++) { 301 /* 302 * If the number of pending writes is increased from 0, 303 * we have to mark the extent as dirty also in on-disk bitmap. 304 * By returning true we inform the caller that on-disk bitmap 305 * was modified and has to be flushed to disk. 306 */ 307 if (amp->am_memtab[ext]++ == 0) { 308 assert(!bit_test(amp->am_memmap, ext)); 309 bit_set(amp->am_memmap, ext); 310 amp->am_ndirty++; 311 modified = true; 312 } 313 keepdirty_add(amp, ext); 314 } 315 316 return (modified); 317 } 318 319 /* 320 * Function should be called after receiving write confirmation. It updates 321 * internal structures and returns true if on-disk metadata should be updated. 322 */ 323 bool 324 activemap_write_complete(struct activemap *amp, off_t offset, off_t length) 325 { 326 bool modified; 327 off_t end; 328 int ext; 329 330 assert(amp->am_magic == ACTIVEMAP_MAGIC); 331 assert(length > 0); 332 333 modified = false; 334 end = offset + length - 1; 335 336 for (ext = off2ext(amp, offset); ext <= off2ext(amp, end); ext++) { 337 /* 338 * If the number of pending writes goes down to 0, we have to 339 * mark the extent as clean also in on-disk bitmap. 340 * By returning true we inform the caller that on-disk bitmap 341 * was modified and has to be flushed to disk. 342 */ 343 assert(amp->am_memtab[ext] > 0); 344 assert(bit_test(amp->am_memmap, ext)); 345 if (--amp->am_memtab[ext] == 0) { 346 bit_clear(amp->am_memmap, ext); 347 amp->am_ndirty--; 348 modified = true; 349 } 350 } 351 352 return (modified); 353 } 354 355 /* 356 * Function should be called after finishing synchronization of one extent. 357 * It returns true if on-disk metadata should be updated. 358 */ 359 bool 360 activemap_extent_complete(struct activemap *amp, int extent) 361 { 362 bool modified; 363 int reqs; 364 365 assert(amp->am_magic == ACTIVEMAP_MAGIC); 366 assert(extent >= 0 && extent < amp->am_nextents); 367 368 modified = false; 369 370 reqs = ext2reqs(amp, extent); 371 assert(amp->am_memtab[extent] >= reqs); 372 amp->am_memtab[extent] -= reqs; 373 assert(bit_test(amp->am_memmap, extent)); 374 if (amp->am_memtab[extent] == 0) { 375 bit_clear(amp->am_memmap, extent); 376 amp->am_ndirty--; 377 modified = true; 378 } 379 380 return (modified); 381 } 382 383 /* 384 * Function returns number of dirty regions. 385 */ 386 uint64_t 387 activemap_ndirty(const struct activemap *amp) 388 { 389 390 assert(amp->am_magic == ACTIVEMAP_MAGIC); 391 392 return (amp->am_ndirty); 393 } 394 395 /* 396 * Function compare on-disk bitmap and in-memory bitmap and returns true if 397 * they differ and should be flushed to the disk. 398 */ 399 bool 400 activemap_differ(const struct activemap *amp) 401 { 402 403 assert(amp->am_magic == ACTIVEMAP_MAGIC); 404 405 return (memcmp(amp->am_diskmap, amp->am_memmap, 406 amp->am_mapsize) != 0); 407 } 408 409 /* 410 * Function returns number of bytes used by bitmap. 411 */ 412 size_t 413 activemap_size(const struct activemap *amp) 414 { 415 416 assert(amp->am_magic == ACTIVEMAP_MAGIC); 417 418 return (amp->am_mapsize); 419 } 420 421 /* 422 * Function returns number of bytes needed for storing on-disk bitmap. 423 * This is the same as activemap_size(), but rounded up to sector size. 424 */ 425 size_t 426 activemap_ondisk_size(const struct activemap *amp) 427 { 428 429 assert(amp->am_magic == ACTIVEMAP_MAGIC); 430 431 return (amp->am_diskmapsize); 432 } 433 434 /* 435 * Function copies the given buffer read from disk to the internal bitmap. 436 */ 437 void 438 activemap_copyin(struct activemap *amp, const unsigned char *buf, size_t size) 439 { 440 int ext; 441 442 assert(amp->am_magic == ACTIVEMAP_MAGIC); 443 assert(size >= amp->am_mapsize); 444 445 memcpy(amp->am_diskmap, buf, amp->am_mapsize); 446 memcpy(amp->am_memmap, buf, amp->am_mapsize); 447 memcpy(amp->am_syncmap, buf, amp->am_mapsize); 448 449 bit_ffs(amp->am_memmap, amp->am_nextents, &ext); 450 if (ext == -1) { 451 /* There are no dirty extents, so we can leave now. */ 452 return; 453 } 454 /* 455 * Set synchronization offset to the first dirty extent. 456 */ 457 activemap_sync_rewind(amp); 458 /* 459 * We have dirty extents and we want them to stay that way until 460 * we synchronize, so we set number of pending writes to number 461 * of requests needed to synchronize one extent. 462 */ 463 amp->am_ndirty = 0; 464 for (; ext < amp->am_nextents; ext++) { 465 if (bit_test(amp->am_memmap, ext)) { 466 amp->am_memtab[ext] = ext2reqs(amp, ext); 467 amp->am_ndirty++; 468 } 469 } 470 } 471 472 /* 473 * Function merges the given bitmap with existing one. 474 */ 475 void 476 activemap_merge(struct activemap *amp, const unsigned char *buf, size_t size) 477 { 478 bitstr_t *remmap = __DECONST(bitstr_t *, buf); 479 int ext; 480 481 assert(amp->am_magic == ACTIVEMAP_MAGIC); 482 assert(size >= amp->am_mapsize); 483 484 bit_ffs(remmap, amp->am_nextents, &ext); 485 if (ext == -1) { 486 /* There are no dirty extents, so we can leave now. */ 487 return; 488 } 489 /* 490 * We have dirty extents and we want them to stay that way until 491 * we synchronize, so we set number of pending writes to number 492 * of requests needed to synchronize one extent. 493 */ 494 for (; ext < amp->am_nextents; ext++) { 495 /* Local extent already dirty. */ 496 if (bit_test(amp->am_syncmap, ext)) 497 continue; 498 /* Remote extent isn't dirty. */ 499 if (!bit_test(remmap, ext)) 500 continue; 501 bit_set(amp->am_syncmap, ext); 502 bit_set(amp->am_memmap, ext); 503 bit_set(amp->am_diskmap, ext); 504 if (amp->am_memtab[ext] == 0) 505 amp->am_ndirty++; 506 amp->am_memtab[ext] = ext2reqs(amp, ext); 507 } 508 /* 509 * Set synchronization offset to the first dirty extent. 510 */ 511 activemap_sync_rewind(amp); 512 } 513 514 /* 515 * Function returns pointer to internal bitmap that should be written to disk. 516 */ 517 const unsigned char * 518 activemap_bitmap(struct activemap *amp, size_t *sizep) 519 { 520 521 assert(amp->am_magic == ACTIVEMAP_MAGIC); 522 523 if (sizep != NULL) 524 *sizep = amp->am_diskmapsize; 525 memcpy(amp->am_diskmap, amp->am_memmap, amp->am_mapsize); 526 keepdirty_fill(amp); 527 return ((const unsigned char *)amp->am_diskmap); 528 } 529 530 /* 531 * Function calculates size needed to store bitmap on disk. 532 */ 533 size_t 534 activemap_calc_ondisk_size(uint64_t mediasize, uint32_t extentsize, 535 uint32_t sectorsize) 536 { 537 uint64_t nextents, mapsize; 538 539 assert(mediasize > 0); 540 assert(extentsize > 0); 541 assert(powerof2(extentsize)); 542 assert(sectorsize > 0); 543 assert(powerof2(sectorsize)); 544 545 nextents = ((mediasize - 1) / extentsize) + 1; 546 mapsize = sizeof(bitstr_t) * bitstr_size(nextents); 547 return (roundup2(mapsize, sectorsize)); 548 } 549 550 /* 551 * Set synchronization offset to the first dirty extent. 552 */ 553 void 554 activemap_sync_rewind(struct activemap *amp) 555 { 556 int ext; 557 558 assert(amp->am_magic == ACTIVEMAP_MAGIC); 559 560 bit_ffs(amp->am_syncmap, amp->am_nextents, &ext); 561 if (ext == -1) { 562 /* There are no extents to synchronize. */ 563 amp->am_syncoff = -2; 564 return; 565 } 566 /* 567 * Mark that we want to start synchronization from the begining. 568 */ 569 amp->am_syncoff = -1; 570 } 571 572 /* 573 * Return next offset of where we should synchronize. 574 */ 575 off_t 576 activemap_sync_offset(struct activemap *amp, off_t *lengthp, int *syncextp) 577 { 578 off_t syncoff, left; 579 int ext; 580 581 assert(amp->am_magic == ACTIVEMAP_MAGIC); 582 assert(lengthp != NULL); 583 assert(syncextp != NULL); 584 585 *syncextp = -1; 586 587 if (amp->am_syncoff == -2) 588 return (-1); 589 590 if (amp->am_syncoff >= 0 && 591 (amp->am_syncoff + MAXPHYS >= amp->am_mediasize || 592 off2ext(amp, amp->am_syncoff) != 593 off2ext(amp, amp->am_syncoff + MAXPHYS))) { 594 /* 595 * We are about to change extent, so mark previous one as clean. 596 */ 597 ext = off2ext(amp, amp->am_syncoff); 598 bit_clear(amp->am_syncmap, ext); 599 *syncextp = ext; 600 amp->am_syncoff = -1; 601 } 602 603 if (amp->am_syncoff == -1) { 604 /* 605 * Let's find first extent to synchronize. 606 */ 607 bit_ffs(amp->am_syncmap, amp->am_nextents, &ext); 608 if (ext == -1) { 609 amp->am_syncoff = -2; 610 return (-1); 611 } 612 amp->am_syncoff = ext2off(amp, ext); 613 } else { 614 /* 615 * We don't change extent, so just increase offset. 616 */ 617 amp->am_syncoff += MAXPHYS; 618 if (amp->am_syncoff >= amp->am_mediasize) { 619 amp->am_syncoff = -2; 620 return (-1); 621 } 622 } 623 624 syncoff = amp->am_syncoff; 625 left = ext2off(amp, off2ext(amp, syncoff)) + 626 amp->am_extentsize - syncoff; 627 if (syncoff + left > amp->am_mediasize) 628 left = amp->am_mediasize - syncoff; 629 if (left > MAXPHYS) 630 left = MAXPHYS; 631 632 assert(left >= 0 && left <= MAXPHYS); 633 assert(syncoff >= 0 && syncoff < amp->am_mediasize); 634 assert(syncoff + left >= 0 && syncoff + left <= amp->am_mediasize); 635 636 *lengthp = left; 637 return (syncoff); 638 } 639 640 /* 641 * Mark extent(s) containing the given region for synchronization. 642 * Most likely one of the components is unavailable. 643 */ 644 bool 645 activemap_need_sync(struct activemap *amp, off_t offset, off_t length) 646 { 647 bool modified; 648 off_t end; 649 int ext; 650 651 assert(amp->am_magic == ACTIVEMAP_MAGIC); 652 653 modified = false; 654 end = offset + length - 1; 655 656 for (ext = off2ext(amp, offset); ext <= off2ext(amp, end); ext++) { 657 if (bit_test(amp->am_syncmap, ext)) { 658 /* Already marked for synchronization. */ 659 assert(bit_test(amp->am_memmap, ext)); 660 continue; 661 } 662 bit_set(amp->am_syncmap, ext); 663 if (!bit_test(amp->am_memmap, ext)) { 664 bit_set(amp->am_memmap, ext); 665 amp->am_ndirty++; 666 } 667 amp->am_memtab[ext] += ext2reqs(amp, ext); 668 modified = true; 669 } 670 671 return (modified); 672 } 673 674 void 675 activemap_dump(const struct activemap *amp) 676 { 677 int bit; 678 679 printf("M: "); 680 for (bit = 0; bit < amp->am_nextents; bit++) 681 printf("%d", bit_test(amp->am_memmap, bit) ? 1 : 0); 682 printf("\n"); 683 printf("D: "); 684 for (bit = 0; bit < amp->am_nextents; bit++) 685 printf("%d", bit_test(amp->am_diskmap, bit) ? 1 : 0); 686 printf("\n"); 687 printf("S: "); 688 for (bit = 0; bit < amp->am_nextents; bit++) 689 printf("%d", bit_test(amp->am_syncmap, bit) ? 1 : 0); 690 printf("\n"); 691 } 692