// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2023 Red Hat
 */

#include "encodings.h"

#include <linux/log2.h>

#include "logger.h"
#include "memory-alloc.h"
#include "permassert.h"

#include "constants.h"
#include "status-codes.h"
#include "types.h"

/** The maximum logical space is 4 petabytes, which is 1 terablock. */
static const block_count_t MAXIMUM_VDO_LOGICAL_BLOCKS = 1024ULL * 1024 * 1024 * 1024;

/** The maximum physical space is 256 terabytes, which is 64 gigablocks. */
static const block_count_t MAXIMUM_VDO_PHYSICAL_BLOCKS = 1024ULL * 1024 * 1024 * 64;

struct geometry_block {
	char magic_number[VDO_GEOMETRY_MAGIC_NUMBER_SIZE];
	struct packed_header header;
	u32 checksum;
} __packed;

static const struct header GEOMETRY_BLOCK_HEADER_5_0 = {
	.id = VDO_GEOMETRY_BLOCK,
	.version = {
		.major_version = 5,
		.minor_version = 0,
	},
	/*
	 * Note: this size isn't just the payload size following the header, like it is everywhere
	 * else in VDO.
	 */
	.size = sizeof(struct geometry_block) + sizeof(struct volume_geometry),
};

static const struct header GEOMETRY_BLOCK_HEADER_4_0 = {
	.id = VDO_GEOMETRY_BLOCK,
	.version = {
		.major_version = 4,
		.minor_version = 0,
	},
	/*
	 * Note: this size isn't just the payload size following the header, like it is everywhere
	 * else in VDO.
	 */
	.size = sizeof(struct geometry_block) + sizeof(struct volume_geometry_4_0),
};

const u8 VDO_GEOMETRY_MAGIC_NUMBER[VDO_GEOMETRY_MAGIC_NUMBER_SIZE + 1] = "dmvdo001";

#define PAGE_HEADER_4_1_SIZE (8 + 8 + 8 + 1 + 1 + 1 + 1)

static const struct version_number BLOCK_MAP_4_1 = {
	.major_version = 4,
	.minor_version = 1,
};

const struct header VDO_BLOCK_MAP_HEADER_2_0 = {
	.id = VDO_BLOCK_MAP,
	.version = {
		.major_version = 2,
		.minor_version = 0,
	},
	.size = sizeof(struct block_map_state_2_0),
};

const struct header VDO_RECOVERY_JOURNAL_HEADER_7_0 = {
	.id = VDO_RECOVERY_JOURNAL,
	.version = {
		.major_version = 7,
		.minor_version = 0,
	},
	.size = sizeof(struct recovery_journal_state_7_0),
};

const struct header VDO_SLAB_DEPOT_HEADER_2_0 = {
	.id = VDO_SLAB_DEPOT,
	.version = {
		.major_version = 2,
		.minor_version = 0,
	},
	.size = sizeof(struct slab_depot_state_2_0),
};

static const struct header VDO_LAYOUT_HEADER_3_0 = {
	.id = VDO_LAYOUT,
	.version = {
		.major_version = 3,
		.minor_version = 0,
	},
	.size = sizeof(struct layout_3_0) + (sizeof(struct partition_3_0) * VDO_PARTITION_COUNT),
};

static const enum partition_id REQUIRED_PARTITIONS[] = {
	VDO_BLOCK_MAP_PARTITION,
	VDO_SLAB_DEPOT_PARTITION,
	VDO_RECOVERY_JOURNAL_PARTITION,
	VDO_SLAB_SUMMARY_PARTITION,
};

/*
 * The current version for the data encoded in the super block. This must be changed any time
 * there is a change to the encoding of the component data of any VDO component.
 */
static const struct version_number VDO_COMPONENT_DATA_41_0 = {
	.major_version = 41,
	.minor_version = 0,
};

const struct version_number VDO_VOLUME_VERSION_67_0 = {
	.major_version = 67,
	.minor_version = 0,
};

static const struct header SUPER_BLOCK_HEADER_12_0 = {
	.id = VDO_SUPER_BLOCK,
	.version = {
		.major_version = 12,
		.minor_version = 0,
	},

	/* This is the minimum size, if the super block contains no components. */
	.size = VDO_SUPER_BLOCK_FIXED_SIZE - VDO_ENCODED_HEADER_SIZE,
};
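/*
 * For reference (see vdo_encode_super_block() below): a 12.0 super block is encoded as this
 * header, followed by VDO_COMPONENT_DATA_SIZE bytes of component data, followed by a CRC-32
 * checksum of everything preceding it. The whole encoding must fit in the block's first sector
 * so that it cannot be torn by a partial write.
 */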
/**
 * validate_version() - Check whether a version matches an expected version.
 * @expected_version: The expected version.
 * @actual_version: The version being validated.
 * @component_name: The name of the component or the calling function (for error logging).
 *
 * Logs an error describing a mismatch.
 *
 * Return: VDO_SUCCESS if the versions are the same,
 *         VDO_UNSUPPORTED_VERSION if the versions don't match.
 */
static int __must_check validate_version(struct version_number expected_version,
					 struct version_number actual_version,
					 const char *component_name)
{
	if (!vdo_are_same_version(expected_version, actual_version)) {
		return vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION,
					      "%s version mismatch, expected %d.%d, got %d.%d",
					      component_name,
					      expected_version.major_version,
					      expected_version.minor_version,
					      actual_version.major_version,
					      actual_version.minor_version);
	}

	return VDO_SUCCESS;
}

/**
 * vdo_validate_header() - Check whether a header matches expectations.
 * @expected_header: The expected header.
 * @actual_header: The header being validated.
 * @exact_size: If true, the size fields of the two headers must be the same, otherwise it is
 *              required that actual_header.size >= expected_header.size.
 * @name: The name of the component or the calling function (for error logging).
 *
 * Logs an error describing the first mismatch found.
 *
 * Return: VDO_SUCCESS if the header meets expectations,
 *         VDO_INCORRECT_COMPONENT if the component ids don't match,
 *         VDO_UNSUPPORTED_VERSION if the versions or sizes don't match.
 */
int vdo_validate_header(const struct header *expected_header,
			const struct header *actual_header, bool exact_size,
			const char *name)
{
	int result;

	if (expected_header->id != actual_header->id) {
		return vdo_log_error_strerror(VDO_INCORRECT_COMPONENT,
					      "%s ID mismatch, expected %d, got %d",
					      name, expected_header->id,
					      actual_header->id);
	}

	result = validate_version(expected_header->version, actual_header->version,
				  name);
	if (result != VDO_SUCCESS)
		return result;

	if ((expected_header->size > actual_header->size) ||
	    (exact_size && (expected_header->size < actual_header->size))) {
		return vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION,
					      "%s size mismatch, expected %zu, got %zu",
					      name, expected_header->size,
					      actual_header->size);
	}

	return VDO_SUCCESS;
}

static void encode_version_number(u8 *buffer, size_t *offset,
				  struct version_number version)
{
	struct packed_version_number packed = vdo_pack_version_number(version);

	memcpy(buffer + *offset, &packed, sizeof(packed));
	*offset += sizeof(packed);
}

void vdo_encode_header(u8 *buffer, size_t *offset, const struct header *header)
{
	struct packed_header packed = vdo_pack_header(header);

	memcpy(buffer + *offset, &packed, sizeof(packed));
	*offset += sizeof(packed);
}

static void decode_version_number(u8 *buffer, size_t *offset,
				  struct version_number *version)
{
	struct packed_version_number packed;

	memcpy(&packed, buffer + *offset, sizeof(packed));
	*offset += sizeof(packed);
	*version = vdo_unpack_version_number(packed);
}
void vdo_decode_header(u8 *buffer, size_t *offset, struct header *header)
{
	struct packed_header packed;

	memcpy(&packed, buffer + *offset, sizeof(packed));
	*offset += sizeof(packed);

	*header = vdo_unpack_header(&packed);
}

/**
 * decode_volume_geometry() - Decode the on-disk representation of a volume geometry from a buffer.
 * @buffer: A buffer to decode from.
 * @offset: The offset in the buffer at which to decode.
 * @geometry: The structure to receive the decoded fields.
 * @version: The geometry block version to decode.
 */
static void decode_volume_geometry(u8 *buffer, size_t *offset,
				   struct volume_geometry *geometry, u32 version)
{
	u32 unused, mem;
	enum volume_region_id id;
	nonce_t nonce;
	block_count_t bio_offset = 0;
	bool sparse;

	/* This is for backwards compatibility. */
	decode_u32_le(buffer, offset, &unused);
	geometry->unused = unused;

	decode_u64_le(buffer, offset, &nonce);
	geometry->nonce = nonce;

	memcpy((unsigned char *) &geometry->uuid, buffer + *offset, sizeof(uuid_t));
	*offset += sizeof(uuid_t);

	/* The bio offset was added to the encoding in version 5. */
	if (version > 4)
		decode_u64_le(buffer, offset, &bio_offset);
	geometry->bio_offset = bio_offset;

	for (id = 0; id < VDO_VOLUME_REGION_COUNT; id++) {
		physical_block_number_t start_block;
		u32 saved_id;

		decode_u32_le(buffer, offset, &saved_id);
		decode_u64_le(buffer, offset, &start_block);

		geometry->regions[id] = (struct volume_region) {
			.id = saved_id,
			.start_block = start_block,
		};
	}

	decode_u32_le(buffer, offset, &mem);
	/* Skip over an obsolete 32-bit field. */
	*offset += sizeof(u32);
	sparse = buffer[(*offset)++];

	geometry->index_config = (struct index_config) {
		.mem = mem,
		.sparse = sparse,
	};
}

/**
 * vdo_parse_geometry_block() - Decode and validate an encoded geometry block.
 * @block: The encoded geometry block.
 * @geometry: The structure to receive the decoded fields.
 */
int __must_check vdo_parse_geometry_block(u8 *block, struct volume_geometry *geometry)
{
	u32 checksum, saved_checksum;
	struct header header;
	size_t offset = 0;
	int result;

	if (memcmp(block, VDO_GEOMETRY_MAGIC_NUMBER, VDO_GEOMETRY_MAGIC_NUMBER_SIZE) != 0)
		return VDO_BAD_MAGIC;
	offset += VDO_GEOMETRY_MAGIC_NUMBER_SIZE;

	vdo_decode_header(block, &offset, &header);
	if (header.version.major_version <= 4) {
		result = vdo_validate_header(&GEOMETRY_BLOCK_HEADER_4_0, &header,
					     true, __func__);
	} else {
		result = vdo_validate_header(&GEOMETRY_BLOCK_HEADER_5_0, &header,
					     true, __func__);
	}
	if (result != VDO_SUCCESS)
		return result;

	decode_volume_geometry(block, &offset, geometry, header.version.major_version);

	result = VDO_ASSERT(header.size == offset + sizeof(u32),
			    "should have decoded up to the geometry checksum");
	if (result != VDO_SUCCESS)
		return result;

	/* Decode and verify the checksum. */
	checksum = vdo_crc32(block, offset);
	decode_u32_le(block, &offset, &saved_checksum);

	return ((checksum == saved_checksum) ? VDO_SUCCESS : VDO_CHECKSUM_MISMATCH);
}
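/*
 * For reference, vdo_parse_geometry_block() above walks the geometry block in this order:
 *
 *	magic number	 8 bytes ("dmvdo001")
 *	packed header	 the encoded struct header
 *	geometry	 struct volume_geometry (5.0) or struct volume_geometry_4_0 (4.0)
 *	checksum	 4 bytes, a CRC-32 covering everything before it
 */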
struct block_map_page *vdo_format_block_map_page(void *buffer, nonce_t nonce,
						 physical_block_number_t pbn,
						 bool initialized)
{
	struct block_map_page *page = buffer;

	memset(buffer, 0, VDO_BLOCK_SIZE);
	page->version = vdo_pack_version_number(BLOCK_MAP_4_1);
	page->header.nonce = __cpu_to_le64(nonce);
	page->header.pbn = __cpu_to_le64(pbn);
	page->header.initialized = initialized;
	return page;
}

enum block_map_page_validity vdo_validate_block_map_page(struct block_map_page *page,
							 nonce_t nonce,
							 physical_block_number_t pbn)
{
	BUILD_BUG_ON(sizeof(struct block_map_page_header) != PAGE_HEADER_4_1_SIZE);

	if (!vdo_are_same_version(BLOCK_MAP_4_1,
				  vdo_unpack_version_number(page->version)) ||
	    !page->header.initialized || (nonce != __le64_to_cpu(page->header.nonce)))
		return VDO_BLOCK_MAP_PAGE_INVALID;

	if (pbn != vdo_get_block_map_page_pbn(page))
		return VDO_BLOCK_MAP_PAGE_BAD;

	return VDO_BLOCK_MAP_PAGE_VALID;
}

static int decode_block_map_state_2_0(u8 *buffer, size_t *offset,
				      struct block_map_state_2_0 *state)
{
	size_t initial_offset;
	block_count_t flat_page_count, root_count;
	physical_block_number_t flat_page_origin, root_origin;
	struct header header;
	int result;

	vdo_decode_header(buffer, offset, &header);
	result = vdo_validate_header(&VDO_BLOCK_MAP_HEADER_2_0, &header, true, __func__);
	if (result != VDO_SUCCESS)
		return result;

	initial_offset = *offset;

	decode_u64_le(buffer, offset, &flat_page_origin);
	result = VDO_ASSERT(flat_page_origin == VDO_BLOCK_MAP_FLAT_PAGE_ORIGIN,
			    "Flat page origin must be %u (recorded as %llu)",
			    VDO_BLOCK_MAP_FLAT_PAGE_ORIGIN,
			    (unsigned long long) flat_page_origin);
	if (result != VDO_SUCCESS)
		return result;

	decode_u64_le(buffer, offset, &flat_page_count);
	result = VDO_ASSERT(flat_page_count == 0,
			    "Flat page count must be 0 (recorded as %llu)",
			    (unsigned long long) flat_page_count);
	if (result != VDO_SUCCESS)
		return result;

	decode_u64_le(buffer, offset, &root_origin);
	decode_u64_le(buffer, offset, &root_count);

	result = VDO_ASSERT(VDO_BLOCK_MAP_HEADER_2_0.size == *offset - initial_offset,
			    "decoded block map component size must match header size");
	if (result != VDO_SUCCESS)
		return result;

	*state = (struct block_map_state_2_0) {
		.flat_page_origin = flat_page_origin,
		.flat_page_count = flat_page_count,
		.root_origin = root_origin,
		.root_count = root_count,
	};

	return VDO_SUCCESS;
}

static void encode_block_map_state_2_0(u8 *buffer, size_t *offset,
				       struct block_map_state_2_0 state)
{
	size_t initial_offset;

	vdo_encode_header(buffer, offset, &VDO_BLOCK_MAP_HEADER_2_0);

	initial_offset = *offset;
	encode_u64_le(buffer, offset, state.flat_page_origin);
	encode_u64_le(buffer, offset, state.flat_page_count);
	encode_u64_le(buffer, offset, state.root_origin);
	encode_u64_le(buffer, offset, state.root_count);

	VDO_ASSERT_LOG_ONLY(VDO_BLOCK_MAP_HEADER_2_0.size == *offset - initial_offset,
			    "encoded block map component size must match header size");
}

/**
 * vdo_compute_new_forest_pages() - Compute the number of pages which must be allocated at each
 *                                  level in order to grow the forest to a new number of entries.
 * @root_count: The number of roots in the block map forest.
 * @old_sizes: The sizes of the current forest at each level, or NULL if there is no forest yet.
 * @entries: The new number of entries the block map must address.
 * @new_sizes: A boundary to receive the new size of each level.
 *
 * Return: The total number of non-leaf pages required.
 */
block_count_t vdo_compute_new_forest_pages(root_count_t root_count,
					   struct boundary *old_sizes,
					   block_count_t entries,
					   struct boundary *new_sizes)
{
	page_count_t leaf_pages = max(vdo_compute_block_map_page_count(entries), 1U);
	page_count_t level_size = DIV_ROUND_UP(leaf_pages, root_count);
	block_count_t total_pages = 0;
	height_t height;

	for (height = 0; height < VDO_BLOCK_MAP_TREE_HEIGHT; height++) {
		block_count_t new_pages;

		level_size = DIV_ROUND_UP(level_size, VDO_BLOCK_MAP_ENTRIES_PER_PAGE);
		new_sizes->levels[height] = level_size;
		new_pages = level_size;
		if (old_sizes != NULL)
			new_pages -= old_sizes->levels[height];
		total_pages += (new_pages * root_count);
	}

	return total_pages;
}
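/*
 * A worked example of the computation above (assuming VDO_BLOCK_MAP_ENTRIES_PER_PAGE is 812 and
 * VDO_BLOCK_MAP_TREE_HEIGHT is 5, and that vdo_compute_block_map_page_count() rounds entries up
 * to whole pages): growing a one-root forest (root_count == 1, old_sizes == NULL) to address
 * 659,344 entries needs DIV_ROUND_UP(659344, 812) = 812 leaf pages; the first non-leaf level
 * then needs DIV_ROUND_UP(812, 812) = 1 page, each higher level also rounds up to 1 page, and
 * the total comes to 5 non-leaf pages.
 */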
/**
 * encode_recovery_journal_state_7_0() - Encode the state of a recovery journal into a buffer.
 */
static void encode_recovery_journal_state_7_0(u8 *buffer, size_t *offset,
					      struct recovery_journal_state_7_0 state)
{
	size_t initial_offset;

	vdo_encode_header(buffer, offset, &VDO_RECOVERY_JOURNAL_HEADER_7_0);

	initial_offset = *offset;
	encode_u64_le(buffer, offset, state.journal_start);
	encode_u64_le(buffer, offset, state.logical_blocks_used);
	encode_u64_le(buffer, offset, state.block_map_data_blocks);

	VDO_ASSERT_LOG_ONLY(VDO_RECOVERY_JOURNAL_HEADER_7_0.size == *offset - initial_offset,
			    "encoded recovery journal component size must match header size");
}

/**
 * decode_recovery_journal_state_7_0() - Decode the state of a recovery journal saved in a buffer.
 * @buffer: The buffer containing the saved state.
 * @offset: The offset in the buffer at which to decode.
 * @state: A pointer to a recovery journal state to hold the result of a successful decode.
 *
 * Return: VDO_SUCCESS or an error code.
 */
static int __must_check decode_recovery_journal_state_7_0(u8 *buffer, size_t *offset,
							  struct recovery_journal_state_7_0 *state)
{
	struct header header;
	int result;
	size_t initial_offset;
	sequence_number_t journal_start;
	block_count_t logical_blocks_used, block_map_data_blocks;

	vdo_decode_header(buffer, offset, &header);
	result = vdo_validate_header(&VDO_RECOVERY_JOURNAL_HEADER_7_0, &header, true,
				     __func__);
	if (result != VDO_SUCCESS)
		return result;

	initial_offset = *offset;
	decode_u64_le(buffer, offset, &journal_start);
	decode_u64_le(buffer, offset, &logical_blocks_used);
	decode_u64_le(buffer, offset, &block_map_data_blocks);

	result = VDO_ASSERT(VDO_RECOVERY_JOURNAL_HEADER_7_0.size == *offset - initial_offset,
			    "decoded recovery journal component size must match header size");
	if (result != VDO_SUCCESS)
		return result;

	*state = (struct recovery_journal_state_7_0) {
		.journal_start = journal_start,
		.logical_blocks_used = logical_blocks_used,
		.block_map_data_blocks = block_map_data_blocks,
	};

	return VDO_SUCCESS;
}
/**
 * vdo_get_journal_operation_name() - Get the name of a journal operation.
 * @operation: The operation to name.
 *
 * Return: The name of the operation.
 */
const char *vdo_get_journal_operation_name(enum journal_operation operation)
{
	switch (operation) {
	case VDO_JOURNAL_DATA_REMAPPING:
		return "data remapping";

	case VDO_JOURNAL_BLOCK_MAP_REMAPPING:
		return "block map remapping";

	default:
		return "unknown journal operation";
	}
}

/**
 * encode_slab_depot_state_2_0() - Encode the state of a slab depot into a buffer.
 */
static void encode_slab_depot_state_2_0(u8 *buffer, size_t *offset,
					struct slab_depot_state_2_0 state)
{
	size_t initial_offset;

	vdo_encode_header(buffer, offset, &VDO_SLAB_DEPOT_HEADER_2_0);

	initial_offset = *offset;
	encode_u64_le(buffer, offset, state.slab_config.slab_blocks);
	encode_u64_le(buffer, offset, state.slab_config.data_blocks);
	encode_u64_le(buffer, offset, state.slab_config.reference_count_blocks);
	encode_u64_le(buffer, offset, state.slab_config.slab_journal_blocks);
	encode_u64_le(buffer, offset, state.slab_config.slab_journal_flushing_threshold);
	encode_u64_le(buffer, offset, state.slab_config.slab_journal_blocking_threshold);
	encode_u64_le(buffer, offset, state.slab_config.slab_journal_scrubbing_threshold);
	encode_u64_le(buffer, offset, state.first_block);
	encode_u64_le(buffer, offset, state.last_block);
	buffer[(*offset)++] = state.zone_count;

	VDO_ASSERT_LOG_ONLY(VDO_SLAB_DEPOT_HEADER_2_0.size == *offset - initial_offset,
			    "encoded slab depot component size must match header size");
}

/**
 * decode_slab_depot_state_2_0() - Decode slab depot component state version 2.0 from a buffer.
 *
 * Return: VDO_SUCCESS or an error code.
 */
static int decode_slab_depot_state_2_0(u8 *buffer, size_t *offset,
				       struct slab_depot_state_2_0 *state)
{
	struct header header;
	int result;
	size_t initial_offset;
	struct slab_config slab_config;
	block_count_t count;
	physical_block_number_t first_block, last_block;
	zone_count_t zone_count;

	vdo_decode_header(buffer, offset, &header);
	result = vdo_validate_header(&VDO_SLAB_DEPOT_HEADER_2_0, &header, true,
				     __func__);
	if (result != VDO_SUCCESS)
		return result;

	initial_offset = *offset;
	decode_u64_le(buffer, offset, &count);
	slab_config.slab_blocks = count;

	decode_u64_le(buffer, offset, &count);
	slab_config.data_blocks = count;

	decode_u64_le(buffer, offset, &count);
	slab_config.reference_count_blocks = count;

	decode_u64_le(buffer, offset, &count);
	slab_config.slab_journal_blocks = count;

	decode_u64_le(buffer, offset, &count);
	slab_config.slab_journal_flushing_threshold = count;

	decode_u64_le(buffer, offset, &count);
	slab_config.slab_journal_blocking_threshold = count;

	decode_u64_le(buffer, offset, &count);
	slab_config.slab_journal_scrubbing_threshold = count;

	decode_u64_le(buffer, offset, &first_block);
	decode_u64_le(buffer, offset, &last_block);
	zone_count = buffer[(*offset)++];

	result = VDO_ASSERT(VDO_SLAB_DEPOT_HEADER_2_0.size == *offset - initial_offset,
			    "decoded slab depot component size must match header size");
	if (result != VDO_SUCCESS)
		return result;

	*state = (struct slab_depot_state_2_0) {
		.slab_config = slab_config,
		.first_block = first_block,
		.last_block = last_block,
		.zone_count = zone_count,
	};

	return VDO_SUCCESS;
}
/**
 * vdo_configure_slab_depot() - Configure the slab depot.
 * @partition: The slab depot partition.
 * @slab_config: The configuration of a single slab.
 * @zone_count: The number of zones the depot will use.
 * @state: The state structure to be configured.
 *
 * Configures the slab depot for the specified storage capacity, finding the number of data
 * blocks that will fit and still leave room for the depot metadata, then returns the saved
 * state for that configuration.
 *
 * Return: VDO_SUCCESS or an error code.
 */
int vdo_configure_slab_depot(const struct partition *partition,
			     struct slab_config slab_config, zone_count_t zone_count,
			     struct slab_depot_state_2_0 *state)
{
	block_count_t total_slab_blocks, total_data_blocks;
	size_t slab_count;
	physical_block_number_t last_block;
	block_count_t slab_size = slab_config.slab_blocks;

	vdo_log_debug("slabDepot %s(block_count=%llu, first_block=%llu, slab_size=%llu, zone_count=%u)",
		      __func__, (unsigned long long) partition->count,
		      (unsigned long long) partition->offset,
		      (unsigned long long) slab_size, zone_count);

	/* We do not allow runt slabs, so we waste up to a slab's worth. */
	slab_count = (partition->count / slab_size);
	if (slab_count == 0)
		return VDO_NO_SPACE;

	if (slab_count > MAX_VDO_SLABS)
		return VDO_TOO_MANY_SLABS;

	total_slab_blocks = slab_count * slab_config.slab_blocks;
	total_data_blocks = slab_count * slab_config.data_blocks;
	last_block = partition->offset + total_slab_blocks;

	*state = (struct slab_depot_state_2_0) {
		.slab_config = slab_config,
		.first_block = partition->offset,
		.last_block = last_block,
		.zone_count = zone_count,
	};

	vdo_log_debug("slab_depot last_block=%llu, total_data_blocks=%llu, slab_count=%zu, left_over=%llu",
		      (unsigned long long) last_block,
		      (unsigned long long) total_data_blocks, slab_count,
		      (unsigned long long) (partition->count - (last_block - partition->offset)));

	return VDO_SUCCESS;
}
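/*
 * A worked example with hypothetical numbers: given a slab depot partition of 1,000,000 blocks
 * and slabs of 32,768 blocks, slab_count is 30 and the 16,960 blocks left over are wasted
 * rather than forming a runt slab, so last_block lands 30 * 32,768 = 983,040 blocks past the
 * partition offset.
 */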
/**
 * vdo_configure_slab() - Measure and initialize the configuration to use for each slab.
 * @slab_size: The number of blocks per slab.
 * @slab_journal_blocks: The number of blocks for the slab journal.
 * @slab_config: The slab configuration to initialize.
 *
 * Return: VDO_SUCCESS or an error code.
 */
int vdo_configure_slab(block_count_t slab_size, block_count_t slab_journal_blocks,
		       struct slab_config *slab_config)
{
	block_count_t ref_blocks, meta_blocks, data_blocks;
	block_count_t flushing_threshold, remaining, blocking_threshold;
	block_count_t minimal_extra_space, scrubbing_threshold;

	if (slab_journal_blocks >= slab_size)
		return VDO_BAD_CONFIGURATION;

	/*
	 * This calculation should technically be a recurrence, but the total number of metadata
	 * blocks is currently less than a single block of ref_counts, so we'd gain at most one
	 * data block in each slab with more iteration.
	 */
	ref_blocks = vdo_get_saved_reference_count_size(slab_size - slab_journal_blocks);
	meta_blocks = (ref_blocks + slab_journal_blocks);

	/* Make sure test code hasn't configured slabs to be too small. */
	if (meta_blocks >= slab_size)
		return VDO_BAD_CONFIGURATION;

	/*
	 * If the slab size is very small, assume this must be a unit test and override the number
	 * of data blocks to be a power of two (wasting blocks in the slab). Many tests need their
	 * data_blocks fields to be the exact capacity of the configured volume, and that used to
	 * fall out naturally, since they used a power of two for the number of data blocks, the
	 * slab size was a power of two, and every block in a slab was a data block.
	 *
	 * TODO: Try to figure out some way of structuring testParameters and unit tests so this
	 * hack isn't needed without having to edit several unit tests every time the metadata size
	 * changes by one block.
	 */
	data_blocks = slab_size - meta_blocks;
	if ((slab_size < 1024) && !is_power_of_2(data_blocks))
		data_blocks = ((block_count_t) 1 << ilog2(data_blocks));

	/*
	 * Configure the slab journal thresholds. The flush threshold is 168 of 224 blocks in
	 * production, or 3/4ths, so we use this ratio for all sizes.
	 */
	flushing_threshold = ((slab_journal_blocks * 3) + 3) / 4;
	/*
	 * The blocking threshold should be far enough from the flushing threshold to not produce
	 * delays, but far enough from the end of the journal to allow multiple successive recovery
	 * failures.
	 */
	remaining = slab_journal_blocks - flushing_threshold;
	blocking_threshold = flushing_threshold + ((remaining * 5) / 7);
	/* The scrubbing threshold should be at least 2048 entries before the end of the journal. */
	minimal_extra_space = 1 + (MAXIMUM_VDO_USER_VIOS / VDO_SLAB_JOURNAL_FULL_ENTRIES_PER_BLOCK);
	scrubbing_threshold = blocking_threshold;
	if (slab_journal_blocks > minimal_extra_space)
		scrubbing_threshold = slab_journal_blocks - minimal_extra_space;
	if (blocking_threshold > scrubbing_threshold)
		blocking_threshold = scrubbing_threshold;

	*slab_config = (struct slab_config) {
		.slab_blocks = slab_size,
		.data_blocks = data_blocks,
		.reference_count_blocks = ref_blocks,
		.slab_journal_blocks = slab_journal_blocks,
		.slab_journal_flushing_threshold = flushing_threshold,
		.slab_journal_blocking_threshold = blocking_threshold,
		.slab_journal_scrubbing_threshold = scrubbing_threshold,
	};
	return VDO_SUCCESS;
}
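/*
 * A worked example of the thresholds above, using the production slab journal size of 224
 * blocks mentioned in the comment: flushing_threshold = ((224 * 3) + 3) / 4 = 168,
 * remaining = 224 - 168 = 56, and blocking_threshold = 168 + ((56 * 5) / 7) = 208. The
 * scrubbing threshold then backs off minimal_extra_space blocks from the end of the journal.
 */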
/**
 * vdo_decode_slab_journal_entry() - Decode a slab journal entry.
 * @block: The journal block holding the entry.
 * @entry_count: The number of the entry.
 *
 * Return: The decoded entry.
 */
struct slab_journal_entry vdo_decode_slab_journal_entry(struct packed_slab_journal_block *block,
							journal_entry_count_t entry_count)
{
	struct slab_journal_entry entry =
		vdo_unpack_slab_journal_entry(&block->payload.entries[entry_count]);

	if (block->header.has_block_map_increments &&
	    ((block->payload.full_entries.entry_types[entry_count / 8] &
	      ((u8) 1 << (entry_count % 8))) != 0))
		entry.operation = VDO_JOURNAL_BLOCK_MAP_REMAPPING;

	return entry;
}
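/*
 * For example, for entry 10 of a block with block map increments, the check above consults
 * bit 2 (10 % 8) of entry_types[1] (10 / 8); a set bit marks the entry as a block map
 * remapping.
 */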
/**
 * allocate_partition() - Allocate a partition and add it to a layout.
 * @layout: The layout containing the partition.
 * @id: The id of the partition.
 * @offset: The offset into the layout at which the partition begins.
 * @size: The size of the partition in blocks.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int allocate_partition(struct layout *layout, u8 id,
			      physical_block_number_t offset, block_count_t size)
{
	struct partition *partition;
	int result;

	result = vdo_allocate(1, struct partition, __func__, &partition);
	if (result != VDO_SUCCESS)
		return result;

	partition->id = id;
	partition->offset = offset;
	partition->count = size;
	partition->next = layout->head;
	layout->head = partition;

	return VDO_SUCCESS;
}

/**
 * make_partition() - Create a new partition from the beginning or end of the unused space in a
 *                    layout.
 * @layout: The layout.
 * @id: The id of the partition to make.
 * @size: The number of blocks to carve out; if 0, all remaining space will be used.
 * @beginning: True if the partition should start at the beginning of the unused space.
 *
 * Return: A success or error code, particularly VDO_NO_SPACE if there are fewer than size blocks
 *         remaining.
 */
static int __must_check make_partition(struct layout *layout, enum partition_id id,
				       block_count_t size, bool beginning)
{
	int result;
	physical_block_number_t offset;
	block_count_t free_blocks = layout->last_free - layout->first_free;

	if (size == 0) {
		if (free_blocks == 0)
			return VDO_NO_SPACE;
		size = free_blocks;
	} else if (size > free_blocks) {
		return VDO_NO_SPACE;
	}

	result = vdo_get_partition(layout, id, NULL);
	if (result != VDO_UNKNOWN_PARTITION)
		return VDO_PARTITION_EXISTS;

	offset = beginning ? layout->first_free : (layout->last_free - size);

	result = allocate_partition(layout, id, offset, size);
	if (result != VDO_SUCCESS)
		return result;

	layout->num_partitions++;
	if (beginning)
		layout->first_free += size;
	else
		layout->last_free = layout->last_free - size;

	return VDO_SUCCESS;
}

/**
 * vdo_initialize_layout() - Lay out the partitions of a vdo.
 * @size: The entire size of the vdo.
 * @offset: The start of the layout on the underlying storage in blocks.
 * @block_map_blocks: The size of the block map partition.
 * @journal_blocks: The size of the journal partition.
 * @summary_blocks: The size of the slab summary partition.
 * @layout: The layout to initialize.
 *
 * Return: VDO_SUCCESS or an error.
 */
int vdo_initialize_layout(block_count_t size, physical_block_number_t offset,
			  block_count_t block_map_blocks, block_count_t journal_blocks,
			  block_count_t summary_blocks, struct layout *layout)
{
	int result;
	block_count_t necessary_size =
		(offset + block_map_blocks + journal_blocks + summary_blocks);

	if (necessary_size > size)
		return vdo_log_error_strerror(VDO_NO_SPACE,
					      "Not enough space to make a VDO");

	*layout = (struct layout) {
		.start = offset,
		.size = size,
		.first_free = offset,
		.last_free = size,
		.num_partitions = 0,
		.head = NULL,
	};

	result = make_partition(layout, VDO_BLOCK_MAP_PARTITION, block_map_blocks, true);
	if (result != VDO_SUCCESS) {
		vdo_uninitialize_layout(layout);
		return result;
	}

	result = make_partition(layout, VDO_SLAB_SUMMARY_PARTITION, summary_blocks,
				false);
	if (result != VDO_SUCCESS) {
		vdo_uninitialize_layout(layout);
		return result;
	}

	result = make_partition(layout, VDO_RECOVERY_JOURNAL_PARTITION, journal_blocks,
				false);
	if (result != VDO_SUCCESS) {
		vdo_uninitialize_layout(layout);
		return result;
	}

	result = make_partition(layout, VDO_SLAB_DEPOT_PARTITION, 0, true);
	if (result != VDO_SUCCESS)
		vdo_uninitialize_layout(layout);

	return result;
}
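/*
 * A usage sketch (hypothetical sizes): to lay out a vdo on "size" blocks of storage with the
 * layout starting at block 1, one might write
 *
 *	struct layout layout;
 *	int result = vdo_initialize_layout(size, 1, block_map_blocks, journal_blocks,
 *					   summary_blocks, &layout);
 *
 * which carves the block map partition from the beginning of the free space, the slab summary
 * and recovery journal partitions from the end, and gives all remaining blocks to the slab
 * depot.
 */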
/**
 * vdo_uninitialize_layout() - Clean up a layout.
 * @layout: The layout to clean up.
 *
 * All partitions created by this layout become invalid pointers.
 */
void vdo_uninitialize_layout(struct layout *layout)
{
	while (layout->head != NULL) {
		struct partition *part = layout->head;

		layout->head = part->next;
		vdo_free(part);
	}

	memset(layout, 0, sizeof(struct layout));
}

/**
 * vdo_get_partition() - Get a partition by id.
 * @layout: The layout from which to get a partition.
 * @id: The id of the partition.
 * @partition_ptr: A pointer to hold the partition.
 *
 * Return: VDO_SUCCESS or an error.
 */
int vdo_get_partition(struct layout *layout, enum partition_id id,
		      struct partition **partition_ptr)
{
	struct partition *partition;

	for (partition = layout->head; partition != NULL; partition = partition->next) {
		if (partition->id == id) {
			if (partition_ptr != NULL)
				*partition_ptr = partition;
			return VDO_SUCCESS;
		}
	}

	return VDO_UNKNOWN_PARTITION;
}

/**
 * vdo_get_known_partition() - Get a partition by id from a validated layout.
 * @layout: The layout from which to get a partition.
 * @id: The id of the partition.
 *
 * Return: The partition.
 */
struct partition *vdo_get_known_partition(struct layout *layout, enum partition_id id)
{
	struct partition *partition;
	int result = vdo_get_partition(layout, id, &partition);

	VDO_ASSERT_LOG_ONLY(result == VDO_SUCCESS, "layout has expected partition: %u", id);

	return partition;
}

static void encode_layout(u8 *buffer, size_t *offset, const struct layout *layout)
{
	const struct partition *partition;
	size_t initial_offset;
	struct header header = VDO_LAYOUT_HEADER_3_0;

	BUILD_BUG_ON(sizeof(enum partition_id) != sizeof(u8));
	VDO_ASSERT_LOG_ONLY(layout->num_partitions <= U8_MAX,
			    "layout partition count must fit in a byte");

	vdo_encode_header(buffer, offset, &header);

	initial_offset = *offset;
	encode_u64_le(buffer, offset, layout->first_free);
	encode_u64_le(buffer, offset, layout->last_free);
	buffer[(*offset)++] = layout->num_partitions;

	VDO_ASSERT_LOG_ONLY(sizeof(struct layout_3_0) == *offset - initial_offset,
			    "encoded size of a layout header must match structure");

	for (partition = layout->head; partition != NULL; partition = partition->next) {
		buffer[(*offset)++] = partition->id;
		encode_u64_le(buffer, offset, partition->offset);
		/* This field only exists for backwards compatibility. */
		encode_u64_le(buffer, offset, 0);
		encode_u64_le(buffer, offset, partition->count);
	}

	VDO_ASSERT_LOG_ONLY(header.size == *offset - initial_offset,
			    "encoded size of a layout must match header size");
}
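/*
 * For reference, the encoding above lays a layout out as a header, then a layout_3_0 body
 * (first_free and last_free as little-endian u64s, then a one-byte partition count), then one
 * record per partition:
 *
 *	id	 1 byte
 *	offset	 8 bytes, little-endian
 *	base	 8 bytes, always encoded as zero, kept only for backwards compatibility
 *	count	 8 bytes, little-endian
 */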
static int decode_layout(u8 *buffer, size_t *offset, physical_block_number_t start,
			 block_count_t size, struct layout *layout)
{
	struct header header;
	struct layout_3_0 layout_header;
	struct partition *partition;
	size_t initial_offset;
	physical_block_number_t first_free, last_free;
	u8 partition_count;
	u8 i;
	int result;

	vdo_decode_header(buffer, offset, &header);
	/* Layout is variable size, so only do a minimum size check here. */
	result = vdo_validate_header(&VDO_LAYOUT_HEADER_3_0, &header, false, __func__);
	if (result != VDO_SUCCESS)
		return result;

	initial_offset = *offset;
	decode_u64_le(buffer, offset, &first_free);
	decode_u64_le(buffer, offset, &last_free);
	partition_count = buffer[(*offset)++];
	layout_header = (struct layout_3_0) {
		.first_free = first_free,
		.last_free = last_free,
		.partition_count = partition_count,
	};

	result = VDO_ASSERT(sizeof(struct layout_3_0) == *offset - initial_offset,
			    "decoded size of a layout header must match structure");
	if (result != VDO_SUCCESS)
		return result;

	layout->start = start;
	layout->size = size;
	layout->first_free = layout_header.first_free;
	layout->last_free = layout_header.last_free;
	layout->num_partitions = layout_header.partition_count;

	if (layout->num_partitions > VDO_PARTITION_COUNT) {
		return vdo_log_error_strerror(VDO_UNKNOWN_PARTITION,
					      "layout has extra partitions");
	}

	for (i = 0; i < layout->num_partitions; i++) {
		u8 id;
		u64 partition_offset, count;

		id = buffer[(*offset)++];
		decode_u64_le(buffer, offset, &partition_offset);
		/* Skip the zero field which exists only for backwards compatibility. */
		*offset += sizeof(u64);
		decode_u64_le(buffer, offset, &count);

		result = allocate_partition(layout, id, partition_offset, count);
		if (result != VDO_SUCCESS) {
			vdo_uninitialize_layout(layout);
			return result;
		}
	}

	/* Validate that the layout has all (and only) the required partitions. */
	for (i = 0; i < VDO_PARTITION_COUNT; i++) {
		result = vdo_get_partition(layout, REQUIRED_PARTITIONS[i], &partition);
		if (result != VDO_SUCCESS) {
			vdo_uninitialize_layout(layout);
			return vdo_log_error_strerror(result,
						      "layout is missing required partition %u",
						      REQUIRED_PARTITIONS[i]);
		}

		start += partition->count;
	}

	if (start != size) {
		vdo_uninitialize_layout(layout);
		return vdo_log_error_strerror(UDS_BAD_STATE,
					      "partitions do not cover the layout");
	}

	return VDO_SUCCESS;
}

/**
 * pack_vdo_config() - Convert a vdo_config to its packed on-disk representation.
 * @config: The vdo config to convert.
 *
 * Return: The platform-independent representation of the config.
 */
static struct packed_vdo_config pack_vdo_config(struct vdo_config config)
{
	return (struct packed_vdo_config) {
		.logical_blocks = __cpu_to_le64(config.logical_blocks),
		.physical_blocks = __cpu_to_le64(config.physical_blocks),
		.slab_size = __cpu_to_le64(config.slab_size),
		.recovery_journal_size = __cpu_to_le64(config.recovery_journal_size),
		.slab_journal_blocks = __cpu_to_le64(config.slab_journal_blocks),
	};
}
/**
 * pack_vdo_component() - Convert a vdo_component to its packed on-disk representation.
 * @component: The VDO component data to convert.
 *
 * Return: The platform-independent representation of the component.
 */
static struct packed_vdo_component_41_0 pack_vdo_component(const struct vdo_component component)
{
	return (struct packed_vdo_component_41_0) {
		.state = __cpu_to_le32(component.state),
		.complete_recoveries = __cpu_to_le64(component.complete_recoveries),
		.read_only_recoveries = __cpu_to_le64(component.read_only_recoveries),
		.config = pack_vdo_config(component.config),
		.nonce = __cpu_to_le64(component.nonce),
	};
}

static void encode_vdo_component(u8 *buffer, size_t *offset,
				 struct vdo_component component)
{
	struct packed_vdo_component_41_0 packed;

	encode_version_number(buffer, offset, VDO_COMPONENT_DATA_41_0);
	packed = pack_vdo_component(component);
	memcpy(buffer + *offset, &packed, sizeof(packed));
	*offset += sizeof(packed);
}

/**
 * unpack_vdo_config() - Convert a packed_vdo_config to its native in-memory representation.
 * @config: The packed vdo config to convert.
 *
 * Return: The native in-memory representation of the vdo config.
 */
static struct vdo_config unpack_vdo_config(struct packed_vdo_config config)
{
	return (struct vdo_config) {
		.logical_blocks = __le64_to_cpu(config.logical_blocks),
		.physical_blocks = __le64_to_cpu(config.physical_blocks),
		.slab_size = __le64_to_cpu(config.slab_size),
		.recovery_journal_size = __le64_to_cpu(config.recovery_journal_size),
		.slab_journal_blocks = __le64_to_cpu(config.slab_journal_blocks),
	};
}

/**
 * unpack_vdo_component_41_0() - Convert a packed_vdo_component_41_0 to its native in-memory
 *                               representation.
 * @component: The packed vdo component data to convert.
 *
 * Return: The native in-memory representation of the component.
 */
static struct vdo_component unpack_vdo_component_41_0(struct packed_vdo_component_41_0 component)
{
	return (struct vdo_component) {
		.state = __le32_to_cpu(component.state),
		.complete_recoveries = __le64_to_cpu(component.complete_recoveries),
		.read_only_recoveries = __le64_to_cpu(component.read_only_recoveries),
		.config = unpack_vdo_config(component.config),
		.nonce = __le64_to_cpu(component.nonce),
	};
}

/**
 * decode_vdo_component() - Decode the component data for the vdo itself out of the super block.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int decode_vdo_component(u8 *buffer, size_t *offset, struct vdo_component *component)
{
	struct version_number version;
	struct packed_vdo_component_41_0 packed;
	int result;

	decode_version_number(buffer, offset, &version);
	result = validate_version(version, VDO_COMPONENT_DATA_41_0,
				  "VDO component data");
	if (result != VDO_SUCCESS)
		return result;

	memcpy(&packed, buffer + *offset, sizeof(packed));
	*offset += sizeof(packed);
	*component = unpack_vdo_component_41_0(packed);
	return VDO_SUCCESS;
}
/**
 * vdo_validate_config() - Validate constraints on a VDO config.
 * @config: The VDO config.
 * @physical_block_count: The minimum block count of the underlying storage.
 * @logical_block_count: The expected logical size of the VDO, or 0 if the logical size may be
 *                       unspecified.
 *
 * Return: A success or error code.
 */
int vdo_validate_config(const struct vdo_config *config,
			block_count_t physical_block_count,
			block_count_t logical_block_count)
{
	struct slab_config slab_config;
	int result;

	result = VDO_ASSERT(config->slab_size > 0, "slab size unspecified");
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(is_power_of_2(config->slab_size),
			    "slab size must be a power of two");
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(config->slab_size <= (1 << MAX_VDO_SLAB_BITS),
			    "slab size must be less than or equal to 2^%d",
			    MAX_VDO_SLAB_BITS);
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(config->slab_journal_blocks >= MINIMUM_VDO_SLAB_JOURNAL_BLOCKS,
			    "slab journal size meets minimum size");
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(config->slab_journal_blocks <= config->slab_size,
			    "slab journal size is within expected bound");
	if (result != VDO_SUCCESS)
		return result;

	result = vdo_configure_slab(config->slab_size, config->slab_journal_blocks,
				    &slab_config);
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT((slab_config.data_blocks >= 1),
			    "slab must be able to hold at least one data block");
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(config->physical_blocks > 0, "physical blocks unspecified");
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(config->physical_blocks <= MAXIMUM_VDO_PHYSICAL_BLOCKS,
			    "physical block count %llu exceeds maximum %llu",
			    (unsigned long long) config->physical_blocks,
			    (unsigned long long) MAXIMUM_VDO_PHYSICAL_BLOCKS);
	if (result != VDO_SUCCESS)
		return VDO_OUT_OF_RANGE;

	if (physical_block_count != config->physical_blocks) {
		vdo_log_error("A physical size of %llu blocks was specified, not the %llu blocks configured in the vdo super block",
			      (unsigned long long) physical_block_count,
			      (unsigned long long) config->physical_blocks);
		return VDO_PARAMETER_MISMATCH;
	}

	if (logical_block_count > 0) {
		result = VDO_ASSERT((config->logical_blocks > 0),
				    "logical blocks unspecified");
		if (result != VDO_SUCCESS)
			return result;

		if (logical_block_count != config->logical_blocks) {
			vdo_log_error("A logical size of %llu blocks was specified, but that differs from the %llu blocks configured in the vdo super block",
				      (unsigned long long) logical_block_count,
				      (unsigned long long) config->logical_blocks);
			return VDO_PARAMETER_MISMATCH;
		}
	}

	result = VDO_ASSERT(config->logical_blocks <= MAXIMUM_VDO_LOGICAL_BLOCKS,
			    "logical blocks too large");
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(config->recovery_journal_size > 0,
			    "recovery journal size unspecified");
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(is_power_of_2(config->recovery_journal_size),
			    "recovery journal size must be a power of two");
	if (result != VDO_SUCCESS)
		return result;

	return result;
}
/**
 * vdo_destroy_component_states() - Clean up any allocations in a vdo_component_states.
 * @states: The component states to destroy.
 */
void vdo_destroy_component_states(struct vdo_component_states *states)
{
	if (states == NULL)
		return;

	vdo_uninitialize_layout(&states->layout);
}

/**
 * decode_components() - Decode the components now that we know the component data is a version we
 *                       understand.
 * @buffer: The buffer being decoded.
 * @offset: The offset to start decoding from.
 * @geometry: The vdo geometry.
 * @states: An object to hold the successfully decoded state.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int __must_check decode_components(u8 *buffer, size_t *offset,
					  struct volume_geometry *geometry,
					  struct vdo_component_states *states)
{
	int result;

	result = decode_vdo_component(buffer, offset, &states->vdo);
	if (result != VDO_SUCCESS)
		return result;

	result = decode_layout(buffer, offset, vdo_get_data_region_start(*geometry) + 1,
			       states->vdo.config.physical_blocks, &states->layout);
	if (result != VDO_SUCCESS)
		return result;

	result = decode_recovery_journal_state_7_0(buffer, offset,
						   &states->recovery_journal);
	if (result != VDO_SUCCESS)
		return result;

	result = decode_slab_depot_state_2_0(buffer, offset, &states->slab_depot);
	if (result != VDO_SUCCESS)
		return result;

	result = decode_block_map_state_2_0(buffer, offset, &states->block_map);
	if (result != VDO_SUCCESS)
		return result;

	VDO_ASSERT_LOG_ONLY(*offset == VDO_COMPONENT_DATA_OFFSET + VDO_COMPONENT_DATA_SIZE,
			    "All decoded component data was used");
	return VDO_SUCCESS;
}

/**
 * vdo_decode_component_states() - Decode the payload of a super block.
 * @buffer: The buffer containing the encoded super block contents.
 * @geometry: The vdo geometry.
 * @states: A pointer to hold the decoded states.
 *
 * Return: VDO_SUCCESS or an error.
 */
int vdo_decode_component_states(u8 *buffer, struct volume_geometry *geometry,
				struct vdo_component_states *states)
{
	int result;
	size_t offset = VDO_COMPONENT_DATA_OFFSET;

	/* This is for backwards compatibility. */
	decode_u32_le(buffer, &offset, &states->unused);

	/* Check the VDO volume version. */
	decode_version_number(buffer, &offset, &states->volume_version);
	result = validate_version(VDO_VOLUME_VERSION_67_0, states->volume_version,
				  "volume");
	if (result != VDO_SUCCESS)
		return result;

	result = decode_components(buffer, &offset, geometry, states);
	if (result != VDO_SUCCESS)
		vdo_uninitialize_layout(&states->layout);

	return result;
}
/**
 * vdo_validate_component_states() - Validate the decoded super block configuration.
 * @states: The state decoded from the super block.
 * @geometry_nonce: The nonce from the geometry block.
 * @physical_size: The minimum block count of the underlying storage.
 * @logical_size: The expected logical size of the VDO, or 0 if the logical size may be
 *                unspecified.
 *
 * Return: VDO_SUCCESS or an error if the configuration is invalid.
 */
int vdo_validate_component_states(struct vdo_component_states *states,
				  nonce_t geometry_nonce, block_count_t physical_size,
				  block_count_t logical_size)
{
	if (geometry_nonce != states->vdo.nonce) {
		return vdo_log_error_strerror(VDO_BAD_NONCE,
					      "Geometry nonce %llu does not match superblock nonce %llu",
					      (unsigned long long) geometry_nonce,
					      (unsigned long long) states->vdo.nonce);
	}

	return vdo_validate_config(&states->vdo.config, physical_size, logical_size);
}

/**
 * vdo_encode_component_states() - Encode the state of all vdo components in the super block.
 */
static void vdo_encode_component_states(u8 *buffer, size_t *offset,
					const struct vdo_component_states *states)
{
	/* This is for backwards compatibility. */
	encode_u32_le(buffer, offset, states->unused);
	encode_version_number(buffer, offset, states->volume_version);
	encode_vdo_component(buffer, offset, states->vdo);
	encode_layout(buffer, offset, &states->layout);
	encode_recovery_journal_state_7_0(buffer, offset, states->recovery_journal);
	encode_slab_depot_state_2_0(buffer, offset, states->slab_depot);
	encode_block_map_state_2_0(buffer, offset, states->block_map);

	VDO_ASSERT_LOG_ONLY(*offset == VDO_COMPONENT_DATA_OFFSET + VDO_COMPONENT_DATA_SIZE,
			    "All super block component data was encoded");
}

/**
 * vdo_encode_super_block() - Encode a super block into its on-disk representation.
 */
void vdo_encode_super_block(u8 *buffer, struct vdo_component_states *states)
{
	u32 checksum;
	struct header header = SUPER_BLOCK_HEADER_12_0;
	size_t offset = 0;

	header.size += VDO_COMPONENT_DATA_SIZE;
	vdo_encode_header(buffer, &offset, &header);
	vdo_encode_component_states(buffer, &offset, states);

	checksum = vdo_crc32(buffer, offset);
	encode_u32_le(buffer, &offset, checksum);

	/*
	 * Even though the buffer is a full block, to avoid the potential for corruption from a
	 * torn write, the entire encoding must fit in the first sector.
	 */
	VDO_ASSERT_LOG_ONLY(offset <= VDO_SECTOR_SIZE,
			    "entire superblock must fit in one sector");
}

/**
 * vdo_decode_super_block() - Decode a super block from its on-disk representation.
 */
int vdo_decode_super_block(u8 *buffer)
{
	struct header header;
	int result;
	u32 checksum, saved_checksum;
	size_t offset = 0;

	/* Decode and validate the header. */
	vdo_decode_header(buffer, &offset, &header);
	result = vdo_validate_header(&SUPER_BLOCK_HEADER_12_0, &header, false, __func__);
	if (result != VDO_SUCCESS)
		return result;

	if (header.size > VDO_COMPONENT_DATA_SIZE + sizeof(u32)) {
		/*
		 * We can't check the release version or checksum until we know the content size,
		 * so we have to assume a version mismatch on unexpected values.
		 */
		return vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION,
					      "super block contents too large: %zu",
					      header.size);
	}

	/* Skip past the component data for now, to verify the checksum. */
	offset += VDO_COMPONENT_DATA_SIZE;

	checksum = vdo_crc32(buffer, offset);
	decode_u32_le(buffer, &offset, &saved_checksum);

	result = VDO_ASSERT(offset == VDO_SUPER_BLOCK_FIXED_SIZE + VDO_COMPONENT_DATA_SIZE,
			    "must have decoded entire superblock payload");
	if (result != VDO_SUCCESS)
		return result;

	return ((checksum != saved_checksum) ? VDO_CHECKSUM_MISMATCH : VDO_SUCCESS);
}