/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2023 Red Hat
 */

#ifndef VDO_ENCODINGS_H
#define VDO_ENCODINGS_H

#include <linux/blk_types.h>
#include <linux/crc32.h>
#include <linux/limits.h>
#include <linux/uuid.h>

#include "numeric.h"

#include "constants.h"
#include "types.h"

/*
 * An in-memory representation of a version number for versioned structures on disk.
 *
 * A version number consists of two portions, a major version and a minor version. Any format
 * change which does not require an explicit upgrade step from the previous version should
 * increment the minor version. Any format change which either requires an explicit upgrade step,
 * or is wholly incompatible (i.e. can not be upgraded to), should increment the major version, and
 * set the minor version to 0.
 */
struct version_number {
	u32 major_version;
	u32 minor_version;
};

/*
 * A packed, machine-independent, on-disk representation of a version_number. Both fields are
 * stored in little-endian byte order.
 */
struct packed_version_number {
	__le32 major_version;
	__le32 minor_version;
} __packed;

/* The registry of component ids for use in headers (the values stored in struct header's id). */
#define VDO_SUPER_BLOCK 0
#define VDO_LAYOUT 1
#define VDO_RECOVERY_JOURNAL 2
#define VDO_SLAB_DEPOT 3
#define VDO_BLOCK_MAP 4
#define VDO_GEOMETRY_BLOCK 5

/*
 * The header for versioned data stored on disk. This is the native, in-memory form; struct
 * packed_header is the on-disk form, in which the size is a 64-bit little-endian value.
 */
struct header {
	u32 id; /* The component this is a header for */
	struct version_number version; /* The version of the data format */
	size_t size; /* The size of the data following this header */
};

/* A packed, machine-independent, on-disk representation of a component header.
*/
struct packed_header {
	__le32 id;
	struct packed_version_number version;
	__le64 size;
} __packed;

/* Constants describing the geometry block. */
enum {
	VDO_GEOMETRY_BLOCK_LOCATION = 0,
	VDO_GEOMETRY_MAGIC_NUMBER_SIZE = 8,
	VDO_DEFAULT_GEOMETRY_BLOCK_VERSION = 5,
};

/* The index configuration embedded in the volume geometry. */
struct index_config {
	u32 mem;
	u32 unused;
	bool sparse;
} __packed;

/* The regions of a volume; VDO_VOLUME_REGION_COUNT is the number of regions. */
enum volume_region_id {
	VDO_INDEX_REGION = 0,
	VDO_DATA_REGION = 1,
	VDO_VOLUME_REGION_COUNT,
};

struct volume_region {
	/* The ID of the region */
	enum volume_region_id id;
	/*
	 * The absolute starting offset on the device. The region continues until the next region
	 * begins.
	 */
	physical_block_number_t start_block;
} __packed;

struct volume_geometry {
	/* For backwards compatibility */
	u32 unused;
	/* The nonce of this volume */
	nonce_t nonce;
	/* The uuid of this volume */
	uuid_t uuid;
	/* The block offset to be applied to bios */
	block_count_t bio_offset;
	/* The regions in ID order */
	struct volume_region regions[VDO_VOLUME_REGION_COUNT];
	/* The index config */
	struct index_config index_config;
} __packed;

/*
 * This volume geometry struct is used for sizing only. It has the same members as struct
 * volume_geometry except that it has no bio_offset.
 */
struct volume_geometry_4_0 {
	/* For backwards compatibility */
	u32 unused;
	/* The nonce of this volume */
	nonce_t nonce;
	/* The uuid of this volume */
	uuid_t uuid;
	/* The regions in ID order */
	struct volume_region regions[VDO_VOLUME_REGION_COUNT];
	/* The index config */
	struct index_config index_config;
} __packed;

/*
 * The geometry block magic number. The array is one byte longer than
 * VDO_GEOMETRY_MAGIC_NUMBER_SIZE (presumably room for a terminating NUL -- confirm at the
 * definition).
 */
extern const u8 VDO_GEOMETRY_MAGIC_NUMBER[VDO_GEOMETRY_MAGIC_NUMBER_SIZE + 1];

/**
 * DOC: Block map entries
 *
 * The entry for each logical block in the block map is encoded into five bytes, which saves space
 * in both the on-disk and in-memory layouts.
It consists of the 36 low-order bits of a
 * physical_block_number_t (addressing 256 terabytes with a 4KB block size) and a 4-bit encoding of
 * a block_mapping_state.
 *
 * Of the 8 high bits of the 5-byte structure:
 *
 * Bits 7..4: The four highest bits of the 36-bit physical block number
 * Bits 3..0: The 4-bit block_mapping_state
 *
 * The following 4 bytes are the low order bytes of the physical block number, in little-endian
 * order.
 *
 * Conversion functions to and from a data location are provided.
 */
struct block_map_entry {
	/*
	 * The declaration order of the two nibbles is swapped by host byte order, presumably so
	 * that they occupy the bit positions documented above on either kind of host.
	 */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	unsigned mapping_state : 4;
	unsigned pbn_high_nibble : 4;
#else
	unsigned pbn_high_nibble : 4;
	unsigned mapping_state : 4;
#endif

	__le32 pbn_low_word;
} __packed;

/* The header of a block map page; the nonce and pbn are stored little-endian. */
struct block_map_page_header {
	__le64 nonce;
	__le64 pbn;

	/* May be non-zero on disk */
	u8 unused_long_word[8];

	/* Whether this page has been written twice to disk */
	bool initialized;

	/* Always zero on disk */
	u8 unused_byte1;

	/* May be non-zero on disk */
	u8 unused_byte2;
	u8 unused_byte3;
} __packed;

/*
 * A block map page: a packed version number and a page header, followed by the entries
 * themselves in a flexible array member.
 */
struct block_map_page {
	struct packed_version_number version;
	struct block_map_page_header header;
	struct block_map_entry entries[];
} __packed;

/* The possible results of vdo_validate_block_map_page(). */
enum block_map_page_validity {
	VDO_BLOCK_MAP_PAGE_VALID,
	VDO_BLOCK_MAP_PAGE_INVALID,
	/* Valid page found in the wrong location on disk */
	VDO_BLOCK_MAP_PAGE_BAD,
};

/* Version 2.0 of the block map state, in its packed form. */
struct block_map_state_2_0 {
	physical_block_number_t flat_page_origin;
	block_count_t flat_page_count;
	physical_block_number_t root_origin;
	block_count_t root_count;
} __packed;

/* One page_number_t for each of the VDO_BLOCK_MAP_TREE_HEIGHT levels of the block map tree. */
struct boundary {
	page_number_t levels[VDO_BLOCK_MAP_TREE_HEIGHT];
};

extern const struct header VDO_BLOCK_MAP_HEADER_2_0;

/* The state of the recovery journal as encoded in the VDO super
block. */
struct recovery_journal_state_7_0 {
	/* Sequence number to start the journal */
	sequence_number_t journal_start;
	/* Number of logical blocks used by VDO */
	block_count_t logical_blocks_used;
	/* Number of block map pages allocated */
	block_count_t block_map_data_blocks;
} __packed;

extern const struct header VDO_RECOVERY_JOURNAL_HEADER_7_0;

/* A count of journal entries; 16 bits wide, matching the packed entry_count fields below. */
typedef u16 journal_entry_count_t;

/*
 * A recovery journal entry stores three physical locations: a data location that is the value of a
 * single mapping in the block map tree, and the two locations of the block map pages and slots
 * that are acquiring and releasing a reference to the location. The journal entry also stores an
 * operation code that says whether the mapping is for a logical block or for the block map tree
 * itself.
 */
struct recovery_journal_entry {
	struct block_map_slot slot;
	struct data_location mapping;
	struct data_location unmapping;
	enum journal_operation operation;
};

/*
 * The packed, on-disk representation of a recovery journal entry. Per the bit ranges documented
 * on its members (bits 127..0), an entry occupies 16 bytes.
 */
struct packed_recovery_journal_entry {
	/*
	 * In little-endian bit order:
	 * Bits 15..12: The four highest bits of the 36-bit physical block number of the block map
	 *              tree page
	 * Bits 11..2:  The 10-bit block map page slot number
	 * Bits 1..0:   The journal_operation of the entry (this actually only requires 1 bit, but
	 *              it is convenient to keep the extra bit as part of this field).
	 */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	unsigned operation : 2;
	unsigned slot_low : 6;
	unsigned slot_high : 4;
	unsigned pbn_high_nibble : 4;
#else
	unsigned slot_low : 6;
	unsigned operation : 2;
	unsigned pbn_high_nibble : 4;
	unsigned slot_high : 4;
#endif

	/*
	 * Bits 47..16: The 32 low-order bits of the block map page PBN, in little-endian byte
	 * order
	 */
	__le32 pbn_low_word;

	/*
	 * Bits 87..48: The five-byte block map entry encoding the location that will be stored in
	 * the block map page slot
	 */
	struct block_map_entry mapping;

	/*
	 * Bits 127..88: The five-byte block map entry encoding the location that was stored in the
	 * block map page slot
	 */
	struct block_map_entry unmapping;
} __packed;

/*
 * The packed, on-disk representation of an old format recovery journal entry. Unlike the current
 * format it carries a single block map entry rather than a mapping/unmapping pair.
 */
struct packed_recovery_journal_entry_1 {
	/*
	 * In little-endian bit order:
	 * Bits 15..12: The four highest bits of the 36-bit physical block number of the block map
	 *              tree page
	 * Bits 11..2:  The 10-bit block map page slot number
	 * Bits 1..0:   The 2-bit journal_operation of the entry
	 */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	unsigned operation : 2;
	unsigned slot_low : 6;
	unsigned slot_high : 4;
	unsigned pbn_high_nibble : 4;
#else
	unsigned slot_low : 6;
	unsigned operation : 2;
	unsigned pbn_high_nibble : 4;
	unsigned slot_high : 4;
#endif

	/*
	 * Bits 47..16: The 32 low-order bits of the block map page PBN, in little-endian byte
	 * order
	 */
	__le32 pbn_low_word;

	/*
	 * Bits 87..48: The five-byte block map entry encoding the location that was or will be
	 * stored in the block map page slot
	 */
	struct block_map_entry block_map_entry;
} __packed;

/* The 2-bit operation codes used by the old (version 1) journal entry format. */
enum journal_operation_1 {
	VDO_JOURNAL_DATA_DECREMENT = 0,
	VDO_JOURNAL_DATA_INCREMENT = 1,
	VDO_JOURNAL_BLOCK_MAP_DECREMENT = 2,
	VDO_JOURNAL_BLOCK_MAP_INCREMENT = 3,
} __packed;

/* The unpacked, in-memory representation of a recovery journal block header. */
struct recovery_block_header {
	sequence_number_t block_map_head; /* Block map head sequence number */
	sequence_number_t slab_journal_head; /* Slab journal head seq. number */
	sequence_number_t sequence_number; /* Sequence number for this block */
	nonce_t nonce; /* A given VDO instance's nonce */
	block_count_t logical_blocks_used; /* Logical blocks in use */
	block_count_t block_map_data_blocks; /* Allocated block map pages */
	journal_entry_count_t entry_count; /* Number of entries written */
	u8 check_byte; /* The protection check byte */
	u8 recovery_count; /* Number of recoveries completed */
	enum vdo_metadata_type metadata_type; /* Metadata type */
};

/*
 * The packed, on-disk representation of a recovery journal block header. All fields are kept in
 * little-endian byte order. Note that the member order here differs from that of struct
 * recovery_block_header.
 */
struct packed_journal_header {
	/* Block map head 64-bit sequence number */
	__le64 block_map_head;

	/* Slab journal head 64-bit sequence number */
	__le64 slab_journal_head;

	/* The 64-bit sequence number for this block */
	__le64 sequence_number;

	/* A given VDO instance's 64-bit nonce */
	__le64 nonce;

	/* 8-bit metadata type (should always be one for the recovery journal) */
	u8 metadata_type;

	/* 16-bit count of the entries encoded in the block */
	__le16 entry_count;

	/* 64-bit count of the logical blocks used when this block was opened */
	__le64 logical_blocks_used;

	/* 64-bit count of the block map blocks used when this block was opened */
	__le64 block_map_data_blocks;

	/* The protection check byte */
	u8 check_byte;

	/* The number of recoveries completed */
	u8 recovery_count;
} __packed;

/* The on-disk form of one sector of a recovery journal block: a 3-byte header, then entries. */
struct packed_journal_sector {
	/* The protection check byte */
	u8 check_byte;

	/* The number of recoveries completed */
	u8 recovery_count;

	/* The number of entries in this sector */
	u8 entry_count;

	/* Journal entries for this sector */
	struct packed_recovery_journal_entry entries[];
} __packed;

enum {
	/* The number of entries in each sector (except the last) when filled */
	RECOVERY_JOURNAL_ENTRIES_PER_SECTOR =
		((VDO_SECTOR_SIZE - sizeof(struct packed_journal_sector)) /
		 sizeof(struct packed_recovery_journal_entry)),
	/*
	 * NOTE(review): the factor of 7 is presumably the number of entry-bearing sectors in a
	 * block (all but the sector holding the block header) -- confirm against
	 * VDO_SECTORS_PER_BLOCK.
	 */
	RECOVERY_JOURNAL_ENTRIES_PER_BLOCK = RECOVERY_JOURNAL_ENTRIES_PER_SECTOR * 7,
	/* The number of entries in a v1 recovery journal block. */
	RECOVERY_JOURNAL_1_ENTRIES_PER_BLOCK = 311,
	/* The number of entries in each v1 sector (except the last) when filled */
	RECOVERY_JOURNAL_1_ENTRIES_PER_SECTOR =
		((VDO_SECTOR_SIZE - sizeof(struct packed_journal_sector)) /
		 sizeof(struct packed_recovery_journal_entry_1)),
	/* The number of entries in the last sector when a block is full */
	RECOVERY_JOURNAL_1_ENTRIES_IN_LAST_SECTOR =
		(RECOVERY_JOURNAL_1_ENTRIES_PER_BLOCK % RECOVERY_JOURNAL_1_ENTRIES_PER_SECTOR),
};

/* A type representing a reference count of a block. */
typedef u8 vdo_refcount_t;

/* The absolute position of an entry in a recovery journal or slab journal. */
struct journal_point {
	sequence_number_t sequence_number;
	journal_entry_count_t entry_count;
};

/* A packed, platform-independent encoding of a struct journal_point. */
struct packed_journal_point {
	/*
	 * The packed representation is the little-endian 64-bit representation of the low-order 48
	 * bits of the sequence number, shifted up 16 bits, or'ed with the 16-bit entry count.
	 *
	 * Very long-term, the top 16 bits of the sequence number may not always be zero, as this
	 * encoding assumes--see BZ 1523240.
	 */
	__le64 encoded_point;
} __packed;

/* Special vdo_refcount_t values.
*/
#define EMPTY_REFERENCE_COUNT 0
enum {
	MAXIMUM_REFERENCE_COUNT = 254,
	PROVISIONAL_REFERENCE_COUNT = 255,
};

enum {
	/* The reference counts which fit in a sector after its packed_journal_point */
	COUNTS_PER_SECTOR =
		((VDO_SECTOR_SIZE - sizeof(struct packed_journal_point)) / sizeof(vdo_refcount_t)),
	/* The reference counts held by all of the sectors of a block */
	COUNTS_PER_BLOCK = COUNTS_PER_SECTOR * VDO_SECTORS_PER_BLOCK,
};

/* The format of each sector of a reference_block on disk. */
struct packed_reference_sector {
	struct packed_journal_point commit_point;
	vdo_refcount_t counts[COUNTS_PER_SECTOR];
} __packed;

/* A reference block on disk: VDO_SECTORS_PER_BLOCK packed reference sectors. */
struct packed_reference_block {
	struct packed_reference_sector sectors[VDO_SECTORS_PER_BLOCK];
};

/* Version 2.0 of the slab depot state, in its packed form. */
struct slab_depot_state_2_0 {
	struct slab_config slab_config;
	physical_block_number_t first_block;
	physical_block_number_t last_block;
	zone_count_t zone_count;
} __packed;

extern const struct header VDO_SLAB_DEPOT_HEADER_2_0;

/*
 * vdo_slab journal blocks may have one of two formats, depending upon whether or not any of the
 * entries in the block are block map increments. Since the steady state for a VDO is that all of
 * the necessary block map pages will be allocated, most slab journal blocks will have only data
 * entries. Such blocks can hold more entries, hence the two formats.
*/

/* A single slab journal entry */
struct slab_journal_entry {
	slab_block_number sbn;
	enum journal_operation operation;
	bool increment;
};

/*
 * A single slab journal entry in its on-disk form: three bytes holding a 23-bit offset (low 8,
 * middle 8 and high 7 bits) and a 1-bit increment flag.
 */
typedef struct {
	u8 offset_low8;
	u8 offset_mid8;

#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	unsigned offset_high7 : 7;
	unsigned increment : 1;
#else
	unsigned increment : 1;
	unsigned offset_high7 : 7;
#endif
} __packed packed_slab_journal_entry;

/* The unpacked representation of the header of a slab journal block */
struct slab_journal_block_header {
	/* Sequence number for head of journal */
	sequence_number_t head;
	/* Sequence number for this block */
	sequence_number_t sequence_number;
	/* The nonce for a given VDO instance */
	nonce_t nonce;
	/* Recovery journal point for last entry */
	struct journal_point recovery_point;
	/* Metadata type */
	enum vdo_metadata_type metadata_type;
	/* Whether this block contains block map increments */
	bool has_block_map_increments;
	/* The number of entries in the block */
	journal_entry_count_t entry_count;
};

/*
 * The packed, on-disk representation of a slab journal block header. All fields are kept in
 * little-endian byte order.
*/
struct packed_slab_journal_block_header {
	/* 64-bit sequence number for head of journal */
	__le64 head;
	/* 64-bit sequence number for this block */
	__le64 sequence_number;
	/* Recovery journal point for the last entry, packed into 64 bits */
	struct packed_journal_point recovery_point;
	/* The 64-bit nonce for a given VDO instance */
	__le64 nonce;
	/* 8-bit metadata type (should always be two, for the slab journal) */
	u8 metadata_type;
	/* Whether this block contains block map increments */
	bool has_block_map_increments;
	/* 16-bit count of the entries encoded in the block */
	__le16 entry_count;
} __packed;

enum {
	/* The bytes of a block left for entries after the packed header */
	VDO_SLAB_JOURNAL_PAYLOAD_SIZE =
		VDO_BLOCK_SIZE - sizeof(struct packed_slab_journal_block_header),
	/*
	 * In the full format each entry costs 25 bits: the 24 bits of a
	 * packed_slab_journal_entry plus one bit in the entry_types bit map.
	 */
	VDO_SLAB_JOURNAL_FULL_ENTRIES_PER_BLOCK = (VDO_SLAB_JOURNAL_PAYLOAD_SIZE * 8) / 25,
	/* The bytes needed for a bit map with one bit per full-format entry, rounded up */
	VDO_SLAB_JOURNAL_ENTRY_TYPES_SIZE =
		((VDO_SLAB_JOURNAL_FULL_ENTRIES_PER_BLOCK - 1) / 8) + 1,
	/* The entries which fit in a block which needs no entry_types bit map */
	VDO_SLAB_JOURNAL_ENTRIES_PER_BLOCK =
		(VDO_SLAB_JOURNAL_PAYLOAD_SIZE / sizeof(packed_slab_journal_entry)),
};

/* The payload of a slab journal block which has block map increments */
struct full_slab_journal_entries {
	/* The entries themselves */
	packed_slab_journal_entry entries[VDO_SLAB_JOURNAL_FULL_ENTRIES_PER_BLOCK];
	/* The bit map indicating which entries are block map increments */
	u8 entry_types[VDO_SLAB_JOURNAL_ENTRY_TYPES_SIZE];
} __packed;

/* The payload of a slab journal block, in either of its two formats. */
typedef union {
	/* Entries which include block map increments */
	struct full_slab_journal_entries full_entries;
	/* Entries which are only data updates */
	packed_slab_journal_entry entries[VDO_SLAB_JOURNAL_ENTRIES_PER_BLOCK];
	/* Ensure the payload fills to the end of the block */
	u8 space[VDO_SLAB_JOURNAL_PAYLOAD_SIZE];
} __packed slab_journal_payload;

/* A complete slab journal block on disk: the packed header followed by the payload. */
struct packed_slab_journal_block {
	struct packed_slab_journal_block_header header;
	slab_journal_payload payload;
} __packed;

/* The offset of a slab journal tail block. */
typedef u8 tail_block_offset_t;

/* A two-byte slab summary entry. */
struct slab_summary_entry {
	/* Bits 7..0: The offset of the tail block within the slab journal */
	tail_block_offset_t tail_block_offset;

#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	/* Bits 13..8: A hint about the fullness of the slab */
	unsigned int fullness_hint : 6;
	/* Bit 14: Whether the ref_counts must be loaded from the layer */
	unsigned int load_ref_counts : 1;
	/* Bit 15: The believed cleanliness of this slab */
	unsigned int is_dirty : 1;
#else
	/* Bit 15: The believed cleanliness of this slab */
	unsigned int is_dirty : 1;
	/* Bit 14: Whether the ref_counts must be loaded from the layer */
	unsigned int load_ref_counts : 1;
	/* Bits 13..8: A hint about the fullness of the slab */
	unsigned int fullness_hint : 6;
#endif
} __packed;

enum {
	/* The width of the fullness_hint bit-field of a slab_summary_entry */
	VDO_SLAB_SUMMARY_FULLNESS_HINT_BITS = 6,
	VDO_SLAB_SUMMARY_ENTRIES_PER_BLOCK = VDO_BLOCK_SIZE / sizeof(struct slab_summary_entry),
	VDO_SLAB_SUMMARY_BLOCKS_PER_ZONE = MAX_VDO_SLABS / VDO_SLAB_SUMMARY_ENTRIES_PER_BLOCK,
	VDO_SLAB_SUMMARY_BLOCKS = VDO_SLAB_SUMMARY_BLOCKS_PER_ZONE * MAX_VDO_PHYSICAL_ZONES,
};

/* The in-memory description of a layout; its partitions form a singly linked list from head. */
struct layout {
	physical_block_number_t start;
	block_count_t size;
	physical_block_number_t first_free;
	physical_block_number_t last_free;
	size_t num_partitions;
	struct partition *head;
};

struct partition {
	enum partition_id id; /* The id of this partition */
	physical_block_number_t offset; /* The offset into the layout of this partition */
	block_count_t count; /* The number of blocks in the partition */
	struct partition *next; /* A pointer to the next partition in the layout */
};

/* Version 3.0 of the layout, in its packed form. */
struct layout_3_0 {
	physical_block_number_t first_free;
	physical_block_number_t last_free;
	u8 partition_count;
} __packed;

/* Version 3.0 of a partition, in its packed form. */
struct partition_3_0 {
	enum partition_id id;
	physical_block_number_t offset;
	physical_block_number_t base; /* unused but retained for backwards compatibility */
	block_count_t count;
} __packed;

/*
 * The configuration of the VDO service.
 */
struct vdo_config {
	block_count_t logical_blocks; /* number of logical blocks */
	block_count_t physical_blocks; /* number of physical blocks */
	block_count_t slab_size; /* number of blocks in a slab */
	block_count_t recovery_journal_size; /* number of recovery journal blocks */
	block_count_t slab_journal_blocks; /* number of slab journal blocks */
};

/* This is the structure that captures the vdo fields saved as a super block component. */
struct vdo_component {
	enum vdo_state state;
	u64 complete_recoveries;
	u64 read_only_recoveries;
	struct vdo_config config;
	nonce_t nonce;
};

/*
 * A packed, machine-independent, on-disk representation of the vdo_config in the VDO component
 * data in the super block.
 */
struct packed_vdo_config {
	__le64 logical_blocks;
	__le64 physical_blocks;
	__le64 slab_size;
	__le64 recovery_journal_size;
	__le64 slab_journal_blocks;
} __packed;

/*
 * A packed, machine-independent, on-disk representation of version 41.0 of the VDO component data
 * in the super block.
 */
struct packed_vdo_component_41_0 {
	__le32 state;
	__le64 complete_recoveries;
	__le64 read_only_recoveries;
	struct packed_vdo_config config;
	__le64 nonce;
} __packed;

/*
 * The version of the on-disk format of a VDO volume. This should be incremented any time the
 * on-disk representation of any VDO structure changes. Changes which require only online upgrade
 * steps should increment the minor version.
Changes which require an offline upgrade or which can
 * not be upgraded to at all should increment the major version and set the minor version to 0.
 */
extern const struct version_number VDO_VOLUME_VERSION_67_0;

/* The encoded sizes, in bytes, of the pieces of the super block. */
enum {
	VDO_ENCODED_HEADER_SIZE = sizeof(struct packed_header),
	/* Each component is encoded as a packed header followed by its packed state */
	BLOCK_MAP_COMPONENT_ENCODED_SIZE =
		VDO_ENCODED_HEADER_SIZE + sizeof(struct block_map_state_2_0),
	RECOVERY_JOURNAL_COMPONENT_ENCODED_SIZE =
		VDO_ENCODED_HEADER_SIZE + sizeof(struct recovery_journal_state_7_0),
	SLAB_DEPOT_COMPONENT_ENCODED_SIZE =
		VDO_ENCODED_HEADER_SIZE + sizeof(struct slab_depot_state_2_0),
	VDO_PARTITION_COUNT = 4,
	VDO_LAYOUT_ENCODED_SIZE = (VDO_ENCODED_HEADER_SIZE +
				   sizeof(struct layout_3_0) +
				   (sizeof(struct partition_3_0) * VDO_PARTITION_COUNT)),
	/* NOTE(review): the extra u32 is presumably the super block checksum -- confirm */
	VDO_SUPER_BLOCK_FIXED_SIZE = VDO_ENCODED_HEADER_SIZE + sizeof(u32),
	VDO_MAX_COMPONENT_DATA_SIZE = VDO_SECTOR_SIZE - VDO_SUPER_BLOCK_FIXED_SIZE,
	VDO_COMPONENT_ENCODED_SIZE =
		(sizeof(struct packed_version_number) + sizeof(struct packed_vdo_component_41_0)),
	VDO_COMPONENT_DATA_OFFSET = VDO_ENCODED_HEADER_SIZE,
	/*
	 * The leading u32 and packed version number parallel the unused and volume_version
	 * members of struct vdo_component_states.
	 */
	VDO_COMPONENT_DATA_SIZE = (sizeof(u32) +
				   sizeof(struct packed_version_number) +
				   VDO_COMPONENT_ENCODED_SIZE +
				   VDO_LAYOUT_ENCODED_SIZE +
				   RECOVERY_JOURNAL_COMPONENT_ENCODED_SIZE +
				   SLAB_DEPOT_COMPONENT_ENCODED_SIZE +
				   BLOCK_MAP_COMPONENT_ENCODED_SIZE),
};

/* The entirety of the component data encoded in the VDO super block.
*/ 679 struct vdo_component_states { 680 /* For backwards compatibility */ 681 u32 unused; 682 683 /* The VDO volume version */ 684 struct version_number volume_version; 685 686 /* Components */ 687 struct vdo_component vdo; 688 struct block_map_state_2_0 block_map; 689 struct recovery_journal_state_7_0 recovery_journal; 690 struct slab_depot_state_2_0 slab_depot; 691 692 /* Our partitioning of the underlying storage */ 693 struct layout layout; 694 }; 695 696 /** 697 * vdo_are_same_version() - Check whether two version numbers are the same. 698 * @version_a: The first version. 699 * @version_b: The second version. 700 * 701 * Return: true if the two versions are the same. 702 */ 703 static inline bool vdo_are_same_version(struct version_number version_a, 704 struct version_number version_b) 705 { 706 return ((version_a.major_version == version_b.major_version) && 707 (version_a.minor_version == version_b.minor_version)); 708 } 709 710 /** 711 * vdo_pack_version_number() - Convert a version_number to its packed on-disk representation. 712 * @version: The version number to convert. 713 * 714 * Return: the platform-independent representation of the version 715 */ 716 static inline struct packed_version_number vdo_pack_version_number(struct version_number version) 717 { 718 return (struct packed_version_number) { 719 .major_version = __cpu_to_le32(version.major_version), 720 .minor_version = __cpu_to_le32(version.minor_version), 721 }; 722 } 723 724 /** 725 * vdo_unpack_version_number() - Convert a packed_version_number to its native in-memory 726 * representation. 727 * @version: The version number to convert. 728 * 729 * Return: The platform-independent representation of the version. 
730 */ 731 static inline struct version_number vdo_unpack_version_number(struct packed_version_number version) 732 { 733 return (struct version_number) { 734 .major_version = __le32_to_cpu(version.major_version), 735 .minor_version = __le32_to_cpu(version.minor_version), 736 }; 737 } 738 739 /** 740 * vdo_pack_header() - Convert a component header to its packed on-disk representation. 741 * @header: The header to convert. 742 * 743 * Return: the platform-independent representation of the header 744 */ 745 static inline struct packed_header vdo_pack_header(const struct header *header) 746 { 747 return (struct packed_header) { 748 .id = __cpu_to_le32(header->id), 749 .version = vdo_pack_version_number(header->version), 750 .size = __cpu_to_le64(header->size), 751 }; 752 } 753 754 /** 755 * vdo_unpack_header() - Convert a packed_header to its native in-memory representation. 756 * @header: The header to convert. 757 * 758 * Return: The platform-independent representation of the version. 759 */ 760 static inline struct header vdo_unpack_header(const struct packed_header *header) 761 { 762 return (struct header) { 763 .id = __le32_to_cpu(header->id), 764 .version = vdo_unpack_version_number(header->version), 765 .size = __le64_to_cpu(header->size), 766 }; 767 } 768 769 /** 770 * vdo_get_index_region_start() - Get the start of the index region from a geometry. 771 * @geometry: The geometry. 772 * 773 * Return: The start of the index region. 774 */ 775 static inline physical_block_number_t __must_check 776 vdo_get_index_region_start(struct volume_geometry geometry) 777 { 778 return geometry.regions[VDO_INDEX_REGION].start_block; 779 } 780 781 /** 782 * vdo_get_data_region_start() - Get the start of the data region from a geometry. 783 * @geometry: The geometry. 784 * 785 * Return: The start of the data region. 
786 */ 787 static inline physical_block_number_t __must_check 788 vdo_get_data_region_start(struct volume_geometry geometry) 789 { 790 return geometry.regions[VDO_DATA_REGION].start_block; 791 } 792 793 /** 794 * vdo_get_index_region_size() - Get the size of the index region from a geometry. 795 * @geometry: The geometry. 796 * 797 * Return: The size of the index region. 798 */ 799 static inline physical_block_number_t __must_check 800 vdo_get_index_region_size(struct volume_geometry geometry) 801 { 802 return vdo_get_data_region_start(geometry) - 803 vdo_get_index_region_start(geometry); 804 } 805 806 int __must_check vdo_parse_geometry_block(unsigned char *block, 807 struct volume_geometry *geometry); 808 809 static inline bool vdo_is_state_compressed(const enum block_mapping_state mapping_state) 810 { 811 return (mapping_state > VDO_MAPPING_STATE_UNCOMPRESSED); 812 } 813 814 static inline struct block_map_entry 815 vdo_pack_block_map_entry(physical_block_number_t pbn, enum block_mapping_state mapping_state) 816 { 817 return (struct block_map_entry) { 818 .mapping_state = (mapping_state & 0x0F), 819 .pbn_high_nibble = ((pbn >> 32) & 0x0F), 820 .pbn_low_word = __cpu_to_le32(pbn & UINT_MAX), 821 }; 822 } 823 824 static inline struct data_location vdo_unpack_block_map_entry(const struct block_map_entry *entry) 825 { 826 physical_block_number_t low32 = __le32_to_cpu(entry->pbn_low_word); 827 physical_block_number_t high4 = entry->pbn_high_nibble; 828 829 return (struct data_location) { 830 .pbn = ((high4 << 32) | low32), 831 .state = entry->mapping_state, 832 }; 833 } 834 835 static inline bool vdo_is_mapped_location(const struct data_location *location) 836 { 837 return (location->state != VDO_MAPPING_STATE_UNMAPPED); 838 } 839 840 static inline bool vdo_is_valid_location(const struct data_location *location) 841 { 842 if (location->pbn == VDO_ZERO_BLOCK) 843 return !vdo_is_state_compressed(location->state); 844 else 845 return vdo_is_mapped_location(location); 846 
} 847 848 static inline physical_block_number_t __must_check 849 vdo_get_block_map_page_pbn(const struct block_map_page *page) 850 { 851 return __le64_to_cpu(page->header.pbn); 852 } 853 854 struct block_map_page *vdo_format_block_map_page(void *buffer, nonce_t nonce, 855 physical_block_number_t pbn, 856 bool initialized); 857 858 enum block_map_page_validity __must_check vdo_validate_block_map_page(struct block_map_page *page, 859 nonce_t nonce, 860 physical_block_number_t pbn); 861 862 static inline page_count_t vdo_compute_block_map_page_count(block_count_t entries) 863 { 864 return DIV_ROUND_UP(entries, VDO_BLOCK_MAP_ENTRIES_PER_PAGE); 865 } 866 867 block_count_t __must_check vdo_compute_new_forest_pages(root_count_t root_count, 868 struct boundary *old_sizes, 869 block_count_t entries, 870 struct boundary *new_sizes); 871 872 /** 873 * vdo_pack_recovery_journal_entry() - Return the packed, on-disk representation of a recovery 874 * journal entry. 875 * @entry: The journal entry to pack. 876 * 877 * Return: The packed representation of the journal entry. 878 */ 879 static inline struct packed_recovery_journal_entry 880 vdo_pack_recovery_journal_entry(const struct recovery_journal_entry *entry) 881 { 882 return (struct packed_recovery_journal_entry) { 883 .operation = entry->operation, 884 .slot_low = entry->slot.slot & 0x3F, 885 .slot_high = (entry->slot.slot >> 6) & 0x0F, 886 .pbn_high_nibble = (entry->slot.pbn >> 32) & 0x0F, 887 .pbn_low_word = __cpu_to_le32(entry->slot.pbn & UINT_MAX), 888 .mapping = vdo_pack_block_map_entry(entry->mapping.pbn, 889 entry->mapping.state), 890 .unmapping = vdo_pack_block_map_entry(entry->unmapping.pbn, 891 entry->unmapping.state), 892 }; 893 } 894 895 /** 896 * vdo_unpack_recovery_journal_entry() - Unpack the on-disk representation of a recovery journal 897 * entry. 898 * @entry: The recovery journal entry to unpack. 899 * 900 * Return: The unpacked entry. 
901 */ 902 static inline struct recovery_journal_entry 903 vdo_unpack_recovery_journal_entry(const struct packed_recovery_journal_entry *entry) 904 { 905 physical_block_number_t low32 = __le32_to_cpu(entry->pbn_low_word); 906 physical_block_number_t high4 = entry->pbn_high_nibble; 907 908 return (struct recovery_journal_entry) { 909 .operation = entry->operation, 910 .slot = { 911 .pbn = ((high4 << 32) | low32), 912 .slot = (entry->slot_low | (entry->slot_high << 6)), 913 }, 914 .mapping = vdo_unpack_block_map_entry(&entry->mapping), 915 .unmapping = vdo_unpack_block_map_entry(&entry->unmapping), 916 }; 917 } 918 919 const char * __must_check vdo_get_journal_operation_name(enum journal_operation operation); 920 921 /** 922 * vdo_is_valid_recovery_journal_sector() - Determine whether the header of the given sector could 923 * describe a valid sector for the given journal block 924 * header. 925 * @header: The unpacked block header to compare against. 926 * @sector: The packed sector to check. 927 * @sector_number: The number of the sector being checked. 928 * 929 * Return: true if the sector matches the block header. 930 */ 931 static inline bool __must_check 932 vdo_is_valid_recovery_journal_sector(const struct recovery_block_header *header, 933 const struct packed_journal_sector *sector, 934 u8 sector_number) 935 { 936 if ((header->check_byte != sector->check_byte) || 937 (header->recovery_count != sector->recovery_count)) 938 return false; 939 940 if (header->metadata_type == VDO_METADATA_RECOVERY_JOURNAL_2) 941 return sector->entry_count <= RECOVERY_JOURNAL_ENTRIES_PER_SECTOR; 942 943 if (sector_number == 7) 944 return sector->entry_count <= RECOVERY_JOURNAL_1_ENTRIES_IN_LAST_SECTOR; 945 946 return sector->entry_count <= RECOVERY_JOURNAL_1_ENTRIES_PER_SECTOR; 947 } 948 949 /** 950 * vdo_compute_recovery_journal_block_number() - Compute the physical block number of the recovery 951 * journal block which would have a given sequence 952 * number. 
953 * @journal_size: The size of the journal. 954 * @sequence_number: The sequence number. 955 * 956 * Return: The pbn of the journal block which would the specified sequence number. 957 */ 958 static inline physical_block_number_t __must_check 959 vdo_compute_recovery_journal_block_number(block_count_t journal_size, 960 sequence_number_t sequence_number) 961 { 962 /* 963 * Since journal size is a power of two, the block number modulus can just be extracted 964 * from the low-order bits of the sequence. 965 */ 966 return (sequence_number & (journal_size - 1)); 967 } 968 969 /** 970 * vdo_get_journal_block_sector() - Find the recovery journal sector from the block header and 971 * sector number. 972 * @header: The header of the recovery journal block. 973 * @sector_number: The index of the sector (1-based). 974 * 975 * Return: A packed recovery journal sector. 976 */ 977 static inline struct packed_journal_sector * __must_check 978 vdo_get_journal_block_sector(struct packed_journal_header *header, int sector_number) 979 { 980 char *sector_data = ((char *) header) + (VDO_SECTOR_SIZE * sector_number); 981 982 return (struct packed_journal_sector *) sector_data; 983 } 984 985 /** 986 * vdo_pack_recovery_block_header() - Generate the packed representation of a recovery block 987 * header. 988 * @header: The header containing the values to encode. 989 * @packed: The header into which to pack the values. 
990 */ 991 static inline void vdo_pack_recovery_block_header(const struct recovery_block_header *header, 992 struct packed_journal_header *packed) 993 { 994 *packed = (struct packed_journal_header) { 995 .block_map_head = __cpu_to_le64(header->block_map_head), 996 .slab_journal_head = __cpu_to_le64(header->slab_journal_head), 997 .sequence_number = __cpu_to_le64(header->sequence_number), 998 .nonce = __cpu_to_le64(header->nonce), 999 .logical_blocks_used = __cpu_to_le64(header->logical_blocks_used), 1000 .block_map_data_blocks = __cpu_to_le64(header->block_map_data_blocks), 1001 .entry_count = __cpu_to_le16(header->entry_count), 1002 .check_byte = header->check_byte, 1003 .recovery_count = header->recovery_count, 1004 .metadata_type = header->metadata_type, 1005 }; 1006 } 1007 1008 /** 1009 * vdo_unpack_recovery_block_header() - Decode the packed representation of a recovery block 1010 * header. 1011 * @packed: The packed header to decode. 1012 * 1013 * Return: The unpacked header. 1014 */ 1015 static inline struct recovery_block_header 1016 vdo_unpack_recovery_block_header(const struct packed_journal_header *packed) 1017 { 1018 return (struct recovery_block_header) { 1019 .block_map_head = __le64_to_cpu(packed->block_map_head), 1020 .slab_journal_head = __le64_to_cpu(packed->slab_journal_head), 1021 .sequence_number = __le64_to_cpu(packed->sequence_number), 1022 .nonce = __le64_to_cpu(packed->nonce), 1023 .logical_blocks_used = __le64_to_cpu(packed->logical_blocks_used), 1024 .block_map_data_blocks = __le64_to_cpu(packed->block_map_data_blocks), 1025 .entry_count = __le16_to_cpu(packed->entry_count), 1026 .check_byte = packed->check_byte, 1027 .recovery_count = packed->recovery_count, 1028 .metadata_type = packed->metadata_type, 1029 }; 1030 } 1031 1032 /** 1033 * vdo_compute_slab_count() - Compute the number of slabs a depot with given parameters would have. 1034 * @first_block: PBN of the first data block. 1035 * @last_block: PBN of the last data block. 
1036 * @slab_size_shift: Exponent for the number of blocks per slab. 1037 * 1038 * Return: The number of slabs. 1039 */ 1040 static inline slab_count_t vdo_compute_slab_count(physical_block_number_t first_block, 1041 physical_block_number_t last_block, 1042 unsigned int slab_size_shift) 1043 { 1044 return (slab_count_t) ((last_block - first_block) >> slab_size_shift); 1045 } 1046 1047 int __must_check vdo_configure_slab_depot(const struct partition *partition, 1048 struct slab_config slab_config, 1049 zone_count_t zone_count, 1050 struct slab_depot_state_2_0 *state); 1051 1052 int __must_check vdo_configure_slab(block_count_t slab_size, 1053 block_count_t slab_journal_blocks, 1054 struct slab_config *slab_config); 1055 1056 /** 1057 * vdo_get_saved_reference_count_size() - Get the number of blocks required to save a reference 1058 * counts state covering the specified number of data 1059 * blocks. 1060 * @block_count: The number of physical data blocks that can be referenced. 1061 * 1062 * Return: The number of blocks required to save reference counts with the given block count. 1063 */ 1064 static inline block_count_t vdo_get_saved_reference_count_size(block_count_t block_count) 1065 { 1066 return DIV_ROUND_UP(block_count, COUNTS_PER_BLOCK); 1067 } 1068 1069 /** 1070 * vdo_get_slab_journal_start_block() - Get the physical block number of the start of the slab 1071 * journal relative to the start block allocator partition. 1072 * @slab_config: The slab configuration of the VDO. 1073 * @origin: The first block of the slab. 1074 */ 1075 static inline physical_block_number_t __must_check 1076 vdo_get_slab_journal_start_block(const struct slab_config *slab_config, 1077 physical_block_number_t origin) 1078 { 1079 return origin + slab_config->data_blocks + slab_config->reference_count_blocks; 1080 } 1081 1082 /** 1083 * vdo_advance_journal_point() - Move the given journal point forward by one entry. 1084 * @point: The journal point to adjust. 
1085 * @entries_per_block: The number of entries in one full block. 1086 */ 1087 static inline void vdo_advance_journal_point(struct journal_point *point, 1088 journal_entry_count_t entries_per_block) 1089 { 1090 point->entry_count++; 1091 if (point->entry_count == entries_per_block) { 1092 point->sequence_number++; 1093 point->entry_count = 0; 1094 } 1095 } 1096 1097 /** 1098 * vdo_before_journal_point() - Check whether the first point precedes the second point. 1099 * @first: The first journal point. 1100 * @second: The second journal point. 1101 * 1102 * Return: true if the first point precedes the second point. 1103 */ 1104 static inline bool vdo_before_journal_point(const struct journal_point *first, 1105 const struct journal_point *second) 1106 { 1107 return ((first->sequence_number < second->sequence_number) || 1108 ((first->sequence_number == second->sequence_number) && 1109 (first->entry_count < second->entry_count))); 1110 } 1111 1112 /** 1113 * vdo_pack_journal_point() - Encode the journal location represented by a 1114 * journal_point into a packed_journal_point. 1115 * @unpacked: The unpacked input point. 1116 * @packed: The packed output point. 1117 */ 1118 static inline void vdo_pack_journal_point(const struct journal_point *unpacked, 1119 struct packed_journal_point *packed) 1120 { 1121 packed->encoded_point = 1122 __cpu_to_le64((unpacked->sequence_number << 16) | unpacked->entry_count); 1123 } 1124 1125 /** 1126 * vdo_unpack_journal_point() - Decode the journal location represented by a packed_journal_point 1127 * into a journal_point. 1128 * @packed: The packed input point. 1129 * @unpacked: The unpacked output point. 
1130 */ 1131 static inline void vdo_unpack_journal_point(const struct packed_journal_point *packed, 1132 struct journal_point *unpacked) 1133 { 1134 u64 native = __le64_to_cpu(packed->encoded_point); 1135 1136 unpacked->sequence_number = (native >> 16); 1137 unpacked->entry_count = (native & 0xffff); 1138 } 1139 1140 /** 1141 * vdo_pack_slab_journal_block_header() - Generate the packed representation of a slab block 1142 * header. 1143 * @header: The header containing the values to encode. 1144 * @packed: The header into which to pack the values. 1145 */ 1146 static inline void 1147 vdo_pack_slab_journal_block_header(const struct slab_journal_block_header *header, 1148 struct packed_slab_journal_block_header *packed) 1149 { 1150 packed->head = __cpu_to_le64(header->head); 1151 packed->sequence_number = __cpu_to_le64(header->sequence_number); 1152 packed->nonce = __cpu_to_le64(header->nonce); 1153 packed->entry_count = __cpu_to_le16(header->entry_count); 1154 packed->metadata_type = header->metadata_type; 1155 packed->has_block_map_increments = header->has_block_map_increments; 1156 1157 vdo_pack_journal_point(&header->recovery_point, &packed->recovery_point); 1158 } 1159 1160 /** 1161 * vdo_unpack_slab_journal_block_header() - Decode the packed representation of a slab block 1162 * header. 1163 * @packed: The packed header to decode. 1164 * @header: The header into which to unpack the values. 
1165 */ 1166 static inline void 1167 vdo_unpack_slab_journal_block_header(const struct packed_slab_journal_block_header *packed, 1168 struct slab_journal_block_header *header) 1169 { 1170 *header = (struct slab_journal_block_header) { 1171 .head = __le64_to_cpu(packed->head), 1172 .sequence_number = __le64_to_cpu(packed->sequence_number), 1173 .nonce = __le64_to_cpu(packed->nonce), 1174 .entry_count = __le16_to_cpu(packed->entry_count), 1175 .metadata_type = packed->metadata_type, 1176 .has_block_map_increments = packed->has_block_map_increments, 1177 }; 1178 vdo_unpack_journal_point(&packed->recovery_point, &header->recovery_point); 1179 } 1180 1181 /** 1182 * vdo_pack_slab_journal_entry() - Generate the packed encoding of a slab journal entry. 1183 * @packed: The entry into which to pack the values. 1184 * @sbn: The slab block number of the entry to encode. 1185 * @is_increment: The increment flag. 1186 */ 1187 static inline void vdo_pack_slab_journal_entry(packed_slab_journal_entry *packed, 1188 slab_block_number sbn, bool is_increment) 1189 { 1190 packed->offset_low8 = (sbn & 0x0000FF); 1191 packed->offset_mid8 = (sbn & 0x00FF00) >> 8; 1192 packed->offset_high7 = (sbn & 0x7F0000) >> 16; 1193 packed->increment = is_increment ? 1 : 0; 1194 } 1195 1196 /** 1197 * vdo_unpack_slab_journal_entry() - Decode the packed representation of a slab journal entry. 1198 * @packed: The packed entry to decode. 1199 * 1200 * Return: The decoded slab journal entry. 
1201 */ 1202 static inline struct slab_journal_entry __must_check 1203 vdo_unpack_slab_journal_entry(const packed_slab_journal_entry *packed) 1204 { 1205 struct slab_journal_entry entry; 1206 1207 entry.sbn = packed->offset_high7; 1208 entry.sbn <<= 8; 1209 entry.sbn |= packed->offset_mid8; 1210 entry.sbn <<= 8; 1211 entry.sbn |= packed->offset_low8; 1212 entry.operation = VDO_JOURNAL_DATA_REMAPPING; 1213 entry.increment = packed->increment; 1214 return entry; 1215 } 1216 1217 struct slab_journal_entry __must_check 1218 vdo_decode_slab_journal_entry(struct packed_slab_journal_block *block, 1219 journal_entry_count_t entry_count); 1220 1221 /** 1222 * vdo_get_slab_summary_hint_shift() - Compute the shift for slab summary hints. 1223 * @slab_size_shift: Exponent for the number of blocks per slab. 1224 * 1225 * Return: The hint shift. 1226 */ 1227 static inline u8 __must_check vdo_get_slab_summary_hint_shift(unsigned int slab_size_shift) 1228 { 1229 return ((slab_size_shift > VDO_SLAB_SUMMARY_FULLNESS_HINT_BITS) ? 
1230 (slab_size_shift - VDO_SLAB_SUMMARY_FULLNESS_HINT_BITS) : 1231 0); 1232 } 1233 1234 int __must_check vdo_initialize_layout(block_count_t size, 1235 physical_block_number_t offset, 1236 block_count_t block_map_blocks, 1237 block_count_t journal_blocks, 1238 block_count_t summary_blocks, 1239 struct layout *layout); 1240 1241 void vdo_uninitialize_layout(struct layout *layout); 1242 1243 int __must_check vdo_get_partition(struct layout *layout, enum partition_id id, 1244 struct partition **partition_ptr); 1245 1246 struct partition * __must_check vdo_get_known_partition(struct layout *layout, 1247 enum partition_id id); 1248 1249 int vdo_validate_config(const struct vdo_config *config, 1250 block_count_t physical_block_count, 1251 block_count_t logical_block_count); 1252 1253 void vdo_destroy_component_states(struct vdo_component_states *states); 1254 1255 int __must_check vdo_decode_component_states(u8 *buffer, 1256 struct volume_geometry *geometry, 1257 struct vdo_component_states *states); 1258 1259 int __must_check vdo_validate_component_states(struct vdo_component_states *states, 1260 nonce_t geometry_nonce, 1261 block_count_t physical_size, 1262 block_count_t logical_size); 1263 1264 void vdo_encode_super_block(u8 *buffer, struct vdo_component_states *states); 1265 int __must_check vdo_decode_super_block(u8 *buffer); 1266 1267 /* We start with 0L and postcondition with ~0L to match our historical usage in userspace. */ 1268 static inline u32 vdo_crc32(const void *buf, unsigned long len) 1269 { 1270 return (crc32(0L, buf, len) ^ ~0L); 1271 } 1272 1273 #endif /* VDO_ENCODINGS_H */ 1274