1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright 2023 Red Hat
4 */
5
6 #include "encodings.h"
7
8 #include <linux/log2.h>
9
10 #include "logger.h"
11 #include "memory-alloc.h"
12 #include "permassert.h"
13
14 #include "constants.h"
15 #include "status-codes.h"
16 #include "types.h"
17
/** The maximum logical space is 4 petabytes, which is 1 terablock. */
static const block_count_t MAXIMUM_VDO_LOGICAL_BLOCKS = 1024ULL * 1024 * 1024 * 1024;

/** The maximum physical space is 256 terabytes, which is 64 gigablocks. */
static const block_count_t MAXIMUM_VDO_PHYSICAL_BLOCKS = 1024ULL * 1024 * 1024 * 64;

/*
 * On-disk framing of the geometry block: magic string, then a packed header,
 * then (between header and checksum) the volume_geometry payload, and finally
 * a CRC-32 checksum covering everything before it — see
 * vdo_parse_geometry_block() for the decode order.
 */
struct geometry_block {
	char magic_number[VDO_GEOMETRY_MAGIC_NUMBER_SIZE];
	struct packed_header header;
	u32 checksum;
} __packed;

static const struct header GEOMETRY_BLOCK_HEADER_5_0 = {
	.id = VDO_GEOMETRY_BLOCK,
	.version = {
		.major_version = 5,
		.minor_version = 0,
	},
	/*
	 * Note: this size isn't just the payload size following the header, like it is everywhere
	 * else in VDO.
	 */
	.size = sizeof(struct geometry_block) + sizeof(struct volume_geometry),
};

static const struct header GEOMETRY_BLOCK_HEADER_4_0 = {
	.id = VDO_GEOMETRY_BLOCK,
	.version = {
		.major_version = 4,
		.minor_version = 0,
	},
	/*
	 * Note: this size isn't just the payload size following the header, like it is everywhere
	 * else in VDO.
	 */
	.size = sizeof(struct geometry_block) + sizeof(struct volume_geometry_4_0),
};

/* The +1 holds the string literal's NUL terminator; on disk only
 * VDO_GEOMETRY_MAGIC_NUMBER_SIZE bytes are compared. */
const u8 VDO_GEOMETRY_MAGIC_NUMBER[VDO_GEOMETRY_MAGIC_NUMBER_SIZE + 1] = "dmvdo001";

/*
 * On-disk size of struct block_map_page_header; checked against the in-memory
 * struct by a BUILD_BUG_ON in vdo_validate_block_map_page().
 */
#define PAGE_HEADER_4_1_SIZE (8 + 8 + 8 + 1 + 1 + 1 + 1)

static const struct version_number BLOCK_MAP_4_1 = {
	.major_version = 4,
	.minor_version = 1,
};

const struct header VDO_BLOCK_MAP_HEADER_2_0 = {
	.id = VDO_BLOCK_MAP,
	.version = {
		.major_version = 2,
		.minor_version = 0,
	},
	.size = sizeof(struct block_map_state_2_0),
};

const struct header VDO_RECOVERY_JOURNAL_HEADER_7_0 = {
	.id = VDO_RECOVERY_JOURNAL,
	.version = {
		.major_version = 7,
		.minor_version = 0,
	},
	.size = sizeof(struct recovery_journal_state_7_0),
};

const struct header VDO_SLAB_DEPOT_HEADER_2_0 = {
	.id = VDO_SLAB_DEPOT,
	.version = {
		.major_version = 2,
		.minor_version = 0,
	},
	.size = sizeof(struct slab_depot_state_2_0),
};

static const struct header VDO_LAYOUT_HEADER_3_0 = {
	.id = VDO_LAYOUT,
	.version = {
		.major_version = 3,
		.minor_version = 0,
	},
	.size = sizeof(struct layout_3_0) + (sizeof(struct partition_3_0) * VDO_PARTITION_COUNT),
};

/*
 * NOTE(review): presumably the partitions every decoded layout must contain;
 * not referenced in this portion of the file — confirm against the layout
 * decode path.
 */
static const enum partition_id REQUIRED_PARTITIONS[] = {
	VDO_BLOCK_MAP_PARTITION,
	VDO_SLAB_DEPOT_PARTITION,
	VDO_RECOVERY_JOURNAL_PARTITION,
	VDO_SLAB_SUMMARY_PARTITION,
};

/*
 * The current version for the data encoded in the super block. This must be changed any time there
 * is a change to encoding of the component data of any VDO component.
 */
static const struct version_number VDO_COMPONENT_DATA_41_0 = {
	.major_version = 41,
	.minor_version = 0,
};

const struct version_number VDO_VOLUME_VERSION_67_0 = {
	.major_version = 67,
	.minor_version = 0,
};

static const struct header SUPER_BLOCK_HEADER_12_0 = {
	.id = VDO_SUPER_BLOCK,
	.version = {
		.major_version = 12,
		.minor_version = 0,
	},

	/* This is the minimum size, if the super block contains no components. */
	.size = VDO_SUPER_BLOCK_FIXED_SIZE - VDO_ENCODED_HEADER_SIZE,
};
132
133 /**
134 * validate_version() - Check whether a version matches an expected version.
135 * @expected_version: The expected version.
136 * @actual_version: The version being validated.
137 * @component_name: The name of the component or the calling function (for error logging).
138 *
139 * Logs an error describing a mismatch.
140 *
141 * Return: VDO_SUCCESS if the versions are the same,
142 * VDO_UNSUPPORTED_VERSION if the versions don't match.
143 */
validate_version(struct version_number expected_version,struct version_number actual_version,const char * component_name)144 static int __must_check validate_version(struct version_number expected_version,
145 struct version_number actual_version,
146 const char *component_name)
147 {
148 if (!vdo_are_same_version(expected_version, actual_version)) {
149 return vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION,
150 "%s version mismatch, expected %d.%d, got %d.%d",
151 component_name,
152 expected_version.major_version,
153 expected_version.minor_version,
154 actual_version.major_version,
155 actual_version.minor_version);
156 }
157
158 return VDO_SUCCESS;
159 }
160
161 /**
162 * vdo_validate_header() - Check whether a header matches expectations.
163 * @expected_header: The expected header.
164 * @actual_header: The header being validated.
165 * @exact_size: If true, the size fields of the two headers must be the same, otherwise it is
166 * required that actual_header.size >= expected_header.size.
167 * @name: The name of the component or the calling function (for error logging).
168 *
169 * Logs an error describing the first mismatch found.
170 *
171 * Return: VDO_SUCCESS if the header meets expectations,
172 * VDO_INCORRECT_COMPONENT if the component ids don't match,
173 * VDO_UNSUPPORTED_VERSION if the versions or sizes don't match.
174 */
vdo_validate_header(const struct header * expected_header,const struct header * actual_header,bool exact_size,const char * name)175 int vdo_validate_header(const struct header *expected_header,
176 const struct header *actual_header, bool exact_size,
177 const char *name)
178 {
179 int result;
180
181 if (expected_header->id != actual_header->id) {
182 return vdo_log_error_strerror(VDO_INCORRECT_COMPONENT,
183 "%s ID mismatch, expected %d, got %d",
184 name, expected_header->id,
185 actual_header->id);
186 }
187
188 result = validate_version(expected_header->version, actual_header->version,
189 name);
190 if (result != VDO_SUCCESS)
191 return result;
192
193 if ((expected_header->size > actual_header->size) ||
194 (exact_size && (expected_header->size < actual_header->size))) {
195 return vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION,
196 "%s size mismatch, expected %zu, got %zu",
197 name, expected_header->size,
198 actual_header->size);
199 }
200
201 return VDO_SUCCESS;
202 }
203
/* Pack @version and append it to @buffer at @*offset, advancing the offset. */
static void encode_version_number(u8 *buffer, size_t *offset,
				  struct version_number version)
{
	struct packed_version_number packed = vdo_pack_version_number(version);
	size_t where = *offset;

	memcpy(buffer + where, &packed, sizeof(packed));
	*offset = where + sizeof(packed);
}
212
/* Pack @header and append it to @buffer at @*offset, advancing the offset. */
void vdo_encode_header(u8 *buffer, size_t *offset, const struct header *header)
{
	struct packed_header packed = vdo_pack_header(header);
	size_t where = *offset;

	memcpy(buffer + where, &packed, sizeof(packed));
	*offset = where + sizeof(packed);
}
220
/* Read a packed version from @buffer at @*offset into @version, advancing the offset. */
static void decode_version_number(u8 *buffer, size_t *offset,
				  struct version_number *version)
{
	struct packed_version_number packed;
	size_t where = *offset;

	memcpy(&packed, buffer + where, sizeof(packed));
	*offset = where + sizeof(packed);
	*version = vdo_unpack_version_number(packed);
}
230
/* Read a packed header from @buffer at @*offset into @header, advancing the offset. */
void vdo_decode_header(u8 *buffer, size_t *offset, struct header *header)
{
	struct packed_header packed;
	size_t where = *offset;

	memcpy(&packed, buffer + where, sizeof(packed));
	*offset = where + sizeof(packed);
	*header = vdo_unpack_header(&packed);
}
240
/**
 * decode_volume_geometry() - Decode the on-disk representation of a volume geometry from a buffer.
 * @buffer: A buffer to decode from.
 * @offset: The offset in the buffer at which to decode; advanced past the decoded fields.
 * @geometry: The structure to receive the decoded fields.
 * @version: The geometry block version to decode.
 */
static void decode_volume_geometry(u8 *buffer, size_t *offset,
				   struct volume_geometry *geometry, u32 version)
{
	u32 unused, mem;
	enum volume_region_id id;
	nonce_t nonce;
	block_count_t bio_offset = 0;
	bool sparse;

	/* This is for backwards compatibility. */
	decode_u32_le(buffer, offset, &unused);
	geometry->unused = unused;

	decode_u64_le(buffer, offset, &nonce);
	geometry->nonce = nonce;

	/* The UUID is stored as raw bytes, not in a packed numeric field. */
	memcpy((unsigned char *) &geometry->uuid, buffer + *offset, sizeof(uuid_t));
	*offset += sizeof(uuid_t);

	/* The bio_offset field was added after version 4; older geometries use 0. */
	if (version > 4)
		decode_u64_le(buffer, offset, &bio_offset);
	geometry->bio_offset = bio_offset;

	/* Each region is a (u32 id, u64 start block) pair, in index order. */
	for (id = 0; id < VDO_VOLUME_REGION_COUNT; id++) {
		physical_block_number_t start_block;
		enum volume_region_id saved_id;

		decode_u32_le(buffer, offset, &saved_id);
		decode_u64_le(buffer, offset, &start_block);

		geometry->regions[id] = (struct volume_region) {
			.id = saved_id,
			.start_block = start_block,
		};
	}

	decode_u32_le(buffer, offset, &mem);
	/*
	 * NOTE(review): 4 bytes are skipped here without being decoded —
	 * presumably an obsolete index-config field retained for on-disk
	 * layout compatibility; confirm against the geometry format spec.
	 */
	*offset += sizeof(u32);
	sparse = buffer[(*offset)++];

	geometry->index_config = (struct index_config) {
		.mem = mem,
		.sparse = sparse,
	};
}
293
/**
 * vdo_parse_geometry_block() - Decode and validate an encoded geometry block.
 * @block: The encoded geometry block.
 * @geometry: The structure to receive the decoded fields.
 *
 * Return: VDO_SUCCESS on success; VDO_BAD_MAGIC if the magic string is wrong,
 *         VDO_CHECKSUM_MISMATCH if the checksum does not verify, or another
 *         error from header validation.
 */
int __must_check vdo_parse_geometry_block(u8 *block, struct volume_geometry *geometry)
{
	u32 checksum, saved_checksum;
	struct header header;
	size_t offset = 0;
	int result;

	/* The block must begin with the geometry magic string. */
	if (memcmp(block, VDO_GEOMETRY_MAGIC_NUMBER, VDO_GEOMETRY_MAGIC_NUMBER_SIZE) != 0)
		return VDO_BAD_MAGIC;
	offset += VDO_GEOMETRY_MAGIC_NUMBER_SIZE;

	/* Select the expected header by the major version actually on disk. */
	vdo_decode_header(block, &offset, &header);
	if (header.version.major_version <= 4) {
		result = vdo_validate_header(&GEOMETRY_BLOCK_HEADER_4_0, &header,
					     true, __func__);
	} else {
		result = vdo_validate_header(&GEOMETRY_BLOCK_HEADER_5_0, &header,
					     true, __func__);
	}
	if (result != VDO_SUCCESS)
		return result;

	decode_volume_geometry(block, &offset, geometry, header.version.major_version);

	/* Only the u32 checksum should remain beyond what was decoded. */
	result = VDO_ASSERT(header.size == offset + sizeof(u32),
			    "should have decoded up to the geometry checksum");
	if (result != VDO_SUCCESS)
		return result;

	/*
	 * Decode and verify the checksum. The CRC covers everything before the
	 * checksum field itself, so it must be computed before decoding the
	 * saved value advances the offset.
	 */
	checksum = vdo_crc32(block, offset);
	decode_u32_le(block, &offset, &saved_checksum);

	return ((checksum == saved_checksum) ? VDO_SUCCESS : VDO_CHECKSUM_MISMATCH);
}
334
/*
 * Initialize @buffer (one full VDO block) as an empty 4.1 block map page with
 * the given nonce, pbn, and initialization flag. Returns the buffer cast to a
 * block_map_page.
 */
struct block_map_page *vdo_format_block_map_page(void *buffer, nonce_t nonce,
						 physical_block_number_t pbn,
						 bool initialized)
{
	struct block_map_page *page = buffer;

	memset(buffer, 0, VDO_BLOCK_SIZE);
	page->version = vdo_pack_version_number(BLOCK_MAP_4_1);
	page->header = (struct block_map_page_header) {
		.nonce = __cpu_to_le64(nonce),
		.pbn = __cpu_to_le64(pbn),
		.initialized = initialized,
	};
	return page;
}
348
/*
 * Check that @page is a valid 4.1 block map page for the given @nonce and
 * @pbn: invalid if uninitialized, the wrong version, or the wrong nonce; bad
 * (but recognizable) if only the pbn disagrees.
 */
enum block_map_page_validity vdo_validate_block_map_page(struct block_map_page *page,
							 nonce_t nonce,
							 physical_block_number_t pbn)
{
	/* The in-memory header must match the 4.1 on-disk layout exactly. */
	BUILD_BUG_ON(sizeof(struct block_map_page_header) != PAGE_HEADER_4_1_SIZE);

	if (!page->header.initialized)
		return VDO_BLOCK_MAP_PAGE_INVALID;

	if (!vdo_are_same_version(BLOCK_MAP_4_1, vdo_unpack_version_number(page->version)))
		return VDO_BLOCK_MAP_PAGE_INVALID;

	if (nonce != __le64_to_cpu(page->header.nonce))
		return VDO_BLOCK_MAP_PAGE_INVALID;

	return ((pbn == vdo_get_block_map_page_pbn(page)) ?
		VDO_BLOCK_MAP_PAGE_VALID : VDO_BLOCK_MAP_PAGE_BAD);
}
365
/**
 * decode_block_map_state_2_0() - Decode block map component state version 2.0 from a buffer.
 * @buffer: The buffer being decoded.
 * @offset: The offset to start decoding from; advanced past the decoded state.
 * @state: A pointer to a block map state to hold the result of a successful decode.
 *
 * Return: VDO_SUCCESS or an error code.
 */
static int decode_block_map_state_2_0(u8 *buffer, size_t *offset,
				      struct block_map_state_2_0 *state)
{
	size_t initial_offset;
	block_count_t flat_page_count, root_count;
	physical_block_number_t flat_page_origin, root_origin;
	struct header header;
	int result;

	vdo_decode_header(buffer, offset, &header);
	result = vdo_validate_header(&VDO_BLOCK_MAP_HEADER_2_0, &header, true, __func__);
	if (result != VDO_SUCCESS)
		return result;

	initial_offset = *offset;

	decode_u64_le(buffer, offset, &flat_page_origin);
	/*
	 * Log the locals just decoded, not the fields of *state: *state is an
	 * output parameter and has not been written yet, so its contents here
	 * are whatever the caller's memory happened to hold.
	 */
	result = VDO_ASSERT(flat_page_origin == VDO_BLOCK_MAP_FLAT_PAGE_ORIGIN,
			    "Flat page origin must be %u (recorded as %llu)",
			    VDO_BLOCK_MAP_FLAT_PAGE_ORIGIN,
			    (unsigned long long) flat_page_origin);
	if (result != VDO_SUCCESS)
		return result;

	decode_u64_le(buffer, offset, &flat_page_count);
	result = VDO_ASSERT(flat_page_count == 0,
			    "Flat page count must be 0 (recorded as %llu)",
			    (unsigned long long) flat_page_count);
	if (result != VDO_SUCCESS)
		return result;

	decode_u64_le(buffer, offset, &root_origin);
	decode_u64_le(buffer, offset, &root_count);

	result = VDO_ASSERT(VDO_BLOCK_MAP_HEADER_2_0.size == *offset - initial_offset,
			    "decoded block map component size must match header size");
	if (result != VDO_SUCCESS)
		return result;

	*state = (struct block_map_state_2_0) {
		.flat_page_origin = flat_page_origin,
		.flat_page_count = flat_page_count,
		.root_origin = root_origin,
		.root_count = root_count,
	};

	return VDO_SUCCESS;
}
414
/* Encode @state into @buffer at @*offset, preceded by its 2.0 header. */
static void encode_block_map_state_2_0(u8 *buffer, size_t *offset,
				       struct block_map_state_2_0 state)
{
	size_t payload_start;

	vdo_encode_header(buffer, offset, &VDO_BLOCK_MAP_HEADER_2_0);
	payload_start = *offset;

	encode_u64_le(buffer, offset, state.flat_page_origin);
	encode_u64_le(buffer, offset, state.flat_page_count);
	encode_u64_le(buffer, offset, state.root_origin);
	encode_u64_le(buffer, offset, state.root_count);

	/* The header advertises the payload size; the encoding must agree. */
	VDO_ASSERT_LOG_ONLY(VDO_BLOCK_MAP_HEADER_2_0.size == *offset - payload_start,
			    "encoded block map component size must match header size");
}
431
432 /**
433 * vdo_compute_new_forest_pages() - Compute the number of pages which must be allocated at each
434 * level in order to grow the forest to a new number of entries.
435 * @root_count: The number of block map roots.
436 * @old_sizes: The sizes of the old tree segments.
437 * @entries: The new number of entries the block map must address.
438 * @new_sizes: The sizes of the new tree segments.
439 *
440 * Return: The total number of non-leaf pages required.
441 */
vdo_compute_new_forest_pages(root_count_t root_count,struct boundary * old_sizes,block_count_t entries,struct boundary * new_sizes)442 block_count_t vdo_compute_new_forest_pages(root_count_t root_count,
443 struct boundary *old_sizes,
444 block_count_t entries,
445 struct boundary *new_sizes)
446 {
447 page_count_t leaf_pages = max(vdo_compute_block_map_page_count(entries), 1U);
448 page_count_t level_size = DIV_ROUND_UP(leaf_pages, root_count);
449 block_count_t total_pages = 0;
450 height_t height;
451
452 for (height = 0; height < VDO_BLOCK_MAP_TREE_HEIGHT; height++) {
453 block_count_t new_pages;
454
455 level_size = DIV_ROUND_UP(level_size, VDO_BLOCK_MAP_ENTRIES_PER_PAGE);
456 new_sizes->levels[height] = level_size;
457 new_pages = level_size;
458 if (old_sizes != NULL)
459 new_pages -= old_sizes->levels[height];
460 total_pages += (new_pages * root_count);
461 }
462
463 return total_pages;
464 }
465
466 /**
467 * encode_recovery_journal_state_7_0() - Encode the state of a recovery journal.
468 * @buffer: A buffer to store the encoding.
469 * @offset: The offset in the buffer at which to encode.
470 * @state: The recovery journal state to encode.
471 *
472 * Return: VDO_SUCCESS or an error code.
473 */
encode_recovery_journal_state_7_0(u8 * buffer,size_t * offset,struct recovery_journal_state_7_0 state)474 static void encode_recovery_journal_state_7_0(u8 *buffer, size_t *offset,
475 struct recovery_journal_state_7_0 state)
476 {
477 size_t initial_offset;
478
479 vdo_encode_header(buffer, offset, &VDO_RECOVERY_JOURNAL_HEADER_7_0);
480
481 initial_offset = *offset;
482 encode_u64_le(buffer, offset, state.journal_start);
483 encode_u64_le(buffer, offset, state.logical_blocks_used);
484 encode_u64_le(buffer, offset, state.block_map_data_blocks);
485
486 VDO_ASSERT_LOG_ONLY(VDO_RECOVERY_JOURNAL_HEADER_7_0.size == *offset - initial_offset,
487 "encoded recovery journal component size must match header size");
488 }
489
490 /**
491 * decode_recovery_journal_state_7_0() - Decode the state of a recovery journal saved in a buffer.
492 * @buffer: The buffer containing the saved state.
493 * @offset: The offset to start decoding from.
494 * @state: A pointer to a recovery journal state to hold the result of a successful decode.
495 *
496 * Return: VDO_SUCCESS or an error code.
497 */
decode_recovery_journal_state_7_0(u8 * buffer,size_t * offset,struct recovery_journal_state_7_0 * state)498 static int __must_check decode_recovery_journal_state_7_0(u8 *buffer, size_t *offset,
499 struct recovery_journal_state_7_0 *state)
500 {
501 struct header header;
502 int result;
503 size_t initial_offset;
504 sequence_number_t journal_start;
505 block_count_t logical_blocks_used, block_map_data_blocks;
506
507 vdo_decode_header(buffer, offset, &header);
508 result = vdo_validate_header(&VDO_RECOVERY_JOURNAL_HEADER_7_0, &header, true,
509 __func__);
510 if (result != VDO_SUCCESS)
511 return result;
512
513 initial_offset = *offset;
514 decode_u64_le(buffer, offset, &journal_start);
515 decode_u64_le(buffer, offset, &logical_blocks_used);
516 decode_u64_le(buffer, offset, &block_map_data_blocks);
517
518 result = VDO_ASSERT(VDO_RECOVERY_JOURNAL_HEADER_7_0.size == *offset - initial_offset,
519 "decoded recovery journal component size must match header size");
520 if (result != VDO_SUCCESS)
521 return result;
522
523 *state = (struct recovery_journal_state_7_0) {
524 .journal_start = journal_start,
525 .logical_blocks_used = logical_blocks_used,
526 .block_map_data_blocks = block_map_data_blocks,
527 };
528
529 return VDO_SUCCESS;
530 }
531
532 /**
533 * vdo_get_journal_operation_name() - Get the name of a journal operation.
534 * @operation: The operation to name.
535 *
536 * Return: The name of the operation.
537 */
vdo_get_journal_operation_name(enum journal_operation operation)538 const char *vdo_get_journal_operation_name(enum journal_operation operation)
539 {
540 switch (operation) {
541 case VDO_JOURNAL_DATA_REMAPPING:
542 return "data remapping";
543
544 case VDO_JOURNAL_BLOCK_MAP_REMAPPING:
545 return "block map remapping";
546
547 default:
548 return "unknown journal operation";
549 }
550 }
551
552 /**
553 * encode_slab_depot_state_2_0() - Encode the state of a slab depot into a buffer.
554 * @buffer: A buffer to store the encoding.
555 * @offset: The offset in the buffer at which to encode.
556 * @state: The slab depot state to encode.
557 */
encode_slab_depot_state_2_0(u8 * buffer,size_t * offset,struct slab_depot_state_2_0 state)558 static void encode_slab_depot_state_2_0(u8 *buffer, size_t *offset,
559 struct slab_depot_state_2_0 state)
560 {
561 size_t initial_offset;
562
563 vdo_encode_header(buffer, offset, &VDO_SLAB_DEPOT_HEADER_2_0);
564
565 initial_offset = *offset;
566 encode_u64_le(buffer, offset, state.slab_config.slab_blocks);
567 encode_u64_le(buffer, offset, state.slab_config.data_blocks);
568 encode_u64_le(buffer, offset, state.slab_config.reference_count_blocks);
569 encode_u64_le(buffer, offset, state.slab_config.slab_journal_blocks);
570 encode_u64_le(buffer, offset, state.slab_config.slab_journal_flushing_threshold);
571 encode_u64_le(buffer, offset, state.slab_config.slab_journal_blocking_threshold);
572 encode_u64_le(buffer, offset, state.slab_config.slab_journal_scrubbing_threshold);
573 encode_u64_le(buffer, offset, state.first_block);
574 encode_u64_le(buffer, offset, state.last_block);
575 buffer[(*offset)++] = state.zone_count;
576
577 VDO_ASSERT_LOG_ONLY(VDO_SLAB_DEPOT_HEADER_2_0.size == *offset - initial_offset,
578 "encoded block map component size must match header size");
579 }
580
581 /**
582 * decode_slab_depot_state_2_0() - Decode slab depot component state version 2.0 from a buffer.
583 * @buffer: The buffer being decoded.
584 * @offset: The offset to start decoding from.
585 * @state: A pointer to a slab depot state to hold the decoded result.
586 *
587 * Return: VDO_SUCCESS or an error code.
588 */
decode_slab_depot_state_2_0(u8 * buffer,size_t * offset,struct slab_depot_state_2_0 * state)589 static int decode_slab_depot_state_2_0(u8 *buffer, size_t *offset,
590 struct slab_depot_state_2_0 *state)
591 {
592 struct header header;
593 int result;
594 size_t initial_offset;
595 struct slab_config slab_config;
596 block_count_t count;
597 physical_block_number_t first_block, last_block;
598 zone_count_t zone_count;
599
600 vdo_decode_header(buffer, offset, &header);
601 result = vdo_validate_header(&VDO_SLAB_DEPOT_HEADER_2_0, &header, true,
602 __func__);
603 if (result != VDO_SUCCESS)
604 return result;
605
606 initial_offset = *offset;
607 decode_u64_le(buffer, offset, &count);
608 slab_config.slab_blocks = count;
609
610 decode_u64_le(buffer, offset, &count);
611 slab_config.data_blocks = count;
612
613 decode_u64_le(buffer, offset, &count);
614 slab_config.reference_count_blocks = count;
615
616 decode_u64_le(buffer, offset, &count);
617 slab_config.slab_journal_blocks = count;
618
619 decode_u64_le(buffer, offset, &count);
620 slab_config.slab_journal_flushing_threshold = count;
621
622 decode_u64_le(buffer, offset, &count);
623 slab_config.slab_journal_blocking_threshold = count;
624
625 decode_u64_le(buffer, offset, &count);
626 slab_config.slab_journal_scrubbing_threshold = count;
627
628 decode_u64_le(buffer, offset, &first_block);
629 decode_u64_le(buffer, offset, &last_block);
630 zone_count = buffer[(*offset)++];
631
632 result = VDO_ASSERT(VDO_SLAB_DEPOT_HEADER_2_0.size == *offset - initial_offset,
633 "decoded slab depot component size must match header size");
634 if (result != VDO_SUCCESS)
635 return result;
636
637 *state = (struct slab_depot_state_2_0) {
638 .slab_config = slab_config,
639 .first_block = first_block,
640 .last_block = last_block,
641 .zone_count = zone_count,
642 };
643
644 return VDO_SUCCESS;
645 }
646
647 /**
648 * vdo_configure_slab_depot() - Configure the slab depot.
649 * @partition: The slab depot partition
650 * @slab_config: The configuration of a single slab.
651 * @zone_count: The number of zones the depot will use.
652 * @state: The state structure to be configured.
653 *
654 * Configures the slab_depot for the specified storage capacity, finding the number of data blocks
655 * that will fit and still leave room for the depot metadata, then return the saved state for that
656 * configuration.
657 *
658 * Return: VDO_SUCCESS or an error code.
659 */
vdo_configure_slab_depot(const struct partition * partition,struct slab_config slab_config,zone_count_t zone_count,struct slab_depot_state_2_0 * state)660 int vdo_configure_slab_depot(const struct partition *partition,
661 struct slab_config slab_config, zone_count_t zone_count,
662 struct slab_depot_state_2_0 *state)
663 {
664 block_count_t total_slab_blocks, total_data_blocks;
665 size_t slab_count;
666 physical_block_number_t last_block;
667 block_count_t slab_size = slab_config.slab_blocks;
668
669 vdo_log_debug("slabDepot %s(block_count=%llu, first_block=%llu, slab_size=%llu, zone_count=%u)",
670 __func__, (unsigned long long) partition->count,
671 (unsigned long long) partition->offset,
672 (unsigned long long) slab_size, zone_count);
673
674 /* We do not allow runt slabs, so we waste up to a slab's worth. */
675 slab_count = (partition->count / slab_size);
676 if (slab_count == 0)
677 return VDO_NO_SPACE;
678
679 if (slab_count > MAX_VDO_SLABS)
680 return VDO_TOO_MANY_SLABS;
681
682 total_slab_blocks = slab_count * slab_config.slab_blocks;
683 total_data_blocks = slab_count * slab_config.data_blocks;
684 last_block = partition->offset + total_slab_blocks;
685
686 *state = (struct slab_depot_state_2_0) {
687 .slab_config = slab_config,
688 .first_block = partition->offset,
689 .last_block = last_block,
690 .zone_count = zone_count,
691 };
692
693 vdo_log_debug("slab_depot last_block=%llu, total_data_blocks=%llu, slab_count=%zu, left_over=%llu",
694 (unsigned long long) last_block,
695 (unsigned long long) total_data_blocks, slab_count,
696 (unsigned long long) (partition->count - (last_block - partition->offset)));
697
698 return VDO_SUCCESS;
699 }
700
701 /**
702 * vdo_configure_slab() - Measure and initialize the configuration to use for each slab.
703 * @slab_size: The number of blocks per slab.
704 * @slab_journal_blocks: The number of blocks for the slab journal.
705 * @slab_config: The slab configuration to initialize.
706 *
707 * Return: VDO_SUCCESS or an error code.
708 */
vdo_configure_slab(block_count_t slab_size,block_count_t slab_journal_blocks,struct slab_config * slab_config)709 int vdo_configure_slab(block_count_t slab_size, block_count_t slab_journal_blocks,
710 struct slab_config *slab_config)
711 {
712 block_count_t ref_blocks, meta_blocks, data_blocks;
713 block_count_t flushing_threshold, remaining, blocking_threshold;
714 block_count_t minimal_extra_space, scrubbing_threshold;
715
716 if (slab_journal_blocks >= slab_size)
717 return VDO_BAD_CONFIGURATION;
718
719 /*
720 * This calculation should technically be a recurrence, but the total number of metadata
721 * blocks is currently less than a single block of ref_counts, so we'd gain at most one
722 * data block in each slab with more iteration.
723 */
724 ref_blocks = vdo_get_saved_reference_count_size(slab_size - slab_journal_blocks);
725 meta_blocks = (ref_blocks + slab_journal_blocks);
726
727 /* Make sure configured slabs are not too small. */
728 if (meta_blocks >= slab_size)
729 return VDO_BAD_CONFIGURATION;
730
731 data_blocks = slab_size - meta_blocks;
732
733 /*
734 * Configure the slab journal thresholds. The flush threshold is 168 of 224 blocks in
735 * production, or 3/4ths, so we use this ratio for all sizes.
736 */
737 flushing_threshold = ((slab_journal_blocks * 3) + 3) / 4;
738 /*
739 * The blocking threshold should be far enough from the flushing threshold to not produce
740 * delays, but far enough from the end of the journal to allow multiple successive recovery
741 * failures.
742 */
743 remaining = slab_journal_blocks - flushing_threshold;
744 blocking_threshold = flushing_threshold + ((remaining * 5) / 7);
745 /* The scrubbing threshold should be at least 2048 entries before the end of the journal. */
746 minimal_extra_space = 1 + (MAXIMUM_VDO_USER_VIOS / VDO_SLAB_JOURNAL_FULL_ENTRIES_PER_BLOCK);
747 scrubbing_threshold = blocking_threshold;
748 if (slab_journal_blocks > minimal_extra_space)
749 scrubbing_threshold = slab_journal_blocks - minimal_extra_space;
750 if (blocking_threshold > scrubbing_threshold)
751 blocking_threshold = scrubbing_threshold;
752
753 *slab_config = (struct slab_config) {
754 .slab_blocks = slab_size,
755 .data_blocks = data_blocks,
756 .reference_count_blocks = ref_blocks,
757 .slab_journal_blocks = slab_journal_blocks,
758 .slab_journal_flushing_threshold = flushing_threshold,
759 .slab_journal_blocking_threshold = blocking_threshold,
760 .slab_journal_scrubbing_threshold = scrubbing_threshold};
761 return VDO_SUCCESS;
762 }
763
/**
 * vdo_decode_slab_journal_entry() - Decode a slab journal entry.
 * @block: The journal block holding the entry.
 * @entry_count: The number of the entry.
 *
 * Return: The decoded entry.
 */
struct slab_journal_entry vdo_decode_slab_journal_entry(struct packed_slab_journal_block *block,
							journal_entry_count_t entry_count)
{
	struct slab_journal_entry entry =
		vdo_unpack_slab_journal_entry(&block->payload.entries[entry_count]);

	/*
	 * When the block records block map increments, entry_types is a bit
	 * array with one bit per entry (bit N of byte N/8); a set bit marks
	 * the entry as a block map remapping rather than a data remapping.
	 */
	if (block->header.has_block_map_increments &&
	    ((block->payload.full_entries.entry_types[entry_count / 8] &
	      ((u8) 1 << (entry_count % 8))) != 0))
		entry.operation = VDO_JOURNAL_BLOCK_MAP_REMAPPING;

	return entry;
}
784
785 /**
786 * allocate_partition() - Allocate a partition and add it to a layout.
787 * @layout: The layout containing the partition.
788 * @id: The id of the partition.
789 * @offset: The offset into the layout at which the partition begins.
790 * @size: The size of the partition in blocks.
791 *
792 * Return: VDO_SUCCESS or an error.
793 */
allocate_partition(struct layout * layout,u8 id,physical_block_number_t offset,block_count_t size)794 static int allocate_partition(struct layout *layout, u8 id,
795 physical_block_number_t offset, block_count_t size)
796 {
797 struct partition *partition;
798 int result;
799
800 result = vdo_allocate(1, struct partition, __func__, &partition);
801 if (result != VDO_SUCCESS)
802 return result;
803
804 partition->id = id;
805 partition->offset = offset;
806 partition->count = size;
807 partition->next = layout->head;
808 layout->head = partition;
809
810 return VDO_SUCCESS;
811 }
812
813 /**
814 * make_partition() - Create a new partition from the beginning or end of the unused space in a
815 * layout.
816 * @layout: The layout.
817 * @id: The id of the partition to make.
818 * @size: The number of blocks to carve out; if 0, all remaining space will be used.
819 * @beginning: True if the partition should start at the beginning of the unused space.
820 *
821 * Return: A success or error code, particularly VDO_NO_SPACE if there are fewer than size blocks
822 * remaining.
823 */
make_partition(struct layout * layout,enum partition_id id,block_count_t size,bool beginning)824 static int __must_check make_partition(struct layout *layout, enum partition_id id,
825 block_count_t size, bool beginning)
826 {
827 int result;
828 physical_block_number_t offset;
829 block_count_t free_blocks = layout->last_free - layout->first_free;
830
831 if (size == 0) {
832 if (free_blocks == 0)
833 return VDO_NO_SPACE;
834 size = free_blocks;
835 } else if (size > free_blocks) {
836 return VDO_NO_SPACE;
837 }
838
839 result = vdo_get_partition(layout, id, NULL);
840 if (result != VDO_UNKNOWN_PARTITION)
841 return VDO_PARTITION_EXISTS;
842
843 offset = beginning ? layout->first_free : (layout->last_free - size);
844
845 result = allocate_partition(layout, id, offset, size);
846 if (result != VDO_SUCCESS)
847 return result;
848
849 layout->num_partitions++;
850 if (beginning)
851 layout->first_free += size;
852 else
853 layout->last_free = layout->last_free - size;
854
855 return VDO_SUCCESS;
856 }
857
858 /**
859 * vdo_initialize_layout() - Lay out the partitions of a vdo.
860 * @size: The entire size of the vdo.
861 * @offset: The start of the layout on the underlying storage in blocks.
862 * @block_map_blocks: The size of the block map partition.
863 * @journal_blocks: The size of the journal partition.
864 * @summary_blocks: The size of the slab summary partition.
865 * @layout: The layout to initialize.
866 *
867 * Return: VDO_SUCCESS or an error.
868 */
int vdo_initialize_layout(block_count_t size, physical_block_number_t offset,
			  block_count_t block_map_blocks, block_count_t journal_blocks,
			  block_count_t summary_blocks, struct layout *layout)
{
	/*
	 * The partitions, in creation order: block map at the front, then slab
	 * summary and recovery journal at the back, and the slab depot taking
	 * whatever space remains.
	 */
	const struct {
		enum partition_id id;
		block_count_t size;
		bool beginning;
	} spec[] = {
		{ VDO_BLOCK_MAP_PARTITION, block_map_blocks, true },
		{ VDO_SLAB_SUMMARY_PARTITION, summary_blocks, false },
		{ VDO_RECOVERY_JOURNAL_PARTITION, journal_blocks, false },
		{ VDO_SLAB_DEPOT_PARTITION, 0, true },
	};
	size_t i;
	int result;

	/* Everything but the slab depot has a fixed size which must fit. */
	if ((offset + block_map_blocks + journal_blocks + summary_blocks) > size)
		return vdo_log_error_strerror(VDO_NO_SPACE,
					      "Not enough space to make a VDO");

	*layout = (struct layout) {
		.start = offset,
		.size = size,
		.first_free = offset,
		.last_free = size,
		.num_partitions = 0,
		.head = NULL,
	};

	for (i = 0; i < (sizeof(spec) / sizeof(spec[0])); i++) {
		result = make_partition(layout, spec[i].id, spec[i].size,
					spec[i].beginning);
		if (result != VDO_SUCCESS) {
			vdo_uninitialize_layout(layout);
			return result;
		}
	}

	return VDO_SUCCESS;
}
916
917 /**
918 * vdo_uninitialize_layout() - Clean up a layout.
919 * @layout: The layout to clean up.
920 *
921 * All partitions created by this layout become invalid pointers.
922 */
vdo_uninitialize_layout(struct layout * layout)923 void vdo_uninitialize_layout(struct layout *layout)
924 {
925 while (layout->head != NULL) {
926 struct partition *part = layout->head;
927
928 layout->head = part->next;
929 vdo_free(part);
930 }
931
932 memset(layout, 0, sizeof(struct layout));
933 }
934
935 /**
936 * vdo_get_partition() - Get a partition by id.
937 * @layout: The layout from which to get a partition.
938 * @id: The id of the partition.
939 * @partition_ptr: A pointer to hold the partition.
940 *
941 * Return: VDO_SUCCESS or an error.
942 */
vdo_get_partition(struct layout * layout,enum partition_id id,struct partition ** partition_ptr)943 int vdo_get_partition(struct layout *layout, enum partition_id id,
944 struct partition **partition_ptr)
945 {
946 struct partition *partition;
947
948 for (partition = layout->head; partition != NULL; partition = partition->next) {
949 if (partition->id == id) {
950 if (partition_ptr != NULL)
951 *partition_ptr = partition;
952 return VDO_SUCCESS;
953 }
954 }
955
956 return VDO_UNKNOWN_PARTITION;
957 }
958
959 /**
960 * vdo_get_known_partition() - Get a partition by id from a validated layout.
961 * @layout: The layout from which to get a partition.
962 * @id: The id of the partition.
963 *
964 * Return: the partition
965 */
vdo_get_known_partition(struct layout * layout,enum partition_id id)966 struct partition *vdo_get_known_partition(struct layout *layout, enum partition_id id)
967 {
968 struct partition *partition;
969 int result = vdo_get_partition(layout, id, &partition);
970
971 VDO_ASSERT_LOG_ONLY(result == VDO_SUCCESS, "layout has expected partition: %u", id);
972
973 return partition;
974 }
975
/**
 * encode_layout() - Encode a layout and all of its partitions into a buffer.
 * @buffer: The buffer receiving the encoding.
 * @offset: In/out byte offset into the buffer; advanced past the encoding.
 * @layout: The layout to encode.
 */
static void encode_layout(u8 *buffer, size_t *offset, const struct layout *layout)
{
	const struct partition *partition;
	size_t initial_offset;
	struct header header = VDO_LAYOUT_HEADER_3_0;

	/* Each partition id is stored on disk as a single byte, so it must fit in one. */
	BUILD_BUG_ON(sizeof(enum partition_id) != sizeof(u8));
	VDO_ASSERT_LOG_ONLY(layout->num_partitions <= U8_MAX,
			    "layout partition count must fit in a byte");

	vdo_encode_header(buffer, offset, &header);

	initial_offset = *offset;
	encode_u64_le(buffer, offset, layout->first_free);
	encode_u64_le(buffer, offset, layout->last_free);
	buffer[(*offset)++] = layout->num_partitions;

	/* Catch any drift between the on-disk structure and this hand-rolled encoding. */
	VDO_ASSERT_LOG_ONLY(sizeof(struct layout_3_0) == *offset - initial_offset,
			    "encoded size of a layout header must match structure");

	/* Each entry is: id byte, offset, a zeroed legacy field, and block count. */
	for (partition = layout->head; partition != NULL; partition = partition->next) {
		buffer[(*offset)++] = partition->id;
		encode_u64_le(buffer, offset, partition->offset);
		/* This field only exists for backwards compatibility. */
		encode_u64_le(buffer, offset, 0);
		encode_u64_le(buffer, offset, partition->count);
	}

	VDO_ASSERT_LOG_ONLY(header.size == *offset - initial_offset,
			    "encoded size of a layout must match header size");
}
1007
/**
 * decode_layout() - Decode a layout and its partitions from a buffer.
 * @buffer: The buffer being decoded.
 * @offset: In/out byte offset into the buffer; advanced past the layout encoding.
 * @start: The first physical block covered by the layout's partitions.
 * @size: The block count that the decoded partitions must exactly cover.
 * @layout: The layout to populate.
 *
 * On any failure after partitions have been allocated, the layout is cleaned up
 * before returning.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int decode_layout(u8 *buffer, size_t *offset, physical_block_number_t start,
			 block_count_t size, struct layout *layout)
{
	struct header header;
	struct layout_3_0 layout_header;
	struct partition *partition;
	size_t initial_offset;
	physical_block_number_t first_free, last_free;
	u8 partition_count;
	u8 i;
	int result;

	vdo_decode_header(buffer, offset, &header);
	/* Layout is variable size, so only do a minimum size check here. */
	result = vdo_validate_header(&VDO_LAYOUT_HEADER_3_0, &header, false, __func__);
	if (result != VDO_SUCCESS)
		return result;

	initial_offset = *offset;
	decode_u64_le(buffer, offset, &first_free);
	decode_u64_le(buffer, offset, &last_free);
	partition_count = buffer[(*offset)++];
	layout_header = (struct layout_3_0) {
		.first_free = first_free,
		.last_free = last_free,
		.partition_count = partition_count,
	};

	/* Catch any drift between the on-disk structure and this hand-rolled decoding. */
	result = VDO_ASSERT(sizeof(struct layout_3_0) == *offset - initial_offset,
			    "decoded size of a layout header must match structure");
	if (result != VDO_SUCCESS)
		return result;

	layout->start = start;
	layout->size = size;
	layout->first_free = layout_header.first_free;
	layout->last_free = layout_header.last_free;
	layout->num_partitions = layout_header.partition_count;

	if (layout->num_partitions > VDO_PARTITION_COUNT) {
		return vdo_log_error_strerror(VDO_UNKNOWN_PARTITION,
					      "layout has extra partitions");
	}

	for (i = 0; i < layout->num_partitions; i++) {
		u8 id;
		u64 partition_offset, count;

		id = buffer[(*offset)++];
		decode_u64_le(buffer, offset, &partition_offset);
		/* Skip the 8-byte field which only exists for backwards compatibility. */
		*offset += sizeof(u64);
		decode_u64_le(buffer, offset, &count);

		result = allocate_partition(layout, id, partition_offset, count);
		if (result != VDO_SUCCESS) {
			vdo_uninitialize_layout(layout);
			return result;
		}
	}

	/* Validate that the layout has all (and only) the required partitions */
	for (i = 0; i < VDO_PARTITION_COUNT; i++) {
		result = vdo_get_partition(layout, REQUIRED_PARTITIONS[i], &partition);
		if (result != VDO_SUCCESS) {
			vdo_uninitialize_layout(layout);
			return vdo_log_error_strerror(result,
						      "layout is missing required partition %u",
						      REQUIRED_PARTITIONS[i]);
		}

		/* Reuse @start as a running total of the blocks the partitions cover. */
		start += partition->count;
	}

	/* The required partitions must account for every block up to @size. */
	if (start != size) {
		vdo_uninitialize_layout(layout);
		return vdo_log_error_strerror(UDS_BAD_STATE,
					      "partitions do not cover the layout");
	}

	return VDO_SUCCESS;
}
1089
1090 /**
1091 * pack_vdo_config() - Convert a vdo_config to its packed on-disk representation.
1092 * @config: The vdo config to convert.
1093 *
1094 * Return: The platform-independent representation of the config.
1095 */
pack_vdo_config(struct vdo_config config)1096 static struct packed_vdo_config pack_vdo_config(struct vdo_config config)
1097 {
1098 return (struct packed_vdo_config) {
1099 .logical_blocks = __cpu_to_le64(config.logical_blocks),
1100 .physical_blocks = __cpu_to_le64(config.physical_blocks),
1101 .slab_size = __cpu_to_le64(config.slab_size),
1102 .recovery_journal_size = __cpu_to_le64(config.recovery_journal_size),
1103 .slab_journal_blocks = __cpu_to_le64(config.slab_journal_blocks),
1104 };
1105 }
1106
1107 /**
1108 * pack_vdo_component() - Convert a vdo_component to its packed on-disk representation.
1109 * @component: The VDO component data to convert.
1110 *
1111 * Return: The platform-independent representation of the component.
1112 */
pack_vdo_component(const struct vdo_component component)1113 static struct packed_vdo_component_41_0 pack_vdo_component(const struct vdo_component component)
1114 {
1115 return (struct packed_vdo_component_41_0) {
1116 .state = __cpu_to_le32(component.state),
1117 .complete_recoveries = __cpu_to_le64(component.complete_recoveries),
1118 .read_only_recoveries = __cpu_to_le64(component.read_only_recoveries),
1119 .config = pack_vdo_config(component.config),
1120 .nonce = __cpu_to_le64(component.nonce),
1121 };
1122 }
1123
/* Encode the vdo component: a version number followed by the packed component data. */
static void encode_vdo_component(u8 *buffer, size_t *offset,
				 struct vdo_component component)
{
	/* Packing is a pure conversion, so it may be done up front. */
	struct packed_vdo_component_41_0 packed = pack_vdo_component(component);

	encode_version_number(buffer, offset, VDO_COMPONENT_DATA_41_0);
	memcpy(buffer + *offset, &packed, sizeof(packed));
	*offset += sizeof(packed);
}
1134
1135 /**
1136 * unpack_vdo_config() - Convert a packed_vdo_config to its native in-memory representation.
1137 * @config: The packed vdo config to convert.
1138 *
1139 * Return: The native in-memory representation of the vdo config.
1140 */
unpack_vdo_config(struct packed_vdo_config config)1141 static struct vdo_config unpack_vdo_config(struct packed_vdo_config config)
1142 {
1143 return (struct vdo_config) {
1144 .logical_blocks = __le64_to_cpu(config.logical_blocks),
1145 .physical_blocks = __le64_to_cpu(config.physical_blocks),
1146 .slab_size = __le64_to_cpu(config.slab_size),
1147 .recovery_journal_size = __le64_to_cpu(config.recovery_journal_size),
1148 .slab_journal_blocks = __le64_to_cpu(config.slab_journal_blocks),
1149 };
1150 }
1151
1152 /**
1153 * unpack_vdo_component_41_0() - Convert a packed_vdo_component_41_0 to its native in-memory
1154 * representation.
1155 * @component: The packed vdo component data to convert.
1156 *
1157 * Return: The native in-memory representation of the component.
1158 */
unpack_vdo_component_41_0(struct packed_vdo_component_41_0 component)1159 static struct vdo_component unpack_vdo_component_41_0(struct packed_vdo_component_41_0 component)
1160 {
1161 return (struct vdo_component) {
1162 .state = __le32_to_cpu(component.state),
1163 .complete_recoveries = __le64_to_cpu(component.complete_recoveries),
1164 .read_only_recoveries = __le64_to_cpu(component.read_only_recoveries),
1165 .config = unpack_vdo_config(component.config),
1166 .nonce = __le64_to_cpu(component.nonce),
1167 };
1168 }
1169
1170 /**
1171 * decode_vdo_component() - Decode the component data for the vdo itself out of the super block.
1172 * @buffer: The buffer being decoded.
1173 * @offset: The offset to start decoding from.
1174 * @component: The vdo component structure to decode into.
1175 *
1176 * Return: VDO_SUCCESS or an error.
1177 */
static int decode_vdo_component(u8 *buffer, size_t *offset, struct vdo_component *component)
{
	struct version_number version;
	struct packed_vdo_component_41_0 packed;
	int result;

	/* Only version 41.0 component data is understood. */
	decode_version_number(buffer, offset, &version);
	result = validate_version(version, VDO_COMPONENT_DATA_41_0,
				  "VDO component data");
	if (result != VDO_SUCCESS)
		return result;

	/* Copy out the packed (little-endian) struct, then convert to native form. */
	memcpy(&packed, buffer + *offset, sizeof(packed));
	*offset += sizeof(packed);
	*component = unpack_vdo_component_41_0(packed);
	return VDO_SUCCESS;
}
1195
1196 /**
1197 * vdo_validate_config() - Validate constraints on a VDO config.
1198 * @config: The VDO config.
1199 * @physical_block_count: The minimum block count of the underlying storage.
1200 * @logical_block_count: The expected logical size of the VDO, or 0 if the logical size may be
1201 * unspecified.
1202 *
1203 * Return: A success or error code.
1204 */
int vdo_validate_config(const struct vdo_config *config,
			block_count_t physical_block_count,
			block_count_t logical_block_count)
{
	struct slab_config slab_config;
	int result;

	/* The slab size must be a non-zero power of two within the supported range. */
	result = VDO_ASSERT(config->slab_size > 0, "slab size unspecified");
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(is_power_of_2(config->slab_size),
			    "slab size must be a power of two");
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(config->slab_size <= (1 << MAX_VDO_SLAB_BITS),
			    "slab size must be less than or equal to 2^%d",
			    MAX_VDO_SLAB_BITS);
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(config->slab_journal_blocks <= config->slab_size,
			    "slab journal size is within expected bound");
	if (result != VDO_SUCCESS)
		return result;

	/* Derive the full slab layout to prove the journal leaves room for data. */
	result = vdo_configure_slab(config->slab_size, config->slab_journal_blocks,
				    &slab_config);
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT((slab_config.data_blocks >= 1),
			    "slab must be able to hold at least one block");
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(config->physical_blocks > 0, "physical blocks unspecified");
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(config->physical_blocks <= MAXIMUM_VDO_PHYSICAL_BLOCKS,
			    "physical block count %llu exceeds maximum %llu",
			    (unsigned long long) config->physical_blocks,
			    (unsigned long long) MAXIMUM_VDO_PHYSICAL_BLOCKS);
	/*
	 * Note: unlike the other checks here, this one maps the assertion failure to
	 * VDO_OUT_OF_RANGE rather than returning the assertion's own code.
	 */
	if (result != VDO_SUCCESS)
		return VDO_OUT_OF_RANGE;

	/* The storage must match the configured physical size exactly. */
	if (physical_block_count != config->physical_blocks) {
		vdo_log_error("A physical size of %llu blocks was specified, not the %llu blocks configured in the vdo super block",
			      (unsigned long long) physical_block_count,
			      (unsigned long long) config->physical_blocks);
		return VDO_PARAMETER_MISMATCH;
	}

	/* A logical_block_count of 0 means the caller accepts whatever is configured. */
	if (logical_block_count > 0) {
		result = VDO_ASSERT((config->logical_blocks > 0),
				    "logical blocks unspecified");
		if (result != VDO_SUCCESS)
			return result;

		if (logical_block_count != config->logical_blocks) {
			vdo_log_error("A logical size of %llu blocks was specified, but that differs from the %llu blocks configured in the vdo super block",
				      (unsigned long long) logical_block_count,
				      (unsigned long long) config->logical_blocks);
			return VDO_PARAMETER_MISMATCH;
		}
	}

	result = VDO_ASSERT(config->logical_blocks <= MAXIMUM_VDO_LOGICAL_BLOCKS,
			    "logical blocks too large");
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(config->recovery_journal_size > 0,
			    "recovery journal size unspecified");
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(is_power_of_2(config->recovery_journal_size),
			    "recovery journal size must be a power of two");
	if (result != VDO_SUCCESS)
		return result;

	return result;
}
1291
1292 /**
1293 * vdo_destroy_component_states() - Clean up any allocations in a vdo_component_states.
1294 * @states: The component states to destroy.
1295 */
vdo_destroy_component_states(struct vdo_component_states * states)1296 void vdo_destroy_component_states(struct vdo_component_states *states)
1297 {
1298 if (states == NULL)
1299 return;
1300
1301 vdo_uninitialize_layout(&states->layout);
1302 }
1303
1304 /**
1305 * decode_components() - Decode the components now that we know the component data is a version we
1306 * understand.
1307 * @buffer: The buffer being decoded.
1308 * @offset: The offset to start decoding from.
1309 * @geometry: The vdo geometry.
1310 * @states: An object to hold the successfully decoded state.
1311 *
1312 * Return: VDO_SUCCESS or an error.
1313 */
decode_components(u8 * buffer,size_t * offset,struct volume_geometry * geometry,struct vdo_component_states * states)1314 static int __must_check decode_components(u8 *buffer, size_t *offset,
1315 struct volume_geometry *geometry,
1316 struct vdo_component_states *states)
1317 {
1318 int result;
1319
1320 decode_vdo_component(buffer, offset, &states->vdo);
1321
1322 result = decode_layout(buffer, offset, vdo_get_data_region_start(*geometry) + 1,
1323 states->vdo.config.physical_blocks, &states->layout);
1324 if (result != VDO_SUCCESS)
1325 return result;
1326
1327 result = decode_recovery_journal_state_7_0(buffer, offset,
1328 &states->recovery_journal);
1329 if (result != VDO_SUCCESS)
1330 return result;
1331
1332 result = decode_slab_depot_state_2_0(buffer, offset, &states->slab_depot);
1333 if (result != VDO_SUCCESS)
1334 return result;
1335
1336 result = decode_block_map_state_2_0(buffer, offset, &states->block_map);
1337 if (result != VDO_SUCCESS)
1338 return result;
1339
1340 VDO_ASSERT_LOG_ONLY(*offset == VDO_COMPONENT_DATA_OFFSET + VDO_COMPONENT_DATA_SIZE,
1341 "All decoded component data was used");
1342 return VDO_SUCCESS;
1343 }
1344
1345 /**
1346 * vdo_decode_component_states() - Decode the payload of a super block.
1347 * @buffer: The buffer containing the encoded super block contents.
1348 * @geometry: The vdo geometry.
1349 * @states: A pointer to hold the decoded states.
1350 *
1351 * Return: VDO_SUCCESS or an error.
1352 */
int vdo_decode_component_states(u8 *buffer, struct volume_geometry *geometry,
				struct vdo_component_states *states)
{
	size_t offset = VDO_COMPONENT_DATA_OFFSET;
	int result;

	/* This is for backwards compatibility. */
	decode_u32_le(buffer, &offset, &states->unused);

	/* Check the VDO volume version */
	decode_version_number(buffer, &offset, &states->volume_version);
	result = validate_version(VDO_VOLUME_VERSION_67_0, states->volume_version,
				  "volume");
	if (result != VDO_SUCCESS)
		return result;

	result = decode_components(buffer, &offset, geometry, states);
	if (result == VDO_SUCCESS)
		return VDO_SUCCESS;

	/* Discard any partially-decoded layout before reporting the failure. */
	vdo_uninitialize_layout(&states->layout);
	return result;
}
1375
1376 /**
1377 * vdo_validate_component_states() - Validate the decoded super block configuration.
1378 * @states: The state decoded from the super block.
1379 * @geometry_nonce: The nonce from the geometry block.
1380 * @physical_size: The minimum block count of the underlying storage.
1381 * @logical_size: The expected logical size of the VDO, or 0 if the logical size may be
1382 * unspecified.
1383 *
1384 * Return: VDO_SUCCESS or an error if the configuration is invalid.
1385 */
int vdo_validate_component_states(struct vdo_component_states *states,
				  nonce_t geometry_nonce, block_count_t physical_size,
				  block_count_t logical_size)
{
	/* The nonces must agree before any other validation is meaningful. */
	if (geometry_nonce == states->vdo.nonce) {
		return vdo_validate_config(&states->vdo.config, physical_size,
					   logical_size);
	}

	return vdo_log_error_strerror(VDO_BAD_NONCE,
				      "Geometry nonce %llu does not match superblock nonce %llu",
				      (unsigned long long) geometry_nonce,
				      (unsigned long long) states->vdo.nonce);
}
1399
1400 /**
1401 * vdo_encode_component_states() - Encode the state of all vdo components in the super block.
1402 * @buffer: A buffer to store the encoding.
1403 * @offset: The offset into the buffer to start the encoding.
1404 * @states: The component states to encode.
1405 */
static void vdo_encode_component_states(u8 *buffer, size_t *offset,
					const struct vdo_component_states *states)
{
	/* This is for backwards compatibility. */
	encode_u32_le(buffer, offset, states->unused);
	/* The encoding order must mirror the decode order in vdo_decode_component_states(). */
	encode_version_number(buffer, offset, states->volume_version);
	encode_vdo_component(buffer, offset, states->vdo);
	encode_layout(buffer, offset, &states->layout);
	encode_recovery_journal_state_7_0(buffer, offset, states->recovery_journal);
	encode_slab_depot_state_2_0(buffer, offset, states->slab_depot);
	encode_block_map_state_2_0(buffer, offset, states->block_map);

	/* Every byte of the component data region must have been filled. */
	VDO_ASSERT_LOG_ONLY(*offset == VDO_COMPONENT_DATA_OFFSET + VDO_COMPONENT_DATA_SIZE,
			    "All super block component data was encoded");
}
1421
1422 /**
1423 * vdo_encode_super_block() - Encode a super block into its on-disk representation.
1424 * @buffer: A buffer to store the encoding.
1425 * @states: The component states to encode.
1426 */
void vdo_encode_super_block(u8 *buffer, struct vdo_component_states *states)
{
	u32 checksum;
	struct header header = SUPER_BLOCK_HEADER_12_0;
	size_t offset = 0;

	/* The header's size field covers the component data that follows it. */
	header.size += VDO_COMPONENT_DATA_SIZE;
	vdo_encode_header(buffer, &offset, &header);
	vdo_encode_component_states(buffer, &offset, states);

	/* Checksum everything encoded so far, then append the checksum itself. */
	checksum = vdo_crc32(buffer, offset);
	encode_u32_le(buffer, &offset, checksum);

	/*
	 * Even though the buffer is a full block, to avoid the potential corruption from a torn
	 * write, the entire encoding must fit in the first sector.
	 */
	VDO_ASSERT_LOG_ONLY(offset <= VDO_SECTOR_SIZE,
			    "entire superblock must fit in one sector");
}
1447
1448 /**
1449 * vdo_decode_super_block() - Decode a super block from its on-disk representation.
1450 * @buffer: The buffer to decode from.
1451 */
int vdo_decode_super_block(u8 *buffer)
{
	struct header header;
	int result;
	u32 checksum, saved_checksum;
	size_t offset = 0;

	/* Decode and validate the header. */
	vdo_decode_header(buffer, &offset, &header);
	result = vdo_validate_header(&SUPER_BLOCK_HEADER_12_0, &header, false, __func__);
	if (result != VDO_SUCCESS)
		return result;

	/* The recorded size may not exceed component data plus the trailing checksum. */
	if (header.size > VDO_COMPONENT_DATA_SIZE + sizeof(u32)) {
		/*
		 * We can't check release version or checksum until we know the content size, so we
		 * have to assume a version mismatch on unexpected values.
		 */
		return vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION,
					      "super block contents too large: %zu",
					      header.size);
	}

	/* Skip past the component data for now, to verify the checksum. */
	offset += VDO_COMPONENT_DATA_SIZE;

	/* The stored checksum covers the header plus the component data before it. */
	checksum = vdo_crc32(buffer, offset);
	decode_u32_le(buffer, &offset, &saved_checksum);

	result = VDO_ASSERT(offset == VDO_SUPER_BLOCK_FIXED_SIZE + VDO_COMPONENT_DATA_SIZE,
			    "must have decoded entire superblock payload");
	if (result != VDO_SUCCESS)
		return result;

	return ((checksum != saved_checksum) ? VDO_CHECKSUM_MISMATCH : VDO_SUCCESS);
}
1488