xref: /linux/drivers/md/dm-vdo/encodings.c (revision 5014bebee0cffda14fafae5a2534d08120b7b9e8)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright 2023 Red Hat
4  */
5 
6 #include "encodings.h"
7 
8 #include <linux/log2.h>
9 
10 #include "logger.h"
11 #include "memory-alloc.h"
12 #include "permassert.h"
13 
14 #include "constants.h"
15 #include "status-codes.h"
16 #include "types.h"
17 
18 /** The maximum logical space is 4 petabytes, which is 1 terablock. */
19 static const block_count_t MAXIMUM_VDO_LOGICAL_BLOCKS = 1024ULL * 1024 * 1024 * 1024;
20 
21 /** The maximum physical space is 256 terabytes, which is 64 gigablocks. */
22 static const block_count_t MAXIMUM_VDO_PHYSICAL_BLOCKS = 1024ULL * 1024 * 1024 * 64;
23 
/*
 * The fixed pieces of an encoded geometry block: the magic number, the packed header, and the
 * checksum. In the visible portion of this file the struct is only used for its size (in the
 * geometry block header size fields). When a block is parsed, the volume geometry payload is
 * decoded between the header and the checksum (see vdo_parse_geometry_block()).
 */
struct geometry_block {
	char magic_number[VDO_GEOMETRY_MAGIC_NUMBER_SIZE];
	struct packed_header header;
	u32 checksum;
} __packed;
29 
30 static const struct header GEOMETRY_BLOCK_HEADER_5_0 = {
31 	.id = VDO_GEOMETRY_BLOCK,
32 	.version = {
33 		.major_version = 5,
34 		.minor_version = 0,
35 	},
36 	/*
37 	 * Note: this size isn't just the payload size following the header, like it is everywhere
38 	 * else in VDO.
39 	 */
40 	.size = sizeof(struct geometry_block) + sizeof(struct volume_geometry),
41 };
42 
43 static const struct header GEOMETRY_BLOCK_HEADER_4_0 = {
44 	.id = VDO_GEOMETRY_BLOCK,
45 	.version = {
46 		.major_version = 4,
47 		.minor_version = 0,
48 	},
49 	/*
50 	 * Note: this size isn't just the payload size following the header, like it is everywhere
51 	 * else in VDO.
52 	 */
53 	.size = sizeof(struct geometry_block) + sizeof(struct volume_geometry_4_0),
54 };
55 
/*
 * The magic number expected at the start of a geometry block. The array is declared one byte
 * longer than VDO_GEOMETRY_MAGIC_NUMBER_SIZE; only the first VDO_GEOMETRY_MAGIC_NUMBER_SIZE
 * bytes are compared when a block is parsed.
 */
const u8 VDO_GEOMETRY_MAGIC_NUMBER[VDO_GEOMETRY_MAGIC_NUMBER_SIZE + 1] = "dmvdo001";
57 
/*
 * The size in bytes required of struct block_map_page_header; enforced at build time in
 * vdo_validate_block_map_page(). Written as a sum, presumably of the individual field sizes.
 */
#define PAGE_HEADER_4_1_SIZE (8 + 8 + 8 + 1 + 1 + 1 + 1)
59 
60 static const struct version_number BLOCK_MAP_4_1 = {
61 	.major_version = 4,
62 	.minor_version = 1,
63 };
64 
65 const struct header VDO_BLOCK_MAP_HEADER_2_0 = {
66 	.id = VDO_BLOCK_MAP,
67 	.version = {
68 		.major_version = 2,
69 		.minor_version = 0,
70 	},
71 	.size = sizeof(struct block_map_state_2_0),
72 };
73 
74 const struct header VDO_RECOVERY_JOURNAL_HEADER_7_0 = {
75 	.id = VDO_RECOVERY_JOURNAL,
76 	.version = {
77 			.major_version = 7,
78 			.minor_version = 0,
79 		},
80 	.size = sizeof(struct recovery_journal_state_7_0),
81 };
82 
83 const struct header VDO_SLAB_DEPOT_HEADER_2_0 = {
84 	.id = VDO_SLAB_DEPOT,
85 	.version = {
86 		.major_version = 2,
87 		.minor_version = 0,
88 	},
89 	.size = sizeof(struct slab_depot_state_2_0),
90 };
91 
92 static const struct header VDO_LAYOUT_HEADER_3_0 = {
93 	.id = VDO_LAYOUT,
94 	.version = {
95 		.major_version = 3,
96 		.minor_version = 0,
97 	},
98 	.size = sizeof(struct layout_3_0) + (sizeof(struct partition_3_0) * VDO_PARTITION_COUNT),
99 };
100 
/*
 * The ids of the partitions which are considered required.
 * NOTE(review): the code which consumes this table is outside the visible portion of the file,
 * so whether the order of the entries matters should be confirmed before reordering them.
 */
static const enum partition_id REQUIRED_PARTITIONS[] = {
	VDO_BLOCK_MAP_PARTITION,
	VDO_SLAB_DEPOT_PARTITION,
	VDO_RECOVERY_JOURNAL_PARTITION,
	VDO_SLAB_SUMMARY_PARTITION,
};
107 
108 /*
109  * The current version for the data encoded in the super block. This must be changed any time there
110  * is a change to encoding of the component data of any VDO component.
111  */
112 static const struct version_number VDO_COMPONENT_DATA_41_0 = {
113 	.major_version = 41,
114 	.minor_version = 0,
115 };
116 
117 const struct version_number VDO_VOLUME_VERSION_67_0 = {
118 	.major_version = 67,
119 	.minor_version = 0,
120 };
121 
122 static const struct header SUPER_BLOCK_HEADER_12_0 = {
123 	.id = VDO_SUPER_BLOCK,
124 	.version = {
125 			.major_version = 12,
126 			.minor_version = 0,
127 		},
128 
129 	/* This is the minimum size, if the super block contains no components. */
130 	.size = VDO_SUPER_BLOCK_FIXED_SIZE - VDO_ENCODED_HEADER_SIZE,
131 };
132 
133 /**
134  * validate_version() - Check whether a version matches an expected version.
135  * @expected_version: The expected version.
136  * @actual_version: The version being validated.
137  * @component_name: The name of the component or the calling function (for error logging).
138  *
139  * Logs an error describing a mismatch.
140  *
141  * Return: VDO_SUCCESS             if the versions are the same,
142  *         VDO_UNSUPPORTED_VERSION if the versions don't match.
143  */
validate_version(struct version_number expected_version,struct version_number actual_version,const char * component_name)144 static int __must_check validate_version(struct version_number expected_version,
145 					 struct version_number actual_version,
146 					 const char *component_name)
147 {
148 	if (!vdo_are_same_version(expected_version, actual_version)) {
149 		return vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION,
150 					      "%s version mismatch, expected %d.%d, got %d.%d",
151 					      component_name,
152 					      expected_version.major_version,
153 					      expected_version.minor_version,
154 					      actual_version.major_version,
155 					      actual_version.minor_version);
156 	}
157 
158 	return VDO_SUCCESS;
159 }
160 
161 /**
162  * vdo_validate_header() - Check whether a header matches expectations.
163  * @expected_header: The expected header.
164  * @actual_header: The header being validated.
165  * @exact_size: If true, the size fields of the two headers must be the same, otherwise it is
166  *              required that actual_header.size >= expected_header.size.
167  * @name: The name of the component or the calling function (for error logging).
168  *
169  * Logs an error describing the first mismatch found.
170  *
171  * Return: VDO_SUCCESS             if the header meets expectations,
172  *         VDO_INCORRECT_COMPONENT if the component ids don't match,
173  *         VDO_UNSUPPORTED_VERSION if the versions or sizes don't match.
174  */
vdo_validate_header(const struct header * expected_header,const struct header * actual_header,bool exact_size,const char * name)175 int vdo_validate_header(const struct header *expected_header,
176 			const struct header *actual_header, bool exact_size,
177 			const char *name)
178 {
179 	int result;
180 
181 	if (expected_header->id != actual_header->id) {
182 		return vdo_log_error_strerror(VDO_INCORRECT_COMPONENT,
183 					      "%s ID mismatch, expected %d, got %d",
184 					      name, expected_header->id,
185 					      actual_header->id);
186 	}
187 
188 	result = validate_version(expected_header->version, actual_header->version,
189 				  name);
190 	if (result != VDO_SUCCESS)
191 		return result;
192 
193 	if ((expected_header->size > actual_header->size) ||
194 	    (exact_size && (expected_header->size < actual_header->size))) {
195 		return vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION,
196 					      "%s size mismatch, expected %zu, got %zu",
197 					      name, expected_header->size,
198 					      actual_header->size);
199 	}
200 
201 	return VDO_SUCCESS;
202 }
203 
/**
 * encode_version_number() - Write the packed form of a version number into a buffer.
 * @buffer: The buffer to encode into.
 * @offset: The offset in the buffer at which to encode; advanced by the packed size.
 * @version: The version number to encode.
 */
static void encode_version_number(u8 *buffer, size_t *offset,
				  struct version_number version)
{
	struct packed_version_number packed_version = vdo_pack_version_number(version);
	u8 *destination = buffer + *offset;

	memcpy(destination, &packed_version, sizeof(packed_version));
	*offset += sizeof(packed_version);
}
212 
/**
 * vdo_encode_header() - Write the packed form of a header into a buffer.
 * @buffer: The buffer to encode into.
 * @offset: The offset in the buffer at which to encode; advanced by the packed size.
 * @header: The header to encode.
 */
void vdo_encode_header(u8 *buffer, size_t *offset, const struct header *header)
{
	struct packed_header packed_header = vdo_pack_header(header);
	u8 *destination = buffer + *offset;

	memcpy(destination, &packed_header, sizeof(packed_header));
	*offset += sizeof(packed_header);
}
220 
/**
 * decode_version_number() - Read a packed version number from a buffer and unpack it.
 * @buffer: The buffer to decode from.
 * @offset: The offset in the buffer at which to decode; advanced by the packed size.
 * @version: Receives the unpacked version number.
 */
static void decode_version_number(u8 *buffer, size_t *offset,
				  struct version_number *version)
{
	struct packed_version_number packed_version;
	const u8 *source = buffer + *offset;

	memcpy(&packed_version, source, sizeof(packed_version));
	*offset += sizeof(packed_version);
	*version = vdo_unpack_version_number(packed_version);
}
230 
/**
 * vdo_decode_header() - Read a packed header from a buffer and unpack it.
 * @buffer: The buffer to decode from.
 * @offset: The offset in the buffer at which to decode; advanced by the packed size.
 * @header: Receives the unpacked header.
 */
void vdo_decode_header(u8 *buffer, size_t *offset, struct header *header)
{
	struct packed_header packed_header;
	const u8 *source = buffer + *offset;

	memcpy(&packed_header, source, sizeof(packed_header));
	*offset += sizeof(packed_header);

	*header = vdo_unpack_header(&packed_header);
}
240 
241 /**
242  * decode_volume_geometry() - Decode the on-disk representation of a volume geometry from a buffer.
243  * @buffer: A buffer to decode from.
244  * @offset: The offset in the buffer at which to decode.
245  * @geometry: The structure to receive the decoded fields.
246  * @version: The geometry block version to decode.
247  */
decode_volume_geometry(u8 * buffer,size_t * offset,struct volume_geometry * geometry,u32 version)248 static void decode_volume_geometry(u8 *buffer, size_t *offset,
249 				   struct volume_geometry *geometry, u32 version)
250 {
251 	u32 unused, mem;
252 	enum volume_region_id id;
253 	nonce_t nonce;
254 	block_count_t bio_offset = 0;
255 	bool sparse;
256 
257 	/* This is for backwards compatibility. */
258 	decode_u32_le(buffer, offset, &unused);
259 	geometry->unused = unused;
260 
261 	decode_u64_le(buffer, offset, &nonce);
262 	geometry->nonce = nonce;
263 
264 	memcpy((unsigned char *) &geometry->uuid, buffer + *offset, sizeof(uuid_t));
265 	*offset += sizeof(uuid_t);
266 
267 	if (version > 4)
268 		decode_u64_le(buffer, offset, &bio_offset);
269 	geometry->bio_offset = bio_offset;
270 
271 	for (id = 0; id < VDO_VOLUME_REGION_COUNT; id++) {
272 		physical_block_number_t start_block;
273 		enum volume_region_id saved_id;
274 
275 		decode_u32_le(buffer, offset, &saved_id);
276 		decode_u64_le(buffer, offset, &start_block);
277 
278 		geometry->regions[id] = (struct volume_region) {
279 			.id = saved_id,
280 			.start_block = start_block,
281 		};
282 	}
283 
284 	decode_u32_le(buffer, offset, &mem);
285 	*offset += sizeof(u32);
286 	sparse = buffer[(*offset)++];
287 
288 	geometry->index_config = (struct index_config) {
289 		.mem = mem,
290 		.sparse = sparse,
291 	};
292 }
293 
294 /**
295  * vdo_parse_geometry_block() - Decode and validate an encoded geometry block.
296  * @block: The encoded geometry block.
297  * @geometry: The structure to receive the decoded fields.
298  */
vdo_parse_geometry_block(u8 * block,struct volume_geometry * geometry)299 int __must_check vdo_parse_geometry_block(u8 *block, struct volume_geometry *geometry)
300 {
301 	u32 checksum, saved_checksum;
302 	struct header header;
303 	size_t offset = 0;
304 	int result;
305 
306 	if (memcmp(block, VDO_GEOMETRY_MAGIC_NUMBER, VDO_GEOMETRY_MAGIC_NUMBER_SIZE) != 0)
307 		return VDO_BAD_MAGIC;
308 	offset += VDO_GEOMETRY_MAGIC_NUMBER_SIZE;
309 
310 	vdo_decode_header(block, &offset, &header);
311 	if (header.version.major_version <= 4) {
312 		result = vdo_validate_header(&GEOMETRY_BLOCK_HEADER_4_0, &header,
313 					     true, __func__);
314 	} else {
315 		result = vdo_validate_header(&GEOMETRY_BLOCK_HEADER_5_0, &header,
316 					     true, __func__);
317 	}
318 	if (result != VDO_SUCCESS)
319 		return result;
320 
321 	decode_volume_geometry(block, &offset, geometry, header.version.major_version);
322 
323 	result = VDO_ASSERT(header.size == offset + sizeof(u32),
324 			    "should have decoded up to the geometry checksum");
325 	if (result != VDO_SUCCESS)
326 		return result;
327 
328 	/* Decode and verify the checksum. */
329 	checksum = vdo_crc32(block, offset);
330 	decode_u32_le(block, &offset, &saved_checksum);
331 
332 	return ((checksum == saved_checksum) ? VDO_SUCCESS : VDO_CHECKSUM_MISMATCH);
333 }
334 
/**
 * vdo_format_block_map_page() - Format a buffer as an empty block map page.
 * @buffer: The buffer to format; VDO_BLOCK_SIZE bytes of it are zeroed.
 * @nonce: The nonce to record in the page header.
 * @pbn: The physical block number to record in the page header.
 * @initialized: The value to record in the header's initialized flag.
 *
 * Return: The buffer, as a block map page.
 */
struct block_map_page *vdo_format_block_map_page(void *buffer, nonce_t nonce,
						 physical_block_number_t pbn,
						 bool initialized)
{
	struct block_map_page *formatted = buffer;

	memset(buffer, 0, VDO_BLOCK_SIZE);

	/* The nonce and pbn are stored little-endian. */
	formatted->header.initialized = initialized;
	formatted->header.pbn = __cpu_to_le64(pbn);
	formatted->header.nonce = __cpu_to_le64(nonce);
	formatted->version = vdo_pack_version_number(BLOCK_MAP_4_1);

	return formatted;
}
348 
/**
 * vdo_validate_block_map_page() - Check whether a block map page is usable.
 * @page: The page to check.
 * @nonce: The nonce the page is expected to carry.
 * @pbn: The physical block number the page is expected to record.
 *
 * Return: VDO_BLOCK_MAP_PAGE_INVALID if the page's version is not 4.1, the page is not marked
 *         initialized, or its nonce differs from the one expected;
 *         VDO_BLOCK_MAP_PAGE_BAD if the page records a different pbn;
 *         VDO_BLOCK_MAP_PAGE_VALID otherwise.
 */
enum block_map_page_validity vdo_validate_block_map_page(struct block_map_page *page,
							 nonce_t nonce,
							 physical_block_number_t pbn)
{
	BUILD_BUG_ON(sizeof(struct block_map_page_header) != PAGE_HEADER_4_1_SIZE);

	if (!vdo_are_same_version(BLOCK_MAP_4_1, vdo_unpack_version_number(page->version)))
		return VDO_BLOCK_MAP_PAGE_INVALID;

	if (!page->header.initialized)
		return VDO_BLOCK_MAP_PAGE_INVALID;

	if (__le64_to_cpu(page->header.nonce) != nonce)
		return VDO_BLOCK_MAP_PAGE_INVALID;

	if (vdo_get_block_map_page_pbn(page) != pbn)
		return VDO_BLOCK_MAP_PAGE_BAD;

	return VDO_BLOCK_MAP_PAGE_VALID;
}
365 
/**
 * decode_block_map_state_2_0() - Decode block map component state version 2.0 from a buffer.
 * @buffer: The buffer containing the encoded state.
 * @offset: The offset in the buffer at which to decode; advanced past everything consumed.
 * @state: Receives the decoded state; only written when decoding succeeds.
 *
 * Return: VDO_SUCCESS or an error code.
 */
static int decode_block_map_state_2_0(u8 *buffer, size_t *offset,
				      struct block_map_state_2_0 *state)
{
	size_t initial_offset;
	block_count_t flat_page_count, root_count;
	physical_block_number_t flat_page_origin, root_origin;
	struct header header;
	int result;

	vdo_decode_header(buffer, offset, &header);
	result = vdo_validate_header(&VDO_BLOCK_MAP_HEADER_2_0, &header, true, __func__);
	if (result != VDO_SUCCESS)
		return result;

	initial_offset = *offset;

	/*
	 * The assertion messages below must report the values just decoded. *state is an output
	 * which has not been written yet, so reading it here would log indeterminate data.
	 */
	decode_u64_le(buffer, offset, &flat_page_origin);
	result = VDO_ASSERT(flat_page_origin == VDO_BLOCK_MAP_FLAT_PAGE_ORIGIN,
			    "Flat page origin must be %u (recorded as %llu)",
			    VDO_BLOCK_MAP_FLAT_PAGE_ORIGIN,
			    (unsigned long long) flat_page_origin);
	if (result != VDO_SUCCESS)
		return result;

	decode_u64_le(buffer, offset, &flat_page_count);
	result = VDO_ASSERT(flat_page_count == 0,
			    "Flat page count must be 0 (recorded as %llu)",
			    (unsigned long long) flat_page_count);
	if (result != VDO_SUCCESS)
		return result;

	decode_u64_le(buffer, offset, &root_origin);
	decode_u64_le(buffer, offset, &root_count);

	result = VDO_ASSERT(VDO_BLOCK_MAP_HEADER_2_0.size == *offset - initial_offset,
			    "decoded block map component size must match header size");
	if (result != VDO_SUCCESS)
		return result;

	*state = (struct block_map_state_2_0) {
		.flat_page_origin = flat_page_origin,
		.flat_page_count = flat_page_count,
		.root_origin = root_origin,
		.root_count = root_count,
	};

	return VDO_SUCCESS;
}
414 
/**
 * encode_block_map_state_2_0() - Encode block map component state version 2.0 into a buffer.
 * @buffer: The buffer to encode into.
 * @offset: The offset in the buffer at which to encode; advanced past everything written.
 * @state: The state to encode.
 *
 * The block map header is written first, followed by the four state fields. A mismatch between
 * the number of payload bytes written and the header's size is logged only.
 */
static void encode_block_map_state_2_0(u8 *buffer, size_t *offset,
				       struct block_map_state_2_0 state)
{
	size_t payload_start;

	vdo_encode_header(buffer, offset, &VDO_BLOCK_MAP_HEADER_2_0);
	payload_start = *offset;

	encode_u64_le(buffer, offset, state.flat_page_origin);
	encode_u64_le(buffer, offset, state.flat_page_count);
	encode_u64_le(buffer, offset, state.root_origin);
	encode_u64_le(buffer, offset, state.root_count);

	VDO_ASSERT_LOG_ONLY(VDO_BLOCK_MAP_HEADER_2_0.size == *offset - payload_start,
			    "encoded block map component size must match header size");
}
431 
432 /**
433  * vdo_compute_new_forest_pages() - Compute the number of pages which must be allocated at each
434  *                                  level in order to grow the forest to a new number of entries.
435  * @entries: The new number of entries the block map must address.
436  *
437  * Return: The total number of non-leaf pages required.
438  */
vdo_compute_new_forest_pages(root_count_t root_count,struct boundary * old_sizes,block_count_t entries,struct boundary * new_sizes)439 block_count_t vdo_compute_new_forest_pages(root_count_t root_count,
440 					   struct boundary *old_sizes,
441 					   block_count_t entries,
442 					   struct boundary *new_sizes)
443 {
444 	page_count_t leaf_pages = max(vdo_compute_block_map_page_count(entries), 1U);
445 	page_count_t level_size = DIV_ROUND_UP(leaf_pages, root_count);
446 	block_count_t total_pages = 0;
447 	height_t height;
448 
449 	for (height = 0; height < VDO_BLOCK_MAP_TREE_HEIGHT; height++) {
450 		block_count_t new_pages;
451 
452 		level_size = DIV_ROUND_UP(level_size, VDO_BLOCK_MAP_ENTRIES_PER_PAGE);
453 		new_sizes->levels[height] = level_size;
454 		new_pages = level_size;
455 		if (old_sizes != NULL)
456 			new_pages -= old_sizes->levels[height];
457 		total_pages += (new_pages * root_count);
458 	}
459 
460 	return total_pages;
461 }
462 
463 /**
464  * encode_recovery_journal_state_7_0() - Encode the state of a recovery journal.
465  *
466  * Return: VDO_SUCCESS or an error code.
467  */
encode_recovery_journal_state_7_0(u8 * buffer,size_t * offset,struct recovery_journal_state_7_0 state)468 static void encode_recovery_journal_state_7_0(u8 *buffer, size_t *offset,
469 					      struct recovery_journal_state_7_0 state)
470 {
471 	size_t initial_offset;
472 
473 	vdo_encode_header(buffer, offset, &VDO_RECOVERY_JOURNAL_HEADER_7_0);
474 
475 	initial_offset = *offset;
476 	encode_u64_le(buffer, offset, state.journal_start);
477 	encode_u64_le(buffer, offset, state.logical_blocks_used);
478 	encode_u64_le(buffer, offset, state.block_map_data_blocks);
479 
480 	VDO_ASSERT_LOG_ONLY(VDO_RECOVERY_JOURNAL_HEADER_7_0.size == *offset - initial_offset,
481 			    "encoded recovery journal component size must match header size");
482 }
483 
484 /**
485  * decode_recovery_journal_state_7_0() - Decode the state of a recovery journal saved in a buffer.
486  * @buffer: The buffer containing the saved state.
487  * @state: A pointer to a recovery journal state to hold the result of a successful decode.
488  *
489  * Return: VDO_SUCCESS or an error code.
490  */
decode_recovery_journal_state_7_0(u8 * buffer,size_t * offset,struct recovery_journal_state_7_0 * state)491 static int __must_check decode_recovery_journal_state_7_0(u8 *buffer, size_t *offset,
492 							  struct recovery_journal_state_7_0 *state)
493 {
494 	struct header header;
495 	int result;
496 	size_t initial_offset;
497 	sequence_number_t journal_start;
498 	block_count_t logical_blocks_used, block_map_data_blocks;
499 
500 	vdo_decode_header(buffer, offset, &header);
501 	result = vdo_validate_header(&VDO_RECOVERY_JOURNAL_HEADER_7_0, &header, true,
502 				     __func__);
503 	if (result != VDO_SUCCESS)
504 		return result;
505 
506 	initial_offset = *offset;
507 	decode_u64_le(buffer, offset, &journal_start);
508 	decode_u64_le(buffer, offset, &logical_blocks_used);
509 	decode_u64_le(buffer, offset, &block_map_data_blocks);
510 
511 	result = VDO_ASSERT(VDO_RECOVERY_JOURNAL_HEADER_7_0.size == *offset - initial_offset,
512 			    "decoded recovery journal component size must match header size");
513 	if (result != VDO_SUCCESS)
514 		return result;
515 
516 	*state = (struct recovery_journal_state_7_0) {
517 		.journal_start = journal_start,
518 		.logical_blocks_used = logical_blocks_used,
519 		.block_map_data_blocks = block_map_data_blocks,
520 	};
521 
522 	return VDO_SUCCESS;
523 }
524 
525 /**
526  * vdo_get_journal_operation_name() - Get the name of a journal operation.
527  * @operation: The operation to name.
528  *
529  * Return: The name of the operation.
530  */
vdo_get_journal_operation_name(enum journal_operation operation)531 const char *vdo_get_journal_operation_name(enum journal_operation operation)
532 {
533 	switch (operation) {
534 	case VDO_JOURNAL_DATA_REMAPPING:
535 		return "data remapping";
536 
537 	case VDO_JOURNAL_BLOCK_MAP_REMAPPING:
538 		return "block map remapping";
539 
540 	default:
541 		return "unknown journal operation";
542 	}
543 }
544 
545 /**
546  * encode_slab_depot_state_2_0() - Encode the state of a slab depot into a buffer.
547  */
encode_slab_depot_state_2_0(u8 * buffer,size_t * offset,struct slab_depot_state_2_0 state)548 static void encode_slab_depot_state_2_0(u8 *buffer, size_t *offset,
549 					struct slab_depot_state_2_0 state)
550 {
551 	size_t initial_offset;
552 
553 	vdo_encode_header(buffer, offset, &VDO_SLAB_DEPOT_HEADER_2_0);
554 
555 	initial_offset = *offset;
556 	encode_u64_le(buffer, offset, state.slab_config.slab_blocks);
557 	encode_u64_le(buffer, offset, state.slab_config.data_blocks);
558 	encode_u64_le(buffer, offset, state.slab_config.reference_count_blocks);
559 	encode_u64_le(buffer, offset, state.slab_config.slab_journal_blocks);
560 	encode_u64_le(buffer, offset, state.slab_config.slab_journal_flushing_threshold);
561 	encode_u64_le(buffer, offset, state.slab_config.slab_journal_blocking_threshold);
562 	encode_u64_le(buffer, offset, state.slab_config.slab_journal_scrubbing_threshold);
563 	encode_u64_le(buffer, offset, state.first_block);
564 	encode_u64_le(buffer, offset, state.last_block);
565 	buffer[(*offset)++] = state.zone_count;
566 
567 	VDO_ASSERT_LOG_ONLY(VDO_SLAB_DEPOT_HEADER_2_0.size == *offset - initial_offset,
568 			    "encoded block map component size must match header size");
569 }
570 
571 /**
572  * decode_slab_depot_state_2_0() - Decode slab depot component state version 2.0 from a buffer.
573  *
574  * Return: VDO_SUCCESS or an error code.
575  */
decode_slab_depot_state_2_0(u8 * buffer,size_t * offset,struct slab_depot_state_2_0 * state)576 static int decode_slab_depot_state_2_0(u8 *buffer, size_t *offset,
577 				       struct slab_depot_state_2_0 *state)
578 {
579 	struct header header;
580 	int result;
581 	size_t initial_offset;
582 	struct slab_config slab_config;
583 	block_count_t count;
584 	physical_block_number_t first_block, last_block;
585 	zone_count_t zone_count;
586 
587 	vdo_decode_header(buffer, offset, &header);
588 	result = vdo_validate_header(&VDO_SLAB_DEPOT_HEADER_2_0, &header, true,
589 				     __func__);
590 	if (result != VDO_SUCCESS)
591 		return result;
592 
593 	initial_offset = *offset;
594 	decode_u64_le(buffer, offset, &count);
595 	slab_config.slab_blocks = count;
596 
597 	decode_u64_le(buffer, offset, &count);
598 	slab_config.data_blocks = count;
599 
600 	decode_u64_le(buffer, offset, &count);
601 	slab_config.reference_count_blocks = count;
602 
603 	decode_u64_le(buffer, offset, &count);
604 	slab_config.slab_journal_blocks = count;
605 
606 	decode_u64_le(buffer, offset, &count);
607 	slab_config.slab_journal_flushing_threshold = count;
608 
609 	decode_u64_le(buffer, offset, &count);
610 	slab_config.slab_journal_blocking_threshold = count;
611 
612 	decode_u64_le(buffer, offset, &count);
613 	slab_config.slab_journal_scrubbing_threshold = count;
614 
615 	decode_u64_le(buffer, offset, &first_block);
616 	decode_u64_le(buffer, offset, &last_block);
617 	zone_count = buffer[(*offset)++];
618 
619 	result = VDO_ASSERT(VDO_SLAB_DEPOT_HEADER_2_0.size == *offset - initial_offset,
620 			    "decoded slab depot component size must match header size");
621 	if (result != VDO_SUCCESS)
622 		return result;
623 
624 	*state = (struct slab_depot_state_2_0) {
625 		.slab_config = slab_config,
626 		.first_block = first_block,
627 		.last_block = last_block,
628 		.zone_count = zone_count,
629 	};
630 
631 	return VDO_SUCCESS;
632 }
633 
634 /**
635  * vdo_configure_slab_depot() - Configure the slab depot.
636  * @partition: The slab depot partition
637  * @slab_config: The configuration of a single slab.
638  * @zone_count: The number of zones the depot will use.
639  * @state: The state structure to be configured.
640  *
641  * Configures the slab_depot for the specified storage capacity, finding the number of data blocks
642  * that will fit and still leave room for the depot metadata, then return the saved state for that
643  * configuration.
644  *
645  * Return: VDO_SUCCESS or an error code.
646  */
vdo_configure_slab_depot(const struct partition * partition,struct slab_config slab_config,zone_count_t zone_count,struct slab_depot_state_2_0 * state)647 int vdo_configure_slab_depot(const struct partition *partition,
648 			     struct slab_config slab_config, zone_count_t zone_count,
649 			     struct slab_depot_state_2_0 *state)
650 {
651 	block_count_t total_slab_blocks, total_data_blocks;
652 	size_t slab_count;
653 	physical_block_number_t last_block;
654 	block_count_t slab_size = slab_config.slab_blocks;
655 
656 	vdo_log_debug("slabDepot %s(block_count=%llu, first_block=%llu, slab_size=%llu, zone_count=%u)",
657 		      __func__, (unsigned long long) partition->count,
658 		      (unsigned long long) partition->offset,
659 		      (unsigned long long) slab_size, zone_count);
660 
661 	/* We do not allow runt slabs, so we waste up to a slab's worth. */
662 	slab_count = (partition->count / slab_size);
663 	if (slab_count == 0)
664 		return VDO_NO_SPACE;
665 
666 	if (slab_count > MAX_VDO_SLABS)
667 		return VDO_TOO_MANY_SLABS;
668 
669 	total_slab_blocks = slab_count * slab_config.slab_blocks;
670 	total_data_blocks = slab_count * slab_config.data_blocks;
671 	last_block = partition->offset + total_slab_blocks;
672 
673 	*state = (struct slab_depot_state_2_0) {
674 		.slab_config = slab_config,
675 		.first_block = partition->offset,
676 		.last_block = last_block,
677 		.zone_count = zone_count,
678 	};
679 
680 	vdo_log_debug("slab_depot last_block=%llu, total_data_blocks=%llu, slab_count=%zu, left_over=%llu",
681 		      (unsigned long long) last_block,
682 		      (unsigned long long) total_data_blocks, slab_count,
683 		      (unsigned long long) (partition->count - (last_block - partition->offset)));
684 
685 	return VDO_SUCCESS;
686 }
687 
688 /**
689  * vdo_configure_slab() - Measure and initialize the configuration to use for each slab.
690  * @slab_size: The number of blocks per slab.
691  * @slab_journal_blocks: The number of blocks for the slab journal.
692  * @slab_config: The slab configuration to initialize.
693  *
694  * Return: VDO_SUCCESS or an error code.
695  */
vdo_configure_slab(block_count_t slab_size,block_count_t slab_journal_blocks,struct slab_config * slab_config)696 int vdo_configure_slab(block_count_t slab_size, block_count_t slab_journal_blocks,
697 		       struct slab_config *slab_config)
698 {
699 	block_count_t ref_blocks, meta_blocks, data_blocks;
700 	block_count_t flushing_threshold, remaining, blocking_threshold;
701 	block_count_t minimal_extra_space, scrubbing_threshold;
702 
703 	if (slab_journal_blocks >= slab_size)
704 		return VDO_BAD_CONFIGURATION;
705 
706 	/*
707 	 * This calculation should technically be a recurrence, but the total number of metadata
708 	 * blocks is currently less than a single block of ref_counts, so we'd gain at most one
709 	 * data block in each slab with more iteration.
710 	 */
711 	ref_blocks = vdo_get_saved_reference_count_size(slab_size - slab_journal_blocks);
712 	meta_blocks = (ref_blocks + slab_journal_blocks);
713 
714 	/* Make sure configured slabs are not too small. */
715 	if (meta_blocks >= slab_size)
716 		return VDO_BAD_CONFIGURATION;
717 
718 	data_blocks = slab_size - meta_blocks;
719 
720 	/*
721 	 * Configure the slab journal thresholds. The flush threshold is 168 of 224 blocks in
722 	 * production, or 3/4ths, so we use this ratio for all sizes.
723 	 */
724 	flushing_threshold = ((slab_journal_blocks * 3) + 3) / 4;
725 	/*
726 	 * The blocking threshold should be far enough from the flushing threshold to not produce
727 	 * delays, but far enough from the end of the journal to allow multiple successive recovery
728 	 * failures.
729 	 */
730 	remaining = slab_journal_blocks - flushing_threshold;
731 	blocking_threshold = flushing_threshold + ((remaining * 5) / 7);
732 	/* The scrubbing threshold should be at least 2048 entries before the end of the journal. */
733 	minimal_extra_space = 1 + (MAXIMUM_VDO_USER_VIOS / VDO_SLAB_JOURNAL_FULL_ENTRIES_PER_BLOCK);
734 	scrubbing_threshold = blocking_threshold;
735 	if (slab_journal_blocks > minimal_extra_space)
736 		scrubbing_threshold = slab_journal_blocks - minimal_extra_space;
737 	if (blocking_threshold > scrubbing_threshold)
738 		blocking_threshold = scrubbing_threshold;
739 
740 	*slab_config = (struct slab_config) {
741 		.slab_blocks = slab_size,
742 		.data_blocks = data_blocks,
743 		.reference_count_blocks = ref_blocks,
744 		.slab_journal_blocks = slab_journal_blocks,
745 		.slab_journal_flushing_threshold = flushing_threshold,
746 		.slab_journal_blocking_threshold = blocking_threshold,
747 		.slab_journal_scrubbing_threshold = scrubbing_threshold};
748 	return VDO_SUCCESS;
749 }
750 
751 /**
752  * vdo_decode_slab_journal_entry() - Decode a slab journal entry.
753  * @block: The journal block holding the entry.
754  * @entry_count: The number of the entry.
755  *
756  * Return: The decoded entry.
757  */
vdo_decode_slab_journal_entry(struct packed_slab_journal_block * block,journal_entry_count_t entry_count)758 struct slab_journal_entry vdo_decode_slab_journal_entry(struct packed_slab_journal_block *block,
759 							journal_entry_count_t entry_count)
760 {
761 	struct slab_journal_entry entry =
762 		vdo_unpack_slab_journal_entry(&block->payload.entries[entry_count]);
763 
764 	if (block->header.has_block_map_increments &&
765 	    ((block->payload.full_entries.entry_types[entry_count / 8] &
766 	      ((u8) 1 << (entry_count % 8))) != 0))
767 		entry.operation = VDO_JOURNAL_BLOCK_MAP_REMAPPING;
768 
769 	return entry;
770 }
771 
772 /**
773  * allocate_partition() - Allocate a partition and add it to a layout.
774  * @layout: The layout containing the partition.
775  * @id: The id of the partition.
776  * @offset: The offset into the layout at which the partition begins.
777  * @size: The size of the partition in blocks.
778  *
779  * Return: VDO_SUCCESS or an error.
780  */
allocate_partition(struct layout * layout,u8 id,physical_block_number_t offset,block_count_t size)781 static int allocate_partition(struct layout *layout, u8 id,
782 			      physical_block_number_t offset, block_count_t size)
783 {
784 	struct partition *partition;
785 	int result;
786 
787 	result = vdo_allocate(1, struct partition, __func__, &partition);
788 	if (result != VDO_SUCCESS)
789 		return result;
790 
791 	partition->id = id;
792 	partition->offset = offset;
793 	partition->count = size;
794 	partition->next = layout->head;
795 	layout->head = partition;
796 
797 	return VDO_SUCCESS;
798 }
799 
800 /**
801  * make_partition() - Create a new partition from the beginning or end of the unused space in a
802  *                    layout.
803  * @layout: The layout.
804  * @id: The id of the partition to make.
805  * @size: The number of blocks to carve out; if 0, all remaining space will be used.
806  * @beginning: True if the partition should start at the beginning of the unused space.
807  *
808  * Return: A success or error code, particularly VDO_NO_SPACE if there are fewer than size blocks
809  *         remaining.
810  */
make_partition(struct layout * layout,enum partition_id id,block_count_t size,bool beginning)811 static int __must_check make_partition(struct layout *layout, enum partition_id id,
812 				       block_count_t size, bool beginning)
813 {
814 	int result;
815 	physical_block_number_t offset;
816 	block_count_t free_blocks = layout->last_free - layout->first_free;
817 
818 	if (size == 0) {
819 		if (free_blocks == 0)
820 			return VDO_NO_SPACE;
821 		size = free_blocks;
822 	} else if (size > free_blocks) {
823 		return VDO_NO_SPACE;
824 	}
825 
826 	result = vdo_get_partition(layout, id, NULL);
827 	if (result != VDO_UNKNOWN_PARTITION)
828 		return VDO_PARTITION_EXISTS;
829 
830 	offset = beginning ? layout->first_free : (layout->last_free - size);
831 
832 	result = allocate_partition(layout, id, offset, size);
833 	if (result != VDO_SUCCESS)
834 		return result;
835 
836 	layout->num_partitions++;
837 	if (beginning)
838 		layout->first_free += size;
839 	else
840 		layout->last_free = layout->last_free - size;
841 
842 	return VDO_SUCCESS;
843 }
844 
845 /**
846  * vdo_initialize_layout() - Lay out the partitions of a vdo.
847  * @size: The entire size of the vdo.
848  * @offset: The start of the layout on the underlying storage in blocks.
849  * @block_map_blocks: The size of the block map partition.
850  * @journal_blocks: The size of the journal partition.
851  * @summary_blocks: The size of the slab summary partition.
852  * @layout: The layout to initialize.
853  *
854  * Return: VDO_SUCCESS or an error.
855  */
vdo_initialize_layout(block_count_t size,physical_block_number_t offset,block_count_t block_map_blocks,block_count_t journal_blocks,block_count_t summary_blocks,struct layout * layout)856 int vdo_initialize_layout(block_count_t size, physical_block_number_t offset,
857 			  block_count_t block_map_blocks, block_count_t journal_blocks,
858 			  block_count_t summary_blocks, struct layout *layout)
859 {
860 	int result;
861 	block_count_t necessary_size =
862 		(offset + block_map_blocks + journal_blocks + summary_blocks);
863 
864 	if (necessary_size > size)
865 		return vdo_log_error_strerror(VDO_NO_SPACE,
866 					      "Not enough space to make a VDO");
867 
868 	*layout = (struct layout) {
869 		.start = offset,
870 		.size = size,
871 		.first_free = offset,
872 		.last_free = size,
873 		.num_partitions = 0,
874 		.head = NULL,
875 	};
876 
877 	result = make_partition(layout, VDO_BLOCK_MAP_PARTITION, block_map_blocks, true);
878 	if (result != VDO_SUCCESS) {
879 		vdo_uninitialize_layout(layout);
880 		return result;
881 	}
882 
883 	result = make_partition(layout, VDO_SLAB_SUMMARY_PARTITION, summary_blocks,
884 				false);
885 	if (result != VDO_SUCCESS) {
886 		vdo_uninitialize_layout(layout);
887 		return result;
888 	}
889 
890 	result = make_partition(layout, VDO_RECOVERY_JOURNAL_PARTITION, journal_blocks,
891 				false);
892 	if (result != VDO_SUCCESS) {
893 		vdo_uninitialize_layout(layout);
894 		return result;
895 	}
896 
897 	result = make_partition(layout, VDO_SLAB_DEPOT_PARTITION, 0, true);
898 	if (result != VDO_SUCCESS)
899 		vdo_uninitialize_layout(layout);
900 
901 	return result;
902 }
903 
904 /**
905  * vdo_uninitialize_layout() - Clean up a layout.
906  * @layout: The layout to clean up.
907  *
908  * All partitions created by this layout become invalid pointers.
909  */
vdo_uninitialize_layout(struct layout * layout)910 void vdo_uninitialize_layout(struct layout *layout)
911 {
912 	while (layout->head != NULL) {
913 		struct partition *part = layout->head;
914 
915 		layout->head = part->next;
916 		vdo_free(part);
917 	}
918 
919 	memset(layout, 0, sizeof(struct layout));
920 }
921 
922 /**
923  * vdo_get_partition() - Get a partition by id.
924  * @layout: The layout from which to get a partition.
925  * @id: The id of the partition.
926  * @partition_ptr: A pointer to hold the partition.
927  *
928  * Return: VDO_SUCCESS or an error.
929  */
vdo_get_partition(struct layout * layout,enum partition_id id,struct partition ** partition_ptr)930 int vdo_get_partition(struct layout *layout, enum partition_id id,
931 		      struct partition **partition_ptr)
932 {
933 	struct partition *partition;
934 
935 	for (partition = layout->head; partition != NULL; partition = partition->next) {
936 		if (partition->id == id) {
937 			if (partition_ptr != NULL)
938 				*partition_ptr = partition;
939 			return VDO_SUCCESS;
940 		}
941 	}
942 
943 	return VDO_UNKNOWN_PARTITION;
944 }
945 
946 /**
947  * vdo_get_known_partition() - Get a partition by id from a validated layout.
948  * @layout: The layout from which to get a partition.
949  * @id: The id of the partition.
950  *
951  * Return: the partition
952  */
vdo_get_known_partition(struct layout * layout,enum partition_id id)953 struct partition *vdo_get_known_partition(struct layout *layout, enum partition_id id)
954 {
955 	struct partition *partition;
956 	int result = vdo_get_partition(layout, id, &partition);
957 
958 	VDO_ASSERT_LOG_ONLY(result == VDO_SUCCESS, "layout has expected partition: %u", id);
959 
960 	return partition;
961 }
962 
/**
 * encode_layout() - Encode a layout into a buffer.
 * @buffer: The buffer to encode into.
 * @offset: The position in the buffer at which to write; advanced past everything written.
 * @layout: The layout to encode.
 *
 * Writes a layout header, the free-space bounds and partition count, and then one record per
 * partition in list order.
 */
static void encode_layout(u8 *buffer, size_t *offset, const struct layout *layout)
{
	struct header header = VDO_LAYOUT_HEADER_3_0;
	const struct partition *entry = layout->head;
	size_t initial_offset;

	BUILD_BUG_ON(sizeof(enum partition_id) != sizeof(u8));
	VDO_ASSERT_LOG_ONLY(layout->num_partitions <= U8_MAX,
			    "layout partition count must fit in a byte");

	vdo_encode_header(buffer, offset, &header);

	/* Sizes below are measured from the first byte after the header. */
	initial_offset = *offset;
	encode_u64_le(buffer, offset, layout->first_free);
	encode_u64_le(buffer, offset, layout->last_free);
	buffer[*offset] = layout->num_partitions;
	*offset += sizeof(u8);

	VDO_ASSERT_LOG_ONLY(sizeof(struct layout_3_0) == *offset - initial_offset,
			    "encoded size of a layout header must match structure");

	while (entry != NULL) {
		buffer[*offset] = entry->id;
		*offset += sizeof(u8);
		encode_u64_le(buffer, offset, entry->offset);
		/* This field only exists for backwards compatibility */
		encode_u64_le(buffer, offset, 0);
		encode_u64_le(buffer, offset, entry->count);
		entry = entry->next;
	}

	VDO_ASSERT_LOG_ONLY(header.size == *offset - initial_offset,
			    "encoded size of a layout must match header size");
}
994 
/**
 * decode_layout() - Decode a layout from a buffer and check that it is complete.
 * @buffer: The buffer holding the encoded layout.
 * @offset: The position in the buffer to read from; advanced past everything consumed.
 * @start: The block number recorded as the start of the layout.
 * @size: The size recorded for the layout; @start plus the sizes of the required partitions
 *        must equal it.
 * @layout: The layout to fill in.
 *
 * Once partitions have started to be allocated, any failure cleans the layout up with
 * vdo_uninitialize_layout() before returning.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int decode_layout(u8 *buffer, size_t *offset, physical_block_number_t start,
			 block_count_t size, struct layout *layout)
{
	struct header header;
	struct partition *partition;
	size_t initial_offset;
	physical_block_number_t first_free, last_free;
	physical_block_number_t covered = start;
	u8 partition_count;
	u8 i;
	int result;

	vdo_decode_header(buffer, offset, &header);
	/* Layout is variable size, so only do a minimum size check here. */
	result = vdo_validate_header(&VDO_LAYOUT_HEADER_3_0, &header, false, __func__);
	if (result != VDO_SUCCESS)
		return result;

	/* Fixed-size portion: the free-space bounds followed by a one-byte partition count. */
	initial_offset = *offset;
	decode_u64_le(buffer, offset, &first_free);
	decode_u64_le(buffer, offset, &last_free);
	partition_count = buffer[*offset];
	*offset += sizeof(u8);

	result = VDO_ASSERT(sizeof(struct layout_3_0) == *offset - initial_offset,
			    "decoded size of a layout header must match structure");
	if (result != VDO_SUCCESS)
		return result;

	layout->start = start;
	layout->size = size;
	layout->first_free = first_free;
	layout->last_free = last_free;
	layout->num_partitions = partition_count;

	if (layout->num_partitions > VDO_PARTITION_COUNT) {
		return vdo_log_error_strerror(VDO_UNKNOWN_PARTITION,
					      "layout has extra partitions");
	}

	/* One record per partition: id, offset, an ignored u64, and the block count. */
	for (i = 0; i < partition_count; i++) {
		u64 partition_offset, count;
		u8 id = buffer[*offset];

		*offset += sizeof(u8);
		decode_u64_le(buffer, offset, &partition_offset);
		/* Step over the u64 that encode_layout() always writes as zero. */
		*offset += sizeof(u64);
		decode_u64_le(buffer, offset, &count);

		result = allocate_partition(layout, id, partition_offset, count);
		if (result != VDO_SUCCESS) {
			vdo_uninitialize_layout(layout);
			return result;
		}
	}

	/* Validate that the layout has all (and only) the required partitions */
	for (i = 0; i < VDO_PARTITION_COUNT; i++) {
		result = vdo_get_partition(layout, REQUIRED_PARTITIONS[i], &partition);
		if (result == VDO_SUCCESS) {
			covered += partition->count;
			continue;
		}

		vdo_uninitialize_layout(layout);
		return vdo_log_error_strerror(result,
					      "layout is missing required partition %u",
					      REQUIRED_PARTITIONS[i]);
	}

	if (covered == size)
		return VDO_SUCCESS;

	vdo_uninitialize_layout(layout);
	return vdo_log_error_strerror(UDS_BAD_STATE,
				      "partitions do not cover the layout");
}
1076 
1077 /**
1078  * pack_vdo_config() - Convert a vdo_config to its packed on-disk representation.
1079  * @config: The vdo config to convert.
1080  *
1081  * Return: The platform-independent representation of the config.
1082  */
pack_vdo_config(struct vdo_config config)1083 static struct packed_vdo_config pack_vdo_config(struct vdo_config config)
1084 {
1085 	return (struct packed_vdo_config) {
1086 		.logical_blocks = __cpu_to_le64(config.logical_blocks),
1087 		.physical_blocks = __cpu_to_le64(config.physical_blocks),
1088 		.slab_size = __cpu_to_le64(config.slab_size),
1089 		.recovery_journal_size = __cpu_to_le64(config.recovery_journal_size),
1090 		.slab_journal_blocks = __cpu_to_le64(config.slab_journal_blocks),
1091 	};
1092 }
1093 
1094 /**
1095  * pack_vdo_component() - Convert a vdo_component to its packed on-disk representation.
1096  * @component: The VDO component data to convert.
1097  *
1098  * Return: The platform-independent representation of the component.
1099  */
pack_vdo_component(const struct vdo_component component)1100 static struct packed_vdo_component_41_0 pack_vdo_component(const struct vdo_component component)
1101 {
1102 	return (struct packed_vdo_component_41_0) {
1103 		.state = __cpu_to_le32(component.state),
1104 		.complete_recoveries = __cpu_to_le64(component.complete_recoveries),
1105 		.read_only_recoveries = __cpu_to_le64(component.read_only_recoveries),
1106 		.config = pack_vdo_config(component.config),
1107 		.nonce = __cpu_to_le64(component.nonce),
1108 	};
1109 }
1110 
/**
 * encode_vdo_component() - Encode the component data for the vdo itself.
 * @buffer: The buffer to encode into.
 * @offset: The position in the buffer at which to write; advanced past everything written.
 * @component: The component data to encode.
 *
 * Writes the component data version number followed by the packed component.
 */
static void encode_vdo_component(u8 *buffer, size_t *offset,
				 struct vdo_component component)
{
	const struct packed_vdo_component_41_0 on_disk = pack_vdo_component(component);

	encode_version_number(buffer, offset, VDO_COMPONENT_DATA_41_0);

	/* The packed form is copied verbatim immediately after the version number. */
	memcpy(&buffer[*offset], &on_disk, sizeof(on_disk));
	*offset += sizeof(on_disk);
}
1121 
1122 /**
1123  * unpack_vdo_config() - Convert a packed_vdo_config to its native in-memory representation.
1124  * @config: The packed vdo config to convert.
1125  *
1126  * Return: The native in-memory representation of the vdo config.
1127  */
unpack_vdo_config(struct packed_vdo_config config)1128 static struct vdo_config unpack_vdo_config(struct packed_vdo_config config)
1129 {
1130 	return (struct vdo_config) {
1131 		.logical_blocks = __le64_to_cpu(config.logical_blocks),
1132 		.physical_blocks = __le64_to_cpu(config.physical_blocks),
1133 		.slab_size = __le64_to_cpu(config.slab_size),
1134 		.recovery_journal_size = __le64_to_cpu(config.recovery_journal_size),
1135 		.slab_journal_blocks = __le64_to_cpu(config.slab_journal_blocks),
1136 	};
1137 }
1138 
1139 /**
1140  * unpack_vdo_component_41_0() - Convert a packed_vdo_component_41_0 to its native in-memory
1141  *				 representation.
1142  * @component: The packed vdo component data to convert.
1143  *
1144  * Return: The native in-memory representation of the component.
1145  */
unpack_vdo_component_41_0(struct packed_vdo_component_41_0 component)1146 static struct vdo_component unpack_vdo_component_41_0(struct packed_vdo_component_41_0 component)
1147 {
1148 	return (struct vdo_component) {
1149 		.state = __le32_to_cpu(component.state),
1150 		.complete_recoveries = __le64_to_cpu(component.complete_recoveries),
1151 		.read_only_recoveries = __le64_to_cpu(component.read_only_recoveries),
1152 		.config = unpack_vdo_config(component.config),
1153 		.nonce = __le64_to_cpu(component.nonce),
1154 	};
1155 }
1156 
1157 /**
1158  * decode_vdo_component() - Decode the component data for the vdo itself out of the super block.
1159  *
1160  * Return: VDO_SUCCESS or an error.
1161  */
decode_vdo_component(u8 * buffer,size_t * offset,struct vdo_component * component)1162 static int decode_vdo_component(u8 *buffer, size_t *offset, struct vdo_component *component)
1163 {
1164 	struct version_number version;
1165 	struct packed_vdo_component_41_0 packed;
1166 	int result;
1167 
1168 	decode_version_number(buffer, offset, &version);
1169 	result = validate_version(version, VDO_COMPONENT_DATA_41_0,
1170 				  "VDO component data");
1171 	if (result != VDO_SUCCESS)
1172 		return result;
1173 
1174 	memcpy(&packed, buffer + *offset, sizeof(packed));
1175 	*offset += sizeof(packed);
1176 	*component = unpack_vdo_component_41_0(packed);
1177 	return VDO_SUCCESS;
1178 }
1179 
1180 /**
1181  * vdo_validate_config() - Validate constraints on a VDO config.
1182  * @config: The VDO config.
1183  * @physical_block_count: The minimum block count of the underlying storage.
1184  * @logical_block_count: The expected logical size of the VDO, or 0 if the logical size may be
1185  *			 unspecified.
1186  *
1187  * Return: A success or error code.
1188  */
vdo_validate_config(const struct vdo_config * config,block_count_t physical_block_count,block_count_t logical_block_count)1189 int vdo_validate_config(const struct vdo_config *config,
1190 			block_count_t physical_block_count,
1191 			block_count_t logical_block_count)
1192 {
1193 	struct slab_config slab_config;
1194 	int result;
1195 
1196 	result = VDO_ASSERT(config->slab_size > 0, "slab size unspecified");
1197 	if (result != VDO_SUCCESS)
1198 		return result;
1199 
1200 	result = VDO_ASSERT(is_power_of_2(config->slab_size),
1201 			    "slab size must be a power of two");
1202 	if (result != VDO_SUCCESS)
1203 		return result;
1204 
1205 	result = VDO_ASSERT(config->slab_size <= (1 << MAX_VDO_SLAB_BITS),
1206 			    "slab size must be less than or equal to 2^%d",
1207 			    MAX_VDO_SLAB_BITS);
1208 	if (result != VDO_SUCCESS)
1209 		return result;
1210 
1211 	result = VDO_ASSERT(config->slab_journal_blocks <= config->slab_size,
1212 			    "slab journal size is within expected bound");
1213 	if (result != VDO_SUCCESS)
1214 		return result;
1215 
1216 	result = vdo_configure_slab(config->slab_size, config->slab_journal_blocks,
1217 				    &slab_config);
1218 	if (result != VDO_SUCCESS)
1219 		return result;
1220 
1221 	result = VDO_ASSERT((slab_config.data_blocks >= 1),
1222 			    "slab must be able to hold at least one block");
1223 	if (result != VDO_SUCCESS)
1224 		return result;
1225 
1226 	result = VDO_ASSERT(config->physical_blocks > 0, "physical blocks unspecified");
1227 	if (result != VDO_SUCCESS)
1228 		return result;
1229 
1230 	result = VDO_ASSERT(config->physical_blocks <= MAXIMUM_VDO_PHYSICAL_BLOCKS,
1231 			    "physical block count %llu exceeds maximum %llu",
1232 			    (unsigned long long) config->physical_blocks,
1233 			    (unsigned long long) MAXIMUM_VDO_PHYSICAL_BLOCKS);
1234 	if (result != VDO_SUCCESS)
1235 		return VDO_OUT_OF_RANGE;
1236 
1237 	if (physical_block_count != config->physical_blocks) {
1238 		vdo_log_error("A physical size of %llu blocks was specified, not the %llu blocks configured in the vdo super block",
1239 			      (unsigned long long) physical_block_count,
1240 			      (unsigned long long) config->physical_blocks);
1241 		return VDO_PARAMETER_MISMATCH;
1242 	}
1243 
1244 	if (logical_block_count > 0) {
1245 		result = VDO_ASSERT((config->logical_blocks > 0),
1246 				    "logical blocks unspecified");
1247 		if (result != VDO_SUCCESS)
1248 			return result;
1249 
1250 		if (logical_block_count != config->logical_blocks) {
1251 			vdo_log_error("A logical size of %llu blocks was specified, but that differs from the %llu blocks configured in the vdo super block",
1252 				      (unsigned long long) logical_block_count,
1253 				      (unsigned long long) config->logical_blocks);
1254 			return VDO_PARAMETER_MISMATCH;
1255 		}
1256 	}
1257 
1258 	result = VDO_ASSERT(config->logical_blocks <= MAXIMUM_VDO_LOGICAL_BLOCKS,
1259 			    "logical blocks too large");
1260 	if (result != VDO_SUCCESS)
1261 		return result;
1262 
1263 	result = VDO_ASSERT(config->recovery_journal_size > 0,
1264 			    "recovery journal size unspecified");
1265 	if (result != VDO_SUCCESS)
1266 		return result;
1267 
1268 	result = VDO_ASSERT(is_power_of_2(config->recovery_journal_size),
1269 			    "recovery journal size must be a power of two");
1270 	if (result != VDO_SUCCESS)
1271 		return result;
1272 
1273 	return result;
1274 }
1275 
1276 /**
1277  * vdo_destroy_component_states() - Clean up any allocations in a vdo_component_states.
1278  * @states: The component states to destroy.
1279  */
vdo_destroy_component_states(struct vdo_component_states * states)1280 void vdo_destroy_component_states(struct vdo_component_states *states)
1281 {
1282 	if (states == NULL)
1283 		return;
1284 
1285 	vdo_uninitialize_layout(&states->layout);
1286 }
1287 
1288 /**
1289  * decode_components() - Decode the components now that we know the component data is a version we
1290  *                       understand.
1291  * @buffer: The buffer being decoded.
1292  * @offset: The offset to start decoding from.
1293  * @geometry: The vdo geometry
1294  * @states: An object to hold the successfully decoded state.
1295  *
1296  * Return: VDO_SUCCESS or an error.
1297  */
decode_components(u8 * buffer,size_t * offset,struct volume_geometry * geometry,struct vdo_component_states * states)1298 static int __must_check decode_components(u8 *buffer, size_t *offset,
1299 					  struct volume_geometry *geometry,
1300 					  struct vdo_component_states *states)
1301 {
1302 	int result;
1303 
1304 	decode_vdo_component(buffer, offset, &states->vdo);
1305 
1306 	result = decode_layout(buffer, offset, vdo_get_data_region_start(*geometry) + 1,
1307 			       states->vdo.config.physical_blocks, &states->layout);
1308 	if (result != VDO_SUCCESS)
1309 		return result;
1310 
1311 	result = decode_recovery_journal_state_7_0(buffer, offset,
1312 						   &states->recovery_journal);
1313 	if (result != VDO_SUCCESS)
1314 		return result;
1315 
1316 	result = decode_slab_depot_state_2_0(buffer, offset, &states->slab_depot);
1317 	if (result != VDO_SUCCESS)
1318 		return result;
1319 
1320 	result = decode_block_map_state_2_0(buffer, offset, &states->block_map);
1321 	if (result != VDO_SUCCESS)
1322 		return result;
1323 
1324 	VDO_ASSERT_LOG_ONLY(*offset == VDO_COMPONENT_DATA_OFFSET + VDO_COMPONENT_DATA_SIZE,
1325 			    "All decoded component data was used");
1326 	return VDO_SUCCESS;
1327 }
1328 
1329 /**
1330  * vdo_decode_component_states() - Decode the payload of a super block.
1331  * @buffer: The buffer containing the encoded super block contents.
1332  * @geometry: The vdo geometry
1333  * @states: A pointer to hold the decoded states.
1334  *
1335  * Return: VDO_SUCCESS or an error.
1336  */
vdo_decode_component_states(u8 * buffer,struct volume_geometry * geometry,struct vdo_component_states * states)1337 int vdo_decode_component_states(u8 *buffer, struct volume_geometry *geometry,
1338 				struct vdo_component_states *states)
1339 {
1340 	int result;
1341 	size_t offset = VDO_COMPONENT_DATA_OFFSET;
1342 
1343 	/* This is for backwards compatibility. */
1344 	decode_u32_le(buffer, &offset, &states->unused);
1345 
1346 	/* Check the VDO volume version */
1347 	decode_version_number(buffer, &offset, &states->volume_version);
1348 	result = validate_version(VDO_VOLUME_VERSION_67_0, states->volume_version,
1349 				  "volume");
1350 	if (result != VDO_SUCCESS)
1351 		return result;
1352 
1353 	result = decode_components(buffer, &offset, geometry, states);
1354 	if (result != VDO_SUCCESS)
1355 		vdo_uninitialize_layout(&states->layout);
1356 
1357 	return result;
1358 }
1359 
1360 /**
1361  * vdo_validate_component_states() - Validate the decoded super block configuration.
1362  * @states: The state decoded from the super block.
1363  * @geometry_nonce: The nonce from the geometry block.
1364  * @physical_size: The minimum block count of the underlying storage.
1365  * @logical_size: The expected logical size of the VDO, or 0 if the logical size may be
1366  *                unspecified.
1367  *
1368  * Return: VDO_SUCCESS or an error if the configuration is invalid.
1369  */
vdo_validate_component_states(struct vdo_component_states * states,nonce_t geometry_nonce,block_count_t physical_size,block_count_t logical_size)1370 int vdo_validate_component_states(struct vdo_component_states *states,
1371 				  nonce_t geometry_nonce, block_count_t physical_size,
1372 				  block_count_t logical_size)
1373 {
1374 	if (geometry_nonce != states->vdo.nonce) {
1375 		return vdo_log_error_strerror(VDO_BAD_NONCE,
1376 					      "Geometry nonce %llu does not match superblock nonce %llu",
1377 					      (unsigned long long) geometry_nonce,
1378 					      (unsigned long long) states->vdo.nonce);
1379 	}
1380 
1381 	return vdo_validate_config(&states->vdo.config, physical_size, logical_size);
1382 }
1383 
1384 /**
1385  * vdo_encode_component_states() - Encode the state of all vdo components in the super block.
1386  */
vdo_encode_component_states(u8 * buffer,size_t * offset,const struct vdo_component_states * states)1387 static void vdo_encode_component_states(u8 *buffer, size_t *offset,
1388 					const struct vdo_component_states *states)
1389 {
1390 	/* This is for backwards compatibility. */
1391 	encode_u32_le(buffer, offset, states->unused);
1392 	encode_version_number(buffer, offset, states->volume_version);
1393 	encode_vdo_component(buffer, offset, states->vdo);
1394 	encode_layout(buffer, offset, &states->layout);
1395 	encode_recovery_journal_state_7_0(buffer, offset, states->recovery_journal);
1396 	encode_slab_depot_state_2_0(buffer, offset, states->slab_depot);
1397 	encode_block_map_state_2_0(buffer, offset, states->block_map);
1398 
1399 	VDO_ASSERT_LOG_ONLY(*offset == VDO_COMPONENT_DATA_OFFSET + VDO_COMPONENT_DATA_SIZE,
1400 			    "All super block component data was encoded");
1401 }
1402 
1403 /**
1404  * vdo_encode_super_block() - Encode a super block into its on-disk representation.
1405  */
vdo_encode_super_block(u8 * buffer,struct vdo_component_states * states)1406 void vdo_encode_super_block(u8 *buffer, struct vdo_component_states *states)
1407 {
1408 	u32 checksum;
1409 	struct header header = SUPER_BLOCK_HEADER_12_0;
1410 	size_t offset = 0;
1411 
1412 	header.size += VDO_COMPONENT_DATA_SIZE;
1413 	vdo_encode_header(buffer, &offset, &header);
1414 	vdo_encode_component_states(buffer, &offset, states);
1415 
1416 	checksum = vdo_crc32(buffer, offset);
1417 	encode_u32_le(buffer, &offset, checksum);
1418 
1419 	/*
1420 	 * Even though the buffer is a full block, to avoid the potential corruption from a torn
1421 	 * write, the entire encoding must fit in the first sector.
1422 	 */
1423 	VDO_ASSERT_LOG_ONLY(offset <= VDO_SECTOR_SIZE,
1424 			    "entire superblock must fit in one sector");
1425 }
1426 
1427 /**
1428  * vdo_decode_super_block() - Decode a super block from its on-disk representation.
1429  */
vdo_decode_super_block(u8 * buffer)1430 int vdo_decode_super_block(u8 *buffer)
1431 {
1432 	struct header header;
1433 	int result;
1434 	u32 checksum, saved_checksum;
1435 	size_t offset = 0;
1436 
1437 	/* Decode and validate the header. */
1438 	vdo_decode_header(buffer, &offset, &header);
1439 	result = vdo_validate_header(&SUPER_BLOCK_HEADER_12_0, &header, false, __func__);
1440 	if (result != VDO_SUCCESS)
1441 		return result;
1442 
1443 	if (header.size > VDO_COMPONENT_DATA_SIZE + sizeof(u32)) {
1444 		/*
1445 		 * We can't check release version or checksum until we know the content size, so we
1446 		 * have to assume a version mismatch on unexpected values.
1447 		 */
1448 		return vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION,
1449 					      "super block contents too large: %zu",
1450 					      header.size);
1451 	}
1452 
1453 	/* Skip past the component data for now, to verify the checksum. */
1454 	offset += VDO_COMPONENT_DATA_SIZE;
1455 
1456 	checksum = vdo_crc32(buffer, offset);
1457 	decode_u32_le(buffer, &offset, &saved_checksum);
1458 
1459 	result = VDO_ASSERT(offset == VDO_SUPER_BLOCK_FIXED_SIZE + VDO_COMPONENT_DATA_SIZE,
1460 			    "must have decoded entire superblock payload");
1461 	if (result != VDO_SUCCESS)
1462 		return result;
1463 
1464 	return ((checksum != saved_checksum) ? VDO_CHECKSUM_MISMATCH : VDO_SUCCESS);
1465 }
1466