xref: /linux/drivers/md/dm-vdo/indexer/index-layout.c (revision 1f20a5769446a1acae67ac9e63d07a594829a789)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright 2023 Red Hat
4  */
5 
6 #include "index-layout.h"
7 
8 #include <linux/random.h>
9 
10 #include "logger.h"
11 #include "memory-alloc.h"
12 #include "murmurhash3.h"
13 #include "numeric.h"
14 #include "time-utils.h"
15 
16 #include "config.h"
17 #include "open-chapter.h"
18 #include "volume-index.h"
19 
20 /*
 * The UDS layout on storage media is divided into a number of fixed-size regions, the sizes of
 * which are computed when the index is created. Every header and region begins on a 4K block
 * boundary. Save regions are further sub-divided into regions of their own.
24  *
 * Each region has a kind and an instance number. Some kinds only have one instance and therefore
 * use RL_SOLE_INSTANCE (65535, which is -1 as a u16) as the instance number. The RL_KIND_INDEX
 * kind used to use instances to represent sub-indices; now, however, there is only ever one
 * sub-index and therefore one instance. The RL_KIND_VOLUME_INDEX kind uses instances to record
 * which zone is being saved.
29  *
30  * Every region header has a type and version.
31  *
32  *     +-+-+---------+--------+--------+-+
33  *     | | |   I N D E X  0   101, 0   | |
34  *     |H|C+---------+--------+--------+S|
35  *     |D|f| Volume  | Save   | Save   |e|
36  *     |R|g| Region  | Region | Region |a|
37  *     | | | 201, -1 | 202, 0 | 202, 1 |l|
 *     +-+-+---------+--------+--------+-+
39  *
40  * The header contains the encoded region layout table as well as some index configuration data.
41  * The sub-index region and its subdivisions are maintained in the same table.
42  *
43  * There are two save regions to preserve the old state in case saving the new state is incomplete.
44  * They are used in alternation. Each save region is further divided into sub-regions.
45  *
46  *     +-+-----+------+------+-----+-----+
47  *     |H| IPM | MI   | MI   |     | OC  |
48  *     |D|     | zone | zone | ... |     |
49  *     |R| 301 | 302  | 302  |     | 303 |
50  *     | | -1  |  0   |  1   |     | -1  |
51  *     +-+-----+------+------+-----+-----+
52  *
53  * The header contains the encoded region layout table as well as index state data for that save.
54  * Each save also has a unique nonce.
55  */
56 
/* Size in bytes of the magic label stored in the super block (see LAYOUT_MAGIC). */
#define MAGIC_SIZE 32
/* Size in bytes of the unique data from which the primary nonce is hashed. */
#define NONCE_INFO_SIZE 32
/* Number of save regions in a newly created layout; saves are used in alternation. */
#define MAX_SAVES 2
60 
/*
 * The kind of a region, stored in the kind field of struct layout_region and written to storage
 * by encode_region_table(). The 1xx kinds are regions of the whole layout, the 2xx kinds are
 * regions of the sub-index, and the 3xx kinds are regions within a single save.
 */
enum region_kind {
	/* The free space region of a save */
	RL_KIND_EMPTY = 0,
	/* The header block of the whole layout or of a save */
	RL_KIND_HEADER = 1,
	/* The block holding the index configuration */
	RL_KIND_CONFIG = 100,
	/* The sub-index region containing the volume and the saves */
	RL_KIND_INDEX = 101,
	/* The final block of the layout */
	RL_KIND_SEAL = 102,
	/* The volume region of the sub-index */
	RL_KIND_VOLUME = 201,
	/* One save region of the sub-index; the instance is the save number */
	RL_KIND_SAVE = 202,
	/* The saved index page map within a save */
	RL_KIND_INDEX_PAGE_MAP = 301,
	/* One saved volume index zone within a save; the instance is the zone number */
	RL_KIND_VOLUME_INDEX = 302,
	/* The saved open chapter within a save */
	RL_KIND_OPEN_CHAPTER = 303,
};
73 
/*
 * The type recorded in the type field of struct region_header. Some region types are historical
 * and are no longer used.
 */
enum region_type {
	RH_TYPE_FREE = 0, /* unused */
	/* The region table of the whole layout, written with the super block data */
	RH_TYPE_SUPER = 1,
	/* The region table of a save which contains saved volume index zones */
	RH_TYPE_SAVE = 2,
	RH_TYPE_CHECKPOINT = 3, /* unused */
	/* The region table of a save which has no zones, i.e. holds no saved state */
	RH_TYPE_UNSAVED = 4,
};
82 
/*
 * The instance number for kinds of region which only ever have one instance. This is the value
 * of -1 when stored in the u16 instance field of struct layout_region.
 */
#define RL_SOLE_INSTANCE 65535
84 
85 /*
86  * Super block version 2 is the first released version.
87  *
88  * Super block version 3 is the normal version used from RHEL 8.2 onwards.
89  *
90  * Super block versions 4 through 6 were incremental development versions and
91  * are not supported.
92  *
 * Super block version 7 is used for volumes which have been reduced in size by one chapter in
 * order to make room to prepend LVM metadata to a volume originally created without LVM. This
 * allows the index to retain most of its deduplication records.
96  */
/* The lowest super block version named here. NOTE(review): its use is not visible in this part of the file. */
#define SUPER_VERSION_MINIMUM 3
/* The version written into a newly generated super block. */
#define SUPER_VERSION_CURRENT 3
/* The highest super block version named here: the converted format for reduced volumes. */
#define SUPER_VERSION_MAXIMUM 7
100 
/*
 * The magic label copied into the super block. The text is exactly MAGIC_SIZE characters long,
 * so the array holds no terminating NUL.
 */
static const u8 LAYOUT_MAGIC[MAGIC_SIZE] = "*ALBIREO*SINGLE*FILE*LAYOUT*001*";
/* The magic number written at the start of every encoded region table. */
static const u64 REGION_MAGIC = 0x416c6252676e3031; /* 'AlbRgn01' */
103 
/*
 * The header of a region table. encode_region_table() writes these fields in this order as
 * little-endian values.
 */
struct region_header {
	/* Set to REGION_MAGIC */
	u64 magic;
	/* The number of blocks in the area the table describes (a save or the whole layout) */
	u64 region_blocks;
	/* An enum region_type value */
	u16 type;
	/* Currently always version 1 */
	u16 version;
	/* The number of layout_region entries in the table */
	u16 region_count;
	/* The number of bytes of additional data accounted for after the region entries */
	u16 payload;
};
113 
/*
 * One region of the layout. encode_region_table() writes these fields in this order as
 * little-endian values.
 */
struct layout_region {
	/* The first block of the region */
	u64 start_block;
	/* The number of blocks in the region */
	u64 block_count;
	/* Always encoded as zero */
	u32 __unused;
	/* An enum region_kind value */
	u16 kind;
	/* The instance number, or RL_SOLE_INSTANCE for kinds with a single instance */
	u16 instance;
};
121 
/* An in-memory region table: a header followed by header.region_count regions. */
struct region_table {
	/* The size in bytes of the header, the payload, and all of the region entries */
	size_t encoded_size;
	struct region_header header;
	/* Allocated with the table; holds header.region_count entries */
	struct layout_region regions[];
};
127 
/* The data identifying a save, written after the region table in the header of every save. */
struct index_save_data {
	/* When the save was set up, in milliseconds; zero marks a save with no valid state */
	u64 timestamp;
	/* The nonce computed by generate_index_save_nonce() for this save */
	u64 nonce;
	/* Currently always version 1 */
	u32 version;
	/* Always encoded as zero */
	u32 unused__;
};
135 
/* The version marker at the front of the index state data of a save. */
struct index_state_version {
	s32 signature;
	s32 version_id;
};
140 
/* The version marker which write_index_save_header() writes ahead of the index state data. */
static const struct index_state_version INDEX_STATE_VERSION_301 = {
	.signature  = -1,
	.version_id = 301,
};
145 
/*
 * The index state recorded in a save which has zones. uds_load_index_state() copies the chapter
 * numbers and the last save value from here into the index.
 */
struct index_state_data301 {
	struct index_state_version version;
	/* Loaded into the newest_virtual_chapter of the index */
	u64 newest_chapter;
	/* Loaded into the oldest_virtual_chapter of the index */
	u64 oldest_chapter;
	/* Loaded into the last_save field of the index */
	u64 last_save;
	/* These last two fields are encoded together as a single zero u64 */
	u32 unused;
	u32 padding;
};
154 
/* The in-memory description of one save region and of the sub-regions it is divided into. */
struct index_save_layout {
	/* The number of volume index zones saved; zero means the save holds no state */
	unsigned int zone_count;
	/* The whole save region, as recorded in the region table of the layout */
	struct layout_region index_save;
	/* The first block of the save, holding its region table and save data */
	struct layout_region header;
	struct layout_region index_page_map;
	/* Any blocks of the save which are not assigned to another sub-region */
	struct layout_region free_space;
	/* Only the first zone_count entries are in use */
	struct layout_region volume_index_zones[MAX_ZONES];
	struct layout_region open_chapter;
	struct index_save_data save_data;
	struct index_state_data301 state_data;
};
166 
/* The in-memory description of the sub-index: the volume and the save regions. */
struct sub_index_layout {
	/* Derived from the super block nonce by define_sub_index_nonce() */
	u64 nonce;
	/* The region spanning the volume and all of the saves */
	struct layout_region sub_index;
	struct layout_region volume;
	/* An allocated array with one entry per save */
	struct index_save_layout *saves;
};
173 
/*
 * The super block data written after the region table of the layout. write_layout_header()
 * writes these fields in this order; the integers are little-endian.
 */
struct super_block_data {
	/* A copy of LAYOUT_MAGIC */
	u8 magic_label[MAGIC_SIZE];
	/* The unique data from which the nonce is hashed */
	u8 nonce_info[NONCE_INFO_SIZE];
	/* The primary nonce of the layout */
	u64 nonce;
	/* The super block version; see the SUPER_VERSION definitions */
	u32 version;
	/* The block size in bytes */
	u32 block_size;
	/* Set to 1 when a super block is generated */
	u16 index_count;
	/* The number of save regions in the sub-index */
	u16 max_saves;
	/* Padding reflects a blank field on permanent storage */
	u8 padding[4];
	/* The number of blocks in the open chapter region of each save */
	u64 open_chapter_blocks;
	/* The number of blocks in the index page map region of each save */
	u64 page_map_blocks;
	/* The last two fields are only written to storage for a version 7 super block */
	u64 volume_offset;
	/* Subtracted from the start block of a region when a region reader or writer is opened */
	u64 start_offset;
};
189 
/* The in-memory description of the entire layout of an index on storage. */
struct index_layout {
	/* The factory used to make the buffered readers and writers for regions */
	struct io_factory *factory;
	size_t factory_size;
	/* The offset of the layout in bytes; divided by the block size to find the first block */
	off_t offset;
	struct super_block_data super;
	/* The first block, holding the region table and the super block data */
	struct layout_region header;
	/* The block holding the index configuration */
	struct layout_region config;
	struct sub_index_layout index;
	/* The single block following the sub-index */
	struct layout_region seal;
	/* The number of blocks in the whole layout */
	u64 total_blocks;
};
201 
/* The sizes of the parts of a layout, as computed by compute_sizes(). */
struct save_layout_sizes {
	/* The number of save regions */
	unsigned int save_count;
	/* The block size in bytes */
	size_t block_size;
	/* The number of blocks in the volume region */
	u64 volume_blocks;
	/* The number of blocks needed to save the volume index */
	u64 volume_index_blocks;
	/* The number of blocks needed to save the index page map */
	u64 page_map_blocks;
	/* The number of blocks needed to save the open chapter */
	u64 open_chapter_blocks;
	/* The number of blocks in one save region, including its header block */
	u64 save_blocks;
	/* The number of blocks in the volume and all of the saves together */
	u64 sub_index_blocks;
	/* The number of blocks in the whole layout */
	u64 total_blocks;
	/* The size of the whole layout in bytes */
	size_t total_size;
};
214 
215 static inline bool is_converted_super_block(struct super_block_data *super)
216 {
217 	return super->version == 7;
218 }
219 
220 static int __must_check compute_sizes(const struct uds_configuration *config,
221 				      struct save_layout_sizes *sls)
222 {
223 	int result;
224 	struct index_geometry *geometry = config->geometry;
225 
226 	memset(sls, 0, sizeof(*sls));
227 	sls->save_count = MAX_SAVES;
228 	sls->block_size = UDS_BLOCK_SIZE;
229 	sls->volume_blocks = geometry->bytes_per_volume / sls->block_size;
230 
231 	result = uds_compute_volume_index_save_blocks(config, sls->block_size,
232 						      &sls->volume_index_blocks);
233 	if (result != UDS_SUCCESS)
234 		return vdo_log_error_strerror(result, "cannot compute index save size");
235 
236 	sls->page_map_blocks =
237 		DIV_ROUND_UP(uds_compute_index_page_map_save_size(geometry),
238 			     sls->block_size);
239 	sls->open_chapter_blocks =
240 		DIV_ROUND_UP(uds_compute_saved_open_chapter_size(geometry),
241 			     sls->block_size);
242 	sls->save_blocks =
243 		1 + (sls->volume_index_blocks + sls->page_map_blocks + sls->open_chapter_blocks);
244 	sls->sub_index_blocks = sls->volume_blocks + (sls->save_count * sls->save_blocks);
245 	sls->total_blocks = 3 + sls->sub_index_blocks;
246 	sls->total_size = sls->total_blocks * sls->block_size;
247 
248 	return UDS_SUCCESS;
249 }
250 
251 int uds_compute_index_size(const struct uds_parameters *parameters, u64 *index_size)
252 {
253 	int result;
254 	struct uds_configuration *index_config;
255 	struct save_layout_sizes sizes;
256 
257 	if (index_size == NULL) {
258 		vdo_log_error("Missing output size pointer");
259 		return -EINVAL;
260 	}
261 
262 	result = uds_make_configuration(parameters, &index_config);
263 	if (result != UDS_SUCCESS) {
264 		vdo_log_error_strerror(result, "cannot compute index size");
265 		return uds_status_to_errno(result);
266 	}
267 
268 	result = compute_sizes(index_config, &sizes);
269 	uds_free_configuration(index_config);
270 	if (result != UDS_SUCCESS)
271 		return uds_status_to_errno(result);
272 
273 	*index_size = sizes.total_size;
274 	return UDS_SUCCESS;
275 }
276 
277 /* Create unique data using the current time and a pseudorandom number. */
278 static void create_unique_nonce_data(u8 *buffer)
279 {
280 	ktime_t now = current_time_ns(CLOCK_REALTIME);
281 	u32 rand;
282 	size_t offset = 0;
283 
284 	get_random_bytes(&rand, sizeof(u32));
285 	memcpy(buffer + offset, &now, sizeof(now));
286 	offset += sizeof(now);
287 	memcpy(buffer + offset, &rand, sizeof(rand));
288 	offset += sizeof(rand);
289 	while (offset < NONCE_INFO_SIZE) {
290 		size_t len = min(NONCE_INFO_SIZE - offset, offset);
291 
292 		memcpy(buffer + offset, buffer, len);
293 		offset += len;
294 	}
295 }
296 
297 static u64 hash_stuff(u64 start, const void *data, size_t len)
298 {
299 	u32 seed = start ^ (start >> 27);
300 	u8 hash_buffer[16];
301 
302 	murmurhash3_128(data, len, seed, hash_buffer);
303 	return get_unaligned_le64(hash_buffer + 4);
304 }
305 
306 /* Generate a primary nonce from the provided data. */
307 static u64 generate_primary_nonce(const void *data, size_t len)
308 {
309 	return hash_stuff(0xa1b1e0fc, data, len);
310 }
311 
312 /*
313  * Deterministically generate a secondary nonce from an existing nonce and some arbitrary data by
314  * hashing the original nonce and the data to produce a new nonce.
315  */
316 static u64 generate_secondary_nonce(u64 nonce, const void *data, size_t len)
317 {
318 	return hash_stuff(nonce + 1, data, len);
319 }
320 
321 static int __must_check open_layout_reader(struct index_layout *layout,
322 					   struct layout_region *lr, off_t offset,
323 					   struct buffered_reader **reader_ptr)
324 {
325 	return uds_make_buffered_reader(layout->factory, lr->start_block + offset,
326 					lr->block_count, reader_ptr);
327 }
328 
329 static int open_region_reader(struct index_layout *layout, struct layout_region *region,
330 			      struct buffered_reader **reader_ptr)
331 {
332 	return open_layout_reader(layout, region, -layout->super.start_offset,
333 				  reader_ptr);
334 }
335 
336 static int __must_check open_layout_writer(struct index_layout *layout,
337 					   struct layout_region *lr, off_t offset,
338 					   struct buffered_writer **writer_ptr)
339 {
340 	return uds_make_buffered_writer(layout->factory, lr->start_block + offset,
341 					lr->block_count, writer_ptr);
342 }
343 
344 static int open_region_writer(struct index_layout *layout, struct layout_region *region,
345 			      struct buffered_writer **writer_ptr)
346 {
347 	return open_layout_writer(layout, region, -layout->super.start_offset,
348 				  writer_ptr);
349 }
350 
351 static void generate_super_block_data(struct save_layout_sizes *sls,
352 				      struct super_block_data *super)
353 {
354 	memset(super, 0, sizeof(*super));
355 	memcpy(super->magic_label, LAYOUT_MAGIC, MAGIC_SIZE);
356 	create_unique_nonce_data(super->nonce_info);
357 
358 	super->nonce = generate_primary_nonce(super->nonce_info,
359 					      sizeof(super->nonce_info));
360 	super->version = SUPER_VERSION_CURRENT;
361 	super->block_size = sls->block_size;
362 	super->index_count = 1;
363 	super->max_saves = sls->save_count;
364 	super->open_chapter_blocks = sls->open_chapter_blocks;
365 	super->page_map_blocks = sls->page_map_blocks;
366 	super->volume_offset = 0;
367 	super->start_offset = 0;
368 }
369 
370 static void define_sub_index_nonce(struct index_layout *layout)
371 {
372 	struct sub_index_nonce_data {
373 		u64 offset;
374 		u16 index_id;
375 	};
376 	struct sub_index_layout *sil = &layout->index;
377 	u64 primary_nonce = layout->super.nonce;
378 	u8 buffer[sizeof(struct sub_index_nonce_data)] = { 0 };
379 	size_t offset = 0;
380 
381 	encode_u64_le(buffer, &offset, sil->sub_index.start_block);
382 	encode_u16_le(buffer, &offset, 0);
383 	sil->nonce = generate_secondary_nonce(primary_nonce, buffer, sizeof(buffer));
384 	if (sil->nonce == 0) {
385 		sil->nonce = generate_secondary_nonce(~primary_nonce + 1, buffer,
386 						      sizeof(buffer));
387 	}
388 }
389 
390 static void setup_sub_index(struct index_layout *layout, u64 start_block,
391 			    struct save_layout_sizes *sls)
392 {
393 	struct sub_index_layout *sil = &layout->index;
394 	u64 next_block = start_block;
395 	unsigned int i;
396 
397 	sil->sub_index = (struct layout_region) {
398 		.start_block = start_block,
399 		.block_count = sls->sub_index_blocks,
400 		.kind = RL_KIND_INDEX,
401 		.instance = 0,
402 	};
403 
404 	sil->volume = (struct layout_region) {
405 		.start_block = next_block,
406 		.block_count = sls->volume_blocks,
407 		.kind = RL_KIND_VOLUME,
408 		.instance = RL_SOLE_INSTANCE,
409 	};
410 
411 	next_block += sls->volume_blocks;
412 
413 	for (i = 0; i < sls->save_count; i++) {
414 		sil->saves[i].index_save = (struct layout_region) {
415 			.start_block = next_block,
416 			.block_count = sls->save_blocks,
417 			.kind = RL_KIND_SAVE,
418 			.instance = i,
419 		};
420 
421 		next_block += sls->save_blocks;
422 	}
423 
424 	define_sub_index_nonce(layout);
425 }
426 
427 static void initialize_layout(struct index_layout *layout, struct save_layout_sizes *sls)
428 {
429 	u64 next_block = layout->offset / sls->block_size;
430 
431 	layout->total_blocks = sls->total_blocks;
432 	generate_super_block_data(sls, &layout->super);
433 	layout->header = (struct layout_region) {
434 		.start_block = next_block++,
435 		.block_count = 1,
436 		.kind = RL_KIND_HEADER,
437 		.instance = RL_SOLE_INSTANCE,
438 	};
439 
440 	layout->config = (struct layout_region) {
441 		.start_block = next_block++,
442 		.block_count = 1,
443 		.kind = RL_KIND_CONFIG,
444 		.instance = RL_SOLE_INSTANCE,
445 	};
446 
447 	setup_sub_index(layout, next_block, sls);
448 	next_block += sls->sub_index_blocks;
449 
450 	layout->seal = (struct layout_region) {
451 		.start_block = next_block,
452 		.block_count = 1,
453 		.kind = RL_KIND_SEAL,
454 		.instance = RL_SOLE_INSTANCE,
455 	};
456 }
457 
458 static int __must_check make_index_save_region_table(struct index_save_layout *isl,
459 						     struct region_table **table_ptr)
460 {
461 	int result;
462 	unsigned int z;
463 	struct region_table *table;
464 	struct layout_region *lr;
465 	u16 region_count;
466 	size_t payload;
467 	size_t type;
468 
469 	if (isl->zone_count > 0) {
470 		/*
471 		 * Normal save regions: header, page map, volume index zones,
472 		 * open chapter, and possibly free space.
473 		 */
474 		region_count = 3 + isl->zone_count;
475 		if (isl->free_space.block_count > 0)
476 			region_count++;
477 
478 		payload = sizeof(isl->save_data) + sizeof(isl->state_data);
479 		type = RH_TYPE_SAVE;
480 	} else {
481 		/* Empty save regions: header, page map, free space. */
482 		region_count = 3;
483 		payload = sizeof(isl->save_data);
484 		type = RH_TYPE_UNSAVED;
485 	}
486 
487 	result = vdo_allocate_extended(struct region_table, region_count,
488 				       struct layout_region,
489 				       "layout region table for ISL", &table);
490 	if (result != VDO_SUCCESS)
491 		return result;
492 
493 	lr = &table->regions[0];
494 	*lr++ = isl->header;
495 	*lr++ = isl->index_page_map;
496 	for (z = 0; z < isl->zone_count; z++)
497 		*lr++ = isl->volume_index_zones[z];
498 
499 	if (isl->zone_count > 0)
500 		*lr++ = isl->open_chapter;
501 
502 	if (isl->free_space.block_count > 0)
503 		*lr++ = isl->free_space;
504 
505 	table->header = (struct region_header) {
506 		.magic = REGION_MAGIC,
507 		.region_blocks = isl->index_save.block_count,
508 		.type = type,
509 		.version = 1,
510 		.region_count = region_count,
511 		.payload = payload,
512 	};
513 
514 	table->encoded_size = (sizeof(struct region_header) + payload +
515 			       region_count * sizeof(struct layout_region));
516 	*table_ptr = table;
517 	return UDS_SUCCESS;
518 }
519 
520 static void encode_region_table(u8 *buffer, size_t *offset, struct region_table *table)
521 {
522 	unsigned int i;
523 
524 	encode_u64_le(buffer, offset, REGION_MAGIC);
525 	encode_u64_le(buffer, offset, table->header.region_blocks);
526 	encode_u16_le(buffer, offset, table->header.type);
527 	encode_u16_le(buffer, offset, table->header.version);
528 	encode_u16_le(buffer, offset, table->header.region_count);
529 	encode_u16_le(buffer, offset, table->header.payload);
530 
531 	for (i = 0; i < table->header.region_count; i++) {
532 		encode_u64_le(buffer, offset, table->regions[i].start_block);
533 		encode_u64_le(buffer, offset, table->regions[i].block_count);
534 		encode_u32_le(buffer, offset, 0);
535 		encode_u16_le(buffer, offset, table->regions[i].kind);
536 		encode_u16_le(buffer, offset, table->regions[i].instance);
537 	}
538 }
539 
540 static int __must_check write_index_save_header(struct index_save_layout *isl,
541 						struct region_table *table,
542 						struct buffered_writer *writer)
543 {
544 	int result;
545 	u8 *buffer;
546 	size_t offset = 0;
547 
548 	result = vdo_allocate(table->encoded_size, u8, "index save data", &buffer);
549 	if (result != VDO_SUCCESS)
550 		return result;
551 
552 	encode_region_table(buffer, &offset, table);
553 	encode_u64_le(buffer, &offset, isl->save_data.timestamp);
554 	encode_u64_le(buffer, &offset, isl->save_data.nonce);
555 	encode_u32_le(buffer, &offset, isl->save_data.version);
556 	encode_u32_le(buffer, &offset, 0);
557 	if (isl->zone_count > 0) {
558 		encode_u32_le(buffer, &offset, INDEX_STATE_VERSION_301.signature);
559 		encode_u32_le(buffer, &offset, INDEX_STATE_VERSION_301.version_id);
560 		encode_u64_le(buffer, &offset, isl->state_data.newest_chapter);
561 		encode_u64_le(buffer, &offset, isl->state_data.oldest_chapter);
562 		encode_u64_le(buffer, &offset, isl->state_data.last_save);
563 		encode_u64_le(buffer, &offset, 0);
564 	}
565 
566 	result = uds_write_to_buffered_writer(writer, buffer, offset);
567 	vdo_free(buffer);
568 	if (result != UDS_SUCCESS)
569 		return result;
570 
571 	return uds_flush_buffered_writer(writer);
572 }
573 
574 static int write_index_save_layout(struct index_layout *layout,
575 				   struct index_save_layout *isl)
576 {
577 	int result;
578 	struct region_table *table;
579 	struct buffered_writer *writer;
580 
581 	result = make_index_save_region_table(isl, &table);
582 	if (result != UDS_SUCCESS)
583 		return result;
584 
585 	result = open_region_writer(layout, &isl->header, &writer);
586 	if (result != UDS_SUCCESS) {
587 		vdo_free(table);
588 		return result;
589 	}
590 
591 	result = write_index_save_header(isl, table, writer);
592 	vdo_free(table);
593 	uds_free_buffered_writer(writer);
594 
595 	return result;
596 }
597 
598 static void reset_index_save_layout(struct index_save_layout *isl, u64 page_map_blocks)
599 {
600 	u64 free_blocks;
601 	u64 next_block = isl->index_save.start_block;
602 
603 	isl->zone_count = 0;
604 	memset(&isl->save_data, 0, sizeof(isl->save_data));
605 
606 	isl->header = (struct layout_region) {
607 		.start_block = next_block++,
608 		.block_count = 1,
609 		.kind = RL_KIND_HEADER,
610 		.instance = RL_SOLE_INSTANCE,
611 	};
612 
613 	isl->index_page_map = (struct layout_region) {
614 		.start_block = next_block,
615 		.block_count = page_map_blocks,
616 		.kind = RL_KIND_INDEX_PAGE_MAP,
617 		.instance = RL_SOLE_INSTANCE,
618 	};
619 
620 	next_block += page_map_blocks;
621 
622 	free_blocks = isl->index_save.block_count - page_map_blocks - 1;
623 	isl->free_space = (struct layout_region) {
624 		.start_block = next_block,
625 		.block_count = free_blocks,
626 		.kind = RL_KIND_EMPTY,
627 		.instance = RL_SOLE_INSTANCE,
628 	};
629 }
630 
631 static int __must_check invalidate_old_save(struct index_layout *layout,
632 					    struct index_save_layout *isl)
633 {
634 	reset_index_save_layout(isl, layout->super.page_map_blocks);
635 	return write_index_save_layout(layout, isl);
636 }
637 
638 static int discard_index_state_data(struct index_layout *layout)
639 {
640 	int result;
641 	int saved_result = UDS_SUCCESS;
642 	unsigned int i;
643 
644 	for (i = 0; i < layout->super.max_saves; i++) {
645 		result = invalidate_old_save(layout, &layout->index.saves[i]);
646 		if (result != UDS_SUCCESS)
647 			saved_result = result;
648 	}
649 
650 	if (saved_result != UDS_SUCCESS) {
651 		return vdo_log_error_strerror(result,
652 					      "%s: cannot destroy all index saves",
653 					      __func__);
654 	}
655 
656 	return UDS_SUCCESS;
657 }
658 
659 static int __must_check make_layout_region_table(struct index_layout *layout,
660 						 struct region_table **table_ptr)
661 {
662 	int result;
663 	unsigned int i;
664 	/* Regions: header, config, index, volume, saves, seal */
665 	u16 region_count = 5 + layout->super.max_saves;
666 	u16 payload;
667 	struct region_table *table;
668 	struct layout_region *lr;
669 
670 	result = vdo_allocate_extended(struct region_table, region_count,
671 				       struct layout_region, "layout region table",
672 				       &table);
673 	if (result != VDO_SUCCESS)
674 		return result;
675 
676 	lr = &table->regions[0];
677 	*lr++ = layout->header;
678 	*lr++ = layout->config;
679 	*lr++ = layout->index.sub_index;
680 	*lr++ = layout->index.volume;
681 
682 	for (i = 0; i < layout->super.max_saves; i++)
683 		*lr++ = layout->index.saves[i].index_save;
684 
685 	*lr++ = layout->seal;
686 
687 	if (is_converted_super_block(&layout->super)) {
688 		payload = sizeof(struct super_block_data);
689 	} else {
690 		payload = (sizeof(struct super_block_data) -
691 			   sizeof(layout->super.volume_offset) -
692 			   sizeof(layout->super.start_offset));
693 	}
694 
695 	table->header = (struct region_header) {
696 		.magic = REGION_MAGIC,
697 		.region_blocks = layout->total_blocks,
698 		.type = RH_TYPE_SUPER,
699 		.version = 1,
700 		.region_count = region_count,
701 		.payload = payload,
702 	};
703 
704 	table->encoded_size = (sizeof(struct region_header) + payload +
705 			       region_count * sizeof(struct layout_region));
706 	*table_ptr = table;
707 	return UDS_SUCCESS;
708 }
709 
710 static int __must_check write_layout_header(struct index_layout *layout,
711 					    struct region_table *table,
712 					    struct buffered_writer *writer)
713 {
714 	int result;
715 	u8 *buffer;
716 	size_t offset = 0;
717 
718 	result = vdo_allocate(table->encoded_size, u8, "layout data", &buffer);
719 	if (result != VDO_SUCCESS)
720 		return result;
721 
722 	encode_region_table(buffer, &offset, table);
723 	memcpy(buffer + offset, &layout->super.magic_label, MAGIC_SIZE);
724 	offset += MAGIC_SIZE;
725 	memcpy(buffer + offset, &layout->super.nonce_info, NONCE_INFO_SIZE);
726 	offset += NONCE_INFO_SIZE;
727 	encode_u64_le(buffer, &offset, layout->super.nonce);
728 	encode_u32_le(buffer, &offset, layout->super.version);
729 	encode_u32_le(buffer, &offset, layout->super.block_size);
730 	encode_u16_le(buffer, &offset, layout->super.index_count);
731 	encode_u16_le(buffer, &offset, layout->super.max_saves);
732 	encode_u32_le(buffer, &offset, 0);
733 	encode_u64_le(buffer, &offset, layout->super.open_chapter_blocks);
734 	encode_u64_le(buffer, &offset, layout->super.page_map_blocks);
735 
736 	if (is_converted_super_block(&layout->super)) {
737 		encode_u64_le(buffer, &offset, layout->super.volume_offset);
738 		encode_u64_le(buffer, &offset, layout->super.start_offset);
739 	}
740 
741 	result = uds_write_to_buffered_writer(writer, buffer, offset);
742 	vdo_free(buffer);
743 	if (result != UDS_SUCCESS)
744 		return result;
745 
746 	return uds_flush_buffered_writer(writer);
747 }
748 
749 static int __must_check write_uds_index_config(struct index_layout *layout,
750 					       struct uds_configuration *config,
751 					       off_t offset)
752 {
753 	int result;
754 	struct buffered_writer *writer = NULL;
755 
756 	result = open_layout_writer(layout, &layout->config, offset, &writer);
757 	if (result != UDS_SUCCESS)
758 		return vdo_log_error_strerror(result, "failed to open config region");
759 
760 	result = uds_write_config_contents(writer, config, layout->super.version);
761 	if (result != UDS_SUCCESS) {
762 		uds_free_buffered_writer(writer);
763 		return vdo_log_error_strerror(result, "failed to write config region");
764 	}
765 
766 	result = uds_flush_buffered_writer(writer);
767 	if (result != UDS_SUCCESS) {
768 		uds_free_buffered_writer(writer);
769 		return vdo_log_error_strerror(result, "cannot flush config writer");
770 	}
771 
772 	uds_free_buffered_writer(writer);
773 	return UDS_SUCCESS;
774 }
775 
776 static int __must_check save_layout(struct index_layout *layout, off_t offset)
777 {
778 	int result;
779 	struct buffered_writer *writer = NULL;
780 	struct region_table *table;
781 
782 	result = make_layout_region_table(layout, &table);
783 	if (result != UDS_SUCCESS)
784 		return result;
785 
786 	result = open_layout_writer(layout, &layout->header, offset, &writer);
787 	if (result != UDS_SUCCESS) {
788 		vdo_free(table);
789 		return result;
790 	}
791 
792 	result = write_layout_header(layout, table, writer);
793 	vdo_free(table);
794 	uds_free_buffered_writer(writer);
795 
796 	return result;
797 }
798 
799 static int create_index_layout(struct index_layout *layout, struct uds_configuration *config)
800 {
801 	int result;
802 	struct save_layout_sizes sizes;
803 
804 	result = compute_sizes(config, &sizes);
805 	if (result != UDS_SUCCESS)
806 		return result;
807 
808 	result = vdo_allocate(sizes.save_count, struct index_save_layout, __func__,
809 			      &layout->index.saves);
810 	if (result != VDO_SUCCESS)
811 		return result;
812 
813 	initialize_layout(layout, &sizes);
814 
815 	result = discard_index_state_data(layout);
816 	if (result != UDS_SUCCESS)
817 		return result;
818 
819 	result = write_uds_index_config(layout, config, 0);
820 	if (result != UDS_SUCCESS)
821 		return result;
822 
823 	return save_layout(layout, 0);
824 }
825 
826 static u64 generate_index_save_nonce(u64 volume_nonce, struct index_save_layout *isl)
827 {
828 	struct save_nonce_data {
829 		struct index_save_data data;
830 		u64 offset;
831 	} nonce_data;
832 	u8 buffer[sizeof(nonce_data)];
833 	size_t offset = 0;
834 
835 	encode_u64_le(buffer, &offset, isl->save_data.timestamp);
836 	encode_u64_le(buffer, &offset, 0);
837 	encode_u32_le(buffer, &offset, isl->save_data.version);
838 	encode_u32_le(buffer, &offset, 0U);
839 	encode_u64_le(buffer, &offset, isl->index_save.start_block);
840 	VDO_ASSERT_LOG_ONLY(offset == sizeof(nonce_data),
841 			    "%zu bytes encoded of %zu expected",
842 			    offset, sizeof(nonce_data));
843 	return generate_secondary_nonce(volume_nonce, buffer, sizeof(buffer));
844 }
845 
846 static u64 validate_index_save_layout(struct index_save_layout *isl, u64 volume_nonce)
847 {
848 	if ((isl->zone_count == 0) || (isl->save_data.timestamp == 0))
849 		return 0;
850 
851 	if (isl->save_data.nonce != generate_index_save_nonce(volume_nonce, isl))
852 		return 0;
853 
854 	return isl->save_data.timestamp;
855 }
856 
857 static int find_latest_uds_index_save_slot(struct index_layout *layout,
858 					   struct index_save_layout **isl_ptr)
859 {
860 	struct index_save_layout *latest = NULL;
861 	struct index_save_layout *isl;
862 	unsigned int i;
863 	u64 save_time = 0;
864 	u64 latest_time = 0;
865 
866 	for (i = 0; i < layout->super.max_saves; i++) {
867 		isl = &layout->index.saves[i];
868 		save_time = validate_index_save_layout(isl, layout->index.nonce);
869 		if (save_time > latest_time) {
870 			latest = isl;
871 			latest_time = save_time;
872 		}
873 	}
874 
875 	if (latest == NULL) {
876 		vdo_log_error("No valid index save found");
877 		return UDS_INDEX_NOT_SAVED_CLEANLY;
878 	}
879 
880 	*isl_ptr = latest;
881 	return UDS_SUCCESS;
882 }
883 
884 int uds_discard_open_chapter(struct index_layout *layout)
885 {
886 	int result;
887 	struct index_save_layout *isl;
888 	struct buffered_writer *writer;
889 
890 	result = find_latest_uds_index_save_slot(layout, &isl);
891 	if (result != UDS_SUCCESS)
892 		return result;
893 
894 	result = open_region_writer(layout, &isl->open_chapter, &writer);
895 	if (result != UDS_SUCCESS)
896 		return result;
897 
898 	result = uds_write_to_buffered_writer(writer, NULL, UDS_BLOCK_SIZE);
899 	if (result != UDS_SUCCESS) {
900 		uds_free_buffered_writer(writer);
901 		return result;
902 	}
903 
904 	result = uds_flush_buffered_writer(writer);
905 	uds_free_buffered_writer(writer);
906 	return result;
907 }
908 
909 int uds_load_index_state(struct index_layout *layout, struct uds_index *index)
910 {
911 	int result;
912 	unsigned int zone;
913 	struct index_save_layout *isl;
914 	struct buffered_reader *readers[MAX_ZONES];
915 
916 	result = find_latest_uds_index_save_slot(layout, &isl);
917 	if (result != UDS_SUCCESS)
918 		return result;
919 
920 	index->newest_virtual_chapter = isl->state_data.newest_chapter;
921 	index->oldest_virtual_chapter = isl->state_data.oldest_chapter;
922 	index->last_save = isl->state_data.last_save;
923 
924 	result = open_region_reader(layout, &isl->open_chapter, &readers[0]);
925 	if (result != UDS_SUCCESS)
926 		return result;
927 
928 	result = uds_load_open_chapter(index, readers[0]);
929 	uds_free_buffered_reader(readers[0]);
930 	if (result != UDS_SUCCESS)
931 		return result;
932 
933 	for (zone = 0; zone < isl->zone_count; zone++) {
934 		result = open_region_reader(layout, &isl->volume_index_zones[zone],
935 					    &readers[zone]);
936 		if (result != UDS_SUCCESS) {
937 			for (; zone > 0; zone--)
938 				uds_free_buffered_reader(readers[zone - 1]);
939 
940 			return result;
941 		}
942 	}
943 
944 	result = uds_load_volume_index(index->volume_index, readers, isl->zone_count);
945 	for (zone = 0; zone < isl->zone_count; zone++)
946 		uds_free_buffered_reader(readers[zone]);
947 	if (result != UDS_SUCCESS)
948 		return result;
949 
950 	result = open_region_reader(layout, &isl->index_page_map, &readers[0]);
951 	if (result != UDS_SUCCESS)
952 		return result;
953 
954 	result = uds_read_index_page_map(index->volume->index_page_map, readers[0]);
955 	uds_free_buffered_reader(readers[0]);
956 
957 	return result;
958 }
959 
960 static struct index_save_layout *select_oldest_index_save_layout(struct index_layout *layout)
961 {
962 	struct index_save_layout *oldest = NULL;
963 	struct index_save_layout *isl;
964 	unsigned int i;
965 	u64 save_time = 0;
966 	u64 oldest_time = 0;
967 
968 	for (i = 0; i < layout->super.max_saves; i++) {
969 		isl = &layout->index.saves[i];
970 		save_time = validate_index_save_layout(isl, layout->index.nonce);
971 		if (oldest == NULL || save_time < oldest_time) {
972 			oldest = isl;
973 			oldest_time = save_time;
974 		}
975 	}
976 
977 	return oldest;
978 }
979 
980 static void instantiate_index_save_layout(struct index_save_layout *isl,
981 					  struct super_block_data *super,
982 					  u64 volume_nonce, unsigned int zone_count)
983 {
984 	unsigned int z;
985 	u64 next_block;
986 	u64 free_blocks;
987 	u64 volume_index_blocks;
988 
989 	isl->zone_count = zone_count;
990 	memset(&isl->save_data, 0, sizeof(isl->save_data));
991 	isl->save_data.timestamp = ktime_to_ms(current_time_ns(CLOCK_REALTIME));
992 	isl->save_data.version = 1;
993 	isl->save_data.nonce = generate_index_save_nonce(volume_nonce, isl);
994 
995 	next_block = isl->index_save.start_block;
996 	isl->header = (struct layout_region) {
997 		.start_block = next_block++,
998 		.block_count = 1,
999 		.kind = RL_KIND_HEADER,
1000 		.instance = RL_SOLE_INSTANCE,
1001 	};
1002 
1003 	isl->index_page_map = (struct layout_region) {
1004 		.start_block = next_block,
1005 		.block_count = super->page_map_blocks,
1006 		.kind = RL_KIND_INDEX_PAGE_MAP,
1007 		.instance = RL_SOLE_INSTANCE,
1008 	};
1009 	next_block += super->page_map_blocks;
1010 
1011 	free_blocks = (isl->index_save.block_count - 1 -
1012 		       super->page_map_blocks -
1013 		       super->open_chapter_blocks);
1014 	volume_index_blocks = free_blocks / isl->zone_count;
1015 	for (z = 0; z < isl->zone_count; z++) {
1016 		isl->volume_index_zones[z] = (struct layout_region) {
1017 			.start_block = next_block,
1018 			.block_count = volume_index_blocks,
1019 			.kind = RL_KIND_VOLUME_INDEX,
1020 			.instance = z,
1021 		};
1022 
1023 		next_block += volume_index_blocks;
1024 		free_blocks -= volume_index_blocks;
1025 	}
1026 
1027 	isl->open_chapter = (struct layout_region) {
1028 		.start_block = next_block,
1029 		.block_count = super->open_chapter_blocks,
1030 		.kind = RL_KIND_OPEN_CHAPTER,
1031 		.instance = RL_SOLE_INSTANCE,
1032 	};
1033 
1034 	next_block += super->open_chapter_blocks;
1035 
1036 	isl->free_space = (struct layout_region) {
1037 		.start_block = next_block,
1038 		.block_count = free_blocks,
1039 		.kind = RL_KIND_EMPTY,
1040 		.instance = RL_SOLE_INSTANCE,
1041 	};
1042 }
1043 
1044 static int setup_uds_index_save_slot(struct index_layout *layout,
1045 				     unsigned int zone_count,
1046 				     struct index_save_layout **isl_ptr)
1047 {
1048 	int result;
1049 	struct index_save_layout *isl;
1050 
1051 	isl = select_oldest_index_save_layout(layout);
1052 	result = invalidate_old_save(layout, isl);
1053 	if (result != UDS_SUCCESS)
1054 		return result;
1055 
1056 	instantiate_index_save_layout(isl, &layout->super, layout->index.nonce,
1057 				      zone_count);
1058 
1059 	*isl_ptr = isl;
1060 	return UDS_SUCCESS;
1061 }
1062 
1063 static void cancel_uds_index_save(struct index_save_layout *isl)
1064 {
1065 	memset(&isl->save_data, 0, sizeof(isl->save_data));
1066 	memset(&isl->state_data, 0, sizeof(isl->state_data));
1067 	isl->zone_count = 0;
1068 }
1069 
1070 int uds_save_index_state(struct index_layout *layout, struct uds_index *index)
1071 {
1072 	int result;
1073 	unsigned int zone;
1074 	struct index_save_layout *isl;
1075 	struct buffered_writer *writers[MAX_ZONES];
1076 
1077 	result = setup_uds_index_save_slot(layout, index->zone_count, &isl);
1078 	if (result != UDS_SUCCESS)
1079 		return result;
1080 
1081 	isl->state_data	= (struct index_state_data301) {
1082 		.newest_chapter = index->newest_virtual_chapter,
1083 		.oldest_chapter = index->oldest_virtual_chapter,
1084 		.last_save = index->last_save,
1085 	};
1086 
1087 	result = open_region_writer(layout, &isl->open_chapter, &writers[0]);
1088 	if (result != UDS_SUCCESS) {
1089 		cancel_uds_index_save(isl);
1090 		return result;
1091 	}
1092 
1093 	result = uds_save_open_chapter(index, writers[0]);
1094 	uds_free_buffered_writer(writers[0]);
1095 	if (result != UDS_SUCCESS) {
1096 		cancel_uds_index_save(isl);
1097 		return result;
1098 	}
1099 
1100 	for (zone = 0; zone < index->zone_count; zone++) {
1101 		result = open_region_writer(layout, &isl->volume_index_zones[zone],
1102 					    &writers[zone]);
1103 		if (result != UDS_SUCCESS) {
1104 			for (; zone > 0; zone--)
1105 				uds_free_buffered_writer(writers[zone - 1]);
1106 
1107 			cancel_uds_index_save(isl);
1108 			return result;
1109 		}
1110 	}
1111 
1112 	result = uds_save_volume_index(index->volume_index, writers, index->zone_count);
1113 	for (zone = 0; zone < index->zone_count; zone++)
1114 		uds_free_buffered_writer(writers[zone]);
1115 	if (result != UDS_SUCCESS) {
1116 		cancel_uds_index_save(isl);
1117 		return result;
1118 	}
1119 
1120 	result = open_region_writer(layout, &isl->index_page_map, &writers[0]);
1121 	if (result != UDS_SUCCESS) {
1122 		cancel_uds_index_save(isl);
1123 		return result;
1124 	}
1125 
1126 	result = uds_write_index_page_map(index->volume->index_page_map, writers[0]);
1127 	uds_free_buffered_writer(writers[0]);
1128 	if (result != UDS_SUCCESS) {
1129 		cancel_uds_index_save(isl);
1130 		return result;
1131 	}
1132 
1133 	return write_index_save_layout(layout, isl);
1134 }
1135 
1136 static int __must_check load_region_table(struct buffered_reader *reader,
1137 					  struct region_table **table_ptr)
1138 {
1139 	int result;
1140 	unsigned int i;
1141 	struct region_header header;
1142 	struct region_table *table;
1143 	u8 buffer[sizeof(struct region_header)];
1144 	size_t offset = 0;
1145 
1146 	result = uds_read_from_buffered_reader(reader, buffer, sizeof(buffer));
1147 	if (result != UDS_SUCCESS)
1148 		return vdo_log_error_strerror(result, "cannot read region table header");
1149 
1150 	decode_u64_le(buffer, &offset, &header.magic);
1151 	decode_u64_le(buffer, &offset, &header.region_blocks);
1152 	decode_u16_le(buffer, &offset, &header.type);
1153 	decode_u16_le(buffer, &offset, &header.version);
1154 	decode_u16_le(buffer, &offset, &header.region_count);
1155 	decode_u16_le(buffer, &offset, &header.payload);
1156 
1157 	if (header.magic != REGION_MAGIC)
1158 		return UDS_NO_INDEX;
1159 
1160 	if (header.version != 1) {
1161 		return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1162 					      "unknown region table version %hu",
1163 					      header.version);
1164 	}
1165 
1166 	result = vdo_allocate_extended(struct region_table, header.region_count,
1167 				       struct layout_region,
1168 				       "single file layout region table", &table);
1169 	if (result != VDO_SUCCESS)
1170 		return result;
1171 
1172 	table->header = header;
1173 	for (i = 0; i < header.region_count; i++) {
1174 		u8 region_buffer[sizeof(struct layout_region)];
1175 
1176 		offset = 0;
1177 		result = uds_read_from_buffered_reader(reader, region_buffer,
1178 						       sizeof(region_buffer));
1179 		if (result != UDS_SUCCESS) {
1180 			vdo_free(table);
1181 			return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1182 						      "cannot read region table layouts");
1183 		}
1184 
1185 		decode_u64_le(region_buffer, &offset, &table->regions[i].start_block);
1186 		decode_u64_le(region_buffer, &offset, &table->regions[i].block_count);
1187 		offset += sizeof(u32);
1188 		decode_u16_le(region_buffer, &offset, &table->regions[i].kind);
1189 		decode_u16_le(region_buffer, &offset, &table->regions[i].instance);
1190 	}
1191 
1192 	*table_ptr = table;
1193 	return UDS_SUCCESS;
1194 }
1195 
1196 static int __must_check read_super_block_data(struct buffered_reader *reader,
1197 					      struct index_layout *layout,
1198 					      size_t saved_size)
1199 {
1200 	int result;
1201 	struct super_block_data *super = &layout->super;
1202 	u8 *buffer;
1203 	size_t offset = 0;
1204 
1205 	result = vdo_allocate(saved_size, u8, "super block data", &buffer);
1206 	if (result != VDO_SUCCESS)
1207 		return result;
1208 
1209 	result = uds_read_from_buffered_reader(reader, buffer, saved_size);
1210 	if (result != UDS_SUCCESS) {
1211 		vdo_free(buffer);
1212 		return vdo_log_error_strerror(result, "cannot read region table header");
1213 	}
1214 
1215 	memcpy(&super->magic_label, buffer, MAGIC_SIZE);
1216 	offset += MAGIC_SIZE;
1217 	memcpy(&super->nonce_info, buffer + offset, NONCE_INFO_SIZE);
1218 	offset += NONCE_INFO_SIZE;
1219 	decode_u64_le(buffer, &offset, &super->nonce);
1220 	decode_u32_le(buffer, &offset, &super->version);
1221 	decode_u32_le(buffer, &offset, &super->block_size);
1222 	decode_u16_le(buffer, &offset, &super->index_count);
1223 	decode_u16_le(buffer, &offset, &super->max_saves);
1224 	offset += sizeof(u32);
1225 	decode_u64_le(buffer, &offset, &super->open_chapter_blocks);
1226 	decode_u64_le(buffer, &offset, &super->page_map_blocks);
1227 
1228 	if (is_converted_super_block(super)) {
1229 		decode_u64_le(buffer, &offset, &super->volume_offset);
1230 		decode_u64_le(buffer, &offset, &super->start_offset);
1231 	} else {
1232 		super->volume_offset = 0;
1233 		super->start_offset = 0;
1234 	}
1235 
1236 	vdo_free(buffer);
1237 
1238 	if (memcmp(super->magic_label, LAYOUT_MAGIC, MAGIC_SIZE) != 0)
1239 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1240 					      "unknown superblock magic label");
1241 
1242 	if ((super->version < SUPER_VERSION_MINIMUM) ||
1243 	    (super->version == 4) || (super->version == 5) || (super->version == 6) ||
1244 	    (super->version > SUPER_VERSION_MAXIMUM)) {
1245 		return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1246 					      "unknown superblock version number %u",
1247 					      super->version);
1248 	}
1249 
1250 	if (super->volume_offset < super->start_offset) {
1251 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1252 					      "inconsistent offsets (start %llu, volume %llu)",
1253 					      (unsigned long long) super->start_offset,
1254 					      (unsigned long long) super->volume_offset);
1255 	}
1256 
1257 	/* Sub-indexes are no longer used but the layout retains this field. */
1258 	if (super->index_count != 1) {
1259 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1260 					      "invalid subindex count %u",
1261 					      super->index_count);
1262 	}
1263 
1264 	if (generate_primary_nonce(super->nonce_info, sizeof(super->nonce_info)) != super->nonce) {
1265 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1266 					      "inconsistent superblock nonce");
1267 	}
1268 
1269 	return UDS_SUCCESS;
1270 }
1271 
1272 static int __must_check verify_region(struct layout_region *lr, u64 start_block,
1273 				      enum region_kind kind, unsigned int instance)
1274 {
1275 	if (lr->start_block != start_block)
1276 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1277 					      "incorrect layout region offset");
1278 
1279 	if (lr->kind != kind)
1280 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1281 					      "incorrect layout region kind");
1282 
1283 	if (lr->instance != instance) {
1284 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1285 					      "incorrect layout region instance");
1286 	}
1287 
1288 	return UDS_SUCCESS;
1289 }
1290 
1291 static int __must_check verify_sub_index(struct index_layout *layout, u64 start_block,
1292 					 struct region_table *table)
1293 {
1294 	int result;
1295 	unsigned int i;
1296 	struct sub_index_layout *sil = &layout->index;
1297 	u64 next_block = start_block;
1298 
1299 	sil->sub_index = table->regions[2];
1300 	result = verify_region(&sil->sub_index, next_block, RL_KIND_INDEX, 0);
1301 	if (result != UDS_SUCCESS)
1302 		return result;
1303 
1304 	define_sub_index_nonce(layout);
1305 
1306 	sil->volume = table->regions[3];
1307 	result = verify_region(&sil->volume, next_block, RL_KIND_VOLUME,
1308 			       RL_SOLE_INSTANCE);
1309 	if (result != UDS_SUCCESS)
1310 		return result;
1311 
1312 	next_block += sil->volume.block_count + layout->super.volume_offset;
1313 
1314 	for (i = 0; i < layout->super.max_saves; i++) {
1315 		sil->saves[i].index_save = table->regions[i + 4];
1316 		result = verify_region(&sil->saves[i].index_save, next_block,
1317 				       RL_KIND_SAVE, i);
1318 		if (result != UDS_SUCCESS)
1319 			return result;
1320 
1321 		next_block += sil->saves[i].index_save.block_count;
1322 	}
1323 
1324 	next_block -= layout->super.volume_offset;
1325 	if (next_block != start_block + sil->sub_index.block_count) {
1326 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1327 					      "sub index region does not span all saves");
1328 	}
1329 
1330 	return UDS_SUCCESS;
1331 }
1332 
1333 static int __must_check reconstitute_layout(struct index_layout *layout,
1334 					    struct region_table *table, u64 first_block)
1335 {
1336 	int result;
1337 	u64 next_block = first_block;
1338 
1339 	result = vdo_allocate(layout->super.max_saves, struct index_save_layout,
1340 			      __func__, &layout->index.saves);
1341 	if (result != VDO_SUCCESS)
1342 		return result;
1343 
1344 	layout->total_blocks = table->header.region_blocks;
1345 
1346 	layout->header = table->regions[0];
1347 	result = verify_region(&layout->header, next_block++, RL_KIND_HEADER,
1348 			       RL_SOLE_INSTANCE);
1349 	if (result != UDS_SUCCESS)
1350 		return result;
1351 
1352 	layout->config = table->regions[1];
1353 	result = verify_region(&layout->config, next_block++, RL_KIND_CONFIG,
1354 			       RL_SOLE_INSTANCE);
1355 	if (result != UDS_SUCCESS)
1356 		return result;
1357 
1358 	result = verify_sub_index(layout, next_block, table);
1359 	if (result != UDS_SUCCESS)
1360 		return result;
1361 
1362 	next_block += layout->index.sub_index.block_count;
1363 
1364 	layout->seal = table->regions[table->header.region_count - 1];
1365 	result = verify_region(&layout->seal, next_block + layout->super.volume_offset,
1366 			       RL_KIND_SEAL, RL_SOLE_INSTANCE);
1367 	if (result != UDS_SUCCESS)
1368 		return result;
1369 
1370 	if (++next_block != (first_block + layout->total_blocks)) {
1371 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1372 					      "layout table does not span total blocks");
1373 	}
1374 
1375 	return UDS_SUCCESS;
1376 }
1377 
1378 static int __must_check load_super_block(struct index_layout *layout, size_t block_size,
1379 					 u64 first_block, struct buffered_reader *reader)
1380 {
1381 	int result;
1382 	struct region_table *table = NULL;
1383 	struct super_block_data *super = &layout->super;
1384 
1385 	result = load_region_table(reader, &table);
1386 	if (result != UDS_SUCCESS)
1387 		return result;
1388 
1389 	if (table->header.type != RH_TYPE_SUPER) {
1390 		vdo_free(table);
1391 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1392 					      "not a superblock region table");
1393 	}
1394 
1395 	result = read_super_block_data(reader, layout, table->header.payload);
1396 	if (result != UDS_SUCCESS) {
1397 		vdo_free(table);
1398 		return vdo_log_error_strerror(result, "unknown superblock format");
1399 	}
1400 
1401 	if (super->block_size != block_size) {
1402 		vdo_free(table);
1403 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1404 					      "superblock saved block_size %u differs from supplied block_size %zu",
1405 					      super->block_size, block_size);
1406 	}
1407 
1408 	first_block -= (super->volume_offset - super->start_offset);
1409 	result = reconstitute_layout(layout, table, first_block);
1410 	vdo_free(table);
1411 	return result;
1412 }
1413 
1414 static int __must_check read_index_save_data(struct buffered_reader *reader,
1415 					     struct index_save_layout *isl,
1416 					     size_t saved_size)
1417 {
1418 	int result;
1419 	struct index_state_version file_version;
1420 	u8 buffer[sizeof(struct index_save_data) + sizeof(struct index_state_data301)];
1421 	size_t offset = 0;
1422 
1423 	if (saved_size != sizeof(buffer)) {
1424 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1425 					      "unexpected index save data size %zu",
1426 					      saved_size);
1427 	}
1428 
1429 	result = uds_read_from_buffered_reader(reader, buffer, sizeof(buffer));
1430 	if (result != UDS_SUCCESS)
1431 		return vdo_log_error_strerror(result, "cannot read index save data");
1432 
1433 	decode_u64_le(buffer, &offset, &isl->save_data.timestamp);
1434 	decode_u64_le(buffer, &offset, &isl->save_data.nonce);
1435 	decode_u32_le(buffer, &offset, &isl->save_data.version);
1436 	offset += sizeof(u32);
1437 
1438 	if (isl->save_data.version > 1) {
1439 		return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1440 					      "unknown index save version number %u",
1441 					      isl->save_data.version);
1442 	}
1443 
1444 	decode_s32_le(buffer, &offset, &file_version.signature);
1445 	decode_s32_le(buffer, &offset, &file_version.version_id);
1446 
1447 	if ((file_version.signature != INDEX_STATE_VERSION_301.signature) ||
1448 	    (file_version.version_id != INDEX_STATE_VERSION_301.version_id)) {
1449 		return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1450 					      "index state version %d,%d is unsupported",
1451 					      file_version.signature,
1452 					      file_version.version_id);
1453 	}
1454 
1455 	decode_u64_le(buffer, &offset, &isl->state_data.newest_chapter);
1456 	decode_u64_le(buffer, &offset, &isl->state_data.oldest_chapter);
1457 	decode_u64_le(buffer, &offset, &isl->state_data.last_save);
1458 	/* Skip past some historical fields that are now unused */
1459 	offset += sizeof(u32) + sizeof(u32);
1460 	return UDS_SUCCESS;
1461 }
1462 
1463 static int __must_check reconstruct_index_save(struct index_save_layout *isl,
1464 					       struct region_table *table)
1465 {
1466 	int result;
1467 	unsigned int z;
1468 	struct layout_region *last_region;
1469 	u64 next_block = isl->index_save.start_block;
1470 	u64 last_block = next_block + isl->index_save.block_count;
1471 
1472 	isl->zone_count = table->header.region_count - 3;
1473 
1474 	last_region = &table->regions[table->header.region_count - 1];
1475 	if (last_region->kind == RL_KIND_EMPTY) {
1476 		isl->free_space = *last_region;
1477 		isl->zone_count--;
1478 	} else {
1479 		isl->free_space = (struct layout_region) {
1480 			.start_block = last_block,
1481 			.block_count = 0,
1482 			.kind = RL_KIND_EMPTY,
1483 			.instance = RL_SOLE_INSTANCE,
1484 		};
1485 	}
1486 
1487 	isl->header = table->regions[0];
1488 	result = verify_region(&isl->header, next_block++, RL_KIND_HEADER,
1489 			       RL_SOLE_INSTANCE);
1490 	if (result != UDS_SUCCESS)
1491 		return result;
1492 
1493 	isl->index_page_map = table->regions[1];
1494 	result = verify_region(&isl->index_page_map, next_block, RL_KIND_INDEX_PAGE_MAP,
1495 			       RL_SOLE_INSTANCE);
1496 	if (result != UDS_SUCCESS)
1497 		return result;
1498 
1499 	next_block += isl->index_page_map.block_count;
1500 
1501 	for (z = 0; z < isl->zone_count; z++) {
1502 		isl->volume_index_zones[z] = table->regions[z + 2];
1503 		result = verify_region(&isl->volume_index_zones[z], next_block,
1504 				       RL_KIND_VOLUME_INDEX, z);
1505 		if (result != UDS_SUCCESS)
1506 			return result;
1507 
1508 		next_block += isl->volume_index_zones[z].block_count;
1509 	}
1510 
1511 	isl->open_chapter = table->regions[isl->zone_count + 2];
1512 	result = verify_region(&isl->open_chapter, next_block, RL_KIND_OPEN_CHAPTER,
1513 			       RL_SOLE_INSTANCE);
1514 	if (result != UDS_SUCCESS)
1515 		return result;
1516 
1517 	next_block += isl->open_chapter.block_count;
1518 
1519 	result = verify_region(&isl->free_space, next_block, RL_KIND_EMPTY,
1520 			       RL_SOLE_INSTANCE);
1521 	if (result != UDS_SUCCESS)
1522 		return result;
1523 
1524 	next_block += isl->free_space.block_count;
1525 	if (next_block != last_block) {
1526 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1527 					      "index save layout table incomplete");
1528 	}
1529 
1530 	return UDS_SUCCESS;
1531 }
1532 
1533 static int __must_check load_index_save(struct index_save_layout *isl,
1534 					struct buffered_reader *reader,
1535 					unsigned int instance)
1536 {
1537 	int result;
1538 	struct region_table *table = NULL;
1539 
1540 	result = load_region_table(reader, &table);
1541 	if (result != UDS_SUCCESS) {
1542 		return vdo_log_error_strerror(result, "cannot read index save %u header",
1543 					      instance);
1544 	}
1545 
1546 	if (table->header.region_blocks != isl->index_save.block_count) {
1547 		u64 region_blocks = table->header.region_blocks;
1548 
1549 		vdo_free(table);
1550 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1551 					      "unexpected index save %u region block count %llu",
1552 					      instance,
1553 					      (unsigned long long) region_blocks);
1554 	}
1555 
1556 	if (table->header.type == RH_TYPE_UNSAVED) {
1557 		vdo_free(table);
1558 		reset_index_save_layout(isl, 0);
1559 		return UDS_SUCCESS;
1560 	}
1561 
1562 
1563 	if (table->header.type != RH_TYPE_SAVE) {
1564 		vdo_log_error_strerror(UDS_CORRUPT_DATA,
1565 				       "unexpected index save %u header type %u",
1566 				       instance, table->header.type);
1567 		vdo_free(table);
1568 		return UDS_CORRUPT_DATA;
1569 	}
1570 
1571 	result = read_index_save_data(reader, isl, table->header.payload);
1572 	if (result != UDS_SUCCESS) {
1573 		vdo_free(table);
1574 		return vdo_log_error_strerror(result,
1575 					      "unknown index save %u data format",
1576 					      instance);
1577 	}
1578 
1579 	result = reconstruct_index_save(isl, table);
1580 	vdo_free(table);
1581 	if (result != UDS_SUCCESS) {
1582 		return vdo_log_error_strerror(result, "cannot reconstruct index save %u",
1583 					      instance);
1584 	}
1585 
1586 	return UDS_SUCCESS;
1587 }
1588 
1589 static int __must_check load_sub_index_regions(struct index_layout *layout)
1590 {
1591 	int result;
1592 	unsigned int j;
1593 	struct index_save_layout *isl;
1594 	struct buffered_reader *reader;
1595 
1596 	for (j = 0; j < layout->super.max_saves; j++) {
1597 		isl = &layout->index.saves[j];
1598 		result = open_region_reader(layout, &isl->index_save, &reader);
1599 
1600 		if (result != UDS_SUCCESS) {
1601 			vdo_log_error_strerror(result,
1602 					       "cannot get reader for index 0 save %u",
1603 					       j);
1604 			return result;
1605 		}
1606 
1607 		result = load_index_save(isl, reader, j);
1608 		uds_free_buffered_reader(reader);
1609 		if (result != UDS_SUCCESS) {
1610 			/* Another save slot might be valid. */
1611 			reset_index_save_layout(isl, 0);
1612 			continue;
1613 		}
1614 	}
1615 
1616 	return UDS_SUCCESS;
1617 }
1618 
1619 static int __must_check verify_uds_index_config(struct index_layout *layout,
1620 						struct uds_configuration *config)
1621 {
1622 	int result;
1623 	struct buffered_reader *reader = NULL;
1624 	u64 offset;
1625 
1626 	offset = layout->super.volume_offset - layout->super.start_offset;
1627 	result = open_layout_reader(layout, &layout->config, offset, &reader);
1628 	if (result != UDS_SUCCESS)
1629 		return vdo_log_error_strerror(result, "failed to open config reader");
1630 
1631 	result = uds_validate_config_contents(reader, config);
1632 	if (result != UDS_SUCCESS) {
1633 		uds_free_buffered_reader(reader);
1634 		return vdo_log_error_strerror(result, "failed to read config region");
1635 	}
1636 
1637 	uds_free_buffered_reader(reader);
1638 	return UDS_SUCCESS;
1639 }
1640 
1641 static int load_index_layout(struct index_layout *layout, struct uds_configuration *config)
1642 {
1643 	int result;
1644 	struct buffered_reader *reader;
1645 
1646 	result = uds_make_buffered_reader(layout->factory,
1647 					  layout->offset / UDS_BLOCK_SIZE, 1, &reader);
1648 	if (result != UDS_SUCCESS)
1649 		return vdo_log_error_strerror(result, "unable to read superblock");
1650 
1651 	result = load_super_block(layout, UDS_BLOCK_SIZE,
1652 				  layout->offset / UDS_BLOCK_SIZE, reader);
1653 	uds_free_buffered_reader(reader);
1654 	if (result != UDS_SUCCESS)
1655 		return result;
1656 
1657 	result = verify_uds_index_config(layout, config);
1658 	if (result != UDS_SUCCESS)
1659 		return result;
1660 
1661 	return load_sub_index_regions(layout);
1662 }
1663 
1664 static int create_layout_factory(struct index_layout *layout,
1665 				 const struct uds_configuration *config)
1666 {
1667 	int result;
1668 	size_t writable_size;
1669 	struct io_factory *factory = NULL;
1670 
1671 	result = uds_make_io_factory(config->bdev, &factory);
1672 	if (result != UDS_SUCCESS)
1673 		return result;
1674 
1675 	writable_size = uds_get_writable_size(factory) & -UDS_BLOCK_SIZE;
1676 	if (writable_size < config->size + config->offset) {
1677 		uds_put_io_factory(factory);
1678 		vdo_log_error("index storage (%zu) is smaller than the requested size %zu",
1679 			      writable_size, config->size + config->offset);
1680 		return -ENOSPC;
1681 	}
1682 
1683 	layout->factory = factory;
1684 	layout->factory_size = (config->size > 0) ? config->size : writable_size;
1685 	layout->offset = config->offset;
1686 	return UDS_SUCCESS;
1687 }
1688 
1689 int uds_make_index_layout(struct uds_configuration *config, bool new_layout,
1690 			  struct index_layout **layout_ptr)
1691 {
1692 	int result;
1693 	struct index_layout *layout = NULL;
1694 	struct save_layout_sizes sizes;
1695 
1696 	result = compute_sizes(config, &sizes);
1697 	if (result != UDS_SUCCESS)
1698 		return result;
1699 
1700 	result = vdo_allocate(1, struct index_layout, __func__, &layout);
1701 	if (result != VDO_SUCCESS)
1702 		return result;
1703 
1704 	result = create_layout_factory(layout, config);
1705 	if (result != UDS_SUCCESS) {
1706 		uds_free_index_layout(layout);
1707 		return result;
1708 	}
1709 
1710 	if (layout->factory_size < sizes.total_size) {
1711 		vdo_log_error("index storage (%zu) is smaller than the required size %llu",
1712 			      layout->factory_size,
1713 			      (unsigned long long) sizes.total_size);
1714 		uds_free_index_layout(layout);
1715 		return -ENOSPC;
1716 	}
1717 
1718 	if (new_layout)
1719 		result = create_index_layout(layout, config);
1720 	else
1721 		result = load_index_layout(layout, config);
1722 	if (result != UDS_SUCCESS) {
1723 		uds_free_index_layout(layout);
1724 		return result;
1725 	}
1726 
1727 	*layout_ptr = layout;
1728 	return UDS_SUCCESS;
1729 }
1730 
1731 void uds_free_index_layout(struct index_layout *layout)
1732 {
1733 	if (layout == NULL)
1734 		return;
1735 
1736 	vdo_free(layout->index.saves);
1737 	if (layout->factory != NULL)
1738 		uds_put_io_factory(layout->factory);
1739 
1740 	vdo_free(layout);
1741 }
1742 
1743 int uds_replace_index_layout_storage(struct index_layout *layout,
1744 				     struct block_device *bdev)
1745 {
1746 	return uds_replace_storage(layout->factory, bdev);
1747 }
1748 
1749 /* Obtain a dm_bufio_client for the volume region. */
1750 int uds_open_volume_bufio(struct index_layout *layout, size_t block_size,
1751 			  unsigned int reserved_buffers,
1752 			  struct dm_bufio_client **client_ptr)
1753 {
1754 	off_t offset = (layout->index.volume.start_block +
1755 			layout->super.volume_offset -
1756 			layout->super.start_offset);
1757 
1758 	return uds_make_bufio(layout->factory, offset, block_size, reserved_buffers,
1759 			      client_ptr);
1760 }
1761 
1762 u64 uds_get_volume_nonce(struct index_layout *layout)
1763 {
1764 	return layout->index.nonce;
1765 }
1766