xref: /linux/drivers/md/dm-vdo/indexer/index-layout.c (revision 4b4a8d9560d987f4df17b7248ab1c8146138d0f5)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright 2023 Red Hat
4  */
5 
6 #include "index-layout.h"
7 
8 #include <linux/random.h>
9 
10 #include "logger.h"
11 #include "memory-alloc.h"
12 #include "murmurhash3.h"
13 #include "numeric.h"
14 #include "time-utils.h"
15 
16 #include "config.h"
17 #include "open-chapter.h"
18 #include "volume-index.h"
19 
20 /*
21  * The UDS layout on storage media is divided into a number of fixed-size regions, the sizes of
 * which are computed when the index is created. Every header and region begins on a 4K block
 * boundary. Save regions are further sub-divided into regions of their own.
24  *
 * Each region has a kind and an instance number. Some kinds only have one instance and therefore
 * use RL_SOLE_INSTANCE (65535, or -1 as a 16-bit value) as the instance number. The RL_KIND_INDEX
 * used to use instances to represent sub-indices; now, however, there is only ever one sub-index
 * and therefore one instance. The RL_KIND_VOLUME_INDEX uses instances to record which zone is
 * being saved.
29  *
30  * Every region header has a type and version.
31  *
32  *     +-+-+---------+--------+--------+-+
33  *     | | |   I N D E X  0   101, 0   | |
34  *     |H|C+---------+--------+--------+S|
35  *     |D|f| Volume  | Save   | Save   |e|
36  *     |R|g| Region  | Region | Region |a|
37  *     | | | 201, -1 | 202, 0 | 202, 1 |l|
 *     +-+-+---------+--------+--------+-+
39  *
40  * The header contains the encoded region layout table as well as some index configuration data.
41  * The sub-index region and its subdivisions are maintained in the same table.
42  *
43  * There are two save regions to preserve the old state in case saving the new state is incomplete.
44  * They are used in alternation. Each save region is further divided into sub-regions.
45  *
46  *     +-+-----+------+------+-----+-----+
47  *     |H| IPM | MI   | MI   |     | OC  |
48  *     |D|     | zone | zone | ... |     |
49  *     |R| 301 | 302  | 302  |     | 303 |
50  *     | | -1  |  0   |  1   |     | -1  |
51  *     +-+-----+------+------+-----+-----+
52  *
53  * The header contains the encoded region layout table as well as index state data for that save.
54  * Each save also has a unique nonce.
55  */
56 
/* The number of bytes of unique data that the primary nonce is hashed from. */
#define NONCE_INFO_SIZE 32
/* The number of save regions in a layout; the saves are used in alternation. */
#define MAX_SAVES 2
59 
/*
 * The kind of each region recorded in a region table. These values are written to storage as
 * the 16-bit kind field of each encoded region.
 */
enum region_kind {
	/* Unused space, such as the free space at the end of a save region */
	RL_KIND_EMPTY = 0,
	/* The block holding an encoded region table and its payload */
	RL_KIND_HEADER = 1,
	/* The block holding the index configuration */
	RL_KIND_CONFIG = 100,
	/* The sub-index, which contains the volume region and the save regions */
	RL_KIND_INDEX = 101,
	/* The single block that follows the sub-index at the end of the layout */
	RL_KIND_SEAL = 102,
	/* The volume region within the sub-index */
	RL_KIND_VOLUME = 201,
	/* One of the save regions within the sub-index */
	RL_KIND_SAVE = 202,
	/* The saved index page map within a save region */
	RL_KIND_INDEX_PAGE_MAP = 301,
	/* One saved volume index zone within a save region */
	RL_KIND_VOLUME_INDEX = 302,
	/* The saved open chapter within a save region */
	RL_KIND_OPEN_CHAPTER = 303,
};
72 
/*
 * The type recorded in a region table header. Some region types are historical and are no
 * longer used.
 */
enum region_type {
	RH_TYPE_FREE = 0, /* unused */
	RH_TYPE_SUPER = 1, /* the layout header, whose payload is the super block data */
	RH_TYPE_SAVE = 2, /* a save region header describing a saved index state */
	RH_TYPE_CHECKPOINT = 3, /* unused */
	RH_TYPE_UNSAVED = 4, /* a save region header for a slot with no saved state */
};
81 
/*
 * The instance number used for region kinds that only ever have one instance. This is the
 * largest value that fits in the 16-bit instance field of a layout region.
 */
#define RL_SOLE_INSTANCE 65535
83 
84 /*
85  * Super block version 2 is the first released version.
86  *
87  * Super block version 3 is the normal version used from RHEL 8.2 onwards.
88  *
89  * Super block versions 4 through 6 were incremental development versions and
90  * are not supported.
91  *
 * Super block version 7 is used for volumes which have been reduced in size by one chapter in
 * order to make room to prepend LVM metadata to a volume originally created without LVM. This
 * allows the index to retain most of its deduplication records.
95  */
/*
 * SUPER_VERSION_CURRENT is the version written into newly generated super blocks.
 * NOTE(review): MINIMUM and MAXIMUM presumably bound the versions accepted when loading a
 * layout; the code that checks them is not in this part of the file.
 */
#define SUPER_VERSION_MINIMUM 3
#define SUPER_VERSION_CURRENT 3
#define SUPER_VERSION_MAXIMUM 7
99 
/* The label copied into the magic_label field of every super block. */
static const u8 LAYOUT_MAGIC[] = "*ALBIREO*SINGLE*FILE*LAYOUT*001*";
/* The magic number that begins every encoded region table. */
static const u64 REGION_MAGIC = 0x416c6252676e3031; /* 'AlbRgn01' */

/* The length of the layout label, excluding the string's terminating NUL. */
#define MAGIC_SIZE (sizeof(LAYOUT_MAGIC) - 1)
104 
/* The header of a region table, describing the table and the data stored with it. */
struct region_header {
	/* Always REGION_MAGIC */
	u64 magic;
	/* The number of blocks in the area the table describes */
	u64 region_blocks;
	/* An enum region_type value */
	u16 type;
	/* Currently always version 1 */
	u16 version;
	/* The number of layout regions listed in the table */
	u16 region_count;
	/* The number of bytes of additional data encoded after the listed regions */
	u16 payload;
};
114 
/* One contiguous run of blocks in the layout, identified by its kind and instance. */
struct layout_region {
	/* The first block of the region */
	u64 start_block;
	/* The number of blocks in the region */
	u64 block_count;
	/* Always encoded as zero */
	u32 __unused;
	/* An enum region_kind value */
	u16 kind;
	/* The instance number, or RL_SOLE_INSTANCE */
	u16 instance;
};
122 
/* An in-memory region table: a header followed by the regions it lists. */
struct region_table {
	/*
	 * The size of struct region_header, plus the payload size, plus the size of struct
	 * layout_region for each listed region. Used to size the buffer the table is encoded into.
	 */
	size_t encoded_size;
	struct region_header header;
	/* header.region_count entries */
	struct layout_region regions[];
};
128 
/* The data identifying one save, stored in the payload of the save's header. */
struct index_save_data {
	/* The realtime clock in milliseconds when the save was set up; zero if there is no save */
	u64 timestamp;
	/* A hash of the sub-index nonce, this save data, and the save's starting block */
	u64 nonce;
	/* Currently always version 1 */
	u32 version;
	/* Always encoded as zero */
	u32 unused__;
};
136 
/* The two values encoded at the start of the index state data to identify its format. */
struct index_state_version {
	s32 signature;
	s32 version_id;
};
141 
/* The version values written ahead of the index state data in every save header. */
static const struct index_state_version INDEX_STATE_VERSION_301 = {
	.signature  = -1,
	.version_id = 301,
};
146 
/* The index state stored in the payload of a save header, after the save data. */
struct index_state_data301 {
	struct index_state_version version;
	/* Loaded into the index as its newest virtual chapter */
	u64 newest_chapter;
	/* Loaded into the index as its oldest virtual chapter */
	u64 oldest_chapter;
	/* Loaded into the index as its last save value */
	u64 last_save;
	/* These two fields are encoded together as a single 64-bit zero */
	u32 unused;
	u32 padding;
};
155 
/* The in-memory description of one save region and its sub-regions. */
struct index_save_layout {
	/* The number of volume index zones in the save; zero when the slot holds no save */
	unsigned int zone_count;
	/* The whole save region */
	struct layout_region index_save;
	/* The first block of the save region, holding its region table and payload */
	struct layout_region header;
	struct layout_region index_page_map;
	/* Whatever part of the save region is not assigned to another sub-region */
	struct layout_region free_space;
	/* Only the first zone_count entries are in use */
	struct layout_region volume_index_zones[MAX_ZONES];
	struct layout_region open_chapter;
	struct index_save_data save_data;
	struct index_state_data301 state_data;
};
167 
/* The in-memory description of the sub-index: the volume and its save regions. */
struct sub_index_layout {
	/* Derived from the super block nonce and the starting block of the sub-index */
	u64 nonce;
	/* The whole sub-index region */
	struct layout_region sub_index;
	struct layout_region volume;
	/* An array with one entry per save region */
	struct index_save_layout *saves;
};
174 
/* The super block data stored in the payload of the layout header. */
struct super_block_data {
	/* A copy of LAYOUT_MAGIC without its terminating NUL */
	u8 magic_label[MAGIC_SIZE];
	/* The unique data that the primary nonce is hashed from */
	u8 nonce_info[NONCE_INFO_SIZE];
	/* The primary nonce of the layout */
	u64 nonce;
	/* The super block version; version 7 marks a converted super block */
	u32 version;
	u32 block_size;
	/* Set to 1 when a super block is generated */
	u16 index_count;
	/* The number of save regions */
	u16 max_saves;
	/* Padding reflects a blank field on permanent storage */
	u8 padding[4];
	u64 open_chapter_blocks;
	u64 page_map_blocks;
	/* The two offsets below are only written to storage for converted super blocks */
	u64 volume_offset;
	/* Subtracted from a region's starting block when opening a region reader or writer */
	u64 start_offset;
};
190 
/* The in-memory description of the complete layout of an index on storage. */
struct index_layout {
	/* Used to create the buffered readers and writers for layout regions */
	struct io_factory *factory;
	size_t factory_size;
	/* A byte offset; divided by the block size, it gives the first block of the layout */
	off_t offset;
	struct super_block_data super;
	/* The block holding the layout's region table and super block data */
	struct layout_region header;
	/* The block holding the index configuration */
	struct layout_region config;
	struct sub_index_layout index;
	/* The single block following the sub-index */
	struct layout_region seal;
	u64 total_blocks;
};
202 
/* The sizes of the parts of a layout, as computed from an index configuration. */
struct save_layout_sizes {
	/* The number of save regions */
	unsigned int save_count;
	/* The size of a block in bytes */
	size_t block_size;
	u64 volume_blocks;
	u64 volume_index_blocks;
	u64 page_map_blocks;
	u64 open_chapter_blocks;
	/* One header block plus the volume index, page map, and open chapter blocks */
	u64 save_blocks;
	/* The volume blocks plus the blocks of every save region */
	u64 sub_index_blocks;
	/* The sub-index blocks plus three more blocks */
	u64 total_blocks;
	/* total_blocks expressed in bytes */
	size_t total_size;
};
215 
216 static inline bool is_converted_super_block(struct super_block_data *super)
217 {
218 	return super->version == 7;
219 }
220 
221 static int __must_check compute_sizes(const struct uds_configuration *config,
222 				      struct save_layout_sizes *sls)
223 {
224 	int result;
225 	struct index_geometry *geometry = config->geometry;
226 
227 	memset(sls, 0, sizeof(*sls));
228 	sls->save_count = MAX_SAVES;
229 	sls->block_size = UDS_BLOCK_SIZE;
230 	sls->volume_blocks = geometry->bytes_per_volume / sls->block_size;
231 
232 	result = uds_compute_volume_index_save_blocks(config, sls->block_size,
233 						      &sls->volume_index_blocks);
234 	if (result != UDS_SUCCESS)
235 		return vdo_log_error_strerror(result, "cannot compute index save size");
236 
237 	sls->page_map_blocks =
238 		DIV_ROUND_UP(uds_compute_index_page_map_save_size(geometry),
239 			     sls->block_size);
240 	sls->open_chapter_blocks =
241 		DIV_ROUND_UP(uds_compute_saved_open_chapter_size(geometry),
242 			     sls->block_size);
243 	sls->save_blocks =
244 		1 + (sls->volume_index_blocks + sls->page_map_blocks + sls->open_chapter_blocks);
245 	sls->sub_index_blocks = sls->volume_blocks + (sls->save_count * sls->save_blocks);
246 	sls->total_blocks = 3 + sls->sub_index_blocks;
247 	sls->total_size = sls->total_blocks * sls->block_size;
248 
249 	return UDS_SUCCESS;
250 }
251 
252 int uds_compute_index_size(const struct uds_parameters *parameters, u64 *index_size)
253 {
254 	int result;
255 	struct uds_configuration *index_config;
256 	struct save_layout_sizes sizes;
257 
258 	if (index_size == NULL) {
259 		vdo_log_error("Missing output size pointer");
260 		return -EINVAL;
261 	}
262 
263 	result = uds_make_configuration(parameters, &index_config);
264 	if (result != UDS_SUCCESS) {
265 		vdo_log_error_strerror(result, "cannot compute index size");
266 		return result;
267 	}
268 
269 	result = compute_sizes(index_config, &sizes);
270 	uds_free_configuration(index_config);
271 	if (result != UDS_SUCCESS)
272 		return result;
273 
274 	*index_size = sizes.total_size;
275 	return UDS_SUCCESS;
276 }
277 
278 /* Create unique data using the current time and a pseudorandom number. */
279 static void create_unique_nonce_data(u8 *buffer)
280 {
281 	ktime_t now = current_time_ns(CLOCK_REALTIME);
282 	u32 rand;
283 	size_t offset = 0;
284 
285 	get_random_bytes(&rand, sizeof(u32));
286 	memcpy(buffer + offset, &now, sizeof(now));
287 	offset += sizeof(now);
288 	memcpy(buffer + offset, &rand, sizeof(rand));
289 	offset += sizeof(rand);
290 	while (offset < NONCE_INFO_SIZE) {
291 		size_t len = min(NONCE_INFO_SIZE - offset, offset);
292 
293 		memcpy(buffer + offset, buffer, len);
294 		offset += len;
295 	}
296 }
297 
298 static u64 hash_stuff(u64 start, const void *data, size_t len)
299 {
300 	u32 seed = start ^ (start >> 27);
301 	u8 hash_buffer[16];
302 
303 	murmurhash3_128(data, len, seed, hash_buffer);
304 	return get_unaligned_le64(hash_buffer + 4);
305 }
306 
307 /* Generate a primary nonce from the provided data. */
308 static u64 generate_primary_nonce(const void *data, size_t len)
309 {
310 	return hash_stuff(0xa1b1e0fc, data, len);
311 }
312 
313 /*
314  * Deterministically generate a secondary nonce from an existing nonce and some arbitrary data by
315  * hashing the original nonce and the data to produce a new nonce.
316  */
317 static u64 generate_secondary_nonce(u64 nonce, const void *data, size_t len)
318 {
319 	return hash_stuff(nonce + 1, data, len);
320 }
321 
322 static int __must_check open_layout_reader(struct index_layout *layout,
323 					   struct layout_region *lr, off_t offset,
324 					   struct buffered_reader **reader_ptr)
325 {
326 	return uds_make_buffered_reader(layout->factory, lr->start_block + offset,
327 					lr->block_count, reader_ptr);
328 }
329 
330 static int open_region_reader(struct index_layout *layout, struct layout_region *region,
331 			      struct buffered_reader **reader_ptr)
332 {
333 	return open_layout_reader(layout, region, -layout->super.start_offset,
334 				  reader_ptr);
335 }
336 
337 static int __must_check open_layout_writer(struct index_layout *layout,
338 					   struct layout_region *lr, off_t offset,
339 					   struct buffered_writer **writer_ptr)
340 {
341 	return uds_make_buffered_writer(layout->factory, lr->start_block + offset,
342 					lr->block_count, writer_ptr);
343 }
344 
345 static int open_region_writer(struct index_layout *layout, struct layout_region *region,
346 			      struct buffered_writer **writer_ptr)
347 {
348 	return open_layout_writer(layout, region, -layout->super.start_offset,
349 				  writer_ptr);
350 }
351 
352 static void generate_super_block_data(struct save_layout_sizes *sls,
353 				      struct super_block_data *super)
354 {
355 	memset(super, 0, sizeof(*super));
356 	memcpy(super->magic_label, LAYOUT_MAGIC, MAGIC_SIZE);
357 	create_unique_nonce_data(super->nonce_info);
358 
359 	super->nonce = generate_primary_nonce(super->nonce_info,
360 					      sizeof(super->nonce_info));
361 	super->version = SUPER_VERSION_CURRENT;
362 	super->block_size = sls->block_size;
363 	super->index_count = 1;
364 	super->max_saves = sls->save_count;
365 	super->open_chapter_blocks = sls->open_chapter_blocks;
366 	super->page_map_blocks = sls->page_map_blocks;
367 	super->volume_offset = 0;
368 	super->start_offset = 0;
369 }
370 
371 static void define_sub_index_nonce(struct index_layout *layout)
372 {
373 	struct sub_index_nonce_data {
374 		u64 offset;
375 		u16 index_id;
376 	};
377 	struct sub_index_layout *sil = &layout->index;
378 	u64 primary_nonce = layout->super.nonce;
379 	u8 buffer[sizeof(struct sub_index_nonce_data)] = { 0 };
380 	size_t offset = 0;
381 
382 	encode_u64_le(buffer, &offset, sil->sub_index.start_block);
383 	encode_u16_le(buffer, &offset, 0);
384 	sil->nonce = generate_secondary_nonce(primary_nonce, buffer, sizeof(buffer));
385 	if (sil->nonce == 0) {
386 		sil->nonce = generate_secondary_nonce(~primary_nonce + 1, buffer,
387 						      sizeof(buffer));
388 	}
389 }
390 
391 static void setup_sub_index(struct index_layout *layout, u64 start_block,
392 			    struct save_layout_sizes *sls)
393 {
394 	struct sub_index_layout *sil = &layout->index;
395 	u64 next_block = start_block;
396 	unsigned int i;
397 
398 	sil->sub_index = (struct layout_region) {
399 		.start_block = start_block,
400 		.block_count = sls->sub_index_blocks,
401 		.kind = RL_KIND_INDEX,
402 		.instance = 0,
403 	};
404 
405 	sil->volume = (struct layout_region) {
406 		.start_block = next_block,
407 		.block_count = sls->volume_blocks,
408 		.kind = RL_KIND_VOLUME,
409 		.instance = RL_SOLE_INSTANCE,
410 	};
411 
412 	next_block += sls->volume_blocks;
413 
414 	for (i = 0; i < sls->save_count; i++) {
415 		sil->saves[i].index_save = (struct layout_region) {
416 			.start_block = next_block,
417 			.block_count = sls->save_blocks,
418 			.kind = RL_KIND_SAVE,
419 			.instance = i,
420 		};
421 
422 		next_block += sls->save_blocks;
423 	}
424 
425 	define_sub_index_nonce(layout);
426 }
427 
428 static void initialize_layout(struct index_layout *layout, struct save_layout_sizes *sls)
429 {
430 	u64 next_block = layout->offset / sls->block_size;
431 
432 	layout->total_blocks = sls->total_blocks;
433 	generate_super_block_data(sls, &layout->super);
434 	layout->header = (struct layout_region) {
435 		.start_block = next_block++,
436 		.block_count = 1,
437 		.kind = RL_KIND_HEADER,
438 		.instance = RL_SOLE_INSTANCE,
439 	};
440 
441 	layout->config = (struct layout_region) {
442 		.start_block = next_block++,
443 		.block_count = 1,
444 		.kind = RL_KIND_CONFIG,
445 		.instance = RL_SOLE_INSTANCE,
446 	};
447 
448 	setup_sub_index(layout, next_block, sls);
449 	next_block += sls->sub_index_blocks;
450 
451 	layout->seal = (struct layout_region) {
452 		.start_block = next_block,
453 		.block_count = 1,
454 		.kind = RL_KIND_SEAL,
455 		.instance = RL_SOLE_INSTANCE,
456 	};
457 }
458 
459 static int __must_check make_index_save_region_table(struct index_save_layout *isl,
460 						     struct region_table **table_ptr)
461 {
462 	int result;
463 	unsigned int z;
464 	struct region_table *table;
465 	struct layout_region *lr;
466 	u16 region_count;
467 	size_t payload;
468 	size_t type;
469 
470 	if (isl->zone_count > 0) {
471 		/*
472 		 * Normal save regions: header, page map, volume index zones,
473 		 * open chapter, and possibly free space.
474 		 */
475 		region_count = 3 + isl->zone_count;
476 		if (isl->free_space.block_count > 0)
477 			region_count++;
478 
479 		payload = sizeof(isl->save_data) + sizeof(isl->state_data);
480 		type = RH_TYPE_SAVE;
481 	} else {
482 		/* Empty save regions: header, page map, free space. */
483 		region_count = 3;
484 		payload = sizeof(isl->save_data);
485 		type = RH_TYPE_UNSAVED;
486 	}
487 
488 	result = vdo_allocate_extended(region_count, regions,
489 				       "layout region table for ISL", &table);
490 	if (result != VDO_SUCCESS)
491 		return result;
492 
493 	lr = &table->regions[0];
494 	*lr++ = isl->header;
495 	*lr++ = isl->index_page_map;
496 	for (z = 0; z < isl->zone_count; z++)
497 		*lr++ = isl->volume_index_zones[z];
498 
499 	if (isl->zone_count > 0)
500 		*lr++ = isl->open_chapter;
501 
502 	if (isl->free_space.block_count > 0)
503 		*lr++ = isl->free_space;
504 
505 	table->header = (struct region_header) {
506 		.magic = REGION_MAGIC,
507 		.region_blocks = isl->index_save.block_count,
508 		.type = type,
509 		.version = 1,
510 		.region_count = region_count,
511 		.payload = payload,
512 	};
513 
514 	table->encoded_size = (sizeof(struct region_header) + payload +
515 			       region_count * sizeof(struct layout_region));
516 	*table_ptr = table;
517 	return UDS_SUCCESS;
518 }
519 
520 static void encode_region_table(u8 *buffer, size_t *offset, struct region_table *table)
521 {
522 	unsigned int i;
523 
524 	encode_u64_le(buffer, offset, REGION_MAGIC);
525 	encode_u64_le(buffer, offset, table->header.region_blocks);
526 	encode_u16_le(buffer, offset, table->header.type);
527 	encode_u16_le(buffer, offset, table->header.version);
528 	encode_u16_le(buffer, offset, table->header.region_count);
529 	encode_u16_le(buffer, offset, table->header.payload);
530 
531 	for (i = 0; i < table->header.region_count; i++) {
532 		encode_u64_le(buffer, offset, table->regions[i].start_block);
533 		encode_u64_le(buffer, offset, table->regions[i].block_count);
534 		encode_u32_le(buffer, offset, 0);
535 		encode_u16_le(buffer, offset, table->regions[i].kind);
536 		encode_u16_le(buffer, offset, table->regions[i].instance);
537 	}
538 }
539 
540 static int __must_check write_index_save_header(struct index_save_layout *isl,
541 						struct region_table *table,
542 						struct buffered_writer *writer)
543 {
544 	int result;
545 	u8 *buffer;
546 	size_t offset = 0;
547 
548 	result = vdo_allocate(table->encoded_size, "index save data", &buffer);
549 	if (result != VDO_SUCCESS)
550 		return result;
551 
552 	encode_region_table(buffer, &offset, table);
553 	encode_u64_le(buffer, &offset, isl->save_data.timestamp);
554 	encode_u64_le(buffer, &offset, isl->save_data.nonce);
555 	encode_u32_le(buffer, &offset, isl->save_data.version);
556 	encode_u32_le(buffer, &offset, 0);
557 	if (isl->zone_count > 0) {
558 		encode_u32_le(buffer, &offset, INDEX_STATE_VERSION_301.signature);
559 		encode_u32_le(buffer, &offset, INDEX_STATE_VERSION_301.version_id);
560 		encode_u64_le(buffer, &offset, isl->state_data.newest_chapter);
561 		encode_u64_le(buffer, &offset, isl->state_data.oldest_chapter);
562 		encode_u64_le(buffer, &offset, isl->state_data.last_save);
563 		encode_u64_le(buffer, &offset, 0);
564 	}
565 
566 	result = uds_write_to_buffered_writer(writer, buffer, offset);
567 	vdo_free(buffer);
568 	if (result != UDS_SUCCESS)
569 		return result;
570 
571 	return uds_flush_buffered_writer(writer);
572 }
573 
574 static int write_index_save_layout(struct index_layout *layout,
575 				   struct index_save_layout *isl)
576 {
577 	int result;
578 	struct region_table *table;
579 	struct buffered_writer *writer;
580 
581 	result = make_index_save_region_table(isl, &table);
582 	if (result != UDS_SUCCESS)
583 		return result;
584 
585 	result = open_region_writer(layout, &isl->header, &writer);
586 	if (result != UDS_SUCCESS) {
587 		vdo_free(table);
588 		return result;
589 	}
590 
591 	result = write_index_save_header(isl, table, writer);
592 	vdo_free(table);
593 	uds_free_buffered_writer(writer);
594 
595 	return result;
596 }
597 
598 static void reset_index_save_layout(struct index_save_layout *isl, u64 page_map_blocks)
599 {
600 	u64 free_blocks;
601 	u64 next_block = isl->index_save.start_block;
602 
603 	isl->zone_count = 0;
604 	memset(&isl->save_data, 0, sizeof(isl->save_data));
605 
606 	isl->header = (struct layout_region) {
607 		.start_block = next_block++,
608 		.block_count = 1,
609 		.kind = RL_KIND_HEADER,
610 		.instance = RL_SOLE_INSTANCE,
611 	};
612 
613 	isl->index_page_map = (struct layout_region) {
614 		.start_block = next_block,
615 		.block_count = page_map_blocks,
616 		.kind = RL_KIND_INDEX_PAGE_MAP,
617 		.instance = RL_SOLE_INSTANCE,
618 	};
619 
620 	next_block += page_map_blocks;
621 
622 	free_blocks = isl->index_save.block_count - page_map_blocks - 1;
623 	isl->free_space = (struct layout_region) {
624 		.start_block = next_block,
625 		.block_count = free_blocks,
626 		.kind = RL_KIND_EMPTY,
627 		.instance = RL_SOLE_INSTANCE,
628 	};
629 }
630 
631 static int __must_check invalidate_old_save(struct index_layout *layout,
632 					    struct index_save_layout *isl)
633 {
634 	reset_index_save_layout(isl, layout->super.page_map_blocks);
635 	return write_index_save_layout(layout, isl);
636 }
637 
638 static int discard_index_state_data(struct index_layout *layout)
639 {
640 	int result;
641 	int saved_result = UDS_SUCCESS;
642 	unsigned int i;
643 
644 	for (i = 0; i < layout->super.max_saves; i++) {
645 		result = invalidate_old_save(layout, &layout->index.saves[i]);
646 		if (result != UDS_SUCCESS)
647 			saved_result = result;
648 	}
649 
650 	if (saved_result != UDS_SUCCESS) {
651 		return vdo_log_error_strerror(result,
652 					      "%s: cannot destroy all index saves",
653 					      __func__);
654 	}
655 
656 	return UDS_SUCCESS;
657 }
658 
659 static int __must_check make_layout_region_table(struct index_layout *layout,
660 						 struct region_table **table_ptr)
661 {
662 	int result;
663 	unsigned int i;
664 	/* Regions: header, config, index, volume, saves, seal */
665 	u16 region_count = 5 + layout->super.max_saves;
666 	u16 payload;
667 	struct region_table *table;
668 	struct layout_region *lr;
669 
670 	result = vdo_allocate_extended(region_count, regions,
671 				       "layout region table", &table);
672 	if (result != VDO_SUCCESS)
673 		return result;
674 
675 	lr = &table->regions[0];
676 	*lr++ = layout->header;
677 	*lr++ = layout->config;
678 	*lr++ = layout->index.sub_index;
679 	*lr++ = layout->index.volume;
680 
681 	for (i = 0; i < layout->super.max_saves; i++)
682 		*lr++ = layout->index.saves[i].index_save;
683 
684 	*lr++ = layout->seal;
685 
686 	if (is_converted_super_block(&layout->super)) {
687 		payload = sizeof(struct super_block_data);
688 	} else {
689 		payload = (sizeof(struct super_block_data) -
690 			   sizeof(layout->super.volume_offset) -
691 			   sizeof(layout->super.start_offset));
692 	}
693 
694 	table->header = (struct region_header) {
695 		.magic = REGION_MAGIC,
696 		.region_blocks = layout->total_blocks,
697 		.type = RH_TYPE_SUPER,
698 		.version = 1,
699 		.region_count = region_count,
700 		.payload = payload,
701 	};
702 
703 	table->encoded_size = (sizeof(struct region_header) + payload +
704 			       region_count * sizeof(struct layout_region));
705 	*table_ptr = table;
706 	return UDS_SUCCESS;
707 }
708 
709 static int __must_check write_layout_header(struct index_layout *layout,
710 					    struct region_table *table,
711 					    struct buffered_writer *writer)
712 {
713 	int result;
714 	u8 *buffer;
715 	size_t offset = 0;
716 
717 	result = vdo_allocate(table->encoded_size, "layout data", &buffer);
718 	if (result != VDO_SUCCESS)
719 		return result;
720 
721 	encode_region_table(buffer, &offset, table);
722 	memcpy(buffer + offset, &layout->super.magic_label, MAGIC_SIZE);
723 	offset += MAGIC_SIZE;
724 	memcpy(buffer + offset, &layout->super.nonce_info, NONCE_INFO_SIZE);
725 	offset += NONCE_INFO_SIZE;
726 	encode_u64_le(buffer, &offset, layout->super.nonce);
727 	encode_u32_le(buffer, &offset, layout->super.version);
728 	encode_u32_le(buffer, &offset, layout->super.block_size);
729 	encode_u16_le(buffer, &offset, layout->super.index_count);
730 	encode_u16_le(buffer, &offset, layout->super.max_saves);
731 	encode_u32_le(buffer, &offset, 0);
732 	encode_u64_le(buffer, &offset, layout->super.open_chapter_blocks);
733 	encode_u64_le(buffer, &offset, layout->super.page_map_blocks);
734 
735 	if (is_converted_super_block(&layout->super)) {
736 		encode_u64_le(buffer, &offset, layout->super.volume_offset);
737 		encode_u64_le(buffer, &offset, layout->super.start_offset);
738 	}
739 
740 	result = uds_write_to_buffered_writer(writer, buffer, offset);
741 	vdo_free(buffer);
742 	if (result != UDS_SUCCESS)
743 		return result;
744 
745 	return uds_flush_buffered_writer(writer);
746 }
747 
748 static int __must_check write_uds_index_config(struct index_layout *layout,
749 					       struct uds_configuration *config,
750 					       off_t offset)
751 {
752 	int result;
753 	struct buffered_writer *writer = NULL;
754 
755 	result = open_layout_writer(layout, &layout->config, offset, &writer);
756 	if (result != UDS_SUCCESS)
757 		return vdo_log_error_strerror(result, "failed to open config region");
758 
759 	result = uds_write_config_contents(writer, config, layout->super.version);
760 	if (result != UDS_SUCCESS) {
761 		uds_free_buffered_writer(writer);
762 		return vdo_log_error_strerror(result, "failed to write config region");
763 	}
764 
765 	result = uds_flush_buffered_writer(writer);
766 	if (result != UDS_SUCCESS) {
767 		uds_free_buffered_writer(writer);
768 		return vdo_log_error_strerror(result, "cannot flush config writer");
769 	}
770 
771 	uds_free_buffered_writer(writer);
772 	return UDS_SUCCESS;
773 }
774 
775 static int __must_check save_layout(struct index_layout *layout, off_t offset)
776 {
777 	int result;
778 	struct buffered_writer *writer = NULL;
779 	struct region_table *table;
780 
781 	result = make_layout_region_table(layout, &table);
782 	if (result != UDS_SUCCESS)
783 		return result;
784 
785 	result = open_layout_writer(layout, &layout->header, offset, &writer);
786 	if (result != UDS_SUCCESS) {
787 		vdo_free(table);
788 		return result;
789 	}
790 
791 	result = write_layout_header(layout, table, writer);
792 	vdo_free(table);
793 	uds_free_buffered_writer(writer);
794 
795 	return result;
796 }
797 
798 static int create_index_layout(struct index_layout *layout, struct uds_configuration *config)
799 {
800 	int result;
801 	struct save_layout_sizes sizes;
802 
803 	result = compute_sizes(config, &sizes);
804 	if (result != UDS_SUCCESS)
805 		return result;
806 
807 	result = vdo_allocate(sizes.save_count, __func__, &layout->index.saves);
808 	if (result != VDO_SUCCESS)
809 		return result;
810 
811 	initialize_layout(layout, &sizes);
812 
813 	result = discard_index_state_data(layout);
814 	if (result != UDS_SUCCESS)
815 		return result;
816 
817 	result = write_uds_index_config(layout, config, 0);
818 	if (result != UDS_SUCCESS)
819 		return result;
820 
821 	return save_layout(layout, 0);
822 }
823 
824 static u64 generate_index_save_nonce(u64 volume_nonce, struct index_save_layout *isl)
825 {
826 	struct save_nonce_data {
827 		struct index_save_data data;
828 		u64 offset;
829 	} nonce_data;
830 	u8 buffer[sizeof(nonce_data)];
831 	size_t offset = 0;
832 
833 	encode_u64_le(buffer, &offset, isl->save_data.timestamp);
834 	encode_u64_le(buffer, &offset, 0);
835 	encode_u32_le(buffer, &offset, isl->save_data.version);
836 	encode_u32_le(buffer, &offset, 0U);
837 	encode_u64_le(buffer, &offset, isl->index_save.start_block);
838 	VDO_ASSERT_LOG_ONLY(offset == sizeof(nonce_data),
839 			    "%zu bytes encoded of %zu expected",
840 			    offset, sizeof(nonce_data));
841 	return generate_secondary_nonce(volume_nonce, buffer, sizeof(buffer));
842 }
843 
844 static u64 validate_index_save_layout(struct index_save_layout *isl, u64 volume_nonce)
845 {
846 	if ((isl->zone_count == 0) || (isl->save_data.timestamp == 0))
847 		return 0;
848 
849 	if (isl->save_data.nonce != generate_index_save_nonce(volume_nonce, isl))
850 		return 0;
851 
852 	return isl->save_data.timestamp;
853 }
854 
855 static int find_latest_uds_index_save_slot(struct index_layout *layout,
856 					   struct index_save_layout **isl_ptr)
857 {
858 	struct index_save_layout *latest = NULL;
859 	struct index_save_layout *isl;
860 	unsigned int i;
861 	u64 save_time = 0;
862 	u64 latest_time = 0;
863 
864 	for (i = 0; i < layout->super.max_saves; i++) {
865 		isl = &layout->index.saves[i];
866 		save_time = validate_index_save_layout(isl, layout->index.nonce);
867 		if (save_time > latest_time) {
868 			latest = isl;
869 			latest_time = save_time;
870 		}
871 	}
872 
873 	if (latest == NULL) {
874 		vdo_log_error("No valid index save found");
875 		return UDS_INDEX_NOT_SAVED_CLEANLY;
876 	}
877 
878 	*isl_ptr = latest;
879 	return UDS_SUCCESS;
880 }
881 
882 int uds_discard_open_chapter(struct index_layout *layout)
883 {
884 	int result;
885 	struct index_save_layout *isl;
886 	struct buffered_writer *writer;
887 
888 	result = find_latest_uds_index_save_slot(layout, &isl);
889 	if (result != UDS_SUCCESS)
890 		return result;
891 
892 	result = open_region_writer(layout, &isl->open_chapter, &writer);
893 	if (result != UDS_SUCCESS)
894 		return result;
895 
896 	result = uds_write_to_buffered_writer(writer, NULL, UDS_BLOCK_SIZE);
897 	if (result != UDS_SUCCESS) {
898 		uds_free_buffered_writer(writer);
899 		return result;
900 	}
901 
902 	result = uds_flush_buffered_writer(writer);
903 	uds_free_buffered_writer(writer);
904 	return result;
905 }
906 
907 int uds_load_index_state(struct index_layout *layout, struct uds_index *index)
908 {
909 	int result;
910 	unsigned int zone;
911 	struct index_save_layout *isl;
912 	struct buffered_reader *readers[MAX_ZONES];
913 
914 	result = find_latest_uds_index_save_slot(layout, &isl);
915 	if (result != UDS_SUCCESS)
916 		return result;
917 
918 	index->newest_virtual_chapter = isl->state_data.newest_chapter;
919 	index->oldest_virtual_chapter = isl->state_data.oldest_chapter;
920 	index->last_save = isl->state_data.last_save;
921 
922 	result = open_region_reader(layout, &isl->open_chapter, &readers[0]);
923 	if (result != UDS_SUCCESS)
924 		return result;
925 
926 	result = uds_load_open_chapter(index, readers[0]);
927 	uds_free_buffered_reader(readers[0]);
928 	if (result != UDS_SUCCESS)
929 		return result;
930 
931 	for (zone = 0; zone < isl->zone_count; zone++) {
932 		result = open_region_reader(layout, &isl->volume_index_zones[zone],
933 					    &readers[zone]);
934 		if (result != UDS_SUCCESS) {
935 			for (; zone > 0; zone--)
936 				uds_free_buffered_reader(readers[zone - 1]);
937 
938 			return result;
939 		}
940 	}
941 
942 	result = uds_load_volume_index(index->volume_index, readers, isl->zone_count);
943 	for (zone = 0; zone < isl->zone_count; zone++)
944 		uds_free_buffered_reader(readers[zone]);
945 	if (result != UDS_SUCCESS)
946 		return result;
947 
948 	result = open_region_reader(layout, &isl->index_page_map, &readers[0]);
949 	if (result != UDS_SUCCESS)
950 		return result;
951 
952 	result = uds_read_index_page_map(index->volume->index_page_map, readers[0]);
953 	uds_free_buffered_reader(readers[0]);
954 
955 	return result;
956 }
957 
958 static struct index_save_layout *select_oldest_index_save_layout(struct index_layout *layout)
959 {
960 	struct index_save_layout *oldest = NULL;
961 	struct index_save_layout *isl;
962 	unsigned int i;
963 	u64 save_time = 0;
964 	u64 oldest_time = 0;
965 
966 	for (i = 0; i < layout->super.max_saves; i++) {
967 		isl = &layout->index.saves[i];
968 		save_time = validate_index_save_layout(isl, layout->index.nonce);
969 		if (oldest == NULL || save_time < oldest_time) {
970 			oldest = isl;
971 			oldest_time = save_time;
972 		}
973 	}
974 
975 	return oldest;
976 }
977 
978 static void instantiate_index_save_layout(struct index_save_layout *isl,
979 					  struct super_block_data *super,
980 					  u64 volume_nonce, unsigned int zone_count)
981 {
982 	unsigned int z;
983 	u64 next_block;
984 	u64 free_blocks;
985 	u64 volume_index_blocks;
986 
987 	isl->zone_count = zone_count;
988 	memset(&isl->save_data, 0, sizeof(isl->save_data));
989 	isl->save_data.timestamp = ktime_to_ms(current_time_ns(CLOCK_REALTIME));
990 	isl->save_data.version = 1;
991 	isl->save_data.nonce = generate_index_save_nonce(volume_nonce, isl);
992 
993 	next_block = isl->index_save.start_block;
994 	isl->header = (struct layout_region) {
995 		.start_block = next_block++,
996 		.block_count = 1,
997 		.kind = RL_KIND_HEADER,
998 		.instance = RL_SOLE_INSTANCE,
999 	};
1000 
1001 	isl->index_page_map = (struct layout_region) {
1002 		.start_block = next_block,
1003 		.block_count = super->page_map_blocks,
1004 		.kind = RL_KIND_INDEX_PAGE_MAP,
1005 		.instance = RL_SOLE_INSTANCE,
1006 	};
1007 	next_block += super->page_map_blocks;
1008 
1009 	free_blocks = (isl->index_save.block_count - 1 -
1010 		       super->page_map_blocks -
1011 		       super->open_chapter_blocks);
1012 	volume_index_blocks = free_blocks / isl->zone_count;
1013 	for (z = 0; z < isl->zone_count; z++) {
1014 		isl->volume_index_zones[z] = (struct layout_region) {
1015 			.start_block = next_block,
1016 			.block_count = volume_index_blocks,
1017 			.kind = RL_KIND_VOLUME_INDEX,
1018 			.instance = z,
1019 		};
1020 
1021 		next_block += volume_index_blocks;
1022 		free_blocks -= volume_index_blocks;
1023 	}
1024 
1025 	isl->open_chapter = (struct layout_region) {
1026 		.start_block = next_block,
1027 		.block_count = super->open_chapter_blocks,
1028 		.kind = RL_KIND_OPEN_CHAPTER,
1029 		.instance = RL_SOLE_INSTANCE,
1030 	};
1031 
1032 	next_block += super->open_chapter_blocks;
1033 
1034 	isl->free_space = (struct layout_region) {
1035 		.start_block = next_block,
1036 		.block_count = free_blocks,
1037 		.kind = RL_KIND_EMPTY,
1038 		.instance = RL_SOLE_INSTANCE,
1039 	};
1040 }
1041 
1042 static int setup_uds_index_save_slot(struct index_layout *layout,
1043 				     unsigned int zone_count,
1044 				     struct index_save_layout **isl_ptr)
1045 {
1046 	int result;
1047 	struct index_save_layout *isl;
1048 
1049 	isl = select_oldest_index_save_layout(layout);
1050 	result = invalidate_old_save(layout, isl);
1051 	if (result != UDS_SUCCESS)
1052 		return result;
1053 
1054 	instantiate_index_save_layout(isl, &layout->super, layout->index.nonce,
1055 				      zone_count);
1056 
1057 	*isl_ptr = isl;
1058 	return UDS_SUCCESS;
1059 }
1060 
1061 static void cancel_uds_index_save(struct index_save_layout *isl)
1062 {
1063 	memset(&isl->save_data, 0, sizeof(isl->save_data));
1064 	memset(&isl->state_data, 0, sizeof(isl->state_data));
1065 	isl->zone_count = 0;
1066 }
1067 
1068 int uds_save_index_state(struct index_layout *layout, struct uds_index *index)
1069 {
1070 	int result;
1071 	unsigned int zone;
1072 	struct index_save_layout *isl;
1073 	struct buffered_writer *writers[MAX_ZONES];
1074 
1075 	result = setup_uds_index_save_slot(layout, index->zone_count, &isl);
1076 	if (result != UDS_SUCCESS)
1077 		return result;
1078 
1079 	isl->state_data	= (struct index_state_data301) {
1080 		.newest_chapter = index->newest_virtual_chapter,
1081 		.oldest_chapter = index->oldest_virtual_chapter,
1082 		.last_save = index->last_save,
1083 	};
1084 
1085 	result = open_region_writer(layout, &isl->open_chapter, &writers[0]);
1086 	if (result != UDS_SUCCESS) {
1087 		cancel_uds_index_save(isl);
1088 		return result;
1089 	}
1090 
1091 	result = uds_save_open_chapter(index, writers[0]);
1092 	uds_free_buffered_writer(writers[0]);
1093 	if (result != UDS_SUCCESS) {
1094 		cancel_uds_index_save(isl);
1095 		return result;
1096 	}
1097 
1098 	for (zone = 0; zone < index->zone_count; zone++) {
1099 		result = open_region_writer(layout, &isl->volume_index_zones[zone],
1100 					    &writers[zone]);
1101 		if (result != UDS_SUCCESS) {
1102 			for (; zone > 0; zone--)
1103 				uds_free_buffered_writer(writers[zone - 1]);
1104 
1105 			cancel_uds_index_save(isl);
1106 			return result;
1107 		}
1108 	}
1109 
1110 	result = uds_save_volume_index(index->volume_index, writers, index->zone_count);
1111 	for (zone = 0; zone < index->zone_count; zone++)
1112 		uds_free_buffered_writer(writers[zone]);
1113 	if (result != UDS_SUCCESS) {
1114 		cancel_uds_index_save(isl);
1115 		return result;
1116 	}
1117 
1118 	result = open_region_writer(layout, &isl->index_page_map, &writers[0]);
1119 	if (result != UDS_SUCCESS) {
1120 		cancel_uds_index_save(isl);
1121 		return result;
1122 	}
1123 
1124 	result = uds_write_index_page_map(index->volume->index_page_map, writers[0]);
1125 	uds_free_buffered_writer(writers[0]);
1126 	if (result != UDS_SUCCESS) {
1127 		cancel_uds_index_save(isl);
1128 		return result;
1129 	}
1130 
1131 	return write_index_save_layout(layout, isl);
1132 }
1133 
1134 static int __must_check load_region_table(struct buffered_reader *reader,
1135 					  struct region_table **table_ptr)
1136 {
1137 	int result;
1138 	unsigned int i;
1139 	struct region_header header;
1140 	struct region_table *table;
1141 	u8 buffer[sizeof(struct region_header)];
1142 	size_t offset = 0;
1143 
1144 	result = uds_read_from_buffered_reader(reader, buffer, sizeof(buffer));
1145 	if (result != UDS_SUCCESS)
1146 		return vdo_log_error_strerror(result, "cannot read region table header");
1147 
1148 	decode_u64_le(buffer, &offset, &header.magic);
1149 	decode_u64_le(buffer, &offset, &header.region_blocks);
1150 	decode_u16_le(buffer, &offset, &header.type);
1151 	decode_u16_le(buffer, &offset, &header.version);
1152 	decode_u16_le(buffer, &offset, &header.region_count);
1153 	decode_u16_le(buffer, &offset, &header.payload);
1154 
1155 	if (header.magic != REGION_MAGIC)
1156 		return UDS_NO_INDEX;
1157 
1158 	if (header.version != 1) {
1159 		return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1160 					      "unknown region table version %hu",
1161 					      header.version);
1162 	}
1163 
1164 	result = vdo_allocate_extended(header.region_count, regions,
1165 				       "single file layout region table", &table);
1166 	if (result != VDO_SUCCESS)
1167 		return result;
1168 
1169 	table->header = header;
1170 	for (i = 0; i < header.region_count; i++) {
1171 		u8 region_buffer[sizeof(struct layout_region)];
1172 
1173 		offset = 0;
1174 		result = uds_read_from_buffered_reader(reader, region_buffer,
1175 						       sizeof(region_buffer));
1176 		if (result != UDS_SUCCESS) {
1177 			vdo_free(table);
1178 			return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1179 						      "cannot read region table layouts");
1180 		}
1181 
1182 		decode_u64_le(region_buffer, &offset, &table->regions[i].start_block);
1183 		decode_u64_le(region_buffer, &offset, &table->regions[i].block_count);
1184 		offset += sizeof(u32);
1185 		decode_u16_le(region_buffer, &offset, &table->regions[i].kind);
1186 		decode_u16_le(region_buffer, &offset, &table->regions[i].instance);
1187 	}
1188 
1189 	*table_ptr = table;
1190 	return UDS_SUCCESS;
1191 }
1192 
1193 static int __must_check read_super_block_data(struct buffered_reader *reader,
1194 					      struct index_layout *layout,
1195 					      size_t saved_size)
1196 {
1197 	int result;
1198 	struct super_block_data *super = &layout->super;
1199 	u8 *buffer;
1200 	size_t offset = 0;
1201 
1202 	result = vdo_allocate(saved_size, "super block data", &buffer);
1203 	if (result != VDO_SUCCESS)
1204 		return result;
1205 
1206 	result = uds_read_from_buffered_reader(reader, buffer, saved_size);
1207 	if (result != UDS_SUCCESS) {
1208 		vdo_free(buffer);
1209 		return vdo_log_error_strerror(result, "cannot read region table header");
1210 	}
1211 
1212 	memcpy(&super->magic_label, buffer, MAGIC_SIZE);
1213 	offset += MAGIC_SIZE;
1214 	memcpy(&super->nonce_info, buffer + offset, NONCE_INFO_SIZE);
1215 	offset += NONCE_INFO_SIZE;
1216 	decode_u64_le(buffer, &offset, &super->nonce);
1217 	decode_u32_le(buffer, &offset, &super->version);
1218 	decode_u32_le(buffer, &offset, &super->block_size);
1219 	decode_u16_le(buffer, &offset, &super->index_count);
1220 	decode_u16_le(buffer, &offset, &super->max_saves);
1221 	offset += sizeof(u32);
1222 	decode_u64_le(buffer, &offset, &super->open_chapter_blocks);
1223 	decode_u64_le(buffer, &offset, &super->page_map_blocks);
1224 
1225 	if (is_converted_super_block(super)) {
1226 		decode_u64_le(buffer, &offset, &super->volume_offset);
1227 		decode_u64_le(buffer, &offset, &super->start_offset);
1228 	} else {
1229 		super->volume_offset = 0;
1230 		super->start_offset = 0;
1231 	}
1232 
1233 	vdo_free(buffer);
1234 
1235 	if (memcmp(super->magic_label, LAYOUT_MAGIC, MAGIC_SIZE) != 0)
1236 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1237 					      "unknown superblock magic label");
1238 
1239 	if ((super->version < SUPER_VERSION_MINIMUM) ||
1240 	    (super->version == 4) || (super->version == 5) || (super->version == 6) ||
1241 	    (super->version > SUPER_VERSION_MAXIMUM)) {
1242 		return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1243 					      "unknown superblock version number %u",
1244 					      super->version);
1245 	}
1246 
1247 	if (super->volume_offset < super->start_offset) {
1248 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1249 					      "inconsistent offsets (start %llu, volume %llu)",
1250 					      (unsigned long long) super->start_offset,
1251 					      (unsigned long long) super->volume_offset);
1252 	}
1253 
1254 	/* Sub-indexes are no longer used but the layout retains this field. */
1255 	if (super->index_count != 1) {
1256 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1257 					      "invalid subindex count %u",
1258 					      super->index_count);
1259 	}
1260 
1261 	if (generate_primary_nonce(super->nonce_info, sizeof(super->nonce_info)) != super->nonce) {
1262 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1263 					      "inconsistent superblock nonce");
1264 	}
1265 
1266 	return UDS_SUCCESS;
1267 }
1268 
1269 static int __must_check verify_region(struct layout_region *lr, u64 start_block,
1270 				      enum region_kind kind, unsigned int instance)
1271 {
1272 	if (lr->start_block != start_block)
1273 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1274 					      "incorrect layout region offset");
1275 
1276 	if (lr->kind != kind)
1277 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1278 					      "incorrect layout region kind");
1279 
1280 	if (lr->instance != instance) {
1281 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1282 					      "incorrect layout region instance");
1283 	}
1284 
1285 	return UDS_SUCCESS;
1286 }
1287 
1288 static int __must_check verify_sub_index(struct index_layout *layout, u64 start_block,
1289 					 struct region_table *table)
1290 {
1291 	int result;
1292 	unsigned int i;
1293 	struct sub_index_layout *sil = &layout->index;
1294 	u64 next_block = start_block;
1295 
1296 	sil->sub_index = table->regions[2];
1297 	result = verify_region(&sil->sub_index, next_block, RL_KIND_INDEX, 0);
1298 	if (result != UDS_SUCCESS)
1299 		return result;
1300 
1301 	define_sub_index_nonce(layout);
1302 
1303 	sil->volume = table->regions[3];
1304 	result = verify_region(&sil->volume, next_block, RL_KIND_VOLUME,
1305 			       RL_SOLE_INSTANCE);
1306 	if (result != UDS_SUCCESS)
1307 		return result;
1308 
1309 	next_block += sil->volume.block_count + layout->super.volume_offset;
1310 
1311 	for (i = 0; i < layout->super.max_saves; i++) {
1312 		sil->saves[i].index_save = table->regions[i + 4];
1313 		result = verify_region(&sil->saves[i].index_save, next_block,
1314 				       RL_KIND_SAVE, i);
1315 		if (result != UDS_SUCCESS)
1316 			return result;
1317 
1318 		next_block += sil->saves[i].index_save.block_count;
1319 	}
1320 
1321 	next_block -= layout->super.volume_offset;
1322 	if (next_block != start_block + sil->sub_index.block_count) {
1323 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1324 					      "sub index region does not span all saves");
1325 	}
1326 
1327 	return UDS_SUCCESS;
1328 }
1329 
1330 static int __must_check reconstitute_layout(struct index_layout *layout,
1331 					    struct region_table *table, u64 first_block)
1332 {
1333 	int result;
1334 	u64 next_block = first_block;
1335 
1336 	result = vdo_allocate(layout->super.max_saves, __func__, &layout->index.saves);
1337 	if (result != VDO_SUCCESS)
1338 		return result;
1339 
1340 	layout->total_blocks = table->header.region_blocks;
1341 
1342 	layout->header = table->regions[0];
1343 	result = verify_region(&layout->header, next_block++, RL_KIND_HEADER,
1344 			       RL_SOLE_INSTANCE);
1345 	if (result != UDS_SUCCESS)
1346 		return result;
1347 
1348 	layout->config = table->regions[1];
1349 	result = verify_region(&layout->config, next_block++, RL_KIND_CONFIG,
1350 			       RL_SOLE_INSTANCE);
1351 	if (result != UDS_SUCCESS)
1352 		return result;
1353 
1354 	result = verify_sub_index(layout, next_block, table);
1355 	if (result != UDS_SUCCESS)
1356 		return result;
1357 
1358 	next_block += layout->index.sub_index.block_count;
1359 
1360 	layout->seal = table->regions[table->header.region_count - 1];
1361 	result = verify_region(&layout->seal, next_block + layout->super.volume_offset,
1362 			       RL_KIND_SEAL, RL_SOLE_INSTANCE);
1363 	if (result != UDS_SUCCESS)
1364 		return result;
1365 
1366 	if (++next_block != (first_block + layout->total_blocks)) {
1367 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1368 					      "layout table does not span total blocks");
1369 	}
1370 
1371 	return UDS_SUCCESS;
1372 }
1373 
1374 static int __must_check load_super_block(struct index_layout *layout, size_t block_size,
1375 					 u64 first_block, struct buffered_reader *reader)
1376 {
1377 	int result;
1378 	struct region_table *table = NULL;
1379 	struct super_block_data *super = &layout->super;
1380 
1381 	result = load_region_table(reader, &table);
1382 	if (result != UDS_SUCCESS)
1383 		return result;
1384 
1385 	if (table->header.type != RH_TYPE_SUPER) {
1386 		vdo_free(table);
1387 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1388 					      "not a superblock region table");
1389 	}
1390 
1391 	result = read_super_block_data(reader, layout, table->header.payload);
1392 	if (result != UDS_SUCCESS) {
1393 		vdo_free(table);
1394 		return vdo_log_error_strerror(result, "unknown superblock format");
1395 	}
1396 
1397 	if (super->block_size != block_size) {
1398 		vdo_free(table);
1399 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1400 					      "superblock saved block_size %u differs from supplied block_size %zu",
1401 					      super->block_size, block_size);
1402 	}
1403 
1404 	first_block -= (super->volume_offset - super->start_offset);
1405 	result = reconstitute_layout(layout, table, first_block);
1406 	vdo_free(table);
1407 	return result;
1408 }
1409 
1410 static int __must_check read_index_save_data(struct buffered_reader *reader,
1411 					     struct index_save_layout *isl,
1412 					     size_t saved_size)
1413 {
1414 	int result;
1415 	struct index_state_version file_version;
1416 	u8 buffer[sizeof(struct index_save_data) + sizeof(struct index_state_data301)];
1417 	size_t offset = 0;
1418 
1419 	if (saved_size != sizeof(buffer)) {
1420 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1421 					      "unexpected index save data size %zu",
1422 					      saved_size);
1423 	}
1424 
1425 	result = uds_read_from_buffered_reader(reader, buffer, sizeof(buffer));
1426 	if (result != UDS_SUCCESS)
1427 		return vdo_log_error_strerror(result, "cannot read index save data");
1428 
1429 	decode_u64_le(buffer, &offset, &isl->save_data.timestamp);
1430 	decode_u64_le(buffer, &offset, &isl->save_data.nonce);
1431 	decode_u32_le(buffer, &offset, &isl->save_data.version);
1432 	offset += sizeof(u32);
1433 
1434 	if (isl->save_data.version > 1) {
1435 		return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1436 					      "unknown index save version number %u",
1437 					      isl->save_data.version);
1438 	}
1439 
1440 	decode_s32_le(buffer, &offset, &file_version.signature);
1441 	decode_s32_le(buffer, &offset, &file_version.version_id);
1442 
1443 	if ((file_version.signature != INDEX_STATE_VERSION_301.signature) ||
1444 	    (file_version.version_id != INDEX_STATE_VERSION_301.version_id)) {
1445 		return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1446 					      "index state version %d,%d is unsupported",
1447 					      file_version.signature,
1448 					      file_version.version_id);
1449 	}
1450 
1451 	decode_u64_le(buffer, &offset, &isl->state_data.newest_chapter);
1452 	decode_u64_le(buffer, &offset, &isl->state_data.oldest_chapter);
1453 	decode_u64_le(buffer, &offset, &isl->state_data.last_save);
1454 	/* Skip past some historical fields that are now unused */
1455 	offset += sizeof(u32) + sizeof(u32);
1456 	return UDS_SUCCESS;
1457 }
1458 
1459 static int __must_check reconstruct_index_save(struct index_save_layout *isl,
1460 					       struct region_table *table)
1461 {
1462 	int result;
1463 	unsigned int z;
1464 	struct layout_region *last_region;
1465 	u64 next_block = isl->index_save.start_block;
1466 	u64 last_block = next_block + isl->index_save.block_count;
1467 
1468 	isl->zone_count = table->header.region_count - 3;
1469 	if (isl->zone_count > MAX_ZONES)
1470 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1471 					      "invalid zone count");
1472 
1473 	last_region = &table->regions[table->header.region_count - 1];
1474 	if (last_region->kind == RL_KIND_EMPTY) {
1475 		isl->free_space = *last_region;
1476 		isl->zone_count--;
1477 	} else {
1478 		isl->free_space = (struct layout_region) {
1479 			.start_block = last_block,
1480 			.block_count = 0,
1481 			.kind = RL_KIND_EMPTY,
1482 			.instance = RL_SOLE_INSTANCE,
1483 		};
1484 	}
1485 
1486 	isl->header = table->regions[0];
1487 	result = verify_region(&isl->header, next_block++, RL_KIND_HEADER,
1488 			       RL_SOLE_INSTANCE);
1489 	if (result != UDS_SUCCESS)
1490 		return result;
1491 
1492 	isl->index_page_map = table->regions[1];
1493 	result = verify_region(&isl->index_page_map, next_block, RL_KIND_INDEX_PAGE_MAP,
1494 			       RL_SOLE_INSTANCE);
1495 	if (result != UDS_SUCCESS)
1496 		return result;
1497 
1498 	next_block += isl->index_page_map.block_count;
1499 
1500 	for (z = 0; z < isl->zone_count; z++) {
1501 		isl->volume_index_zones[z] = table->regions[z + 2];
1502 		result = verify_region(&isl->volume_index_zones[z], next_block,
1503 				       RL_KIND_VOLUME_INDEX, z);
1504 		if (result != UDS_SUCCESS)
1505 			return result;
1506 
1507 		next_block += isl->volume_index_zones[z].block_count;
1508 	}
1509 
1510 	isl->open_chapter = table->regions[isl->zone_count + 2];
1511 	result = verify_region(&isl->open_chapter, next_block, RL_KIND_OPEN_CHAPTER,
1512 			       RL_SOLE_INSTANCE);
1513 	if (result != UDS_SUCCESS)
1514 		return result;
1515 
1516 	next_block += isl->open_chapter.block_count;
1517 
1518 	result = verify_region(&isl->free_space, next_block, RL_KIND_EMPTY,
1519 			       RL_SOLE_INSTANCE);
1520 	if (result != UDS_SUCCESS)
1521 		return result;
1522 
1523 	next_block += isl->free_space.block_count;
1524 	if (next_block != last_block) {
1525 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1526 					      "index save layout table incomplete");
1527 	}
1528 
1529 	return UDS_SUCCESS;
1530 }
1531 
1532 static int __must_check load_index_save(struct index_save_layout *isl,
1533 					struct buffered_reader *reader,
1534 					unsigned int instance)
1535 {
1536 	int result;
1537 	struct region_table *table = NULL;
1538 
1539 	result = load_region_table(reader, &table);
1540 	if (result != UDS_SUCCESS) {
1541 		return vdo_log_error_strerror(result, "cannot read index save %u header",
1542 					      instance);
1543 	}
1544 
1545 	if (table->header.region_blocks != isl->index_save.block_count) {
1546 		u64 region_blocks = table->header.region_blocks;
1547 
1548 		vdo_free(table);
1549 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1550 					      "unexpected index save %u region block count %llu",
1551 					      instance,
1552 					      (unsigned long long) region_blocks);
1553 	}
1554 
1555 	if (table->header.type == RH_TYPE_UNSAVED) {
1556 		vdo_free(table);
1557 		reset_index_save_layout(isl, 0);
1558 		return UDS_SUCCESS;
1559 	}
1560 
1561 
1562 	if (table->header.type != RH_TYPE_SAVE) {
1563 		vdo_log_error_strerror(UDS_CORRUPT_DATA,
1564 				       "unexpected index save %u header type %u",
1565 				       instance, table->header.type);
1566 		vdo_free(table);
1567 		return UDS_CORRUPT_DATA;
1568 	}
1569 
1570 	result = read_index_save_data(reader, isl, table->header.payload);
1571 	if (result != UDS_SUCCESS) {
1572 		vdo_free(table);
1573 		return vdo_log_error_strerror(result,
1574 					      "unknown index save %u data format",
1575 					      instance);
1576 	}
1577 
1578 	result = reconstruct_index_save(isl, table);
1579 	vdo_free(table);
1580 	if (result != UDS_SUCCESS) {
1581 		return vdo_log_error_strerror(result, "cannot reconstruct index save %u",
1582 					      instance);
1583 	}
1584 
1585 	return UDS_SUCCESS;
1586 }
1587 
1588 static int __must_check load_sub_index_regions(struct index_layout *layout)
1589 {
1590 	int result;
1591 	unsigned int j;
1592 	struct index_save_layout *isl;
1593 	struct buffered_reader *reader;
1594 
1595 	for (j = 0; j < layout->super.max_saves; j++) {
1596 		isl = &layout->index.saves[j];
1597 		result = open_region_reader(layout, &isl->index_save, &reader);
1598 
1599 		if (result != UDS_SUCCESS) {
1600 			vdo_log_error_strerror(result,
1601 					       "cannot get reader for index 0 save %u",
1602 					       j);
1603 			return result;
1604 		}
1605 
1606 		result = load_index_save(isl, reader, j);
1607 		uds_free_buffered_reader(reader);
1608 		if (result != UDS_SUCCESS) {
1609 			/* Another save slot might be valid. */
1610 			reset_index_save_layout(isl, 0);
1611 			continue;
1612 		}
1613 	}
1614 
1615 	return UDS_SUCCESS;
1616 }
1617 
1618 static int __must_check verify_uds_index_config(struct index_layout *layout,
1619 						struct uds_configuration *config)
1620 {
1621 	int result;
1622 	struct buffered_reader *reader = NULL;
1623 	u64 offset;
1624 
1625 	offset = layout->super.volume_offset - layout->super.start_offset;
1626 	result = open_layout_reader(layout, &layout->config, offset, &reader);
1627 	if (result != UDS_SUCCESS)
1628 		return vdo_log_error_strerror(result, "failed to open config reader");
1629 
1630 	result = uds_validate_config_contents(reader, config);
1631 	if (result != UDS_SUCCESS) {
1632 		uds_free_buffered_reader(reader);
1633 		return vdo_log_error_strerror(result, "failed to read config region");
1634 	}
1635 
1636 	uds_free_buffered_reader(reader);
1637 	return UDS_SUCCESS;
1638 }
1639 
1640 static int load_index_layout(struct index_layout *layout, struct uds_configuration *config)
1641 {
1642 	int result;
1643 	struct buffered_reader *reader;
1644 
1645 	result = uds_make_buffered_reader(layout->factory,
1646 					  layout->offset / UDS_BLOCK_SIZE, 1, &reader);
1647 	if (result != UDS_SUCCESS)
1648 		return vdo_log_error_strerror(result, "unable to read superblock");
1649 
1650 	result = load_super_block(layout, UDS_BLOCK_SIZE,
1651 				  layout->offset / UDS_BLOCK_SIZE, reader);
1652 	uds_free_buffered_reader(reader);
1653 	if (result != UDS_SUCCESS)
1654 		return result;
1655 
1656 	result = verify_uds_index_config(layout, config);
1657 	if (result != UDS_SUCCESS)
1658 		return result;
1659 
1660 	return load_sub_index_regions(layout);
1661 }
1662 
1663 static int create_layout_factory(struct index_layout *layout,
1664 				 const struct uds_configuration *config)
1665 {
1666 	int result;
1667 	size_t writable_size;
1668 	struct io_factory *factory = NULL;
1669 
1670 	result = uds_make_io_factory(config->bdev, &factory);
1671 	if (result != UDS_SUCCESS)
1672 		return result;
1673 
1674 	writable_size = uds_get_writable_size(factory) & -UDS_BLOCK_SIZE;
1675 	if (writable_size < config->size + config->offset) {
1676 		uds_put_io_factory(factory);
1677 		vdo_log_error("index storage (%zu) is smaller than the requested size %zu",
1678 			      writable_size, config->size + config->offset);
1679 		return -ENOSPC;
1680 	}
1681 
1682 	layout->factory = factory;
1683 	layout->factory_size = (config->size > 0) ? config->size : writable_size;
1684 	layout->offset = config->offset;
1685 	return UDS_SUCCESS;
1686 }
1687 
1688 int uds_make_index_layout(struct uds_configuration *config, bool new_layout,
1689 			  struct index_layout **layout_ptr)
1690 {
1691 	int result;
1692 	struct index_layout *layout = NULL;
1693 	struct save_layout_sizes sizes;
1694 
1695 	result = compute_sizes(config, &sizes);
1696 	if (result != UDS_SUCCESS)
1697 		return result;
1698 
1699 	result = vdo_allocate(1, __func__, &layout);
1700 	if (result != VDO_SUCCESS)
1701 		return result;
1702 
1703 	result = create_layout_factory(layout, config);
1704 	if (result != UDS_SUCCESS) {
1705 		uds_free_index_layout(layout);
1706 		return result;
1707 	}
1708 
1709 	if (layout->factory_size < sizes.total_size) {
1710 		vdo_log_error("index storage (%zu) is smaller than the required size %llu",
1711 			      layout->factory_size,
1712 			      (unsigned long long) sizes.total_size);
1713 		uds_free_index_layout(layout);
1714 		return -ENOSPC;
1715 	}
1716 
1717 	if (new_layout)
1718 		result = create_index_layout(layout, config);
1719 	else
1720 		result = load_index_layout(layout, config);
1721 	if (result != UDS_SUCCESS) {
1722 		uds_free_index_layout(layout);
1723 		return result;
1724 	}
1725 
1726 	*layout_ptr = layout;
1727 	return UDS_SUCCESS;
1728 }
1729 
1730 void uds_free_index_layout(struct index_layout *layout)
1731 {
1732 	if (layout == NULL)
1733 		return;
1734 
1735 	vdo_free(layout->index.saves);
1736 	if (layout->factory != NULL)
1737 		uds_put_io_factory(layout->factory);
1738 
1739 	vdo_free(layout);
1740 }
1741 
1742 int uds_replace_index_layout_storage(struct index_layout *layout,
1743 				     struct block_device *bdev)
1744 {
1745 	return uds_replace_storage(layout->factory, bdev);
1746 }
1747 
1748 /* Obtain a dm_bufio_client for the volume region. */
1749 int uds_open_volume_bufio(struct index_layout *layout, size_t block_size,
1750 			  unsigned int reserved_buffers,
1751 			  struct dm_bufio_client **client_ptr)
1752 {
1753 	off_t offset = (layout->index.volume.start_block +
1754 			layout->super.volume_offset -
1755 			layout->super.start_offset);
1756 
1757 	return uds_make_bufio(layout->factory, offset, block_size, reserved_buffers,
1758 			      client_ptr);
1759 }
1760 
1761 u64 uds_get_volume_nonce(struct index_layout *layout)
1762 {
1763 	return layout->index.nonce;
1764 }
1765