xref: /linux/drivers/md/dm-vdo/indexer/index-layout.c (revision 9ad8d22f2f3fad7a366c9772362795ef6d6a2d51)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright 2023 Red Hat
4  */
5 
6 #include "index-layout.h"
7 
8 #include <linux/random.h>
9 
10 #include "logger.h"
11 #include "memory-alloc.h"
12 #include "murmurhash3.h"
13 #include "numeric.h"
14 #include "time-utils.h"
15 
16 #include "config.h"
17 #include "open-chapter.h"
18 #include "volume-index.h"
19 
20 /*
 * The UDS layout on storage media is divided into a number of fixed-size regions, the sizes of
 * which are computed when the index is created. Every header and region begins on a 4K block
 * boundary. Save regions are further sub-divided into regions of their own.
24  *
 * Each region has a kind and an instance number. Some kinds only have one instance and therefore
 * use RL_SOLE_INSTANCE (-1 as a 16-bit value, i.e. 65535) as the instance number. The
 * RL_KIND_INDEX used to use instances to represent sub-indices; now, however, there is only ever
 * one sub-index and therefore one instance. The RL_KIND_VOLUME_INDEX uses instances to record
 * which zone is being saved.
29  *
30  * Every region header has a type and version.
31  *
32  *     +-+-+---------+--------+--------+-+
33  *     | | |   I N D E X  0   101, 0   | |
34  *     |H|C+---------+--------+--------+S|
35  *     |D|f| Volume  | Save   | Save   |e|
36  *     |R|g| Region  | Region | Region |a|
37  *     | | | 201, -1 | 202, 0 | 202, 1 |l|
 *     +-+-+---------+--------+--------+-+
39  *
40  * The header contains the encoded region layout table as well as some index configuration data.
41  * The sub-index region and its subdivisions are maintained in the same table.
42  *
43  * There are two save regions to preserve the old state in case saving the new state is incomplete.
44  * They are used in alternation. Each save region is further divided into sub-regions.
45  *
46  *     +-+-----+------+------+-----+-----+
47  *     |H| IPM | MI   | MI   |     | OC  |
48  *     |D|     | zone | zone | ... |     |
49  *     |R| 301 | 302  | 302  |     | 303 |
50  *     | | -1  |  0   |  1   |     | -1  |
51  *     +-+-----+------+------+-----+-----+
52  *
53  * The header contains the encoded region layout table as well as index state data for that save.
54  * Each save also has a unique nonce.
55  */
56 
/* Size in bytes of the magic label stored in the super block data. */
#define MAGIC_SIZE 32
/* Size in bytes of the data from which the primary nonce is generated. */
#define NONCE_INFO_SIZE 32
/* Number of save regions that a newly computed layout contains. */
#define MAX_SAVES 2
60 
/*
 * The kind of a layout region, stored in the kind field of struct layout_region. The numeric
 * values are written to storage when a region table is encoded.
 */
enum region_kind {
	RL_KIND_EMPTY = 0,
	RL_KIND_HEADER = 1,
	RL_KIND_CONFIG = 100,
	RL_KIND_INDEX = 101,
	RL_KIND_SEAL = 102,
	RL_KIND_VOLUME = 201,
	RL_KIND_SAVE = 202,
	RL_KIND_INDEX_PAGE_MAP = 301,
	RL_KIND_VOLUME_INDEX = 302,
	RL_KIND_OPEN_CHAPTER = 303,
};
73 
/*
 * The type recorded in a region header. Some region types are historical and are no longer
 * used.
 */
enum region_type {
	RH_TYPE_FREE = 0, /* unused */
	RH_TYPE_SUPER = 1, /* header of the whole layout */
	RH_TYPE_SAVE = 2, /* header of a save region holding saved zones */
	RH_TYPE_CHECKPOINT = 3, /* unused */
	RH_TYPE_UNSAVED = 4, /* header of a save region with no saved zones */
};
82 
/* Instance number for kinds of region with only one instance; the largest 16-bit value. */
#define RL_SOLE_INSTANCE 65535
84 
/*
 * Super block version 2 is the first released version.
 *
 * Super block version 3 is the normal version used from RHEL 8.2 onwards.
 *
 * Super block versions 4 through 6 were incremental development versions and
 * are not supported.
 *
 * Super block version 7 is used for volumes which have been reduced in size by one chapter in
 * order to make room to prepend LVM metadata to a volume originally created without LVM. This
 * allows the index to retain most of its deduplication records.
 */
#define SUPER_VERSION_MINIMUM 3
#define SUPER_VERSION_CURRENT 3
#define SUPER_VERSION_MAXIMUM 7
100 
/* The label copied into the magic_label field of newly generated super block data. */
static const u8 LAYOUT_MAGIC[MAGIC_SIZE] = "*ALBIREO*SINGLE*FILE*LAYOUT*001*";
/* The value written at the start of every encoded region table. */
static const u64 REGION_MAGIC = 0x416c6252676e3031; /* 'AlbRgn01' */
103 
/* The header of a region table, describing the entries and payload that follow it. */
struct region_header {
	/* Set to REGION_MAGIC when a table is built */
	u64 magic;
	/* The number of blocks in the area the table describes */
	u64 region_blocks;
	/* An enum region_type value */
	u16 type;
	/* Currently always version 1 */
	u16 version;
	/* The number of layout_region entries in the table */
	u16 region_count;
	/* The size in bytes of the data accompanying the table */
	u16 payload;
};
113 
/* One entry of a region table: a run of blocks with a kind and an instance number. */
struct layout_region {
	/* The first block of the region */
	u64 start_block;
	/* The number of blocks in the region */
	u64 block_count;
	/* Always encoded as zero */
	u32 __unused;
	/* An enum region_kind value */
	u16 kind;
	/* The instance number, or RL_SOLE_INSTANCE */
	u16 instance;
};
121 
/* An in-memory region table: a header followed by a variable number of regions. */
struct region_table {
	/* The buffer size used when encoding: header size plus payload plus all regions */
	size_t encoded_size;
	struct region_header header;
	/* header.region_count entries */
	struct layout_region regions[];
};
127 
/* Identifying data for one save, written after the region table in a save header. */
struct index_save_data {
	/* The time of the save in milliseconds; zero when the slot holds no save */
	u64 timestamp;
	/* A nonce derived from the sub-index nonce and this save's data */
	u64 nonce;
	/* Currently always version 1 */
	u32 version;
	/* Always encoded as zero */
	u32 unused__;
};
135 
/* The version marker written at the start of the saved index state data. */
struct index_state_version {
	s32 signature;
	s32 version_id;
};
140 
/* The version marker written for every save that contains index state. */
static const struct index_state_version INDEX_STATE_VERSION_301 = {
	.signature  = -1,
	.version_id = 301,
};
145 
/* The index state recorded with a save; copied to and from struct uds_index. */
struct index_state_data301 {
	struct index_state_version version;
	/* The newest virtual chapter of the index */
	u64 newest_chapter;
	/* The oldest virtual chapter of the index */
	u64 oldest_chapter;
	/* The last_save value of the index */
	u64 last_save;
	/* The two fields below are encoded together as a single zero u64 */
	u32 unused;
	u32 padding;
};
154 
/* The layout of one save region and the data describing the save it holds. */
struct index_save_layout {
	/* The number of volume index zones saved; zero when the slot holds no save */
	unsigned int zone_count;
	/* The whole save region */
	struct layout_region index_save;
	/* The first block of the save region, holding the save's region table */
	struct layout_region header;
	struct layout_region index_page_map;
	/* Any blocks of the save region not assigned to another sub-region */
	struct layout_region free_space;
	/* Only the first zone_count entries are meaningful */
	struct layout_region volume_index_zones[MAX_ZONES];
	struct layout_region open_chapter;
	struct index_save_data save_data;
	struct index_state_data301 state_data;
};
166 
/* The layout of the single sub-index: its volume and its save regions. */
struct sub_index_layout {
	/* A nonce derived from the super block nonce; used to derive save nonces */
	u64 nonce;
	/* The whole sub-index region, covering the volume and all saves */
	struct layout_region sub_index;
	struct layout_region volume;
	/* An allocated array of save layouts, one per save region */
	struct index_save_layout *saves;
};
173 
/* The payload of the layout header, describing the index as a whole. */
struct super_block_data {
	/* A copy of LAYOUT_MAGIC */
	u8 magic_label[MAGIC_SIZE];
	/* The data from which the primary nonce is generated */
	u8 nonce_info[NONCE_INFO_SIZE];
	/* The primary nonce */
	u64 nonce;
	/* The super block version */
	u32 version;
	/* The block size in bytes */
	u32 block_size;
	/* Set to 1 when a super block is generated */
	u16 index_count;
	/* The number of save regions */
	u16 max_saves;
	/* Padding reflects a blank field on permanent storage */
	u8 padding[4];
	/* The size in blocks of the open chapter region of each save */
	u64 open_chapter_blocks;
	/* The size in blocks of the index page map region of each save */
	u64 page_map_blocks;
	/* The two offsets below are only written for version 7 super blocks */
	u64 volume_offset;
	u64 start_offset;
};
189 
/* The complete in-memory description of an index's layout on storage. */
struct index_layout {
	/* The factory used to create the buffered readers and writers for regions */
	struct io_factory *factory;
	size_t factory_size;
	/* A byte offset; divided by the block size to find the first block of the layout */
	off_t offset;
	struct super_block_data super;
	/* The layout header region (one block) */
	struct layout_region header;
	/* The configuration region (one block) */
	struct layout_region config;
	struct sub_index_layout index;
	/* The seal region (one block), following the sub-index */
	struct layout_region seal;
	/* The total number of blocks in the layout */
	u64 total_blocks;
};
201 
/* The sizes of the parts of a layout, as computed by compute_sizes(). */
struct save_layout_sizes {
	/* The number of save regions */
	unsigned int save_count;
	/* The block size in bytes */
	size_t block_size;
	/* The size of the volume in blocks */
	u64 volume_blocks;
	/* The number of blocks needed to save the volume index */
	u64 volume_index_blocks;
	/* The number of blocks needed to save the index page map */
	u64 page_map_blocks;
	/* The number of blocks needed to save the open chapter */
	u64 open_chapter_blocks;
	/* The size of one save region: one header block plus the three sizes above */
	u64 save_blocks;
	/* The volume plus all of the save regions */
	u64 sub_index_blocks;
	/* The sub-index plus three additional blocks */
	u64 total_blocks;
	/* total_blocks expressed in bytes */
	size_t total_size;
};
214 
215 static inline bool is_converted_super_block(struct super_block_data *super)
216 {
217 	return super->version == 7;
218 }
219 
220 static int __must_check compute_sizes(const struct uds_configuration *config,
221 				      struct save_layout_sizes *sls)
222 {
223 	int result;
224 	struct index_geometry *geometry = config->geometry;
225 
226 	memset(sls, 0, sizeof(*sls));
227 	sls->save_count = MAX_SAVES;
228 	sls->block_size = UDS_BLOCK_SIZE;
229 	sls->volume_blocks = geometry->bytes_per_volume / sls->block_size;
230 
231 	result = uds_compute_volume_index_save_blocks(config, sls->block_size,
232 						      &sls->volume_index_blocks);
233 	if (result != UDS_SUCCESS)
234 		return vdo_log_error_strerror(result, "cannot compute index save size");
235 
236 	sls->page_map_blocks =
237 		DIV_ROUND_UP(uds_compute_index_page_map_save_size(geometry),
238 			     sls->block_size);
239 	sls->open_chapter_blocks =
240 		DIV_ROUND_UP(uds_compute_saved_open_chapter_size(geometry),
241 			     sls->block_size);
242 	sls->save_blocks =
243 		1 + (sls->volume_index_blocks + sls->page_map_blocks + sls->open_chapter_blocks);
244 	sls->sub_index_blocks = sls->volume_blocks + (sls->save_count * sls->save_blocks);
245 	sls->total_blocks = 3 + sls->sub_index_blocks;
246 	sls->total_size = sls->total_blocks * sls->block_size;
247 
248 	return UDS_SUCCESS;
249 }
250 
251 /* Create unique data using the current time and a pseudorandom number. */
252 static void create_unique_nonce_data(u8 *buffer)
253 {
254 	ktime_t now = current_time_ns(CLOCK_REALTIME);
255 	u32 rand;
256 	size_t offset = 0;
257 
258 	get_random_bytes(&rand, sizeof(u32));
259 	memcpy(buffer + offset, &now, sizeof(now));
260 	offset += sizeof(now);
261 	memcpy(buffer + offset, &rand, sizeof(rand));
262 	offset += sizeof(rand);
263 	while (offset < NONCE_INFO_SIZE) {
264 		size_t len = min(NONCE_INFO_SIZE - offset, offset);
265 
266 		memcpy(buffer + offset, buffer, len);
267 		offset += len;
268 	}
269 }
270 
271 static u64 hash_stuff(u64 start, const void *data, size_t len)
272 {
273 	u32 seed = start ^ (start >> 27);
274 	u8 hash_buffer[16];
275 
276 	murmurhash3_128(data, len, seed, hash_buffer);
277 	return get_unaligned_le64(hash_buffer + 4);
278 }
279 
280 /* Generate a primary nonce from the provided data. */
281 static u64 generate_primary_nonce(const void *data, size_t len)
282 {
283 	return hash_stuff(0xa1b1e0fc, data, len);
284 }
285 
286 /*
287  * Deterministically generate a secondary nonce from an existing nonce and some arbitrary data by
288  * hashing the original nonce and the data to produce a new nonce.
289  */
290 static u64 generate_secondary_nonce(u64 nonce, const void *data, size_t len)
291 {
292 	return hash_stuff(nonce + 1, data, len);
293 }
294 
295 static int __must_check open_layout_reader(struct index_layout *layout,
296 					   struct layout_region *lr, off_t offset,
297 					   struct buffered_reader **reader_ptr)
298 {
299 	return uds_make_buffered_reader(layout->factory, lr->start_block + offset,
300 					lr->block_count, reader_ptr);
301 }
302 
303 static int open_region_reader(struct index_layout *layout, struct layout_region *region,
304 			      struct buffered_reader **reader_ptr)
305 {
306 	return open_layout_reader(layout, region, -layout->super.start_offset,
307 				  reader_ptr);
308 }
309 
310 static int __must_check open_layout_writer(struct index_layout *layout,
311 					   struct layout_region *lr, off_t offset,
312 					   struct buffered_writer **writer_ptr)
313 {
314 	return uds_make_buffered_writer(layout->factory, lr->start_block + offset,
315 					lr->block_count, writer_ptr);
316 }
317 
318 static int open_region_writer(struct index_layout *layout, struct layout_region *region,
319 			      struct buffered_writer **writer_ptr)
320 {
321 	return open_layout_writer(layout, region, -layout->super.start_offset,
322 				  writer_ptr);
323 }
324 
325 static void generate_super_block_data(struct save_layout_sizes *sls,
326 				      struct super_block_data *super)
327 {
328 	memset(super, 0, sizeof(*super));
329 	memcpy(super->magic_label, LAYOUT_MAGIC, MAGIC_SIZE);
330 	create_unique_nonce_data(super->nonce_info);
331 
332 	super->nonce = generate_primary_nonce(super->nonce_info,
333 					      sizeof(super->nonce_info));
334 	super->version = SUPER_VERSION_CURRENT;
335 	super->block_size = sls->block_size;
336 	super->index_count = 1;
337 	super->max_saves = sls->save_count;
338 	super->open_chapter_blocks = sls->open_chapter_blocks;
339 	super->page_map_blocks = sls->page_map_blocks;
340 	super->volume_offset = 0;
341 	super->start_offset = 0;
342 }
343 
344 static void define_sub_index_nonce(struct index_layout *layout)
345 {
346 	struct sub_index_nonce_data {
347 		u64 offset;
348 		u16 index_id;
349 	};
350 	struct sub_index_layout *sil = &layout->index;
351 	u64 primary_nonce = layout->super.nonce;
352 	u8 buffer[sizeof(struct sub_index_nonce_data)] = { 0 };
353 	size_t offset = 0;
354 
355 	encode_u64_le(buffer, &offset, sil->sub_index.start_block);
356 	encode_u16_le(buffer, &offset, 0);
357 	sil->nonce = generate_secondary_nonce(primary_nonce, buffer, sizeof(buffer));
358 	if (sil->nonce == 0) {
359 		sil->nonce = generate_secondary_nonce(~primary_nonce + 1, buffer,
360 						      sizeof(buffer));
361 	}
362 }
363 
364 static void setup_sub_index(struct index_layout *layout, u64 start_block,
365 			    struct save_layout_sizes *sls)
366 {
367 	struct sub_index_layout *sil = &layout->index;
368 	u64 next_block = start_block;
369 	unsigned int i;
370 
371 	sil->sub_index = (struct layout_region) {
372 		.start_block = start_block,
373 		.block_count = sls->sub_index_blocks,
374 		.kind = RL_KIND_INDEX,
375 		.instance = 0,
376 	};
377 
378 	sil->volume = (struct layout_region) {
379 		.start_block = next_block,
380 		.block_count = sls->volume_blocks,
381 		.kind = RL_KIND_VOLUME,
382 		.instance = RL_SOLE_INSTANCE,
383 	};
384 
385 	next_block += sls->volume_blocks;
386 
387 	for (i = 0; i < sls->save_count; i++) {
388 		sil->saves[i].index_save = (struct layout_region) {
389 			.start_block = next_block,
390 			.block_count = sls->save_blocks,
391 			.kind = RL_KIND_SAVE,
392 			.instance = i,
393 		};
394 
395 		next_block += sls->save_blocks;
396 	}
397 
398 	define_sub_index_nonce(layout);
399 }
400 
401 static void initialize_layout(struct index_layout *layout, struct save_layout_sizes *sls)
402 {
403 	u64 next_block = layout->offset / sls->block_size;
404 
405 	layout->total_blocks = sls->total_blocks;
406 	generate_super_block_data(sls, &layout->super);
407 	layout->header = (struct layout_region) {
408 		.start_block = next_block++,
409 		.block_count = 1,
410 		.kind = RL_KIND_HEADER,
411 		.instance = RL_SOLE_INSTANCE,
412 	};
413 
414 	layout->config = (struct layout_region) {
415 		.start_block = next_block++,
416 		.block_count = 1,
417 		.kind = RL_KIND_CONFIG,
418 		.instance = RL_SOLE_INSTANCE,
419 	};
420 
421 	setup_sub_index(layout, next_block, sls);
422 	next_block += sls->sub_index_blocks;
423 
424 	layout->seal = (struct layout_region) {
425 		.start_block = next_block,
426 		.block_count = 1,
427 		.kind = RL_KIND_SEAL,
428 		.instance = RL_SOLE_INSTANCE,
429 	};
430 }
431 
432 static int __must_check make_index_save_region_table(struct index_save_layout *isl,
433 						     struct region_table **table_ptr)
434 {
435 	int result;
436 	unsigned int z;
437 	struct region_table *table;
438 	struct layout_region *lr;
439 	u16 region_count;
440 	size_t payload;
441 	size_t type;
442 
443 	if (isl->zone_count > 0) {
444 		/*
445 		 * Normal save regions: header, page map, volume index zones,
446 		 * open chapter, and possibly free space.
447 		 */
448 		region_count = 3 + isl->zone_count;
449 		if (isl->free_space.block_count > 0)
450 			region_count++;
451 
452 		payload = sizeof(isl->save_data) + sizeof(isl->state_data);
453 		type = RH_TYPE_SAVE;
454 	} else {
455 		/* Empty save regions: header, page map, free space. */
456 		region_count = 3;
457 		payload = sizeof(isl->save_data);
458 		type = RH_TYPE_UNSAVED;
459 	}
460 
461 	result = vdo_allocate_extended(struct region_table, region_count,
462 				       struct layout_region,
463 				       "layout region table for ISL", &table);
464 	if (result != VDO_SUCCESS)
465 		return result;
466 
467 	lr = &table->regions[0];
468 	*lr++ = isl->header;
469 	*lr++ = isl->index_page_map;
470 	for (z = 0; z < isl->zone_count; z++)
471 		*lr++ = isl->volume_index_zones[z];
472 
473 	if (isl->zone_count > 0)
474 		*lr++ = isl->open_chapter;
475 
476 	if (isl->free_space.block_count > 0)
477 		*lr++ = isl->free_space;
478 
479 	table->header = (struct region_header) {
480 		.magic = REGION_MAGIC,
481 		.region_blocks = isl->index_save.block_count,
482 		.type = type,
483 		.version = 1,
484 		.region_count = region_count,
485 		.payload = payload,
486 	};
487 
488 	table->encoded_size = (sizeof(struct region_header) + payload +
489 			       region_count * sizeof(struct layout_region));
490 	*table_ptr = table;
491 	return UDS_SUCCESS;
492 }
493 
494 static void encode_region_table(u8 *buffer, size_t *offset, struct region_table *table)
495 {
496 	unsigned int i;
497 
498 	encode_u64_le(buffer, offset, REGION_MAGIC);
499 	encode_u64_le(buffer, offset, table->header.region_blocks);
500 	encode_u16_le(buffer, offset, table->header.type);
501 	encode_u16_le(buffer, offset, table->header.version);
502 	encode_u16_le(buffer, offset, table->header.region_count);
503 	encode_u16_le(buffer, offset, table->header.payload);
504 
505 	for (i = 0; i < table->header.region_count; i++) {
506 		encode_u64_le(buffer, offset, table->regions[i].start_block);
507 		encode_u64_le(buffer, offset, table->regions[i].block_count);
508 		encode_u32_le(buffer, offset, 0);
509 		encode_u16_le(buffer, offset, table->regions[i].kind);
510 		encode_u16_le(buffer, offset, table->regions[i].instance);
511 	}
512 }
513 
514 static int __must_check write_index_save_header(struct index_save_layout *isl,
515 						struct region_table *table,
516 						struct buffered_writer *writer)
517 {
518 	int result;
519 	u8 *buffer;
520 	size_t offset = 0;
521 
522 	result = vdo_allocate(table->encoded_size, u8, "index save data", &buffer);
523 	if (result != VDO_SUCCESS)
524 		return result;
525 
526 	encode_region_table(buffer, &offset, table);
527 	encode_u64_le(buffer, &offset, isl->save_data.timestamp);
528 	encode_u64_le(buffer, &offset, isl->save_data.nonce);
529 	encode_u32_le(buffer, &offset, isl->save_data.version);
530 	encode_u32_le(buffer, &offset, 0);
531 	if (isl->zone_count > 0) {
532 		encode_u32_le(buffer, &offset, INDEX_STATE_VERSION_301.signature);
533 		encode_u32_le(buffer, &offset, INDEX_STATE_VERSION_301.version_id);
534 		encode_u64_le(buffer, &offset, isl->state_data.newest_chapter);
535 		encode_u64_le(buffer, &offset, isl->state_data.oldest_chapter);
536 		encode_u64_le(buffer, &offset, isl->state_data.last_save);
537 		encode_u64_le(buffer, &offset, 0);
538 	}
539 
540 	result = uds_write_to_buffered_writer(writer, buffer, offset);
541 	vdo_free(buffer);
542 	if (result != UDS_SUCCESS)
543 		return result;
544 
545 	return uds_flush_buffered_writer(writer);
546 }
547 
548 static int write_index_save_layout(struct index_layout *layout,
549 				   struct index_save_layout *isl)
550 {
551 	int result;
552 	struct region_table *table;
553 	struct buffered_writer *writer;
554 
555 	result = make_index_save_region_table(isl, &table);
556 	if (result != UDS_SUCCESS)
557 		return result;
558 
559 	result = open_region_writer(layout, &isl->header, &writer);
560 	if (result != UDS_SUCCESS) {
561 		vdo_free(table);
562 		return result;
563 	}
564 
565 	result = write_index_save_header(isl, table, writer);
566 	vdo_free(table);
567 	uds_free_buffered_writer(writer);
568 
569 	return result;
570 }
571 
572 static void reset_index_save_layout(struct index_save_layout *isl, u64 page_map_blocks)
573 {
574 	u64 free_blocks;
575 	u64 next_block = isl->index_save.start_block;
576 
577 	isl->zone_count = 0;
578 	memset(&isl->save_data, 0, sizeof(isl->save_data));
579 
580 	isl->header = (struct layout_region) {
581 		.start_block = next_block++,
582 		.block_count = 1,
583 		.kind = RL_KIND_HEADER,
584 		.instance = RL_SOLE_INSTANCE,
585 	};
586 
587 	isl->index_page_map = (struct layout_region) {
588 		.start_block = next_block,
589 		.block_count = page_map_blocks,
590 		.kind = RL_KIND_INDEX_PAGE_MAP,
591 		.instance = RL_SOLE_INSTANCE,
592 	};
593 
594 	next_block += page_map_blocks;
595 
596 	free_blocks = isl->index_save.block_count - page_map_blocks - 1;
597 	isl->free_space = (struct layout_region) {
598 		.start_block = next_block,
599 		.block_count = free_blocks,
600 		.kind = RL_KIND_EMPTY,
601 		.instance = RL_SOLE_INSTANCE,
602 	};
603 }
604 
605 static int __must_check invalidate_old_save(struct index_layout *layout,
606 					    struct index_save_layout *isl)
607 {
608 	reset_index_save_layout(isl, layout->super.page_map_blocks);
609 	return write_index_save_layout(layout, isl);
610 }
611 
612 static int discard_index_state_data(struct index_layout *layout)
613 {
614 	int result;
615 	int saved_result = UDS_SUCCESS;
616 	unsigned int i;
617 
618 	for (i = 0; i < layout->super.max_saves; i++) {
619 		result = invalidate_old_save(layout, &layout->index.saves[i]);
620 		if (result != UDS_SUCCESS)
621 			saved_result = result;
622 	}
623 
624 	if (saved_result != UDS_SUCCESS) {
625 		return vdo_log_error_strerror(result,
626 					      "%s: cannot destroy all index saves",
627 					      __func__);
628 	}
629 
630 	return UDS_SUCCESS;
631 }
632 
633 static int __must_check make_layout_region_table(struct index_layout *layout,
634 						 struct region_table **table_ptr)
635 {
636 	int result;
637 	unsigned int i;
638 	/* Regions: header, config, index, volume, saves, seal */
639 	u16 region_count = 5 + layout->super.max_saves;
640 	u16 payload;
641 	struct region_table *table;
642 	struct layout_region *lr;
643 
644 	result = vdo_allocate_extended(struct region_table, region_count,
645 				       struct layout_region, "layout region table",
646 				       &table);
647 	if (result != VDO_SUCCESS)
648 		return result;
649 
650 	lr = &table->regions[0];
651 	*lr++ = layout->header;
652 	*lr++ = layout->config;
653 	*lr++ = layout->index.sub_index;
654 	*lr++ = layout->index.volume;
655 
656 	for (i = 0; i < layout->super.max_saves; i++)
657 		*lr++ = layout->index.saves[i].index_save;
658 
659 	*lr++ = layout->seal;
660 
661 	if (is_converted_super_block(&layout->super)) {
662 		payload = sizeof(struct super_block_data);
663 	} else {
664 		payload = (sizeof(struct super_block_data) -
665 			   sizeof(layout->super.volume_offset) -
666 			   sizeof(layout->super.start_offset));
667 	}
668 
669 	table->header = (struct region_header) {
670 		.magic = REGION_MAGIC,
671 		.region_blocks = layout->total_blocks,
672 		.type = RH_TYPE_SUPER,
673 		.version = 1,
674 		.region_count = region_count,
675 		.payload = payload,
676 	};
677 
678 	table->encoded_size = (sizeof(struct region_header) + payload +
679 			       region_count * sizeof(struct layout_region));
680 	*table_ptr = table;
681 	return UDS_SUCCESS;
682 }
683 
684 static int __must_check write_layout_header(struct index_layout *layout,
685 					    struct region_table *table,
686 					    struct buffered_writer *writer)
687 {
688 	int result;
689 	u8 *buffer;
690 	size_t offset = 0;
691 
692 	result = vdo_allocate(table->encoded_size, u8, "layout data", &buffer);
693 	if (result != VDO_SUCCESS)
694 		return result;
695 
696 	encode_region_table(buffer, &offset, table);
697 	memcpy(buffer + offset, &layout->super.magic_label, MAGIC_SIZE);
698 	offset += MAGIC_SIZE;
699 	memcpy(buffer + offset, &layout->super.nonce_info, NONCE_INFO_SIZE);
700 	offset += NONCE_INFO_SIZE;
701 	encode_u64_le(buffer, &offset, layout->super.nonce);
702 	encode_u32_le(buffer, &offset, layout->super.version);
703 	encode_u32_le(buffer, &offset, layout->super.block_size);
704 	encode_u16_le(buffer, &offset, layout->super.index_count);
705 	encode_u16_le(buffer, &offset, layout->super.max_saves);
706 	encode_u32_le(buffer, &offset, 0);
707 	encode_u64_le(buffer, &offset, layout->super.open_chapter_blocks);
708 	encode_u64_le(buffer, &offset, layout->super.page_map_blocks);
709 
710 	if (is_converted_super_block(&layout->super)) {
711 		encode_u64_le(buffer, &offset, layout->super.volume_offset);
712 		encode_u64_le(buffer, &offset, layout->super.start_offset);
713 	}
714 
715 	result = uds_write_to_buffered_writer(writer, buffer, offset);
716 	vdo_free(buffer);
717 	if (result != UDS_SUCCESS)
718 		return result;
719 
720 	return uds_flush_buffered_writer(writer);
721 }
722 
723 static int __must_check write_uds_index_config(struct index_layout *layout,
724 					       struct uds_configuration *config,
725 					       off_t offset)
726 {
727 	int result;
728 	struct buffered_writer *writer = NULL;
729 
730 	result = open_layout_writer(layout, &layout->config, offset, &writer);
731 	if (result != UDS_SUCCESS)
732 		return vdo_log_error_strerror(result, "failed to open config region");
733 
734 	result = uds_write_config_contents(writer, config, layout->super.version);
735 	if (result != UDS_SUCCESS) {
736 		uds_free_buffered_writer(writer);
737 		return vdo_log_error_strerror(result, "failed to write config region");
738 	}
739 
740 	result = uds_flush_buffered_writer(writer);
741 	if (result != UDS_SUCCESS) {
742 		uds_free_buffered_writer(writer);
743 		return vdo_log_error_strerror(result, "cannot flush config writer");
744 	}
745 
746 	uds_free_buffered_writer(writer);
747 	return UDS_SUCCESS;
748 }
749 
750 static int __must_check save_layout(struct index_layout *layout, off_t offset)
751 {
752 	int result;
753 	struct buffered_writer *writer = NULL;
754 	struct region_table *table;
755 
756 	result = make_layout_region_table(layout, &table);
757 	if (result != UDS_SUCCESS)
758 		return result;
759 
760 	result = open_layout_writer(layout, &layout->header, offset, &writer);
761 	if (result != UDS_SUCCESS) {
762 		vdo_free(table);
763 		return result;
764 	}
765 
766 	result = write_layout_header(layout, table, writer);
767 	vdo_free(table);
768 	uds_free_buffered_writer(writer);
769 
770 	return result;
771 }
772 
773 static int create_index_layout(struct index_layout *layout, struct uds_configuration *config)
774 {
775 	int result;
776 	struct save_layout_sizes sizes;
777 
778 	result = compute_sizes(config, &sizes);
779 	if (result != UDS_SUCCESS)
780 		return result;
781 
782 	result = vdo_allocate(sizes.save_count, struct index_save_layout, __func__,
783 			      &layout->index.saves);
784 	if (result != VDO_SUCCESS)
785 		return result;
786 
787 	initialize_layout(layout, &sizes);
788 
789 	result = discard_index_state_data(layout);
790 	if (result != UDS_SUCCESS)
791 		return result;
792 
793 	result = write_uds_index_config(layout, config, 0);
794 	if (result != UDS_SUCCESS)
795 		return result;
796 
797 	return save_layout(layout, 0);
798 }
799 
800 static u64 generate_index_save_nonce(u64 volume_nonce, struct index_save_layout *isl)
801 {
802 	struct save_nonce_data {
803 		struct index_save_data data;
804 		u64 offset;
805 	} nonce_data;
806 	u8 buffer[sizeof(nonce_data)];
807 	size_t offset = 0;
808 
809 	encode_u64_le(buffer, &offset, isl->save_data.timestamp);
810 	encode_u64_le(buffer, &offset, 0);
811 	encode_u32_le(buffer, &offset, isl->save_data.version);
812 	encode_u32_le(buffer, &offset, 0U);
813 	encode_u64_le(buffer, &offset, isl->index_save.start_block);
814 	VDO_ASSERT_LOG_ONLY(offset == sizeof(nonce_data),
815 			    "%zu bytes encoded of %zu expected",
816 			    offset, sizeof(nonce_data));
817 	return generate_secondary_nonce(volume_nonce, buffer, sizeof(buffer));
818 }
819 
820 static u64 validate_index_save_layout(struct index_save_layout *isl, u64 volume_nonce)
821 {
822 	if ((isl->zone_count == 0) || (isl->save_data.timestamp == 0))
823 		return 0;
824 
825 	if (isl->save_data.nonce != generate_index_save_nonce(volume_nonce, isl))
826 		return 0;
827 
828 	return isl->save_data.timestamp;
829 }
830 
831 static int find_latest_uds_index_save_slot(struct index_layout *layout,
832 					   struct index_save_layout **isl_ptr)
833 {
834 	struct index_save_layout *latest = NULL;
835 	struct index_save_layout *isl;
836 	unsigned int i;
837 	u64 save_time = 0;
838 	u64 latest_time = 0;
839 
840 	for (i = 0; i < layout->super.max_saves; i++) {
841 		isl = &layout->index.saves[i];
842 		save_time = validate_index_save_layout(isl, layout->index.nonce);
843 		if (save_time > latest_time) {
844 			latest = isl;
845 			latest_time = save_time;
846 		}
847 	}
848 
849 	if (latest == NULL) {
850 		vdo_log_error("No valid index save found");
851 		return UDS_INDEX_NOT_SAVED_CLEANLY;
852 	}
853 
854 	*isl_ptr = latest;
855 	return UDS_SUCCESS;
856 }
857 
858 int uds_discard_open_chapter(struct index_layout *layout)
859 {
860 	int result;
861 	struct index_save_layout *isl;
862 	struct buffered_writer *writer;
863 
864 	result = find_latest_uds_index_save_slot(layout, &isl);
865 	if (result != UDS_SUCCESS)
866 		return result;
867 
868 	result = open_region_writer(layout, &isl->open_chapter, &writer);
869 	if (result != UDS_SUCCESS)
870 		return result;
871 
872 	result = uds_write_to_buffered_writer(writer, NULL, UDS_BLOCK_SIZE);
873 	if (result != UDS_SUCCESS) {
874 		uds_free_buffered_writer(writer);
875 		return result;
876 	}
877 
878 	result = uds_flush_buffered_writer(writer);
879 	uds_free_buffered_writer(writer);
880 	return result;
881 }
882 
883 int uds_load_index_state(struct index_layout *layout, struct uds_index *index)
884 {
885 	int result;
886 	unsigned int zone;
887 	struct index_save_layout *isl;
888 	struct buffered_reader *readers[MAX_ZONES];
889 
890 	result = find_latest_uds_index_save_slot(layout, &isl);
891 	if (result != UDS_SUCCESS)
892 		return result;
893 
894 	index->newest_virtual_chapter = isl->state_data.newest_chapter;
895 	index->oldest_virtual_chapter = isl->state_data.oldest_chapter;
896 	index->last_save = isl->state_data.last_save;
897 
898 	result = open_region_reader(layout, &isl->open_chapter, &readers[0]);
899 	if (result != UDS_SUCCESS)
900 		return result;
901 
902 	result = uds_load_open_chapter(index, readers[0]);
903 	uds_free_buffered_reader(readers[0]);
904 	if (result != UDS_SUCCESS)
905 		return result;
906 
907 	for (zone = 0; zone < isl->zone_count; zone++) {
908 		result = open_region_reader(layout, &isl->volume_index_zones[zone],
909 					    &readers[zone]);
910 		if (result != UDS_SUCCESS) {
911 			for (; zone > 0; zone--)
912 				uds_free_buffered_reader(readers[zone - 1]);
913 
914 			return result;
915 		}
916 	}
917 
918 	result = uds_load_volume_index(index->volume_index, readers, isl->zone_count);
919 	for (zone = 0; zone < isl->zone_count; zone++)
920 		uds_free_buffered_reader(readers[zone]);
921 	if (result != UDS_SUCCESS)
922 		return result;
923 
924 	result = open_region_reader(layout, &isl->index_page_map, &readers[0]);
925 	if (result != UDS_SUCCESS)
926 		return result;
927 
928 	result = uds_read_index_page_map(index->volume->index_page_map, readers[0]);
929 	uds_free_buffered_reader(readers[0]);
930 
931 	return result;
932 }
933 
934 static struct index_save_layout *select_oldest_index_save_layout(struct index_layout *layout)
935 {
936 	struct index_save_layout *oldest = NULL;
937 	struct index_save_layout *isl;
938 	unsigned int i;
939 	u64 save_time = 0;
940 	u64 oldest_time = 0;
941 
942 	for (i = 0; i < layout->super.max_saves; i++) {
943 		isl = &layout->index.saves[i];
944 		save_time = validate_index_save_layout(isl, layout->index.nonce);
945 		if (oldest == NULL || save_time < oldest_time) {
946 			oldest = isl;
947 			oldest_time = save_time;
948 		}
949 	}
950 
951 	return oldest;
952 }
953 
954 static void instantiate_index_save_layout(struct index_save_layout *isl,
955 					  struct super_block_data *super,
956 					  u64 volume_nonce, unsigned int zone_count)
957 {
958 	unsigned int z;
959 	u64 next_block;
960 	u64 free_blocks;
961 	u64 volume_index_blocks;
962 
963 	isl->zone_count = zone_count;
964 	memset(&isl->save_data, 0, sizeof(isl->save_data));
965 	isl->save_data.timestamp = ktime_to_ms(current_time_ns(CLOCK_REALTIME));
966 	isl->save_data.version = 1;
967 	isl->save_data.nonce = generate_index_save_nonce(volume_nonce, isl);
968 
969 	next_block = isl->index_save.start_block;
970 	isl->header = (struct layout_region) {
971 		.start_block = next_block++,
972 		.block_count = 1,
973 		.kind = RL_KIND_HEADER,
974 		.instance = RL_SOLE_INSTANCE,
975 	};
976 
977 	isl->index_page_map = (struct layout_region) {
978 		.start_block = next_block,
979 		.block_count = super->page_map_blocks,
980 		.kind = RL_KIND_INDEX_PAGE_MAP,
981 		.instance = RL_SOLE_INSTANCE,
982 	};
983 	next_block += super->page_map_blocks;
984 
985 	free_blocks = (isl->index_save.block_count - 1 -
986 		       super->page_map_blocks -
987 		       super->open_chapter_blocks);
988 	volume_index_blocks = free_blocks / isl->zone_count;
989 	for (z = 0; z < isl->zone_count; z++) {
990 		isl->volume_index_zones[z] = (struct layout_region) {
991 			.start_block = next_block,
992 			.block_count = volume_index_blocks,
993 			.kind = RL_KIND_VOLUME_INDEX,
994 			.instance = z,
995 		};
996 
997 		next_block += volume_index_blocks;
998 		free_blocks -= volume_index_blocks;
999 	}
1000 
1001 	isl->open_chapter = (struct layout_region) {
1002 		.start_block = next_block,
1003 		.block_count = super->open_chapter_blocks,
1004 		.kind = RL_KIND_OPEN_CHAPTER,
1005 		.instance = RL_SOLE_INSTANCE,
1006 	};
1007 
1008 	next_block += super->open_chapter_blocks;
1009 
1010 	isl->free_space = (struct layout_region) {
1011 		.start_block = next_block,
1012 		.block_count = free_blocks,
1013 		.kind = RL_KIND_EMPTY,
1014 		.instance = RL_SOLE_INSTANCE,
1015 	};
1016 }
1017 
1018 static int setup_uds_index_save_slot(struct index_layout *layout,
1019 				     unsigned int zone_count,
1020 				     struct index_save_layout **isl_ptr)
1021 {
1022 	int result;
1023 	struct index_save_layout *isl;
1024 
1025 	isl = select_oldest_index_save_layout(layout);
1026 	result = invalidate_old_save(layout, isl);
1027 	if (result != UDS_SUCCESS)
1028 		return result;
1029 
1030 	instantiate_index_save_layout(isl, &layout->super, layout->index.nonce,
1031 				      zone_count);
1032 
1033 	*isl_ptr = isl;
1034 	return UDS_SUCCESS;
1035 }
1036 
1037 static void cancel_uds_index_save(struct index_save_layout *isl)
1038 {
1039 	memset(&isl->save_data, 0, sizeof(isl->save_data));
1040 	memset(&isl->state_data, 0, sizeof(isl->state_data));
1041 	isl->zone_count = 0;
1042 }
1043 
1044 int uds_save_index_state(struct index_layout *layout, struct uds_index *index)
1045 {
1046 	int result;
1047 	unsigned int zone;
1048 	struct index_save_layout *isl;
1049 	struct buffered_writer *writers[MAX_ZONES];
1050 
1051 	result = setup_uds_index_save_slot(layout, index->zone_count, &isl);
1052 	if (result != UDS_SUCCESS)
1053 		return result;
1054 
1055 	isl->state_data	= (struct index_state_data301) {
1056 		.newest_chapter = index->newest_virtual_chapter,
1057 		.oldest_chapter = index->oldest_virtual_chapter,
1058 		.last_save = index->last_save,
1059 	};
1060 
1061 	result = open_region_writer(layout, &isl->open_chapter, &writers[0]);
1062 	if (result != UDS_SUCCESS) {
1063 		cancel_uds_index_save(isl);
1064 		return result;
1065 	}
1066 
1067 	result = uds_save_open_chapter(index, writers[0]);
1068 	uds_free_buffered_writer(writers[0]);
1069 	if (result != UDS_SUCCESS) {
1070 		cancel_uds_index_save(isl);
1071 		return result;
1072 	}
1073 
1074 	for (zone = 0; zone < index->zone_count; zone++) {
1075 		result = open_region_writer(layout, &isl->volume_index_zones[zone],
1076 					    &writers[zone]);
1077 		if (result != UDS_SUCCESS) {
1078 			for (; zone > 0; zone--)
1079 				uds_free_buffered_writer(writers[zone - 1]);
1080 
1081 			cancel_uds_index_save(isl);
1082 			return result;
1083 		}
1084 	}
1085 
1086 	result = uds_save_volume_index(index->volume_index, writers, index->zone_count);
1087 	for (zone = 0; zone < index->zone_count; zone++)
1088 		uds_free_buffered_writer(writers[zone]);
1089 	if (result != UDS_SUCCESS) {
1090 		cancel_uds_index_save(isl);
1091 		return result;
1092 	}
1093 
1094 	result = open_region_writer(layout, &isl->index_page_map, &writers[0]);
1095 	if (result != UDS_SUCCESS) {
1096 		cancel_uds_index_save(isl);
1097 		return result;
1098 	}
1099 
1100 	result = uds_write_index_page_map(index->volume->index_page_map, writers[0]);
1101 	uds_free_buffered_writer(writers[0]);
1102 	if (result != UDS_SUCCESS) {
1103 		cancel_uds_index_save(isl);
1104 		return result;
1105 	}
1106 
1107 	return write_index_save_layout(layout, isl);
1108 }
1109 
1110 static int __must_check load_region_table(struct buffered_reader *reader,
1111 					  struct region_table **table_ptr)
1112 {
1113 	int result;
1114 	unsigned int i;
1115 	struct region_header header;
1116 	struct region_table *table;
1117 	u8 buffer[sizeof(struct region_header)];
1118 	size_t offset = 0;
1119 
1120 	result = uds_read_from_buffered_reader(reader, buffer, sizeof(buffer));
1121 	if (result != UDS_SUCCESS)
1122 		return vdo_log_error_strerror(result, "cannot read region table header");
1123 
1124 	decode_u64_le(buffer, &offset, &header.magic);
1125 	decode_u64_le(buffer, &offset, &header.region_blocks);
1126 	decode_u16_le(buffer, &offset, &header.type);
1127 	decode_u16_le(buffer, &offset, &header.version);
1128 	decode_u16_le(buffer, &offset, &header.region_count);
1129 	decode_u16_le(buffer, &offset, &header.payload);
1130 
1131 	if (header.magic != REGION_MAGIC)
1132 		return UDS_NO_INDEX;
1133 
1134 	if (header.version != 1) {
1135 		return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1136 					      "unknown region table version %hu",
1137 					      header.version);
1138 	}
1139 
1140 	result = vdo_allocate_extended(struct region_table, header.region_count,
1141 				       struct layout_region,
1142 				       "single file layout region table", &table);
1143 	if (result != VDO_SUCCESS)
1144 		return result;
1145 
1146 	table->header = header;
1147 	for (i = 0; i < header.region_count; i++) {
1148 		u8 region_buffer[sizeof(struct layout_region)];
1149 
1150 		offset = 0;
1151 		result = uds_read_from_buffered_reader(reader, region_buffer,
1152 						       sizeof(region_buffer));
1153 		if (result != UDS_SUCCESS) {
1154 			vdo_free(table);
1155 			return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1156 						      "cannot read region table layouts");
1157 		}
1158 
1159 		decode_u64_le(region_buffer, &offset, &table->regions[i].start_block);
1160 		decode_u64_le(region_buffer, &offset, &table->regions[i].block_count);
1161 		offset += sizeof(u32);
1162 		decode_u16_le(region_buffer, &offset, &table->regions[i].kind);
1163 		decode_u16_le(region_buffer, &offset, &table->regions[i].instance);
1164 	}
1165 
1166 	*table_ptr = table;
1167 	return UDS_SUCCESS;
1168 }
1169 
1170 static int __must_check read_super_block_data(struct buffered_reader *reader,
1171 					      struct index_layout *layout,
1172 					      size_t saved_size)
1173 {
1174 	int result;
1175 	struct super_block_data *super = &layout->super;
1176 	u8 *buffer;
1177 	size_t offset = 0;
1178 
1179 	result = vdo_allocate(saved_size, u8, "super block data", &buffer);
1180 	if (result != VDO_SUCCESS)
1181 		return result;
1182 
1183 	result = uds_read_from_buffered_reader(reader, buffer, saved_size);
1184 	if (result != UDS_SUCCESS) {
1185 		vdo_free(buffer);
1186 		return vdo_log_error_strerror(result, "cannot read region table header");
1187 	}
1188 
1189 	memcpy(&super->magic_label, buffer, MAGIC_SIZE);
1190 	offset += MAGIC_SIZE;
1191 	memcpy(&super->nonce_info, buffer + offset, NONCE_INFO_SIZE);
1192 	offset += NONCE_INFO_SIZE;
1193 	decode_u64_le(buffer, &offset, &super->nonce);
1194 	decode_u32_le(buffer, &offset, &super->version);
1195 	decode_u32_le(buffer, &offset, &super->block_size);
1196 	decode_u16_le(buffer, &offset, &super->index_count);
1197 	decode_u16_le(buffer, &offset, &super->max_saves);
1198 	offset += sizeof(u32);
1199 	decode_u64_le(buffer, &offset, &super->open_chapter_blocks);
1200 	decode_u64_le(buffer, &offset, &super->page_map_blocks);
1201 
1202 	if (is_converted_super_block(super)) {
1203 		decode_u64_le(buffer, &offset, &super->volume_offset);
1204 		decode_u64_le(buffer, &offset, &super->start_offset);
1205 	} else {
1206 		super->volume_offset = 0;
1207 		super->start_offset = 0;
1208 	}
1209 
1210 	vdo_free(buffer);
1211 
1212 	if (memcmp(super->magic_label, LAYOUT_MAGIC, MAGIC_SIZE) != 0)
1213 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1214 					      "unknown superblock magic label");
1215 
1216 	if ((super->version < SUPER_VERSION_MINIMUM) ||
1217 	    (super->version == 4) || (super->version == 5) || (super->version == 6) ||
1218 	    (super->version > SUPER_VERSION_MAXIMUM)) {
1219 		return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1220 					      "unknown superblock version number %u",
1221 					      super->version);
1222 	}
1223 
1224 	if (super->volume_offset < super->start_offset) {
1225 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1226 					      "inconsistent offsets (start %llu, volume %llu)",
1227 					      (unsigned long long) super->start_offset,
1228 					      (unsigned long long) super->volume_offset);
1229 	}
1230 
1231 	/* Sub-indexes are no longer used but the layout retains this field. */
1232 	if (super->index_count != 1) {
1233 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1234 					      "invalid subindex count %u",
1235 					      super->index_count);
1236 	}
1237 
1238 	if (generate_primary_nonce(super->nonce_info, sizeof(super->nonce_info)) != super->nonce) {
1239 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1240 					      "inconsistent superblock nonce");
1241 	}
1242 
1243 	return UDS_SUCCESS;
1244 }
1245 
1246 static int __must_check verify_region(struct layout_region *lr, u64 start_block,
1247 				      enum region_kind kind, unsigned int instance)
1248 {
1249 	if (lr->start_block != start_block)
1250 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1251 					      "incorrect layout region offset");
1252 
1253 	if (lr->kind != kind)
1254 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1255 					      "incorrect layout region kind");
1256 
1257 	if (lr->instance != instance) {
1258 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1259 					      "incorrect layout region instance");
1260 	}
1261 
1262 	return UDS_SUCCESS;
1263 }
1264 
1265 static int __must_check verify_sub_index(struct index_layout *layout, u64 start_block,
1266 					 struct region_table *table)
1267 {
1268 	int result;
1269 	unsigned int i;
1270 	struct sub_index_layout *sil = &layout->index;
1271 	u64 next_block = start_block;
1272 
1273 	sil->sub_index = table->regions[2];
1274 	result = verify_region(&sil->sub_index, next_block, RL_KIND_INDEX, 0);
1275 	if (result != UDS_SUCCESS)
1276 		return result;
1277 
1278 	define_sub_index_nonce(layout);
1279 
1280 	sil->volume = table->regions[3];
1281 	result = verify_region(&sil->volume, next_block, RL_KIND_VOLUME,
1282 			       RL_SOLE_INSTANCE);
1283 	if (result != UDS_SUCCESS)
1284 		return result;
1285 
1286 	next_block += sil->volume.block_count + layout->super.volume_offset;
1287 
1288 	for (i = 0; i < layout->super.max_saves; i++) {
1289 		sil->saves[i].index_save = table->regions[i + 4];
1290 		result = verify_region(&sil->saves[i].index_save, next_block,
1291 				       RL_KIND_SAVE, i);
1292 		if (result != UDS_SUCCESS)
1293 			return result;
1294 
1295 		next_block += sil->saves[i].index_save.block_count;
1296 	}
1297 
1298 	next_block -= layout->super.volume_offset;
1299 	if (next_block != start_block + sil->sub_index.block_count) {
1300 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1301 					      "sub index region does not span all saves");
1302 	}
1303 
1304 	return UDS_SUCCESS;
1305 }
1306 
1307 static int __must_check reconstitute_layout(struct index_layout *layout,
1308 					    struct region_table *table, u64 first_block)
1309 {
1310 	int result;
1311 	u64 next_block = first_block;
1312 
1313 	result = vdo_allocate(layout->super.max_saves, struct index_save_layout,
1314 			      __func__, &layout->index.saves);
1315 	if (result != VDO_SUCCESS)
1316 		return result;
1317 
1318 	layout->total_blocks = table->header.region_blocks;
1319 
1320 	layout->header = table->regions[0];
1321 	result = verify_region(&layout->header, next_block++, RL_KIND_HEADER,
1322 			       RL_SOLE_INSTANCE);
1323 	if (result != UDS_SUCCESS)
1324 		return result;
1325 
1326 	layout->config = table->regions[1];
1327 	result = verify_region(&layout->config, next_block++, RL_KIND_CONFIG,
1328 			       RL_SOLE_INSTANCE);
1329 	if (result != UDS_SUCCESS)
1330 		return result;
1331 
1332 	result = verify_sub_index(layout, next_block, table);
1333 	if (result != UDS_SUCCESS)
1334 		return result;
1335 
1336 	next_block += layout->index.sub_index.block_count;
1337 
1338 	layout->seal = table->regions[table->header.region_count - 1];
1339 	result = verify_region(&layout->seal, next_block + layout->super.volume_offset,
1340 			       RL_KIND_SEAL, RL_SOLE_INSTANCE);
1341 	if (result != UDS_SUCCESS)
1342 		return result;
1343 
1344 	if (++next_block != (first_block + layout->total_blocks)) {
1345 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1346 					      "layout table does not span total blocks");
1347 	}
1348 
1349 	return UDS_SUCCESS;
1350 }
1351 
1352 static int __must_check load_super_block(struct index_layout *layout, size_t block_size,
1353 					 u64 first_block, struct buffered_reader *reader)
1354 {
1355 	int result;
1356 	struct region_table *table = NULL;
1357 	struct super_block_data *super = &layout->super;
1358 
1359 	result = load_region_table(reader, &table);
1360 	if (result != UDS_SUCCESS)
1361 		return result;
1362 
1363 	if (table->header.type != RH_TYPE_SUPER) {
1364 		vdo_free(table);
1365 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1366 					      "not a superblock region table");
1367 	}
1368 
1369 	result = read_super_block_data(reader, layout, table->header.payload);
1370 	if (result != UDS_SUCCESS) {
1371 		vdo_free(table);
1372 		return vdo_log_error_strerror(result, "unknown superblock format");
1373 	}
1374 
1375 	if (super->block_size != block_size) {
1376 		vdo_free(table);
1377 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1378 					      "superblock saved block_size %u differs from supplied block_size %zu",
1379 					      super->block_size, block_size);
1380 	}
1381 
1382 	first_block -= (super->volume_offset - super->start_offset);
1383 	result = reconstitute_layout(layout, table, first_block);
1384 	vdo_free(table);
1385 	return result;
1386 }
1387 
1388 static int __must_check read_index_save_data(struct buffered_reader *reader,
1389 					     struct index_save_layout *isl,
1390 					     size_t saved_size)
1391 {
1392 	int result;
1393 	struct index_state_version file_version;
1394 	u8 buffer[sizeof(struct index_save_data) + sizeof(struct index_state_data301)];
1395 	size_t offset = 0;
1396 
1397 	if (saved_size != sizeof(buffer)) {
1398 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1399 					      "unexpected index save data size %zu",
1400 					      saved_size);
1401 	}
1402 
1403 	result = uds_read_from_buffered_reader(reader, buffer, sizeof(buffer));
1404 	if (result != UDS_SUCCESS)
1405 		return vdo_log_error_strerror(result, "cannot read index save data");
1406 
1407 	decode_u64_le(buffer, &offset, &isl->save_data.timestamp);
1408 	decode_u64_le(buffer, &offset, &isl->save_data.nonce);
1409 	decode_u32_le(buffer, &offset, &isl->save_data.version);
1410 	offset += sizeof(u32);
1411 
1412 	if (isl->save_data.version > 1) {
1413 		return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1414 					      "unknown index save version number %u",
1415 					      isl->save_data.version);
1416 	}
1417 
1418 	decode_s32_le(buffer, &offset, &file_version.signature);
1419 	decode_s32_le(buffer, &offset, &file_version.version_id);
1420 
1421 	if ((file_version.signature != INDEX_STATE_VERSION_301.signature) ||
1422 	    (file_version.version_id != INDEX_STATE_VERSION_301.version_id)) {
1423 		return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1424 					      "index state version %d,%d is unsupported",
1425 					      file_version.signature,
1426 					      file_version.version_id);
1427 	}
1428 
1429 	decode_u64_le(buffer, &offset, &isl->state_data.newest_chapter);
1430 	decode_u64_le(buffer, &offset, &isl->state_data.oldest_chapter);
1431 	decode_u64_le(buffer, &offset, &isl->state_data.last_save);
1432 	/* Skip past some historical fields that are now unused */
1433 	offset += sizeof(u32) + sizeof(u32);
1434 	return UDS_SUCCESS;
1435 }
1436 
1437 static int __must_check reconstruct_index_save(struct index_save_layout *isl,
1438 					       struct region_table *table)
1439 {
1440 	int result;
1441 	unsigned int z;
1442 	struct layout_region *last_region;
1443 	u64 next_block = isl->index_save.start_block;
1444 	u64 last_block = next_block + isl->index_save.block_count;
1445 
1446 	isl->zone_count = table->header.region_count - 3;
1447 
1448 	last_region = &table->regions[table->header.region_count - 1];
1449 	if (last_region->kind == RL_KIND_EMPTY) {
1450 		isl->free_space = *last_region;
1451 		isl->zone_count--;
1452 	} else {
1453 		isl->free_space = (struct layout_region) {
1454 			.start_block = last_block,
1455 			.block_count = 0,
1456 			.kind = RL_KIND_EMPTY,
1457 			.instance = RL_SOLE_INSTANCE,
1458 		};
1459 	}
1460 
1461 	isl->header = table->regions[0];
1462 	result = verify_region(&isl->header, next_block++, RL_KIND_HEADER,
1463 			       RL_SOLE_INSTANCE);
1464 	if (result != UDS_SUCCESS)
1465 		return result;
1466 
1467 	isl->index_page_map = table->regions[1];
1468 	result = verify_region(&isl->index_page_map, next_block, RL_KIND_INDEX_PAGE_MAP,
1469 			       RL_SOLE_INSTANCE);
1470 	if (result != UDS_SUCCESS)
1471 		return result;
1472 
1473 	next_block += isl->index_page_map.block_count;
1474 
1475 	for (z = 0; z < isl->zone_count; z++) {
1476 		isl->volume_index_zones[z] = table->regions[z + 2];
1477 		result = verify_region(&isl->volume_index_zones[z], next_block,
1478 				       RL_KIND_VOLUME_INDEX, z);
1479 		if (result != UDS_SUCCESS)
1480 			return result;
1481 
1482 		next_block += isl->volume_index_zones[z].block_count;
1483 	}
1484 
1485 	isl->open_chapter = table->regions[isl->zone_count + 2];
1486 	result = verify_region(&isl->open_chapter, next_block, RL_KIND_OPEN_CHAPTER,
1487 			       RL_SOLE_INSTANCE);
1488 	if (result != UDS_SUCCESS)
1489 		return result;
1490 
1491 	next_block += isl->open_chapter.block_count;
1492 
1493 	result = verify_region(&isl->free_space, next_block, RL_KIND_EMPTY,
1494 			       RL_SOLE_INSTANCE);
1495 	if (result != UDS_SUCCESS)
1496 		return result;
1497 
1498 	next_block += isl->free_space.block_count;
1499 	if (next_block != last_block) {
1500 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1501 					      "index save layout table incomplete");
1502 	}
1503 
1504 	return UDS_SUCCESS;
1505 }
1506 
1507 static int __must_check load_index_save(struct index_save_layout *isl,
1508 					struct buffered_reader *reader,
1509 					unsigned int instance)
1510 {
1511 	int result;
1512 	struct region_table *table = NULL;
1513 
1514 	result = load_region_table(reader, &table);
1515 	if (result != UDS_SUCCESS) {
1516 		return vdo_log_error_strerror(result, "cannot read index save %u header",
1517 					      instance);
1518 	}
1519 
1520 	if (table->header.region_blocks != isl->index_save.block_count) {
1521 		u64 region_blocks = table->header.region_blocks;
1522 
1523 		vdo_free(table);
1524 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1525 					      "unexpected index save %u region block count %llu",
1526 					      instance,
1527 					      (unsigned long long) region_blocks);
1528 	}
1529 
1530 	if (table->header.type == RH_TYPE_UNSAVED) {
1531 		vdo_free(table);
1532 		reset_index_save_layout(isl, 0);
1533 		return UDS_SUCCESS;
1534 	}
1535 
1536 
1537 	if (table->header.type != RH_TYPE_SAVE) {
1538 		vdo_log_error_strerror(UDS_CORRUPT_DATA,
1539 				       "unexpected index save %u header type %u",
1540 				       instance, table->header.type);
1541 		vdo_free(table);
1542 		return UDS_CORRUPT_DATA;
1543 	}
1544 
1545 	result = read_index_save_data(reader, isl, table->header.payload);
1546 	if (result != UDS_SUCCESS) {
1547 		vdo_free(table);
1548 		return vdo_log_error_strerror(result,
1549 					      "unknown index save %u data format",
1550 					      instance);
1551 	}
1552 
1553 	result = reconstruct_index_save(isl, table);
1554 	vdo_free(table);
1555 	if (result != UDS_SUCCESS) {
1556 		return vdo_log_error_strerror(result, "cannot reconstruct index save %u",
1557 					      instance);
1558 	}
1559 
1560 	return UDS_SUCCESS;
1561 }
1562 
1563 static int __must_check load_sub_index_regions(struct index_layout *layout)
1564 {
1565 	int result;
1566 	unsigned int j;
1567 	struct index_save_layout *isl;
1568 	struct buffered_reader *reader;
1569 
1570 	for (j = 0; j < layout->super.max_saves; j++) {
1571 		isl = &layout->index.saves[j];
1572 		result = open_region_reader(layout, &isl->index_save, &reader);
1573 
1574 		if (result != UDS_SUCCESS) {
1575 			vdo_log_error_strerror(result,
1576 					       "cannot get reader for index 0 save %u",
1577 					       j);
1578 			return result;
1579 		}
1580 
1581 		result = load_index_save(isl, reader, j);
1582 		uds_free_buffered_reader(reader);
1583 		if (result != UDS_SUCCESS) {
1584 			/* Another save slot might be valid. */
1585 			reset_index_save_layout(isl, 0);
1586 			continue;
1587 		}
1588 	}
1589 
1590 	return UDS_SUCCESS;
1591 }
1592 
1593 static int __must_check verify_uds_index_config(struct index_layout *layout,
1594 						struct uds_configuration *config)
1595 {
1596 	int result;
1597 	struct buffered_reader *reader = NULL;
1598 	u64 offset;
1599 
1600 	offset = layout->super.volume_offset - layout->super.start_offset;
1601 	result = open_layout_reader(layout, &layout->config, offset, &reader);
1602 	if (result != UDS_SUCCESS)
1603 		return vdo_log_error_strerror(result, "failed to open config reader");
1604 
1605 	result = uds_validate_config_contents(reader, config);
1606 	if (result != UDS_SUCCESS) {
1607 		uds_free_buffered_reader(reader);
1608 		return vdo_log_error_strerror(result, "failed to read config region");
1609 	}
1610 
1611 	uds_free_buffered_reader(reader);
1612 	return UDS_SUCCESS;
1613 }
1614 
1615 static int load_index_layout(struct index_layout *layout, struct uds_configuration *config)
1616 {
1617 	int result;
1618 	struct buffered_reader *reader;
1619 
1620 	result = uds_make_buffered_reader(layout->factory,
1621 					  layout->offset / UDS_BLOCK_SIZE, 1, &reader);
1622 	if (result != UDS_SUCCESS)
1623 		return vdo_log_error_strerror(result, "unable to read superblock");
1624 
1625 	result = load_super_block(layout, UDS_BLOCK_SIZE,
1626 				  layout->offset / UDS_BLOCK_SIZE, reader);
1627 	uds_free_buffered_reader(reader);
1628 	if (result != UDS_SUCCESS)
1629 		return result;
1630 
1631 	result = verify_uds_index_config(layout, config);
1632 	if (result != UDS_SUCCESS)
1633 		return result;
1634 
1635 	return load_sub_index_regions(layout);
1636 }
1637 
1638 static int create_layout_factory(struct index_layout *layout,
1639 				 const struct uds_configuration *config)
1640 {
1641 	int result;
1642 	size_t writable_size;
1643 	struct io_factory *factory = NULL;
1644 
1645 	result = uds_make_io_factory(config->bdev, &factory);
1646 	if (result != UDS_SUCCESS)
1647 		return result;
1648 
1649 	writable_size = uds_get_writable_size(factory) & -UDS_BLOCK_SIZE;
1650 	if (writable_size < config->size + config->offset) {
1651 		uds_put_io_factory(factory);
1652 		vdo_log_error("index storage (%zu) is smaller than the requested size %zu",
1653 			      writable_size, config->size + config->offset);
1654 		return -ENOSPC;
1655 	}
1656 
1657 	layout->factory = factory;
1658 	layout->factory_size = (config->size > 0) ? config->size : writable_size;
1659 	layout->offset = config->offset;
1660 	return UDS_SUCCESS;
1661 }
1662 
1663 int uds_make_index_layout(struct uds_configuration *config, bool new_layout,
1664 			  struct index_layout **layout_ptr)
1665 {
1666 	int result;
1667 	struct index_layout *layout = NULL;
1668 	struct save_layout_sizes sizes;
1669 
1670 	result = compute_sizes(config, &sizes);
1671 	if (result != UDS_SUCCESS)
1672 		return result;
1673 
1674 	result = vdo_allocate(1, struct index_layout, __func__, &layout);
1675 	if (result != VDO_SUCCESS)
1676 		return result;
1677 
1678 	result = create_layout_factory(layout, config);
1679 	if (result != UDS_SUCCESS) {
1680 		uds_free_index_layout(layout);
1681 		return result;
1682 	}
1683 
1684 	if (layout->factory_size < sizes.total_size) {
1685 		vdo_log_error("index storage (%zu) is smaller than the required size %llu",
1686 			      layout->factory_size,
1687 			      (unsigned long long) sizes.total_size);
1688 		uds_free_index_layout(layout);
1689 		return -ENOSPC;
1690 	}
1691 
1692 	if (new_layout)
1693 		result = create_index_layout(layout, config);
1694 	else
1695 		result = load_index_layout(layout, config);
1696 	if (result != UDS_SUCCESS) {
1697 		uds_free_index_layout(layout);
1698 		return result;
1699 	}
1700 
1701 	*layout_ptr = layout;
1702 	return UDS_SUCCESS;
1703 }
1704 
1705 void uds_free_index_layout(struct index_layout *layout)
1706 {
1707 	if (layout == NULL)
1708 		return;
1709 
1710 	vdo_free(layout->index.saves);
1711 	if (layout->factory != NULL)
1712 		uds_put_io_factory(layout->factory);
1713 
1714 	vdo_free(layout);
1715 }
1716 
1717 int uds_replace_index_layout_storage(struct index_layout *layout,
1718 				     struct block_device *bdev)
1719 {
1720 	return uds_replace_storage(layout->factory, bdev);
1721 }
1722 
1723 /* Obtain a dm_bufio_client for the volume region. */
1724 int uds_open_volume_bufio(struct index_layout *layout, size_t block_size,
1725 			  unsigned int reserved_buffers,
1726 			  struct dm_bufio_client **client_ptr)
1727 {
1728 	off_t offset = (layout->index.volume.start_block +
1729 			layout->super.volume_offset -
1730 			layout->super.start_offset);
1731 
1732 	return uds_make_bufio(layout->factory, offset, block_size, reserved_buffers,
1733 			      client_ptr);
1734 }
1735 
1736 u64 uds_get_volume_nonce(struct index_layout *layout)
1737 {
1738 	return layout->index.nonce;
1739 }
1740