xref: /linux/drivers/md/dm-vdo/indexer/index-layout.c (revision 5014bebee0cffda14fafae5a2534d08120b7b9e8)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright 2023 Red Hat
4  */
5 
6 #include "index-layout.h"
7 
8 #include <linux/random.h>
9 
10 #include "logger.h"
11 #include "memory-alloc.h"
12 #include "murmurhash3.h"
13 #include "numeric.h"
14 #include "time-utils.h"
15 
16 #include "config.h"
17 #include "open-chapter.h"
18 #include "volume-index.h"
19 
20 /*
21  * The UDS layout on storage media is divided into a number of fixed-size regions, the sizes of
22  * which are computed when the index is created. Every header and region begins on a 4K block
23  * boundary. Save regions are further sub-divided into regions of their own.
24  *
25  * Each region has a kind and an instance number. Some kinds only have one instance and therefore
26  * use RL_SOLE_INSTANCE (-1 as a u16, i.e. 65535) as the instance number. The RL_KIND_INDEX used to
27  * use instances to represent sub-indices; now, however, there is only ever one sub-index and one instance.
28  * The RL_KIND_VOLUME_INDEX uses instances to record which zone is being saved.
29  *
30  * Every region header has a type and version.
31  *
32  *     +-+-+---------+--------+--------+-+
33  *     | | |   I N D E X  0   101, 0   | |
34  *     |H|C+---------+--------+--------+S|
35  *     |D|f| Volume  | Save   | Save   |e|
36  *     |R|g| Region  | Region | Region |a|
37  *     | | | 201, -1 | 202, 0 | 202, 1 |l|
38  *     +-+-+---------+--------+--------+-+
39  *
40  * The header contains the encoded region layout table as well as some index configuration data.
41  * The sub-index region and its subdivisions are maintained in the same table.
42  *
43  * There are two save regions to preserve the old state in case saving the new state is incomplete.
44  * They are used in alternation. Each save region is further divided into sub-regions.
45  *
46  *     +-+-----+------+------+-----+-----+
47  *     |H| IPM | MI   | MI   |     | OC  |
48  *     |D|     | zone | zone | ... |     |
49  *     |R| 301 | 302  | 302  |     | 303 |
50  *     | | -1  |  0   |  1   |     | -1  |
51  *     +-+-----+------+------+-----+-----+
52  *
53  * The header contains the encoded region layout table as well as index state data for that save.
54  * Each save also has a unique nonce.
55  */
56 
/* Size in bytes of the super block's nonce_info seed data, which is hashed into the primary nonce. */
57 #define NONCE_INFO_SIZE 32
/* Number of save regions sized into a new layout (compute_sizes() uses it as the save count). */
58 #define MAX_SAVES 2
59 
/*
 * Kinds of layout region, stored in layout_region.kind. Judging by the layout diagrams above, the
 * 1xx values are top-level regions, the 2xx values are the parts of the index region, and the 3xx
 * values are the parts of a save region.
 */
60 enum region_kind {
61 	RL_KIND_EMPTY = 0, /* used for the free space of a save region */
62 	RL_KIND_HEADER = 1, /* used for both the layout header and each save header */
63 	RL_KIND_CONFIG = 100,
64 	RL_KIND_INDEX = 101,
65 	RL_KIND_SEAL = 102,
66 	RL_KIND_VOLUME = 201,
67 	RL_KIND_SAVE = 202,
68 	RL_KIND_INDEX_PAGE_MAP = 301,
69 	RL_KIND_VOLUME_INDEX = 302,
70 	RL_KIND_OPEN_CHAPTER = 303,
71 };
72 
/* Region table types, stored in region_header.type. */
73 /* Some region types are historical and are no longer used. */
74 enum region_type {
75 	RH_TYPE_FREE = 0, /* unused */
76 	RH_TYPE_SUPER = 1, /* table in the layout header (see make_layout_region_table()) */
77 	RH_TYPE_SAVE = 2, /* table of a save region that has volume index zones */
78 	RH_TYPE_CHECKPOINT = 3, /* unused */
79 	RH_TYPE_UNSAVED = 4, /* table of a save region with no zones, i.e. an empty save */
80 };
81 
/*
 * Instance number for region kinds that only ever have one instance. layout_region.instance is a
 * u16, so this is the same value as (u16) -1.
 */
82 #define RL_SOLE_INSTANCE 65535
83 
84 /*
85  * Super block version 2 is the first released version.
86  *
87  * Super block version 3 is the normal version used from RHEL 8.2 onwards.
88  *
89  * Super block versions 4 through 6 were incremental development versions and
90  * are not supported.
91  *
92  * Super block version 7 is used for volumes which have been reduced in size by one chapter in
93  * order to make room to prepend LVM metadata to a volume originally created without LVM. This
94  * allows the index to retain most of its deduplication records.
 *
 * SUPER_VERSION_CURRENT is the version given to newly generated super blocks (see
 * generate_super_block_data()).
95  */
96 #define SUPER_VERSION_MINIMUM 3
97 #define SUPER_VERSION_CURRENT 3
98 #define SUPER_VERSION_MAXIMUM 7
99 
/* Label copied into super_block_data.magic_label when a super block is generated. */
100 static const u8 LAYOUT_MAGIC[] = "*ALBIREO*SINGLE*FILE*LAYOUT*001*";
/* Magic number encoded at the start of every region table. */
101 static const u64 REGION_MAGIC = 0x416c6252676e3031; /* 'AlbRgn01' */
102 
/* Length of the layout magic label, excluding the string's terminating NUL. */
103 #define MAGIC_SIZE (sizeof(LAYOUT_MAGIC) - 1)
104 
/* The fixed-size header of a region table; encode_region_table() writes it field by field. */
105 struct region_header {
106 	u64 magic; /* set to REGION_MAGIC */
107 	u64 region_blocks; /* block count of the whole region this table describes */
108 	u16 type; /* an enum region_type value */
109 	/* Currently always version 1 */
110 	u16 version;
111 	u16 region_count; /* number of layout_region entries following the header */
112 	u16 payload; /* size in bytes of the additional data counted in the table's encoded size */
113 };
114 
/* One contiguous run of blocks in the layout, identified by its kind and instance. */
115 struct layout_region {
116 	u64 start_block; /* first block of the region */
117 	u64 block_count; /* length of the region in blocks */
118 	u32 __unused; /* always encoded as zero */
119 	u16 kind; /* an enum region_kind value */
120 	u16 instance; /* instance number, or RL_SOLE_INSTANCE */
121 };
122 
/* A region header together with the list of regions it describes. */
123 struct region_table {
	/* Bytes needed to encode the header, the payload, and all of the region entries. */
124 	size_t encoded_size;
125 	struct region_header header;
	/* header.region_count entries */
126 	struct layout_region regions[];
127 };
128 
/* Per-save data encoded directly after the region table in a save header. */
129 struct index_save_data {
	/* Time of the save; validate_index_save_layout() treats zero as "no valid save". */
130 	u64 timestamp;
	/* Expected to match generate_index_save_nonce() for this save. */
131 	u64 nonce;
132 	/* Currently always version 1 */
133 	u32 version;
134 	u32 unused__; /* always encoded as zero */
135 };
136 
/* Version identification written at the start of the index state data of a save. */
137 struct index_state_version {
138 	s32 signature;
139 	s32 version_id;
140 };
141 
/* The index state version that write_index_save_header() encodes for saves with zones. */
142 static const struct index_state_version INDEX_STATE_VERSION_301 = {
143 	.signature  = -1,
144 	.version_id = 301,
145 };
146 
/* Index state recorded with a save; uds_load_index_state() copies the chapter fields back. */
147 struct index_state_data301 {
148 	struct index_state_version version;
149 	u64 newest_chapter; /* loaded into index->newest_virtual_chapter */
150 	u64 oldest_chapter; /* loaded into index->oldest_virtual_chapter */
151 	u64 last_save; /* loaded into index->last_save */
	/* The two fields below are written as a single zero u64 by write_index_save_header(). */
152 	u32 unused;
153 	u32 padding;
154 };
155 
/* The layout of one save region and the data stored in its header. */
156 struct index_save_layout {
	/* Number of volume index zones in the save; zero marks an empty (invalidated) save. */
157 	unsigned int zone_count;
158 	struct layout_region index_save; /* the entire save region */
159 	struct layout_region header; /* first block of the save region */
160 	struct layout_region index_page_map;
161 	struct layout_region free_space; /* listed in the region table only when non-empty */
162 	struct layout_region volume_index_zones[MAX_ZONES]; /* first zone_count entries are used */
163 	struct layout_region open_chapter;
164 	struct index_save_data save_data;
165 	struct index_state_data301 state_data;
166 };
167 
/* The layout of the index region: the volume followed by the save regions. */
168 struct sub_index_layout {
169 	u64 nonce; /* derived from the super block nonce by define_sub_index_nonce() */
170 	struct layout_region sub_index; /* the entire index region */
171 	struct layout_region volume;
172 	struct index_save_layout *saves; /* array of save layouts, indexed up to super.max_saves */
173 };
174 
/* The super block payload stored after the region table in the layout header. */
175 struct super_block_data {
176 	u8 magic_label[MAGIC_SIZE]; /* copy of LAYOUT_MAGIC */
177 	u8 nonce_info[NONCE_INFO_SIZE]; /* seed data hashed to produce the primary nonce */
178 	u64 nonce; /* primary nonce */
179 	u32 version; /* super block version; see SUPER_VERSION_* */
180 	u32 block_size;
181 	u16 index_count; /* set to 1 when a super block is generated */
182 	u16 max_saves; /* number of save regions */
183 	/* Padding reflects a blank field on permanent storage */
184 	u8 padding[4];
185 	u64 open_chapter_blocks;
186 	u64 page_map_blocks;
	/* The two offsets below are only encoded for converted (version 7) super blocks. */
187 	u64 volume_offset;
188 	u64 start_offset; /* subtracted from region start blocks by open_region_reader()/writer() */
189 };
190 
/* The complete in-memory description of an index layout on storage. */
191 struct index_layout {
192 	struct io_factory *factory; /* used to create the buffered readers and writers */
193 	size_t factory_size;
194 	off_t offset; /* divided by the block size to find the layout's first block */
195 	struct super_block_data super;
196 	struct layout_region header;
197 	struct layout_region config;
198 	struct sub_index_layout index;
199 	struct layout_region seal;
200 	u64 total_blocks;
201 };
202 
/* Sizes of the parts of a layout, as filled in by compute_sizes(). */
203 struct save_layout_sizes {
204 	unsigned int save_count; /* number of save regions */
205 	size_t block_size; /* bytes per block */
206 	u64 volume_blocks;
207 	u64 volume_index_blocks;
208 	u64 page_map_blocks;
209 	u64 open_chapter_blocks;
210 	u64 save_blocks; /* blocks in one save region, including its header block */
211 	u64 sub_index_blocks; /* the volume plus all save regions */
212 	u64 total_blocks; /* sub_index_blocks plus three further blocks */
213 	size_t total_size; /* total_blocks expressed in bytes */
214 };
215 
is_converted_super_block(struct super_block_data * super)216 static inline bool is_converted_super_block(struct super_block_data *super)
217 {
218 	return super->version == 7;
219 }
220 
compute_sizes(const struct uds_configuration * config,struct save_layout_sizes * sls)221 static int __must_check compute_sizes(const struct uds_configuration *config,
222 				      struct save_layout_sizes *sls)
223 {
224 	int result;
225 	struct index_geometry *geometry = config->geometry;
226 
227 	memset(sls, 0, sizeof(*sls));
228 	sls->save_count = MAX_SAVES;
229 	sls->block_size = UDS_BLOCK_SIZE;
230 	sls->volume_blocks = geometry->bytes_per_volume / sls->block_size;
231 
232 	result = uds_compute_volume_index_save_blocks(config, sls->block_size,
233 						      &sls->volume_index_blocks);
234 	if (result != UDS_SUCCESS)
235 		return vdo_log_error_strerror(result, "cannot compute index save size");
236 
237 	sls->page_map_blocks =
238 		DIV_ROUND_UP(uds_compute_index_page_map_save_size(geometry),
239 			     sls->block_size);
240 	sls->open_chapter_blocks =
241 		DIV_ROUND_UP(uds_compute_saved_open_chapter_size(geometry),
242 			     sls->block_size);
243 	sls->save_blocks =
244 		1 + (sls->volume_index_blocks + sls->page_map_blocks + sls->open_chapter_blocks);
245 	sls->sub_index_blocks = sls->volume_blocks + (sls->save_count * sls->save_blocks);
246 	sls->total_blocks = 3 + sls->sub_index_blocks;
247 	sls->total_size = sls->total_blocks * sls->block_size;
248 
249 	return UDS_SUCCESS;
250 }
251 
252 /* Create unique data using the current time and a pseudorandom number. */
create_unique_nonce_data(u8 * buffer)253 static void create_unique_nonce_data(u8 *buffer)
254 {
255 	ktime_t now = current_time_ns(CLOCK_REALTIME);
256 	u32 rand;
257 	size_t offset = 0;
258 
259 	get_random_bytes(&rand, sizeof(u32));
260 	memcpy(buffer + offset, &now, sizeof(now));
261 	offset += sizeof(now);
262 	memcpy(buffer + offset, &rand, sizeof(rand));
263 	offset += sizeof(rand);
264 	while (offset < NONCE_INFO_SIZE) {
265 		size_t len = min(NONCE_INFO_SIZE - offset, offset);
266 
267 		memcpy(buffer + offset, buffer, len);
268 		offset += len;
269 	}
270 }
271 
/*
 * Hash a block of data into a 64-bit value. The 64-bit start value is folded into a 32-bit
 * murmurhash3 seed, and the result is the little-endian u64 found at byte offset 4 of the
 * 16-byte hash output.
 */
static u64 hash_stuff(u64 start, const void *data, size_t len)
{
	u8 digest[16];
	u32 seed = start ^ (start >> 27);

	murmurhash3_128(data, len, seed, digest);
	return get_unaligned_le64(&digest[4]);
}
280 
281 /* Generate a primary nonce from the provided data. */
generate_primary_nonce(const void * data,size_t len)282 static u64 generate_primary_nonce(const void *data, size_t len)
283 {
284 	return hash_stuff(0xa1b1e0fc, data, len);
285 }
286 
287 /*
288  * Deterministically generate a secondary nonce from an existing nonce and some arbitrary data by
289  * hashing the original nonce and the data to produce a new nonce.
290  */
generate_secondary_nonce(u64 nonce,const void * data,size_t len)291 static u64 generate_secondary_nonce(u64 nonce, const void *data, size_t len)
292 {
293 	return hash_stuff(nonce + 1, data, len);
294 }
295 
open_layout_reader(struct index_layout * layout,struct layout_region * lr,off_t offset,struct buffered_reader ** reader_ptr)296 static int __must_check open_layout_reader(struct index_layout *layout,
297 					   struct layout_region *lr, off_t offset,
298 					   struct buffered_reader **reader_ptr)
299 {
300 	return uds_make_buffered_reader(layout->factory, lr->start_block + offset,
301 					lr->block_count, reader_ptr);
302 }
303 
open_region_reader(struct index_layout * layout,struct layout_region * region,struct buffered_reader ** reader_ptr)304 static int open_region_reader(struct index_layout *layout, struct layout_region *region,
305 			      struct buffered_reader **reader_ptr)
306 {
307 	return open_layout_reader(layout, region, -layout->super.start_offset,
308 				  reader_ptr);
309 }
310 
open_layout_writer(struct index_layout * layout,struct layout_region * lr,off_t offset,struct buffered_writer ** writer_ptr)311 static int __must_check open_layout_writer(struct index_layout *layout,
312 					   struct layout_region *lr, off_t offset,
313 					   struct buffered_writer **writer_ptr)
314 {
315 	return uds_make_buffered_writer(layout->factory, lr->start_block + offset,
316 					lr->block_count, writer_ptr);
317 }
318 
open_region_writer(struct index_layout * layout,struct layout_region * region,struct buffered_writer ** writer_ptr)319 static int open_region_writer(struct index_layout *layout, struct layout_region *region,
320 			      struct buffered_writer **writer_ptr)
321 {
322 	return open_layout_writer(layout, region, -layout->super.start_offset,
323 				  writer_ptr);
324 }
325 
generate_super_block_data(struct save_layout_sizes * sls,struct super_block_data * super)326 static void generate_super_block_data(struct save_layout_sizes *sls,
327 				      struct super_block_data *super)
328 {
329 	memset(super, 0, sizeof(*super));
330 	memcpy(super->magic_label, LAYOUT_MAGIC, MAGIC_SIZE);
331 	create_unique_nonce_data(super->nonce_info);
332 
333 	super->nonce = generate_primary_nonce(super->nonce_info,
334 					      sizeof(super->nonce_info));
335 	super->version = SUPER_VERSION_CURRENT;
336 	super->block_size = sls->block_size;
337 	super->index_count = 1;
338 	super->max_saves = sls->save_count;
339 	super->open_chapter_blocks = sls->open_chapter_blocks;
340 	super->page_map_blocks = sls->page_map_blocks;
341 	super->volume_offset = 0;
342 	super->start_offset = 0;
343 }
344 
define_sub_index_nonce(struct index_layout * layout)345 static void define_sub_index_nonce(struct index_layout *layout)
346 {
347 	struct sub_index_nonce_data {
348 		u64 offset;
349 		u16 index_id;
350 	};
351 	struct sub_index_layout *sil = &layout->index;
352 	u64 primary_nonce = layout->super.nonce;
353 	u8 buffer[sizeof(struct sub_index_nonce_data)] = { 0 };
354 	size_t offset = 0;
355 
356 	encode_u64_le(buffer, &offset, sil->sub_index.start_block);
357 	encode_u16_le(buffer, &offset, 0);
358 	sil->nonce = generate_secondary_nonce(primary_nonce, buffer, sizeof(buffer));
359 	if (sil->nonce == 0) {
360 		sil->nonce = generate_secondary_nonce(~primary_nonce + 1, buffer,
361 						      sizeof(buffer));
362 	}
363 }
364 
/*
 * Lay out the index region starting at start_block: the region as a whole, the volume at its
 * start, and then each save region in turn. Finish by defining the index region's nonce.
 */
static void setup_sub_index(struct index_layout *layout, u64 start_block,
			    struct save_layout_sizes *sls)
{
	struct sub_index_layout *sil = &layout->index;
	u64 save_start = start_block + sls->volume_blocks;
	unsigned int save;

	sil->sub_index = (struct layout_region) {
		.start_block = start_block,
		.block_count = sls->sub_index_blocks,
		.kind = RL_KIND_INDEX,
		.instance = 0,
	};

	/* The volume occupies the beginning of the index region. */
	sil->volume = (struct layout_region) {
		.start_block = start_block,
		.block_count = sls->volume_blocks,
		.kind = RL_KIND_VOLUME,
		.instance = RL_SOLE_INSTANCE,
	};

	/* The saves follow the volume, packed back to back. */
	for (save = 0; save < sls->save_count; save++, save_start += sls->save_blocks) {
		sil->saves[save].index_save = (struct layout_region) {
			.start_block = save_start,
			.block_count = sls->save_blocks,
			.kind = RL_KIND_SAVE,
			.instance = save,
		};
	}

	define_sub_index_nonce(layout);
}
401 
initialize_layout(struct index_layout * layout,struct save_layout_sizes * sls)402 static void initialize_layout(struct index_layout *layout, struct save_layout_sizes *sls)
403 {
404 	u64 next_block = layout->offset / sls->block_size;
405 
406 	layout->total_blocks = sls->total_blocks;
407 	generate_super_block_data(sls, &layout->super);
408 	layout->header = (struct layout_region) {
409 		.start_block = next_block++,
410 		.block_count = 1,
411 		.kind = RL_KIND_HEADER,
412 		.instance = RL_SOLE_INSTANCE,
413 	};
414 
415 	layout->config = (struct layout_region) {
416 		.start_block = next_block++,
417 		.block_count = 1,
418 		.kind = RL_KIND_CONFIG,
419 		.instance = RL_SOLE_INSTANCE,
420 	};
421 
422 	setup_sub_index(layout, next_block, sls);
423 	next_block += sls->sub_index_blocks;
424 
425 	layout->seal = (struct layout_region) {
426 		.start_block = next_block,
427 		.block_count = 1,
428 		.kind = RL_KIND_SEAL,
429 		.instance = RL_SOLE_INSTANCE,
430 	};
431 }
432 
make_index_save_region_table(struct index_save_layout * isl,struct region_table ** table_ptr)433 static int __must_check make_index_save_region_table(struct index_save_layout *isl,
434 						     struct region_table **table_ptr)
435 {
436 	int result;
437 	unsigned int z;
438 	struct region_table *table;
439 	struct layout_region *lr;
440 	u16 region_count;
441 	size_t payload;
442 	size_t type;
443 
444 	if (isl->zone_count > 0) {
445 		/*
446 		 * Normal save regions: header, page map, volume index zones,
447 		 * open chapter, and possibly free space.
448 		 */
449 		region_count = 3 + isl->zone_count;
450 		if (isl->free_space.block_count > 0)
451 			region_count++;
452 
453 		payload = sizeof(isl->save_data) + sizeof(isl->state_data);
454 		type = RH_TYPE_SAVE;
455 	} else {
456 		/* Empty save regions: header, page map, free space. */
457 		region_count = 3;
458 		payload = sizeof(isl->save_data);
459 		type = RH_TYPE_UNSAVED;
460 	}
461 
462 	result = vdo_allocate_extended(struct region_table, region_count,
463 				       struct layout_region,
464 				       "layout region table for ISL", &table);
465 	if (result != VDO_SUCCESS)
466 		return result;
467 
468 	lr = &table->regions[0];
469 	*lr++ = isl->header;
470 	*lr++ = isl->index_page_map;
471 	for (z = 0; z < isl->zone_count; z++)
472 		*lr++ = isl->volume_index_zones[z];
473 
474 	if (isl->zone_count > 0)
475 		*lr++ = isl->open_chapter;
476 
477 	if (isl->free_space.block_count > 0)
478 		*lr++ = isl->free_space;
479 
480 	table->header = (struct region_header) {
481 		.magic = REGION_MAGIC,
482 		.region_blocks = isl->index_save.block_count,
483 		.type = type,
484 		.version = 1,
485 		.region_count = region_count,
486 		.payload = payload,
487 	};
488 
489 	table->encoded_size = (sizeof(struct region_header) + payload +
490 			       region_count * sizeof(struct layout_region));
491 	*table_ptr = table;
492 	return UDS_SUCCESS;
493 }
494 
/*
 * Encode a region table into a buffer in little-endian form, advancing *offset past what was
 * written. The magic number written is always REGION_MAGIC, and the unused field of each region
 * is written as zero.
 */
static void encode_region_table(u8 *buffer, size_t *offset, struct region_table *table)
{
	const struct region_header *header = &table->header;
	const struct layout_region *region = table->regions;
	unsigned int remaining;

	encode_u64_le(buffer, offset, REGION_MAGIC);
	encode_u64_le(buffer, offset, header->region_blocks);
	encode_u16_le(buffer, offset, header->type);
	encode_u16_le(buffer, offset, header->version);
	encode_u16_le(buffer, offset, header->region_count);
	encode_u16_le(buffer, offset, header->payload);

	for (remaining = header->region_count; remaining > 0; remaining--, region++) {
		encode_u64_le(buffer, offset, region->start_block);
		encode_u64_le(buffer, offset, region->block_count);
		encode_u32_le(buffer, offset, 0);
		encode_u16_le(buffer, offset, region->kind);
		encode_u16_le(buffer, offset, region->instance);
	}
}
514 
write_index_save_header(struct index_save_layout * isl,struct region_table * table,struct buffered_writer * writer)515 static int __must_check write_index_save_header(struct index_save_layout *isl,
516 						struct region_table *table,
517 						struct buffered_writer *writer)
518 {
519 	int result;
520 	u8 *buffer;
521 	size_t offset = 0;
522 
523 	result = vdo_allocate(table->encoded_size, u8, "index save data", &buffer);
524 	if (result != VDO_SUCCESS)
525 		return result;
526 
527 	encode_region_table(buffer, &offset, table);
528 	encode_u64_le(buffer, &offset, isl->save_data.timestamp);
529 	encode_u64_le(buffer, &offset, isl->save_data.nonce);
530 	encode_u32_le(buffer, &offset, isl->save_data.version);
531 	encode_u32_le(buffer, &offset, 0);
532 	if (isl->zone_count > 0) {
533 		encode_u32_le(buffer, &offset, INDEX_STATE_VERSION_301.signature);
534 		encode_u32_le(buffer, &offset, INDEX_STATE_VERSION_301.version_id);
535 		encode_u64_le(buffer, &offset, isl->state_data.newest_chapter);
536 		encode_u64_le(buffer, &offset, isl->state_data.oldest_chapter);
537 		encode_u64_le(buffer, &offset, isl->state_data.last_save);
538 		encode_u64_le(buffer, &offset, 0);
539 	}
540 
541 	result = uds_write_to_buffered_writer(writer, buffer, offset);
542 	vdo_free(buffer);
543 	if (result != UDS_SUCCESS)
544 		return result;
545 
546 	return uds_flush_buffered_writer(writer);
547 }
548 
write_index_save_layout(struct index_layout * layout,struct index_save_layout * isl)549 static int write_index_save_layout(struct index_layout *layout,
550 				   struct index_save_layout *isl)
551 {
552 	int result;
553 	struct region_table *table;
554 	struct buffered_writer *writer;
555 
556 	result = make_index_save_region_table(isl, &table);
557 	if (result != UDS_SUCCESS)
558 		return result;
559 
560 	result = open_region_writer(layout, &isl->header, &writer);
561 	if (result != UDS_SUCCESS) {
562 		vdo_free(table);
563 		return result;
564 	}
565 
566 	result = write_index_save_header(isl, table, writer);
567 	vdo_free(table);
568 	uds_free_buffered_writer(writer);
569 
570 	return result;
571 }
572 
/*
 * Reset a save layout to the empty state: no zones, cleared save data, a one-block header, a
 * page map of the given size, and everything else in the save region marked as free space.
 */
static void reset_index_save_layout(struct index_save_layout *isl, u64 page_map_blocks)
{
	u64 header_block = isl->index_save.start_block;
	u64 page_map_block = header_block + 1;
	u64 free_block = page_map_block + page_map_blocks;
	u64 free_blocks = isl->index_save.block_count - page_map_blocks - 1;

	isl->zone_count = 0;
	memset(&isl->save_data, 0, sizeof(isl->save_data));

	isl->header = (struct layout_region) {
		.start_block = header_block,
		.block_count = 1,
		.kind = RL_KIND_HEADER,
		.instance = RL_SOLE_INSTANCE,
	};

	isl->index_page_map = (struct layout_region) {
		.start_block = page_map_block,
		.block_count = page_map_blocks,
		.kind = RL_KIND_INDEX_PAGE_MAP,
		.instance = RL_SOLE_INSTANCE,
	};

	isl->free_space = (struct layout_region) {
		.start_block = free_block,
		.block_count = free_blocks,
		.kind = RL_KIND_EMPTY,
		.instance = RL_SOLE_INSTANCE,
	};
}
605 
invalidate_old_save(struct index_layout * layout,struct index_save_layout * isl)606 static int __must_check invalidate_old_save(struct index_layout *layout,
607 					    struct index_save_layout *isl)
608 {
609 	reset_index_save_layout(isl, layout->super.page_map_blocks);
610 	return write_index_save_layout(layout, isl);
611 }
612 
discard_index_state_data(struct index_layout * layout)613 static int discard_index_state_data(struct index_layout *layout)
614 {
615 	int result;
616 	int saved_result = UDS_SUCCESS;
617 	unsigned int i;
618 
619 	for (i = 0; i < layout->super.max_saves; i++) {
620 		result = invalidate_old_save(layout, &layout->index.saves[i]);
621 		if (result != UDS_SUCCESS)
622 			saved_result = result;
623 	}
624 
625 	if (saved_result != UDS_SUCCESS) {
626 		return vdo_log_error_strerror(result,
627 					      "%s: cannot destroy all index saves",
628 					      __func__);
629 	}
630 
631 	return UDS_SUCCESS;
632 }
633 
make_layout_region_table(struct index_layout * layout,struct region_table ** table_ptr)634 static int __must_check make_layout_region_table(struct index_layout *layout,
635 						 struct region_table **table_ptr)
636 {
637 	int result;
638 	unsigned int i;
639 	/* Regions: header, config, index, volume, saves, seal */
640 	u16 region_count = 5 + layout->super.max_saves;
641 	u16 payload;
642 	struct region_table *table;
643 	struct layout_region *lr;
644 
645 	result = vdo_allocate_extended(struct region_table, region_count,
646 				       struct layout_region, "layout region table",
647 				       &table);
648 	if (result != VDO_SUCCESS)
649 		return result;
650 
651 	lr = &table->regions[0];
652 	*lr++ = layout->header;
653 	*lr++ = layout->config;
654 	*lr++ = layout->index.sub_index;
655 	*lr++ = layout->index.volume;
656 
657 	for (i = 0; i < layout->super.max_saves; i++)
658 		*lr++ = layout->index.saves[i].index_save;
659 
660 	*lr++ = layout->seal;
661 
662 	if (is_converted_super_block(&layout->super)) {
663 		payload = sizeof(struct super_block_data);
664 	} else {
665 		payload = (sizeof(struct super_block_data) -
666 			   sizeof(layout->super.volume_offset) -
667 			   sizeof(layout->super.start_offset));
668 	}
669 
670 	table->header = (struct region_header) {
671 		.magic = REGION_MAGIC,
672 		.region_blocks = layout->total_blocks,
673 		.type = RH_TYPE_SUPER,
674 		.version = 1,
675 		.region_count = region_count,
676 		.payload = payload,
677 	};
678 
679 	table->encoded_size = (sizeof(struct region_header) + payload +
680 			       region_count * sizeof(struct layout_region));
681 	*table_ptr = table;
682 	return UDS_SUCCESS;
683 }
684 
write_layout_header(struct index_layout * layout,struct region_table * table,struct buffered_writer * writer)685 static int __must_check write_layout_header(struct index_layout *layout,
686 					    struct region_table *table,
687 					    struct buffered_writer *writer)
688 {
689 	int result;
690 	u8 *buffer;
691 	size_t offset = 0;
692 
693 	result = vdo_allocate(table->encoded_size, u8, "layout data", &buffer);
694 	if (result != VDO_SUCCESS)
695 		return result;
696 
697 	encode_region_table(buffer, &offset, table);
698 	memcpy(buffer + offset, &layout->super.magic_label, MAGIC_SIZE);
699 	offset += MAGIC_SIZE;
700 	memcpy(buffer + offset, &layout->super.nonce_info, NONCE_INFO_SIZE);
701 	offset += NONCE_INFO_SIZE;
702 	encode_u64_le(buffer, &offset, layout->super.nonce);
703 	encode_u32_le(buffer, &offset, layout->super.version);
704 	encode_u32_le(buffer, &offset, layout->super.block_size);
705 	encode_u16_le(buffer, &offset, layout->super.index_count);
706 	encode_u16_le(buffer, &offset, layout->super.max_saves);
707 	encode_u32_le(buffer, &offset, 0);
708 	encode_u64_le(buffer, &offset, layout->super.open_chapter_blocks);
709 	encode_u64_le(buffer, &offset, layout->super.page_map_blocks);
710 
711 	if (is_converted_super_block(&layout->super)) {
712 		encode_u64_le(buffer, &offset, layout->super.volume_offset);
713 		encode_u64_le(buffer, &offset, layout->super.start_offset);
714 	}
715 
716 	result = uds_write_to_buffered_writer(writer, buffer, offset);
717 	vdo_free(buffer);
718 	if (result != UDS_SUCCESS)
719 		return result;
720 
721 	return uds_flush_buffered_writer(writer);
722 }
723 
write_uds_index_config(struct index_layout * layout,struct uds_configuration * config,off_t offset)724 static int __must_check write_uds_index_config(struct index_layout *layout,
725 					       struct uds_configuration *config,
726 					       off_t offset)
727 {
728 	int result;
729 	struct buffered_writer *writer = NULL;
730 
731 	result = open_layout_writer(layout, &layout->config, offset, &writer);
732 	if (result != UDS_SUCCESS)
733 		return vdo_log_error_strerror(result, "failed to open config region");
734 
735 	result = uds_write_config_contents(writer, config, layout->super.version);
736 	if (result != UDS_SUCCESS) {
737 		uds_free_buffered_writer(writer);
738 		return vdo_log_error_strerror(result, "failed to write config region");
739 	}
740 
741 	result = uds_flush_buffered_writer(writer);
742 	if (result != UDS_SUCCESS) {
743 		uds_free_buffered_writer(writer);
744 		return vdo_log_error_strerror(result, "cannot flush config writer");
745 	}
746 
747 	uds_free_buffered_writer(writer);
748 	return UDS_SUCCESS;
749 }
750 
save_layout(struct index_layout * layout,off_t offset)751 static int __must_check save_layout(struct index_layout *layout, off_t offset)
752 {
753 	int result;
754 	struct buffered_writer *writer = NULL;
755 	struct region_table *table;
756 
757 	result = make_layout_region_table(layout, &table);
758 	if (result != UDS_SUCCESS)
759 		return result;
760 
761 	result = open_layout_writer(layout, &layout->header, offset, &writer);
762 	if (result != UDS_SUCCESS) {
763 		vdo_free(table);
764 		return result;
765 	}
766 
767 	result = write_layout_header(layout, table, writer);
768 	vdo_free(table);
769 	uds_free_buffered_writer(writer);
770 
771 	return result;
772 }
773 
create_index_layout(struct index_layout * layout,struct uds_configuration * config)774 static int create_index_layout(struct index_layout *layout, struct uds_configuration *config)
775 {
776 	int result;
777 	struct save_layout_sizes sizes;
778 
779 	result = compute_sizes(config, &sizes);
780 	if (result != UDS_SUCCESS)
781 		return result;
782 
783 	result = vdo_allocate(sizes.save_count, struct index_save_layout, __func__,
784 			      &layout->index.saves);
785 	if (result != VDO_SUCCESS)
786 		return result;
787 
788 	initialize_layout(layout, &sizes);
789 
790 	result = discard_index_state_data(layout);
791 	if (result != UDS_SUCCESS)
792 		return result;
793 
794 	result = write_uds_index_config(layout, config, 0);
795 	if (result != UDS_SUCCESS)
796 		return result;
797 
798 	return save_layout(layout, 0);
799 }
800 
/*
 * Compute the nonce for a save by hashing the volume nonce with the save's encoded timestamp,
 * version, and starting block. The save's own nonce field is encoded as zero, so the result does
 * not depend on any nonce already stored in the save.
 */
static u64 generate_index_save_nonce(u64 volume_nonce, struct index_save_layout *isl)
{
	struct save_nonce_data {
		struct index_save_data data;
		u64 offset;
	};
	const struct index_save_data *save = &isl->save_data;
	u8 buffer[sizeof(struct save_nonce_data)];
	size_t offset = 0;

	encode_u64_le(buffer, &offset, save->timestamp);
	encode_u64_le(buffer, &offset, 0);
	encode_u32_le(buffer, &offset, save->version);
	encode_u32_le(buffer, &offset, 0U);
	encode_u64_le(buffer, &offset, isl->index_save.start_block);
	VDO_ASSERT_LOG_ONLY(offset == sizeof(struct save_nonce_data),
			    "%zu bytes encoded of %zu expected",
			    offset, sizeof(struct save_nonce_data));

	return generate_secondary_nonce(volume_nonce, buffer, sizeof(buffer));
}
820 
/*
 * Check whether a save is valid for the given volume nonce.
 *
 * Return: the save's timestamp if it has zones, a non-zero timestamp, and a nonce matching the
 *         one computed for it; otherwise 0
 */
static u64 validate_index_save_layout(struct index_save_layout *isl, u64 volume_nonce)
{
	u64 timestamp = isl->save_data.timestamp;

	if (isl->zone_count == 0)
		return 0;

	if (timestamp == 0)
		return 0;

	if (generate_index_save_nonce(volume_nonce, isl) != isl->save_data.nonce)
		return 0;

	return timestamp;
}
831 
find_latest_uds_index_save_slot(struct index_layout * layout,struct index_save_layout ** isl_ptr)832 static int find_latest_uds_index_save_slot(struct index_layout *layout,
833 					   struct index_save_layout **isl_ptr)
834 {
835 	struct index_save_layout *latest = NULL;
836 	struct index_save_layout *isl;
837 	unsigned int i;
838 	u64 save_time = 0;
839 	u64 latest_time = 0;
840 
841 	for (i = 0; i < layout->super.max_saves; i++) {
842 		isl = &layout->index.saves[i];
843 		save_time = validate_index_save_layout(isl, layout->index.nonce);
844 		if (save_time > latest_time) {
845 			latest = isl;
846 			latest_time = save_time;
847 		}
848 	}
849 
850 	if (latest == NULL) {
851 		vdo_log_error("No valid index save found");
852 		return UDS_INDEX_NOT_SAVED_CLEANLY;
853 	}
854 
855 	*isl_ptr = latest;
856 	return UDS_SUCCESS;
857 }
858 
uds_discard_open_chapter(struct index_layout * layout)859 int uds_discard_open_chapter(struct index_layout *layout)
860 {
861 	int result;
862 	struct index_save_layout *isl;
863 	struct buffered_writer *writer;
864 
865 	result = find_latest_uds_index_save_slot(layout, &isl);
866 	if (result != UDS_SUCCESS)
867 		return result;
868 
869 	result = open_region_writer(layout, &isl->open_chapter, &writer);
870 	if (result != UDS_SUCCESS)
871 		return result;
872 
873 	result = uds_write_to_buffered_writer(writer, NULL, UDS_BLOCK_SIZE);
874 	if (result != UDS_SUCCESS) {
875 		uds_free_buffered_writer(writer);
876 		return result;
877 	}
878 
879 	result = uds_flush_buffered_writer(writer);
880 	uds_free_buffered_writer(writer);
881 	return result;
882 }
883 
uds_load_index_state(struct index_layout * layout,struct uds_index * index)884 int uds_load_index_state(struct index_layout *layout, struct uds_index *index)
885 {
886 	int result;
887 	unsigned int zone;
888 	struct index_save_layout *isl;
889 	struct buffered_reader *readers[MAX_ZONES];
890 
891 	result = find_latest_uds_index_save_slot(layout, &isl);
892 	if (result != UDS_SUCCESS)
893 		return result;
894 
895 	index->newest_virtual_chapter = isl->state_data.newest_chapter;
896 	index->oldest_virtual_chapter = isl->state_data.oldest_chapter;
897 	index->last_save = isl->state_data.last_save;
898 
899 	result = open_region_reader(layout, &isl->open_chapter, &readers[0]);
900 	if (result != UDS_SUCCESS)
901 		return result;
902 
903 	result = uds_load_open_chapter(index, readers[0]);
904 	uds_free_buffered_reader(readers[0]);
905 	if (result != UDS_SUCCESS)
906 		return result;
907 
908 	for (zone = 0; zone < isl->zone_count; zone++) {
909 		result = open_region_reader(layout, &isl->volume_index_zones[zone],
910 					    &readers[zone]);
911 		if (result != UDS_SUCCESS) {
912 			for (; zone > 0; zone--)
913 				uds_free_buffered_reader(readers[zone - 1]);
914 
915 			return result;
916 		}
917 	}
918 
919 	result = uds_load_volume_index(index->volume_index, readers, isl->zone_count);
920 	for (zone = 0; zone < isl->zone_count; zone++)
921 		uds_free_buffered_reader(readers[zone]);
922 	if (result != UDS_SUCCESS)
923 		return result;
924 
925 	result = open_region_reader(layout, &isl->index_page_map, &readers[0]);
926 	if (result != UDS_SUCCESS)
927 		return result;
928 
929 	result = uds_read_index_page_map(index->volume->index_page_map, readers[0]);
930 	uds_free_buffered_reader(readers[0]);
931 
932 	return result;
933 }
934 
select_oldest_index_save_layout(struct index_layout * layout)935 static struct index_save_layout *select_oldest_index_save_layout(struct index_layout *layout)
936 {
937 	struct index_save_layout *oldest = NULL;
938 	struct index_save_layout *isl;
939 	unsigned int i;
940 	u64 save_time = 0;
941 	u64 oldest_time = 0;
942 
943 	for (i = 0; i < layout->super.max_saves; i++) {
944 		isl = &layout->index.saves[i];
945 		save_time = validate_index_save_layout(isl, layout->index.nonce);
946 		if (oldest == NULL || save_time < oldest_time) {
947 			oldest = isl;
948 			oldest_time = save_time;
949 		}
950 	}
951 
952 	return oldest;
953 }
954 
/*
 * Prepare a save slot for a new save. The save data is cleared and stamped (timestamp, version,
 * nonce), then the slot's blocks are divided, in order, into a one-block header, the index page
 * map, one equally sized volume index region per zone, the open chapter, and a trailing empty
 * region that holds whatever blocks are left over.
 */
static void instantiate_index_save_layout(struct index_save_layout *isl,
					  struct super_block_data *super,
					  u64 volume_nonce, unsigned int zone_count)
{
	unsigned int zone;
	u64 cursor;
	u64 spare_blocks;
	u64 blocks_per_zone;

	isl->zone_count = zone_count;
	memset(&isl->save_data, 0, sizeof(isl->save_data));
	isl->save_data.timestamp = ktime_to_ms(current_time_ns(CLOCK_REALTIME));
	isl->save_data.version = 1;
	/* The nonce is generated only after the other save data fields are in place. */
	isl->save_data.nonce = generate_index_save_nonce(volume_nonce, isl);

	cursor = isl->index_save.start_block;

	isl->header = (struct layout_region) {
		.start_block = cursor,
		.block_count = 1,
		.kind = RL_KIND_HEADER,
		.instance = RL_SOLE_INSTANCE,
	};
	cursor += 1;

	isl->index_page_map = (struct layout_region) {
		.start_block = cursor,
		.block_count = super->page_map_blocks,
		.kind = RL_KIND_INDEX_PAGE_MAP,
		.instance = RL_SOLE_INSTANCE,
	};
	cursor += super->page_map_blocks;

	/* Whatever the header, page map, and open chapter do not use goes to the zones. */
	spare_blocks = isl->index_save.block_count - 1;
	spare_blocks -= super->page_map_blocks;
	spare_blocks -= super->open_chapter_blocks;
	blocks_per_zone = spare_blocks / isl->zone_count;

	for (zone = 0; zone < isl->zone_count; zone++) {
		isl->volume_index_zones[zone] = (struct layout_region) {
			.start_block = cursor,
			.block_count = blocks_per_zone,
			.kind = RL_KIND_VOLUME_INDEX,
			.instance = zone,
		};

		cursor += blocks_per_zone;
		spare_blocks -= blocks_per_zone;
	}

	isl->open_chapter = (struct layout_region) {
		.start_block = cursor,
		.block_count = super->open_chapter_blocks,
		.kind = RL_KIND_OPEN_CHAPTER,
		.instance = RL_SOLE_INSTANCE,
	};
	cursor += super->open_chapter_blocks;

	/* The remainder of the division above becomes the empty region. */
	isl->free_space = (struct layout_region) {
		.start_block = cursor,
		.block_count = spare_blocks,
		.kind = RL_KIND_EMPTY,
		.instance = RL_SOLE_INSTANCE,
	};
}
1018 
setup_uds_index_save_slot(struct index_layout * layout,unsigned int zone_count,struct index_save_layout ** isl_ptr)1019 static int setup_uds_index_save_slot(struct index_layout *layout,
1020 				     unsigned int zone_count,
1021 				     struct index_save_layout **isl_ptr)
1022 {
1023 	int result;
1024 	struct index_save_layout *isl;
1025 
1026 	isl = select_oldest_index_save_layout(layout);
1027 	result = invalidate_old_save(layout, isl);
1028 	if (result != UDS_SUCCESS)
1029 		return result;
1030 
1031 	instantiate_index_save_layout(isl, &layout->super, layout->index.nonce,
1032 				      zone_count);
1033 
1034 	*isl_ptr = isl;
1035 	return UDS_SUCCESS;
1036 }
1037 
cancel_uds_index_save(struct index_save_layout * isl)1038 static void cancel_uds_index_save(struct index_save_layout *isl)
1039 {
1040 	memset(&isl->save_data, 0, sizeof(isl->save_data));
1041 	memset(&isl->state_data, 0, sizeof(isl->state_data));
1042 	isl->zone_count = 0;
1043 }
1044 
uds_save_index_state(struct index_layout * layout,struct uds_index * index)1045 int uds_save_index_state(struct index_layout *layout, struct uds_index *index)
1046 {
1047 	int result;
1048 	unsigned int zone;
1049 	struct index_save_layout *isl;
1050 	struct buffered_writer *writers[MAX_ZONES];
1051 
1052 	result = setup_uds_index_save_slot(layout, index->zone_count, &isl);
1053 	if (result != UDS_SUCCESS)
1054 		return result;
1055 
1056 	isl->state_data	= (struct index_state_data301) {
1057 		.newest_chapter = index->newest_virtual_chapter,
1058 		.oldest_chapter = index->oldest_virtual_chapter,
1059 		.last_save = index->last_save,
1060 	};
1061 
1062 	result = open_region_writer(layout, &isl->open_chapter, &writers[0]);
1063 	if (result != UDS_SUCCESS) {
1064 		cancel_uds_index_save(isl);
1065 		return result;
1066 	}
1067 
1068 	result = uds_save_open_chapter(index, writers[0]);
1069 	uds_free_buffered_writer(writers[0]);
1070 	if (result != UDS_SUCCESS) {
1071 		cancel_uds_index_save(isl);
1072 		return result;
1073 	}
1074 
1075 	for (zone = 0; zone < index->zone_count; zone++) {
1076 		result = open_region_writer(layout, &isl->volume_index_zones[zone],
1077 					    &writers[zone]);
1078 		if (result != UDS_SUCCESS) {
1079 			for (; zone > 0; zone--)
1080 				uds_free_buffered_writer(writers[zone - 1]);
1081 
1082 			cancel_uds_index_save(isl);
1083 			return result;
1084 		}
1085 	}
1086 
1087 	result = uds_save_volume_index(index->volume_index, writers, index->zone_count);
1088 	for (zone = 0; zone < index->zone_count; zone++)
1089 		uds_free_buffered_writer(writers[zone]);
1090 	if (result != UDS_SUCCESS) {
1091 		cancel_uds_index_save(isl);
1092 		return result;
1093 	}
1094 
1095 	result = open_region_writer(layout, &isl->index_page_map, &writers[0]);
1096 	if (result != UDS_SUCCESS) {
1097 		cancel_uds_index_save(isl);
1098 		return result;
1099 	}
1100 
1101 	result = uds_write_index_page_map(index->volume->index_page_map, writers[0]);
1102 	uds_free_buffered_writer(writers[0]);
1103 	if (result != UDS_SUCCESS) {
1104 		cancel_uds_index_save(isl);
1105 		return result;
1106 	}
1107 
1108 	return write_index_save_layout(layout, isl);
1109 }
1110 
load_region_table(struct buffered_reader * reader,struct region_table ** table_ptr)1111 static int __must_check load_region_table(struct buffered_reader *reader,
1112 					  struct region_table **table_ptr)
1113 {
1114 	int result;
1115 	unsigned int i;
1116 	struct region_header header;
1117 	struct region_table *table;
1118 	u8 buffer[sizeof(struct region_header)];
1119 	size_t offset = 0;
1120 
1121 	result = uds_read_from_buffered_reader(reader, buffer, sizeof(buffer));
1122 	if (result != UDS_SUCCESS)
1123 		return vdo_log_error_strerror(result, "cannot read region table header");
1124 
1125 	decode_u64_le(buffer, &offset, &header.magic);
1126 	decode_u64_le(buffer, &offset, &header.region_blocks);
1127 	decode_u16_le(buffer, &offset, &header.type);
1128 	decode_u16_le(buffer, &offset, &header.version);
1129 	decode_u16_le(buffer, &offset, &header.region_count);
1130 	decode_u16_le(buffer, &offset, &header.payload);
1131 
1132 	if (header.magic != REGION_MAGIC)
1133 		return UDS_NO_INDEX;
1134 
1135 	if (header.version != 1) {
1136 		return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1137 					      "unknown region table version %hu",
1138 					      header.version);
1139 	}
1140 
1141 	result = vdo_allocate_extended(struct region_table, header.region_count,
1142 				       struct layout_region,
1143 				       "single file layout region table", &table);
1144 	if (result != VDO_SUCCESS)
1145 		return result;
1146 
1147 	table->header = header;
1148 	for (i = 0; i < header.region_count; i++) {
1149 		u8 region_buffer[sizeof(struct layout_region)];
1150 
1151 		offset = 0;
1152 		result = uds_read_from_buffered_reader(reader, region_buffer,
1153 						       sizeof(region_buffer));
1154 		if (result != UDS_SUCCESS) {
1155 			vdo_free(table);
1156 			return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1157 						      "cannot read region table layouts");
1158 		}
1159 
1160 		decode_u64_le(region_buffer, &offset, &table->regions[i].start_block);
1161 		decode_u64_le(region_buffer, &offset, &table->regions[i].block_count);
1162 		offset += sizeof(u32);
1163 		decode_u16_le(region_buffer, &offset, &table->regions[i].kind);
1164 		decode_u16_le(region_buffer, &offset, &table->regions[i].instance);
1165 	}
1166 
1167 	*table_ptr = table;
1168 	return UDS_SUCCESS;
1169 }
1170 
read_super_block_data(struct buffered_reader * reader,struct index_layout * layout,size_t saved_size)1171 static int __must_check read_super_block_data(struct buffered_reader *reader,
1172 					      struct index_layout *layout,
1173 					      size_t saved_size)
1174 {
1175 	int result;
1176 	struct super_block_data *super = &layout->super;
1177 	u8 *buffer;
1178 	size_t offset = 0;
1179 
1180 	result = vdo_allocate(saved_size, u8, "super block data", &buffer);
1181 	if (result != VDO_SUCCESS)
1182 		return result;
1183 
1184 	result = uds_read_from_buffered_reader(reader, buffer, saved_size);
1185 	if (result != UDS_SUCCESS) {
1186 		vdo_free(buffer);
1187 		return vdo_log_error_strerror(result, "cannot read region table header");
1188 	}
1189 
1190 	memcpy(&super->magic_label, buffer, MAGIC_SIZE);
1191 	offset += MAGIC_SIZE;
1192 	memcpy(&super->nonce_info, buffer + offset, NONCE_INFO_SIZE);
1193 	offset += NONCE_INFO_SIZE;
1194 	decode_u64_le(buffer, &offset, &super->nonce);
1195 	decode_u32_le(buffer, &offset, &super->version);
1196 	decode_u32_le(buffer, &offset, &super->block_size);
1197 	decode_u16_le(buffer, &offset, &super->index_count);
1198 	decode_u16_le(buffer, &offset, &super->max_saves);
1199 	offset += sizeof(u32);
1200 	decode_u64_le(buffer, &offset, &super->open_chapter_blocks);
1201 	decode_u64_le(buffer, &offset, &super->page_map_blocks);
1202 
1203 	if (is_converted_super_block(super)) {
1204 		decode_u64_le(buffer, &offset, &super->volume_offset);
1205 		decode_u64_le(buffer, &offset, &super->start_offset);
1206 	} else {
1207 		super->volume_offset = 0;
1208 		super->start_offset = 0;
1209 	}
1210 
1211 	vdo_free(buffer);
1212 
1213 	if (memcmp(super->magic_label, LAYOUT_MAGIC, MAGIC_SIZE) != 0)
1214 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1215 					      "unknown superblock magic label");
1216 
1217 	if ((super->version < SUPER_VERSION_MINIMUM) ||
1218 	    (super->version == 4) || (super->version == 5) || (super->version == 6) ||
1219 	    (super->version > SUPER_VERSION_MAXIMUM)) {
1220 		return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1221 					      "unknown superblock version number %u",
1222 					      super->version);
1223 	}
1224 
1225 	if (super->volume_offset < super->start_offset) {
1226 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1227 					      "inconsistent offsets (start %llu, volume %llu)",
1228 					      (unsigned long long) super->start_offset,
1229 					      (unsigned long long) super->volume_offset);
1230 	}
1231 
1232 	/* Sub-indexes are no longer used but the layout retains this field. */
1233 	if (super->index_count != 1) {
1234 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1235 					      "invalid subindex count %u",
1236 					      super->index_count);
1237 	}
1238 
1239 	if (generate_primary_nonce(super->nonce_info, sizeof(super->nonce_info)) != super->nonce) {
1240 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1241 					      "inconsistent superblock nonce");
1242 	}
1243 
1244 	return UDS_SUCCESS;
1245 }
1246 
verify_region(struct layout_region * lr,u64 start_block,enum region_kind kind,unsigned int instance)1247 static int __must_check verify_region(struct layout_region *lr, u64 start_block,
1248 				      enum region_kind kind, unsigned int instance)
1249 {
1250 	if (lr->start_block != start_block)
1251 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1252 					      "incorrect layout region offset");
1253 
1254 	if (lr->kind != kind)
1255 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1256 					      "incorrect layout region kind");
1257 
1258 	if (lr->instance != instance) {
1259 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1260 					      "incorrect layout region instance");
1261 	}
1262 
1263 	return UDS_SUCCESS;
1264 }
1265 
verify_sub_index(struct index_layout * layout,u64 start_block,struct region_table * table)1266 static int __must_check verify_sub_index(struct index_layout *layout, u64 start_block,
1267 					 struct region_table *table)
1268 {
1269 	int result;
1270 	unsigned int i;
1271 	struct sub_index_layout *sil = &layout->index;
1272 	u64 next_block = start_block;
1273 
1274 	sil->sub_index = table->regions[2];
1275 	result = verify_region(&sil->sub_index, next_block, RL_KIND_INDEX, 0);
1276 	if (result != UDS_SUCCESS)
1277 		return result;
1278 
1279 	define_sub_index_nonce(layout);
1280 
1281 	sil->volume = table->regions[3];
1282 	result = verify_region(&sil->volume, next_block, RL_KIND_VOLUME,
1283 			       RL_SOLE_INSTANCE);
1284 	if (result != UDS_SUCCESS)
1285 		return result;
1286 
1287 	next_block += sil->volume.block_count + layout->super.volume_offset;
1288 
1289 	for (i = 0; i < layout->super.max_saves; i++) {
1290 		sil->saves[i].index_save = table->regions[i + 4];
1291 		result = verify_region(&sil->saves[i].index_save, next_block,
1292 				       RL_KIND_SAVE, i);
1293 		if (result != UDS_SUCCESS)
1294 			return result;
1295 
1296 		next_block += sil->saves[i].index_save.block_count;
1297 	}
1298 
1299 	next_block -= layout->super.volume_offset;
1300 	if (next_block != start_block + sil->sub_index.block_count) {
1301 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1302 					      "sub index region does not span all saves");
1303 	}
1304 
1305 	return UDS_SUCCESS;
1306 }
1307 
reconstitute_layout(struct index_layout * layout,struct region_table * table,u64 first_block)1308 static int __must_check reconstitute_layout(struct index_layout *layout,
1309 					    struct region_table *table, u64 first_block)
1310 {
1311 	int result;
1312 	u64 next_block = first_block;
1313 
1314 	result = vdo_allocate(layout->super.max_saves, struct index_save_layout,
1315 			      __func__, &layout->index.saves);
1316 	if (result != VDO_SUCCESS)
1317 		return result;
1318 
1319 	layout->total_blocks = table->header.region_blocks;
1320 
1321 	layout->header = table->regions[0];
1322 	result = verify_region(&layout->header, next_block++, RL_KIND_HEADER,
1323 			       RL_SOLE_INSTANCE);
1324 	if (result != UDS_SUCCESS)
1325 		return result;
1326 
1327 	layout->config = table->regions[1];
1328 	result = verify_region(&layout->config, next_block++, RL_KIND_CONFIG,
1329 			       RL_SOLE_INSTANCE);
1330 	if (result != UDS_SUCCESS)
1331 		return result;
1332 
1333 	result = verify_sub_index(layout, next_block, table);
1334 	if (result != UDS_SUCCESS)
1335 		return result;
1336 
1337 	next_block += layout->index.sub_index.block_count;
1338 
1339 	layout->seal = table->regions[table->header.region_count - 1];
1340 	result = verify_region(&layout->seal, next_block + layout->super.volume_offset,
1341 			       RL_KIND_SEAL, RL_SOLE_INSTANCE);
1342 	if (result != UDS_SUCCESS)
1343 		return result;
1344 
1345 	if (++next_block != (first_block + layout->total_blocks)) {
1346 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1347 					      "layout table does not span total blocks");
1348 	}
1349 
1350 	return UDS_SUCCESS;
1351 }
1352 
load_super_block(struct index_layout * layout,size_t block_size,u64 first_block,struct buffered_reader * reader)1353 static int __must_check load_super_block(struct index_layout *layout, size_t block_size,
1354 					 u64 first_block, struct buffered_reader *reader)
1355 {
1356 	int result;
1357 	struct region_table *table = NULL;
1358 	struct super_block_data *super = &layout->super;
1359 
1360 	result = load_region_table(reader, &table);
1361 	if (result != UDS_SUCCESS)
1362 		return result;
1363 
1364 	if (table->header.type != RH_TYPE_SUPER) {
1365 		vdo_free(table);
1366 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1367 					      "not a superblock region table");
1368 	}
1369 
1370 	result = read_super_block_data(reader, layout, table->header.payload);
1371 	if (result != UDS_SUCCESS) {
1372 		vdo_free(table);
1373 		return vdo_log_error_strerror(result, "unknown superblock format");
1374 	}
1375 
1376 	if (super->block_size != block_size) {
1377 		vdo_free(table);
1378 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1379 					      "superblock saved block_size %u differs from supplied block_size %zu",
1380 					      super->block_size, block_size);
1381 	}
1382 
1383 	first_block -= (super->volume_offset - super->start_offset);
1384 	result = reconstitute_layout(layout, table, first_block);
1385 	vdo_free(table);
1386 	return result;
1387 }
1388 
read_index_save_data(struct buffered_reader * reader,struct index_save_layout * isl,size_t saved_size)1389 static int __must_check read_index_save_data(struct buffered_reader *reader,
1390 					     struct index_save_layout *isl,
1391 					     size_t saved_size)
1392 {
1393 	int result;
1394 	struct index_state_version file_version;
1395 	u8 buffer[sizeof(struct index_save_data) + sizeof(struct index_state_data301)];
1396 	size_t offset = 0;
1397 
1398 	if (saved_size != sizeof(buffer)) {
1399 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1400 					      "unexpected index save data size %zu",
1401 					      saved_size);
1402 	}
1403 
1404 	result = uds_read_from_buffered_reader(reader, buffer, sizeof(buffer));
1405 	if (result != UDS_SUCCESS)
1406 		return vdo_log_error_strerror(result, "cannot read index save data");
1407 
1408 	decode_u64_le(buffer, &offset, &isl->save_data.timestamp);
1409 	decode_u64_le(buffer, &offset, &isl->save_data.nonce);
1410 	decode_u32_le(buffer, &offset, &isl->save_data.version);
1411 	offset += sizeof(u32);
1412 
1413 	if (isl->save_data.version > 1) {
1414 		return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1415 					      "unknown index save version number %u",
1416 					      isl->save_data.version);
1417 	}
1418 
1419 	decode_s32_le(buffer, &offset, &file_version.signature);
1420 	decode_s32_le(buffer, &offset, &file_version.version_id);
1421 
1422 	if ((file_version.signature != INDEX_STATE_VERSION_301.signature) ||
1423 	    (file_version.version_id != INDEX_STATE_VERSION_301.version_id)) {
1424 		return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1425 					      "index state version %d,%d is unsupported",
1426 					      file_version.signature,
1427 					      file_version.version_id);
1428 	}
1429 
1430 	decode_u64_le(buffer, &offset, &isl->state_data.newest_chapter);
1431 	decode_u64_le(buffer, &offset, &isl->state_data.oldest_chapter);
1432 	decode_u64_le(buffer, &offset, &isl->state_data.last_save);
1433 	/* Skip past some historical fields that are now unused */
1434 	offset += sizeof(u32) + sizeof(u32);
1435 	return UDS_SUCCESS;
1436 }
1437 
reconstruct_index_save(struct index_save_layout * isl,struct region_table * table)1438 static int __must_check reconstruct_index_save(struct index_save_layout *isl,
1439 					       struct region_table *table)
1440 {
1441 	int result;
1442 	unsigned int z;
1443 	struct layout_region *last_region;
1444 	u64 next_block = isl->index_save.start_block;
1445 	u64 last_block = next_block + isl->index_save.block_count;
1446 
1447 	isl->zone_count = table->header.region_count - 3;
1448 
1449 	last_region = &table->regions[table->header.region_count - 1];
1450 	if (last_region->kind == RL_KIND_EMPTY) {
1451 		isl->free_space = *last_region;
1452 		isl->zone_count--;
1453 	} else {
1454 		isl->free_space = (struct layout_region) {
1455 			.start_block = last_block,
1456 			.block_count = 0,
1457 			.kind = RL_KIND_EMPTY,
1458 			.instance = RL_SOLE_INSTANCE,
1459 		};
1460 	}
1461 
1462 	isl->header = table->regions[0];
1463 	result = verify_region(&isl->header, next_block++, RL_KIND_HEADER,
1464 			       RL_SOLE_INSTANCE);
1465 	if (result != UDS_SUCCESS)
1466 		return result;
1467 
1468 	isl->index_page_map = table->regions[1];
1469 	result = verify_region(&isl->index_page_map, next_block, RL_KIND_INDEX_PAGE_MAP,
1470 			       RL_SOLE_INSTANCE);
1471 	if (result != UDS_SUCCESS)
1472 		return result;
1473 
1474 	next_block += isl->index_page_map.block_count;
1475 
1476 	for (z = 0; z < isl->zone_count; z++) {
1477 		isl->volume_index_zones[z] = table->regions[z + 2];
1478 		result = verify_region(&isl->volume_index_zones[z], next_block,
1479 				       RL_KIND_VOLUME_INDEX, z);
1480 		if (result != UDS_SUCCESS)
1481 			return result;
1482 
1483 		next_block += isl->volume_index_zones[z].block_count;
1484 	}
1485 
1486 	isl->open_chapter = table->regions[isl->zone_count + 2];
1487 	result = verify_region(&isl->open_chapter, next_block, RL_KIND_OPEN_CHAPTER,
1488 			       RL_SOLE_INSTANCE);
1489 	if (result != UDS_SUCCESS)
1490 		return result;
1491 
1492 	next_block += isl->open_chapter.block_count;
1493 
1494 	result = verify_region(&isl->free_space, next_block, RL_KIND_EMPTY,
1495 			       RL_SOLE_INSTANCE);
1496 	if (result != UDS_SUCCESS)
1497 		return result;
1498 
1499 	next_block += isl->free_space.block_count;
1500 	if (next_block != last_block) {
1501 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1502 					      "index save layout table incomplete");
1503 	}
1504 
1505 	return UDS_SUCCESS;
1506 }
1507 
load_index_save(struct index_save_layout * isl,struct buffered_reader * reader,unsigned int instance)1508 static int __must_check load_index_save(struct index_save_layout *isl,
1509 					struct buffered_reader *reader,
1510 					unsigned int instance)
1511 {
1512 	int result;
1513 	struct region_table *table = NULL;
1514 
1515 	result = load_region_table(reader, &table);
1516 	if (result != UDS_SUCCESS) {
1517 		return vdo_log_error_strerror(result, "cannot read index save %u header",
1518 					      instance);
1519 	}
1520 
1521 	if (table->header.region_blocks != isl->index_save.block_count) {
1522 		u64 region_blocks = table->header.region_blocks;
1523 
1524 		vdo_free(table);
1525 		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1526 					      "unexpected index save %u region block count %llu",
1527 					      instance,
1528 					      (unsigned long long) region_blocks);
1529 	}
1530 
1531 	if (table->header.type == RH_TYPE_UNSAVED) {
1532 		vdo_free(table);
1533 		reset_index_save_layout(isl, 0);
1534 		return UDS_SUCCESS;
1535 	}
1536 
1537 
1538 	if (table->header.type != RH_TYPE_SAVE) {
1539 		vdo_log_error_strerror(UDS_CORRUPT_DATA,
1540 				       "unexpected index save %u header type %u",
1541 				       instance, table->header.type);
1542 		vdo_free(table);
1543 		return UDS_CORRUPT_DATA;
1544 	}
1545 
1546 	result = read_index_save_data(reader, isl, table->header.payload);
1547 	if (result != UDS_SUCCESS) {
1548 		vdo_free(table);
1549 		return vdo_log_error_strerror(result,
1550 					      "unknown index save %u data format",
1551 					      instance);
1552 	}
1553 
1554 	result = reconstruct_index_save(isl, table);
1555 	vdo_free(table);
1556 	if (result != UDS_SUCCESS) {
1557 		return vdo_log_error_strerror(result, "cannot reconstruct index save %u",
1558 					      instance);
1559 	}
1560 
1561 	return UDS_SUCCESS;
1562 }
1563 
load_sub_index_regions(struct index_layout * layout)1564 static int __must_check load_sub_index_regions(struct index_layout *layout)
1565 {
1566 	int result;
1567 	unsigned int j;
1568 	struct index_save_layout *isl;
1569 	struct buffered_reader *reader;
1570 
1571 	for (j = 0; j < layout->super.max_saves; j++) {
1572 		isl = &layout->index.saves[j];
1573 		result = open_region_reader(layout, &isl->index_save, &reader);
1574 
1575 		if (result != UDS_SUCCESS) {
1576 			vdo_log_error_strerror(result,
1577 					       "cannot get reader for index 0 save %u",
1578 					       j);
1579 			return result;
1580 		}
1581 
1582 		result = load_index_save(isl, reader, j);
1583 		uds_free_buffered_reader(reader);
1584 		if (result != UDS_SUCCESS) {
1585 			/* Another save slot might be valid. */
1586 			reset_index_save_layout(isl, 0);
1587 			continue;
1588 		}
1589 	}
1590 
1591 	return UDS_SUCCESS;
1592 }
1593 
verify_uds_index_config(struct index_layout * layout,struct uds_configuration * config)1594 static int __must_check verify_uds_index_config(struct index_layout *layout,
1595 						struct uds_configuration *config)
1596 {
1597 	int result;
1598 	struct buffered_reader *reader = NULL;
1599 	u64 offset;
1600 
1601 	offset = layout->super.volume_offset - layout->super.start_offset;
1602 	result = open_layout_reader(layout, &layout->config, offset, &reader);
1603 	if (result != UDS_SUCCESS)
1604 		return vdo_log_error_strerror(result, "failed to open config reader");
1605 
1606 	result = uds_validate_config_contents(reader, config);
1607 	if (result != UDS_SUCCESS) {
1608 		uds_free_buffered_reader(reader);
1609 		return vdo_log_error_strerror(result, "failed to read config region");
1610 	}
1611 
1612 	uds_free_buffered_reader(reader);
1613 	return UDS_SUCCESS;
1614 }
1615 
load_index_layout(struct index_layout * layout,struct uds_configuration * config)1616 static int load_index_layout(struct index_layout *layout, struct uds_configuration *config)
1617 {
1618 	int result;
1619 	struct buffered_reader *reader;
1620 
1621 	result = uds_make_buffered_reader(layout->factory,
1622 					  layout->offset / UDS_BLOCK_SIZE, 1, &reader);
1623 	if (result != UDS_SUCCESS)
1624 		return vdo_log_error_strerror(result, "unable to read superblock");
1625 
1626 	result = load_super_block(layout, UDS_BLOCK_SIZE,
1627 				  layout->offset / UDS_BLOCK_SIZE, reader);
1628 	uds_free_buffered_reader(reader);
1629 	if (result != UDS_SUCCESS)
1630 		return result;
1631 
1632 	result = verify_uds_index_config(layout, config);
1633 	if (result != UDS_SUCCESS)
1634 		return result;
1635 
1636 	return load_sub_index_regions(layout);
1637 }
1638 
create_layout_factory(struct index_layout * layout,const struct uds_configuration * config)1639 static int create_layout_factory(struct index_layout *layout,
1640 				 const struct uds_configuration *config)
1641 {
1642 	int result;
1643 	size_t writable_size;
1644 	struct io_factory *factory = NULL;
1645 
1646 	result = uds_make_io_factory(config->bdev, &factory);
1647 	if (result != UDS_SUCCESS)
1648 		return result;
1649 
1650 	writable_size = uds_get_writable_size(factory) & -UDS_BLOCK_SIZE;
1651 	if (writable_size < config->size + config->offset) {
1652 		uds_put_io_factory(factory);
1653 		vdo_log_error("index storage (%zu) is smaller than the requested size %zu",
1654 			      writable_size, config->size + config->offset);
1655 		return -ENOSPC;
1656 	}
1657 
1658 	layout->factory = factory;
1659 	layout->factory_size = (config->size > 0) ? config->size : writable_size;
1660 	layout->offset = config->offset;
1661 	return UDS_SUCCESS;
1662 }
1663 
/*
 * Allocate an index layout and either create it on storage (new_layout) or load it from storage.
 * The storage must be at least as large as the size computed from the configuration, otherwise
 * -ENOSPC is returned. On success the layout is returned through layout_ptr; on any failure the
 * partially built layout is freed and layout_ptr is left untouched.
 */
int uds_make_index_layout(struct uds_configuration *config, bool new_layout,
			  struct index_layout **layout_ptr)
{
	int result;
	struct save_layout_sizes sizes;
	struct index_layout *built = NULL;

	result = compute_sizes(config, &sizes);
	if (result != UDS_SUCCESS)
		return result;

	result = vdo_allocate(1, struct index_layout, __func__, &built);
	if (result != VDO_SUCCESS)
		return result;

	result = create_layout_factory(built, config);
	if (result != UDS_SUCCESS)
		goto fail;

	if (built->factory_size < sizes.total_size) {
		vdo_log_error("index storage (%zu) is smaller than the required size %llu",
			      built->factory_size,
			      (unsigned long long) sizes.total_size);
		result = -ENOSPC;
		goto fail;
	}

	result = new_layout ? create_index_layout(built, config) :
			      load_index_layout(built, config);
	if (result != UDS_SUCCESS)
		goto fail;

	*layout_ptr = built;
	return UDS_SUCCESS;

fail:
	uds_free_index_layout(built);
	return result;
}
1705 
uds_free_index_layout(struct index_layout * layout)1706 void uds_free_index_layout(struct index_layout *layout)
1707 {
1708 	if (layout == NULL)
1709 		return;
1710 
1711 	vdo_free(layout->index.saves);
1712 	if (layout->factory != NULL)
1713 		uds_put_io_factory(layout->factory);
1714 
1715 	vdo_free(layout);
1716 }
1717 
uds_replace_index_layout_storage(struct index_layout * layout,struct block_device * bdev)1718 int uds_replace_index_layout_storage(struct index_layout *layout,
1719 				     struct block_device *bdev)
1720 {
1721 	return uds_replace_storage(layout->factory, bdev);
1722 }
1723 
1724 /* Obtain a dm_bufio_client for the volume region. */
uds_open_volume_bufio(struct index_layout * layout,size_t block_size,unsigned int reserved_buffers,struct dm_bufio_client ** client_ptr)1725 int uds_open_volume_bufio(struct index_layout *layout, size_t block_size,
1726 			  unsigned int reserved_buffers,
1727 			  struct dm_bufio_client **client_ptr)
1728 {
1729 	off_t offset = (layout->index.volume.start_block +
1730 			layout->super.volume_offset -
1731 			layout->super.start_offset);
1732 
1733 	return uds_make_bufio(layout->factory, offset, block_size, reserved_buffers,
1734 			      client_ptr);
1735 }
1736 
uds_get_volume_nonce(struct index_layout * layout)1737 u64 uds_get_volume_nonce(struct index_layout *layout)
1738 {
1739 	return layout->index.nonce;
1740 }
1741