1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright 2023 Red Hat
4 */
5
6 #include "index-layout.h"
7
8 #include <linux/random.h>
9
10 #include "logger.h"
11 #include "memory-alloc.h"
12 #include "murmurhash3.h"
13 #include "numeric.h"
14 #include "time-utils.h"
15
16 #include "config.h"
17 #include "open-chapter.h"
18 #include "volume-index.h"
19
20 /*
21 * The UDS layout on storage media is divided into a number of fixed-size regions, the sizes of
22 * which are computed when the index is created. Every header and region begins on a 4K block
23 * boundary. Save regions are further sub-divided into regions of their own.
24 *
25 * Each region has a kind and an instance number. Some kinds only have one instance and therefore
26 * use RL_SOLE_INSTANCE (-1) as the instance number. The RL_KIND_INDEX used to use instances to
27 * represent sub-indices; now, however, there is only ever one sub-index and therefore one instance.
28 * The RL_KIND_VOLUME_INDEX uses instances to record which zone is being saved.
29 *
30 * Every region header has a type and version.
31 *
32 * +-+-+---------+--------+--------+-+
33 * | | |  I N D E X   0     101, 0 | |
34 * |H|C+---------+--------+--------+S|
35 * |D|f| Volume  | Save   | Save   |e|
36 * |R|g| Region  | Region | Region |a|
37 * | | | 201, -1 | 202, 0 | 202, 1 |l|
38 * +-+-+---------+--------+--------+-+
39 *
40 * The header contains the encoded region layout table as well as some index configuration data.
41 * The sub-index region and its subdivisions are maintained in the same table.
42 *
43 * There are two save regions to preserve the old state in case saving the new state is incomplete.
44 * They are used in alternation. Each save region is further divided into sub-regions.
45 *
46 * +-+-----+------+------+-----+-----+
47 * |H| IPM | MI   | MI   |     | OC  |
48 * |D|     | zone | zone | ... |     |
49 * |R| 301 | 302  | 302  |     | 303 |
50 * | | -1  | 0    | 1    |     | -1  |
51 * +-+-----+------+------+-----+-----+
52 *
53 * The header contains the encoded region layout table as well as index state data for that save.
54 * Each save also has a unique nonce.
55 */
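
/*
 * Illustrative sketch only, excluded from the build: the block arithmetic
 * implied by the layout described above, using hypothetical sizes. The
 * authoritative computation is compute_sizes() later in this file.
 */
#if 0
static u64 example_total_blocks(void)
{
	/* Hypothetical example sizes, in 4K blocks. */
	u64 volume_blocks = 262144;
	u64 volume_index_blocks = 4096;	/* per save */
	u64 page_map_blocks = 2;
	u64 open_chapter_blocks = 64;
	u64 save_count = 2;		/* MAX_SAVES */

	/* Each save region: one header block plus its sub-regions. */
	u64 save_blocks = 1 + volume_index_blocks + page_map_blocks + open_chapter_blocks;
	/* The sub-index spans the volume plus every save region. */
	u64 sub_index_blocks = volume_blocks + (save_count * save_blocks);

	/* The full layout adds the header, config, and seal blocks. */
	return 3 + sub_index_blocks;
}
#endif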
56
57 #define MAGIC_SIZE 32
58 #define NONCE_INFO_SIZE 32
59 #define MAX_SAVES 2
60
61 enum region_kind {
62 RL_KIND_EMPTY = 0,
63 RL_KIND_HEADER = 1,
64 RL_KIND_CONFIG = 100,
65 RL_KIND_INDEX = 101,
66 RL_KIND_SEAL = 102,
67 RL_KIND_VOLUME = 201,
68 RL_KIND_SAVE = 202,
69 RL_KIND_INDEX_PAGE_MAP = 301,
70 RL_KIND_VOLUME_INDEX = 302,
71 RL_KIND_OPEN_CHAPTER = 303,
72 };
73
74 /* Some region types are historical and are no longer used. */
75 enum region_type {
76 RH_TYPE_FREE = 0, /* unused */
77 RH_TYPE_SUPER = 1,
78 RH_TYPE_SAVE = 2,
79 RH_TYPE_CHECKPOINT = 3, /* unused */
80 RH_TYPE_UNSAVED = 4,
81 };
82
83 #define RL_SOLE_INSTANCE 65535
84
85 /*
86 * Super block version 2 is the first released version.
87 *
88 * Super block version 3 is the normal version used from RHEL 8.2 onwards.
89 *
90 * Super block versions 4 through 6 were incremental development versions and
91 * are not supported.
92 *
93 * Super block version 7 is used for volumes which have been reduced in size by one chapter in
94 * order to make room to prepend LVM metadata to a volume originally created without lvm. This
95 * allows the index to retain most of its deduplication records.
96 */
97 #define SUPER_VERSION_MINIMUM 3
98 #define SUPER_VERSION_CURRENT 3
99 #define SUPER_VERSION_MAXIMUM 7
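
/*
 * Illustrative sketch only, excluded from the build: the acceptance rule
 * these constants imply, matching the checks in read_super_block_data()
 * below. New indexes are always written as SUPER_VERSION_CURRENT.
 */
#if 0
static bool example_super_version_is_supported(u32 version)
{
	/* Only versions 3 (normal) and 7 (LVM-converted) can be loaded. */
	return (version >= SUPER_VERSION_MINIMUM) &&
	       (version <= SUPER_VERSION_MAXIMUM) &&
	       (version != 4) && (version != 5) && (version != 6);
}
#endif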
100
101 static const u8 LAYOUT_MAGIC[MAGIC_SIZE] = "*ALBIREO*SINGLE*FILE*LAYOUT*001*";
102 static const u64 REGION_MAGIC = 0x416c6252676e3031; /* 'AlbRgn01' */
103
104 struct region_header {
105 u64 magic;
106 u64 region_blocks;
107 u16 type;
108 /* Currently always version 1 */
109 u16 version;
110 u16 region_count;
111 u16 payload;
112 };
113
114 struct layout_region {
115 u64 start_block;
116 u64 block_count;
117 u32 __unused;
118 u16 kind;
119 u16 instance;
120 };
121
122 struct region_table {
123 size_t encoded_size;
124 struct region_header header;
125 struct layout_region regions[];
126 };
127
128 struct index_save_data {
129 u64 timestamp;
130 u64 nonce;
131 /* Currently always version 1 */
132 u32 version;
133 u32 unused__;
134 };
135
136 struct index_state_version {
137 s32 signature;
138 s32 version_id;
139 };
140
141 static const struct index_state_version INDEX_STATE_VERSION_301 = {
142 .signature = -1,
143 .version_id = 301,
144 };
145
146 struct index_state_data301 {
147 struct index_state_version version;
148 u64 newest_chapter;
149 u64 oldest_chapter;
150 u64 last_save;
151 u32 unused;
152 u32 padding;
153 };
154
155 struct index_save_layout {
156 unsigned int zone_count;
157 struct layout_region index_save;
158 struct layout_region header;
159 struct layout_region index_page_map;
160 struct layout_region free_space;
161 struct layout_region volume_index_zones[MAX_ZONES];
162 struct layout_region open_chapter;
163 struct index_save_data save_data;
164 struct index_state_data301 state_data;
165 };
166
167 struct sub_index_layout {
168 u64 nonce;
169 struct layout_region sub_index;
170 struct layout_region volume;
171 struct index_save_layout *saves;
172 };
173
174 struct super_block_data {
175 u8 magic_label[MAGIC_SIZE];
176 u8 nonce_info[NONCE_INFO_SIZE];
177 u64 nonce;
178 u32 version;
179 u32 block_size;
180 u16 index_count;
181 u16 max_saves;
182 /* Padding reflects a blank field on permanent storage */
183 u8 padding[4];
184 u64 open_chapter_blocks;
185 u64 page_map_blocks;
186 u64 volume_offset;
187 u64 start_offset;
188 };
189
190 struct index_layout {
191 struct io_factory *factory;
192 size_t factory_size;
193 off_t offset;
194 struct super_block_data super;
195 struct layout_region header;
196 struct layout_region config;
197 struct sub_index_layout index;
198 struct layout_region seal;
199 u64 total_blocks;
200 };
201
202 struct save_layout_sizes {
203 unsigned int save_count;
204 size_t block_size;
205 u64 volume_blocks;
206 u64 volume_index_blocks;
207 u64 page_map_blocks;
208 u64 open_chapter_blocks;
209 u64 save_blocks;
210 u64 sub_index_blocks;
211 u64 total_blocks;
212 size_t total_size;
213 };
214
215 static inline bool is_converted_super_block(struct super_block_data *super)
216 {
217 return super->version == 7;
218 }
219
220 static int __must_check compute_sizes(const struct uds_configuration *config,
221 struct save_layout_sizes *sls)
222 {
223 int result;
224 struct index_geometry *geometry = config->geometry;
225
226 memset(sls, 0, sizeof(*sls));
227 sls->save_count = MAX_SAVES;
228 sls->block_size = UDS_BLOCK_SIZE;
229 sls->volume_blocks = geometry->bytes_per_volume / sls->block_size;
230
231 result = uds_compute_volume_index_save_blocks(config, sls->block_size,
232 &sls->volume_index_blocks);
233 if (result != UDS_SUCCESS)
234 return vdo_log_error_strerror(result, "cannot compute index save size");
235
236 sls->page_map_blocks =
237 DIV_ROUND_UP(uds_compute_index_page_map_save_size(geometry),
238 sls->block_size);
239 sls->open_chapter_blocks =
240 DIV_ROUND_UP(uds_compute_saved_open_chapter_size(geometry),
241 sls->block_size);
242 sls->save_blocks =
243 1 + (sls->volume_index_blocks + sls->page_map_blocks + sls->open_chapter_blocks);
244 sls->sub_index_blocks = sls->volume_blocks + (sls->save_count * sls->save_blocks);
245 sls->total_blocks = 3 + sls->sub_index_blocks;
246 sls->total_size = sls->total_blocks * sls->block_size;
247
248 return UDS_SUCCESS;
249 }
250
251 int uds_compute_index_size(const struct uds_parameters *parameters, u64 *index_size)
252 {
253 int result;
254 struct uds_configuration *index_config;
255 struct save_layout_sizes sizes;
256
257 if (index_size == NULL) {
258 vdo_log_error("Missing output size pointer");
259 return -EINVAL;
260 }
261
262 result = uds_make_configuration(parameters, &index_config);
263 if (result != UDS_SUCCESS) {
264 vdo_log_error_strerror(result, "cannot compute index size");
265 return uds_status_to_errno(result);
266 }
267
268 result = compute_sizes(index_config, &sizes);
269 uds_free_configuration(index_config);
270 if (result != UDS_SUCCESS)
271 return uds_status_to_errno(result);
272
273 *index_size = sizes.total_size;
274 return UDS_SUCCESS;
275 }
276
277 /* Create unique data using the current time and a pseudorandom number. */
278 static void create_unique_nonce_data(u8 *buffer)
279 {
280 ktime_t now = current_time_ns(CLOCK_REALTIME);
281 u32 rand;
282 size_t offset = 0;
283
284 get_random_bytes(&rand, sizeof(u32));
285 memcpy(buffer + offset, &now, sizeof(now));
286 offset += sizeof(now);
287 memcpy(buffer + offset, &rand, sizeof(rand));
288 offset += sizeof(rand);
289 while (offset < NONCE_INFO_SIZE) {
290 size_t len = min(NONCE_INFO_SIZE - offset, offset);
291
292 memcpy(buffer + offset, buffer, len);
293 offset += len;
294 }
295 }
296
297 static u64 hash_stuff(u64 start, const void *data, size_t len)
298 {
299 u32 seed = start ^ (start >> 27);
300 u8 hash_buffer[16];
301
302 murmurhash3_128(data, len, seed, hash_buffer);
303 return get_unaligned_le64(hash_buffer + 4);
304 }
305
306 /* Generate a primary nonce from the provided data. */
307 static u64 generate_primary_nonce(const void *data, size_t len)
308 {
309 return hash_stuff(0xa1b1e0fc, data, len);
310 }
311
312 /*
313 * Deterministically generate a secondary nonce from an existing nonce and some arbitrary data by
314 * hashing the original nonce and the data to produce a new nonce.
315 */
316 static u64 generate_secondary_nonce(u64 nonce, const void *data, size_t len)
317 {
318 return hash_stuff(nonce + 1, data, len);
319 }
320
321 static int __must_check open_layout_reader(struct index_layout *layout,
322 struct layout_region *lr, off_t offset,
323 struct buffered_reader **reader_ptr)
324 {
325 return uds_make_buffered_reader(layout->factory, lr->start_block + offset,
326 lr->block_count, reader_ptr);
327 }
328
329 static int open_region_reader(struct index_layout *layout, struct layout_region *region,
330 struct buffered_reader **reader_ptr)
331 {
332 return open_layout_reader(layout, region, -layout->super.start_offset,
333 reader_ptr);
334 }
335
336 static int __must_check open_layout_writer(struct index_layout *layout,
337 struct layout_region *lr, off_t offset,
338 struct buffered_writer **writer_ptr)
339 {
340 return uds_make_buffered_writer(layout->factory, lr->start_block + offset,
341 lr->block_count, writer_ptr);
342 }
343
344 static int open_region_writer(struct index_layout *layout, struct layout_region *region,
345 struct buffered_writer **writer_ptr)
346 {
347 return open_layout_writer(layout, region, -layout->super.start_offset,
348 writer_ptr);
349 }
350
351 static void generate_super_block_data(struct save_layout_sizes *sls,
352 struct super_block_data *super)
353 {
354 memset(super, 0, sizeof(*super));
355 memcpy(super->magic_label, LAYOUT_MAGIC, MAGIC_SIZE);
356 create_unique_nonce_data(super->nonce_info);
357
358 super->nonce = generate_primary_nonce(super->nonce_info,
359 sizeof(super->nonce_info));
360 super->version = SUPER_VERSION_CURRENT;
361 super->block_size = sls->block_size;
362 super->index_count = 1;
363 super->max_saves = sls->save_count;
364 super->open_chapter_blocks = sls->open_chapter_blocks;
365 super->page_map_blocks = sls->page_map_blocks;
366 super->volume_offset = 0;
367 super->start_offset = 0;
368 }
369
370 static void define_sub_index_nonce(struct index_layout *layout)
371 {
372 struct sub_index_nonce_data {
373 u64 offset;
374 u16 index_id;
375 };
376 struct sub_index_layout *sil = &layout->index;
377 u64 primary_nonce = layout->super.nonce;
378 u8 buffer[sizeof(struct sub_index_nonce_data)] = { 0 };
379 size_t offset = 0;
380
381 encode_u64_le(buffer, &offset, sil->sub_index.start_block);
382 encode_u16_le(buffer, &offset, 0);
383 sil->nonce = generate_secondary_nonce(primary_nonce, buffer, sizeof(buffer));
384 if (sil->nonce == 0) {
385 sil->nonce = generate_secondary_nonce(~primary_nonce + 1, buffer,
386 sizeof(buffer));
387 }
388 }
389
390 static void setup_sub_index(struct index_layout *layout, u64 start_block,
391 struct save_layout_sizes *sls)
392 {
393 struct sub_index_layout *sil = &layout->index;
394 u64 next_block = start_block;
395 unsigned int i;
396
397 sil->sub_index = (struct layout_region) {
398 .start_block = start_block,
399 .block_count = sls->sub_index_blocks,
400 .kind = RL_KIND_INDEX,
401 .instance = 0,
402 };
403
404 sil->volume = (struct layout_region) {
405 .start_block = next_block,
406 .block_count = sls->volume_blocks,
407 .kind = RL_KIND_VOLUME,
408 .instance = RL_SOLE_INSTANCE,
409 };
410
411 next_block += sls->volume_blocks;
412
413 for (i = 0; i < sls->save_count; i++) {
414 sil->saves[i].index_save = (struct layout_region) {
415 .start_block = next_block,
416 .block_count = sls->save_blocks,
417 .kind = RL_KIND_SAVE,
418 .instance = i,
419 };
420
421 next_block += sls->save_blocks;
422 }
423
424 define_sub_index_nonce(layout);
425 }
426
427 static void initialize_layout(struct index_layout *layout, struct save_layout_sizes *sls)
428 {
429 u64 next_block = layout->offset / sls->block_size;
430
431 layout->total_blocks = sls->total_blocks;
432 generate_super_block_data(sls, &layout->super);
433 layout->header = (struct layout_region) {
434 .start_block = next_block++,
435 .block_count = 1,
436 .kind = RL_KIND_HEADER,
437 .instance = RL_SOLE_INSTANCE,
438 };
439
440 layout->config = (struct layout_region) {
441 .start_block = next_block++,
442 .block_count = 1,
443 .kind = RL_KIND_CONFIG,
444 .instance = RL_SOLE_INSTANCE,
445 };
446
447 setup_sub_index(layout, next_block, sls);
448 next_block += sls->sub_index_blocks;
449
450 layout->seal = (struct layout_region) {
451 .start_block = next_block,
452 .block_count = 1,
453 .kind = RL_KIND_SEAL,
454 .instance = RL_SOLE_INSTANCE,
455 };
456 }
457
458 static int __must_check make_index_save_region_table(struct index_save_layout *isl,
459 struct region_table **table_ptr)
460 {
461 int result;
462 unsigned int z;
463 struct region_table *table;
464 struct layout_region *lr;
465 u16 region_count;
466 size_t payload;
467 size_t type;
468
469 if (isl->zone_count > 0) {
470 /*
471 * Normal save regions: header, page map, volume index zones,
472 * open chapter, and possibly free space.
473 */
474 region_count = 3 + isl->zone_count;
475 if (isl->free_space.block_count > 0)
476 region_count++;
477
478 payload = sizeof(isl->save_data) + sizeof(isl->state_data);
479 type = RH_TYPE_SAVE;
480 } else {
481 /* Empty save regions: header, page map, free space. */
482 region_count = 3;
483 payload = sizeof(isl->save_data);
484 type = RH_TYPE_UNSAVED;
485 }
486
487 result = vdo_allocate_extended(struct region_table, region_count,
488 struct layout_region,
489 "layout region table for ISL", &table);
490 if (result != VDO_SUCCESS)
491 return result;
492
493 lr = &table->regions[0];
494 *lr++ = isl->header;
495 *lr++ = isl->index_page_map;
496 for (z = 0; z < isl->zone_count; z++)
497 *lr++ = isl->volume_index_zones[z];
498
499 if (isl->zone_count > 0)
500 *lr++ = isl->open_chapter;
501
502 if (isl->free_space.block_count > 0)
503 *lr++ = isl->free_space;
504
505 table->header = (struct region_header) {
506 .magic = REGION_MAGIC,
507 .region_blocks = isl->index_save.block_count,
508 .type = type,
509 .version = 1,
510 .region_count = region_count,
511 .payload = payload,
512 };
513
514 table->encoded_size = (sizeof(struct region_header) + payload +
515 region_count * sizeof(struct layout_region));
516 *table_ptr = table;
517 return UDS_SUCCESS;
518 }
519
520 static void encode_region_table(u8 *buffer, size_t *offset, struct region_table *table)
521 {
522 unsigned int i;
523
524 encode_u64_le(buffer, offset, REGION_MAGIC);
525 encode_u64_le(buffer, offset, table->header.region_blocks);
526 encode_u16_le(buffer, offset, table->header.type);
527 encode_u16_le(buffer, offset, table->header.version);
528 encode_u16_le(buffer, offset, table->header.region_count);
529 encode_u16_le(buffer, offset, table->header.payload);
530
531 for (i = 0; i < table->header.region_count; i++) {
532 encode_u64_le(buffer, offset, table->regions[i].start_block);
533 encode_u64_le(buffer, offset, table->regions[i].block_count);
534 encode_u32_le(buffer, offset, 0);
535 encode_u16_le(buffer, offset, table->regions[i].kind);
536 encode_u16_le(buffer, offset, table->regions[i].instance);
537 }
538 }
539
540 static int __must_check write_index_save_header(struct index_save_layout *isl,
541 struct region_table *table,
542 struct buffered_writer *writer)
543 {
544 int result;
545 u8 *buffer;
546 size_t offset = 0;
547
548 result = vdo_allocate(table->encoded_size, u8, "index save data", &buffer);
549 if (result != VDO_SUCCESS)
550 return result;
551
552 encode_region_table(buffer, &offset, table);
553 encode_u64_le(buffer, &offset, isl->save_data.timestamp);
554 encode_u64_le(buffer, &offset, isl->save_data.nonce);
555 encode_u32_le(buffer, &offset, isl->save_data.version);
556 encode_u32_le(buffer, &offset, 0);
557 if (isl->zone_count > 0) {
558 encode_u32_le(buffer, &offset, INDEX_STATE_VERSION_301.signature);
559 encode_u32_le(buffer, &offset, INDEX_STATE_VERSION_301.version_id);
560 encode_u64_le(buffer, &offset, isl->state_data.newest_chapter);
561 encode_u64_le(buffer, &offset, isl->state_data.oldest_chapter);
562 encode_u64_le(buffer, &offset, isl->state_data.last_save);
563 encode_u64_le(buffer, &offset, 0);
564 }
565
566 result = uds_write_to_buffered_writer(writer, buffer, offset);
567 vdo_free(buffer);
568 if (result != UDS_SUCCESS)
569 return result;
570
571 return uds_flush_buffered_writer(writer);
572 }
573
574 static int write_index_save_layout(struct index_layout *layout,
575 struct index_save_layout *isl)
576 {
577 int result;
578 struct region_table *table;
579 struct buffered_writer *writer;
580
581 result = make_index_save_region_table(isl, &table);
582 if (result != UDS_SUCCESS)
583 return result;
584
585 result = open_region_writer(layout, &isl->header, &writer);
586 if (result != UDS_SUCCESS) {
587 vdo_free(table);
588 return result;
589 }
590
591 result = write_index_save_header(isl, table, writer);
592 vdo_free(table);
593 uds_free_buffered_writer(writer);
594
595 return result;
596 }
597
598 static void reset_index_save_layout(struct index_save_layout *isl, u64 page_map_blocks)
599 {
600 u64 free_blocks;
601 u64 next_block = isl->index_save.start_block;
602
603 isl->zone_count = 0;
604 memset(&isl->save_data, 0, sizeof(isl->save_data));
605
606 isl->header = (struct layout_region) {
607 .start_block = next_block++,
608 .block_count = 1,
609 .kind = RL_KIND_HEADER,
610 .instance = RL_SOLE_INSTANCE,
611 };
612
613 isl->index_page_map = (struct layout_region) {
614 .start_block = next_block,
615 .block_count = page_map_blocks,
616 .kind = RL_KIND_INDEX_PAGE_MAP,
617 .instance = RL_SOLE_INSTANCE,
618 };
619
620 next_block += page_map_blocks;
621
622 free_blocks = isl->index_save.block_count - page_map_blocks - 1;
623 isl->free_space = (struct layout_region) {
624 .start_block = next_block,
625 .block_count = free_blocks,
626 .kind = RL_KIND_EMPTY,
627 .instance = RL_SOLE_INSTANCE,
628 };
629 }
630
631 static int __must_check invalidate_old_save(struct index_layout *layout,
632 struct index_save_layout *isl)
633 {
634 reset_index_save_layout(isl, layout->super.page_map_blocks);
635 return write_index_save_layout(layout, isl);
636 }
637
638 static int discard_index_state_data(struct index_layout *layout)
639 {
640 int result;
641 int saved_result = UDS_SUCCESS;
642 unsigned int i;
643
644 for (i = 0; i < layout->super.max_saves; i++) {
645 result = invalidate_old_save(layout, &layout->index.saves[i]);
646 if (result != UDS_SUCCESS)
647 saved_result = result;
648 }
649
650 if (saved_result != UDS_SUCCESS) {
651 return vdo_log_error_strerror(saved_result,
652 "%s: cannot destroy all index saves",
653 __func__);
654 }
655
656 return UDS_SUCCESS;
657 }
658
659 static int __must_check make_layout_region_table(struct index_layout *layout,
660 struct region_table **table_ptr)
661 {
662 int result;
663 unsigned int i;
664 /* Regions: header, config, index, volume, saves, seal */
665 u16 region_count = 5 + layout->super.max_saves;
666 u16 payload;
667 struct region_table *table;
668 struct layout_region *lr;
669
670 result = vdo_allocate_extended(struct region_table, region_count,
671 struct layout_region, "layout region table",
672 &table);
673 if (result != VDO_SUCCESS)
674 return result;
675
676 lr = &table->regions[0];
677 *lr++ = layout->header;
678 *lr++ = layout->config;
679 *lr++ = layout->index.sub_index;
680 *lr++ = layout->index.volume;
681
682 for (i = 0; i < layout->super.max_saves; i++)
683 *lr++ = layout->index.saves[i].index_save;
684
685 *lr++ = layout->seal;
686
687 if (is_converted_super_block(&layout->super)) {
688 payload = sizeof(struct super_block_data);
689 } else {
690 payload = (sizeof(struct super_block_data) -
691 sizeof(layout->super.volume_offset) -
692 sizeof(layout->super.start_offset));
693 }
694
695 table->header = (struct region_header) {
696 .magic = REGION_MAGIC,
697 .region_blocks = layout->total_blocks,
698 .type = RH_TYPE_SUPER,
699 .version = 1,
700 .region_count = region_count,
701 .payload = payload,
702 };
703
704 table->encoded_size = (sizeof(struct region_header) + payload +
705 region_count * sizeof(struct layout_region));
706 *table_ptr = table;
707 return UDS_SUCCESS;
708 }
709
710 static int __must_check write_layout_header(struct index_layout *layout,
711 struct region_table *table,
712 struct buffered_writer *writer)
713 {
714 int result;
715 u8 *buffer;
716 size_t offset = 0;
717
718 result = vdo_allocate(table->encoded_size, u8, "layout data", &buffer);
719 if (result != VDO_SUCCESS)
720 return result;
721
722 encode_region_table(buffer, &offset, table);
723 memcpy(buffer + offset, &layout->super.magic_label, MAGIC_SIZE);
724 offset += MAGIC_SIZE;
725 memcpy(buffer + offset, &layout->super.nonce_info, NONCE_INFO_SIZE);
726 offset += NONCE_INFO_SIZE;
727 encode_u64_le(buffer, &offset, layout->super.nonce);
728 encode_u32_le(buffer, &offset, layout->super.version);
729 encode_u32_le(buffer, &offset, layout->super.block_size);
730 encode_u16_le(buffer, &offset, layout->super.index_count);
731 encode_u16_le(buffer, &offset, layout->super.max_saves);
732 encode_u32_le(buffer, &offset, 0);
733 encode_u64_le(buffer, &offset, layout->super.open_chapter_blocks);
734 encode_u64_le(buffer, &offset, layout->super.page_map_blocks);
735
736 if (is_converted_super_block(&layout->super)) {
737 encode_u64_le(buffer, &offset, layout->super.volume_offset);
738 encode_u64_le(buffer, &offset, layout->super.start_offset);
739 }
740
741 result = uds_write_to_buffered_writer(writer, buffer, offset);
742 vdo_free(buffer);
743 if (result != UDS_SUCCESS)
744 return result;
745
746 return uds_flush_buffered_writer(writer);
747 }
748
749 static int __must_check write_uds_index_config(struct index_layout *layout,
750 struct uds_configuration *config,
751 off_t offset)
752 {
753 int result;
754 struct buffered_writer *writer = NULL;
755
756 result = open_layout_writer(layout, &layout->config, offset, &writer);
757 if (result != UDS_SUCCESS)
758 return vdo_log_error_strerror(result, "failed to open config region");
759
760 result = uds_write_config_contents(writer, config, layout->super.version);
761 if (result != UDS_SUCCESS) {
762 uds_free_buffered_writer(writer);
763 return vdo_log_error_strerror(result, "failed to write config region");
764 }
765
766 result = uds_flush_buffered_writer(writer);
767 if (result != UDS_SUCCESS) {
768 uds_free_buffered_writer(writer);
769 return vdo_log_error_strerror(result, "cannot flush config writer");
770 }
771
772 uds_free_buffered_writer(writer);
773 return UDS_SUCCESS;
774 }
775
776 static int __must_check save_layout(struct index_layout *layout, off_t offset)
777 {
778 int result;
779 struct buffered_writer *writer = NULL;
780 struct region_table *table;
781
782 result = make_layout_region_table(layout, &table);
783 if (result != UDS_SUCCESS)
784 return result;
785
786 result = open_layout_writer(layout, &layout->header, offset, &writer);
787 if (result != UDS_SUCCESS) {
788 vdo_free(table);
789 return result;
790 }
791
792 result = write_layout_header(layout, table, writer);
793 vdo_free(table);
794 uds_free_buffered_writer(writer);
795
796 return result;
797 }
798
799 static int create_index_layout(struct index_layout *layout, struct uds_configuration *config)
800 {
801 int result;
802 struct save_layout_sizes sizes;
803
804 result = compute_sizes(config, &sizes);
805 if (result != UDS_SUCCESS)
806 return result;
807
808 result = vdo_allocate(sizes.save_count, struct index_save_layout, __func__,
809 &layout->index.saves);
810 if (result != VDO_SUCCESS)
811 return result;
812
813 initialize_layout(layout, &sizes);
814
815 result = discard_index_state_data(layout);
816 if (result != UDS_SUCCESS)
817 return result;
818
819 result = write_uds_index_config(layout, config, 0);
820 if (result != UDS_SUCCESS)
821 return result;
822
823 return save_layout(layout, 0);
824 }
825
826 static u64 generate_index_save_nonce(u64 volume_nonce, struct index_save_layout *isl)
827 {
828 struct save_nonce_data {
829 struct index_save_data data;
830 u64 offset;
831 } nonce_data;
832 u8 buffer[sizeof(nonce_data)];
833 size_t offset = 0;
834
835 encode_u64_le(buffer, &offset, isl->save_data.timestamp);
836 encode_u64_le(buffer, &offset, 0);
837 encode_u32_le(buffer, &offset, isl->save_data.version);
838 encode_u32_le(buffer, &offset, 0U);
839 encode_u64_le(buffer, &offset, isl->index_save.start_block);
840 VDO_ASSERT_LOG_ONLY(offset == sizeof(nonce_data),
841 "%zu bytes encoded of %zu expected",
842 offset, sizeof(nonce_data));
843 return generate_secondary_nonce(volume_nonce, buffer, sizeof(buffer));
844 }
845
846 static u64 validate_index_save_layout(struct index_save_layout *isl, u64 volume_nonce)
847 {
848 if ((isl->zone_count == 0) || (isl->save_data.timestamp == 0))
849 return 0;
850
851 if (isl->save_data.nonce != generate_index_save_nonce(volume_nonce, isl))
852 return 0;
853
854 return isl->save_data.timestamp;
855 }
856
857 static int find_latest_uds_index_save_slot(struct index_layout *layout,
858 struct index_save_layout **isl_ptr)
859 {
860 struct index_save_layout *latest = NULL;
861 struct index_save_layout *isl;
862 unsigned int i;
863 u64 save_time = 0;
864 u64 latest_time = 0;
865
866 for (i = 0; i < layout->super.max_saves; i++) {
867 isl = &layout->index.saves[i];
868 save_time = validate_index_save_layout(isl, layout->index.nonce);
869 if (save_time > latest_time) {
870 latest = isl;
871 latest_time = save_time;
872 }
873 }
874
875 if (latest == NULL) {
876 vdo_log_error("No valid index save found");
877 return UDS_INDEX_NOT_SAVED_CLEANLY;
878 }
879
880 *isl_ptr = latest;
881 return UDS_SUCCESS;
882 }
883
884 int uds_discard_open_chapter(struct index_layout *layout)
885 {
886 int result;
887 struct index_save_layout *isl;
888 struct buffered_writer *writer;
889
890 result = find_latest_uds_index_save_slot(layout, &isl);
891 if (result != UDS_SUCCESS)
892 return result;
893
894 result = open_region_writer(layout, &isl->open_chapter, &writer);
895 if (result != UDS_SUCCESS)
896 return result;
897
898 result = uds_write_to_buffered_writer(writer, NULL, UDS_BLOCK_SIZE);
899 if (result != UDS_SUCCESS) {
900 uds_free_buffered_writer(writer);
901 return result;
902 }
903
904 result = uds_flush_buffered_writer(writer);
905 uds_free_buffered_writer(writer);
906 return result;
907 }
908
909 int uds_load_index_state(struct index_layout *layout, struct uds_index *index)
910 {
911 int result;
912 unsigned int zone;
913 struct index_save_layout *isl;
914 struct buffered_reader *readers[MAX_ZONES];
915
916 result = find_latest_uds_index_save_slot(layout, &isl);
917 if (result != UDS_SUCCESS)
918 return result;
919
920 index->newest_virtual_chapter = isl->state_data.newest_chapter;
921 index->oldest_virtual_chapter = isl->state_data.oldest_chapter;
922 index->last_save = isl->state_data.last_save;
923
924 result = open_region_reader(layout, &isl->open_chapter, &readers[0]);
925 if (result != UDS_SUCCESS)
926 return result;
927
928 result = uds_load_open_chapter(index, readers[0]);
929 uds_free_buffered_reader(readers[0]);
930 if (result != UDS_SUCCESS)
931 return result;
932
933 for (zone = 0; zone < isl->zone_count; zone++) {
934 result = open_region_reader(layout, &isl->volume_index_zones[zone],
935 &readers[zone]);
936 if (result != UDS_SUCCESS) {
937 for (; zone > 0; zone--)
938 uds_free_buffered_reader(readers[zone - 1]);
939
940 return result;
941 }
942 }
943
944 result = uds_load_volume_index(index->volume_index, readers, isl->zone_count);
945 for (zone = 0; zone < isl->zone_count; zone++)
946 uds_free_buffered_reader(readers[zone]);
947 if (result != UDS_SUCCESS)
948 return result;
949
950 result = open_region_reader(layout, &isl->index_page_map, &readers[0]);
951 if (result != UDS_SUCCESS)
952 return result;
953
954 result = uds_read_index_page_map(index->volume->index_page_map, readers[0]);
955 uds_free_buffered_reader(readers[0]);
956
957 return result;
958 }
959
960 static struct index_save_layout *select_oldest_index_save_layout(struct index_layout *layout)
961 {
962 struct index_save_layout *oldest = NULL;
963 struct index_save_layout *isl;
964 unsigned int i;
965 u64 save_time = 0;
966 u64 oldest_time = 0;
967
968 for (i = 0; i < layout->super.max_saves; i++) {
969 isl = &layout->index.saves[i];
970 save_time = validate_index_save_layout(isl, layout->index.nonce);
971 if (oldest == NULL || save_time < oldest_time) {
972 oldest = isl;
973 oldest_time = save_time;
974 }
975 }
976
977 return oldest;
978 }
979
980 static void instantiate_index_save_layout(struct index_save_layout *isl,
981 struct super_block_data *super,
982 u64 volume_nonce, unsigned int zone_count)
983 {
984 unsigned int z;
985 u64 next_block;
986 u64 free_blocks;
987 u64 volume_index_blocks;
988
989 isl->zone_count = zone_count;
990 memset(&isl->save_data, 0, sizeof(isl->save_data));
991 isl->save_data.timestamp = ktime_to_ms(current_time_ns(CLOCK_REALTIME));
992 isl->save_data.version = 1;
993 isl->save_data.nonce = generate_index_save_nonce(volume_nonce, isl);
994
995 next_block = isl->index_save.start_block;
996 isl->header = (struct layout_region) {
997 .start_block = next_block++,
998 .block_count = 1,
999 .kind = RL_KIND_HEADER,
1000 .instance = RL_SOLE_INSTANCE,
1001 };
1002
1003 isl->index_page_map = (struct layout_region) {
1004 .start_block = next_block,
1005 .block_count = super->page_map_blocks,
1006 .kind = RL_KIND_INDEX_PAGE_MAP,
1007 .instance = RL_SOLE_INSTANCE,
1008 };
1009 next_block += super->page_map_blocks;
1010
1011 free_blocks = (isl->index_save.block_count - 1 -
1012 super->page_map_blocks -
1013 super->open_chapter_blocks);
1014 volume_index_blocks = free_blocks / isl->zone_count;
1015 for (z = 0; z < isl->zone_count; z++) {
1016 isl->volume_index_zones[z] = (struct layout_region) {
1017 .start_block = next_block,
1018 .block_count = volume_index_blocks,
1019 .kind = RL_KIND_VOLUME_INDEX,
1020 .instance = z,
1021 };
1022
1023 next_block += volume_index_blocks;
1024 free_blocks -= volume_index_blocks;
1025 }
1026
1027 isl->open_chapter = (struct layout_region) {
1028 .start_block = next_block,
1029 .block_count = super->open_chapter_blocks,
1030 .kind = RL_KIND_OPEN_CHAPTER,
1031 .instance = RL_SOLE_INSTANCE,
1032 };
1033
1034 next_block += super->open_chapter_blocks;
1035
1036 isl->free_space = (struct layout_region) {
1037 .start_block = next_block,
1038 .block_count = free_blocks,
1039 .kind = RL_KIND_EMPTY,
1040 .instance = RL_SOLE_INSTANCE,
1041 };
1042 }
1043
1044 static int setup_uds_index_save_slot(struct index_layout *layout,
1045 unsigned int zone_count,
1046 struct index_save_layout **isl_ptr)
1047 {
1048 int result;
1049 struct index_save_layout *isl;
1050
1051 isl = select_oldest_index_save_layout(layout);
1052 result = invalidate_old_save(layout, isl);
1053 if (result != UDS_SUCCESS)
1054 return result;
1055
1056 instantiate_index_save_layout(isl, &layout->super, layout->index.nonce,
1057 zone_count);
1058
1059 *isl_ptr = isl;
1060 return UDS_SUCCESS;
1061 }
1062
1063 static void cancel_uds_index_save(struct index_save_layout *isl)
1064 {
1065 memset(&isl->save_data, 0, sizeof(isl->save_data));
1066 memset(&isl->state_data, 0, sizeof(isl->state_data));
1067 isl->zone_count = 0;
1068 }
1069
1070 int uds_save_index_state(struct index_layout *layout, struct uds_index *index)
1071 {
1072 int result;
1073 unsigned int zone;
1074 struct index_save_layout *isl;
1075 struct buffered_writer *writers[MAX_ZONES];
1076
1077 result = setup_uds_index_save_slot(layout, index->zone_count, &isl);
1078 if (result != UDS_SUCCESS)
1079 return result;
1080
1081 isl->state_data = (struct index_state_data301) {
1082 .newest_chapter = index->newest_virtual_chapter,
1083 .oldest_chapter = index->oldest_virtual_chapter,
1084 .last_save = index->last_save,
1085 };
1086
1087 result = open_region_writer(layout, &isl->open_chapter, &writers[0]);
1088 if (result != UDS_SUCCESS) {
1089 cancel_uds_index_save(isl);
1090 return result;
1091 }
1092
1093 result = uds_save_open_chapter(index, writers[0]);
1094 uds_free_buffered_writer(writers[0]);
1095 if (result != UDS_SUCCESS) {
1096 cancel_uds_index_save(isl);
1097 return result;
1098 }
1099
1100 for (zone = 0; zone < index->zone_count; zone++) {
1101 result = open_region_writer(layout, &isl->volume_index_zones[zone],
1102 &writers[zone]);
1103 if (result != UDS_SUCCESS) {
1104 for (; zone > 0; zone--)
1105 uds_free_buffered_writer(writers[zone - 1]);
1106
1107 cancel_uds_index_save(isl);
1108 return result;
1109 }
1110 }
1111
1112 result = uds_save_volume_index(index->volume_index, writers, index->zone_count);
1113 for (zone = 0; zone < index->zone_count; zone++)
1114 uds_free_buffered_writer(writers[zone]);
1115 if (result != UDS_SUCCESS) {
1116 cancel_uds_index_save(isl);
1117 return result;
1118 }
1119
1120 result = open_region_writer(layout, &isl->index_page_map, &writers[0]);
1121 if (result != UDS_SUCCESS) {
1122 cancel_uds_index_save(isl);
1123 return result;
1124 }
1125
1126 result = uds_write_index_page_map(index->volume->index_page_map, writers[0]);
1127 uds_free_buffered_writer(writers[0]);
1128 if (result != UDS_SUCCESS) {
1129 cancel_uds_index_save(isl);
1130 return result;
1131 }
1132
1133 return write_index_save_layout(layout, isl);
1134 }
1135
1136 static int __must_check load_region_table(struct buffered_reader *reader,
1137 struct region_table **table_ptr)
1138 {
1139 int result;
1140 unsigned int i;
1141 struct region_header header;
1142 struct region_table *table;
1143 u8 buffer[sizeof(struct region_header)];
1144 size_t offset = 0;
1145
1146 result = uds_read_from_buffered_reader(reader, buffer, sizeof(buffer));
1147 if (result != UDS_SUCCESS)
1148 return vdo_log_error_strerror(result, "cannot read region table header");
1149
1150 decode_u64_le(buffer, &offset, &header.magic);
1151 decode_u64_le(buffer, &offset, &header.region_blocks);
1152 decode_u16_le(buffer, &offset, &header.type);
1153 decode_u16_le(buffer, &offset, &header.version);
1154 decode_u16_le(buffer, &offset, &header.region_count);
1155 decode_u16_le(buffer, &offset, &header.payload);
1156
1157 if (header.magic != REGION_MAGIC)
1158 return UDS_NO_INDEX;
1159
1160 if (header.version != 1) {
1161 return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1162 "unknown region table version %hu",
1163 header.version);
1164 }
1165
1166 result = vdo_allocate_extended(struct region_table, header.region_count,
1167 struct layout_region,
1168 "single file layout region table", &table);
1169 if (result != VDO_SUCCESS)
1170 return result;
1171
1172 table->header = header;
1173 for (i = 0; i < header.region_count; i++) {
1174 u8 region_buffer[sizeof(struct layout_region)];
1175
1176 offset = 0;
1177 result = uds_read_from_buffered_reader(reader, region_buffer,
1178 sizeof(region_buffer));
1179 if (result != UDS_SUCCESS) {
1180 vdo_free(table);
1181 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1182 "cannot read region table layouts");
1183 }
1184
1185 decode_u64_le(region_buffer, &offset, &table->regions[i].start_block);
1186 decode_u64_le(region_buffer, &offset, &table->regions[i].block_count);
1187 offset += sizeof(u32);
1188 decode_u16_le(region_buffer, &offset, &table->regions[i].kind);
1189 decode_u16_le(region_buffer, &offset, &table->regions[i].instance);
1190 }
1191
1192 *table_ptr = table;
1193 return UDS_SUCCESS;
1194 }
1195
1196 static int __must_check read_super_block_data(struct buffered_reader *reader,
1197 struct index_layout *layout,
1198 size_t saved_size)
1199 {
1200 int result;
1201 struct super_block_data *super = &layout->super;
1202 u8 *buffer;
1203 size_t offset = 0;
1204
1205 result = vdo_allocate(saved_size, u8, "super block data", &buffer);
1206 if (result != VDO_SUCCESS)
1207 return result;
1208
1209 result = uds_read_from_buffered_reader(reader, buffer, saved_size);
1210 if (result != UDS_SUCCESS) {
1211 vdo_free(buffer);
1212 return vdo_log_error_strerror(result, "cannot read region table header");
1213 }
1214
1215 memcpy(&super->magic_label, buffer, MAGIC_SIZE);
1216 offset += MAGIC_SIZE;
1217 memcpy(&super->nonce_info, buffer + offset, NONCE_INFO_SIZE);
1218 offset += NONCE_INFO_SIZE;
1219 decode_u64_le(buffer, &offset, &super->nonce);
1220 decode_u32_le(buffer, &offset, &super->version);
1221 decode_u32_le(buffer, &offset, &super->block_size);
1222 decode_u16_le(buffer, &offset, &super->index_count);
1223 decode_u16_le(buffer, &offset, &super->max_saves);
1224 offset += sizeof(u32);
1225 decode_u64_le(buffer, &offset, &super->open_chapter_blocks);
1226 decode_u64_le(buffer, &offset, &super->page_map_blocks);
1227
1228 if (is_converted_super_block(super)) {
1229 decode_u64_le(buffer, &offset, &super->volume_offset);
1230 decode_u64_le(buffer, &offset, &super->start_offset);
1231 } else {
1232 super->volume_offset = 0;
1233 super->start_offset = 0;
1234 }
1235
1236 vdo_free(buffer);
1237
1238 if (memcmp(super->magic_label, LAYOUT_MAGIC, MAGIC_SIZE) != 0)
1239 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1240 "unknown superblock magic label");
1241
1242 if ((super->version < SUPER_VERSION_MINIMUM) ||
1243 (super->version == 4) || (super->version == 5) || (super->version == 6) ||
1244 (super->version > SUPER_VERSION_MAXIMUM)) {
1245 return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1246 "unknown superblock version number %u",
1247 super->version);
1248 }
1249
1250 if (super->volume_offset < super->start_offset) {
1251 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1252 "inconsistent offsets (start %llu, volume %llu)",
1253 (unsigned long long) super->start_offset,
1254 (unsigned long long) super->volume_offset);
1255 }
1256
1257 /* Sub-indexes are no longer used but the layout retains this field. */
1258 if (super->index_count != 1) {
1259 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1260 "invalid subindex count %u",
1261 super->index_count);
1262 }
1263
1264 if (generate_primary_nonce(super->nonce_info, sizeof(super->nonce_info)) != super->nonce) {
1265 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1266 "inconsistent superblock nonce");
1267 }
1268
1269 return UDS_SUCCESS;
1270 }
1271
1272 static int __must_check verify_region(struct layout_region *lr, u64 start_block,
1273 enum region_kind kind, unsigned int instance)
1274 {
1275 if (lr->start_block != start_block)
1276 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1277 "incorrect layout region offset");
1278
1279 if (lr->kind != kind)
1280 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1281 "incorrect layout region kind");
1282
1283 if (lr->instance != instance) {
1284 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1285 "incorrect layout region instance");
1286 }
1287
1288 return UDS_SUCCESS;
1289 }
1290
1291 static int __must_check verify_sub_index(struct index_layout *layout, u64 start_block,
1292 struct region_table *table)
1293 {
1294 int result;
1295 unsigned int i;
1296 struct sub_index_layout *sil = &layout->index;
1297 u64 next_block = start_block;
1298
1299 sil->sub_index = table->regions[2];
1300 result = verify_region(&sil->sub_index, next_block, RL_KIND_INDEX, 0);
1301 if (result != UDS_SUCCESS)
1302 return result;
1303
1304 define_sub_index_nonce(layout);
1305
1306 sil->volume = table->regions[3];
1307 result = verify_region(&sil->volume, next_block, RL_KIND_VOLUME,
1308 RL_SOLE_INSTANCE);
1309 if (result != UDS_SUCCESS)
1310 return result;
1311
1312 next_block += sil->volume.block_count + layout->super.volume_offset;
1313
1314 for (i = 0; i < layout->super.max_saves; i++) {
1315 sil->saves[i].index_save = table->regions[i + 4];
1316 result = verify_region(&sil->saves[i].index_save, next_block,
1317 RL_KIND_SAVE, i);
1318 if (result != UDS_SUCCESS)
1319 return result;
1320
1321 next_block += sil->saves[i].index_save.block_count;
1322 }
1323
1324 next_block -= layout->super.volume_offset;
1325 if (next_block != start_block + sil->sub_index.block_count) {
1326 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1327 "sub index region does not span all saves");
1328 }
1329
1330 return UDS_SUCCESS;
1331 }
1332
1333 static int __must_check reconstitute_layout(struct index_layout *layout,
1334 struct region_table *table, u64 first_block)
1335 {
1336 int result;
1337 u64 next_block = first_block;
1338
1339 result = vdo_allocate(layout->super.max_saves, struct index_save_layout,
1340 __func__, &layout->index.saves);
1341 if (result != VDO_SUCCESS)
1342 return result;
1343
1344 layout->total_blocks = table->header.region_blocks;
1345
1346 layout->header = table->regions[0];
1347 result = verify_region(&layout->header, next_block++, RL_KIND_HEADER,
1348 RL_SOLE_INSTANCE);
1349 if (result != UDS_SUCCESS)
1350 return result;
1351
1352 layout->config = table->regions[1];
1353 result = verify_region(&layout->config, next_block++, RL_KIND_CONFIG,
1354 RL_SOLE_INSTANCE);
1355 if (result != UDS_SUCCESS)
1356 return result;
1357
1358 result = verify_sub_index(layout, next_block, table);
1359 if (result != UDS_SUCCESS)
1360 return result;
1361
1362 next_block += layout->index.sub_index.block_count;
1363
1364 layout->seal = table->regions[table->header.region_count - 1];
1365 result = verify_region(&layout->seal, next_block + layout->super.volume_offset,
1366 RL_KIND_SEAL, RL_SOLE_INSTANCE);
1367 if (result != UDS_SUCCESS)
1368 return result;
1369
1370 if (++next_block != (first_block + layout->total_blocks)) {
1371 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1372 "layout table does not span total blocks");
1373 }
1374
1375 return UDS_SUCCESS;
1376 }
1377
1378 static int __must_check load_super_block(struct index_layout *layout, size_t block_size,
1379 u64 first_block, struct buffered_reader *reader)
1380 {
1381 int result;
1382 struct region_table *table = NULL;
1383 struct super_block_data *super = &layout->super;
1384
1385 result = load_region_table(reader, &table);
1386 if (result != UDS_SUCCESS)
1387 return result;
1388
1389 if (table->header.type != RH_TYPE_SUPER) {
1390 vdo_free(table);
1391 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1392 "not a superblock region table");
1393 }
1394
1395 result = read_super_block_data(reader, layout, table->header.payload);
1396 if (result != UDS_SUCCESS) {
1397 vdo_free(table);
1398 return vdo_log_error_strerror(result, "unknown superblock format");
1399 }
1400
1401 if (super->block_size != block_size) {
1402 vdo_free(table);
1403 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1404 "superblock saved block_size %u differs from supplied block_size %zu",
1405 super->block_size, block_size);
1406 }
1407
1408 first_block -= (super->volume_offset - super->start_offset);
1409 result = reconstitute_layout(layout, table, first_block);
1410 vdo_free(table);
1411 return result;
1412 }
1413
1414 static int __must_check read_index_save_data(struct buffered_reader *reader,
1415 struct index_save_layout *isl,
1416 size_t saved_size)
1417 {
1418 int result;
1419 struct index_state_version file_version;
1420 u8 buffer[sizeof(struct index_save_data) + sizeof(struct index_state_data301)];
1421 size_t offset = 0;
1422
1423 if (saved_size != sizeof(buffer)) {
1424 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1425 "unexpected index save data size %zu",
1426 saved_size);
1427 }
1428
1429 result = uds_read_from_buffered_reader(reader, buffer, sizeof(buffer));
1430 if (result != UDS_SUCCESS)
1431 return vdo_log_error_strerror(result, "cannot read index save data");
1432
1433 decode_u64_le(buffer, &offset, &isl->save_data.timestamp);
1434 decode_u64_le(buffer, &offset, &isl->save_data.nonce);
1435 decode_u32_le(buffer, &offset, &isl->save_data.version);
1436 offset += sizeof(u32);
1437
1438 if (isl->save_data.version > 1) {
1439 return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1440 "unknown index save version number %u",
1441 isl->save_data.version);
1442 }
1443
1444 decode_s32_le(buffer, &offset, &file_version.signature);
1445 decode_s32_le(buffer, &offset, &file_version.version_id);
1446
1447 if ((file_version.signature != INDEX_STATE_VERSION_301.signature) ||
1448 (file_version.version_id != INDEX_STATE_VERSION_301.version_id)) {
1449 return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1450 "index state version %d,%d is unsupported",
1451 file_version.signature,
1452 file_version.version_id);
1453 }
1454
1455 decode_u64_le(buffer, &offset, &isl->state_data.newest_chapter);
1456 decode_u64_le(buffer, &offset, &isl->state_data.oldest_chapter);
1457 decode_u64_le(buffer, &offset, &isl->state_data.last_save);
1458 /* Skip past some historical fields that are now unused */
1459 offset += sizeof(u32) + sizeof(u32);
1460 return UDS_SUCCESS;
1461 }
1462
1463 static int __must_check reconstruct_index_save(struct index_save_layout *isl,
1464 struct region_table *table)
1465 {
1466 int result;
1467 unsigned int z;
1468 struct layout_region *last_region;
1469 u64 next_block = isl->index_save.start_block;
1470 u64 last_block = next_block + isl->index_save.block_count;
1471
1472 isl->zone_count = table->header.region_count - 3;
1473
1474 last_region = &table->regions[table->header.region_count - 1];
1475 if (last_region->kind == RL_KIND_EMPTY) {
1476 isl->free_space = *last_region;
1477 isl->zone_count--;
1478 } else {
1479 isl->free_space = (struct layout_region) {
1480 .start_block = last_block,
1481 .block_count = 0,
1482 .kind = RL_KIND_EMPTY,
1483 .instance = RL_SOLE_INSTANCE,
1484 };
1485 }
1486
1487 isl->header = table->regions[0];
1488 result = verify_region(&isl->header, next_block++, RL_KIND_HEADER,
1489 RL_SOLE_INSTANCE);
1490 if (result != UDS_SUCCESS)
1491 return result;
1492
1493 isl->index_page_map = table->regions[1];
1494 result = verify_region(&isl->index_page_map, next_block, RL_KIND_INDEX_PAGE_MAP,
1495 RL_SOLE_INSTANCE);
1496 if (result != UDS_SUCCESS)
1497 return result;
1498
1499 next_block += isl->index_page_map.block_count;
1500
1501 for (z = 0; z < isl->zone_count; z++) {
1502 isl->volume_index_zones[z] = table->regions[z + 2];
1503 result = verify_region(&isl->volume_index_zones[z], next_block,
1504 RL_KIND_VOLUME_INDEX, z);
1505 if (result != UDS_SUCCESS)
1506 return result;
1507
1508 next_block += isl->volume_index_zones[z].block_count;
1509 }
1510
1511 isl->open_chapter = table->regions[isl->zone_count + 2];
1512 result = verify_region(&isl->open_chapter, next_block, RL_KIND_OPEN_CHAPTER,
1513 RL_SOLE_INSTANCE);
1514 if (result != UDS_SUCCESS)
1515 return result;
1516
1517 next_block += isl->open_chapter.block_count;
1518
1519 result = verify_region(&isl->free_space, next_block, RL_KIND_EMPTY,
1520 RL_SOLE_INSTANCE);
1521 if (result != UDS_SUCCESS)
1522 return result;
1523
1524 next_block += isl->free_space.block_count;
1525 if (next_block != last_block) {
1526 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1527 "index save layout table incomplete");
1528 }
1529
1530 return UDS_SUCCESS;
1531 }
1532
1533 static int __must_check load_index_save(struct index_save_layout *isl,
1534 struct buffered_reader *reader,
1535 unsigned int instance)
1536 {
1537 int result;
1538 struct region_table *table = NULL;
1539
1540 result = load_region_table(reader, &table);
1541 if (result != UDS_SUCCESS) {
1542 return vdo_log_error_strerror(result, "cannot read index save %u header",
1543 instance);
1544 }
1545
1546 if (table->header.region_blocks != isl->index_save.block_count) {
1547 u64 region_blocks = table->header.region_blocks;
1548
1549 vdo_free(table);
1550 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1551 "unexpected index save %u region block count %llu",
1552 instance,
1553 (unsigned long long) region_blocks);
1554 }
1555
1556 if (table->header.type == RH_TYPE_UNSAVED) {
1557 vdo_free(table);
1558 reset_index_save_layout(isl, 0);
1559 return UDS_SUCCESS;
1560 }
1561
1563 if (table->header.type != RH_TYPE_SAVE) {
1564 vdo_log_error_strerror(UDS_CORRUPT_DATA,
1565 "unexpected index save %u header type %u",
1566 instance, table->header.type);
1567 vdo_free(table);
1568 return UDS_CORRUPT_DATA;
1569 }
1570
1571 result = read_index_save_data(reader, isl, table->header.payload);
1572 if (result != UDS_SUCCESS) {
1573 vdo_free(table);
1574 return vdo_log_error_strerror(result,
1575 "unknown index save %u data format",
1576 instance);
1577 }
1578
1579 result = reconstruct_index_save(isl, table);
1580 vdo_free(table);
1581 if (result != UDS_SUCCESS) {
1582 return vdo_log_error_strerror(result, "cannot reconstruct index save %u",
1583 instance);
1584 }
1585
1586 return UDS_SUCCESS;
1587 }
1588
1589 static int __must_check load_sub_index_regions(struct index_layout *layout)
1590 {
1591 int result;
1592 unsigned int j;
1593 struct index_save_layout *isl;
1594 struct buffered_reader *reader;
1595
1596 for (j = 0; j < layout->super.max_saves; j++) {
1597 isl = &layout->index.saves[j];
1598 result = open_region_reader(layout, &isl->index_save, &reader);
1599
1600 if (result != UDS_SUCCESS) {
1601 vdo_log_error_strerror(result,
1602 "cannot get reader for index 0 save %u",
1603 j);
1604 return result;
1605 }
1606
1607 result = load_index_save(isl, reader, j);
1608 uds_free_buffered_reader(reader);
1609 if (result != UDS_SUCCESS) {
1610 /* Another save slot might be valid. */
1611 reset_index_save_layout(isl, 0);
1612 continue;
1613 }
1614 }
1615
1616 return UDS_SUCCESS;
1617 }
1618
1619 static int __must_check verify_uds_index_config(struct index_layout *layout,
1620 struct uds_configuration *config)
1621 {
1622 int result;
1623 struct buffered_reader *reader = NULL;
1624 u64 offset;
1625
1626 offset = layout->super.volume_offset - layout->super.start_offset;
1627 result = open_layout_reader(layout, &layout->config, offset, &reader);
1628 if (result != UDS_SUCCESS)
1629 return vdo_log_error_strerror(result, "failed to open config reader");
1630
1631 result = uds_validate_config_contents(reader, config);
1632 if (result != UDS_SUCCESS) {
1633 uds_free_buffered_reader(reader);
1634 return vdo_log_error_strerror(result, "failed to read config region");
1635 }
1636
1637 uds_free_buffered_reader(reader);
1638 return UDS_SUCCESS;
1639 }
1640
1641 static int load_index_layout(struct index_layout *layout, struct uds_configuration *config)
1642 {
1643 int result;
1644 struct buffered_reader *reader;
1645
1646 result = uds_make_buffered_reader(layout->factory,
1647 layout->offset / UDS_BLOCK_SIZE, 1, &reader);
1648 if (result != UDS_SUCCESS)
1649 return vdo_log_error_strerror(result, "unable to read superblock");
1650
1651 result = load_super_block(layout, UDS_BLOCK_SIZE,
1652 layout->offset / UDS_BLOCK_SIZE, reader);
1653 uds_free_buffered_reader(reader);
1654 if (result != UDS_SUCCESS)
1655 return result;
1656
1657 result = verify_uds_index_config(layout, config);
1658 if (result != UDS_SUCCESS)
1659 return result;
1660
1661 return load_sub_index_regions(layout);
1662 }
1663
1664 static int create_layout_factory(struct index_layout *layout,
1665 const struct uds_configuration *config)
1666 {
1667 int result;
1668 size_t writable_size;
1669 struct io_factory *factory = NULL;
1670
1671 result = uds_make_io_factory(config->bdev, &factory);
1672 if (result != UDS_SUCCESS)
1673 return result;
1674
1675 writable_size = uds_get_writable_size(factory) & -UDS_BLOCK_SIZE;
1676 if (writable_size < config->size + config->offset) {
1677 uds_put_io_factory(factory);
1678 vdo_log_error("index storage (%zu) is smaller than the requested size %zu",
1679 writable_size, config->size + config->offset);
1680 return -ENOSPC;
1681 }
1682
1683 layout->factory = factory;
1684 layout->factory_size = (config->size > 0) ? config->size : writable_size;
1685 layout->offset = config->offset;
1686 return UDS_SUCCESS;
1687 }
1688
1689 int uds_make_index_layout(struct uds_configuration *config, bool new_layout,
1690 struct index_layout **layout_ptr)
1691 {
1692 int result;
1693 struct index_layout *layout = NULL;
1694 struct save_layout_sizes sizes;
1695
1696 result = compute_sizes(config, &sizes);
1697 if (result != UDS_SUCCESS)
1698 return result;
1699
1700 result = vdo_allocate(1, struct index_layout, __func__, &layout);
1701 if (result != VDO_SUCCESS)
1702 return result;
1703
1704 result = create_layout_factory(layout, config);
1705 if (result != UDS_SUCCESS) {
1706 uds_free_index_layout(layout);
1707 return result;
1708 }
1709
1710 if (layout->factory_size < sizes.total_size) {
1711 vdo_log_error("index storage (%zu) is smaller than the required size %llu",
1712 layout->factory_size,
1713 (unsigned long long) sizes.total_size);
1714 uds_free_index_layout(layout);
1715 return -ENOSPC;
1716 }
1717
1718 if (new_layout)
1719 result = create_index_layout(layout, config);
1720 else
1721 result = load_index_layout(layout, config);
1722 if (result != UDS_SUCCESS) {
1723 uds_free_index_layout(layout);
1724 return result;
1725 }
1726
1727 *layout_ptr = layout;
1728 return UDS_SUCCESS;
1729 }
1730
1731 void uds_free_index_layout(struct index_layout *layout)
1732 {
1733 if (layout == NULL)
1734 return;
1735
1736 vdo_free(layout->index.saves);
1737 if (layout->factory != NULL)
1738 uds_put_io_factory(layout->factory);
1739
1740 vdo_free(layout);
1741 }
1742
1743 int uds_replace_index_layout_storage(struct index_layout *layout,
1744 struct block_device *bdev)
1745 {
1746 return uds_replace_storage(layout->factory, bdev);
1747 }
1748
1749 /* Obtain a dm_bufio_client for the volume region. */
1750 int uds_open_volume_bufio(struct index_layout *layout, size_t block_size,
1751 unsigned int reserved_buffers,
1752 struct dm_bufio_client **client_ptr)
1753 {
1754 off_t offset = (layout->index.volume.start_block +
1755 layout->super.volume_offset -
1756 layout->super.start_offset);
1757
1758 return uds_make_bufio(layout->factory, offset, block_size, reserved_buffers,
1759 client_ptr);
1760 }
1761
1762 u64 uds_get_volume_nonce(struct index_layout *layout)
1763 {
1764 return layout->index.nonce;
1765 }
1766
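/*
 * Illustrative sketch only, excluded from the build: how a caller might
 * drive the public entry points in this file. Building the uds_configuration
 * and uds_index passed in is assumed and omitted here; the real callers live
 * elsewhere in the indexer.
 */
#if 0
static int example_layout_lifecycle(struct uds_configuration *config,
				    struct uds_index *index)
{
	int result;
	struct index_layout *layout;

	/* Create the on-storage layout for a new index. */
	result = uds_make_index_layout(config, true, &layout);
	if (result != UDS_SUCCESS)
		return result;

	/* Save the index state into the oldest save slot... */
	result = uds_save_index_state(layout, index);
	if (result != UDS_SUCCESS) {
		uds_free_index_layout(layout);
		return result;
	}

	/* ...then read it back from the most recent valid save. */
	result = uds_load_index_state(layout, index);
	uds_free_index_layout(layout);
	return result;
}
#endif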