1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright 2023 Red Hat
4 */
5
6 #include "index-layout.h"
7
8 #include <linux/random.h>
9
10 #include "logger.h"
11 #include "memory-alloc.h"
12 #include "murmurhash3.h"
13 #include "numeric.h"
14 #include "time-utils.h"
15
16 #include "config.h"
17 #include "open-chapter.h"
18 #include "volume-index.h"
19
20 /*
21 * The UDS layout on storage media is divided into a number of fixed-size regions, the sizes of
22 * which are computed when the index is created. Every header and region begins on 4K block
23 * boundary. Save regions are further sub-divided into regions of their own.
24 *
 * Each region has a kind and an instance number. Some kinds only have one instance and therefore
 * use RL_SOLE_INSTANCE (65535, which is -1 as an unsigned 16-bit value) as the instance number.
 * The RL_KIND_INDEX used to use instances to represent sub-indices; now, however, there is only
 * ever one sub-index and therefore one instance. The RL_KIND_VOLUME_INDEX uses instances to
 * record which zone is being saved.
29 *
30 * Every region header has a type and version.
31 *
32 * +-+-+---------+--------+--------+-+
33 * | | | I N D E X 0 101, 0 | |
34 * |H|C+---------+--------+--------+S|
35 * |D|f| Volume | Save | Save |e|
36 * |R|g| Region | Region | Region |a|
37 * | | | 201, -1 | 202, 0 | 202, 1 |l|
38 * +-+-+--------+---------+--------+-+
39 *
40 * The header contains the encoded region layout table as well as some index configuration data.
41 * The sub-index region and its subdivisions are maintained in the same table.
42 *
43 * There are two save regions to preserve the old state in case saving the new state is incomplete.
44 * They are used in alternation. Each save region is further divided into sub-regions.
45 *
46 * +-+-----+------+------+-----+-----+
47 * |H| IPM | MI | MI | | OC |
48 * |D| | zone | zone | ... | |
49 * |R| 301 | 302 | 302 | | 303 |
50 * | | -1 | 0 | 1 | | -1 |
51 * +-+-----+------+------+-----+-----+
52 *
53 * The header contains the encoded region layout table as well as index state data for that save.
54 * Each save also has a unique nonce.
55 */
56
/* Size in bytes of the nonce_info seed data kept in the super block. */
#define NONCE_INFO_SIZE 32
/* Number of save regions created in a new layout. */
#define MAX_SAVES 2
59
/* The kind of a layout region, stored in the kind field of struct layout_region. */
enum region_kind {
	RL_KIND_EMPTY = 0,
	RL_KIND_HEADER = 1,
	/* Regions of the top-level layout */
	RL_KIND_CONFIG = 100,
	RL_KIND_INDEX = 101,
	RL_KIND_SEAL = 102,
	/* Regions inside the index region */
	RL_KIND_VOLUME = 201,
	RL_KIND_SAVE = 202,
	/* Regions inside a save region */
	RL_KIND_INDEX_PAGE_MAP = 301,
	RL_KIND_VOLUME_INDEX = 302,
	RL_KIND_OPEN_CHAPTER = 303,
};
72
/*
 * The type of a region table, stored in the type field of struct region_header.
 * Some region types are historical and are no longer used.
 */
enum region_type {
	RH_TYPE_FREE = 0, /* unused */
	/* The table describing the whole layout */
	RH_TYPE_SUPER = 1,
	/* A save slot table with at least one volume index zone */
	RH_TYPE_SAVE = 2,
	RH_TYPE_CHECKPOINT = 3, /* unused */
	/* A save slot table with no zones, i.e. an empty save */
	RH_TYPE_UNSAVED = 4,
};
81
/* Instance number for region kinds that only have one instance; fits the u16 instance field. */
#define RL_SOLE_INSTANCE 65535
83
84 /*
85 * Super block version 2 is the first released version.
86 *
87 * Super block version 3 is the normal version used from RHEL 8.2 onwards.
88 *
89 * Super block versions 4 through 6 were incremental development versions and
90 * are not supported.
91 *
 * Super block version 7 is used for volumes which have been reduced in size by one chapter in
 * order to make room to prepend LVM metadata to a volume originally created without LVM. This
 * allows the index to retain most of its deduplication records.
95 */
/*
 * New layouts are created with SUPER_VERSION_CURRENT.
 * NOTE(review): MINIMUM and MAXIMUM presumably bound the versions accepted when loading a super
 * block; the code that checks them is not in this part of the file -- confirm.
 */
#define SUPER_VERSION_MINIMUM 3
#define SUPER_VERSION_CURRENT 3
#define SUPER_VERSION_MAXIMUM 7
99
/* Label copied into the magic_label field of the super block. */
static const u8 LAYOUT_MAGIC[] = "*ALBIREO*SINGLE*FILE*LAYOUT*001*";
/* Magic number encoded at the start of every region table. */
static const u64 REGION_MAGIC = 0x416c6252676e3031; /* 'AlbRgn01' */

/* Length of LAYOUT_MAGIC without its terminating NUL byte. */
#define MAGIC_SIZE (sizeof(LAYOUT_MAGIC) - 1)
104
/* The header of a region table. */
struct region_header {
	/* Set to REGION_MAGIC */
	u64 magic;
	/* The size in blocks of the region this table describes */
	u64 region_blocks;
	/* One of enum region_type */
	u16 type;
	/* Currently always version 1 */
	u16 version;
	/* The number of layout_region entries in the table */
	u16 region_count;
	/* The size in bytes of the extra data encoded after the region entries */
	u16 payload;
};
114
/* One entry of a region table: a contiguous run of blocks with a kind and an instance. */
struct layout_region {
	/* The first block of the region */
	u64 start_block;
	/* The number of blocks in the region */
	u64 block_count;
	/* Always encoded as zero */
	u32 __unused;
	/* One of enum region_kind */
	u16 kind;
	/* The instance number, or RL_SOLE_INSTANCE */
	u16 instance;
};
122
/* The in-memory form of a region table: a header followed by its region entries. */
struct region_table {
	/* The bytes needed to encode the header, the region entries, and the payload */
	size_t encoded_size;
	struct region_header header;
	/* header.region_count entries */
	struct layout_region regions[];
};
128
/* Identifying data encoded in the header of every save slot. */
struct index_save_data {
	/* When the save was set up, in milliseconds; zero for an empty slot */
	u64 timestamp;
	/* The nonce of this save, derived from the sub-index nonce and the other save data */
	u64 nonce;
	/* Currently always version 1 */
	u32 version;
	/* Always encoded as zero */
	u32 unused__;
};
136
/* The version marker encoded at the start of the saved index state data. */
struct index_state_version {
	s32 signature;
	s32 version_id;
};
141
/* The version marker written with the index state of every non-empty save. */
static const struct index_state_version INDEX_STATE_VERSION_301 = {
	.signature = -1,
	.version_id = 301,
};
146
/* The index state stored in the header of a non-empty save. */
struct index_state_data301 {
	struct index_state_version version;
	/* Loaded into the index as its newest virtual chapter */
	u64 newest_chapter;
	/* Loaded into the index as its oldest virtual chapter */
	u64 oldest_chapter;
	/* Loaded into the index as its last_save value */
	u64 last_save;
	/* The final eight bytes are always encoded as zero */
	u32 unused;
	u32 padding;
};
155
/* The in-memory description of one save slot and its sub-regions. */
struct index_save_layout {
	/* The number of volume index zones in the save; zero means the slot holds no save */
	unsigned int zone_count;
	/* The whole save slot */
	struct layout_region index_save;
	/* The first block of the slot, holding the slot's region table and save data */
	struct layout_region header;
	struct layout_region index_page_map;
	/* Whatever part of the slot is not assigned to another sub-region */
	struct layout_region free_space;
	/* Only the first zone_count entries are in use */
	struct layout_region volume_index_zones[MAX_ZONES];
	struct layout_region open_chapter;
	struct index_save_data save_data;
	struct index_state_data301 state_data;
};
167
/* The in-memory description of the index region: the volume followed by the save slots. */
struct sub_index_layout {
	/* Derived from the super block nonce; used to generate and validate save nonces */
	u64 nonce;
	/* The whole index region */
	struct layout_region sub_index;
	struct layout_region volume;
	/* An array with one entry per save slot */
	struct index_save_layout *saves;
};
174
/* The data stored in the layout header after the top-level region table. */
struct super_block_data {
	/* A copy of LAYOUT_MAGIC, without a terminating NUL */
	u8 magic_label[MAGIC_SIZE];
	/* The seed data hashed to produce the nonce */
	u8 nonce_info[NONCE_INFO_SIZE];
	/* The primary nonce of the layout */
	u64 nonce;
	u32 version;
	u32 block_size;
	/* Set to 1 for new layouts */
	u16 index_count;
	/* The number of save slots */
	u16 max_saves;
	/* Padding reflects a blank field on permanent storage */
	u8 padding[4];
	u64 open_chapter_blocks;
	u64 page_map_blocks;
	/* The last two fields are only written for converted (version 7) super blocks */
	u64 volume_offset;
	u64 start_offset;
};
190
/* The in-memory description of the whole on-storage layout. */
struct index_layout {
	/* Used to create the buffered readers and writers for each region */
	struct io_factory *factory;
	/* NOTE(review): presumably the size in bytes of the storage behind factory -- confirm */
	size_t factory_size;
	/* The byte offset of the layout; divided by the block size to find the first block */
	off_t offset;
	struct super_block_data super;
	struct layout_region header;
	struct layout_region config;
	struct sub_index_layout index;
	struct layout_region seal;
	/* The total size of the layout in blocks */
	u64 total_blocks;
};
202
/* The region sizes computed by compute_sizes() for a new layout. */
struct save_layout_sizes {
	/* The number of save slots */
	unsigned int save_count;
	/* The size of a block in bytes */
	size_t block_size;
	u64 volume_blocks;
	u64 volume_index_blocks;
	u64 page_map_blocks;
	u64 open_chapter_blocks;
	/* One save slot: a header block plus volume index, page map, and open chapter */
	u64 save_blocks;
	/* The volume plus all of the save slots */
	u64 sub_index_blocks;
	/* The index region plus three more blocks */
	u64 total_blocks;
	/* total_blocks expressed in bytes */
	size_t total_size;
};
215
is_converted_super_block(struct super_block_data * super)216 static inline bool is_converted_super_block(struct super_block_data *super)
217 {
218 return super->version == 7;
219 }
220
compute_sizes(const struct uds_configuration * config,struct save_layout_sizes * sls)221 static int __must_check compute_sizes(const struct uds_configuration *config,
222 struct save_layout_sizes *sls)
223 {
224 int result;
225 struct index_geometry *geometry = config->geometry;
226
227 memset(sls, 0, sizeof(*sls));
228 sls->save_count = MAX_SAVES;
229 sls->block_size = UDS_BLOCK_SIZE;
230 sls->volume_blocks = geometry->bytes_per_volume / sls->block_size;
231
232 result = uds_compute_volume_index_save_blocks(config, sls->block_size,
233 &sls->volume_index_blocks);
234 if (result != UDS_SUCCESS)
235 return vdo_log_error_strerror(result, "cannot compute index save size");
236
237 sls->page_map_blocks =
238 DIV_ROUND_UP(uds_compute_index_page_map_save_size(geometry),
239 sls->block_size);
240 sls->open_chapter_blocks =
241 DIV_ROUND_UP(uds_compute_saved_open_chapter_size(geometry),
242 sls->block_size);
243 sls->save_blocks =
244 1 + (sls->volume_index_blocks + sls->page_map_blocks + sls->open_chapter_blocks);
245 sls->sub_index_blocks = sls->volume_blocks + (sls->save_count * sls->save_blocks);
246 sls->total_blocks = 3 + sls->sub_index_blocks;
247 sls->total_size = sls->total_blocks * sls->block_size;
248
249 return UDS_SUCCESS;
250 }
251
252 /* Create unique data using the current time and a pseudorandom number. */
create_unique_nonce_data(u8 * buffer)253 static void create_unique_nonce_data(u8 *buffer)
254 {
255 ktime_t now = current_time_ns(CLOCK_REALTIME);
256 u32 rand;
257 size_t offset = 0;
258
259 get_random_bytes(&rand, sizeof(u32));
260 memcpy(buffer + offset, &now, sizeof(now));
261 offset += sizeof(now);
262 memcpy(buffer + offset, &rand, sizeof(rand));
263 offset += sizeof(rand);
264 while (offset < NONCE_INFO_SIZE) {
265 size_t len = min(NONCE_INFO_SIZE - offset, offset);
266
267 memcpy(buffer + offset, buffer, len);
268 offset += len;
269 }
270 }
271
/*
 * Hash len bytes of data with murmurhash3_128, seeded from start, and return the eight bytes at
 * offset 4 of the 16-byte hash as a little-endian value.
 */
static u64 hash_stuff(u64 start, const void *data, size_t len)
{
	u8 digest[16];
	u32 seed = start ^ (start >> 27);

	murmurhash3_128(data, len, seed, digest);
	return get_unaligned_le64(&digest[4]);
}
280
281 /* Generate a primary nonce from the provided data. */
generate_primary_nonce(const void * data,size_t len)282 static u64 generate_primary_nonce(const void *data, size_t len)
283 {
284 return hash_stuff(0xa1b1e0fc, data, len);
285 }
286
287 /*
288 * Deterministically generate a secondary nonce from an existing nonce and some arbitrary data by
289 * hashing the original nonce and the data to produce a new nonce.
290 */
generate_secondary_nonce(u64 nonce,const void * data,size_t len)291 static u64 generate_secondary_nonce(u64 nonce, const void *data, size_t len)
292 {
293 return hash_stuff(nonce + 1, data, len);
294 }
295
open_layout_reader(struct index_layout * layout,struct layout_region * lr,off_t offset,struct buffered_reader ** reader_ptr)296 static int __must_check open_layout_reader(struct index_layout *layout,
297 struct layout_region *lr, off_t offset,
298 struct buffered_reader **reader_ptr)
299 {
300 return uds_make_buffered_reader(layout->factory, lr->start_block + offset,
301 lr->block_count, reader_ptr);
302 }
303
open_region_reader(struct index_layout * layout,struct layout_region * region,struct buffered_reader ** reader_ptr)304 static int open_region_reader(struct index_layout *layout, struct layout_region *region,
305 struct buffered_reader **reader_ptr)
306 {
307 return open_layout_reader(layout, region, -layout->super.start_offset,
308 reader_ptr);
309 }
310
open_layout_writer(struct index_layout * layout,struct layout_region * lr,off_t offset,struct buffered_writer ** writer_ptr)311 static int __must_check open_layout_writer(struct index_layout *layout,
312 struct layout_region *lr, off_t offset,
313 struct buffered_writer **writer_ptr)
314 {
315 return uds_make_buffered_writer(layout->factory, lr->start_block + offset,
316 lr->block_count, writer_ptr);
317 }
318
open_region_writer(struct index_layout * layout,struct layout_region * region,struct buffered_writer ** writer_ptr)319 static int open_region_writer(struct index_layout *layout, struct layout_region *region,
320 struct buffered_writer **writer_ptr)
321 {
322 return open_layout_writer(layout, region, -layout->super.start_offset,
323 writer_ptr);
324 }
325
generate_super_block_data(struct save_layout_sizes * sls,struct super_block_data * super)326 static void generate_super_block_data(struct save_layout_sizes *sls,
327 struct super_block_data *super)
328 {
329 memset(super, 0, sizeof(*super));
330 memcpy(super->magic_label, LAYOUT_MAGIC, MAGIC_SIZE);
331 create_unique_nonce_data(super->nonce_info);
332
333 super->nonce = generate_primary_nonce(super->nonce_info,
334 sizeof(super->nonce_info));
335 super->version = SUPER_VERSION_CURRENT;
336 super->block_size = sls->block_size;
337 super->index_count = 1;
338 super->max_saves = sls->save_count;
339 super->open_chapter_blocks = sls->open_chapter_blocks;
340 super->page_map_blocks = sls->page_map_blocks;
341 super->volume_offset = 0;
342 super->start_offset = 0;
343 }
344
define_sub_index_nonce(struct index_layout * layout)345 static void define_sub_index_nonce(struct index_layout *layout)
346 {
347 struct sub_index_nonce_data {
348 u64 offset;
349 u16 index_id;
350 };
351 struct sub_index_layout *sil = &layout->index;
352 u64 primary_nonce = layout->super.nonce;
353 u8 buffer[sizeof(struct sub_index_nonce_data)] = { 0 };
354 size_t offset = 0;
355
356 encode_u64_le(buffer, &offset, sil->sub_index.start_block);
357 encode_u16_le(buffer, &offset, 0);
358 sil->nonce = generate_secondary_nonce(primary_nonce, buffer, sizeof(buffer));
359 if (sil->nonce == 0) {
360 sil->nonce = generate_secondary_nonce(~primary_nonce + 1, buffer,
361 sizeof(buffer));
362 }
363 }
364
/*
 * Lay out the index region beginning at start_block: the volume comes first, followed by one
 * region per save slot. Finishes by defining the sub-index nonce.
 */
static void setup_sub_index(struct index_layout *layout, u64 start_block,
			    struct save_layout_sizes *sls)
{
	struct sub_index_layout *sil = &layout->index;
	u64 save_start = start_block + sls->volume_blocks;
	unsigned int slot;

	sil->sub_index = (struct layout_region) {
		.start_block = start_block,
		.block_count = sls->sub_index_blocks,
		.kind = RL_KIND_INDEX,
		.instance = 0,
	};

	/* The volume shares its first block with the index region. */
	sil->volume = (struct layout_region) {
		.start_block = start_block,
		.block_count = sls->volume_blocks,
		.kind = RL_KIND_VOLUME,
		.instance = RL_SOLE_INSTANCE,
	};

	for (slot = 0; slot < sls->save_count; slot++, save_start += sls->save_blocks) {
		sil->saves[slot].index_save = (struct layout_region) {
			.start_block = save_start,
			.block_count = sls->save_blocks,
			.kind = RL_KIND_SAVE,
			.instance = slot,
		};
	}

	define_sub_index_nonce(layout);
}
401
initialize_layout(struct index_layout * layout,struct save_layout_sizes * sls)402 static void initialize_layout(struct index_layout *layout, struct save_layout_sizes *sls)
403 {
404 u64 next_block = layout->offset / sls->block_size;
405
406 layout->total_blocks = sls->total_blocks;
407 generate_super_block_data(sls, &layout->super);
408 layout->header = (struct layout_region) {
409 .start_block = next_block++,
410 .block_count = 1,
411 .kind = RL_KIND_HEADER,
412 .instance = RL_SOLE_INSTANCE,
413 };
414
415 layout->config = (struct layout_region) {
416 .start_block = next_block++,
417 .block_count = 1,
418 .kind = RL_KIND_CONFIG,
419 .instance = RL_SOLE_INSTANCE,
420 };
421
422 setup_sub_index(layout, next_block, sls);
423 next_block += sls->sub_index_blocks;
424
425 layout->seal = (struct layout_region) {
426 .start_block = next_block,
427 .block_count = 1,
428 .kind = RL_KIND_SEAL,
429 .instance = RL_SOLE_INSTANCE,
430 };
431 }
432
make_index_save_region_table(struct index_save_layout * isl,struct region_table ** table_ptr)433 static int __must_check make_index_save_region_table(struct index_save_layout *isl,
434 struct region_table **table_ptr)
435 {
436 int result;
437 unsigned int z;
438 struct region_table *table;
439 struct layout_region *lr;
440 u16 region_count;
441 size_t payload;
442 size_t type;
443
444 if (isl->zone_count > 0) {
445 /*
446 * Normal save regions: header, page map, volume index zones,
447 * open chapter, and possibly free space.
448 */
449 region_count = 3 + isl->zone_count;
450 if (isl->free_space.block_count > 0)
451 region_count++;
452
453 payload = sizeof(isl->save_data) + sizeof(isl->state_data);
454 type = RH_TYPE_SAVE;
455 } else {
456 /* Empty save regions: header, page map, free space. */
457 region_count = 3;
458 payload = sizeof(isl->save_data);
459 type = RH_TYPE_UNSAVED;
460 }
461
462 result = vdo_allocate_extended(struct region_table, region_count,
463 struct layout_region,
464 "layout region table for ISL", &table);
465 if (result != VDO_SUCCESS)
466 return result;
467
468 lr = &table->regions[0];
469 *lr++ = isl->header;
470 *lr++ = isl->index_page_map;
471 for (z = 0; z < isl->zone_count; z++)
472 *lr++ = isl->volume_index_zones[z];
473
474 if (isl->zone_count > 0)
475 *lr++ = isl->open_chapter;
476
477 if (isl->free_space.block_count > 0)
478 *lr++ = isl->free_space;
479
480 table->header = (struct region_header) {
481 .magic = REGION_MAGIC,
482 .region_blocks = isl->index_save.block_count,
483 .type = type,
484 .version = 1,
485 .region_count = region_count,
486 .payload = payload,
487 };
488
489 table->encoded_size = (sizeof(struct region_header) + payload +
490 region_count * sizeof(struct layout_region));
491 *table_ptr = table;
492 return UDS_SUCCESS;
493 }
494
/*
 * Encode a region table into buffer in little-endian form, starting at *offset and advancing
 * *offset past what was written: first the header, then each region entry in order.
 */
static void encode_region_table(u8 *buffer, size_t *offset, struct region_table *table)
{
	const struct region_header *header = &table->header;
	const struct layout_region *region = table->regions;
	const struct layout_region *end = region + header->region_count;

	/* The magic number written is always REGION_MAGIC, whatever the header holds. */
	encode_u64_le(buffer, offset, REGION_MAGIC);
	encode_u64_le(buffer, offset, header->region_blocks);
	encode_u16_le(buffer, offset, header->type);
	encode_u16_le(buffer, offset, header->version);
	encode_u16_le(buffer, offset, header->region_count);
	encode_u16_le(buffer, offset, header->payload);

	for (; region < end; region++) {
		encode_u64_le(buffer, offset, region->start_block);
		encode_u64_le(buffer, offset, region->block_count);
		/* The unused field is always written as zero. */
		encode_u32_le(buffer, offset, 0);
		encode_u16_le(buffer, offset, region->kind);
		encode_u16_le(buffer, offset, region->instance);
	}
}
514
write_index_save_header(struct index_save_layout * isl,struct region_table * table,struct buffered_writer * writer)515 static int __must_check write_index_save_header(struct index_save_layout *isl,
516 struct region_table *table,
517 struct buffered_writer *writer)
518 {
519 int result;
520 u8 *buffer;
521 size_t offset = 0;
522
523 result = vdo_allocate(table->encoded_size, u8, "index save data", &buffer);
524 if (result != VDO_SUCCESS)
525 return result;
526
527 encode_region_table(buffer, &offset, table);
528 encode_u64_le(buffer, &offset, isl->save_data.timestamp);
529 encode_u64_le(buffer, &offset, isl->save_data.nonce);
530 encode_u32_le(buffer, &offset, isl->save_data.version);
531 encode_u32_le(buffer, &offset, 0);
532 if (isl->zone_count > 0) {
533 encode_u32_le(buffer, &offset, INDEX_STATE_VERSION_301.signature);
534 encode_u32_le(buffer, &offset, INDEX_STATE_VERSION_301.version_id);
535 encode_u64_le(buffer, &offset, isl->state_data.newest_chapter);
536 encode_u64_le(buffer, &offset, isl->state_data.oldest_chapter);
537 encode_u64_le(buffer, &offset, isl->state_data.last_save);
538 encode_u64_le(buffer, &offset, 0);
539 }
540
541 result = uds_write_to_buffered_writer(writer, buffer, offset);
542 vdo_free(buffer);
543 if (result != UDS_SUCCESS)
544 return result;
545
546 return uds_flush_buffered_writer(writer);
547 }
548
write_index_save_layout(struct index_layout * layout,struct index_save_layout * isl)549 static int write_index_save_layout(struct index_layout *layout,
550 struct index_save_layout *isl)
551 {
552 int result;
553 struct region_table *table;
554 struct buffered_writer *writer;
555
556 result = make_index_save_region_table(isl, &table);
557 if (result != UDS_SUCCESS)
558 return result;
559
560 result = open_region_writer(layout, &isl->header, &writer);
561 if (result != UDS_SUCCESS) {
562 vdo_free(table);
563 return result;
564 }
565
566 result = write_index_save_header(isl, table, writer);
567 vdo_free(table);
568 uds_free_buffered_writer(writer);
569
570 return result;
571 }
572
/*
 * Return a save slot to its empty state in memory: no zones, zeroed save data, and the slot
 * divided into a header block, the page map, and free space covering the rest.
 */
static void reset_index_save_layout(struct index_save_layout *isl, u64 page_map_blocks)
{
	const u64 header_block = isl->index_save.start_block;
	const u64 page_map_start = header_block + 1;
	const u64 free_start = page_map_start + page_map_blocks;

	isl->zone_count = 0;
	memset(&isl->save_data, 0, sizeof(isl->save_data));

	isl->header = (struct layout_region) {
		.start_block = header_block,
		.block_count = 1,
		.kind = RL_KIND_HEADER,
		.instance = RL_SOLE_INSTANCE,
	};

	isl->index_page_map = (struct layout_region) {
		.start_block = page_map_start,
		.block_count = page_map_blocks,
		.kind = RL_KIND_INDEX_PAGE_MAP,
		.instance = RL_SOLE_INSTANCE,
	};

	/* Everything after the header block and the page map is free. */
	isl->free_space = (struct layout_region) {
		.start_block = free_start,
		.block_count = isl->index_save.block_count - page_map_blocks - 1,
		.kind = RL_KIND_EMPTY,
		.instance = RL_SOLE_INSTANCE,
	};
}
605
invalidate_old_save(struct index_layout * layout,struct index_save_layout * isl)606 static int __must_check invalidate_old_save(struct index_layout *layout,
607 struct index_save_layout *isl)
608 {
609 reset_index_save_layout(isl, layout->super.page_map_blocks);
610 return write_index_save_layout(layout, isl);
611 }
612
discard_index_state_data(struct index_layout * layout)613 static int discard_index_state_data(struct index_layout *layout)
614 {
615 int result;
616 int saved_result = UDS_SUCCESS;
617 unsigned int i;
618
619 for (i = 0; i < layout->super.max_saves; i++) {
620 result = invalidate_old_save(layout, &layout->index.saves[i]);
621 if (result != UDS_SUCCESS)
622 saved_result = result;
623 }
624
625 if (saved_result != UDS_SUCCESS) {
626 return vdo_log_error_strerror(result,
627 "%s: cannot destroy all index saves",
628 __func__);
629 }
630
631 return UDS_SUCCESS;
632 }
633
make_layout_region_table(struct index_layout * layout,struct region_table ** table_ptr)634 static int __must_check make_layout_region_table(struct index_layout *layout,
635 struct region_table **table_ptr)
636 {
637 int result;
638 unsigned int i;
639 /* Regions: header, config, index, volume, saves, seal */
640 u16 region_count = 5 + layout->super.max_saves;
641 u16 payload;
642 struct region_table *table;
643 struct layout_region *lr;
644
645 result = vdo_allocate_extended(struct region_table, region_count,
646 struct layout_region, "layout region table",
647 &table);
648 if (result != VDO_SUCCESS)
649 return result;
650
651 lr = &table->regions[0];
652 *lr++ = layout->header;
653 *lr++ = layout->config;
654 *lr++ = layout->index.sub_index;
655 *lr++ = layout->index.volume;
656
657 for (i = 0; i < layout->super.max_saves; i++)
658 *lr++ = layout->index.saves[i].index_save;
659
660 *lr++ = layout->seal;
661
662 if (is_converted_super_block(&layout->super)) {
663 payload = sizeof(struct super_block_data);
664 } else {
665 payload = (sizeof(struct super_block_data) -
666 sizeof(layout->super.volume_offset) -
667 sizeof(layout->super.start_offset));
668 }
669
670 table->header = (struct region_header) {
671 .magic = REGION_MAGIC,
672 .region_blocks = layout->total_blocks,
673 .type = RH_TYPE_SUPER,
674 .version = 1,
675 .region_count = region_count,
676 .payload = payload,
677 };
678
679 table->encoded_size = (sizeof(struct region_header) + payload +
680 region_count * sizeof(struct layout_region));
681 *table_ptr = table;
682 return UDS_SUCCESS;
683 }
684
write_layout_header(struct index_layout * layout,struct region_table * table,struct buffered_writer * writer)685 static int __must_check write_layout_header(struct index_layout *layout,
686 struct region_table *table,
687 struct buffered_writer *writer)
688 {
689 int result;
690 u8 *buffer;
691 size_t offset = 0;
692
693 result = vdo_allocate(table->encoded_size, u8, "layout data", &buffer);
694 if (result != VDO_SUCCESS)
695 return result;
696
697 encode_region_table(buffer, &offset, table);
698 memcpy(buffer + offset, &layout->super.magic_label, MAGIC_SIZE);
699 offset += MAGIC_SIZE;
700 memcpy(buffer + offset, &layout->super.nonce_info, NONCE_INFO_SIZE);
701 offset += NONCE_INFO_SIZE;
702 encode_u64_le(buffer, &offset, layout->super.nonce);
703 encode_u32_le(buffer, &offset, layout->super.version);
704 encode_u32_le(buffer, &offset, layout->super.block_size);
705 encode_u16_le(buffer, &offset, layout->super.index_count);
706 encode_u16_le(buffer, &offset, layout->super.max_saves);
707 encode_u32_le(buffer, &offset, 0);
708 encode_u64_le(buffer, &offset, layout->super.open_chapter_blocks);
709 encode_u64_le(buffer, &offset, layout->super.page_map_blocks);
710
711 if (is_converted_super_block(&layout->super)) {
712 encode_u64_le(buffer, &offset, layout->super.volume_offset);
713 encode_u64_le(buffer, &offset, layout->super.start_offset);
714 }
715
716 result = uds_write_to_buffered_writer(writer, buffer, offset);
717 vdo_free(buffer);
718 if (result != UDS_SUCCESS)
719 return result;
720
721 return uds_flush_buffered_writer(writer);
722 }
723
write_uds_index_config(struct index_layout * layout,struct uds_configuration * config,off_t offset)724 static int __must_check write_uds_index_config(struct index_layout *layout,
725 struct uds_configuration *config,
726 off_t offset)
727 {
728 int result;
729 struct buffered_writer *writer = NULL;
730
731 result = open_layout_writer(layout, &layout->config, offset, &writer);
732 if (result != UDS_SUCCESS)
733 return vdo_log_error_strerror(result, "failed to open config region");
734
735 result = uds_write_config_contents(writer, config, layout->super.version);
736 if (result != UDS_SUCCESS) {
737 uds_free_buffered_writer(writer);
738 return vdo_log_error_strerror(result, "failed to write config region");
739 }
740
741 result = uds_flush_buffered_writer(writer);
742 if (result != UDS_SUCCESS) {
743 uds_free_buffered_writer(writer);
744 return vdo_log_error_strerror(result, "cannot flush config writer");
745 }
746
747 uds_free_buffered_writer(writer);
748 return UDS_SUCCESS;
749 }
750
save_layout(struct index_layout * layout,off_t offset)751 static int __must_check save_layout(struct index_layout *layout, off_t offset)
752 {
753 int result;
754 struct buffered_writer *writer = NULL;
755 struct region_table *table;
756
757 result = make_layout_region_table(layout, &table);
758 if (result != UDS_SUCCESS)
759 return result;
760
761 result = open_layout_writer(layout, &layout->header, offset, &writer);
762 if (result != UDS_SUCCESS) {
763 vdo_free(table);
764 return result;
765 }
766
767 result = write_layout_header(layout, table, writer);
768 vdo_free(table);
769 uds_free_buffered_writer(writer);
770
771 return result;
772 }
773
create_index_layout(struct index_layout * layout,struct uds_configuration * config)774 static int create_index_layout(struct index_layout *layout, struct uds_configuration *config)
775 {
776 int result;
777 struct save_layout_sizes sizes;
778
779 result = compute_sizes(config, &sizes);
780 if (result != UDS_SUCCESS)
781 return result;
782
783 result = vdo_allocate(sizes.save_count, struct index_save_layout, __func__,
784 &layout->index.saves);
785 if (result != VDO_SUCCESS)
786 return result;
787
788 initialize_layout(layout, &sizes);
789
790 result = discard_index_state_data(layout);
791 if (result != UDS_SUCCESS)
792 return result;
793
794 result = write_uds_index_config(layout, config, 0);
795 if (result != UDS_SUCCESS)
796 return result;
797
798 return save_layout(layout, 0);
799 }
800
/*
 * Compute the nonce for a save slot as a secondary nonce of the volume nonce. The hashed data is
 * the slot's save data with the nonce field zeroed, followed by the slot's starting block.
 */
static u64 generate_index_save_nonce(u64 volume_nonce, struct index_save_layout *isl)
{
	struct save_nonce_data {
		struct index_save_data data;
		u64 offset;
	};
	u8 encoded[sizeof(struct save_nonce_data)];
	size_t used = 0;

	encode_u64_le(encoded, &used, isl->save_data.timestamp);
	/* The nonce field cannot take part in its own computation. */
	encode_u64_le(encoded, &used, 0);
	encode_u32_le(encoded, &used, isl->save_data.version);
	encode_u32_le(encoded, &used, 0U);
	encode_u64_le(encoded, &used, isl->index_save.start_block);
	VDO_ASSERT_LOG_ONLY(used == sizeof(encoded),
			    "%zu bytes encoded of %zu expected",
			    used, sizeof(encoded));

	return generate_secondary_nonce(volume_nonce, encoded, sizeof(encoded));
}
820
/*
 * Check whether a save slot holds a valid save. Returns the save's timestamp if the slot has
 * zones, a non-zero timestamp, and the nonce expected for it; returns 0 otherwise.
 */
static u64 validate_index_save_layout(struct index_save_layout *isl, u64 volume_nonce)
{
	const u64 timestamp = isl->save_data.timestamp;

	if ((isl->zone_count != 0) && (timestamp != 0) &&
	    (isl->save_data.nonce == generate_index_save_nonce(volume_nonce, isl)))
		return timestamp;

	return 0;
}
831
find_latest_uds_index_save_slot(struct index_layout * layout,struct index_save_layout ** isl_ptr)832 static int find_latest_uds_index_save_slot(struct index_layout *layout,
833 struct index_save_layout **isl_ptr)
834 {
835 struct index_save_layout *latest = NULL;
836 struct index_save_layout *isl;
837 unsigned int i;
838 u64 save_time = 0;
839 u64 latest_time = 0;
840
841 for (i = 0; i < layout->super.max_saves; i++) {
842 isl = &layout->index.saves[i];
843 save_time = validate_index_save_layout(isl, layout->index.nonce);
844 if (save_time > latest_time) {
845 latest = isl;
846 latest_time = save_time;
847 }
848 }
849
850 if (latest == NULL) {
851 vdo_log_error("No valid index save found");
852 return UDS_INDEX_NOT_SAVED_CLEANLY;
853 }
854
855 *isl_ptr = latest;
856 return UDS_SUCCESS;
857 }
858
uds_discard_open_chapter(struct index_layout * layout)859 int uds_discard_open_chapter(struct index_layout *layout)
860 {
861 int result;
862 struct index_save_layout *isl;
863 struct buffered_writer *writer;
864
865 result = find_latest_uds_index_save_slot(layout, &isl);
866 if (result != UDS_SUCCESS)
867 return result;
868
869 result = open_region_writer(layout, &isl->open_chapter, &writer);
870 if (result != UDS_SUCCESS)
871 return result;
872
873 result = uds_write_to_buffered_writer(writer, NULL, UDS_BLOCK_SIZE);
874 if (result != UDS_SUCCESS) {
875 uds_free_buffered_writer(writer);
876 return result;
877 }
878
879 result = uds_flush_buffered_writer(writer);
880 uds_free_buffered_writer(writer);
881 return result;
882 }
883
uds_load_index_state(struct index_layout * layout,struct uds_index * index)884 int uds_load_index_state(struct index_layout *layout, struct uds_index *index)
885 {
886 int result;
887 unsigned int zone;
888 struct index_save_layout *isl;
889 struct buffered_reader *readers[MAX_ZONES];
890
891 result = find_latest_uds_index_save_slot(layout, &isl);
892 if (result != UDS_SUCCESS)
893 return result;
894
895 index->newest_virtual_chapter = isl->state_data.newest_chapter;
896 index->oldest_virtual_chapter = isl->state_data.oldest_chapter;
897 index->last_save = isl->state_data.last_save;
898
899 result = open_region_reader(layout, &isl->open_chapter, &readers[0]);
900 if (result != UDS_SUCCESS)
901 return result;
902
903 result = uds_load_open_chapter(index, readers[0]);
904 uds_free_buffered_reader(readers[0]);
905 if (result != UDS_SUCCESS)
906 return result;
907
908 for (zone = 0; zone < isl->zone_count; zone++) {
909 result = open_region_reader(layout, &isl->volume_index_zones[zone],
910 &readers[zone]);
911 if (result != UDS_SUCCESS) {
912 for (; zone > 0; zone--)
913 uds_free_buffered_reader(readers[zone - 1]);
914
915 return result;
916 }
917 }
918
919 result = uds_load_volume_index(index->volume_index, readers, isl->zone_count);
920 for (zone = 0; zone < isl->zone_count; zone++)
921 uds_free_buffered_reader(readers[zone]);
922 if (result != UDS_SUCCESS)
923 return result;
924
925 result = open_region_reader(layout, &isl->index_page_map, &readers[0]);
926 if (result != UDS_SUCCESS)
927 return result;
928
929 result = uds_read_index_page_map(index->volume->index_page_map, readers[0]);
930 uds_free_buffered_reader(readers[0]);
931
932 return result;
933 }
934
select_oldest_index_save_layout(struct index_layout * layout)935 static struct index_save_layout *select_oldest_index_save_layout(struct index_layout *layout)
936 {
937 struct index_save_layout *oldest = NULL;
938 struct index_save_layout *isl;
939 unsigned int i;
940 u64 save_time = 0;
941 u64 oldest_time = 0;
942
943 for (i = 0; i < layout->super.max_saves; i++) {
944 isl = &layout->index.saves[i];
945 save_time = validate_index_save_layout(isl, layout->index.nonce);
946 if (oldest == NULL || save_time < oldest_time) {
947 oldest = isl;
948 oldest_time = save_time;
949 }
950 }
951
952 return oldest;
953 }
954
/*
 * Prepare a save slot for a new save with zone_count volume index zones. The save data gets a
 * fresh timestamp and nonce, and the slot is divided into a header block, the page map, equally
 * sized volume index zones, the open chapter, and whatever blocks are left over as free space.
 *
 * zone_count is used as a divisor and must not be zero.
 */
static void instantiate_index_save_layout(struct index_save_layout *isl,
					  struct super_block_data *super,
					  u64 volume_nonce, unsigned int zone_count)
{
	u64 next_block = isl->index_save.start_block;
	u64 spare_blocks;
	u64 blocks_per_zone;
	unsigned int zone;

	isl->zone_count = zone_count;
	memset(&isl->save_data, 0, sizeof(isl->save_data));
	isl->save_data.timestamp = ktime_to_ms(current_time_ns(CLOCK_REALTIME));
	isl->save_data.version = 1;
	/* The nonce depends on the timestamp and version set just above. */
	isl->save_data.nonce = generate_index_save_nonce(volume_nonce, isl);

	isl->header = (struct layout_region) {
		.start_block = next_block,
		.block_count = 1,
		.kind = RL_KIND_HEADER,
		.instance = RL_SOLE_INSTANCE,
	};
	next_block += 1;

	isl->index_page_map = (struct layout_region) {
		.start_block = next_block,
		.block_count = super->page_map_blocks,
		.kind = RL_KIND_INDEX_PAGE_MAP,
		.instance = RL_SOLE_INSTANCE,
	};
	next_block += super->page_map_blocks;

	/* Blocks not taken by the header, the page map, or the open chapter go to the zones. */
	spare_blocks = (isl->index_save.block_count - 1 -
			super->page_map_blocks -
			super->open_chapter_blocks);
	blocks_per_zone = spare_blocks / zone_count;
	for (zone = 0; zone < zone_count; zone++) {
		isl->volume_index_zones[zone] = (struct layout_region) {
			.start_block = next_block,
			.block_count = blocks_per_zone,
			.kind = RL_KIND_VOLUME_INDEX,
			.instance = zone,
		};
		next_block += blocks_per_zone;
	}

	/* Only the remainder of the division stays free. */
	spare_blocks -= blocks_per_zone * zone_count;

	isl->open_chapter = (struct layout_region) {
		.start_block = next_block,
		.block_count = super->open_chapter_blocks,
		.kind = RL_KIND_OPEN_CHAPTER,
		.instance = RL_SOLE_INSTANCE,
	};
	next_block += super->open_chapter_blocks;

	isl->free_space = (struct layout_region) {
		.start_block = next_block,
		.block_count = spare_blocks,
		.kind = RL_KIND_EMPTY,
		.instance = RL_SOLE_INSTANCE,
	};
}
1018
setup_uds_index_save_slot(struct index_layout * layout,unsigned int zone_count,struct index_save_layout ** isl_ptr)1019 static int setup_uds_index_save_slot(struct index_layout *layout,
1020 unsigned int zone_count,
1021 struct index_save_layout **isl_ptr)
1022 {
1023 int result;
1024 struct index_save_layout *isl;
1025
1026 isl = select_oldest_index_save_layout(layout);
1027 result = invalidate_old_save(layout, isl);
1028 if (result != UDS_SUCCESS)
1029 return result;
1030
1031 instantiate_index_save_layout(isl, &layout->super, layout->index.nonce,
1032 zone_count);
1033
1034 *isl_ptr = isl;
1035 return UDS_SUCCESS;
1036 }
1037
cancel_uds_index_save(struct index_save_layout * isl)1038 static void cancel_uds_index_save(struct index_save_layout *isl)
1039 {
1040 memset(&isl->save_data, 0, sizeof(isl->save_data));
1041 memset(&isl->state_data, 0, sizeof(isl->state_data));
1042 isl->zone_count = 0;
1043 }
1044
uds_save_index_state(struct index_layout * layout,struct uds_index * index)1045 int uds_save_index_state(struct index_layout *layout, struct uds_index *index)
1046 {
1047 int result;
1048 unsigned int zone;
1049 struct index_save_layout *isl;
1050 struct buffered_writer *writers[MAX_ZONES];
1051
1052 result = setup_uds_index_save_slot(layout, index->zone_count, &isl);
1053 if (result != UDS_SUCCESS)
1054 return result;
1055
1056 isl->state_data = (struct index_state_data301) {
1057 .newest_chapter = index->newest_virtual_chapter,
1058 .oldest_chapter = index->oldest_virtual_chapter,
1059 .last_save = index->last_save,
1060 };
1061
1062 result = open_region_writer(layout, &isl->open_chapter, &writers[0]);
1063 if (result != UDS_SUCCESS) {
1064 cancel_uds_index_save(isl);
1065 return result;
1066 }
1067
1068 result = uds_save_open_chapter(index, writers[0]);
1069 uds_free_buffered_writer(writers[0]);
1070 if (result != UDS_SUCCESS) {
1071 cancel_uds_index_save(isl);
1072 return result;
1073 }
1074
1075 for (zone = 0; zone < index->zone_count; zone++) {
1076 result = open_region_writer(layout, &isl->volume_index_zones[zone],
1077 &writers[zone]);
1078 if (result != UDS_SUCCESS) {
1079 for (; zone > 0; zone--)
1080 uds_free_buffered_writer(writers[zone - 1]);
1081
1082 cancel_uds_index_save(isl);
1083 return result;
1084 }
1085 }
1086
1087 result = uds_save_volume_index(index->volume_index, writers, index->zone_count);
1088 for (zone = 0; zone < index->zone_count; zone++)
1089 uds_free_buffered_writer(writers[zone]);
1090 if (result != UDS_SUCCESS) {
1091 cancel_uds_index_save(isl);
1092 return result;
1093 }
1094
1095 result = open_region_writer(layout, &isl->index_page_map, &writers[0]);
1096 if (result != UDS_SUCCESS) {
1097 cancel_uds_index_save(isl);
1098 return result;
1099 }
1100
1101 result = uds_write_index_page_map(index->volume->index_page_map, writers[0]);
1102 uds_free_buffered_writer(writers[0]);
1103 if (result != UDS_SUCCESS) {
1104 cancel_uds_index_save(isl);
1105 return result;
1106 }
1107
1108 return write_index_save_layout(layout, isl);
1109 }
1110
load_region_table(struct buffered_reader * reader,struct region_table ** table_ptr)1111 static int __must_check load_region_table(struct buffered_reader *reader,
1112 struct region_table **table_ptr)
1113 {
1114 int result;
1115 unsigned int i;
1116 struct region_header header;
1117 struct region_table *table;
1118 u8 buffer[sizeof(struct region_header)];
1119 size_t offset = 0;
1120
1121 result = uds_read_from_buffered_reader(reader, buffer, sizeof(buffer));
1122 if (result != UDS_SUCCESS)
1123 return vdo_log_error_strerror(result, "cannot read region table header");
1124
1125 decode_u64_le(buffer, &offset, &header.magic);
1126 decode_u64_le(buffer, &offset, &header.region_blocks);
1127 decode_u16_le(buffer, &offset, &header.type);
1128 decode_u16_le(buffer, &offset, &header.version);
1129 decode_u16_le(buffer, &offset, &header.region_count);
1130 decode_u16_le(buffer, &offset, &header.payload);
1131
1132 if (header.magic != REGION_MAGIC)
1133 return UDS_NO_INDEX;
1134
1135 if (header.version != 1) {
1136 return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1137 "unknown region table version %hu",
1138 header.version);
1139 }
1140
1141 result = vdo_allocate_extended(struct region_table, header.region_count,
1142 struct layout_region,
1143 "single file layout region table", &table);
1144 if (result != VDO_SUCCESS)
1145 return result;
1146
1147 table->header = header;
1148 for (i = 0; i < header.region_count; i++) {
1149 u8 region_buffer[sizeof(struct layout_region)];
1150
1151 offset = 0;
1152 result = uds_read_from_buffered_reader(reader, region_buffer,
1153 sizeof(region_buffer));
1154 if (result != UDS_SUCCESS) {
1155 vdo_free(table);
1156 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1157 "cannot read region table layouts");
1158 }
1159
1160 decode_u64_le(region_buffer, &offset, &table->regions[i].start_block);
1161 decode_u64_le(region_buffer, &offset, &table->regions[i].block_count);
1162 offset += sizeof(u32);
1163 decode_u16_le(region_buffer, &offset, &table->regions[i].kind);
1164 decode_u16_le(region_buffer, &offset, &table->regions[i].instance);
1165 }
1166
1167 *table_ptr = table;
1168 return UDS_SUCCESS;
1169 }
1170
read_super_block_data(struct buffered_reader * reader,struct index_layout * layout,size_t saved_size)1171 static int __must_check read_super_block_data(struct buffered_reader *reader,
1172 struct index_layout *layout,
1173 size_t saved_size)
1174 {
1175 int result;
1176 struct super_block_data *super = &layout->super;
1177 u8 *buffer;
1178 size_t offset = 0;
1179
1180 result = vdo_allocate(saved_size, u8, "super block data", &buffer);
1181 if (result != VDO_SUCCESS)
1182 return result;
1183
1184 result = uds_read_from_buffered_reader(reader, buffer, saved_size);
1185 if (result != UDS_SUCCESS) {
1186 vdo_free(buffer);
1187 return vdo_log_error_strerror(result, "cannot read region table header");
1188 }
1189
1190 memcpy(&super->magic_label, buffer, MAGIC_SIZE);
1191 offset += MAGIC_SIZE;
1192 memcpy(&super->nonce_info, buffer + offset, NONCE_INFO_SIZE);
1193 offset += NONCE_INFO_SIZE;
1194 decode_u64_le(buffer, &offset, &super->nonce);
1195 decode_u32_le(buffer, &offset, &super->version);
1196 decode_u32_le(buffer, &offset, &super->block_size);
1197 decode_u16_le(buffer, &offset, &super->index_count);
1198 decode_u16_le(buffer, &offset, &super->max_saves);
1199 offset += sizeof(u32);
1200 decode_u64_le(buffer, &offset, &super->open_chapter_blocks);
1201 decode_u64_le(buffer, &offset, &super->page_map_blocks);
1202
1203 if (is_converted_super_block(super)) {
1204 decode_u64_le(buffer, &offset, &super->volume_offset);
1205 decode_u64_le(buffer, &offset, &super->start_offset);
1206 } else {
1207 super->volume_offset = 0;
1208 super->start_offset = 0;
1209 }
1210
1211 vdo_free(buffer);
1212
1213 if (memcmp(super->magic_label, LAYOUT_MAGIC, MAGIC_SIZE) != 0)
1214 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1215 "unknown superblock magic label");
1216
1217 if ((super->version < SUPER_VERSION_MINIMUM) ||
1218 (super->version == 4) || (super->version == 5) || (super->version == 6) ||
1219 (super->version > SUPER_VERSION_MAXIMUM)) {
1220 return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1221 "unknown superblock version number %u",
1222 super->version);
1223 }
1224
1225 if (super->volume_offset < super->start_offset) {
1226 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1227 "inconsistent offsets (start %llu, volume %llu)",
1228 (unsigned long long) super->start_offset,
1229 (unsigned long long) super->volume_offset);
1230 }
1231
1232 /* Sub-indexes are no longer used but the layout retains this field. */
1233 if (super->index_count != 1) {
1234 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1235 "invalid subindex count %u",
1236 super->index_count);
1237 }
1238
1239 if (generate_primary_nonce(super->nonce_info, sizeof(super->nonce_info)) != super->nonce) {
1240 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1241 "inconsistent superblock nonce");
1242 }
1243
1244 return UDS_SUCCESS;
1245 }
1246
verify_region(struct layout_region * lr,u64 start_block,enum region_kind kind,unsigned int instance)1247 static int __must_check verify_region(struct layout_region *lr, u64 start_block,
1248 enum region_kind kind, unsigned int instance)
1249 {
1250 if (lr->start_block != start_block)
1251 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1252 "incorrect layout region offset");
1253
1254 if (lr->kind != kind)
1255 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1256 "incorrect layout region kind");
1257
1258 if (lr->instance != instance) {
1259 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1260 "incorrect layout region instance");
1261 }
1262
1263 return UDS_SUCCESS;
1264 }
1265
verify_sub_index(struct index_layout * layout,u64 start_block,struct region_table * table)1266 static int __must_check verify_sub_index(struct index_layout *layout, u64 start_block,
1267 struct region_table *table)
1268 {
1269 int result;
1270 unsigned int i;
1271 struct sub_index_layout *sil = &layout->index;
1272 u64 next_block = start_block;
1273
1274 sil->sub_index = table->regions[2];
1275 result = verify_region(&sil->sub_index, next_block, RL_KIND_INDEX, 0);
1276 if (result != UDS_SUCCESS)
1277 return result;
1278
1279 define_sub_index_nonce(layout);
1280
1281 sil->volume = table->regions[3];
1282 result = verify_region(&sil->volume, next_block, RL_KIND_VOLUME,
1283 RL_SOLE_INSTANCE);
1284 if (result != UDS_SUCCESS)
1285 return result;
1286
1287 next_block += sil->volume.block_count + layout->super.volume_offset;
1288
1289 for (i = 0; i < layout->super.max_saves; i++) {
1290 sil->saves[i].index_save = table->regions[i + 4];
1291 result = verify_region(&sil->saves[i].index_save, next_block,
1292 RL_KIND_SAVE, i);
1293 if (result != UDS_SUCCESS)
1294 return result;
1295
1296 next_block += sil->saves[i].index_save.block_count;
1297 }
1298
1299 next_block -= layout->super.volume_offset;
1300 if (next_block != start_block + sil->sub_index.block_count) {
1301 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1302 "sub index region does not span all saves");
1303 }
1304
1305 return UDS_SUCCESS;
1306 }
1307
reconstitute_layout(struct index_layout * layout,struct region_table * table,u64 first_block)1308 static int __must_check reconstitute_layout(struct index_layout *layout,
1309 struct region_table *table, u64 first_block)
1310 {
1311 int result;
1312 u64 next_block = first_block;
1313
1314 result = vdo_allocate(layout->super.max_saves, struct index_save_layout,
1315 __func__, &layout->index.saves);
1316 if (result != VDO_SUCCESS)
1317 return result;
1318
1319 layout->total_blocks = table->header.region_blocks;
1320
1321 layout->header = table->regions[0];
1322 result = verify_region(&layout->header, next_block++, RL_KIND_HEADER,
1323 RL_SOLE_INSTANCE);
1324 if (result != UDS_SUCCESS)
1325 return result;
1326
1327 layout->config = table->regions[1];
1328 result = verify_region(&layout->config, next_block++, RL_KIND_CONFIG,
1329 RL_SOLE_INSTANCE);
1330 if (result != UDS_SUCCESS)
1331 return result;
1332
1333 result = verify_sub_index(layout, next_block, table);
1334 if (result != UDS_SUCCESS)
1335 return result;
1336
1337 next_block += layout->index.sub_index.block_count;
1338
1339 layout->seal = table->regions[table->header.region_count - 1];
1340 result = verify_region(&layout->seal, next_block + layout->super.volume_offset,
1341 RL_KIND_SEAL, RL_SOLE_INSTANCE);
1342 if (result != UDS_SUCCESS)
1343 return result;
1344
1345 if (++next_block != (first_block + layout->total_blocks)) {
1346 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1347 "layout table does not span total blocks");
1348 }
1349
1350 return UDS_SUCCESS;
1351 }
1352
load_super_block(struct index_layout * layout,size_t block_size,u64 first_block,struct buffered_reader * reader)1353 static int __must_check load_super_block(struct index_layout *layout, size_t block_size,
1354 u64 first_block, struct buffered_reader *reader)
1355 {
1356 int result;
1357 struct region_table *table = NULL;
1358 struct super_block_data *super = &layout->super;
1359
1360 result = load_region_table(reader, &table);
1361 if (result != UDS_SUCCESS)
1362 return result;
1363
1364 if (table->header.type != RH_TYPE_SUPER) {
1365 vdo_free(table);
1366 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1367 "not a superblock region table");
1368 }
1369
1370 result = read_super_block_data(reader, layout, table->header.payload);
1371 if (result != UDS_SUCCESS) {
1372 vdo_free(table);
1373 return vdo_log_error_strerror(result, "unknown superblock format");
1374 }
1375
1376 if (super->block_size != block_size) {
1377 vdo_free(table);
1378 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1379 "superblock saved block_size %u differs from supplied block_size %zu",
1380 super->block_size, block_size);
1381 }
1382
1383 first_block -= (super->volume_offset - super->start_offset);
1384 result = reconstitute_layout(layout, table, first_block);
1385 vdo_free(table);
1386 return result;
1387 }
1388
read_index_save_data(struct buffered_reader * reader,struct index_save_layout * isl,size_t saved_size)1389 static int __must_check read_index_save_data(struct buffered_reader *reader,
1390 struct index_save_layout *isl,
1391 size_t saved_size)
1392 {
1393 int result;
1394 struct index_state_version file_version;
1395 u8 buffer[sizeof(struct index_save_data) + sizeof(struct index_state_data301)];
1396 size_t offset = 0;
1397
1398 if (saved_size != sizeof(buffer)) {
1399 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1400 "unexpected index save data size %zu",
1401 saved_size);
1402 }
1403
1404 result = uds_read_from_buffered_reader(reader, buffer, sizeof(buffer));
1405 if (result != UDS_SUCCESS)
1406 return vdo_log_error_strerror(result, "cannot read index save data");
1407
1408 decode_u64_le(buffer, &offset, &isl->save_data.timestamp);
1409 decode_u64_le(buffer, &offset, &isl->save_data.nonce);
1410 decode_u32_le(buffer, &offset, &isl->save_data.version);
1411 offset += sizeof(u32);
1412
1413 if (isl->save_data.version > 1) {
1414 return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1415 "unknown index save version number %u",
1416 isl->save_data.version);
1417 }
1418
1419 decode_s32_le(buffer, &offset, &file_version.signature);
1420 decode_s32_le(buffer, &offset, &file_version.version_id);
1421
1422 if ((file_version.signature != INDEX_STATE_VERSION_301.signature) ||
1423 (file_version.version_id != INDEX_STATE_VERSION_301.version_id)) {
1424 return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1425 "index state version %d,%d is unsupported",
1426 file_version.signature,
1427 file_version.version_id);
1428 }
1429
1430 decode_u64_le(buffer, &offset, &isl->state_data.newest_chapter);
1431 decode_u64_le(buffer, &offset, &isl->state_data.oldest_chapter);
1432 decode_u64_le(buffer, &offset, &isl->state_data.last_save);
1433 /* Skip past some historical fields that are now unused */
1434 offset += sizeof(u32) + sizeof(u32);
1435 return UDS_SUCCESS;
1436 }
1437
reconstruct_index_save(struct index_save_layout * isl,struct region_table * table)1438 static int __must_check reconstruct_index_save(struct index_save_layout *isl,
1439 struct region_table *table)
1440 {
1441 int result;
1442 unsigned int z;
1443 struct layout_region *last_region;
1444 u64 next_block = isl->index_save.start_block;
1445 u64 last_block = next_block + isl->index_save.block_count;
1446
1447 isl->zone_count = table->header.region_count - 3;
1448
1449 last_region = &table->regions[table->header.region_count - 1];
1450 if (last_region->kind == RL_KIND_EMPTY) {
1451 isl->free_space = *last_region;
1452 isl->zone_count--;
1453 } else {
1454 isl->free_space = (struct layout_region) {
1455 .start_block = last_block,
1456 .block_count = 0,
1457 .kind = RL_KIND_EMPTY,
1458 .instance = RL_SOLE_INSTANCE,
1459 };
1460 }
1461
1462 isl->header = table->regions[0];
1463 result = verify_region(&isl->header, next_block++, RL_KIND_HEADER,
1464 RL_SOLE_INSTANCE);
1465 if (result != UDS_SUCCESS)
1466 return result;
1467
1468 isl->index_page_map = table->regions[1];
1469 result = verify_region(&isl->index_page_map, next_block, RL_KIND_INDEX_PAGE_MAP,
1470 RL_SOLE_INSTANCE);
1471 if (result != UDS_SUCCESS)
1472 return result;
1473
1474 next_block += isl->index_page_map.block_count;
1475
1476 for (z = 0; z < isl->zone_count; z++) {
1477 isl->volume_index_zones[z] = table->regions[z + 2];
1478 result = verify_region(&isl->volume_index_zones[z], next_block,
1479 RL_KIND_VOLUME_INDEX, z);
1480 if (result != UDS_SUCCESS)
1481 return result;
1482
1483 next_block += isl->volume_index_zones[z].block_count;
1484 }
1485
1486 isl->open_chapter = table->regions[isl->zone_count + 2];
1487 result = verify_region(&isl->open_chapter, next_block, RL_KIND_OPEN_CHAPTER,
1488 RL_SOLE_INSTANCE);
1489 if (result != UDS_SUCCESS)
1490 return result;
1491
1492 next_block += isl->open_chapter.block_count;
1493
1494 result = verify_region(&isl->free_space, next_block, RL_KIND_EMPTY,
1495 RL_SOLE_INSTANCE);
1496 if (result != UDS_SUCCESS)
1497 return result;
1498
1499 next_block += isl->free_space.block_count;
1500 if (next_block != last_block) {
1501 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1502 "index save layout table incomplete");
1503 }
1504
1505 return UDS_SUCCESS;
1506 }
1507
load_index_save(struct index_save_layout * isl,struct buffered_reader * reader,unsigned int instance)1508 static int __must_check load_index_save(struct index_save_layout *isl,
1509 struct buffered_reader *reader,
1510 unsigned int instance)
1511 {
1512 int result;
1513 struct region_table *table = NULL;
1514
1515 result = load_region_table(reader, &table);
1516 if (result != UDS_SUCCESS) {
1517 return vdo_log_error_strerror(result, "cannot read index save %u header",
1518 instance);
1519 }
1520
1521 if (table->header.region_blocks != isl->index_save.block_count) {
1522 u64 region_blocks = table->header.region_blocks;
1523
1524 vdo_free(table);
1525 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1526 "unexpected index save %u region block count %llu",
1527 instance,
1528 (unsigned long long) region_blocks);
1529 }
1530
1531 if (table->header.type == RH_TYPE_UNSAVED) {
1532 vdo_free(table);
1533 reset_index_save_layout(isl, 0);
1534 return UDS_SUCCESS;
1535 }
1536
1537
1538 if (table->header.type != RH_TYPE_SAVE) {
1539 vdo_log_error_strerror(UDS_CORRUPT_DATA,
1540 "unexpected index save %u header type %u",
1541 instance, table->header.type);
1542 vdo_free(table);
1543 return UDS_CORRUPT_DATA;
1544 }
1545
1546 result = read_index_save_data(reader, isl, table->header.payload);
1547 if (result != UDS_SUCCESS) {
1548 vdo_free(table);
1549 return vdo_log_error_strerror(result,
1550 "unknown index save %u data format",
1551 instance);
1552 }
1553
1554 result = reconstruct_index_save(isl, table);
1555 vdo_free(table);
1556 if (result != UDS_SUCCESS) {
1557 return vdo_log_error_strerror(result, "cannot reconstruct index save %u",
1558 instance);
1559 }
1560
1561 return UDS_SUCCESS;
1562 }
1563
load_sub_index_regions(struct index_layout * layout)1564 static int __must_check load_sub_index_regions(struct index_layout *layout)
1565 {
1566 int result;
1567 unsigned int j;
1568 struct index_save_layout *isl;
1569 struct buffered_reader *reader;
1570
1571 for (j = 0; j < layout->super.max_saves; j++) {
1572 isl = &layout->index.saves[j];
1573 result = open_region_reader(layout, &isl->index_save, &reader);
1574
1575 if (result != UDS_SUCCESS) {
1576 vdo_log_error_strerror(result,
1577 "cannot get reader for index 0 save %u",
1578 j);
1579 return result;
1580 }
1581
1582 result = load_index_save(isl, reader, j);
1583 uds_free_buffered_reader(reader);
1584 if (result != UDS_SUCCESS) {
1585 /* Another save slot might be valid. */
1586 reset_index_save_layout(isl, 0);
1587 continue;
1588 }
1589 }
1590
1591 return UDS_SUCCESS;
1592 }
1593
verify_uds_index_config(struct index_layout * layout,struct uds_configuration * config)1594 static int __must_check verify_uds_index_config(struct index_layout *layout,
1595 struct uds_configuration *config)
1596 {
1597 int result;
1598 struct buffered_reader *reader = NULL;
1599 u64 offset;
1600
1601 offset = layout->super.volume_offset - layout->super.start_offset;
1602 result = open_layout_reader(layout, &layout->config, offset, &reader);
1603 if (result != UDS_SUCCESS)
1604 return vdo_log_error_strerror(result, "failed to open config reader");
1605
1606 result = uds_validate_config_contents(reader, config);
1607 if (result != UDS_SUCCESS) {
1608 uds_free_buffered_reader(reader);
1609 return vdo_log_error_strerror(result, "failed to read config region");
1610 }
1611
1612 uds_free_buffered_reader(reader);
1613 return UDS_SUCCESS;
1614 }
1615
load_index_layout(struct index_layout * layout,struct uds_configuration * config)1616 static int load_index_layout(struct index_layout *layout, struct uds_configuration *config)
1617 {
1618 int result;
1619 struct buffered_reader *reader;
1620
1621 result = uds_make_buffered_reader(layout->factory,
1622 layout->offset / UDS_BLOCK_SIZE, 1, &reader);
1623 if (result != UDS_SUCCESS)
1624 return vdo_log_error_strerror(result, "unable to read superblock");
1625
1626 result = load_super_block(layout, UDS_BLOCK_SIZE,
1627 layout->offset / UDS_BLOCK_SIZE, reader);
1628 uds_free_buffered_reader(reader);
1629 if (result != UDS_SUCCESS)
1630 return result;
1631
1632 result = verify_uds_index_config(layout, config);
1633 if (result != UDS_SUCCESS)
1634 return result;
1635
1636 return load_sub_index_regions(layout);
1637 }
1638
create_layout_factory(struct index_layout * layout,const struct uds_configuration * config)1639 static int create_layout_factory(struct index_layout *layout,
1640 const struct uds_configuration *config)
1641 {
1642 int result;
1643 size_t writable_size;
1644 struct io_factory *factory = NULL;
1645
1646 result = uds_make_io_factory(config->bdev, &factory);
1647 if (result != UDS_SUCCESS)
1648 return result;
1649
1650 writable_size = uds_get_writable_size(factory) & -UDS_BLOCK_SIZE;
1651 if (writable_size < config->size + config->offset) {
1652 uds_put_io_factory(factory);
1653 vdo_log_error("index storage (%zu) is smaller than the requested size %zu",
1654 writable_size, config->size + config->offset);
1655 return -ENOSPC;
1656 }
1657
1658 layout->factory = factory;
1659 layout->factory_size = (config->size > 0) ? config->size : writable_size;
1660 layout->offset = config->offset;
1661 return UDS_SUCCESS;
1662 }
1663
uds_make_index_layout(struct uds_configuration * config,bool new_layout,struct index_layout ** layout_ptr)1664 int uds_make_index_layout(struct uds_configuration *config, bool new_layout,
1665 struct index_layout **layout_ptr)
1666 {
1667 int result;
1668 struct index_layout *layout = NULL;
1669 struct save_layout_sizes sizes;
1670
1671 result = compute_sizes(config, &sizes);
1672 if (result != UDS_SUCCESS)
1673 return result;
1674
1675 result = vdo_allocate(1, struct index_layout, __func__, &layout);
1676 if (result != VDO_SUCCESS)
1677 return result;
1678
1679 result = create_layout_factory(layout, config);
1680 if (result != UDS_SUCCESS) {
1681 uds_free_index_layout(layout);
1682 return result;
1683 }
1684
1685 if (layout->factory_size < sizes.total_size) {
1686 vdo_log_error("index storage (%zu) is smaller than the required size %llu",
1687 layout->factory_size,
1688 (unsigned long long) sizes.total_size);
1689 uds_free_index_layout(layout);
1690 return -ENOSPC;
1691 }
1692
1693 if (new_layout)
1694 result = create_index_layout(layout, config);
1695 else
1696 result = load_index_layout(layout, config);
1697 if (result != UDS_SUCCESS) {
1698 uds_free_index_layout(layout);
1699 return result;
1700 }
1701
1702 *layout_ptr = layout;
1703 return UDS_SUCCESS;
1704 }
1705
uds_free_index_layout(struct index_layout * layout)1706 void uds_free_index_layout(struct index_layout *layout)
1707 {
1708 if (layout == NULL)
1709 return;
1710
1711 vdo_free(layout->index.saves);
1712 if (layout->factory != NULL)
1713 uds_put_io_factory(layout->factory);
1714
1715 vdo_free(layout);
1716 }
1717
uds_replace_index_layout_storage(struct index_layout * layout,struct block_device * bdev)1718 int uds_replace_index_layout_storage(struct index_layout *layout,
1719 struct block_device *bdev)
1720 {
1721 return uds_replace_storage(layout->factory, bdev);
1722 }
1723
1724 /* Obtain a dm_bufio_client for the volume region. */
uds_open_volume_bufio(struct index_layout * layout,size_t block_size,unsigned int reserved_buffers,struct dm_bufio_client ** client_ptr)1725 int uds_open_volume_bufio(struct index_layout *layout, size_t block_size,
1726 unsigned int reserved_buffers,
1727 struct dm_bufio_client **client_ptr)
1728 {
1729 off_t offset = (layout->index.volume.start_block +
1730 layout->super.volume_offset -
1731 layout->super.start_offset);
1732
1733 return uds_make_bufio(layout->factory, offset, block_size, reserved_buffers,
1734 client_ptr);
1735 }
1736
uds_get_volume_nonce(struct index_layout * layout)1737 u64 uds_get_volume_nonce(struct index_layout *layout)
1738 {
1739 return layout->index.nonce;
1740 }
1741