1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright 2023 Red Hat 4 */ 5 6 #include <linux/atomic.h> 7 #include <linux/bitops.h> 8 #include <linux/completion.h> 9 #include <linux/delay.h> 10 #include <linux/device-mapper.h> 11 #include <linux/err.h> 12 #include <linux/module.h> 13 #include <linux/mutex.h> 14 #include <linux/spinlock.h> 15 16 #include "admin-state.h" 17 #include "block-map.h" 18 #include "completion.h" 19 #include "constants.h" 20 #include "data-vio.h" 21 #include "dedupe.h" 22 #include "dump.h" 23 #include "encodings.h" 24 #include "errors.h" 25 #include "flush.h" 26 #include "io-submitter.h" 27 #include "logger.h" 28 #include "memory-alloc.h" 29 #include "message-stats.h" 30 #include "recovery-journal.h" 31 #include "repair.h" 32 #include "slab-depot.h" 33 #include "status-codes.h" 34 #include "string-utils.h" 35 #include "thread-device.h" 36 #include "thread-registry.h" 37 #include "thread-utils.h" 38 #include "types.h" 39 #include "vdo.h" 40 #include "vio.h" 41 42 enum admin_phases { 43 GROW_LOGICAL_PHASE_START, 44 GROW_LOGICAL_PHASE_GROW_BLOCK_MAP, 45 GROW_LOGICAL_PHASE_END, 46 GROW_LOGICAL_PHASE_ERROR, 47 GROW_PHYSICAL_PHASE_START, 48 GROW_PHYSICAL_PHASE_COPY_SUMMARY, 49 GROW_PHYSICAL_PHASE_UPDATE_COMPONENTS, 50 GROW_PHYSICAL_PHASE_USE_NEW_SLABS, 51 GROW_PHYSICAL_PHASE_END, 52 GROW_PHYSICAL_PHASE_ERROR, 53 LOAD_PHASE_START, 54 LOAD_PHASE_LOAD_DEPOT, 55 LOAD_PHASE_MAKE_DIRTY, 56 LOAD_PHASE_PREPARE_TO_ALLOCATE, 57 LOAD_PHASE_SCRUB_SLABS, 58 LOAD_PHASE_DATA_REDUCTION, 59 LOAD_PHASE_FINISHED, 60 LOAD_PHASE_DRAIN_JOURNAL, 61 LOAD_PHASE_WAIT_FOR_READ_ONLY, 62 PRE_LOAD_PHASE_START, 63 PRE_LOAD_PHASE_LOAD_COMPONENTS, 64 PRE_LOAD_PHASE_END, 65 PREPARE_GROW_PHYSICAL_PHASE_START, 66 RESUME_PHASE_START, 67 RESUME_PHASE_ALLOW_READ_ONLY_MODE, 68 RESUME_PHASE_DEDUPE, 69 RESUME_PHASE_DEPOT, 70 RESUME_PHASE_JOURNAL, 71 RESUME_PHASE_BLOCK_MAP, 72 RESUME_PHASE_LOGICAL_ZONES, 73 RESUME_PHASE_PACKER, 74 RESUME_PHASE_FLUSHER, 75 RESUME_PHASE_DATA_VIOS, 76 RESUME_PHASE_END, 77 SUSPEND_PHASE_START, 78 SUSPEND_PHASE_PACKER, 79 SUSPEND_PHASE_DATA_VIOS, 80 SUSPEND_PHASE_DEDUPE, 81 SUSPEND_PHASE_FLUSHES, 82 SUSPEND_PHASE_LOGICAL_ZONES, 83 SUSPEND_PHASE_BLOCK_MAP, 84 SUSPEND_PHASE_JOURNAL, 85 SUSPEND_PHASE_DEPOT, 86 SUSPEND_PHASE_READ_ONLY_WAIT, 87 SUSPEND_PHASE_WRITE_SUPER_BLOCK, 88 SUSPEND_PHASE_END, 89 }; 90 91 static const char * const ADMIN_PHASE_NAMES[] = { 92 "GROW_LOGICAL_PHASE_START", 93 "GROW_LOGICAL_PHASE_GROW_BLOCK_MAP", 94 "GROW_LOGICAL_PHASE_END", 95 "GROW_LOGICAL_PHASE_ERROR", 96 "GROW_PHYSICAL_PHASE_START", 97 "GROW_PHYSICAL_PHASE_COPY_SUMMARY", 98 "GROW_PHYSICAL_PHASE_UPDATE_COMPONENTS", 99 "GROW_PHYSICAL_PHASE_USE_NEW_SLABS", 100 "GROW_PHYSICAL_PHASE_END", 101 "GROW_PHYSICAL_PHASE_ERROR", 102 "LOAD_PHASE_START", 103 "LOAD_PHASE_LOAD_DEPOT", 104 "LOAD_PHASE_MAKE_DIRTY", 105 "LOAD_PHASE_PREPARE_TO_ALLOCATE", 106 "LOAD_PHASE_SCRUB_SLABS", 107 "LOAD_PHASE_DATA_REDUCTION", 108 "LOAD_PHASE_FINISHED", 109 "LOAD_PHASE_DRAIN_JOURNAL", 110 "LOAD_PHASE_WAIT_FOR_READ_ONLY", 111 "PRE_LOAD_PHASE_START", 112 "PRE_LOAD_PHASE_LOAD_COMPONENTS", 113 "PRE_LOAD_PHASE_END", 114 "PREPARE_GROW_PHYSICAL_PHASE_START", 115 "RESUME_PHASE_START", 116 "RESUME_PHASE_ALLOW_READ_ONLY_MODE", 117 "RESUME_PHASE_DEDUPE", 118 "RESUME_PHASE_DEPOT", 119 "RESUME_PHASE_JOURNAL", 120 "RESUME_PHASE_BLOCK_MAP", 121 "RESUME_PHASE_LOGICAL_ZONES", 122 "RESUME_PHASE_PACKER", 123 "RESUME_PHASE_FLUSHER", 124 "RESUME_PHASE_DATA_VIOS", 125 "RESUME_PHASE_END", 126 "SUSPEND_PHASE_START", 127 "SUSPEND_PHASE_PACKER", 128 
"SUSPEND_PHASE_DATA_VIOS", 129 "SUSPEND_PHASE_DEDUPE", 130 "SUSPEND_PHASE_FLUSHES", 131 "SUSPEND_PHASE_LOGICAL_ZONES", 132 "SUSPEND_PHASE_BLOCK_MAP", 133 "SUSPEND_PHASE_JOURNAL", 134 "SUSPEND_PHASE_DEPOT", 135 "SUSPEND_PHASE_READ_ONLY_WAIT", 136 "SUSPEND_PHASE_WRITE_SUPER_BLOCK", 137 "SUSPEND_PHASE_END", 138 }; 139 140 /* If we bump this, update the arrays below */ 141 #define TABLE_VERSION 4 142 143 /* arrays for handling different table versions */ 144 static const u8 REQUIRED_ARGC[] = { 10, 12, 9, 7, 6 }; 145 /* pool name no longer used. only here for verification of older versions */ 146 static const u8 POOL_NAME_ARG_INDEX[] = { 8, 10, 8 }; 147 148 /* 149 * Track in-use instance numbers using a flat bit array. 150 * 151 * O(n) run time isn't ideal, but if we have 1000 VDO devices in use simultaneously we still only 152 * need to scan 16 words, so it's not likely to be a big deal compared to other resource usage. 153 */ 154 155 /* 156 * This minimum size for the bit array creates a numbering space of 0-999, which allows 157 * successive starts of the same volume to have different instance numbers in any 158 * reasonably-sized test. Changing instances on restart allows vdoMonReport to detect that 159 * the ephemeral stats have reset to zero. 160 */ 161 #define BIT_COUNT_MINIMUM 1000 162 /* Grow the bit array by this many bits when needed */ 163 #define BIT_COUNT_INCREMENT 100 164 165 struct instance_tracker { 166 unsigned int bit_count; 167 unsigned long *words; 168 unsigned int count; 169 unsigned int next; 170 }; 171 172 static DEFINE_MUTEX(instances_lock); 173 static struct instance_tracker instances; 174 175 /** 176 * free_device_config() - Free a device config created by parse_device_config(). 177 * @config: The config to free. 178 */ 179 static void free_device_config(struct device_config *config) 180 { 181 if (config == NULL) 182 return; 183 184 if (config->owned_device != NULL) 185 dm_put_device(config->owning_target, config->owned_device); 186 187 vdo_free(config->parent_device_name); 188 vdo_free(config->original_string); 189 190 /* Reduce the chance a use-after-free (as in BZ 1669960) happens to work. */ 191 memset(config, 0, sizeof(*config)); 192 vdo_free(config); 193 } 194 195 /** 196 * get_version_number() - Decide the version number from argv. 197 * 198 * @argc: The number of table values. 199 * @argv: The array of table values. 200 * @error_ptr: A pointer to return a error string in. 201 * @version_ptr: A pointer to return the version. 202 * 203 * Return: VDO_SUCCESS or an error code. 204 */ 205 static int get_version_number(int argc, char **argv, char **error_ptr, 206 unsigned int *version_ptr) 207 { 208 /* version, if it exists, is in a form of V<n> */ 209 if (sscanf(argv[0], "V%u", version_ptr) == 1) { 210 if (*version_ptr < 1 || *version_ptr > TABLE_VERSION) { 211 *error_ptr = "Unknown version number detected"; 212 return VDO_BAD_CONFIGURATION; 213 } 214 } else { 215 /* V0 actually has no version number in the table string */ 216 *version_ptr = 0; 217 } 218 219 /* 220 * V0 and V1 have no optional parameters. There will always be a parameter for thread 221 * config, even if it's a "." to show it's an empty list. 
222 */ 223 if (*version_ptr <= 1) { 224 if (argc != REQUIRED_ARGC[*version_ptr]) { 225 *error_ptr = "Incorrect number of arguments for version"; 226 return VDO_BAD_CONFIGURATION; 227 } 228 } else if (argc < REQUIRED_ARGC[*version_ptr]) { 229 *error_ptr = "Incorrect number of arguments for version"; 230 return VDO_BAD_CONFIGURATION; 231 } 232 233 if (*version_ptr != TABLE_VERSION) { 234 vdo_log_warning("Detected version mismatch between kernel module and tools kernel: %d, tool: %d", 235 TABLE_VERSION, *version_ptr); 236 vdo_log_warning("Please consider upgrading management tools to match kernel."); 237 } 238 return VDO_SUCCESS; 239 } 240 241 /* Free a list of non-NULL string pointers, and then the list itself. */ 242 static void free_string_array(char **string_array) 243 { 244 unsigned int offset; 245 246 for (offset = 0; string_array[offset] != NULL; offset++) 247 vdo_free(string_array[offset]); 248 vdo_free(string_array); 249 } 250 251 /* 252 * Split the input string into substrings, separated at occurrences of the indicated character, 253 * returning a null-terminated list of string pointers. 254 * 255 * The string pointers and the pointer array itself should both be freed with vdo_free() when no 256 * longer needed. This can be done with vdo_free_string_array (below) if the pointers in the array 257 * are not changed. Since the array and copied strings are allocated by this function, it may only 258 * be used in contexts where allocation is permitted. 259 * 260 * Empty substrings are not ignored; that is, returned substrings may be empty strings if the 261 * separator occurs twice in a row. 262 */ 263 static int split_string(const char *string, char separator, char ***substring_array_ptr) 264 { 265 unsigned int current_substring = 0, substring_count = 1; 266 const char *s; 267 char **substrings; 268 int result; 269 ptrdiff_t length; 270 271 for (s = string; *s != 0; s++) { 272 if (*s == separator) 273 substring_count++; 274 } 275 276 result = vdo_allocate(substring_count + 1, char *, "string-splitting array", 277 &substrings); 278 if (result != VDO_SUCCESS) 279 return result; 280 281 for (s = string; *s != 0; s++) { 282 if (*s == separator) { 283 ptrdiff_t length = s - string; 284 285 result = vdo_allocate(length + 1, char, "split string", 286 &substrings[current_substring]); 287 if (result != VDO_SUCCESS) { 288 free_string_array(substrings); 289 return result; 290 } 291 /* 292 * Trailing NUL is already in place after allocation; deal with the zero or 293 * more non-NUL bytes in the string. 294 */ 295 if (length > 0) 296 memcpy(substrings[current_substring], string, length); 297 string = s + 1; 298 current_substring++; 299 BUG_ON(current_substring >= substring_count); 300 } 301 } 302 /* Process final string, with no trailing separator. */ 303 BUG_ON(current_substring != (substring_count - 1)); 304 length = strlen(string); 305 306 result = vdo_allocate(length + 1, char, "split string", 307 &substrings[current_substring]); 308 if (result != VDO_SUCCESS) { 309 free_string_array(substrings); 310 return result; 311 } 312 memcpy(substrings[current_substring], string, length); 313 current_substring++; 314 /* substrings[current_substring] is NULL already */ 315 *substring_array_ptr = substrings; 316 return VDO_SUCCESS; 317 } 318 319 /* 320 * Join the input substrings into one string, joined with the indicated character, returning a 321 * string. array_length is a bound on the number of valid elements in substring_array, in case it 322 * is not NULL-terminated. 
323 */ 324 static int join_strings(char **substring_array, size_t array_length, char separator, 325 char **string_ptr) 326 { 327 size_t string_length = 0; 328 size_t i; 329 int result; 330 char *output, *current_position; 331 332 for (i = 0; (i < array_length) && (substring_array[i] != NULL); i++) 333 string_length += strlen(substring_array[i]) + 1; 334 335 result = vdo_allocate(string_length, char, __func__, &output); 336 if (result != VDO_SUCCESS) 337 return result; 338 339 current_position = &output[0]; 340 341 for (i = 0; (i < array_length) && (substring_array[i] != NULL); i++) { 342 current_position = vdo_append_to_buffer(current_position, 343 output + string_length, "%s", 344 substring_array[i]); 345 *current_position = separator; 346 current_position++; 347 } 348 349 /* We output one too many separators; replace the last with a zero byte. */ 350 if (current_position != output) 351 *(current_position - 1) = '\0'; 352 353 *string_ptr = output; 354 return VDO_SUCCESS; 355 } 356 357 /** 358 * parse_bool() - Parse a two-valued option into a bool. 359 * @bool_str: The string value to convert to a bool. 360 * @true_str: The string value which should be converted to true. 361 * @false_str: The string value which should be converted to false. 362 * @bool_ptr: A pointer to return the bool value in. 363 * 364 * Return: VDO_SUCCESS or an error if bool_str is neither true_str nor false_str. 365 */ 366 static inline int __must_check parse_bool(const char *bool_str, const char *true_str, 367 const char *false_str, bool *bool_ptr) 368 { 369 bool value = false; 370 371 if (strcmp(bool_str, true_str) == 0) 372 value = true; 373 else if (strcmp(bool_str, false_str) == 0) 374 value = false; 375 else 376 return VDO_BAD_CONFIGURATION; 377 378 *bool_ptr = value; 379 return VDO_SUCCESS; 380 } 381 382 /** 383 * process_one_thread_config_spec() - Process one component of a thread parameter configuration 384 * string and update the configuration data structure. 385 * @thread_param_type: The type of thread specified. 386 * @count: The thread count requested. 387 * @config: The configuration data structure to update. 388 * 389 * If the thread count requested is invalid, a message is logged and -EINVAL returned. If the 390 * thread name is unknown, a message is logged but no error is returned. 
391 * 392 * Return: VDO_SUCCESS or -EINVAL 393 */ 394 static int process_one_thread_config_spec(const char *thread_param_type, 395 unsigned int count, 396 struct thread_count_config *config) 397 { 398 /* Handle limited thread parameters */ 399 if (strcmp(thread_param_type, "bioRotationInterval") == 0) { 400 if (count == 0) { 401 vdo_log_error("thread config string error: 'bioRotationInterval' of at least 1 is required"); 402 return -EINVAL; 403 } else if (count > VDO_BIO_ROTATION_INTERVAL_LIMIT) { 404 vdo_log_error("thread config string error: 'bioRotationInterval' cannot be higher than %d", 405 VDO_BIO_ROTATION_INTERVAL_LIMIT); 406 return -EINVAL; 407 } 408 config->bio_rotation_interval = count; 409 return VDO_SUCCESS; 410 } 411 if (strcmp(thread_param_type, "logical") == 0) { 412 if (count > MAX_VDO_LOGICAL_ZONES) { 413 vdo_log_error("thread config string error: at most %d 'logical' threads are allowed", 414 MAX_VDO_LOGICAL_ZONES); 415 return -EINVAL; 416 } 417 config->logical_zones = count; 418 return VDO_SUCCESS; 419 } 420 if (strcmp(thread_param_type, "physical") == 0) { 421 if (count > MAX_VDO_PHYSICAL_ZONES) { 422 vdo_log_error("thread config string error: at most %d 'physical' threads are allowed", 423 MAX_VDO_PHYSICAL_ZONES); 424 return -EINVAL; 425 } 426 config->physical_zones = count; 427 return VDO_SUCCESS; 428 } 429 /* Handle other thread count parameters */ 430 if (count > MAXIMUM_VDO_THREADS) { 431 vdo_log_error("thread config string error: at most %d '%s' threads are allowed", 432 MAXIMUM_VDO_THREADS, thread_param_type); 433 return -EINVAL; 434 } 435 if (strcmp(thread_param_type, "hash") == 0) { 436 config->hash_zones = count; 437 return VDO_SUCCESS; 438 } 439 if (strcmp(thread_param_type, "cpu") == 0) { 440 if (count == 0) { 441 vdo_log_error("thread config string error: at least one 'cpu' thread required"); 442 return -EINVAL; 443 } 444 config->cpu_threads = count; 445 return VDO_SUCCESS; 446 } 447 if (strcmp(thread_param_type, "ack") == 0) { 448 config->bio_ack_threads = count; 449 return VDO_SUCCESS; 450 } 451 if (strcmp(thread_param_type, "bio") == 0) { 452 if (count == 0) { 453 vdo_log_error("thread config string error: at least one 'bio' thread required"); 454 return -EINVAL; 455 } 456 config->bio_threads = count; 457 return VDO_SUCCESS; 458 } 459 460 /* 461 * Don't fail, just log. This will handle version mismatches between user mode tools and 462 * kernel. 463 */ 464 vdo_log_info("unknown thread parameter type \"%s\"", thread_param_type); 465 return VDO_SUCCESS; 466 } 467 468 /** 469 * parse_one_thread_config_spec() - Parse one component of a thread parameter configuration string 470 * and update the configuration data structure. 471 * @spec: The thread parameter specification string. 472 * @config: The configuration data to be updated. 
473 */ 474 static int parse_one_thread_config_spec(const char *spec, 475 struct thread_count_config *config) 476 { 477 unsigned int count; 478 char **fields; 479 int result; 480 481 result = split_string(spec, '=', &fields); 482 if (result != VDO_SUCCESS) 483 return result; 484 485 if ((fields[0] == NULL) || (fields[1] == NULL) || (fields[2] != NULL)) { 486 vdo_log_error("thread config string error: expected thread parameter assignment, saw \"%s\"", 487 spec); 488 free_string_array(fields); 489 return -EINVAL; 490 } 491 492 result = kstrtouint(fields[1], 10, &count); 493 if (result) { 494 vdo_log_error("thread config string error: integer value needed, found \"%s\"", 495 fields[1]); 496 free_string_array(fields); 497 return result; 498 } 499 500 result = process_one_thread_config_spec(fields[0], count, config); 501 free_string_array(fields); 502 return result; 503 } 504 505 /** 506 * parse_thread_config_string() - Parse the configuration string passed and update the specified 507 * counts and other parameters of various types of threads to be 508 * created. 509 * @string: Thread parameter configuration string. 510 * @config: The thread configuration data to update. 511 * 512 * The configuration string should contain one or more comma-separated specs of the form 513 * "typename=number"; the supported type names are "cpu", "ack", "bio", "bioRotationInterval", 514 * "logical", "physical", and "hash". 515 * 516 * If an error occurs during parsing of a single key/value pair, we deem it serious enough to stop 517 * further parsing. 518 * 519 * This function can't set the "reason" value the caller wants to pass back, because we'd want to 520 * format it to say which field was invalid, and we can't allocate the "reason" strings 521 * dynamically. So if an error occurs, we'll log the details and pass back an error. 522 * 523 * Return: VDO_SUCCESS or -EINVAL or -ENOMEM 524 */ 525 static int parse_thread_config_string(const char *string, 526 struct thread_count_config *config) 527 { 528 int result = VDO_SUCCESS; 529 char **specs; 530 531 if (strcmp(".", string) != 0) { 532 unsigned int i; 533 534 result = split_string(string, ',', &specs); 535 if (result != VDO_SUCCESS) 536 return result; 537 538 for (i = 0; specs[i] != NULL; i++) { 539 result = parse_one_thread_config_spec(specs[i], config); 540 if (result != VDO_SUCCESS) 541 break; 542 } 543 free_string_array(specs); 544 } 545 return result; 546 } 547 548 /** 549 * process_one_key_value_pair() - Process one component of an optional parameter string and update 550 * the configuration data structure. 551 * @key: The optional parameter key name. 552 * @value: The optional parameter value. 553 * @config: The configuration data structure to update. 554 * 555 * If the value requested is invalid, a message is logged and -EINVAL returned. If the key is 556 * unknown, a message is logged but no error is returned. 
557 * 558 * Return: VDO_SUCCESS or -EINVAL 559 */ 560 static int process_one_key_value_pair(const char *key, unsigned int value, 561 struct device_config *config) 562 { 563 /* Non thread optional parameters */ 564 if (strcmp(key, "maxDiscard") == 0) { 565 if (value == 0) { 566 vdo_log_error("optional parameter error: at least one max discard block required"); 567 return -EINVAL; 568 } 569 /* Max discard sectors in blkdev_issue_discard is UINT_MAX >> 9 */ 570 if (value > (UINT_MAX / VDO_BLOCK_SIZE)) { 571 vdo_log_error("optional parameter error: at most %d max discard blocks are allowed", 572 UINT_MAX / VDO_BLOCK_SIZE); 573 return -EINVAL; 574 } 575 config->max_discard_blocks = value; 576 return VDO_SUCCESS; 577 } 578 /* Handles unknown key names */ 579 return process_one_thread_config_spec(key, value, &config->thread_counts); 580 } 581 582 /** 583 * parse_one_key_value_pair() - Parse one key/value pair and update the configuration data 584 * structure. 585 * @key: The optional key name. 586 * @value: The optional value. 587 * @config: The configuration data to be updated. 588 * 589 * Return: VDO_SUCCESS or error. 590 */ 591 static int parse_one_key_value_pair(const char *key, const char *value, 592 struct device_config *config) 593 { 594 unsigned int count; 595 int result; 596 597 if (strcmp(key, "deduplication") == 0) 598 return parse_bool(value, "on", "off", &config->deduplication); 599 600 if (strcmp(key, "compression") == 0) 601 return parse_bool(value, "on", "off", &config->compression); 602 603 /* The remaining arguments must have integral values. */ 604 result = kstrtouint(value, 10, &count); 605 if (result) { 606 vdo_log_error("optional config string error: integer value needed, found \"%s\"", 607 value); 608 return result; 609 } 610 return process_one_key_value_pair(key, count, config); 611 } 612 613 /** 614 * parse_key_value_pairs() - Parse all key/value pairs from a list of arguments. 615 * @argc: The total number of arguments in list. 616 * @argv: The list of key/value pairs. 617 * @config: The device configuration data to update. 618 * 619 * If an error occurs during parsing of a single key/value pair, we deem it serious enough to stop 620 * further parsing. 621 * 622 * This function can't set the "reason" value the caller wants to pass back, because we'd want to 623 * format it to say which field was invalid, and we can't allocate the "reason" strings 624 * dynamically. So if an error occurs, we'll log the details and return the error. 625 * 626 * Return: VDO_SUCCESS or error 627 */ 628 static int parse_key_value_pairs(int argc, char **argv, struct device_config *config) 629 { 630 int result = VDO_SUCCESS; 631 632 while (argc) { 633 result = parse_one_key_value_pair(argv[0], argv[1], config); 634 if (result != VDO_SUCCESS) 635 break; 636 637 argc -= 2; 638 argv += 2; 639 } 640 641 return result; 642 } 643 644 /** 645 * parse_optional_arguments() - Parse the configuration string passed in for optional arguments. 646 * @arg_set: The structure holding the arguments to parse. 647 * @error_ptr: Pointer to a buffer to hold the error string. 648 * @config: Pointer to device configuration data to update. 649 * 650 * For V0/V1 configurations, there will only be one optional parameter; the thread configuration. 651 * The configuration string should contain one or more comma-separated specs of the form 652 * "typename=number"; the supported type names are "cpu", "ack", "bio", "bioRotationInterval", 653 * "logical", "physical", and "hash". 
654 * 655 * For V2 configurations and beyond, there could be any number of arguments. They should contain 656 * one or more key/value pairs separated by a space. 657 * 658 * Return: VDO_SUCCESS or error 659 */ 660 static int parse_optional_arguments(struct dm_arg_set *arg_set, char **error_ptr, 661 struct device_config *config) 662 { 663 int result = VDO_SUCCESS; 664 665 if (config->version == 0 || config->version == 1) { 666 result = parse_thread_config_string(arg_set->argv[0], 667 &config->thread_counts); 668 if (result != VDO_SUCCESS) { 669 *error_ptr = "Invalid thread-count configuration"; 670 return VDO_BAD_CONFIGURATION; 671 } 672 } else { 673 if ((arg_set->argc % 2) != 0) { 674 *error_ptr = "Odd number of optional arguments given but they should be <key> <value> pairs"; 675 return VDO_BAD_CONFIGURATION; 676 } 677 result = parse_key_value_pairs(arg_set->argc, arg_set->argv, config); 678 if (result != VDO_SUCCESS) { 679 *error_ptr = "Invalid optional argument configuration"; 680 return VDO_BAD_CONFIGURATION; 681 } 682 } 683 return result; 684 } 685 686 /** 687 * handle_parse_error() - Handle a parsing error. 688 * @config: The config to free. 689 * @error_ptr: A place to store a constant string about the error. 690 * @error_str: A constant string to store in error_ptr. 691 */ 692 static void handle_parse_error(struct device_config *config, char **error_ptr, 693 char *error_str) 694 { 695 free_device_config(config); 696 *error_ptr = error_str; 697 } 698 699 /** 700 * parse_device_config() - Convert the dmsetup table into a struct device_config. 701 * @argc: The number of table values. 702 * @argv: The array of table values. 703 * @ti: The target structure for this table. 704 * @config_ptr: A pointer to return the allocated config. 705 * 706 * Return: VDO_SUCCESS or an error code. 707 */ 708 static int parse_device_config(int argc, char **argv, struct dm_target *ti, 709 struct device_config **config_ptr) 710 { 711 bool enable_512e; 712 size_t logical_bytes = to_bytes(ti->len); 713 struct dm_arg_set arg_set; 714 char **error_ptr = &ti->error; 715 struct device_config *config = NULL; 716 int result; 717 718 if ((logical_bytes % VDO_BLOCK_SIZE) != 0) { 719 handle_parse_error(config, error_ptr, 720 "Logical size must be a multiple of 4096"); 721 return VDO_BAD_CONFIGURATION; 722 } 723 724 if (argc == 0) { 725 handle_parse_error(config, error_ptr, "Incorrect number of arguments"); 726 return VDO_BAD_CONFIGURATION; 727 } 728 729 result = vdo_allocate(1, struct device_config, "device_config", &config); 730 if (result != VDO_SUCCESS) { 731 handle_parse_error(config, error_ptr, 732 "Could not allocate config structure"); 733 return VDO_BAD_CONFIGURATION; 734 } 735 736 config->owning_target = ti; 737 config->logical_blocks = logical_bytes / VDO_BLOCK_SIZE; 738 INIT_LIST_HEAD(&config->config_list); 739 740 /* Save the original string. 
*/ 741 result = join_strings(argv, argc, ' ', &config->original_string); 742 if (result != VDO_SUCCESS) { 743 handle_parse_error(config, error_ptr, "Could not populate string"); 744 return VDO_BAD_CONFIGURATION; 745 } 746 747 vdo_log_info("table line: %s", config->original_string); 748 749 config->thread_counts = (struct thread_count_config) { 750 .bio_ack_threads = 1, 751 .bio_threads = DEFAULT_VDO_BIO_SUBMIT_QUEUE_COUNT, 752 .bio_rotation_interval = DEFAULT_VDO_BIO_SUBMIT_QUEUE_ROTATE_INTERVAL, 753 .cpu_threads = 1, 754 .logical_zones = 0, 755 .physical_zones = 0, 756 .hash_zones = 0, 757 }; 758 config->max_discard_blocks = 1; 759 config->deduplication = true; 760 config->compression = false; 761 762 arg_set.argc = argc; 763 arg_set.argv = argv; 764 765 result = get_version_number(argc, argv, error_ptr, &config->version); 766 if (result != VDO_SUCCESS) { 767 /* get_version_number sets error_ptr itself. */ 768 handle_parse_error(config, error_ptr, *error_ptr); 769 return result; 770 } 771 /* Move the arg pointer forward only if the argument was there. */ 772 if (config->version >= 1) 773 dm_shift_arg(&arg_set); 774 775 result = vdo_duplicate_string(dm_shift_arg(&arg_set), "parent device name", 776 &config->parent_device_name); 777 if (result != VDO_SUCCESS) { 778 handle_parse_error(config, error_ptr, 779 "Could not copy parent device name"); 780 return VDO_BAD_CONFIGURATION; 781 } 782 783 /* Get the physical blocks, if known. */ 784 if (config->version >= 1) { 785 result = kstrtoull(dm_shift_arg(&arg_set), 10, &config->physical_blocks); 786 if (result != VDO_SUCCESS) { 787 handle_parse_error(config, error_ptr, 788 "Invalid physical block count"); 789 return VDO_BAD_CONFIGURATION; 790 } 791 } 792 793 /* Get the logical block size and validate */ 794 result = parse_bool(dm_shift_arg(&arg_set), "512", "4096", &enable_512e); 795 if (result != VDO_SUCCESS) { 796 handle_parse_error(config, error_ptr, "Invalid logical block size"); 797 return VDO_BAD_CONFIGURATION; 798 } 799 config->logical_block_size = (enable_512e ? 512 : 4096); 800 801 /* Skip past the two no longer used read cache options. */ 802 if (config->version <= 1) 803 dm_consume_args(&arg_set, 2); 804 805 /* Get the page cache size. */ 806 result = kstrtouint(dm_shift_arg(&arg_set), 10, &config->cache_size); 807 if (result != VDO_SUCCESS) { 808 handle_parse_error(config, error_ptr, 809 "Invalid block map page cache size"); 810 return VDO_BAD_CONFIGURATION; 811 } 812 813 /* Get the block map era length. */ 814 result = kstrtouint(dm_shift_arg(&arg_set), 10, &config->block_map_maximum_age); 815 if (result != VDO_SUCCESS) { 816 handle_parse_error(config, error_ptr, "Invalid block map maximum age"); 817 return VDO_BAD_CONFIGURATION; 818 } 819 820 /* Skip past the no longer used MD RAID5 optimization mode */ 821 if (config->version <= 2) 822 dm_consume_args(&arg_set, 1); 823 824 /* Skip past the no longer used write policy setting */ 825 if (config->version <= 3) 826 dm_consume_args(&arg_set, 1); 827 828 /* Skip past the no longer used pool name for older table lines */ 829 if (config->version <= 2) { 830 /* 831 * Make sure the enum to get the pool name from argv directly is still in sync with 832 * the parsing of the table line. 833 */ 834 if (&arg_set.argv[0] != &argv[POOL_NAME_ARG_INDEX[config->version]]) { 835 handle_parse_error(config, error_ptr, 836 "Pool name not in expected location"); 837 return VDO_BAD_CONFIGURATION; 838 } 839 dm_shift_arg(&arg_set); 840 } 841 842 /* Get the optional arguments and validate. 
*/ 843 result = parse_optional_arguments(&arg_set, error_ptr, config); 844 if (result != VDO_SUCCESS) { 845 /* parse_optional_arguments sets error_ptr itself. */ 846 handle_parse_error(config, error_ptr, *error_ptr); 847 return result; 848 } 849 850 /* 851 * Logical, physical, and hash zone counts can all be zero; then we get one thread doing 852 * everything, our older configuration. If any zone count is non-zero, the others must be 853 * as well. 854 */ 855 if (((config->thread_counts.logical_zones == 0) != 856 (config->thread_counts.physical_zones == 0)) || 857 ((config->thread_counts.physical_zones == 0) != 858 (config->thread_counts.hash_zones == 0))) { 859 handle_parse_error(config, error_ptr, 860 "Logical, physical, and hash zones counts must all be zero or all non-zero"); 861 return VDO_BAD_CONFIGURATION; 862 } 863 864 if (config->cache_size < 865 (2 * MAXIMUM_VDO_USER_VIOS * config->thread_counts.logical_zones)) { 866 handle_parse_error(config, error_ptr, 867 "Insufficient block map cache for logical zones"); 868 return VDO_BAD_CONFIGURATION; 869 } 870 871 result = dm_get_device(ti, config->parent_device_name, 872 dm_table_get_mode(ti->table), &config->owned_device); 873 if (result != 0) { 874 vdo_log_error("couldn't open device \"%s\": error %d", 875 config->parent_device_name, result); 876 handle_parse_error(config, error_ptr, "Unable to open storage device"); 877 return VDO_BAD_CONFIGURATION; 878 } 879 880 if (config->version == 0) { 881 u64 device_size = bdev_nr_bytes(config->owned_device->bdev); 882 883 config->physical_blocks = device_size / VDO_BLOCK_SIZE; 884 } 885 886 *config_ptr = config; 887 return result; 888 } 889 890 static struct vdo *get_vdo_for_target(struct dm_target *ti) 891 { 892 return ((struct device_config *) ti->private)->vdo; 893 } 894 895 896 static int vdo_map_bio(struct dm_target *ti, struct bio *bio) 897 { 898 struct vdo *vdo = get_vdo_for_target(ti); 899 struct vdo_work_queue *current_work_queue; 900 const struct admin_state_code *code = vdo_get_admin_state_code(&vdo->admin.state); 901 902 VDO_ASSERT_LOG_ONLY(code->normal, "vdo should not receive bios while in state %s", 903 code->name); 904 905 /* Count all incoming bios. */ 906 vdo_count_bios(&vdo->stats.bios_in, bio); 907 908 909 /* Handle empty bios. Empty flush bios are not associated with a vio. */ 910 if ((bio_op(bio) == REQ_OP_FLUSH) || ((bio->bi_opf & REQ_PREFLUSH) != 0)) { 911 vdo_launch_flush(vdo, bio); 912 return DM_MAPIO_SUBMITTED; 913 } 914 915 /* This could deadlock, */ 916 current_work_queue = vdo_get_current_work_queue(); 917 BUG_ON((current_work_queue != NULL) && 918 (vdo == vdo_get_work_queue_owner(current_work_queue)->vdo)); 919 vdo_launch_bio(vdo->data_vio_pool, bio); 920 return DM_MAPIO_SUBMITTED; 921 } 922 923 static void vdo_io_hints(struct dm_target *ti, struct queue_limits *limits) 924 { 925 struct vdo *vdo = get_vdo_for_target(ti); 926 927 limits->logical_block_size = vdo->device_config->logical_block_size; 928 limits->physical_block_size = VDO_BLOCK_SIZE; 929 930 /* The minimum io size for random io */ 931 limits->io_min = VDO_BLOCK_SIZE; 932 /* The optimal io size for streamed/sequential io */ 933 limits->io_opt = VDO_BLOCK_SIZE; 934 935 /* 936 * Sets the maximum discard size that will be passed into VDO. This value comes from a 937 * table line value passed in during dmsetup create. 938 * 939 * The value 1024 is the largest usable value on HD systems. A 2048 sector discard on a 940 * busy HD system takes 31 seconds. 
We should use a value no higher than 1024, which takes 941 * 15 to 16 seconds on a busy HD system. However, using large values results in 120 second 942 * blocked task warnings in kernel logs. In order to avoid these warnings, we choose to 943 * use the smallest reasonable value. 944 * 945 * The value is used by dm-thin to determine whether to pass down discards. The block layer 946 * splits large discards on this boundary when this is set. 947 */ 948 limits->max_hw_discard_sectors = 949 (vdo->device_config->max_discard_blocks * VDO_SECTORS_PER_BLOCK); 950 951 /* 952 * Force discards to not begin or end with a partial block by stating the granularity is 953 * 4k. 954 */ 955 limits->discard_granularity = VDO_BLOCK_SIZE; 956 } 957 958 static int vdo_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, 959 void *data) 960 { 961 struct device_config *config = get_vdo_for_target(ti)->device_config; 962 963 return fn(ti, config->owned_device, 0, 964 config->physical_blocks * VDO_SECTORS_PER_BLOCK, data); 965 } 966 967 /* 968 * Status line is: 969 * <device> <operating mode> <in recovery> <index state> <compression state> 970 * <used physical blocks> <total physical blocks> 971 */ 972 973 static void vdo_status(struct dm_target *ti, status_type_t status_type, 974 unsigned int status_flags, char *result, unsigned int maxlen) 975 { 976 struct vdo *vdo = get_vdo_for_target(ti); 977 struct vdo_statistics *stats; 978 struct device_config *device_config; 979 /* N.B.: The DMEMIT macro uses the variables named "sz", "result", "maxlen". */ 980 int sz = 0; 981 982 switch (status_type) { 983 case STATUSTYPE_INFO: 984 /* Report info for dmsetup status */ 985 mutex_lock(&vdo->stats_mutex); 986 vdo_fetch_statistics(vdo, &vdo->stats_buffer); 987 stats = &vdo->stats_buffer; 988 989 DMEMIT("/dev/%pg %s %s %s %s %llu %llu", 990 vdo_get_backing_device(vdo), stats->mode, 991 stats->in_recovery_mode ? "recovering" : "-", 992 vdo_get_dedupe_index_state_name(vdo->hash_zones), 993 vdo_get_compressing(vdo) ? "online" : "offline", 994 stats->data_blocks_used + stats->overhead_blocks_used, 995 stats->physical_blocks); 996 mutex_unlock(&vdo->stats_mutex); 997 break; 998 999 case STATUSTYPE_TABLE: 1000 /* Report the string actually specified in the beginning. */ 1001 device_config = (struct device_config *) ti->private; 1002 DMEMIT("%s", device_config->original_string); 1003 break; 1004 1005 case STATUSTYPE_IMA: 1006 /* FIXME: We ought to be more detailed here, but this is what thin does. */ 1007 *result = '\0'; 1008 break; 1009 } 1010 } 1011 1012 static block_count_t __must_check get_underlying_device_block_count(const struct vdo *vdo) 1013 { 1014 return bdev_nr_bytes(vdo_get_backing_device(vdo)) / VDO_BLOCK_SIZE; 1015 } 1016 1017 static int __must_check process_vdo_message_locked(struct vdo *vdo, unsigned int argc, 1018 char **argv) 1019 { 1020 if ((argc == 2) && (strcasecmp(argv[0], "compression") == 0)) { 1021 if (strcasecmp(argv[1], "on") == 0) { 1022 vdo_set_compressing(vdo, true); 1023 return 0; 1024 } 1025 1026 if (strcasecmp(argv[1], "off") == 0) { 1027 vdo_set_compressing(vdo, false); 1028 return 0; 1029 } 1030 1031 vdo_log_warning("invalid argument '%s' to dmsetup compression message", 1032 argv[1]); 1033 return -EINVAL; 1034 } 1035 1036 vdo_log_warning("unrecognized dmsetup message '%s' received", argv[0]); 1037 return -EINVAL; 1038 } 1039 1040 /* 1041 * If the message is a dump, just do it. Otherwise, check that no other message is being processed, 1042 * and only proceed if so. 
1043 * Returns -EBUSY if another message is being processed 1044 */ 1045 static int __must_check process_vdo_message(struct vdo *vdo, unsigned int argc, 1046 char **argv) 1047 { 1048 int result; 1049 1050 /* 1051 * All messages which may be processed in parallel with other messages should be handled 1052 * here before the atomic check below. Messages which should be exclusive should be 1053 * processed in process_vdo_message_locked(). 1054 */ 1055 1056 /* Dump messages should always be processed */ 1057 if (strcasecmp(argv[0], "dump") == 0) 1058 return vdo_dump(vdo, argc, argv, "dmsetup message"); 1059 1060 if (argc == 1) { 1061 if (strcasecmp(argv[0], "dump-on-shutdown") == 0) { 1062 vdo->dump_on_shutdown = true; 1063 return 0; 1064 } 1065 1066 /* Index messages should always be processed */ 1067 if ((strcasecmp(argv[0], "index-close") == 0) || 1068 (strcasecmp(argv[0], "index-create") == 0) || 1069 (strcasecmp(argv[0], "index-disable") == 0) || 1070 (strcasecmp(argv[0], "index-enable") == 0)) 1071 return vdo_message_dedupe_index(vdo->hash_zones, argv[0]); 1072 } 1073 1074 if (atomic_cmpxchg(&vdo->processing_message, 0, 1) != 0) 1075 return -EBUSY; 1076 1077 result = process_vdo_message_locked(vdo, argc, argv); 1078 1079 /* Pairs with the implicit barrier in cmpxchg just above */ 1080 smp_wmb(); 1081 atomic_set(&vdo->processing_message, 0); 1082 return result; 1083 } 1084 1085 static int vdo_message(struct dm_target *ti, unsigned int argc, char **argv, 1086 char *result_buffer, unsigned int maxlen) 1087 { 1088 struct registered_thread allocating_thread, instance_thread; 1089 struct vdo *vdo; 1090 int result; 1091 1092 if (argc == 0) { 1093 vdo_log_warning("unspecified dmsetup message"); 1094 return -EINVAL; 1095 } 1096 1097 vdo = get_vdo_for_target(ti); 1098 vdo_register_allocating_thread(&allocating_thread, NULL); 1099 vdo_register_thread_device_id(&instance_thread, &vdo->instance); 1100 1101 /* 1102 * Must be done here so we don't map return codes. The code in dm-ioctl expects a 1 for a 1103 * return code to look at the buffer and see if it is full or not. 1104 */ 1105 if ((argc == 1) && (strcasecmp(argv[0], "stats") == 0)) { 1106 vdo_write_stats(vdo, result_buffer, maxlen); 1107 result = 1; 1108 } else if ((argc == 1) && (strcasecmp(argv[0], "config") == 0)) { 1109 vdo_write_config(vdo, &result_buffer, &maxlen); 1110 result = 1; 1111 } else { 1112 result = vdo_status_to_errno(process_vdo_message(vdo, argc, argv)); 1113 } 1114 1115 vdo_unregister_thread_device_id(); 1116 vdo_unregister_allocating_thread(); 1117 return result; 1118 } 1119 1120 static void configure_target_capabilities(struct dm_target *ti) 1121 { 1122 ti->discards_supported = 1; 1123 ti->flush_supported = true; 1124 ti->num_discard_bios = 1; 1125 ti->num_flush_bios = 1; 1126 1127 /* 1128 * If this value changes, please make sure to update the value for max_discard_sectors 1129 * accordingly. 1130 */ 1131 BUG_ON(dm_set_target_max_io_len(ti, VDO_SECTORS_PER_BLOCK) != 0); 1132 } 1133 1134 /* 1135 * Implements vdo_filter_fn. 1136 */ 1137 static bool vdo_uses_device(struct vdo *vdo, const void *context) 1138 { 1139 const struct device_config *config = context; 1140 1141 return vdo_get_backing_device(vdo)->bd_dev == config->owned_device->bdev->bd_dev; 1142 } 1143 1144 /** 1145 * get_thread_id_for_phase() - Get the thread id for the current phase of the admin operation in 1146 * progress. 
1147 */ 1148 static thread_id_t __must_check get_thread_id_for_phase(struct vdo *vdo) 1149 { 1150 switch (vdo->admin.phase) { 1151 case RESUME_PHASE_PACKER: 1152 case RESUME_PHASE_FLUSHER: 1153 case SUSPEND_PHASE_PACKER: 1154 case SUSPEND_PHASE_FLUSHES: 1155 return vdo->thread_config.packer_thread; 1156 1157 case RESUME_PHASE_DATA_VIOS: 1158 case SUSPEND_PHASE_DATA_VIOS: 1159 return vdo->thread_config.cpu_thread; 1160 1161 case LOAD_PHASE_DRAIN_JOURNAL: 1162 case RESUME_PHASE_JOURNAL: 1163 case SUSPEND_PHASE_JOURNAL: 1164 return vdo->thread_config.journal_thread; 1165 1166 default: 1167 return vdo->thread_config.admin_thread; 1168 } 1169 } 1170 1171 static struct vdo_completion *prepare_admin_completion(struct vdo *vdo, 1172 vdo_action_fn callback, 1173 vdo_action_fn error_handler) 1174 { 1175 struct vdo_completion *completion = &vdo->admin.completion; 1176 1177 /* 1178 * We can't use vdo_prepare_completion_for_requeue() here because we don't want to reset 1179 * any error in the completion. 1180 */ 1181 completion->callback = callback; 1182 completion->error_handler = error_handler; 1183 completion->callback_thread_id = get_thread_id_for_phase(vdo); 1184 completion->requeue = true; 1185 return completion; 1186 } 1187 1188 /** 1189 * advance_phase() - Increment the phase of the current admin operation and prepare the admin 1190 * completion to run on the thread for the next phase. 1191 * @vdo: The on which an admin operation is being performed 1192 * 1193 * Return: The current phase 1194 */ 1195 static u32 advance_phase(struct vdo *vdo) 1196 { 1197 u32 phase = vdo->admin.phase++; 1198 1199 vdo->admin.completion.callback_thread_id = get_thread_id_for_phase(vdo); 1200 vdo->admin.completion.requeue = true; 1201 return phase; 1202 } 1203 1204 /* 1205 * Perform an administrative operation (load, suspend, grow logical, or grow physical). This method 1206 * should not be called from vdo threads. 1207 */ 1208 static int perform_admin_operation(struct vdo *vdo, u32 starting_phase, 1209 vdo_action_fn callback, vdo_action_fn error_handler, 1210 const char *type) 1211 { 1212 int result; 1213 struct vdo_administrator *admin = &vdo->admin; 1214 1215 if (atomic_cmpxchg(&admin->busy, 0, 1) != 0) { 1216 return vdo_log_error_strerror(VDO_COMPONENT_BUSY, 1217 "Can't start %s operation, another operation is already in progress", 1218 type); 1219 } 1220 1221 admin->phase = starting_phase; 1222 reinit_completion(&admin->callback_sync); 1223 vdo_reset_completion(&admin->completion); 1224 vdo_launch_completion(prepare_admin_completion(vdo, callback, error_handler)); 1225 1226 /* 1227 * Using the "interruptible" interface means that Linux will not log a message when we wait 1228 * for more than 120 seconds. 1229 */ 1230 while (wait_for_completion_interruptible(&admin->callback_sync)) { 1231 /* However, if we get a signal in a user-mode process, we could spin... */ 1232 fsleep(1000); 1233 } 1234 1235 result = admin->completion.result; 1236 /* pairs with implicit barrier in cmpxchg above */ 1237 smp_wmb(); 1238 atomic_set(&admin->busy, 0); 1239 return result; 1240 } 1241 1242 /* Assert that we are operating on the correct thread for the current phase. */ 1243 static void assert_admin_phase_thread(struct vdo *vdo, const char *what) 1244 { 1245 VDO_ASSERT_LOG_ONLY(vdo_get_callback_thread_id() == get_thread_id_for_phase(vdo), 1246 "%s on correct thread for %s", what, 1247 ADMIN_PHASE_NAMES[vdo->admin.phase]); 1248 } 1249 1250 /** 1251 * finish_operation_callback() - Callback to finish an admin operation. 
1252 * @completion: The admin_completion. 1253 */ 1254 static void finish_operation_callback(struct vdo_completion *completion) 1255 { 1256 struct vdo_administrator *admin = &completion->vdo->admin; 1257 1258 vdo_finish_operation(&admin->state, completion->result); 1259 complete(&admin->callback_sync); 1260 } 1261 1262 /** 1263 * decode_from_super_block() - Decode the VDO state from the super block and validate that it is 1264 * correct. 1265 * @vdo: The vdo being loaded. 1266 * 1267 * On error from this method, the component states must be destroyed explicitly. If this method 1268 * returns successfully, the component states must not be destroyed. 1269 * 1270 * Return: VDO_SUCCESS or an error. 1271 */ 1272 static int __must_check decode_from_super_block(struct vdo *vdo) 1273 { 1274 const struct device_config *config = vdo->device_config; 1275 int result; 1276 1277 result = vdo_decode_component_states(vdo->super_block.buffer, &vdo->geometry, 1278 &vdo->states); 1279 if (result != VDO_SUCCESS) 1280 return result; 1281 1282 vdo_set_state(vdo, vdo->states.vdo.state); 1283 vdo->load_state = vdo->states.vdo.state; 1284 1285 /* 1286 * If the device config specifies a larger logical size than was recorded in the super 1287 * block, just accept it. 1288 */ 1289 if (vdo->states.vdo.config.logical_blocks < config->logical_blocks) { 1290 vdo_log_warning("Growing logical size: a logical size of %llu blocks was specified, but that differs from the %llu blocks configured in the vdo super block", 1291 (unsigned long long) config->logical_blocks, 1292 (unsigned long long) vdo->states.vdo.config.logical_blocks); 1293 vdo->states.vdo.config.logical_blocks = config->logical_blocks; 1294 } 1295 1296 result = vdo_validate_component_states(&vdo->states, vdo->geometry.nonce, 1297 config->physical_blocks, 1298 config->logical_blocks); 1299 if (result != VDO_SUCCESS) 1300 return result; 1301 1302 vdo->layout = vdo->states.layout; 1303 return VDO_SUCCESS; 1304 } 1305 1306 /** 1307 * decode_vdo() - Decode the component data portion of a super block and fill in the corresponding 1308 * portions of the vdo being loaded. 1309 * @vdo: The vdo being loaded. 1310 * 1311 * This will also allocate the recovery journal and slab depot. If this method is called with an 1312 * asynchronous layer (i.e. a thread config which specifies at least one base thread), the block 1313 * map and packer will be constructed as well. 1314 * 1315 * Return: VDO_SUCCESS or an error. 
1316 */ 1317 static int __must_check decode_vdo(struct vdo *vdo) 1318 { 1319 block_count_t maximum_age, journal_length; 1320 struct partition *partition; 1321 int result; 1322 1323 result = decode_from_super_block(vdo); 1324 if (result != VDO_SUCCESS) { 1325 vdo_destroy_component_states(&vdo->states); 1326 return result; 1327 } 1328 1329 maximum_age = vdo_convert_maximum_age(vdo->device_config->block_map_maximum_age); 1330 journal_length = 1331 vdo_get_recovery_journal_length(vdo->states.vdo.config.recovery_journal_size); 1332 if (maximum_age > (journal_length / 2)) { 1333 return vdo_log_error_strerror(VDO_BAD_CONFIGURATION, 1334 "maximum age: %llu exceeds limit %llu", 1335 (unsigned long long) maximum_age, 1336 (unsigned long long) (journal_length / 2)); 1337 } 1338 1339 if (maximum_age == 0) { 1340 return vdo_log_error_strerror(VDO_BAD_CONFIGURATION, 1341 "maximum age must be greater than 0"); 1342 } 1343 1344 result = vdo_enable_read_only_entry(vdo); 1345 if (result != VDO_SUCCESS) 1346 return result; 1347 1348 partition = vdo_get_known_partition(&vdo->layout, 1349 VDO_RECOVERY_JOURNAL_PARTITION); 1350 result = vdo_decode_recovery_journal(vdo->states.recovery_journal, 1351 vdo->states.vdo.nonce, vdo, partition, 1352 vdo->states.vdo.complete_recoveries, 1353 vdo->states.vdo.config.recovery_journal_size, 1354 &vdo->recovery_journal); 1355 if (result != VDO_SUCCESS) 1356 return result; 1357 1358 partition = vdo_get_known_partition(&vdo->layout, VDO_SLAB_SUMMARY_PARTITION); 1359 result = vdo_decode_slab_depot(vdo->states.slab_depot, vdo, partition, 1360 &vdo->depot); 1361 if (result != VDO_SUCCESS) 1362 return result; 1363 1364 result = vdo_decode_block_map(vdo->states.block_map, 1365 vdo->states.vdo.config.logical_blocks, vdo, 1366 vdo->recovery_journal, vdo->states.vdo.nonce, 1367 vdo->device_config->cache_size, maximum_age, 1368 &vdo->block_map); 1369 if (result != VDO_SUCCESS) 1370 return result; 1371 1372 result = vdo_make_physical_zones(vdo, &vdo->physical_zones); 1373 if (result != VDO_SUCCESS) 1374 return result; 1375 1376 /* The logical zones depend on the physical zones already existing. */ 1377 result = vdo_make_logical_zones(vdo, &vdo->logical_zones); 1378 if (result != VDO_SUCCESS) 1379 return result; 1380 1381 return vdo_make_hash_zones(vdo, &vdo->hash_zones); 1382 } 1383 1384 /** 1385 * pre_load_callback() - Callback to initiate a pre-load, registered in vdo_initialize(). 1386 * @completion: The admin completion. 
1387 */ 1388 static void pre_load_callback(struct vdo_completion *completion) 1389 { 1390 struct vdo *vdo = completion->vdo; 1391 int result; 1392 1393 assert_admin_phase_thread(vdo, __func__); 1394 1395 switch (advance_phase(vdo)) { 1396 case PRE_LOAD_PHASE_START: 1397 result = vdo_start_operation(&vdo->admin.state, 1398 VDO_ADMIN_STATE_PRE_LOADING); 1399 if (result != VDO_SUCCESS) { 1400 vdo_continue_completion(completion, result); 1401 return; 1402 } 1403 1404 vdo_load_super_block(vdo, completion); 1405 return; 1406 1407 case PRE_LOAD_PHASE_LOAD_COMPONENTS: 1408 vdo_continue_completion(completion, decode_vdo(vdo)); 1409 return; 1410 1411 case PRE_LOAD_PHASE_END: 1412 break; 1413 1414 default: 1415 vdo_set_completion_result(completion, UDS_BAD_STATE); 1416 } 1417 1418 finish_operation_callback(completion); 1419 } 1420 1421 static void release_instance(unsigned int instance) 1422 { 1423 mutex_lock(&instances_lock); 1424 if (instance >= instances.bit_count) { 1425 VDO_ASSERT_LOG_ONLY(false, 1426 "instance number %u must be less than bit count %u", 1427 instance, instances.bit_count); 1428 } else if (test_bit(instance, instances.words) == 0) { 1429 VDO_ASSERT_LOG_ONLY(false, "instance number %u must be allocated", instance); 1430 } else { 1431 __clear_bit(instance, instances.words); 1432 instances.count -= 1; 1433 } 1434 mutex_unlock(&instances_lock); 1435 } 1436 1437 static void set_device_config(struct dm_target *ti, struct vdo *vdo, 1438 struct device_config *config) 1439 { 1440 list_del_init(&config->config_list); 1441 list_add_tail(&config->config_list, &vdo->device_config_list); 1442 config->vdo = vdo; 1443 ti->private = config; 1444 configure_target_capabilities(ti); 1445 } 1446 1447 static int vdo_initialize(struct dm_target *ti, unsigned int instance, 1448 struct device_config *config) 1449 { 1450 struct vdo *vdo; 1451 int result; 1452 u64 block_size = VDO_BLOCK_SIZE; 1453 u64 logical_size = to_bytes(ti->len); 1454 block_count_t logical_blocks = logical_size / block_size; 1455 1456 vdo_log_info("loading device '%s'", vdo_get_device_name(ti)); 1457 vdo_log_debug("Logical block size = %llu", (u64) config->logical_block_size); 1458 vdo_log_debug("Logical blocks = %llu", logical_blocks); 1459 vdo_log_debug("Physical block size = %llu", (u64) block_size); 1460 vdo_log_debug("Physical blocks = %llu", config->physical_blocks); 1461 vdo_log_debug("Block map cache blocks = %u", config->cache_size); 1462 vdo_log_debug("Block map maximum age = %u", config->block_map_maximum_age); 1463 vdo_log_debug("Deduplication = %s", (config->deduplication ? "on" : "off")); 1464 vdo_log_debug("Compression = %s", (config->compression ? "on" : "off")); 1465 1466 vdo = vdo_find_matching(vdo_uses_device, config); 1467 if (vdo != NULL) { 1468 vdo_log_error("Existing vdo already uses device %s", 1469 vdo->device_config->parent_device_name); 1470 ti->error = "Cannot share storage device with already-running VDO"; 1471 return VDO_BAD_CONFIGURATION; 1472 } 1473 1474 result = vdo_make(instance, config, &ti->error, &vdo); 1475 if (result != VDO_SUCCESS) { 1476 vdo_log_error("Could not create VDO device. (VDO error %d, message %s)", 1477 result, ti->error); 1478 vdo_destroy(vdo); 1479 return result; 1480 } 1481 1482 result = perform_admin_operation(vdo, PRE_LOAD_PHASE_START, pre_load_callback, 1483 finish_operation_callback, "pre-load"); 1484 if (result != VDO_SUCCESS) { 1485 ti->error = ((result == VDO_INVALID_ADMIN_STATE) ? 
1486 "Pre-load is only valid immediately after initialization" : 1487 "Cannot load metadata from device"); 1488 vdo_log_error("Could not start VDO device. (VDO error %d, message %s)", 1489 result, ti->error); 1490 vdo_destroy(vdo); 1491 return result; 1492 } 1493 1494 set_device_config(ti, vdo, config); 1495 vdo->device_config = config; 1496 return VDO_SUCCESS; 1497 } 1498 1499 /* Implements vdo_filter_fn. */ 1500 static bool __must_check vdo_is_named(struct vdo *vdo, const void *context) 1501 { 1502 struct dm_target *ti = vdo->device_config->owning_target; 1503 const char *device_name = vdo_get_device_name(ti); 1504 1505 return strcmp(device_name, context) == 0; 1506 } 1507 1508 /** 1509 * get_bit_array_size() - Return the number of bytes needed to store a bit array of the specified 1510 * capacity in an array of unsigned longs. 1511 * @bit_count: The number of bits the array must hold. 1512 * 1513 * Return: the number of bytes needed for the array representation. 1514 */ 1515 static size_t get_bit_array_size(unsigned int bit_count) 1516 { 1517 /* Round up to a multiple of the word size and convert to a byte count. */ 1518 return (BITS_TO_LONGS(bit_count) * sizeof(unsigned long)); 1519 } 1520 1521 /** 1522 * grow_bit_array() - Re-allocate the bitmap word array so there will more instance numbers that 1523 * can be allocated. 1524 * 1525 * Since the array is initially NULL, this also initializes the array the first time we allocate an 1526 * instance number. 1527 * 1528 * Return: VDO_SUCCESS or an error code from the allocation 1529 */ 1530 static int grow_bit_array(void) 1531 { 1532 unsigned int new_count = max(instances.bit_count + BIT_COUNT_INCREMENT, 1533 (unsigned int) BIT_COUNT_MINIMUM); 1534 unsigned long *new_words; 1535 int result; 1536 1537 result = vdo_reallocate_memory(instances.words, 1538 get_bit_array_size(instances.bit_count), 1539 get_bit_array_size(new_count), 1540 "instance number bit array", &new_words); 1541 if (result != VDO_SUCCESS) 1542 return result; 1543 1544 instances.bit_count = new_count; 1545 instances.words = new_words; 1546 return VDO_SUCCESS; 1547 } 1548 1549 /** 1550 * allocate_instance() - Allocate an instance number. 1551 * @instance_ptr: A point to hold the instance number 1552 * 1553 * Return: VDO_SUCCESS or an error code 1554 * 1555 * This function must be called while holding the instances lock. 1556 */ 1557 static int allocate_instance(unsigned int *instance_ptr) 1558 { 1559 unsigned int instance; 1560 int result; 1561 1562 /* If there are no unallocated instances, grow the bit array. */ 1563 if (instances.count >= instances.bit_count) { 1564 result = grow_bit_array(); 1565 if (result != VDO_SUCCESS) 1566 return result; 1567 } 1568 1569 /* 1570 * There must be a zero bit somewhere now. Find it, starting just after the last instance 1571 * allocated. 1572 */ 1573 instance = find_next_zero_bit(instances.words, instances.bit_count, 1574 instances.next); 1575 if (instance >= instances.bit_count) { 1576 /* Nothing free after next, so wrap around to instance zero. 
*/ 1577 instance = find_first_zero_bit(instances.words, instances.bit_count); 1578 result = VDO_ASSERT(instance < instances.bit_count, 1579 "impossibly, no zero bit found"); 1580 if (result != VDO_SUCCESS) 1581 return result; 1582 } 1583 1584 __set_bit(instance, instances.words); 1585 instances.count++; 1586 instances.next = instance + 1; 1587 *instance_ptr = instance; 1588 return VDO_SUCCESS; 1589 } 1590 1591 static int construct_new_vdo_registered(struct dm_target *ti, unsigned int argc, 1592 char **argv, unsigned int instance) 1593 { 1594 int result; 1595 struct device_config *config; 1596 1597 result = parse_device_config(argc, argv, ti, &config); 1598 if (result != VDO_SUCCESS) { 1599 vdo_log_error_strerror(result, "parsing failed: %s", ti->error); 1600 release_instance(instance); 1601 return -EINVAL; 1602 } 1603 1604 /* Beyond this point, the instance number will be cleaned up for us if needed */ 1605 result = vdo_initialize(ti, instance, config); 1606 if (result != VDO_SUCCESS) { 1607 release_instance(instance); 1608 free_device_config(config); 1609 return vdo_status_to_errno(result); 1610 } 1611 1612 return VDO_SUCCESS; 1613 } 1614 1615 static int construct_new_vdo(struct dm_target *ti, unsigned int argc, char **argv) 1616 { 1617 int result; 1618 unsigned int instance; 1619 struct registered_thread instance_thread; 1620 1621 mutex_lock(&instances_lock); 1622 result = allocate_instance(&instance); 1623 mutex_unlock(&instances_lock); 1624 if (result != VDO_SUCCESS) 1625 return -ENOMEM; 1626 1627 vdo_register_thread_device_id(&instance_thread, &instance); 1628 result = construct_new_vdo_registered(ti, argc, argv, instance); 1629 vdo_unregister_thread_device_id(); 1630 return result; 1631 } 1632 1633 /** 1634 * check_may_grow_physical() - Callback to check that we're not in recovery mode, used in 1635 * vdo_prepare_to_grow_physical(). 1636 * @completion: The admin completion. 1637 */ 1638 static void check_may_grow_physical(struct vdo_completion *completion) 1639 { 1640 struct vdo *vdo = completion->vdo; 1641 1642 assert_admin_phase_thread(vdo, __func__); 1643 1644 /* These checks can only be done from a vdo thread. */ 1645 if (vdo_is_read_only(vdo)) 1646 vdo_set_completion_result(completion, VDO_READ_ONLY); 1647 1648 if (vdo_in_recovery_mode(vdo)) 1649 vdo_set_completion_result(completion, VDO_RETRY_AFTER_REBUILD); 1650 1651 finish_operation_callback(completion); 1652 } 1653 1654 static block_count_t get_partition_size(struct layout *layout, enum partition_id id) 1655 { 1656 return vdo_get_known_partition(layout, id)->count; 1657 } 1658 1659 /** 1660 * grow_layout() - Make the layout for growing a vdo. 1661 * @vdo: The vdo preparing to grow. 1662 * @old_size: The current size of the vdo. 1663 * @new_size: The size to which the vdo will be grown. 1664 * 1665 * Return: VDO_SUCCESS or an error code. 1666 */ 1667 static int grow_layout(struct vdo *vdo, block_count_t old_size, block_count_t new_size) 1668 { 1669 int result; 1670 block_count_t min_new_size; 1671 1672 if (vdo->next_layout.size == new_size) { 1673 /* We are already prepared to grow to the new size, so we're done. */ 1674 return VDO_SUCCESS; 1675 } 1676 1677 /* Make a copy completion if there isn't one */ 1678 if (vdo->partition_copier == NULL) { 1679 vdo->partition_copier = dm_kcopyd_client_create(NULL); 1680 if (IS_ERR(vdo->partition_copier)) { 1681 result = PTR_ERR(vdo->partition_copier); 1682 vdo->partition_copier = NULL; 1683 return result; 1684 } 1685 } 1686 1687 /* Free any unused preparation. 
	 */
	vdo_uninitialize_layout(&vdo->next_layout);

	/*
	 * Make a new layout with the existing partition sizes for everything but the slab depot
	 * partition.
	 */
	result = vdo_initialize_layout(new_size, vdo->layout.start,
				       get_partition_size(&vdo->layout,
							  VDO_BLOCK_MAP_PARTITION),
				       get_partition_size(&vdo->layout,
							  VDO_RECOVERY_JOURNAL_PARTITION),
				       get_partition_size(&vdo->layout,
							  VDO_SLAB_SUMMARY_PARTITION),
				       &vdo->next_layout);
	if (result != VDO_SUCCESS) {
		dm_kcopyd_client_destroy(vdo_forget(vdo->partition_copier));
		return result;
	}

	/* Ensure the new journal and summary are entirely within the added blocks. */
	min_new_size = (old_size +
			get_partition_size(&vdo->next_layout,
					   VDO_SLAB_SUMMARY_PARTITION) +
			get_partition_size(&vdo->next_layout,
					   VDO_RECOVERY_JOURNAL_PARTITION));
	if (min_new_size > new_size) {
		/* Copying the journal and summary would destroy some old metadata. */
		vdo_uninitialize_layout(&vdo->next_layout);
		dm_kcopyd_client_destroy(vdo_forget(vdo->partition_copier));
		return VDO_INCREMENT_TOO_SMALL;
	}

	return VDO_SUCCESS;
}

static int prepare_to_grow_physical(struct vdo *vdo, block_count_t new_physical_blocks)
{
	int result;
	block_count_t current_physical_blocks = vdo->states.vdo.config.physical_blocks;

	vdo_log_info("Preparing to resize physical to %llu",
		     (unsigned long long) new_physical_blocks);
	VDO_ASSERT_LOG_ONLY((new_physical_blocks > current_physical_blocks),
			    "New physical size is larger than current physical size");
	result = perform_admin_operation(vdo, PREPARE_GROW_PHYSICAL_PHASE_START,
					 check_may_grow_physical,
					 finish_operation_callback,
					 "prepare grow-physical");
	if (result != VDO_SUCCESS)
		return result;

	result = grow_layout(vdo, current_physical_blocks, new_physical_blocks);
	if (result != VDO_SUCCESS)
		return result;

	result = vdo_prepare_to_grow_slab_depot(vdo->depot,
						vdo_get_known_partition(&vdo->next_layout,
									VDO_SLAB_DEPOT_PARTITION));
	if (result != VDO_SUCCESS) {
		vdo_uninitialize_layout(&vdo->next_layout);
		return result;
	}

	vdo_log_info("Done preparing to resize physical");
	return VDO_SUCCESS;
}

/**
 * validate_new_device_config() - Check whether a new device config represents a valid modification
 *				  to an existing config.
 * @to_validate: The new config to validate.
 * @config: The existing config.
 * @may_grow: Set to true if growing the logical and physical size of the vdo is currently
 *	      permitted.
 * @error_ptr: A pointer to hold the reason for any error.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int validate_new_device_config(struct device_config *to_validate,
				      struct device_config *config, bool may_grow,
				      char **error_ptr)
{
	if (to_validate->owning_target->begin != config->owning_target->begin) {
		*error_ptr = "Starting sector cannot change";
		return VDO_PARAMETER_MISMATCH;
	}

	if (to_validate->logical_block_size != config->logical_block_size) {
		*error_ptr = "Logical block size cannot change";
		return VDO_PARAMETER_MISMATCH;
	}

	if (to_validate->logical_blocks < config->logical_blocks) {
		*error_ptr = "Can't shrink VDO logical size";
		return VDO_PARAMETER_MISMATCH;
	}

	if (to_validate->cache_size != config->cache_size) {
		*error_ptr = "Block map cache size cannot change";
		return VDO_PARAMETER_MISMATCH;
	}

	if (to_validate->block_map_maximum_age != config->block_map_maximum_age) {
		*error_ptr = "Block map maximum age cannot change";
		return VDO_PARAMETER_MISMATCH;
	}

	if (memcmp(&to_validate->thread_counts, &config->thread_counts,
		   sizeof(struct thread_count_config)) != 0) {
		*error_ptr = "Thread configuration cannot change";
		return VDO_PARAMETER_MISMATCH;
	}

	if (to_validate->physical_blocks < config->physical_blocks) {
		*error_ptr = "Removing physical storage from a VDO is not supported";
		return VDO_NOT_IMPLEMENTED;
	}

	if (!may_grow && (to_validate->physical_blocks > config->physical_blocks)) {
		*error_ptr = "VDO physical size may not grow in current state";
		return VDO_NOT_IMPLEMENTED;
	}

	return VDO_SUCCESS;
}

static int prepare_to_modify(struct dm_target *ti, struct device_config *config,
			     struct vdo *vdo)
{
	int result;
	bool may_grow = (vdo_get_admin_state(vdo) != VDO_ADMIN_STATE_PRE_LOADED);

	result = validate_new_device_config(config, vdo->device_config, may_grow,
					    &ti->error);
	if (result != VDO_SUCCESS)
		return -EINVAL;

	if (config->logical_blocks > vdo->device_config->logical_blocks) {
		block_count_t logical_blocks = vdo->states.vdo.config.logical_blocks;

		vdo_log_info("Preparing to resize logical to %llu",
			     (unsigned long long) config->logical_blocks);
		VDO_ASSERT_LOG_ONLY((config->logical_blocks > logical_blocks),
				    "New logical size is larger than current size");

		result = vdo_prepare_to_grow_block_map(vdo->block_map,
						       config->logical_blocks);
		if (result != VDO_SUCCESS) {
			ti->error = "Device vdo_prepare_to_grow_logical failed";
			return result;
		}

		vdo_log_info("Done preparing to resize logical");
	}

	if (config->physical_blocks > vdo->device_config->physical_blocks) {
		result = prepare_to_grow_physical(vdo, config->physical_blocks);
		if (result != VDO_SUCCESS) {
			if (result == VDO_PARAMETER_MISMATCH) {
				/*
				 * If we don't trap this case, vdo_status_to_errno() will remap
				 * it to -EIO, which is misleading and ahistorical.
				 */
				result = -EINVAL;
			}

			if (result == VDO_TOO_MANY_SLABS)
				ti->error = "Device vdo_prepare_to_grow_physical failed (specified physical size too big based on formatted slab size)";
			else
				ti->error = "Device vdo_prepare_to_grow_physical failed";

			return result;
		}
	}

	if (strcmp(config->parent_device_name, vdo->device_config->parent_device_name) != 0) {
		const char *device_name = vdo_get_device_name(config->owning_target);

		vdo_log_info("Updating backing device of %s from %s to %s", device_name,
			     vdo->device_config->parent_device_name,
			     config->parent_device_name);
	}

	return VDO_SUCCESS;
}

static int update_existing_vdo(const char *device_name, struct dm_target *ti,
			       unsigned int argc, char **argv, struct vdo *vdo)
{
	int result;
	struct device_config *config;

	result = parse_device_config(argc, argv, ti, &config);
	if (result != VDO_SUCCESS)
		return -EINVAL;

	vdo_log_info("preparing to modify device '%s'", device_name);
	result = prepare_to_modify(ti, config, vdo);
	if (result != VDO_SUCCESS) {
		free_device_config(config);
		return vdo_status_to_errno(result);
	}

	set_device_config(ti, vdo, config);
	return VDO_SUCCESS;
}

/* Implements dm_ctr_fn. */
static int vdo_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	int result;
	struct registered_thread allocating_thread, instance_thread;
	const char *device_name;
	struct vdo *vdo;

	vdo_register_allocating_thread(&allocating_thread, NULL);
	device_name = vdo_get_device_name(ti);
	vdo = vdo_find_matching(vdo_is_named, device_name);
	if (vdo == NULL) {
		result = construct_new_vdo(ti, argc, argv);
	} else {
		vdo_register_thread_device_id(&instance_thread, &vdo->instance);
		result = update_existing_vdo(device_name, ti, argc, argv, vdo);
		vdo_unregister_thread_device_id();
	}

	vdo_unregister_allocating_thread();
	return result;
}

/* Implements dm_dtr_fn. */
static void vdo_dtr(struct dm_target *ti)
{
	struct device_config *config = ti->private;
	struct vdo *vdo = vdo_forget(config->vdo);

	list_del_init(&config->config_list);
	if (list_empty(&vdo->device_config_list)) {
		const char *device_name;

		/* This was the last config referencing the VDO. Free it. */
		unsigned int instance = vdo->instance;
		struct registered_thread allocating_thread, instance_thread;

		vdo_register_thread_device_id(&instance_thread, &instance);
		vdo_register_allocating_thread(&allocating_thread, NULL);

		device_name = vdo_get_device_name(ti);
		vdo_log_info("stopping device '%s'", device_name);
		if (vdo->dump_on_shutdown)
			vdo_dump_all(vdo, "device shutdown");

		vdo_destroy(vdo_forget(vdo));
		vdo_log_info("device '%s' stopped", device_name);
		vdo_unregister_thread_device_id();
		vdo_unregister_allocating_thread();
		release_instance(instance);
	} else if (config == vdo->device_config) {
		/*
		 * The VDO still references this config. Give it a reference to a config that isn't
		 * being destroyed.
		 */
		vdo->device_config = list_first_entry(&vdo->device_config_list,
						      struct device_config, config_list);
	}

	free_device_config(config);
	ti->private = NULL;
}

static void vdo_presuspend(struct dm_target *ti)
{
	get_vdo_for_target(ti)->suspend_type =
		(dm_noflush_suspending(ti) ? VDO_ADMIN_STATE_SUSPENDING : VDO_ADMIN_STATE_SAVING);
}

/**
 * write_super_block_for_suspend() - Update the VDO state and save the super block.
 * @completion: The admin completion.
 */
static void write_super_block_for_suspend(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;

	switch (vdo_get_state(vdo)) {
	case VDO_DIRTY:
	case VDO_NEW:
		vdo_set_state(vdo, VDO_CLEAN);
		break;

	case VDO_CLEAN:
	case VDO_READ_ONLY_MODE:
	case VDO_FORCE_REBUILD:
	case VDO_RECOVERING:
	case VDO_REBUILD_FOR_UPGRADE:
		break;

	case VDO_REPLAYING:
	default:
		vdo_continue_completion(completion, UDS_BAD_STATE);
		return;
	}

	vdo_save_components(vdo, completion);
}

/**
 * suspend_callback() - Callback to initiate a suspend, registered in vdo_postsuspend().
 * @completion: The sub-task completion.
 */
static void suspend_callback(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;
	struct admin_state *state = &vdo->admin.state;
	int result;

	assert_admin_phase_thread(vdo, __func__);

	switch (advance_phase(vdo)) {
	case SUSPEND_PHASE_START:
		if (vdo_get_admin_state_code(state)->quiescent) {
			/* Already suspended */
			break;
		}

		vdo_continue_completion(completion,
					vdo_start_operation(state, vdo->suspend_type));
		return;

	case SUSPEND_PHASE_PACKER:
		/*
		 * If the VDO was already resumed from a prior suspend while read-only, some of the
		 * components may not have been resumed. By setting a read-only error here, we
		 * guarantee that the result of this suspend will be VDO_READ_ONLY and not
		 * VDO_INVALID_ADMIN_STATE in that case.
		 */
		if (vdo_in_read_only_mode(vdo))
			vdo_set_completion_result(completion, VDO_READ_ONLY);

		vdo_drain_packer(vdo->packer, completion);
		return;

	case SUSPEND_PHASE_DATA_VIOS:
		drain_data_vio_pool(vdo->data_vio_pool, completion);
		return;

	case SUSPEND_PHASE_DEDUPE:
		vdo_drain_hash_zones(vdo->hash_zones, completion);
		return;

	case SUSPEND_PHASE_FLUSHES:
		vdo_drain_flusher(vdo->flusher, completion);
		return;

	case SUSPEND_PHASE_LOGICAL_ZONES:
		/*
		 * Attempt to flush all I/O before completing post-suspend work. We believe a
		 * suspended device is expected to have persisted all data written before the
		 * suspend, even if it hasn't been flushed yet.
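		 * A failed flush is handled below by entering read-only mode.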
		 */
		result = vdo_synchronous_flush(vdo);
		if (result != VDO_SUCCESS)
			vdo_enter_read_only_mode(vdo, result);

		vdo_drain_logical_zones(vdo->logical_zones,
					vdo_get_admin_state_code(state), completion);
		return;

	case SUSPEND_PHASE_BLOCK_MAP:
		vdo_drain_block_map(vdo->block_map, vdo_get_admin_state_code(state),
				    completion);
		return;

	case SUSPEND_PHASE_JOURNAL:
		vdo_drain_recovery_journal(vdo->recovery_journal,
					   vdo_get_admin_state_code(state), completion);
		return;

	case SUSPEND_PHASE_DEPOT:
		vdo_drain_slab_depot(vdo->depot, vdo_get_admin_state_code(state),
				     completion);
		return;

	case SUSPEND_PHASE_READ_ONLY_WAIT:
		vdo_wait_until_not_entering_read_only_mode(completion);
		return;

	case SUSPEND_PHASE_WRITE_SUPER_BLOCK:
		if (vdo_is_state_suspending(state) || (completion->result != VDO_SUCCESS)) {
			/* If we didn't save the VDO or there was an error, we're done. */
			break;
		}

		write_super_block_for_suspend(completion);
		return;

	case SUSPEND_PHASE_END:
		break;

	default:
		vdo_set_completion_result(completion, UDS_BAD_STATE);
	}

	finish_operation_callback(completion);
}

static void vdo_postsuspend(struct dm_target *ti)
{
	struct vdo *vdo = get_vdo_for_target(ti);
	struct registered_thread instance_thread;
	const char *device_name;
	int result;

	vdo_register_thread_device_id(&instance_thread, &vdo->instance);
	device_name = vdo_get_device_name(vdo->device_config->owning_target);
	vdo_log_info("suspending device '%s'", device_name);

	/*
	 * It's important to note that any error here does not actually stop device-mapper from
	 * suspending the device. All this work is done post-suspend.
	 */
	result = perform_admin_operation(vdo, SUSPEND_PHASE_START, suspend_callback,
					 suspend_callback, "suspend");

	if ((result == VDO_SUCCESS) || (result == VDO_READ_ONLY)) {
		/*
		 * Treat VDO_READ_ONLY as a success since a read-only suspension still leaves the
		 * VDO suspended.
		 */
		vdo_log_info("device '%s' suspended", device_name);
	} else if (result == VDO_INVALID_ADMIN_STATE) {
		vdo_log_error("Suspend invoked while in unexpected state: %s",
			      vdo_get_admin_state(vdo)->name);
	} else {
		vdo_log_error_strerror(result, "Suspend of device '%s' failed",
				       device_name);
	}

	vdo_unregister_thread_device_id();
}

/**
 * was_new() - Check whether the vdo was new when it was loaded.
 * @vdo: The vdo to query.
 *
 * Return: true if the vdo was new.
 */
static bool was_new(const struct vdo *vdo)
{
	return (vdo->load_state == VDO_NEW);
}

/**
 * requires_repair() - Check whether a vdo requires recovery or rebuild.
 * @vdo: The vdo to query.
 *
 * Return: true if the vdo must be repaired.
 */
static bool __must_check requires_repair(const struct vdo *vdo)
{
	switch (vdo_get_state(vdo)) {
	case VDO_DIRTY:
	case VDO_FORCE_REBUILD:
	case VDO_REPLAYING:
	case VDO_REBUILD_FOR_UPGRADE:
		return true;

	default:
		return false;
	}
}

/**
 * get_load_type() - Determine how the slab depot was loaded.
 * @vdo: The vdo.
 *
 * Return: How the depot was loaded.
 */
static enum slab_depot_load_type get_load_type(struct vdo *vdo)
{
	if (vdo_state_requires_read_only_rebuild(vdo->load_state))
		return VDO_SLAB_DEPOT_REBUILD_LOAD;

	if (vdo_state_requires_recovery(vdo->load_state))
		return VDO_SLAB_DEPOT_RECOVERY_LOAD;

	return VDO_SLAB_DEPOT_NORMAL_LOAD;
}

/**
 * load_callback() - Callback to do the destructive parts of loading a VDO.
 * @completion: The sub-task completion.
 */
static void load_callback(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;
	int result;

	assert_admin_phase_thread(vdo, __func__);

	switch (advance_phase(vdo)) {
	case LOAD_PHASE_START:
		result = vdo_start_operation(&vdo->admin.state, VDO_ADMIN_STATE_LOADING);
		if (result != VDO_SUCCESS) {
			vdo_continue_completion(completion, result);
			return;
		}

		/* Prepare the recovery journal for new entries. */
		vdo_open_recovery_journal(vdo->recovery_journal, vdo->depot,
					  vdo->block_map);
		vdo_allow_read_only_mode_entry(completion);
		return;

	case LOAD_PHASE_LOAD_DEPOT:
		vdo_set_dedupe_state_normal(vdo->hash_zones);
		if (vdo_is_read_only(vdo)) {
			/*
			 * In read-only mode we don't use the allocator and it may not even be
			 * readable, so don't bother trying to load it.
			 */
			vdo_set_completion_result(completion, VDO_READ_ONLY);
			break;
		}

		if (requires_repair(vdo)) {
			vdo_repair(completion);
			return;
		}

		vdo_load_slab_depot(vdo->depot,
				    (was_new(vdo) ? VDO_ADMIN_STATE_FORMATTING :
				     VDO_ADMIN_STATE_LOADING),
				    completion, NULL);
		return;

	case LOAD_PHASE_MAKE_DIRTY:
		vdo_set_state(vdo, VDO_DIRTY);
		vdo_save_components(vdo, completion);
		return;

	case LOAD_PHASE_PREPARE_TO_ALLOCATE:
		vdo_initialize_block_map_from_journal(vdo->block_map,
						      vdo->recovery_journal);
		vdo_prepare_slab_depot_to_allocate(vdo->depot, get_load_type(vdo),
						   completion);
		return;

	case LOAD_PHASE_SCRUB_SLABS:
		if (vdo_state_requires_recovery(vdo->load_state))
			vdo_enter_recovery_mode(vdo);

		vdo_scrub_all_unrecovered_slabs(vdo->depot, completion);
		return;

	case LOAD_PHASE_DATA_REDUCTION:
		WRITE_ONCE(vdo->compressing, vdo->device_config->compression);
		if (vdo->device_config->deduplication) {
			/*
			 * Don't try to load or rebuild the index first (and log scary error
			 * messages) if this is known to be a newly-formatted volume.
			 */
			vdo_start_dedupe_index(vdo->hash_zones, was_new(vdo));
		}

		vdo->allocations_allowed = false;
		fallthrough;

	case LOAD_PHASE_FINISHED:
		break;

	case LOAD_PHASE_DRAIN_JOURNAL:
		vdo_drain_recovery_journal(vdo->recovery_journal, VDO_ADMIN_STATE_SAVING,
					   completion);
		return;

	case LOAD_PHASE_WAIT_FOR_READ_ONLY:
		/* Avoid an infinite loop */
		completion->error_handler = NULL;
		vdo->admin.phase = LOAD_PHASE_FINISHED;
		vdo_wait_until_not_entering_read_only_mode(completion);
		return;

	default:
		vdo_set_completion_result(completion, UDS_BAD_STATE);
	}

	finish_operation_callback(completion);
}

/**
 * handle_load_error() - Handle an error during the load operation.
 * @completion: The admin completion.
 *
 * If at all possible, brings the vdo online in read-only mode.
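 * (A read-only vdo can still service read requests.)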
 * This handler is registered in vdo_preresume_registered().
 */
static void handle_load_error(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;

	if (vdo_requeue_completion_if_needed(completion,
					     vdo->thread_config.admin_thread))
		return;

	if (vdo_state_requires_read_only_rebuild(vdo->load_state) &&
	    (vdo->admin.phase == LOAD_PHASE_MAKE_DIRTY)) {
		vdo_log_error_strerror(completion->result, "aborting load");
		vdo->admin.phase = LOAD_PHASE_DRAIN_JOURNAL;
		load_callback(vdo_forget(completion));
		return;
	}

	if ((completion->result == VDO_UNSUPPORTED_VERSION) &&
	    (vdo->admin.phase == LOAD_PHASE_MAKE_DIRTY)) {
		vdo_log_error("Aborting load due to unsupported version");
		vdo->admin.phase = LOAD_PHASE_FINISHED;
		load_callback(completion);
		return;
	}

	vdo_log_error_strerror(completion->result,
			       "Entering read-only mode due to load error");
	vdo->admin.phase = LOAD_PHASE_WAIT_FOR_READ_ONLY;
	vdo_enter_read_only_mode(vdo, completion->result);
	completion->result = VDO_READ_ONLY;
	load_callback(completion);
}

/**
 * write_super_block_for_resume() - Update the VDO state and save the super block.
 * @completion: The admin completion.
 */
static void write_super_block_for_resume(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;

	switch (vdo_get_state(vdo)) {
	case VDO_CLEAN:
	case VDO_NEW:
		vdo_set_state(vdo, VDO_DIRTY);
		vdo_save_components(vdo, completion);
		return;

	case VDO_DIRTY:
	case VDO_READ_ONLY_MODE:
	case VDO_FORCE_REBUILD:
	case VDO_RECOVERING:
	case VDO_REBUILD_FOR_UPGRADE:
		/* No need to write the super block in these cases */
		vdo_launch_completion(completion);
		return;

	case VDO_REPLAYING:
	default:
		vdo_continue_completion(completion, UDS_BAD_STATE);
	}
}

/**
 * resume_callback() - Callback to resume a VDO.
 * @completion: The admin completion.
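 *
 * Registered in vdo_preresume_registered().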
 */
static void resume_callback(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;
	int result;

	assert_admin_phase_thread(vdo, __func__);

	switch (advance_phase(vdo)) {
	case RESUME_PHASE_START:
		result = vdo_start_operation(&vdo->admin.state,
					     VDO_ADMIN_STATE_RESUMING);
		if (result != VDO_SUCCESS) {
			vdo_continue_completion(completion, result);
			return;
		}

		write_super_block_for_resume(completion);
		return;

	case RESUME_PHASE_ALLOW_READ_ONLY_MODE:
		vdo_allow_read_only_mode_entry(completion);
		return;

	case RESUME_PHASE_DEDUPE:
		vdo_resume_hash_zones(vdo->hash_zones, completion);
		return;

	case RESUME_PHASE_DEPOT:
		vdo_resume_slab_depot(vdo->depot, completion);
		return;

	case RESUME_PHASE_JOURNAL:
		vdo_resume_recovery_journal(vdo->recovery_journal, completion);
		return;

	case RESUME_PHASE_BLOCK_MAP:
		vdo_resume_block_map(vdo->block_map, completion);
		return;

	case RESUME_PHASE_LOGICAL_ZONES:
		vdo_resume_logical_zones(vdo->logical_zones, completion);
		return;

	case RESUME_PHASE_PACKER:
	{
		bool was_enabled = vdo_get_compressing(vdo);
		bool enable = vdo->device_config->compression;

		if (enable != was_enabled)
			WRITE_ONCE(vdo->compressing, enable);
		vdo_log_info("compression is %s", (enable ? "enabled" : "disabled"));

		vdo_resume_packer(vdo->packer, completion);
		return;
	}

	case RESUME_PHASE_FLUSHER:
		vdo_resume_flusher(vdo->flusher, completion);
		return;

	case RESUME_PHASE_DATA_VIOS:
		resume_data_vio_pool(vdo->data_vio_pool, completion);
		return;

	case RESUME_PHASE_END:
		break;

	default:
		vdo_set_completion_result(completion, UDS_BAD_STATE);
	}

	finish_operation_callback(completion);
}

/**
 * grow_logical_callback() - Callback to initiate a grow logical.
 * @completion: The admin completion.
 *
 * Registered in perform_grow_logical().
 */
static void grow_logical_callback(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;
	int result;

	assert_admin_phase_thread(vdo, __func__);

	switch (advance_phase(vdo)) {
	case GROW_LOGICAL_PHASE_START:
		if (vdo_is_read_only(vdo)) {
			vdo_log_error_strerror(VDO_READ_ONLY,
					       "Can't grow logical size of a read-only VDO");
			vdo_set_completion_result(completion, VDO_READ_ONLY);
			break;
		}

		result = vdo_start_operation(&vdo->admin.state,
					     VDO_ADMIN_STATE_SUSPENDED_OPERATION);
		if (result != VDO_SUCCESS) {
			vdo_continue_completion(completion, result);
			return;
		}

		vdo->states.vdo.config.logical_blocks = vdo->block_map->next_entry_count;
		vdo_save_components(vdo, completion);
		return;

	case GROW_LOGICAL_PHASE_GROW_BLOCK_MAP:
		vdo_grow_block_map(vdo->block_map, completion);
		return;

	case GROW_LOGICAL_PHASE_END:
		break;

	case GROW_LOGICAL_PHASE_ERROR:
		vdo_enter_read_only_mode(vdo, completion->result);
		break;

	default:
		vdo_set_completion_result(completion, UDS_BAD_STATE);
	}

	finish_operation_callback(completion);
}

/**
 * handle_logical_growth_error() - Handle an error during the grow logical process.
 * @completion: The admin completion.
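 *
 * Registered in perform_grow_logical().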
 */
static void handle_logical_growth_error(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;

	if (vdo->admin.phase == GROW_LOGICAL_PHASE_GROW_BLOCK_MAP) {
		/*
		 * We've failed to write the new size in the super block, so set our in-memory
		 * config back to the old size.
		 */
		vdo->states.vdo.config.logical_blocks = vdo->block_map->entry_count;
		vdo_abandon_block_map_growth(vdo->block_map);
	}

	vdo->admin.phase = GROW_LOGICAL_PHASE_ERROR;
	grow_logical_callback(completion);
}

/**
 * perform_grow_logical() - Grow the logical size of the vdo.
 * @vdo: The vdo to grow.
 * @new_logical_blocks: The size to which the vdo should be grown.
 *
 * Context: This method may only be called when the vdo has been suspended and must not be called
 *	    from a base thread.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int perform_grow_logical(struct vdo *vdo, block_count_t new_logical_blocks)
{
	int result;

	if (vdo->device_config->logical_blocks == new_logical_blocks) {
		/*
		 * We prepared to grow for a table that was loaded, but the table we are resuming
		 * with does not include that growth.
		 */
		vdo_abandon_block_map_growth(vdo->block_map);
		return VDO_SUCCESS;
	}

	vdo_log_info("Resizing logical to %llu",
		     (unsigned long long) new_logical_blocks);
	if (vdo->block_map->next_entry_count != new_logical_blocks)
		return VDO_PARAMETER_MISMATCH;

	result = perform_admin_operation(vdo, GROW_LOGICAL_PHASE_START,
					 grow_logical_callback,
					 handle_logical_growth_error, "grow logical");
	if (result != VDO_SUCCESS)
		return result;

	vdo_log_info("Logical blocks now %llu", (unsigned long long) new_logical_blocks);
	return VDO_SUCCESS;
}

/* Implements dm_kcopyd_notify_fn. */
static void copy_callback(int read_err, unsigned long write_err, void *context)
{
	struct vdo_completion *completion = context;
	int result = (((read_err == 0) && (write_err == 0)) ? VDO_SUCCESS : -EIO);

	vdo_continue_completion(completion, result);
}

static void partition_to_region(struct partition *partition, struct vdo *vdo,
				struct dm_io_region *region)
{
	physical_block_number_t pbn = partition->offset - vdo->geometry.bio_offset;

	*region = (struct dm_io_region) {
		.bdev = vdo_get_backing_device(vdo),
		.sector = pbn * VDO_SECTORS_PER_BLOCK,
		.count = partition->count * VDO_SECTORS_PER_BLOCK,
	};
}

/**
 * copy_partition() - Copy a partition from the location specified in the current layout to that in
 *		      the next layout.
 * @vdo: The vdo preparing to grow.
 * @id: The ID of the partition to copy.
 * @parent: The completion to notify when the copy is complete.
 */
static void copy_partition(struct vdo *vdo, enum partition_id id,
			   struct vdo_completion *parent)
{
	struct dm_io_region read_region, write_regions[1];
	struct partition *from = vdo_get_known_partition(&vdo->layout, id);
	struct partition *to = vdo_get_known_partition(&vdo->next_layout, id);

	partition_to_region(from, vdo, &read_region);
	partition_to_region(to, vdo, &write_regions[0]);
	dm_kcopyd_copy(vdo->partition_copier, &read_region, 1, write_regions, 0,
		       copy_callback, parent);
}

/**
 * grow_physical_callback() - Callback to initiate a grow physical.
 * @completion: The admin completion.
 *
 * Registered in perform_grow_physical().
 */
static void grow_physical_callback(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;
	int result;

	assert_admin_phase_thread(vdo, __func__);

	switch (advance_phase(vdo)) {
	case GROW_PHYSICAL_PHASE_START:
		if (vdo_is_read_only(vdo)) {
			vdo_log_error_strerror(VDO_READ_ONLY,
					       "Can't grow physical size of a read-only VDO");
			vdo_set_completion_result(completion, VDO_READ_ONLY);
			break;
		}

		result = vdo_start_operation(&vdo->admin.state,
					     VDO_ADMIN_STATE_SUSPENDED_OPERATION);
		if (result != VDO_SUCCESS) {
			vdo_continue_completion(completion, result);
			return;
		}

		/* Copy the journal into the new layout. */
		copy_partition(vdo, VDO_RECOVERY_JOURNAL_PARTITION, completion);
		return;

	case GROW_PHYSICAL_PHASE_COPY_SUMMARY:
		copy_partition(vdo, VDO_SLAB_SUMMARY_PARTITION, completion);
		return;

	case GROW_PHYSICAL_PHASE_UPDATE_COMPONENTS:
		vdo_uninitialize_layout(&vdo->layout);
		vdo->layout = vdo->next_layout;
		vdo_forget(vdo->next_layout.head);
		vdo->states.vdo.config.physical_blocks = vdo->layout.size;
		vdo_update_slab_depot_size(vdo->depot);
		vdo_save_components(vdo, completion);
		return;

	case GROW_PHYSICAL_PHASE_USE_NEW_SLABS:
		vdo_use_new_slabs(vdo->depot, completion);
		return;

	case GROW_PHYSICAL_PHASE_END:
		vdo->depot->summary_origin =
			vdo_get_known_partition(&vdo->layout,
						VDO_SLAB_SUMMARY_PARTITION)->offset;
		vdo->recovery_journal->origin =
			vdo_get_known_partition(&vdo->layout,
						VDO_RECOVERY_JOURNAL_PARTITION)->offset;
		break;

	case GROW_PHYSICAL_PHASE_ERROR:
		vdo_enter_read_only_mode(vdo, completion->result);
		break;

	default:
		vdo_set_completion_result(completion, UDS_BAD_STATE);
	}

	vdo_uninitialize_layout(&vdo->next_layout);
	finish_operation_callback(completion);
}

/**
 * handle_physical_growth_error() - Handle an error during the grow physical process.
 * @completion: The sub-task completion.
 */
static void handle_physical_growth_error(struct vdo_completion *completion)
{
	completion->vdo->admin.phase = GROW_PHYSICAL_PHASE_ERROR;
	grow_physical_callback(completion);
}

/**
 * perform_grow_physical() - Grow the physical size of the vdo.
 * @vdo: The vdo to resize.
 * @new_physical_blocks: The new physical size in blocks.
 *
 * Context: This method may only be called when the vdo has been suspended and must not be called
 *	    from a base thread.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int perform_grow_physical(struct vdo *vdo, block_count_t new_physical_blocks)
{
	int result;
	block_count_t new_depot_size, prepared_depot_size;
	block_count_t old_physical_blocks = vdo->states.vdo.config.physical_blocks;

	/* Skip any noop grows. */
	if (old_physical_blocks == new_physical_blocks)
		return VDO_SUCCESS;

	if (new_physical_blocks != vdo->next_layout.size) {
		/*
		 * Either the VDO isn't prepared to grow, or it was prepared to grow to a different
		 * size. Doing this check here relies on the fact that the call to this method is
		 * done under the dmsetup message lock.
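		 * Abandon the stale preparation so that a later table load can prepare again.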
		 */
		vdo_uninitialize_layout(&vdo->next_layout);
		vdo_abandon_new_slabs(vdo->depot);
		return VDO_PARAMETER_MISMATCH;
	}

	/* Validate that we are prepared to grow appropriately. */
	new_depot_size =
		vdo_get_known_partition(&vdo->next_layout, VDO_SLAB_DEPOT_PARTITION)->count;
	prepared_depot_size = (vdo->depot->new_slabs == NULL) ? 0 : vdo->depot->new_size;
	if (prepared_depot_size != new_depot_size)
		return VDO_PARAMETER_MISMATCH;

	result = perform_admin_operation(vdo, GROW_PHYSICAL_PHASE_START,
					 grow_physical_callback,
					 handle_physical_growth_error, "grow physical");
	if (result != VDO_SUCCESS)
		return result;

	vdo_log_info("Physical block count was %llu, now %llu",
		     (unsigned long long) old_physical_blocks,
		     (unsigned long long) new_physical_blocks);
	return VDO_SUCCESS;
}

/**
 * apply_new_vdo_configuration() - Attempt to make any configuration changes from the table being
 *				   resumed.
 * @vdo: The vdo being resumed.
 * @config: The new device configuration derived from the table with which the vdo is being
 *	    resumed.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int __must_check apply_new_vdo_configuration(struct vdo *vdo,
						    struct device_config *config)
{
	int result;

	result = perform_grow_logical(vdo, config->logical_blocks);
	if (result != VDO_SUCCESS) {
		vdo_log_error("grow logical operation failed, result = %d", result);
		return result;
	}

	result = perform_grow_physical(vdo, config->physical_blocks);
	if (result != VDO_SUCCESS)
		vdo_log_error("resize operation failed, result = %d", result);

	return result;
}

static int vdo_preresume_registered(struct dm_target *ti, struct vdo *vdo)
{
	struct device_config *config = ti->private;
	const char *device_name = vdo_get_device_name(ti);
	block_count_t backing_blocks;
	int result;

	backing_blocks = get_underlying_device_block_count(vdo);
	if (backing_blocks < config->physical_blocks) {
		/* FIXME: can this still happen? */
		vdo_log_error("resume of device '%s' failed: backing device has %llu blocks but VDO physical size is %llu blocks",
			      device_name, (unsigned long long) backing_blocks,
			      (unsigned long long) config->physical_blocks);
		return -EINVAL;
	}

	if (vdo_get_admin_state(vdo) == VDO_ADMIN_STATE_PRE_LOADED) {
		vdo_log_info("starting device '%s'", device_name);
		result = perform_admin_operation(vdo, LOAD_PHASE_START, load_callback,
						 handle_load_error, "load");
		if (result == VDO_UNSUPPORTED_VERSION) {
			/*
			 * A component version is not supported. This can happen when the
			 * recovery journal metadata is in an old version format. Abort the
			 * load without saving the state.
			 */
			vdo->suspend_type = VDO_ADMIN_STATE_SUSPENDING;
			perform_admin_operation(vdo, SUSPEND_PHASE_START,
						suspend_callback, suspend_callback,
						"suspend");
			return result;
		}

		if ((result != VDO_SUCCESS) && (result != VDO_READ_ONLY)) {
			/*
			 * Something has gone very wrong. Make sure everything has drained and
			 * leave the device in an unresumable state.
			 */
			vdo_log_error_strerror(result,
					       "Start failed, could not load VDO metadata");
			vdo->suspend_type = VDO_ADMIN_STATE_STOPPING;
			perform_admin_operation(vdo, SUSPEND_PHASE_START,
						suspend_callback, suspend_callback,
						"suspend");
			return result;
		}

		/* Even if the VDO is read-only, it is now able to handle read requests. */
		vdo_log_info("device '%s' started", device_name);
	}

	vdo_log_info("resuming device '%s'", device_name);

	/* If this fails, the VDO was not in a state to be resumed. This should never happen. */
	result = apply_new_vdo_configuration(vdo, config);
	BUG_ON(result == VDO_INVALID_ADMIN_STATE);

	/*
	 * Now that we've tried to modify the vdo, the new config *is* the config, whether the
	 * modifications worked or not.
	 */
	vdo->device_config = config;

	/*
	 * Any error here is highly unexpected and the state of the vdo is questionable, so we mark
	 * it read-only in memory. Because we are suspended, the read-only state will not be
	 * written to disk.
	 */
	if (result != VDO_SUCCESS) {
		vdo_log_error_strerror(result,
				       "Commit of modifications to device '%s' failed",
				       device_name);
		vdo_enter_read_only_mode(vdo, result);
		return result;
	}

	if (vdo_get_admin_state(vdo)->normal) {
		/* The VDO was just started, so we don't need to resume it. */
		return VDO_SUCCESS;
	}

	result = perform_admin_operation(vdo, RESUME_PHASE_START, resume_callback,
					 resume_callback, "resume");
	BUG_ON(result == VDO_INVALID_ADMIN_STATE);
	if (result == VDO_READ_ONLY) {
		/* Even if the vdo is read-only, it has still resumed. */
		result = VDO_SUCCESS;
	}

	if (result != VDO_SUCCESS)
		vdo_log_error("resume of device '%s' failed with error: %d", device_name,
			      result);

	return result;
}

static int vdo_preresume(struct dm_target *ti)
{
	struct registered_thread instance_thread;
	struct vdo *vdo = get_vdo_for_target(ti);
	int result;

	vdo_register_thread_device_id(&instance_thread, &vdo->instance);
	result = vdo_preresume_registered(ti, vdo);
	if ((result == VDO_PARAMETER_MISMATCH) || (result == VDO_INVALID_ADMIN_STATE) ||
	    (result == VDO_UNSUPPORTED_VERSION))
		result = -EINVAL;
	vdo_unregister_thread_device_id();
	return vdo_status_to_errno(result);
}

static void vdo_resume(struct dm_target *ti)
{
	struct registered_thread instance_thread;

	vdo_register_thread_device_id(&instance_thread,
				      &get_vdo_for_target(ti)->instance);
	vdo_log_info("device '%s' resumed", vdo_get_device_name(ti));
	vdo_unregister_thread_device_id();
}

/*
 * If anything changes that affects how user tools will interact with vdo, update the version
 * number and make sure documentation about the change is complete so tools can properly update
 * their management code.
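 *
 * The version below is the target's own version as reported to device-mapper, distinct from the
 * kernel module version.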
 */
static struct target_type vdo_target_bio = {
	.features = DM_TARGET_SINGLETON,
	.name = "vdo",
	.version = { 9, 1, 0 },
	.module = THIS_MODULE,
	.ctr = vdo_ctr,
	.dtr = vdo_dtr,
	.io_hints = vdo_io_hints,
	.iterate_devices = vdo_iterate_devices,
	.map = vdo_map_bio,
	.message = vdo_message,
	.status = vdo_status,
	.presuspend = vdo_presuspend,
	.postsuspend = vdo_postsuspend,
	.preresume = vdo_preresume,
	.resume = vdo_resume,
};

static bool dm_registered;

static void vdo_module_destroy(void)
{
	vdo_log_debug("unloading");

	if (dm_registered)
		dm_unregister_target(&vdo_target_bio);

	VDO_ASSERT_LOG_ONLY(instances.count == 0,
			    "should have no instance numbers still in use, but have %u",
			    instances.count);
	vdo_free(instances.words);
	memset(&instances, 0, sizeof(struct instance_tracker));
}

static int __init vdo_init(void)
{
	int result = 0;

	/* Memory tracking must be initialized first for accurate accounting. */
	vdo_memory_init();
	vdo_initialize_threads_mutex();
	vdo_initialize_thread_device_registry();
	vdo_initialize_device_registry_once();

	/* Add VDO errors to the set of errors registered by the indexer. */
	result = vdo_register_status_codes();
	if (result != VDO_SUCCESS) {
		vdo_log_error("vdo_register_status_codes failed %d", result);
		vdo_module_destroy();
		return result;
	}

	result = dm_register_target(&vdo_target_bio);
	if (result < 0) {
		vdo_log_error("dm_register_target failed %d", result);
		vdo_module_destroy();
		return result;
	}
	dm_registered = true;

	return result;
}

static void __exit vdo_exit(void)
{
	vdo_module_destroy();
	/* Memory tracking cleanup must be done last. */
	vdo_memory_exit();
}

module_init(vdo_init);
module_exit(vdo_exit);

module_param_named(log_level, vdo_log_level, uint, 0644);
MODULE_PARM_DESC(log_level, "Log level for log messages");

MODULE_DESCRIPTION(DM_NAME " target for transparent deduplication");
MODULE_AUTHOR("Red Hat, Inc.");
MODULE_LICENSE("GPL");