// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2023 Red Hat
 */

#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/device-mapper.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>

#include "admin-state.h"
#include "block-map.h"
#include "completion.h"
#include "constants.h"
#include "data-vio.h"
#include "dedupe.h"
#include "dump.h"
#include "encodings.h"
#include "errors.h"
#include "flush.h"
#include "io-submitter.h"
#include "logger.h"
#include "memory-alloc.h"
#include "message-stats.h"
#include "recovery-journal.h"
#include "repair.h"
#include "slab-depot.h"
#include "status-codes.h"
#include "string-utils.h"
#include "thread-device.h"
#include "thread-registry.h"
#include "thread-utils.h"
#include "types.h"
#include "vdo.h"
#include "vio.h"

/*
 * The phases of each admin operation (load, suspend, resume, grow, etc.),
 * grouped by operation. This enum must stay in the same order as
 * ADMIN_PHASE_NAMES below, which indexes by these values.
 */
enum admin_phases {
	GROW_LOGICAL_PHASE_START,
	GROW_LOGICAL_PHASE_GROW_BLOCK_MAP,
	GROW_LOGICAL_PHASE_END,
	GROW_LOGICAL_PHASE_ERROR,
	GROW_PHYSICAL_PHASE_START,
	GROW_PHYSICAL_PHASE_COPY_SUMMARY,
	GROW_PHYSICAL_PHASE_UPDATE_COMPONENTS,
	GROW_PHYSICAL_PHASE_USE_NEW_SLABS,
	GROW_PHYSICAL_PHASE_END,
	GROW_PHYSICAL_PHASE_ERROR,
	LOAD_PHASE_START,
	LOAD_PHASE_LOAD_DEPOT,
	LOAD_PHASE_MAKE_DIRTY,
	LOAD_PHASE_PREPARE_TO_ALLOCATE,
	LOAD_PHASE_SCRUB_SLABS,
	LOAD_PHASE_DATA_REDUCTION,
	LOAD_PHASE_FINISHED,
	LOAD_PHASE_DRAIN_JOURNAL,
	LOAD_PHASE_WAIT_FOR_READ_ONLY,
	PRE_LOAD_PHASE_START,
	PRE_LOAD_PHASE_LOAD_COMPONENTS,
	PRE_LOAD_PHASE_END,
	PREPARE_GROW_PHYSICAL_PHASE_START,
	RESUME_PHASE_START,
	RESUME_PHASE_ALLOW_READ_ONLY_MODE,
	RESUME_PHASE_DEDUPE,
	RESUME_PHASE_DEPOT,
	RESUME_PHASE_JOURNAL,
	RESUME_PHASE_BLOCK_MAP,
	RESUME_PHASE_LOGICAL_ZONES,
	RESUME_PHASE_PACKER,
	RESUME_PHASE_FLUSHER,
	RESUME_PHASE_DATA_VIOS,
	RESUME_PHASE_END,
	SUSPEND_PHASE_START,
	SUSPEND_PHASE_PACKER,
	SUSPEND_PHASE_DATA_VIOS,
	SUSPEND_PHASE_DEDUPE,
	SUSPEND_PHASE_FLUSHES,
	SUSPEND_PHASE_LOGICAL_ZONES,
	SUSPEND_PHASE_BLOCK_MAP,
	SUSPEND_PHASE_JOURNAL,
	SUSPEND_PHASE_DEPOT,
	SUSPEND_PHASE_READ_ONLY_WAIT,
	SUSPEND_PHASE_WRITE_SUPER_BLOCK,
	SUSPEND_PHASE_END,
};

/* Printable names for the phases; must be kept in sync with enum admin_phases above. */
static const char * const ADMIN_PHASE_NAMES[] = {
	"GROW_LOGICAL_PHASE_START",
	"GROW_LOGICAL_PHASE_GROW_BLOCK_MAP",
	"GROW_LOGICAL_PHASE_END",
	"GROW_LOGICAL_PHASE_ERROR",
	"GROW_PHYSICAL_PHASE_START",
	"GROW_PHYSICAL_PHASE_COPY_SUMMARY",
	"GROW_PHYSICAL_PHASE_UPDATE_COMPONENTS",
	"GROW_PHYSICAL_PHASE_USE_NEW_SLABS",
	"GROW_PHYSICAL_PHASE_END",
	"GROW_PHYSICAL_PHASE_ERROR",
	"LOAD_PHASE_START",
	"LOAD_PHASE_LOAD_DEPOT",
	"LOAD_PHASE_MAKE_DIRTY",
	"LOAD_PHASE_PREPARE_TO_ALLOCATE",
	"LOAD_PHASE_SCRUB_SLABS",
	"LOAD_PHASE_DATA_REDUCTION",
	"LOAD_PHASE_FINISHED",
	"LOAD_PHASE_DRAIN_JOURNAL",
	"LOAD_PHASE_WAIT_FOR_READ_ONLY",
	"PRE_LOAD_PHASE_START",
	"PRE_LOAD_PHASE_LOAD_COMPONENTS",
	"PRE_LOAD_PHASE_END",
	"PREPARE_GROW_PHYSICAL_PHASE_START",
	"RESUME_PHASE_START",
	"RESUME_PHASE_ALLOW_READ_ONLY_MODE",
	"RESUME_PHASE_DEDUPE",
	"RESUME_PHASE_DEPOT",
	"RESUME_PHASE_JOURNAL",
	"RESUME_PHASE_BLOCK_MAP",
	"RESUME_PHASE_LOGICAL_ZONES",
	"RESUME_PHASE_PACKER",
	"RESUME_PHASE_FLUSHER",
	"RESUME_PHASE_DATA_VIOS",
	"RESUME_PHASE_END",
	"SUSPEND_PHASE_START",
	"SUSPEND_PHASE_PACKER",
	"SUSPEND_PHASE_DATA_VIOS",
	"SUSPEND_PHASE_DEDUPE",
	"SUSPEND_PHASE_FLUSHES",
	"SUSPEND_PHASE_LOGICAL_ZONES",
	"SUSPEND_PHASE_BLOCK_MAP",
	"SUSPEND_PHASE_JOURNAL",
	"SUSPEND_PHASE_DEPOT",
	"SUSPEND_PHASE_READ_ONLY_WAIT",
	"SUSPEND_PHASE_WRITE_SUPER_BLOCK",
	"SUSPEND_PHASE_END",
};

/* If we bump this, update the arrays below */
#define TABLE_VERSION 4

/* arrays for handling different table
 * versions */
/* Indexed by table version: the argument count each version requires. */
static const u8 REQUIRED_ARGC[] = { 10, 12, 9, 7, 6 };
/* pool name no longer used. only here for verification of older versions */
static const u8 POOL_NAME_ARG_INDEX[] = { 8, 10, 8 };

/*
 * Track in-use instance numbers using a flat bit array.
 *
 * O(n) run time isn't ideal, but if we have 1000 VDO devices in use simultaneously we still only
 * need to scan 16 words, so it's not likely to be a big deal compared to other resource usage.
 */

/*
 * This minimum size for the bit array creates a numbering space of 0-999, which allows
 * successive starts of the same volume to have different instance numbers in any
 * reasonably-sized test. Changing instances on restart allows vdoMonReport to detect that
 * the ephemeral stats have reset to zero.
 */
#define BIT_COUNT_MINIMUM 1000
/* Grow the bit array by this many bits when needed */
#define BIT_COUNT_INCREMENT 100

struct instance_tracker {
	/* Number of bits (instance numbers) currently representable */
	unsigned int bit_count;
	/* The bit array itself; a set bit means the instance number is in use */
	unsigned long *words;
	/* How many instance numbers are currently in use */
	unsigned int count;
	/* The lowest instance number at which to start the next search */
	unsigned int next;
};

/* Protects all access to the instances tracker below. */
static DEFINE_MUTEX(instances_lock);
static struct instance_tracker instances;

/**
 * free_device_config() - Free a device config created by parse_device_config().
 * @config: The config to free.
 */
static void free_device_config(struct device_config *config)
{
	if (config == NULL)
		return;

	if (config->owned_device != NULL)
		dm_put_device(config->owning_target, config->owned_device);

	vdo_free(config->parent_device_name);
	vdo_free(config->original_string);

	/* Reduce the chance a use-after-free (as in BZ 1669960) happens to work. */
	memset(config, 0, sizeof(*config));
	vdo_free(config);
}

/**
 * get_version_number() - Decide the version number from argv.
 *
 * @argc: The number of table values.
 * @argv: The array of table values.
 * @error_ptr: A pointer to return an error string in.
 * @version_ptr: A pointer to return the version.
 *
 * Return: VDO_SUCCESS or an error code.
 */
static int get_version_number(int argc, char **argv, char **error_ptr,
			      unsigned int *version_ptr)
{
	/* version, if it exists, is in a form of V<n> */
	if (sscanf(argv[0], "V%u", version_ptr) == 1) {
		if (*version_ptr < 1 || *version_ptr > TABLE_VERSION) {
			*error_ptr = "Unknown version number detected";
			return VDO_BAD_CONFIGURATION;
		}
	} else {
		/* V0 actually has no version number in the table string */
		*version_ptr = 0;
	}

	/*
	 * V0 and V1 have no optional parameters. There will always be a parameter for thread
	 * config, even if it's a "." to show it's an empty list.
	 */
	if (*version_ptr <= 1) {
		if (argc != REQUIRED_ARGC[*version_ptr]) {
			*error_ptr = "Incorrect number of arguments for version";
			return VDO_BAD_CONFIGURATION;
		}
	} else if (argc < REQUIRED_ARGC[*version_ptr]) {
		*error_ptr = "Incorrect number of arguments for version";
		return VDO_BAD_CONFIGURATION;
	}

	/* A mismatch is tolerated (not an error), but warn so the admin can upgrade tools. */
	if (*version_ptr != TABLE_VERSION) {
		vdo_log_warning("Detected version mismatch between kernel module and tools kernel: %d, tool: %d",
				TABLE_VERSION, *version_ptr);
		vdo_log_warning("Please consider upgrading management tools to match kernel.");
	}
	return VDO_SUCCESS;
}

/* Free a list of non-NULL string pointers, and then the list itself. */
static void free_string_array(char **string_array)
{
	unsigned int offset;

	for (offset = 0; string_array[offset] != NULL; offset++)
		vdo_free(string_array[offset]);
	vdo_free(string_array);
}

/*
 * Split the input string into substrings, separated at occurrences of the indicated character,
 * returning a null-terminated list of string pointers.
 *
 * The string pointers and the pointer array itself should both be freed with vdo_free() when no
 * longer needed. This can be done with free_string_array() (above) if the pointers in the array
 * are not changed. Since the array and copied strings are allocated by this function, it may only
 * be used in contexts where allocation is permitted.
 *
 * Empty substrings are not ignored; that is, returned substrings may be empty strings if the
 * separator occurs twice in a row.
 */
static int split_string(const char *string, char separator, char ***substring_array_ptr)
{
	unsigned int current_substring = 0, substring_count = 1;
	const char *s;
	char **substrings;
	int result;
	ptrdiff_t length;

	/* Count separators first so the pointer array can be sized exactly. */
	for (s = string; *s != 0; s++) {
		if (*s == separator)
			substring_count++;
	}

	result = vdo_allocate(substring_count + 1, char *, "string-splitting array",
			      &substrings);
	if (result != VDO_SUCCESS)
		return result;

	for (s = string; *s != 0; s++) {
		if (*s == separator) {
			/* NOTE(review): this inner 'length' shadows the outer declaration. */
			ptrdiff_t length = s - string;

			result = vdo_allocate(length + 1, char, "split string",
					      &substrings[current_substring]);
			if (result != VDO_SUCCESS) {
				free_string_array(substrings);
				return result;
			}
			/*
			 * Trailing NUL is already in place after allocation; deal with the zero or
			 * more non-NUL bytes in the string.
			 */
			if (length > 0)
				memcpy(substrings[current_substring], string, length);
			string = s + 1;
			current_substring++;
			BUG_ON(current_substring >= substring_count);
		}
	}
	/* Process final string, with no trailing separator. */
	BUG_ON(current_substring != (substring_count - 1));
	length = strlen(string);

	result = vdo_allocate(length + 1, char, "split string",
			      &substrings[current_substring]);
	if (result != VDO_SUCCESS) {
		free_string_array(substrings);
		return result;
	}
	memcpy(substrings[current_substring], string, length);
	current_substring++;
	/* substrings[current_substring] is NULL already */
	*substring_array_ptr = substrings;
	return VDO_SUCCESS;
}

/*
 * Join the input substrings into one string, joined with the indicated character, returning a
 * string. array_length is a bound on the number of valid elements in substring_array, in case it
 * is not NULL-terminated.
 */
static int join_strings(char **substring_array, size_t array_length, char separator,
			char **string_ptr)
{
	size_t string_length = 0;
	size_t i;
	int result;
	char *output, *current_position;

	/* One extra byte per substring: a separator (or the final NUL). */
	for (i = 0; (i < array_length) && (substring_array[i] != NULL); i++)
		string_length += strlen(substring_array[i]) + 1;

	result = vdo_allocate(string_length, char, __func__, &output);
	if (result != VDO_SUCCESS)
		return result;

	current_position = &output[0];

	for (i = 0; (i < array_length) && (substring_array[i] != NULL); i++) {
		current_position = vdo_append_to_buffer(current_position,
							output + string_length, "%s",
							substring_array[i]);
		*current_position = separator;
		current_position++;
	}

	/* We output one too many separators; replace the last with a zero byte. */
	if (current_position != output)
		*(current_position - 1) = '\0';

	*string_ptr = output;
	return VDO_SUCCESS;
}

/**
 * parse_bool() - Parse a two-valued option into a bool.
 * @bool_str: The string value to convert to a bool.
 * @true_str: The string value which should be converted to true.
 * @false_str: The string value which should be converted to false.
362 * @bool_ptr: A pointer to return the bool value in. 363 * 364 * Return: VDO_SUCCESS or an error if bool_str is neither true_str nor false_str. 365 */ 366 static inline int __must_check parse_bool(const char *bool_str, const char *true_str, 367 const char *false_str, bool *bool_ptr) 368 { 369 bool value = false; 370 371 if (strcmp(bool_str, true_str) == 0) 372 value = true; 373 else if (strcmp(bool_str, false_str) == 0) 374 value = false; 375 else 376 return VDO_BAD_CONFIGURATION; 377 378 *bool_ptr = value; 379 return VDO_SUCCESS; 380 } 381 382 /** 383 * process_one_thread_config_spec() - Process one component of a thread parameter configuration 384 * string and update the configuration data structure. 385 * @thread_param_type: The type of thread specified. 386 * @count: The thread count requested. 387 * @config: The configuration data structure to update. 388 * 389 * If the thread count requested is invalid, a message is logged and -EINVAL returned. If the 390 * thread name is unknown, a message is logged but no error is returned. 
 *
 * Return: VDO_SUCCESS or -EINVAL
 */
static int process_one_thread_config_spec(const char *thread_param_type,
					  unsigned int count,
					  struct thread_count_config *config)
{
	/* Handle limited thread parameters */
	if (strcmp(thread_param_type, "bioRotationInterval") == 0) {
		if (count == 0) {
			vdo_log_error("thread config string error: 'bioRotationInterval' of at least 1 is required");
			return -EINVAL;
		} else if (count > VDO_BIO_ROTATION_INTERVAL_LIMIT) {
			vdo_log_error("thread config string error: 'bioRotationInterval' cannot be higher than %d",
				      VDO_BIO_ROTATION_INTERVAL_LIMIT);
			return -EINVAL;
		}
		config->bio_rotation_interval = count;
		return VDO_SUCCESS;
	}
	if (strcmp(thread_param_type, "logical") == 0) {
		if (count > MAX_VDO_LOGICAL_ZONES) {
			vdo_log_error("thread config string error: at most %d 'logical' threads are allowed",
				      MAX_VDO_LOGICAL_ZONES);
			return -EINVAL;
		}
		config->logical_zones = count;
		return VDO_SUCCESS;
	}
	if (strcmp(thread_param_type, "physical") == 0) {
		if (count > MAX_VDO_PHYSICAL_ZONES) {
			vdo_log_error("thread config string error: at most %d 'physical' threads are allowed",
				      MAX_VDO_PHYSICAL_ZONES);
			return -EINVAL;
		}
		config->physical_zones = count;
		return VDO_SUCCESS;
	}
	/* Handle other thread count parameters */
	if (count > MAXIMUM_VDO_THREADS) {
		vdo_log_error("thread config string error: at most %d '%s' threads are allowed",
			      MAXIMUM_VDO_THREADS, thread_param_type);
		return -EINVAL;
	}
	if (strcmp(thread_param_type, "hash") == 0) {
		config->hash_zones = count;
		return VDO_SUCCESS;
	}
	if (strcmp(thread_param_type, "cpu") == 0) {
		if (count == 0) {
			vdo_log_error("thread config string error: at least one 'cpu' thread required");
			return -EINVAL;
		}
		config->cpu_threads = count;
		return VDO_SUCCESS;
	}
	if (strcmp(thread_param_type, "ack") == 0) {
		/* Zero 'ack' threads is permitted, unlike 'cpu' and 'bio'. */
		config->bio_ack_threads = count;
		return VDO_SUCCESS;
	}
	if (strcmp(thread_param_type, "bio") == 0) {
		if (count == 0) {
			vdo_log_error("thread config string error: at least one 'bio' thread required");
			return -EINVAL;
		}
		config->bio_threads = count;
		return VDO_SUCCESS;
	}

	/*
	 * Don't fail, just log. This will handle version mismatches between user mode tools and
	 * kernel.
	 */
	vdo_log_info("unknown thread parameter type \"%s\"", thread_param_type);
	return VDO_SUCCESS;
}

/**
 * parse_one_thread_config_spec() - Parse one component of a thread parameter configuration string
 *                                  and update the configuration data structure.
 * @spec: The thread parameter specification string.
 * @config: The configuration data to be updated.
 */
static int parse_one_thread_config_spec(const char *spec,
					struct thread_count_config *config)
{
	unsigned int count;
	char **fields;
	int result;

	/* A spec must split into exactly two fields: "name=count". */
	result = split_string(spec, '=', &fields);
	if (result != VDO_SUCCESS)
		return result;

	if ((fields[0] == NULL) || (fields[1] == NULL) || (fields[2] != NULL)) {
		vdo_log_error("thread config string error: expected thread parameter assignment, saw \"%s\"",
			      spec);
		free_string_array(fields);
		return -EINVAL;
	}

	result = kstrtouint(fields[1], 10, &count);
	if (result) {
		vdo_log_error("thread config string error: integer value needed, found \"%s\"",
			      fields[1]);
		free_string_array(fields);
		return result;
	}

	result = process_one_thread_config_spec(fields[0], count, config);
	free_string_array(fields);
	return result;
}

/**
 * parse_thread_config_string() - Parse the configuration string passed and update the specified
 *                                counts and other parameters of various types of threads to be
 *                                created.
 * @string: Thread parameter configuration string.
 * @config: The thread configuration data to update.
511 * 512 * The configuration string should contain one or more comma-separated specs of the form 513 * "typename=number"; the supported type names are "cpu", "ack", "bio", "bioRotationInterval", 514 * "logical", "physical", and "hash". 515 * 516 * If an error occurs during parsing of a single key/value pair, we deem it serious enough to stop 517 * further parsing. 518 * 519 * This function can't set the "reason" value the caller wants to pass back, because we'd want to 520 * format it to say which field was invalid, and we can't allocate the "reason" strings 521 * dynamically. So if an error occurs, we'll log the details and pass back an error. 522 * 523 * Return: VDO_SUCCESS or -EINVAL or -ENOMEM 524 */ 525 static int parse_thread_config_string(const char *string, 526 struct thread_count_config *config) 527 { 528 int result = VDO_SUCCESS; 529 char **specs; 530 531 if (strcmp(".", string) != 0) { 532 unsigned int i; 533 534 result = split_string(string, ',', &specs); 535 if (result != VDO_SUCCESS) 536 return result; 537 538 for (i = 0; specs[i] != NULL; i++) { 539 result = parse_one_thread_config_spec(specs[i], config); 540 if (result != VDO_SUCCESS) 541 break; 542 } 543 free_string_array(specs); 544 } 545 return result; 546 } 547 548 /** 549 * process_one_key_value_pair() - Process one component of an optional parameter string and update 550 * the configuration data structure. 551 * @key: The optional parameter key name. 552 * @value: The optional parameter value. 553 * @config: The configuration data structure to update. 554 * 555 * If the value requested is invalid, a message is logged and -EINVAL returned. If the key is 556 * unknown, a message is logged but no error is returned. 
557 * 558 * Return: VDO_SUCCESS or -EINVAL 559 */ 560 static int process_one_key_value_pair(const char *key, unsigned int value, 561 struct device_config *config) 562 { 563 /* Non thread optional parameters */ 564 if (strcmp(key, "maxDiscard") == 0) { 565 if (value == 0) { 566 vdo_log_error("optional parameter error: at least one max discard block required"); 567 return -EINVAL; 568 } 569 /* Max discard sectors in blkdev_issue_discard is UINT_MAX >> 9 */ 570 if (value > (UINT_MAX / VDO_BLOCK_SIZE)) { 571 vdo_log_error("optional parameter error: at most %d max discard blocks are allowed", 572 UINT_MAX / VDO_BLOCK_SIZE); 573 return -EINVAL; 574 } 575 config->max_discard_blocks = value; 576 return VDO_SUCCESS; 577 } 578 /* Handles unknown key names */ 579 return process_one_thread_config_spec(key, value, &config->thread_counts); 580 } 581 582 /** 583 * parse_one_key_value_pair() - Parse one key/value pair and update the configuration data 584 * structure. 585 * @key: The optional key name. 586 * @value: The optional value. 587 * @config: The configuration data to be updated. 588 * 589 * Return: VDO_SUCCESS or error. 590 */ 591 static int parse_one_key_value_pair(const char *key, const char *value, 592 struct device_config *config) 593 { 594 unsigned int count; 595 int result; 596 597 if (strcmp(key, "deduplication") == 0) 598 return parse_bool(value, "on", "off", &config->deduplication); 599 600 if (strcmp(key, "compression") == 0) 601 return parse_bool(value, "on", "off", &config->compression); 602 603 /* The remaining arguments must have integral values. */ 604 result = kstrtouint(value, 10, &count); 605 if (result) { 606 vdo_log_error("optional config string error: integer value needed, found \"%s\"", 607 value); 608 return result; 609 } 610 return process_one_key_value_pair(key, count, config); 611 } 612 613 /** 614 * parse_key_value_pairs() - Parse all key/value pairs from a list of arguments. 615 * @argc: The total number of arguments in list. 
616 * @argv: The list of key/value pairs. 617 * @config: The device configuration data to update. 618 * 619 * If an error occurs during parsing of a single key/value pair, we deem it serious enough to stop 620 * further parsing. 621 * 622 * This function can't set the "reason" value the caller wants to pass back, because we'd want to 623 * format it to say which field was invalid, and we can't allocate the "reason" strings 624 * dynamically. So if an error occurs, we'll log the details and return the error. 625 * 626 * Return: VDO_SUCCESS or error 627 */ 628 static int parse_key_value_pairs(int argc, char **argv, struct device_config *config) 629 { 630 int result = VDO_SUCCESS; 631 632 while (argc) { 633 result = parse_one_key_value_pair(argv[0], argv[1], config); 634 if (result != VDO_SUCCESS) 635 break; 636 637 argc -= 2; 638 argv += 2; 639 } 640 641 return result; 642 } 643 644 /** 645 * parse_optional_arguments() - Parse the configuration string passed in for optional arguments. 646 * @arg_set: The structure holding the arguments to parse. 647 * @error_ptr: Pointer to a buffer to hold the error string. 648 * @config: Pointer to device configuration data to update. 649 * 650 * For V0/V1 configurations, there will only be one optional parameter; the thread configuration. 651 * The configuration string should contain one or more comma-separated specs of the form 652 * "typename=number"; the supported type names are "cpu", "ack", "bio", "bioRotationInterval", 653 * "logical", "physical", and "hash". 654 * 655 * For V2 configurations and beyond, there could be any number of arguments. They should contain 656 * one or more key/value pairs separated by a space. 
 *
 * Return: VDO_SUCCESS or error
 */
static int parse_optional_arguments(struct dm_arg_set *arg_set, char **error_ptr,
				    struct device_config *config)
{
	int result = VDO_SUCCESS;

	if (config->version == 0 || config->version == 1) {
		/* Legacy versions: the single optional argument is the thread config string. */
		result = parse_thread_config_string(arg_set->argv[0],
						    &config->thread_counts);
		if (result != VDO_SUCCESS) {
			*error_ptr = "Invalid thread-count configuration";
			return VDO_BAD_CONFIGURATION;
		}
	} else {
		/* V2+: arbitrary <key> <value> pairs, so the count must be even. */
		if ((arg_set->argc % 2) != 0) {
			*error_ptr = "Odd number of optional arguments given but they should be <key> <value> pairs";
			return VDO_BAD_CONFIGURATION;
		}
		result = parse_key_value_pairs(arg_set->argc, arg_set->argv, config);
		if (result != VDO_SUCCESS) {
			*error_ptr = "Invalid optional argument configuration";
			return VDO_BAD_CONFIGURATION;
		}
	}
	return result;
}

/**
 * handle_parse_error() - Handle a parsing error.
 * @config: The config to free.
 * @error_ptr: A place to store a constant string about the error.
 * @error_str: A constant string to store in error_ptr.
 */
static void handle_parse_error(struct device_config *config, char **error_ptr,
			       char *error_str)
{
	free_device_config(config);
	*error_ptr = error_str;
}

/**
 * parse_device_config() - Convert the dmsetup table into a struct device_config.
 * @argc: The number of table values.
 * @argv: The array of table values.
 * @ti: The target structure for this table.
 * @config_ptr: A pointer to return the allocated config.
 *
 * Return: VDO_SUCCESS or an error code.
 */
static int parse_device_config(int argc, char **argv, struct dm_target *ti,
			       struct device_config **config_ptr)
{
	bool enable_512e;
	size_t logical_bytes = to_bytes(ti->len);
	struct dm_arg_set arg_set;
	char **error_ptr = &ti->error;
	struct device_config *config = NULL;
	int result;

	if ((logical_bytes % VDO_BLOCK_SIZE) != 0) {
		handle_parse_error(config, error_ptr,
				   "Logical size must be a multiple of 4096");
		return VDO_BAD_CONFIGURATION;
	}

	if (argc == 0) {
		handle_parse_error(config, error_ptr, "Incorrect number of arguments");
		return VDO_BAD_CONFIGURATION;
	}

	result = vdo_allocate(1, struct device_config, "device_config", &config);
	if (result != VDO_SUCCESS) {
		handle_parse_error(config, error_ptr,
				   "Could not allocate config structure");
		return VDO_BAD_CONFIGURATION;
	}

	config->owning_target = ti;
	config->logical_blocks = logical_bytes / VDO_BLOCK_SIZE;
	INIT_LIST_HEAD(&config->config_list);

	/* Save the original string. */
	result = join_strings(argv, argc, ' ', &config->original_string);
	if (result != VDO_SUCCESS) {
		handle_parse_error(config, error_ptr, "Could not populate string");
		return VDO_BAD_CONFIGURATION;
	}

	vdo_log_info("table line: %s", config->original_string);

	/* Defaults; optional arguments parsed below may override these. */
	config->thread_counts = (struct thread_count_config) {
		.bio_ack_threads = 1,
		.bio_threads = DEFAULT_VDO_BIO_SUBMIT_QUEUE_COUNT,
		.bio_rotation_interval = DEFAULT_VDO_BIO_SUBMIT_QUEUE_ROTATE_INTERVAL,
		.cpu_threads = 1,
		.logical_zones = 0,
		.physical_zones = 0,
		.hash_zones = 0,
	};
	config->max_discard_blocks = 1;
	config->deduplication = true;
	config->compression = false;

	arg_set.argc = argc;
	arg_set.argv = argv;

	result = get_version_number(argc, argv, error_ptr, &config->version);
	if (result != VDO_SUCCESS) {
		/* get_version_number sets error_ptr itself. */
		handle_parse_error(config, error_ptr, *error_ptr);
		return result;
	}
	/* Move the arg pointer forward only if the argument was there. */
	if (config->version >= 1)
		dm_shift_arg(&arg_set);

	result = vdo_duplicate_string(dm_shift_arg(&arg_set), "parent device name",
				      &config->parent_device_name);
	if (result != VDO_SUCCESS) {
		handle_parse_error(config, error_ptr,
				   "Could not copy parent device name");
		return VDO_BAD_CONFIGURATION;
	}

	/* Get the physical blocks, if known. */
	if (config->version >= 1) {
		result = kstrtoull(dm_shift_arg(&arg_set), 10, &config->physical_blocks);
		if (result != VDO_SUCCESS) {
			handle_parse_error(config, error_ptr,
					   "Invalid physical block count");
			return VDO_BAD_CONFIGURATION;
		}
	}

	/* Get the logical block size and validate */
	result = parse_bool(dm_shift_arg(&arg_set), "512", "4096", &enable_512e);
	if (result != VDO_SUCCESS) {
		handle_parse_error(config, error_ptr, "Invalid logical block size");
		return VDO_BAD_CONFIGURATION;
	}
	config->logical_block_size = (enable_512e ? 512 : 4096);

	/* Skip past the two no longer used read cache options. */
	if (config->version <= 1)
		dm_consume_args(&arg_set, 2);

	/* Get the page cache size. */
	result = kstrtouint(dm_shift_arg(&arg_set), 10, &config->cache_size);
	if (result != VDO_SUCCESS) {
		handle_parse_error(config, error_ptr,
				   "Invalid block map page cache size");
		return VDO_BAD_CONFIGURATION;
	}

	/* Get the block map era length. */
	result = kstrtouint(dm_shift_arg(&arg_set), 10, &config->block_map_maximum_age);
	if (result != VDO_SUCCESS) {
		handle_parse_error(config, error_ptr, "Invalid block map maximum age");
		return VDO_BAD_CONFIGURATION;
	}

	/* Skip past the no longer used MD RAID5 optimization mode */
	if (config->version <= 2)
		dm_consume_args(&arg_set, 1);

	/* Skip past the no longer used write policy setting */
	if (config->version <= 3)
		dm_consume_args(&arg_set, 1);

	/* Skip past the no longer used pool name for older table lines */
	if (config->version <= 2) {
		/*
		 * Make sure the enum to get the pool name from argv directly is still in sync with
		 * the parsing of the table line.
		 */
		if (&arg_set.argv[0] != &argv[POOL_NAME_ARG_INDEX[config->version]]) {
			handle_parse_error(config, error_ptr,
					   "Pool name not in expected location");
			return VDO_BAD_CONFIGURATION;
		}
		dm_shift_arg(&arg_set);
	}

	/* Get the optional arguments and validate. */
	result = parse_optional_arguments(&arg_set, error_ptr, config);
	if (result != VDO_SUCCESS) {
		/* parse_optional_arguments sets error_ptr itself. */
		handle_parse_error(config, error_ptr, *error_ptr);
		return result;
	}

	/*
	 * Logical, physical, and hash zone counts can all be zero; then we get one thread doing
	 * everything, our older configuration. If any zone count is non-zero, the others must be
	 * as well.
	 */
	if (((config->thread_counts.logical_zones == 0) !=
	     (config->thread_counts.physical_zones == 0)) ||
	    ((config->thread_counts.physical_zones == 0) !=
	     (config->thread_counts.hash_zones == 0))) {
		handle_parse_error(config, error_ptr,
				   "Logical, physical, and hash zones counts must all be zero or all non-zero");
		return VDO_BAD_CONFIGURATION;
	}

	if (config->cache_size <
	    (2 * MAXIMUM_VDO_USER_VIOS * config->thread_counts.logical_zones)) {
		handle_parse_error(config, error_ptr,
				   "Insufficient block map cache for logical zones");
		return VDO_BAD_CONFIGURATION;
	}

	result = dm_get_device(ti, config->parent_device_name,
			       dm_table_get_mode(ti->table), &config->owned_device);
	if (result != 0) {
		vdo_log_error("couldn't open device \"%s\": error %d",
			      config->parent_device_name, result);
		handle_parse_error(config, error_ptr, "Unable to open storage device");
		return VDO_BAD_CONFIGURATION;
	}

	/* V0 tables carry no physical size; derive it from the backing device. */
	if (config->version == 0) {
		u64 device_size = bdev_nr_bytes(config->owned_device->bdev);

		config->physical_blocks = device_size / VDO_BLOCK_SIZE;
	}

	*config_ptr = config;
	return result;
}

/* Fetch the vdo instance which backs this device-mapper target. */
static struct vdo *get_vdo_for_target(struct dm_target *ti)
{
	return ((struct device_config *) ti->private)->vdo;
}


static int vdo_map_bio(struct dm_target *ti, struct bio *bio)
{
	struct vdo *vdo = get_vdo_for_target(ti);
	struct vdo_work_queue *current_work_queue;
	const struct admin_state_code *code = vdo_get_admin_state_code(&vdo->admin.state);

	VDO_ASSERT_LOG_ONLY(code->normal, "vdo should not receive bios while in state %s",
			    code->name);

	/* Count all incoming bios. */
	vdo_count_bios(&vdo->stats.bios_in, bio);


	/* Handle empty bios. Empty flush bios are not associated with a vio.
	 */
	if ((bio_op(bio) == REQ_OP_FLUSH) || ((bio->bi_opf & REQ_PREFLUSH) != 0)) {
		vdo_launch_flush(vdo, bio);
		return DM_MAPIO_SUBMITTED;
	}

	/*
	 * This could deadlock, so assert that we are not being called from one of this
	 * vdo's own work queue threads.
	 */
	current_work_queue = vdo_get_current_work_queue();
	BUG_ON((current_work_queue != NULL) &&
	       (vdo == vdo_get_work_queue_owner(current_work_queue)->vdo));
	vdo_launch_bio(vdo->data_vio_pool, bio);
	return DM_MAPIO_SUBMITTED;
}

/* Report the queue limits (block sizes, discard behavior) for this target. */
static void vdo_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
	struct vdo *vdo = get_vdo_for_target(ti);

	limits->logical_block_size = vdo->device_config->logical_block_size;
	limits->physical_block_size = VDO_BLOCK_SIZE;

	/* The minimum io size for random io */
	limits->io_min = VDO_BLOCK_SIZE;
	/* The optimal io size for streamed/sequential io */
	limits->io_opt = VDO_BLOCK_SIZE;

	/*
	 * Sets the maximum discard size that will be passed into VDO. This value comes from a
	 * table line value passed in during dmsetup create.
	 *
	 * The value 1024 is the largest usable value on HD systems. A 2048 sector discard on a
	 * busy HD system takes 31 seconds. We should use a value no higher than 1024, which takes
	 * 15 to 16 seconds on a busy HD system. However, using large values results in 120 second
	 * blocked task warnings in kernel logs. In order to avoid these warnings, we choose to
	 * use the smallest reasonable value.
	 *
	 * The value is used by dm-thin to determine whether to pass down discards. The block layer
	 * splits large discards on this boundary when this is set.
	 */
	limits->max_hw_discard_sectors =
		(vdo->device_config->max_discard_blocks * VDO_SECTORS_PER_BLOCK);

	/*
	 * Force discards to not begin or end with a partial block by stating the granularity is
	 * 4k.
 */
	limits->discard_granularity = VDO_BLOCK_SIZE;
}

/*
 * Device-mapper iterate_devices function: invoke fn on the single underlying
 * storage device, covering the configured physical extent.
 */
static int vdo_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn,
			       void *data)
{
	struct device_config *config = get_vdo_for_target(ti)->device_config;

	return fn(ti, config->owned_device, 0,
		  config->physical_blocks * VDO_SECTORS_PER_BLOCK, data);
}

/*
 * Status line is:
 *    <device> <operating mode> <in recovery> <index state> <compression state>
 *    <used physical blocks> <total physical blocks>
 */

static void vdo_status(struct dm_target *ti, status_type_t status_type,
		       unsigned int status_flags, char *result, unsigned int maxlen)
{
	struct vdo *vdo = get_vdo_for_target(ti);
	struct vdo_statistics *stats;
	struct device_config *device_config;
	/* N.B.: The DMEMIT macro uses the variables named "sz", "result", "maxlen". */
	int sz = 0;

	switch (status_type) {
	case STATUSTYPE_INFO:
		/* Report info for dmsetup status */
		mutex_lock(&vdo->stats_mutex);
		vdo_fetch_statistics(vdo, &vdo->stats_buffer);
		stats = &vdo->stats_buffer;

		DMEMIT("/dev/%pg %s %s %s %s %llu %llu",
		       vdo_get_backing_device(vdo), stats->mode,
		       stats->in_recovery_mode ? "recovering" : "-",
		       vdo_get_dedupe_index_state_name(vdo->hash_zones),
		       vdo_get_compressing(vdo) ? "online" : "offline",
		       stats->data_blocks_used + stats->overhead_blocks_used,
		       stats->physical_blocks);
		mutex_unlock(&vdo->stats_mutex);
		break;

	case STATUSTYPE_TABLE:
		/* Report the string actually specified in the beginning. */
		device_config = (struct device_config *) ti->private;
		DMEMIT("%s", device_config->original_string);
		break;

	case STATUSTYPE_IMA:
		/* FIXME: We ought to be more detailed here, but this is what thin does.
 */
		*result = '\0';
		break;
	}
}

/* Size of the underlying storage device, in vdo blocks. */
static block_count_t __must_check get_underlying_device_block_count(const struct vdo *vdo)
{
	return bdev_nr_bytes(vdo_get_backing_device(vdo)) / VDO_BLOCK_SIZE;
}

/*
 * Handle dmsetup messages which must be processed one at a time (currently
 * just "compression on|off"). Caller holds the processing_message flag.
 * Returns 0 on success or -EINVAL for unrecognized messages/arguments.
 */
static int __must_check process_vdo_message_locked(struct vdo *vdo, unsigned int argc,
						   char **argv)
{
	if ((argc == 2) && (strcasecmp(argv[0], "compression") == 0)) {
		if (strcasecmp(argv[1], "on") == 0) {
			vdo_set_compressing(vdo, true);
			return 0;
		}

		if (strcasecmp(argv[1], "off") == 0) {
			vdo_set_compressing(vdo, false);
			return 0;
		}

		vdo_log_warning("invalid argument '%s' to dmsetup compression message",
				argv[1]);
		return -EINVAL;
	}

	vdo_log_warning("unrecognized dmsetup message '%s' received", argv[0]);
	return -EINVAL;
}

/*
 * If the message is a dump, just do it. Otherwise, check that no other message is being processed,
 * and only proceed if so.
 * Returns -EBUSY if another message is being processed
 */
static int __must_check process_vdo_message(struct vdo *vdo, unsigned int argc,
					    char **argv)
{
	int result;

	/*
	 * All messages which may be processed in parallel with other messages should be handled
	 * here before the atomic check below. Messages which should be exclusive should be
	 * processed in process_vdo_message_locked().
	 */

	/* Dump messages should always be processed */
	if (strcasecmp(argv[0], "dump") == 0)
		return vdo_dump(vdo, argc, argv, "dmsetup message");

	if (argc == 1) {
		if (strcasecmp(argv[0], "dump-on-shutdown") == 0) {
			vdo->dump_on_shutdown = true;
			return 0;
		}

		/* Index messages should always be processed */
		if ((strcasecmp(argv[0], "index-close") == 0) ||
		    (strcasecmp(argv[0], "index-create") == 0) ||
		    (strcasecmp(argv[0], "index-disable") == 0) ||
		    (strcasecmp(argv[0], "index-enable") == 0))
			return vdo_message_dedupe_index(vdo->hash_zones, argv[0]);
	}

	/* Claim exclusive ownership of message processing; fail if already busy. */
	if (atomic_cmpxchg(&vdo->processing_message, 0, 1) != 0)
		return -EBUSY;

	result = process_vdo_message_locked(vdo, argc, argv);

	/* Pairs with the implicit barrier in cmpxchg just above */
	smp_wmb();
	atomic_set(&vdo->processing_message, 0);
	return result;
}

/*
 * Device-mapper message function: entry point for all dmsetup messages sent to
 * this target. Registers the calling thread for allocation/instance tracking
 * around the actual dispatch.
 */
static int vdo_message(struct dm_target *ti, unsigned int argc, char **argv,
		       char *result_buffer, unsigned int maxlen)
{
	struct registered_thread allocating_thread, instance_thread;
	struct vdo *vdo;
	int result;

	if (argc == 0) {
		vdo_log_warning("unspecified dmsetup message");
		return -EINVAL;
	}

	vdo = get_vdo_for_target(ti);
	vdo_register_allocating_thread(&allocating_thread, NULL);
	vdo_register_thread_device_id(&instance_thread, &vdo->instance);

	/*
	 * Must be done here so we don't map return codes. The code in dm-ioctl expects a 1 for a
	 * return code to look at the buffer and see if it is full or not.
1104 */ 1105 if ((argc == 1) && (strcasecmp(argv[0], "stats") == 0)) { 1106 vdo_write_stats(vdo, result_buffer, maxlen); 1107 result = 1; 1108 } else if ((argc == 1) && (strcasecmp(argv[0], "config") == 0)) { 1109 vdo_write_config(vdo, &result_buffer, &maxlen); 1110 result = 1; 1111 } else { 1112 result = vdo_status_to_errno(process_vdo_message(vdo, argc, argv)); 1113 } 1114 1115 vdo_unregister_thread_device_id(); 1116 vdo_unregister_allocating_thread(); 1117 return result; 1118 } 1119 1120 static void configure_target_capabilities(struct dm_target *ti) 1121 { 1122 ti->discards_supported = 1; 1123 ti->flush_supported = true; 1124 ti->num_discard_bios = 1; 1125 ti->num_flush_bios = 1; 1126 1127 /* 1128 * If this value changes, please make sure to update the value for max_discard_sectors 1129 * accordingly. 1130 */ 1131 BUG_ON(dm_set_target_max_io_len(ti, VDO_SECTORS_PER_BLOCK) != 0); 1132 } 1133 1134 /* 1135 * Implements vdo_filter_fn. 1136 */ 1137 static bool vdo_uses_device(struct vdo *vdo, const void *context) 1138 { 1139 const struct device_config *config = context; 1140 1141 return vdo_get_backing_device(vdo)->bd_dev == config->owned_device->bdev->bd_dev; 1142 } 1143 1144 /** 1145 * get_thread_id_for_phase() - Get the thread id for the current phase of the admin operation in 1146 * progress. 1147 * @vdo: The vdo. 
 */
static thread_id_t __must_check get_thread_id_for_phase(struct vdo *vdo)
{
	switch (vdo->admin.phase) {
	case RESUME_PHASE_PACKER:
	case RESUME_PHASE_FLUSHER:
	case SUSPEND_PHASE_PACKER:
	case SUSPEND_PHASE_FLUSHES:
		return vdo->thread_config.packer_thread;

	case RESUME_PHASE_DATA_VIOS:
	case SUSPEND_PHASE_DATA_VIOS:
		return vdo->thread_config.cpu_thread;

	case LOAD_PHASE_DRAIN_JOURNAL:
	case RESUME_PHASE_JOURNAL:
	case SUSPEND_PHASE_JOURNAL:
		return vdo->thread_config.journal_thread;

	default:
		/* All other phases run on the admin thread. */
		return vdo->thread_config.admin_thread;
	}
}

/*
 * Set up the admin completion to run the given callback (with the given error
 * handler) on the thread appropriate to the current admin phase.
 */
static struct vdo_completion *prepare_admin_completion(struct vdo *vdo,
						       vdo_action_fn callback,
						       vdo_action_fn error_handler)
{
	struct vdo_completion *completion = &vdo->admin.completion;

	/*
	 * We can't use vdo_prepare_completion_for_requeue() here because we don't want to reset
	 * any error in the completion.
	 */
	completion->callback = callback;
	completion->error_handler = error_handler;
	completion->callback_thread_id = get_thread_id_for_phase(vdo);
	completion->requeue = true;
	return completion;
}

/**
 * advance_phase() - Increment the phase of the current admin operation and prepare the admin
 *		     completion to run on the thread for the next phase.
 * @vdo: The vdo on which an admin operation is being performed.
 *
 * Return: The current phase (i.e. the phase before the increment).
 */
static u32 advance_phase(struct vdo *vdo)
{
	u32 phase = vdo->admin.phase++;

	vdo->admin.completion.callback_thread_id = get_thread_id_for_phase(vdo);
	vdo->admin.completion.requeue = true;
	return phase;
}

/*
 * Perform an administrative operation (load, suspend, grow logical, or grow physical). This method
 * should not be called from vdo threads.
 */
static int perform_admin_operation(struct vdo *vdo, u32 starting_phase,
				   vdo_action_fn callback, vdo_action_fn error_handler,
				   const char *type)
{
	int result;
	struct vdo_administrator *admin = &vdo->admin;

	/* Only one admin operation may run at a time. */
	if (atomic_cmpxchg(&admin->busy, 0, 1) != 0) {
		return vdo_log_error_strerror(VDO_COMPONENT_BUSY,
					      "Can't start %s operation, another operation is already in progress",
					      type);
	}

	admin->phase = starting_phase;
	reinit_completion(&admin->callback_sync);
	vdo_reset_completion(&admin->completion);
	vdo_launch_completion(prepare_admin_completion(vdo, callback, error_handler));

	/*
	 * Using the "interruptible" interface means that Linux will not log a message when we wait
	 * for more than 120 seconds.
	 */
	while (wait_for_completion_interruptible(&admin->callback_sync)) {
		/* However, if we get a signal in a user-mode process, we could spin... */
		fsleep(1000);
	}

	result = admin->completion.result;
	/* pairs with implicit barrier in cmpxchg above */
	smp_wmb();
	atomic_set(&admin->busy, 0);
	return result;
}

/* Assert that we are operating on the correct thread for the current phase. */
static void assert_admin_phase_thread(struct vdo *vdo, const char *what)
{
	VDO_ASSERT_LOG_ONLY(vdo_get_callback_thread_id() == get_thread_id_for_phase(vdo),
			    "%s on correct thread for %s", what,
			    ADMIN_PHASE_NAMES[vdo->admin.phase]);
}

/**
 * finish_operation_callback() - Callback to finish an admin operation.
 * @completion: The admin_completion.
 */
static void finish_operation_callback(struct vdo_completion *completion)
{
	struct vdo_administrator *admin = &completion->vdo->admin;

	/* Record the result in the admin state, then wake perform_admin_operation(). */
	vdo_finish_operation(&admin->state, completion->result);
	complete(&admin->callback_sync);
}

/**
 * decode_from_super_block() - Decode the VDO state from the super block and validate that it is
 *			       correct.
 * @vdo: The vdo being loaded.
 *
 * On error from this method, the component states must be destroyed explicitly. If this method
 * returns successfully, the component states must not be destroyed.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int __must_check decode_from_super_block(struct vdo *vdo)
{
	const struct device_config *config = vdo->device_config;
	int result;

	result = vdo_decode_component_states(vdo->super_block.buffer, &vdo->geometry,
					     &vdo->states);
	if (result != VDO_SUCCESS)
		return result;

	vdo_set_state(vdo, vdo->states.vdo.state);
	vdo->load_state = vdo->states.vdo.state;

	/*
	 * If the device config specifies a larger logical size than was recorded in the super
	 * block, just accept it.
	 */
	if (vdo->states.vdo.config.logical_blocks < config->logical_blocks) {
		vdo_log_warning("Growing logical size: a logical size of %llu blocks was specified, but that differs from the %llu blocks configured in the vdo super block",
				(unsigned long long) config->logical_blocks,
				(unsigned long long) vdo->states.vdo.config.logical_blocks);
		vdo->states.vdo.config.logical_blocks = config->logical_blocks;
	}

	result = vdo_validate_component_states(&vdo->states, vdo->geometry.nonce,
					       config->physical_blocks,
					       config->logical_blocks);
	if (result != VDO_SUCCESS)
		return result;

	vdo->layout = vdo->states.layout;
	return VDO_SUCCESS;
}

/**
 * decode_vdo() - Decode the component data portion of a super block and fill in the corresponding
 *		  portions of the vdo being loaded.
 * @vdo: The vdo being loaded.
 *
 * This will also allocate the recovery journal and slab depot. If this method is called with an
 * asynchronous layer (i.e. a thread config which specifies at least one base thread), the block
 * map and packer will be constructed as well.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int __must_check decode_vdo(struct vdo *vdo)
{
	block_count_t maximum_age, journal_length;
	struct partition *partition;
	int result;

	result = decode_from_super_block(vdo);
	if (result != VDO_SUCCESS) {
		vdo_destroy_component_states(&vdo->states);
		return result;
	}

	/* The block map era may be at most half the length of the recovery journal. */
	maximum_age = vdo_convert_maximum_age(vdo->device_config->block_map_maximum_age);
	journal_length =
		vdo_get_recovery_journal_length(vdo->states.vdo.config.recovery_journal_size);
	if (maximum_age > (journal_length / 2)) {
		return vdo_log_error_strerror(VDO_BAD_CONFIGURATION,
					      "maximum age: %llu exceeds limit %llu",
					      (unsigned long long) maximum_age,
					      (unsigned long long) (journal_length / 2));
	}

	if (maximum_age == 0) {
		return vdo_log_error_strerror(VDO_BAD_CONFIGURATION,
					      "maximum age must be greater than 0");
	}

	result = vdo_enable_read_only_entry(vdo);
	if (result != VDO_SUCCESS)
		return result;

	partition = vdo_get_known_partition(&vdo->layout,
					    VDO_RECOVERY_JOURNAL_PARTITION);
	result = vdo_decode_recovery_journal(vdo->states.recovery_journal,
					     vdo->states.vdo.nonce, vdo, partition,
					     vdo->states.vdo.complete_recoveries,
					     vdo->states.vdo.config.recovery_journal_size,
					     &vdo->recovery_journal);
	if (result != VDO_SUCCESS)
		return result;

	partition = vdo_get_known_partition(&vdo->layout, VDO_SLAB_SUMMARY_PARTITION);
	result = vdo_decode_slab_depot(vdo->states.slab_depot, vdo, partition,
				       &vdo->depot);
	if (result != VDO_SUCCESS)
		return result;

	result = vdo_decode_block_map(vdo->states.block_map,
				      vdo->states.vdo.config.logical_blocks, vdo,
				      vdo->recovery_journal, vdo->states.vdo.nonce,
				      vdo->device_config->cache_size, maximum_age,
				      &vdo->block_map);
	if (result != VDO_SUCCESS)
		return result;

	result = vdo_make_physical_zones(vdo, &vdo->physical_zones);
	if (result != VDO_SUCCESS)
		return result;

	/* The logical zones depend on the physical zones already existing. */
	result = vdo_make_logical_zones(vdo, &vdo->logical_zones);
	if (result != VDO_SUCCESS)
		return result;

	return vdo_make_hash_zones(vdo, &vdo->hash_zones);
}

/**
 * pre_load_callback() - Callback to initiate a pre-load, registered in vdo_initialize().
 * @completion: The admin completion.
 */
static void pre_load_callback(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;
	int result;

	assert_admin_phase_thread(vdo, __func__);

	switch (advance_phase(vdo)) {
	case PRE_LOAD_PHASE_START:
		result = vdo_start_operation(&vdo->admin.state,
					     VDO_ADMIN_STATE_PRE_LOADING);
		if (result != VDO_SUCCESS) {
			vdo_continue_completion(completion, result);
			return;
		}

		/* Re-enters this callback at the next phase when the read completes. */
		vdo_load_super_block(vdo, completion);
		return;

	case PRE_LOAD_PHASE_LOAD_COMPONENTS:
		vdo_continue_completion(completion, decode_vdo(vdo));
		return;

	case PRE_LOAD_PHASE_END:
		break;

	default:
		vdo_set_completion_result(completion, UDS_BAD_STATE);
	}

	finish_operation_callback(completion);
}

/* Return an instance number to the pool of available instances. */
static void release_instance(unsigned int instance)
{
	mutex_lock(&instances_lock);
	if (instance >= instances.bit_count) {
		VDO_ASSERT_LOG_ONLY(false,
				    "instance number %u must be less than bit count %u",
				    instance, instances.bit_count);
	} else if (test_bit(instance, instances.words) == 0) {
		VDO_ASSERT_LOG_ONLY(false, "instance number %u must be allocated", instance);
	} else {
		__clear_bit(instance, instances.words);
		instances.count -= 1;
	}
	mutex_unlock(&instances_lock);
}

/*
 * Attach a device config to both the target and the vdo, and configure the
 * target's capabilities.
 */
static void set_device_config(struct dm_target *ti, struct vdo *vdo,
			      struct device_config *config)
{
	list_del_init(&config->config_list);
	list_add_tail(&config->config_list, &vdo->device_config_list);
	config->vdo = vdo;
	ti->private = config;
	configure_target_capabilities(ti);
}

/*
 * Create a new vdo for this target and pre-load its metadata from storage.
 * On failure the vdo is destroyed; the caller still owns the config and the
 * instance number.
 */
static int vdo_initialize(struct dm_target *ti, unsigned int instance,
			  struct device_config *config)
{
	struct vdo *vdo;
	int result;
	u64 block_size = VDO_BLOCK_SIZE;
	u64 logical_size = to_bytes(ti->len);
	block_count_t logical_blocks = logical_size / block_size;

	vdo_log_info("loading device '%s'", vdo_get_device_name(ti));
	vdo_log_debug("Logical block size     = %llu", (u64) config->logical_block_size);
	vdo_log_debug("Logical blocks         = %llu", logical_blocks);
	vdo_log_debug("Physical block size    = %llu", (u64) block_size);
	vdo_log_debug("Physical blocks        = %llu", config->physical_blocks);
	vdo_log_debug("Block map cache blocks = %u", config->cache_size);
	vdo_log_debug("Block map maximum age  = %u", config->block_map_maximum_age);
	vdo_log_debug("Deduplication          = %s", (config->deduplication ? "on" : "off"));
	vdo_log_debug("Compression            = %s", (config->compression ? "on" : "off"));

	/* Two vdos may not share a backing device. */
	vdo = vdo_find_matching(vdo_uses_device, config);
	if (vdo != NULL) {
		vdo_log_error("Existing vdo already uses device %s",
			      vdo->device_config->parent_device_name);
		ti->error = "Cannot share storage device with already-running VDO";
		return VDO_BAD_CONFIGURATION;
	}

	result = vdo_make(instance, config, &ti->error, &vdo);
	if (result != VDO_SUCCESS) {
		vdo_log_error("Could not create VDO device. (VDO error %d, message %s)",
			      result, ti->error);
		vdo_destroy(vdo);
		return result;
	}

	result = perform_admin_operation(vdo, PRE_LOAD_PHASE_START, pre_load_callback,
					 finish_operation_callback, "pre-load");
	if (result != VDO_SUCCESS) {
		ti->error = ((result == VDO_INVALID_ADMIN_STATE) ?
			     "Pre-load is only valid immediately after initialization" :
			     "Cannot load metadata from device");
		vdo_log_error("Could not start VDO device. (VDO error %d, message %s)",
			      result, ti->error);
		vdo_destroy(vdo);
		return result;
	}

	set_device_config(ti, vdo, config);
	vdo->device_config = config;
	return VDO_SUCCESS;
}

/* Implements vdo_filter_fn. Matches a vdo by its device-mapper target name. */
static bool __must_check vdo_is_named(struct vdo *vdo, const void *context)
{
	struct dm_target *ti = vdo->device_config->owning_target;
	const char *device_name = vdo_get_device_name(ti);

	return strcmp(device_name, context) == 0;
}

/**
 * get_bit_array_size() - Return the number of bytes needed to store a bit array of the specified
 *			  capacity in an array of unsigned longs.
 * @bit_count: The number of bits the array must hold.
 *
 * Return: the number of bytes needed for the array representation.
 */
static size_t get_bit_array_size(unsigned int bit_count)
{
	/* Round up to a multiple of the word size and convert to a byte count. */
	return (BITS_TO_LONGS(bit_count) * sizeof(unsigned long));
}

/**
 * grow_bit_array() - Re-allocate the bitmap word array so there will more instance numbers that
 *		      can be allocated.
 *
 * Since the array is initially NULL, this also initializes the array the first time we allocate an
 * instance number.
 *
 * Return: VDO_SUCCESS or an error code from the allocation
 */
static int grow_bit_array(void)
{
	unsigned int new_count = max(instances.bit_count + BIT_COUNT_INCREMENT,
				     (unsigned int) BIT_COUNT_MINIMUM);
	unsigned long *new_words;
	int result;

	result = vdo_reallocate_memory(instances.words,
				       get_bit_array_size(instances.bit_count),
				       get_bit_array_size(new_count),
				       "instance number bit array", &new_words);
	if (result != VDO_SUCCESS)
		return result;

	instances.bit_count = new_count;
	instances.words = new_words;
	return VDO_SUCCESS;
}

/**
 * allocate_instance() - Allocate an instance number.
 * @instance_ptr: A pointer to hold the instance number
 *
 * Return: VDO_SUCCESS or an error code
 *
 * This function must be called while holding the instances lock.
 */
static int allocate_instance(unsigned int *instance_ptr)
{
	unsigned int instance;
	int result;

	/* If there are no unallocated instances, grow the bit array. */
	if (instances.count >= instances.bit_count) {
		result = grow_bit_array();
		if (result != VDO_SUCCESS)
			return result;
	}

	/*
	 * There must be a zero bit somewhere now. Find it, starting just after the last instance
	 * allocated.
	 */
	instance = find_next_zero_bit(instances.words, instances.bit_count,
				      instances.next);
	if (instance >= instances.bit_count) {
		/* Nothing free after next, so wrap around to instance zero. */
		instance = find_first_zero_bit(instances.words, instances.bit_count);
		result = VDO_ASSERT(instance < instances.bit_count,
				    "impossibly, no zero bit found");
		if (result != VDO_SUCCESS)
			return result;
	}

	__set_bit(instance, instances.words);
	instances.count++;
	instances.next = instance + 1;
	*instance_ptr = instance;
	return VDO_SUCCESS;
}

/*
 * Parse the table line and build a new vdo, with the calling thread already
 * registered under the given instance number. Returns 0 or a negative errno.
 */
static int construct_new_vdo_registered(struct dm_target *ti, unsigned int argc,
					char **argv, unsigned int instance)
{
	int result;
	struct device_config *config;

	result = parse_device_config(argc, argv, ti, &config);
	if (result != VDO_SUCCESS) {
		vdo_log_error_strerror(result, "parsing failed: %s", ti->error);
		release_instance(instance);
		return -EINVAL;
	}

	/*
	 * Beyond this point, the instance number will be cleaned up for us if needed
	 *
	 * NOTE(review): despite the comment above, the failure path below still calls
	 * release_instance() explicitly — confirm this does not double-release the
	 * instance relative to the cleanup path inside vdo_initialize()/vdo_destroy().
	 */
	result = vdo_initialize(ti, instance, config);
	if (result != VDO_SUCCESS) {
		release_instance(instance);
		free_device_config(config);
		return vdo_status_to_errno(result);
	}

	return VDO_SUCCESS;
}

/* Allocate an instance number and construct a brand-new vdo under it. */
static int construct_new_vdo(struct dm_target *ti, unsigned int argc, char **argv)
{
	int result;
	unsigned int instance;
	struct registered_thread instance_thread;

	mutex_lock(&instances_lock);
	result = allocate_instance(&instance);
	mutex_unlock(&instances_lock);
	if (result != VDO_SUCCESS)
		return -ENOMEM;

	vdo_register_thread_device_id(&instance_thread, &instance);
	result = construct_new_vdo_registered(ti, argc, argv, instance);
	vdo_unregister_thread_device_id();
	return result;
}

/**
 * check_may_grow_physical() - Callback to check that we're not in recovery mode, used in
 *			       vdo_prepare_to_grow_physical().
 * @completion: The admin completion.
 */
static void check_may_grow_physical(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;

	assert_admin_phase_thread(vdo, __func__);

	/* These checks can only be done from a vdo thread. */
	if (vdo_is_read_only(vdo))
		vdo_set_completion_result(completion, VDO_READ_ONLY);

	if (vdo_in_recovery_mode(vdo))
		vdo_set_completion_result(completion, VDO_RETRY_AFTER_REBUILD);

	finish_operation_callback(completion);
}

/* Get the size, in blocks, of the named partition in the given layout. */
static block_count_t get_partition_size(struct layout *layout, enum partition_id id)
{
	return vdo_get_known_partition(layout, id)->count;
}

/**
 * grow_layout() - Make the layout for growing a vdo.
 * @vdo: The vdo preparing to grow.
 * @old_size: The current size of the vdo.
 * @new_size: The size to which the vdo will be grown.
 *
 * Return: VDO_SUCCESS or an error code.
 */
static int grow_layout(struct vdo *vdo, block_count_t old_size, block_count_t new_size)
{
	int result;
	block_count_t min_new_size;

	if (vdo->next_layout.size == new_size) {
		/* We are already prepared to grow to the new size, so we're done. */
		return VDO_SUCCESS;
	}

	/* Make a copy completion if there isn't one */
	if (vdo->partition_copier == NULL) {
		vdo->partition_copier = dm_kcopyd_client_create(NULL);
		if (IS_ERR(vdo->partition_copier)) {
			result = PTR_ERR(vdo->partition_copier);
			vdo->partition_copier = NULL;
			return result;
		}
	}

	/* Free any unused preparation. */
	vdo_uninitialize_layout(&vdo->next_layout);

	/*
	 * Make a new layout with the existing partition sizes for everything but the slab depot
	 * partition.
	 */
	result = vdo_initialize_layout(new_size, vdo->layout.start,
				       get_partition_size(&vdo->layout,
							  VDO_BLOCK_MAP_PARTITION),
				       get_partition_size(&vdo->layout,
							  VDO_RECOVERY_JOURNAL_PARTITION),
				       get_partition_size(&vdo->layout,
							  VDO_SLAB_SUMMARY_PARTITION),
				       &vdo->next_layout);
	if (result != VDO_SUCCESS) {
		dm_kcopyd_client_destroy(vdo_forget(vdo->partition_copier));
		return result;
	}

	/* Ensure the new journal and summary are entirely within the added blocks. */
	min_new_size = (old_size +
			get_partition_size(&vdo->next_layout,
					   VDO_SLAB_SUMMARY_PARTITION) +
			get_partition_size(&vdo->next_layout,
					   VDO_RECOVERY_JOURNAL_PARTITION));
	if (min_new_size > new_size) {
		/* Copying the journal and summary would destroy some old metadata. */
		vdo_uninitialize_layout(&vdo->next_layout);
		dm_kcopyd_client_destroy(vdo_forget(vdo->partition_copier));
		return VDO_INCREMENT_TOO_SMALL;
	}

	return VDO_SUCCESS;
}

/*
 * Do all the work which can be done in advance of a grow-physical: validate
 * the state, lay out the grown device, and pre-allocate new slabs.
 */
static int prepare_to_grow_physical(struct vdo *vdo, block_count_t new_physical_blocks)
{
	int result;
	block_count_t current_physical_blocks = vdo->states.vdo.config.physical_blocks;

	vdo_log_info("Preparing to resize physical to %llu",
		     (unsigned long long) new_physical_blocks);
	VDO_ASSERT_LOG_ONLY((new_physical_blocks > current_physical_blocks),
			    "New physical size is larger than current physical size");
	result = perform_admin_operation(vdo, PREPARE_GROW_PHYSICAL_PHASE_START,
					 check_may_grow_physical,
					 finish_operation_callback,
					 "prepare grow-physical");
	if (result != VDO_SUCCESS)
		return result;

	result = grow_layout(vdo, current_physical_blocks, new_physical_blocks);
	if (result != VDO_SUCCESS)
		return result;

	result = vdo_prepare_to_grow_slab_depot(vdo->depot,
						vdo_get_known_partition(&vdo->next_layout,
									VDO_SLAB_DEPOT_PARTITION));
	if
 (result != VDO_SUCCESS) {
		vdo_uninitialize_layout(&vdo->next_layout);
		return result;
	}

	vdo_log_info("Done preparing to resize physical");
	return VDO_SUCCESS;
}

/**
 * validate_new_device_config() - Check whether a new device config represents a valid modification
 *				  to an existing config.
 * @to_validate: The new config to validate.
 * @config: The existing config.
 * @may_grow: Set to true if growing the logical and physical size of the vdo is currently
 *	      permitted.
 * @error_ptr: A pointer to hold the reason for any error.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int validate_new_device_config(struct device_config *to_validate,
				      struct device_config *config, bool may_grow,
				      char **error_ptr)
{
	if (to_validate->owning_target->begin != config->owning_target->begin) {
		*error_ptr = "Starting sector cannot change";
		return VDO_PARAMETER_MISMATCH;
	}

	if (to_validate->logical_block_size != config->logical_block_size) {
		*error_ptr = "Logical block size cannot change";
		return VDO_PARAMETER_MISMATCH;
	}

	if (to_validate->logical_blocks < config->logical_blocks) {
		*error_ptr = "Can't shrink VDO logical size";
		return VDO_PARAMETER_MISMATCH;
	}

	if (to_validate->cache_size != config->cache_size) {
		*error_ptr = "Block map cache size cannot change";
		return VDO_PARAMETER_MISMATCH;
	}

	if (to_validate->block_map_maximum_age != config->block_map_maximum_age) {
		*error_ptr = "Block map maximum age cannot change";
		return VDO_PARAMETER_MISMATCH;
	}

	if (memcmp(&to_validate->thread_counts, &config->thread_counts,
		   sizeof(struct thread_count_config)) != 0) {
		*error_ptr = "Thread configuration cannot change";
		return VDO_PARAMETER_MISMATCH;
	}

	if (to_validate->physical_blocks < config->physical_blocks) {
		*error_ptr = "Removing physical storage from a VDO is not supported";
		return VDO_NOT_IMPLEMENTED;
	}

	if (!may_grow && (to_validate->physical_blocks > config->physical_blocks)) {
		*error_ptr = "VDO physical size may not grow in current state";
		return VDO_NOT_IMPLEMENTED;
	}

	return VDO_SUCCESS;
}

/*
 * Validate a replacement table line against the running vdo and do any
 * advance preparation (grow logical and/or physical) that the new config
 * requires. Called from vdo_ctr() when the device already exists.
 */
static int prepare_to_modify(struct dm_target *ti, struct device_config *config,
			     struct vdo *vdo)
{
	int result;
	bool may_grow = (vdo_get_admin_state(vdo) != VDO_ADMIN_STATE_PRE_LOADED);

	result = validate_new_device_config(config, vdo->device_config, may_grow,
					    &ti->error);
	if (result != VDO_SUCCESS)
		return -EINVAL;

	if (config->logical_blocks > vdo->device_config->logical_blocks) {
		block_count_t logical_blocks = vdo->states.vdo.config.logical_blocks;

		vdo_log_info("Preparing to resize logical to %llu",
			     (unsigned long long) config->logical_blocks);
		VDO_ASSERT_LOG_ONLY((config->logical_blocks > logical_blocks),
				    "New logical size is larger than current size");

		result = vdo_prepare_to_grow_block_map(vdo->block_map,
						       config->logical_blocks);
		if (result != VDO_SUCCESS) {
			ti->error = "Device vdo_prepare_to_grow_logical failed";
			return result;
		}

		vdo_log_info("Done preparing to resize logical");
	}

	if (config->physical_blocks > vdo->device_config->physical_blocks) {
		result = prepare_to_grow_physical(vdo, config->physical_blocks);
		if (result != VDO_SUCCESS) {
			if (result == VDO_PARAMETER_MISMATCH) {
				/*
				 * If we don't trap this case, vdo_status_to_errno() will remap
				 * it to -EIO, which is misleading and ahistorical.
				 */
				result = -EINVAL;
			}

			if (result == VDO_TOO_MANY_SLABS)
				ti->error = "Device vdo_prepare_to_grow_physical failed (specified physical size too big based on formatted slab size)";
			else
				ti->error = "Device vdo_prepare_to_grow_physical failed";

			return result;
		}
	}

	/* A changed backing device name is accepted; just log the switch. */
	if (strcmp(config->parent_device_name, vdo->device_config->parent_device_name) != 0) {
		const char *device_name = vdo_get_device_name(config->owning_target);

		vdo_log_info("Updating backing device of %s from %s to %s", device_name,
			     vdo->device_config->parent_device_name,
			     config->parent_device_name);
	}

	return VDO_SUCCESS;
}

/*
 * Apply a new table line to an already-running vdo. Returns 0 or a negative
 * errno suitable for device-mapper.
 */
static int update_existing_vdo(const char *device_name, struct dm_target *ti,
			       unsigned int argc, char **argv, struct vdo *vdo)
{
	int result;
	struct device_config *config;

	result = parse_device_config(argc, argv, ti, &config);
	if (result != VDO_SUCCESS)
		return -EINVAL;

	vdo_log_info("preparing to modify device '%s'", device_name);
	result = prepare_to_modify(ti, config, vdo);
	if (result != VDO_SUCCESS) {
		free_device_config(config);
		return vdo_status_to_errno(result);
	}

	set_device_config(ti, vdo, config);
	return VDO_SUCCESS;
}

/*
 * Device-mapper constructor: either build a brand new vdo or attach a new
 * table line to an existing one, keyed by target name.
 */
static int vdo_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	int result;
	struct registered_thread allocating_thread, instance_thread;
	const char *device_name;
	struct vdo *vdo;

	vdo_register_allocating_thread(&allocating_thread, NULL);
	device_name = vdo_get_device_name(ti);
	vdo = vdo_find_matching(vdo_is_named, device_name);
	if (vdo == NULL) {
		result = construct_new_vdo(ti, argc, argv);
	} else {
		vdo_register_thread_device_id(&instance_thread, &vdo->instance);
		result = update_existing_vdo(device_name, ti, argc, argv, vdo);
		vdo_unregister_thread_device_id();
	}

	vdo_unregister_allocating_thread();
	return result;
}

/*
 * Device-mapper destructor: drop this target's config; tear down the vdo
 * itself only when the last config referencing it goes away.
 */
static void vdo_dtr(struct dm_target *ti)
{
	struct device_config *config = ti->private;
	struct vdo *vdo = vdo_forget(config->vdo);

	list_del_init(&config->config_list);
	if (list_empty(&vdo->device_config_list)) {
		const char *device_name;

		/* This was the last config referencing the VDO. Free it. */
		unsigned int instance = vdo->instance;
		struct registered_thread allocating_thread, instance_thread;

		vdo_register_thread_device_id(&instance_thread, &instance);
		vdo_register_allocating_thread(&allocating_thread, NULL);

		device_name = vdo_get_device_name(ti);
		vdo_log_info("stopping device '%s'", device_name);
		if (vdo->dump_on_shutdown)
			vdo_dump_all(vdo, "device shutdown");

		vdo_destroy(vdo_forget(vdo));
		vdo_log_info("device '%s' stopped", device_name);
		vdo_unregister_thread_device_id();
		vdo_unregister_allocating_thread();
		release_instance(instance);
	} else if (config == vdo->device_config) {
		/*
		 * The VDO still references this config. Give it a reference to a config that isn't
		 * being destroyed.
		 */
		vdo->device_config = list_first_entry(&vdo->device_config_list,
						      struct device_config, config_list);
	}

	free_device_config(config);
	ti->private = NULL;
}

/*
 * Device-mapper presuspend function: record which kind of suspend (noflush vs
 * save) the upcoming postsuspend should perform.
 */
static void vdo_presuspend(struct dm_target *ti)
{
	get_vdo_for_target(ti)->suspend_type =
		(dm_noflush_suspending(ti) ? VDO_ADMIN_STATE_SUSPENDING : VDO_ADMIN_STATE_SAVING);
}

/**
 * write_super_block_for_suspend() - Update the VDO state and save the super block.
 * @completion: The admin completion
 */
static void write_super_block_for_suspend(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;

	switch (vdo_get_state(vdo)) {
	case VDO_DIRTY:
	case VDO_NEW:
		/* Record a clean shutdown in the saved super block. */
		vdo_set_state(vdo, VDO_CLEAN);
		break;

	case VDO_CLEAN:
	case VDO_READ_ONLY_MODE:
	case VDO_FORCE_REBUILD:
	case VDO_RECOVERING:
	case VDO_REBUILD_FOR_UPGRADE:
		/* These states are saved as-is. */
		break;

	case VDO_REPLAYING:
	default:
		vdo_continue_completion(completion, UDS_BAD_STATE);
		return;
	}

	vdo_save_components(vdo, completion);
}

/**
 * suspend_callback() - Callback to initiate a suspend, registered in vdo_postsuspend().
 * @completion: The sub-task completion.
 *
 * Drives the suspend phases in order, draining each component in turn before optionally saving
 * the super block.
 */
static void suspend_callback(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;
	struct admin_state *state = &vdo->admin.state;
	int result;

	assert_admin_phase_thread(vdo, __func__);

	switch (advance_phase(vdo)) {
	case SUSPEND_PHASE_START:
		if (vdo_get_admin_state_code(state)->quiescent) {
			/* Already suspended */
			break;
		}

		vdo_continue_completion(completion,
					vdo_start_operation(state, vdo->suspend_type));
		return;

	case SUSPEND_PHASE_PACKER:
		/*
		 * If the VDO was already resumed from a prior suspend while read-only, some of the
		 * components may not have been resumed. By setting a read-only error here, we
		 * guarantee that the result of this suspend will be VDO_READ_ONLY and not
		 * VDO_INVALID_ADMIN_STATE in that case.
		 */
		if (vdo_in_read_only_mode(vdo))
			vdo_set_completion_result(completion, VDO_READ_ONLY);

		vdo_drain_packer(vdo->packer, completion);
		return;

	case SUSPEND_PHASE_DATA_VIOS:
		drain_data_vio_pool(vdo->data_vio_pool, completion);
		return;

	case SUSPEND_PHASE_DEDUPE:
		vdo_drain_hash_zones(vdo->hash_zones, completion);
		return;

	case SUSPEND_PHASE_FLUSHES:
		vdo_drain_flusher(vdo->flusher, completion);
		return;

	case SUSPEND_PHASE_LOGICAL_ZONES:
		/*
		 * Attempt to flush all I/O before completing post suspend work. We believe a
		 * suspended device is expected to have persisted all data written before the
		 * suspend, even if it hasn't been flushed yet.
		 */
		result = vdo_synchronous_flush(vdo);
		if (result != VDO_SUCCESS)
			vdo_enter_read_only_mode(vdo, result);

		vdo_drain_logical_zones(vdo->logical_zones,
					vdo_get_admin_state_code(state), completion);
		return;

	case SUSPEND_PHASE_BLOCK_MAP:
		vdo_drain_block_map(vdo->block_map, vdo_get_admin_state_code(state),
				    completion);
		return;

	case SUSPEND_PHASE_JOURNAL:
		vdo_drain_recovery_journal(vdo->recovery_journal,
					   vdo_get_admin_state_code(state), completion);
		return;

	case SUSPEND_PHASE_DEPOT:
		vdo_drain_slab_depot(vdo->depot, vdo_get_admin_state_code(state),
				     completion);
		return;

	case SUSPEND_PHASE_READ_ONLY_WAIT:
		vdo_wait_until_not_entering_read_only_mode(completion);
		return;

	case SUSPEND_PHASE_WRITE_SUPER_BLOCK:
		if (vdo_is_state_suspending(state) || (completion->result != VDO_SUCCESS)) {
			/* If we didn't save the VDO or there was an error, we're done. */
			break;
		}

		write_super_block_for_suspend(completion);
		return;

	case SUSPEND_PHASE_END:
		break;

	default:
		vdo_set_completion_result(completion, UDS_BAD_STATE);
	}

	finish_operation_callback(completion);
}

/**
 * vdo_postsuspend() - Device-mapper postsuspend hook: run the suspend admin operation.
 * @ti: The device-mapper target.
 *
 * Runs the suspend operation recorded by vdo_presuspend() and logs the outcome.
 */
static void vdo_postsuspend(struct dm_target *ti)
{
	struct vdo *vdo = get_vdo_for_target(ti);
	struct registered_thread instance_thread;
	const char *device_name;
	int result;

	vdo_register_thread_device_id(&instance_thread, &vdo->instance);
	device_name = vdo_get_device_name(vdo->device_config->owning_target);
	vdo_log_info("suspending device '%s'", device_name);

	/*
	 * It's important to note any error here does not actually stop device-mapper from
	 * suspending the device. All this work is done post suspend.
	 */
	result = perform_admin_operation(vdo, SUSPEND_PHASE_START, suspend_callback,
					 suspend_callback, "suspend");

	if ((result == VDO_SUCCESS) || (result == VDO_READ_ONLY)) {
		/*
		 * Treat VDO_READ_ONLY as a success since a read-only suspension still leaves the
		 * VDO suspended.
		 */
		vdo_log_info("device '%s' suspended", device_name);
	} else if (result == VDO_INVALID_ADMIN_STATE) {
		vdo_log_error("Suspend invoked while in unexpected state: %s",
			      vdo_get_admin_state(vdo)->name);
	} else {
		vdo_log_error_strerror(result, "Suspend of device '%s' failed",
				       device_name);
	}

	vdo_unregister_thread_device_id();
}

/**
 * was_new() - Check whether the vdo was new when it was loaded.
 * @vdo: The vdo to query.
 *
 * Return: true if the vdo was new.
 */
static bool was_new(const struct vdo *vdo)
{
	return (vdo->load_state == VDO_NEW);
}

/**
 * requires_repair() - Check whether a vdo requires recovery or rebuild.
 * @vdo: The vdo to query.
 *
 * Return: true if the vdo must be repaired.
 */
static bool __must_check requires_repair(const struct vdo *vdo)
{
	switch (vdo_get_state(vdo)) {
	case VDO_DIRTY:
	case VDO_FORCE_REBUILD:
	case VDO_REPLAYING:
	case VDO_REBUILD_FOR_UPGRADE:
		return true;

	default:
		return false;
	}
}

/**
 * get_load_type() - Determine how the slab depot was loaded.
 * @vdo: The vdo.
 *
 * Return: How the depot was loaded.
 */
static enum slab_depot_load_type get_load_type(struct vdo *vdo)
{
	if (vdo_state_requires_read_only_rebuild(vdo->load_state))
		return VDO_SLAB_DEPOT_REBUILD_LOAD;

	if (vdo_state_requires_recovery(vdo->load_state))
		return VDO_SLAB_DEPOT_RECOVERY_LOAD;

	return VDO_SLAB_DEPOT_NORMAL_LOAD;
}

/**
 * load_callback() - Callback to do the destructive parts of loading a VDO.
 * @completion: The sub-task completion.
 *
 * Drives the load phases in order: open the recovery journal, load (or repair) the slab depot,
 * mark the device dirty, prepare the depot to allocate, scrub slabs, and start data reduction.
 */
static void load_callback(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;
	int result;

	assert_admin_phase_thread(vdo, __func__);

	switch (advance_phase(vdo)) {
	case LOAD_PHASE_START:
		result = vdo_start_operation(&vdo->admin.state, VDO_ADMIN_STATE_LOADING);
		if (result != VDO_SUCCESS) {
			vdo_continue_completion(completion, result);
			return;
		}

		/* Prepare the recovery journal for new entries. */
		vdo_open_recovery_journal(vdo->recovery_journal, vdo->depot,
					  vdo->block_map);
		vdo_allow_read_only_mode_entry(completion);
		return;

	case LOAD_PHASE_LOAD_DEPOT:
		vdo_set_dedupe_state_normal(vdo->hash_zones);
		if (vdo_is_read_only(vdo)) {
			/*
			 * In read-only mode we don't use the allocator and it may not even be
			 * readable, so don't bother trying to load it.
			 */
			vdo_set_completion_result(completion, VDO_READ_ONLY);
			break;
		}

		if (requires_repair(vdo)) {
			vdo_repair(completion);
			return;
		}

		vdo_load_slab_depot(vdo->depot,
				    (was_new(vdo) ? VDO_ADMIN_STATE_FORMATTING :
				     VDO_ADMIN_STATE_LOADING),
				    completion, NULL);
		return;

	case LOAD_PHASE_MAKE_DIRTY:
		/* Mark the device dirty; a clean save at suspend will clear this. */
		vdo_set_state(vdo, VDO_DIRTY);
		vdo_save_components(vdo, completion);
		return;

	case LOAD_PHASE_PREPARE_TO_ALLOCATE:
		vdo_initialize_block_map_from_journal(vdo->block_map,
						      vdo->recovery_journal);
		vdo_prepare_slab_depot_to_allocate(vdo->depot, get_load_type(vdo),
						   completion);
		return;

	case LOAD_PHASE_SCRUB_SLABS:
		if (vdo_state_requires_recovery(vdo->load_state))
			vdo_enter_recovery_mode(vdo);

		vdo_scrub_all_unrecovered_slabs(vdo->depot, completion);
		return;

	case LOAD_PHASE_DATA_REDUCTION:
		WRITE_ONCE(vdo->compressing, vdo->device_config->compression);
		if (vdo->device_config->deduplication) {
			/*
			 * Don't try to load or rebuild the index first (and log scary error
			 * messages) if this is known to be a newly-formatted volume.
			 */
			vdo_start_dedupe_index(vdo->hash_zones, was_new(vdo));
		}

		vdo->allocations_allowed = false;
		fallthrough;

	case LOAD_PHASE_FINISHED:
		break;

	case LOAD_PHASE_DRAIN_JOURNAL:
		/* Reached when aborting a load; see handle_load_error(). */
		vdo_drain_recovery_journal(vdo->recovery_journal, VDO_ADMIN_STATE_SAVING,
					   completion);
		return;

	case LOAD_PHASE_WAIT_FOR_READ_ONLY:
		/* Avoid an infinite loop */
		completion->error_handler = NULL;
		vdo->admin.phase = LOAD_PHASE_FINISHED;
		vdo_wait_until_not_entering_read_only_mode(completion);
		return;

	default:
		vdo_set_completion_result(completion, UDS_BAD_STATE);
	}

	finish_operation_callback(completion);
}

/**
 * handle_load_error() - Handle an error during the load operation.
 * @completion: The admin completion.
 *
 * If at all possible, brings the vdo online in read-only mode. This handler is registered in
 * vdo_preresume_registered().
 */
static void handle_load_error(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;

	if (vdo_requeue_completion_if_needed(completion,
					     vdo->thread_config.admin_thread))
		return;

	if (vdo_state_requires_read_only_rebuild(vdo->load_state) &&
	    (vdo->admin.phase == LOAD_PHASE_MAKE_DIRTY)) {
		/* A failed rebuild is abandoned by draining the journal. */
		vdo_log_error_strerror(completion->result, "aborting load");
		vdo->admin.phase = LOAD_PHASE_DRAIN_JOURNAL;
		load_callback(vdo_forget(completion));
		return;
	}

	if ((completion->result == VDO_UNSUPPORTED_VERSION) &&
	    (vdo->admin.phase == LOAD_PHASE_MAKE_DIRTY)) {
		vdo_log_error("Aborting load due to unsupported version");
		vdo->admin.phase = LOAD_PHASE_FINISHED;
		load_callback(completion);
		return;
	}

	vdo_log_error_strerror(completion->result,
			       "Entering read-only mode due to load error");
	vdo->admin.phase = LOAD_PHASE_WAIT_FOR_READ_ONLY;
	vdo_enter_read_only_mode(vdo,
				 completion->result);
	completion->result = VDO_READ_ONLY;
	load_callback(completion);
}

/**
 * write_super_block_for_resume() - Update the VDO state and save the super block.
 * @completion: The admin completion
 */
static void write_super_block_for_resume(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;

	switch (vdo_get_state(vdo)) {
	case VDO_CLEAN:
	case VDO_NEW:
		/* Record the transition out of the clean state in the super block. */
		vdo_set_state(vdo, VDO_DIRTY);
		vdo_save_components(vdo, completion);
		return;

	case VDO_DIRTY:
	case VDO_READ_ONLY_MODE:
	case VDO_FORCE_REBUILD:
	case VDO_RECOVERING:
	case VDO_REBUILD_FOR_UPGRADE:
		/* No need to write the super block in these cases */
		vdo_launch_completion(completion);
		return;

	case VDO_REPLAYING:
	default:
		vdo_continue_completion(completion, UDS_BAD_STATE);
	}
}

/**
 * resume_callback() - Callback to resume a VDO.
 * @completion: The admin completion.
 *
 * Drives the resume phases in order, resuming each component after the super block is updated.
 */
static void resume_callback(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;
	int result;

	assert_admin_phase_thread(vdo, __func__);

	switch (advance_phase(vdo)) {
	case RESUME_PHASE_START:
		result = vdo_start_operation(&vdo->admin.state,
					     VDO_ADMIN_STATE_RESUMING);
		if (result != VDO_SUCCESS) {
			vdo_continue_completion(completion, result);
			return;
		}

		write_super_block_for_resume(completion);
		return;

	case RESUME_PHASE_ALLOW_READ_ONLY_MODE:
		vdo_allow_read_only_mode_entry(completion);
		return;

	case RESUME_PHASE_DEDUPE:
		vdo_resume_hash_zones(vdo->hash_zones, completion);
		return;

	case RESUME_PHASE_DEPOT:
		vdo_resume_slab_depot(vdo->depot, completion);
		return;

	case RESUME_PHASE_JOURNAL:
		vdo_resume_recovery_journal(vdo->recovery_journal, completion);
		return;

	case RESUME_PHASE_BLOCK_MAP:
		vdo_resume_block_map(vdo->block_map, completion);
		return;

	case RESUME_PHASE_LOGICAL_ZONES:
		vdo_resume_logical_zones(vdo->logical_zones, completion);
		return;

	case RESUME_PHASE_PACKER:
	{
		/* Apply any compression setting change from the new device config. */
		bool was_enabled = vdo_get_compressing(vdo);
		bool enable = vdo->device_config->compression;

		if (enable != was_enabled)
			WRITE_ONCE(vdo->compressing, enable);
		vdo_log_info("compression is %s", (enable ? "enabled" : "disabled"));

		vdo_resume_packer(vdo->packer, completion);
		return;
	}

	case RESUME_PHASE_FLUSHER:
		vdo_resume_flusher(vdo->flusher, completion);
		return;

	case RESUME_PHASE_DATA_VIOS:
		resume_data_vio_pool(vdo->data_vio_pool, completion);
		return;

	case RESUME_PHASE_END:
		break;

	default:
		vdo_set_completion_result(completion, UDS_BAD_STATE);
	}

	finish_operation_callback(completion);
}

/**
 * grow_logical_callback() - Callback to initiate a grow logical.
 * @completion: The admin completion.
 *
 * Registered in perform_grow_logical().
 */
static void grow_logical_callback(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;
	int result;

	assert_admin_phase_thread(vdo, __func__);

	switch (advance_phase(vdo)) {
	case GROW_LOGICAL_PHASE_START:
		if (vdo_is_read_only(vdo)) {
			vdo_log_error_strerror(VDO_READ_ONLY,
					       "Can't grow logical size of a read-only VDO");
			vdo_set_completion_result(completion, VDO_READ_ONLY);
			break;
		}

		result = vdo_start_operation(&vdo->admin.state,
					     VDO_ADMIN_STATE_SUSPENDED_OPERATION);
		if (result != VDO_SUCCESS) {
			vdo_continue_completion(completion, result);
			return;
		}

		/* Save the new logical size before growing the block map itself. */
		vdo->states.vdo.config.logical_blocks = vdo->block_map->next_entry_count;
		vdo_save_components(vdo, completion);
		return;

	case GROW_LOGICAL_PHASE_GROW_BLOCK_MAP:
		vdo_grow_block_map(vdo->block_map, completion);
		return;

	case GROW_LOGICAL_PHASE_END:
		break;

	case GROW_LOGICAL_PHASE_ERROR:
		vdo_enter_read_only_mode(vdo, completion->result);
		break;

	default:
		vdo_set_completion_result(completion, UDS_BAD_STATE);
	}

	finish_operation_callback(completion);
}

/**
 * handle_logical_growth_error() - Handle an error during the
 * grow logical process.
 * @completion: The admin completion.
 */
static void handle_logical_growth_error(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;

	if (vdo->admin.phase == GROW_LOGICAL_PHASE_GROW_BLOCK_MAP) {
		/*
		 * We've failed to write the new size in the super block, so set our in memory
		 * config back to the old size.
		 */
		vdo->states.vdo.config.logical_blocks = vdo->block_map->entry_count;
		vdo_abandon_block_map_growth(vdo->block_map);
	}

	vdo->admin.phase = GROW_LOGICAL_PHASE_ERROR;
	grow_logical_callback(completion);
}

/**
 * perform_grow_logical() - Grow the logical size of the vdo.
 * @vdo: The vdo to grow.
 * @new_logical_blocks: The size to which the vdo should be grown.
 *
 * Context: This method may only be called when the vdo has been suspended and must not be called
 * from a base thread.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int perform_grow_logical(struct vdo *vdo, block_count_t new_logical_blocks)
{
	int result;

	if (vdo->device_config->logical_blocks == new_logical_blocks) {
		/*
		 * A table was loaded for which we prepared to grow, but a table without that
		 * growth was what we are resuming with.
		 */
		vdo_abandon_block_map_growth(vdo->block_map);
		return VDO_SUCCESS;
	}

	vdo_log_info("Resizing logical to %llu",
		     (unsigned long long) new_logical_blocks);
	/* The block map must already have been prepared to grow to exactly this size. */
	if (vdo->block_map->next_entry_count != new_logical_blocks)
		return VDO_PARAMETER_MISMATCH;

	result = perform_admin_operation(vdo, GROW_LOGICAL_PHASE_START,
					 grow_logical_callback,
					 handle_logical_growth_error, "grow logical");
	if (result != VDO_SUCCESS)
		return result;

	vdo_log_info("Logical blocks now %llu", (unsigned long long) new_logical_blocks);
	return VDO_SUCCESS;
}

/**
 * copy_callback() - dm-kcopyd completion callback; continue the waiting completion.
 * @read_err: The read error from kcopyd, if any.
 * @write_err: The write error bits from kcopyd, if any.
 * @context: The vdo_completion waiting on the copy.
 *
 * Any kcopyd error is reported to the completion as -EIO.
 */
static void copy_callback(int read_err, unsigned long write_err, void *context)
{
	struct vdo_completion *completion = context;
	int result = (((read_err == 0) && (write_err == 0)) ? VDO_SUCCESS : -EIO);

	vdo_continue_completion(completion, result);
}

/**
 * partition_to_region() - Describe a partition as a dm_io_region on the backing device.
 * @partition: The partition to describe.
 * @vdo: The vdo to which the partition belongs.
 * @region: The region structure to fill in.
 */
static void partition_to_region(struct partition *partition, struct vdo *vdo,
				struct dm_io_region *region)
{
	/* Adjust for the vdo's offset into the backing device. */
	physical_block_number_t pbn = partition->offset - vdo->geometry.bio_offset;

	*region = (struct dm_io_region) {
		.bdev = vdo_get_backing_device(vdo),
		.sector = pbn * VDO_SECTORS_PER_BLOCK,
		.count = partition->count * VDO_SECTORS_PER_BLOCK,
	};
}

/**
 * copy_partition() - Copy a partition from the location specified in the current layout to that in
 *                    the next layout.
 * @vdo: The vdo preparing to grow.
 * @id: The ID of the partition to copy.
 * @parent: The completion to notify when the copy is complete.
 */
static void copy_partition(struct vdo *vdo, enum partition_id id,
			   struct vdo_completion *parent)
{
	struct dm_io_region read_region, write_regions[1];
	struct partition *from = vdo_get_known_partition(&vdo->layout, id);
	struct partition *to = vdo_get_known_partition(&vdo->next_layout, id);

	partition_to_region(from, vdo, &read_region);
	partition_to_region(to, vdo, &write_regions[0]);
	dm_kcopyd_copy(vdo->partition_copier, &read_region, 1, write_regions, 0,
		       copy_callback, parent);
}

/**
 * grow_physical_callback() - Callback to initiate a grow physical.
 * @completion: The admin completion.
 *
 * Registered in perform_grow_physical().
 */
static void grow_physical_callback(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;
	int result;

	assert_admin_phase_thread(vdo, __func__);

	switch (advance_phase(vdo)) {
	case GROW_PHYSICAL_PHASE_START:
		if (vdo_is_read_only(vdo)) {
			vdo_log_error_strerror(VDO_READ_ONLY,
					       "Can't grow physical size of a read-only VDO");
			vdo_set_completion_result(completion, VDO_READ_ONLY);
			break;
		}

		result = vdo_start_operation(&vdo->admin.state,
					     VDO_ADMIN_STATE_SUSPENDED_OPERATION);
		if (result != VDO_SUCCESS) {
			vdo_continue_completion(completion, result);
			return;
		}

		/* Copy the journal into the new layout. */
		copy_partition(vdo, VDO_RECOVERY_JOURNAL_PARTITION, completion);
		return;

	case GROW_PHYSICAL_PHASE_COPY_SUMMARY:
		copy_partition(vdo, VDO_SLAB_SUMMARY_PARTITION, completion);
		return;

	case GROW_PHYSICAL_PHASE_UPDATE_COMPONENTS:
		/* Adopt the new layout and save the enlarged physical size. */
		vdo_uninitialize_layout(&vdo->layout);
		vdo->layout = vdo->next_layout;
		vdo_forget(vdo->next_layout.head);
		vdo->states.vdo.config.physical_blocks = vdo->layout.size;
		vdo_update_slab_depot_size(vdo->depot);
		vdo_save_components(vdo, completion);
		return;

	case GROW_PHYSICAL_PHASE_USE_NEW_SLABS:
		vdo_use_new_slabs(vdo->depot, completion);
		return;

	case GROW_PHYSICAL_PHASE_END:
		/* Point the depot and journal at their partitions in the adopted layout. */
		vdo->depot->summary_origin =
			vdo_get_known_partition(&vdo->layout,
						VDO_SLAB_SUMMARY_PARTITION)->offset;
		vdo->recovery_journal->origin =
			vdo_get_known_partition(&vdo->layout,
						VDO_RECOVERY_JOURNAL_PARTITION)->offset;
		break;

	case GROW_PHYSICAL_PHASE_ERROR:
		vdo_enter_read_only_mode(vdo, completion->result);
		break;

	default:
		vdo_set_completion_result(completion, UDS_BAD_STATE);
	}

	vdo_uninitialize_layout(&vdo->next_layout);
	finish_operation_callback(completion);
}

/**
 * handle_physical_growth_error() - Handle an error during the grow physical process.
 * @completion: The sub-task completion.
 */
static void handle_physical_growth_error(struct vdo_completion *completion)
{
	completion->vdo->admin.phase = GROW_PHYSICAL_PHASE_ERROR;
	grow_physical_callback(completion);
}

/**
 * perform_grow_physical() - Grow the physical size of the vdo.
 * @vdo: The vdo to resize.
 * @new_physical_blocks: The new physical size in blocks.
 *
 * Context: This method may only be called when the vdo has been suspended and must not be called
 * from a base thread.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int perform_grow_physical(struct vdo *vdo, block_count_t new_physical_blocks)
{
	int result;
	block_count_t new_depot_size, prepared_depot_size;
	block_count_t old_physical_blocks = vdo->states.vdo.config.physical_blocks;

	/* Skip any noop grows. */
	if (old_physical_blocks == new_physical_blocks)
		return VDO_SUCCESS;

	if (new_physical_blocks != vdo->next_layout.size) {
		/*
		 * Either the VDO isn't prepared to grow, or it was prepared to grow to a different
		 * size. Doing this check here relies on the fact that the call to this method is
		 * done under the dmsetup message lock.
		 */
		vdo_uninitialize_layout(&vdo->next_layout);
		vdo_abandon_new_slabs(vdo->depot);
		return VDO_PARAMETER_MISMATCH;
	}

	/* Validate that we are prepared to grow appropriately. */
	new_depot_size =
		vdo_get_known_partition(&vdo->next_layout, VDO_SLAB_DEPOT_PARTITION)->count;
	prepared_depot_size = (vdo->depot->new_slabs == NULL) ? 0 : vdo->depot->new_size;
	if (prepared_depot_size != new_depot_size)
		return VDO_PARAMETER_MISMATCH;

	result = perform_admin_operation(vdo, GROW_PHYSICAL_PHASE_START,
					 grow_physical_callback,
					 handle_physical_growth_error, "grow physical");
	if (result != VDO_SUCCESS)
		return result;

	vdo_log_info("Physical block count was %llu, now %llu",
		     (unsigned long long) old_physical_blocks,
		     (unsigned long long) new_physical_blocks);
	return VDO_SUCCESS;
}

/**
 * apply_new_vdo_configuration() - Attempt to make any configuration changes from the table being
 *                                 resumed.
 * @vdo: The vdo being resumed.
 * @config: The new device configuration derived from the table with which the vdo is being
 *          resumed.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int __must_check apply_new_vdo_configuration(struct vdo *vdo,
						    struct device_config *config)
{
	int result;

	result = perform_grow_logical(vdo, config->logical_blocks);
	if (result != VDO_SUCCESS) {
		vdo_log_error("grow logical operation failed, result = %d", result);
		return result;
	}

	result = perform_grow_physical(vdo, config->physical_blocks);
	if (result != VDO_SUCCESS)
		vdo_log_error("resize operation failed, result = %d", result);

	return result;
}

/**
 * vdo_preresume_registered() - Load, reconfigure, and resume a vdo.
 * @ti: The device-mapper target being resumed.
 * @vdo: The vdo attached to the target.
 *
 * Performs the initial load if the vdo is only pre-loaded, applies any size changes from the new
 * table, and then resumes the vdo unless it was just started. Called from vdo_preresume() with
 * this device's instance registered for logging.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int vdo_preresume_registered(struct dm_target *ti, struct vdo *vdo)
{
	struct device_config *config = ti->private;
	const char *device_name = vdo_get_device_name(ti);
	block_count_t backing_blocks;
	int result;

	backing_blocks = get_underlying_device_block_count(vdo);
	if (backing_blocks < config->physical_blocks) {
		/* FIXME: can this still happen? */
		vdo_log_error("resume of device '%s' failed: backing device has %llu blocks but VDO physical size is %llu blocks",
			      device_name, (unsigned long long) backing_blocks,
			      (unsigned long long) config->physical_blocks);
		return -EINVAL;
	}

	if (vdo_get_admin_state(vdo) == VDO_ADMIN_STATE_PRE_LOADED) {
		vdo_log_info("starting device '%s'", device_name);
		result = perform_admin_operation(vdo, LOAD_PHASE_START, load_callback,
						 handle_load_error, "load");
		if (result == VDO_UNSUPPORTED_VERSION) {
			/*
			 * A component version is not supported. This can happen when the
			 * recovery journal metadata is in an old version format. Abort the
			 * load without saving the state.
			 */
			vdo->suspend_type = VDO_ADMIN_STATE_SUSPENDING;
			perform_admin_operation(vdo, SUSPEND_PHASE_START,
						suspend_callback, suspend_callback,
						"suspend");
			return result;
		}

		if ((result != VDO_SUCCESS) && (result != VDO_READ_ONLY)) {
			/*
			 * Something has gone very wrong. Make sure everything has drained and
			 * leave the device in an unresumable state.
			 */
			vdo_log_error_strerror(result,
					       "Start failed, could not load VDO metadata");
			vdo->suspend_type = VDO_ADMIN_STATE_STOPPING;
			perform_admin_operation(vdo, SUSPEND_PHASE_START,
						suspend_callback, suspend_callback,
						"suspend");
			return result;
		}

		/* Even if the VDO is read-only, it is now able to handle read requests. */
		vdo_log_info("device '%s' started", device_name);
	}

	vdo_log_info("resuming device '%s'", device_name);

	/* If this fails, the VDO was not in a state to be resumed. This should never happen. */
	result = apply_new_vdo_configuration(vdo, config);
	BUG_ON(result == VDO_INVALID_ADMIN_STATE);

	/*
	 * Now that we've tried to modify the vdo, the new config *is* the config, whether the
	 * modifications worked or not.
	 */
	vdo->device_config = config;

	/*
	 * Any error here is highly unexpected and the state of the vdo is questionable, so we mark
	 * it read-only in memory. Because we are suspended, the read-only state will not be
	 * written to disk.
	 */
	if (result != VDO_SUCCESS) {
		vdo_log_error_strerror(result,
				       "Commit of modifications to device '%s' failed",
				       device_name);
		vdo_enter_read_only_mode(vdo, result);
		return result;
	}

	if (vdo_get_admin_state(vdo)->normal) {
		/* The VDO was just started, so we don't need to resume it. */
		return VDO_SUCCESS;
	}

	result = perform_admin_operation(vdo, RESUME_PHASE_START, resume_callback,
					 resume_callback, "resume");
	BUG_ON(result == VDO_INVALID_ADMIN_STATE);
	if (result == VDO_READ_ONLY) {
		/* Even if the vdo is read-only, it has still resumed. */
		result = VDO_SUCCESS;
	}

	if (result != VDO_SUCCESS)
		vdo_log_error("resume of device '%s' failed with error: %d", device_name,
			      result);

	return result;
}

/**
 * vdo_preresume() - Device-mapper preresume hook for a vdo target.
 * @ti: The device-mapper target.
 *
 * Return: 0 on success or a negative errno.
 */
static int vdo_preresume(struct dm_target *ti)
{
	struct registered_thread instance_thread;
	struct vdo *vdo = get_vdo_for_target(ti);
	int result;

	vdo_register_thread_device_id(&instance_thread, &vdo->instance);
	result = vdo_preresume_registered(ti, vdo);
	/* Report configuration mismatches as invalid table arguments. */
	if ((result == VDO_PARAMETER_MISMATCH) || (result == VDO_INVALID_ADMIN_STATE) ||
	    (result == VDO_UNSUPPORTED_VERSION))
		result = -EINVAL;
	vdo_unregister_thread_device_id();
	return vdo_status_to_errno(result);
}

/**
 * vdo_resume() - Device-mapper resume hook for a vdo target.
 * @ti: The device-mapper target.
 *
 * The actual resume work happens in the preresume hook; this only logs the event.
 */
static void vdo_resume(struct dm_target *ti)
{
	struct registered_thread instance_thread;

	vdo_register_thread_device_id(&instance_thread,
				      &get_vdo_for_target(ti)->instance);
	vdo_log_info("device '%s' resumed", vdo_get_device_name(ti));
	vdo_unregister_thread_device_id();
}

/*
 * If anything changes that affects how user tools will interact with vdo, update the version
 * number and make sure documentation about the change is complete so tools can properly update
 * their management code.
 */
static struct target_type vdo_target_bio = {
	.features = DM_TARGET_SINGLETON,
	.name = "vdo",
	.version = { 9, 1, 0 },
	.module = THIS_MODULE,
	.ctr = vdo_ctr,
	.dtr = vdo_dtr,
	.io_hints = vdo_io_hints,
	.iterate_devices = vdo_iterate_devices,
	.map = vdo_map_bio,
	.message = vdo_message,
	.status = vdo_status,
	.presuspend = vdo_presuspend,
	.postsuspend = vdo_postsuspend,
	.preresume = vdo_preresume,
	.resume = vdo_resume,
};

/* Set once dm_register_target() succeeds, so teardown knows whether to unregister. */
static bool dm_registered;

/**
 * vdo_module_destroy() - Tear down module-global state.
 *
 * Unregisters the device-mapper target (if registered) and frees the instance tracker. Safe to
 * call from a partially-completed vdo_init().
 */
static void vdo_module_destroy(void)
{
	vdo_log_debug("unloading");

	if (dm_registered)
		dm_unregister_target(&vdo_target_bio);

	VDO_ASSERT_LOG_ONLY(instances.count == 0,
			    "should have no instance numbers still in use, but have %u",
			    instances.count);
	vdo_free(instances.words);
	memset(&instances, 0, sizeof(struct instance_tracker));
}

/**
 * vdo_init() - Module initialization.
 *
 * Sets up global registries, registers VDO status codes, and registers the device-mapper target.
 *
 * Return: 0 on success or a negative error code.
 */
static int __init vdo_init(void)
{
	int result = 0;

	/* Memory tracking must be initialized first for accurate accounting. */
	vdo_memory_init();
	vdo_initialize_threads_mutex();
	vdo_initialize_thread_device_registry();
	vdo_initialize_device_registry_once();

	/* Add VDO errors to the set of errors registered by the indexer. */
	result = vdo_register_status_codes();
	if (result != VDO_SUCCESS) {
		vdo_log_error("vdo_register_status_codes failed %d", result);
		vdo_module_destroy();
		return result;
	}

	result = dm_register_target(&vdo_target_bio);
	if (result < 0) {
		vdo_log_error("dm_register_target failed %d", result);
		vdo_module_destroy();
		return result;
	}
	dm_registered = true;

	return result;
}

/**
 * vdo_exit() - Module teardown; mirrors vdo_init().
 */
static void __exit vdo_exit(void)
{
	vdo_module_destroy();
	/* Memory tracking cleanup must be done last. */
	vdo_memory_exit();
}

module_init(vdo_init);
module_exit(vdo_exit);

module_param_named(log_level, vdo_log_level, uint, 0644);
MODULE_PARM_DESC(log_level, "Log level for log messages");

MODULE_DESCRIPTION(DM_NAME " target for transparent deduplication");
MODULE_AUTHOR("Red Hat, Inc.");
MODULE_LICENSE("GPL");