1 // SPDX-License-Identifier: CDDL-1.0 2 /* 3 * CDDL HEADER START 4 * 5 * The contents of this file are subject to the terms of the 6 * Common Development and Distribution License (the "License"). 7 * You may not use this file except in compliance with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or https://opensource.org/licenses/CDDL-1.0. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 23 /* 24 * Copyright (c) 2011, 2015 by Delphix. All rights reserved. 25 * Copyright (c) 2013 Steven Hartland. All rights reserved. 26 */ 27 28 /* 29 * zhack is a debugging tool that can write changes to ZFS pool using libzpool 30 * for testing purposes. Altering pools with zhack is unsupported and may 31 * result in corrupted pools. 32 */ 33 34 #include <zfs_prop.h> 35 #include <stdio.h> 36 #include <stdlib.h> 37 #include <ctype.h> 38 #include <sys/stat.h> 39 #include <sys/zfs_context.h> 40 #include <sys/spa.h> 41 #include <sys/spa_impl.h> 42 #include <sys/dmu.h> 43 #include <sys/zap.h> 44 #include <sys/zfs_znode.h> 45 #include <sys/dsl_synctask.h> 46 #include <sys/vdev.h> 47 #include <sys/vdev_impl.h> 48 #include <sys/fs/zfs.h> 49 #include <sys/dmu_objset.h> 50 #include <sys/dsl_pool.h> 51 #include <sys/zio_checksum.h> 52 #include <sys/zio_compress.h> 53 #include <sys/zfeature.h> 54 #include <sys/dmu_tx.h> 55 #include <zfeature_common.h> 56 #include <libzutil.h> 57 #include <sys/metaslab_impl.h> 58 #include <libzpool.h> 59 60 static importargs_t g_importargs; 61 static char *g_pool; 62 static boolean_t g_readonly; 63 64 typedef enum { 65 ZHACK_REPAIR_OP_UNKNOWN = 0, 66 ZHACK_REPAIR_OP_CKSUM = (1 << 0), 67 ZHACK_REPAIR_OP_UNDETACH = (1 << 1) 68 } zhack_repair_op_t; 69 70 static __attribute__((noreturn)) void 71 usage(void) 72 { 73 (void) fprintf(stderr, 74 "Usage: zhack [-o tunable] [-c cachefile] [-d dir] <subcommand> " 75 "<args> ...\n" 76 "where <subcommand> <args> is one of the following:\n" 77 "\n"); 78 79 (void) fprintf(stderr, 80 " feature stat <pool>\n" 81 " print information about enabled features\n" 82 " feature enable [-r] [-d desc] <pool> <feature>\n" 83 " add a new enabled feature to the pool\n" 84 " -d <desc> sets the feature's description\n" 85 " -r set read-only compatible flag for feature\n" 86 " feature ref [-md] <pool> <feature>\n" 87 " change the refcount on the given feature\n" 88 " -d decrease instead of increase the refcount\n" 89 " -m add the feature to the label if increasing refcount\n" 90 "\n" 91 " <feature> : should be a feature guid\n" 92 "\n" 93 " label repair <device>\n" 94 " repair labels of a specified device according to options\n" 95 " which may be combined to do their functions in one call\n" 96 " -c repair corrupted label checksums\n" 97 " -u restore the label on a detached device\n" 98 "\n" 99 " <device> : path to vdev\n" 100 "\n" 101 " metaslab leak <pool>\n" 102 " apply allocation map from zdb to specified pool\n"); 103 exit(1); 104 } 105 106 107 static __attribute__((format(printf, 3, 4))) __attribute__((noreturn)) void 108 fatal(spa_t *spa, const void *tag, const char *fmt, ...) 109 { 110 va_list ap; 111 112 if (spa != NULL) { 113 spa_close(spa, tag); 114 (void) spa_export(g_pool, NULL, B_TRUE, B_FALSE); 115 } 116 117 va_start(ap, fmt); 118 (void) fputs("zhack: ", stderr); 119 (void) vfprintf(stderr, fmt, ap); 120 va_end(ap); 121 (void) fputc('\n', stderr); 122 123 exit(1); 124 } 125 126 static int 127 space_delta_cb(dmu_object_type_t bonustype, const void *data, 128 zfs_file_info_t *zoi) 129 { 130 (void) data, (void) zoi; 131 132 /* 133 * Is it a valid type of object to track? 134 */ 135 if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA) 136 return (ENOENT); 137 (void) fprintf(stderr, "modifying object that needs user accounting"); 138 abort(); 139 } 140 141 /* 142 * Target is the dataset whose pool we want to open. 143 */ 144 static void 145 zhack_import(char *target, boolean_t readonly) 146 { 147 nvlist_t *config; 148 nvlist_t *props; 149 int error; 150 151 kernel_init(readonly ? SPA_MODE_READ : 152 (SPA_MODE_READ | SPA_MODE_WRITE)); 153 154 dmu_objset_register_type(DMU_OST_ZFS, space_delta_cb); 155 156 g_readonly = readonly; 157 g_importargs.can_be_active = readonly; 158 g_pool = strdup(target); 159 160 libpc_handle_t lpch = { 161 .lpc_lib_handle = NULL, 162 .lpc_ops = &libzpool_config_ops, 163 .lpc_printerr = B_TRUE 164 }; 165 error = zpool_find_config(&lpch, target, &config, &g_importargs); 166 if (error) 167 fatal(NULL, FTAG, "cannot import '%s'", target); 168 169 props = NULL; 170 if (readonly) { 171 VERIFY0(nvlist_alloc(&props, NV_UNIQUE_NAME, 0)); 172 VERIFY0(nvlist_add_uint64(props, 173 zpool_prop_to_name(ZPOOL_PROP_READONLY), 1)); 174 } 175 176 zfeature_checks_disable = B_TRUE; 177 error = spa_import(target, config, props, 178 (readonly ? ZFS_IMPORT_SKIP_MMP : ZFS_IMPORT_NORMAL)); 179 fnvlist_free(config); 180 zfeature_checks_disable = B_FALSE; 181 if (error == EEXIST) 182 error = 0; 183 184 if (error) 185 fatal(NULL, FTAG, "can't import '%s': %s", target, 186 strerror(error)); 187 } 188 189 static void 190 zhack_spa_open(char *target, boolean_t readonly, const void *tag, spa_t **spa) 191 { 192 int err; 193 194 zhack_import(target, readonly); 195 196 zfeature_checks_disable = B_TRUE; 197 err = spa_open(target, spa, tag); 198 zfeature_checks_disable = B_FALSE; 199 200 if (err != 0) 201 fatal(*spa, FTAG, "cannot open '%s': %s", target, 202 strerror(err)); 203 if (spa_version(*spa) < SPA_VERSION_FEATURES) { 204 fatal(*spa, FTAG, "'%s' has version %d, features not enabled", 205 target, (int)spa_version(*spa)); 206 } 207 } 208 209 static void 210 dump_obj(objset_t *os, uint64_t obj, const char *name) 211 { 212 zap_cursor_t zc; 213 zap_attribute_t *za = zap_attribute_long_alloc(); 214 215 (void) printf("%s_obj:\n", name); 216 217 for (zap_cursor_init(&zc, os, obj); 218 zap_cursor_retrieve(&zc, za) == 0; 219 zap_cursor_advance(&zc)) { 220 if (za->za_integer_length == 8) { 221 ASSERT(za->za_num_integers == 1); 222 (void) printf("\t%s = %llu\n", 223 za->za_name, (u_longlong_t)za->za_first_integer); 224 } else { 225 ASSERT(za->za_integer_length == 1); 226 char val[1024]; 227 VERIFY0(zap_lookup(os, obj, za->za_name, 228 1, sizeof (val), val)); 229 (void) printf("\t%s = %s\n", za->za_name, val); 230 } 231 } 232 zap_cursor_fini(&zc); 233 zap_attribute_free(za); 234 } 235 236 static void 237 dump_mos(spa_t *spa) 238 { 239 nvlist_t *nv = spa->spa_label_features; 240 nvpair_t *pair; 241 242 (void) printf("label config:\n"); 243 for (pair = nvlist_next_nvpair(nv, NULL); 244 pair != NULL; 245 pair = nvlist_next_nvpair(nv, pair)) { 246 (void) printf("\t%s\n", nvpair_name(pair)); 247 } 248 } 249 250 static void 251 zhack_do_feature_stat(int argc, char **argv) 252 { 253 spa_t *spa; 254 objset_t *os; 255 char *target; 256 257 argc--; 258 argv++; 259 260 if (argc < 1) { 261 (void) fprintf(stderr, "error: missing pool name\n"); 262 usage(); 263 } 264 target = argv[0]; 265 266 zhack_spa_open(target, B_TRUE, FTAG, &spa); 267 os = spa->spa_meta_objset; 268 269 dump_obj(os, spa->spa_feat_for_read_obj, "for_read"); 270 dump_obj(os, spa->spa_feat_for_write_obj, "for_write"); 271 dump_obj(os, spa->spa_feat_desc_obj, "descriptions"); 272 if (spa_feature_is_active(spa, SPA_FEATURE_ENABLED_TXG)) { 273 dump_obj(os, spa->spa_feat_enabled_txg_obj, "enabled_txg"); 274 } 275 dump_mos(spa); 276 277 spa_close(spa, FTAG); 278 } 279 280 static void 281 zhack_feature_enable_sync(void *arg, dmu_tx_t *tx) 282 { 283 spa_t *spa = dmu_tx_pool(tx)->dp_spa; 284 zfeature_info_t *feature = arg; 285 286 feature_enable_sync(spa, feature, tx); 287 288 spa_history_log_internal(spa, "zhack enable feature", tx, 289 "name=%s flags=%u", 290 feature->fi_guid, feature->fi_flags); 291 } 292 293 static void 294 zhack_do_feature_enable(int argc, char **argv) 295 { 296 int c; 297 char *desc, *target; 298 spa_t *spa; 299 objset_t *mos; 300 zfeature_info_t feature; 301 const spa_feature_t nodeps[] = { SPA_FEATURE_NONE }; 302 303 /* 304 * Features are not added to the pool's label until their refcounts 305 * are incremented, so fi_mos can just be left as false for now. 306 */ 307 desc = NULL; 308 feature.fi_uname = "zhack"; 309 feature.fi_flags = 0; 310 feature.fi_depends = nodeps; 311 feature.fi_feature = SPA_FEATURE_NONE; 312 313 optind = 1; 314 while ((c = getopt(argc, argv, "+rd:")) != -1) { 315 switch (c) { 316 case 'r': 317 feature.fi_flags |= ZFEATURE_FLAG_READONLY_COMPAT; 318 break; 319 case 'd': 320 if (desc != NULL) 321 free(desc); 322 desc = strdup(optarg); 323 break; 324 default: 325 usage(); 326 break; 327 } 328 } 329 330 if (desc == NULL) 331 desc = strdup("zhack injected"); 332 feature.fi_desc = desc; 333 334 argc -= optind; 335 argv += optind; 336 337 if (argc < 2) { 338 (void) fprintf(stderr, "error: missing feature or pool name\n"); 339 usage(); 340 } 341 target = argv[0]; 342 feature.fi_guid = argv[1]; 343 344 if (!zfeature_is_valid_guid(feature.fi_guid)) 345 fatal(NULL, FTAG, "invalid feature guid: %s", feature.fi_guid); 346 347 zhack_spa_open(target, B_FALSE, FTAG, &spa); 348 mos = spa->spa_meta_objset; 349 350 if (zfeature_is_supported(feature.fi_guid)) 351 fatal(spa, FTAG, "'%s' is a real feature, will not enable", 352 feature.fi_guid); 353 if (0 == zap_contains(mos, spa->spa_feat_desc_obj, feature.fi_guid)) 354 fatal(spa, FTAG, "feature already enabled: %s", 355 feature.fi_guid); 356 357 VERIFY0(dsl_sync_task(spa_name(spa), NULL, 358 zhack_feature_enable_sync, &feature, 5, ZFS_SPACE_CHECK_NORMAL)); 359 360 spa_close(spa, FTAG); 361 362 free(desc); 363 } 364 365 static void 366 feature_incr_sync(void *arg, dmu_tx_t *tx) 367 { 368 spa_t *spa = dmu_tx_pool(tx)->dp_spa; 369 zfeature_info_t *feature = arg; 370 uint64_t refcount; 371 372 mutex_enter(&spa->spa_feat_stats_lock); 373 VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount)); 374 feature_sync(spa, feature, refcount + 1, tx); 375 spa_history_log_internal(spa, "zhack feature incr", tx, 376 "name=%s", feature->fi_guid); 377 mutex_exit(&spa->spa_feat_stats_lock); 378 } 379 380 static void 381 feature_decr_sync(void *arg, dmu_tx_t *tx) 382 { 383 spa_t *spa = dmu_tx_pool(tx)->dp_spa; 384 zfeature_info_t *feature = arg; 385 uint64_t refcount; 386 387 mutex_enter(&spa->spa_feat_stats_lock); 388 VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount)); 389 feature_sync(spa, feature, refcount - 1, tx); 390 spa_history_log_internal(spa, "zhack feature decr", tx, 391 "name=%s", feature->fi_guid); 392 mutex_exit(&spa->spa_feat_stats_lock); 393 } 394 395 static void 396 zhack_do_feature_ref(int argc, char **argv) 397 { 398 int c; 399 char *target; 400 boolean_t decr = B_FALSE; 401 spa_t *spa; 402 objset_t *mos; 403 zfeature_info_t feature; 404 const spa_feature_t nodeps[] = { SPA_FEATURE_NONE }; 405 406 /* 407 * fi_desc does not matter here because it was written to disk 408 * when the feature was enabled, but we need to properly set the 409 * feature for read or write based on the information we read off 410 * disk later. 411 */ 412 feature.fi_uname = "zhack"; 413 feature.fi_flags = 0; 414 feature.fi_desc = NULL; 415 feature.fi_depends = nodeps; 416 feature.fi_feature = SPA_FEATURE_NONE; 417 418 optind = 1; 419 while ((c = getopt(argc, argv, "+md")) != -1) { 420 switch (c) { 421 case 'm': 422 feature.fi_flags |= ZFEATURE_FLAG_MOS; 423 break; 424 case 'd': 425 decr = B_TRUE; 426 break; 427 default: 428 usage(); 429 break; 430 } 431 } 432 argc -= optind; 433 argv += optind; 434 435 if (argc < 2) { 436 (void) fprintf(stderr, "error: missing feature or pool name\n"); 437 usage(); 438 } 439 target = argv[0]; 440 feature.fi_guid = argv[1]; 441 442 if (!zfeature_is_valid_guid(feature.fi_guid)) 443 fatal(NULL, FTAG, "invalid feature guid: %s", feature.fi_guid); 444 445 zhack_spa_open(target, B_FALSE, FTAG, &spa); 446 mos = spa->spa_meta_objset; 447 448 if (zfeature_is_supported(feature.fi_guid)) { 449 fatal(spa, FTAG, 450 "'%s' is a real feature, will not change refcount", 451 feature.fi_guid); 452 } 453 454 if (0 == zap_contains(mos, spa->spa_feat_for_read_obj, 455 feature.fi_guid)) { 456 feature.fi_flags &= ~ZFEATURE_FLAG_READONLY_COMPAT; 457 } else if (0 == zap_contains(mos, spa->spa_feat_for_write_obj, 458 feature.fi_guid)) { 459 feature.fi_flags |= ZFEATURE_FLAG_READONLY_COMPAT; 460 } else { 461 fatal(spa, FTAG, "feature is not enabled: %s", feature.fi_guid); 462 } 463 464 if (decr) { 465 uint64_t count; 466 if (feature_get_refcount_from_disk(spa, &feature, 467 &count) == 0 && count == 0) { 468 fatal(spa, FTAG, "feature refcount already 0: %s", 469 feature.fi_guid); 470 } 471 } 472 473 VERIFY0(dsl_sync_task(spa_name(spa), NULL, 474 decr ? feature_decr_sync : feature_incr_sync, &feature, 475 5, ZFS_SPACE_CHECK_NORMAL)); 476 477 spa_close(spa, FTAG); 478 } 479 480 static int 481 zhack_do_feature(int argc, char **argv) 482 { 483 char *subcommand; 484 485 argc--; 486 argv++; 487 if (argc == 0) { 488 (void) fprintf(stderr, 489 "error: no feature operation specified\n"); 490 usage(); 491 } 492 493 subcommand = argv[0]; 494 if (strcmp(subcommand, "stat") == 0) { 495 zhack_do_feature_stat(argc, argv); 496 } else if (strcmp(subcommand, "enable") == 0) { 497 zhack_do_feature_enable(argc, argv); 498 } else if (strcmp(subcommand, "ref") == 0) { 499 zhack_do_feature_ref(argc, argv); 500 } else { 501 (void) fprintf(stderr, "error: unknown subcommand: %s\n", 502 subcommand); 503 usage(); 504 } 505 506 return (0); 507 } 508 509 static boolean_t 510 strstarts(const char *a, const char *b) 511 { 512 return (strncmp(a, b, strlen(b)) == 0); 513 } 514 515 static void 516 metaslab_force_alloc(metaslab_t *msp, uint64_t start, uint64_t size, 517 dmu_tx_t *tx) 518 { 519 ASSERT(msp->ms_disabled); 520 ASSERT(MUTEX_HELD(&msp->ms_lock)); 521 uint64_t txg = dmu_tx_get_txg(tx); 522 523 uint64_t off = start; 524 while (off < start + size) { 525 uint64_t ostart, osize; 526 boolean_t found = zfs_range_tree_find_in(msp->ms_allocatable, 527 off, start + size - off, &ostart, &osize); 528 if (!found) 529 break; 530 zfs_range_tree_remove(msp->ms_allocatable, ostart, osize); 531 532 if (zfs_range_tree_is_empty(msp->ms_allocating[txg & TXG_MASK])) 533 vdev_dirty(msp->ms_group->mg_vd, VDD_METASLAB, msp, 534 txg); 535 536 zfs_range_tree_add(msp->ms_allocating[txg & TXG_MASK], ostart, 537 osize); 538 msp->ms_allocating_total += osize; 539 off = ostart + osize; 540 } 541 } 542 543 static void 544 zhack_do_metaslab_leak(int argc, char **argv) 545 { 546 int c; 547 char *target; 548 spa_t *spa; 549 550 optind = 1; 551 boolean_t force = B_FALSE; 552 while ((c = getopt(argc, argv, "f")) != -1) { 553 switch (c) { 554 case 'f': 555 force = B_TRUE; 556 break; 557 default: 558 usage(); 559 break; 560 } 561 } 562 563 argc -= optind; 564 argv += optind; 565 566 if (argc < 1) { 567 (void) fprintf(stderr, "error: missing pool name\n"); 568 usage(); 569 } 570 target = argv[0]; 571 572 zhack_spa_open(target, B_FALSE, FTAG, &spa); 573 spa_config_enter(spa, SCL_VDEV | SCL_ALLOC, FTAG, RW_READER); 574 575 char *line = NULL; 576 size_t cap = 0; 577 578 vdev_t *vd = NULL; 579 metaslab_t *prev = NULL; 580 dmu_tx_t *tx = NULL; 581 while (getline(&line, &cap, stdin) > 0) { 582 if (strstarts(line, "\tvdev ")) { 583 uint64_t vdev_id, ms_shift; 584 if (sscanf(line, 585 "\tvdev %10"PRIu64"\t%*s metaslab shift %4"PRIu64, 586 &vdev_id, &ms_shift) == 1) { 587 VERIFY3U(sscanf(line, "\tvdev %"PRIu64 588 "\t metaslab shift %4"PRIu64, 589 &vdev_id, &ms_shift), ==, 2); 590 } 591 vd = vdev_lookup_top(spa, vdev_id); 592 if (vd == NULL) { 593 fprintf(stderr, "error: no such vdev with " 594 "id %"PRIu64"\n", vdev_id); 595 break; 596 } 597 if (tx) { 598 dmu_tx_commit(tx); 599 mutex_exit(&prev->ms_lock); 600 metaslab_enable(prev, B_FALSE, B_FALSE); 601 tx = NULL; 602 prev = NULL; 603 } 604 if (vd->vdev_ms_shift != ms_shift) { 605 fprintf(stderr, "error: ms_shift mismatch: %" 606 PRIu64" != %"PRIu64"\n", vd->vdev_ms_shift, 607 ms_shift); 608 break; 609 } 610 } else if (strstarts(line, "\tmetaslabs ")) { 611 uint64_t ms_count; 612 VERIFY3U(sscanf(line, "\tmetaslabs %"PRIu64, &ms_count), 613 ==, 1); 614 ASSERT(vd); 615 if (!force && vd->vdev_ms_count != ms_count) { 616 fprintf(stderr, "error: ms_count mismatch: %" 617 PRIu64" != %"PRIu64"\n", vd->vdev_ms_count, 618 ms_count); 619 break; 620 } 621 } else if (strstarts(line, "ALLOC:")) { 622 uint64_t start, size; 623 VERIFY3U(sscanf(line, "ALLOC: %"PRIu64" %"PRIu64"\n", 624 &start, &size), ==, 2); 625 626 ASSERT(vd); 627 metaslab_t *cur = 628 vd->vdev_ms[start >> vd->vdev_ms_shift]; 629 if (prev != cur) { 630 if (prev) { 631 dmu_tx_commit(tx); 632 mutex_exit(&prev->ms_lock); 633 metaslab_enable(prev, B_FALSE, B_FALSE); 634 } 635 ASSERT(cur); 636 metaslab_disable(cur); 637 mutex_enter(&cur->ms_lock); 638 metaslab_load(cur); 639 prev = cur; 640 tx = dmu_tx_create_dd( 641 spa_get_dsl(vd->vdev_spa)->dp_root_dir); 642 dmu_tx_assign(tx, DMU_TX_WAIT); 643 } 644 645 metaslab_force_alloc(cur, start, size, tx); 646 } else { 647 continue; 648 } 649 } 650 if (tx) { 651 dmu_tx_commit(tx); 652 mutex_exit(&prev->ms_lock); 653 metaslab_enable(prev, B_FALSE, B_FALSE); 654 tx = NULL; 655 prev = NULL; 656 } 657 if (line) 658 free(line); 659 660 spa_config_exit(spa, SCL_VDEV | SCL_ALLOC, FTAG); 661 spa_close(spa, FTAG); 662 } 663 664 static int 665 zhack_do_metaslab(int argc, char **argv) 666 { 667 char *subcommand; 668 669 argc--; 670 argv++; 671 if (argc == 0) { 672 (void) fprintf(stderr, 673 "error: no metaslab operation specified\n"); 674 usage(); 675 } 676 677 subcommand = argv[0]; 678 if (strcmp(subcommand, "leak") == 0) { 679 zhack_do_metaslab_leak(argc, argv); 680 } else { 681 (void) fprintf(stderr, "error: unknown subcommand: %s\n", 682 subcommand); 683 usage(); 684 } 685 686 return (0); 687 } 688 689 #define ASHIFT_UBERBLOCK_SHIFT(ashift) \ 690 MIN(MAX(ashift, UBERBLOCK_SHIFT), \ 691 MAX_UBERBLOCK_SHIFT) 692 #define ASHIFT_UBERBLOCK_SIZE(ashift) \ 693 (1ULL << ASHIFT_UBERBLOCK_SHIFT(ashift)) 694 695 #define REPAIR_LABEL_STATUS_CKSUM (1 << 0) 696 #define REPAIR_LABEL_STATUS_UB (1 << 1) 697 698 static int 699 zhack_repair_read_label(const int fd, vdev_label_t *vl, 700 const uint64_t label_offset, const int l) 701 { 702 const int err = pread64(fd, vl, sizeof (vdev_label_t), label_offset); 703 704 if (err == -1) { 705 (void) fprintf(stderr, 706 "error: cannot read label %d: %s\n", 707 l, strerror(errno)); 708 return (err); 709 } else if (err != sizeof (vdev_label_t)) { 710 (void) fprintf(stderr, 711 "error: bad label %d read size\n", l); 712 return (err); 713 } 714 715 return (0); 716 } 717 718 static int 719 zhack_repair_get_byteswap(const zio_eck_t *vdev_eck, const int l, int *byteswap) 720 { 721 if (vdev_eck->zec_magic == ZEC_MAGIC) { 722 *byteswap = B_FALSE; 723 } else if (vdev_eck->zec_magic == BSWAP_64((uint64_t)ZEC_MAGIC)) { 724 *byteswap = B_TRUE; 725 } else { 726 (void) fprintf(stderr, "error: label %d: " 727 "Expected the nvlist checksum magic number but instead got " 728 "0x%" PRIx64 "\n", 729 l, vdev_eck->zec_magic); 730 return (1); 731 } 732 return (0); 733 } 734 735 static void 736 zhack_repair_calc_cksum(const int byteswap, void *data, const uint64_t offset, 737 const uint64_t abdsize, zio_eck_t *eck, zio_cksum_t *cksum) 738 { 739 zio_cksum_t verifier; 740 zio_cksum_t current_cksum; 741 zio_checksum_info_t *ci; 742 abd_t *abd; 743 744 ZIO_SET_CHECKSUM(&verifier, offset, 0, 0, 0); 745 746 if (byteswap) 747 byteswap_uint64_array(&verifier, sizeof (zio_cksum_t)); 748 749 current_cksum = eck->zec_cksum; 750 eck->zec_cksum = verifier; 751 752 ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL]; 753 abd = abd_get_from_buf(data, abdsize); 754 ci->ci_func[byteswap](abd, abdsize, NULL, cksum); 755 abd_free(abd); 756 757 eck->zec_cksum = current_cksum; 758 } 759 760 static int 761 zhack_repair_get_ashift(nvlist_t *cfg, const int l, uint64_t *ashift) 762 { 763 int err; 764 nvlist_t *vdev_tree_cfg; 765 766 err = nvlist_lookup_nvlist(cfg, 767 ZPOOL_CONFIG_VDEV_TREE, &vdev_tree_cfg); 768 if (err) { 769 (void) fprintf(stderr, 770 "error: label %d: cannot find nvlist key %s\n", 771 l, ZPOOL_CONFIG_VDEV_TREE); 772 return (err); 773 } 774 775 err = nvlist_lookup_uint64(vdev_tree_cfg, 776 ZPOOL_CONFIG_ASHIFT, ashift); 777 if (err) { 778 (void) fprintf(stderr, 779 "error: label %d: cannot find nvlist key %s\n", 780 l, ZPOOL_CONFIG_ASHIFT); 781 return (err); 782 } 783 784 if (*ashift == 0) { 785 (void) fprintf(stderr, 786 "error: label %d: nvlist key %s is zero\n", 787 l, ZPOOL_CONFIG_ASHIFT); 788 return (1); 789 } 790 791 return (0); 792 } 793 794 static int 795 zhack_repair_undetach(uberblock_t *ub, nvlist_t *cfg, const int l) 796 { 797 /* 798 * Uberblock root block pointer has valid birth TXG. 799 * Copying it to the label NVlist 800 */ 801 if (BP_GET_LOGICAL_BIRTH(&ub->ub_rootbp) != 0) { 802 const uint64_t txg = BP_GET_LOGICAL_BIRTH(&ub->ub_rootbp); 803 int err; 804 805 ub->ub_txg = txg; 806 807 err = nvlist_remove_all(cfg, ZPOOL_CONFIG_CREATE_TXG); 808 if (err) { 809 (void) fprintf(stderr, 810 "error: label %d: " 811 "Failed to remove pool creation TXG\n", 812 l); 813 return (err); 814 } 815 816 err = nvlist_remove_all(cfg, ZPOOL_CONFIG_POOL_TXG); 817 if (err) { 818 (void) fprintf(stderr, 819 "error: label %d: Failed to remove pool TXG to " 820 "be replaced.\n", 821 l); 822 return (err); 823 } 824 825 err = nvlist_add_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, txg); 826 if (err) { 827 (void) fprintf(stderr, 828 "error: label %d: " 829 "Failed to add pool TXG of %" PRIu64 "\n", 830 l, txg); 831 return (err); 832 } 833 } 834 835 return (0); 836 } 837 838 static boolean_t 839 zhack_repair_write_label(const int l, const int fd, const int byteswap, 840 void *data, zio_eck_t *eck, const uint64_t offset, const uint64_t abdsize) 841 { 842 zio_cksum_t actual_cksum; 843 zhack_repair_calc_cksum(byteswap, data, offset, abdsize, eck, 844 &actual_cksum); 845 zio_cksum_t expected_cksum = eck->zec_cksum; 846 ssize_t err; 847 848 if (ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) 849 return (B_FALSE); 850 851 eck->zec_cksum = actual_cksum; 852 853 err = pwrite64(fd, data, abdsize, offset); 854 if (err == -1) { 855 (void) fprintf(stderr, "error: cannot write label %d: %s\n", 856 l, strerror(errno)); 857 return (B_FALSE); 858 } else if (err != abdsize) { 859 (void) fprintf(stderr, "error: bad write size label %d\n", l); 860 return (B_FALSE); 861 } else { 862 (void) fprintf(stderr, 863 "label %d: wrote %" PRIu64 " bytes at offset %" PRIu64 "\n", 864 l, abdsize, offset); 865 } 866 867 return (B_TRUE); 868 } 869 870 static void 871 zhack_repair_write_uberblock(vdev_label_t *vl, const int l, 872 const uint64_t ashift, const int fd, const int byteswap, 873 const uint64_t label_offset, uint32_t *labels_repaired) 874 { 875 void *ub_data = 876 (char *)vl + offsetof(vdev_label_t, vl_uberblock); 877 zio_eck_t *ub_eck = 878 (zio_eck_t *) 879 ((char *)(ub_data) + (ASHIFT_UBERBLOCK_SIZE(ashift))) - 1; 880 881 if (ub_eck->zec_magic != 0) { 882 (void) fprintf(stderr, 883 "error: label %d: " 884 "Expected Uberblock checksum magic number to " 885 "be 0, but got %" PRIu64 "\n", 886 l, ub_eck->zec_magic); 887 (void) fprintf(stderr, "It would appear there's already " 888 "a checksum for the uberblock.\n"); 889 return; 890 } 891 892 893 ub_eck->zec_magic = byteswap ? BSWAP_64(ZEC_MAGIC) : ZEC_MAGIC; 894 895 if (zhack_repair_write_label(l, fd, byteswap, 896 ub_data, ub_eck, 897 label_offset + offsetof(vdev_label_t, vl_uberblock), 898 ASHIFT_UBERBLOCK_SIZE(ashift))) 899 labels_repaired[l] |= REPAIR_LABEL_STATUS_UB; 900 } 901 902 static void 903 zhack_repair_print_cksum(FILE *stream, const zio_cksum_t *cksum) 904 { 905 (void) fprintf(stream, 906 "%016llx:%016llx:%016llx:%016llx", 907 (u_longlong_t)cksum->zc_word[0], 908 (u_longlong_t)cksum->zc_word[1], 909 (u_longlong_t)cksum->zc_word[2], 910 (u_longlong_t)cksum->zc_word[3]); 911 } 912 913 static int 914 zhack_repair_test_cksum(const int byteswap, void *vdev_data, 915 zio_eck_t *vdev_eck, const uint64_t vdev_phys_offset, const int l) 916 { 917 const zio_cksum_t expected_cksum = vdev_eck->zec_cksum; 918 zio_cksum_t actual_cksum; 919 zhack_repair_calc_cksum(byteswap, vdev_data, vdev_phys_offset, 920 VDEV_PHYS_SIZE, vdev_eck, &actual_cksum); 921 const uint64_t expected_magic = byteswap ? 922 BSWAP_64(ZEC_MAGIC) : ZEC_MAGIC; 923 const uint64_t actual_magic = vdev_eck->zec_magic; 924 int err = 0; 925 926 if (actual_magic != expected_magic) { 927 (void) fprintf(stderr, "error: label %d: " 928 "Expected " 929 "the nvlist checksum magic number to not be %" 930 PRIu64 " not %" PRIu64 "\n", 931 l, expected_magic, actual_magic); 932 err = ECKSUM; 933 } 934 if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) { 935 (void) fprintf(stderr, "error: label %d: " 936 "Expected the nvlist checksum to be ", l); 937 (void) zhack_repair_print_cksum(stderr, 938 &expected_cksum); 939 (void) fprintf(stderr, " not "); 940 zhack_repair_print_cksum(stderr, &actual_cksum); 941 (void) fprintf(stderr, "\n"); 942 err = ECKSUM; 943 } 944 return (err); 945 } 946 947 static int 948 zhack_repair_unpack_cfg(vdev_label_t *vl, const int l, nvlist_t **cfg) 949 { 950 const char *cfg_keys[] = { ZPOOL_CONFIG_VERSION, 951 ZPOOL_CONFIG_POOL_STATE, ZPOOL_CONFIG_GUID }; 952 int err; 953 954 err = nvlist_unpack(vl->vl_vdev_phys.vp_nvlist, 955 VDEV_PHYS_SIZE - sizeof (zio_eck_t), cfg, 0); 956 if (err) { 957 (void) fprintf(stderr, 958 "error: cannot unpack nvlist label %d\n", l); 959 return (err); 960 } 961 962 for (int i = 0; i < ARRAY_SIZE(cfg_keys); i++) { 963 uint64_t val; 964 err = nvlist_lookup_uint64(*cfg, cfg_keys[i], &val); 965 if (err) { 966 (void) fprintf(stderr, 967 "error: label %d, %d: " 968 "cannot find nvlist key %s\n", 969 l, i, cfg_keys[i]); 970 return (err); 971 } 972 } 973 974 return (0); 975 } 976 977 static void 978 zhack_repair_one_label(const zhack_repair_op_t op, const int fd, 979 vdev_label_t *vl, const uint64_t label_offset, const int l, 980 uint32_t *labels_repaired) 981 { 982 ssize_t err; 983 uberblock_t *ub = (uberblock_t *)vl->vl_uberblock; 984 void *vdev_data = 985 (char *)vl + offsetof(vdev_label_t, vl_vdev_phys); 986 zio_eck_t *vdev_eck = 987 (zio_eck_t *)((char *)(vdev_data) + VDEV_PHYS_SIZE) - 1; 988 const uint64_t vdev_phys_offset = 989 label_offset + offsetof(vdev_label_t, vl_vdev_phys); 990 nvlist_t *cfg; 991 uint64_t ashift; 992 int byteswap; 993 994 err = zhack_repair_read_label(fd, vl, label_offset, l); 995 if (err) 996 return; 997 998 err = zhack_repair_get_byteswap(vdev_eck, l, &byteswap); 999 if (err) 1000 return; 1001 1002 if (byteswap) { 1003 byteswap_uint64_array(&vdev_eck->zec_cksum, 1004 sizeof (zio_cksum_t)); 1005 vdev_eck->zec_magic = BSWAP_64(vdev_eck->zec_magic); 1006 } 1007 1008 if ((op & ZHACK_REPAIR_OP_CKSUM) == 0 && 1009 zhack_repair_test_cksum(byteswap, vdev_data, vdev_eck, 1010 vdev_phys_offset, l) != 0) { 1011 (void) fprintf(stderr, "It would appear checksums are " 1012 "corrupted. Try zhack repair label -c <device>\n"); 1013 return; 1014 } 1015 1016 err = zhack_repair_unpack_cfg(vl, l, &cfg); 1017 if (err) 1018 return; 1019 1020 if ((op & ZHACK_REPAIR_OP_UNDETACH) != 0) { 1021 char *buf; 1022 size_t buflen; 1023 1024 if (ub->ub_txg != 0) { 1025 (void) fprintf(stderr, 1026 "error: label %d: UB TXG of 0 expected, but got %" 1027 PRIu64 "\n", l, ub->ub_txg); 1028 (void) fprintf(stderr, "It would appear the device was " 1029 "not properly detached.\n"); 1030 return; 1031 } 1032 1033 err = zhack_repair_get_ashift(cfg, l, &ashift); 1034 if (err) 1035 return; 1036 1037 err = zhack_repair_undetach(ub, cfg, l); 1038 if (err) 1039 return; 1040 1041 buf = vl->vl_vdev_phys.vp_nvlist; 1042 buflen = VDEV_PHYS_SIZE - sizeof (zio_eck_t); 1043 if (nvlist_pack(cfg, &buf, &buflen, NV_ENCODE_XDR, 0) != 0) { 1044 (void) fprintf(stderr, 1045 "error: label %d: Failed to pack nvlist\n", l); 1046 return; 1047 } 1048 1049 zhack_repair_write_uberblock(vl, 1050 l, ashift, fd, byteswap, label_offset, labels_repaired); 1051 } 1052 1053 if (zhack_repair_write_label(l, fd, byteswap, vdev_data, vdev_eck, 1054 vdev_phys_offset, VDEV_PHYS_SIZE)) 1055 labels_repaired[l] |= REPAIR_LABEL_STATUS_CKSUM; 1056 1057 fsync(fd); 1058 } 1059 1060 static const char * 1061 zhack_repair_label_status(const uint32_t label_status, 1062 const uint32_t to_check) 1063 { 1064 return ((label_status & to_check) != 0 ? "repaired" : "skipped"); 1065 } 1066 1067 static int 1068 zhack_label_repair(const zhack_repair_op_t op, const int argc, char **argv) 1069 { 1070 uint32_t labels_repaired[VDEV_LABELS] = {0}; 1071 vdev_label_t labels[VDEV_LABELS] = {{{0}}}; 1072 struct stat64 st; 1073 int fd; 1074 off_t filesize; 1075 uint32_t repaired = 0; 1076 1077 abd_init(); 1078 1079 if (argc < 1) { 1080 (void) fprintf(stderr, "error: missing device\n"); 1081 usage(); 1082 } 1083 1084 if ((fd = open(argv[0], O_RDWR)) == -1) 1085 fatal(NULL, FTAG, "cannot open '%s': %s", argv[0], 1086 strerror(errno)); 1087 1088 if (fstat64_blk(fd, &st) != 0) 1089 fatal(NULL, FTAG, "cannot stat '%s': %s", argv[0], 1090 strerror(errno)); 1091 1092 filesize = st.st_size; 1093 (void) fprintf(stderr, "Calculated filesize to be %jd\n", 1094 (intmax_t)filesize); 1095 1096 if (filesize % sizeof (vdev_label_t) != 0) 1097 filesize = 1098 (filesize / sizeof (vdev_label_t)) * sizeof (vdev_label_t); 1099 1100 for (int l = 0; l < VDEV_LABELS; l++) { 1101 zhack_repair_one_label(op, fd, &labels[l], 1102 vdev_label_offset(filesize, l, 0), l, labels_repaired); 1103 } 1104 1105 close(fd); 1106 1107 abd_fini(); 1108 1109 for (int l = 0; l < VDEV_LABELS; l++) { 1110 const uint32_t lr = labels_repaired[l]; 1111 (void) printf("label %d: ", l); 1112 (void) printf("uberblock: %s ", 1113 zhack_repair_label_status(lr, REPAIR_LABEL_STATUS_UB)); 1114 (void) printf("checksum: %s\n", 1115 zhack_repair_label_status(lr, REPAIR_LABEL_STATUS_CKSUM)); 1116 repaired |= lr; 1117 } 1118 1119 if (repaired > 0) 1120 return (0); 1121 1122 return (1); 1123 } 1124 1125 static int 1126 zhack_do_label_repair(int argc, char **argv) 1127 { 1128 zhack_repair_op_t op = ZHACK_REPAIR_OP_UNKNOWN; 1129 int c; 1130 1131 optind = 1; 1132 while ((c = getopt(argc, argv, "+cu")) != -1) { 1133 switch (c) { 1134 case 'c': 1135 op |= ZHACK_REPAIR_OP_CKSUM; 1136 break; 1137 case 'u': 1138 op |= ZHACK_REPAIR_OP_UNDETACH; 1139 break; 1140 default: 1141 usage(); 1142 break; 1143 } 1144 } 1145 1146 argc -= optind; 1147 argv += optind; 1148 1149 if (op == ZHACK_REPAIR_OP_UNKNOWN) 1150 op = ZHACK_REPAIR_OP_CKSUM; 1151 1152 return (zhack_label_repair(op, argc, argv)); 1153 } 1154 1155 static int 1156 zhack_do_label(int argc, char **argv) 1157 { 1158 char *subcommand; 1159 int err; 1160 1161 argc--; 1162 argv++; 1163 if (argc == 0) { 1164 (void) fprintf(stderr, 1165 "error: no label operation specified\n"); 1166 usage(); 1167 } 1168 1169 subcommand = argv[0]; 1170 if (strcmp(subcommand, "repair") == 0) { 1171 err = zhack_do_label_repair(argc, argv); 1172 } else { 1173 (void) fprintf(stderr, "error: unknown subcommand: %s\n", 1174 subcommand); 1175 usage(); 1176 } 1177 1178 return (err); 1179 } 1180 1181 #define MAX_NUM_PATHS 1024 1182 1183 int 1184 main(int argc, char **argv) 1185 { 1186 char *path[MAX_NUM_PATHS]; 1187 const char *subcommand; 1188 int rv = 0; 1189 int c; 1190 1191 g_importargs.path = path; 1192 1193 dprintf_setup(&argc, argv); 1194 zfs_prop_init(); 1195 1196 while ((c = getopt(argc, argv, "+c:d:o:")) != -1) { 1197 switch (c) { 1198 case 'c': 1199 g_importargs.cachefile = optarg; 1200 break; 1201 case 'd': 1202 assert(g_importargs.paths < MAX_NUM_PATHS); 1203 g_importargs.path[g_importargs.paths++] = optarg; 1204 break; 1205 case 'o': 1206 if (handle_tunable_option(optarg, B_FALSE) != 0) 1207 exit(1); 1208 break; 1209 default: 1210 usage(); 1211 break; 1212 } 1213 } 1214 1215 argc -= optind; 1216 argv += optind; 1217 optind = 1; 1218 1219 if (argc == 0) { 1220 (void) fprintf(stderr, "error: no command specified\n"); 1221 usage(); 1222 } 1223 1224 subcommand = argv[0]; 1225 1226 if (strcmp(subcommand, "feature") == 0) { 1227 rv = zhack_do_feature(argc, argv); 1228 } else if (strcmp(subcommand, "label") == 0) { 1229 return (zhack_do_label(argc, argv)); 1230 } else if (strcmp(subcommand, "metaslab") == 0) { 1231 rv = zhack_do_metaslab(argc, argv); 1232 } else { 1233 (void) fprintf(stderr, "error: unknown subcommand: %s\n", 1234 subcommand); 1235 usage(); 1236 } 1237 1238 if (!g_readonly && spa_export(g_pool, NULL, B_TRUE, B_FALSE) != 0) { 1239 fatal(NULL, FTAG, "pool export failed; " 1240 "changes may not be committed to disk\n"); 1241 } 1242 1243 kernel_fini(); 1244 1245 return (rv); 1246 } 1247