1 // SPDX-License-Identifier: CDDL-1.0 2 /* 3 * CDDL HEADER START 4 * 5 * The contents of this file are subject to the terms of the 6 * Common Development and Distribution License (the "License"). 7 * You may not use this file except in compliance with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or https://opensource.org/licenses/CDDL-1.0. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 23 /* 24 * Copyright (c) 2011, 2015 by Delphix. All rights reserved. 25 * Copyright (c) 2013 Steven Hartland. All rights reserved. 26 */ 27 28 /* 29 * zhack is a debugging tool that can write changes to ZFS pool using libzpool 30 * for testing purposes. Altering pools with zhack is unsupported and may 31 * result in corrupted pools. 32 */ 33 34 #include <zfs_prop.h> 35 #include <stdio.h> 36 #include <stdlib.h> 37 #include <ctype.h> 38 #include <sys/stat.h> 39 #include <sys/zfs_context.h> 40 #include <sys/spa.h> 41 #include <sys/spa_impl.h> 42 #include <sys/dmu.h> 43 #include <sys/zap.h> 44 #include <sys/zfs_znode.h> 45 #include <sys/dsl_synctask.h> 46 #include <sys/vdev.h> 47 #include <sys/vdev_impl.h> 48 #include <sys/fs/zfs.h> 49 #include <sys/dmu_objset.h> 50 #include <sys/dsl_pool.h> 51 #include <sys/zio_checksum.h> 52 #include <sys/zio_compress.h> 53 #include <sys/zfeature.h> 54 #include <sys/dmu_tx.h> 55 #include <zfeature_common.h> 56 #include <libzutil.h> 57 #include <sys/metaslab_impl.h> 58 59 static importargs_t g_importargs; 60 static char *g_pool; 61 static boolean_t g_readonly; 62 63 typedef enum { 64 ZHACK_REPAIR_OP_UNKNOWN = 0, 65 ZHACK_REPAIR_OP_CKSUM = (1 << 0), 66 ZHACK_REPAIR_OP_UNDETACH = (1 << 1) 67 } zhack_repair_op_t; 68 69 static __attribute__((noreturn)) void 70 usage(void) 71 { 72 (void) fprintf(stderr, 73 "Usage: zhack [-o tunable] [-c cachefile] [-d dir] <subcommand> " 74 "<args> ...\n" 75 "where <subcommand> <args> is one of the following:\n" 76 "\n"); 77 78 (void) fprintf(stderr, 79 " feature stat <pool>\n" 80 " print information about enabled features\n" 81 " feature enable [-r] [-d desc] <pool> <feature>\n" 82 " add a new enabled feature to the pool\n" 83 " -d <desc> sets the feature's description\n" 84 " -r set read-only compatible flag for feature\n" 85 " feature ref [-md] <pool> <feature>\n" 86 " change the refcount on the given feature\n" 87 " -d decrease instead of increase the refcount\n" 88 " -m add the feature to the label if increasing refcount\n" 89 "\n" 90 " <feature> : should be a feature guid\n" 91 "\n" 92 " label repair <device>\n" 93 " repair labels of a specified device according to options\n" 94 " which may be combined to do their functions in one call\n" 95 " -c repair corrupted label checksums\n" 96 " -u restore the label on a detached device\n" 97 "\n" 98 " <device> : path to vdev\n" 99 "\n" 100 " metaslab leak <pool>\n" 101 " apply allocation map from zdb to specified pool\n"); 102 exit(1); 103 } 104 105 106 static __attribute__((format(printf, 3, 4))) __attribute__((noreturn)) void 107 fatal(spa_t *spa, const void *tag, const char *fmt, ...) 108 { 109 va_list ap; 110 111 if (spa != NULL) { 112 spa_close(spa, tag); 113 (void) spa_export(g_pool, NULL, B_TRUE, B_FALSE); 114 } 115 116 va_start(ap, fmt); 117 (void) fputs("zhack: ", stderr); 118 (void) vfprintf(stderr, fmt, ap); 119 va_end(ap); 120 (void) fputc('\n', stderr); 121 122 exit(1); 123 } 124 125 static int 126 space_delta_cb(dmu_object_type_t bonustype, const void *data, 127 zfs_file_info_t *zoi) 128 { 129 (void) data, (void) zoi; 130 131 /* 132 * Is it a valid type of object to track? 133 */ 134 if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA) 135 return (ENOENT); 136 (void) fprintf(stderr, "modifying object that needs user accounting"); 137 abort(); 138 } 139 140 /* 141 * Target is the dataset whose pool we want to open. 142 */ 143 static void 144 zhack_import(char *target, boolean_t readonly) 145 { 146 nvlist_t *config; 147 nvlist_t *props; 148 int error; 149 150 kernel_init(readonly ? SPA_MODE_READ : 151 (SPA_MODE_READ | SPA_MODE_WRITE)); 152 153 dmu_objset_register_type(DMU_OST_ZFS, space_delta_cb); 154 155 g_readonly = readonly; 156 g_importargs.can_be_active = readonly; 157 g_pool = strdup(target); 158 159 libpc_handle_t lpch = { 160 .lpc_lib_handle = NULL, 161 .lpc_ops = &libzpool_config_ops, 162 .lpc_printerr = B_TRUE 163 }; 164 error = zpool_find_config(&lpch, target, &config, &g_importargs); 165 if (error) 166 fatal(NULL, FTAG, "cannot import '%s'", target); 167 168 props = NULL; 169 if (readonly) { 170 VERIFY0(nvlist_alloc(&props, NV_UNIQUE_NAME, 0)); 171 VERIFY0(nvlist_add_uint64(props, 172 zpool_prop_to_name(ZPOOL_PROP_READONLY), 1)); 173 } 174 175 zfeature_checks_disable = B_TRUE; 176 error = spa_import(target, config, props, 177 (readonly ? ZFS_IMPORT_SKIP_MMP : ZFS_IMPORT_NORMAL)); 178 fnvlist_free(config); 179 zfeature_checks_disable = B_FALSE; 180 if (error == EEXIST) 181 error = 0; 182 183 if (error) 184 fatal(NULL, FTAG, "can't import '%s': %s", target, 185 strerror(error)); 186 } 187 188 static void 189 zhack_spa_open(char *target, boolean_t readonly, const void *tag, spa_t **spa) 190 { 191 int err; 192 193 zhack_import(target, readonly); 194 195 zfeature_checks_disable = B_TRUE; 196 err = spa_open(target, spa, tag); 197 zfeature_checks_disable = B_FALSE; 198 199 if (err != 0) 200 fatal(*spa, FTAG, "cannot open '%s': %s", target, 201 strerror(err)); 202 if (spa_version(*spa) < SPA_VERSION_FEATURES) { 203 fatal(*spa, FTAG, "'%s' has version %d, features not enabled", 204 target, (int)spa_version(*spa)); 205 } 206 } 207 208 static void 209 dump_obj(objset_t *os, uint64_t obj, const char *name) 210 { 211 zap_cursor_t zc; 212 zap_attribute_t *za = zap_attribute_long_alloc(); 213 214 (void) printf("%s_obj:\n", name); 215 216 for (zap_cursor_init(&zc, os, obj); 217 zap_cursor_retrieve(&zc, za) == 0; 218 zap_cursor_advance(&zc)) { 219 if (za->za_integer_length == 8) { 220 ASSERT(za->za_num_integers == 1); 221 (void) printf("\t%s = %llu\n", 222 za->za_name, (u_longlong_t)za->za_first_integer); 223 } else { 224 ASSERT(za->za_integer_length == 1); 225 char val[1024]; 226 VERIFY0(zap_lookup(os, obj, za->za_name, 227 1, sizeof (val), val)); 228 (void) printf("\t%s = %s\n", za->za_name, val); 229 } 230 } 231 zap_cursor_fini(&zc); 232 zap_attribute_free(za); 233 } 234 235 static void 236 dump_mos(spa_t *spa) 237 { 238 nvlist_t *nv = spa->spa_label_features; 239 nvpair_t *pair; 240 241 (void) printf("label config:\n"); 242 for (pair = nvlist_next_nvpair(nv, NULL); 243 pair != NULL; 244 pair = nvlist_next_nvpair(nv, pair)) { 245 (void) printf("\t%s\n", nvpair_name(pair)); 246 } 247 } 248 249 static void 250 zhack_do_feature_stat(int argc, char **argv) 251 { 252 spa_t *spa; 253 objset_t *os; 254 char *target; 255 256 argc--; 257 argv++; 258 259 if (argc < 1) { 260 (void) fprintf(stderr, "error: missing pool name\n"); 261 usage(); 262 } 263 target = argv[0]; 264 265 zhack_spa_open(target, B_TRUE, FTAG, &spa); 266 os = spa->spa_meta_objset; 267 268 dump_obj(os, spa->spa_feat_for_read_obj, "for_read"); 269 dump_obj(os, spa->spa_feat_for_write_obj, "for_write"); 270 dump_obj(os, spa->spa_feat_desc_obj, "descriptions"); 271 if (spa_feature_is_active(spa, SPA_FEATURE_ENABLED_TXG)) { 272 dump_obj(os, spa->spa_feat_enabled_txg_obj, "enabled_txg"); 273 } 274 dump_mos(spa); 275 276 spa_close(spa, FTAG); 277 } 278 279 static void 280 zhack_feature_enable_sync(void *arg, dmu_tx_t *tx) 281 { 282 spa_t *spa = dmu_tx_pool(tx)->dp_spa; 283 zfeature_info_t *feature = arg; 284 285 feature_enable_sync(spa, feature, tx); 286 287 spa_history_log_internal(spa, "zhack enable feature", tx, 288 "name=%s flags=%u", 289 feature->fi_guid, feature->fi_flags); 290 } 291 292 static void 293 zhack_do_feature_enable(int argc, char **argv) 294 { 295 int c; 296 char *desc, *target; 297 spa_t *spa; 298 objset_t *mos; 299 zfeature_info_t feature; 300 const spa_feature_t nodeps[] = { SPA_FEATURE_NONE }; 301 302 /* 303 * Features are not added to the pool's label until their refcounts 304 * are incremented, so fi_mos can just be left as false for now. 305 */ 306 desc = NULL; 307 feature.fi_uname = "zhack"; 308 feature.fi_flags = 0; 309 feature.fi_depends = nodeps; 310 feature.fi_feature = SPA_FEATURE_NONE; 311 312 optind = 1; 313 while ((c = getopt(argc, argv, "+rd:")) != -1) { 314 switch (c) { 315 case 'r': 316 feature.fi_flags |= ZFEATURE_FLAG_READONLY_COMPAT; 317 break; 318 case 'd': 319 if (desc != NULL) 320 free(desc); 321 desc = strdup(optarg); 322 break; 323 default: 324 usage(); 325 break; 326 } 327 } 328 329 if (desc == NULL) 330 desc = strdup("zhack injected"); 331 feature.fi_desc = desc; 332 333 argc -= optind; 334 argv += optind; 335 336 if (argc < 2) { 337 (void) fprintf(stderr, "error: missing feature or pool name\n"); 338 usage(); 339 } 340 target = argv[0]; 341 feature.fi_guid = argv[1]; 342 343 if (!zfeature_is_valid_guid(feature.fi_guid)) 344 fatal(NULL, FTAG, "invalid feature guid: %s", feature.fi_guid); 345 346 zhack_spa_open(target, B_FALSE, FTAG, &spa); 347 mos = spa->spa_meta_objset; 348 349 if (zfeature_is_supported(feature.fi_guid)) 350 fatal(spa, FTAG, "'%s' is a real feature, will not enable", 351 feature.fi_guid); 352 if (0 == zap_contains(mos, spa->spa_feat_desc_obj, feature.fi_guid)) 353 fatal(spa, FTAG, "feature already enabled: %s", 354 feature.fi_guid); 355 356 VERIFY0(dsl_sync_task(spa_name(spa), NULL, 357 zhack_feature_enable_sync, &feature, 5, ZFS_SPACE_CHECK_NORMAL)); 358 359 spa_close(spa, FTAG); 360 361 free(desc); 362 } 363 364 static void 365 feature_incr_sync(void *arg, dmu_tx_t *tx) 366 { 367 spa_t *spa = dmu_tx_pool(tx)->dp_spa; 368 zfeature_info_t *feature = arg; 369 uint64_t refcount; 370 371 mutex_enter(&spa->spa_feat_stats_lock); 372 VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount)); 373 feature_sync(spa, feature, refcount + 1, tx); 374 spa_history_log_internal(spa, "zhack feature incr", tx, 375 "name=%s", feature->fi_guid); 376 mutex_exit(&spa->spa_feat_stats_lock); 377 } 378 379 static void 380 feature_decr_sync(void *arg, dmu_tx_t *tx) 381 { 382 spa_t *spa = dmu_tx_pool(tx)->dp_spa; 383 zfeature_info_t *feature = arg; 384 uint64_t refcount; 385 386 mutex_enter(&spa->spa_feat_stats_lock); 387 VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount)); 388 feature_sync(spa, feature, refcount - 1, tx); 389 spa_history_log_internal(spa, "zhack feature decr", tx, 390 "name=%s", feature->fi_guid); 391 mutex_exit(&spa->spa_feat_stats_lock); 392 } 393 394 static void 395 zhack_do_feature_ref(int argc, char **argv) 396 { 397 int c; 398 char *target; 399 boolean_t decr = B_FALSE; 400 spa_t *spa; 401 objset_t *mos; 402 zfeature_info_t feature; 403 const spa_feature_t nodeps[] = { SPA_FEATURE_NONE }; 404 405 /* 406 * fi_desc does not matter here because it was written to disk 407 * when the feature was enabled, but we need to properly set the 408 * feature for read or write based on the information we read off 409 * disk later. 410 */ 411 feature.fi_uname = "zhack"; 412 feature.fi_flags = 0; 413 feature.fi_desc = NULL; 414 feature.fi_depends = nodeps; 415 feature.fi_feature = SPA_FEATURE_NONE; 416 417 optind = 1; 418 while ((c = getopt(argc, argv, "+md")) != -1) { 419 switch (c) { 420 case 'm': 421 feature.fi_flags |= ZFEATURE_FLAG_MOS; 422 break; 423 case 'd': 424 decr = B_TRUE; 425 break; 426 default: 427 usage(); 428 break; 429 } 430 } 431 argc -= optind; 432 argv += optind; 433 434 if (argc < 2) { 435 (void) fprintf(stderr, "error: missing feature or pool name\n"); 436 usage(); 437 } 438 target = argv[0]; 439 feature.fi_guid = argv[1]; 440 441 if (!zfeature_is_valid_guid(feature.fi_guid)) 442 fatal(NULL, FTAG, "invalid feature guid: %s", feature.fi_guid); 443 444 zhack_spa_open(target, B_FALSE, FTAG, &spa); 445 mos = spa->spa_meta_objset; 446 447 if (zfeature_is_supported(feature.fi_guid)) { 448 fatal(spa, FTAG, 449 "'%s' is a real feature, will not change refcount", 450 feature.fi_guid); 451 } 452 453 if (0 == zap_contains(mos, spa->spa_feat_for_read_obj, 454 feature.fi_guid)) { 455 feature.fi_flags &= ~ZFEATURE_FLAG_READONLY_COMPAT; 456 } else if (0 == zap_contains(mos, spa->spa_feat_for_write_obj, 457 feature.fi_guid)) { 458 feature.fi_flags |= ZFEATURE_FLAG_READONLY_COMPAT; 459 } else { 460 fatal(spa, FTAG, "feature is not enabled: %s", feature.fi_guid); 461 } 462 463 if (decr) { 464 uint64_t count; 465 if (feature_get_refcount_from_disk(spa, &feature, 466 &count) == 0 && count == 0) { 467 fatal(spa, FTAG, "feature refcount already 0: %s", 468 feature.fi_guid); 469 } 470 } 471 472 VERIFY0(dsl_sync_task(spa_name(spa), NULL, 473 decr ? feature_decr_sync : feature_incr_sync, &feature, 474 5, ZFS_SPACE_CHECK_NORMAL)); 475 476 spa_close(spa, FTAG); 477 } 478 479 static int 480 zhack_do_feature(int argc, char **argv) 481 { 482 char *subcommand; 483 484 argc--; 485 argv++; 486 if (argc == 0) { 487 (void) fprintf(stderr, 488 "error: no feature operation specified\n"); 489 usage(); 490 } 491 492 subcommand = argv[0]; 493 if (strcmp(subcommand, "stat") == 0) { 494 zhack_do_feature_stat(argc, argv); 495 } else if (strcmp(subcommand, "enable") == 0) { 496 zhack_do_feature_enable(argc, argv); 497 } else if (strcmp(subcommand, "ref") == 0) { 498 zhack_do_feature_ref(argc, argv); 499 } else { 500 (void) fprintf(stderr, "error: unknown subcommand: %s\n", 501 subcommand); 502 usage(); 503 } 504 505 return (0); 506 } 507 508 static boolean_t 509 strstarts(const char *a, const char *b) 510 { 511 return (strncmp(a, b, strlen(b)) == 0); 512 } 513 514 static void 515 metaslab_force_alloc(metaslab_t *msp, uint64_t start, uint64_t size, 516 dmu_tx_t *tx) 517 { 518 ASSERT(msp->ms_disabled); 519 ASSERT(MUTEX_HELD(&msp->ms_lock)); 520 uint64_t txg = dmu_tx_get_txg(tx); 521 522 uint64_t off = start; 523 while (off < start + size) { 524 uint64_t ostart, osize; 525 boolean_t found = zfs_range_tree_find_in(msp->ms_allocatable, 526 off, start + size - off, &ostart, &osize); 527 if (!found) 528 break; 529 zfs_range_tree_remove(msp->ms_allocatable, ostart, osize); 530 531 if (zfs_range_tree_is_empty(msp->ms_allocating[txg & TXG_MASK])) 532 vdev_dirty(msp->ms_group->mg_vd, VDD_METASLAB, msp, 533 txg); 534 535 zfs_range_tree_add(msp->ms_allocating[txg & TXG_MASK], ostart, 536 osize); 537 msp->ms_allocating_total += osize; 538 off = ostart + osize; 539 } 540 } 541 542 static void 543 zhack_do_metaslab_leak(int argc, char **argv) 544 { 545 int c; 546 char *target; 547 spa_t *spa; 548 549 optind = 1; 550 boolean_t force = B_FALSE; 551 while ((c = getopt(argc, argv, "f")) != -1) { 552 switch (c) { 553 case 'f': 554 force = B_TRUE; 555 break; 556 default: 557 usage(); 558 break; 559 } 560 } 561 562 argc -= optind; 563 argv += optind; 564 565 if (argc < 1) { 566 (void) fprintf(stderr, "error: missing pool name\n"); 567 usage(); 568 } 569 target = argv[0]; 570 571 zhack_spa_open(target, B_FALSE, FTAG, &spa); 572 spa_config_enter(spa, SCL_VDEV | SCL_ALLOC, FTAG, RW_READER); 573 574 char *line = NULL; 575 size_t cap = 0; 576 577 vdev_t *vd = NULL; 578 metaslab_t *prev = NULL; 579 dmu_tx_t *tx = NULL; 580 while (getline(&line, &cap, stdin) > 0) { 581 if (strstarts(line, "\tvdev ")) { 582 uint64_t vdev_id, ms_shift; 583 if (sscanf(line, 584 "\tvdev %10"PRIu64"\t%*s metaslab shift %4"PRIu64, 585 &vdev_id, &ms_shift) == 1) { 586 VERIFY3U(sscanf(line, "\tvdev %"PRIu64 587 "\t metaslab shift %4"PRIu64, 588 &vdev_id, &ms_shift), ==, 2); 589 } 590 vd = vdev_lookup_top(spa, vdev_id); 591 if (vd == NULL) { 592 fprintf(stderr, "error: no such vdev with " 593 "id %"PRIu64"\n", vdev_id); 594 break; 595 } 596 if (tx) { 597 dmu_tx_commit(tx); 598 mutex_exit(&prev->ms_lock); 599 metaslab_enable(prev, B_FALSE, B_FALSE); 600 tx = NULL; 601 prev = NULL; 602 } 603 if (vd->vdev_ms_shift != ms_shift) { 604 fprintf(stderr, "error: ms_shift mismatch: %" 605 PRIu64" != %"PRIu64"\n", vd->vdev_ms_shift, 606 ms_shift); 607 break; 608 } 609 } else if (strstarts(line, "\tmetaslabs ")) { 610 uint64_t ms_count; 611 VERIFY3U(sscanf(line, "\tmetaslabs %"PRIu64, &ms_count), 612 ==, 1); 613 ASSERT(vd); 614 if (!force && vd->vdev_ms_count != ms_count) { 615 fprintf(stderr, "error: ms_count mismatch: %" 616 PRIu64" != %"PRIu64"\n", vd->vdev_ms_count, 617 ms_count); 618 break; 619 } 620 } else if (strstarts(line, "ALLOC:")) { 621 uint64_t start, size; 622 VERIFY3U(sscanf(line, "ALLOC: %"PRIu64" %"PRIu64"\n", 623 &start, &size), ==, 2); 624 625 ASSERT(vd); 626 metaslab_t *cur = 627 vd->vdev_ms[start >> vd->vdev_ms_shift]; 628 if (prev != cur) { 629 if (prev) { 630 dmu_tx_commit(tx); 631 mutex_exit(&prev->ms_lock); 632 metaslab_enable(prev, B_FALSE, B_FALSE); 633 } 634 ASSERT(cur); 635 metaslab_disable(cur); 636 mutex_enter(&cur->ms_lock); 637 metaslab_load(cur); 638 prev = cur; 639 tx = dmu_tx_create_dd( 640 spa_get_dsl(vd->vdev_spa)->dp_root_dir); 641 dmu_tx_assign(tx, DMU_TX_WAIT); 642 } 643 644 metaslab_force_alloc(cur, start, size, tx); 645 } else { 646 continue; 647 } 648 } 649 if (tx) { 650 dmu_tx_commit(tx); 651 mutex_exit(&prev->ms_lock); 652 metaslab_enable(prev, B_FALSE, B_FALSE); 653 tx = NULL; 654 prev = NULL; 655 } 656 if (line) 657 free(line); 658 659 spa_config_exit(spa, SCL_VDEV | SCL_ALLOC, FTAG); 660 spa_close(spa, FTAG); 661 } 662 663 static int 664 zhack_do_metaslab(int argc, char **argv) 665 { 666 char *subcommand; 667 668 argc--; 669 argv++; 670 if (argc == 0) { 671 (void) fprintf(stderr, 672 "error: no metaslab operation specified\n"); 673 usage(); 674 } 675 676 subcommand = argv[0]; 677 if (strcmp(subcommand, "leak") == 0) { 678 zhack_do_metaslab_leak(argc, argv); 679 } else { 680 (void) fprintf(stderr, "error: unknown subcommand: %s\n", 681 subcommand); 682 usage(); 683 } 684 685 return (0); 686 } 687 688 #define ASHIFT_UBERBLOCK_SHIFT(ashift) \ 689 MIN(MAX(ashift, UBERBLOCK_SHIFT), \ 690 MAX_UBERBLOCK_SHIFT) 691 #define ASHIFT_UBERBLOCK_SIZE(ashift) \ 692 (1ULL << ASHIFT_UBERBLOCK_SHIFT(ashift)) 693 694 #define REPAIR_LABEL_STATUS_CKSUM (1 << 0) 695 #define REPAIR_LABEL_STATUS_UB (1 << 1) 696 697 static int 698 zhack_repair_read_label(const int fd, vdev_label_t *vl, 699 const uint64_t label_offset, const int l) 700 { 701 const int err = pread64(fd, vl, sizeof (vdev_label_t), label_offset); 702 703 if (err == -1) { 704 (void) fprintf(stderr, 705 "error: cannot read label %d: %s\n", 706 l, strerror(errno)); 707 return (err); 708 } else if (err != sizeof (vdev_label_t)) { 709 (void) fprintf(stderr, 710 "error: bad label %d read size\n", l); 711 return (err); 712 } 713 714 return (0); 715 } 716 717 static int 718 zhack_repair_get_byteswap(const zio_eck_t *vdev_eck, const int l, int *byteswap) 719 { 720 if (vdev_eck->zec_magic == ZEC_MAGIC) { 721 *byteswap = B_FALSE; 722 } else if (vdev_eck->zec_magic == BSWAP_64((uint64_t)ZEC_MAGIC)) { 723 *byteswap = B_TRUE; 724 } else { 725 (void) fprintf(stderr, "error: label %d: " 726 "Expected the nvlist checksum magic number but instead got " 727 "0x%" PRIx64 "\n", 728 l, vdev_eck->zec_magic); 729 return (1); 730 } 731 return (0); 732 } 733 734 static void 735 zhack_repair_calc_cksum(const int byteswap, void *data, const uint64_t offset, 736 const uint64_t abdsize, zio_eck_t *eck, zio_cksum_t *cksum) 737 { 738 zio_cksum_t verifier; 739 zio_cksum_t current_cksum; 740 zio_checksum_info_t *ci; 741 abd_t *abd; 742 743 ZIO_SET_CHECKSUM(&verifier, offset, 0, 0, 0); 744 745 if (byteswap) 746 byteswap_uint64_array(&verifier, sizeof (zio_cksum_t)); 747 748 current_cksum = eck->zec_cksum; 749 eck->zec_cksum = verifier; 750 751 ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL]; 752 abd = abd_get_from_buf(data, abdsize); 753 ci->ci_func[byteswap](abd, abdsize, NULL, cksum); 754 abd_free(abd); 755 756 eck->zec_cksum = current_cksum; 757 } 758 759 static int 760 zhack_repair_get_ashift(nvlist_t *cfg, const int l, uint64_t *ashift) 761 { 762 int err; 763 nvlist_t *vdev_tree_cfg; 764 765 err = nvlist_lookup_nvlist(cfg, 766 ZPOOL_CONFIG_VDEV_TREE, &vdev_tree_cfg); 767 if (err) { 768 (void) fprintf(stderr, 769 "error: label %d: cannot find nvlist key %s\n", 770 l, ZPOOL_CONFIG_VDEV_TREE); 771 return (err); 772 } 773 774 err = nvlist_lookup_uint64(vdev_tree_cfg, 775 ZPOOL_CONFIG_ASHIFT, ashift); 776 if (err) { 777 (void) fprintf(stderr, 778 "error: label %d: cannot find nvlist key %s\n", 779 l, ZPOOL_CONFIG_ASHIFT); 780 return (err); 781 } 782 783 if (*ashift == 0) { 784 (void) fprintf(stderr, 785 "error: label %d: nvlist key %s is zero\n", 786 l, ZPOOL_CONFIG_ASHIFT); 787 return (1); 788 } 789 790 return (0); 791 } 792 793 static int 794 zhack_repair_undetach(uberblock_t *ub, nvlist_t *cfg, const int l) 795 { 796 /* 797 * Uberblock root block pointer has valid birth TXG. 798 * Copying it to the label NVlist 799 */ 800 if (BP_GET_LOGICAL_BIRTH(&ub->ub_rootbp) != 0) { 801 const uint64_t txg = BP_GET_LOGICAL_BIRTH(&ub->ub_rootbp); 802 int err; 803 804 ub->ub_txg = txg; 805 806 err = nvlist_remove_all(cfg, ZPOOL_CONFIG_CREATE_TXG); 807 if (err) { 808 (void) fprintf(stderr, 809 "error: label %d: " 810 "Failed to remove pool creation TXG\n", 811 l); 812 return (err); 813 } 814 815 err = nvlist_remove_all(cfg, ZPOOL_CONFIG_POOL_TXG); 816 if (err) { 817 (void) fprintf(stderr, 818 "error: label %d: Failed to remove pool TXG to " 819 "be replaced.\n", 820 l); 821 return (err); 822 } 823 824 err = nvlist_add_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, txg); 825 if (err) { 826 (void) fprintf(stderr, 827 "error: label %d: " 828 "Failed to add pool TXG of %" PRIu64 "\n", 829 l, txg); 830 return (err); 831 } 832 } 833 834 return (0); 835 } 836 837 static boolean_t 838 zhack_repair_write_label(const int l, const int fd, const int byteswap, 839 void *data, zio_eck_t *eck, const uint64_t offset, const uint64_t abdsize) 840 { 841 zio_cksum_t actual_cksum; 842 zhack_repair_calc_cksum(byteswap, data, offset, abdsize, eck, 843 &actual_cksum); 844 zio_cksum_t expected_cksum = eck->zec_cksum; 845 ssize_t err; 846 847 if (ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) 848 return (B_FALSE); 849 850 eck->zec_cksum = actual_cksum; 851 852 err = pwrite64(fd, data, abdsize, offset); 853 if (err == -1) { 854 (void) fprintf(stderr, "error: cannot write label %d: %s\n", 855 l, strerror(errno)); 856 return (B_FALSE); 857 } else if (err != abdsize) { 858 (void) fprintf(stderr, "error: bad write size label %d\n", l); 859 return (B_FALSE); 860 } else { 861 (void) fprintf(stderr, 862 "label %d: wrote %" PRIu64 " bytes at offset %" PRIu64 "\n", 863 l, abdsize, offset); 864 } 865 866 return (B_TRUE); 867 } 868 869 static void 870 zhack_repair_write_uberblock(vdev_label_t *vl, const int l, 871 const uint64_t ashift, const int fd, const int byteswap, 872 const uint64_t label_offset, uint32_t *labels_repaired) 873 { 874 void *ub_data = 875 (char *)vl + offsetof(vdev_label_t, vl_uberblock); 876 zio_eck_t *ub_eck = 877 (zio_eck_t *) 878 ((char *)(ub_data) + (ASHIFT_UBERBLOCK_SIZE(ashift))) - 1; 879 880 if (ub_eck->zec_magic != 0) { 881 (void) fprintf(stderr, 882 "error: label %d: " 883 "Expected Uberblock checksum magic number to " 884 "be 0, but got %" PRIu64 "\n", 885 l, ub_eck->zec_magic); 886 (void) fprintf(stderr, "It would appear there's already " 887 "a checksum for the uberblock.\n"); 888 return; 889 } 890 891 892 ub_eck->zec_magic = byteswap ? BSWAP_64(ZEC_MAGIC) : ZEC_MAGIC; 893 894 if (zhack_repair_write_label(l, fd, byteswap, 895 ub_data, ub_eck, 896 label_offset + offsetof(vdev_label_t, vl_uberblock), 897 ASHIFT_UBERBLOCK_SIZE(ashift))) 898 labels_repaired[l] |= REPAIR_LABEL_STATUS_UB; 899 } 900 901 static void 902 zhack_repair_print_cksum(FILE *stream, const zio_cksum_t *cksum) 903 { 904 (void) fprintf(stream, 905 "%016llx:%016llx:%016llx:%016llx", 906 (u_longlong_t)cksum->zc_word[0], 907 (u_longlong_t)cksum->zc_word[1], 908 (u_longlong_t)cksum->zc_word[2], 909 (u_longlong_t)cksum->zc_word[3]); 910 } 911 912 static int 913 zhack_repair_test_cksum(const int byteswap, void *vdev_data, 914 zio_eck_t *vdev_eck, const uint64_t vdev_phys_offset, const int l) 915 { 916 const zio_cksum_t expected_cksum = vdev_eck->zec_cksum; 917 zio_cksum_t actual_cksum; 918 zhack_repair_calc_cksum(byteswap, vdev_data, vdev_phys_offset, 919 VDEV_PHYS_SIZE, vdev_eck, &actual_cksum); 920 const uint64_t expected_magic = byteswap ? 921 BSWAP_64(ZEC_MAGIC) : ZEC_MAGIC; 922 const uint64_t actual_magic = vdev_eck->zec_magic; 923 int err = 0; 924 925 if (actual_magic != expected_magic) { 926 (void) fprintf(stderr, "error: label %d: " 927 "Expected " 928 "the nvlist checksum magic number to not be %" 929 PRIu64 " not %" PRIu64 "\n", 930 l, expected_magic, actual_magic); 931 err = ECKSUM; 932 } 933 if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) { 934 (void) fprintf(stderr, "error: label %d: " 935 "Expected the nvlist checksum to be ", l); 936 (void) zhack_repair_print_cksum(stderr, 937 &expected_cksum); 938 (void) fprintf(stderr, " not "); 939 zhack_repair_print_cksum(stderr, &actual_cksum); 940 (void) fprintf(stderr, "\n"); 941 err = ECKSUM; 942 } 943 return (err); 944 } 945 946 static int 947 zhack_repair_unpack_cfg(vdev_label_t *vl, const int l, nvlist_t **cfg) 948 { 949 const char *cfg_keys[] = { ZPOOL_CONFIG_VERSION, 950 ZPOOL_CONFIG_POOL_STATE, ZPOOL_CONFIG_GUID }; 951 int err; 952 953 err = nvlist_unpack(vl->vl_vdev_phys.vp_nvlist, 954 VDEV_PHYS_SIZE - sizeof (zio_eck_t), cfg, 0); 955 if (err) { 956 (void) fprintf(stderr, 957 "error: cannot unpack nvlist label %d\n", l); 958 return (err); 959 } 960 961 for (int i = 0; i < ARRAY_SIZE(cfg_keys); i++) { 962 uint64_t val; 963 err = nvlist_lookup_uint64(*cfg, cfg_keys[i], &val); 964 if (err) { 965 (void) fprintf(stderr, 966 "error: label %d, %d: " 967 "cannot find nvlist key %s\n", 968 l, i, cfg_keys[i]); 969 return (err); 970 } 971 } 972 973 return (0); 974 } 975 976 static void 977 zhack_repair_one_label(const zhack_repair_op_t op, const int fd, 978 vdev_label_t *vl, const uint64_t label_offset, const int l, 979 uint32_t *labels_repaired) 980 { 981 ssize_t err; 982 uberblock_t *ub = (uberblock_t *)vl->vl_uberblock; 983 void *vdev_data = 984 (char *)vl + offsetof(vdev_label_t, vl_vdev_phys); 985 zio_eck_t *vdev_eck = 986 (zio_eck_t *)((char *)(vdev_data) + VDEV_PHYS_SIZE) - 1; 987 const uint64_t vdev_phys_offset = 988 label_offset + offsetof(vdev_label_t, vl_vdev_phys); 989 nvlist_t *cfg; 990 uint64_t ashift; 991 int byteswap; 992 993 err = zhack_repair_read_label(fd, vl, label_offset, l); 994 if (err) 995 return; 996 997 err = zhack_repair_get_byteswap(vdev_eck, l, &byteswap); 998 if (err) 999 return; 1000 1001 if (byteswap) { 1002 byteswap_uint64_array(&vdev_eck->zec_cksum, 1003 sizeof (zio_cksum_t)); 1004 vdev_eck->zec_magic = BSWAP_64(vdev_eck->zec_magic); 1005 } 1006 1007 if ((op & ZHACK_REPAIR_OP_CKSUM) == 0 && 1008 zhack_repair_test_cksum(byteswap, vdev_data, vdev_eck, 1009 vdev_phys_offset, l) != 0) { 1010 (void) fprintf(stderr, "It would appear checksums are " 1011 "corrupted. Try zhack repair label -c <device>\n"); 1012 return; 1013 } 1014 1015 err = zhack_repair_unpack_cfg(vl, l, &cfg); 1016 if (err) 1017 return; 1018 1019 if ((op & ZHACK_REPAIR_OP_UNDETACH) != 0) { 1020 char *buf; 1021 size_t buflen; 1022 1023 if (ub->ub_txg != 0) { 1024 (void) fprintf(stderr, 1025 "error: label %d: UB TXG of 0 expected, but got %" 1026 PRIu64 "\n", l, ub->ub_txg); 1027 (void) fprintf(stderr, "It would appear the device was " 1028 "not properly detached.\n"); 1029 return; 1030 } 1031 1032 err = zhack_repair_get_ashift(cfg, l, &ashift); 1033 if (err) 1034 return; 1035 1036 err = zhack_repair_undetach(ub, cfg, l); 1037 if (err) 1038 return; 1039 1040 buf = vl->vl_vdev_phys.vp_nvlist; 1041 buflen = VDEV_PHYS_SIZE - sizeof (zio_eck_t); 1042 if (nvlist_pack(cfg, &buf, &buflen, NV_ENCODE_XDR, 0) != 0) { 1043 (void) fprintf(stderr, 1044 "error: label %d: Failed to pack nvlist\n", l); 1045 return; 1046 } 1047 1048 zhack_repair_write_uberblock(vl, 1049 l, ashift, fd, byteswap, label_offset, labels_repaired); 1050 } 1051 1052 if (zhack_repair_write_label(l, fd, byteswap, vdev_data, vdev_eck, 1053 vdev_phys_offset, VDEV_PHYS_SIZE)) 1054 labels_repaired[l] |= REPAIR_LABEL_STATUS_CKSUM; 1055 1056 fsync(fd); 1057 } 1058 1059 static const char * 1060 zhack_repair_label_status(const uint32_t label_status, 1061 const uint32_t to_check) 1062 { 1063 return ((label_status & to_check) != 0 ? "repaired" : "skipped"); 1064 } 1065 1066 static int 1067 zhack_label_repair(const zhack_repair_op_t op, const int argc, char **argv) 1068 { 1069 uint32_t labels_repaired[VDEV_LABELS] = {0}; 1070 vdev_label_t labels[VDEV_LABELS] = {{{0}}}; 1071 struct stat64 st; 1072 int fd; 1073 off_t filesize; 1074 uint32_t repaired = 0; 1075 1076 abd_init(); 1077 1078 if (argc < 1) { 1079 (void) fprintf(stderr, "error: missing device\n"); 1080 usage(); 1081 } 1082 1083 if ((fd = open(argv[0], O_RDWR)) == -1) 1084 fatal(NULL, FTAG, "cannot open '%s': %s", argv[0], 1085 strerror(errno)); 1086 1087 if (fstat64_blk(fd, &st) != 0) 1088 fatal(NULL, FTAG, "cannot stat '%s': %s", argv[0], 1089 strerror(errno)); 1090 1091 filesize = st.st_size; 1092 (void) fprintf(stderr, "Calculated filesize to be %jd\n", 1093 (intmax_t)filesize); 1094 1095 if (filesize % sizeof (vdev_label_t) != 0) 1096 filesize = 1097 (filesize / sizeof (vdev_label_t)) * sizeof (vdev_label_t); 1098 1099 for (int l = 0; l < VDEV_LABELS; l++) { 1100 zhack_repair_one_label(op, fd, &labels[l], 1101 vdev_label_offset(filesize, l, 0), l, labels_repaired); 1102 } 1103 1104 close(fd); 1105 1106 abd_fini(); 1107 1108 for (int l = 0; l < VDEV_LABELS; l++) { 1109 const uint32_t lr = labels_repaired[l]; 1110 (void) printf("label %d: ", l); 1111 (void) printf("uberblock: %s ", 1112 zhack_repair_label_status(lr, REPAIR_LABEL_STATUS_UB)); 1113 (void) printf("checksum: %s\n", 1114 zhack_repair_label_status(lr, REPAIR_LABEL_STATUS_CKSUM)); 1115 repaired |= lr; 1116 } 1117 1118 if (repaired > 0) 1119 return (0); 1120 1121 return (1); 1122 } 1123 1124 static int 1125 zhack_do_label_repair(int argc, char **argv) 1126 { 1127 zhack_repair_op_t op = ZHACK_REPAIR_OP_UNKNOWN; 1128 int c; 1129 1130 optind = 1; 1131 while ((c = getopt(argc, argv, "+cu")) != -1) { 1132 switch (c) { 1133 case 'c': 1134 op |= ZHACK_REPAIR_OP_CKSUM; 1135 break; 1136 case 'u': 1137 op |= ZHACK_REPAIR_OP_UNDETACH; 1138 break; 1139 default: 1140 usage(); 1141 break; 1142 } 1143 } 1144 1145 argc -= optind; 1146 argv += optind; 1147 1148 if (op == ZHACK_REPAIR_OP_UNKNOWN) 1149 op = ZHACK_REPAIR_OP_CKSUM; 1150 1151 return (zhack_label_repair(op, argc, argv)); 1152 } 1153 1154 static int 1155 zhack_do_label(int argc, char **argv) 1156 { 1157 char *subcommand; 1158 int err; 1159 1160 argc--; 1161 argv++; 1162 if (argc == 0) { 1163 (void) fprintf(stderr, 1164 "error: no label operation specified\n"); 1165 usage(); 1166 } 1167 1168 subcommand = argv[0]; 1169 if (strcmp(subcommand, "repair") == 0) { 1170 err = zhack_do_label_repair(argc, argv); 1171 } else { 1172 (void) fprintf(stderr, "error: unknown subcommand: %s\n", 1173 subcommand); 1174 usage(); 1175 } 1176 1177 return (err); 1178 } 1179 1180 #define MAX_NUM_PATHS 1024 1181 1182 int 1183 main(int argc, char **argv) 1184 { 1185 char *path[MAX_NUM_PATHS]; 1186 const char *subcommand; 1187 int rv = 0; 1188 int c; 1189 1190 g_importargs.path = path; 1191 1192 dprintf_setup(&argc, argv); 1193 zfs_prop_init(); 1194 1195 while ((c = getopt(argc, argv, "+c:d:o:")) != -1) { 1196 switch (c) { 1197 case 'c': 1198 g_importargs.cachefile = optarg; 1199 break; 1200 case 'd': 1201 assert(g_importargs.paths < MAX_NUM_PATHS); 1202 g_importargs.path[g_importargs.paths++] = optarg; 1203 break; 1204 case 'o': 1205 if (handle_tunable_option(optarg, B_FALSE) != 0) 1206 exit(1); 1207 break; 1208 default: 1209 usage(); 1210 break; 1211 } 1212 } 1213 1214 argc -= optind; 1215 argv += optind; 1216 optind = 1; 1217 1218 if (argc == 0) { 1219 (void) fprintf(stderr, "error: no command specified\n"); 1220 usage(); 1221 } 1222 1223 subcommand = argv[0]; 1224 1225 if (strcmp(subcommand, "feature") == 0) { 1226 rv = zhack_do_feature(argc, argv); 1227 } else if (strcmp(subcommand, "label") == 0) { 1228 return (zhack_do_label(argc, argv)); 1229 } else if (strcmp(subcommand, "metaslab") == 0) { 1230 rv = zhack_do_metaslab(argc, argv); 1231 } else { 1232 (void) fprintf(stderr, "error: unknown subcommand: %s\n", 1233 subcommand); 1234 usage(); 1235 } 1236 1237 if (!g_readonly && spa_export(g_pool, NULL, B_TRUE, B_FALSE) != 0) { 1238 fatal(NULL, FTAG, "pool export failed; " 1239 "changes may not be committed to disk\n"); 1240 } 1241 1242 kernel_fini(); 1243 1244 return (rv); 1245 } 1246