1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3 * CDDL HEADER START
4 *
5 * The contents of this file are subject to the terms of the
6 * Common Development and Distribution License (the "License").
7 * You may not use this file except in compliance with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or https://opensource.org/licenses/CDDL-1.0.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22
23 /*
24 * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
25 * Copyright (c) 2013 Steven Hartland. All rights reserved.
26 */
27
/*
 * zhack is a debugging tool that can write changes to a ZFS pool using
 * libzpool for testing purposes. Altering pools with zhack is unsupported
 * and may result in corrupted pools.
 */
33
34 #include <zfs_prop.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <ctype.h>
38 #include <sys/stat.h>
39 #include <sys/zfs_context.h>
40 #include <sys/spa.h>
41 #include <sys/spa_impl.h>
42 #include <sys/dmu.h>
43 #include <sys/zap.h>
44 #include <sys/zfs_znode.h>
45 #include <sys/dsl_synctask.h>
46 #include <sys/vdev.h>
47 #include <sys/vdev_impl.h>
48 #include <sys/fs/zfs.h>
49 #include <sys/dmu_objset.h>
50 #include <sys/dsl_pool.h>
51 #include <sys/zio_checksum.h>
52 #include <sys/zio_compress.h>
53 #include <sys/zfeature.h>
54 #include <sys/dmu_tx.h>
55 #include <zfeature_common.h>
56 #include <libzutil.h>
57 #include <sys/metaslab_impl.h>
58 #include <libzpool.h>
59
/* Pool discovery arguments; main() fills these in from -c and -d. */
static importargs_t g_importargs;
/* Copy of the pool name given to zhack_import(); used when exporting. */
static char *g_pool;
/* Set by zhack_import(): B_TRUE when the pool was imported read-only. */
static boolean_t g_readonly;
63
/*
 * Operations selectable for "label repair". The non-zero values are
 * distinct bits so that several operations can be OR-ed together.
 */
typedef enum {
	/* No operation chosen on the command line. */
	ZHACK_REPAIR_OP_UNKNOWN = 0,
	/* Selected by -c. */
	ZHACK_REPAIR_OP_CKSUM = (1 << 0),
	/* Selected by -u. */
	ZHACK_REPAIR_OP_UNDETACH = (1 << 1)
} zhack_repair_op_t;
69
/*
 * Print the command synopsis to stderr and exit with status 1.
 *
 * The synopsis lists every option the subcommand parsers in this file
 * accept, including "label repair [-cu]" and "metaslab leak [-f]".
 */
static __attribute__((noreturn)) void
usage(void)
{
	(void) fprintf(stderr,
	    "Usage: zhack [-o tunable] [-c cachefile] [-d dir] <subcommand> "
	    "<args> ...\n"
	    "where <subcommand> <args> is one of the following:\n"
	    "\n");

	(void) fprintf(stderr,
	    "    feature stat <pool>\n"
	    "        print information about enabled features\n"
	    "    feature enable [-r] [-d desc] <pool> <feature>\n"
	    "        add a new enabled feature to the pool\n"
	    "        -d <desc> sets the feature's description\n"
	    "        -r        set read-only compatible flag for feature\n"
	    "    feature ref [-md] <pool> <feature>\n"
	    "        change the refcount on the given feature\n"
	    "        -d        decrease instead of increase the refcount\n"
	    "        -m        add the feature to the label if increasing "
	    "refcount\n"
	    "\n"
	    "    <feature> : should be a feature guid\n"
	    "\n"
	    "    label repair [-cu] <device>\n"
	    "        repair labels of a specified device according to options\n"
	    "        which may be combined to do their functions in one call\n"
	    "        -c        repair corrupted label checksums\n"
	    "        -u        restore the label on a detached device\n"
	    "\n"
	    "    <device> : path to vdev\n"
	    "\n"
	    "    metaslab leak [-f] <pool>\n"
	    "        apply allocation map from zdb to specified pool\n"
	    "        -f        continue even if the metaslab count does not "
	    "match\n");
	exit(1);
}
105
106
107 static __attribute__((format(printf, 3, 4))) __attribute__((noreturn)) void
fatal(spa_t * spa,const void * tag,const char * fmt,...)108 fatal(spa_t *spa, const void *tag, const char *fmt, ...)
109 {
110 va_list ap;
111
112 if (spa != NULL) {
113 spa_close(spa, tag);
114 (void) spa_export(g_pool, NULL, B_TRUE, B_FALSE);
115 }
116
117 va_start(ap, fmt);
118 (void) fputs("zhack: ", stderr);
119 (void) vfprintf(stderr, fmt, ap);
120 va_end(ap);
121 (void) fputc('\n', stderr);
122
123 exit(1);
124 }
125
126 static int
space_delta_cb(dmu_object_type_t bonustype,const void * data,zfs_file_info_t * zoi)127 space_delta_cb(dmu_object_type_t bonustype, const void *data,
128 zfs_file_info_t *zoi)
129 {
130 (void) data, (void) zoi;
131
132 /*
133 * Is it a valid type of object to track?
134 */
135 if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA)
136 return (ENOENT);
137 (void) fprintf(stderr, "modifying object that needs user accounting");
138 abort();
139 }
140
141 /*
142 * Target is the dataset whose pool we want to open.
143 */
144 static void
zhack_import(char * target,boolean_t readonly)145 zhack_import(char *target, boolean_t readonly)
146 {
147 nvlist_t *config;
148 nvlist_t *props;
149 int error;
150
151 kernel_init(readonly ? SPA_MODE_READ :
152 (SPA_MODE_READ | SPA_MODE_WRITE));
153
154 dmu_objset_register_type(DMU_OST_ZFS, space_delta_cb);
155
156 g_readonly = readonly;
157 g_importargs.can_be_active = readonly;
158 g_pool = strdup(target);
159
160 libpc_handle_t lpch = {
161 .lpc_lib_handle = NULL,
162 .lpc_ops = &libzpool_config_ops,
163 .lpc_printerr = B_TRUE
164 };
165 error = zpool_find_config(&lpch, target, &config, &g_importargs);
166 if (error)
167 fatal(NULL, FTAG, "cannot import '%s'", target);
168
169 props = NULL;
170 if (readonly) {
171 VERIFY0(nvlist_alloc(&props, NV_UNIQUE_NAME, 0));
172 VERIFY0(nvlist_add_uint64(props,
173 zpool_prop_to_name(ZPOOL_PROP_READONLY), 1));
174 }
175
176 zfeature_checks_disable = B_TRUE;
177 error = spa_import(target, config, props,
178 (readonly ? ZFS_IMPORT_SKIP_MMP : ZFS_IMPORT_NORMAL));
179 fnvlist_free(config);
180 zfeature_checks_disable = B_FALSE;
181 if (error == EEXIST)
182 error = 0;
183
184 if (error)
185 fatal(NULL, FTAG, "can't import '%s': %s", target,
186 strerror(error));
187 }
188
189 static void
zhack_spa_open(char * target,boolean_t readonly,const void * tag,spa_t ** spa)190 zhack_spa_open(char *target, boolean_t readonly, const void *tag, spa_t **spa)
191 {
192 int err;
193
194 zhack_import(target, readonly);
195
196 zfeature_checks_disable = B_TRUE;
197 err = spa_open(target, spa, tag);
198 zfeature_checks_disable = B_FALSE;
199
200 if (err != 0)
201 fatal(*spa, FTAG, "cannot open '%s': %s", target,
202 strerror(err));
203 if (spa_version(*spa) < SPA_VERSION_FEATURES) {
204 fatal(*spa, FTAG, "'%s' has version %d, features not enabled",
205 target, (int)spa_version(*spa));
206 }
207 }
208
209 static void
dump_obj(objset_t * os,uint64_t obj,const char * name)210 dump_obj(objset_t *os, uint64_t obj, const char *name)
211 {
212 zap_cursor_t zc;
213 zap_attribute_t *za = zap_attribute_long_alloc();
214
215 (void) printf("%s_obj:\n", name);
216
217 for (zap_cursor_init(&zc, os, obj);
218 zap_cursor_retrieve(&zc, za) == 0;
219 zap_cursor_advance(&zc)) {
220 if (za->za_integer_length == 8) {
221 ASSERT(za->za_num_integers == 1);
222 (void) printf("\t%s = %llu\n",
223 za->za_name, (u_longlong_t)za->za_first_integer);
224 } else {
225 ASSERT(za->za_integer_length == 1);
226 char val[1024];
227 VERIFY0(zap_lookup(os, obj, za->za_name,
228 1, sizeof (val), val));
229 (void) printf("\t%s = %s\n", za->za_name, val);
230 }
231 }
232 zap_cursor_fini(&zc);
233 zap_attribute_free(za);
234 }
235
236 static void
dump_mos(spa_t * spa)237 dump_mos(spa_t *spa)
238 {
239 nvlist_t *nv = spa->spa_label_features;
240 nvpair_t *pair;
241
242 (void) printf("label config:\n");
243 for (pair = nvlist_next_nvpair(nv, NULL);
244 pair != NULL;
245 pair = nvlist_next_nvpair(nv, pair)) {
246 (void) printf("\t%s\n", nvpair_name(pair));
247 }
248 }
249
250 static void
zhack_do_feature_stat(int argc,char ** argv)251 zhack_do_feature_stat(int argc, char **argv)
252 {
253 spa_t *spa;
254 objset_t *os;
255 char *target;
256
257 argc--;
258 argv++;
259
260 if (argc < 1) {
261 (void) fprintf(stderr, "error: missing pool name\n");
262 usage();
263 }
264 target = argv[0];
265
266 zhack_spa_open(target, B_TRUE, FTAG, &spa);
267 os = spa->spa_meta_objset;
268
269 dump_obj(os, spa->spa_feat_for_read_obj, "for_read");
270 dump_obj(os, spa->spa_feat_for_write_obj, "for_write");
271 dump_obj(os, spa->spa_feat_desc_obj, "descriptions");
272 if (spa_feature_is_active(spa, SPA_FEATURE_ENABLED_TXG)) {
273 dump_obj(os, spa->spa_feat_enabled_txg_obj, "enabled_txg");
274 }
275 dump_mos(spa);
276
277 spa_close(spa, FTAG);
278 }
279
280 static void
zhack_feature_enable_sync(void * arg,dmu_tx_t * tx)281 zhack_feature_enable_sync(void *arg, dmu_tx_t *tx)
282 {
283 spa_t *spa = dmu_tx_pool(tx)->dp_spa;
284 zfeature_info_t *feature = arg;
285
286 feature_enable_sync(spa, feature, tx);
287
288 spa_history_log_internal(spa, "zhack enable feature", tx,
289 "name=%s flags=%u",
290 feature->fi_guid, feature->fi_flags);
291 }
292
293 static void
zhack_do_feature_enable(int argc,char ** argv)294 zhack_do_feature_enable(int argc, char **argv)
295 {
296 int c;
297 char *desc, *target;
298 spa_t *spa;
299 objset_t *mos;
300 zfeature_info_t feature;
301 const spa_feature_t nodeps[] = { SPA_FEATURE_NONE };
302
303 /*
304 * Features are not added to the pool's label until their refcounts
305 * are incremented, so fi_mos can just be left as false for now.
306 */
307 desc = NULL;
308 feature.fi_uname = "zhack";
309 feature.fi_flags = 0;
310 feature.fi_depends = nodeps;
311 feature.fi_feature = SPA_FEATURE_NONE;
312
313 optind = 1;
314 while ((c = getopt(argc, argv, "+rd:")) != -1) {
315 switch (c) {
316 case 'r':
317 feature.fi_flags |= ZFEATURE_FLAG_READONLY_COMPAT;
318 break;
319 case 'd':
320 if (desc != NULL)
321 free(desc);
322 desc = strdup(optarg);
323 break;
324 default:
325 usage();
326 break;
327 }
328 }
329
330 if (desc == NULL)
331 desc = strdup("zhack injected");
332 feature.fi_desc = desc;
333
334 argc -= optind;
335 argv += optind;
336
337 if (argc < 2) {
338 (void) fprintf(stderr, "error: missing feature or pool name\n");
339 usage();
340 }
341 target = argv[0];
342 feature.fi_guid = argv[1];
343
344 if (!zfeature_is_valid_guid(feature.fi_guid))
345 fatal(NULL, FTAG, "invalid feature guid: %s", feature.fi_guid);
346
347 zhack_spa_open(target, B_FALSE, FTAG, &spa);
348 mos = spa->spa_meta_objset;
349
350 if (zfeature_is_supported(feature.fi_guid))
351 fatal(spa, FTAG, "'%s' is a real feature, will not enable",
352 feature.fi_guid);
353 if (0 == zap_contains(mos, spa->spa_feat_desc_obj, feature.fi_guid))
354 fatal(spa, FTAG, "feature already enabled: %s",
355 feature.fi_guid);
356
357 VERIFY0(dsl_sync_task(spa_name(spa), NULL,
358 zhack_feature_enable_sync, &feature, 5, ZFS_SPACE_CHECK_NORMAL));
359
360 spa_close(spa, FTAG);
361
362 free(desc);
363 }
364
365 static void
feature_incr_sync(void * arg,dmu_tx_t * tx)366 feature_incr_sync(void *arg, dmu_tx_t *tx)
367 {
368 spa_t *spa = dmu_tx_pool(tx)->dp_spa;
369 zfeature_info_t *feature = arg;
370 uint64_t refcount;
371
372 mutex_enter(&spa->spa_feat_stats_lock);
373 VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount));
374 feature_sync(spa, feature, refcount + 1, tx);
375 spa_history_log_internal(spa, "zhack feature incr", tx,
376 "name=%s", feature->fi_guid);
377 mutex_exit(&spa->spa_feat_stats_lock);
378 }
379
380 static void
feature_decr_sync(void * arg,dmu_tx_t * tx)381 feature_decr_sync(void *arg, dmu_tx_t *tx)
382 {
383 spa_t *spa = dmu_tx_pool(tx)->dp_spa;
384 zfeature_info_t *feature = arg;
385 uint64_t refcount;
386
387 mutex_enter(&spa->spa_feat_stats_lock);
388 VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount));
389 feature_sync(spa, feature, refcount - 1, tx);
390 spa_history_log_internal(spa, "zhack feature decr", tx,
391 "name=%s", feature->fi_guid);
392 mutex_exit(&spa->spa_feat_stats_lock);
393 }
394
395 static void
zhack_do_feature_ref(int argc,char ** argv)396 zhack_do_feature_ref(int argc, char **argv)
397 {
398 int c;
399 char *target;
400 boolean_t decr = B_FALSE;
401 spa_t *spa;
402 objset_t *mos;
403 zfeature_info_t feature;
404 const spa_feature_t nodeps[] = { SPA_FEATURE_NONE };
405
406 /*
407 * fi_desc does not matter here because it was written to disk
408 * when the feature was enabled, but we need to properly set the
409 * feature for read or write based on the information we read off
410 * disk later.
411 */
412 feature.fi_uname = "zhack";
413 feature.fi_flags = 0;
414 feature.fi_desc = NULL;
415 feature.fi_depends = nodeps;
416 feature.fi_feature = SPA_FEATURE_NONE;
417
418 optind = 1;
419 while ((c = getopt(argc, argv, "+md")) != -1) {
420 switch (c) {
421 case 'm':
422 feature.fi_flags |= ZFEATURE_FLAG_MOS;
423 break;
424 case 'd':
425 decr = B_TRUE;
426 break;
427 default:
428 usage();
429 break;
430 }
431 }
432 argc -= optind;
433 argv += optind;
434
435 if (argc < 2) {
436 (void) fprintf(stderr, "error: missing feature or pool name\n");
437 usage();
438 }
439 target = argv[0];
440 feature.fi_guid = argv[1];
441
442 if (!zfeature_is_valid_guid(feature.fi_guid))
443 fatal(NULL, FTAG, "invalid feature guid: %s", feature.fi_guid);
444
445 zhack_spa_open(target, B_FALSE, FTAG, &spa);
446 mos = spa->spa_meta_objset;
447
448 if (zfeature_is_supported(feature.fi_guid)) {
449 fatal(spa, FTAG,
450 "'%s' is a real feature, will not change refcount",
451 feature.fi_guid);
452 }
453
454 if (0 == zap_contains(mos, spa->spa_feat_for_read_obj,
455 feature.fi_guid)) {
456 feature.fi_flags &= ~ZFEATURE_FLAG_READONLY_COMPAT;
457 } else if (0 == zap_contains(mos, spa->spa_feat_for_write_obj,
458 feature.fi_guid)) {
459 feature.fi_flags |= ZFEATURE_FLAG_READONLY_COMPAT;
460 } else {
461 fatal(spa, FTAG, "feature is not enabled: %s", feature.fi_guid);
462 }
463
464 if (decr) {
465 uint64_t count;
466 if (feature_get_refcount_from_disk(spa, &feature,
467 &count) == 0 && count == 0) {
468 fatal(spa, FTAG, "feature refcount already 0: %s",
469 feature.fi_guid);
470 }
471 }
472
473 VERIFY0(dsl_sync_task(spa_name(spa), NULL,
474 decr ? feature_decr_sync : feature_incr_sync, &feature,
475 5, ZFS_SPACE_CHECK_NORMAL));
476
477 spa_close(spa, FTAG);
478 }
479
/*
 * Dispatch the "feature" command to its stat / enable / ref handler.
 * argv[0] is "feature"; the operation name is expected in argv[1].
 * Returns 0; unknown or missing operations end in usage().
 */
static int
zhack_do_feature(int argc, char **argv)
{
	const char *op;

	argc--;
	argv++;
	if (argc == 0) {
		(void) fprintf(stderr,
		    "error: no feature operation specified\n");
		usage();
	}

	op = argv[0];
	if (strcmp(op, "stat") == 0) {
		zhack_do_feature_stat(argc, argv);
		return (0);
	}
	if (strcmp(op, "enable") == 0) {
		zhack_do_feature_enable(argc, argv);
		return (0);
	}
	if (strcmp(op, "ref") == 0) {
		zhack_do_feature_ref(argc, argv);
		return (0);
	}

	(void) fprintf(stderr, "error: unknown subcommand: %s\n", op);
	usage();
}
508
509 static boolean_t
strstarts(const char * a,const char * b)510 strstarts(const char *a, const char *b)
511 {
512 return (strncmp(a, b, strlen(b)) == 0);
513 }
514
515 static void
metaslab_force_alloc(metaslab_t * msp,uint64_t start,uint64_t size,dmu_tx_t * tx)516 metaslab_force_alloc(metaslab_t *msp, uint64_t start, uint64_t size,
517 dmu_tx_t *tx)
518 {
519 ASSERT(msp->ms_disabled);
520 ASSERT(MUTEX_HELD(&msp->ms_lock));
521 uint64_t txg = dmu_tx_get_txg(tx);
522
523 uint64_t off = start;
524 while (off < start + size) {
525 uint64_t ostart, osize;
526 boolean_t found = zfs_range_tree_find_in(msp->ms_allocatable,
527 off, start + size - off, &ostart, &osize);
528 if (!found)
529 break;
530 zfs_range_tree_remove(msp->ms_allocatable, ostart, osize);
531
532 if (zfs_range_tree_is_empty(msp->ms_allocating[txg & TXG_MASK]))
533 vdev_dirty(msp->ms_group->mg_vd, VDD_METASLAB, msp,
534 txg);
535
536 zfs_range_tree_add(msp->ms_allocating[txg & TXG_MASK], ostart,
537 osize);
538 msp->ms_allocating_total += osize;
539 off = ostart + osize;
540 }
541 }
542
543 static void
zhack_do_metaslab_leak(int argc,char ** argv)544 zhack_do_metaslab_leak(int argc, char **argv)
545 {
546 int c;
547 char *target;
548 spa_t *spa;
549
550 optind = 1;
551 boolean_t force = B_FALSE;
552 while ((c = getopt(argc, argv, "f")) != -1) {
553 switch (c) {
554 case 'f':
555 force = B_TRUE;
556 break;
557 default:
558 usage();
559 break;
560 }
561 }
562
563 argc -= optind;
564 argv += optind;
565
566 if (argc < 1) {
567 (void) fprintf(stderr, "error: missing pool name\n");
568 usage();
569 }
570 target = argv[0];
571
572 zhack_spa_open(target, B_FALSE, FTAG, &spa);
573 spa_config_enter(spa, SCL_VDEV | SCL_ALLOC, FTAG, RW_READER);
574
575 char *line = NULL;
576 size_t cap = 0;
577
578 vdev_t *vd = NULL;
579 metaslab_t *prev = NULL;
580 dmu_tx_t *tx = NULL;
581 while (getline(&line, &cap, stdin) > 0) {
582 if (strstarts(line, "\tvdev ")) {
583 uint64_t vdev_id, ms_shift;
584 if (sscanf(line,
585 "\tvdev %10"PRIu64"\t%*s metaslab shift %4"PRIu64,
586 &vdev_id, &ms_shift) == 1) {
587 VERIFY3U(sscanf(line, "\tvdev %"PRIu64
588 "\t metaslab shift %4"PRIu64,
589 &vdev_id, &ms_shift), ==, 2);
590 }
591 vd = vdev_lookup_top(spa, vdev_id);
592 if (vd == NULL) {
593 fprintf(stderr, "error: no such vdev with "
594 "id %"PRIu64"\n", vdev_id);
595 break;
596 }
597 if (tx) {
598 dmu_tx_commit(tx);
599 mutex_exit(&prev->ms_lock);
600 metaslab_enable(prev, B_FALSE, B_FALSE);
601 tx = NULL;
602 prev = NULL;
603 }
604 if (vd->vdev_ms_shift != ms_shift) {
605 fprintf(stderr, "error: ms_shift mismatch: %"
606 PRIu64" != %"PRIu64"\n", vd->vdev_ms_shift,
607 ms_shift);
608 break;
609 }
610 } else if (strstarts(line, "\tmetaslabs ")) {
611 uint64_t ms_count;
612 VERIFY3U(sscanf(line, "\tmetaslabs %"PRIu64, &ms_count),
613 ==, 1);
614 ASSERT(vd);
615 if (!force && vd->vdev_ms_count != ms_count) {
616 fprintf(stderr, "error: ms_count mismatch: %"
617 PRIu64" != %"PRIu64"\n", vd->vdev_ms_count,
618 ms_count);
619 break;
620 }
621 } else if (strstarts(line, "ALLOC:")) {
622 uint64_t start, size;
623 VERIFY3U(sscanf(line, "ALLOC: %"PRIu64" %"PRIu64"\n",
624 &start, &size), ==, 2);
625
626 ASSERT(vd);
627 metaslab_t *cur =
628 vd->vdev_ms[start >> vd->vdev_ms_shift];
629 if (prev != cur) {
630 if (prev) {
631 dmu_tx_commit(tx);
632 mutex_exit(&prev->ms_lock);
633 metaslab_enable(prev, B_FALSE, B_FALSE);
634 }
635 ASSERT(cur);
636 metaslab_disable(cur);
637 mutex_enter(&cur->ms_lock);
638 metaslab_load(cur);
639 prev = cur;
640 tx = dmu_tx_create_dd(
641 spa_get_dsl(vd->vdev_spa)->dp_root_dir);
642 dmu_tx_assign(tx, DMU_TX_WAIT);
643 }
644
645 metaslab_force_alloc(cur, start, size, tx);
646 } else {
647 continue;
648 }
649 }
650 if (tx) {
651 dmu_tx_commit(tx);
652 mutex_exit(&prev->ms_lock);
653 metaslab_enable(prev, B_FALSE, B_FALSE);
654 tx = NULL;
655 prev = NULL;
656 }
657 if (line)
658 free(line);
659
660 spa_config_exit(spa, SCL_VDEV | SCL_ALLOC, FTAG);
661 spa_close(spa, FTAG);
662 }
663
/*
 * Dispatch the "metaslab" command; "leak" is the only operation.
 * argv[0] is "metaslab"; the operation name is expected in argv[1].
 * Returns 0; unknown or missing operations end in usage().
 */
static int
zhack_do_metaslab(int argc, char **argv)
{
	argc--;
	argv++;
	if (argc == 0) {
		(void) fprintf(stderr,
		    "error: no metaslab operation specified\n");
		usage();
	}

	if (strcmp(argv[0], "leak") != 0) {
		(void) fprintf(stderr, "error: unknown subcommand: %s\n",
		    argv[0]);
		usage();
	}

	zhack_do_metaslab_leak(argc, argv);
	return (0);
}
688
/*
 * ashift clamped to the range [UBERBLOCK_SHIFT, MAX_UBERBLOCK_SHIFT].
 */
#define	ASHIFT_UBERBLOCK_SHIFT(ashift)	\
	MIN(MAX((ashift), UBERBLOCK_SHIFT), MAX_UBERBLOCK_SHIFT)
/*
 * Byte size corresponding to ASHIFT_UBERBLOCK_SHIFT(); this is the length
 * zhack_repair_write_uberblock() checksums and writes.
 */
#define	ASHIFT_UBERBLOCK_SIZE(ashift)	\
	(1ULL << ASHIFT_UBERBLOCK_SHIFT(ashift))

/* Result bits kept per label in the labels_repaired[] array. */
#define	REPAIR_LABEL_STATUS_CKSUM (1 << 0)
#define	REPAIR_LABEL_STATUS_UB (1 << 1)
697
698 static int
zhack_repair_read_label(const int fd,vdev_label_t * vl,const uint64_t label_offset,const int l)699 zhack_repair_read_label(const int fd, vdev_label_t *vl,
700 const uint64_t label_offset, const int l)
701 {
702 const int err = pread64(fd, vl, sizeof (vdev_label_t), label_offset);
703
704 if (err == -1) {
705 (void) fprintf(stderr,
706 "error: cannot read label %d: %s\n",
707 l, strerror(errno));
708 return (err);
709 } else if (err != sizeof (vdev_label_t)) {
710 (void) fprintf(stderr,
711 "error: bad label %d read size\n", l);
712 return (err);
713 }
714
715 return (0);
716 }
717
718 static int
zhack_repair_get_byteswap(const zio_eck_t * vdev_eck,const int l,int * byteswap)719 zhack_repair_get_byteswap(const zio_eck_t *vdev_eck, const int l, int *byteswap)
720 {
721 if (vdev_eck->zec_magic == ZEC_MAGIC) {
722 *byteswap = B_FALSE;
723 } else if (vdev_eck->zec_magic == BSWAP_64((uint64_t)ZEC_MAGIC)) {
724 *byteswap = B_TRUE;
725 } else {
726 (void) fprintf(stderr, "error: label %d: "
727 "Expected the nvlist checksum magic number but instead got "
728 "0x%" PRIx64 "\n",
729 l, vdev_eck->zec_magic);
730 return (1);
731 }
732 return (0);
733 }
734
735 static void
zhack_repair_calc_cksum(const int byteswap,void * data,const uint64_t offset,const uint64_t abdsize,zio_eck_t * eck,zio_cksum_t * cksum)736 zhack_repair_calc_cksum(const int byteswap, void *data, const uint64_t offset,
737 const uint64_t abdsize, zio_eck_t *eck, zio_cksum_t *cksum)
738 {
739 zio_cksum_t verifier;
740 zio_cksum_t current_cksum;
741 zio_checksum_info_t *ci;
742 abd_t *abd;
743
744 ZIO_SET_CHECKSUM(&verifier, offset, 0, 0, 0);
745
746 if (byteswap)
747 byteswap_uint64_array(&verifier, sizeof (zio_cksum_t));
748
749 current_cksum = eck->zec_cksum;
750 eck->zec_cksum = verifier;
751
752 ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL];
753 abd = abd_get_from_buf(data, abdsize);
754 ci->ci_func[byteswap](abd, abdsize, NULL, cksum);
755 abd_free(abd);
756
757 eck->zec_cksum = current_cksum;
758 }
759
760 static int
zhack_repair_get_ashift(nvlist_t * cfg,const int l,uint64_t * ashift)761 zhack_repair_get_ashift(nvlist_t *cfg, const int l, uint64_t *ashift)
762 {
763 int err;
764 nvlist_t *vdev_tree_cfg;
765
766 err = nvlist_lookup_nvlist(cfg,
767 ZPOOL_CONFIG_VDEV_TREE, &vdev_tree_cfg);
768 if (err) {
769 (void) fprintf(stderr,
770 "error: label %d: cannot find nvlist key %s\n",
771 l, ZPOOL_CONFIG_VDEV_TREE);
772 return (err);
773 }
774
775 err = nvlist_lookup_uint64(vdev_tree_cfg,
776 ZPOOL_CONFIG_ASHIFT, ashift);
777 if (err) {
778 (void) fprintf(stderr,
779 "error: label %d: cannot find nvlist key %s\n",
780 l, ZPOOL_CONFIG_ASHIFT);
781 return (err);
782 }
783
784 if (*ashift == 0) {
785 (void) fprintf(stderr,
786 "error: label %d: nvlist key %s is zero\n",
787 l, ZPOOL_CONFIG_ASHIFT);
788 return (1);
789 }
790
791 return (0);
792 }
793
794 static int
zhack_repair_undetach(uberblock_t * ub,nvlist_t * cfg,const int l)795 zhack_repair_undetach(uberblock_t *ub, nvlist_t *cfg, const int l)
796 {
797 /*
798 * Uberblock root block pointer has valid birth TXG.
799 * Copying it to the label NVlist
800 */
801 if (BP_GET_LOGICAL_BIRTH(&ub->ub_rootbp) != 0) {
802 const uint64_t txg = BP_GET_LOGICAL_BIRTH(&ub->ub_rootbp);
803 int err;
804
805 ub->ub_txg = txg;
806
807 err = nvlist_remove_all(cfg, ZPOOL_CONFIG_CREATE_TXG);
808 if (err) {
809 (void) fprintf(stderr,
810 "error: label %d: "
811 "Failed to remove pool creation TXG\n",
812 l);
813 return (err);
814 }
815
816 err = nvlist_remove_all(cfg, ZPOOL_CONFIG_POOL_TXG);
817 if (err) {
818 (void) fprintf(stderr,
819 "error: label %d: Failed to remove pool TXG to "
820 "be replaced.\n",
821 l);
822 return (err);
823 }
824
825 err = nvlist_add_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, txg);
826 if (err) {
827 (void) fprintf(stderr,
828 "error: label %d: "
829 "Failed to add pool TXG of %" PRIu64 "\n",
830 l, txg);
831 return (err);
832 }
833 }
834
835 return (0);
836 }
837
838 static boolean_t
zhack_repair_write_label(const int l,const int fd,const int byteswap,void * data,zio_eck_t * eck,const uint64_t offset,const uint64_t abdsize)839 zhack_repair_write_label(const int l, const int fd, const int byteswap,
840 void *data, zio_eck_t *eck, const uint64_t offset, const uint64_t abdsize)
841 {
842 zio_cksum_t actual_cksum;
843 zhack_repair_calc_cksum(byteswap, data, offset, abdsize, eck,
844 &actual_cksum);
845 zio_cksum_t expected_cksum = eck->zec_cksum;
846 ssize_t err;
847
848 if (ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum))
849 return (B_FALSE);
850
851 eck->zec_cksum = actual_cksum;
852
853 err = pwrite64(fd, data, abdsize, offset);
854 if (err == -1) {
855 (void) fprintf(stderr, "error: cannot write label %d: %s\n",
856 l, strerror(errno));
857 return (B_FALSE);
858 } else if (err != abdsize) {
859 (void) fprintf(stderr, "error: bad write size label %d\n", l);
860 return (B_FALSE);
861 } else {
862 (void) fprintf(stderr,
863 "label %d: wrote %" PRIu64 " bytes at offset %" PRIu64 "\n",
864 l, abdsize, offset);
865 }
866
867 return (B_TRUE);
868 }
869
870 static void
zhack_repair_write_uberblock(vdev_label_t * vl,const int l,const uint64_t ashift,const int fd,const int byteswap,const uint64_t label_offset,uint32_t * labels_repaired)871 zhack_repair_write_uberblock(vdev_label_t *vl, const int l,
872 const uint64_t ashift, const int fd, const int byteswap,
873 const uint64_t label_offset, uint32_t *labels_repaired)
874 {
875 void *ub_data =
876 (char *)vl + offsetof(vdev_label_t, vl_uberblock);
877 zio_eck_t *ub_eck =
878 (zio_eck_t *)
879 ((char *)(ub_data) + (ASHIFT_UBERBLOCK_SIZE(ashift))) - 1;
880
881 if (ub_eck->zec_magic != 0) {
882 (void) fprintf(stderr,
883 "error: label %d: "
884 "Expected Uberblock checksum magic number to "
885 "be 0, but got %" PRIu64 "\n",
886 l, ub_eck->zec_magic);
887 (void) fprintf(stderr, "It would appear there's already "
888 "a checksum for the uberblock.\n");
889 return;
890 }
891
892
893 ub_eck->zec_magic = byteswap ? BSWAP_64(ZEC_MAGIC) : ZEC_MAGIC;
894
895 if (zhack_repair_write_label(l, fd, byteswap,
896 ub_data, ub_eck,
897 label_offset + offsetof(vdev_label_t, vl_uberblock),
898 ASHIFT_UBERBLOCK_SIZE(ashift)))
899 labels_repaired[l] |= REPAIR_LABEL_STATUS_UB;
900 }
901
902 static void
zhack_repair_print_cksum(FILE * stream,const zio_cksum_t * cksum)903 zhack_repair_print_cksum(FILE *stream, const zio_cksum_t *cksum)
904 {
905 (void) fprintf(stream,
906 "%016llx:%016llx:%016llx:%016llx",
907 (u_longlong_t)cksum->zc_word[0],
908 (u_longlong_t)cksum->zc_word[1],
909 (u_longlong_t)cksum->zc_word[2],
910 (u_longlong_t)cksum->zc_word[3]);
911 }
912
913 static int
zhack_repair_test_cksum(const int byteswap,void * vdev_data,zio_eck_t * vdev_eck,const uint64_t vdev_phys_offset,const int l)914 zhack_repair_test_cksum(const int byteswap, void *vdev_data,
915 zio_eck_t *vdev_eck, const uint64_t vdev_phys_offset, const int l)
916 {
917 const zio_cksum_t expected_cksum = vdev_eck->zec_cksum;
918 zio_cksum_t actual_cksum;
919 zhack_repair_calc_cksum(byteswap, vdev_data, vdev_phys_offset,
920 VDEV_PHYS_SIZE, vdev_eck, &actual_cksum);
921 const uint64_t expected_magic = byteswap ?
922 BSWAP_64(ZEC_MAGIC) : ZEC_MAGIC;
923 const uint64_t actual_magic = vdev_eck->zec_magic;
924 int err = 0;
925
926 if (actual_magic != expected_magic) {
927 (void) fprintf(stderr, "error: label %d: "
928 "Expected "
929 "the nvlist checksum magic number to not be %"
930 PRIu64 " not %" PRIu64 "\n",
931 l, expected_magic, actual_magic);
932 err = ECKSUM;
933 }
934 if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) {
935 (void) fprintf(stderr, "error: label %d: "
936 "Expected the nvlist checksum to be ", l);
937 (void) zhack_repair_print_cksum(stderr,
938 &expected_cksum);
939 (void) fprintf(stderr, " not ");
940 zhack_repair_print_cksum(stderr, &actual_cksum);
941 (void) fprintf(stderr, "\n");
942 err = ECKSUM;
943 }
944 return (err);
945 }
946
947 static int
zhack_repair_unpack_cfg(vdev_label_t * vl,const int l,nvlist_t ** cfg)948 zhack_repair_unpack_cfg(vdev_label_t *vl, const int l, nvlist_t **cfg)
949 {
950 const char *cfg_keys[] = { ZPOOL_CONFIG_VERSION,
951 ZPOOL_CONFIG_POOL_STATE, ZPOOL_CONFIG_GUID };
952 int err;
953
954 err = nvlist_unpack(vl->vl_vdev_phys.vp_nvlist,
955 VDEV_PHYS_SIZE - sizeof (zio_eck_t), cfg, 0);
956 if (err) {
957 (void) fprintf(stderr,
958 "error: cannot unpack nvlist label %d\n", l);
959 return (err);
960 }
961
962 for (int i = 0; i < ARRAY_SIZE(cfg_keys); i++) {
963 uint64_t val;
964 err = nvlist_lookup_uint64(*cfg, cfg_keys[i], &val);
965 if (err) {
966 (void) fprintf(stderr,
967 "error: label %d, %d: "
968 "cannot find nvlist key %s\n",
969 l, i, cfg_keys[i]);
970 return (err);
971 }
972 }
973
974 return (0);
975 }
976
977 static void
zhack_repair_one_label(const zhack_repair_op_t op,const int fd,vdev_label_t * vl,const uint64_t label_offset,const int l,uint32_t * labels_repaired)978 zhack_repair_one_label(const zhack_repair_op_t op, const int fd,
979 vdev_label_t *vl, const uint64_t label_offset, const int l,
980 uint32_t *labels_repaired)
981 {
982 ssize_t err;
983 uberblock_t *ub = (uberblock_t *)vl->vl_uberblock;
984 void *vdev_data =
985 (char *)vl + offsetof(vdev_label_t, vl_vdev_phys);
986 zio_eck_t *vdev_eck =
987 (zio_eck_t *)((char *)(vdev_data) + VDEV_PHYS_SIZE) - 1;
988 const uint64_t vdev_phys_offset =
989 label_offset + offsetof(vdev_label_t, vl_vdev_phys);
990 nvlist_t *cfg;
991 uint64_t ashift;
992 int byteswap;
993
994 err = zhack_repair_read_label(fd, vl, label_offset, l);
995 if (err)
996 return;
997
998 err = zhack_repair_get_byteswap(vdev_eck, l, &byteswap);
999 if (err)
1000 return;
1001
1002 if (byteswap) {
1003 byteswap_uint64_array(&vdev_eck->zec_cksum,
1004 sizeof (zio_cksum_t));
1005 vdev_eck->zec_magic = BSWAP_64(vdev_eck->zec_magic);
1006 }
1007
1008 if ((op & ZHACK_REPAIR_OP_CKSUM) == 0 &&
1009 zhack_repair_test_cksum(byteswap, vdev_data, vdev_eck,
1010 vdev_phys_offset, l) != 0) {
1011 (void) fprintf(stderr, "It would appear checksums are "
1012 "corrupted. Try zhack repair label -c <device>\n");
1013 return;
1014 }
1015
1016 err = zhack_repair_unpack_cfg(vl, l, &cfg);
1017 if (err)
1018 return;
1019
1020 if ((op & ZHACK_REPAIR_OP_UNDETACH) != 0) {
1021 char *buf;
1022 size_t buflen;
1023
1024 if (ub->ub_txg != 0) {
1025 (void) fprintf(stderr,
1026 "error: label %d: UB TXG of 0 expected, but got %"
1027 PRIu64 "\n", l, ub->ub_txg);
1028 (void) fprintf(stderr, "It would appear the device was "
1029 "not properly detached.\n");
1030 return;
1031 }
1032
1033 err = zhack_repair_get_ashift(cfg, l, &ashift);
1034 if (err)
1035 return;
1036
1037 err = zhack_repair_undetach(ub, cfg, l);
1038 if (err)
1039 return;
1040
1041 buf = vl->vl_vdev_phys.vp_nvlist;
1042 buflen = VDEV_PHYS_SIZE - sizeof (zio_eck_t);
1043 if (nvlist_pack(cfg, &buf, &buflen, NV_ENCODE_XDR, 0) != 0) {
1044 (void) fprintf(stderr,
1045 "error: label %d: Failed to pack nvlist\n", l);
1046 return;
1047 }
1048
1049 zhack_repair_write_uberblock(vl,
1050 l, ashift, fd, byteswap, label_offset, labels_repaired);
1051 }
1052
1053 if (zhack_repair_write_label(l, fd, byteswap, vdev_data, vdev_eck,
1054 vdev_phys_offset, VDEV_PHYS_SIZE))
1055 labels_repaired[l] |= REPAIR_LABEL_STATUS_CKSUM;
1056
1057 fsync(fd);
1058 }
1059
/*
 * Return "repaired" when any bit of to_check is set in label_status and
 * "skipped" otherwise.
 */
static const char *
zhack_repair_label_status(const uint32_t label_status,
    const uint32_t to_check)
{
	if ((label_status & to_check) == 0)
		return ("skipped");

	return ("repaired");
}
1066
1067 static int
zhack_label_repair(const zhack_repair_op_t op,const int argc,char ** argv)1068 zhack_label_repair(const zhack_repair_op_t op, const int argc, char **argv)
1069 {
1070 uint32_t labels_repaired[VDEV_LABELS] = {0};
1071 vdev_label_t labels[VDEV_LABELS] = {{{0}}};
1072 struct stat64 st;
1073 int fd;
1074 off_t filesize;
1075 uint32_t repaired = 0;
1076
1077 abd_init();
1078
1079 if (argc < 1) {
1080 (void) fprintf(stderr, "error: missing device\n");
1081 usage();
1082 }
1083
1084 if ((fd = open(argv[0], O_RDWR)) == -1)
1085 fatal(NULL, FTAG, "cannot open '%s': %s", argv[0],
1086 strerror(errno));
1087
1088 if (fstat64_blk(fd, &st) != 0)
1089 fatal(NULL, FTAG, "cannot stat '%s': %s", argv[0],
1090 strerror(errno));
1091
1092 filesize = st.st_size;
1093 (void) fprintf(stderr, "Calculated filesize to be %jd\n",
1094 (intmax_t)filesize);
1095
1096 if (filesize % sizeof (vdev_label_t) != 0)
1097 filesize =
1098 (filesize / sizeof (vdev_label_t)) * sizeof (vdev_label_t);
1099
1100 for (int l = 0; l < VDEV_LABELS; l++) {
1101 zhack_repair_one_label(op, fd, &labels[l],
1102 vdev_label_offset(filesize, l, 0), l, labels_repaired);
1103 }
1104
1105 close(fd);
1106
1107 abd_fini();
1108
1109 for (int l = 0; l < VDEV_LABELS; l++) {
1110 const uint32_t lr = labels_repaired[l];
1111 (void) printf("label %d: ", l);
1112 (void) printf("uberblock: %s ",
1113 zhack_repair_label_status(lr, REPAIR_LABEL_STATUS_UB));
1114 (void) printf("checksum: %s\n",
1115 zhack_repair_label_status(lr, REPAIR_LABEL_STATUS_CKSUM));
1116 repaired |= lr;
1117 }
1118
1119 if (repaired > 0)
1120 return (0);
1121
1122 return (1);
1123 }
1124
1125 static int
zhack_do_label_repair(int argc,char ** argv)1126 zhack_do_label_repair(int argc, char **argv)
1127 {
1128 zhack_repair_op_t op = ZHACK_REPAIR_OP_UNKNOWN;
1129 int c;
1130
1131 optind = 1;
1132 while ((c = getopt(argc, argv, "+cu")) != -1) {
1133 switch (c) {
1134 case 'c':
1135 op |= ZHACK_REPAIR_OP_CKSUM;
1136 break;
1137 case 'u':
1138 op |= ZHACK_REPAIR_OP_UNDETACH;
1139 break;
1140 default:
1141 usage();
1142 break;
1143 }
1144 }
1145
1146 argc -= optind;
1147 argv += optind;
1148
1149 if (op == ZHACK_REPAIR_OP_UNKNOWN)
1150 op = ZHACK_REPAIR_OP_CKSUM;
1151
1152 return (zhack_label_repair(op, argc, argv));
1153 }
1154
/*
 * Dispatch the "label" command; "repair" is the only operation.
 * argv[0] is "label"; the operation name is expected in argv[1].
 * Returns the repair result; unknown or missing operations end in usage().
 */
static int
zhack_do_label(int argc, char **argv)
{
	argc--;
	argv++;
	if (argc == 0) {
		(void) fprintf(stderr,
		    "error: no label operation specified\n");
		usage();
	}

	if (strcmp(argv[0], "repair") != 0) {
		(void) fprintf(stderr, "error: unknown subcommand: %s\n",
		    argv[0]);
		usage();
	}

	return (zhack_do_label_repair(argc, argv));
}
1180
1181 #define MAX_NUM_PATHS 1024
1182
1183 int
main(int argc,char ** argv)1184 main(int argc, char **argv)
1185 {
1186 char *path[MAX_NUM_PATHS];
1187 const char *subcommand;
1188 int rv = 0;
1189 int c;
1190
1191 g_importargs.path = path;
1192
1193 dprintf_setup(&argc, argv);
1194 zfs_prop_init();
1195
1196 while ((c = getopt(argc, argv, "+c:d:o:")) != -1) {
1197 switch (c) {
1198 case 'c':
1199 g_importargs.cachefile = optarg;
1200 break;
1201 case 'd':
1202 assert(g_importargs.paths < MAX_NUM_PATHS);
1203 g_importargs.path[g_importargs.paths++] = optarg;
1204 break;
1205 case 'o':
1206 if (handle_tunable_option(optarg, B_FALSE) != 0)
1207 exit(1);
1208 break;
1209 default:
1210 usage();
1211 break;
1212 }
1213 }
1214
1215 argc -= optind;
1216 argv += optind;
1217 optind = 1;
1218
1219 if (argc == 0) {
1220 (void) fprintf(stderr, "error: no command specified\n");
1221 usage();
1222 }
1223
1224 subcommand = argv[0];
1225
1226 if (strcmp(subcommand, "feature") == 0) {
1227 rv = zhack_do_feature(argc, argv);
1228 } else if (strcmp(subcommand, "label") == 0) {
1229 return (zhack_do_label(argc, argv));
1230 } else if (strcmp(subcommand, "metaslab") == 0) {
1231 rv = zhack_do_metaslab(argc, argv);
1232 } else {
1233 (void) fprintf(stderr, "error: unknown subcommand: %s\n",
1234 subcommand);
1235 usage();
1236 }
1237
1238 if (!g_readonly && spa_export(g_pool, NULL, B_TRUE, B_FALSE) != 0) {
1239 fatal(NULL, FTAG, "pool export failed; "
1240 "changes may not be committed to disk\n");
1241 }
1242
1243 kernel_fini();
1244
1245 return (rv);
1246 }
1247