1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3 * CDDL HEADER START
4 *
5 * The contents of this file are subject to the terms of the
6 * Common Development and Distribution License (the "License").
7 * You may not use this file except in compliance with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or https://opensource.org/licenses/CDDL-1.0.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22
23 /*
24 * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
25 * Copyright (c) 2013 Steven Hartland. All rights reserved.
26 */
27
/*
 * zhack is a debugging tool that can write changes to a ZFS pool using
 * libzpool for testing purposes. Altering pools with zhack is unsupported
 * and may result in corrupted pools.
 */
33
34 #include <zfs_prop.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <ctype.h>
38 #include <sys/stat.h>
39 #include <sys/zfs_context.h>
40 #include <sys/spa.h>
41 #include <sys/spa_impl.h>
42 #include <sys/dmu.h>
43 #include <sys/zap.h>
44 #include <sys/zfs_znode.h>
45 #include <sys/dsl_synctask.h>
46 #include <sys/vdev.h>
47 #include <sys/vdev_impl.h>
48 #include <sys/fs/zfs.h>
49 #include <sys/dmu_objset.h>
50 #include <sys/dsl_pool.h>
51 #include <sys/zio_checksum.h>
52 #include <sys/zio_compress.h>
53 #include <sys/zfeature.h>
54 #include <sys/dmu_tx.h>
55 #include <zfeature_common.h>
56 #include <libzutil.h>
57 #include <sys/metaslab_impl.h>
58
/* Import search arguments; main() fills in the cachefile and -d paths. */
static importargs_t g_importargs;
/* Copy of the target name made by zhack_import(); used when exporting. */
static char *g_pool;
/* Set by zhack_import(); main() skips the final export when it is true. */
static boolean_t g_readonly;
62
/*
 * Label repair operations. The non-zero values are distinct bits so that
 * several operations can be OR-ed together into a single request.
 */
typedef enum {
	ZHACK_REPAIR_OP_UNKNOWN = 0x0,	/* nothing selected yet */
	ZHACK_REPAIR_OP_CKSUM = 0x1,	/* selected by "label repair -c" */
	ZHACK_REPAIR_OP_UNDETACH = 0x2	/* selected by "label repair -u" */
} zhack_repair_op_t;
68
/*
 * Print the command synopsis to stderr and exit with status 1.
 *
 * The synopsis lists every option the subcommand parsers accept,
 * including "label repair [-cu]" and "metaslab leak [-f]".
 */
static __attribute__((noreturn)) void
usage(void)
{
	(void) fprintf(stderr,
	    "Usage: zhack [-o tunable] [-c cachefile] [-d dir] <subcommand> "
	    "<args> ...\n"
	    "where <subcommand> <args> is one of the following:\n"
	    "\n");

	(void) fprintf(stderr,
	    " feature stat <pool>\n"
	    " print information about enabled features\n"
	    " feature enable [-r] [-d desc] <pool> <feature>\n"
	    " add a new enabled feature to the pool\n"
	    " -d <desc> sets the feature's description\n"
	    " -r set read-only compatible flag for feature\n"
	    " feature ref [-md] <pool> <feature>\n"
	    " change the refcount on the given feature\n"
	    " -d decrease instead of increase the refcount\n"
	    " -m add the feature to the label if increasing refcount\n"
	    "\n"
	    " <feature> : should be a feature guid\n"
	    "\n"
	    " label repair [-cu] <device>\n"
	    " repair labels of a specified device according to options\n"
	    " which may be combined to do their functions in one call\n"
	    " -c repair corrupted label checksums\n"
	    " -u restore the label on a detached device\n"
	    "\n"
	    " <device> : path to vdev\n"
	    "\n"
	    " metaslab leak [-f] <pool>\n"
	    " apply allocation map from zdb to specified pool\n"
	    " -f do not fail on a metaslab count mismatch\n");
	exit(1);
}
104
105
106 static __attribute__((format(printf, 3, 4))) __attribute__((noreturn)) void
fatal(spa_t * spa,const void * tag,const char * fmt,...)107 fatal(spa_t *spa, const void *tag, const char *fmt, ...)
108 {
109 va_list ap;
110
111 if (spa != NULL) {
112 spa_close(spa, tag);
113 (void) spa_export(g_pool, NULL, B_TRUE, B_FALSE);
114 }
115
116 va_start(ap, fmt);
117 (void) fputs("zhack: ", stderr);
118 (void) vfprintf(stderr, fmt, ap);
119 va_end(ap);
120 (void) fputc('\n', stderr);
121
122 exit(1);
123 }
124
125 static int
space_delta_cb(dmu_object_type_t bonustype,const void * data,zfs_file_info_t * zoi)126 space_delta_cb(dmu_object_type_t bonustype, const void *data,
127 zfs_file_info_t *zoi)
128 {
129 (void) data, (void) zoi;
130
131 /*
132 * Is it a valid type of object to track?
133 */
134 if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA)
135 return (ENOENT);
136 (void) fprintf(stderr, "modifying object that needs user accounting");
137 abort();
138 }
139
140 /*
141 * Target is the dataset whose pool we want to open.
142 */
143 static void
zhack_import(char * target,boolean_t readonly)144 zhack_import(char *target, boolean_t readonly)
145 {
146 nvlist_t *config;
147 nvlist_t *props;
148 int error;
149
150 kernel_init(readonly ? SPA_MODE_READ :
151 (SPA_MODE_READ | SPA_MODE_WRITE));
152
153 dmu_objset_register_type(DMU_OST_ZFS, space_delta_cb);
154
155 g_readonly = readonly;
156 g_importargs.can_be_active = readonly;
157 g_pool = strdup(target);
158
159 libpc_handle_t lpch = {
160 .lpc_lib_handle = NULL,
161 .lpc_ops = &libzpool_config_ops,
162 .lpc_printerr = B_TRUE
163 };
164 error = zpool_find_config(&lpch, target, &config, &g_importargs);
165 if (error)
166 fatal(NULL, FTAG, "cannot import '%s'", target);
167
168 props = NULL;
169 if (readonly) {
170 VERIFY0(nvlist_alloc(&props, NV_UNIQUE_NAME, 0));
171 VERIFY0(nvlist_add_uint64(props,
172 zpool_prop_to_name(ZPOOL_PROP_READONLY), 1));
173 }
174
175 zfeature_checks_disable = B_TRUE;
176 error = spa_import(target, config, props,
177 (readonly ? ZFS_IMPORT_SKIP_MMP : ZFS_IMPORT_NORMAL));
178 fnvlist_free(config);
179 zfeature_checks_disable = B_FALSE;
180 if (error == EEXIST)
181 error = 0;
182
183 if (error)
184 fatal(NULL, FTAG, "can't import '%s': %s", target,
185 strerror(error));
186 }
187
188 static void
zhack_spa_open(char * target,boolean_t readonly,const void * tag,spa_t ** spa)189 zhack_spa_open(char *target, boolean_t readonly, const void *tag, spa_t **spa)
190 {
191 int err;
192
193 zhack_import(target, readonly);
194
195 zfeature_checks_disable = B_TRUE;
196 err = spa_open(target, spa, tag);
197 zfeature_checks_disable = B_FALSE;
198
199 if (err != 0)
200 fatal(*spa, FTAG, "cannot open '%s': %s", target,
201 strerror(err));
202 if (spa_version(*spa) < SPA_VERSION_FEATURES) {
203 fatal(*spa, FTAG, "'%s' has version %d, features not enabled",
204 target, (int)spa_version(*spa));
205 }
206 }
207
208 static void
dump_obj(objset_t * os,uint64_t obj,const char * name)209 dump_obj(objset_t *os, uint64_t obj, const char *name)
210 {
211 zap_cursor_t zc;
212 zap_attribute_t *za = zap_attribute_long_alloc();
213
214 (void) printf("%s_obj:\n", name);
215
216 for (zap_cursor_init(&zc, os, obj);
217 zap_cursor_retrieve(&zc, za) == 0;
218 zap_cursor_advance(&zc)) {
219 if (za->za_integer_length == 8) {
220 ASSERT(za->za_num_integers == 1);
221 (void) printf("\t%s = %llu\n",
222 za->za_name, (u_longlong_t)za->za_first_integer);
223 } else {
224 ASSERT(za->za_integer_length == 1);
225 char val[1024];
226 VERIFY0(zap_lookup(os, obj, za->za_name,
227 1, sizeof (val), val));
228 (void) printf("\t%s = %s\n", za->za_name, val);
229 }
230 }
231 zap_cursor_fini(&zc);
232 zap_attribute_free(za);
233 }
234
235 static void
dump_mos(spa_t * spa)236 dump_mos(spa_t *spa)
237 {
238 nvlist_t *nv = spa->spa_label_features;
239 nvpair_t *pair;
240
241 (void) printf("label config:\n");
242 for (pair = nvlist_next_nvpair(nv, NULL);
243 pair != NULL;
244 pair = nvlist_next_nvpair(nv, pair)) {
245 (void) printf("\t%s\n", nvpair_name(pair));
246 }
247 }
248
249 static void
zhack_do_feature_stat(int argc,char ** argv)250 zhack_do_feature_stat(int argc, char **argv)
251 {
252 spa_t *spa;
253 objset_t *os;
254 char *target;
255
256 argc--;
257 argv++;
258
259 if (argc < 1) {
260 (void) fprintf(stderr, "error: missing pool name\n");
261 usage();
262 }
263 target = argv[0];
264
265 zhack_spa_open(target, B_TRUE, FTAG, &spa);
266 os = spa->spa_meta_objset;
267
268 dump_obj(os, spa->spa_feat_for_read_obj, "for_read");
269 dump_obj(os, spa->spa_feat_for_write_obj, "for_write");
270 dump_obj(os, spa->spa_feat_desc_obj, "descriptions");
271 if (spa_feature_is_active(spa, SPA_FEATURE_ENABLED_TXG)) {
272 dump_obj(os, spa->spa_feat_enabled_txg_obj, "enabled_txg");
273 }
274 dump_mos(spa);
275
276 spa_close(spa, FTAG);
277 }
278
279 static void
zhack_feature_enable_sync(void * arg,dmu_tx_t * tx)280 zhack_feature_enable_sync(void *arg, dmu_tx_t *tx)
281 {
282 spa_t *spa = dmu_tx_pool(tx)->dp_spa;
283 zfeature_info_t *feature = arg;
284
285 feature_enable_sync(spa, feature, tx);
286
287 spa_history_log_internal(spa, "zhack enable feature", tx,
288 "name=%s flags=%u",
289 feature->fi_guid, feature->fi_flags);
290 }
291
292 static void
zhack_do_feature_enable(int argc,char ** argv)293 zhack_do_feature_enable(int argc, char **argv)
294 {
295 int c;
296 char *desc, *target;
297 spa_t *spa;
298 objset_t *mos;
299 zfeature_info_t feature;
300 const spa_feature_t nodeps[] = { SPA_FEATURE_NONE };
301
302 /*
303 * Features are not added to the pool's label until their refcounts
304 * are incremented, so fi_mos can just be left as false for now.
305 */
306 desc = NULL;
307 feature.fi_uname = "zhack";
308 feature.fi_flags = 0;
309 feature.fi_depends = nodeps;
310 feature.fi_feature = SPA_FEATURE_NONE;
311
312 optind = 1;
313 while ((c = getopt(argc, argv, "+rd:")) != -1) {
314 switch (c) {
315 case 'r':
316 feature.fi_flags |= ZFEATURE_FLAG_READONLY_COMPAT;
317 break;
318 case 'd':
319 if (desc != NULL)
320 free(desc);
321 desc = strdup(optarg);
322 break;
323 default:
324 usage();
325 break;
326 }
327 }
328
329 if (desc == NULL)
330 desc = strdup("zhack injected");
331 feature.fi_desc = desc;
332
333 argc -= optind;
334 argv += optind;
335
336 if (argc < 2) {
337 (void) fprintf(stderr, "error: missing feature or pool name\n");
338 usage();
339 }
340 target = argv[0];
341 feature.fi_guid = argv[1];
342
343 if (!zfeature_is_valid_guid(feature.fi_guid))
344 fatal(NULL, FTAG, "invalid feature guid: %s", feature.fi_guid);
345
346 zhack_spa_open(target, B_FALSE, FTAG, &spa);
347 mos = spa->spa_meta_objset;
348
349 if (zfeature_is_supported(feature.fi_guid))
350 fatal(spa, FTAG, "'%s' is a real feature, will not enable",
351 feature.fi_guid);
352 if (0 == zap_contains(mos, spa->spa_feat_desc_obj, feature.fi_guid))
353 fatal(spa, FTAG, "feature already enabled: %s",
354 feature.fi_guid);
355
356 VERIFY0(dsl_sync_task(spa_name(spa), NULL,
357 zhack_feature_enable_sync, &feature, 5, ZFS_SPACE_CHECK_NORMAL));
358
359 spa_close(spa, FTAG);
360
361 free(desc);
362 }
363
364 static void
feature_incr_sync(void * arg,dmu_tx_t * tx)365 feature_incr_sync(void *arg, dmu_tx_t *tx)
366 {
367 spa_t *spa = dmu_tx_pool(tx)->dp_spa;
368 zfeature_info_t *feature = arg;
369 uint64_t refcount;
370
371 mutex_enter(&spa->spa_feat_stats_lock);
372 VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount));
373 feature_sync(spa, feature, refcount + 1, tx);
374 spa_history_log_internal(spa, "zhack feature incr", tx,
375 "name=%s", feature->fi_guid);
376 mutex_exit(&spa->spa_feat_stats_lock);
377 }
378
379 static void
feature_decr_sync(void * arg,dmu_tx_t * tx)380 feature_decr_sync(void *arg, dmu_tx_t *tx)
381 {
382 spa_t *spa = dmu_tx_pool(tx)->dp_spa;
383 zfeature_info_t *feature = arg;
384 uint64_t refcount;
385
386 mutex_enter(&spa->spa_feat_stats_lock);
387 VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount));
388 feature_sync(spa, feature, refcount - 1, tx);
389 spa_history_log_internal(spa, "zhack feature decr", tx,
390 "name=%s", feature->fi_guid);
391 mutex_exit(&spa->spa_feat_stats_lock);
392 }
393
394 static void
zhack_do_feature_ref(int argc,char ** argv)395 zhack_do_feature_ref(int argc, char **argv)
396 {
397 int c;
398 char *target;
399 boolean_t decr = B_FALSE;
400 spa_t *spa;
401 objset_t *mos;
402 zfeature_info_t feature;
403 const spa_feature_t nodeps[] = { SPA_FEATURE_NONE };
404
405 /*
406 * fi_desc does not matter here because it was written to disk
407 * when the feature was enabled, but we need to properly set the
408 * feature for read or write based on the information we read off
409 * disk later.
410 */
411 feature.fi_uname = "zhack";
412 feature.fi_flags = 0;
413 feature.fi_desc = NULL;
414 feature.fi_depends = nodeps;
415 feature.fi_feature = SPA_FEATURE_NONE;
416
417 optind = 1;
418 while ((c = getopt(argc, argv, "+md")) != -1) {
419 switch (c) {
420 case 'm':
421 feature.fi_flags |= ZFEATURE_FLAG_MOS;
422 break;
423 case 'd':
424 decr = B_TRUE;
425 break;
426 default:
427 usage();
428 break;
429 }
430 }
431 argc -= optind;
432 argv += optind;
433
434 if (argc < 2) {
435 (void) fprintf(stderr, "error: missing feature or pool name\n");
436 usage();
437 }
438 target = argv[0];
439 feature.fi_guid = argv[1];
440
441 if (!zfeature_is_valid_guid(feature.fi_guid))
442 fatal(NULL, FTAG, "invalid feature guid: %s", feature.fi_guid);
443
444 zhack_spa_open(target, B_FALSE, FTAG, &spa);
445 mos = spa->spa_meta_objset;
446
447 if (zfeature_is_supported(feature.fi_guid)) {
448 fatal(spa, FTAG,
449 "'%s' is a real feature, will not change refcount",
450 feature.fi_guid);
451 }
452
453 if (0 == zap_contains(mos, spa->spa_feat_for_read_obj,
454 feature.fi_guid)) {
455 feature.fi_flags &= ~ZFEATURE_FLAG_READONLY_COMPAT;
456 } else if (0 == zap_contains(mos, spa->spa_feat_for_write_obj,
457 feature.fi_guid)) {
458 feature.fi_flags |= ZFEATURE_FLAG_READONLY_COMPAT;
459 } else {
460 fatal(spa, FTAG, "feature is not enabled: %s", feature.fi_guid);
461 }
462
463 if (decr) {
464 uint64_t count;
465 if (feature_get_refcount_from_disk(spa, &feature,
466 &count) == 0 && count == 0) {
467 fatal(spa, FTAG, "feature refcount already 0: %s",
468 feature.fi_guid);
469 }
470 }
471
472 VERIFY0(dsl_sync_task(spa_name(spa), NULL,
473 decr ? feature_decr_sync : feature_incr_sync, &feature,
474 5, ZFS_SPACE_CHECK_NORMAL));
475
476 spa_close(spa, FTAG);
477 }
478
/*
 * Dispatch "feature <stat|enable|ref> ...". argv[0] is the "feature"
 * keyword on entry. Always returns 0; bad usage exits via usage().
 */
static int
zhack_do_feature(int argc, char **argv)
{
	const char *op;

	if (--argc == 0) {
		(void) fprintf(stderr,
		    "error: no feature operation specified\n");
		usage();
	}
	argv++;

	op = argv[0];
	if (strcmp(op, "stat") == 0) {
		zhack_do_feature_stat(argc, argv);
		return (0);
	}
	if (strcmp(op, "enable") == 0) {
		zhack_do_feature_enable(argc, argv);
		return (0);
	}
	if (strcmp(op, "ref") == 0) {
		zhack_do_feature_ref(argc, argv);
		return (0);
	}

	(void) fprintf(stderr, "error: unknown subcommand: %s\n", op);
	usage();
}
507
508 static boolean_t
strstarts(const char * a,const char * b)509 strstarts(const char *a, const char *b)
510 {
511 return (strncmp(a, b, strlen(b)) == 0);
512 }
513
514 static void
metaslab_force_alloc(metaslab_t * msp,uint64_t start,uint64_t size,dmu_tx_t * tx)515 metaslab_force_alloc(metaslab_t *msp, uint64_t start, uint64_t size,
516 dmu_tx_t *tx)
517 {
518 ASSERT(msp->ms_disabled);
519 ASSERT(MUTEX_HELD(&msp->ms_lock));
520 uint64_t txg = dmu_tx_get_txg(tx);
521
522 uint64_t off = start;
523 while (off < start + size) {
524 uint64_t ostart, osize;
525 boolean_t found = zfs_range_tree_find_in(msp->ms_allocatable,
526 off, start + size - off, &ostart, &osize);
527 if (!found)
528 break;
529 zfs_range_tree_remove(msp->ms_allocatable, ostart, osize);
530
531 if (zfs_range_tree_is_empty(msp->ms_allocating[txg & TXG_MASK]))
532 vdev_dirty(msp->ms_group->mg_vd, VDD_METASLAB, msp,
533 txg);
534
535 zfs_range_tree_add(msp->ms_allocating[txg & TXG_MASK], ostart,
536 osize);
537 msp->ms_allocating_total += osize;
538 off = ostart + osize;
539 }
540 }
541
542 static void
zhack_do_metaslab_leak(int argc,char ** argv)543 zhack_do_metaslab_leak(int argc, char **argv)
544 {
545 int c;
546 char *target;
547 spa_t *spa;
548
549 optind = 1;
550 boolean_t force = B_FALSE;
551 while ((c = getopt(argc, argv, "f")) != -1) {
552 switch (c) {
553 case 'f':
554 force = B_TRUE;
555 break;
556 default:
557 usage();
558 break;
559 }
560 }
561
562 argc -= optind;
563 argv += optind;
564
565 if (argc < 1) {
566 (void) fprintf(stderr, "error: missing pool name\n");
567 usage();
568 }
569 target = argv[0];
570
571 zhack_spa_open(target, B_FALSE, FTAG, &spa);
572 spa_config_enter(spa, SCL_VDEV | SCL_ALLOC, FTAG, RW_READER);
573
574 char *line = NULL;
575 size_t cap = 0;
576
577 vdev_t *vd = NULL;
578 metaslab_t *prev = NULL;
579 dmu_tx_t *tx = NULL;
580 while (getline(&line, &cap, stdin) > 0) {
581 if (strstarts(line, "\tvdev ")) {
582 uint64_t vdev_id, ms_shift;
583 if (sscanf(line,
584 "\tvdev %10"PRIu64"\t%*s metaslab shift %4"PRIu64,
585 &vdev_id, &ms_shift) == 1) {
586 VERIFY3U(sscanf(line, "\tvdev %"PRIu64
587 "\t metaslab shift %4"PRIu64,
588 &vdev_id, &ms_shift), ==, 2);
589 }
590 vd = vdev_lookup_top(spa, vdev_id);
591 if (vd == NULL) {
592 fprintf(stderr, "error: no such vdev with "
593 "id %"PRIu64"\n", vdev_id);
594 break;
595 }
596 if (tx) {
597 dmu_tx_commit(tx);
598 mutex_exit(&prev->ms_lock);
599 metaslab_enable(prev, B_FALSE, B_FALSE);
600 tx = NULL;
601 prev = NULL;
602 }
603 if (vd->vdev_ms_shift != ms_shift) {
604 fprintf(stderr, "error: ms_shift mismatch: %"
605 PRIu64" != %"PRIu64"\n", vd->vdev_ms_shift,
606 ms_shift);
607 break;
608 }
609 } else if (strstarts(line, "\tmetaslabs ")) {
610 uint64_t ms_count;
611 VERIFY3U(sscanf(line, "\tmetaslabs %"PRIu64, &ms_count),
612 ==, 1);
613 ASSERT(vd);
614 if (!force && vd->vdev_ms_count != ms_count) {
615 fprintf(stderr, "error: ms_count mismatch: %"
616 PRIu64" != %"PRIu64"\n", vd->vdev_ms_count,
617 ms_count);
618 break;
619 }
620 } else if (strstarts(line, "ALLOC:")) {
621 uint64_t start, size;
622 VERIFY3U(sscanf(line, "ALLOC: %"PRIu64" %"PRIu64"\n",
623 &start, &size), ==, 2);
624
625 ASSERT(vd);
626 metaslab_t *cur =
627 vd->vdev_ms[start >> vd->vdev_ms_shift];
628 if (prev != cur) {
629 if (prev) {
630 dmu_tx_commit(tx);
631 mutex_exit(&prev->ms_lock);
632 metaslab_enable(prev, B_FALSE, B_FALSE);
633 }
634 ASSERT(cur);
635 metaslab_disable(cur);
636 mutex_enter(&cur->ms_lock);
637 metaslab_load(cur);
638 prev = cur;
639 tx = dmu_tx_create_dd(
640 spa_get_dsl(vd->vdev_spa)->dp_root_dir);
641 dmu_tx_assign(tx, DMU_TX_WAIT);
642 }
643
644 metaslab_force_alloc(cur, start, size, tx);
645 } else {
646 continue;
647 }
648 }
649 if (tx) {
650 dmu_tx_commit(tx);
651 mutex_exit(&prev->ms_lock);
652 metaslab_enable(prev, B_FALSE, B_FALSE);
653 tx = NULL;
654 prev = NULL;
655 }
656 if (line)
657 free(line);
658
659 spa_config_exit(spa, SCL_VDEV | SCL_ALLOC, FTAG);
660 spa_close(spa, FTAG);
661 }
662
/*
 * Dispatch "metaslab <leak> ...". argv[0] is the "metaslab" keyword on
 * entry. Always returns 0; bad usage exits via usage().
 */
static int
zhack_do_metaslab(int argc, char **argv)
{
	if (--argc == 0) {
		(void) fprintf(stderr,
		    "error: no metaslab operation specified\n");
		usage();
	}
	argv++;

	if (strcmp(argv[0], "leak") != 0) {
		(void) fprintf(stderr, "error: unknown subcommand: %s\n",
		    argv[0]);
		usage();
	}

	zhack_do_metaslab_leak(argc, argv);
	return (0);
}
687
/*
 * Uberblock size for a given ashift: the ashift is clamped to the range
 * [UBERBLOCK_SHIFT, MAX_UBERBLOCK_SHIFT] and used as a power of two.
 */
#define	ASHIFT_UBERBLOCK_SHIFT(ashift)	\
	MIN(MAX((ashift), UBERBLOCK_SHIFT), MAX_UBERBLOCK_SHIFT)
#define	ASHIFT_UBERBLOCK_SIZE(ashift)	\
	(1ULL << ASHIFT_UBERBLOCK_SHIFT(ashift))

/* Per-label result bits recorded in labels_repaired[]. */
#define	REPAIR_LABEL_STATUS_CKSUM	(1 << 0)
#define	REPAIR_LABEL_STATUS_UB		(1 << 1)
696
697 static int
zhack_repair_read_label(const int fd,vdev_label_t * vl,const uint64_t label_offset,const int l)698 zhack_repair_read_label(const int fd, vdev_label_t *vl,
699 const uint64_t label_offset, const int l)
700 {
701 const int err = pread64(fd, vl, sizeof (vdev_label_t), label_offset);
702
703 if (err == -1) {
704 (void) fprintf(stderr,
705 "error: cannot read label %d: %s\n",
706 l, strerror(errno));
707 return (err);
708 } else if (err != sizeof (vdev_label_t)) {
709 (void) fprintf(stderr,
710 "error: bad label %d read size\n", l);
711 return (err);
712 }
713
714 return (0);
715 }
716
717 static int
zhack_repair_get_byteswap(const zio_eck_t * vdev_eck,const int l,int * byteswap)718 zhack_repair_get_byteswap(const zio_eck_t *vdev_eck, const int l, int *byteswap)
719 {
720 if (vdev_eck->zec_magic == ZEC_MAGIC) {
721 *byteswap = B_FALSE;
722 } else if (vdev_eck->zec_magic == BSWAP_64((uint64_t)ZEC_MAGIC)) {
723 *byteswap = B_TRUE;
724 } else {
725 (void) fprintf(stderr, "error: label %d: "
726 "Expected the nvlist checksum magic number but instead got "
727 "0x%" PRIx64 "\n",
728 l, vdev_eck->zec_magic);
729 return (1);
730 }
731 return (0);
732 }
733
734 static void
zhack_repair_calc_cksum(const int byteswap,void * data,const uint64_t offset,const uint64_t abdsize,zio_eck_t * eck,zio_cksum_t * cksum)735 zhack_repair_calc_cksum(const int byteswap, void *data, const uint64_t offset,
736 const uint64_t abdsize, zio_eck_t *eck, zio_cksum_t *cksum)
737 {
738 zio_cksum_t verifier;
739 zio_cksum_t current_cksum;
740 zio_checksum_info_t *ci;
741 abd_t *abd;
742
743 ZIO_SET_CHECKSUM(&verifier, offset, 0, 0, 0);
744
745 if (byteswap)
746 byteswap_uint64_array(&verifier, sizeof (zio_cksum_t));
747
748 current_cksum = eck->zec_cksum;
749 eck->zec_cksum = verifier;
750
751 ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL];
752 abd = abd_get_from_buf(data, abdsize);
753 ci->ci_func[byteswap](abd, abdsize, NULL, cksum);
754 abd_free(abd);
755
756 eck->zec_cksum = current_cksum;
757 }
758
759 static int
zhack_repair_get_ashift(nvlist_t * cfg,const int l,uint64_t * ashift)760 zhack_repair_get_ashift(nvlist_t *cfg, const int l, uint64_t *ashift)
761 {
762 int err;
763 nvlist_t *vdev_tree_cfg;
764
765 err = nvlist_lookup_nvlist(cfg,
766 ZPOOL_CONFIG_VDEV_TREE, &vdev_tree_cfg);
767 if (err) {
768 (void) fprintf(stderr,
769 "error: label %d: cannot find nvlist key %s\n",
770 l, ZPOOL_CONFIG_VDEV_TREE);
771 return (err);
772 }
773
774 err = nvlist_lookup_uint64(vdev_tree_cfg,
775 ZPOOL_CONFIG_ASHIFT, ashift);
776 if (err) {
777 (void) fprintf(stderr,
778 "error: label %d: cannot find nvlist key %s\n",
779 l, ZPOOL_CONFIG_ASHIFT);
780 return (err);
781 }
782
783 if (*ashift == 0) {
784 (void) fprintf(stderr,
785 "error: label %d: nvlist key %s is zero\n",
786 l, ZPOOL_CONFIG_ASHIFT);
787 return (1);
788 }
789
790 return (0);
791 }
792
793 static int
zhack_repair_undetach(uberblock_t * ub,nvlist_t * cfg,const int l)794 zhack_repair_undetach(uberblock_t *ub, nvlist_t *cfg, const int l)
795 {
796 /*
797 * Uberblock root block pointer has valid birth TXG.
798 * Copying it to the label NVlist
799 */
800 if (BP_GET_LOGICAL_BIRTH(&ub->ub_rootbp) != 0) {
801 const uint64_t txg = BP_GET_LOGICAL_BIRTH(&ub->ub_rootbp);
802 int err;
803
804 ub->ub_txg = txg;
805
806 err = nvlist_remove_all(cfg, ZPOOL_CONFIG_CREATE_TXG);
807 if (err) {
808 (void) fprintf(stderr,
809 "error: label %d: "
810 "Failed to remove pool creation TXG\n",
811 l);
812 return (err);
813 }
814
815 err = nvlist_remove_all(cfg, ZPOOL_CONFIG_POOL_TXG);
816 if (err) {
817 (void) fprintf(stderr,
818 "error: label %d: Failed to remove pool TXG to "
819 "be replaced.\n",
820 l);
821 return (err);
822 }
823
824 err = nvlist_add_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, txg);
825 if (err) {
826 (void) fprintf(stderr,
827 "error: label %d: "
828 "Failed to add pool TXG of %" PRIu64 "\n",
829 l, txg);
830 return (err);
831 }
832 }
833
834 return (0);
835 }
836
837 static boolean_t
zhack_repair_write_label(const int l,const int fd,const int byteswap,void * data,zio_eck_t * eck,const uint64_t offset,const uint64_t abdsize)838 zhack_repair_write_label(const int l, const int fd, const int byteswap,
839 void *data, zio_eck_t *eck, const uint64_t offset, const uint64_t abdsize)
840 {
841 zio_cksum_t actual_cksum;
842 zhack_repair_calc_cksum(byteswap, data, offset, abdsize, eck,
843 &actual_cksum);
844 zio_cksum_t expected_cksum = eck->zec_cksum;
845 ssize_t err;
846
847 if (ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum))
848 return (B_FALSE);
849
850 eck->zec_cksum = actual_cksum;
851
852 err = pwrite64(fd, data, abdsize, offset);
853 if (err == -1) {
854 (void) fprintf(stderr, "error: cannot write label %d: %s\n",
855 l, strerror(errno));
856 return (B_FALSE);
857 } else if (err != abdsize) {
858 (void) fprintf(stderr, "error: bad write size label %d\n", l);
859 return (B_FALSE);
860 } else {
861 (void) fprintf(stderr,
862 "label %d: wrote %" PRIu64 " bytes at offset %" PRIu64 "\n",
863 l, abdsize, offset);
864 }
865
866 return (B_TRUE);
867 }
868
869 static void
zhack_repair_write_uberblock(vdev_label_t * vl,const int l,const uint64_t ashift,const int fd,const int byteswap,const uint64_t label_offset,uint32_t * labels_repaired)870 zhack_repair_write_uberblock(vdev_label_t *vl, const int l,
871 const uint64_t ashift, const int fd, const int byteswap,
872 const uint64_t label_offset, uint32_t *labels_repaired)
873 {
874 void *ub_data =
875 (char *)vl + offsetof(vdev_label_t, vl_uberblock);
876 zio_eck_t *ub_eck =
877 (zio_eck_t *)
878 ((char *)(ub_data) + (ASHIFT_UBERBLOCK_SIZE(ashift))) - 1;
879
880 if (ub_eck->zec_magic != 0) {
881 (void) fprintf(stderr,
882 "error: label %d: "
883 "Expected Uberblock checksum magic number to "
884 "be 0, but got %" PRIu64 "\n",
885 l, ub_eck->zec_magic);
886 (void) fprintf(stderr, "It would appear there's already "
887 "a checksum for the uberblock.\n");
888 return;
889 }
890
891
892 ub_eck->zec_magic = byteswap ? BSWAP_64(ZEC_MAGIC) : ZEC_MAGIC;
893
894 if (zhack_repair_write_label(l, fd, byteswap,
895 ub_data, ub_eck,
896 label_offset + offsetof(vdev_label_t, vl_uberblock),
897 ASHIFT_UBERBLOCK_SIZE(ashift)))
898 labels_repaired[l] |= REPAIR_LABEL_STATUS_UB;
899 }
900
901 static void
zhack_repair_print_cksum(FILE * stream,const zio_cksum_t * cksum)902 zhack_repair_print_cksum(FILE *stream, const zio_cksum_t *cksum)
903 {
904 (void) fprintf(stream,
905 "%016llx:%016llx:%016llx:%016llx",
906 (u_longlong_t)cksum->zc_word[0],
907 (u_longlong_t)cksum->zc_word[1],
908 (u_longlong_t)cksum->zc_word[2],
909 (u_longlong_t)cksum->zc_word[3]);
910 }
911
912 static int
zhack_repair_test_cksum(const int byteswap,void * vdev_data,zio_eck_t * vdev_eck,const uint64_t vdev_phys_offset,const int l)913 zhack_repair_test_cksum(const int byteswap, void *vdev_data,
914 zio_eck_t *vdev_eck, const uint64_t vdev_phys_offset, const int l)
915 {
916 const zio_cksum_t expected_cksum = vdev_eck->zec_cksum;
917 zio_cksum_t actual_cksum;
918 zhack_repair_calc_cksum(byteswap, vdev_data, vdev_phys_offset,
919 VDEV_PHYS_SIZE, vdev_eck, &actual_cksum);
920 const uint64_t expected_magic = byteswap ?
921 BSWAP_64(ZEC_MAGIC) : ZEC_MAGIC;
922 const uint64_t actual_magic = vdev_eck->zec_magic;
923 int err = 0;
924
925 if (actual_magic != expected_magic) {
926 (void) fprintf(stderr, "error: label %d: "
927 "Expected "
928 "the nvlist checksum magic number to not be %"
929 PRIu64 " not %" PRIu64 "\n",
930 l, expected_magic, actual_magic);
931 err = ECKSUM;
932 }
933 if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) {
934 (void) fprintf(stderr, "error: label %d: "
935 "Expected the nvlist checksum to be ", l);
936 (void) zhack_repair_print_cksum(stderr,
937 &expected_cksum);
938 (void) fprintf(stderr, " not ");
939 zhack_repair_print_cksum(stderr, &actual_cksum);
940 (void) fprintf(stderr, "\n");
941 err = ECKSUM;
942 }
943 return (err);
944 }
945
946 static int
zhack_repair_unpack_cfg(vdev_label_t * vl,const int l,nvlist_t ** cfg)947 zhack_repair_unpack_cfg(vdev_label_t *vl, const int l, nvlist_t **cfg)
948 {
949 const char *cfg_keys[] = { ZPOOL_CONFIG_VERSION,
950 ZPOOL_CONFIG_POOL_STATE, ZPOOL_CONFIG_GUID };
951 int err;
952
953 err = nvlist_unpack(vl->vl_vdev_phys.vp_nvlist,
954 VDEV_PHYS_SIZE - sizeof (zio_eck_t), cfg, 0);
955 if (err) {
956 (void) fprintf(stderr,
957 "error: cannot unpack nvlist label %d\n", l);
958 return (err);
959 }
960
961 for (int i = 0; i < ARRAY_SIZE(cfg_keys); i++) {
962 uint64_t val;
963 err = nvlist_lookup_uint64(*cfg, cfg_keys[i], &val);
964 if (err) {
965 (void) fprintf(stderr,
966 "error: label %d, %d: "
967 "cannot find nvlist key %s\n",
968 l, i, cfg_keys[i]);
969 return (err);
970 }
971 }
972
973 return (0);
974 }
975
976 static void
zhack_repair_one_label(const zhack_repair_op_t op,const int fd,vdev_label_t * vl,const uint64_t label_offset,const int l,uint32_t * labels_repaired)977 zhack_repair_one_label(const zhack_repair_op_t op, const int fd,
978 vdev_label_t *vl, const uint64_t label_offset, const int l,
979 uint32_t *labels_repaired)
980 {
981 ssize_t err;
982 uberblock_t *ub = (uberblock_t *)vl->vl_uberblock;
983 void *vdev_data =
984 (char *)vl + offsetof(vdev_label_t, vl_vdev_phys);
985 zio_eck_t *vdev_eck =
986 (zio_eck_t *)((char *)(vdev_data) + VDEV_PHYS_SIZE) - 1;
987 const uint64_t vdev_phys_offset =
988 label_offset + offsetof(vdev_label_t, vl_vdev_phys);
989 nvlist_t *cfg;
990 uint64_t ashift;
991 int byteswap;
992
993 err = zhack_repair_read_label(fd, vl, label_offset, l);
994 if (err)
995 return;
996
997 err = zhack_repair_get_byteswap(vdev_eck, l, &byteswap);
998 if (err)
999 return;
1000
1001 if (byteswap) {
1002 byteswap_uint64_array(&vdev_eck->zec_cksum,
1003 sizeof (zio_cksum_t));
1004 vdev_eck->zec_magic = BSWAP_64(vdev_eck->zec_magic);
1005 }
1006
1007 if ((op & ZHACK_REPAIR_OP_CKSUM) == 0 &&
1008 zhack_repair_test_cksum(byteswap, vdev_data, vdev_eck,
1009 vdev_phys_offset, l) != 0) {
1010 (void) fprintf(stderr, "It would appear checksums are "
1011 "corrupted. Try zhack repair label -c <device>\n");
1012 return;
1013 }
1014
1015 err = zhack_repair_unpack_cfg(vl, l, &cfg);
1016 if (err)
1017 return;
1018
1019 if ((op & ZHACK_REPAIR_OP_UNDETACH) != 0) {
1020 char *buf;
1021 size_t buflen;
1022
1023 if (ub->ub_txg != 0) {
1024 (void) fprintf(stderr,
1025 "error: label %d: UB TXG of 0 expected, but got %"
1026 PRIu64 "\n", l, ub->ub_txg);
1027 (void) fprintf(stderr, "It would appear the device was "
1028 "not properly detached.\n");
1029 return;
1030 }
1031
1032 err = zhack_repair_get_ashift(cfg, l, &ashift);
1033 if (err)
1034 return;
1035
1036 err = zhack_repair_undetach(ub, cfg, l);
1037 if (err)
1038 return;
1039
1040 buf = vl->vl_vdev_phys.vp_nvlist;
1041 buflen = VDEV_PHYS_SIZE - sizeof (zio_eck_t);
1042 if (nvlist_pack(cfg, &buf, &buflen, NV_ENCODE_XDR, 0) != 0) {
1043 (void) fprintf(stderr,
1044 "error: label %d: Failed to pack nvlist\n", l);
1045 return;
1046 }
1047
1048 zhack_repair_write_uberblock(vl,
1049 l, ashift, fd, byteswap, label_offset, labels_repaired);
1050 }
1051
1052 if (zhack_repair_write_label(l, fd, byteswap, vdev_data, vdev_eck,
1053 vdev_phys_offset, VDEV_PHYS_SIZE))
1054 labels_repaired[l] |= REPAIR_LABEL_STATUS_CKSUM;
1055
1056 fsync(fd);
1057 }
1058
/*
 * Return "repaired" when any bit of to_check is set in label_status and
 * "skipped" otherwise.
 */
static const char *
zhack_repair_label_status(const uint32_t label_status,
    const uint32_t to_check)
{
	if ((label_status & to_check) == 0)
		return ("skipped");

	return ("repaired");
}
1065
1066 static int
zhack_label_repair(const zhack_repair_op_t op,const int argc,char ** argv)1067 zhack_label_repair(const zhack_repair_op_t op, const int argc, char **argv)
1068 {
1069 uint32_t labels_repaired[VDEV_LABELS] = {0};
1070 vdev_label_t labels[VDEV_LABELS] = {{{0}}};
1071 struct stat64 st;
1072 int fd;
1073 off_t filesize;
1074 uint32_t repaired = 0;
1075
1076 abd_init();
1077
1078 if (argc < 1) {
1079 (void) fprintf(stderr, "error: missing device\n");
1080 usage();
1081 }
1082
1083 if ((fd = open(argv[0], O_RDWR)) == -1)
1084 fatal(NULL, FTAG, "cannot open '%s': %s", argv[0],
1085 strerror(errno));
1086
1087 if (fstat64_blk(fd, &st) != 0)
1088 fatal(NULL, FTAG, "cannot stat '%s': %s", argv[0],
1089 strerror(errno));
1090
1091 filesize = st.st_size;
1092 (void) fprintf(stderr, "Calculated filesize to be %jd\n",
1093 (intmax_t)filesize);
1094
1095 if (filesize % sizeof (vdev_label_t) != 0)
1096 filesize =
1097 (filesize / sizeof (vdev_label_t)) * sizeof (vdev_label_t);
1098
1099 for (int l = 0; l < VDEV_LABELS; l++) {
1100 zhack_repair_one_label(op, fd, &labels[l],
1101 vdev_label_offset(filesize, l, 0), l, labels_repaired);
1102 }
1103
1104 close(fd);
1105
1106 abd_fini();
1107
1108 for (int l = 0; l < VDEV_LABELS; l++) {
1109 const uint32_t lr = labels_repaired[l];
1110 (void) printf("label %d: ", l);
1111 (void) printf("uberblock: %s ",
1112 zhack_repair_label_status(lr, REPAIR_LABEL_STATUS_UB));
1113 (void) printf("checksum: %s\n",
1114 zhack_repair_label_status(lr, REPAIR_LABEL_STATUS_CKSUM));
1115 repaired |= lr;
1116 }
1117
1118 if (repaired > 0)
1119 return (0);
1120
1121 return (1);
1122 }
1123
1124 static int
zhack_do_label_repair(int argc,char ** argv)1125 zhack_do_label_repair(int argc, char **argv)
1126 {
1127 zhack_repair_op_t op = ZHACK_REPAIR_OP_UNKNOWN;
1128 int c;
1129
1130 optind = 1;
1131 while ((c = getopt(argc, argv, "+cu")) != -1) {
1132 switch (c) {
1133 case 'c':
1134 op |= ZHACK_REPAIR_OP_CKSUM;
1135 break;
1136 case 'u':
1137 op |= ZHACK_REPAIR_OP_UNDETACH;
1138 break;
1139 default:
1140 usage();
1141 break;
1142 }
1143 }
1144
1145 argc -= optind;
1146 argv += optind;
1147
1148 if (op == ZHACK_REPAIR_OP_UNKNOWN)
1149 op = ZHACK_REPAIR_OP_CKSUM;
1150
1151 return (zhack_label_repair(op, argc, argv));
1152 }
1153
/*
 * Dispatch "label <repair> ...". argv[0] is the "label" keyword on entry.
 * Returns the result of the repair; bad usage exits via usage().
 */
static int
zhack_do_label(int argc, char **argv)
{
	if (--argc == 0) {
		(void) fprintf(stderr,
		    "error: no label operation specified\n");
		usage();
	}
	argv++;

	if (strcmp(argv[0], "repair") != 0) {
		(void) fprintf(stderr, "error: unknown subcommand: %s\n",
		    argv[0]);
		usage();
	}

	return (zhack_do_label_repair(argc, argv));
}
1179
1180 #define MAX_NUM_PATHS 1024
1181
1182 int
main(int argc,char ** argv)1183 main(int argc, char **argv)
1184 {
1185 char *path[MAX_NUM_PATHS];
1186 const char *subcommand;
1187 int rv = 0;
1188 int c;
1189
1190 g_importargs.path = path;
1191
1192 dprintf_setup(&argc, argv);
1193 zfs_prop_init();
1194
1195 while ((c = getopt(argc, argv, "+c:d:o:")) != -1) {
1196 switch (c) {
1197 case 'c':
1198 g_importargs.cachefile = optarg;
1199 break;
1200 case 'd':
1201 assert(g_importargs.paths < MAX_NUM_PATHS);
1202 g_importargs.path[g_importargs.paths++] = optarg;
1203 break;
1204 case 'o':
1205 if (handle_tunable_option(optarg, B_FALSE) != 0)
1206 exit(1);
1207 break;
1208 default:
1209 usage();
1210 break;
1211 }
1212 }
1213
1214 argc -= optind;
1215 argv += optind;
1216 optind = 1;
1217
1218 if (argc == 0) {
1219 (void) fprintf(stderr, "error: no command specified\n");
1220 usage();
1221 }
1222
1223 subcommand = argv[0];
1224
1225 if (strcmp(subcommand, "feature") == 0) {
1226 rv = zhack_do_feature(argc, argv);
1227 } else if (strcmp(subcommand, "label") == 0) {
1228 return (zhack_do_label(argc, argv));
1229 } else if (strcmp(subcommand, "metaslab") == 0) {
1230 rv = zhack_do_metaslab(argc, argv);
1231 } else {
1232 (void) fprintf(stderr, "error: unknown subcommand: %s\n",
1233 subcommand);
1234 usage();
1235 }
1236
1237 if (!g_readonly && spa_export(g_pool, NULL, B_TRUE, B_FALSE) != 0) {
1238 fatal(NULL, FTAG, "pool export failed; "
1239 "changes may not be committed to disk\n");
1240 }
1241
1242 kernel_fini();
1243
1244 return (rv);
1245 }
1246