xref: /freebsd/sys/contrib/openzfs/cmd/zhack.c (revision 113e60742ef6ba5c069aa737ee57ba3c2f88b248)
1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or https://opensource.org/licenses/CDDL-1.0.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*
24  * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
25  * Copyright (c) 2013 Steven Hartland. All rights reserved.
26  */
27 
28 /*
29  * zhack is a debugging tool that can write changes to ZFS pool using libzpool
30  * for testing purposes. Altering pools with zhack is unsupported and may
31  * result in corrupted pools.
32  */
33 
34 #include <zfs_prop.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <ctype.h>
38 #include <sys/stat.h>
39 #include <sys/zfs_context.h>
40 #include <sys/spa.h>
41 #include <sys/spa_impl.h>
42 #include <sys/dmu.h>
43 #include <sys/zap.h>
44 #include <sys/zfs_znode.h>
45 #include <sys/dsl_synctask.h>
46 #include <sys/vdev.h>
47 #include <sys/vdev_impl.h>
48 #include <sys/fs/zfs.h>
49 #include <sys/dmu_objset.h>
50 #include <sys/dsl_pool.h>
51 #include <sys/zio_checksum.h>
52 #include <sys/zio_compress.h>
53 #include <sys/zfeature.h>
54 #include <sys/dmu_tx.h>
55 #include <zfeature_common.h>
56 #include <libzutil.h>
57 #include <sys/metaslab_impl.h>
58 
59 static importargs_t g_importargs;
60 static char *g_pool;
61 static boolean_t g_readonly;
62 
63 typedef enum {
64 	ZHACK_REPAIR_OP_UNKNOWN  = 0,
65 	ZHACK_REPAIR_OP_CKSUM    = (1 << 0),
66 	ZHACK_REPAIR_OP_UNDETACH = (1 << 1)
67 } zhack_repair_op_t;
68 
/*
 * Print the synopsis of every subcommand to stderr and exit with status 1.
 * This function never returns.
 */
static __attribute__((noreturn)) void
usage(void)
{
	(void) fprintf(stderr,
	    "Usage: zhack [-o tunable] [-c cachefile] [-d dir] <subcommand> "
	    "<args> ...\n"
	    "where <subcommand> <args> is one of the following:\n"
	    "\n");

	(void) fprintf(stderr,
	    "    feature stat <pool>\n"
	    "        print information about enabled features\n"
	    "    feature enable [-r] [-d desc] <pool> <feature>\n"
	    "        add a new enabled feature to the pool\n"
	    "        -d <desc> sets the feature's description\n"
	    "        -r set read-only compatible flag for feature\n"
	    "    feature ref [-md] <pool> <feature>\n"
	    "        change the refcount on the given feature\n"
	    "        -d decrease instead of increase the refcount\n"
	    "        -m add the feature to the label if increasing refcount\n"
	    "\n"
	    "    <feature> : should be a feature guid\n"
	    "\n"
	    "    label repair [-cu] <device>\n"
	    "        repair labels of a specified device according to options\n"
	    "        which may be combined to do their functions in one call\n"
	    "        -c repair corrupted label checksums\n"
	    "        -u restore the label on a detached device\n"
	    "\n"
	    "    <device> : path to vdev\n"
	    "\n"
	    /* "metaslab leak" parses -f; keep the synopsis in sync with it. */
	    "    metaslab leak [-f] <pool>\n"
	    "        apply allocation map from zdb to specified pool\n"
	    "        -f ignore a metaslab count mismatch with the pool\n");
	exit(1);
}
104 
105 
106 static __attribute__((format(printf, 3, 4))) __attribute__((noreturn)) void
fatal(spa_t * spa,const void * tag,const char * fmt,...)107 fatal(spa_t *spa, const void *tag, const char *fmt, ...)
108 {
109 	va_list ap;
110 
111 	if (spa != NULL) {
112 		spa_close(spa, tag);
113 		(void) spa_export(g_pool, NULL, B_TRUE, B_FALSE);
114 	}
115 
116 	va_start(ap, fmt);
117 	(void) fputs("zhack: ", stderr);
118 	(void) vfprintf(stderr, fmt, ap);
119 	va_end(ap);
120 	(void) fputc('\n', stderr);
121 
122 	exit(1);
123 }
124 
125 static int
space_delta_cb(dmu_object_type_t bonustype,const void * data,zfs_file_info_t * zoi)126 space_delta_cb(dmu_object_type_t bonustype, const void *data,
127     zfs_file_info_t *zoi)
128 {
129 	(void) data, (void) zoi;
130 
131 	/*
132 	 * Is it a valid type of object to track?
133 	 */
134 	if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA)
135 		return (ENOENT);
136 	(void) fprintf(stderr, "modifying object that needs user accounting");
137 	abort();
138 }
139 
140 /*
141  * Target is the dataset whose pool we want to open.
142  */
143 static void
zhack_import(char * target,boolean_t readonly)144 zhack_import(char *target, boolean_t readonly)
145 {
146 	nvlist_t *config;
147 	nvlist_t *props;
148 	int error;
149 
150 	kernel_init(readonly ? SPA_MODE_READ :
151 	    (SPA_MODE_READ | SPA_MODE_WRITE));
152 
153 	dmu_objset_register_type(DMU_OST_ZFS, space_delta_cb);
154 
155 	g_readonly = readonly;
156 	g_importargs.can_be_active = readonly;
157 	g_pool = strdup(target);
158 
159 	libpc_handle_t lpch = {
160 		.lpc_lib_handle = NULL,
161 		.lpc_ops = &libzpool_config_ops,
162 		.lpc_printerr = B_TRUE
163 	};
164 	error = zpool_find_config(&lpch, target, &config, &g_importargs);
165 	if (error)
166 		fatal(NULL, FTAG, "cannot import '%s'", target);
167 
168 	props = NULL;
169 	if (readonly) {
170 		VERIFY0(nvlist_alloc(&props, NV_UNIQUE_NAME, 0));
171 		VERIFY0(nvlist_add_uint64(props,
172 		    zpool_prop_to_name(ZPOOL_PROP_READONLY), 1));
173 	}
174 
175 	zfeature_checks_disable = B_TRUE;
176 	error = spa_import(target, config, props,
177 	    (readonly ?  ZFS_IMPORT_SKIP_MMP : ZFS_IMPORT_NORMAL));
178 	fnvlist_free(config);
179 	zfeature_checks_disable = B_FALSE;
180 	if (error == EEXIST)
181 		error = 0;
182 
183 	if (error)
184 		fatal(NULL, FTAG, "can't import '%s': %s", target,
185 		    strerror(error));
186 }
187 
188 static void
zhack_spa_open(char * target,boolean_t readonly,const void * tag,spa_t ** spa)189 zhack_spa_open(char *target, boolean_t readonly, const void *tag, spa_t **spa)
190 {
191 	int err;
192 
193 	zhack_import(target, readonly);
194 
195 	zfeature_checks_disable = B_TRUE;
196 	err = spa_open(target, spa, tag);
197 	zfeature_checks_disable = B_FALSE;
198 
199 	if (err != 0)
200 		fatal(*spa, FTAG, "cannot open '%s': %s", target,
201 		    strerror(err));
202 	if (spa_version(*spa) < SPA_VERSION_FEATURES) {
203 		fatal(*spa, FTAG, "'%s' has version %d, features not enabled",
204 		    target, (int)spa_version(*spa));
205 	}
206 }
207 
208 static void
dump_obj(objset_t * os,uint64_t obj,const char * name)209 dump_obj(objset_t *os, uint64_t obj, const char *name)
210 {
211 	zap_cursor_t zc;
212 	zap_attribute_t *za = zap_attribute_long_alloc();
213 
214 	(void) printf("%s_obj:\n", name);
215 
216 	for (zap_cursor_init(&zc, os, obj);
217 	    zap_cursor_retrieve(&zc, za) == 0;
218 	    zap_cursor_advance(&zc)) {
219 		if (za->za_integer_length == 8) {
220 			ASSERT(za->za_num_integers == 1);
221 			(void) printf("\t%s = %llu\n",
222 			    za->za_name, (u_longlong_t)za->za_first_integer);
223 		} else {
224 			ASSERT(za->za_integer_length == 1);
225 			char val[1024];
226 			VERIFY0(zap_lookup(os, obj, za->za_name,
227 			    1, sizeof (val), val));
228 			(void) printf("\t%s = %s\n", za->za_name, val);
229 		}
230 	}
231 	zap_cursor_fini(&zc);
232 	zap_attribute_free(za);
233 }
234 
235 static void
dump_mos(spa_t * spa)236 dump_mos(spa_t *spa)
237 {
238 	nvlist_t *nv = spa->spa_label_features;
239 	nvpair_t *pair;
240 
241 	(void) printf("label config:\n");
242 	for (pair = nvlist_next_nvpair(nv, NULL);
243 	    pair != NULL;
244 	    pair = nvlist_next_nvpair(nv, pair)) {
245 		(void) printf("\t%s\n", nvpair_name(pair));
246 	}
247 }
248 
249 static void
zhack_do_feature_stat(int argc,char ** argv)250 zhack_do_feature_stat(int argc, char **argv)
251 {
252 	spa_t *spa;
253 	objset_t *os;
254 	char *target;
255 
256 	argc--;
257 	argv++;
258 
259 	if (argc < 1) {
260 		(void) fprintf(stderr, "error: missing pool name\n");
261 		usage();
262 	}
263 	target = argv[0];
264 
265 	zhack_spa_open(target, B_TRUE, FTAG, &spa);
266 	os = spa->spa_meta_objset;
267 
268 	dump_obj(os, spa->spa_feat_for_read_obj, "for_read");
269 	dump_obj(os, spa->spa_feat_for_write_obj, "for_write");
270 	dump_obj(os, spa->spa_feat_desc_obj, "descriptions");
271 	if (spa_feature_is_active(spa, SPA_FEATURE_ENABLED_TXG)) {
272 		dump_obj(os, spa->spa_feat_enabled_txg_obj, "enabled_txg");
273 	}
274 	dump_mos(spa);
275 
276 	spa_close(spa, FTAG);
277 }
278 
279 static void
zhack_feature_enable_sync(void * arg,dmu_tx_t * tx)280 zhack_feature_enable_sync(void *arg, dmu_tx_t *tx)
281 {
282 	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
283 	zfeature_info_t *feature = arg;
284 
285 	feature_enable_sync(spa, feature, tx);
286 
287 	spa_history_log_internal(spa, "zhack enable feature", tx,
288 	    "name=%s flags=%u",
289 	    feature->fi_guid, feature->fi_flags);
290 }
291 
292 static void
zhack_do_feature_enable(int argc,char ** argv)293 zhack_do_feature_enable(int argc, char **argv)
294 {
295 	int c;
296 	char *desc, *target;
297 	spa_t *spa;
298 	objset_t *mos;
299 	zfeature_info_t feature;
300 	const spa_feature_t nodeps[] = { SPA_FEATURE_NONE };
301 
302 	/*
303 	 * Features are not added to the pool's label until their refcounts
304 	 * are incremented, so fi_mos can just be left as false for now.
305 	 */
306 	desc = NULL;
307 	feature.fi_uname = "zhack";
308 	feature.fi_flags = 0;
309 	feature.fi_depends = nodeps;
310 	feature.fi_feature = SPA_FEATURE_NONE;
311 
312 	optind = 1;
313 	while ((c = getopt(argc, argv, "+rd:")) != -1) {
314 		switch (c) {
315 		case 'r':
316 			feature.fi_flags |= ZFEATURE_FLAG_READONLY_COMPAT;
317 			break;
318 		case 'd':
319 			if (desc != NULL)
320 				free(desc);
321 			desc = strdup(optarg);
322 			break;
323 		default:
324 			usage();
325 			break;
326 		}
327 	}
328 
329 	if (desc == NULL)
330 		desc = strdup("zhack injected");
331 	feature.fi_desc = desc;
332 
333 	argc -= optind;
334 	argv += optind;
335 
336 	if (argc < 2) {
337 		(void) fprintf(stderr, "error: missing feature or pool name\n");
338 		usage();
339 	}
340 	target = argv[0];
341 	feature.fi_guid = argv[1];
342 
343 	if (!zfeature_is_valid_guid(feature.fi_guid))
344 		fatal(NULL, FTAG, "invalid feature guid: %s", feature.fi_guid);
345 
346 	zhack_spa_open(target, B_FALSE, FTAG, &spa);
347 	mos = spa->spa_meta_objset;
348 
349 	if (zfeature_is_supported(feature.fi_guid))
350 		fatal(spa, FTAG, "'%s' is a real feature, will not enable",
351 		    feature.fi_guid);
352 	if (0 == zap_contains(mos, spa->spa_feat_desc_obj, feature.fi_guid))
353 		fatal(spa, FTAG, "feature already enabled: %s",
354 		    feature.fi_guid);
355 
356 	VERIFY0(dsl_sync_task(spa_name(spa), NULL,
357 	    zhack_feature_enable_sync, &feature, 5, ZFS_SPACE_CHECK_NORMAL));
358 
359 	spa_close(spa, FTAG);
360 
361 	free(desc);
362 }
363 
364 static void
feature_incr_sync(void * arg,dmu_tx_t * tx)365 feature_incr_sync(void *arg, dmu_tx_t *tx)
366 {
367 	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
368 	zfeature_info_t *feature = arg;
369 	uint64_t refcount;
370 
371 	mutex_enter(&spa->spa_feat_stats_lock);
372 	VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount));
373 	feature_sync(spa, feature, refcount + 1, tx);
374 	spa_history_log_internal(spa, "zhack feature incr", tx,
375 	    "name=%s", feature->fi_guid);
376 	mutex_exit(&spa->spa_feat_stats_lock);
377 }
378 
379 static void
feature_decr_sync(void * arg,dmu_tx_t * tx)380 feature_decr_sync(void *arg, dmu_tx_t *tx)
381 {
382 	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
383 	zfeature_info_t *feature = arg;
384 	uint64_t refcount;
385 
386 	mutex_enter(&spa->spa_feat_stats_lock);
387 	VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount));
388 	feature_sync(spa, feature, refcount - 1, tx);
389 	spa_history_log_internal(spa, "zhack feature decr", tx,
390 	    "name=%s", feature->fi_guid);
391 	mutex_exit(&spa->spa_feat_stats_lock);
392 }
393 
394 static void
zhack_do_feature_ref(int argc,char ** argv)395 zhack_do_feature_ref(int argc, char **argv)
396 {
397 	int c;
398 	char *target;
399 	boolean_t decr = B_FALSE;
400 	spa_t *spa;
401 	objset_t *mos;
402 	zfeature_info_t feature;
403 	const spa_feature_t nodeps[] = { SPA_FEATURE_NONE };
404 
405 	/*
406 	 * fi_desc does not matter here because it was written to disk
407 	 * when the feature was enabled, but we need to properly set the
408 	 * feature for read or write based on the information we read off
409 	 * disk later.
410 	 */
411 	feature.fi_uname = "zhack";
412 	feature.fi_flags = 0;
413 	feature.fi_desc = NULL;
414 	feature.fi_depends = nodeps;
415 	feature.fi_feature = SPA_FEATURE_NONE;
416 
417 	optind = 1;
418 	while ((c = getopt(argc, argv, "+md")) != -1) {
419 		switch (c) {
420 		case 'm':
421 			feature.fi_flags |= ZFEATURE_FLAG_MOS;
422 			break;
423 		case 'd':
424 			decr = B_TRUE;
425 			break;
426 		default:
427 			usage();
428 			break;
429 		}
430 	}
431 	argc -= optind;
432 	argv += optind;
433 
434 	if (argc < 2) {
435 		(void) fprintf(stderr, "error: missing feature or pool name\n");
436 		usage();
437 	}
438 	target = argv[0];
439 	feature.fi_guid = argv[1];
440 
441 	if (!zfeature_is_valid_guid(feature.fi_guid))
442 		fatal(NULL, FTAG, "invalid feature guid: %s", feature.fi_guid);
443 
444 	zhack_spa_open(target, B_FALSE, FTAG, &spa);
445 	mos = spa->spa_meta_objset;
446 
447 	if (zfeature_is_supported(feature.fi_guid)) {
448 		fatal(spa, FTAG,
449 		    "'%s' is a real feature, will not change refcount",
450 		    feature.fi_guid);
451 	}
452 
453 	if (0 == zap_contains(mos, spa->spa_feat_for_read_obj,
454 	    feature.fi_guid)) {
455 		feature.fi_flags &= ~ZFEATURE_FLAG_READONLY_COMPAT;
456 	} else if (0 == zap_contains(mos, spa->spa_feat_for_write_obj,
457 	    feature.fi_guid)) {
458 		feature.fi_flags |= ZFEATURE_FLAG_READONLY_COMPAT;
459 	} else {
460 		fatal(spa, FTAG, "feature is not enabled: %s", feature.fi_guid);
461 	}
462 
463 	if (decr) {
464 		uint64_t count;
465 		if (feature_get_refcount_from_disk(spa, &feature,
466 		    &count) == 0 && count == 0) {
467 			fatal(spa, FTAG, "feature refcount already 0: %s",
468 			    feature.fi_guid);
469 		}
470 	}
471 
472 	VERIFY0(dsl_sync_task(spa_name(spa), NULL,
473 	    decr ? feature_decr_sync : feature_incr_sync, &feature,
474 	    5, ZFS_SPACE_CHECK_NORMAL));
475 
476 	spa_close(spa, FTAG);
477 }
478 
/*
 * Dispatch "feature <stat|enable|ref> ...".  argv[0] is "feature" and
 * argv[1] the operation; the handler receives the arguments starting at
 * the operation name.  Returns 0; invalid input ends in usage().
 */
static int
zhack_do_feature(int argc, char **argv)
{
	const char *op;

	argc--;
	argv++;
	if (argc == 0) {
		(void) fprintf(stderr,
		    "error: no feature operation specified\n");
		usage();
	}

	op = argv[0];
	if (strcmp(op, "stat") == 0) {
		zhack_do_feature_stat(argc, argv);
		return (0);
	}
	if (strcmp(op, "enable") == 0) {
		zhack_do_feature_enable(argc, argv);
		return (0);
	}
	if (strcmp(op, "ref") == 0) {
		zhack_do_feature_ref(argc, argv);
		return (0);
	}

	(void) fprintf(stderr, "error: unknown subcommand: %s\n", op);
	usage();

	return (0);
}
507 
508 static boolean_t
strstarts(const char * a,const char * b)509 strstarts(const char *a, const char *b)
510 {
511 	return (strncmp(a, b, strlen(b)) == 0);
512 }
513 
514 static void
metaslab_force_alloc(metaslab_t * msp,uint64_t start,uint64_t size,dmu_tx_t * tx)515 metaslab_force_alloc(metaslab_t *msp, uint64_t start, uint64_t size,
516     dmu_tx_t *tx)
517 {
518 	ASSERT(msp->ms_disabled);
519 	ASSERT(MUTEX_HELD(&msp->ms_lock));
520 	uint64_t txg = dmu_tx_get_txg(tx);
521 
522 	uint64_t off = start;
523 	while (off < start + size) {
524 		uint64_t ostart, osize;
525 		boolean_t found = zfs_range_tree_find_in(msp->ms_allocatable,
526 		    off, start + size - off, &ostart, &osize);
527 		if (!found)
528 			break;
529 		zfs_range_tree_remove(msp->ms_allocatable, ostart, osize);
530 
531 		if (zfs_range_tree_is_empty(msp->ms_allocating[txg & TXG_MASK]))
532 			vdev_dirty(msp->ms_group->mg_vd, VDD_METASLAB, msp,
533 			    txg);
534 
535 		zfs_range_tree_add(msp->ms_allocating[txg & TXG_MASK], ostart,
536 		    osize);
537 		msp->ms_allocating_total += osize;
538 		off = ostart + osize;
539 	}
540 }
541 
542 static void
zhack_do_metaslab_leak(int argc,char ** argv)543 zhack_do_metaslab_leak(int argc, char **argv)
544 {
545 	int c;
546 	char *target;
547 	spa_t *spa;
548 
549 	optind = 1;
550 	boolean_t force = B_FALSE;
551 	while ((c = getopt(argc, argv, "f")) != -1) {
552 		switch (c) {
553 		case 'f':
554 			force = B_TRUE;
555 			break;
556 		default:
557 			usage();
558 			break;
559 		}
560 	}
561 
562 	argc -= optind;
563 	argv += optind;
564 
565 	if (argc < 1) {
566 		(void) fprintf(stderr, "error: missing pool name\n");
567 		usage();
568 	}
569 	target = argv[0];
570 
571 	zhack_spa_open(target, B_FALSE, FTAG, &spa);
572 	spa_config_enter(spa, SCL_VDEV | SCL_ALLOC, FTAG, RW_READER);
573 
574 	char *line = NULL;
575 	size_t cap = 0;
576 
577 	vdev_t *vd = NULL;
578 	metaslab_t *prev = NULL;
579 	dmu_tx_t *tx = NULL;
580 	while (getline(&line, &cap, stdin) > 0) {
581 		if (strstarts(line, "\tvdev ")) {
582 			uint64_t vdev_id, ms_shift;
583 			if (sscanf(line,
584 			    "\tvdev %10"PRIu64"\t%*s  metaslab shift %4"PRIu64,
585 			    &vdev_id, &ms_shift) == 1) {
586 				VERIFY3U(sscanf(line, "\tvdev %"PRIu64
587 				    "\t  metaslab shift %4"PRIu64,
588 				    &vdev_id, &ms_shift), ==, 2);
589 			}
590 			vd = vdev_lookup_top(spa, vdev_id);
591 			if (vd == NULL) {
592 				fprintf(stderr, "error: no such vdev with "
593 				    "id %"PRIu64"\n", vdev_id);
594 				break;
595 			}
596 			if (tx) {
597 				dmu_tx_commit(tx);
598 				mutex_exit(&prev->ms_lock);
599 				metaslab_enable(prev, B_FALSE, B_FALSE);
600 				tx = NULL;
601 				prev = NULL;
602 			}
603 			if (vd->vdev_ms_shift != ms_shift) {
604 				fprintf(stderr, "error: ms_shift mismatch: %"
605 				    PRIu64" != %"PRIu64"\n", vd->vdev_ms_shift,
606 				    ms_shift);
607 				break;
608 			}
609 		} else if (strstarts(line, "\tmetaslabs ")) {
610 			uint64_t ms_count;
611 			VERIFY3U(sscanf(line, "\tmetaslabs %"PRIu64, &ms_count),
612 			    ==, 1);
613 			ASSERT(vd);
614 			if (!force && vd->vdev_ms_count != ms_count) {
615 				fprintf(stderr, "error: ms_count mismatch: %"
616 				    PRIu64" != %"PRIu64"\n", vd->vdev_ms_count,
617 				    ms_count);
618 				break;
619 			}
620 		} else if (strstarts(line, "ALLOC:")) {
621 			uint64_t start, size;
622 			VERIFY3U(sscanf(line, "ALLOC: %"PRIu64" %"PRIu64"\n",
623 			    &start, &size), ==, 2);
624 
625 			ASSERT(vd);
626 			metaslab_t *cur =
627 			    vd->vdev_ms[start >> vd->vdev_ms_shift];
628 			if (prev != cur) {
629 				if (prev) {
630 					dmu_tx_commit(tx);
631 					mutex_exit(&prev->ms_lock);
632 					metaslab_enable(prev, B_FALSE, B_FALSE);
633 				}
634 				ASSERT(cur);
635 				metaslab_disable(cur);
636 				mutex_enter(&cur->ms_lock);
637 				metaslab_load(cur);
638 				prev = cur;
639 				tx = dmu_tx_create_dd(
640 				    spa_get_dsl(vd->vdev_spa)->dp_root_dir);
641 				dmu_tx_assign(tx, DMU_TX_WAIT);
642 			}
643 
644 			metaslab_force_alloc(cur, start, size, tx);
645 		} else {
646 			continue;
647 		}
648 	}
649 	if (tx) {
650 		dmu_tx_commit(tx);
651 		mutex_exit(&prev->ms_lock);
652 		metaslab_enable(prev, B_FALSE, B_FALSE);
653 		tx = NULL;
654 		prev = NULL;
655 	}
656 	if (line)
657 		free(line);
658 
659 	spa_config_exit(spa, SCL_VDEV | SCL_ALLOC, FTAG);
660 	spa_close(spa, FTAG);
661 }
662 
/*
 * Dispatch "metaslab <leak> ...".  argv[0] is "metaslab" and argv[1] the
 * operation; the handler receives the arguments starting at the operation
 * name.  Returns 0; invalid input ends in usage().
 */
static int
zhack_do_metaslab(int argc, char **argv)
{
	const char *op;

	argc--;
	argv++;
	if (argc == 0) {
		(void) fprintf(stderr,
		    "error: no metaslab operation specified\n");
		usage();
	}

	op = argv[0];
	if (strcmp(op, "leak") != 0) {
		(void) fprintf(stderr, "error: unknown subcommand: %s\n", op);
		usage();
	} else {
		zhack_do_metaslab_leak(argc, argv);
	}

	return (0);
}
687 
/*
 * Uberblock shift for a device with the given ashift: the ashift clamped
 * to the range [UBERBLOCK_SHIFT, MAX_UBERBLOCK_SHIFT].
 */
#define	ASHIFT_UBERBLOCK_SHIFT(ashift)	\
	MIN(MAX((ashift), UBERBLOCK_SHIFT), MAX_UBERBLOCK_SHIFT)
/* Uberblock size in bytes that corresponds to that shift. */
#define	ASHIFT_UBERBLOCK_SIZE(ashift)	\
	(1ULL << ASHIFT_UBERBLOCK_SHIFT(ashift))

/* Result bits recorded per label by the repair functions. */
#define	REPAIR_LABEL_STATUS_CKSUM	(1 << 0)
#define	REPAIR_LABEL_STATUS_UB		(1 << 1)
696 
697 static int
zhack_repair_read_label(const int fd,vdev_label_t * vl,const uint64_t label_offset,const int l)698 zhack_repair_read_label(const int fd, vdev_label_t *vl,
699     const uint64_t label_offset, const int l)
700 {
701 	const int err = pread64(fd, vl, sizeof (vdev_label_t), label_offset);
702 
703 	if (err == -1) {
704 		(void) fprintf(stderr,
705 		    "error: cannot read label %d: %s\n",
706 		    l, strerror(errno));
707 		return (err);
708 	} else if (err != sizeof (vdev_label_t)) {
709 		(void) fprintf(stderr,
710 		    "error: bad label %d read size\n", l);
711 		return (err);
712 	}
713 
714 	return (0);
715 }
716 
717 static int
zhack_repair_get_byteswap(const zio_eck_t * vdev_eck,const int l,int * byteswap)718 zhack_repair_get_byteswap(const zio_eck_t *vdev_eck, const int l, int *byteswap)
719 {
720 	if (vdev_eck->zec_magic == ZEC_MAGIC) {
721 		*byteswap = B_FALSE;
722 	} else if (vdev_eck->zec_magic == BSWAP_64((uint64_t)ZEC_MAGIC)) {
723 		*byteswap = B_TRUE;
724 	} else {
725 		(void) fprintf(stderr, "error: label %d: "
726 		    "Expected the nvlist checksum magic number but instead got "
727 		    "0x%" PRIx64 "\n",
728 		    l, vdev_eck->zec_magic);
729 		return (1);
730 	}
731 	return (0);
732 }
733 
734 static void
zhack_repair_calc_cksum(const int byteswap,void * data,const uint64_t offset,const uint64_t abdsize,zio_eck_t * eck,zio_cksum_t * cksum)735 zhack_repair_calc_cksum(const int byteswap, void *data, const uint64_t offset,
736     const uint64_t abdsize, zio_eck_t *eck, zio_cksum_t *cksum)
737 {
738 	zio_cksum_t verifier;
739 	zio_cksum_t current_cksum;
740 	zio_checksum_info_t *ci;
741 	abd_t *abd;
742 
743 	ZIO_SET_CHECKSUM(&verifier, offset, 0, 0, 0);
744 
745 	if (byteswap)
746 		byteswap_uint64_array(&verifier, sizeof (zio_cksum_t));
747 
748 	current_cksum = eck->zec_cksum;
749 	eck->zec_cksum = verifier;
750 
751 	ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL];
752 	abd = abd_get_from_buf(data, abdsize);
753 	ci->ci_func[byteswap](abd, abdsize, NULL, cksum);
754 	abd_free(abd);
755 
756 	eck->zec_cksum = current_cksum;
757 }
758 
759 static int
zhack_repair_get_ashift(nvlist_t * cfg,const int l,uint64_t * ashift)760 zhack_repair_get_ashift(nvlist_t *cfg, const int l, uint64_t *ashift)
761 {
762 	int err;
763 	nvlist_t *vdev_tree_cfg;
764 
765 	err = nvlist_lookup_nvlist(cfg,
766 	    ZPOOL_CONFIG_VDEV_TREE, &vdev_tree_cfg);
767 	if (err) {
768 		(void) fprintf(stderr,
769 		    "error: label %d: cannot find nvlist key %s\n",
770 		    l, ZPOOL_CONFIG_VDEV_TREE);
771 		return (err);
772 	}
773 
774 	err = nvlist_lookup_uint64(vdev_tree_cfg,
775 	    ZPOOL_CONFIG_ASHIFT, ashift);
776 	if (err) {
777 		(void) fprintf(stderr,
778 		    "error: label %d: cannot find nvlist key %s\n",
779 		    l, ZPOOL_CONFIG_ASHIFT);
780 		return (err);
781 	}
782 
783 	if (*ashift == 0) {
784 		(void) fprintf(stderr,
785 		    "error: label %d: nvlist key %s is zero\n",
786 		    l, ZPOOL_CONFIG_ASHIFT);
787 		return (1);
788 	}
789 
790 	return (0);
791 }
792 
793 static int
zhack_repair_undetach(uberblock_t * ub,nvlist_t * cfg,const int l)794 zhack_repair_undetach(uberblock_t *ub, nvlist_t *cfg, const int l)
795 {
796 	/*
797 	 * Uberblock root block pointer has valid birth TXG.
798 	 * Copying it to the label NVlist
799 	 */
800 	if (BP_GET_LOGICAL_BIRTH(&ub->ub_rootbp) != 0) {
801 		const uint64_t txg = BP_GET_LOGICAL_BIRTH(&ub->ub_rootbp);
802 		int err;
803 
804 		ub->ub_txg = txg;
805 
806 		err = nvlist_remove_all(cfg, ZPOOL_CONFIG_CREATE_TXG);
807 		if (err) {
808 			(void) fprintf(stderr,
809 			    "error: label %d: "
810 			    "Failed to remove pool creation TXG\n",
811 			    l);
812 			return (err);
813 		}
814 
815 		err = nvlist_remove_all(cfg, ZPOOL_CONFIG_POOL_TXG);
816 		if (err) {
817 			(void) fprintf(stderr,
818 			    "error: label %d: Failed to remove pool TXG to "
819 			    "be replaced.\n",
820 			    l);
821 			return (err);
822 		}
823 
824 		err = nvlist_add_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, txg);
825 		if (err) {
826 			(void) fprintf(stderr,
827 			    "error: label %d: "
828 			    "Failed to add pool TXG of %" PRIu64 "\n",
829 			    l, txg);
830 			return (err);
831 		}
832 	}
833 
834 	return (0);
835 }
836 
837 static boolean_t
zhack_repair_write_label(const int l,const int fd,const int byteswap,void * data,zio_eck_t * eck,const uint64_t offset,const uint64_t abdsize)838 zhack_repair_write_label(const int l, const int fd, const int byteswap,
839     void *data, zio_eck_t *eck, const uint64_t offset, const uint64_t abdsize)
840 {
841 	zio_cksum_t actual_cksum;
842 	zhack_repair_calc_cksum(byteswap, data, offset, abdsize, eck,
843 	    &actual_cksum);
844 	zio_cksum_t expected_cksum = eck->zec_cksum;
845 	ssize_t err;
846 
847 	if (ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum))
848 		return (B_FALSE);
849 
850 	eck->zec_cksum = actual_cksum;
851 
852 	err = pwrite64(fd, data, abdsize, offset);
853 	if (err == -1) {
854 		(void) fprintf(stderr, "error: cannot write label %d: %s\n",
855 		    l, strerror(errno));
856 		return (B_FALSE);
857 	} else if (err != abdsize) {
858 		(void) fprintf(stderr, "error: bad write size label %d\n", l);
859 		return (B_FALSE);
860 	} else {
861 		(void) fprintf(stderr,
862 		    "label %d: wrote %" PRIu64 " bytes at offset %" PRIu64 "\n",
863 		    l, abdsize, offset);
864 	}
865 
866 	return (B_TRUE);
867 }
868 
869 static void
zhack_repair_write_uberblock(vdev_label_t * vl,const int l,const uint64_t ashift,const int fd,const int byteswap,const uint64_t label_offset,uint32_t * labels_repaired)870 zhack_repair_write_uberblock(vdev_label_t *vl, const int l,
871     const uint64_t ashift, const int fd, const int byteswap,
872     const uint64_t label_offset, uint32_t *labels_repaired)
873 {
874 	void *ub_data =
875 	    (char *)vl + offsetof(vdev_label_t, vl_uberblock);
876 	zio_eck_t *ub_eck =
877 	    (zio_eck_t *)
878 	    ((char *)(ub_data) + (ASHIFT_UBERBLOCK_SIZE(ashift))) - 1;
879 
880 	if (ub_eck->zec_magic != 0) {
881 		(void) fprintf(stderr,
882 		    "error: label %d: "
883 		    "Expected Uberblock checksum magic number to "
884 		    "be 0, but got %" PRIu64 "\n",
885 		    l, ub_eck->zec_magic);
886 		(void) fprintf(stderr, "It would appear there's already "
887 		    "a checksum for the uberblock.\n");
888 		return;
889 	}
890 
891 
892 	ub_eck->zec_magic = byteswap ? BSWAP_64(ZEC_MAGIC) : ZEC_MAGIC;
893 
894 	if (zhack_repair_write_label(l, fd, byteswap,
895 	    ub_data, ub_eck,
896 	    label_offset + offsetof(vdev_label_t, vl_uberblock),
897 	    ASHIFT_UBERBLOCK_SIZE(ashift)))
898 			labels_repaired[l] |= REPAIR_LABEL_STATUS_UB;
899 }
900 
901 static void
zhack_repair_print_cksum(FILE * stream,const zio_cksum_t * cksum)902 zhack_repair_print_cksum(FILE *stream, const zio_cksum_t *cksum)
903 {
904 	(void) fprintf(stream,
905 	    "%016llx:%016llx:%016llx:%016llx",
906 	    (u_longlong_t)cksum->zc_word[0],
907 	    (u_longlong_t)cksum->zc_word[1],
908 	    (u_longlong_t)cksum->zc_word[2],
909 	    (u_longlong_t)cksum->zc_word[3]);
910 }
911 
912 static int
zhack_repair_test_cksum(const int byteswap,void * vdev_data,zio_eck_t * vdev_eck,const uint64_t vdev_phys_offset,const int l)913 zhack_repair_test_cksum(const int byteswap, void *vdev_data,
914     zio_eck_t *vdev_eck, const uint64_t vdev_phys_offset, const int l)
915 {
916 	const zio_cksum_t expected_cksum = vdev_eck->zec_cksum;
917 	zio_cksum_t actual_cksum;
918 	zhack_repair_calc_cksum(byteswap, vdev_data, vdev_phys_offset,
919 	    VDEV_PHYS_SIZE, vdev_eck, &actual_cksum);
920 	const uint64_t expected_magic = byteswap ?
921 	    BSWAP_64(ZEC_MAGIC) : ZEC_MAGIC;
922 	const uint64_t actual_magic = vdev_eck->zec_magic;
923 	int err = 0;
924 
925 	if (actual_magic != expected_magic) {
926 		(void) fprintf(stderr, "error: label %d: "
927 		    "Expected "
928 		    "the nvlist checksum magic number to not be %"
929 		    PRIu64 " not %" PRIu64 "\n",
930 		    l, expected_magic, actual_magic);
931 		err = ECKSUM;
932 	}
933 	if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) {
934 		(void) fprintf(stderr, "error: label %d: "
935 		    "Expected the nvlist checksum to be ", l);
936 		(void) zhack_repair_print_cksum(stderr,
937 		    &expected_cksum);
938 		(void) fprintf(stderr, " not ");
939 		zhack_repair_print_cksum(stderr, &actual_cksum);
940 		(void) fprintf(stderr, "\n");
941 		err = ECKSUM;
942 	}
943 	return (err);
944 }
945 
946 static int
zhack_repair_unpack_cfg(vdev_label_t * vl,const int l,nvlist_t ** cfg)947 zhack_repair_unpack_cfg(vdev_label_t *vl, const int l, nvlist_t **cfg)
948 {
949 	const char *cfg_keys[] = { ZPOOL_CONFIG_VERSION,
950 	    ZPOOL_CONFIG_POOL_STATE, ZPOOL_CONFIG_GUID };
951 	int err;
952 
953 	err = nvlist_unpack(vl->vl_vdev_phys.vp_nvlist,
954 	    VDEV_PHYS_SIZE - sizeof (zio_eck_t), cfg, 0);
955 	if (err) {
956 		(void) fprintf(stderr,
957 		    "error: cannot unpack nvlist label %d\n", l);
958 		return (err);
959 	}
960 
961 	for (int i = 0; i < ARRAY_SIZE(cfg_keys); i++) {
962 		uint64_t val;
963 		err = nvlist_lookup_uint64(*cfg, cfg_keys[i], &val);
964 		if (err) {
965 			(void) fprintf(stderr,
966 			    "error: label %d, %d: "
967 			    "cannot find nvlist key %s\n",
968 			    l, i, cfg_keys[i]);
969 			return (err);
970 		}
971 	}
972 
973 	return (0);
974 }
975 
976 static void
zhack_repair_one_label(const zhack_repair_op_t op,const int fd,vdev_label_t * vl,const uint64_t label_offset,const int l,uint32_t * labels_repaired)977 zhack_repair_one_label(const zhack_repair_op_t op, const int fd,
978     vdev_label_t *vl, const uint64_t label_offset, const int l,
979     uint32_t *labels_repaired)
980 {
981 	ssize_t err;
982 	uberblock_t *ub = (uberblock_t *)vl->vl_uberblock;
983 	void *vdev_data =
984 	    (char *)vl + offsetof(vdev_label_t, vl_vdev_phys);
985 	zio_eck_t *vdev_eck =
986 	    (zio_eck_t *)((char *)(vdev_data) + VDEV_PHYS_SIZE) - 1;
987 	const uint64_t vdev_phys_offset =
988 	    label_offset + offsetof(vdev_label_t, vl_vdev_phys);
989 	nvlist_t *cfg;
990 	uint64_t ashift;
991 	int byteswap;
992 
993 	err = zhack_repair_read_label(fd, vl, label_offset, l);
994 	if (err)
995 		return;
996 
997 	err = zhack_repair_get_byteswap(vdev_eck, l, &byteswap);
998 	if (err)
999 		return;
1000 
1001 	if (byteswap) {
1002 		byteswap_uint64_array(&vdev_eck->zec_cksum,
1003 		    sizeof (zio_cksum_t));
1004 		vdev_eck->zec_magic = BSWAP_64(vdev_eck->zec_magic);
1005 	}
1006 
1007 	if ((op & ZHACK_REPAIR_OP_CKSUM) == 0 &&
1008 	    zhack_repair_test_cksum(byteswap, vdev_data, vdev_eck,
1009 	    vdev_phys_offset, l) != 0) {
1010 		(void) fprintf(stderr, "It would appear checksums are "
1011 		    "corrupted. Try zhack repair label -c <device>\n");
1012 		return;
1013 	}
1014 
1015 	err = zhack_repair_unpack_cfg(vl, l, &cfg);
1016 	if (err)
1017 		return;
1018 
1019 	if ((op & ZHACK_REPAIR_OP_UNDETACH) != 0) {
1020 		char *buf;
1021 		size_t buflen;
1022 
1023 		if (ub->ub_txg != 0) {
1024 			(void) fprintf(stderr,
1025 			    "error: label %d: UB TXG of 0 expected, but got %"
1026 			    PRIu64 "\n", l, ub->ub_txg);
1027 			(void) fprintf(stderr, "It would appear the device was "
1028 			    "not properly detached.\n");
1029 			return;
1030 		}
1031 
1032 		err = zhack_repair_get_ashift(cfg, l, &ashift);
1033 		if (err)
1034 			return;
1035 
1036 		err = zhack_repair_undetach(ub, cfg, l);
1037 		if (err)
1038 			return;
1039 
1040 		buf = vl->vl_vdev_phys.vp_nvlist;
1041 		buflen = VDEV_PHYS_SIZE - sizeof (zio_eck_t);
1042 		if (nvlist_pack(cfg, &buf, &buflen, NV_ENCODE_XDR, 0) != 0) {
1043 			(void) fprintf(stderr,
1044 			    "error: label %d: Failed to pack nvlist\n", l);
1045 			return;
1046 		}
1047 
1048 		zhack_repair_write_uberblock(vl,
1049 		    l, ashift, fd, byteswap, label_offset, labels_repaired);
1050 	}
1051 
1052 	if (zhack_repair_write_label(l, fd, byteswap, vdev_data, vdev_eck,
1053 	    vdev_phys_offset, VDEV_PHYS_SIZE))
1054 			labels_repaired[l] |= REPAIR_LABEL_STATUS_CKSUM;
1055 
1056 	fsync(fd);
1057 }
1058 
/*
 * Map a label's repair-status bitmask to a human-readable word.
 *
 * Returns "repaired" when any bit of to_check is set in label_status,
 * and "skipped" otherwise.  The returned string is a literal and must not
 * be freed or modified.
 */
static const char *
zhack_repair_label_status(const uint32_t label_status,
    const uint32_t to_check)
{
	if ((label_status & to_check) == 0)
		return ("skipped");

	return ("repaired");
}
1065 
1066 static int
zhack_label_repair(const zhack_repair_op_t op,const int argc,char ** argv)1067 zhack_label_repair(const zhack_repair_op_t op, const int argc, char **argv)
1068 {
1069 	uint32_t labels_repaired[VDEV_LABELS] = {0};
1070 	vdev_label_t labels[VDEV_LABELS] = {{{0}}};
1071 	struct stat64 st;
1072 	int fd;
1073 	off_t filesize;
1074 	uint32_t repaired = 0;
1075 
1076 	abd_init();
1077 
1078 	if (argc < 1) {
1079 		(void) fprintf(stderr, "error: missing device\n");
1080 		usage();
1081 	}
1082 
1083 	if ((fd = open(argv[0], O_RDWR)) == -1)
1084 		fatal(NULL, FTAG, "cannot open '%s': %s", argv[0],
1085 		    strerror(errno));
1086 
1087 	if (fstat64_blk(fd, &st) != 0)
1088 		fatal(NULL, FTAG, "cannot stat '%s': %s", argv[0],
1089 		    strerror(errno));
1090 
1091 	filesize = st.st_size;
1092 	(void) fprintf(stderr, "Calculated filesize to be %jd\n",
1093 	    (intmax_t)filesize);
1094 
1095 	if (filesize % sizeof (vdev_label_t) != 0)
1096 		filesize =
1097 		    (filesize / sizeof (vdev_label_t)) * sizeof (vdev_label_t);
1098 
1099 	for (int l = 0; l < VDEV_LABELS; l++) {
1100 		zhack_repair_one_label(op, fd, &labels[l],
1101 		    vdev_label_offset(filesize, l, 0), l, labels_repaired);
1102 	}
1103 
1104 	close(fd);
1105 
1106 	abd_fini();
1107 
1108 	for (int l = 0; l < VDEV_LABELS; l++) {
1109 		const uint32_t lr = labels_repaired[l];
1110 		(void) printf("label %d: ", l);
1111 		(void) printf("uberblock: %s ",
1112 		    zhack_repair_label_status(lr, REPAIR_LABEL_STATUS_UB));
1113 		(void) printf("checksum: %s\n",
1114 		    zhack_repair_label_status(lr, REPAIR_LABEL_STATUS_CKSUM));
1115 		repaired |= lr;
1116 	}
1117 
1118 	if (repaired > 0)
1119 		return (0);
1120 
1121 	return (1);
1122 }
1123 
1124 static int
zhack_do_label_repair(int argc,char ** argv)1125 zhack_do_label_repair(int argc, char **argv)
1126 {
1127 	zhack_repair_op_t op = ZHACK_REPAIR_OP_UNKNOWN;
1128 	int c;
1129 
1130 	optind = 1;
1131 	while ((c = getopt(argc, argv, "+cu")) != -1) {
1132 		switch (c) {
1133 		case 'c':
1134 			op |= ZHACK_REPAIR_OP_CKSUM;
1135 			break;
1136 		case 'u':
1137 			op |= ZHACK_REPAIR_OP_UNDETACH;
1138 			break;
1139 		default:
1140 			usage();
1141 			break;
1142 		}
1143 	}
1144 
1145 	argc -= optind;
1146 	argv += optind;
1147 
1148 	if (op == ZHACK_REPAIR_OP_UNKNOWN)
1149 		op = ZHACK_REPAIR_OP_CKSUM;
1150 
1151 	return (zhack_label_repair(op, argc, argv));
1152 }
1153 
/*
 * Dispatch the "label" command to its subcommand handler.
 *
 * argv[0] on entry is the "label" word itself; it is dropped and the next
 * argument is taken as the subcommand.  Only "repair" is recognized, and
 * its handler's return value is passed back.  A missing or unknown
 * subcommand prints an error and calls usage().
 *
 * Every path returns a defined value: should usage() ever return, the
 * function reports failure (1) instead of dereferencing a missing
 * argument or returning an indeterminate result.
 */
static int
zhack_do_label(int argc, char **argv)
{
	const char *subcommand;

	argc--;
	argv++;
	if (argc == 0) {
		(void) fprintf(stderr,
		    "error: no label operation specified\n");
		usage();
		return (1);
	}

	subcommand = argv[0];
	if (strcmp(subcommand, "repair") == 0)
		return (zhack_do_label_repair(argc, argv));

	(void) fprintf(stderr, "error: unknown subcommand: %s\n",
	    subcommand);
	usage();

	return (1);
}
1179 
1180 #define	MAX_NUM_PATHS 1024
1181 
1182 int
main(int argc,char ** argv)1183 main(int argc, char **argv)
1184 {
1185 	char *path[MAX_NUM_PATHS];
1186 	const char *subcommand;
1187 	int rv = 0;
1188 	int c;
1189 
1190 	g_importargs.path = path;
1191 
1192 	dprintf_setup(&argc, argv);
1193 	zfs_prop_init();
1194 
1195 	while ((c = getopt(argc, argv, "+c:d:o:")) != -1) {
1196 		switch (c) {
1197 		case 'c':
1198 			g_importargs.cachefile = optarg;
1199 			break;
1200 		case 'd':
1201 			assert(g_importargs.paths < MAX_NUM_PATHS);
1202 			g_importargs.path[g_importargs.paths++] = optarg;
1203 			break;
1204 		case 'o':
1205 			if (handle_tunable_option(optarg, B_FALSE) != 0)
1206 				exit(1);
1207 			break;
1208 		default:
1209 			usage();
1210 			break;
1211 		}
1212 	}
1213 
1214 	argc -= optind;
1215 	argv += optind;
1216 	optind = 1;
1217 
1218 	if (argc == 0) {
1219 		(void) fprintf(stderr, "error: no command specified\n");
1220 		usage();
1221 	}
1222 
1223 	subcommand = argv[0];
1224 
1225 	if (strcmp(subcommand, "feature") == 0) {
1226 		rv = zhack_do_feature(argc, argv);
1227 	} else if (strcmp(subcommand, "label") == 0) {
1228 		return (zhack_do_label(argc, argv));
1229 	} else if (strcmp(subcommand, "metaslab") == 0) {
1230 		rv = zhack_do_metaslab(argc, argv);
1231 	} else {
1232 		(void) fprintf(stderr, "error: unknown subcommand: %s\n",
1233 		    subcommand);
1234 		usage();
1235 	}
1236 
1237 	if (!g_readonly && spa_export(g_pool, NULL, B_TRUE, B_FALSE) != 0) {
1238 		fatal(NULL, FTAG, "pool export failed; "
1239 		    "changes may not be committed to disk\n");
1240 	}
1241 
1242 	kernel_fini();
1243 
1244 	return (rv);
1245 }
1246