xref: /freebsd/sys/contrib/openzfs/cmd/zhack.c (revision 8ac904ce090b1c2e355da8aa122ca2252183f4e1)
1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or https://opensource.org/licenses/CDDL-1.0.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*
24  * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
25  * Copyright (c) 2013 Steven Hartland. All rights reserved.
26  */
27 
/*
 * zhack is a debugging tool that can write changes to a ZFS pool using
 * libzpool for testing purposes. Altering pools with zhack is unsupported
 * and may result in corrupted pools.
 */
33 
34 #include <zfs_prop.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <ctype.h>
38 #include <sys/stat.h>
39 #include <sys/zfs_context.h>
40 #include <sys/spa.h>
41 #include <sys/spa_impl.h>
42 #include <sys/dmu.h>
43 #include <sys/zap.h>
44 #include <sys/zfs_znode.h>
45 #include <sys/dsl_synctask.h>
46 #include <sys/vdev.h>
47 #include <sys/vdev_impl.h>
48 #include <sys/fs/zfs.h>
49 #include <sys/dmu_objset.h>
50 #include <sys/dsl_pool.h>
51 #include <sys/zio_checksum.h>
52 #include <sys/zio_compress.h>
53 #include <sys/zfeature.h>
54 #include <sys/dmu_tx.h>
55 #include <zfeature_common.h>
56 #include <libzutil.h>
57 #include <sys/metaslab_impl.h>
58 #include <libzpool.h>
59 
/* Import arguments handed to zpool_find_config() by zhack_import(). */
static importargs_t g_importargs;
/* Copy of the import target name; fatal() passes it to spa_export(). */
static char *g_pool;
/* Records whether zhack_import() was asked for a read-only import. */
static boolean_t g_readonly;
63 
/*
 * Label repair operations.  The non-zero values are distinct bits and are
 * tested with bitwise AND, so several operations can be requested at once.
 */
typedef enum {
	ZHACK_REPAIR_OP_UNKNOWN  = 0,
	ZHACK_REPAIR_OP_CKSUM    = (1 << 0),
	ZHACK_REPAIR_OP_UNDETACH = (1 << 1)
} zhack_repair_op_t;
69 
/*
 * Print the synopsis of every zhack subcommand to stderr and terminate the
 * process with exit status 1.  Never returns.
 */
static __attribute__((noreturn)) void
usage(void)
{
	(void) fprintf(stderr,
	    "Usage: zhack [-o tunable] [-c cachefile] [-d dir] <subcommand> "
	    "<args> ...\n"
	    "where <subcommand> <args> is one of the following:\n"
	    "\n");

	(void) fprintf(stderr,
	    "    feature stat <pool>\n"
	    "        print information about enabled features\n"
	    "    feature enable [-r] [-d desc] <pool> <feature>\n"
	    "        add a new enabled feature to the pool\n"
	    "        -d <desc> sets the feature's description\n"
	    "        -r set read-only compatible flag for feature\n"
	    "    feature ref [-md] <pool> <feature>\n"
	    "        change the refcount on the given feature\n"
	    "        -d decrease instead of increase the refcount\n"
	    "        -m add the feature to the label if increasing refcount\n"
	    "\n"
	    "    <feature> : should be a feature guid\n"
	    "\n"
	    "    label repair [-cu] <device>\n"
	    "        repair labels of a specified device according to options\n"
	    "        which may be combined to do their functions in one call\n"
	    "        -c repair corrupted label checksums\n"
	    "        -u restore the label on a detached device\n"
	    "\n"
	    "    <device> : path to vdev\n"
	    "\n"
	    "    metaslab leak [-f] <pool>\n"
	    "        apply allocation map from zdb to specified pool\n"
	    "        -f proceed even if the metaslab count does not match\n");
	exit(1);
}
105 
106 
107 static __attribute__((format(printf, 3, 4))) __attribute__((noreturn)) void
fatal(spa_t * spa,const void * tag,const char * fmt,...)108 fatal(spa_t *spa, const void *tag, const char *fmt, ...)
109 {
110 	va_list ap;
111 
112 	if (spa != NULL) {
113 		spa_close(spa, tag);
114 		(void) spa_export(g_pool, NULL, B_TRUE, B_FALSE);
115 	}
116 
117 	va_start(ap, fmt);
118 	(void) fputs("zhack: ", stderr);
119 	(void) vfprintf(stderr, fmt, ap);
120 	va_end(ap);
121 	(void) fputc('\n', stderr);
122 
123 	exit(1);
124 }
125 
126 static int
space_delta_cb(dmu_object_type_t bonustype,const void * data,zfs_file_info_t * zoi)127 space_delta_cb(dmu_object_type_t bonustype, const void *data,
128     zfs_file_info_t *zoi)
129 {
130 	(void) data, (void) zoi;
131 
132 	/*
133 	 * Is it a valid type of object to track?
134 	 */
135 	if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA)
136 		return (ENOENT);
137 	(void) fprintf(stderr, "modifying object that needs user accounting");
138 	abort();
139 }
140 
141 /*
142  * Target is the dataset whose pool we want to open.
143  */
144 static void
zhack_import(char * target,boolean_t readonly)145 zhack_import(char *target, boolean_t readonly)
146 {
147 	nvlist_t *config;
148 	nvlist_t *props;
149 	int error;
150 
151 	kernel_init(readonly ? SPA_MODE_READ :
152 	    (SPA_MODE_READ | SPA_MODE_WRITE));
153 
154 	dmu_objset_register_type(DMU_OST_ZFS, space_delta_cb);
155 
156 	g_readonly = readonly;
157 	g_importargs.can_be_active = readonly;
158 	g_pool = strdup(target);
159 
160 	libpc_handle_t lpch = {
161 		.lpc_lib_handle = NULL,
162 		.lpc_ops = &libzpool_config_ops,
163 		.lpc_printerr = B_TRUE
164 	};
165 	error = zpool_find_config(&lpch, target, &config, &g_importargs);
166 	if (error)
167 		fatal(NULL, FTAG, "cannot import '%s'", target);
168 
169 	props = NULL;
170 	if (readonly) {
171 		VERIFY0(nvlist_alloc(&props, NV_UNIQUE_NAME, 0));
172 		VERIFY0(nvlist_add_uint64(props,
173 		    zpool_prop_to_name(ZPOOL_PROP_READONLY), 1));
174 	}
175 
176 	zfeature_checks_disable = B_TRUE;
177 	error = spa_import(target, config, props,
178 	    (readonly ?  ZFS_IMPORT_SKIP_MMP : ZFS_IMPORT_NORMAL));
179 	fnvlist_free(config);
180 	zfeature_checks_disable = B_FALSE;
181 	if (error == EEXIST)
182 		error = 0;
183 
184 	if (error)
185 		fatal(NULL, FTAG, "can't import '%s': %s", target,
186 		    strerror(error));
187 }
188 
189 static void
zhack_spa_open(char * target,boolean_t readonly,const void * tag,spa_t ** spa)190 zhack_spa_open(char *target, boolean_t readonly, const void *tag, spa_t **spa)
191 {
192 	int err;
193 
194 	zhack_import(target, readonly);
195 
196 	zfeature_checks_disable = B_TRUE;
197 	err = spa_open(target, spa, tag);
198 	zfeature_checks_disable = B_FALSE;
199 
200 	if (err != 0)
201 		fatal(*spa, FTAG, "cannot open '%s': %s", target,
202 		    strerror(err));
203 	if (spa_version(*spa) < SPA_VERSION_FEATURES) {
204 		fatal(*spa, FTAG, "'%s' has version %d, features not enabled",
205 		    target, (int)spa_version(*spa));
206 	}
207 }
208 
209 static void
dump_obj(objset_t * os,uint64_t obj,const char * name)210 dump_obj(objset_t *os, uint64_t obj, const char *name)
211 {
212 	zap_cursor_t zc;
213 	zap_attribute_t *za = zap_attribute_long_alloc();
214 
215 	(void) printf("%s_obj:\n", name);
216 
217 	for (zap_cursor_init(&zc, os, obj);
218 	    zap_cursor_retrieve(&zc, za) == 0;
219 	    zap_cursor_advance(&zc)) {
220 		if (za->za_integer_length == 8) {
221 			ASSERT(za->za_num_integers == 1);
222 			(void) printf("\t%s = %llu\n",
223 			    za->za_name, (u_longlong_t)za->za_first_integer);
224 		} else {
225 			ASSERT(za->za_integer_length == 1);
226 			char val[1024];
227 			VERIFY0(zap_lookup(os, obj, za->za_name,
228 			    1, sizeof (val), val));
229 			(void) printf("\t%s = %s\n", za->za_name, val);
230 		}
231 	}
232 	zap_cursor_fini(&zc);
233 	zap_attribute_free(za);
234 }
235 
236 static void
dump_mos(spa_t * spa)237 dump_mos(spa_t *spa)
238 {
239 	nvlist_t *nv = spa->spa_label_features;
240 	nvpair_t *pair;
241 
242 	(void) printf("label config:\n");
243 	for (pair = nvlist_next_nvpair(nv, NULL);
244 	    pair != NULL;
245 	    pair = nvlist_next_nvpair(nv, pair)) {
246 		(void) printf("\t%s\n", nvpair_name(pair));
247 	}
248 }
249 
250 static void
zhack_do_feature_stat(int argc,char ** argv)251 zhack_do_feature_stat(int argc, char **argv)
252 {
253 	spa_t *spa;
254 	objset_t *os;
255 	char *target;
256 
257 	argc--;
258 	argv++;
259 
260 	if (argc < 1) {
261 		(void) fprintf(stderr, "error: missing pool name\n");
262 		usage();
263 	}
264 	target = argv[0];
265 
266 	zhack_spa_open(target, B_TRUE, FTAG, &spa);
267 	os = spa->spa_meta_objset;
268 
269 	dump_obj(os, spa->spa_feat_for_read_obj, "for_read");
270 	dump_obj(os, spa->spa_feat_for_write_obj, "for_write");
271 	dump_obj(os, spa->spa_feat_desc_obj, "descriptions");
272 	if (spa_feature_is_active(spa, SPA_FEATURE_ENABLED_TXG)) {
273 		dump_obj(os, spa->spa_feat_enabled_txg_obj, "enabled_txg");
274 	}
275 	dump_mos(spa);
276 
277 	spa_close(spa, FTAG);
278 }
279 
280 static void
zhack_feature_enable_sync(void * arg,dmu_tx_t * tx)281 zhack_feature_enable_sync(void *arg, dmu_tx_t *tx)
282 {
283 	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
284 	zfeature_info_t *feature = arg;
285 
286 	feature_enable_sync(spa, feature, tx);
287 
288 	spa_history_log_internal(spa, "zhack enable feature", tx,
289 	    "name=%s flags=%u",
290 	    feature->fi_guid, feature->fi_flags);
291 }
292 
293 static void
zhack_do_feature_enable(int argc,char ** argv)294 zhack_do_feature_enable(int argc, char **argv)
295 {
296 	int c;
297 	char *desc, *target;
298 	spa_t *spa;
299 	objset_t *mos;
300 	zfeature_info_t feature;
301 	const spa_feature_t nodeps[] = { SPA_FEATURE_NONE };
302 
303 	/*
304 	 * Features are not added to the pool's label until their refcounts
305 	 * are incremented, so fi_mos can just be left as false for now.
306 	 */
307 	desc = NULL;
308 	feature.fi_uname = "zhack";
309 	feature.fi_flags = 0;
310 	feature.fi_depends = nodeps;
311 	feature.fi_feature = SPA_FEATURE_NONE;
312 
313 	optind = 1;
314 	while ((c = getopt(argc, argv, "+rd:")) != -1) {
315 		switch (c) {
316 		case 'r':
317 			feature.fi_flags |= ZFEATURE_FLAG_READONLY_COMPAT;
318 			break;
319 		case 'd':
320 			if (desc != NULL)
321 				free(desc);
322 			desc = strdup(optarg);
323 			break;
324 		default:
325 			usage();
326 			break;
327 		}
328 	}
329 
330 	if (desc == NULL)
331 		desc = strdup("zhack injected");
332 	feature.fi_desc = desc;
333 
334 	argc -= optind;
335 	argv += optind;
336 
337 	if (argc < 2) {
338 		(void) fprintf(stderr, "error: missing feature or pool name\n");
339 		usage();
340 	}
341 	target = argv[0];
342 	feature.fi_guid = argv[1];
343 
344 	if (!zfeature_is_valid_guid(feature.fi_guid))
345 		fatal(NULL, FTAG, "invalid feature guid: %s", feature.fi_guid);
346 
347 	zhack_spa_open(target, B_FALSE, FTAG, &spa);
348 	mos = spa->spa_meta_objset;
349 
350 	if (zfeature_is_supported(feature.fi_guid))
351 		fatal(spa, FTAG, "'%s' is a real feature, will not enable",
352 		    feature.fi_guid);
353 	if (0 == zap_contains(mos, spa->spa_feat_desc_obj, feature.fi_guid))
354 		fatal(spa, FTAG, "feature already enabled: %s",
355 		    feature.fi_guid);
356 
357 	VERIFY0(dsl_sync_task(spa_name(spa), NULL,
358 	    zhack_feature_enable_sync, &feature, 5, ZFS_SPACE_CHECK_NORMAL));
359 
360 	spa_close(spa, FTAG);
361 
362 	free(desc);
363 }
364 
365 static void
feature_incr_sync(void * arg,dmu_tx_t * tx)366 feature_incr_sync(void *arg, dmu_tx_t *tx)
367 {
368 	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
369 	zfeature_info_t *feature = arg;
370 	uint64_t refcount;
371 
372 	mutex_enter(&spa->spa_feat_stats_lock);
373 	VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount));
374 	feature_sync(spa, feature, refcount + 1, tx);
375 	spa_history_log_internal(spa, "zhack feature incr", tx,
376 	    "name=%s", feature->fi_guid);
377 	mutex_exit(&spa->spa_feat_stats_lock);
378 }
379 
380 static void
feature_decr_sync(void * arg,dmu_tx_t * tx)381 feature_decr_sync(void *arg, dmu_tx_t *tx)
382 {
383 	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
384 	zfeature_info_t *feature = arg;
385 	uint64_t refcount;
386 
387 	mutex_enter(&spa->spa_feat_stats_lock);
388 	VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount));
389 	feature_sync(spa, feature, refcount - 1, tx);
390 	spa_history_log_internal(spa, "zhack feature decr", tx,
391 	    "name=%s", feature->fi_guid);
392 	mutex_exit(&spa->spa_feat_stats_lock);
393 }
394 
395 static void
zhack_do_feature_ref(int argc,char ** argv)396 zhack_do_feature_ref(int argc, char **argv)
397 {
398 	int c;
399 	char *target;
400 	boolean_t decr = B_FALSE;
401 	spa_t *spa;
402 	objset_t *mos;
403 	zfeature_info_t feature;
404 	const spa_feature_t nodeps[] = { SPA_FEATURE_NONE };
405 
406 	/*
407 	 * fi_desc does not matter here because it was written to disk
408 	 * when the feature was enabled, but we need to properly set the
409 	 * feature for read or write based on the information we read off
410 	 * disk later.
411 	 */
412 	feature.fi_uname = "zhack";
413 	feature.fi_flags = 0;
414 	feature.fi_desc = NULL;
415 	feature.fi_depends = nodeps;
416 	feature.fi_feature = SPA_FEATURE_NONE;
417 
418 	optind = 1;
419 	while ((c = getopt(argc, argv, "+md")) != -1) {
420 		switch (c) {
421 		case 'm':
422 			feature.fi_flags |= ZFEATURE_FLAG_MOS;
423 			break;
424 		case 'd':
425 			decr = B_TRUE;
426 			break;
427 		default:
428 			usage();
429 			break;
430 		}
431 	}
432 	argc -= optind;
433 	argv += optind;
434 
435 	if (argc < 2) {
436 		(void) fprintf(stderr, "error: missing feature or pool name\n");
437 		usage();
438 	}
439 	target = argv[0];
440 	feature.fi_guid = argv[1];
441 
442 	if (!zfeature_is_valid_guid(feature.fi_guid))
443 		fatal(NULL, FTAG, "invalid feature guid: %s", feature.fi_guid);
444 
445 	zhack_spa_open(target, B_FALSE, FTAG, &spa);
446 	mos = spa->spa_meta_objset;
447 
448 	if (zfeature_is_supported(feature.fi_guid)) {
449 		fatal(spa, FTAG,
450 		    "'%s' is a real feature, will not change refcount",
451 		    feature.fi_guid);
452 	}
453 
454 	if (0 == zap_contains(mos, spa->spa_feat_for_read_obj,
455 	    feature.fi_guid)) {
456 		feature.fi_flags &= ~ZFEATURE_FLAG_READONLY_COMPAT;
457 	} else if (0 == zap_contains(mos, spa->spa_feat_for_write_obj,
458 	    feature.fi_guid)) {
459 		feature.fi_flags |= ZFEATURE_FLAG_READONLY_COMPAT;
460 	} else {
461 		fatal(spa, FTAG, "feature is not enabled: %s", feature.fi_guid);
462 	}
463 
464 	if (decr) {
465 		uint64_t count;
466 		if (feature_get_refcount_from_disk(spa, &feature,
467 		    &count) == 0 && count == 0) {
468 			fatal(spa, FTAG, "feature refcount already 0: %s",
469 			    feature.fi_guid);
470 		}
471 	}
472 
473 	VERIFY0(dsl_sync_task(spa_name(spa), NULL,
474 	    decr ? feature_decr_sync : feature_incr_sync, &feature,
475 	    5, ZFS_SPACE_CHECK_NORMAL));
476 
477 	spa_close(spa, FTAG);
478 }
479 
/*
 * Dispatch the "feature" command.  argv[0] is the command word; argv[1]
 * selects "stat", "enable" or "ref".  The handler receives argc/argv
 * shifted so that the operation word is argv[0].  Always returns 0;
 * invalid input ends in usage().
 */
static int
zhack_do_feature(int argc, char **argv)
{
	argc--;
	argv++;
	if (argc == 0) {
		(void) fprintf(stderr,
		    "error: no feature operation specified\n");
		usage();
	}

	const char *op = argv[0];

	if (strcmp(op, "stat") == 0) {
		zhack_do_feature_stat(argc, argv);
		return (0);
	}
	if (strcmp(op, "enable") == 0) {
		zhack_do_feature_enable(argc, argv);
		return (0);
	}
	if (strcmp(op, "ref") == 0) {
		zhack_do_feature_ref(argc, argv);
		return (0);
	}

	(void) fprintf(stderr, "error: unknown subcommand: %s\n", op);
	usage();

	return (0);
}
508 
509 static boolean_t
strstarts(const char * a,const char * b)510 strstarts(const char *a, const char *b)
511 {
512 	return (strncmp(a, b, strlen(b)) == 0);
513 }
514 
515 static void
metaslab_force_alloc(metaslab_t * msp,uint64_t start,uint64_t size,dmu_tx_t * tx)516 metaslab_force_alloc(metaslab_t *msp, uint64_t start, uint64_t size,
517     dmu_tx_t *tx)
518 {
519 	ASSERT(msp->ms_disabled);
520 	ASSERT(MUTEX_HELD(&msp->ms_lock));
521 	uint64_t txg = dmu_tx_get_txg(tx);
522 
523 	uint64_t off = start;
524 	while (off < start + size) {
525 		uint64_t ostart, osize;
526 		boolean_t found = zfs_range_tree_find_in(msp->ms_allocatable,
527 		    off, start + size - off, &ostart, &osize);
528 		if (!found)
529 			break;
530 		zfs_range_tree_remove(msp->ms_allocatable, ostart, osize);
531 
532 		if (zfs_range_tree_is_empty(msp->ms_allocating[txg & TXG_MASK]))
533 			vdev_dirty(msp->ms_group->mg_vd, VDD_METASLAB, msp,
534 			    txg);
535 
536 		zfs_range_tree_add(msp->ms_allocating[txg & TXG_MASK], ostart,
537 		    osize);
538 		msp->ms_allocating_total += osize;
539 		off = ostart + osize;
540 	}
541 }
542 
543 static void
zhack_do_metaslab_leak(int argc,char ** argv)544 zhack_do_metaslab_leak(int argc, char **argv)
545 {
546 	int c;
547 	char *target;
548 	spa_t *spa;
549 
550 	optind = 1;
551 	boolean_t force = B_FALSE;
552 	while ((c = getopt(argc, argv, "f")) != -1) {
553 		switch (c) {
554 		case 'f':
555 			force = B_TRUE;
556 			break;
557 		default:
558 			usage();
559 			break;
560 		}
561 	}
562 
563 	argc -= optind;
564 	argv += optind;
565 
566 	if (argc < 1) {
567 		(void) fprintf(stderr, "error: missing pool name\n");
568 		usage();
569 	}
570 	target = argv[0];
571 
572 	zhack_spa_open(target, B_FALSE, FTAG, &spa);
573 	spa_config_enter(spa, SCL_VDEV | SCL_ALLOC, FTAG, RW_READER);
574 
575 	char *line = NULL;
576 	size_t cap = 0;
577 
578 	vdev_t *vd = NULL;
579 	metaslab_t *prev = NULL;
580 	dmu_tx_t *tx = NULL;
581 	while (getline(&line, &cap, stdin) > 0) {
582 		if (strstarts(line, "\tvdev ")) {
583 			uint64_t vdev_id, ms_shift;
584 			if (sscanf(line,
585 			    "\tvdev %10"PRIu64"\t%*s  metaslab shift %4"PRIu64,
586 			    &vdev_id, &ms_shift) == 1) {
587 				VERIFY3U(sscanf(line, "\tvdev %"PRIu64
588 				    "\t  metaslab shift %4"PRIu64,
589 				    &vdev_id, &ms_shift), ==, 2);
590 			}
591 			vd = vdev_lookup_top(spa, vdev_id);
592 			if (vd == NULL) {
593 				fprintf(stderr, "error: no such vdev with "
594 				    "id %"PRIu64"\n", vdev_id);
595 				break;
596 			}
597 			if (tx) {
598 				dmu_tx_commit(tx);
599 				mutex_exit(&prev->ms_lock);
600 				metaslab_enable(prev, B_FALSE, B_FALSE);
601 				tx = NULL;
602 				prev = NULL;
603 			}
604 			if (vd->vdev_ms_shift != ms_shift) {
605 				fprintf(stderr, "error: ms_shift mismatch: %"
606 				    PRIu64" != %"PRIu64"\n", vd->vdev_ms_shift,
607 				    ms_shift);
608 				break;
609 			}
610 		} else if (strstarts(line, "\tmetaslabs ")) {
611 			uint64_t ms_count;
612 			VERIFY3U(sscanf(line, "\tmetaslabs %"PRIu64, &ms_count),
613 			    ==, 1);
614 			ASSERT(vd);
615 			if (!force && vd->vdev_ms_count != ms_count) {
616 				fprintf(stderr, "error: ms_count mismatch: %"
617 				    PRIu64" != %"PRIu64"\n", vd->vdev_ms_count,
618 				    ms_count);
619 				break;
620 			}
621 		} else if (strstarts(line, "ALLOC:")) {
622 			uint64_t start, size;
623 			VERIFY3U(sscanf(line, "ALLOC: %"PRIu64" %"PRIu64"\n",
624 			    &start, &size), ==, 2);
625 
626 			ASSERT(vd);
627 			metaslab_t *cur =
628 			    vd->vdev_ms[start >> vd->vdev_ms_shift];
629 			if (prev != cur) {
630 				if (prev) {
631 					dmu_tx_commit(tx);
632 					mutex_exit(&prev->ms_lock);
633 					metaslab_enable(prev, B_FALSE, B_FALSE);
634 				}
635 				ASSERT(cur);
636 				metaslab_disable(cur);
637 				mutex_enter(&cur->ms_lock);
638 				metaslab_load(cur);
639 				prev = cur;
640 				tx = dmu_tx_create_dd(
641 				    spa_get_dsl(vd->vdev_spa)->dp_root_dir);
642 				dmu_tx_assign(tx, DMU_TX_WAIT);
643 			}
644 
645 			metaslab_force_alloc(cur, start, size, tx);
646 		} else {
647 			continue;
648 		}
649 	}
650 	if (tx) {
651 		dmu_tx_commit(tx);
652 		mutex_exit(&prev->ms_lock);
653 		metaslab_enable(prev, B_FALSE, B_FALSE);
654 		tx = NULL;
655 		prev = NULL;
656 	}
657 	if (line)
658 		free(line);
659 
660 	spa_config_exit(spa, SCL_VDEV | SCL_ALLOC, FTAG);
661 	spa_close(spa, FTAG);
662 }
663 
/*
 * Dispatch the "metaslab" command.  argv[0] is the command word; argv[1]
 * must be "leak", whose handler receives argc/argv shifted so that the
 * operation word is argv[0].  Always returns 0; invalid input ends in
 * usage().
 */
static int
zhack_do_metaslab(int argc, char **argv)
{
	argc--;
	argv++;
	if (argc == 0) {
		(void) fprintf(stderr,
		    "error: no metaslab operation specified\n");
		usage();
	}

	const char *op = argv[0];

	if (strcmp(op, "leak") != 0) {
		(void) fprintf(stderr, "error: unknown subcommand: %s\n", op);
		usage();
	} else {
		zhack_do_metaslab_leak(argc, argv);
	}

	return (0);
}
688 
/*
 * Uberblock slot geometry for a given ashift: the shift is clamped to the
 * range [UBERBLOCK_SHIFT, MAX_UBERBLOCK_SHIFT] and the size is two raised
 * to that shift.  The argument and the expansion are parenthesized so the
 * macros behave as single expressions whatever is passed in.
 */
#define	ASHIFT_UBERBLOCK_SHIFT(ashift)	\
	(MIN(MAX((ashift), UBERBLOCK_SHIFT), \
	MAX_UBERBLOCK_SHIFT))
#define	ASHIFT_UBERBLOCK_SIZE(ashift) \
	(1ULL << ASHIFT_UBERBLOCK_SHIFT(ashift))

/* Per-label status bits recording which parts of a label were rewritten. */
#define	REPAIR_LABEL_STATUS_CKSUM (1 << 0)
#define	REPAIR_LABEL_STATUS_UB    (1 << 1)
697 
698 static int
zhack_repair_read_label(const int fd,vdev_label_t * vl,const uint64_t label_offset,const int l)699 zhack_repair_read_label(const int fd, vdev_label_t *vl,
700     const uint64_t label_offset, const int l)
701 {
702 	const int err = pread64(fd, vl, sizeof (vdev_label_t), label_offset);
703 
704 	if (err == -1) {
705 		(void) fprintf(stderr,
706 		    "error: cannot read label %d: %s\n",
707 		    l, strerror(errno));
708 		return (err);
709 	} else if (err != sizeof (vdev_label_t)) {
710 		(void) fprintf(stderr,
711 		    "error: bad label %d read size\n", l);
712 		return (err);
713 	}
714 
715 	return (0);
716 }
717 
718 static int
zhack_repair_get_byteswap(const zio_eck_t * vdev_eck,const int l,int * byteswap)719 zhack_repair_get_byteswap(const zio_eck_t *vdev_eck, const int l, int *byteswap)
720 {
721 	if (vdev_eck->zec_magic == ZEC_MAGIC) {
722 		*byteswap = B_FALSE;
723 	} else if (vdev_eck->zec_magic == BSWAP_64((uint64_t)ZEC_MAGIC)) {
724 		*byteswap = B_TRUE;
725 	} else {
726 		(void) fprintf(stderr, "error: label %d: "
727 		    "Expected the nvlist checksum magic number but instead got "
728 		    "0x%" PRIx64 "\n",
729 		    l, vdev_eck->zec_magic);
730 		return (1);
731 	}
732 	return (0);
733 }
734 
735 static void
zhack_repair_calc_cksum(const int byteswap,void * data,const uint64_t offset,const uint64_t abdsize,zio_eck_t * eck,zio_cksum_t * cksum)736 zhack_repair_calc_cksum(const int byteswap, void *data, const uint64_t offset,
737     const uint64_t abdsize, zio_eck_t *eck, zio_cksum_t *cksum)
738 {
739 	zio_cksum_t verifier;
740 	zio_cksum_t current_cksum;
741 	zio_checksum_info_t *ci;
742 	abd_t *abd;
743 
744 	ZIO_SET_CHECKSUM(&verifier, offset, 0, 0, 0);
745 
746 	if (byteswap)
747 		byteswap_uint64_array(&verifier, sizeof (zio_cksum_t));
748 
749 	current_cksum = eck->zec_cksum;
750 	eck->zec_cksum = verifier;
751 
752 	ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL];
753 	abd = abd_get_from_buf(data, abdsize);
754 	ci->ci_func[byteswap](abd, abdsize, NULL, cksum);
755 	abd_free(abd);
756 
757 	eck->zec_cksum = current_cksum;
758 }
759 
760 static int
zhack_repair_get_ashift(nvlist_t * cfg,const int l,uint64_t * ashift)761 zhack_repair_get_ashift(nvlist_t *cfg, const int l, uint64_t *ashift)
762 {
763 	int err;
764 	nvlist_t *vdev_tree_cfg;
765 
766 	err = nvlist_lookup_nvlist(cfg,
767 	    ZPOOL_CONFIG_VDEV_TREE, &vdev_tree_cfg);
768 	if (err) {
769 		(void) fprintf(stderr,
770 		    "error: label %d: cannot find nvlist key %s\n",
771 		    l, ZPOOL_CONFIG_VDEV_TREE);
772 		return (err);
773 	}
774 
775 	err = nvlist_lookup_uint64(vdev_tree_cfg,
776 	    ZPOOL_CONFIG_ASHIFT, ashift);
777 	if (err) {
778 		(void) fprintf(stderr,
779 		    "error: label %d: cannot find nvlist key %s\n",
780 		    l, ZPOOL_CONFIG_ASHIFT);
781 		return (err);
782 	}
783 
784 	if (*ashift == 0) {
785 		(void) fprintf(stderr,
786 		    "error: label %d: nvlist key %s is zero\n",
787 		    l, ZPOOL_CONFIG_ASHIFT);
788 		return (1);
789 	}
790 
791 	return (0);
792 }
793 
794 static int
zhack_repair_undetach(uberblock_t * ub,nvlist_t * cfg,const int l)795 zhack_repair_undetach(uberblock_t *ub, nvlist_t *cfg, const int l)
796 {
797 	/*
798 	 * Uberblock root block pointer has valid birth TXG.
799 	 * Copying it to the label NVlist
800 	 */
801 	if (BP_GET_LOGICAL_BIRTH(&ub->ub_rootbp) != 0) {
802 		const uint64_t txg = BP_GET_LOGICAL_BIRTH(&ub->ub_rootbp);
803 		int err;
804 
805 		ub->ub_txg = txg;
806 
807 		err = nvlist_remove_all(cfg, ZPOOL_CONFIG_CREATE_TXG);
808 		if (err) {
809 			(void) fprintf(stderr,
810 			    "error: label %d: "
811 			    "Failed to remove pool creation TXG\n",
812 			    l);
813 			return (err);
814 		}
815 
816 		err = nvlist_remove_all(cfg, ZPOOL_CONFIG_POOL_TXG);
817 		if (err) {
818 			(void) fprintf(stderr,
819 			    "error: label %d: Failed to remove pool TXG to "
820 			    "be replaced.\n",
821 			    l);
822 			return (err);
823 		}
824 
825 		err = nvlist_add_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, txg);
826 		if (err) {
827 			(void) fprintf(stderr,
828 			    "error: label %d: "
829 			    "Failed to add pool TXG of %" PRIu64 "\n",
830 			    l, txg);
831 			return (err);
832 		}
833 	}
834 
835 	return (0);
836 }
837 
838 static boolean_t
zhack_repair_write_label(const int l,const int fd,const int byteswap,void * data,zio_eck_t * eck,const uint64_t offset,const uint64_t abdsize)839 zhack_repair_write_label(const int l, const int fd, const int byteswap,
840     void *data, zio_eck_t *eck, const uint64_t offset, const uint64_t abdsize)
841 {
842 	zio_cksum_t actual_cksum;
843 	zhack_repair_calc_cksum(byteswap, data, offset, abdsize, eck,
844 	    &actual_cksum);
845 	zio_cksum_t expected_cksum = eck->zec_cksum;
846 	ssize_t err;
847 
848 	if (ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum))
849 		return (B_FALSE);
850 
851 	eck->zec_cksum = actual_cksum;
852 
853 	err = pwrite64(fd, data, abdsize, offset);
854 	if (err == -1) {
855 		(void) fprintf(stderr, "error: cannot write label %d: %s\n",
856 		    l, strerror(errno));
857 		return (B_FALSE);
858 	} else if (err != abdsize) {
859 		(void) fprintf(stderr, "error: bad write size label %d\n", l);
860 		return (B_FALSE);
861 	} else {
862 		(void) fprintf(stderr,
863 		    "label %d: wrote %" PRIu64 " bytes at offset %" PRIu64 "\n",
864 		    l, abdsize, offset);
865 	}
866 
867 	return (B_TRUE);
868 }
869 
870 static void
zhack_repair_write_uberblock(vdev_label_t * vl,const int l,const uint64_t ashift,const int fd,const int byteswap,const uint64_t label_offset,uint32_t * labels_repaired)871 zhack_repair_write_uberblock(vdev_label_t *vl, const int l,
872     const uint64_t ashift, const int fd, const int byteswap,
873     const uint64_t label_offset, uint32_t *labels_repaired)
874 {
875 	void *ub_data =
876 	    (char *)vl + offsetof(vdev_label_t, vl_uberblock);
877 	zio_eck_t *ub_eck =
878 	    (zio_eck_t *)
879 	    ((char *)(ub_data) + (ASHIFT_UBERBLOCK_SIZE(ashift))) - 1;
880 
881 	if (ub_eck->zec_magic != 0) {
882 		(void) fprintf(stderr,
883 		    "error: label %d: "
884 		    "Expected Uberblock checksum magic number to "
885 		    "be 0, but got %" PRIu64 "\n",
886 		    l, ub_eck->zec_magic);
887 		(void) fprintf(stderr, "It would appear there's already "
888 		    "a checksum for the uberblock.\n");
889 		return;
890 	}
891 
892 
893 	ub_eck->zec_magic = byteswap ? BSWAP_64(ZEC_MAGIC) : ZEC_MAGIC;
894 
895 	if (zhack_repair_write_label(l, fd, byteswap,
896 	    ub_data, ub_eck,
897 	    label_offset + offsetof(vdev_label_t, vl_uberblock),
898 	    ASHIFT_UBERBLOCK_SIZE(ashift)))
899 			labels_repaired[l] |= REPAIR_LABEL_STATUS_UB;
900 }
901 
902 static void
zhack_repair_print_cksum(FILE * stream,const zio_cksum_t * cksum)903 zhack_repair_print_cksum(FILE *stream, const zio_cksum_t *cksum)
904 {
905 	(void) fprintf(stream,
906 	    "%016llx:%016llx:%016llx:%016llx",
907 	    (u_longlong_t)cksum->zc_word[0],
908 	    (u_longlong_t)cksum->zc_word[1],
909 	    (u_longlong_t)cksum->zc_word[2],
910 	    (u_longlong_t)cksum->zc_word[3]);
911 }
912 
913 static int
zhack_repair_test_cksum(const int byteswap,void * vdev_data,zio_eck_t * vdev_eck,const uint64_t vdev_phys_offset,const int l)914 zhack_repair_test_cksum(const int byteswap, void *vdev_data,
915     zio_eck_t *vdev_eck, const uint64_t vdev_phys_offset, const int l)
916 {
917 	const zio_cksum_t expected_cksum = vdev_eck->zec_cksum;
918 	zio_cksum_t actual_cksum;
919 	zhack_repair_calc_cksum(byteswap, vdev_data, vdev_phys_offset,
920 	    VDEV_PHYS_SIZE, vdev_eck, &actual_cksum);
921 	const uint64_t expected_magic = byteswap ?
922 	    BSWAP_64(ZEC_MAGIC) : ZEC_MAGIC;
923 	const uint64_t actual_magic = vdev_eck->zec_magic;
924 	int err = 0;
925 
926 	if (actual_magic != expected_magic) {
927 		(void) fprintf(stderr, "error: label %d: "
928 		    "Expected "
929 		    "the nvlist checksum magic number to not be %"
930 		    PRIu64 " not %" PRIu64 "\n",
931 		    l, expected_magic, actual_magic);
932 		err = ECKSUM;
933 	}
934 	if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) {
935 		(void) fprintf(stderr, "error: label %d: "
936 		    "Expected the nvlist checksum to be ", l);
937 		(void) zhack_repair_print_cksum(stderr,
938 		    &expected_cksum);
939 		(void) fprintf(stderr, " not ");
940 		zhack_repair_print_cksum(stderr, &actual_cksum);
941 		(void) fprintf(stderr, "\n");
942 		err = ECKSUM;
943 	}
944 	return (err);
945 }
946 
947 static int
zhack_repair_unpack_cfg(vdev_label_t * vl,const int l,nvlist_t ** cfg)948 zhack_repair_unpack_cfg(vdev_label_t *vl, const int l, nvlist_t **cfg)
949 {
950 	const char *cfg_keys[] = { ZPOOL_CONFIG_VERSION,
951 	    ZPOOL_CONFIG_POOL_STATE, ZPOOL_CONFIG_GUID };
952 	int err;
953 
954 	err = nvlist_unpack(vl->vl_vdev_phys.vp_nvlist,
955 	    VDEV_PHYS_SIZE - sizeof (zio_eck_t), cfg, 0);
956 	if (err) {
957 		(void) fprintf(stderr,
958 		    "error: cannot unpack nvlist label %d\n", l);
959 		return (err);
960 	}
961 
962 	for (int i = 0; i < ARRAY_SIZE(cfg_keys); i++) {
963 		uint64_t val;
964 		err = nvlist_lookup_uint64(*cfg, cfg_keys[i], &val);
965 		if (err) {
966 			(void) fprintf(stderr,
967 			    "error: label %d, %d: "
968 			    "cannot find nvlist key %s\n",
969 			    l, i, cfg_keys[i]);
970 			return (err);
971 		}
972 	}
973 
974 	return (0);
975 }
976 
977 static void
zhack_repair_one_label(const zhack_repair_op_t op,const int fd,vdev_label_t * vl,const uint64_t label_offset,const int l,uint32_t * labels_repaired)978 zhack_repair_one_label(const zhack_repair_op_t op, const int fd,
979     vdev_label_t *vl, const uint64_t label_offset, const int l,
980     uint32_t *labels_repaired)
981 {
982 	ssize_t err;
983 	uberblock_t *ub = (uberblock_t *)vl->vl_uberblock;
984 	void *vdev_data =
985 	    (char *)vl + offsetof(vdev_label_t, vl_vdev_phys);
986 	zio_eck_t *vdev_eck =
987 	    (zio_eck_t *)((char *)(vdev_data) + VDEV_PHYS_SIZE) - 1;
988 	const uint64_t vdev_phys_offset =
989 	    label_offset + offsetof(vdev_label_t, vl_vdev_phys);
990 	nvlist_t *cfg;
991 	uint64_t ashift;
992 	int byteswap;
993 
994 	err = zhack_repair_read_label(fd, vl, label_offset, l);
995 	if (err)
996 		return;
997 
998 	err = zhack_repair_get_byteswap(vdev_eck, l, &byteswap);
999 	if (err)
1000 		return;
1001 
1002 	if (byteswap) {
1003 		byteswap_uint64_array(&vdev_eck->zec_cksum,
1004 		    sizeof (zio_cksum_t));
1005 		vdev_eck->zec_magic = BSWAP_64(vdev_eck->zec_magic);
1006 	}
1007 
1008 	if ((op & ZHACK_REPAIR_OP_CKSUM) == 0 &&
1009 	    zhack_repair_test_cksum(byteswap, vdev_data, vdev_eck,
1010 	    vdev_phys_offset, l) != 0) {
1011 		(void) fprintf(stderr, "It would appear checksums are "
1012 		    "corrupted. Try zhack repair label -c <device>\n");
1013 		return;
1014 	}
1015 
1016 	err = zhack_repair_unpack_cfg(vl, l, &cfg);
1017 	if (err)
1018 		return;
1019 
1020 	if ((op & ZHACK_REPAIR_OP_UNDETACH) != 0) {
1021 		char *buf;
1022 		size_t buflen;
1023 
1024 		if (ub->ub_txg != 0) {
1025 			(void) fprintf(stderr,
1026 			    "error: label %d: UB TXG of 0 expected, but got %"
1027 			    PRIu64 "\n", l, ub->ub_txg);
1028 			(void) fprintf(stderr, "It would appear the device was "
1029 			    "not properly detached.\n");
1030 			return;
1031 		}
1032 
1033 		err = zhack_repair_get_ashift(cfg, l, &ashift);
1034 		if (err)
1035 			return;
1036 
1037 		err = zhack_repair_undetach(ub, cfg, l);
1038 		if (err)
1039 			return;
1040 
1041 		buf = vl->vl_vdev_phys.vp_nvlist;
1042 		buflen = VDEV_PHYS_SIZE - sizeof (zio_eck_t);
1043 		if (nvlist_pack(cfg, &buf, &buflen, NV_ENCODE_XDR, 0) != 0) {
1044 			(void) fprintf(stderr,
1045 			    "error: label %d: Failed to pack nvlist\n", l);
1046 			return;
1047 		}
1048 
1049 		zhack_repair_write_uberblock(vl,
1050 		    l, ashift, fd, byteswap, label_offset, labels_repaired);
1051 	}
1052 
1053 	if (zhack_repair_write_label(l, fd, byteswap, vdev_data, vdev_eck,
1054 	    vdev_phys_offset, VDEV_PHYS_SIZE))
1055 			labels_repaired[l] |= REPAIR_LABEL_STATUS_CKSUM;
1056 
1057 	fsync(fd);
1058 }
1059 
/*
 * Map a label's repair status bits to a human readable word: "repaired"
 * when any bit of `to_check` is set in `label_status`, "skipped" otherwise.
 * The returned string is a literal and must not be freed.
 */
static const char *
zhack_repair_label_status(const uint32_t label_status,
    const uint32_t to_check)
{
	if ((label_status & to_check) == 0)
		return ("skipped");

	return ("repaired");
}
1066 
1067 static int
zhack_label_repair(const zhack_repair_op_t op,const int argc,char ** argv)1068 zhack_label_repair(const zhack_repair_op_t op, const int argc, char **argv)
1069 {
1070 	uint32_t labels_repaired[VDEV_LABELS] = {0};
1071 	vdev_label_t labels[VDEV_LABELS] = {{{0}}};
1072 	struct stat64 st;
1073 	int fd;
1074 	off_t filesize;
1075 	uint32_t repaired = 0;
1076 
1077 	abd_init();
1078 
1079 	if (argc < 1) {
1080 		(void) fprintf(stderr, "error: missing device\n");
1081 		usage();
1082 	}
1083 
1084 	if ((fd = open(argv[0], O_RDWR)) == -1)
1085 		fatal(NULL, FTAG, "cannot open '%s': %s", argv[0],
1086 		    strerror(errno));
1087 
1088 	if (fstat64_blk(fd, &st) != 0)
1089 		fatal(NULL, FTAG, "cannot stat '%s': %s", argv[0],
1090 		    strerror(errno));
1091 
1092 	filesize = st.st_size;
1093 	(void) fprintf(stderr, "Calculated filesize to be %jd\n",
1094 	    (intmax_t)filesize);
1095 
1096 	if (filesize % sizeof (vdev_label_t) != 0)
1097 		filesize =
1098 		    (filesize / sizeof (vdev_label_t)) * sizeof (vdev_label_t);
1099 
1100 	for (int l = 0; l < VDEV_LABELS; l++) {
1101 		zhack_repair_one_label(op, fd, &labels[l],
1102 		    vdev_label_offset(filesize, l, 0), l, labels_repaired);
1103 	}
1104 
1105 	close(fd);
1106 
1107 	abd_fini();
1108 
1109 	for (int l = 0; l < VDEV_LABELS; l++) {
1110 		const uint32_t lr = labels_repaired[l];
1111 		(void) printf("label %d: ", l);
1112 		(void) printf("uberblock: %s ",
1113 		    zhack_repair_label_status(lr, REPAIR_LABEL_STATUS_UB));
1114 		(void) printf("checksum: %s\n",
1115 		    zhack_repair_label_status(lr, REPAIR_LABEL_STATUS_CKSUM));
1116 		repaired |= lr;
1117 	}
1118 
1119 	if (repaired > 0)
1120 		return (0);
1121 
1122 	return (1);
1123 }
1124 
1125 static int
zhack_do_label_repair(int argc,char ** argv)1126 zhack_do_label_repair(int argc, char **argv)
1127 {
1128 	zhack_repair_op_t op = ZHACK_REPAIR_OP_UNKNOWN;
1129 	int c;
1130 
1131 	optind = 1;
1132 	while ((c = getopt(argc, argv, "+cu")) != -1) {
1133 		switch (c) {
1134 		case 'c':
1135 			op |= ZHACK_REPAIR_OP_CKSUM;
1136 			break;
1137 		case 'u':
1138 			op |= ZHACK_REPAIR_OP_UNDETACH;
1139 			break;
1140 		default:
1141 			usage();
1142 			break;
1143 		}
1144 	}
1145 
1146 	argc -= optind;
1147 	argv += optind;
1148 
1149 	if (op == ZHACK_REPAIR_OP_UNKNOWN)
1150 		op = ZHACK_REPAIR_OP_CKSUM;
1151 
1152 	return (zhack_label_repair(op, argc, argv));
1153 }
1154 
/*
 * Dispatch "label <operation> ...".
 *
 * The leading argument (the "label" keyword passed in by main()) is
 * skipped so that the operation name becomes argv[0] for its handler.
 * "repair" is the only operation recognized; anything else, or no
 * operation at all, prints an error followed by usage.
 *
 * Returns the result of the operation handler.
 */
static int
zhack_do_label(int argc, char **argv)
{
	const char *operation;

	argc--;
	argv++;
	if (argc == 0) {
		(void) fprintf(stderr,
		    "error: no label operation specified\n");
		usage();
	}

	operation = argv[0];
	if (strcmp(operation, "repair") != 0) {
		(void) fprintf(stderr, "error: unknown subcommand: %s\n",
		    operation);
		/* Expected not to return, as the argc check above assumes. */
		usage();
	}

	return (zhack_do_label_repair(argc, argv));
}
1180 
1181 #define	MAX_NUM_PATHS 1024
1182 
1183 int
main(int argc,char ** argv)1184 main(int argc, char **argv)
1185 {
1186 	char *path[MAX_NUM_PATHS];
1187 	const char *subcommand;
1188 	int rv = 0;
1189 	int c;
1190 
1191 	g_importargs.path = path;
1192 
1193 	dprintf_setup(&argc, argv);
1194 	zfs_prop_init();
1195 
1196 	while ((c = getopt(argc, argv, "+c:d:o:")) != -1) {
1197 		switch (c) {
1198 		case 'c':
1199 			g_importargs.cachefile = optarg;
1200 			break;
1201 		case 'd':
1202 			assert(g_importargs.paths < MAX_NUM_PATHS);
1203 			g_importargs.path[g_importargs.paths++] = optarg;
1204 			break;
1205 		case 'o':
1206 			if (handle_tunable_option(optarg, B_FALSE) != 0)
1207 				exit(1);
1208 			break;
1209 		default:
1210 			usage();
1211 			break;
1212 		}
1213 	}
1214 
1215 	argc -= optind;
1216 	argv += optind;
1217 	optind = 1;
1218 
1219 	if (argc == 0) {
1220 		(void) fprintf(stderr, "error: no command specified\n");
1221 		usage();
1222 	}
1223 
1224 	subcommand = argv[0];
1225 
1226 	if (strcmp(subcommand, "feature") == 0) {
1227 		rv = zhack_do_feature(argc, argv);
1228 	} else if (strcmp(subcommand, "label") == 0) {
1229 		return (zhack_do_label(argc, argv));
1230 	} else if (strcmp(subcommand, "metaslab") == 0) {
1231 		rv = zhack_do_metaslab(argc, argv);
1232 	} else {
1233 		(void) fprintf(stderr, "error: unknown subcommand: %s\n",
1234 		    subcommand);
1235 		usage();
1236 	}
1237 
1238 	if (!g_readonly && spa_export(g_pool, NULL, B_TRUE, B_FALSE) != 0) {
1239 		fatal(NULL, FTAG, "pool export failed; "
1240 		    "changes may not be committed to disk\n");
1241 	}
1242 
1243 	kernel_fini();
1244 
1245 	return (rv);
1246 }
1247