xref: /freebsd/sys/contrib/openzfs/cmd/zdb/zdb.c (revision f5463265955b829775bbb32e1fd0bc11dafc36ce)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or https://opensource.org/licenses/CDDL-1.0.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
25  * Copyright (c) 2014 Integros [integros.com]
26  * Copyright 2016 Nexenta Systems, Inc.
27  * Copyright (c) 2017, 2018 Lawrence Livermore National Security, LLC.
28  * Copyright (c) 2015, 2017, Intel Corporation.
29  * Copyright (c) 2020 Datto Inc.
30  * Copyright (c) 2020, The FreeBSD Foundation [1]
31  *
32  * [1] Portions of this software were developed by Allan Jude
33  *     under sponsorship from the FreeBSD Foundation.
34  * Copyright (c) 2021 Allan Jude
35  * Copyright (c) 2021 Toomas Soome <tsoome@me.com>
36  * Copyright (c) 2023, Klara Inc.
37  * Copyright (c) 2023, Rob Norris <robn@despairlabs.com>
38  */
39 
40 #include <stdio.h>
41 #include <unistd.h>
42 #include <stdlib.h>
43 #include <ctype.h>
44 #include <getopt.h>
45 #include <openssl/evp.h>
46 #include <sys/zfs_context.h>
47 #include <sys/spa.h>
48 #include <sys/spa_impl.h>
49 #include <sys/dmu.h>
50 #include <sys/zap.h>
51 #include <sys/fs/zfs.h>
52 #include <sys/zfs_znode.h>
53 #include <sys/zfs_sa.h>
54 #include <sys/sa.h>
55 #include <sys/sa_impl.h>
56 #include <sys/vdev.h>
57 #include <sys/vdev_impl.h>
58 #include <sys/metaslab_impl.h>
59 #include <sys/dmu_objset.h>
60 #include <sys/dsl_dir.h>
61 #include <sys/dsl_dataset.h>
62 #include <sys/dsl_pool.h>
63 #include <sys/dsl_bookmark.h>
64 #include <sys/dbuf.h>
65 #include <sys/zil.h>
66 #include <sys/zil_impl.h>
67 #include <sys/stat.h>
68 #include <sys/resource.h>
69 #include <sys/dmu_send.h>
70 #include <sys/dmu_traverse.h>
71 #include <sys/zio_checksum.h>
72 #include <sys/zio_compress.h>
73 #include <sys/zfs_fuid.h>
74 #include <sys/arc.h>
75 #include <sys/arc_impl.h>
76 #include <sys/ddt.h>
77 #include <sys/zfeature.h>
78 #include <sys/abd.h>
79 #include <sys/blkptr.h>
80 #include <sys/dsl_crypt.h>
81 #include <sys/dsl_scan.h>
82 #include <sys/btree.h>
83 #include <sys/brt.h>
84 #include <sys/brt_impl.h>
85 #include <zfs_comutil.h>
86 #include <sys/zstd/zstd.h>
87 
88 #include <libnvpair.h>
89 #include <libzutil.h>
90 
91 #include "zdb.h"
92 
/*
 * Bounds-checked lookups.  The two *_NAME macros index the compression and
 * checksum tables only when idx is below the table size and otherwise
 * evaluate to the string "UNKNOWN".
 */
#define	ZDB_COMPRESS_NAME(idx)						\
	(((idx) < ZIO_COMPRESS_FUNCTIONS) ?				\
	zio_compress_table[(idx)].ci_name : "UNKNOWN")
#define	ZDB_CHECKSUM_NAME(idx)						\
	(((idx) < ZIO_CHECKSUM_FUNCTIONS) ?				\
	zio_checksum_table[(idx)].ci_name : "UNKNOWN")

/*
 * Collapse an object type into a value no larger than DMU_OT_NUMTYPES:
 * types below DMU_OT_NUMTYPES map to themselves, the DMU_OTN_ZAP_* and
 * DMU_OTN_UINT64_* types map to their *_OTHER counterparts, and anything
 * else maps to DMU_OT_NUMTYPES.
 */
#define	ZDB_OT_TYPE(idx)						\
	(((idx) < DMU_OT_NUMTYPES) ? (idx) :				\
	(((idx) == DMU_OTN_ZAP_DATA ||					\
	(idx) == DMU_OTN_ZAP_METADATA) ? DMU_OT_ZAP_OTHER :		\
	(((idx) == DMU_OTN_UINT64_DATA ||				\
	(idx) == DMU_OTN_UINT64_METADATA) ? DMU_OT_UINT64_OTHER :	\
	DMU_OT_NUMTYPES)))
102 
/*
 * Object-number mapping hook.  On Apple platforms part of the inode ID has
 * to be remapped, so the number is passed through INO_XNUTOZFS(); on every
 * other platform the macro is the identity.
 */
#if defined(__APPLE__)
#define	ZDB_MAP_OBJECT_ID(obj)	INO_XNUTOZFS(obj, 2)
#else
#define	ZDB_MAP_OBJECT_ID(obj)	(obj)
#endif
109 
110 static const char *
111 zdb_ot_name(dmu_object_type_t type)
112 {
113 	if (type < DMU_OT_NUMTYPES)
114 		return (dmu_ot[type].ot_name);
115 	else if ((type & DMU_OT_NEWTYPE) &&
116 	    ((type & DMU_OT_BYTESWAP_MASK) < DMU_BSWAP_NUMFUNCS))
117 		return (dmu_ot_byteswap[type & DMU_OT_BYTESWAP_MASK].ob_name);
118 	else
119 		return ("UNKNOWN");
120 }
121 
122 extern int reference_tracking_enable;
123 extern int zfs_recover;
124 extern uint_t zfs_vdev_async_read_max_active;
125 extern boolean_t spa_load_verify_dryrun;
126 extern boolean_t spa_mode_readable_spacemaps;
127 extern uint_t zfs_reconstruct_indirect_combinations_max;
128 extern uint_t zfs_btree_verify_intensity;
129 
130 static const char cmdname[] = "zdb";
131 uint8_t dump_opt[256];
132 
133 typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);
134 
135 static uint64_t *zopt_metaslab = NULL;
136 static unsigned zopt_metaslab_args = 0;
137 
/*
 * One object-range argument: a start object, an end object, and a flags
 * word (see the ZOR_FLAG_* definitions).
 */
typedef struct zopt_object_range {
	uint64_t zor_obj_start;		/* first object number */
	uint64_t zor_obj_end;		/* last object number */
	uint64_t zor_flags;		/* ZOR_FLAG_* bits */
} zopt_object_range_t;

/* Array of object ranges and the number of object arguments. */
static zopt_object_range_t *zopt_object_ranges = NULL;
static unsigned int zopt_object_args = 0;

/* Per-character table of flag bits; starts out all zero. */
static int flagbits[256];
148 
/*
 * Object-type selector bits.  ZOR_FLAG_ALL_TYPES has every bit set; its
 * replacement list is parenthesized so the macro always expands as a single
 * operand regardless of the surrounding expression.
 */
#define	ZOR_FLAG_PLAIN_FILE	0x0001
#define	ZOR_FLAG_DIRECTORY	0x0002
#define	ZOR_FLAG_SPACE_MAP	0x0004
#define	ZOR_FLAG_ZAP		0x0008
#define	ZOR_FLAG_ALL_TYPES	(-1)
#define	ZOR_SUPPORTED_FLAGS	(ZOR_FLAG_PLAIN_FILE	| \
				ZOR_FLAG_DIRECTORY	| \
				ZOR_FLAG_SPACE_MAP	| \
				ZOR_FLAG_ZAP)
158 
/* ZDB_FLAG_* bits; each flag occupies its own bit. */
#define	ZDB_FLAG_CHECKSUM	(1 << 0)
#define	ZDB_FLAG_DECOMPRESS	(1 << 1)
#define	ZDB_FLAG_BSWAP		(1 << 2)
#define	ZDB_FLAG_GBH		(1 << 3)
#define	ZDB_FLAG_INDIRECT	(1 << 4)
#define	ZDB_FLAG_RAW		(1 << 5)
#define	ZDB_FLAG_PRINT_BLKPTR	(1 << 6)
#define	ZDB_FLAG_VERBOSE	(1 << 7)
167 
168 static uint64_t max_inflight_bytes = 256 * 1024 * 1024; /* 256MB */
169 static int leaked_objects = 0;
170 static range_tree_t *mos_refd_objs;
171 
172 static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *,
173     boolean_t);
174 static void mos_obj_refd(uint64_t);
175 static void mos_obj_refd_multiple(uint64_t);
176 static int dump_bpobj_cb(void *arg, const blkptr_t *bp, boolean_t free,
177     dmu_tx_t *tx);
178 
179 typedef struct sublivelist_verify {
180 	/* FREE's that haven't yet matched to an ALLOC, in one sub-livelist */
181 	zfs_btree_t sv_pair;
182 
183 	/* ALLOC's without a matching FREE, accumulates across sub-livelists */
184 	zfs_btree_t sv_leftover;
185 } sublivelist_verify_t;
186 
187 static int
188 livelist_compare(const void *larg, const void *rarg)
189 {
190 	const blkptr_t *l = larg;
191 	const blkptr_t *r = rarg;
192 
193 	/* Sort them according to dva[0] */
194 	uint64_t l_dva0_vdev, r_dva0_vdev;
195 	l_dva0_vdev = DVA_GET_VDEV(&l->blk_dva[0]);
196 	r_dva0_vdev = DVA_GET_VDEV(&r->blk_dva[0]);
197 	if (l_dva0_vdev < r_dva0_vdev)
198 		return (-1);
199 	else if (l_dva0_vdev > r_dva0_vdev)
200 		return (+1);
201 
202 	/* if vdevs are equal, sort by offsets. */
203 	uint64_t l_dva0_offset;
204 	uint64_t r_dva0_offset;
205 	l_dva0_offset = DVA_GET_OFFSET(&l->blk_dva[0]);
206 	r_dva0_offset = DVA_GET_OFFSET(&r->blk_dva[0]);
207 	if (l_dva0_offset < r_dva0_offset) {
208 		return (-1);
209 	} else if (l_dva0_offset > r_dva0_offset) {
210 		return (+1);
211 	}
212 
213 	/*
214 	 * Since we're storing blkptrs without cancelling FREE/ALLOC pairs,
215 	 * it's possible the offsets are equal. In that case, sort by txg
216 	 */
217 	if (l->blk_birth < r->blk_birth) {
218 		return (-1);
219 	} else if (l->blk_birth > r->blk_birth) {
220 		return (+1);
221 	}
222 	return (0);
223 }
224 
225 typedef struct sublivelist_verify_block {
226 	dva_t svb_dva;
227 
228 	/*
229 	 * We need this to check if the block marked as allocated
230 	 * in the livelist was freed (and potentially reallocated)
231 	 * in the metaslab spacemaps at a later TXG.
232 	 */
233 	uint64_t svb_allocated_txg;
234 } sublivelist_verify_block_t;
235 
236 static void zdb_print_blkptr(const blkptr_t *bp, int flags);
237 
238 typedef struct sublivelist_verify_block_refcnt {
239 	/* block pointer entry in livelist being verified */
240 	blkptr_t svbr_blk;
241 
242 	/*
243 	 * Refcount gets incremented to 1 when we encounter the first
244 	 * FREE entry for the svfbr block pointer and a node for it
245 	 * is created in our ZDB verification/tracking metadata.
246 	 *
247 	 * As we encounter more FREE entries we increment this counter
248 	 * and similarly decrement it whenever we find the respective
249 	 * ALLOC entries for this block.
250 	 *
251 	 * When the refcount gets to 0 it means that all the FREE and
252 	 * ALLOC entries of this block have paired up and we no longer
253 	 * need to track it in our verification logic (e.g. the node
254 	 * containing this struct in our verification data structure
255 	 * should be freed).
256 	 *
257 	 * [refer to sublivelist_verify_blkptr() for the actual code]
258 	 */
259 	uint32_t svbr_refcnt;
260 } sublivelist_verify_block_refcnt_t;
261 
262 static int
263 sublivelist_block_refcnt_compare(const void *larg, const void *rarg)
264 {
265 	const sublivelist_verify_block_refcnt_t *l = larg;
266 	const sublivelist_verify_block_refcnt_t *r = rarg;
267 	return (livelist_compare(&l->svbr_blk, &r->svbr_blk));
268 }
269 
270 static int
271 sublivelist_verify_blkptr(void *arg, const blkptr_t *bp, boolean_t free,
272     dmu_tx_t *tx)
273 {
274 	ASSERT3P(tx, ==, NULL);
275 	struct sublivelist_verify *sv = arg;
276 	sublivelist_verify_block_refcnt_t current = {
277 			.svbr_blk = *bp,
278 
279 			/*
280 			 * Start with 1 in case this is the first free entry.
281 			 * This field is not used for our B-Tree comparisons
282 			 * anyway.
283 			 */
284 			.svbr_refcnt = 1,
285 	};
286 
287 	zfs_btree_index_t where;
288 	sublivelist_verify_block_refcnt_t *pair =
289 	    zfs_btree_find(&sv->sv_pair, &current, &where);
290 	if (free) {
291 		if (pair == NULL) {
292 			/* first free entry for this block pointer */
293 			zfs_btree_add(&sv->sv_pair, &current);
294 		} else {
295 			pair->svbr_refcnt++;
296 		}
297 	} else {
298 		if (pair == NULL) {
299 			/* block that is currently marked as allocated */
300 			for (int i = 0; i < SPA_DVAS_PER_BP; i++) {
301 				if (DVA_IS_EMPTY(&bp->blk_dva[i]))
302 					break;
303 				sublivelist_verify_block_t svb = {
304 				    .svb_dva = bp->blk_dva[i],
305 				    .svb_allocated_txg = bp->blk_birth
306 				};
307 
308 				if (zfs_btree_find(&sv->sv_leftover, &svb,
309 				    &where) == NULL) {
310 					zfs_btree_add_idx(&sv->sv_leftover,
311 					    &svb, &where);
312 				}
313 			}
314 		} else {
315 			/* alloc matches a free entry */
316 			pair->svbr_refcnt--;
317 			if (pair->svbr_refcnt == 0) {
318 				/* all allocs and frees have been matched */
319 				zfs_btree_remove_idx(&sv->sv_pair, &where);
320 			}
321 		}
322 	}
323 
324 	return (0);
325 }
326 
327 static int
328 sublivelist_verify_func(void *args, dsl_deadlist_entry_t *dle)
329 {
330 	int err;
331 	struct sublivelist_verify *sv = args;
332 
333 	zfs_btree_create(&sv->sv_pair, sublivelist_block_refcnt_compare, NULL,
334 	    sizeof (sublivelist_verify_block_refcnt_t));
335 
336 	err = bpobj_iterate_nofree(&dle->dle_bpobj, sublivelist_verify_blkptr,
337 	    sv, NULL);
338 
339 	sublivelist_verify_block_refcnt_t *e;
340 	zfs_btree_index_t *cookie = NULL;
341 	while ((e = zfs_btree_destroy_nodes(&sv->sv_pair, &cookie)) != NULL) {
342 		char blkbuf[BP_SPRINTF_LEN];
343 		snprintf_blkptr_compact(blkbuf, sizeof (blkbuf),
344 		    &e->svbr_blk, B_TRUE);
345 		(void) printf("\tERROR: %d unmatched FREE(s): %s\n",
346 		    e->svbr_refcnt, blkbuf);
347 	}
348 	zfs_btree_destroy(&sv->sv_pair);
349 
350 	return (err);
351 }
352 
353 static int
354 livelist_block_compare(const void *larg, const void *rarg)
355 {
356 	const sublivelist_verify_block_t *l = larg;
357 	const sublivelist_verify_block_t *r = rarg;
358 
359 	if (DVA_GET_VDEV(&l->svb_dva) < DVA_GET_VDEV(&r->svb_dva))
360 		return (-1);
361 	else if (DVA_GET_VDEV(&l->svb_dva) > DVA_GET_VDEV(&r->svb_dva))
362 		return (+1);
363 
364 	if (DVA_GET_OFFSET(&l->svb_dva) < DVA_GET_OFFSET(&r->svb_dva))
365 		return (-1);
366 	else if (DVA_GET_OFFSET(&l->svb_dva) > DVA_GET_OFFSET(&r->svb_dva))
367 		return (+1);
368 
369 	if (DVA_GET_ASIZE(&l->svb_dva) < DVA_GET_ASIZE(&r->svb_dva))
370 		return (-1);
371 	else if (DVA_GET_ASIZE(&l->svb_dva) > DVA_GET_ASIZE(&r->svb_dva))
372 		return (+1);
373 
374 	return (0);
375 }
376 
377 /*
378  * Check for errors in a livelist while tracking all unfreed ALLOCs in the
379  * sublivelist_verify_t: sv->sv_leftover
380  */
381 static void
382 livelist_verify(dsl_deadlist_t *dl, void *arg)
383 {
384 	sublivelist_verify_t *sv = arg;
385 	dsl_deadlist_iterate(dl, sublivelist_verify_func, sv);
386 }
387 
388 /*
389  * Check for errors in the livelist entry and discard the intermediary
390  * data structures
391  */
392 static int
393 sublivelist_verify_lightweight(void *args, dsl_deadlist_entry_t *dle)
394 {
395 	(void) args;
396 	sublivelist_verify_t sv;
397 	zfs_btree_create(&sv.sv_leftover, livelist_block_compare, NULL,
398 	    sizeof (sublivelist_verify_block_t));
399 	int err = sublivelist_verify_func(&sv, dle);
400 	zfs_btree_clear(&sv.sv_leftover);
401 	zfs_btree_destroy(&sv.sv_leftover);
402 	return (err);
403 }
404 
405 typedef struct metaslab_verify {
406 	/*
407 	 * Tree containing all the leftover ALLOCs from the livelists
408 	 * that are part of this metaslab.
409 	 */
410 	zfs_btree_t mv_livelist_allocs;
411 
412 	/*
413 	 * Metaslab information.
414 	 */
415 	uint64_t mv_vdid;
416 	uint64_t mv_msid;
417 	uint64_t mv_start;
418 	uint64_t mv_end;
419 
420 	/*
421 	 * What's currently allocated for this metaslab.
422 	 */
423 	range_tree_t *mv_allocated;
424 } metaslab_verify_t;
425 
426 typedef void ll_iter_t(dsl_deadlist_t *ll, void *arg);
427 
428 typedef int (*zdb_log_sm_cb_t)(spa_t *spa, space_map_entry_t *sme, uint64_t txg,
429     void *arg);
430 
431 typedef struct unflushed_iter_cb_arg {
432 	spa_t *uic_spa;
433 	uint64_t uic_txg;
434 	void *uic_arg;
435 	zdb_log_sm_cb_t uic_cb;
436 } unflushed_iter_cb_arg_t;
437 
438 static int
439 iterate_through_spacemap_logs_cb(space_map_entry_t *sme, void *arg)
440 {
441 	unflushed_iter_cb_arg_t *uic = arg;
442 	return (uic->uic_cb(uic->uic_spa, sme, uic->uic_txg, uic->uic_arg));
443 }
444 
445 static void
446 iterate_through_spacemap_logs(spa_t *spa, zdb_log_sm_cb_t cb, void *arg)
447 {
448 	if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP))
449 		return;
450 
451 	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
452 	for (spa_log_sm_t *sls = avl_first(&spa->spa_sm_logs_by_txg);
453 	    sls; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls)) {
454 		space_map_t *sm = NULL;
455 		VERIFY0(space_map_open(&sm, spa_meta_objset(spa),
456 		    sls->sls_sm_obj, 0, UINT64_MAX, SPA_MINBLOCKSHIFT));
457 
458 		unflushed_iter_cb_arg_t uic = {
459 			.uic_spa = spa,
460 			.uic_txg = sls->sls_txg,
461 			.uic_arg = arg,
462 			.uic_cb = cb
463 		};
464 		VERIFY0(space_map_iterate(sm, space_map_length(sm),
465 		    iterate_through_spacemap_logs_cb, &uic));
466 		space_map_close(sm);
467 	}
468 	spa_config_exit(spa, SCL_CONFIG, FTAG);
469 }
470 
471 static void
472 verify_livelist_allocs(metaslab_verify_t *mv, uint64_t txg,
473     uint64_t offset, uint64_t size)
474 {
475 	sublivelist_verify_block_t svb = {{{0}}};
476 	DVA_SET_VDEV(&svb.svb_dva, mv->mv_vdid);
477 	DVA_SET_OFFSET(&svb.svb_dva, offset);
478 	DVA_SET_ASIZE(&svb.svb_dva, size);
479 	zfs_btree_index_t where;
480 	uint64_t end_offset = offset + size;
481 
482 	/*
483 	 *  Look for an exact match for spacemap entry in the livelist entries.
484 	 *  Then, look for other livelist entries that fall within the range
485 	 *  of the spacemap entry as it may have been condensed
486 	 */
487 	sublivelist_verify_block_t *found =
488 	    zfs_btree_find(&mv->mv_livelist_allocs, &svb, &where);
489 	if (found == NULL) {
490 		found = zfs_btree_next(&mv->mv_livelist_allocs, &where, &where);
491 	}
492 	for (; found != NULL && DVA_GET_VDEV(&found->svb_dva) == mv->mv_vdid &&
493 	    DVA_GET_OFFSET(&found->svb_dva) < end_offset;
494 	    found = zfs_btree_next(&mv->mv_livelist_allocs, &where, &where)) {
495 		if (found->svb_allocated_txg <= txg) {
496 			(void) printf("ERROR: Livelist ALLOC [%llx:%llx] "
497 			    "from TXG %llx FREED at TXG %llx\n",
498 			    (u_longlong_t)DVA_GET_OFFSET(&found->svb_dva),
499 			    (u_longlong_t)DVA_GET_ASIZE(&found->svb_dva),
500 			    (u_longlong_t)found->svb_allocated_txg,
501 			    (u_longlong_t)txg);
502 		}
503 	}
504 }
505 
506 static int
507 metaslab_spacemap_validation_cb(space_map_entry_t *sme, void *arg)
508 {
509 	metaslab_verify_t *mv = arg;
510 	uint64_t offset = sme->sme_offset;
511 	uint64_t size = sme->sme_run;
512 	uint64_t txg = sme->sme_txg;
513 
514 	if (sme->sme_type == SM_ALLOC) {
515 		if (range_tree_contains(mv->mv_allocated,
516 		    offset, size)) {
517 			(void) printf("ERROR: DOUBLE ALLOC: "
518 			    "%llu [%llx:%llx] "
519 			    "%llu:%llu LOG_SM\n",
520 			    (u_longlong_t)txg, (u_longlong_t)offset,
521 			    (u_longlong_t)size, (u_longlong_t)mv->mv_vdid,
522 			    (u_longlong_t)mv->mv_msid);
523 		} else {
524 			range_tree_add(mv->mv_allocated,
525 			    offset, size);
526 		}
527 	} else {
528 		if (!range_tree_contains(mv->mv_allocated,
529 		    offset, size)) {
530 			(void) printf("ERROR: DOUBLE FREE: "
531 			    "%llu [%llx:%llx] "
532 			    "%llu:%llu LOG_SM\n",
533 			    (u_longlong_t)txg, (u_longlong_t)offset,
534 			    (u_longlong_t)size, (u_longlong_t)mv->mv_vdid,
535 			    (u_longlong_t)mv->mv_msid);
536 		} else {
537 			range_tree_remove(mv->mv_allocated,
538 			    offset, size);
539 		}
540 	}
541 
542 	if (sme->sme_type != SM_ALLOC) {
543 		/*
544 		 * If something is freed in the spacemap, verify that
545 		 * it is not listed as allocated in the livelist.
546 		 */
547 		verify_livelist_allocs(mv, txg, offset, size);
548 	}
549 	return (0);
550 }
551 
552 static int
553 spacemap_check_sm_log_cb(spa_t *spa, space_map_entry_t *sme,
554     uint64_t txg, void *arg)
555 {
556 	metaslab_verify_t *mv = arg;
557 	uint64_t offset = sme->sme_offset;
558 	uint64_t vdev_id = sme->sme_vdev;
559 
560 	vdev_t *vd = vdev_lookup_top(spa, vdev_id);
561 
562 	/* skip indirect vdevs */
563 	if (!vdev_is_concrete(vd))
564 		return (0);
565 
566 	if (vdev_id != mv->mv_vdid)
567 		return (0);
568 
569 	metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift];
570 	if (ms->ms_id != mv->mv_msid)
571 		return (0);
572 
573 	if (txg < metaslab_unflushed_txg(ms))
574 		return (0);
575 
576 
577 	ASSERT3U(txg, ==, sme->sme_txg);
578 	return (metaslab_spacemap_validation_cb(sme, mv));
579 }
580 
581 static void
582 spacemap_check_sm_log(spa_t *spa, metaslab_verify_t *mv)
583 {
584 	iterate_through_spacemap_logs(spa, spacemap_check_sm_log_cb, mv);
585 }
586 
587 static void
588 spacemap_check_ms_sm(space_map_t  *sm, metaslab_verify_t *mv)
589 {
590 	if (sm == NULL)
591 		return;
592 
593 	VERIFY0(space_map_iterate(sm, space_map_length(sm),
594 	    metaslab_spacemap_validation_cb, mv));
595 }
596 
597 static void iterate_deleted_livelists(spa_t *spa, ll_iter_t func, void *arg);
598 
599 /*
600  * Transfer blocks from sv_leftover tree to the mv_livelist_allocs if
601  * they are part of that metaslab (mv_msid).
602  */
603 static void
604 mv_populate_livelist_allocs(metaslab_verify_t *mv, sublivelist_verify_t *sv)
605 {
606 	zfs_btree_index_t where;
607 	sublivelist_verify_block_t *svb;
608 	ASSERT3U(zfs_btree_numnodes(&mv->mv_livelist_allocs), ==, 0);
609 	for (svb = zfs_btree_first(&sv->sv_leftover, &where);
610 	    svb != NULL;
611 	    svb = zfs_btree_next(&sv->sv_leftover, &where, &where)) {
612 		if (DVA_GET_VDEV(&svb->svb_dva) != mv->mv_vdid)
613 			continue;
614 
615 		if (DVA_GET_OFFSET(&svb->svb_dva) < mv->mv_start &&
616 		    (DVA_GET_OFFSET(&svb->svb_dva) +
617 		    DVA_GET_ASIZE(&svb->svb_dva)) > mv->mv_start) {
618 			(void) printf("ERROR: Found block that crosses "
619 			    "metaslab boundary: <%llu:%llx:%llx>\n",
620 			    (u_longlong_t)DVA_GET_VDEV(&svb->svb_dva),
621 			    (u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva),
622 			    (u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva));
623 			continue;
624 		}
625 
626 		if (DVA_GET_OFFSET(&svb->svb_dva) < mv->mv_start)
627 			continue;
628 
629 		if (DVA_GET_OFFSET(&svb->svb_dva) >= mv->mv_end)
630 			continue;
631 
632 		if ((DVA_GET_OFFSET(&svb->svb_dva) +
633 		    DVA_GET_ASIZE(&svb->svb_dva)) > mv->mv_end) {
634 			(void) printf("ERROR: Found block that crosses "
635 			    "metaslab boundary: <%llu:%llx:%llx>\n",
636 			    (u_longlong_t)DVA_GET_VDEV(&svb->svb_dva),
637 			    (u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva),
638 			    (u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva));
639 			continue;
640 		}
641 
642 		zfs_btree_add(&mv->mv_livelist_allocs, svb);
643 	}
644 
645 	for (svb = zfs_btree_first(&mv->mv_livelist_allocs, &where);
646 	    svb != NULL;
647 	    svb = zfs_btree_next(&mv->mv_livelist_allocs, &where, &where)) {
648 		zfs_btree_remove(&sv->sv_leftover, svb);
649 	}
650 }
651 
652 /*
653  * [Livelist Check]
654  * Iterate through all the sublivelists and:
655  * - report leftover frees (**)
656  * - record leftover ALLOCs together with their TXG [see Cross Check]
657  *
658  * (**) Note: Double ALLOCs are valid in datasets that have dedup
659  *      enabled. Similarly double FREEs are allowed as well but
660  *      only if they pair up with a corresponding ALLOC entry once
661  *      we our done with our sublivelist iteration.
662  *
663  * [Spacemap Check]
664  * for each metaslab:
665  * - iterate over spacemap and then the metaslab's entries in the
666  *   spacemap log, then report any double FREEs and ALLOCs (do not
667  *   blow up).
668  *
669  * [Cross Check]
670  * After finishing the Livelist Check phase and while being in the
671  * Spacemap Check phase, we find all the recorded leftover ALLOCs
672  * of the livelist check that are part of the metaslab that we are
673  * currently looking at in the Spacemap Check. We report any entries
674  * that are marked as ALLOCs in the livelists but have been actually
675  * freed (and potentially allocated again) after their TXG stamp in
676  * the spacemaps. Also report any ALLOCs from the livelists that
677  * belong to indirect vdevs (e.g. their vdev completed removal).
678  *
679  * Note that this will miss Log Spacemap entries that cancelled each other
680  * out before being flushed to the metaslab, so we are not guaranteed
681  * to match all erroneous ALLOCs.
682  */
683 static void
684 livelist_metaslab_validate(spa_t *spa)
685 {
686 	(void) printf("Verifying deleted livelist entries\n");
687 
688 	sublivelist_verify_t sv;
689 	zfs_btree_create(&sv.sv_leftover, livelist_block_compare, NULL,
690 	    sizeof (sublivelist_verify_block_t));
691 	iterate_deleted_livelists(spa, livelist_verify, &sv);
692 
693 	(void) printf("Verifying metaslab entries\n");
694 	vdev_t *rvd = spa->spa_root_vdev;
695 	for (uint64_t c = 0; c < rvd->vdev_children; c++) {
696 		vdev_t *vd = rvd->vdev_child[c];
697 
698 		if (!vdev_is_concrete(vd))
699 			continue;
700 
701 		for (uint64_t mid = 0; mid < vd->vdev_ms_count; mid++) {
702 			metaslab_t *m = vd->vdev_ms[mid];
703 
704 			(void) fprintf(stderr,
705 			    "\rverifying concrete vdev %llu, "
706 			    "metaslab %llu of %llu ...",
707 			    (longlong_t)vd->vdev_id,
708 			    (longlong_t)mid,
709 			    (longlong_t)vd->vdev_ms_count);
710 
711 			uint64_t shift, start;
712 			range_seg_type_t type =
713 			    metaslab_calculate_range_tree_type(vd, m,
714 			    &start, &shift);
715 			metaslab_verify_t mv;
716 			mv.mv_allocated = range_tree_create(NULL,
717 			    type, NULL, start, shift);
718 			mv.mv_vdid = vd->vdev_id;
719 			mv.mv_msid = m->ms_id;
720 			mv.mv_start = m->ms_start;
721 			mv.mv_end = m->ms_start + m->ms_size;
722 			zfs_btree_create(&mv.mv_livelist_allocs,
723 			    livelist_block_compare, NULL,
724 			    sizeof (sublivelist_verify_block_t));
725 
726 			mv_populate_livelist_allocs(&mv, &sv);
727 
728 			spacemap_check_ms_sm(m->ms_sm, &mv);
729 			spacemap_check_sm_log(spa, &mv);
730 
731 			range_tree_vacate(mv.mv_allocated, NULL, NULL);
732 			range_tree_destroy(mv.mv_allocated);
733 			zfs_btree_clear(&mv.mv_livelist_allocs);
734 			zfs_btree_destroy(&mv.mv_livelist_allocs);
735 		}
736 	}
737 	(void) fprintf(stderr, "\n");
738 
739 	/*
740 	 * If there are any segments in the leftover tree after we walked
741 	 * through all the metaslabs in the concrete vdevs then this means
742 	 * that we have segments in the livelists that belong to indirect
743 	 * vdevs and are marked as allocated.
744 	 */
745 	if (zfs_btree_numnodes(&sv.sv_leftover) == 0) {
746 		zfs_btree_destroy(&sv.sv_leftover);
747 		return;
748 	}
749 	(void) printf("ERROR: Found livelist blocks marked as allocated "
750 	    "for indirect vdevs:\n");
751 
752 	zfs_btree_index_t *where = NULL;
753 	sublivelist_verify_block_t *svb;
754 	while ((svb = zfs_btree_destroy_nodes(&sv.sv_leftover, &where)) !=
755 	    NULL) {
756 		int vdev_id = DVA_GET_VDEV(&svb->svb_dva);
757 		ASSERT3U(vdev_id, <, rvd->vdev_children);
758 		vdev_t *vd = rvd->vdev_child[vdev_id];
759 		ASSERT(!vdev_is_concrete(vd));
760 		(void) printf("<%d:%llx:%llx> TXG %llx\n",
761 		    vdev_id, (u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva),
762 		    (u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva),
763 		    (u_longlong_t)svb->svb_allocated_txg);
764 	}
765 	(void) printf("\n");
766 	zfs_btree_destroy(&sv.sv_leftover);
767 }
768 
/*
 * libumem hook: supplies a reasonable default for the allocator's debugging
 * facilities.  The returned string is the $UMEM_DEBUG setting.
 */
const char *
_umem_debug_init(void)
{
	static const char umem_debug[] = "default,verbose";

	return (umem_debug);
}
778 
/*
 * libumem hook: the returned string is the $UMEM_LOGGING setting.
 */
const char *
_umem_logging_init(void)
{
	static const char umem_logging[] = "fail,contents";

	return (umem_logging);
}
784 
785 static void
786 usage(void)
787 {
788 	(void) fprintf(stderr,
789 	    "Usage:\t%s [-AbcdDFGhikLMPsvXy] [-e [-V] [-p <path> ...]] "
790 	    "[-I <inflight I/Os>]\n"
791 	    "\t\t[-o <var>=<value>]... [-t <txg>] [-U <cache>] [-x <dumpdir>]\n"
792 	    "\t\t[-K <key>]\n"
793 	    "\t\t[<poolname>[/<dataset | objset id>] [<object | range> ...]]\n"
794 	    "\t%s [-AdiPv] [-e [-V] [-p <path> ...]] [-U <cache>] [-K <key>]\n"
795 	    "\t\t[<poolname>[/<dataset | objset id>] [<object | range> ...]\n"
796 	    "\t%s -B [-e [-V] [-p <path> ...]] [-I <inflight I/Os>]\n"
797 	    "\t\t[-o <var>=<value>]... [-t <txg>] [-U <cache>] [-x <dumpdir>]\n"
798 	    "\t\t[-K <key>] <poolname>/<objset id> [<backupflags>]\n"
799 	    "\t%s [-v] <bookmark>\n"
800 	    "\t%s -C [-A] [-U <cache>] [<poolname>]\n"
801 	    "\t%s -l [-Aqu] <device>\n"
802 	    "\t%s -m [-AFLPX] [-e [-V] [-p <path> ...]] [-t <txg>] "
803 	    "[-U <cache>]\n\t\t<poolname> [<vdev> [<metaslab> ...]]\n"
804 	    "\t%s -O [-K <key>] <dataset> <path>\n"
805 	    "\t%s -r [-K <key>] <dataset> <path> <destination>\n"
806 	    "\t%s -R [-A] [-e [-V] [-p <path> ...]] [-U <cache>]\n"
807 	    "\t\t<poolname> <vdev>:<offset>:<size>[:<flags>]\n"
808 	    "\t%s -E [-A] word0:word1:...:word15\n"
809 	    "\t%s -S [-AP] [-e [-V] [-p <path> ...]] [-U <cache>] "
810 	    "<poolname>\n\n",
811 	    cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname,
812 	    cmdname, cmdname, cmdname, cmdname, cmdname);
813 
814 	(void) fprintf(stderr, "    Dataset name must include at least one "
815 	    "separator character '/' or '@'\n");
816 	(void) fprintf(stderr, "    If dataset name is specified, only that "
817 	    "dataset is dumped\n");
818 	(void) fprintf(stderr,  "    If object numbers or object number "
819 	    "ranges are specified, only those\n"
820 	    "    objects or ranges are dumped.\n\n");
821 	(void) fprintf(stderr,
822 	    "    Object ranges take the form <start>:<end>[:<flags>]\n"
823 	    "        start    Starting object number\n"
824 	    "        end      Ending object number, or -1 for no upper bound\n"
825 	    "        flags    Optional flags to select object types:\n"
826 	    "            A     All objects (this is the default)\n"
827 	    "            d     ZFS directories\n"
828 	    "            f     ZFS files \n"
829 	    "            m     SPA space maps\n"
830 	    "            z     ZAPs\n"
831 	    "            -     Negate effect of next flag\n\n");
832 	(void) fprintf(stderr, "    Options to control amount of output:\n");
833 	(void) fprintf(stderr, "        -b --block-stats             "
834 	    "block statistics\n");
835 	(void) fprintf(stderr, "        -B --backup                  "
836 	    "backup stream\n");
837 	(void) fprintf(stderr, "        -c --checksum                "
838 	    "checksum all metadata (twice for all data) blocks\n");
839 	(void) fprintf(stderr, "        -C --config                  "
840 	    "config (or cachefile if alone)\n");
841 	(void) fprintf(stderr, "        -d --datasets                "
842 	    "dataset(s)\n");
843 	(void) fprintf(stderr, "        -D --dedup-stats             "
844 	    "dedup statistics\n");
845 	(void) fprintf(stderr, "        -E --embedded-block-pointer=INTEGER\n"
846 	    "                                     decode and display block "
847 	    "from an embedded block pointer\n");
848 	(void) fprintf(stderr, "        -h --history                 "
849 	    "pool history\n");
850 	(void) fprintf(stderr, "        -i --intent-logs             "
851 	    "intent logs\n");
852 	(void) fprintf(stderr, "        -l --label                   "
853 	    "read label contents\n");
854 	(void) fprintf(stderr, "        -k --checkpointed-state      "
855 	    "examine the checkpointed state of the pool\n");
856 	(void) fprintf(stderr, "        -L --disable-leak-tracking   "
857 	    "disable leak tracking (do not load spacemaps)\n");
858 	(void) fprintf(stderr, "        -m --metaslabs               "
859 	    "metaslabs\n");
860 	(void) fprintf(stderr, "        -M --metaslab-groups         "
861 	    "metaslab groups\n");
862 	(void) fprintf(stderr, "        -O --object-lookups          "
863 	    "perform object lookups by path\n");
864 	(void) fprintf(stderr, "        -r --copy-object             "
865 	    "copy an object by path to file\n");
866 	(void) fprintf(stderr, "        -R --read-block              "
867 	    "read and display block from a device\n");
868 	(void) fprintf(stderr, "        -s --io-stats                "
869 	    "report stats on zdb's I/O\n");
870 	(void) fprintf(stderr, "        -S --simulate-dedup          "
871 	    "simulate dedup to measure effect\n");
872 	(void) fprintf(stderr, "        -v --verbose                 "
873 	    "verbose (applies to all others)\n");
874 	(void) fprintf(stderr, "        -y --livelist                "
875 	    "perform livelist and metaslab validation on any livelists being "
876 	    "deleted\n\n");
877 	(void) fprintf(stderr, "    Below options are intended for use "
878 	    "with other options:\n");
879 	(void) fprintf(stderr, "        -A --ignore-assertions       "
880 	    "ignore assertions (-A), enable panic recovery (-AA) or both "
881 	    "(-AAA)\n");
882 	(void) fprintf(stderr, "        -e --exported                "
883 	    "pool is exported/destroyed/has altroot/not in a cachefile\n");
884 	(void) fprintf(stderr, "        -F --automatic-rewind        "
885 	    "attempt automatic rewind within safe range of transaction "
886 	    "groups\n");
887 	(void) fprintf(stderr, "        -G --dump-debug-msg          "
888 	    "dump zfs_dbgmsg buffer before exiting\n");
889 	(void) fprintf(stderr, "        -I --inflight=INTEGER        "
890 	    "specify the maximum number of checksumming I/Os "
891 	    "[default is 200]\n");
892 	(void) fprintf(stderr, "        -K --key=KEY                 "
893 	    "decryption key for encrypted dataset\n");
894 	(void) fprintf(stderr, "        -o --option=\"OPTION=INTEGER\" "
895 	    "set global variable to an unsigned 32-bit integer\n");
896 	(void) fprintf(stderr, "        -p --path==PATH              "
897 	    "use one or more with -e to specify path to vdev dir\n");
898 	(void) fprintf(stderr, "        -P --parseable               "
899 	    "print numbers in parseable form\n");
900 	(void) fprintf(stderr, "        -q --skip-label              "
901 	    "don't print label contents\n");
902 	(void) fprintf(stderr, "        -t --txg=INTEGER             "
903 	    "highest txg to use when searching for uberblocks\n");
904 	(void) fprintf(stderr, "        -T --brt-stats               "
905 	    "BRT statistics\n");
906 	(void) fprintf(stderr, "        -u --uberblock               "
907 	    "uberblock\n");
908 	(void) fprintf(stderr, "        -U --cachefile=PATH          "
909 	    "use alternate cachefile\n");
910 	(void) fprintf(stderr, "        -V --verbatim                "
911 	    "do verbatim import\n");
912 	(void) fprintf(stderr, "        -x --dump-blocks=PATH        "
913 	    "dump all read blocks into specified directory\n");
914 	(void) fprintf(stderr, "        -X --extreme-rewind          "
915 	    "attempt extreme rewind (does not work with dataset)\n");
916 	(void) fprintf(stderr, "        -Y --all-reconstruction      "
917 	    "attempt all reconstruction combinations for split blocks\n");
918 	(void) fprintf(stderr, "        -Z --zstd-headers            "
919 	    "show ZSTD headers \n");
920 	(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
921 	    "to make only that option verbose\n");
922 	(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
923 	exit(1);
924 }
925 
926 static void
927 dump_debug_buffer(void)
928 {
929 	if (dump_opt['G']) {
930 		(void) printf("\n");
931 		(void) fflush(stdout);
932 		zfs_dbgmsg_print("zdb");
933 	}
934 }
935 
936 /*
937  * Called for usage errors that are discovered after a call to spa_open(),
938  * dmu_bonus_hold(), or pool_match().  abort() is called for other errors.
939  */
940 
941 static void
942 fatal(const char *fmt, ...)
943 {
944 	va_list ap;
945 
946 	va_start(ap, fmt);
947 	(void) fprintf(stderr, "%s: ", cmdname);
948 	(void) vfprintf(stderr, fmt, ap);
949 	va_end(ap);
950 	(void) fprintf(stderr, "\n");
951 
952 	dump_debug_buffer();
953 
954 	exit(1);
955 }
956 
957 static void
958 dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
959 {
960 	(void) size;
961 	nvlist_t *nv;
962 	size_t nvsize = *(uint64_t *)data;
963 	char *packed = umem_alloc(nvsize, UMEM_NOFAIL);
964 
965 	VERIFY(0 == dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH));
966 
967 	VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0);
968 
969 	umem_free(packed, nvsize);
970 
971 	dump_nvlist(nv, 8);
972 
973 	nvlist_free(nv);
974 }
975 
976 static void
977 dump_history_offsets(objset_t *os, uint64_t object, void *data, size_t size)
978 {
979 	(void) os, (void) object, (void) size;
980 	spa_history_phys_t *shp = data;
981 
982 	if (shp == NULL)
983 		return;
984 
985 	(void) printf("\t\tpool_create_len = %llu\n",
986 	    (u_longlong_t)shp->sh_pool_create_len);
987 	(void) printf("\t\tphys_max_off = %llu\n",
988 	    (u_longlong_t)shp->sh_phys_max_off);
989 	(void) printf("\t\tbof = %llu\n",
990 	    (u_longlong_t)shp->sh_bof);
991 	(void) printf("\t\teof = %llu\n",
992 	    (u_longlong_t)shp->sh_eof);
993 	(void) printf("\t\trecords_lost = %llu\n",
994 	    (u_longlong_t)shp->sh_records_lost);
995 }
996 
997 static void
998 zdb_nicenum(uint64_t num, char *buf, size_t buflen)
999 {
1000 	if (dump_opt['P'])
1001 		(void) snprintf(buf, buflen, "%llu", (longlong_t)num);
1002 	else
1003 		nicenum(num, buf, buflen);
1004 }
1005 
1006 static void
1007 zdb_nicebytes(uint64_t bytes, char *buf, size_t buflen)
1008 {
1009 	if (dump_opt['P'])
1010 		(void) snprintf(buf, buflen, "%llu", (longlong_t)bytes);
1011 	else
1012 		zfs_nicebytes(bytes, buf, buflen);
1013 }
1014 
1015 static const char histo_stars[] = "****************************************";
1016 static const uint64_t histo_width = sizeof (histo_stars) - 1;
1017 
1018 static void
1019 dump_histogram(const uint64_t *histo, int size, int offset)
1020 {
1021 	int i;
1022 	int minidx = size - 1;
1023 	int maxidx = 0;
1024 	uint64_t max = 0;
1025 
1026 	for (i = 0; i < size; i++) {
1027 		if (histo[i] == 0)
1028 			continue;
1029 		if (histo[i] > max)
1030 			max = histo[i];
1031 		if (i > maxidx)
1032 			maxidx = i;
1033 		if (i < minidx)
1034 			minidx = i;
1035 	}
1036 
1037 	if (max < histo_width)
1038 		max = histo_width;
1039 
1040 	for (i = minidx; i <= maxidx; i++) {
1041 		(void) printf("\t\t\t%3u: %6llu %s\n",
1042 		    i + offset, (u_longlong_t)histo[i],
1043 		    &histo_stars[(max - histo[i]) * histo_width / max]);
1044 	}
1045 }
1046 
1047 static void
1048 dump_zap_stats(objset_t *os, uint64_t object)
1049 {
1050 	int error;
1051 	zap_stats_t zs;
1052 
1053 	error = zap_get_stats(os, object, &zs);
1054 	if (error)
1055 		return;
1056 
1057 	if (zs.zs_ptrtbl_len == 0) {
1058 		ASSERT(zs.zs_num_blocks == 1);
1059 		(void) printf("\tmicrozap: %llu bytes, %llu entries\n",
1060 		    (u_longlong_t)zs.zs_blocksize,
1061 		    (u_longlong_t)zs.zs_num_entries);
1062 		return;
1063 	}
1064 
1065 	(void) printf("\tFat ZAP stats:\n");
1066 
1067 	(void) printf("\t\tPointer table:\n");
1068 	(void) printf("\t\t\t%llu elements\n",
1069 	    (u_longlong_t)zs.zs_ptrtbl_len);
1070 	(void) printf("\t\t\tzt_blk: %llu\n",
1071 	    (u_longlong_t)zs.zs_ptrtbl_zt_blk);
1072 	(void) printf("\t\t\tzt_numblks: %llu\n",
1073 	    (u_longlong_t)zs.zs_ptrtbl_zt_numblks);
1074 	(void) printf("\t\t\tzt_shift: %llu\n",
1075 	    (u_longlong_t)zs.zs_ptrtbl_zt_shift);
1076 	(void) printf("\t\t\tzt_blks_copied: %llu\n",
1077 	    (u_longlong_t)zs.zs_ptrtbl_blks_copied);
1078 	(void) printf("\t\t\tzt_nextblk: %llu\n",
1079 	    (u_longlong_t)zs.zs_ptrtbl_nextblk);
1080 
1081 	(void) printf("\t\tZAP entries: %llu\n",
1082 	    (u_longlong_t)zs.zs_num_entries);
1083 	(void) printf("\t\tLeaf blocks: %llu\n",
1084 	    (u_longlong_t)zs.zs_num_leafs);
1085 	(void) printf("\t\tTotal blocks: %llu\n",
1086 	    (u_longlong_t)zs.zs_num_blocks);
1087 	(void) printf("\t\tzap_block_type: 0x%llx\n",
1088 	    (u_longlong_t)zs.zs_block_type);
1089 	(void) printf("\t\tzap_magic: 0x%llx\n",
1090 	    (u_longlong_t)zs.zs_magic);
1091 	(void) printf("\t\tzap_salt: 0x%llx\n",
1092 	    (u_longlong_t)zs.zs_salt);
1093 
1094 	(void) printf("\t\tLeafs with 2^n pointers:\n");
1095 	dump_histogram(zs.zs_leafs_with_2n_pointers, ZAP_HISTOGRAM_SIZE, 0);
1096 
1097 	(void) printf("\t\tBlocks with n*5 entries:\n");
1098 	dump_histogram(zs.zs_blocks_with_n5_entries, ZAP_HISTOGRAM_SIZE, 0);
1099 
1100 	(void) printf("\t\tBlocks n/10 full:\n");
1101 	dump_histogram(zs.zs_blocks_n_tenths_full, ZAP_HISTOGRAM_SIZE, 0);
1102 
1103 	(void) printf("\t\tEntries with n chunks:\n");
1104 	dump_histogram(zs.zs_entries_using_n_chunks, ZAP_HISTOGRAM_SIZE, 0);
1105 
1106 	(void) printf("\t\tBuckets with n entries:\n");
1107 	dump_histogram(zs.zs_buckets_with_n_entries, ZAP_HISTOGRAM_SIZE, 0);
1108 }
1109 
1110 static void
1111 dump_none(objset_t *os, uint64_t object, void *data, size_t size)
1112 {
1113 	(void) os, (void) object, (void) data, (void) size;
1114 }
1115 
1116 static void
1117 dump_unknown(objset_t *os, uint64_t object, void *data, size_t size)
1118 {
1119 	(void) os, (void) object, (void) data, (void) size;
1120 	(void) printf("\tUNKNOWN OBJECT TYPE\n");
1121 }
1122 
1123 static void
1124 dump_uint8(objset_t *os, uint64_t object, void *data, size_t size)
1125 {
1126 	(void) os, (void) object, (void) data, (void) size;
1127 }
1128 
1129 static void
1130 dump_uint64(objset_t *os, uint64_t object, void *data, size_t size)
1131 {
1132 	uint64_t *arr;
1133 	uint64_t oursize;
1134 	if (dump_opt['d'] < 6)
1135 		return;
1136 
1137 	if (data == NULL) {
1138 		dmu_object_info_t doi;
1139 
1140 		VERIFY0(dmu_object_info(os, object, &doi));
1141 		size = doi.doi_max_offset;
1142 		/*
1143 		 * We cap the size at 1 mebibyte here to prevent
1144 		 * allocation failures and nigh-infinite printing if the
1145 		 * object is extremely large.
1146 		 */
1147 		oursize = MIN(size, 1 << 20);
1148 		arr = kmem_alloc(oursize, KM_SLEEP);
1149 
1150 		int err = dmu_read(os, object, 0, oursize, arr, 0);
1151 		if (err != 0) {
1152 			(void) printf("got error %u from dmu_read\n", err);
1153 			kmem_free(arr, oursize);
1154 			return;
1155 		}
1156 	} else {
1157 		/*
1158 		 * Even though the allocation is already done in this code path,
1159 		 * we still cap the size to prevent excessive printing.
1160 		 */
1161 		oursize = MIN(size, 1 << 20);
1162 		arr = data;
1163 	}
1164 
1165 	if (size == 0) {
1166 		if (data == NULL)
1167 			kmem_free(arr, oursize);
1168 		(void) printf("\t\t[]\n");
1169 		return;
1170 	}
1171 
1172 	(void) printf("\t\t[%0llx", (u_longlong_t)arr[0]);
1173 	for (size_t i = 1; i * sizeof (uint64_t) < oursize; i++) {
1174 		if (i % 4 != 0)
1175 			(void) printf(", %0llx", (u_longlong_t)arr[i]);
1176 		else
1177 			(void) printf(",\n\t\t%0llx", (u_longlong_t)arr[i]);
1178 	}
1179 	if (oursize != size)
1180 		(void) printf(", ... ");
1181 	(void) printf("]\n");
1182 
1183 	if (data == NULL)
1184 		kmem_free(arr, oursize);
1185 }
1186 
1187 static void
1188 dump_zap(objset_t *os, uint64_t object, void *data, size_t size)
1189 {
1190 	(void) data, (void) size;
1191 	zap_cursor_t zc;
1192 	zap_attribute_t attr;
1193 	void *prop;
1194 	unsigned i;
1195 
1196 	dump_zap_stats(os, object);
1197 	(void) printf("\n");
1198 
1199 	for (zap_cursor_init(&zc, os, object);
1200 	    zap_cursor_retrieve(&zc, &attr) == 0;
1201 	    zap_cursor_advance(&zc)) {
1202 		(void) printf("\t\t%s = ", attr.za_name);
1203 		if (attr.za_num_integers == 0) {
1204 			(void) printf("\n");
1205 			continue;
1206 		}
1207 		prop = umem_zalloc(attr.za_num_integers *
1208 		    attr.za_integer_length, UMEM_NOFAIL);
1209 		(void) zap_lookup(os, object, attr.za_name,
1210 		    attr.za_integer_length, attr.za_num_integers, prop);
1211 		if (attr.za_integer_length == 1) {
1212 			if (strcmp(attr.za_name,
1213 			    DSL_CRYPTO_KEY_MASTER_KEY) == 0 ||
1214 			    strcmp(attr.za_name,
1215 			    DSL_CRYPTO_KEY_HMAC_KEY) == 0 ||
1216 			    strcmp(attr.za_name, DSL_CRYPTO_KEY_IV) == 0 ||
1217 			    strcmp(attr.za_name, DSL_CRYPTO_KEY_MAC) == 0 ||
1218 			    strcmp(attr.za_name, DMU_POOL_CHECKSUM_SALT) == 0) {
1219 				uint8_t *u8 = prop;
1220 
1221 				for (i = 0; i < attr.za_num_integers; i++) {
1222 					(void) printf("%02x", u8[i]);
1223 				}
1224 			} else {
1225 				(void) printf("%s", (char *)prop);
1226 			}
1227 		} else {
1228 			for (i = 0; i < attr.za_num_integers; i++) {
1229 				switch (attr.za_integer_length) {
1230 				case 2:
1231 					(void) printf("%u ",
1232 					    ((uint16_t *)prop)[i]);
1233 					break;
1234 				case 4:
1235 					(void) printf("%u ",
1236 					    ((uint32_t *)prop)[i]);
1237 					break;
1238 				case 8:
1239 					(void) printf("%lld ",
1240 					    (u_longlong_t)((int64_t *)prop)[i]);
1241 					break;
1242 				}
1243 			}
1244 		}
1245 		(void) printf("\n");
1246 		umem_free(prop, attr.za_num_integers * attr.za_integer_length);
1247 	}
1248 	zap_cursor_fini(&zc);
1249 }
1250 
1251 static void
1252 dump_bpobj(objset_t *os, uint64_t object, void *data, size_t size)
1253 {
1254 	bpobj_phys_t *bpop = data;
1255 	uint64_t i;
1256 	char bytes[32], comp[32], uncomp[32];
1257 
1258 	/* make sure the output won't get truncated */
1259 	_Static_assert(sizeof (bytes) >= NN_NUMBUF_SZ, "bytes truncated");
1260 	_Static_assert(sizeof (comp) >= NN_NUMBUF_SZ, "comp truncated");
1261 	_Static_assert(sizeof (uncomp) >= NN_NUMBUF_SZ, "uncomp truncated");
1262 
1263 	if (bpop == NULL)
1264 		return;
1265 
1266 	zdb_nicenum(bpop->bpo_bytes, bytes, sizeof (bytes));
1267 	zdb_nicenum(bpop->bpo_comp, comp, sizeof (comp));
1268 	zdb_nicenum(bpop->bpo_uncomp, uncomp, sizeof (uncomp));
1269 
1270 	(void) printf("\t\tnum_blkptrs = %llu\n",
1271 	    (u_longlong_t)bpop->bpo_num_blkptrs);
1272 	(void) printf("\t\tbytes = %s\n", bytes);
1273 	if (size >= BPOBJ_SIZE_V1) {
1274 		(void) printf("\t\tcomp = %s\n", comp);
1275 		(void) printf("\t\tuncomp = %s\n", uncomp);
1276 	}
1277 	if (size >= BPOBJ_SIZE_V2) {
1278 		(void) printf("\t\tsubobjs = %llu\n",
1279 		    (u_longlong_t)bpop->bpo_subobjs);
1280 		(void) printf("\t\tnum_subobjs = %llu\n",
1281 		    (u_longlong_t)bpop->bpo_num_subobjs);
1282 	}
1283 	if (size >= sizeof (*bpop)) {
1284 		(void) printf("\t\tnum_freed = %llu\n",
1285 		    (u_longlong_t)bpop->bpo_num_freed);
1286 	}
1287 
1288 	if (dump_opt['d'] < 5)
1289 		return;
1290 
1291 	for (i = 0; i < bpop->bpo_num_blkptrs; i++) {
1292 		char blkbuf[BP_SPRINTF_LEN];
1293 		blkptr_t bp;
1294 
1295 		int err = dmu_read(os, object,
1296 		    i * sizeof (bp), sizeof (bp), &bp, 0);
1297 		if (err != 0) {
1298 			(void) printf("got error %u from dmu_read\n", err);
1299 			break;
1300 		}
1301 		snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), &bp,
1302 		    BP_GET_FREE(&bp));
1303 		(void) printf("\t%s\n", blkbuf);
1304 	}
1305 }
1306 
1307 static void
1308 dump_bpobj_subobjs(objset_t *os, uint64_t object, void *data, size_t size)
1309 {
1310 	(void) data, (void) size;
1311 	dmu_object_info_t doi;
1312 	int64_t i;
1313 
1314 	VERIFY0(dmu_object_info(os, object, &doi));
1315 	uint64_t *subobjs = kmem_alloc(doi.doi_max_offset, KM_SLEEP);
1316 
1317 	int err = dmu_read(os, object, 0, doi.doi_max_offset, subobjs, 0);
1318 	if (err != 0) {
1319 		(void) printf("got error %u from dmu_read\n", err);
1320 		kmem_free(subobjs, doi.doi_max_offset);
1321 		return;
1322 	}
1323 
1324 	int64_t last_nonzero = -1;
1325 	for (i = 0; i < doi.doi_max_offset / 8; i++) {
1326 		if (subobjs[i] != 0)
1327 			last_nonzero = i;
1328 	}
1329 
1330 	for (i = 0; i <= last_nonzero; i++) {
1331 		(void) printf("\t%llu\n", (u_longlong_t)subobjs[i]);
1332 	}
1333 	kmem_free(subobjs, doi.doi_max_offset);
1334 }
1335 
1336 static void
1337 dump_ddt_zap(objset_t *os, uint64_t object, void *data, size_t size)
1338 {
1339 	(void) data, (void) size;
1340 	dump_zap_stats(os, object);
1341 	/* contents are printed elsewhere, properly decoded */
1342 }
1343 
1344 static void
1345 dump_sa_attrs(objset_t *os, uint64_t object, void *data, size_t size)
1346 {
1347 	(void) data, (void) size;
1348 	zap_cursor_t zc;
1349 	zap_attribute_t attr;
1350 
1351 	dump_zap_stats(os, object);
1352 	(void) printf("\n");
1353 
1354 	for (zap_cursor_init(&zc, os, object);
1355 	    zap_cursor_retrieve(&zc, &attr) == 0;
1356 	    zap_cursor_advance(&zc)) {
1357 		(void) printf("\t\t%s = ", attr.za_name);
1358 		if (attr.za_num_integers == 0) {
1359 			(void) printf("\n");
1360 			continue;
1361 		}
1362 		(void) printf(" %llx : [%d:%d:%d]\n",
1363 		    (u_longlong_t)attr.za_first_integer,
1364 		    (int)ATTR_LENGTH(attr.za_first_integer),
1365 		    (int)ATTR_BSWAP(attr.za_first_integer),
1366 		    (int)ATTR_NUM(attr.za_first_integer));
1367 	}
1368 	zap_cursor_fini(&zc);
1369 }
1370 
1371 static void
1372 dump_sa_layouts(objset_t *os, uint64_t object, void *data, size_t size)
1373 {
1374 	(void) data, (void) size;
1375 	zap_cursor_t zc;
1376 	zap_attribute_t attr;
1377 	uint16_t *layout_attrs;
1378 	unsigned i;
1379 
1380 	dump_zap_stats(os, object);
1381 	(void) printf("\n");
1382 
1383 	for (zap_cursor_init(&zc, os, object);
1384 	    zap_cursor_retrieve(&zc, &attr) == 0;
1385 	    zap_cursor_advance(&zc)) {
1386 		(void) printf("\t\t%s = [", attr.za_name);
1387 		if (attr.za_num_integers == 0) {
1388 			(void) printf("\n");
1389 			continue;
1390 		}
1391 
1392 		VERIFY(attr.za_integer_length == 2);
1393 		layout_attrs = umem_zalloc(attr.za_num_integers *
1394 		    attr.za_integer_length, UMEM_NOFAIL);
1395 
1396 		VERIFY(zap_lookup(os, object, attr.za_name,
1397 		    attr.za_integer_length,
1398 		    attr.za_num_integers, layout_attrs) == 0);
1399 
1400 		for (i = 0; i != attr.za_num_integers; i++)
1401 			(void) printf(" %d ", (int)layout_attrs[i]);
1402 		(void) printf("]\n");
1403 		umem_free(layout_attrs,
1404 		    attr.za_num_integers * attr.za_integer_length);
1405 	}
1406 	zap_cursor_fini(&zc);
1407 }
1408 
1409 static void
1410 dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size)
1411 {
1412 	(void) data, (void) size;
1413 	zap_cursor_t zc;
1414 	zap_attribute_t attr;
1415 	const char *typenames[] = {
1416 		/* 0 */ "not specified",
1417 		/* 1 */ "FIFO",
1418 		/* 2 */ "Character Device",
1419 		/* 3 */ "3 (invalid)",
1420 		/* 4 */ "Directory",
1421 		/* 5 */ "5 (invalid)",
1422 		/* 6 */ "Block Device",
1423 		/* 7 */ "7 (invalid)",
1424 		/* 8 */ "Regular File",
1425 		/* 9 */ "9 (invalid)",
1426 		/* 10 */ "Symbolic Link",
1427 		/* 11 */ "11 (invalid)",
1428 		/* 12 */ "Socket",
1429 		/* 13 */ "Door",
1430 		/* 14 */ "Event Port",
1431 		/* 15 */ "15 (invalid)",
1432 	};
1433 
1434 	dump_zap_stats(os, object);
1435 	(void) printf("\n");
1436 
1437 	for (zap_cursor_init(&zc, os, object);
1438 	    zap_cursor_retrieve(&zc, &attr) == 0;
1439 	    zap_cursor_advance(&zc)) {
1440 		(void) printf("\t\t%s = %lld (type: %s)\n",
1441 		    attr.za_name, ZFS_DIRENT_OBJ(attr.za_first_integer),
1442 		    typenames[ZFS_DIRENT_TYPE(attr.za_first_integer)]);
1443 	}
1444 	zap_cursor_fini(&zc);
1445 }
1446 
1447 static int
1448 get_dtl_refcount(vdev_t *vd)
1449 {
1450 	int refcount = 0;
1451 
1452 	if (vd->vdev_ops->vdev_op_leaf) {
1453 		space_map_t *sm = vd->vdev_dtl_sm;
1454 
1455 		if (sm != NULL &&
1456 		    sm->sm_dbuf->db_size == sizeof (space_map_phys_t))
1457 			return (1);
1458 		return (0);
1459 	}
1460 
1461 	for (unsigned c = 0; c < vd->vdev_children; c++)
1462 		refcount += get_dtl_refcount(vd->vdev_child[c]);
1463 	return (refcount);
1464 }
1465 
1466 static int
1467 get_metaslab_refcount(vdev_t *vd)
1468 {
1469 	int refcount = 0;
1470 
1471 	if (vd->vdev_top == vd) {
1472 		for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
1473 			space_map_t *sm = vd->vdev_ms[m]->ms_sm;
1474 
1475 			if (sm != NULL &&
1476 			    sm->sm_dbuf->db_size == sizeof (space_map_phys_t))
1477 				refcount++;
1478 		}
1479 	}
1480 	for (unsigned c = 0; c < vd->vdev_children; c++)
1481 		refcount += get_metaslab_refcount(vd->vdev_child[c]);
1482 
1483 	return (refcount);
1484 }
1485 
1486 static int
1487 get_obsolete_refcount(vdev_t *vd)
1488 {
1489 	uint64_t obsolete_sm_object;
1490 	int refcount = 0;
1491 
1492 	VERIFY0(vdev_obsolete_sm_object(vd, &obsolete_sm_object));
1493 	if (vd->vdev_top == vd && obsolete_sm_object != 0) {
1494 		dmu_object_info_t doi;
1495 		VERIFY0(dmu_object_info(vd->vdev_spa->spa_meta_objset,
1496 		    obsolete_sm_object, &doi));
1497 		if (doi.doi_bonus_size == sizeof (space_map_phys_t)) {
1498 			refcount++;
1499 		}
1500 	} else {
1501 		ASSERT3P(vd->vdev_obsolete_sm, ==, NULL);
1502 		ASSERT3U(obsolete_sm_object, ==, 0);
1503 	}
1504 	for (unsigned c = 0; c < vd->vdev_children; c++) {
1505 		refcount += get_obsolete_refcount(vd->vdev_child[c]);
1506 	}
1507 
1508 	return (refcount);
1509 }
1510 
1511 static int
1512 get_prev_obsolete_spacemap_refcount(spa_t *spa)
1513 {
1514 	uint64_t prev_obj =
1515 	    spa->spa_condensing_indirect_phys.scip_prev_obsolete_sm_object;
1516 	if (prev_obj != 0) {
1517 		dmu_object_info_t doi;
1518 		VERIFY0(dmu_object_info(spa->spa_meta_objset, prev_obj, &doi));
1519 		if (doi.doi_bonus_size == sizeof (space_map_phys_t)) {
1520 			return (1);
1521 		}
1522 	}
1523 	return (0);
1524 }
1525 
1526 static int
1527 get_checkpoint_refcount(vdev_t *vd)
1528 {
1529 	int refcount = 0;
1530 
1531 	if (vd->vdev_top == vd && vd->vdev_top_zap != 0 &&
1532 	    zap_contains(spa_meta_objset(vd->vdev_spa),
1533 	    vd->vdev_top_zap, VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) == 0)
1534 		refcount++;
1535 
1536 	for (uint64_t c = 0; c < vd->vdev_children; c++)
1537 		refcount += get_checkpoint_refcount(vd->vdev_child[c]);
1538 
1539 	return (refcount);
1540 }
1541 
1542 static int
1543 get_log_spacemap_refcount(spa_t *spa)
1544 {
1545 	return (avl_numnodes(&spa->spa_sm_logs_by_txg));
1546 }
1547 
1548 static int
1549 verify_spacemap_refcounts(spa_t *spa)
1550 {
1551 	uint64_t expected_refcount = 0;
1552 	uint64_t actual_refcount;
1553 
1554 	(void) feature_get_refcount(spa,
1555 	    &spa_feature_table[SPA_FEATURE_SPACEMAP_HISTOGRAM],
1556 	    &expected_refcount);
1557 	actual_refcount = get_dtl_refcount(spa->spa_root_vdev);
1558 	actual_refcount += get_metaslab_refcount(spa->spa_root_vdev);
1559 	actual_refcount += get_obsolete_refcount(spa->spa_root_vdev);
1560 	actual_refcount += get_prev_obsolete_spacemap_refcount(spa);
1561 	actual_refcount += get_checkpoint_refcount(spa->spa_root_vdev);
1562 	actual_refcount += get_log_spacemap_refcount(spa);
1563 
1564 	if (expected_refcount != actual_refcount) {
1565 		(void) printf("space map refcount mismatch: expected %lld != "
1566 		    "actual %lld\n",
1567 		    (longlong_t)expected_refcount,
1568 		    (longlong_t)actual_refcount);
1569 		return (2);
1570 	}
1571 	return (0);
1572 }
1573 
1574 static void
1575 dump_spacemap(objset_t *os, space_map_t *sm)
1576 {
1577 	const char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
1578 	    "INVALID", "INVALID", "INVALID", "INVALID" };
1579 
1580 	if (sm == NULL)
1581 		return;
1582 
1583 	(void) printf("space map object %llu:\n",
1584 	    (longlong_t)sm->sm_object);
1585 	(void) printf("  smp_length = 0x%llx\n",
1586 	    (longlong_t)sm->sm_phys->smp_length);
1587 	(void) printf("  smp_alloc = 0x%llx\n",
1588 	    (longlong_t)sm->sm_phys->smp_alloc);
1589 
1590 	if (dump_opt['d'] < 6 && dump_opt['m'] < 4)
1591 		return;
1592 
1593 	/*
1594 	 * Print out the freelist entries in both encoded and decoded form.
1595 	 */
1596 	uint8_t mapshift = sm->sm_shift;
1597 	int64_t alloc = 0;
1598 	uint64_t word, entry_id = 0;
1599 	for (uint64_t offset = 0; offset < space_map_length(sm);
1600 	    offset += sizeof (word)) {
1601 
1602 		VERIFY0(dmu_read(os, space_map_object(sm), offset,
1603 		    sizeof (word), &word, DMU_READ_PREFETCH));
1604 
1605 		if (sm_entry_is_debug(word)) {
1606 			uint64_t de_txg = SM_DEBUG_TXG_DECODE(word);
1607 			uint64_t de_sync_pass = SM_DEBUG_SYNCPASS_DECODE(word);
1608 			if (de_txg == 0) {
1609 				(void) printf(
1610 				    "\t    [%6llu] PADDING\n",
1611 				    (u_longlong_t)entry_id);
1612 			} else {
1613 				(void) printf(
1614 				    "\t    [%6llu] %s: txg %llu pass %llu\n",
1615 				    (u_longlong_t)entry_id,
1616 				    ddata[SM_DEBUG_ACTION_DECODE(word)],
1617 				    (u_longlong_t)de_txg,
1618 				    (u_longlong_t)de_sync_pass);
1619 			}
1620 			entry_id++;
1621 			continue;
1622 		}
1623 
1624 		uint8_t words;
1625 		char entry_type;
1626 		uint64_t entry_off, entry_run, entry_vdev = SM_NO_VDEVID;
1627 
1628 		if (sm_entry_is_single_word(word)) {
1629 			entry_type = (SM_TYPE_DECODE(word) == SM_ALLOC) ?
1630 			    'A' : 'F';
1631 			entry_off = (SM_OFFSET_DECODE(word) << mapshift) +
1632 			    sm->sm_start;
1633 			entry_run = SM_RUN_DECODE(word) << mapshift;
1634 			words = 1;
1635 		} else {
1636 			/* it is a two-word entry so we read another word */
1637 			ASSERT(sm_entry_is_double_word(word));
1638 
1639 			uint64_t extra_word;
1640 			offset += sizeof (extra_word);
1641 			VERIFY0(dmu_read(os, space_map_object(sm), offset,
1642 			    sizeof (extra_word), &extra_word,
1643 			    DMU_READ_PREFETCH));
1644 
1645 			ASSERT3U(offset, <=, space_map_length(sm));
1646 
1647 			entry_run = SM2_RUN_DECODE(word) << mapshift;
1648 			entry_vdev = SM2_VDEV_DECODE(word);
1649 			entry_type = (SM2_TYPE_DECODE(extra_word) == SM_ALLOC) ?
1650 			    'A' : 'F';
1651 			entry_off = (SM2_OFFSET_DECODE(extra_word) <<
1652 			    mapshift) + sm->sm_start;
1653 			words = 2;
1654 		}
1655 
1656 		(void) printf("\t    [%6llu]    %c  range:"
1657 		    " %010llx-%010llx  size: %06llx vdev: %06llu words: %u\n",
1658 		    (u_longlong_t)entry_id,
1659 		    entry_type, (u_longlong_t)entry_off,
1660 		    (u_longlong_t)(entry_off + entry_run),
1661 		    (u_longlong_t)entry_run,
1662 		    (u_longlong_t)entry_vdev, words);
1663 
1664 		if (entry_type == 'A')
1665 			alloc += entry_run;
1666 		else
1667 			alloc -= entry_run;
1668 		entry_id++;
1669 	}
1670 	if (alloc != space_map_allocated(sm)) {
1671 		(void) printf("space_map_object alloc (%lld) INCONSISTENT "
1672 		    "with space map summary (%lld)\n",
1673 		    (longlong_t)space_map_allocated(sm), (longlong_t)alloc);
1674 	}
1675 }
1676 
1677 static void
1678 dump_metaslab_stats(metaslab_t *msp)
1679 {
1680 	char maxbuf[32];
1681 	range_tree_t *rt = msp->ms_allocatable;
1682 	zfs_btree_t *t = &msp->ms_allocatable_by_size;
1683 	int free_pct = range_tree_space(rt) * 100 / msp->ms_size;
1684 
1685 	/* max sure nicenum has enough space */
1686 	_Static_assert(sizeof (maxbuf) >= NN_NUMBUF_SZ, "maxbuf truncated");
1687 
1688 	zdb_nicenum(metaslab_largest_allocatable(msp), maxbuf, sizeof (maxbuf));
1689 
1690 	(void) printf("\t %25s %10lu   %7s  %6s   %4s %4d%%\n",
1691 	    "segments", zfs_btree_numnodes(t), "maxsize", maxbuf,
1692 	    "freepct", free_pct);
1693 	(void) printf("\tIn-memory histogram:\n");
1694 	dump_histogram(rt->rt_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
1695 }
1696 
1697 static void
1698 dump_metaslab(metaslab_t *msp)
1699 {
1700 	vdev_t *vd = msp->ms_group->mg_vd;
1701 	spa_t *spa = vd->vdev_spa;
1702 	space_map_t *sm = msp->ms_sm;
1703 	char freebuf[32];
1704 
1705 	zdb_nicenum(msp->ms_size - space_map_allocated(sm), freebuf,
1706 	    sizeof (freebuf));
1707 
1708 	(void) printf(
1709 	    "\tmetaslab %6llu   offset %12llx   spacemap %6llu   free    %5s\n",
1710 	    (u_longlong_t)msp->ms_id, (u_longlong_t)msp->ms_start,
1711 	    (u_longlong_t)space_map_object(sm), freebuf);
1712 
1713 	if (dump_opt['m'] > 2 && !dump_opt['L']) {
1714 		mutex_enter(&msp->ms_lock);
1715 		VERIFY0(metaslab_load(msp));
1716 		range_tree_stat_verify(msp->ms_allocatable);
1717 		dump_metaslab_stats(msp);
1718 		metaslab_unload(msp);
1719 		mutex_exit(&msp->ms_lock);
1720 	}
1721 
1722 	if (dump_opt['m'] > 1 && sm != NULL &&
1723 	    spa_feature_is_active(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) {
1724 		/*
1725 		 * The space map histogram represents free space in chunks
1726 		 * of sm_shift (i.e. bucket 0 refers to 2^sm_shift).
1727 		 */
1728 		(void) printf("\tOn-disk histogram:\t\tfragmentation %llu\n",
1729 		    (u_longlong_t)msp->ms_fragmentation);
1730 		dump_histogram(sm->sm_phys->smp_histogram,
1731 		    SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift);
1732 	}
1733 
1734 	if (vd->vdev_ops == &vdev_draid_ops)
1735 		ASSERT3U(msp->ms_size, <=, 1ULL << vd->vdev_ms_shift);
1736 	else
1737 		ASSERT3U(msp->ms_size, ==, 1ULL << vd->vdev_ms_shift);
1738 
1739 	dump_spacemap(spa->spa_meta_objset, msp->ms_sm);
1740 
1741 	if (spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP)) {
1742 		(void) printf("\tFlush data:\n\tunflushed txg=%llu\n\n",
1743 		    (u_longlong_t)metaslab_unflushed_txg(msp));
1744 	}
1745 }
1746 
1747 static void
1748 print_vdev_metaslab_header(vdev_t *vd)
1749 {
1750 	vdev_alloc_bias_t alloc_bias = vd->vdev_alloc_bias;
1751 	const char *bias_str = "";
1752 	if (alloc_bias == VDEV_BIAS_LOG || vd->vdev_islog) {
1753 		bias_str = VDEV_ALLOC_BIAS_LOG;
1754 	} else if (alloc_bias == VDEV_BIAS_SPECIAL) {
1755 		bias_str = VDEV_ALLOC_BIAS_SPECIAL;
1756 	} else if (alloc_bias == VDEV_BIAS_DEDUP) {
1757 		bias_str = VDEV_ALLOC_BIAS_DEDUP;
1758 	}
1759 
1760 	uint64_t ms_flush_data_obj = 0;
1761 	if (vd->vdev_top_zap != 0) {
1762 		int error = zap_lookup(spa_meta_objset(vd->vdev_spa),
1763 		    vd->vdev_top_zap, VDEV_TOP_ZAP_MS_UNFLUSHED_PHYS_TXGS,
1764 		    sizeof (uint64_t), 1, &ms_flush_data_obj);
1765 		if (error != ENOENT) {
1766 			ASSERT0(error);
1767 		}
1768 	}
1769 
1770 	(void) printf("\tvdev %10llu   %s",
1771 	    (u_longlong_t)vd->vdev_id, bias_str);
1772 
1773 	if (ms_flush_data_obj != 0) {
1774 		(void) printf("   ms_unflushed_phys object %llu",
1775 		    (u_longlong_t)ms_flush_data_obj);
1776 	}
1777 
1778 	(void) printf("\n\t%-10s%5llu   %-19s   %-15s   %-12s\n",
1779 	    "metaslabs", (u_longlong_t)vd->vdev_ms_count,
1780 	    "offset", "spacemap", "free");
1781 	(void) printf("\t%15s   %19s   %15s   %12s\n",
1782 	    "---------------", "-------------------",
1783 	    "---------------", "------------");
1784 }
1785 
1786 static void
1787 dump_metaslab_groups(spa_t *spa, boolean_t show_special)
1788 {
1789 	vdev_t *rvd = spa->spa_root_vdev;
1790 	metaslab_class_t *mc = spa_normal_class(spa);
1791 	metaslab_class_t *smc = spa_special_class(spa);
1792 	uint64_t fragmentation;
1793 
1794 	metaslab_class_histogram_verify(mc);
1795 
1796 	for (unsigned c = 0; c < rvd->vdev_children; c++) {
1797 		vdev_t *tvd = rvd->vdev_child[c];
1798 		metaslab_group_t *mg = tvd->vdev_mg;
1799 
1800 		if (mg == NULL || (mg->mg_class != mc &&
1801 		    (!show_special || mg->mg_class != smc)))
1802 			continue;
1803 
1804 		metaslab_group_histogram_verify(mg);
1805 		mg->mg_fragmentation = metaslab_group_fragmentation(mg);
1806 
1807 		(void) printf("\tvdev %10llu\t\tmetaslabs%5llu\t\t"
1808 		    "fragmentation",
1809 		    (u_longlong_t)tvd->vdev_id,
1810 		    (u_longlong_t)tvd->vdev_ms_count);
1811 		if (mg->mg_fragmentation == ZFS_FRAG_INVALID) {
1812 			(void) printf("%3s\n", "-");
1813 		} else {
1814 			(void) printf("%3llu%%\n",
1815 			    (u_longlong_t)mg->mg_fragmentation);
1816 		}
1817 		dump_histogram(mg->mg_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
1818 	}
1819 
1820 	(void) printf("\tpool %s\tfragmentation", spa_name(spa));
1821 	fragmentation = metaslab_class_fragmentation(mc);
1822 	if (fragmentation == ZFS_FRAG_INVALID)
1823 		(void) printf("\t%3s\n", "-");
1824 	else
1825 		(void) printf("\t%3llu%%\n", (u_longlong_t)fragmentation);
1826 	dump_histogram(mc->mc_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
1827 }
1828 
1829 static void
1830 print_vdev_indirect(vdev_t *vd)
1831 {
1832 	vdev_indirect_config_t *vic = &vd->vdev_indirect_config;
1833 	vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
1834 	vdev_indirect_births_t *vib = vd->vdev_indirect_births;
1835 
1836 	if (vim == NULL) {
1837 		ASSERT3P(vib, ==, NULL);
1838 		return;
1839 	}
1840 
1841 	ASSERT3U(vdev_indirect_mapping_object(vim), ==,
1842 	    vic->vic_mapping_object);
1843 	ASSERT3U(vdev_indirect_births_object(vib), ==,
1844 	    vic->vic_births_object);
1845 
1846 	(void) printf("indirect births obj %llu:\n",
1847 	    (longlong_t)vic->vic_births_object);
1848 	(void) printf("    vib_count = %llu\n",
1849 	    (longlong_t)vdev_indirect_births_count(vib));
1850 	for (uint64_t i = 0; i < vdev_indirect_births_count(vib); i++) {
1851 		vdev_indirect_birth_entry_phys_t *cur_vibe =
1852 		    &vib->vib_entries[i];
1853 		(void) printf("\toffset %llx -> txg %llu\n",
1854 		    (longlong_t)cur_vibe->vibe_offset,
1855 		    (longlong_t)cur_vibe->vibe_phys_birth_txg);
1856 	}
1857 	(void) printf("\n");
1858 
1859 	(void) printf("indirect mapping obj %llu:\n",
1860 	    (longlong_t)vic->vic_mapping_object);
1861 	(void) printf("    vim_max_offset = 0x%llx\n",
1862 	    (longlong_t)vdev_indirect_mapping_max_offset(vim));
1863 	(void) printf("    vim_bytes_mapped = 0x%llx\n",
1864 	    (longlong_t)vdev_indirect_mapping_bytes_mapped(vim));
1865 	(void) printf("    vim_count = %llu\n",
1866 	    (longlong_t)vdev_indirect_mapping_num_entries(vim));
1867 
1868 	if (dump_opt['d'] <= 5 && dump_opt['m'] <= 3)
1869 		return;
1870 
1871 	uint32_t *counts = vdev_indirect_mapping_load_obsolete_counts(vim);
1872 
1873 	for (uint64_t i = 0; i < vdev_indirect_mapping_num_entries(vim); i++) {
1874 		vdev_indirect_mapping_entry_phys_t *vimep =
1875 		    &vim->vim_entries[i];
1876 		(void) printf("\t<%llx:%llx:%llx> -> "
1877 		    "<%llx:%llx:%llx> (%x obsolete)\n",
1878 		    (longlong_t)vd->vdev_id,
1879 		    (longlong_t)DVA_MAPPING_GET_SRC_OFFSET(vimep),
1880 		    (longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst),
1881 		    (longlong_t)DVA_GET_VDEV(&vimep->vimep_dst),
1882 		    (longlong_t)DVA_GET_OFFSET(&vimep->vimep_dst),
1883 		    (longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst),
1884 		    counts[i]);
1885 	}
1886 	(void) printf("\n");
1887 
1888 	uint64_t obsolete_sm_object;
1889 	VERIFY0(vdev_obsolete_sm_object(vd, &obsolete_sm_object));
1890 	if (obsolete_sm_object != 0) {
1891 		objset_t *mos = vd->vdev_spa->spa_meta_objset;
1892 		(void) printf("obsolete space map object %llu:\n",
1893 		    (u_longlong_t)obsolete_sm_object);
1894 		ASSERT(vd->vdev_obsolete_sm != NULL);
1895 		ASSERT3U(space_map_object(vd->vdev_obsolete_sm), ==,
1896 		    obsolete_sm_object);
1897 		dump_spacemap(mos, vd->vdev_obsolete_sm);
1898 		(void) printf("\n");
1899 	}
1900 }
1901 
1902 static void
1903 dump_metaslabs(spa_t *spa)
1904 {
1905 	vdev_t *vd, *rvd = spa->spa_root_vdev;
1906 	uint64_t m, c = 0, children = rvd->vdev_children;
1907 
1908 	(void) printf("\nMetaslabs:\n");
1909 
1910 	if (!dump_opt['d'] && zopt_metaslab_args > 0) {
1911 		c = zopt_metaslab[0];
1912 
1913 		if (c >= children)
1914 			(void) fatal("bad vdev id: %llu", (u_longlong_t)c);
1915 
1916 		if (zopt_metaslab_args > 1) {
1917 			vd = rvd->vdev_child[c];
1918 			print_vdev_metaslab_header(vd);
1919 
1920 			for (m = 1; m < zopt_metaslab_args; m++) {
1921 				if (zopt_metaslab[m] < vd->vdev_ms_count)
1922 					dump_metaslab(
1923 					    vd->vdev_ms[zopt_metaslab[m]]);
1924 				else
1925 					(void) fprintf(stderr, "bad metaslab "
1926 					    "number %llu\n",
1927 					    (u_longlong_t)zopt_metaslab[m]);
1928 			}
1929 			(void) printf("\n");
1930 			return;
1931 		}
1932 		children = c + 1;
1933 	}
1934 	for (; c < children; c++) {
1935 		vd = rvd->vdev_child[c];
1936 		print_vdev_metaslab_header(vd);
1937 
1938 		print_vdev_indirect(vd);
1939 
1940 		for (m = 0; m < vd->vdev_ms_count; m++)
1941 			dump_metaslab(vd->vdev_ms[m]);
1942 		(void) printf("\n");
1943 	}
1944 }
1945 
1946 static void
1947 dump_log_spacemaps(spa_t *spa)
1948 {
1949 	if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP))
1950 		return;
1951 
1952 	(void) printf("\nLog Space Maps in Pool:\n");
1953 	for (spa_log_sm_t *sls = avl_first(&spa->spa_sm_logs_by_txg);
1954 	    sls; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls)) {
1955 		space_map_t *sm = NULL;
1956 		VERIFY0(space_map_open(&sm, spa_meta_objset(spa),
1957 		    sls->sls_sm_obj, 0, UINT64_MAX, SPA_MINBLOCKSHIFT));
1958 
1959 		(void) printf("Log Spacemap object %llu txg %llu\n",
1960 		    (u_longlong_t)sls->sls_sm_obj, (u_longlong_t)sls->sls_txg);
1961 		dump_spacemap(spa->spa_meta_objset, sm);
1962 		space_map_close(sm);
1963 	}
1964 	(void) printf("\n");
1965 }
1966 
1967 static void
1968 dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index)
1969 {
1970 	const ddt_phys_t *ddp = dde->dde_phys;
1971 	const ddt_key_t *ddk = &dde->dde_key;
1972 	const char *types[4] = { "ditto", "single", "double", "triple" };
1973 	char blkbuf[BP_SPRINTF_LEN];
1974 	blkptr_t blk;
1975 	int p;
1976 
1977 	for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
1978 		if (ddp->ddp_phys_birth == 0)
1979 			continue;
1980 		ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
1981 		snprintf_blkptr(blkbuf, sizeof (blkbuf), &blk);
1982 		(void) printf("index %llx refcnt %llu %s %s\n",
1983 		    (u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt,
1984 		    types[p], blkbuf);
1985 	}
1986 }
1987 
1988 static void
1989 dump_dedup_ratio(const ddt_stat_t *dds)
1990 {
1991 	double rL, rP, rD, D, dedup, compress, copies;
1992 
1993 	if (dds->dds_blocks == 0)
1994 		return;
1995 
1996 	rL = (double)dds->dds_ref_lsize;
1997 	rP = (double)dds->dds_ref_psize;
1998 	rD = (double)dds->dds_ref_dsize;
1999 	D = (double)dds->dds_dsize;
2000 
2001 	dedup = rD / D;
2002 	compress = rL / rP;
2003 	copies = rD / rP;
2004 
2005 	(void) printf("dedup = %.2f, compress = %.2f, copies = %.2f, "
2006 	    "dedup * compress / copies = %.2f\n\n",
2007 	    dedup, compress, copies, dedup * compress / copies);
2008 }
2009 
2010 static void
2011 dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
2012 {
2013 	char name[DDT_NAMELEN];
2014 	ddt_entry_t dde;
2015 	uint64_t walk = 0;
2016 	dmu_object_info_t doi;
2017 	uint64_t count, dspace, mspace;
2018 	int error;
2019 
2020 	error = ddt_object_info(ddt, type, class, &doi);
2021 
2022 	if (error == ENOENT)
2023 		return;
2024 	ASSERT(error == 0);
2025 
2026 	error = ddt_object_count(ddt, type, class, &count);
2027 	ASSERT(error == 0);
2028 	if (count == 0)
2029 		return;
2030 
2031 	dspace = doi.doi_physical_blocks_512 << 9;
2032 	mspace = doi.doi_fill_count * doi.doi_data_block_size;
2033 
2034 	ddt_object_name(ddt, type, class, name);
2035 
2036 	(void) printf("%s: %llu entries, size %llu on disk, %llu in core\n",
2037 	    name,
2038 	    (u_longlong_t)count,
2039 	    (u_longlong_t)(dspace / count),
2040 	    (u_longlong_t)(mspace / count));
2041 
2042 	if (dump_opt['D'] < 3)
2043 		return;
2044 
2045 	zpool_dump_ddt(NULL, &ddt->ddt_histogram[type][class]);
2046 
2047 	if (dump_opt['D'] < 4)
2048 		return;
2049 
2050 	if (dump_opt['D'] < 5 && class == DDT_CLASS_UNIQUE)
2051 		return;
2052 
2053 	(void) printf("%s contents:\n\n", name);
2054 
2055 	while ((error = ddt_object_walk(ddt, type, class, &walk, &dde)) == 0)
2056 		dump_dde(ddt, &dde, walk);
2057 
2058 	ASSERT3U(error, ==, ENOENT);
2059 
2060 	(void) printf("\n");
2061 }
2062 
2063 static void
2064 dump_all_ddts(spa_t *spa)
2065 {
2066 	ddt_histogram_t ddh_total = {{{0}}};
2067 	ddt_stat_t dds_total = {0};
2068 
2069 	for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
2070 		ddt_t *ddt = spa->spa_ddt[c];
2071 		for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
2072 			for (enum ddt_class class = 0; class < DDT_CLASSES;
2073 			    class++) {
2074 				dump_ddt(ddt, type, class);
2075 			}
2076 		}
2077 	}
2078 
2079 	ddt_get_dedup_stats(spa, &dds_total);
2080 
2081 	if (dds_total.dds_blocks == 0) {
2082 		(void) printf("All DDTs are empty\n");
2083 		return;
2084 	}
2085 
2086 	(void) printf("\n");
2087 
2088 	if (dump_opt['D'] > 1) {
2089 		(void) printf("DDT histogram (aggregated over all DDTs):\n");
2090 		ddt_get_dedup_histogram(spa, &ddh_total);
2091 		zpool_dump_ddt(&dds_total, &ddh_total);
2092 	}
2093 
2094 	dump_dedup_ratio(&dds_total);
2095 }
2096 
2097 static void
2098 dump_brt(spa_t *spa)
2099 {
2100 	if (!spa_feature_is_enabled(spa, SPA_FEATURE_BLOCK_CLONING)) {
2101 		printf("BRT: unsupported on this pool\n");
2102 		return;
2103 	}
2104 
2105 	if (!spa_feature_is_active(spa, SPA_FEATURE_BLOCK_CLONING)) {
2106 		printf("BRT: empty\n");
2107 		return;
2108 	}
2109 
2110 	brt_t *brt = spa->spa_brt;
2111 	VERIFY(brt);
2112 
2113 	char count[32], used[32], saved[32];
2114 	zdb_nicebytes(brt_get_used(spa), used, sizeof (used));
2115 	zdb_nicebytes(brt_get_saved(spa), saved, sizeof (saved));
2116 	uint64_t ratio = brt_get_ratio(spa);
2117 	printf("BRT: used %s; saved %s; ratio %llu.%02llux\n", used, saved,
2118 	    (u_longlong_t)(ratio / 100), (u_longlong_t)(ratio % 100));
2119 
2120 	if (dump_opt['T'] < 2)
2121 		return;
2122 
2123 	for (uint64_t vdevid = 0; vdevid < brt->brt_nvdevs; vdevid++) {
2124 		brt_vdev_t *brtvd = &brt->brt_vdevs[vdevid];
2125 		if (brtvd == NULL)
2126 			continue;
2127 
2128 		if (!brtvd->bv_initiated) {
2129 			printf("BRT: vdev %" PRIu64 ": empty\n", vdevid);
2130 			continue;
2131 		}
2132 
2133 		zdb_nicenum(brtvd->bv_totalcount, count, sizeof (count));
2134 		zdb_nicebytes(brtvd->bv_usedspace, used, sizeof (used));
2135 		zdb_nicebytes(brtvd->bv_savedspace, saved, sizeof (saved));
2136 		printf("BRT: vdev %" PRIu64 ": refcnt %s; used %s; saved %s\n",
2137 		    vdevid, count, used, saved);
2138 	}
2139 
2140 	if (dump_opt['T'] < 3)
2141 		return;
2142 
2143 	char dva[64];
2144 	printf("\n%-16s %-10s\n", "DVA", "REFCNT");
2145 
2146 	for (uint64_t vdevid = 0; vdevid < brt->brt_nvdevs; vdevid++) {
2147 		brt_vdev_t *brtvd = &brt->brt_vdevs[vdevid];
2148 		if (brtvd == NULL || !brtvd->bv_initiated)
2149 			continue;
2150 
2151 		zap_cursor_t zc;
2152 		zap_attribute_t za;
2153 		for (zap_cursor_init(&zc, brt->brt_mos, brtvd->bv_mos_entries);
2154 		    zap_cursor_retrieve(&zc, &za) == 0;
2155 		    zap_cursor_advance(&zc)) {
2156 			uint64_t offset = *(uint64_t *)za.za_name;
2157 			uint64_t refcnt = za.za_first_integer;
2158 
2159 			snprintf(dva, sizeof (dva), "%" PRIu64 ":%llx", vdevid,
2160 			    (u_longlong_t)offset);
2161 			printf("%-16s %-10llu\n", dva, (u_longlong_t)refcnt);
2162 		}
2163 		zap_cursor_fini(&zc);
2164 	}
2165 }
2166 
2167 static void
2168 dump_dtl_seg(void *arg, uint64_t start, uint64_t size)
2169 {
2170 	char *prefix = arg;
2171 
2172 	(void) printf("%s [%llu,%llu) length %llu\n",
2173 	    prefix,
2174 	    (u_longlong_t)start,
2175 	    (u_longlong_t)(start + size),
2176 	    (u_longlong_t)(size));
2177 }
2178 
2179 static void
2180 dump_dtl(vdev_t *vd, int indent)
2181 {
2182 	spa_t *spa = vd->vdev_spa;
2183 	boolean_t required;
2184 	const char *name[DTL_TYPES] = { "missing", "partial", "scrub",
2185 		"outage" };
2186 	char prefix[256];
2187 
2188 	spa_vdev_state_enter(spa, SCL_NONE);
2189 	required = vdev_dtl_required(vd);
2190 	(void) spa_vdev_state_exit(spa, NULL, 0);
2191 
2192 	if (indent == 0)
2193 		(void) printf("\nDirty time logs:\n\n");
2194 
2195 	(void) printf("\t%*s%s [%s]\n", indent, "",
2196 	    vd->vdev_path ? vd->vdev_path :
2197 	    vd->vdev_parent ? vd->vdev_ops->vdev_op_type : spa_name(spa),
2198 	    required ? "DTL-required" : "DTL-expendable");
2199 
2200 	for (int t = 0; t < DTL_TYPES; t++) {
2201 		range_tree_t *rt = vd->vdev_dtl[t];
2202 		if (range_tree_space(rt) == 0)
2203 			continue;
2204 		(void) snprintf(prefix, sizeof (prefix), "\t%*s%s",
2205 		    indent + 2, "", name[t]);
2206 		range_tree_walk(rt, dump_dtl_seg, prefix);
2207 		if (dump_opt['d'] > 5 && vd->vdev_children == 0)
2208 			dump_spacemap(spa->spa_meta_objset,
2209 			    vd->vdev_dtl_sm);
2210 	}
2211 
2212 	for (unsigned c = 0; c < vd->vdev_children; c++)
2213 		dump_dtl(vd->vdev_child[c], indent + 4);
2214 }
2215 
2216 static void
2217 dump_history(spa_t *spa)
2218 {
2219 	nvlist_t **events = NULL;
2220 	char *buf;
2221 	uint64_t resid, len, off = 0;
2222 	uint_t num = 0;
2223 	int error;
2224 	char tbuf[30];
2225 
2226 	if ((buf = malloc(SPA_OLD_MAXBLOCKSIZE)) == NULL) {
2227 		(void) fprintf(stderr, "%s: unable to allocate I/O buffer\n",
2228 		    __func__);
2229 		return;
2230 	}
2231 
2232 	do {
2233 		len = SPA_OLD_MAXBLOCKSIZE;
2234 
2235 		if ((error = spa_history_get(spa, &off, &len, buf)) != 0) {
2236 			(void) fprintf(stderr, "Unable to read history: "
2237 			    "error %d\n", error);
2238 			free(buf);
2239 			return;
2240 		}
2241 
2242 		if (zpool_history_unpack(buf, len, &resid, &events, &num) != 0)
2243 			break;
2244 
2245 		off -= resid;
2246 	} while (len != 0);
2247 
2248 	(void) printf("\nHistory:\n");
2249 	for (unsigned i = 0; i < num; i++) {
2250 		boolean_t printed = B_FALSE;
2251 
2252 		if (nvlist_exists(events[i], ZPOOL_HIST_TIME)) {
2253 			time_t tsec;
2254 			struct tm t;
2255 
2256 			tsec = fnvlist_lookup_uint64(events[i],
2257 			    ZPOOL_HIST_TIME);
2258 			(void) localtime_r(&tsec, &t);
2259 			(void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t);
2260 		} else {
2261 			tbuf[0] = '\0';
2262 		}
2263 
2264 		if (nvlist_exists(events[i], ZPOOL_HIST_CMD)) {
2265 			(void) printf("%s %s\n", tbuf,
2266 			    fnvlist_lookup_string(events[i], ZPOOL_HIST_CMD));
2267 		} else if (nvlist_exists(events[i], ZPOOL_HIST_INT_EVENT)) {
2268 			uint64_t ievent;
2269 
2270 			ievent = fnvlist_lookup_uint64(events[i],
2271 			    ZPOOL_HIST_INT_EVENT);
2272 			if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS)
2273 				goto next;
2274 
2275 			(void) printf(" %s [internal %s txg:%ju] %s\n",
2276 			    tbuf,
2277 			    zfs_history_event_names[ievent],
2278 			    fnvlist_lookup_uint64(events[i],
2279 			    ZPOOL_HIST_TXG),
2280 			    fnvlist_lookup_string(events[i],
2281 			    ZPOOL_HIST_INT_STR));
2282 		} else if (nvlist_exists(events[i], ZPOOL_HIST_INT_NAME)) {
2283 			(void) printf("%s [txg:%ju] %s", tbuf,
2284 			    fnvlist_lookup_uint64(events[i],
2285 			    ZPOOL_HIST_TXG),
2286 			    fnvlist_lookup_string(events[i],
2287 			    ZPOOL_HIST_INT_NAME));
2288 
2289 			if (nvlist_exists(events[i], ZPOOL_HIST_DSNAME)) {
2290 				(void) printf(" %s (%llu)",
2291 				    fnvlist_lookup_string(events[i],
2292 				    ZPOOL_HIST_DSNAME),
2293 				    (u_longlong_t)fnvlist_lookup_uint64(
2294 				    events[i],
2295 				    ZPOOL_HIST_DSID));
2296 			}
2297 
2298 			(void) printf(" %s\n", fnvlist_lookup_string(events[i],
2299 			    ZPOOL_HIST_INT_STR));
2300 		} else if (nvlist_exists(events[i], ZPOOL_HIST_IOCTL)) {
2301 			(void) printf("%s ioctl %s\n", tbuf,
2302 			    fnvlist_lookup_string(events[i],
2303 			    ZPOOL_HIST_IOCTL));
2304 
2305 			if (nvlist_exists(events[i], ZPOOL_HIST_INPUT_NVL)) {
2306 				(void) printf("    input:\n");
2307 				dump_nvlist(fnvlist_lookup_nvlist(events[i],
2308 				    ZPOOL_HIST_INPUT_NVL), 8);
2309 			}
2310 			if (nvlist_exists(events[i], ZPOOL_HIST_OUTPUT_NVL)) {
2311 				(void) printf("    output:\n");
2312 				dump_nvlist(fnvlist_lookup_nvlist(events[i],
2313 				    ZPOOL_HIST_OUTPUT_NVL), 8);
2314 			}
2315 			if (nvlist_exists(events[i], ZPOOL_HIST_ERRNO)) {
2316 				(void) printf("    errno: %lld\n",
2317 				    (longlong_t)fnvlist_lookup_int64(events[i],
2318 				    ZPOOL_HIST_ERRNO));
2319 			}
2320 		} else {
2321 			goto next;
2322 		}
2323 
2324 		printed = B_TRUE;
2325 next:
2326 		if (dump_opt['h'] > 1) {
2327 			if (!printed)
2328 				(void) printf("unrecognized record:\n");
2329 			dump_nvlist(events[i], 2);
2330 		}
2331 	}
2332 	free(buf);
2333 }
2334 
2335 static void
2336 dump_dnode(objset_t *os, uint64_t object, void *data, size_t size)
2337 {
2338 	(void) os, (void) object, (void) data, (void) size;
2339 }
2340 
2341 static uint64_t
2342 blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp,
2343     const zbookmark_phys_t *zb)
2344 {
2345 	if (dnp == NULL) {
2346 		ASSERT(zb->zb_level < 0);
2347 		if (zb->zb_object == 0)
2348 			return (zb->zb_blkid);
2349 		return (zb->zb_blkid * BP_GET_LSIZE(bp));
2350 	}
2351 
2352 	ASSERT(zb->zb_level >= 0);
2353 
2354 	return ((zb->zb_blkid <<
2355 	    (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
2356 	    dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
2357 }
2358 
2359 static void
2360 snprintf_zstd_header(spa_t *spa, char *blkbuf, size_t buflen,
2361     const blkptr_t *bp)
2362 {
2363 	static abd_t *pabd = NULL;
2364 	void *buf;
2365 	zio_t *zio;
2366 	zfs_zstdhdr_t zstd_hdr;
2367 	int error;
2368 
2369 	if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_ZSTD)
2370 		return;
2371 
2372 	if (BP_IS_HOLE(bp))
2373 		return;
2374 
2375 	if (BP_IS_EMBEDDED(bp)) {
2376 		buf = malloc(SPA_MAXBLOCKSIZE);
2377 		if (buf == NULL) {
2378 			(void) fprintf(stderr, "out of memory\n");
2379 			exit(1);
2380 		}
2381 		decode_embedded_bp_compressed(bp, buf);
2382 		memcpy(&zstd_hdr, buf, sizeof (zstd_hdr));
2383 		free(buf);
2384 		zstd_hdr.c_len = BE_32(zstd_hdr.c_len);
2385 		zstd_hdr.raw_version_level = BE_32(zstd_hdr.raw_version_level);
2386 		(void) snprintf(blkbuf + strlen(blkbuf),
2387 		    buflen - strlen(blkbuf),
2388 		    " ZSTD:size=%u:version=%u:level=%u:EMBEDDED",
2389 		    zstd_hdr.c_len, zfs_get_hdrversion(&zstd_hdr),
2390 		    zfs_get_hdrlevel(&zstd_hdr));
2391 		return;
2392 	}
2393 
2394 	if (!pabd)
2395 		pabd = abd_alloc_for_io(SPA_MAXBLOCKSIZE, B_FALSE);
2396 	zio = zio_root(spa, NULL, NULL, 0);
2397 
2398 	/* Decrypt but don't decompress so we can read the compression header */
2399 	zio_nowait(zio_read(zio, spa, bp, pabd, BP_GET_PSIZE(bp), NULL, NULL,
2400 	    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW_COMPRESS,
2401 	    NULL));
2402 	error = zio_wait(zio);
2403 	if (error) {
2404 		(void) fprintf(stderr, "read failed: %d\n", error);
2405 		return;
2406 	}
2407 	buf = abd_borrow_buf_copy(pabd, BP_GET_LSIZE(bp));
2408 	memcpy(&zstd_hdr, buf, sizeof (zstd_hdr));
2409 	zstd_hdr.c_len = BE_32(zstd_hdr.c_len);
2410 	zstd_hdr.raw_version_level = BE_32(zstd_hdr.raw_version_level);
2411 
2412 	(void) snprintf(blkbuf + strlen(blkbuf),
2413 	    buflen - strlen(blkbuf),
2414 	    " ZSTD:size=%u:version=%u:level=%u:NORMAL",
2415 	    zstd_hdr.c_len, zfs_get_hdrversion(&zstd_hdr),
2416 	    zfs_get_hdrlevel(&zstd_hdr));
2417 
2418 	abd_return_buf_copy(pabd, buf, BP_GET_LSIZE(bp));
2419 }
2420 
2421 static void
2422 snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp,
2423     boolean_t bp_freed)
2424 {
2425 	const dva_t *dva = bp->blk_dva;
2426 	int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1;
2427 	int i;
2428 
2429 	if (dump_opt['b'] >= 6) {
2430 		snprintf_blkptr(blkbuf, buflen, bp);
2431 		if (bp_freed) {
2432 			(void) snprintf(blkbuf + strlen(blkbuf),
2433 			    buflen - strlen(blkbuf), " %s", "FREE");
2434 		}
2435 		return;
2436 	}
2437 
2438 	if (BP_IS_EMBEDDED(bp)) {
2439 		(void) sprintf(blkbuf,
2440 		    "EMBEDDED et=%u %llxL/%llxP B=%llu",
2441 		    (int)BPE_GET_ETYPE(bp),
2442 		    (u_longlong_t)BPE_GET_LSIZE(bp),
2443 		    (u_longlong_t)BPE_GET_PSIZE(bp),
2444 		    (u_longlong_t)bp->blk_birth);
2445 		return;
2446 	}
2447 
2448 	blkbuf[0] = '\0';
2449 
2450 	for (i = 0; i < ndvas; i++)
2451 		(void) snprintf(blkbuf + strlen(blkbuf),
2452 		    buflen - strlen(blkbuf), "%llu:%llx:%llx ",
2453 		    (u_longlong_t)DVA_GET_VDEV(&dva[i]),
2454 		    (u_longlong_t)DVA_GET_OFFSET(&dva[i]),
2455 		    (u_longlong_t)DVA_GET_ASIZE(&dva[i]));
2456 
2457 	if (BP_IS_HOLE(bp)) {
2458 		(void) snprintf(blkbuf + strlen(blkbuf),
2459 		    buflen - strlen(blkbuf),
2460 		    "%llxL B=%llu",
2461 		    (u_longlong_t)BP_GET_LSIZE(bp),
2462 		    (u_longlong_t)bp->blk_birth);
2463 	} else {
2464 		(void) snprintf(blkbuf + strlen(blkbuf),
2465 		    buflen - strlen(blkbuf),
2466 		    "%llxL/%llxP F=%llu B=%llu/%llu",
2467 		    (u_longlong_t)BP_GET_LSIZE(bp),
2468 		    (u_longlong_t)BP_GET_PSIZE(bp),
2469 		    (u_longlong_t)BP_GET_FILL(bp),
2470 		    (u_longlong_t)bp->blk_birth,
2471 		    (u_longlong_t)BP_PHYSICAL_BIRTH(bp));
2472 		if (bp_freed)
2473 			(void) snprintf(blkbuf + strlen(blkbuf),
2474 			    buflen - strlen(blkbuf), " %s", "FREE");
2475 		(void) snprintf(blkbuf + strlen(blkbuf),
2476 		    buflen - strlen(blkbuf),
2477 		    " cksum=%016llx:%016llx:%016llx:%016llx",
2478 		    (u_longlong_t)bp->blk_cksum.zc_word[0],
2479 		    (u_longlong_t)bp->blk_cksum.zc_word[1],
2480 		    (u_longlong_t)bp->blk_cksum.zc_word[2],
2481 		    (u_longlong_t)bp->blk_cksum.zc_word[3]);
2482 	}
2483 }
2484 
2485 static void
2486 print_indirect(spa_t *spa, blkptr_t *bp, const zbookmark_phys_t *zb,
2487     const dnode_phys_t *dnp)
2488 {
2489 	char blkbuf[BP_SPRINTF_LEN];
2490 	int l;
2491 
2492 	if (!BP_IS_EMBEDDED(bp)) {
2493 		ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
2494 		ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
2495 	}
2496 
2497 	(void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb));
2498 
2499 	ASSERT(zb->zb_level >= 0);
2500 
2501 	for (l = dnp->dn_nlevels - 1; l >= -1; l--) {
2502 		if (l == zb->zb_level) {
2503 			(void) printf("L%llx", (u_longlong_t)zb->zb_level);
2504 		} else {
2505 			(void) printf(" ");
2506 		}
2507 	}
2508 
2509 	snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp, B_FALSE);
2510 	if (dump_opt['Z'] && BP_GET_COMPRESS(bp) == ZIO_COMPRESS_ZSTD)
2511 		snprintf_zstd_header(spa, blkbuf, sizeof (blkbuf), bp);
2512 	(void) printf("%s\n", blkbuf);
2513 }
2514 
2515 static int
2516 visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
2517     blkptr_t *bp, const zbookmark_phys_t *zb)
2518 {
2519 	int err = 0;
2520 
2521 	if (bp->blk_birth == 0)
2522 		return (0);
2523 
2524 	print_indirect(spa, bp, zb, dnp);
2525 
2526 	if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) {
2527 		arc_flags_t flags = ARC_FLAG_WAIT;
2528 		int i;
2529 		blkptr_t *cbp;
2530 		int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
2531 		arc_buf_t *buf;
2532 		uint64_t fill = 0;
2533 		ASSERT(!BP_IS_REDACTED(bp));
2534 
2535 		err = arc_read(NULL, spa, bp, arc_getbuf_func, &buf,
2536 		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
2537 		if (err)
2538 			return (err);
2539 		ASSERT(buf->b_data);
2540 
2541 		/* recursively visit blocks below this */
2542 		cbp = buf->b_data;
2543 		for (i = 0; i < epb; i++, cbp++) {
2544 			zbookmark_phys_t czb;
2545 
2546 			SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
2547 			    zb->zb_level - 1,
2548 			    zb->zb_blkid * epb + i);
2549 			err = visit_indirect(spa, dnp, cbp, &czb);
2550 			if (err)
2551 				break;
2552 			fill += BP_GET_FILL(cbp);
2553 		}
2554 		if (!err)
2555 			ASSERT3U(fill, ==, BP_GET_FILL(bp));
2556 		arc_buf_destroy(buf, &buf);
2557 	}
2558 
2559 	return (err);
2560 }
2561 
2562 static void
2563 dump_indirect(dnode_t *dn)
2564 {
2565 	dnode_phys_t *dnp = dn->dn_phys;
2566 	zbookmark_phys_t czb;
2567 
2568 	(void) printf("Indirect blocks:\n");
2569 
2570 	SET_BOOKMARK(&czb, dmu_objset_id(dn->dn_objset),
2571 	    dn->dn_object, dnp->dn_nlevels - 1, 0);
2572 	for (int j = 0; j < dnp->dn_nblkptr; j++) {
2573 		czb.zb_blkid = j;
2574 		(void) visit_indirect(dmu_objset_spa(dn->dn_objset), dnp,
2575 		    &dnp->dn_blkptr[j], &czb);
2576 	}
2577 
2578 	(void) printf("\n");
2579 }
2580 
2581 static void
2582 dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
2583 {
2584 	(void) os, (void) object;
2585 	dsl_dir_phys_t *dd = data;
2586 	time_t crtime;
2587 	char nice[32];
2588 
2589 	/* make sure nicenum has enough space */
2590 	_Static_assert(sizeof (nice) >= NN_NUMBUF_SZ, "nice truncated");
2591 
2592 	if (dd == NULL)
2593 		return;
2594 
2595 	ASSERT3U(size, >=, sizeof (dsl_dir_phys_t));
2596 
2597 	crtime = dd->dd_creation_time;
2598 	(void) printf("\t\tcreation_time = %s", ctime(&crtime));
2599 	(void) printf("\t\thead_dataset_obj = %llu\n",
2600 	    (u_longlong_t)dd->dd_head_dataset_obj);
2601 	(void) printf("\t\tparent_dir_obj = %llu\n",
2602 	    (u_longlong_t)dd->dd_parent_obj);
2603 	(void) printf("\t\torigin_obj = %llu\n",
2604 	    (u_longlong_t)dd->dd_origin_obj);
2605 	(void) printf("\t\tchild_dir_zapobj = %llu\n",
2606 	    (u_longlong_t)dd->dd_child_dir_zapobj);
2607 	zdb_nicenum(dd->dd_used_bytes, nice, sizeof (nice));
2608 	(void) printf("\t\tused_bytes = %s\n", nice);
2609 	zdb_nicenum(dd->dd_compressed_bytes, nice, sizeof (nice));
2610 	(void) printf("\t\tcompressed_bytes = %s\n", nice);
2611 	zdb_nicenum(dd->dd_uncompressed_bytes, nice, sizeof (nice));
2612 	(void) printf("\t\tuncompressed_bytes = %s\n", nice);
2613 	zdb_nicenum(dd->dd_quota, nice, sizeof (nice));
2614 	(void) printf("\t\tquota = %s\n", nice);
2615 	zdb_nicenum(dd->dd_reserved, nice, sizeof (nice));
2616 	(void) printf("\t\treserved = %s\n", nice);
2617 	(void) printf("\t\tprops_zapobj = %llu\n",
2618 	    (u_longlong_t)dd->dd_props_zapobj);
2619 	(void) printf("\t\tdeleg_zapobj = %llu\n",
2620 	    (u_longlong_t)dd->dd_deleg_zapobj);
2621 	(void) printf("\t\tflags = %llx\n",
2622 	    (u_longlong_t)dd->dd_flags);
2623 
2624 #define	DO(which) \
2625 	zdb_nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice, \
2626 	    sizeof (nice)); \
2627 	(void) printf("\t\tused_breakdown[" #which "] = %s\n", nice)
2628 	DO(HEAD);
2629 	DO(SNAP);
2630 	DO(CHILD);
2631 	DO(CHILD_RSRV);
2632 	DO(REFRSRV);
2633 #undef DO
2634 	(void) printf("\t\tclones = %llu\n",
2635 	    (u_longlong_t)dd->dd_clones);
2636 }
2637 
2638 static void
2639 dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
2640 {
2641 	(void) os, (void) object;
2642 	dsl_dataset_phys_t *ds = data;
2643 	time_t crtime;
2644 	char used[32], compressed[32], uncompressed[32], unique[32];
2645 	char blkbuf[BP_SPRINTF_LEN];
2646 
2647 	/* make sure nicenum has enough space */
2648 	_Static_assert(sizeof (used) >= NN_NUMBUF_SZ, "used truncated");
2649 	_Static_assert(sizeof (compressed) >= NN_NUMBUF_SZ,
2650 	    "compressed truncated");
2651 	_Static_assert(sizeof (uncompressed) >= NN_NUMBUF_SZ,
2652 	    "uncompressed truncated");
2653 	_Static_assert(sizeof (unique) >= NN_NUMBUF_SZ, "unique truncated");
2654 
2655 	if (ds == NULL)
2656 		return;
2657 
2658 	ASSERT(size == sizeof (*ds));
2659 	crtime = ds->ds_creation_time;
2660 	zdb_nicenum(ds->ds_referenced_bytes, used, sizeof (used));
2661 	zdb_nicenum(ds->ds_compressed_bytes, compressed, sizeof (compressed));
2662 	zdb_nicenum(ds->ds_uncompressed_bytes, uncompressed,
2663 	    sizeof (uncompressed));
2664 	zdb_nicenum(ds->ds_unique_bytes, unique, sizeof (unique));
2665 	snprintf_blkptr(blkbuf, sizeof (blkbuf), &ds->ds_bp);
2666 
2667 	(void) printf("\t\tdir_obj = %llu\n",
2668 	    (u_longlong_t)ds->ds_dir_obj);
2669 	(void) printf("\t\tprev_snap_obj = %llu\n",
2670 	    (u_longlong_t)ds->ds_prev_snap_obj);
2671 	(void) printf("\t\tprev_snap_txg = %llu\n",
2672 	    (u_longlong_t)ds->ds_prev_snap_txg);
2673 	(void) printf("\t\tnext_snap_obj = %llu\n",
2674 	    (u_longlong_t)ds->ds_next_snap_obj);
2675 	(void) printf("\t\tsnapnames_zapobj = %llu\n",
2676 	    (u_longlong_t)ds->ds_snapnames_zapobj);
2677 	(void) printf("\t\tnum_children = %llu\n",
2678 	    (u_longlong_t)ds->ds_num_children);
2679 	(void) printf("\t\tuserrefs_obj = %llu\n",
2680 	    (u_longlong_t)ds->ds_userrefs_obj);
2681 	(void) printf("\t\tcreation_time = %s", ctime(&crtime));
2682 	(void) printf("\t\tcreation_txg = %llu\n",
2683 	    (u_longlong_t)ds->ds_creation_txg);
2684 	(void) printf("\t\tdeadlist_obj = %llu\n",
2685 	    (u_longlong_t)ds->ds_deadlist_obj);
2686 	(void) printf("\t\tused_bytes = %s\n", used);
2687 	(void) printf("\t\tcompressed_bytes = %s\n", compressed);
2688 	(void) printf("\t\tuncompressed_bytes = %s\n", uncompressed);
2689 	(void) printf("\t\tunique = %s\n", unique);
2690 	(void) printf("\t\tfsid_guid = %llu\n",
2691 	    (u_longlong_t)ds->ds_fsid_guid);
2692 	(void) printf("\t\tguid = %llu\n",
2693 	    (u_longlong_t)ds->ds_guid);
2694 	(void) printf("\t\tflags = %llx\n",
2695 	    (u_longlong_t)ds->ds_flags);
2696 	(void) printf("\t\tnext_clones_obj = %llu\n",
2697 	    (u_longlong_t)ds->ds_next_clones_obj);
2698 	(void) printf("\t\tprops_obj = %llu\n",
2699 	    (u_longlong_t)ds->ds_props_obj);
2700 	(void) printf("\t\tbp = %s\n", blkbuf);
2701 }
2702 
2703 static int
2704 dump_bptree_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
2705 {
2706 	(void) arg, (void) tx;
2707 	char blkbuf[BP_SPRINTF_LEN];
2708 
2709 	if (bp->blk_birth != 0) {
2710 		snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
2711 		(void) printf("\t%s\n", blkbuf);
2712 	}
2713 	return (0);
2714 }
2715 
2716 static void
2717 dump_bptree(objset_t *os, uint64_t obj, const char *name)
2718 {
2719 	char bytes[32];
2720 	bptree_phys_t *bt;
2721 	dmu_buf_t *db;
2722 
2723 	/* make sure nicenum has enough space */
2724 	_Static_assert(sizeof (bytes) >= NN_NUMBUF_SZ, "bytes truncated");
2725 
2726 	if (dump_opt['d'] < 3)
2727 		return;
2728 
2729 	VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
2730 	bt = db->db_data;
2731 	zdb_nicenum(bt->bt_bytes, bytes, sizeof (bytes));
2732 	(void) printf("\n    %s: %llu datasets, %s\n",
2733 	    name, (unsigned long long)(bt->bt_end - bt->bt_begin), bytes);
2734 	dmu_buf_rele(db, FTAG);
2735 
2736 	if (dump_opt['d'] < 5)
2737 		return;
2738 
2739 	(void) printf("\n");
2740 
2741 	(void) bptree_iterate(os, obj, B_FALSE, dump_bptree_cb, NULL, NULL);
2742 }
2743 
2744 static int
2745 dump_bpobj_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, dmu_tx_t *tx)
2746 {
2747 	(void) arg, (void) tx;
2748 	char blkbuf[BP_SPRINTF_LEN];
2749 
2750 	ASSERT(bp->blk_birth != 0);
2751 	snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp, bp_freed);
2752 	(void) printf("\t%s\n", blkbuf);
2753 	return (0);
2754 }
2755 
2756 static void
2757 dump_full_bpobj(bpobj_t *bpo, const char *name, int indent)
2758 {
2759 	char bytes[32];
2760 	char comp[32];
2761 	char uncomp[32];
2762 	uint64_t i;
2763 
2764 	/* make sure nicenum has enough space */
2765 	_Static_assert(sizeof (bytes) >= NN_NUMBUF_SZ, "bytes truncated");
2766 	_Static_assert(sizeof (comp) >= NN_NUMBUF_SZ, "comp truncated");
2767 	_Static_assert(sizeof (uncomp) >= NN_NUMBUF_SZ, "uncomp truncated");
2768 
2769 	if (dump_opt['d'] < 3)
2770 		return;
2771 
2772 	zdb_nicenum(bpo->bpo_phys->bpo_bytes, bytes, sizeof (bytes));
2773 	if (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_subobjs != 0) {
2774 		zdb_nicenum(bpo->bpo_phys->bpo_comp, comp, sizeof (comp));
2775 		zdb_nicenum(bpo->bpo_phys->bpo_uncomp, uncomp, sizeof (uncomp));
2776 		if (bpo->bpo_havefreed) {
2777 			(void) printf("    %*s: object %llu, %llu local "
2778 			    "blkptrs, %llu freed, %llu subobjs in object %llu, "
2779 			    "%s (%s/%s comp)\n",
2780 			    indent * 8, name,
2781 			    (u_longlong_t)bpo->bpo_object,
2782 			    (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
2783 			    (u_longlong_t)bpo->bpo_phys->bpo_num_freed,
2784 			    (u_longlong_t)bpo->bpo_phys->bpo_num_subobjs,
2785 			    (u_longlong_t)bpo->bpo_phys->bpo_subobjs,
2786 			    bytes, comp, uncomp);
2787 		} else {
2788 			(void) printf("    %*s: object %llu, %llu local "
2789 			    "blkptrs, %llu subobjs in object %llu, "
2790 			    "%s (%s/%s comp)\n",
2791 			    indent * 8, name,
2792 			    (u_longlong_t)bpo->bpo_object,
2793 			    (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
2794 			    (u_longlong_t)bpo->bpo_phys->bpo_num_subobjs,
2795 			    (u_longlong_t)bpo->bpo_phys->bpo_subobjs,
2796 			    bytes, comp, uncomp);
2797 		}
2798 
2799 		for (i = 0; i < bpo->bpo_phys->bpo_num_subobjs; i++) {
2800 			uint64_t subobj;
2801 			bpobj_t subbpo;
2802 			int error;
2803 			VERIFY0(dmu_read(bpo->bpo_os,
2804 			    bpo->bpo_phys->bpo_subobjs,
2805 			    i * sizeof (subobj), sizeof (subobj), &subobj, 0));
2806 			error = bpobj_open(&subbpo, bpo->bpo_os, subobj);
2807 			if (error != 0) {
2808 				(void) printf("ERROR %u while trying to open "
2809 				    "subobj id %llu\n",
2810 				    error, (u_longlong_t)subobj);
2811 				continue;
2812 			}
2813 			dump_full_bpobj(&subbpo, "subobj", indent + 1);
2814 			bpobj_close(&subbpo);
2815 		}
2816 	} else {
2817 		if (bpo->bpo_havefreed) {
2818 			(void) printf("    %*s: object %llu, %llu blkptrs, "
2819 			    "%llu freed, %s\n",
2820 			    indent * 8, name,
2821 			    (u_longlong_t)bpo->bpo_object,
2822 			    (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
2823 			    (u_longlong_t)bpo->bpo_phys->bpo_num_freed,
2824 			    bytes);
2825 		} else {
2826 			(void) printf("    %*s: object %llu, %llu blkptrs, "
2827 			    "%s\n",
2828 			    indent * 8, name,
2829 			    (u_longlong_t)bpo->bpo_object,
2830 			    (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
2831 			    bytes);
2832 		}
2833 	}
2834 
2835 	if (dump_opt['d'] < 5)
2836 		return;
2837 
2838 
2839 	if (indent == 0) {
2840 		(void) bpobj_iterate_nofree(bpo, dump_bpobj_cb, NULL, NULL);
2841 		(void) printf("\n");
2842 	}
2843 }
2844 
2845 static int
2846 dump_bookmark(dsl_pool_t *dp, char *name, boolean_t print_redact,
2847     boolean_t print_list)
2848 {
2849 	int err = 0;
2850 	zfs_bookmark_phys_t prop;
2851 	objset_t *mos = dp->dp_spa->spa_meta_objset;
2852 	err = dsl_bookmark_lookup(dp, name, NULL, &prop);
2853 
2854 	if (err != 0) {
2855 		return (err);
2856 	}
2857 
2858 	(void) printf("\t#%s: ", strchr(name, '#') + 1);
2859 	(void) printf("{guid: %llx creation_txg: %llu creation_time: "
2860 	    "%llu redaction_obj: %llu}\n", (u_longlong_t)prop.zbm_guid,
2861 	    (u_longlong_t)prop.zbm_creation_txg,
2862 	    (u_longlong_t)prop.zbm_creation_time,
2863 	    (u_longlong_t)prop.zbm_redaction_obj);
2864 
2865 	IMPLY(print_list, print_redact);
2866 	if (!print_redact || prop.zbm_redaction_obj == 0)
2867 		return (0);
2868 
2869 	redaction_list_t *rl;
2870 	VERIFY0(dsl_redaction_list_hold_obj(dp,
2871 	    prop.zbm_redaction_obj, FTAG, &rl));
2872 
2873 	redaction_list_phys_t *rlp = rl->rl_phys;
2874 	(void) printf("\tRedacted:\n\t\tProgress: ");
2875 	if (rlp->rlp_last_object != UINT64_MAX ||
2876 	    rlp->rlp_last_blkid != UINT64_MAX) {
2877 		(void) printf("%llu %llu (incomplete)\n",
2878 		    (u_longlong_t)rlp->rlp_last_object,
2879 		    (u_longlong_t)rlp->rlp_last_blkid);
2880 	} else {
2881 		(void) printf("complete\n");
2882 	}
2883 	(void) printf("\t\tSnapshots: [");
2884 	for (unsigned int i = 0; i < rlp->rlp_num_snaps; i++) {
2885 		if (i > 0)
2886 			(void) printf(", ");
2887 		(void) printf("%0llu",
2888 		    (u_longlong_t)rlp->rlp_snaps[i]);
2889 	}
2890 	(void) printf("]\n\t\tLength: %llu\n",
2891 	    (u_longlong_t)rlp->rlp_num_entries);
2892 
2893 	if (!print_list) {
2894 		dsl_redaction_list_rele(rl, FTAG);
2895 		return (0);
2896 	}
2897 
2898 	if (rlp->rlp_num_entries == 0) {
2899 		dsl_redaction_list_rele(rl, FTAG);
2900 		(void) printf("\t\tRedaction List: []\n\n");
2901 		return (0);
2902 	}
2903 
2904 	redact_block_phys_t *rbp_buf;
2905 	uint64_t size;
2906 	dmu_object_info_t doi;
2907 
2908 	VERIFY0(dmu_object_info(mos, prop.zbm_redaction_obj, &doi));
2909 	size = doi.doi_max_offset;
2910 	rbp_buf = kmem_alloc(size, KM_SLEEP);
2911 
2912 	err = dmu_read(mos, prop.zbm_redaction_obj, 0, size,
2913 	    rbp_buf, 0);
2914 	if (err != 0) {
2915 		dsl_redaction_list_rele(rl, FTAG);
2916 		kmem_free(rbp_buf, size);
2917 		return (err);
2918 	}
2919 
2920 	(void) printf("\t\tRedaction List: [{object: %llx, offset: "
2921 	    "%llx, blksz: %x, count: %llx}",
2922 	    (u_longlong_t)rbp_buf[0].rbp_object,
2923 	    (u_longlong_t)rbp_buf[0].rbp_blkid,
2924 	    (uint_t)(redact_block_get_size(&rbp_buf[0])),
2925 	    (u_longlong_t)redact_block_get_count(&rbp_buf[0]));
2926 
2927 	for (size_t i = 1; i < rlp->rlp_num_entries; i++) {
2928 		(void) printf(",\n\t\t{object: %llx, offset: %llx, "
2929 		    "blksz: %x, count: %llx}",
2930 		    (u_longlong_t)rbp_buf[i].rbp_object,
2931 		    (u_longlong_t)rbp_buf[i].rbp_blkid,
2932 		    (uint_t)(redact_block_get_size(&rbp_buf[i])),
2933 		    (u_longlong_t)redact_block_get_count(&rbp_buf[i]));
2934 	}
2935 	dsl_redaction_list_rele(rl, FTAG);
2936 	kmem_free(rbp_buf, size);
2937 	(void) printf("]\n\n");
2938 	return (0);
2939 }
2940 
2941 static void
2942 dump_bookmarks(objset_t *os, int verbosity)
2943 {
2944 	zap_cursor_t zc;
2945 	zap_attribute_t attr;
2946 	dsl_dataset_t *ds = dmu_objset_ds(os);
2947 	dsl_pool_t *dp = spa_get_dsl(os->os_spa);
2948 	objset_t *mos = os->os_spa->spa_meta_objset;
2949 	if (verbosity < 4)
2950 		return;
2951 	dsl_pool_config_enter(dp, FTAG);
2952 
2953 	for (zap_cursor_init(&zc, mos, ds->ds_bookmarks_obj);
2954 	    zap_cursor_retrieve(&zc, &attr) == 0;
2955 	    zap_cursor_advance(&zc)) {
2956 		char osname[ZFS_MAX_DATASET_NAME_LEN];
2957 		char buf[ZFS_MAX_DATASET_NAME_LEN];
2958 		int len;
2959 		dmu_objset_name(os, osname);
2960 		len = snprintf(buf, sizeof (buf), "%s#%s", osname,
2961 		    attr.za_name);
2962 		VERIFY3S(len, <, ZFS_MAX_DATASET_NAME_LEN);
2963 		(void) dump_bookmark(dp, buf, verbosity >= 5, verbosity >= 6);
2964 	}
2965 	zap_cursor_fini(&zc);
2966 	dsl_pool_config_exit(dp, FTAG);
2967 }
2968 
2969 static void
2970 bpobj_count_refd(bpobj_t *bpo)
2971 {
2972 	mos_obj_refd(bpo->bpo_object);
2973 
2974 	if (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_subobjs != 0) {
2975 		mos_obj_refd(bpo->bpo_phys->bpo_subobjs);
2976 		for (uint64_t i = 0; i < bpo->bpo_phys->bpo_num_subobjs; i++) {
2977 			uint64_t subobj;
2978 			bpobj_t subbpo;
2979 			int error;
2980 			VERIFY0(dmu_read(bpo->bpo_os,
2981 			    bpo->bpo_phys->bpo_subobjs,
2982 			    i * sizeof (subobj), sizeof (subobj), &subobj, 0));
2983 			error = bpobj_open(&subbpo, bpo->bpo_os, subobj);
2984 			if (error != 0) {
2985 				(void) printf("ERROR %u while trying to open "
2986 				    "subobj id %llu\n",
2987 				    error, (u_longlong_t)subobj);
2988 				continue;
2989 			}
2990 			bpobj_count_refd(&subbpo);
2991 			bpobj_close(&subbpo);
2992 		}
2993 	}
2994 }
2995 
2996 static int
2997 dsl_deadlist_entry_count_refd(void *arg, dsl_deadlist_entry_t *dle)
2998 {
2999 	spa_t *spa = arg;
3000 	uint64_t empty_bpobj = spa->spa_dsl_pool->dp_empty_bpobj;
3001 	if (dle->dle_bpobj.bpo_object != empty_bpobj)
3002 		bpobj_count_refd(&dle->dle_bpobj);
3003 	return (0);
3004 }
3005 
3006 static int
3007 dsl_deadlist_entry_dump(void *arg, dsl_deadlist_entry_t *dle)
3008 {
3009 	ASSERT(arg == NULL);
3010 	if (dump_opt['d'] >= 5) {
3011 		char buf[128];
3012 		(void) snprintf(buf, sizeof (buf),
3013 		    "mintxg %llu -> obj %llu",
3014 		    (longlong_t)dle->dle_mintxg,
3015 		    (longlong_t)dle->dle_bpobj.bpo_object);
3016 
3017 		dump_full_bpobj(&dle->dle_bpobj, buf, 0);
3018 	} else {
3019 		(void) printf("mintxg %llu -> obj %llu\n",
3020 		    (longlong_t)dle->dle_mintxg,
3021 		    (longlong_t)dle->dle_bpobj.bpo_object);
3022 	}
3023 	return (0);
3024 }
3025 
3026 static void
3027 dump_blkptr_list(dsl_deadlist_t *dl, const char *name)
3028 {
3029 	char bytes[32];
3030 	char comp[32];
3031 	char uncomp[32];
3032 	char entries[32];
3033 	spa_t *spa = dmu_objset_spa(dl->dl_os);
3034 	uint64_t empty_bpobj = spa->spa_dsl_pool->dp_empty_bpobj;
3035 
3036 	if (dl->dl_oldfmt) {
3037 		if (dl->dl_bpobj.bpo_object != empty_bpobj)
3038 			bpobj_count_refd(&dl->dl_bpobj);
3039 	} else {
3040 		mos_obj_refd(dl->dl_object);
3041 		dsl_deadlist_iterate(dl, dsl_deadlist_entry_count_refd, spa);
3042 	}
3043 
3044 	/* make sure nicenum has enough space */
3045 	_Static_assert(sizeof (bytes) >= NN_NUMBUF_SZ, "bytes truncated");
3046 	_Static_assert(sizeof (comp) >= NN_NUMBUF_SZ, "comp truncated");
3047 	_Static_assert(sizeof (uncomp) >= NN_NUMBUF_SZ, "uncomp truncated");
3048 	_Static_assert(sizeof (entries) >= NN_NUMBUF_SZ, "entries truncated");
3049 
3050 	if (dump_opt['d'] < 3)
3051 		return;
3052 
3053 	if (dl->dl_oldfmt) {
3054 		dump_full_bpobj(&dl->dl_bpobj, "old-format deadlist", 0);
3055 		return;
3056 	}
3057 
3058 	zdb_nicenum(dl->dl_phys->dl_used, bytes, sizeof (bytes));
3059 	zdb_nicenum(dl->dl_phys->dl_comp, comp, sizeof (comp));
3060 	zdb_nicenum(dl->dl_phys->dl_uncomp, uncomp, sizeof (uncomp));
3061 	zdb_nicenum(avl_numnodes(&dl->dl_tree), entries, sizeof (entries));
3062 	(void) printf("\n    %s: %s (%s/%s comp), %s entries\n",
3063 	    name, bytes, comp, uncomp, entries);
3064 
3065 	if (dump_opt['d'] < 4)
3066 		return;
3067 
3068 	(void) putchar('\n');
3069 
3070 	dsl_deadlist_iterate(dl, dsl_deadlist_entry_dump, NULL);
3071 }
3072 
3073 static int
3074 verify_dd_livelist(objset_t *os)
3075 {
3076 	uint64_t ll_used, used, ll_comp, comp, ll_uncomp, uncomp;
3077 	dsl_pool_t *dp = spa_get_dsl(os->os_spa);
3078 	dsl_dir_t  *dd = os->os_dsl_dataset->ds_dir;
3079 
3080 	ASSERT(!dmu_objset_is_snapshot(os));
3081 	if (!dsl_deadlist_is_open(&dd->dd_livelist))
3082 		return (0);
3083 
3084 	/* Iterate through the livelist to check for duplicates */
3085 	dsl_deadlist_iterate(&dd->dd_livelist, sublivelist_verify_lightweight,
3086 	    NULL);
3087 
3088 	dsl_pool_config_enter(dp, FTAG);
3089 	dsl_deadlist_space(&dd->dd_livelist, &ll_used,
3090 	    &ll_comp, &ll_uncomp);
3091 
3092 	dsl_dataset_t *origin_ds;
3093 	ASSERT(dsl_pool_config_held(dp));
3094 	VERIFY0(dsl_dataset_hold_obj(dp,
3095 	    dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin_ds));
3096 	VERIFY0(dsl_dataset_space_written(origin_ds, os->os_dsl_dataset,
3097 	    &used, &comp, &uncomp));
3098 	dsl_dataset_rele(origin_ds, FTAG);
3099 	dsl_pool_config_exit(dp, FTAG);
3100 	/*
3101 	 *  It's possible that the dataset's uncomp space is larger than the
3102 	 *  livelist's because livelists do not track embedded block pointers
3103 	 */
3104 	if (used != ll_used || comp != ll_comp || uncomp < ll_uncomp) {
3105 		char nice_used[32], nice_comp[32], nice_uncomp[32];
3106 		(void) printf("Discrepancy in space accounting:\n");
3107 		zdb_nicenum(used, nice_used, sizeof (nice_used));
3108 		zdb_nicenum(comp, nice_comp, sizeof (nice_comp));
3109 		zdb_nicenum(uncomp, nice_uncomp, sizeof (nice_uncomp));
3110 		(void) printf("dir: used %s, comp %s, uncomp %s\n",
3111 		    nice_used, nice_comp, nice_uncomp);
3112 		zdb_nicenum(ll_used, nice_used, sizeof (nice_used));
3113 		zdb_nicenum(ll_comp, nice_comp, sizeof (nice_comp));
3114 		zdb_nicenum(ll_uncomp, nice_uncomp, sizeof (nice_uncomp));
3115 		(void) printf("livelist: used %s, comp %s, uncomp %s\n",
3116 		    nice_used, nice_comp, nice_uncomp);
3117 		return (1);
3118 	}
3119 	return (0);
3120 }
3121 
3122 static char *key_material = NULL;
3123 
3124 static boolean_t
3125 zdb_derive_key(dsl_dir_t *dd, uint8_t *key_out)
3126 {
3127 	uint64_t keyformat, salt, iters;
3128 	int i;
3129 	unsigned char c;
3130 
3131 	VERIFY0(zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj,
3132 	    zfs_prop_to_name(ZFS_PROP_KEYFORMAT), sizeof (uint64_t),
3133 	    1, &keyformat));
3134 
3135 	switch (keyformat) {
3136 	case ZFS_KEYFORMAT_HEX:
3137 		for (i = 0; i < WRAPPING_KEY_LEN * 2; i += 2) {
3138 			if (!isxdigit(key_material[i]) ||
3139 			    !isxdigit(key_material[i+1]))
3140 				return (B_FALSE);
3141 			if (sscanf(&key_material[i], "%02hhx", &c) != 1)
3142 				return (B_FALSE);
3143 			key_out[i / 2] = c;
3144 		}
3145 		break;
3146 
3147 	case ZFS_KEYFORMAT_PASSPHRASE:
3148 		VERIFY0(zap_lookup(dd->dd_pool->dp_meta_objset,
3149 		    dd->dd_crypto_obj, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT),
3150 		    sizeof (uint64_t), 1, &salt));
3151 		VERIFY0(zap_lookup(dd->dd_pool->dp_meta_objset,
3152 		    dd->dd_crypto_obj, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS),
3153 		    sizeof (uint64_t), 1, &iters));
3154 
3155 		if (PKCS5_PBKDF2_HMAC_SHA1(key_material, strlen(key_material),
3156 		    ((uint8_t *)&salt), sizeof (uint64_t), iters,
3157 		    WRAPPING_KEY_LEN, key_out) != 1)
3158 			return (B_FALSE);
3159 
3160 		break;
3161 
3162 	default:
3163 		fatal("no support for key format %u\n",
3164 		    (unsigned int) keyformat);
3165 	}
3166 
3167 	return (B_TRUE);
3168 }
3169 
3170 static char encroot[ZFS_MAX_DATASET_NAME_LEN];
3171 static boolean_t key_loaded = B_FALSE;
3172 
3173 static void
3174 zdb_load_key(objset_t *os)
3175 {
3176 	dsl_pool_t *dp;
3177 	dsl_dir_t *dd, *rdd;
3178 	uint8_t key[WRAPPING_KEY_LEN];
3179 	uint64_t rddobj;
3180 	int err;
3181 
3182 	dp = spa_get_dsl(os->os_spa);
3183 	dd = os->os_dsl_dataset->ds_dir;
3184 
3185 	dsl_pool_config_enter(dp, FTAG);
3186 	VERIFY0(zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj,
3187 	    DSL_CRYPTO_KEY_ROOT_DDOBJ, sizeof (uint64_t), 1, &rddobj));
3188 	VERIFY0(dsl_dir_hold_obj(dd->dd_pool, rddobj, NULL, FTAG, &rdd));
3189 	dsl_dir_name(rdd, encroot);
3190 	dsl_dir_rele(rdd, FTAG);
3191 
3192 	if (!zdb_derive_key(dd, key))
3193 		fatal("couldn't derive encryption key");
3194 
3195 	dsl_pool_config_exit(dp, FTAG);
3196 
3197 	ASSERT3U(dsl_dataset_get_keystatus(dd), ==, ZFS_KEYSTATUS_UNAVAILABLE);
3198 
3199 	dsl_crypto_params_t *dcp;
3200 	nvlist_t *crypto_args;
3201 
3202 	crypto_args = fnvlist_alloc();
3203 	fnvlist_add_uint8_array(crypto_args, "wkeydata",
3204 	    (uint8_t *)key, WRAPPING_KEY_LEN);
3205 	VERIFY0(dsl_crypto_params_create_nvlist(DCP_CMD_NONE,
3206 	    NULL, crypto_args, &dcp));
3207 	err = spa_keystore_load_wkey(encroot, dcp, B_FALSE);
3208 
3209 	dsl_crypto_params_free(dcp, (err != 0));
3210 	fnvlist_free(crypto_args);
3211 
3212 	if (err != 0)
3213 		fatal(
3214 		    "couldn't load encryption key for %s: %s",
3215 		    encroot, err == ZFS_ERR_CRYPTO_NOTSUP ?
3216 		    "crypto params not supported" : strerror(err));
3217 
3218 	ASSERT3U(dsl_dataset_get_keystatus(dd), ==, ZFS_KEYSTATUS_AVAILABLE);
3219 
3220 	printf("Unlocked encryption root: %s\n", encroot);
3221 	key_loaded = B_TRUE;
3222 }
3223 
3224 static void
3225 zdb_unload_key(void)
3226 {
3227 	if (!key_loaded)
3228 		return;
3229 
3230 	VERIFY0(spa_keystore_unload_wkey(encroot));
3231 	key_loaded = B_FALSE;
3232 }
3233 
3234 static avl_tree_t idx_tree;
3235 static avl_tree_t domain_tree;
3236 static boolean_t fuid_table_loaded;
3237 static objset_t *sa_os = NULL;
3238 static sa_attr_type_t *sa_attr_table = NULL;
3239 
3240 static int
3241 open_objset(const char *path, const void *tag, objset_t **osp)
3242 {
3243 	int err;
3244 	uint64_t sa_attrs = 0;
3245 	uint64_t version = 0;
3246 
3247 	VERIFY3P(sa_os, ==, NULL);
3248 
3249 	/*
3250 	 * We can't own an objset if it's redacted.  Therefore, we do this
3251 	 * dance: hold the objset, then acquire a long hold on its dataset, then
3252 	 * release the pool (which is held as part of holding the objset).
3253 	 */
3254 
3255 	if (dump_opt['K']) {
3256 		/* decryption requested, try to load keys */
3257 		err = dmu_objset_hold(path, tag, osp);
3258 		if (err != 0) {
3259 			(void) fprintf(stderr, "failed to hold dataset "
3260 			    "'%s': %s\n",
3261 			    path, strerror(err));
3262 			return (err);
3263 		}
3264 		dsl_dataset_long_hold(dmu_objset_ds(*osp), tag);
3265 		dsl_pool_rele(dmu_objset_pool(*osp), tag);
3266 
3267 		/* succeeds or dies */
3268 		zdb_load_key(*osp);
3269 
3270 		/* release it all */
3271 		dsl_dataset_long_rele(dmu_objset_ds(*osp), tag);
3272 		dsl_dataset_rele(dmu_objset_ds(*osp), tag);
3273 	}
3274 
3275 	int ds_hold_flags = key_loaded ? DS_HOLD_FLAG_DECRYPT : 0;
3276 
3277 	err = dmu_objset_hold_flags(path, ds_hold_flags, tag, osp);
3278 	if (err != 0) {
3279 		(void) fprintf(stderr, "failed to hold dataset '%s': %s\n",
3280 		    path, strerror(err));
3281 		return (err);
3282 	}
3283 	dsl_dataset_long_hold(dmu_objset_ds(*osp), tag);
3284 	dsl_pool_rele(dmu_objset_pool(*osp), tag);
3285 
3286 	if (dmu_objset_type(*osp) == DMU_OST_ZFS &&
3287 	    (key_loaded || !(*osp)->os_encrypted)) {
3288 		(void) zap_lookup(*osp, MASTER_NODE_OBJ, ZPL_VERSION_STR,
3289 		    8, 1, &version);
3290 		if (version >= ZPL_VERSION_SA) {
3291 			(void) zap_lookup(*osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS,
3292 			    8, 1, &sa_attrs);
3293 		}
3294 		err = sa_setup(*osp, sa_attrs, zfs_attr_table, ZPL_END,
3295 		    &sa_attr_table);
3296 		if (err != 0) {
3297 			(void) fprintf(stderr, "sa_setup failed: %s\n",
3298 			    strerror(err));
3299 			dsl_dataset_long_rele(dmu_objset_ds(*osp), tag);
3300 			dsl_dataset_rele_flags(dmu_objset_ds(*osp),
3301 			    ds_hold_flags, tag);
3302 			*osp = NULL;
3303 		}
3304 	}
3305 	sa_os = *osp;
3306 
3307 	return (err);
3308 }
3309 
3310 static void
3311 close_objset(objset_t *os, const void *tag)
3312 {
3313 	VERIFY3P(os, ==, sa_os);
3314 	if (os->os_sa != NULL)
3315 		sa_tear_down(os);
3316 	dsl_dataset_long_rele(dmu_objset_ds(os), tag);
3317 	dsl_dataset_rele_flags(dmu_objset_ds(os),
3318 	    key_loaded ? DS_HOLD_FLAG_DECRYPT : 0, tag);
3319 	sa_attr_table = NULL;
3320 	sa_os = NULL;
3321 
3322 	zdb_unload_key();
3323 }
3324 
3325 static void
3326 fuid_table_destroy(void)
3327 {
3328 	if (fuid_table_loaded) {
3329 		zfs_fuid_table_destroy(&idx_tree, &domain_tree);
3330 		fuid_table_loaded = B_FALSE;
3331 	}
3332 }
3333 
3334 /*
3335  * print uid or gid information.
3336  * For normal POSIX id just the id is printed in decimal format.
3337  * For CIFS files with FUID the fuid is printed in hex followed by
3338  * the domain-rid string.
3339  */
3340 static void
3341 print_idstr(uint64_t id, const char *id_type)
3342 {
3343 	if (FUID_INDEX(id)) {
3344 		const char *domain =
3345 		    zfs_fuid_idx_domain(&idx_tree, FUID_INDEX(id));
3346 		(void) printf("\t%s     %llx [%s-%d]\n", id_type,
3347 		    (u_longlong_t)id, domain, (int)FUID_RID(id));
3348 	} else {
3349 		(void) printf("\t%s     %llu\n", id_type, (u_longlong_t)id);
3350 	}
3351 
3352 }
3353 
3354 static void
3355 dump_uidgid(objset_t *os, uint64_t uid, uint64_t gid)
3356 {
3357 	uint32_t uid_idx, gid_idx;
3358 
3359 	uid_idx = FUID_INDEX(uid);
3360 	gid_idx = FUID_INDEX(gid);
3361 
3362 	/* Load domain table, if not already loaded */
3363 	if (!fuid_table_loaded && (uid_idx || gid_idx)) {
3364 		uint64_t fuid_obj;
3365 
3366 		/* first find the fuid object.  It lives in the master node */
3367 		VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES,
3368 		    8, 1, &fuid_obj) == 0);
3369 		zfs_fuid_avl_tree_create(&idx_tree, &domain_tree);
3370 		(void) zfs_fuid_table_load(os, fuid_obj,
3371 		    &idx_tree, &domain_tree);
3372 		fuid_table_loaded = B_TRUE;
3373 	}
3374 
3375 	print_idstr(uid, "uid");
3376 	print_idstr(gid, "gid");
3377 }
3378 
3379 static void
3380 dump_znode_sa_xattr(sa_handle_t *hdl)
3381 {
3382 	nvlist_t *sa_xattr;
3383 	nvpair_t *elem = NULL;
3384 	int sa_xattr_size = 0;
3385 	int sa_xattr_entries = 0;
3386 	int error;
3387 	char *sa_xattr_packed;
3388 
3389 	error = sa_size(hdl, sa_attr_table[ZPL_DXATTR], &sa_xattr_size);
3390 	if (error || sa_xattr_size == 0)
3391 		return;
3392 
3393 	sa_xattr_packed = malloc(sa_xattr_size);
3394 	if (sa_xattr_packed == NULL)
3395 		return;
3396 
3397 	error = sa_lookup(hdl, sa_attr_table[ZPL_DXATTR],
3398 	    sa_xattr_packed, sa_xattr_size);
3399 	if (error) {
3400 		free(sa_xattr_packed);
3401 		return;
3402 	}
3403 
3404 	error = nvlist_unpack(sa_xattr_packed, sa_xattr_size, &sa_xattr, 0);
3405 	if (error) {
3406 		free(sa_xattr_packed);
3407 		return;
3408 	}
3409 
3410 	while ((elem = nvlist_next_nvpair(sa_xattr, elem)) != NULL)
3411 		sa_xattr_entries++;
3412 
3413 	(void) printf("\tSA xattrs: %d bytes, %d entries\n\n",
3414 	    sa_xattr_size, sa_xattr_entries);
3415 	while ((elem = nvlist_next_nvpair(sa_xattr, elem)) != NULL) {
3416 		boolean_t can_print = !dump_opt['P'];
3417 		uchar_t *value;
3418 		uint_t cnt, idx;
3419 
3420 		(void) printf("\t\t%s = ", nvpair_name(elem));
3421 		nvpair_value_byte_array(elem, &value, &cnt);
3422 
3423 		for (idx = 0; idx < cnt; ++idx) {
3424 			if (!isprint(value[idx])) {
3425 				can_print = B_FALSE;
3426 				break;
3427 			}
3428 		}
3429 
3430 		for (idx = 0; idx < cnt; ++idx) {
3431 			if (can_print)
3432 				(void) putchar(value[idx]);
3433 			else
3434 				(void) printf("\\%3.3o", value[idx]);
3435 		}
3436 		(void) putchar('\n');
3437 	}
3438 
3439 	nvlist_free(sa_xattr);
3440 	free(sa_xattr_packed);
3441 }
3442 
3443 static void
3444 dump_znode_symlink(sa_handle_t *hdl)
3445 {
3446 	int sa_symlink_size = 0;
3447 	char linktarget[MAXPATHLEN];
3448 	int error;
3449 
3450 	error = sa_size(hdl, sa_attr_table[ZPL_SYMLINK], &sa_symlink_size);
3451 	if (error || sa_symlink_size == 0) {
3452 		return;
3453 	}
3454 	if (sa_symlink_size >= sizeof (linktarget)) {
3455 		(void) printf("symlink size %d is too large\n",
3456 		    sa_symlink_size);
3457 		return;
3458 	}
3459 	linktarget[sa_symlink_size] = '\0';
3460 	if (sa_lookup(hdl, sa_attr_table[ZPL_SYMLINK],
3461 	    &linktarget, sa_symlink_size) == 0)
3462 		(void) printf("\ttarget	%s\n", linktarget);
3463 }
3464 
3465 static void
3466 dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
3467 {
3468 	(void) data, (void) size;
3469 	char path[MAXPATHLEN * 2];	/* allow for xattr and failure prefix */
3470 	sa_handle_t *hdl;
3471 	uint64_t xattr, rdev, gen;
3472 	uint64_t uid, gid, mode, fsize, parent, links;
3473 	uint64_t pflags;
3474 	uint64_t acctm[2], modtm[2], chgtm[2], crtm[2];
3475 	time_t z_crtime, z_atime, z_mtime, z_ctime;
3476 	sa_bulk_attr_t bulk[12];
3477 	int idx = 0;
3478 	int error;
3479 
3480 	VERIFY3P(os, ==, sa_os);
3481 	if (sa_handle_get(os, object, NULL, SA_HDL_PRIVATE, &hdl)) {
3482 		(void) printf("Failed to get handle for SA znode\n");
3483 		return;
3484 	}
3485 
3486 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_UID], NULL, &uid, 8);
3487 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GID], NULL, &gid, 8);
3488 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_LINKS], NULL,
3489 	    &links, 8);
3490 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GEN], NULL, &gen, 8);
3491 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MODE], NULL,
3492 	    &mode, 8);
3493 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_PARENT],
3494 	    NULL, &parent, 8);
3495 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_SIZE], NULL,
3496 	    &fsize, 8);
3497 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_ATIME], NULL,
3498 	    acctm, 16);
3499 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MTIME], NULL,
3500 	    modtm, 16);
3501 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CRTIME], NULL,
3502 	    crtm, 16);
3503 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CTIME], NULL,
3504 	    chgtm, 16);
3505 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_FLAGS], NULL,
3506 	    &pflags, 8);
3507 
3508 	if (sa_bulk_lookup(hdl, bulk, idx)) {
3509 		(void) sa_handle_destroy(hdl);
3510 		return;
3511 	}
3512 
3513 	z_crtime = (time_t)crtm[0];
3514 	z_atime = (time_t)acctm[0];
3515 	z_mtime = (time_t)modtm[0];
3516 	z_ctime = (time_t)chgtm[0];
3517 
3518 	if (dump_opt['d'] > 4) {
3519 		error = zfs_obj_to_path(os, object, path, sizeof (path));
3520 		if (error == ESTALE) {
3521 			(void) snprintf(path, sizeof (path), "on delete queue");
3522 		} else if (error != 0) {
3523 			leaked_objects++;
3524 			(void) snprintf(path, sizeof (path),
3525 			    "path not found, possibly leaked");
3526 		}
3527 		(void) printf("\tpath	%s\n", path);
3528 	}
3529 
3530 	if (S_ISLNK(mode))
3531 		dump_znode_symlink(hdl);
3532 	dump_uidgid(os, uid, gid);
3533 	(void) printf("\tatime	%s", ctime(&z_atime));
3534 	(void) printf("\tmtime	%s", ctime(&z_mtime));
3535 	(void) printf("\tctime	%s", ctime(&z_ctime));
3536 	(void) printf("\tcrtime	%s", ctime(&z_crtime));
3537 	(void) printf("\tgen	%llu\n", (u_longlong_t)gen);
3538 	(void) printf("\tmode	%llo\n", (u_longlong_t)mode);
3539 	(void) printf("\tsize	%llu\n", (u_longlong_t)fsize);
3540 	(void) printf("\tparent	%llu\n", (u_longlong_t)parent);
3541 	(void) printf("\tlinks	%llu\n", (u_longlong_t)links);
3542 	(void) printf("\tpflags	%llx\n", (u_longlong_t)pflags);
3543 	if (dmu_objset_projectquota_enabled(os) && (pflags & ZFS_PROJID)) {
3544 		uint64_t projid;
3545 
3546 		if (sa_lookup(hdl, sa_attr_table[ZPL_PROJID], &projid,
3547 		    sizeof (uint64_t)) == 0)
3548 			(void) printf("\tprojid	%llu\n", (u_longlong_t)projid);
3549 	}
3550 	if (sa_lookup(hdl, sa_attr_table[ZPL_XATTR], &xattr,
3551 	    sizeof (uint64_t)) == 0)
3552 		(void) printf("\txattr	%llu\n", (u_longlong_t)xattr);
3553 	if (sa_lookup(hdl, sa_attr_table[ZPL_RDEV], &rdev,
3554 	    sizeof (uint64_t)) == 0)
3555 		(void) printf("\trdev	0x%016llx\n", (u_longlong_t)rdev);
3556 	dump_znode_sa_xattr(hdl);
3557 	sa_handle_destroy(hdl);
3558 }
3559 
3560 static void
3561 dump_acl(objset_t *os, uint64_t object, void *data, size_t size)
3562 {
3563 	(void) os, (void) object, (void) data, (void) size;
3564 }
3565 
3566 static void
3567 dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size)
3568 {
3569 	(void) os, (void) object, (void) data, (void) size;
3570 }
3571 
/*
 * Dispatch table of per-type object viewers.  dump_object() indexes it with
 * ZDB_OT_TYPE() of an object's type or bonus type, so the entries are
 * positional: each one must stay in the slot matching the type named in its
 * comment.  The final slot (index DMU_OT_NUMTYPES) is the viewer for
 * unknown types.
 */
static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = {
	dump_none,		/* unallocated			*/
	dump_zap,		/* object directory		*/
	dump_uint64,		/* object array			*/
	dump_none,		/* packed nvlist		*/
	dump_packed_nvlist,	/* packed nvlist size		*/
	dump_none,		/* bpobj			*/
	dump_bpobj,		/* bpobj header			*/
	dump_none,		/* SPA space map header		*/
	dump_none,		/* SPA space map		*/
	dump_none,		/* ZIL intent log		*/
	dump_dnode,		/* DMU dnode			*/
	dump_dmu_objset,	/* DMU objset			*/
	dump_dsl_dir,		/* DSL directory		*/
	dump_zap,		/* DSL directory child map	*/
	dump_zap,		/* DSL dataset snap map		*/
	dump_zap,		/* DSL props			*/
	dump_dsl_dataset,	/* DSL dataset			*/
	dump_znode,		/* ZFS znode			*/
	dump_acl,		/* ZFS V0 ACL			*/
	dump_uint8,		/* ZFS plain file		*/
	dump_zpldir,		/* ZFS directory		*/
	dump_zap,		/* ZFS master node		*/
	dump_zap,		/* ZFS delete queue		*/
	dump_uint8,		/* zvol object			*/
	dump_zap,		/* zvol prop			*/
	dump_uint8,		/* other uint8[]		*/
	dump_uint64,		/* other uint64[]		*/
	dump_zap,		/* other ZAP			*/
	dump_zap,		/* persistent error log		*/
	dump_uint8,		/* SPA history			*/
	dump_history_offsets,	/* SPA history offsets		*/
	dump_zap,		/* Pool properties		*/
	dump_zap,		/* DSL permissions		*/
	dump_acl,		/* ZFS ACL			*/
	dump_uint8,		/* ZFS SYSACL			*/
	dump_none,		/* FUID nvlist			*/
	dump_packed_nvlist,	/* FUID nvlist size		*/
	dump_zap,		/* DSL dataset next clones	*/
	dump_zap,		/* DSL scrub queue		*/
	dump_zap,		/* ZFS user/group/project used	*/
	dump_zap,		/* ZFS user/group/project quota	*/
	dump_zap,		/* snapshot refcount tags	*/
	dump_ddt_zap,		/* DDT ZAP object		*/
	dump_zap,		/* DDT statistics		*/
	dump_znode,		/* SA object			*/
	dump_zap,		/* SA Master Node		*/
	dump_sa_attrs,		/* SA attribute registration	*/
	dump_sa_layouts,	/* SA attribute layouts		*/
	dump_zap,		/* DSL scrub translations	*/
	dump_none,		/* fake dedup BP		*/
	dump_zap,		/* deadlist			*/
	dump_none,		/* deadlist hdr			*/
	dump_zap,		/* dsl clones			*/
	dump_bpobj_subobjs,	/* bpobj subobjs		*/
	dump_unknown,		/* Unknown type, must be last	*/
};
3629 
3630 static boolean_t
3631 match_object_type(dmu_object_type_t obj_type, uint64_t flags)
3632 {
3633 	boolean_t match = B_TRUE;
3634 
3635 	switch (obj_type) {
3636 	case DMU_OT_DIRECTORY_CONTENTS:
3637 		if (!(flags & ZOR_FLAG_DIRECTORY))
3638 			match = B_FALSE;
3639 		break;
3640 	case DMU_OT_PLAIN_FILE_CONTENTS:
3641 		if (!(flags & ZOR_FLAG_PLAIN_FILE))
3642 			match = B_FALSE;
3643 		break;
3644 	case DMU_OT_SPACE_MAP:
3645 		if (!(flags & ZOR_FLAG_SPACE_MAP))
3646 			match = B_FALSE;
3647 		break;
3648 	default:
3649 		if (strcmp(zdb_ot_name(obj_type), "zap") == 0) {
3650 			if (!(flags & ZOR_FLAG_ZAP))
3651 				match = B_FALSE;
3652 			break;
3653 		}
3654 
3655 		/*
3656 		 * If all bits except some of the supported flags are
3657 		 * set, the user combined the all-types flag (A) with
3658 		 * a negated flag to exclude some types (e.g. A-f to
3659 		 * show all object types except plain files).
3660 		 */
3661 		if ((flags | ZOR_SUPPORTED_FLAGS) != ZOR_FLAG_ALL_TYPES)
3662 			match = B_FALSE;
3663 
3664 		break;
3665 	}
3666 
3667 	return (match);
3668 }
3669 
3670 static void
3671 dump_object(objset_t *os, uint64_t object, int verbosity,
3672     boolean_t *print_header, uint64_t *dnode_slots_used, uint64_t flags)
3673 {
3674 	dmu_buf_t *db = NULL;
3675 	dmu_object_info_t doi;
3676 	dnode_t *dn;
3677 	boolean_t dnode_held = B_FALSE;
3678 	void *bonus = NULL;
3679 	size_t bsize = 0;
3680 	char iblk[32], dblk[32], lsize[32], asize[32], fill[32], dnsize[32];
3681 	char bonus_size[32];
3682 	char aux[50];
3683 	int error;
3684 
3685 	/* make sure nicenum has enough space */
3686 	_Static_assert(sizeof (iblk) >= NN_NUMBUF_SZ, "iblk truncated");
3687 	_Static_assert(sizeof (dblk) >= NN_NUMBUF_SZ, "dblk truncated");
3688 	_Static_assert(sizeof (lsize) >= NN_NUMBUF_SZ, "lsize truncated");
3689 	_Static_assert(sizeof (asize) >= NN_NUMBUF_SZ, "asize truncated");
3690 	_Static_assert(sizeof (bonus_size) >= NN_NUMBUF_SZ,
3691 	    "bonus_size truncated");
3692 
3693 	if (*print_header) {
3694 		(void) printf("\n%10s  %3s  %5s  %5s  %5s  %6s  %5s  %6s  %s\n",
3695 		    "Object", "lvl", "iblk", "dblk", "dsize", "dnsize",
3696 		    "lsize", "%full", "type");
3697 		*print_header = 0;
3698 	}
3699 
3700 	if (object == 0) {
3701 		dn = DMU_META_DNODE(os);
3702 		dmu_object_info_from_dnode(dn, &doi);
3703 	} else {
3704 		/*
3705 		 * Encrypted datasets will have sensitive bonus buffers
3706 		 * encrypted. Therefore we cannot hold the bonus buffer and
3707 		 * must hold the dnode itself instead.
3708 		 */
3709 		error = dmu_object_info(os, object, &doi);
3710 		if (error)
3711 			fatal("dmu_object_info() failed, errno %u", error);
3712 
3713 		if (!key_loaded && os->os_encrypted &&
3714 		    DMU_OT_IS_ENCRYPTED(doi.doi_bonus_type)) {
3715 			error = dnode_hold(os, object, FTAG, &dn);
3716 			if (error)
3717 				fatal("dnode_hold() failed, errno %u", error);
3718 			dnode_held = B_TRUE;
3719 		} else {
3720 			error = dmu_bonus_hold(os, object, FTAG, &db);
3721 			if (error)
3722 				fatal("dmu_bonus_hold(%llu) failed, errno %u",
3723 				    object, error);
3724 			bonus = db->db_data;
3725 			bsize = db->db_size;
3726 			dn = DB_DNODE((dmu_buf_impl_t *)db);
3727 		}
3728 	}
3729 
3730 	/*
3731 	 * Default to showing all object types if no flags were specified.
3732 	 */
3733 	if (flags != 0 && flags != ZOR_FLAG_ALL_TYPES &&
3734 	    !match_object_type(doi.doi_type, flags))
3735 		goto out;
3736 
3737 	if (dnode_slots_used)
3738 		*dnode_slots_used = doi.doi_dnodesize / DNODE_MIN_SIZE;
3739 
3740 	zdb_nicenum(doi.doi_metadata_block_size, iblk, sizeof (iblk));
3741 	zdb_nicenum(doi.doi_data_block_size, dblk, sizeof (dblk));
3742 	zdb_nicenum(doi.doi_max_offset, lsize, sizeof (lsize));
3743 	zdb_nicenum(doi.doi_physical_blocks_512 << 9, asize, sizeof (asize));
3744 	zdb_nicenum(doi.doi_bonus_size, bonus_size, sizeof (bonus_size));
3745 	zdb_nicenum(doi.doi_dnodesize, dnsize, sizeof (dnsize));
3746 	(void) snprintf(fill, sizeof (fill), "%6.2f", 100.0 *
3747 	    doi.doi_fill_count * doi.doi_data_block_size / (object == 0 ?
3748 	    DNODES_PER_BLOCK : 1) / doi.doi_max_offset);
3749 
3750 	aux[0] = '\0';
3751 
3752 	if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) {
3753 		(void) snprintf(aux + strlen(aux), sizeof (aux) - strlen(aux),
3754 		    " (K=%s)", ZDB_CHECKSUM_NAME(doi.doi_checksum));
3755 	}
3756 
3757 	if (doi.doi_compress == ZIO_COMPRESS_INHERIT &&
3758 	    ZIO_COMPRESS_HASLEVEL(os->os_compress) && verbosity >= 6) {
3759 		const char *compname = NULL;
3760 		if (zfs_prop_index_to_string(ZFS_PROP_COMPRESSION,
3761 		    ZIO_COMPRESS_RAW(os->os_compress, os->os_complevel),
3762 		    &compname) == 0) {
3763 			(void) snprintf(aux + strlen(aux),
3764 			    sizeof (aux) - strlen(aux), " (Z=inherit=%s)",
3765 			    compname);
3766 		} else {
3767 			(void) snprintf(aux + strlen(aux),
3768 			    sizeof (aux) - strlen(aux),
3769 			    " (Z=inherit=%s-unknown)",
3770 			    ZDB_COMPRESS_NAME(os->os_compress));
3771 		}
3772 	} else if (doi.doi_compress == ZIO_COMPRESS_INHERIT && verbosity >= 6) {
3773 		(void) snprintf(aux + strlen(aux), sizeof (aux) - strlen(aux),
3774 		    " (Z=inherit=%s)", ZDB_COMPRESS_NAME(os->os_compress));
3775 	} else if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) {
3776 		(void) snprintf(aux + strlen(aux), sizeof (aux) - strlen(aux),
3777 		    " (Z=%s)", ZDB_COMPRESS_NAME(doi.doi_compress));
3778 	}
3779 
3780 	(void) printf("%10lld  %3u  %5s  %5s  %5s  %6s  %5s  %6s  %s%s\n",
3781 	    (u_longlong_t)object, doi.doi_indirection, iblk, dblk,
3782 	    asize, dnsize, lsize, fill, zdb_ot_name(doi.doi_type), aux);
3783 
3784 	if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) {
3785 		(void) printf("%10s  %3s  %5s  %5s  %5s  %5s  %5s  %6s  %s\n",
3786 		    "", "", "", "", "", "", bonus_size, "bonus",
3787 		    zdb_ot_name(doi.doi_bonus_type));
3788 	}
3789 
3790 	if (verbosity >= 4) {
3791 		(void) printf("\tdnode flags: %s%s%s%s\n",
3792 		    (dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ?
3793 		    "USED_BYTES " : "",
3794 		    (dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ?
3795 		    "USERUSED_ACCOUNTED " : "",
3796 		    (dn->dn_phys->dn_flags & DNODE_FLAG_USEROBJUSED_ACCOUNTED) ?
3797 		    "USEROBJUSED_ACCOUNTED " : "",
3798 		    (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ?
3799 		    "SPILL_BLKPTR" : "");
3800 		(void) printf("\tdnode maxblkid: %llu\n",
3801 		    (longlong_t)dn->dn_phys->dn_maxblkid);
3802 
3803 		if (!dnode_held) {
3804 			object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os,
3805 			    object, bonus, bsize);
3806 		} else {
3807 			(void) printf("\t\t(bonus encrypted)\n");
3808 		}
3809 
3810 		if (key_loaded ||
3811 		    (!os->os_encrypted || !DMU_OT_IS_ENCRYPTED(doi.doi_type))) {
3812 			object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object,
3813 			    NULL, 0);
3814 		} else {
3815 			(void) printf("\t\t(object encrypted)\n");
3816 		}
3817 
3818 		*print_header = B_TRUE;
3819 	}
3820 
3821 	if (verbosity >= 5) {
3822 		if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
3823 			char blkbuf[BP_SPRINTF_LEN];
3824 			snprintf_blkptr_compact(blkbuf, sizeof (blkbuf),
3825 			    DN_SPILL_BLKPTR(dn->dn_phys), B_FALSE);
3826 			(void) printf("\nSpill block: %s\n", blkbuf);
3827 		}
3828 		dump_indirect(dn);
3829 	}
3830 
3831 	if (verbosity >= 5) {
3832 		/*
3833 		 * Report the list of segments that comprise the object.
3834 		 */
3835 		uint64_t start = 0;
3836 		uint64_t end;
3837 		uint64_t blkfill = 1;
3838 		int minlvl = 1;
3839 
3840 		if (dn->dn_type == DMU_OT_DNODE) {
3841 			minlvl = 0;
3842 			blkfill = DNODES_PER_BLOCK;
3843 		}
3844 
3845 		for (;;) {
3846 			char segsize[32];
3847 			/* make sure nicenum has enough space */
3848 			_Static_assert(sizeof (segsize) >= NN_NUMBUF_SZ,
3849 			    "segsize truncated");
3850 			error = dnode_next_offset(dn,
3851 			    0, &start, minlvl, blkfill, 0);
3852 			if (error)
3853 				break;
3854 			end = start;
3855 			error = dnode_next_offset(dn,
3856 			    DNODE_FIND_HOLE, &end, minlvl, blkfill, 0);
3857 			zdb_nicenum(end - start, segsize, sizeof (segsize));
3858 			(void) printf("\t\tsegment [%016llx, %016llx)"
3859 			    " size %5s\n", (u_longlong_t)start,
3860 			    (u_longlong_t)end, segsize);
3861 			if (error)
3862 				break;
3863 			start = end;
3864 		}
3865 	}
3866 
3867 out:
3868 	if (db != NULL)
3869 		dmu_buf_rele(db, FTAG);
3870 	if (dnode_held)
3871 		dnode_rele(dn, FTAG);
3872 }
3873 
3874 static void
3875 count_dir_mos_objects(dsl_dir_t *dd)
3876 {
3877 	mos_obj_refd(dd->dd_object);
3878 	mos_obj_refd(dsl_dir_phys(dd)->dd_child_dir_zapobj);
3879 	mos_obj_refd(dsl_dir_phys(dd)->dd_deleg_zapobj);
3880 	mos_obj_refd(dsl_dir_phys(dd)->dd_props_zapobj);
3881 	mos_obj_refd(dsl_dir_phys(dd)->dd_clones);
3882 
3883 	/*
3884 	 * The dd_crypto_obj can be referenced by multiple dsl_dir's.
3885 	 * Ignore the references after the first one.
3886 	 */
3887 	mos_obj_refd_multiple(dd->dd_crypto_obj);
3888 }
3889 
3890 static void
3891 count_ds_mos_objects(dsl_dataset_t *ds)
3892 {
3893 	mos_obj_refd(ds->ds_object);
3894 	mos_obj_refd(dsl_dataset_phys(ds)->ds_next_clones_obj);
3895 	mos_obj_refd(dsl_dataset_phys(ds)->ds_props_obj);
3896 	mos_obj_refd(dsl_dataset_phys(ds)->ds_userrefs_obj);
3897 	mos_obj_refd(dsl_dataset_phys(ds)->ds_snapnames_zapobj);
3898 	mos_obj_refd(ds->ds_bookmarks_obj);
3899 
3900 	if (!dsl_dataset_is_snapshot(ds)) {
3901 		count_dir_mos_objects(ds->ds_dir);
3902 	}
3903 }
3904 
3905 static const char *const objset_types[DMU_OST_NUMTYPES] = {
3906 	"NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" };
3907 
3908 /*
3909  * Parse a string denoting a range of object IDs of the form
3910  * <start>[:<end>[:flags]], and store the results in zor.
3911  * Return 0 on success. On error, return 1 and update the msg
3912  * pointer to point to a descriptive error message.
3913  */
3914 static int
3915 parse_object_range(char *range, zopt_object_range_t *zor, const char **msg)
3916 {
3917 	uint64_t flags = 0;
3918 	char *p, *s, *dup, *flagstr, *tmp = NULL;
3919 	size_t len;
3920 	int i;
3921 	int rc = 0;
3922 
3923 	if (strchr(range, ':') == NULL) {
3924 		zor->zor_obj_start = strtoull(range, &p, 0);
3925 		if (*p != '\0') {
3926 			*msg = "Invalid characters in object ID";
3927 			rc = 1;
3928 		}
3929 		zor->zor_obj_start = ZDB_MAP_OBJECT_ID(zor->zor_obj_start);
3930 		zor->zor_obj_end = zor->zor_obj_start;
3931 		return (rc);
3932 	}
3933 
3934 	if (strchr(range, ':') == range) {
3935 		*msg = "Invalid leading colon";
3936 		rc = 1;
3937 		return (rc);
3938 	}
3939 
3940 	len = strlen(range);
3941 	if (range[len - 1] == ':') {
3942 		*msg = "Invalid trailing colon";
3943 		rc = 1;
3944 		return (rc);
3945 	}
3946 
3947 	dup = strdup(range);
3948 	s = strtok_r(dup, ":", &tmp);
3949 	zor->zor_obj_start = strtoull(s, &p, 0);
3950 
3951 	if (*p != '\0') {
3952 		*msg = "Invalid characters in start object ID";
3953 		rc = 1;
3954 		goto out;
3955 	}
3956 
3957 	s = strtok_r(NULL, ":", &tmp);
3958 	zor->zor_obj_end = strtoull(s, &p, 0);
3959 
3960 	if (*p != '\0') {
3961 		*msg = "Invalid characters in end object ID";
3962 		rc = 1;
3963 		goto out;
3964 	}
3965 
3966 	if (zor->zor_obj_start > zor->zor_obj_end) {
3967 		*msg = "Start object ID may not exceed end object ID";
3968 		rc = 1;
3969 		goto out;
3970 	}
3971 
3972 	s = strtok_r(NULL, ":", &tmp);
3973 	if (s == NULL) {
3974 		zor->zor_flags = ZOR_FLAG_ALL_TYPES;
3975 		goto out;
3976 	} else if (strtok_r(NULL, ":", &tmp) != NULL) {
3977 		*msg = "Invalid colon-delimited field after flags";
3978 		rc = 1;
3979 		goto out;
3980 	}
3981 
3982 	flagstr = s;
3983 	for (i = 0; flagstr[i]; i++) {
3984 		int bit;
3985 		boolean_t negation = (flagstr[i] == '-');
3986 
3987 		if (negation) {
3988 			i++;
3989 			if (flagstr[i] == '\0') {
3990 				*msg = "Invalid trailing negation operator";
3991 				rc = 1;
3992 				goto out;
3993 			}
3994 		}
3995 		bit = flagbits[(uchar_t)flagstr[i]];
3996 		if (bit == 0) {
3997 			*msg = "Invalid flag";
3998 			rc = 1;
3999 			goto out;
4000 		}
4001 		if (negation)
4002 			flags &= ~bit;
4003 		else
4004 			flags |= bit;
4005 	}
4006 	zor->zor_flags = flags;
4007 
4008 	zor->zor_obj_start = ZDB_MAP_OBJECT_ID(zor->zor_obj_start);
4009 	zor->zor_obj_end = ZDB_MAP_OBJECT_ID(zor->zor_obj_end);
4010 
4011 out:
4012 	free(dup);
4013 	return (rc);
4014 }
4015 
4016 static void
4017 dump_objset(objset_t *os)
4018 {
4019 	dmu_objset_stats_t dds = { 0 };
4020 	uint64_t object, object_count;
4021 	uint64_t refdbytes, usedobjs, scratch;
4022 	char numbuf[32];
4023 	char blkbuf[BP_SPRINTF_LEN + 20];
4024 	char osname[ZFS_MAX_DATASET_NAME_LEN];
4025 	const char *type = "UNKNOWN";
4026 	int verbosity = dump_opt['d'];
4027 	boolean_t print_header;
4028 	unsigned i;
4029 	int error;
4030 	uint64_t total_slots_used = 0;
4031 	uint64_t max_slot_used = 0;
4032 	uint64_t dnode_slots;
4033 	uint64_t obj_start;
4034 	uint64_t obj_end;
4035 	uint64_t flags;
4036 
4037 	/* make sure nicenum has enough space */
4038 	_Static_assert(sizeof (numbuf) >= NN_NUMBUF_SZ, "numbuf truncated");
4039 
4040 	dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
4041 	dmu_objset_fast_stat(os, &dds);
4042 	dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
4043 
4044 	print_header = B_TRUE;
4045 
4046 	if (dds.dds_type < DMU_OST_NUMTYPES)
4047 		type = objset_types[dds.dds_type];
4048 
4049 	if (dds.dds_type == DMU_OST_META) {
4050 		dds.dds_creation_txg = TXG_INITIAL;
4051 		usedobjs = BP_GET_FILL(os->os_rootbp);
4052 		refdbytes = dsl_dir_phys(os->os_spa->spa_dsl_pool->dp_mos_dir)->
4053 		    dd_used_bytes;
4054 	} else {
4055 		dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
4056 	}
4057 
4058 	ASSERT3U(usedobjs, ==, BP_GET_FILL(os->os_rootbp));
4059 
4060 	zdb_nicenum(refdbytes, numbuf, sizeof (numbuf));
4061 
4062 	if (verbosity >= 4) {
4063 		(void) snprintf(blkbuf, sizeof (blkbuf), ", rootbp ");
4064 		(void) snprintf_blkptr(blkbuf + strlen(blkbuf),
4065 		    sizeof (blkbuf) - strlen(blkbuf), os->os_rootbp);
4066 	} else {
4067 		blkbuf[0] = '\0';
4068 	}
4069 
4070 	dmu_objset_name(os, osname);
4071 
4072 	(void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, "
4073 	    "%s, %llu objects%s%s\n",
4074 	    osname, type, (u_longlong_t)dmu_objset_id(os),
4075 	    (u_longlong_t)dds.dds_creation_txg,
4076 	    numbuf, (u_longlong_t)usedobjs, blkbuf,
4077 	    (dds.dds_inconsistent) ? " (inconsistent)" : "");
4078 
4079 	for (i = 0; i < zopt_object_args; i++) {
4080 		obj_start = zopt_object_ranges[i].zor_obj_start;
4081 		obj_end = zopt_object_ranges[i].zor_obj_end;
4082 		flags = zopt_object_ranges[i].zor_flags;
4083 
4084 		object = obj_start;
4085 		if (object == 0 || obj_start == obj_end)
4086 			dump_object(os, object, verbosity, &print_header, NULL,
4087 			    flags);
4088 		else
4089 			object--;
4090 
4091 		while ((dmu_object_next(os, &object, B_FALSE, 0) == 0) &&
4092 		    object <= obj_end) {
4093 			dump_object(os, object, verbosity, &print_header, NULL,
4094 			    flags);
4095 		}
4096 	}
4097 
4098 	if (zopt_object_args > 0) {
4099 		(void) printf("\n");
4100 		return;
4101 	}
4102 
4103 	if (dump_opt['i'] != 0 || verbosity >= 2)
4104 		dump_intent_log(dmu_objset_zil(os));
4105 
4106 	if (dmu_objset_ds(os) != NULL) {
4107 		dsl_dataset_t *ds = dmu_objset_ds(os);
4108 		dump_blkptr_list(&ds->ds_deadlist, "Deadlist");
4109 		if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist) &&
4110 		    !dmu_objset_is_snapshot(os)) {
4111 			dump_blkptr_list(&ds->ds_dir->dd_livelist, "Livelist");
4112 			if (verify_dd_livelist(os) != 0)
4113 				fatal("livelist is incorrect");
4114 		}
4115 
4116 		if (dsl_dataset_remap_deadlist_exists(ds)) {
4117 			(void) printf("ds_remap_deadlist:\n");
4118 			dump_blkptr_list(&ds->ds_remap_deadlist, "Deadlist");
4119 		}
4120 		count_ds_mos_objects(ds);
4121 	}
4122 
4123 	if (dmu_objset_ds(os) != NULL)
4124 		dump_bookmarks(os, verbosity);
4125 
4126 	if (verbosity < 2)
4127 		return;
4128 
4129 	if (BP_IS_HOLE(os->os_rootbp))
4130 		return;
4131 
4132 	dump_object(os, 0, verbosity, &print_header, NULL, 0);
4133 	object_count = 0;
4134 	if (DMU_USERUSED_DNODE(os) != NULL &&
4135 	    DMU_USERUSED_DNODE(os)->dn_type != 0) {
4136 		dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header,
4137 		    NULL, 0);
4138 		dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header,
4139 		    NULL, 0);
4140 	}
4141 
4142 	if (DMU_PROJECTUSED_DNODE(os) != NULL &&
4143 	    DMU_PROJECTUSED_DNODE(os)->dn_type != 0)
4144 		dump_object(os, DMU_PROJECTUSED_OBJECT, verbosity,
4145 		    &print_header, NULL, 0);
4146 
4147 	object = 0;
4148 	while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) {
4149 		dump_object(os, object, verbosity, &print_header, &dnode_slots,
4150 		    0);
4151 		object_count++;
4152 		total_slots_used += dnode_slots;
4153 		max_slot_used = object + dnode_slots - 1;
4154 	}
4155 
4156 	(void) printf("\n");
4157 
4158 	(void) printf("    Dnode slots:\n");
4159 	(void) printf("\tTotal used:    %10llu\n",
4160 	    (u_longlong_t)total_slots_used);
4161 	(void) printf("\tMax used:      %10llu\n",
4162 	    (u_longlong_t)max_slot_used);
4163 	(void) printf("\tPercent empty: %10lf\n",
4164 	    (double)(max_slot_used - total_slots_used)*100 /
4165 	    (double)max_slot_used);
4166 	(void) printf("\n");
4167 
4168 	if (error != ESRCH) {
4169 		(void) fprintf(stderr, "dmu_object_next() = %d\n", error);
4170 		abort();
4171 	}
4172 
4173 	ASSERT3U(object_count, ==, usedobjs);
4174 
4175 	if (leaked_objects != 0) {
4176 		(void) printf("%d potentially leaked objects detected\n",
4177 		    leaked_objects);
4178 		leaked_objects = 0;
4179 	}
4180 }
4181 
4182 static void
4183 dump_uberblock(uberblock_t *ub, const char *header, const char *footer)
4184 {
4185 	time_t timestamp = ub->ub_timestamp;
4186 
4187 	(void) printf("%s", header ? header : "");
4188 	(void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic);
4189 	(void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version);
4190 	(void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg);
4191 	(void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum);
4192 	(void) printf("\ttimestamp = %llu UTC = %s",
4193 	    (u_longlong_t)ub->ub_timestamp, ctime(&timestamp));
4194 
4195 	(void) printf("\tmmp_magic = %016llx\n",
4196 	    (u_longlong_t)ub->ub_mmp_magic);
4197 	if (MMP_VALID(ub)) {
4198 		(void) printf("\tmmp_delay = %0llu\n",
4199 		    (u_longlong_t)ub->ub_mmp_delay);
4200 		if (MMP_SEQ_VALID(ub))
4201 			(void) printf("\tmmp_seq = %u\n",
4202 			    (unsigned int) MMP_SEQ(ub));
4203 		if (MMP_FAIL_INT_VALID(ub))
4204 			(void) printf("\tmmp_fail = %u\n",
4205 			    (unsigned int) MMP_FAIL_INT(ub));
4206 		if (MMP_INTERVAL_VALID(ub))
4207 			(void) printf("\tmmp_write = %u\n",
4208 			    (unsigned int) MMP_INTERVAL(ub));
4209 		/* After MMP_* to make summarize_uberblock_mmp cleaner */
4210 		(void) printf("\tmmp_valid = %x\n",
4211 		    (unsigned int) ub->ub_mmp_config & 0xFF);
4212 	}
4213 
4214 	if (dump_opt['u'] >= 4) {
4215 		char blkbuf[BP_SPRINTF_LEN];
4216 		snprintf_blkptr(blkbuf, sizeof (blkbuf), &ub->ub_rootbp);
4217 		(void) printf("\trootbp = %s\n", blkbuf);
4218 	}
4219 	(void) printf("\tcheckpoint_txg = %llu\n",
4220 	    (u_longlong_t)ub->ub_checkpoint_txg);
4221 
4222 	(void) printf("\traidz_reflow state=%u off=%llu\n",
4223 	    (int)RRSS_GET_STATE(ub),
4224 	    (u_longlong_t)RRSS_GET_OFFSET(ub));
4225 
4226 	(void) printf("%s", footer ? footer : "");
4227 }
4228 
4229 static void
4230 dump_config(spa_t *spa)
4231 {
4232 	dmu_buf_t *db;
4233 	size_t nvsize = 0;
4234 	int error = 0;
4235 
4236 
4237 	error = dmu_bonus_hold(spa->spa_meta_objset,
4238 	    spa->spa_config_object, FTAG, &db);
4239 
4240 	if (error == 0) {
4241 		nvsize = *(uint64_t *)db->db_data;
4242 		dmu_buf_rele(db, FTAG);
4243 
4244 		(void) printf("\nMOS Configuration:\n");
4245 		dump_packed_nvlist(spa->spa_meta_objset,
4246 		    spa->spa_config_object, (void *)&nvsize, 1);
4247 	} else {
4248 		(void) fprintf(stderr, "dmu_bonus_hold(%llu) failed, errno %d",
4249 		    (u_longlong_t)spa->spa_config_object, error);
4250 	}
4251 }
4252 
4253 static void
4254 dump_cachefile(const char *cachefile)
4255 {
4256 	int fd;
4257 	struct stat64 statbuf;
4258 	char *buf;
4259 	nvlist_t *config;
4260 
4261 	if ((fd = open64(cachefile, O_RDONLY)) < 0) {
4262 		(void) printf("cannot open '%s': %s\n", cachefile,
4263 		    strerror(errno));
4264 		exit(1);
4265 	}
4266 
4267 	if (fstat64(fd, &statbuf) != 0) {
4268 		(void) printf("failed to stat '%s': %s\n", cachefile,
4269 		    strerror(errno));
4270 		exit(1);
4271 	}
4272 
4273 	if ((buf = malloc(statbuf.st_size)) == NULL) {
4274 		(void) fprintf(stderr, "failed to allocate %llu bytes\n",
4275 		    (u_longlong_t)statbuf.st_size);
4276 		exit(1);
4277 	}
4278 
4279 	if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
4280 		(void) fprintf(stderr, "failed to read %llu bytes\n",
4281 		    (u_longlong_t)statbuf.st_size);
4282 		exit(1);
4283 	}
4284 
4285 	(void) close(fd);
4286 
4287 	if (nvlist_unpack(buf, statbuf.st_size, &config, 0) != 0) {
4288 		(void) fprintf(stderr, "failed to unpack nvlist\n");
4289 		exit(1);
4290 	}
4291 
4292 	free(buf);
4293 
4294 	dump_nvlist(config, 0);
4295 
4296 	nvlist_free(config);
4297 }
4298 
/*
 * ZFS label nvlist stats
 *
 * Accumulator filled in by collect_nvlist_stats() and reported by
 * dump_nvlist_stats().
 */
typedef struct zdb_nvl_stats {
	/* number of nvlists visited, including nested ones */
	int		zns_list_count;
	/* number of elements seen in nvlist arrays named "children" */
	int		zns_leaf_count;
	/* largest XDR-encoded size among those elements */
	size_t		zns_leaf_largest;
	/* sum of the XDR-encoded sizes of those elements */
	size_t		zns_leaf_total;
	/* every string pair encountered, copied into one nvlist */
	nvlist_t	*zns_string;
	/* every uint64 pair encountered, copied into one nvlist */
	nvlist_t	*zns_uint64;
	/* every boolean pair encountered, copied into one nvlist */
	nvlist_t	*zns_boolean;
} zdb_nvl_stats_t;
4311 
4312 static void
4313 collect_nvlist_stats(nvlist_t *nvl, zdb_nvl_stats_t *stats)
4314 {
4315 	nvlist_t *list, **array;
4316 	nvpair_t *nvp = NULL;
4317 	const char *name;
4318 	uint_t i, items;
4319 
4320 	stats->zns_list_count++;
4321 
4322 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
4323 		name = nvpair_name(nvp);
4324 
4325 		switch (nvpair_type(nvp)) {
4326 		case DATA_TYPE_STRING:
4327 			fnvlist_add_string(stats->zns_string, name,
4328 			    fnvpair_value_string(nvp));
4329 			break;
4330 		case DATA_TYPE_UINT64:
4331 			fnvlist_add_uint64(stats->zns_uint64, name,
4332 			    fnvpair_value_uint64(nvp));
4333 			break;
4334 		case DATA_TYPE_BOOLEAN:
4335 			fnvlist_add_boolean(stats->zns_boolean, name);
4336 			break;
4337 		case DATA_TYPE_NVLIST:
4338 			if (nvpair_value_nvlist(nvp, &list) == 0)
4339 				collect_nvlist_stats(list, stats);
4340 			break;
4341 		case DATA_TYPE_NVLIST_ARRAY:
4342 			if (nvpair_value_nvlist_array(nvp, &array, &items) != 0)
4343 				break;
4344 
4345 			for (i = 0; i < items; i++) {
4346 				collect_nvlist_stats(array[i], stats);
4347 
4348 				/* collect stats on leaf vdev */
4349 				if (strcmp(name, "children") == 0) {
4350 					size_t size;
4351 
4352 					(void) nvlist_size(array[i], &size,
4353 					    NV_ENCODE_XDR);
4354 					stats->zns_leaf_total += size;
4355 					if (size > stats->zns_leaf_largest)
4356 						stats->zns_leaf_largest = size;
4357 					stats->zns_leaf_count++;
4358 				}
4359 			}
4360 			break;
4361 		default:
4362 			(void) printf("skip type %d!\n", (int)nvpair_type(nvp));
4363 		}
4364 	}
4365 }
4366 
4367 static void
4368 dump_nvlist_stats(nvlist_t *nvl, size_t cap)
4369 {
4370 	zdb_nvl_stats_t stats = { 0 };
4371 	size_t size, sum = 0, total;
4372 	size_t noise;
4373 
4374 	/* requires nvlist with non-unique names for stat collection */
4375 	VERIFY0(nvlist_alloc(&stats.zns_string, 0, 0));
4376 	VERIFY0(nvlist_alloc(&stats.zns_uint64, 0, 0));
4377 	VERIFY0(nvlist_alloc(&stats.zns_boolean, 0, 0));
4378 	VERIFY0(nvlist_size(stats.zns_boolean, &noise, NV_ENCODE_XDR));
4379 
4380 	(void) printf("\n\nZFS Label NVList Config Stats:\n");
4381 
4382 	VERIFY0(nvlist_size(nvl, &total, NV_ENCODE_XDR));
4383 	(void) printf("  %d bytes used, %d bytes free (using %4.1f%%)\n\n",
4384 	    (int)total, (int)(cap - total), 100.0 * total / cap);
4385 
4386 	collect_nvlist_stats(nvl, &stats);
4387 
4388 	VERIFY0(nvlist_size(stats.zns_uint64, &size, NV_ENCODE_XDR));
4389 	size -= noise;
4390 	sum += size;
4391 	(void) printf("%12s %4d %6d bytes (%5.2f%%)\n", "integers:",
4392 	    (int)fnvlist_num_pairs(stats.zns_uint64),
4393 	    (int)size, 100.0 * size / total);
4394 
4395 	VERIFY0(nvlist_size(stats.zns_string, &size, NV_ENCODE_XDR));
4396 	size -= noise;
4397 	sum += size;
4398 	(void) printf("%12s %4d %6d bytes (%5.2f%%)\n", "strings:",
4399 	    (int)fnvlist_num_pairs(stats.zns_string),
4400 	    (int)size, 100.0 * size / total);
4401 
4402 	VERIFY0(nvlist_size(stats.zns_boolean, &size, NV_ENCODE_XDR));
4403 	size -= noise;
4404 	sum += size;
4405 	(void) printf("%12s %4d %6d bytes (%5.2f%%)\n", "booleans:",
4406 	    (int)fnvlist_num_pairs(stats.zns_boolean),
4407 	    (int)size, 100.0 * size / total);
4408 
4409 	size = total - sum;	/* treat remainder as nvlist overhead */
4410 	(void) printf("%12s %4d %6d bytes (%5.2f%%)\n\n", "nvlists:",
4411 	    stats.zns_list_count, (int)size, 100.0 * size / total);
4412 
4413 	if (stats.zns_leaf_count > 0) {
4414 		size_t average = stats.zns_leaf_total / stats.zns_leaf_count;
4415 
4416 		(void) printf("%12s %4d %6d bytes average\n", "leaf vdevs:",
4417 		    stats.zns_leaf_count, (int)average);
4418 		(void) printf("%24d bytes largest\n",
4419 		    (int)stats.zns_leaf_largest);
4420 
4421 		if (dump_opt['l'] >= 3 && average > 0)
4422 			(void) printf("  space for %d additional leaf vdevs\n",
4423 			    (int)((cap - total) / average));
4424 	}
4425 	(void) printf("\n");
4426 
4427 	nvlist_free(stats.zns_string);
4428 	nvlist_free(stats.zns_uint64);
4429 	nvlist_free(stats.zns_boolean);
4430 }
4431 
/*
 * One distinct checksum together with the set of labels it was seen in.
 * Records are ordered by cksum (see cksum_record_compare()).
 */
typedef struct cksum_record {
	/* the checksum; the only field used for ordering */
	zio_cksum_t cksum;
	/* labels[l] is B_TRUE once this checksum was recorded for label l */
	boolean_t labels[VDEV_LABELS];
	/* AVL node; presumably the tree linkage -- confirm at avl_create() */
	avl_node_t link;
} cksum_record_t;
4437 
4438 static int
4439 cksum_record_compare(const void *x1, const void *x2)
4440 {
4441 	const cksum_record_t *l = (cksum_record_t *)x1;
4442 	const cksum_record_t *r = (cksum_record_t *)x2;
4443 	int arraysize = ARRAY_SIZE(l->cksum.zc_word);
4444 	int difference = 0;
4445 
4446 	for (int i = 0; i < arraysize; i++) {
4447 		difference = TREE_CMP(l->cksum.zc_word[i], r->cksum.zc_word[i]);
4448 		if (difference)
4449 			break;
4450 	}
4451 
4452 	return (difference);
4453 }
4454 
4455 static cksum_record_t *
4456 cksum_record_alloc(zio_cksum_t *cksum, int l)
4457 {
4458 	cksum_record_t *rec;
4459 
4460 	rec = umem_zalloc(sizeof (*rec), UMEM_NOFAIL);
4461 	rec->cksum = *cksum;
4462 	rec->labels[l] = B_TRUE;
4463 
4464 	return (rec);
4465 }
4466 
4467 static cksum_record_t *
4468 cksum_record_lookup(avl_tree_t *tree, zio_cksum_t *cksum)
4469 {
4470 	cksum_record_t lookup = { .cksum = *cksum };
4471 	avl_index_t where;
4472 
4473 	return (avl_find(tree, &lookup, &where));
4474 }
4475 
4476 static cksum_record_t *
4477 cksum_record_insert(avl_tree_t *tree, zio_cksum_t *cksum, int l)
4478 {
4479 	cksum_record_t *rec;
4480 
4481 	rec = cksum_record_lookup(tree, cksum);
4482 	if (rec) {
4483 		rec->labels[l] = B_TRUE;
4484 	} else {
4485 		rec = cksum_record_alloc(cksum, l);
4486 		avl_add(tree, rec);
4487 	}
4488 
4489 	return (rec);
4490 }
4491 
4492 static int
4493 first_label(cksum_record_t *rec)
4494 {
4495 	for (int i = 0; i < VDEV_LABELS; i++)
4496 		if (rec->labels[i])
4497 			return (i);
4498 
4499 	return (-1);
4500 }
4501 
4502 static void
4503 print_label_numbers(const char *prefix, const cksum_record_t *rec)
4504 {
4505 	fputs(prefix, stdout);
4506 	for (int i = 0; i < VDEV_LABELS; i++)
4507 		if (rec->labels[i] == B_TRUE)
4508 			printf("%d ", i);
4509 	putchar('\n');
4510 }
4511 
/* Capacity of the per-label uberblock record array below. */
#define	MAX_UBERBLOCK_COUNT (VDEV_UBERBLOCK_RING >> UBERBLOCK_SHIFT)

/*
 * Per-label state used while dumping vdev labels.
 */
typedef struct zdb_label {
	/* in-memory copy of the label; uberblocks are located inside it */
	vdev_label_t label;
	/* presumably the label's offset on the device -- TODO confirm */
	uint64_t label_offset;
	/* config nvlist for this label, printed by dump_config_from_label() */
	nvlist_t *config_nv;
	/* checksum record for the config */
	cksum_record_t *config;
	/* checksum record per uberblock slot; NULL is reported as invalid */
	cksum_record_t *uberblocks[MAX_UBERBLOCK_COUNT];
	/* set by print_label_header() so the banner is printed only once */
	boolean_t header_printed;
	/* NOTE(review): not referenced in this part of the file */
	boolean_t read_failed;
	/* B_FALSE makes the banner say "(Bad label cksum)" */
	boolean_t cksum_valid;
} zdb_label_t;
4524 
4525 static void
4526 print_label_header(zdb_label_t *label, int l)
4527 {
4528 
4529 	if (dump_opt['q'])
4530 		return;
4531 
4532 	if (label->header_printed == B_TRUE)
4533 		return;
4534 
4535 	(void) printf("------------------------------------\n");
4536 	(void) printf("LABEL %d %s\n", l,
4537 	    label->cksum_valid ? "" : "(Bad label cksum)");
4538 	(void) printf("------------------------------------\n");
4539 
4540 	label->header_printed = B_TRUE;
4541 }
4542 
/* Print the banner that introduces the L2ARC device header section. */
static void
print_l2arc_header(void)
{
	(void) printf(
	    "------------------------------------\n"
	    "L2ARC device header\n"
	    "------------------------------------\n");
}
4550 
/* Print the banner that introduces the L2ARC log blocks section. */
static void
print_l2arc_log_blocks(void)
{
	(void) printf(
	    "------------------------------------\n"
	    "L2ARC device log blocks\n"
	    "------------------------------------\n");
}
4558 
4559 static void
4560 dump_l2arc_log_entries(uint64_t log_entries,
4561     l2arc_log_ent_phys_t *le, uint64_t i)
4562 {
4563 	for (int j = 0; j < log_entries; j++) {
4564 		dva_t dva = le[j].le_dva;
4565 		(void) printf("lb[%4llu]\tle[%4d]\tDVA asize: %llu, "
4566 		    "vdev: %llu, offset: %llu\n",
4567 		    (u_longlong_t)i, j + 1,
4568 		    (u_longlong_t)DVA_GET_ASIZE(&dva),
4569 		    (u_longlong_t)DVA_GET_VDEV(&dva),
4570 		    (u_longlong_t)DVA_GET_OFFSET(&dva));
4571 		(void) printf("|\t\t\t\tbirth: %llu\n",
4572 		    (u_longlong_t)le[j].le_birth);
4573 		(void) printf("|\t\t\t\tlsize: %llu\n",
4574 		    (u_longlong_t)L2BLK_GET_LSIZE((&le[j])->le_prop));
4575 		(void) printf("|\t\t\t\tpsize: %llu\n",
4576 		    (u_longlong_t)L2BLK_GET_PSIZE((&le[j])->le_prop));
4577 		(void) printf("|\t\t\t\tcompr: %llu\n",
4578 		    (u_longlong_t)L2BLK_GET_COMPRESS((&le[j])->le_prop));
4579 		(void) printf("|\t\t\t\tcomplevel: %llu\n",
4580 		    (u_longlong_t)(&le[j])->le_complevel);
4581 		(void) printf("|\t\t\t\ttype: %llu\n",
4582 		    (u_longlong_t)L2BLK_GET_TYPE((&le[j])->le_prop));
4583 		(void) printf("|\t\t\t\tprotected: %llu\n",
4584 		    (u_longlong_t)L2BLK_GET_PROTECTED((&le[j])->le_prop));
4585 		(void) printf("|\t\t\t\tprefetch: %llu\n",
4586 		    (u_longlong_t)L2BLK_GET_PREFETCH((&le[j])->le_prop));
4587 		(void) printf("|\t\t\t\taddress: %llu\n",
4588 		    (u_longlong_t)le[j].le_daddr);
4589 		(void) printf("|\t\t\t\tARC state: %llu\n",
4590 		    (u_longlong_t)L2BLK_GET_STATE((&le[j])->le_prop));
4591 		(void) printf("|\n");
4592 	}
4593 	(void) printf("\n");
4594 }
4595 
4596 static void
4597 dump_l2arc_log_blkptr(const l2arc_log_blkptr_t *lbps)
4598 {
4599 	(void) printf("|\t\tdaddr: %llu\n", (u_longlong_t)lbps->lbp_daddr);
4600 	(void) printf("|\t\tpayload_asize: %llu\n",
4601 	    (u_longlong_t)lbps->lbp_payload_asize);
4602 	(void) printf("|\t\tpayload_start: %llu\n",
4603 	    (u_longlong_t)lbps->lbp_payload_start);
4604 	(void) printf("|\t\tlsize: %llu\n",
4605 	    (u_longlong_t)L2BLK_GET_LSIZE(lbps->lbp_prop));
4606 	(void) printf("|\t\tasize: %llu\n",
4607 	    (u_longlong_t)L2BLK_GET_PSIZE(lbps->lbp_prop));
4608 	(void) printf("|\t\tcompralgo: %llu\n",
4609 	    (u_longlong_t)L2BLK_GET_COMPRESS(lbps->lbp_prop));
4610 	(void) printf("|\t\tcksumalgo: %llu\n",
4611 	    (u_longlong_t)L2BLK_GET_CHECKSUM(lbps->lbp_prop));
4612 	(void) printf("|\n\n");
4613 }
4614 
4615 static void
4616 dump_l2arc_log_blocks(int fd, const l2arc_dev_hdr_phys_t *l2dhdr,
4617     l2arc_dev_hdr_phys_t *rebuild)
4618 {
4619 	l2arc_log_blk_phys_t this_lb;
4620 	uint64_t asize;
4621 	l2arc_log_blkptr_t lbps[2];
4622 	abd_t *abd;
4623 	zio_cksum_t cksum;
4624 	int failed = 0;
4625 	l2arc_dev_t dev;
4626 
4627 	if (!dump_opt['q'])
4628 		print_l2arc_log_blocks();
4629 	memcpy(lbps, l2dhdr->dh_start_lbps, sizeof (lbps));
4630 
4631 	dev.l2ad_evict = l2dhdr->dh_evict;
4632 	dev.l2ad_start = l2dhdr->dh_start;
4633 	dev.l2ad_end = l2dhdr->dh_end;
4634 
4635 	if (l2dhdr->dh_start_lbps[0].lbp_daddr == 0) {
4636 		/* no log blocks to read */
4637 		if (!dump_opt['q']) {
4638 			(void) printf("No log blocks to read\n");
4639 			(void) printf("\n");
4640 		}
4641 		return;
4642 	} else {
4643 		dev.l2ad_hand = lbps[0].lbp_daddr +
4644 		    L2BLK_GET_PSIZE((&lbps[0])->lbp_prop);
4645 	}
4646 
4647 	dev.l2ad_first = !!(l2dhdr->dh_flags & L2ARC_DEV_HDR_EVICT_FIRST);
4648 
4649 	for (;;) {
4650 		if (!l2arc_log_blkptr_valid(&dev, &lbps[0]))
4651 			break;
4652 
4653 		/* L2BLK_GET_PSIZE returns aligned size for log blocks */
4654 		asize = L2BLK_GET_PSIZE((&lbps[0])->lbp_prop);
4655 		if (pread64(fd, &this_lb, asize, lbps[0].lbp_daddr) != asize) {
4656 			if (!dump_opt['q']) {
4657 				(void) printf("Error while reading next log "
4658 				    "block\n\n");
4659 			}
4660 			break;
4661 		}
4662 
4663 		fletcher_4_native_varsize(&this_lb, asize, &cksum);
4664 		if (!ZIO_CHECKSUM_EQUAL(cksum, lbps[0].lbp_cksum)) {
4665 			failed++;
4666 			if (!dump_opt['q']) {
4667 				(void) printf("Invalid cksum\n");
4668 				dump_l2arc_log_blkptr(&lbps[0]);
4669 			}
4670 			break;
4671 		}
4672 
4673 		switch (L2BLK_GET_COMPRESS((&lbps[0])->lbp_prop)) {
4674 		case ZIO_COMPRESS_OFF:
4675 			break;
4676 		default:
4677 			abd = abd_alloc_for_io(asize, B_TRUE);
4678 			abd_copy_from_buf_off(abd, &this_lb, 0, asize);
4679 			if (zio_decompress_data(L2BLK_GET_COMPRESS(
4680 			    (&lbps[0])->lbp_prop), abd, &this_lb,
4681 			    asize, sizeof (this_lb), NULL) != 0) {
4682 				(void) printf("L2ARC block decompression "
4683 				    "failed\n");
4684 				abd_free(abd);
4685 				goto out;
4686 			}
4687 			abd_free(abd);
4688 			break;
4689 		}
4690 
4691 		if (this_lb.lb_magic == BSWAP_64(L2ARC_LOG_BLK_MAGIC))
4692 			byteswap_uint64_array(&this_lb, sizeof (this_lb));
4693 		if (this_lb.lb_magic != L2ARC_LOG_BLK_MAGIC) {
4694 			if (!dump_opt['q'])
4695 				(void) printf("Invalid log block magic\n\n");
4696 			break;
4697 		}
4698 
4699 		rebuild->dh_lb_count++;
4700 		rebuild->dh_lb_asize += asize;
4701 		if (dump_opt['l'] > 1 && !dump_opt['q']) {
4702 			(void) printf("lb[%4llu]\tmagic: %llu\n",
4703 			    (u_longlong_t)rebuild->dh_lb_count,
4704 			    (u_longlong_t)this_lb.lb_magic);
4705 			dump_l2arc_log_blkptr(&lbps[0]);
4706 		}
4707 
4708 		if (dump_opt['l'] > 2 && !dump_opt['q'])
4709 			dump_l2arc_log_entries(l2dhdr->dh_log_entries,
4710 			    this_lb.lb_entries,
4711 			    rebuild->dh_lb_count);
4712 
4713 		if (l2arc_range_check_overlap(lbps[1].lbp_payload_start,
4714 		    lbps[0].lbp_payload_start, dev.l2ad_evict) &&
4715 		    !dev.l2ad_first)
4716 			break;
4717 
4718 		lbps[0] = lbps[1];
4719 		lbps[1] = this_lb.lb_prev_lbp;
4720 	}
4721 out:
4722 	if (!dump_opt['q']) {
4723 		(void) printf("log_blk_count:\t %llu with valid cksum\n",
4724 		    (u_longlong_t)rebuild->dh_lb_count);
4725 		(void) printf("\t\t %d with invalid cksum\n", failed);
4726 		(void) printf("log_blk_asize:\t %llu\n\n",
4727 		    (u_longlong_t)rebuild->dh_lb_asize);
4728 	}
4729 }
4730 
4731 static int
4732 dump_l2arc_header(int fd)
4733 {
4734 	l2arc_dev_hdr_phys_t l2dhdr = {0}, rebuild = {0};
4735 	int error = B_FALSE;
4736 
4737 	if (pread64(fd, &l2dhdr, sizeof (l2dhdr),
4738 	    VDEV_LABEL_START_SIZE) != sizeof (l2dhdr)) {
4739 		error = B_TRUE;
4740 	} else {
4741 		if (l2dhdr.dh_magic == BSWAP_64(L2ARC_DEV_HDR_MAGIC))
4742 			byteswap_uint64_array(&l2dhdr, sizeof (l2dhdr));
4743 
4744 		if (l2dhdr.dh_magic != L2ARC_DEV_HDR_MAGIC)
4745 			error = B_TRUE;
4746 	}
4747 
4748 	if (error) {
4749 		(void) printf("L2ARC device header not found\n\n");
4750 		/* Do not return an error here for backward compatibility */
4751 		return (0);
4752 	} else if (!dump_opt['q']) {
4753 		print_l2arc_header();
4754 
4755 		(void) printf("    magic: %llu\n",
4756 		    (u_longlong_t)l2dhdr.dh_magic);
4757 		(void) printf("    version: %llu\n",
4758 		    (u_longlong_t)l2dhdr.dh_version);
4759 		(void) printf("    pool_guid: %llu\n",
4760 		    (u_longlong_t)l2dhdr.dh_spa_guid);
4761 		(void) printf("    flags: %llu\n",
4762 		    (u_longlong_t)l2dhdr.dh_flags);
4763 		(void) printf("    start_lbps[0]: %llu\n",
4764 		    (u_longlong_t)
4765 		    l2dhdr.dh_start_lbps[0].lbp_daddr);
4766 		(void) printf("    start_lbps[1]: %llu\n",
4767 		    (u_longlong_t)
4768 		    l2dhdr.dh_start_lbps[1].lbp_daddr);
4769 		(void) printf("    log_blk_ent: %llu\n",
4770 		    (u_longlong_t)l2dhdr.dh_log_entries);
4771 		(void) printf("    start: %llu\n",
4772 		    (u_longlong_t)l2dhdr.dh_start);
4773 		(void) printf("    end: %llu\n",
4774 		    (u_longlong_t)l2dhdr.dh_end);
4775 		(void) printf("    evict: %llu\n",
4776 		    (u_longlong_t)l2dhdr.dh_evict);
4777 		(void) printf("    lb_asize_refcount: %llu\n",
4778 		    (u_longlong_t)l2dhdr.dh_lb_asize);
4779 		(void) printf("    lb_count_refcount: %llu\n",
4780 		    (u_longlong_t)l2dhdr.dh_lb_count);
4781 		(void) printf("    trim_action_time: %llu\n",
4782 		    (u_longlong_t)l2dhdr.dh_trim_action_time);
4783 		(void) printf("    trim_state: %llu\n\n",
4784 		    (u_longlong_t)l2dhdr.dh_trim_state);
4785 	}
4786 
4787 	dump_l2arc_log_blocks(fd, &l2dhdr, &rebuild);
4788 	/*
4789 	 * The total aligned size of log blocks and the number of log blocks
4790 	 * reported in the header of the device may be less than what zdb
4791 	 * reports by dump_l2arc_log_blocks() which emulates l2arc_rebuild().
4792 	 * This happens because dump_l2arc_log_blocks() lacks the memory
4793 	 * pressure valve that l2arc_rebuild() has. Thus, if we are on a system
4794 	 * with low memory, l2arc_rebuild will exit prematurely and dh_lb_asize
4795 	 * and dh_lb_count will be lower to begin with than what exists on the
4796 	 * device. This is normal and zdb should not exit with an error. The
4797 	 * opposite case should never happen though, the values reported in the
4798 	 * header should never be higher than what dump_l2arc_log_blocks() and
4799 	 * l2arc_rebuild() report. If this happens there is a leak in the
4800 	 * accounting of log blocks.
4801 	 */
4802 	if (l2dhdr.dh_lb_asize > rebuild.dh_lb_asize ||
4803 	    l2dhdr.dh_lb_count > rebuild.dh_lb_count)
4804 		return (1);
4805 
4806 	return (0);
4807 }
4808 
4809 static void
4810 dump_config_from_label(zdb_label_t *label, size_t buflen, int l)
4811 {
4812 	if (dump_opt['q'])
4813 		return;
4814 
4815 	if ((dump_opt['l'] < 3) && (first_label(label->config) != l))
4816 		return;
4817 
4818 	print_label_header(label, l);
4819 	dump_nvlist(label->config_nv, 4);
4820 	print_label_numbers("    labels = ", label->config);
4821 
4822 	if (dump_opt['l'] >= 2)
4823 		dump_nvlist_stats(label->config_nv, buflen);
4824 }
4825 
4826 #define	ZDB_MAX_UB_HEADER_SIZE 32
4827 
4828 static void
4829 dump_label_uberblocks(zdb_label_t *label, uint64_t ashift, int label_num)
4830 {
4831 
4832 	vdev_t vd;
4833 	char header[ZDB_MAX_UB_HEADER_SIZE];
4834 
4835 	vd.vdev_ashift = ashift;
4836 	vd.vdev_top = &vd;
4837 
4838 	for (int i = 0; i < VDEV_UBERBLOCK_COUNT(&vd); i++) {
4839 		uint64_t uoff = VDEV_UBERBLOCK_OFFSET(&vd, i);
4840 		uberblock_t *ub = (void *)((char *)&label->label + uoff);
4841 		cksum_record_t *rec = label->uberblocks[i];
4842 
4843 		if (rec == NULL) {
4844 			if (dump_opt['u'] >= 2) {
4845 				print_label_header(label, label_num);
4846 				(void) printf("    Uberblock[%d] invalid\n", i);
4847 			}
4848 			continue;
4849 		}
4850 
4851 		if ((dump_opt['u'] < 3) && (first_label(rec) != label_num))
4852 			continue;
4853 
4854 		if ((dump_opt['u'] < 4) &&
4855 		    (ub->ub_mmp_magic == MMP_MAGIC) && ub->ub_mmp_delay &&
4856 		    (i >= VDEV_UBERBLOCK_COUNT(&vd) - MMP_BLOCKS_PER_LABEL))
4857 			continue;
4858 
4859 		print_label_header(label, label_num);
4860 		(void) snprintf(header, ZDB_MAX_UB_HEADER_SIZE,
4861 		    "    Uberblock[%d]\n", i);
4862 		dump_uberblock(ub, header, "");
4863 		print_label_numbers("        labels = ", rec);
4864 	}
4865 }
4866 
4867 static char curpath[PATH_MAX];
4868 
4869 /*
4870  * Iterate through the path components, recursively passing
4871  * current one's obj and remaining path until we find the obj
4872  * for the last one.
4873  */
4874 static int
4875 dump_path_impl(objset_t *os, uint64_t obj, char *name, uint64_t *retobj)
4876 {
4877 	int err;
4878 	boolean_t header = B_TRUE;
4879 	uint64_t child_obj;
4880 	char *s;
4881 	dmu_buf_t *db;
4882 	dmu_object_info_t doi;
4883 
4884 	if ((s = strchr(name, '/')) != NULL)
4885 		*s = '\0';
4886 	err = zap_lookup(os, obj, name, 8, 1, &child_obj);
4887 
4888 	(void) strlcat(curpath, name, sizeof (curpath));
4889 
4890 	if (err != 0) {
4891 		(void) fprintf(stderr, "failed to lookup %s: %s\n",
4892 		    curpath, strerror(err));
4893 		return (err);
4894 	}
4895 
4896 	child_obj = ZFS_DIRENT_OBJ(child_obj);
4897 	err = sa_buf_hold(os, child_obj, FTAG, &db);
4898 	if (err != 0) {
4899 		(void) fprintf(stderr,
4900 		    "failed to get SA dbuf for obj %llu: %s\n",
4901 		    (u_longlong_t)child_obj, strerror(err));
4902 		return (EINVAL);
4903 	}
4904 	dmu_object_info_from_db(db, &doi);
4905 	sa_buf_rele(db, FTAG);
4906 
4907 	if (doi.doi_bonus_type != DMU_OT_SA &&
4908 	    doi.doi_bonus_type != DMU_OT_ZNODE) {
4909 		(void) fprintf(stderr, "invalid bonus type %d for obj %llu\n",
4910 		    doi.doi_bonus_type, (u_longlong_t)child_obj);
4911 		return (EINVAL);
4912 	}
4913 
4914 	if (dump_opt['v'] > 6) {
4915 		(void) printf("obj=%llu %s type=%d bonustype=%d\n",
4916 		    (u_longlong_t)child_obj, curpath, doi.doi_type,
4917 		    doi.doi_bonus_type);
4918 	}
4919 
4920 	(void) strlcat(curpath, "/", sizeof (curpath));
4921 
4922 	switch (doi.doi_type) {
4923 	case DMU_OT_DIRECTORY_CONTENTS:
4924 		if (s != NULL && *(s + 1) != '\0')
4925 			return (dump_path_impl(os, child_obj, s + 1, retobj));
4926 		zfs_fallthrough;
4927 	case DMU_OT_PLAIN_FILE_CONTENTS:
4928 		if (retobj != NULL) {
4929 			*retobj = child_obj;
4930 		} else {
4931 			dump_object(os, child_obj, dump_opt['v'], &header,
4932 			    NULL, 0);
4933 		}
4934 		return (0);
4935 	default:
4936 		(void) fprintf(stderr, "object %llu has non-file/directory "
4937 		    "type %d\n", (u_longlong_t)obj, doi.doi_type);
4938 		break;
4939 	}
4940 
4941 	return (EINVAL);
4942 }
4943 
4944 /*
4945  * Dump the blocks for the object specified by path inside the dataset.
4946  */
4947 static int
4948 dump_path(char *ds, char *path, uint64_t *retobj)
4949 {
4950 	int err;
4951 	objset_t *os;
4952 	uint64_t root_obj;
4953 
4954 	err = open_objset(ds, FTAG, &os);
4955 	if (err != 0)
4956 		return (err);
4957 
4958 	err = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, &root_obj);
4959 	if (err != 0) {
4960 		(void) fprintf(stderr, "can't lookup root znode: %s\n",
4961 		    strerror(err));
4962 		close_objset(os, FTAG);
4963 		return (EINVAL);
4964 	}
4965 
4966 	(void) snprintf(curpath, sizeof (curpath), "dataset=%s path=/", ds);
4967 
4968 	err = dump_path_impl(os, root_obj, path, retobj);
4969 
4970 	close_objset(os, FTAG);
4971 	return (err);
4972 }
4973 
4974 static int
4975 dump_backup_bytes(objset_t *os, void *buf, int len, void *arg)
4976 {
4977 	const char *p = (const char *)buf;
4978 	ssize_t nwritten;
4979 
4980 	(void) os;
4981 	(void) arg;
4982 
4983 	/* Write the data out, handling short writes and signals. */
4984 	while ((nwritten = write(STDOUT_FILENO, p, len)) < len) {
4985 		if (nwritten < 0) {
4986 			if (errno == EINTR)
4987 				continue;
4988 			return (errno);
4989 		}
4990 		p += nwritten;
4991 		len -= nwritten;
4992 	}
4993 
4994 	return (0);
4995 }
4996 
4997 static void
4998 dump_backup(const char *pool, uint64_t objset_id, const char *flagstr)
4999 {
5000 	boolean_t embed = B_FALSE;
5001 	boolean_t large_block = B_FALSE;
5002 	boolean_t compress = B_FALSE;
5003 	boolean_t raw = B_FALSE;
5004 
5005 	const char *c;
5006 	for (c = flagstr; c != NULL && *c != '\0'; c++) {
5007 		switch (*c) {
5008 			case 'e':
5009 				embed = B_TRUE;
5010 				break;
5011 			case 'L':
5012 				large_block = B_TRUE;
5013 				break;
5014 			case 'c':
5015 				compress = B_TRUE;
5016 				break;
5017 			case 'w':
5018 				raw = B_TRUE;
5019 				break;
5020 			default:
5021 				fprintf(stderr, "dump_backup: invalid flag "
5022 				    "'%c'\n", *c);
5023 				return;
5024 		}
5025 	}
5026 
5027 	if (isatty(STDOUT_FILENO)) {
5028 		fprintf(stderr, "dump_backup: stream cannot be written "
5029 		    "to a terminal\n");
5030 		return;
5031 	}
5032 
5033 	offset_t off = 0;
5034 	dmu_send_outparams_t out = {
5035 	    .dso_outfunc = dump_backup_bytes,
5036 	    .dso_dryrun  = B_FALSE,
5037 	};
5038 
5039 	int err = dmu_send_obj(pool, objset_id, /* fromsnap */0, embed,
5040 	    large_block, compress, raw, /* saved */ B_FALSE, STDOUT_FILENO,
5041 	    &off, &out);
5042 	if (err != 0) {
5043 		fprintf(stderr, "dump_backup: dmu_send_obj: %s\n",
5044 		    strerror(err));
5045 		return;
5046 	}
5047 }
5048 
5049 static int
5050 zdb_copy_object(objset_t *os, uint64_t srcobj, char *destfile)
5051 {
5052 	int err = 0;
5053 	uint64_t size, readsize, oursize, offset;
5054 	ssize_t writesize;
5055 	sa_handle_t *hdl;
5056 
5057 	(void) printf("Copying object %" PRIu64 " to file %s\n", srcobj,
5058 	    destfile);
5059 
5060 	VERIFY3P(os, ==, sa_os);
5061 	if ((err = sa_handle_get(os, srcobj, NULL, SA_HDL_PRIVATE, &hdl))) {
5062 		(void) printf("Failed to get handle for SA znode\n");
5063 		return (err);
5064 	}
5065 	if ((err = sa_lookup(hdl, sa_attr_table[ZPL_SIZE], &size, 8))) {
5066 		(void) sa_handle_destroy(hdl);
5067 		return (err);
5068 	}
5069 	(void) sa_handle_destroy(hdl);
5070 
5071 	(void) printf("Object %" PRIu64 " is %" PRIu64 " bytes\n", srcobj,
5072 	    size);
5073 	if (size == 0) {
5074 		return (EINVAL);
5075 	}
5076 
5077 	int fd = open(destfile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
5078 	if (fd == -1)
5079 		return (errno);
5080 	/*
5081 	 * We cap the size at 1 mebibyte here to prevent
5082 	 * allocation failures and nigh-infinite printing if the
5083 	 * object is extremely large.
5084 	 */
5085 	oursize = MIN(size, 1 << 20);
5086 	offset = 0;
5087 	char *buf = kmem_alloc(oursize, KM_NOSLEEP);
5088 	if (buf == NULL) {
5089 		(void) close(fd);
5090 		return (ENOMEM);
5091 	}
5092 
5093 	while (offset < size) {
5094 		readsize = MIN(size - offset, 1 << 20);
5095 		err = dmu_read(os, srcobj, offset, readsize, buf, 0);
5096 		if (err != 0) {
5097 			(void) printf("got error %u from dmu_read\n", err);
5098 			kmem_free(buf, oursize);
5099 			(void) close(fd);
5100 			return (err);
5101 		}
5102 		if (dump_opt['v'] > 3) {
5103 			(void) printf("Read offset=%" PRIu64 " size=%" PRIu64
5104 			    " error=%d\n", offset, readsize, err);
5105 		}
5106 
5107 		writesize = write(fd, buf, readsize);
5108 		if (writesize < 0) {
5109 			err = errno;
5110 			break;
5111 		} else if (writesize != readsize) {
5112 			/* Incomplete write */
5113 			(void) fprintf(stderr, "Short write, only wrote %llu of"
5114 			    " %" PRIu64 " bytes, exiting...\n",
5115 			    (u_longlong_t)writesize, readsize);
5116 			break;
5117 		}
5118 
5119 		offset += readsize;
5120 	}
5121 
5122 	(void) close(fd);
5123 
5124 	if (buf != NULL)
5125 		kmem_free(buf, oursize);
5126 
5127 	return (err);
5128 }
5129 
5130 static boolean_t
5131 label_cksum_valid(vdev_label_t *label, uint64_t offset)
5132 {
5133 	zio_checksum_info_t *ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL];
5134 	zio_cksum_t expected_cksum;
5135 	zio_cksum_t actual_cksum;
5136 	zio_cksum_t verifier;
5137 	zio_eck_t *eck;
5138 	int byteswap;
5139 
5140 	void *data = (char *)label + offsetof(vdev_label_t, vl_vdev_phys);
5141 	eck = (zio_eck_t *)((char *)(data) + VDEV_PHYS_SIZE) - 1;
5142 
5143 	offset += offsetof(vdev_label_t, vl_vdev_phys);
5144 	ZIO_SET_CHECKSUM(&verifier, offset, 0, 0, 0);
5145 
5146 	byteswap = (eck->zec_magic == BSWAP_64(ZEC_MAGIC));
5147 	if (byteswap)
5148 		byteswap_uint64_array(&verifier, sizeof (zio_cksum_t));
5149 
5150 	expected_cksum = eck->zec_cksum;
5151 	eck->zec_cksum = verifier;
5152 
5153 	abd_t *abd = abd_get_from_buf(data, VDEV_PHYS_SIZE);
5154 	ci->ci_func[byteswap](abd, VDEV_PHYS_SIZE, NULL, &actual_cksum);
5155 	abd_free(abd);
5156 
5157 	if (byteswap)
5158 		byteswap_uint64_array(&expected_cksum, sizeof (zio_cksum_t));
5159 
5160 	if (ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum))
5161 		return (B_TRUE);
5162 
5163 	return (B_FALSE);
5164 }
5165 
5166 static int
5167 dump_label(const char *dev)
5168 {
5169 	char path[MAXPATHLEN];
5170 	zdb_label_t labels[VDEV_LABELS] = {{{{0}}}};
5171 	uint64_t psize, ashift, l2cache;
5172 	struct stat64 statbuf;
5173 	boolean_t config_found = B_FALSE;
5174 	boolean_t error = B_FALSE;
5175 	boolean_t read_l2arc_header = B_FALSE;
5176 	avl_tree_t config_tree;
5177 	avl_tree_t uberblock_tree;
5178 	void *node, *cookie;
5179 	int fd;
5180 
5181 	/*
5182 	 * Check if we were given absolute path and use it as is.
5183 	 * Otherwise if the provided vdev name doesn't point to a file,
5184 	 * try prepending expected disk paths and partition numbers.
5185 	 */
5186 	(void) strlcpy(path, dev, sizeof (path));
5187 	if (dev[0] != '/' && stat64(path, &statbuf) != 0) {
5188 		int error;
5189 
5190 		error = zfs_resolve_shortname(dev, path, MAXPATHLEN);
5191 		if (error == 0 && zfs_dev_is_whole_disk(path)) {
5192 			if (zfs_append_partition(path, MAXPATHLEN) == -1)
5193 				error = ENOENT;
5194 		}
5195 
5196 		if (error || (stat64(path, &statbuf) != 0)) {
5197 			(void) printf("failed to find device %s, try "
5198 			    "specifying absolute path instead\n", dev);
5199 			return (1);
5200 		}
5201 	}
5202 
5203 	if ((fd = open64(path, O_RDONLY)) < 0) {
5204 		(void) printf("cannot open '%s': %s\n", path, strerror(errno));
5205 		exit(1);
5206 	}
5207 
5208 	if (fstat64_blk(fd, &statbuf) != 0) {
5209 		(void) printf("failed to stat '%s': %s\n", path,
5210 		    strerror(errno));
5211 		(void) close(fd);
5212 		exit(1);
5213 	}
5214 
5215 	if (S_ISBLK(statbuf.st_mode) && zfs_dev_flush(fd) != 0)
5216 		(void) printf("failed to invalidate cache '%s' : %s\n", path,
5217 		    strerror(errno));
5218 
5219 	avl_create(&config_tree, cksum_record_compare,
5220 	    sizeof (cksum_record_t), offsetof(cksum_record_t, link));
5221 	avl_create(&uberblock_tree, cksum_record_compare,
5222 	    sizeof (cksum_record_t), offsetof(cksum_record_t, link));
5223 
5224 	psize = statbuf.st_size;
5225 	psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t));
5226 	ashift = SPA_MINBLOCKSHIFT;
5227 
5228 	/*
5229 	 * 1. Read the label from disk
5230 	 * 2. Verify label cksum
5231 	 * 3. Unpack the configuration and insert in config tree.
5232 	 * 4. Traverse all uberblocks and insert in uberblock tree.
5233 	 */
5234 	for (int l = 0; l < VDEV_LABELS; l++) {
5235 		zdb_label_t *label = &labels[l];
5236 		char *buf = label->label.vl_vdev_phys.vp_nvlist;
5237 		size_t buflen = sizeof (label->label.vl_vdev_phys.vp_nvlist);
5238 		nvlist_t *config;
5239 		cksum_record_t *rec;
5240 		zio_cksum_t cksum;
5241 		vdev_t vd;
5242 
5243 		label->label_offset = vdev_label_offset(psize, l, 0);
5244 
5245 		if (pread64(fd, &label->label, sizeof (label->label),
5246 		    label->label_offset) != sizeof (label->label)) {
5247 			if (!dump_opt['q'])
5248 				(void) printf("failed to read label %d\n", l);
5249 			label->read_failed = B_TRUE;
5250 			error = B_TRUE;
5251 			continue;
5252 		}
5253 
5254 		label->read_failed = B_FALSE;
5255 		label->cksum_valid = label_cksum_valid(&label->label,
5256 		    label->label_offset);
5257 
5258 		if (nvlist_unpack(buf, buflen, &config, 0) == 0) {
5259 			nvlist_t *vdev_tree = NULL;
5260 			size_t size;
5261 
5262 			if ((nvlist_lookup_nvlist(config,
5263 			    ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) ||
5264 			    (nvlist_lookup_uint64(vdev_tree,
5265 			    ZPOOL_CONFIG_ASHIFT, &ashift) != 0))
5266 				ashift = SPA_MINBLOCKSHIFT;
5267 
5268 			if (nvlist_size(config, &size, NV_ENCODE_XDR) != 0)
5269 				size = buflen;
5270 
5271 			/* If the device is a cache device read the header. */
5272 			if (!read_l2arc_header) {
5273 				if (nvlist_lookup_uint64(config,
5274 				    ZPOOL_CONFIG_POOL_STATE, &l2cache) == 0 &&
5275 				    l2cache == POOL_STATE_L2CACHE) {
5276 					read_l2arc_header = B_TRUE;
5277 				}
5278 			}
5279 
5280 			fletcher_4_native_varsize(buf, size, &cksum);
5281 			rec = cksum_record_insert(&config_tree, &cksum, l);
5282 
5283 			label->config = rec;
5284 			label->config_nv = config;
5285 			config_found = B_TRUE;
5286 		} else {
5287 			error = B_TRUE;
5288 		}
5289 
5290 		vd.vdev_ashift = ashift;
5291 		vd.vdev_top = &vd;
5292 
5293 		for (int i = 0; i < VDEV_UBERBLOCK_COUNT(&vd); i++) {
5294 			uint64_t uoff = VDEV_UBERBLOCK_OFFSET(&vd, i);
5295 			uberblock_t *ub = (void *)((char *)label + uoff);
5296 
5297 			if (uberblock_verify(ub))
5298 				continue;
5299 
5300 			fletcher_4_native_varsize(ub, sizeof (*ub), &cksum);
5301 			rec = cksum_record_insert(&uberblock_tree, &cksum, l);
5302 
5303 			label->uberblocks[i] = rec;
5304 		}
5305 	}
5306 
5307 	/*
5308 	 * Dump the label and uberblocks.
5309 	 */
5310 	for (int l = 0; l < VDEV_LABELS; l++) {
5311 		zdb_label_t *label = &labels[l];
5312 		size_t buflen = sizeof (label->label.vl_vdev_phys.vp_nvlist);
5313 
5314 		if (label->read_failed == B_TRUE)
5315 			continue;
5316 
5317 		if (label->config_nv) {
5318 			dump_config_from_label(label, buflen, l);
5319 		} else {
5320 			if (!dump_opt['q'])
5321 				(void) printf("failed to unpack label %d\n", l);
5322 		}
5323 
5324 		if (dump_opt['u'])
5325 			dump_label_uberblocks(label, ashift, l);
5326 
5327 		nvlist_free(label->config_nv);
5328 	}
5329 
5330 	/*
5331 	 * Dump the L2ARC header, if existent.
5332 	 */
5333 	if (read_l2arc_header)
5334 		error |= dump_l2arc_header(fd);
5335 
5336 	cookie = NULL;
5337 	while ((node = avl_destroy_nodes(&config_tree, &cookie)) != NULL)
5338 		umem_free(node, sizeof (cksum_record_t));
5339 
5340 	cookie = NULL;
5341 	while ((node = avl_destroy_nodes(&uberblock_tree, &cookie)) != NULL)
5342 		umem_free(node, sizeof (cksum_record_t));
5343 
5344 	avl_destroy(&config_tree);
5345 	avl_destroy(&uberblock_tree);
5346 
5347 	(void) close(fd);
5348 
5349 	return (config_found == B_FALSE ? 2 :
5350 	    (error == B_TRUE ? 1 : 0));
5351 }
5352 
5353 static uint64_t dataset_feature_count[SPA_FEATURES];
5354 static uint64_t global_feature_count[SPA_FEATURES];
5355 static uint64_t remap_deadlist_count = 0;
5356 
5357 static int
5358 dump_one_objset(const char *dsname, void *arg)
5359 {
5360 	(void) arg;
5361 	int error;
5362 	objset_t *os;
5363 	spa_feature_t f;
5364 
5365 	error = open_objset(dsname, FTAG, &os);
5366 	if (error != 0)
5367 		return (0);
5368 
5369 	for (f = 0; f < SPA_FEATURES; f++) {
5370 		if (!dsl_dataset_feature_is_active(dmu_objset_ds(os), f))
5371 			continue;
5372 		ASSERT(spa_feature_table[f].fi_flags &
5373 		    ZFEATURE_FLAG_PER_DATASET);
5374 		dataset_feature_count[f]++;
5375 	}
5376 
5377 	if (dsl_dataset_remap_deadlist_exists(dmu_objset_ds(os))) {
5378 		remap_deadlist_count++;
5379 	}
5380 
5381 	for (dsl_bookmark_node_t *dbn =
5382 	    avl_first(&dmu_objset_ds(os)->ds_bookmarks); dbn != NULL;
5383 	    dbn = AVL_NEXT(&dmu_objset_ds(os)->ds_bookmarks, dbn)) {
5384 		mos_obj_refd(dbn->dbn_phys.zbm_redaction_obj);
5385 		if (dbn->dbn_phys.zbm_redaction_obj != 0) {
5386 			global_feature_count[
5387 			    SPA_FEATURE_REDACTION_BOOKMARKS]++;
5388 			objset_t *mos = os->os_spa->spa_meta_objset;
5389 			dnode_t *rl;
5390 			VERIFY0(dnode_hold(mos,
5391 			    dbn->dbn_phys.zbm_redaction_obj, FTAG, &rl));
5392 			if (rl->dn_have_spill) {
5393 				global_feature_count[
5394 				    SPA_FEATURE_REDACTION_LIST_SPILL]++;
5395 			}
5396 		}
5397 		if (dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN)
5398 			global_feature_count[SPA_FEATURE_BOOKMARK_WRITTEN]++;
5399 	}
5400 
5401 	if (dsl_deadlist_is_open(&dmu_objset_ds(os)->ds_dir->dd_livelist) &&
5402 	    !dmu_objset_is_snapshot(os)) {
5403 		global_feature_count[SPA_FEATURE_LIVELIST]++;
5404 	}
5405 
5406 	dump_objset(os);
5407 	close_objset(os, FTAG);
5408 	fuid_table_destroy();
5409 	return (0);
5410 }
5411 
5412 /*
5413  * Block statistics.
5414  */
/*
 * Number of slots in zb_psize_histogram: zdb_count_block() indexes it by
 * psize in SPA_MINBLOCKSIZE units and folds anything larger than
 * SPA_OLD_MAXBLOCKSIZE into the final slot.
 */
5415 #define	PSIZE_HISTO_SIZE (SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 2)
/*
 * Totals for one (level, object type) cell of zdb_cb_t.zcb_type, updated
 * by zdb_count_block() for every block pointer it is given.
 */
5416 typedef struct zdb_blkstats {
5417 	uint64_t zb_asize;	/* sum of BP_GET_ASIZE() */
5418 	uint64_t zb_lsize;	/* sum of BP_GET_LSIZE() */
5419 	uint64_t zb_psize;	/* sum of BP_GET_PSIZE() */
5420 	uint64_t zb_count;	/* number of block pointers counted */
5421 	uint64_t zb_gangs;	/* sum of BP_COUNT_GANG() */
	/* blocks with at least two DVAs on the same vdev */
5422 	uint64_t zb_ditto_samevdev;
	/* ... of which two such DVAs also share a metaslab */
5423 	uint64_t zb_ditto_same_ms;
5424 	uint64_t zb_psize_histogram[PSIZE_HISTO_SIZE];
5425 } zdb_blkstats_t;
5426 
/*
 * Extended object types to report deferred frees and dedup auto-ditto blocks.
 * They are numbered directly after the real DMU object types, so
 * (type - DMU_OT_NUMTYPES) indexes zdb_ot_extname[] below.
 */
#define	ZDB_OT_DEFERRED	(DMU_OT_NUMTYPES + 0)
#define	ZDB_OT_DITTO	(DMU_OT_NUMTYPES + 1)
#define	ZDB_OT_OTHER	(DMU_OT_NUMTYPES + 2)
#define	ZDB_OT_TOTAL	(DMU_OT_NUMTYPES + 3)

/*
 * Display names for the extended types, in ZDB_OT_* order.  The table is
 * read-only, so both the strings and the pointers are const.
 */
static const char *const zdb_ot_extname[] = {
	"deferred free",
	"dedup ditto",
	"other",
	"Total",
};
5441 
/*
 * Row of zdb_cb_t.zcb_type that accumulates blocks of every level;
 * zdb_count_block() uses it alongside the block's own level.
 */
5442 #define	ZB_TOTAL	DN_MAX_LEVELS
/*
 * Length of the power-of-two size histograms in zdb_cb_t; slot i covers
 * sizes whose highest set bit is bit i.
 */
5443 #define	SPA_MAX_FOR_16M	(SPA_MAXBLOCKSHIFT+1)
5444 
/*
 * Node of zdb's private in-memory copy of the BRT (zdb_cb_t.zcb_brt),
 * created by zdb_count_block() the first time a cloned block is seen.
 */
5445 typedef struct zdb_brt_entry {
	/* first DVA of the block; the key used for avl_find() */
5446 	dva_t		zbre_dva;
	/*
	 * Starts at brt_entry_get_refcount() and drops by one each time the
	 * block is seen again; the entry is freed when it reaches zero.
	 */
5447 	uint64_t	zbre_refcount;
	/* presumably the linkage into zcb_brt -- confirm at avl_create() */
5448 	avl_node_t	zbre_node;
5449 } zdb_brt_entry_t;
5450 
/*
 * State shared by the block traversal callbacks (zdb_count_block(),
 * zdb_blkptr_done(), ...).  Only fields whose use is visible next to this
 * definition are described; the rest are maintained elsewhere in the file.
 */
5451 typedef struct zdb_cb {
	/*
	 * Block totals indexed by [level][type]; row ZB_TOTAL and column
	 * ZDB_OT_TOTAL hold the sums over all levels / all types.
	 */
5452 	zdb_blkstats_t	zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
5453 	uint64_t	zcb_removing_size;
5454 	uint64_t	zcb_checkpoint_size;
5455 	uint64_t	zcb_dedup_asize;
5456 	uint64_t	zcb_dedup_blocks;
	/* asize and count of blocks seen again after being noted in zcb_brt */
5457 	uint64_t	zcb_clone_asize;
5458 	uint64_t	zcb_clone_blocks;
	/*
	 * Power-of-two histograms of non-embedded blocks: number of blocks
	 * (_count) and summed bytes (_len) per size bin, plus grand totals.
	 */
5459 	uint64_t	zcb_psize_count[SPA_MAX_FOR_16M];
5460 	uint64_t	zcb_lsize_count[SPA_MAX_FOR_16M];
5461 	uint64_t	zcb_asize_count[SPA_MAX_FOR_16M];
5462 	uint64_t	zcb_psize_len[SPA_MAX_FOR_16M];
5463 	uint64_t	zcb_lsize_len[SPA_MAX_FOR_16M];
5464 	uint64_t	zcb_asize_len[SPA_MAX_FOR_16M];
5465 	uint64_t	zcb_psize_total;
5466 	uint64_t	zcb_lsize_total;
5467 	uint64_t	zcb_asize_total;
	/* embedded block pointers, by BPE_GET_ETYPE() and by BPE_GET_PSIZE() */
5468 	uint64_t	zcb_embedded_blocks[NUM_BP_EMBEDDED_TYPES];
5469 	uint64_t	zcb_embedded_histogram[NUM_BP_EMBEDDED_TYPES]
5470 	    [BPE_PAYLOAD_SIZE + 1];
5471 	uint64_t	zcb_start;
5472 	hrtime_t	zcb_lastprint;
5473 	uint64_t	zcb_totalasize;
	/* read failures counted per zio error number (see zdb_blkptr_done()) */
5474 	uint64_t	zcb_errors[256];
5475 	int		zcb_readfails;
	/* set to 1 once any non-speculative read has failed */
5476 	int		zcb_haderrors;
	/* pool being examined */
5477 	spa_t		*zcb_spa;
5478 	uint32_t	**zcb_vd_obsolete_counts;
	/* tree of zdb_brt_entry_t, consulted only when zcb_brt_is_active */
5479 	avl_tree_t	zcb_brt;
5480 	boolean_t	zcb_brt_is_active;
5481 } zdb_cb_t;
5482 
5483 /* test if two DVA offsets from same vdev are within the same metaslab */
5484 static boolean_t
5485 same_metaslab(spa_t *spa, uint64_t vdev, uint64_t off1, uint64_t off2)
5486 {
5487 	vdev_t *vd = vdev_lookup_top(spa, vdev);
5488 	uint64_t ms_shift = vd->vdev_ms_shift;
5489 
5490 	return ((off1 >> ms_shift) == (off2 >> ms_shift));
5491 }
5492 
5493 /*
5494  * Used to simplify reporting of the histogram data.
5495  */
5496 typedef struct one_histo {
5497 	const char *name;	/* column group title, e.g. "psize" */
5498 	uint64_t *count;	/* per-bin block counts (a zcb_*_count array) */
5499 	uint64_t *len;		/* per-bin byte totals (a zcb_*_len array) */
5500 	uint64_t cumulative;	/* running sum of len[] while rows print */
5501 } one_histo_t;
5502 
5503 /*
5504  * The number of separate histograms processed for psize, lsize and asize.
5505  */
5506 #define	NUM_HISTO 3
5507 
5508 /*
5509  * This routine will create a fixed column size output of three different
5510  * histograms showing by blocksize of 512 - 2^ SPA_MAX_FOR_16M
5511  * the count, length and cumulative length of the psize, lsize and
5512  * asize blocks.
5513  *
5514  * All three types of blocks are listed on a single line
5515  *
5516  * By default the table is printed in nicenumber format (e.g. 123K) but
5517  * if the '-P' parameter is specified then the full raw number (parseable)
5518  * is printed out.
5519  */
5520 static void
5521 dump_size_histograms(zdb_cb_t *zcb)
5522 {
5523 	/*
5524 	 * A temporary buffer that allows us to convert a number into
5525 	 * a string using zdb_nicenumber to allow either raw or human
5526 	 * readable numbers to be output.
5527 	 */
5528 	char numbuf[32];
5529 
5530 	/*
5531 	 * Define titles which are used in the headers of the tables
5532 	 * printed by this routine.
5533 	 */
5534 	const char blocksize_title1[] = "block";
5535 	const char blocksize_title2[] = "size";
5536 	const char count_title[] = "Count";
5537 	const char length_title[] = "Size";
5538 	const char cumulative_title[] = "Cum.";
5539 
5540 	/*
5541 	 * Setup the histogram arrays (psize, lsize, and asize).
5542 	 */
5543 	one_histo_t parm_histo[NUM_HISTO];
5544 
5545 	parm_histo[0].name = "psize";
5546 	parm_histo[0].count = zcb->zcb_psize_count;
5547 	parm_histo[0].len = zcb->zcb_psize_len;
5548 	parm_histo[0].cumulative = 0;
5549 
5550 	parm_histo[1].name = "lsize";
5551 	parm_histo[1].count = zcb->zcb_lsize_count;
5552 	parm_histo[1].len = zcb->zcb_lsize_len;
5553 	parm_histo[1].cumulative = 0;
5554 
5555 	parm_histo[2].name = "asize";
5556 	parm_histo[2].count = zcb->zcb_asize_count;
5557 	parm_histo[2].len = zcb->zcb_asize_len;
5558 	parm_histo[2].cumulative = 0;
5559 
5560 
5561 	(void) printf("\nBlock Size Histogram\n");
5562 	/*
5563 	 * Print the first line titles
5564 	 */
5565 	if (dump_opt['P'])
5566 		(void) printf("\n%s\t", blocksize_title1);
5567 	else
5568 		(void) printf("\n%7s   ", blocksize_title1);
5569 
5570 	for (int j = 0; j < NUM_HISTO; j++) {
5571 		if (dump_opt['P']) {
5572 			if (j < NUM_HISTO - 1) {
5573 				(void) printf("%s\t\t\t", parm_histo[j].name);
5574 			} else {
5575 				/* Don't print trailing spaces */
5576 				(void) printf("  %s", parm_histo[j].name);
5577 			}
5578 		} else {
5579 			if (j < NUM_HISTO - 1) {
5580 				/* Left aligned strings in the output */
5581 				(void) printf("%-7s              ",
5582 				    parm_histo[j].name);
5583 			} else {
5584 				/* Don't print trailing spaces */
5585 				(void) printf("%s", parm_histo[j].name);
5586 			}
5587 		}
5588 	}
5589 	(void) printf("\n");
5590 
5591 	/*
5592 	 * Print the second line titles
5593 	 */
5594 	if (dump_opt['P']) {
5595 		(void) printf("%s\t", blocksize_title2);
5596 	} else {
5597 		(void) printf("%7s ", blocksize_title2);
5598 	}
5599 
5600 	for (int i = 0; i < NUM_HISTO; i++) {
5601 		if (dump_opt['P']) {
5602 			(void) printf("%s\t%s\t%s\t",
5603 			    count_title, length_title, cumulative_title);
5604 		} else {
5605 			(void) printf("%7s%7s%7s",
5606 			    count_title, length_title, cumulative_title);
5607 		}
5608 	}
5609 	(void) printf("\n");
5610 
5611 	/*
5612 	 * Print the rows
5613 	 */
5614 	for (int i = SPA_MINBLOCKSHIFT; i < SPA_MAX_FOR_16M; i++) {
5615 
5616 		/*
5617 		 * Print the first column showing the blocksize
5618 		 */
5619 		zdb_nicenum((1ULL << i), numbuf, sizeof (numbuf));
5620 
5621 		if (dump_opt['P']) {
5622 			printf("%s", numbuf);
5623 		} else {
5624 			printf("%7s:", numbuf);
5625 		}
5626 
5627 		/*
5628 		 * Print the remaining set of 3 columns per size:
5629 		 * for psize, lsize and asize
5630 		 */
5631 		for (int j = 0; j < NUM_HISTO; j++) {
5632 			parm_histo[j].cumulative += parm_histo[j].len[i];
5633 
5634 			zdb_nicenum(parm_histo[j].count[i],
5635 			    numbuf, sizeof (numbuf));
5636 			if (dump_opt['P'])
5637 				(void) printf("\t%s", numbuf);
5638 			else
5639 				(void) printf("%7s", numbuf);
5640 
5641 			zdb_nicenum(parm_histo[j].len[i],
5642 			    numbuf, sizeof (numbuf));
5643 			if (dump_opt['P'])
5644 				(void) printf("\t%s", numbuf);
5645 			else
5646 				(void) printf("%7s", numbuf);
5647 
5648 			zdb_nicenum(parm_histo[j].cumulative,
5649 			    numbuf, sizeof (numbuf));
5650 			if (dump_opt['P'])
5651 				(void) printf("\t%s", numbuf);
5652 			else
5653 				(void) printf("%7s", numbuf);
5654 		}
5655 		(void) printf("\n");
5656 	}
5657 }
5658 
5659 static void
5660 zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
5661     dmu_object_type_t type)
5662 {
5663 	uint64_t refcnt = 0;
5664 	int i;
5665 
5666 	ASSERT(type < ZDB_OT_TOTAL);
5667 
5668 	if (zilog && zil_bp_tree_add(zilog, bp) != 0)
5669 		return;
5670 
5671 	spa_config_enter(zcb->zcb_spa, SCL_CONFIG, FTAG, RW_READER);
5672 
5673 	for (i = 0; i < 4; i++) {
5674 		int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
5675 		int t = (i & 1) ? type : ZDB_OT_TOTAL;
5676 		int equal;
5677 		zdb_blkstats_t *zb = &zcb->zcb_type[l][t];
5678 
5679 		zb->zb_asize += BP_GET_ASIZE(bp);
5680 		zb->zb_lsize += BP_GET_LSIZE(bp);
5681 		zb->zb_psize += BP_GET_PSIZE(bp);
5682 		zb->zb_count++;
5683 
5684 		/*
5685 		 * The histogram is only big enough to record blocks up to
5686 		 * SPA_OLD_MAXBLOCKSIZE; larger blocks go into the last,
5687 		 * "other", bucket.
5688 		 */
5689 		unsigned idx = BP_GET_PSIZE(bp) >> SPA_MINBLOCKSHIFT;
5690 		idx = MIN(idx, SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 1);
5691 		zb->zb_psize_histogram[idx]++;
5692 
5693 		zb->zb_gangs += BP_COUNT_GANG(bp);
5694 
5695 		switch (BP_GET_NDVAS(bp)) {
5696 		case 2:
5697 			if (DVA_GET_VDEV(&bp->blk_dva[0]) ==
5698 			    DVA_GET_VDEV(&bp->blk_dva[1])) {
5699 				zb->zb_ditto_samevdev++;
5700 
5701 				if (same_metaslab(zcb->zcb_spa,
5702 				    DVA_GET_VDEV(&bp->blk_dva[0]),
5703 				    DVA_GET_OFFSET(&bp->blk_dva[0]),
5704 				    DVA_GET_OFFSET(&bp->blk_dva[1])))
5705 					zb->zb_ditto_same_ms++;
5706 			}
5707 			break;
5708 		case 3:
5709 			equal = (DVA_GET_VDEV(&bp->blk_dva[0]) ==
5710 			    DVA_GET_VDEV(&bp->blk_dva[1])) +
5711 			    (DVA_GET_VDEV(&bp->blk_dva[0]) ==
5712 			    DVA_GET_VDEV(&bp->blk_dva[2])) +
5713 			    (DVA_GET_VDEV(&bp->blk_dva[1]) ==
5714 			    DVA_GET_VDEV(&bp->blk_dva[2]));
5715 			if (equal != 0) {
5716 				zb->zb_ditto_samevdev++;
5717 
5718 				if (DVA_GET_VDEV(&bp->blk_dva[0]) ==
5719 				    DVA_GET_VDEV(&bp->blk_dva[1]) &&
5720 				    same_metaslab(zcb->zcb_spa,
5721 				    DVA_GET_VDEV(&bp->blk_dva[0]),
5722 				    DVA_GET_OFFSET(&bp->blk_dva[0]),
5723 				    DVA_GET_OFFSET(&bp->blk_dva[1])))
5724 					zb->zb_ditto_same_ms++;
5725 				else if (DVA_GET_VDEV(&bp->blk_dva[0]) ==
5726 				    DVA_GET_VDEV(&bp->blk_dva[2]) &&
5727 				    same_metaslab(zcb->zcb_spa,
5728 				    DVA_GET_VDEV(&bp->blk_dva[0]),
5729 				    DVA_GET_OFFSET(&bp->blk_dva[0]),
5730 				    DVA_GET_OFFSET(&bp->blk_dva[2])))
5731 					zb->zb_ditto_same_ms++;
5732 				else if (DVA_GET_VDEV(&bp->blk_dva[1]) ==
5733 				    DVA_GET_VDEV(&bp->blk_dva[2]) &&
5734 				    same_metaslab(zcb->zcb_spa,
5735 				    DVA_GET_VDEV(&bp->blk_dva[1]),
5736 				    DVA_GET_OFFSET(&bp->blk_dva[1]),
5737 				    DVA_GET_OFFSET(&bp->blk_dva[2])))
5738 					zb->zb_ditto_same_ms++;
5739 			}
5740 			break;
5741 		}
5742 	}
5743 
5744 	spa_config_exit(zcb->zcb_spa, SCL_CONFIG, FTAG);
5745 
5746 	if (BP_IS_EMBEDDED(bp)) {
5747 		zcb->zcb_embedded_blocks[BPE_GET_ETYPE(bp)]++;
5748 		zcb->zcb_embedded_histogram[BPE_GET_ETYPE(bp)]
5749 		    [BPE_GET_PSIZE(bp)]++;
5750 		return;
5751 	}
5752 	/*
5753 	 * The binning histogram bins by powers of two up to
5754 	 * SPA_MAXBLOCKSIZE rather than creating bins for
5755 	 * every possible blocksize found in the pool.
5756 	 */
5757 	int bin = highbit64(BP_GET_PSIZE(bp)) - 1;
5758 
5759 	zcb->zcb_psize_count[bin]++;
5760 	zcb->zcb_psize_len[bin] += BP_GET_PSIZE(bp);
5761 	zcb->zcb_psize_total += BP_GET_PSIZE(bp);
5762 
5763 	bin = highbit64(BP_GET_LSIZE(bp)) - 1;
5764 
5765 	zcb->zcb_lsize_count[bin]++;
5766 	zcb->zcb_lsize_len[bin] += BP_GET_LSIZE(bp);
5767 	zcb->zcb_lsize_total += BP_GET_LSIZE(bp);
5768 
5769 	bin = highbit64(BP_GET_ASIZE(bp)) - 1;
5770 
5771 	zcb->zcb_asize_count[bin]++;
5772 	zcb->zcb_asize_len[bin] += BP_GET_ASIZE(bp);
5773 	zcb->zcb_asize_total += BP_GET_ASIZE(bp);
5774 
5775 	if (zcb->zcb_brt_is_active && brt_maybe_exists(zcb->zcb_spa, bp)) {
5776 		/*
5777 		 * Cloned blocks are special. We need to count them, so we can
5778 		 * later uncount them when reporting leaked space, and we must
5779 		 * only claim them them once.
5780 		 *
5781 		 * To do this, we keep our own in-memory BRT. For each block
5782 		 * we haven't seen before, we look it up in the real BRT and
5783 		 * if its there, we note it and its refcount then proceed as
5784 		 * normal. If we see the block again, we count it as a clone
5785 		 * and then give it no further consideration.
5786 		 */
5787 		zdb_brt_entry_t zbre_search, *zbre;
5788 		avl_index_t where;
5789 
5790 		zbre_search.zbre_dva = bp->blk_dva[0];
5791 		zbre = avl_find(&zcb->zcb_brt, &zbre_search, &where);
5792 		if (zbre != NULL) {
5793 			zcb->zcb_clone_asize += BP_GET_ASIZE(bp);
5794 			zcb->zcb_clone_blocks++;
5795 
5796 			zbre->zbre_refcount--;
5797 			if (zbre->zbre_refcount == 0) {
5798 				avl_remove(&zcb->zcb_brt, zbre);
5799 				umem_free(zbre, sizeof (zdb_brt_entry_t));
5800 			}
5801 			return;
5802 		}
5803 
5804 		uint64_t crefcnt = brt_entry_get_refcount(zcb->zcb_spa, bp);
5805 		if (crefcnt > 0) {
5806 			zbre = umem_zalloc(sizeof (zdb_brt_entry_t),
5807 			    UMEM_NOFAIL);
5808 			zbre->zbre_dva = bp->blk_dva[0];
5809 			zbre->zbre_refcount = crefcnt;
5810 			avl_insert(&zcb->zcb_brt, zbre, where);
5811 		}
5812 	}
5813 
5814 	if (dump_opt['L'])
5815 		return;
5816 
5817 	if (BP_GET_DEDUP(bp)) {
5818 		ddt_t *ddt;
5819 		ddt_entry_t *dde;
5820 
5821 		ddt = ddt_select(zcb->zcb_spa, bp);
5822 		ddt_enter(ddt);
5823 		dde = ddt_lookup(ddt, bp, B_FALSE);
5824 
5825 		if (dde == NULL) {
5826 			refcnt = 0;
5827 		} else {
5828 			ddt_phys_t *ddp = ddt_phys_select(dde, bp);
5829 			ddt_phys_decref(ddp);
5830 			refcnt = ddp->ddp_refcnt;
5831 			if (ddt_phys_total_refcnt(dde) == 0)
5832 				ddt_remove(ddt, dde);
5833 		}
5834 		ddt_exit(ddt);
5835 	}
5836 
5837 	VERIFY3U(zio_wait(zio_claim(NULL, zcb->zcb_spa,
5838 	    refcnt ? 0 : spa_min_claim_txg(zcb->zcb_spa),
5839 	    bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
5840 }
5841 
5842 static void
5843 zdb_blkptr_done(zio_t *zio)
5844 {
5845 	spa_t *spa = zio->io_spa;
5846 	blkptr_t *bp = zio->io_bp;
5847 	int ioerr = zio->io_error;
5848 	zdb_cb_t *zcb = zio->io_private;
5849 	zbookmark_phys_t *zb = &zio->io_bookmark;
5850 
5851 	mutex_enter(&spa->spa_scrub_lock);
5852 	spa->spa_load_verify_bytes -= BP_GET_PSIZE(bp);
5853 	cv_broadcast(&spa->spa_scrub_io_cv);
5854 
5855 	if (ioerr && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
5856 		char blkbuf[BP_SPRINTF_LEN];
5857 
5858 		zcb->zcb_haderrors = 1;
5859 		zcb->zcb_errors[ioerr]++;
5860 
5861 		if (dump_opt['b'] >= 2)
5862 			snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
5863 		else
5864 			blkbuf[0] = '\0';
5865 
5866 		(void) printf("zdb_blkptr_cb: "
5867 		    "Got error %d reading "
5868 		    "<%llu, %llu, %lld, %llx> %s -- skipping\n",
5869 		    ioerr,
5870 		    (u_longlong_t)zb->zb_objset,
5871 		    (u_longlong_t)zb->zb_object,
5872 		    (u_longlong_t)zb->zb_level,
5873 		    (u_longlong_t)zb->zb_blkid,
5874 		    blkbuf);
5875 	}
5876 	mutex_exit(&spa->spa_scrub_lock);
5877 
5878 	abd_free(zio->io_abd);
5879 }
5880 
5881 static int
5882 zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
5883     const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
5884 {
5885 	zdb_cb_t *zcb = arg;
5886 	dmu_object_type_t type;
5887 	boolean_t is_metadata;
5888 
5889 	if (zb->zb_level == ZB_DNODE_LEVEL)
5890 		return (0);
5891 
5892 	if (dump_opt['b'] >= 5 && bp->blk_birth > 0) {
5893 		char blkbuf[BP_SPRINTF_LEN];
5894 		snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
5895 		(void) printf("objset %llu object %llu "
5896 		    "level %lld offset 0x%llx %s\n",
5897 		    (u_longlong_t)zb->zb_objset,
5898 		    (u_longlong_t)zb->zb_object,
5899 		    (longlong_t)zb->zb_level,
5900 		    (u_longlong_t)blkid2offset(dnp, bp, zb),
5901 		    blkbuf);
5902 	}
5903 
5904 	if (BP_IS_HOLE(bp) || BP_IS_REDACTED(bp))
5905 		return (0);
5906 
5907 	type = BP_GET_TYPE(bp);
5908 
5909 	zdb_count_block(zcb, zilog, bp,
5910 	    (type & DMU_OT_NEWTYPE) ? ZDB_OT_OTHER : type);
5911 
5912 	is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type));
5913 
5914 	if (!BP_IS_EMBEDDED(bp) &&
5915 	    (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata))) {
5916 		size_t size = BP_GET_PSIZE(bp);
5917 		abd_t *abd = abd_alloc(size, B_FALSE);
5918 		int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
5919 
5920 		/* If it's an intent log block, failure is expected. */
5921 		if (zb->zb_level == ZB_ZIL_LEVEL)
5922 			flags |= ZIO_FLAG_SPECULATIVE;
5923 
5924 		mutex_enter(&spa->spa_scrub_lock);
5925 		while (spa->spa_load_verify_bytes > max_inflight_bytes)
5926 			cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
5927 		spa->spa_load_verify_bytes += size;
5928 		mutex_exit(&spa->spa_scrub_lock);
5929 
5930 		zio_nowait(zio_read(NULL, spa, bp, abd, size,
5931 		    zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb));
5932 	}
5933 
5934 	zcb->zcb_readfails = 0;
5935 
5936 	/* only call gethrtime() every 100 blocks */
5937 	static int iters;
5938 	if (++iters > 100)
5939 		iters = 0;
5940 	else
5941 		return (0);
5942 
5943 	if (dump_opt['b'] < 5 && gethrtime() > zcb->zcb_lastprint + NANOSEC) {
5944 		uint64_t now = gethrtime();
5945 		char buf[10];
5946 		uint64_t bytes = zcb->zcb_type[ZB_TOTAL][ZDB_OT_TOTAL].zb_asize;
5947 		uint64_t kb_per_sec =
5948 		    1 + bytes / (1 + ((now - zcb->zcb_start) / 1000 / 1000));
5949 		uint64_t sec_remaining =
5950 		    (zcb->zcb_totalasize - bytes) / 1024 / kb_per_sec;
5951 
5952 		/* make sure nicenum has enough space */
5953 		_Static_assert(sizeof (buf) >= NN_NUMBUF_SZ, "buf truncated");
5954 
5955 		zfs_nicebytes(bytes, buf, sizeof (buf));
5956 		(void) fprintf(stderr,
5957 		    "\r%5s completed (%4"PRIu64"MB/s) "
5958 		    "estimated time remaining: "
5959 		    "%"PRIu64"hr %02"PRIu64"min %02"PRIu64"sec        ",
5960 		    buf, kb_per_sec / 1024,
5961 		    sec_remaining / 60 / 60,
5962 		    sec_remaining / 60 % 60,
5963 		    sec_remaining % 60);
5964 
5965 		zcb->zcb_lastprint = now;
5966 	}
5967 
5968 	return (0);
5969 }
5970 
5971 static void
5972 zdb_leak(void *arg, uint64_t start, uint64_t size)
5973 {
5974 	vdev_t *vd = arg;
5975 
5976 	(void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
5977 	    (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size);
5978 }
5979 
/*
 * Placeholder metaslab ops table.  zdb_leak_init() installs it on the
 * pool's metaslab classes while ms_allocatable is being repurposed.  Only
 * the first member is initialized explicitly; any remaining members are
 * zero-initialized by the language rules.
 * NOTE(review): the inline comment assumes the first member of
 * metaslab_ops_t is the alloc hook -- confirm against the struct definition.
 */
static metaslab_ops_t zdb_metaslab_ops = {
	NULL	/* alloc */
};
5983 
5984 static int
5985 load_unflushed_svr_segs_cb(spa_t *spa, space_map_entry_t *sme,
5986     uint64_t txg, void *arg)
5987 {
5988 	spa_vdev_removal_t *svr = arg;
5989 
5990 	uint64_t offset = sme->sme_offset;
5991 	uint64_t size = sme->sme_run;
5992 
5993 	/* skip vdevs we don't care about */
5994 	if (sme->sme_vdev != svr->svr_vdev_id)
5995 		return (0);
5996 
5997 	vdev_t *vd = vdev_lookup_top(spa, sme->sme_vdev);
5998 	metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift];
5999 	ASSERT(sme->sme_type == SM_ALLOC || sme->sme_type == SM_FREE);
6000 
6001 	if (txg < metaslab_unflushed_txg(ms))
6002 		return (0);
6003 
6004 	if (sme->sme_type == SM_ALLOC)
6005 		range_tree_add(svr->svr_allocd_segs, offset, size);
6006 	else
6007 		range_tree_remove(svr->svr_allocd_segs, offset, size);
6008 
6009 	return (0);
6010 }
6011 
6012 static void
6013 claim_segment_impl_cb(uint64_t inner_offset, vdev_t *vd, uint64_t offset,
6014     uint64_t size, void *arg)
6015 {
6016 	(void) inner_offset, (void) arg;
6017 
6018 	/*
6019 	 * This callback was called through a remap from
6020 	 * a device being removed. Therefore, the vdev that
6021 	 * this callback is applied to is a concrete
6022 	 * vdev.
6023 	 */
6024 	ASSERT(vdev_is_concrete(vd));
6025 
6026 	VERIFY0(metaslab_claim_impl(vd, offset, size,
6027 	    spa_min_claim_txg(vd->vdev_spa)));
6028 }
6029 
6030 static void
6031 claim_segment_cb(void *arg, uint64_t offset, uint64_t size)
6032 {
6033 	vdev_t *vd = arg;
6034 
6035 	vdev_indirect_ops.vdev_op_remap(vd, offset, size,
6036 	    claim_segment_impl_cb, NULL);
6037 }
6038 
/*
 * After accounting for all allocated blocks that are directly referenced,
 * we might have missed a reference to a block from a partially complete
 * (and thus unused) indirect mapping object. We perform a secondary pass
 * through the metaslabs we have already mapped and claim the destination
 * blocks.
 *
 * Does nothing when leak detection is disabled (-L) or when the pool has
 * no spa_vdev_removal state.  Otherwise the size of the claimed segments
 * is added to zcb->zcb_removing_size.  SCL_CONFIG is held as reader for
 * the duration of the work.
 */
static void
zdb_claim_removing(spa_t *spa, zdb_cb_t *zcb)
{
	if (dump_opt['L'])
		return;

	if (spa->spa_vdev_removal == NULL)
		return;

	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);

	spa_vdev_removal_t *svr = spa->spa_vdev_removal;
	vdev_t *vd = vdev_lookup_top(spa, svr->svr_vdev_id);
	vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;

	/* svr_allocd_segs is expected to start out empty. */
	ASSERT0(range_tree_space(svr->svr_allocd_segs));

	/*
	 * Load each metaslab's allocated segments into a scratch tree and
	 * move them into svr_allocd_segs; the scratch tree is empty again
	 * after every vacate.
	 */
	range_tree_t *allocs = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0);
	for (uint64_t msi = 0; msi < vd->vdev_ms_count; msi++) {
		metaslab_t *msp = vd->vdev_ms[msi];

		ASSERT0(range_tree_space(allocs));
		if (msp->ms_sm != NULL)
			VERIFY0(space_map_load(msp->ms_sm, allocs, SM_ALLOC));
		range_tree_vacate(allocs, range_tree_add, svr->svr_allocd_segs);
	}
	range_tree_destroy(allocs);

	/* Apply the entries that only exist in the spacemap logs. */
	iterate_through_spacemap_logs(spa, load_unflushed_svr_segs_cb, svr);

	/*
	 * Clear everything past what has been synced,
	 * because we have not allocated mappings for
	 * it yet.
	 */
	range_tree_clear(svr->svr_allocd_segs,
	    vdev_indirect_mapping_max_offset(vim),
	    vd->vdev_asize - vdev_indirect_mapping_max_offset(vim));

	/* Record the total, then claim (and empty out) every segment. */
	zcb->zcb_removing_size += range_tree_space(svr->svr_allocd_segs);
	range_tree_vacate(svr->svr_allocd_segs, claim_segment_cb, vd);

	spa_config_exit(spa, SCL_CONFIG, FTAG);
}
6090 
6091 static int
6092 increment_indirect_mapping_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed,
6093     dmu_tx_t *tx)
6094 {
6095 	(void) tx;
6096 	zdb_cb_t *zcb = arg;
6097 	spa_t *spa = zcb->zcb_spa;
6098 	vdev_t *vd;
6099 	const dva_t *dva = &bp->blk_dva[0];
6100 
6101 	ASSERT(!bp_freed);
6102 	ASSERT(!dump_opt['L']);
6103 	ASSERT3U(BP_GET_NDVAS(bp), ==, 1);
6104 
6105 	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
6106 	vd = vdev_lookup_top(zcb->zcb_spa, DVA_GET_VDEV(dva));
6107 	ASSERT3P(vd, !=, NULL);
6108 	spa_config_exit(spa, SCL_VDEV, FTAG);
6109 
6110 	ASSERT(vd->vdev_indirect_config.vic_mapping_object != 0);
6111 	ASSERT3P(zcb->zcb_vd_obsolete_counts[vd->vdev_id], !=, NULL);
6112 
6113 	vdev_indirect_mapping_increment_obsolete_count(
6114 	    vd->vdev_indirect_mapping,
6115 	    DVA_GET_OFFSET(dva), DVA_GET_ASIZE(dva),
6116 	    zcb->zcb_vd_obsolete_counts[vd->vdev_id]);
6117 
6118 	return (0);
6119 }
6120 
6121 static uint32_t *
6122 zdb_load_obsolete_counts(vdev_t *vd)
6123 {
6124 	vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
6125 	spa_t *spa = vd->vdev_spa;
6126 	spa_condensing_indirect_phys_t *scip =
6127 	    &spa->spa_condensing_indirect_phys;
6128 	uint64_t obsolete_sm_object;
6129 	uint32_t *counts;
6130 
6131 	VERIFY0(vdev_obsolete_sm_object(vd, &obsolete_sm_object));
6132 	EQUIV(obsolete_sm_object != 0, vd->vdev_obsolete_sm != NULL);
6133 	counts = vdev_indirect_mapping_load_obsolete_counts(vim);
6134 	if (vd->vdev_obsolete_sm != NULL) {
6135 		vdev_indirect_mapping_load_obsolete_spacemap(vim, counts,
6136 		    vd->vdev_obsolete_sm);
6137 	}
6138 	if (scip->scip_vdev == vd->vdev_id &&
6139 	    scip->scip_prev_obsolete_sm_object != 0) {
6140 		space_map_t *prev_obsolete_sm = NULL;
6141 		VERIFY0(space_map_open(&prev_obsolete_sm, spa->spa_meta_objset,
6142 		    scip->scip_prev_obsolete_sm_object, 0, vd->vdev_asize, 0));
6143 		vdev_indirect_mapping_load_obsolete_spacemap(vim, counts,
6144 		    prev_obsolete_sm);
6145 		space_map_close(prev_obsolete_sm);
6146 	}
6147 	return (counts);
6148 }
6149 
/*
 * Walk the pool's dedup table and prime the leak-detection state with it.
 *
 * The walk stops at the first entry whose class is DDT_CLASS_UNIQUE.  For
 * every other entry, each physical variant with a non-zero birth txg is
 * turned into a block pointer: ditto variants are counted through
 * zdb_count_block(), the others contribute their extra references
 * (refcnt - 1) to zcb_dedup_asize and bump zcb_dedup_blocks.  The entry is
 * then looked up in the in-core table with the table lock held.
 *
 * Must not be called when leak detection is disabled (-L).
 */
static void
zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb)
{
	ddt_bookmark_t ddb = {0};
	ddt_entry_t dde;
	int error;
	int p;

	ASSERT(!dump_opt['L']);

	while ((error = ddt_walk(spa, &ddb, &dde)) == 0) {
		blkptr_t blk;
		ddt_phys_t *ddp = dde.dde_phys;

		if (ddb.ddb_class == DDT_CLASS_UNIQUE)
			return;

		ASSERT(ddt_phys_total_refcnt(&dde) > 1);

		/* ddp advances in step with p through dde.dde_phys[]. */
		for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
			if (ddp->ddp_phys_birth == 0)
				continue;
			ddt_bp_create(ddb.ddb_checksum,
			    &dde.dde_key, ddp, &blk);
			if (p == DDT_PHYS_DITTO) {
				zdb_count_block(zcb, NULL, &blk, ZDB_OT_DITTO);
			} else {
				zcb->zcb_dedup_asize +=
				    BP_GET_ASIZE(&blk) * (ddp->ddp_refcnt - 1);
				zcb->zcb_dedup_blocks++;
			}
		}
		/*
		 * 'blk' is whatever the last ddt_bp_create() call above
		 * produced; this relies on at least one variant having a
		 * non-zero birth txg.
		 * NOTE(review): presumably the B_TRUE argument asks
		 * ddt_lookup() to add the entry if it is missing -- confirm.
		 */
		ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum];
		ddt_enter(ddt);
		VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
		ddt_exit(ddt);
	}

	/* The walk is expected to end with ENOENT. */
	ASSERT(error == ENOENT);
}
6190 
/*
 * State handed to checkpoint_sm_exclude_entry_cb() while iterating one
 * vdev's checkpoint space map.
 */
typedef struct checkpoint_sm_exclude_entry_arg {
	vdev_t *cseea_vd;		/* vdev whose space map is iterated */
	uint64_t cseea_checkpoint_size;	/* sum of sme_run over all entries */
} checkpoint_sm_exclude_entry_arg_t;
6195 
6196 static int
6197 checkpoint_sm_exclude_entry_cb(space_map_entry_t *sme, void *arg)
6198 {
6199 	checkpoint_sm_exclude_entry_arg_t *cseea = arg;
6200 	vdev_t *vd = cseea->cseea_vd;
6201 	metaslab_t *ms = vd->vdev_ms[sme->sme_offset >> vd->vdev_ms_shift];
6202 	uint64_t end = sme->sme_offset + sme->sme_run;
6203 
6204 	ASSERT(sme->sme_type == SM_FREE);
6205 
6206 	/*
6207 	 * Since the vdev_checkpoint_sm exists in the vdev level
6208 	 * and the ms_sm space maps exist in the metaslab level,
6209 	 * an entry in the checkpoint space map could theoretically
6210 	 * cross the boundaries of the metaslab that it belongs.
6211 	 *
6212 	 * In reality, because of the way that we populate and
6213 	 * manipulate the checkpoint's space maps currently,
6214 	 * there shouldn't be any entries that cross metaslabs.
6215 	 * Hence the assertion below.
6216 	 *
6217 	 * That said, there is no fundamental requirement that
6218 	 * the checkpoint's space map entries should not cross
6219 	 * metaslab boundaries. So if needed we could add code
6220 	 * that handles metaslab-crossing segments in the future.
6221 	 */
6222 	VERIFY3U(sme->sme_offset, >=, ms->ms_start);
6223 	VERIFY3U(end, <=, ms->ms_start + ms->ms_size);
6224 
6225 	/*
6226 	 * By removing the entry from the allocated segments we
6227 	 * also verify that the entry is there to begin with.
6228 	 */
6229 	mutex_enter(&ms->ms_lock);
6230 	range_tree_remove(ms->ms_allocatable, sme->sme_offset, sme->sme_run);
6231 	mutex_exit(&ms->ms_lock);
6232 
6233 	cseea->cseea_checkpoint_size += sme->sme_run;
6234 	return (0);
6235 }
6236 
6237 static void
6238 zdb_leak_init_vdev_exclude_checkpoint(vdev_t *vd, zdb_cb_t *zcb)
6239 {
6240 	spa_t *spa = vd->vdev_spa;
6241 	space_map_t *checkpoint_sm = NULL;
6242 	uint64_t checkpoint_sm_obj;
6243 
6244 	/*
6245 	 * If there is no vdev_top_zap, we are in a pool whose
6246 	 * version predates the pool checkpoint feature.
6247 	 */
6248 	if (vd->vdev_top_zap == 0)
6249 		return;
6250 
6251 	/*
6252 	 * If there is no reference of the vdev_checkpoint_sm in
6253 	 * the vdev_top_zap, then one of the following scenarios
6254 	 * is true:
6255 	 *
6256 	 * 1] There is no checkpoint
6257 	 * 2] There is a checkpoint, but no checkpointed blocks
6258 	 *    have been freed yet
6259 	 * 3] The current vdev is indirect
6260 	 *
6261 	 * In these cases we return immediately.
6262 	 */
6263 	if (zap_contains(spa_meta_objset(spa), vd->vdev_top_zap,
6264 	    VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0)
6265 		return;
6266 
6267 	VERIFY0(zap_lookup(spa_meta_objset(spa), vd->vdev_top_zap,
6268 	    VDEV_TOP_ZAP_POOL_CHECKPOINT_SM, sizeof (uint64_t), 1,
6269 	    &checkpoint_sm_obj));
6270 
6271 	checkpoint_sm_exclude_entry_arg_t cseea;
6272 	cseea.cseea_vd = vd;
6273 	cseea.cseea_checkpoint_size = 0;
6274 
6275 	VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(spa),
6276 	    checkpoint_sm_obj, 0, vd->vdev_asize, vd->vdev_ashift));
6277 
6278 	VERIFY0(space_map_iterate(checkpoint_sm,
6279 	    space_map_length(checkpoint_sm),
6280 	    checkpoint_sm_exclude_entry_cb, &cseea));
6281 	space_map_close(checkpoint_sm);
6282 
6283 	zcb->zcb_checkpoint_size += cseea.cseea_checkpoint_size;
6284 }
6285 
6286 static void
6287 zdb_leak_init_exclude_checkpoint(spa_t *spa, zdb_cb_t *zcb)
6288 {
6289 	ASSERT(!dump_opt['L']);
6290 
6291 	vdev_t *rvd = spa->spa_root_vdev;
6292 	for (uint64_t c = 0; c < rvd->vdev_children; c++) {
6293 		ASSERT3U(c, ==, rvd->vdev_child[c]->vdev_id);
6294 		zdb_leak_init_vdev_exclude_checkpoint(rvd->vdev_child[c], zcb);
6295 	}
6296 }
6297 
6298 static int
6299 count_unflushed_space_cb(spa_t *spa, space_map_entry_t *sme,
6300     uint64_t txg, void *arg)
6301 {
6302 	int64_t *ualloc_space = arg;
6303 
6304 	uint64_t offset = sme->sme_offset;
6305 	uint64_t vdev_id = sme->sme_vdev;
6306 
6307 	vdev_t *vd = vdev_lookup_top(spa, vdev_id);
6308 	if (!vdev_is_concrete(vd))
6309 		return (0);
6310 
6311 	metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift];
6312 	ASSERT(sme->sme_type == SM_ALLOC || sme->sme_type == SM_FREE);
6313 
6314 	if (txg < metaslab_unflushed_txg(ms))
6315 		return (0);
6316 
6317 	if (sme->sme_type == SM_ALLOC)
6318 		*ualloc_space += sme->sme_run;
6319 	else
6320 		*ualloc_space -= sme->sme_run;
6321 
6322 	return (0);
6323 }
6324 
6325 static int64_t
6326 get_unflushed_alloc_space(spa_t *spa)
6327 {
6328 	if (dump_opt['L'])
6329 		return (0);
6330 
6331 	int64_t ualloc_space = 0;
6332 	iterate_through_spacemap_logs(spa, count_unflushed_space_cb,
6333 	    &ualloc_space);
6334 	return (ualloc_space);
6335 }
6336 
6337 static int
6338 load_unflushed_cb(spa_t *spa, space_map_entry_t *sme, uint64_t txg, void *arg)
6339 {
6340 	maptype_t *uic_maptype = arg;
6341 
6342 	uint64_t offset = sme->sme_offset;
6343 	uint64_t size = sme->sme_run;
6344 	uint64_t vdev_id = sme->sme_vdev;
6345 
6346 	vdev_t *vd = vdev_lookup_top(spa, vdev_id);
6347 
6348 	/* skip indirect vdevs */
6349 	if (!vdev_is_concrete(vd))
6350 		return (0);
6351 
6352 	metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift];
6353 
6354 	ASSERT(sme->sme_type == SM_ALLOC || sme->sme_type == SM_FREE);
6355 	ASSERT(*uic_maptype == SM_ALLOC || *uic_maptype == SM_FREE);
6356 
6357 	if (txg < metaslab_unflushed_txg(ms))
6358 		return (0);
6359 
6360 	if (*uic_maptype == sme->sme_type)
6361 		range_tree_add(ms->ms_allocatable, offset, size);
6362 	else
6363 		range_tree_remove(ms->ms_allocatable, offset, size);
6364 
6365 	return (0);
6366 }
6367 
/*
 * Replay the spacemap logs into the metaslabs' ms_allocatable trees via
 * load_unflushed_cb(), which receives a pointer to 'maptype' as its
 * argument.
 */
static void
load_unflushed_to_ms_allocatables(spa_t *spa, maptype_t maptype)
{
	iterate_through_spacemap_logs(spa, load_unflushed_cb, &maptype);
}
6373 
6374 static void
6375 load_concrete_ms_allocatable_trees(spa_t *spa, maptype_t maptype)
6376 {
6377 	vdev_t *rvd = spa->spa_root_vdev;
6378 	for (uint64_t i = 0; i < rvd->vdev_children; i++) {
6379 		vdev_t *vd = rvd->vdev_child[i];
6380 
6381 		ASSERT3U(i, ==, vd->vdev_id);
6382 
6383 		if (vd->vdev_ops == &vdev_indirect_ops)
6384 			continue;
6385 
6386 		for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
6387 			metaslab_t *msp = vd->vdev_ms[m];
6388 
6389 			(void) fprintf(stderr,
6390 			    "\rloading concrete vdev %llu, "
6391 			    "metaslab %llu of %llu ...",
6392 			    (longlong_t)vd->vdev_id,
6393 			    (longlong_t)msp->ms_id,
6394 			    (longlong_t)vd->vdev_ms_count);
6395 
6396 			mutex_enter(&msp->ms_lock);
6397 			range_tree_vacate(msp->ms_allocatable, NULL, NULL);
6398 
6399 			/*
6400 			 * We don't want to spend the CPU manipulating the
6401 			 * size-ordered tree, so clear the range_tree ops.
6402 			 */
6403 			msp->ms_allocatable->rt_ops = NULL;
6404 
6405 			if (msp->ms_sm != NULL) {
6406 				VERIFY0(space_map_load(msp->ms_sm,
6407 				    msp->ms_allocatable, maptype));
6408 			}
6409 			if (!msp->ms_loaded)
6410 				msp->ms_loaded = B_TRUE;
6411 			mutex_exit(&msp->ms_lock);
6412 		}
6413 	}
6414 
6415 	load_unflushed_to_ms_allocatables(spa, maptype);
6416 }
6417 
6418 /*
6419  * vm_idxp is an in-out parameter which (for indirect vdevs) is the
6420  * index in vim_entries that has the first entry in this metaslab.
6421  * On return, it will be set to the first entry after this metaslab.
6422  */
6423 static void
6424 load_indirect_ms_allocatable_tree(vdev_t *vd, metaslab_t *msp,
6425     uint64_t *vim_idxp)
6426 {
6427 	vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
6428 
6429 	mutex_enter(&msp->ms_lock);
6430 	range_tree_vacate(msp->ms_allocatable, NULL, NULL);
6431 
6432 	/*
6433 	 * We don't want to spend the CPU manipulating the
6434 	 * size-ordered tree, so clear the range_tree ops.
6435 	 */
6436 	msp->ms_allocatable->rt_ops = NULL;
6437 
6438 	for (; *vim_idxp < vdev_indirect_mapping_num_entries(vim);
6439 	    (*vim_idxp)++) {
6440 		vdev_indirect_mapping_entry_phys_t *vimep =
6441 		    &vim->vim_entries[*vim_idxp];
6442 		uint64_t ent_offset = DVA_MAPPING_GET_SRC_OFFSET(vimep);
6443 		uint64_t ent_len = DVA_GET_ASIZE(&vimep->vimep_dst);
6444 		ASSERT3U(ent_offset, >=, msp->ms_start);
6445 		if (ent_offset >= msp->ms_start + msp->ms_size)
6446 			break;
6447 
6448 		/*
6449 		 * Mappings do not cross metaslab boundaries,
6450 		 * because we create them by walking the metaslabs.
6451 		 */
6452 		ASSERT3U(ent_offset + ent_len, <=,
6453 		    msp->ms_start + msp->ms_size);
6454 		range_tree_add(msp->ms_allocatable, ent_offset, ent_len);
6455 	}
6456 
6457 	if (!msp->ms_loaded)
6458 		msp->ms_loaded = B_TRUE;
6459 	mutex_exit(&msp->ms_lock);
6460 }
6461 
6462 static void
6463 zdb_leak_init_prepare_indirect_vdevs(spa_t *spa, zdb_cb_t *zcb)
6464 {
6465 	ASSERT(!dump_opt['L']);
6466 
6467 	vdev_t *rvd = spa->spa_root_vdev;
6468 	for (uint64_t c = 0; c < rvd->vdev_children; c++) {
6469 		vdev_t *vd = rvd->vdev_child[c];
6470 
6471 		ASSERT3U(c, ==, vd->vdev_id);
6472 
6473 		if (vd->vdev_ops != &vdev_indirect_ops)
6474 			continue;
6475 
6476 		/*
6477 		 * Note: we don't check for mapping leaks on
6478 		 * removing vdevs because their ms_allocatable's
6479 		 * are used to look for leaks in allocated space.
6480 		 */
6481 		zcb->zcb_vd_obsolete_counts[c] = zdb_load_obsolete_counts(vd);
6482 
6483 		/*
6484 		 * Normally, indirect vdevs don't have any
6485 		 * metaslabs.  We want to set them up for
6486 		 * zio_claim().
6487 		 */
6488 		vdev_metaslab_group_create(vd);
6489 		VERIFY0(vdev_metaslab_init(vd, 0));
6490 
6491 		vdev_indirect_mapping_t *vim __maybe_unused =
6492 		    vd->vdev_indirect_mapping;
6493 		uint64_t vim_idx = 0;
6494 		for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
6495 
6496 			(void) fprintf(stderr,
6497 			    "\rloading indirect vdev %llu, "
6498 			    "metaslab %llu of %llu ...",
6499 			    (longlong_t)vd->vdev_id,
6500 			    (longlong_t)vd->vdev_ms[m]->ms_id,
6501 			    (longlong_t)vd->vdev_ms_count);
6502 
6503 			load_indirect_ms_allocatable_tree(vd, vd->vdev_ms[m],
6504 			    &vim_idx);
6505 		}
6506 		ASSERT3U(vim_idx, ==, vdev_indirect_mapping_num_entries(vim));
6507 	}
6508 }
6509 
/*
 * Prepare 'zcb' and the pool for leak detection.
 *
 * Always records the pool in zcb->zcb_spa.  When leak detection is
 * disabled (-L) nothing else happens.  Otherwise the metaslab classes get
 * the placeholder ops table, the per-vdev obsolete-count array is
 * allocated, ms_allocatable trees are loaded with allocated segments,
 * checkpointed space is excluded, and the obsolete bpobj and the dedup
 * table are processed.
 */
static void
zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
{
	zcb->zcb_spa = spa;

	if (dump_opt['L'])
		return;

	dsl_pool_t *dp = spa->spa_dsl_pool;
	vdev_t *rvd = spa->spa_root_vdev;

	/*
	 * We are going to be changing the meaning of the metaslab's
	 * ms_allocatable.  Ensure that the allocator doesn't try to
	 * use the tree.
	 */
	spa->spa_normal_class->mc_ops = &zdb_metaslab_ops;
	spa->spa_log_class->mc_ops = &zdb_metaslab_ops;
	spa->spa_embedded_log_class->mc_ops = &zdb_metaslab_ops;

	/* One (initially NULL) slot per child of the root vdev. */
	zcb->zcb_vd_obsolete_counts =
	    umem_zalloc(rvd->vdev_children * sizeof (uint32_t *),
	    UMEM_NOFAIL);

	/*
	 * For leak detection, we overload the ms_allocatable trees
	 * to contain allocated segments instead of free segments.
	 * As a result, we can't use the normal metaslab_load/unload
	 * interfaces.
	 */
	zdb_leak_init_prepare_indirect_vdevs(spa, zcb);
	load_concrete_ms_allocatable_trees(spa, SM_ALLOC);

	/*
	 * On load_concrete_ms_allocatable_trees() we loaded all the
	 * allocated entries from the ms_sm to the ms_allocatable for
	 * each metaslab. If the pool has a checkpoint or is in the
	 * middle of discarding a checkpoint, some of these blocks
	 * may have been freed but their ms_sm may not have been
	 * updated because they are referenced by the checkpoint. In
	 * order to avoid false-positives during leak-detection, we
	 * go through the vdev's checkpoint space map and exclude all
	 * its entries from their relevant ms_allocatable.
	 *
	 * We also aggregate the space held by the checkpoint and add
	 * it to zcb_checkpoint_size.
	 *
	 * Note that at this point we are also verifying that all the
	 * entries on the checkpoint_sm are marked as allocated in
	 * the ms_sm of their relevant metaslab.
	 * [see comment in checkpoint_sm_exclude_entry_cb()]
	 */
	zdb_leak_init_exclude_checkpoint(spa, zcb);
	ASSERT3U(zcb->zcb_checkpoint_size, ==, spa_get_checkpoint_space(spa));

	/* for cleaner progress output */
	(void) fprintf(stderr, "\n");

	/* Fold the obsolete bpobj into the per-vdev obsolete counts. */
	if (bpobj_is_open(&dp->dp_obsolete_bpobj)) {
		ASSERT(spa_feature_is_enabled(spa,
		    SPA_FEATURE_DEVICE_REMOVAL));
		(void) bpobj_iterate_nofree(&dp->dp_obsolete_bpobj,
		    increment_indirect_mapping_cb, zcb, NULL);
	}

	/* The dedup table walk runs with SCL_CONFIG held as reader. */
	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
	zdb_ddt_leak_init(spa, zcb);
	spa_config_exit(spa, SCL_CONFIG, FTAG);
}
6579 
6580 static boolean_t
6581 zdb_check_for_obsolete_leaks(vdev_t *vd, zdb_cb_t *zcb)
6582 {
6583 	boolean_t leaks = B_FALSE;
6584 	vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
6585 	uint64_t total_leaked = 0;
6586 	boolean_t are_precise = B_FALSE;
6587 
6588 	ASSERT(vim != NULL);
6589 
6590 	for (uint64_t i = 0; i < vdev_indirect_mapping_num_entries(vim); i++) {
6591 		vdev_indirect_mapping_entry_phys_t *vimep =
6592 		    &vim->vim_entries[i];
6593 		uint64_t obsolete_bytes = 0;
6594 		uint64_t offset = DVA_MAPPING_GET_SRC_OFFSET(vimep);
6595 		metaslab_t *msp = vd->vdev_ms[offset >> vd->vdev_ms_shift];
6596 
6597 		/*
6598 		 * This is not very efficient but it's easy to
6599 		 * verify correctness.
6600 		 */
6601 		for (uint64_t inner_offset = 0;
6602 		    inner_offset < DVA_GET_ASIZE(&vimep->vimep_dst);
6603 		    inner_offset += 1ULL << vd->vdev_ashift) {
6604 			if (range_tree_contains(msp->ms_allocatable,
6605 			    offset + inner_offset, 1ULL << vd->vdev_ashift)) {
6606 				obsolete_bytes += 1ULL << vd->vdev_ashift;
6607 			}
6608 		}
6609 
6610 		int64_t bytes_leaked = obsolete_bytes -
6611 		    zcb->zcb_vd_obsolete_counts[vd->vdev_id][i];
6612 		ASSERT3U(DVA_GET_ASIZE(&vimep->vimep_dst), >=,
6613 		    zcb->zcb_vd_obsolete_counts[vd->vdev_id][i]);
6614 
6615 		VERIFY0(vdev_obsolete_counts_are_precise(vd, &are_precise));
6616 		if (bytes_leaked != 0 && (are_precise || dump_opt['d'] >= 5)) {
6617 			(void) printf("obsolete indirect mapping count "
6618 			    "mismatch on %llu:%llx:%llx : %llx bytes leaked\n",
6619 			    (u_longlong_t)vd->vdev_id,
6620 			    (u_longlong_t)DVA_MAPPING_GET_SRC_OFFSET(vimep),
6621 			    (u_longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst),
6622 			    (u_longlong_t)bytes_leaked);
6623 		}
6624 		total_leaked += ABS(bytes_leaked);
6625 	}
6626 
6627 	VERIFY0(vdev_obsolete_counts_are_precise(vd, &are_precise));
6628 	if (!are_precise && total_leaked > 0) {
6629 		int pct_leaked = total_leaked * 100 /
6630 		    vdev_indirect_mapping_bytes_mapped(vim);
6631 		(void) printf("cannot verify obsolete indirect mapping "
6632 		    "counts of vdev %llu because precise feature was not "
6633 		    "enabled when it was removed: %d%% (%llx bytes) of mapping"
6634 		    "unreferenced\n",
6635 		    (u_longlong_t)vd->vdev_id, pct_leaked,
6636 		    (u_longlong_t)total_leaked);
6637 	} else if (total_leaked > 0) {
6638 		(void) printf("obsolete indirect mapping count mismatch "
6639 		    "for vdev %llu -- %llx total bytes mismatched\n",
6640 		    (u_longlong_t)vd->vdev_id,
6641 		    (u_longlong_t)total_leaked);
6642 		leaks |= B_TRUE;
6643 	}
6644 
6645 	vdev_indirect_mapping_free_obsolete_counts(vim,
6646 	    zcb->zcb_vd_obsolete_counts[vd->vdev_id]);
6647 	zcb->zcb_vd_obsolete_counts[vd->vdev_id] = NULL;
6648 
6649 	return (leaks);
6650 }
6651 
6652 static boolean_t
6653 zdb_leak_fini(spa_t *spa, zdb_cb_t *zcb)
6654 {
6655 	if (dump_opt['L'])
6656 		return (B_FALSE);
6657 
6658 	boolean_t leaks = B_FALSE;
6659 	vdev_t *rvd = spa->spa_root_vdev;
6660 	for (unsigned c = 0; c < rvd->vdev_children; c++) {
6661 		vdev_t *vd = rvd->vdev_child[c];
6662 
6663 		if (zcb->zcb_vd_obsolete_counts[c] != NULL) {
6664 			leaks |= zdb_check_for_obsolete_leaks(vd, zcb);
6665 		}
6666 
6667 		for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
6668 			metaslab_t *msp = vd->vdev_ms[m];
6669 			ASSERT3P(msp->ms_group, ==, (msp->ms_group->mg_class ==
6670 			    spa_embedded_log_class(spa)) ?
6671 			    vd->vdev_log_mg : vd->vdev_mg);
6672 
6673 			/*
6674 			 * ms_allocatable has been overloaded
6675 			 * to contain allocated segments. Now that
6676 			 * we finished traversing all blocks, any
6677 			 * block that remains in the ms_allocatable
6678 			 * represents an allocated block that we
6679 			 * did not claim during the traversal.
6680 			 * Claimed blocks would have been removed
6681 			 * from the ms_allocatable.  For indirect
6682 			 * vdevs, space remaining in the tree
6683 			 * represents parts of the mapping that are
6684 			 * not referenced, which is not a bug.
6685 			 */
6686 			if (vd->vdev_ops == &vdev_indirect_ops) {
6687 				range_tree_vacate(msp->ms_allocatable,
6688 				    NULL, NULL);
6689 			} else {
6690 				range_tree_vacate(msp->ms_allocatable,
6691 				    zdb_leak, vd);
6692 			}
6693 			if (msp->ms_loaded) {
6694 				msp->ms_loaded = B_FALSE;
6695 			}
6696 		}
6697 	}
6698 
6699 	umem_free(zcb->zcb_vd_obsolete_counts,
6700 	    rvd->vdev_children * sizeof (uint32_t *));
6701 	zcb->zcb_vd_obsolete_counts = NULL;
6702 
6703 	return (leaks);
6704 }
6705 
6706 static int
6707 count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
6708 {
6709 	(void) tx;
6710 	zdb_cb_t *zcb = arg;
6711 
6712 	if (dump_opt['b'] >= 5) {
6713 		char blkbuf[BP_SPRINTF_LEN];
6714 		snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
6715 		(void) printf("[%s] %s\n",
6716 		    "deferred free", blkbuf);
6717 	}
6718 	zdb_count_block(zcb, NULL, bp, ZDB_OT_DEFERRED);
6719 	return (0);
6720 }
6721 
6722 /*
6723  * Iterate over livelists which have been destroyed by the user but
6724  * are still present in the MOS, waiting to be freed
6725  */
6726 static void
6727 iterate_deleted_livelists(spa_t *spa, ll_iter_t func, void *arg)
6728 {
6729 	objset_t *mos = spa->spa_meta_objset;
6730 	uint64_t zap_obj;
6731 	int err = zap_lookup(mos, DMU_POOL_DIRECTORY_OBJECT,
6732 	    DMU_POOL_DELETED_CLONES, sizeof (uint64_t), 1, &zap_obj);
6733 	if (err == ENOENT)
6734 		return;
6735 	ASSERT0(err);
6736 
6737 	zap_cursor_t zc;
6738 	zap_attribute_t attr;
6739 	dsl_deadlist_t ll;
6740 	/* NULL out os prior to dsl_deadlist_open in case it's garbage */
6741 	ll.dl_os = NULL;
6742 	for (zap_cursor_init(&zc, mos, zap_obj);
6743 	    zap_cursor_retrieve(&zc, &attr) == 0;
6744 	    (void) zap_cursor_advance(&zc)) {
6745 		dsl_deadlist_open(&ll, mos, attr.za_first_integer);
6746 		func(&ll, arg);
6747 		dsl_deadlist_close(&ll);
6748 	}
6749 	zap_cursor_fini(&zc);
6750 }
6751 
/*
 * Adapter that lets count_block_cb() be used where the callback signature
 * carries a bp_freed flag.  The flag is asserted to be false and is
 * otherwise ignored; the return value is count_block_cb()'s.
 */
static int
bpobj_count_block_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed,
    dmu_tx_t *tx)
{
	ASSERT(!bp_freed);
	return (count_block_cb(arg, bp, tx));
}
6759 
6760 static int
6761 livelist_entry_count_blocks_cb(void *args, dsl_deadlist_entry_t *dle)
6762 {
6763 	zdb_cb_t *zbc = args;
6764 	bplist_t blks;
6765 	bplist_create(&blks);
6766 	/* determine which blocks have been alloc'd but not freed */
6767 	VERIFY0(dsl_process_sub_livelist(&dle->dle_bpobj, &blks, NULL, NULL));
6768 	/* count those blocks */
6769 	(void) bplist_iterate(&blks, count_block_cb, zbc, NULL);
6770 	bplist_destroy(&blks);
6771 	return (0);
6772 }
6773 
/*
 * Livelist iterator callback: run livelist_entry_count_blocks_cb() on
 * every entry of livelist 'll', passing 'arg' through.
 */
static void
livelist_count_blocks(dsl_deadlist_t *ll, void *arg)
{
	dsl_deadlist_iterate(ll, livelist_entry_count_blocks_cb, arg);
}
6779 
/*
 * Count the blocks in the livelists that have been destroyed by the user
 * but haven't yet been freed.
 *
 * 'zbc' is handed through iterate_deleted_livelists() to
 * livelist_count_blocks().
 */
static void
deleted_livelists_count_blocks(spa_t *spa, zdb_cb_t *zbc)
{
	iterate_deleted_livelists(spa, livelist_count_blocks, zbc);
}
6789 
/*
 * Livelist iterator callback used by deleted_livelists_dump_mos(): bump
 * the livelist feature count, dump the list under the heading
 * "Deleted Livelist", and run sublivelist_verify_lightweight() on each of
 * its entries.  'arg' must be NULL.
 */
static void
dump_livelist_cb(dsl_deadlist_t *ll, void *arg)
{
	ASSERT3P(arg, ==, NULL);
	global_feature_count[SPA_FEATURE_LIVELIST]++;
	dump_blkptr_list(ll, "Deleted Livelist");
	dsl_deadlist_iterate(ll, sublivelist_verify_lightweight, NULL);
}
6798 
6799 /*
6800  * Print out, register object references to, and increment feature counts for
6801  * livelists that have been destroyed by the user but haven't yet been freed.
6802  */
6803 static void
6804 deleted_livelists_dump_mos(spa_t *spa)
6805 {
6806 	uint64_t zap_obj;
6807 	objset_t *mos = spa->spa_meta_objset;
6808 	int err = zap_lookup(mos, DMU_POOL_DIRECTORY_OBJECT,
6809 	    DMU_POOL_DELETED_CLONES, sizeof (uint64_t), 1, &zap_obj);
6810 	if (err == ENOENT)
6811 		return;
6812 	mos_obj_refd(zap_obj);
6813 	iterate_deleted_livelists(spa, dump_livelist_cb, NULL);
6814 }
6815 
6816 static int
6817 zdb_brt_entry_compare(const void *zcn1, const void *zcn2)
6818 {
6819 	const dva_t *dva1 = &((const zdb_brt_entry_t *)zcn1)->zbre_dva;
6820 	const dva_t *dva2 = &((const zdb_brt_entry_t *)zcn2)->zbre_dva;
6821 	int cmp;
6822 
6823 	cmp = TREE_CMP(DVA_GET_VDEV(dva1), DVA_GET_VDEV(dva2));
6824 	if (cmp == 0)
6825 		cmp = TREE_CMP(DVA_GET_OFFSET(dva1), DVA_GET_OFFSET(dva2));
6826 
6827 	return (cmp);
6828 }
6829 
6830 static int
6831 dump_block_stats(spa_t *spa)
6832 {
6833 	zdb_cb_t *zcb;
6834 	zdb_blkstats_t *zb, *tzb;
6835 	uint64_t norm_alloc, norm_space, total_alloc, total_found;
6836 	int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA |
6837 	    TRAVERSE_NO_DECRYPT | TRAVERSE_HARD;
6838 	boolean_t leaks = B_FALSE;
6839 	int e, c, err;
6840 	bp_embedded_type_t i;
6841 
6842 	zcb = umem_zalloc(sizeof (zdb_cb_t), UMEM_NOFAIL);
6843 
6844 	if (spa_feature_is_active(spa, SPA_FEATURE_BLOCK_CLONING)) {
6845 		avl_create(&zcb->zcb_brt, zdb_brt_entry_compare,
6846 		    sizeof (zdb_brt_entry_t),
6847 		    offsetof(zdb_brt_entry_t, zbre_node));
6848 		zcb->zcb_brt_is_active = B_TRUE;
6849 	}
6850 
6851 	(void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n",
6852 	    (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
6853 	    (dump_opt['c'] == 1) ? "metadata " : "",
6854 	    dump_opt['c'] ? "checksums " : "",
6855 	    (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "",
6856 	    !dump_opt['L'] ? "nothing leaked " : "");
6857 
6858 	/*
6859 	 * When leak detection is enabled we load all space maps as SM_ALLOC
6860 	 * maps, then traverse the pool claiming each block we discover. If
6861 	 * the pool is perfectly consistent, the segment trees will be empty
6862 	 * when we're done. Anything left over is a leak; any block we can't
6863 	 * claim (because it's not part of any space map) is a double
6864 	 * allocation, reference to a freed block, or an unclaimed log block.
6865 	 *
6866 	 * When leak detection is disabled (-L option) we still traverse the
6867 	 * pool claiming each block we discover, but we skip opening any space
6868 	 * maps.
6869 	 */
6870 	zdb_leak_init(spa, zcb);
6871 
6872 	/*
6873 	 * If there's a deferred-free bplist, process that first.
6874 	 */
6875 	(void) bpobj_iterate_nofree(&spa->spa_deferred_bpobj,
6876 	    bpobj_count_block_cb, zcb, NULL);
6877 
6878 	if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
6879 		(void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj,
6880 		    bpobj_count_block_cb, zcb, NULL);
6881 	}
6882 
6883 	zdb_claim_removing(spa, zcb);
6884 
6885 	if (spa_feature_is_active(spa, SPA_FEATURE_ASYNC_DESTROY)) {
6886 		VERIFY3U(0, ==, bptree_iterate(spa->spa_meta_objset,
6887 		    spa->spa_dsl_pool->dp_bptree_obj, B_FALSE, count_block_cb,
6888 		    zcb, NULL));
6889 	}
6890 
6891 	deleted_livelists_count_blocks(spa, zcb);
6892 
6893 	if (dump_opt['c'] > 1)
6894 		flags |= TRAVERSE_PREFETCH_DATA;
6895 
6896 	zcb->zcb_totalasize = metaslab_class_get_alloc(spa_normal_class(spa));
6897 	zcb->zcb_totalasize += metaslab_class_get_alloc(spa_special_class(spa));
6898 	zcb->zcb_totalasize += metaslab_class_get_alloc(spa_dedup_class(spa));
6899 	zcb->zcb_totalasize +=
6900 	    metaslab_class_get_alloc(spa_embedded_log_class(spa));
6901 	zcb->zcb_start = zcb->zcb_lastprint = gethrtime();
6902 	err = traverse_pool(spa, 0, flags, zdb_blkptr_cb, zcb);
6903 
6904 	/*
6905 	 * If we've traversed the data blocks then we need to wait for those
6906 	 * I/Os to complete. We leverage "The Godfather" zio to wait on
6907 	 * all async I/Os to complete.
6908 	 */
6909 	if (dump_opt['c']) {
6910 		for (c = 0; c < max_ncpus; c++) {
6911 			(void) zio_wait(spa->spa_async_zio_root[c]);
6912 			spa->spa_async_zio_root[c] = zio_root(spa, NULL, NULL,
6913 			    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE |
6914 			    ZIO_FLAG_GODFATHER);
6915 		}
6916 	}
6917 	ASSERT0(spa->spa_load_verify_bytes);
6918 
6919 	/*
6920 	 * Done after zio_wait() since zcb_haderrors is modified in
6921 	 * zdb_blkptr_done()
6922 	 */
6923 	zcb->zcb_haderrors |= err;
6924 
6925 	if (zcb->zcb_haderrors) {
6926 		(void) printf("\nError counts:\n\n");
6927 		(void) printf("\t%5s  %s\n", "errno", "count");
6928 		for (e = 0; e < 256; e++) {
6929 			if (zcb->zcb_errors[e] != 0) {
6930 				(void) printf("\t%5d  %llu\n",
6931 				    e, (u_longlong_t)zcb->zcb_errors[e]);
6932 			}
6933 		}
6934 	}
6935 
6936 	/*
6937 	 * Report any leaked segments.
6938 	 */
6939 	leaks |= zdb_leak_fini(spa, zcb);
6940 
6941 	tzb = &zcb->zcb_type[ZB_TOTAL][ZDB_OT_TOTAL];
6942 
6943 	norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
6944 	norm_space = metaslab_class_get_space(spa_normal_class(spa));
6945 
6946 	total_alloc = norm_alloc +
6947 	    metaslab_class_get_alloc(spa_log_class(spa)) +
6948 	    metaslab_class_get_alloc(spa_embedded_log_class(spa)) +
6949 	    metaslab_class_get_alloc(spa_special_class(spa)) +
6950 	    metaslab_class_get_alloc(spa_dedup_class(spa)) +
6951 	    get_unflushed_alloc_space(spa);
6952 	total_found =
6953 	    tzb->zb_asize - zcb->zcb_dedup_asize - zcb->zcb_clone_asize +
6954 	    zcb->zcb_removing_size + zcb->zcb_checkpoint_size;
6955 
6956 	if (total_found == total_alloc && !dump_opt['L']) {
6957 		(void) printf("\n\tNo leaks (block sum matches space"
6958 		    " maps exactly)\n");
6959 	} else if (!dump_opt['L']) {
6960 		(void) printf("block traversal size %llu != alloc %llu "
6961 		    "(%s %lld)\n",
6962 		    (u_longlong_t)total_found,
6963 		    (u_longlong_t)total_alloc,
6964 		    (dump_opt['L']) ? "unreachable" : "leaked",
6965 		    (longlong_t)(total_alloc - total_found));
6966 		leaks = B_TRUE;
6967 	}
6968 
6969 	if (tzb->zb_count == 0) {
6970 		umem_free(zcb, sizeof (zdb_cb_t));
6971 		return (2);
6972 	}
6973 
6974 	(void) printf("\n");
6975 	(void) printf("\t%-16s %14llu\n", "bp count:",
6976 	    (u_longlong_t)tzb->zb_count);
6977 	(void) printf("\t%-16s %14llu\n", "ganged count:",
6978 	    (longlong_t)tzb->zb_gangs);
6979 	(void) printf("\t%-16s %14llu      avg: %6llu\n", "bp logical:",
6980 	    (u_longlong_t)tzb->zb_lsize,
6981 	    (u_longlong_t)(tzb->zb_lsize / tzb->zb_count));
6982 	(void) printf("\t%-16s %14llu      avg: %6llu     compression: %6.2f\n",
6983 	    "bp physical:", (u_longlong_t)tzb->zb_psize,
6984 	    (u_longlong_t)(tzb->zb_psize / tzb->zb_count),
6985 	    (double)tzb->zb_lsize / tzb->zb_psize);
6986 	(void) printf("\t%-16s %14llu      avg: %6llu     compression: %6.2f\n",
6987 	    "bp allocated:", (u_longlong_t)tzb->zb_asize,
6988 	    (u_longlong_t)(tzb->zb_asize / tzb->zb_count),
6989 	    (double)tzb->zb_lsize / tzb->zb_asize);
6990 	(void) printf("\t%-16s %14llu    ref>1: %6llu   deduplication: %6.2f\n",
6991 	    "bp deduped:", (u_longlong_t)zcb->zcb_dedup_asize,
6992 	    (u_longlong_t)zcb->zcb_dedup_blocks,
6993 	    (double)zcb->zcb_dedup_asize / tzb->zb_asize + 1.0);
6994 	(void) printf("\t%-16s %14llu    count: %6llu\n",
6995 	    "bp cloned:", (u_longlong_t)zcb->zcb_clone_asize,
6996 	    (u_longlong_t)zcb->zcb_clone_blocks);
6997 	(void) printf("\t%-16s %14llu     used: %5.2f%%\n", "Normal class:",
6998 	    (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);
6999 
7000 	if (spa_special_class(spa)->mc_allocator[0].mca_rotor != NULL) {
7001 		uint64_t alloc = metaslab_class_get_alloc(
7002 		    spa_special_class(spa));
7003 		uint64_t space = metaslab_class_get_space(
7004 		    spa_special_class(spa));
7005 
7006 		(void) printf("\t%-16s %14llu     used: %5.2f%%\n",
7007 		    "Special class", (u_longlong_t)alloc,
7008 		    100.0 * alloc / space);
7009 	}
7010 
7011 	if (spa_dedup_class(spa)->mc_allocator[0].mca_rotor != NULL) {
7012 		uint64_t alloc = metaslab_class_get_alloc(
7013 		    spa_dedup_class(spa));
7014 		uint64_t space = metaslab_class_get_space(
7015 		    spa_dedup_class(spa));
7016 
7017 		(void) printf("\t%-16s %14llu     used: %5.2f%%\n",
7018 		    "Dedup class", (u_longlong_t)alloc,
7019 		    100.0 * alloc / space);
7020 	}
7021 
7022 	if (spa_embedded_log_class(spa)->mc_allocator[0].mca_rotor != NULL) {
7023 		uint64_t alloc = metaslab_class_get_alloc(
7024 		    spa_embedded_log_class(spa));
7025 		uint64_t space = metaslab_class_get_space(
7026 		    spa_embedded_log_class(spa));
7027 
7028 		(void) printf("\t%-16s %14llu     used: %5.2f%%\n",
7029 		    "Embedded log class", (u_longlong_t)alloc,
7030 		    100.0 * alloc / space);
7031 	}
7032 
7033 	for (i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) {
7034 		if (zcb->zcb_embedded_blocks[i] == 0)
7035 			continue;
7036 		(void) printf("\n");
7037 		(void) printf("\tadditional, non-pointer bps of type %u: "
7038 		    "%10llu\n",
7039 		    i, (u_longlong_t)zcb->zcb_embedded_blocks[i]);
7040 
7041 		if (dump_opt['b'] >= 3) {
7042 			(void) printf("\t number of (compressed) bytes:  "
7043 			    "number of bps\n");
7044 			dump_histogram(zcb->zcb_embedded_histogram[i],
7045 			    sizeof (zcb->zcb_embedded_histogram[i]) /
7046 			    sizeof (zcb->zcb_embedded_histogram[i][0]), 0);
7047 		}
7048 	}
7049 
7050 	if (tzb->zb_ditto_samevdev != 0) {
7051 		(void) printf("\tDittoed blocks on same vdev: %llu\n",
7052 		    (longlong_t)tzb->zb_ditto_samevdev);
7053 	}
7054 	if (tzb->zb_ditto_same_ms != 0) {
7055 		(void) printf("\tDittoed blocks in same metaslab: %llu\n",
7056 		    (longlong_t)tzb->zb_ditto_same_ms);
7057 	}
7058 
7059 	for (uint64_t v = 0; v < spa->spa_root_vdev->vdev_children; v++) {
7060 		vdev_t *vd = spa->spa_root_vdev->vdev_child[v];
7061 		vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
7062 
7063 		if (vim == NULL) {
7064 			continue;
7065 		}
7066 
7067 		char mem[32];
7068 		zdb_nicenum(vdev_indirect_mapping_num_entries(vim),
7069 		    mem, vdev_indirect_mapping_size(vim));
7070 
7071 		(void) printf("\tindirect vdev id %llu has %llu segments "
7072 		    "(%s in memory)\n",
7073 		    (longlong_t)vd->vdev_id,
7074 		    (longlong_t)vdev_indirect_mapping_num_entries(vim), mem);
7075 	}
7076 
7077 	if (dump_opt['b'] >= 2) {
7078 		int l, t, level;
7079 		char csize[32], lsize[32], psize[32], asize[32];
7080 		char avg[32], gang[32];
7081 		(void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
7082 		    "\t  avg\t comp\t%%Total\tType\n");
7083 
7084 		zfs_blkstat_t *mdstats = umem_zalloc(sizeof (zfs_blkstat_t),
7085 		    UMEM_NOFAIL);
7086 
7087 		for (t = 0; t <= ZDB_OT_TOTAL; t++) {
7088 			const char *typename;
7089 
7090 			/* make sure nicenum has enough space */
7091 			_Static_assert(sizeof (csize) >= NN_NUMBUF_SZ,
7092 			    "csize truncated");
7093 			_Static_assert(sizeof (lsize) >= NN_NUMBUF_SZ,
7094 			    "lsize truncated");
7095 			_Static_assert(sizeof (psize) >= NN_NUMBUF_SZ,
7096 			    "psize truncated");
7097 			_Static_assert(sizeof (asize) >= NN_NUMBUF_SZ,
7098 			    "asize truncated");
7099 			_Static_assert(sizeof (avg) >= NN_NUMBUF_SZ,
7100 			    "avg truncated");
7101 			_Static_assert(sizeof (gang) >= NN_NUMBUF_SZ,
7102 			    "gang truncated");
7103 
7104 			if (t < DMU_OT_NUMTYPES)
7105 				typename = dmu_ot[t].ot_name;
7106 			else
7107 				typename = zdb_ot_extname[t - DMU_OT_NUMTYPES];
7108 
7109 			if (zcb->zcb_type[ZB_TOTAL][t].zb_asize == 0) {
7110 				(void) printf("%6s\t%5s\t%5s\t%5s"
7111 				    "\t%5s\t%5s\t%6s\t%s\n",
7112 				    "-",
7113 				    "-",
7114 				    "-",
7115 				    "-",
7116 				    "-",
7117 				    "-",
7118 				    "-",
7119 				    typename);
7120 				continue;
7121 			}
7122 
7123 			for (l = ZB_TOTAL - 1; l >= -1; l--) {
7124 				level = (l == -1 ? ZB_TOTAL : l);
7125 				zb = &zcb->zcb_type[level][t];
7126 
7127 				if (zb->zb_asize == 0)
7128 					continue;
7129 
7130 				if (level != ZB_TOTAL && t < DMU_OT_NUMTYPES &&
7131 				    (level > 0 || DMU_OT_IS_METADATA(t))) {
7132 					mdstats->zb_count += zb->zb_count;
7133 					mdstats->zb_lsize += zb->zb_lsize;
7134 					mdstats->zb_psize += zb->zb_psize;
7135 					mdstats->zb_asize += zb->zb_asize;
7136 					mdstats->zb_gangs += zb->zb_gangs;
7137 				}
7138 
7139 				if (dump_opt['b'] < 3 && level != ZB_TOTAL)
7140 					continue;
7141 
7142 				if (level == 0 && zb->zb_asize ==
7143 				    zcb->zcb_type[ZB_TOTAL][t].zb_asize)
7144 					continue;
7145 
7146 				zdb_nicenum(zb->zb_count, csize,
7147 				    sizeof (csize));
7148 				zdb_nicenum(zb->zb_lsize, lsize,
7149 				    sizeof (lsize));
7150 				zdb_nicenum(zb->zb_psize, psize,
7151 				    sizeof (psize));
7152 				zdb_nicenum(zb->zb_asize, asize,
7153 				    sizeof (asize));
7154 				zdb_nicenum(zb->zb_asize / zb->zb_count, avg,
7155 				    sizeof (avg));
7156 				zdb_nicenum(zb->zb_gangs, gang, sizeof (gang));
7157 
7158 				(void) printf("%6s\t%5s\t%5s\t%5s\t%5s"
7159 				    "\t%5.2f\t%6.2f\t",
7160 				    csize, lsize, psize, asize, avg,
7161 				    (double)zb->zb_lsize / zb->zb_psize,
7162 				    100.0 * zb->zb_asize / tzb->zb_asize);
7163 
7164 				if (level == ZB_TOTAL)
7165 					(void) printf("%s\n", typename);
7166 				else
7167 					(void) printf("    L%d %s\n",
7168 					    level, typename);
7169 
7170 				if (dump_opt['b'] >= 3 && zb->zb_gangs > 0) {
7171 					(void) printf("\t number of ganged "
7172 					    "blocks: %s\n", gang);
7173 				}
7174 
7175 				if (dump_opt['b'] >= 4) {
7176 					(void) printf("psize "
7177 					    "(in 512-byte sectors): "
7178 					    "number of blocks\n");
7179 					dump_histogram(zb->zb_psize_histogram,
7180 					    PSIZE_HISTO_SIZE, 0);
7181 				}
7182 			}
7183 		}
7184 		zdb_nicenum(mdstats->zb_count, csize,
7185 		    sizeof (csize));
7186 		zdb_nicenum(mdstats->zb_lsize, lsize,
7187 		    sizeof (lsize));
7188 		zdb_nicenum(mdstats->zb_psize, psize,
7189 		    sizeof (psize));
7190 		zdb_nicenum(mdstats->zb_asize, asize,
7191 		    sizeof (asize));
7192 		zdb_nicenum(mdstats->zb_asize / mdstats->zb_count, avg,
7193 		    sizeof (avg));
7194 		zdb_nicenum(mdstats->zb_gangs, gang, sizeof (gang));
7195 
7196 		(void) printf("%6s\t%5s\t%5s\t%5s\t%5s"
7197 		    "\t%5.2f\t%6.2f\t",
7198 		    csize, lsize, psize, asize, avg,
7199 		    (double)mdstats->zb_lsize / mdstats->zb_psize,
7200 		    100.0 * mdstats->zb_asize / tzb->zb_asize);
7201 		(void) printf("%s\n", "Metadata Total");
7202 
7203 		/* Output a table summarizing block sizes in the pool */
7204 		if (dump_opt['b'] >= 2) {
7205 			dump_size_histograms(zcb);
7206 		}
7207 
7208 		umem_free(mdstats, sizeof (zfs_blkstat_t));
7209 	}
7210 
7211 	(void) printf("\n");
7212 
7213 	if (leaks) {
7214 		umem_free(zcb, sizeof (zdb_cb_t));
7215 		return (2);
7216 	}
7217 
7218 	if (zcb->zcb_haderrors) {
7219 		umem_free(zcb, sizeof (zdb_cb_t));
7220 		return (3);
7221 	}
7222 
7223 	umem_free(zcb, sizeof (zdb_cb_t));
7224 	return (0);
7225 }
7226 
/*
 * One node of the AVL tree built by zdb_ddt_add_cb() while simulating a
 * dedup table.  Each node accumulates the totals of every block pointer
 * whose ddt key matched.
 */
typedef struct zdb_ddt_entry {
	ddt_key_t	zdde_key;	/* key filled in by ddt_key_fill() */
	uint64_t	zdde_ref_blocks; /* number of bps seen with this key */
	uint64_t	zdde_ref_lsize;	/* sum of BP_GET_LSIZE() of those bps */
	uint64_t	zdde_ref_psize;	/* sum of BP_GET_PSIZE() of those bps */
	uint64_t	zdde_ref_dsize;	/* sum of bp_get_dsize_sync() values */
	avl_node_t	zdde_node;	/* AVL linkage */
} zdb_ddt_entry_t;
7235 
7236 static int
7237 zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
7238     const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
7239 {
7240 	(void) zilog, (void) dnp;
7241 	avl_tree_t *t = arg;
7242 	avl_index_t where;
7243 	zdb_ddt_entry_t *zdde, zdde_search;
7244 
7245 	if (zb->zb_level == ZB_DNODE_LEVEL || BP_IS_HOLE(bp) ||
7246 	    BP_IS_EMBEDDED(bp))
7247 		return (0);
7248 
7249 	if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
7250 		(void) printf("traversing objset %llu, %llu objects, "
7251 		    "%lu blocks so far\n",
7252 		    (u_longlong_t)zb->zb_objset,
7253 		    (u_longlong_t)BP_GET_FILL(bp),
7254 		    avl_numnodes(t));
7255 	}
7256 
7257 	if (BP_IS_HOLE(bp) || BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_OFF ||
7258 	    BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))
7259 		return (0);
7260 
7261 	ddt_key_fill(&zdde_search.zdde_key, bp);
7262 
7263 	zdde = avl_find(t, &zdde_search, &where);
7264 
7265 	if (zdde == NULL) {
7266 		zdde = umem_zalloc(sizeof (*zdde), UMEM_NOFAIL);
7267 		zdde->zdde_key = zdde_search.zdde_key;
7268 		avl_insert(t, zdde, where);
7269 	}
7270 
7271 	zdde->zdde_ref_blocks += 1;
7272 	zdde->zdde_ref_lsize += BP_GET_LSIZE(bp);
7273 	zdde->zdde_ref_psize += BP_GET_PSIZE(bp);
7274 	zdde->zdde_ref_dsize += bp_get_dsize_sync(spa, bp);
7275 
7276 	return (0);
7277 }
7278 
7279 static void
7280 dump_simulated_ddt(spa_t *spa)
7281 {
7282 	avl_tree_t t;
7283 	void *cookie = NULL;
7284 	zdb_ddt_entry_t *zdde;
7285 	ddt_histogram_t ddh_total = {{{0}}};
7286 	ddt_stat_t dds_total = {0};
7287 
7288 	avl_create(&t, ddt_entry_compare,
7289 	    sizeof (zdb_ddt_entry_t), offsetof(zdb_ddt_entry_t, zdde_node));
7290 
7291 	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
7292 
7293 	(void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA |
7294 	    TRAVERSE_NO_DECRYPT, zdb_ddt_add_cb, &t);
7295 
7296 	spa_config_exit(spa, SCL_CONFIG, FTAG);
7297 
7298 	while ((zdde = avl_destroy_nodes(&t, &cookie)) != NULL) {
7299 		ddt_stat_t dds;
7300 		uint64_t refcnt = zdde->zdde_ref_blocks;
7301 		ASSERT(refcnt != 0);
7302 
7303 		dds.dds_blocks = zdde->zdde_ref_blocks / refcnt;
7304 		dds.dds_lsize = zdde->zdde_ref_lsize / refcnt;
7305 		dds.dds_psize = zdde->zdde_ref_psize / refcnt;
7306 		dds.dds_dsize = zdde->zdde_ref_dsize / refcnt;
7307 
7308 		dds.dds_ref_blocks = zdde->zdde_ref_blocks;
7309 		dds.dds_ref_lsize = zdde->zdde_ref_lsize;
7310 		dds.dds_ref_psize = zdde->zdde_ref_psize;
7311 		dds.dds_ref_dsize = zdde->zdde_ref_dsize;
7312 
7313 		ddt_stat_add(&ddh_total.ddh_stat[highbit64(refcnt) - 1],
7314 		    &dds, 0);
7315 
7316 		umem_free(zdde, sizeof (*zdde));
7317 	}
7318 
7319 	avl_destroy(&t);
7320 
7321 	ddt_histogram_stat(&dds_total, &ddh_total);
7322 
7323 	(void) printf("Simulated DDT histogram:\n");
7324 
7325 	zpool_dump_ddt(&dds_total, &ddh_total);
7326 
7327 	dump_dedup_ratio(&dds_total);
7328 }
7329 
7330 static int
7331 verify_device_removal_feature_counts(spa_t *spa)
7332 {
7333 	uint64_t dr_feature_refcount = 0;
7334 	uint64_t oc_feature_refcount = 0;
7335 	uint64_t indirect_vdev_count = 0;
7336 	uint64_t precise_vdev_count = 0;
7337 	uint64_t obsolete_counts_object_count = 0;
7338 	uint64_t obsolete_sm_count = 0;
7339 	uint64_t obsolete_counts_count = 0;
7340 	uint64_t scip_count = 0;
7341 	uint64_t obsolete_bpobj_count = 0;
7342 	int ret = 0;
7343 
7344 	spa_condensing_indirect_phys_t *scip =
7345 	    &spa->spa_condensing_indirect_phys;
7346 	if (scip->scip_next_mapping_object != 0) {
7347 		vdev_t *vd = spa->spa_root_vdev->vdev_child[scip->scip_vdev];
7348 		ASSERT(scip->scip_prev_obsolete_sm_object != 0);
7349 		ASSERT3P(vd->vdev_ops, ==, &vdev_indirect_ops);
7350 
7351 		(void) printf("Condensing indirect vdev %llu: new mapping "
7352 		    "object %llu, prev obsolete sm %llu\n",
7353 		    (u_longlong_t)scip->scip_vdev,
7354 		    (u_longlong_t)scip->scip_next_mapping_object,
7355 		    (u_longlong_t)scip->scip_prev_obsolete_sm_object);
7356 		if (scip->scip_prev_obsolete_sm_object != 0) {
7357 			space_map_t *prev_obsolete_sm = NULL;
7358 			VERIFY0(space_map_open(&prev_obsolete_sm,
7359 			    spa->spa_meta_objset,
7360 			    scip->scip_prev_obsolete_sm_object,
7361 			    0, vd->vdev_asize, 0));
7362 			dump_spacemap(spa->spa_meta_objset, prev_obsolete_sm);
7363 			(void) printf("\n");
7364 			space_map_close(prev_obsolete_sm);
7365 		}
7366 
7367 		scip_count += 2;
7368 	}
7369 
7370 	for (uint64_t i = 0; i < spa->spa_root_vdev->vdev_children; i++) {
7371 		vdev_t *vd = spa->spa_root_vdev->vdev_child[i];
7372 		vdev_indirect_config_t *vic = &vd->vdev_indirect_config;
7373 
7374 		if (vic->vic_mapping_object != 0) {
7375 			ASSERT(vd->vdev_ops == &vdev_indirect_ops ||
7376 			    vd->vdev_removing);
7377 			indirect_vdev_count++;
7378 
7379 			if (vd->vdev_indirect_mapping->vim_havecounts) {
7380 				obsolete_counts_count++;
7381 			}
7382 		}
7383 
7384 		boolean_t are_precise;
7385 		VERIFY0(vdev_obsolete_counts_are_precise(vd, &are_precise));
7386 		if (are_precise) {
7387 			ASSERT(vic->vic_mapping_object != 0);
7388 			precise_vdev_count++;
7389 		}
7390 
7391 		uint64_t obsolete_sm_object;
7392 		VERIFY0(vdev_obsolete_sm_object(vd, &obsolete_sm_object));
7393 		if (obsolete_sm_object != 0) {
7394 			ASSERT(vic->vic_mapping_object != 0);
7395 			obsolete_sm_count++;
7396 		}
7397 	}
7398 
7399 	(void) feature_get_refcount(spa,
7400 	    &spa_feature_table[SPA_FEATURE_DEVICE_REMOVAL],
7401 	    &dr_feature_refcount);
7402 	(void) feature_get_refcount(spa,
7403 	    &spa_feature_table[SPA_FEATURE_OBSOLETE_COUNTS],
7404 	    &oc_feature_refcount);
7405 
7406 	if (dr_feature_refcount != indirect_vdev_count) {
7407 		ret = 1;
7408 		(void) printf("Number of indirect vdevs (%llu) " \
7409 		    "does not match feature count (%llu)\n",
7410 		    (u_longlong_t)indirect_vdev_count,
7411 		    (u_longlong_t)dr_feature_refcount);
7412 	} else {
7413 		(void) printf("Verified device_removal feature refcount " \
7414 		    "of %llu is correct\n",
7415 		    (u_longlong_t)dr_feature_refcount);
7416 	}
7417 
7418 	if (zap_contains(spa_meta_objset(spa), DMU_POOL_DIRECTORY_OBJECT,
7419 	    DMU_POOL_OBSOLETE_BPOBJ) == 0) {
7420 		obsolete_bpobj_count++;
7421 	}
7422 
7423 
7424 	obsolete_counts_object_count = precise_vdev_count;
7425 	obsolete_counts_object_count += obsolete_sm_count;
7426 	obsolete_counts_object_count += obsolete_counts_count;
7427 	obsolete_counts_object_count += scip_count;
7428 	obsolete_counts_object_count += obsolete_bpobj_count;
7429 	obsolete_counts_object_count += remap_deadlist_count;
7430 
7431 	if (oc_feature_refcount != obsolete_counts_object_count) {
7432 		ret = 1;
7433 		(void) printf("Number of obsolete counts objects (%llu) " \
7434 		    "does not match feature count (%llu)\n",
7435 		    (u_longlong_t)obsolete_counts_object_count,
7436 		    (u_longlong_t)oc_feature_refcount);
7437 		(void) printf("pv:%llu os:%llu oc:%llu sc:%llu "
7438 		    "ob:%llu rd:%llu\n",
7439 		    (u_longlong_t)precise_vdev_count,
7440 		    (u_longlong_t)obsolete_sm_count,
7441 		    (u_longlong_t)obsolete_counts_count,
7442 		    (u_longlong_t)scip_count,
7443 		    (u_longlong_t)obsolete_bpobj_count,
7444 		    (u_longlong_t)remap_deadlist_count);
7445 	} else {
7446 		(void) printf("Verified indirect_refcount feature refcount " \
7447 		    "of %llu is correct\n",
7448 		    (u_longlong_t)oc_feature_refcount);
7449 	}
7450 	return (ret);
7451 }
7452 
7453 static void
7454 zdb_set_skip_mmp(char *target)
7455 {
7456 	spa_t *spa;
7457 
7458 	/*
7459 	 * Disable the activity check to allow examination of
7460 	 * active pools.
7461 	 */
7462 	mutex_enter(&spa_namespace_lock);
7463 	if ((spa = spa_lookup(target)) != NULL) {
7464 		spa->spa_import_flags |= ZFS_IMPORT_SKIP_MMP;
7465 	}
7466 	mutex_exit(&spa_namespace_lock);
7467 }
7468 
7469 #define	BOGUS_SUFFIX "_CHECKPOINTED_UNIVERSE"
7470 /*
7471  * Import the checkpointed state of the pool specified by the target
7472  * parameter as readonly. The function also accepts a pool config
7473  * as an optional parameter, else it attempts to infer the config by
7474  * the name of the target pool.
7475  *
7476  * Note that the checkpointed state's pool name will be the name of
7477  * the original pool with the above suffix appended to it. In addition,
7478  * if the target is not a pool name (e.g. a path to a dataset) then
7479  * the new_path parameter is populated with the updated path to
7480  * reflect the fact that we are looking into the checkpointed state.
7481  *
7482  * The function returns a newly-allocated copy of the name of the
7483  * pool containing the checkpointed state. When this copy is no
7484  * longer needed it should be freed with free(3C). Same thing
7485  * applies to the new_path parameter if allocated.
7486  */
7487 static char *
7488 import_checkpointed_state(char *target, nvlist_t *cfg, char **new_path)
7489 {
7490 	int error = 0;
7491 	char *poolname, *bogus_name = NULL;
7492 	boolean_t freecfg = B_FALSE;
7493 
7494 	/* If the target is not a pool, the extract the pool name */
7495 	char *path_start = strchr(target, '/');
7496 	if (path_start != NULL) {
7497 		size_t poolname_len = path_start - target;
7498 		poolname = strndup(target, poolname_len);
7499 	} else {
7500 		poolname = target;
7501 	}
7502 
7503 	if (cfg == NULL) {
7504 		zdb_set_skip_mmp(poolname);
7505 		error = spa_get_stats(poolname, &cfg, NULL, 0);
7506 		if (error != 0) {
7507 			fatal("Tried to read config of pool \"%s\" but "
7508 			    "spa_get_stats() failed with error %d\n",
7509 			    poolname, error);
7510 		}
7511 		freecfg = B_TRUE;
7512 	}
7513 
7514 	if (asprintf(&bogus_name, "%s%s", poolname, BOGUS_SUFFIX) == -1) {
7515 		if (target != poolname)
7516 			free(poolname);
7517 		return (NULL);
7518 	}
7519 	fnvlist_add_string(cfg, ZPOOL_CONFIG_POOL_NAME, bogus_name);
7520 
7521 	error = spa_import(bogus_name, cfg, NULL,
7522 	    ZFS_IMPORT_MISSING_LOG | ZFS_IMPORT_CHECKPOINT |
7523 	    ZFS_IMPORT_SKIP_MMP);
7524 	if (freecfg)
7525 		nvlist_free(cfg);
7526 	if (error != 0) {
7527 		fatal("Tried to import pool \"%s\" but spa_import() failed "
7528 		    "with error %d\n", bogus_name, error);
7529 	}
7530 
7531 	if (new_path != NULL && path_start != NULL) {
7532 		if (asprintf(new_path, "%s%s", bogus_name, path_start) == -1) {
7533 			free(bogus_name);
7534 			if (path_start != NULL)
7535 				free(poolname);
7536 			return (NULL);
7537 		}
7538 	}
7539 
7540 	if (target != poolname)
7541 		free(poolname);
7542 
7543 	return (bogus_name);
7544 }
7545 
/*
 * State handed to verify_checkpoint_sm_entry_cb() for each space map
 * entry that is iterated.
 */
typedef struct verify_checkpoint_sm_entry_cb_arg {
	/* vdev whose metaslabs the entries are checked against */
	vdev_t *vcsec_vd;

	/* the following fields are only used for printing progress */
	uint64_t vcsec_entryid;		/* entries processed so far */
	uint64_t vcsec_num_entries;	/* total shown in the progress line */
} verify_checkpoint_sm_entry_cb_arg_t;

/* A progress line is printed once every this many entries. */
#define	ENTRIES_PER_PROGRESS_UPDATE 10000
7555 
7556 static int
7557 verify_checkpoint_sm_entry_cb(space_map_entry_t *sme, void *arg)
7558 {
7559 	verify_checkpoint_sm_entry_cb_arg_t *vcsec = arg;
7560 	vdev_t *vd = vcsec->vcsec_vd;
7561 	metaslab_t *ms = vd->vdev_ms[sme->sme_offset >> vd->vdev_ms_shift];
7562 	uint64_t end = sme->sme_offset + sme->sme_run;
7563 
7564 	ASSERT(sme->sme_type == SM_FREE);
7565 
7566 	if ((vcsec->vcsec_entryid % ENTRIES_PER_PROGRESS_UPDATE) == 0) {
7567 		(void) fprintf(stderr,
7568 		    "\rverifying vdev %llu, space map entry %llu of %llu ...",
7569 		    (longlong_t)vd->vdev_id,
7570 		    (longlong_t)vcsec->vcsec_entryid,
7571 		    (longlong_t)vcsec->vcsec_num_entries);
7572 	}
7573 	vcsec->vcsec_entryid++;
7574 
7575 	/*
7576 	 * See comment in checkpoint_sm_exclude_entry_cb()
7577 	 */
7578 	VERIFY3U(sme->sme_offset, >=, ms->ms_start);
7579 	VERIFY3U(end, <=, ms->ms_start + ms->ms_size);
7580 
7581 	/*
7582 	 * The entries in the vdev_checkpoint_sm should be marked as
7583 	 * allocated in the checkpointed state of the pool, therefore
7584 	 * their respective ms_allocateable trees should not contain them.
7585 	 */
7586 	mutex_enter(&ms->ms_lock);
7587 	range_tree_verify_not_present(ms->ms_allocatable,
7588 	    sme->sme_offset, sme->sme_run);
7589 	mutex_exit(&ms->ms_lock);
7590 
7591 	return (0);
7592 }
7593 
7594 /*
7595  * Verify that all segments in the vdev_checkpoint_sm are allocated
7596  * according to the checkpoint's ms_sm (i.e. are not in the checkpoint's
7597  * ms_allocatable).
7598  *
7599  * Do so by comparing the checkpoint space maps (vdev_checkpoint_sm) of
7600  * each vdev in the current state of the pool to the metaslab space maps
7601  * (ms_sm) of the checkpointed state of the pool.
7602  *
7603  * Note that the function changes the state of the ms_allocatable
7604  * trees of the current spa_t. The entries of these ms_allocatable
7605  * trees are cleared out and then repopulated from with the free
7606  * entries of their respective ms_sm space maps.
7607  */
7608 static void
7609 verify_checkpoint_vdev_spacemaps(spa_t *checkpoint, spa_t *current)
7610 {
7611 	vdev_t *ckpoint_rvd = checkpoint->spa_root_vdev;
7612 	vdev_t *current_rvd = current->spa_root_vdev;
7613 
7614 	load_concrete_ms_allocatable_trees(checkpoint, SM_FREE);
7615 
7616 	for (uint64_t c = 0; c < ckpoint_rvd->vdev_children; c++) {
7617 		vdev_t *ckpoint_vd = ckpoint_rvd->vdev_child[c];
7618 		vdev_t *current_vd = current_rvd->vdev_child[c];
7619 
7620 		space_map_t *checkpoint_sm = NULL;
7621 		uint64_t checkpoint_sm_obj;
7622 
7623 		if (ckpoint_vd->vdev_ops == &vdev_indirect_ops) {
7624 			/*
7625 			 * Since we don't allow device removal in a pool
7626 			 * that has a checkpoint, we expect that all removed
7627 			 * vdevs were removed from the pool before the
7628 			 * checkpoint.
7629 			 */
7630 			ASSERT3P(current_vd->vdev_ops, ==, &vdev_indirect_ops);
7631 			continue;
7632 		}
7633 
7634 		/*
7635 		 * If the checkpoint space map doesn't exist, then nothing
7636 		 * here is checkpointed so there's nothing to verify.
7637 		 */
7638 		if (current_vd->vdev_top_zap == 0 ||
7639 		    zap_contains(spa_meta_objset(current),
7640 		    current_vd->vdev_top_zap,
7641 		    VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0)
7642 			continue;
7643 
7644 		VERIFY0(zap_lookup(spa_meta_objset(current),
7645 		    current_vd->vdev_top_zap, VDEV_TOP_ZAP_POOL_CHECKPOINT_SM,
7646 		    sizeof (uint64_t), 1, &checkpoint_sm_obj));
7647 
7648 		VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(current),
7649 		    checkpoint_sm_obj, 0, current_vd->vdev_asize,
7650 		    current_vd->vdev_ashift));
7651 
7652 		verify_checkpoint_sm_entry_cb_arg_t vcsec;
7653 		vcsec.vcsec_vd = ckpoint_vd;
7654 		vcsec.vcsec_entryid = 0;
7655 		vcsec.vcsec_num_entries =
7656 		    space_map_length(checkpoint_sm) / sizeof (uint64_t);
7657 		VERIFY0(space_map_iterate(checkpoint_sm,
7658 		    space_map_length(checkpoint_sm),
7659 		    verify_checkpoint_sm_entry_cb, &vcsec));
7660 		if (dump_opt['m'] > 3)
7661 			dump_spacemap(current->spa_meta_objset, checkpoint_sm);
7662 		space_map_close(checkpoint_sm);
7663 	}
7664 
7665 	/*
7666 	 * If we've added vdevs since we took the checkpoint, ensure
7667 	 * that their checkpoint space maps are empty.
7668 	 */
7669 	if (ckpoint_rvd->vdev_children < current_rvd->vdev_children) {
7670 		for (uint64_t c = ckpoint_rvd->vdev_children;
7671 		    c < current_rvd->vdev_children; c++) {
7672 			vdev_t *current_vd = current_rvd->vdev_child[c];
7673 			VERIFY3P(current_vd->vdev_checkpoint_sm, ==, NULL);
7674 		}
7675 	}
7676 
7677 	/* for cleaner progress output */
7678 	(void) fprintf(stderr, "\n");
7679 }
7680 
7681 /*
7682  * Verifies that all space that's allocated in the checkpoint is
7683  * still allocated in the current version, by checking that everything
7684  * in checkpoint's ms_allocatable (which is actually allocated, not
7685  * allocatable/free) is not present in current's ms_allocatable.
7686  *
7687  * Note that the function changes the state of the ms_allocatable
7688  * trees of both spas when called. The entries of all ms_allocatable
7689  * trees are cleared out and then repopulated from their respective
7690  * ms_sm space maps. In the checkpointed state we load the allocated
7691  * entries, and in the current state we load the free entries.
7692  */
7693 static void
7694 verify_checkpoint_ms_spacemaps(spa_t *checkpoint, spa_t *current)
7695 {
7696 	vdev_t *ckpoint_rvd = checkpoint->spa_root_vdev;
7697 	vdev_t *current_rvd = current->spa_root_vdev;
7698 
7699 	load_concrete_ms_allocatable_trees(checkpoint, SM_ALLOC);
7700 	load_concrete_ms_allocatable_trees(current, SM_FREE);
7701 
7702 	for (uint64_t i = 0; i < ckpoint_rvd->vdev_children; i++) {
7703 		vdev_t *ckpoint_vd = ckpoint_rvd->vdev_child[i];
7704 		vdev_t *current_vd = current_rvd->vdev_child[i];
7705 
7706 		if (ckpoint_vd->vdev_ops == &vdev_indirect_ops) {
7707 			/*
7708 			 * See comment in verify_checkpoint_vdev_spacemaps()
7709 			 */
7710 			ASSERT3P(current_vd->vdev_ops, ==, &vdev_indirect_ops);
7711 			continue;
7712 		}
7713 
7714 		for (uint64_t m = 0; m < ckpoint_vd->vdev_ms_count; m++) {
7715 			metaslab_t *ckpoint_msp = ckpoint_vd->vdev_ms[m];
7716 			metaslab_t *current_msp = current_vd->vdev_ms[m];
7717 
7718 			(void) fprintf(stderr,
7719 			    "\rverifying vdev %llu of %llu, "
7720 			    "metaslab %llu of %llu ...",
7721 			    (longlong_t)current_vd->vdev_id,
7722 			    (longlong_t)current_rvd->vdev_children,
7723 			    (longlong_t)current_vd->vdev_ms[m]->ms_id,
7724 			    (longlong_t)current_vd->vdev_ms_count);
7725 
7726 			/*
7727 			 * We walk through the ms_allocatable trees that
7728 			 * are loaded with the allocated blocks from the
7729 			 * ms_sm spacemaps of the checkpoint. For each
7730 			 * one of these ranges we ensure that none of them
7731 			 * exists in the ms_allocatable trees of the
7732 			 * current state which are loaded with the ranges
7733 			 * that are currently free.
7734 			 *
7735 			 * This way we ensure that none of the blocks that
7736 			 * are part of the checkpoint were freed by mistake.
7737 			 */
7738 			range_tree_walk(ckpoint_msp->ms_allocatable,
7739 			    (range_tree_func_t *)range_tree_verify_not_present,
7740 			    current_msp->ms_allocatable);
7741 		}
7742 	}
7743 
7744 	/* for cleaner progress output */
7745 	(void) fprintf(stderr, "\n");
7746 }
7747 
7748 static void
7749 verify_checkpoint_blocks(spa_t *spa)
7750 {
7751 	ASSERT(!dump_opt['L']);
7752 
7753 	spa_t *checkpoint_spa;
7754 	char *checkpoint_pool;
7755 	int error = 0;
7756 
7757 	/*
7758 	 * We import the checkpointed state of the pool (under a different
7759 	 * name) so we can do verification on it against the current state
7760 	 * of the pool.
7761 	 */
7762 	checkpoint_pool = import_checkpointed_state(spa->spa_name, NULL,
7763 	    NULL);
7764 	ASSERT(strcmp(spa->spa_name, checkpoint_pool) != 0);
7765 
7766 	error = spa_open(checkpoint_pool, &checkpoint_spa, FTAG);
7767 	if (error != 0) {
7768 		fatal("Tried to open pool \"%s\" but spa_open() failed with "
7769 		    "error %d\n", checkpoint_pool, error);
7770 	}
7771 
7772 	/*
7773 	 * Ensure that ranges in the checkpoint space maps of each vdev
7774 	 * are allocated according to the checkpointed state's metaslab
7775 	 * space maps.
7776 	 */
7777 	verify_checkpoint_vdev_spacemaps(checkpoint_spa, spa);
7778 
7779 	/*
7780 	 * Ensure that allocated ranges in the checkpoint's metaslab
7781 	 * space maps remain allocated in the metaslab space maps of
7782 	 * the current state.
7783 	 */
7784 	verify_checkpoint_ms_spacemaps(checkpoint_spa, spa);
7785 
7786 	/*
7787 	 * Once we are done, we get rid of the checkpointed state.
7788 	 */
7789 	spa_close(checkpoint_spa, FTAG);
7790 	free(checkpoint_pool);
7791 }
7792 
7793 static void
7794 dump_leftover_checkpoint_blocks(spa_t *spa)
7795 {
7796 	vdev_t *rvd = spa->spa_root_vdev;
7797 
7798 	for (uint64_t i = 0; i < rvd->vdev_children; i++) {
7799 		vdev_t *vd = rvd->vdev_child[i];
7800 
7801 		space_map_t *checkpoint_sm = NULL;
7802 		uint64_t checkpoint_sm_obj;
7803 
7804 		if (vd->vdev_top_zap == 0)
7805 			continue;
7806 
7807 		if (zap_contains(spa_meta_objset(spa), vd->vdev_top_zap,
7808 		    VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0)
7809 			continue;
7810 
7811 		VERIFY0(zap_lookup(spa_meta_objset(spa), vd->vdev_top_zap,
7812 		    VDEV_TOP_ZAP_POOL_CHECKPOINT_SM,
7813 		    sizeof (uint64_t), 1, &checkpoint_sm_obj));
7814 
7815 		VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(spa),
7816 		    checkpoint_sm_obj, 0, vd->vdev_asize, vd->vdev_ashift));
7817 		dump_spacemap(spa->spa_meta_objset, checkpoint_sm);
7818 		space_map_close(checkpoint_sm);
7819 	}
7820 }
7821 
7822 static int
7823 verify_checkpoint(spa_t *spa)
7824 {
7825 	uberblock_t checkpoint;
7826 	int error;
7827 
7828 	if (!spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT))
7829 		return (0);
7830 
7831 	error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
7832 	    DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t),
7833 	    sizeof (uberblock_t) / sizeof (uint64_t), &checkpoint);
7834 
7835 	if (error == ENOENT && !dump_opt['L']) {
7836 		/*
7837 		 * If the feature is active but the uberblock is missing
7838 		 * then we must be in the middle of discarding the
7839 		 * checkpoint.
7840 		 */
7841 		(void) printf("\nPartially discarded checkpoint "
7842 		    "state found:\n");
7843 		if (dump_opt['m'] > 3)
7844 			dump_leftover_checkpoint_blocks(spa);
7845 		return (0);
7846 	} else if (error != 0) {
7847 		(void) printf("lookup error %d when looking for "
7848 		    "checkpointed uberblock in MOS\n", error);
7849 		return (error);
7850 	}
7851 	dump_uberblock(&checkpoint, "\nCheckpointed uberblock found:\n", "\n");
7852 
7853 	if (checkpoint.ub_checkpoint_txg == 0) {
7854 		(void) printf("\nub_checkpoint_txg not set in checkpointed "
7855 		    "uberblock\n");
7856 		error = 3;
7857 	}
7858 
7859 	if (error == 0 && !dump_opt['L'])
7860 		verify_checkpoint_blocks(spa);
7861 
7862 	return (error);
7863 }
7864 
/*
 * range_tree_walk() callback for the segments left over in mos_refd_objs:
 * reports every object number in the segment as referenced but not
 * allocated.
 *
 * The segment is described by its first object number (start) and the
 * number of objects it covers (size), so the objects to report are
 * start .. start + size - 1. arg is unused.
 */
static void
mos_leaks_cb(void *arg, uint64_t start, uint64_t size)
{
	(void) arg;
	for (uint64_t i = 0; i < size; i++) {
		(void) printf("MOS object %llu referenced but not allocated\n",
		    (u_longlong_t)(start + i));
	}
}
7874 
7875 static void
7876 mos_obj_refd(uint64_t obj)
7877 {
7878 	if (obj != 0 && mos_refd_objs != NULL)
7879 		range_tree_add(mos_refd_objs, obj, 1);
7880 }
7881 
7882 /*
7883  * Call on a MOS object that may already have been referenced.
7884  */
7885 static void
7886 mos_obj_refd_multiple(uint64_t obj)
7887 {
7888 	if (obj != 0 && mos_refd_objs != NULL &&
7889 	    !range_tree_contains(mos_refd_objs, obj, 1))
7890 		range_tree_add(mos_refd_objs, obj, 1);
7891 }
7892 
7893 static void
7894 mos_leak_vdev_top_zap(vdev_t *vd)
7895 {
7896 	uint64_t ms_flush_data_obj;
7897 	int error = zap_lookup(spa_meta_objset(vd->vdev_spa),
7898 	    vd->vdev_top_zap, VDEV_TOP_ZAP_MS_UNFLUSHED_PHYS_TXGS,
7899 	    sizeof (ms_flush_data_obj), 1, &ms_flush_data_obj);
7900 	if (error == ENOENT)
7901 		return;
7902 	ASSERT0(error);
7903 
7904 	mos_obj_refd(ms_flush_data_obj);
7905 }
7906 
7907 static void
7908 mos_leak_vdev(vdev_t *vd)
7909 {
7910 	mos_obj_refd(vd->vdev_dtl_object);
7911 	mos_obj_refd(vd->vdev_ms_array);
7912 	mos_obj_refd(vd->vdev_indirect_config.vic_births_object);
7913 	mos_obj_refd(vd->vdev_indirect_config.vic_mapping_object);
7914 	mos_obj_refd(vd->vdev_leaf_zap);
7915 	if (vd->vdev_checkpoint_sm != NULL)
7916 		mos_obj_refd(vd->vdev_checkpoint_sm->sm_object);
7917 	if (vd->vdev_indirect_mapping != NULL) {
7918 		mos_obj_refd(vd->vdev_indirect_mapping->
7919 		    vim_phys->vimp_counts_object);
7920 	}
7921 	if (vd->vdev_obsolete_sm != NULL)
7922 		mos_obj_refd(vd->vdev_obsolete_sm->sm_object);
7923 
7924 	for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
7925 		metaslab_t *ms = vd->vdev_ms[m];
7926 		mos_obj_refd(space_map_object(ms->ms_sm));
7927 	}
7928 
7929 	if (vd->vdev_root_zap != 0)
7930 		mos_obj_refd(vd->vdev_root_zap);
7931 
7932 	if (vd->vdev_top_zap != 0) {
7933 		mos_obj_refd(vd->vdev_top_zap);
7934 		mos_leak_vdev_top_zap(vd);
7935 	}
7936 
7937 	for (uint64_t c = 0; c < vd->vdev_children; c++) {
7938 		mos_leak_vdev(vd->vdev_child[c]);
7939 	}
7940 }
7941 
7942 static void
7943 mos_leak_log_spacemaps(spa_t *spa)
7944 {
7945 	uint64_t spacemap_zap;
7946 	int error = zap_lookup(spa_meta_objset(spa),
7947 	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_LOG_SPACEMAP_ZAP,
7948 	    sizeof (spacemap_zap), 1, &spacemap_zap);
7949 	if (error == ENOENT)
7950 		return;
7951 	ASSERT0(error);
7952 
7953 	mos_obj_refd(spacemap_zap);
7954 	for (spa_log_sm_t *sls = avl_first(&spa->spa_sm_logs_by_txg);
7955 	    sls; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls))
7956 		mos_obj_refd(sls->sls_sm_obj);
7957 }
7958 
7959 static void
7960 errorlog_count_refd(objset_t *mos, uint64_t errlog)
7961 {
7962 	zap_cursor_t zc;
7963 	zap_attribute_t za;
7964 	for (zap_cursor_init(&zc, mos, errlog);
7965 	    zap_cursor_retrieve(&zc, &za) == 0;
7966 	    zap_cursor_advance(&zc)) {
7967 		mos_obj_refd(za.za_first_integer);
7968 	}
7969 	zap_cursor_fini(&zc);
7970 }
7971 
/*
 * Cross-check the objects allocated in the MOS against the set of
 * objects recorded as referenced in mos_refd_objs.
 *
 * First, the MOS objects referenced from the pool state visible here
 * are marked as referenced. Then every allocated MOS object is visited:
 * an object found in mos_refd_objs is removed from it, any other object
 * is reported as leaked. Whatever is still in mos_refd_objs afterwards
 * was referenced without being allocated and is reported through
 * mos_leaks_cb().
 *
 * Returns 2 if either kind of discrepancy was found and 0 otherwise.
 * mos_refd_objs is emptied and destroyed before returning.
 */
static int
dump_mos_leaks(spa_t *spa)
{
	int rv = 0;
	objset_t *mos = spa->spa_meta_objset;
	dsl_pool_t *dp = spa->spa_dsl_pool;

	/* Visit and mark all referenced objects in the MOS */

	mos_obj_refd(DMU_POOL_DIRECTORY_OBJECT);
	mos_obj_refd(spa->spa_pool_props_object);
	mos_obj_refd(spa->spa_config_object);
	mos_obj_refd(spa->spa_ddt_stat_object);
	mos_obj_refd(spa->spa_feat_desc_obj);
	mos_obj_refd(spa->spa_feat_enabled_txg_obj);
	mos_obj_refd(spa->spa_feat_for_read_obj);
	mos_obj_refd(spa->spa_feat_for_write_obj);
	mos_obj_refd(spa->spa_history);
	mos_obj_refd(spa->spa_errlog_last);
	mos_obj_refd(spa->spa_errlog_scrub);

	/*
	 * With the head_errlog feature enabled, the entries of both error
	 * logs are themselves counted as references to MOS objects.
	 */
	if (spa_feature_is_enabled(spa, SPA_FEATURE_HEAD_ERRLOG)) {
		errorlog_count_refd(mos, spa->spa_errlog_last);
		errorlog_count_refd(mos, spa->spa_errlog_scrub);
	}

	mos_obj_refd(spa->spa_all_vdev_zaps);
	mos_obj_refd(spa->spa_dsl_pool->dp_bptree_obj);
	mos_obj_refd(spa->spa_dsl_pool->dp_tmp_userrefs_obj);
	mos_obj_refd(spa->spa_dsl_pool->dp_scan->scn_phys.scn_queue_obj);
	bpobj_count_refd(&spa->spa_deferred_bpobj);
	mos_obj_refd(dp->dp_empty_bpobj);
	bpobj_count_refd(&dp->dp_obsolete_bpobj);
	bpobj_count_refd(&dp->dp_free_bpobj);
	mos_obj_refd(spa->spa_l2cache.sav_object);
	mos_obj_refd(spa->spa_spares.sav_object);

	if (spa->spa_syncing_log_sm != NULL)
		mos_obj_refd(spa->spa_syncing_log_sm->sm_object);
	mos_leak_log_spacemaps(spa);

	mos_obj_refd(spa->spa_condensing_indirect_phys.
	    scip_next_mapping_object);
	mos_obj_refd(spa->spa_condensing_indirect_phys.
	    scip_prev_obsolete_sm_object);
	/*
	 * The next mapping object refers to a counts object of its own;
	 * open the mapping just long enough to read that object number.
	 */
	if (spa->spa_condensing_indirect_phys.scip_next_mapping_object != 0) {
		vdev_indirect_mapping_t *vim =
		    vdev_indirect_mapping_open(mos,
		    spa->spa_condensing_indirect_phys.scip_next_mapping_object);
		mos_obj_refd(vim->vim_phys->vimp_counts_object);
		vdev_indirect_mapping_close(vim);
	}
	deleted_livelists_dump_mos(spa);

	/*
	 * Count the origin snapshot and the dataset recorded as its next
	 * snapshot, and dump the deadlist of each.
	 */
	if (dp->dp_origin_snap != NULL) {
		dsl_dataset_t *ds;

		dsl_pool_config_enter(dp, FTAG);
		VERIFY0(dsl_dataset_hold_obj(dp,
		    dsl_dataset_phys(dp->dp_origin_snap)->ds_next_snap_obj,
		    FTAG, &ds));
		count_ds_mos_objects(ds);
		dump_blkptr_list(&ds->ds_deadlist, "Deadlist");
		dsl_dataset_rele(ds, FTAG);
		dsl_pool_config_exit(dp, FTAG);

		count_ds_mos_objects(dp->dp_origin_snap);
		dump_blkptr_list(&dp->dp_origin_snap->ds_deadlist, "Deadlist");
	}
	count_dir_mos_objects(dp->dp_mos_dir);
	if (dp->dp_free_dir != NULL)
		count_dir_mos_objects(dp->dp_free_dir);
	if (dp->dp_leak_dir != NULL)
		count_dir_mos_objects(dp->dp_leak_dir);

	mos_leak_vdev(spa->spa_root_vdev);

	/* One DDT object per (class, type, checksum function) combination */
	for (uint64_t class = 0; class < DDT_CLASSES; class++) {
		for (uint64_t type = 0; type < DDT_TYPES; type++) {
			for (uint64_t cksum = 0;
			    cksum < ZIO_CHECKSUM_FUNCTIONS; cksum++) {
				ddt_t *ddt = spa->spa_ddt[cksum];
				mos_obj_refd(ddt->ddt_object[type][class]);
			}
		}
	}

	/*
	 * Visit all allocated objects and make sure they are referenced.
	 */
	uint64_t object = 0;
	while (dmu_object_next(mos, &object, B_FALSE, 0) == 0) {
		if (range_tree_contains(mos_refd_objs, object, 1)) {
			/* accounted for; drop it so only strays remain */
			range_tree_remove(mos_refd_objs, object, 1);
		} else {
			dmu_object_info_t doi;
			const char *name;
			VERIFY0(dmu_object_info(mos, object, &doi));
			/* pick the type name from the table matching the type */
			if (doi.doi_type & DMU_OT_NEWTYPE) {
				dmu_object_byteswap_t bswap =
				    DMU_OT_BYTESWAP(doi.doi_type);
				name = dmu_ot_byteswap[bswap].ob_name;
			} else {
				name = dmu_ot[doi.doi_type].ot_name;
			}

			(void) printf("MOS object %llu (%s) leaked\n",
			    (u_longlong_t)object, name);
			rv = 2;
		}
	}
	/* anything left in the tree was referenced but is not allocated */
	(void) range_tree_walk(mos_refd_objs, mos_leaks_cb, NULL);
	if (!range_tree_is_empty(mos_refd_objs))
		rv = 2;
	range_tree_vacate(mos_refd_objs, NULL, NULL);
	range_tree_destroy(mos_refd_objs);
	return (rv);
}
8090 
/*
 * Accumulator handed to log_spacemap_obsolete_stats_cb() while iterating
 * through the log space maps.
 */
typedef struct log_sm_obsolete_stats_arg {
	/* txg of the log being counted; 0 until the first entry is seen */
	uint64_t lsos_current_txg;

	/* entries seen / entries counted as valid, over all logs */
	uint64_t lsos_total_entries;
	uint64_t lsos_valid_entries;

	/* the same two counters for the current log; reset on txg change */
	uint64_t lsos_sm_entries;
	uint64_t lsos_valid_sm_entries;
} log_sm_obsolete_stats_arg_t;
8100 
8101 static int
8102 log_spacemap_obsolete_stats_cb(spa_t *spa, space_map_entry_t *sme,
8103     uint64_t txg, void *arg)
8104 {
8105 	log_sm_obsolete_stats_arg_t *lsos = arg;
8106 
8107 	uint64_t offset = sme->sme_offset;
8108 	uint64_t vdev_id = sme->sme_vdev;
8109 
8110 	if (lsos->lsos_current_txg == 0) {
8111 		/* this is the first log */
8112 		lsos->lsos_current_txg = txg;
8113 	} else if (lsos->lsos_current_txg < txg) {
8114 		/* we just changed log - print stats and reset */
8115 		(void) printf("%-8llu valid entries out of %-8llu - txg %llu\n",
8116 		    (u_longlong_t)lsos->lsos_valid_sm_entries,
8117 		    (u_longlong_t)lsos->lsos_sm_entries,
8118 		    (u_longlong_t)lsos->lsos_current_txg);
8119 		lsos->lsos_valid_sm_entries = 0;
8120 		lsos->lsos_sm_entries = 0;
8121 		lsos->lsos_current_txg = txg;
8122 	}
8123 	ASSERT3U(lsos->lsos_current_txg, ==, txg);
8124 
8125 	lsos->lsos_sm_entries++;
8126 	lsos->lsos_total_entries++;
8127 
8128 	vdev_t *vd = vdev_lookup_top(spa, vdev_id);
8129 	if (!vdev_is_concrete(vd))
8130 		return (0);
8131 
8132 	metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift];
8133 	ASSERT(sme->sme_type == SM_ALLOC || sme->sme_type == SM_FREE);
8134 
8135 	if (txg < metaslab_unflushed_txg(ms))
8136 		return (0);
8137 	lsos->lsos_valid_sm_entries++;
8138 	lsos->lsos_valid_entries++;
8139 	return (0);
8140 }
8141 
8142 static void
8143 dump_log_spacemap_obsolete_stats(spa_t *spa)
8144 {
8145 	if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP))
8146 		return;
8147 
8148 	log_sm_obsolete_stats_arg_t lsos = {0};
8149 
8150 	(void) printf("Log Space Map Obsolete Entry Statistics:\n");
8151 
8152 	iterate_through_spacemap_logs(spa,
8153 	    log_spacemap_obsolete_stats_cb, &lsos);
8154 
8155 	/* print stats for latest log */
8156 	(void) printf("%-8llu valid entries out of %-8llu - txg %llu\n",
8157 	    (u_longlong_t)lsos.lsos_valid_sm_entries,
8158 	    (u_longlong_t)lsos.lsos_sm_entries,
8159 	    (u_longlong_t)lsos.lsos_current_txg);
8160 
8161 	(void) printf("%-8llu valid entries out of %-8llu - total\n\n",
8162 	    (u_longlong_t)lsos.lsos_valid_entries,
8163 	    (u_longlong_t)lsos.lsos_total_entries);
8164 }
8165 
8166 static void
8167 dump_zpool(spa_t *spa)
8168 {
8169 	dsl_pool_t *dp = spa_get_dsl(spa);
8170 	int rc = 0;
8171 
8172 	if (dump_opt['y']) {
8173 		livelist_metaslab_validate(spa);
8174 	}
8175 
8176 	if (dump_opt['S']) {
8177 		dump_simulated_ddt(spa);
8178 		return;
8179 	}
8180 
8181 	if (!dump_opt['e'] && dump_opt['C'] > 1) {
8182 		(void) printf("\nCached configuration:\n");
8183 		dump_nvlist(spa->spa_config, 8);
8184 	}
8185 
8186 	if (dump_opt['C'])
8187 		dump_config(spa);
8188 
8189 	if (dump_opt['u'])
8190 		dump_uberblock(&spa->spa_uberblock, "\nUberblock:\n", "\n");
8191 
8192 	if (dump_opt['D'])
8193 		dump_all_ddts(spa);
8194 
8195 	if (dump_opt['T'])
8196 		dump_brt(spa);
8197 
8198 	if (dump_opt['d'] > 2 || dump_opt['m'])
8199 		dump_metaslabs(spa);
8200 	if (dump_opt['M'])
8201 		dump_metaslab_groups(spa, dump_opt['M'] > 1);
8202 	if (dump_opt['d'] > 2 || dump_opt['m']) {
8203 		dump_log_spacemaps(spa);
8204 		dump_log_spacemap_obsolete_stats(spa);
8205 	}
8206 
8207 	if (dump_opt['d'] || dump_opt['i']) {
8208 		spa_feature_t f;
8209 		mos_refd_objs = range_tree_create(NULL, RANGE_SEG64, NULL, 0,
8210 		    0);
8211 		dump_objset(dp->dp_meta_objset);
8212 
8213 		if (dump_opt['d'] >= 3) {
8214 			dsl_pool_t *dp = spa->spa_dsl_pool;
8215 			dump_full_bpobj(&spa->spa_deferred_bpobj,
8216 			    "Deferred frees", 0);
8217 			if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
8218 				dump_full_bpobj(&dp->dp_free_bpobj,
8219 				    "Pool snapshot frees", 0);
8220 			}
8221 			if (bpobj_is_open(&dp->dp_obsolete_bpobj)) {
8222 				ASSERT(spa_feature_is_enabled(spa,
8223 				    SPA_FEATURE_DEVICE_REMOVAL));
8224 				dump_full_bpobj(&dp->dp_obsolete_bpobj,
8225 				    "Pool obsolete blocks", 0);
8226 			}
8227 
8228 			if (spa_feature_is_active(spa,
8229 			    SPA_FEATURE_ASYNC_DESTROY)) {
8230 				dump_bptree(spa->spa_meta_objset,
8231 				    dp->dp_bptree_obj,
8232 				    "Pool dataset frees");
8233 			}
8234 			dump_dtl(spa->spa_root_vdev, 0);
8235 		}
8236 
8237 		for (spa_feature_t f = 0; f < SPA_FEATURES; f++)
8238 			global_feature_count[f] = UINT64_MAX;
8239 		global_feature_count[SPA_FEATURE_REDACTION_BOOKMARKS] = 0;
8240 		global_feature_count[SPA_FEATURE_REDACTION_LIST_SPILL] = 0;
8241 		global_feature_count[SPA_FEATURE_BOOKMARK_WRITTEN] = 0;
8242 		global_feature_count[SPA_FEATURE_LIVELIST] = 0;
8243 
8244 		(void) dmu_objset_find(spa_name(spa), dump_one_objset,
8245 		    NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
8246 
8247 		if (rc == 0 && !dump_opt['L'])
8248 			rc = dump_mos_leaks(spa);
8249 
8250 		for (f = 0; f < SPA_FEATURES; f++) {
8251 			uint64_t refcount;
8252 
8253 			uint64_t *arr;
8254 			if (!(spa_feature_table[f].fi_flags &
8255 			    ZFEATURE_FLAG_PER_DATASET)) {
8256 				if (global_feature_count[f] == UINT64_MAX)
8257 					continue;
8258 				if (!spa_feature_is_enabled(spa, f)) {
8259 					ASSERT0(global_feature_count[f]);
8260 					continue;
8261 				}
8262 				arr = global_feature_count;
8263 			} else {
8264 				if (!spa_feature_is_enabled(spa, f)) {
8265 					ASSERT0(dataset_feature_count[f]);
8266 					continue;
8267 				}
8268 				arr = dataset_feature_count;
8269 			}
8270 			if (feature_get_refcount(spa, &spa_feature_table[f],
8271 			    &refcount) == ENOTSUP)
8272 				continue;
8273 			if (arr[f] != refcount) {
8274 				(void) printf("%s feature refcount mismatch: "
8275 				    "%lld consumers != %lld refcount\n",
8276 				    spa_feature_table[f].fi_uname,
8277 				    (longlong_t)arr[f], (longlong_t)refcount);
8278 				rc = 2;
8279 			} else {
8280 				(void) printf("Verified %s feature refcount "
8281 				    "of %llu is correct\n",
8282 				    spa_feature_table[f].fi_uname,
8283 				    (longlong_t)refcount);
8284 			}
8285 		}
8286 
8287 		if (rc == 0)
8288 			rc = verify_device_removal_feature_counts(spa);
8289 	}
8290 
8291 	if (rc == 0 && (dump_opt['b'] || dump_opt['c']))
8292 		rc = dump_block_stats(spa);
8293 
8294 	if (rc == 0)
8295 		rc = verify_spacemap_refcounts(spa);
8296 
8297 	if (dump_opt['s'])
8298 		show_pool_stats(spa);
8299 
8300 	if (dump_opt['h'])
8301 		dump_history(spa);
8302 
8303 	if (rc == 0)
8304 		rc = verify_checkpoint(spa);
8305 
8306 	if (rc != 0) {
8307 		dump_debug_buffer();
8308 		exit(rc);
8309 	}
8310 }
8311 
/*
 * Option bits for reading and displaying a single block; zdb_read_block()
 * accumulates them from the flags part of its block descriptor.
 */
#define	ZDB_FLAG_CHECKSUM	0x0001
#define	ZDB_FLAG_DECOMPRESS	0x0002
#define	ZDB_FLAG_BSWAP		0x0004
#define	ZDB_FLAG_GBH		0x0008
#define	ZDB_FLAG_INDIRECT	0x0010
#define	ZDB_FLAG_RAW		0x0020
#define	ZDB_FLAG_PRINT_BLKPTR	0x0040
#define	ZDB_FLAG_VERBOSE	0x0080

/*
 * flagbits[] is indexed by a flag character and holds the ZDB_FLAG_* bit
 * for it, with 0 meaning the character is not a flag.
 */
static int flagbits[256];
/* presumably the set of valid flag characters - filled in elsewhere */
static char flagbitstr[16];
8323 
8324 static void
8325 zdb_print_blkptr(const blkptr_t *bp, int flags)
8326 {
8327 	char blkbuf[BP_SPRINTF_LEN];
8328 
8329 	if (flags & ZDB_FLAG_BSWAP)
8330 		byteswap_uint64_array((void *)bp, sizeof (blkptr_t));
8331 
8332 	snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
8333 	(void) printf("%s\n", blkbuf);
8334 }
8335 
8336 static void
8337 zdb_dump_indirect(blkptr_t *bp, int nbps, int flags)
8338 {
8339 	int i;
8340 
8341 	for (i = 0; i < nbps; i++)
8342 		zdb_print_blkptr(&bp[i], flags);
8343 }
8344 
8345 static void
8346 zdb_dump_gbh(void *buf, int flags)
8347 {
8348 	zdb_dump_indirect((blkptr_t *)buf, SPA_GBH_NBLKPTRS, flags);
8349 }
8350 
8351 static void
8352 zdb_dump_block_raw(void *buf, uint64_t size, int flags)
8353 {
8354 	if (flags & ZDB_FLAG_BSWAP)
8355 		byteswap_uint64_array(buf, size);
8356 	VERIFY(write(fileno(stdout), buf, size) == size);
8357 }
8358 
8359 static void
8360 zdb_dump_block(char *label, void *buf, uint64_t size, int flags)
8361 {
8362 	uint64_t *d = (uint64_t *)buf;
8363 	unsigned nwords = size / sizeof (uint64_t);
8364 	int do_bswap = !!(flags & ZDB_FLAG_BSWAP);
8365 	unsigned i, j;
8366 	const char *hdr;
8367 	char *c;
8368 
8369 
8370 	if (do_bswap)
8371 		hdr = " 7 6 5 4 3 2 1 0   f e d c b a 9 8";
8372 	else
8373 		hdr = " 0 1 2 3 4 5 6 7   8 9 a b c d e f";
8374 
8375 	(void) printf("\n%s\n%6s   %s  0123456789abcdef\n", label, "", hdr);
8376 
8377 #ifdef _LITTLE_ENDIAN
8378 	/* correct the endianness */
8379 	do_bswap = !do_bswap;
8380 #endif
8381 	for (i = 0; i < nwords; i += 2) {
8382 		(void) printf("%06llx:  %016llx  %016llx  ",
8383 		    (u_longlong_t)(i * sizeof (uint64_t)),
8384 		    (u_longlong_t)(do_bswap ? BSWAP_64(d[i]) : d[i]),
8385 		    (u_longlong_t)(do_bswap ? BSWAP_64(d[i + 1]) : d[i + 1]));
8386 
8387 		c = (char *)&d[i];
8388 		for (j = 0; j < 2 * sizeof (uint64_t); j++)
8389 			(void) printf("%c", isprint(c[j]) ? c[j] : '.');
8390 		(void) printf("\n");
8391 	}
8392 }
8393 
8394 /*
8395  * There are two acceptable formats:
8396  *	leaf_name	  - For example: c1t0d0 or /tmp/ztest.0a
8397  *	child[.child]*    - For example: 0.1.1
8398  *
8399  * The second form can be used to specify arbitrary vdevs anywhere
8400  * in the hierarchy.  For example, in a pool with a mirror of
8401  * RAID-Zs, you can specify either RAID-Z vdev with 0.0 or 0.1 .
8402  */
8403 static vdev_t *
8404 zdb_vdev_lookup(vdev_t *vdev, const char *path)
8405 {
8406 	char *s, *p, *q;
8407 	unsigned i;
8408 
8409 	if (vdev == NULL)
8410 		return (NULL);
8411 
8412 	/* First, assume the x.x.x.x format */
8413 	i = strtoul(path, &s, 10);
8414 	if (s == path || (s && *s != '.' && *s != '\0'))
8415 		goto name;
8416 	if (i >= vdev->vdev_children)
8417 		return (NULL);
8418 
8419 	vdev = vdev->vdev_child[i];
8420 	if (s && *s == '\0')
8421 		return (vdev);
8422 	return (zdb_vdev_lookup(vdev, s+1));
8423 
8424 name:
8425 	for (i = 0; i < vdev->vdev_children; i++) {
8426 		vdev_t *vc = vdev->vdev_child[i];
8427 
8428 		if (vc->vdev_path == NULL) {
8429 			vc = zdb_vdev_lookup(vc, path);
8430 			if (vc == NULL)
8431 				continue;
8432 			else
8433 				return (vc);
8434 		}
8435 
8436 		p = strrchr(vc->vdev_path, '/');
8437 		p = p ? p + 1 : vc->vdev_path;
8438 		q = &vc->vdev_path[strlen(vc->vdev_path) - 2];
8439 
8440 		if (strcmp(vc->vdev_path, path) == 0)
8441 			return (vc);
8442 		if (strcmp(p, path) == 0)
8443 			return (vc);
8444 		if (strcmp(q, "s0") == 0 && strncmp(p, path, q - p) == 0)
8445 			return (vc);
8446 	}
8447 
8448 	return (NULL);
8449 }
8450 
8451 static int
8452 name_from_objset_id(spa_t *spa, uint64_t objset_id, char *outstr)
8453 {
8454 	dsl_dataset_t *ds;
8455 
8456 	dsl_pool_config_enter(spa->spa_dsl_pool, FTAG);
8457 	int error = dsl_dataset_hold_obj(spa->spa_dsl_pool, objset_id,
8458 	    NULL, &ds);
8459 	if (error != 0) {
8460 		(void) fprintf(stderr, "failed to hold objset %llu: %s\n",
8461 		    (u_longlong_t)objset_id, strerror(error));
8462 		dsl_pool_config_exit(spa->spa_dsl_pool, FTAG);
8463 		return (error);
8464 	}
8465 	dsl_dataset_name(ds, outstr);
8466 	dsl_dataset_rele(ds, NULL);
8467 	dsl_pool_config_exit(spa->spa_dsl_pool, FTAG);
8468 	return (0);
8469 }
8470 
/*
 * Parse a "lsize/psize" or single-size string of hexadecimal numbers
 * into *lsize and *psize; a single number is used for both. The string
 * is modified in the process.
 *
 * Returns B_TRUE if psize is non-zero and not larger than lsize, and
 * B_FALSE otherwise, including when sizes is NULL or holds no number.
 */
static boolean_t
zdb_parse_block_sizes(char *sizes, uint64_t *lsize, uint64_t *psize)
{
	char *save = NULL;
	char *ltok, *ptok;

	if (sizes == NULL)
		return (B_FALSE);

	ltok = strtok_r(sizes, "/", &save);
	if (ltok == NULL)
		return (B_FALSE);
	ptok = strtok_r(NULL, "/", &save);

	*lsize = strtoull(ltok, NULL, 16);
	if (ptok != NULL)
		*psize = strtoull(ptok, NULL, 16);
	else
		*psize = *lsize;

	if (*psize == 0)
		return (B_FALSE);
	return (*lsize >= *psize);
}
8487 
/* Bit standing for compression function ZIO_COMPRESS_<alg> in a 64-bit mask */
#define	ZIO_COMPRESS_MASK(alg)	(1ULL << (ZIO_COMPRESS_##alg))
8489 
8490 static boolean_t
8491 try_decompress_block(abd_t *pabd, uint64_t lsize, uint64_t psize,
8492     int flags, int cfunc, void *lbuf, void *lbuf2)
8493 {
8494 	if (flags & ZDB_FLAG_VERBOSE) {
8495 		(void) fprintf(stderr,
8496 		    "Trying %05llx -> %05llx (%s)\n",
8497 		    (u_longlong_t)psize,
8498 		    (u_longlong_t)lsize,
8499 		    zio_compress_table[cfunc].ci_name);
8500 	}
8501 
8502 	/*
8503 	 * We set lbuf to all zeros and lbuf2 to all
8504 	 * ones, then decompress to both buffers and
8505 	 * compare their contents. This way we can
8506 	 * know if decompression filled exactly to
8507 	 * lsize or if it left some bytes unwritten.
8508 	 */
8509 
8510 	memset(lbuf, 0x00, lsize);
8511 	memset(lbuf2, 0xff, lsize);
8512 
8513 	if (zio_decompress_data(cfunc, pabd,
8514 	    lbuf, psize, lsize, NULL) == 0 &&
8515 	    zio_decompress_data(cfunc, pabd,
8516 	    lbuf2, psize, lsize, NULL) == 0 &&
8517 	    memcmp(lbuf, lbuf2, lsize) == 0)
8518 		return (B_TRUE);
8519 	return (B_FALSE);
8520 }
8521 
/*
 * Brute-force decompression of the psize bytes in pabd when the
 * compression function is not known.
 *
 * Candidate functions are tried through try_decompress_block() at each
 * logical size from lsize up to a maximum, in SPA_MINBLOCKSIZE steps. If
 * lsize equals psize on entry, the search starts one step above psize
 * and runs up to SPA_MAXBLOCKSIZE; otherwise only the given lsize is
 * tried. If nothing matched and the ZDB_NO_ZLE environment variable is
 * not set, ZLE is tried as well, starting from the lsize passed in.
 *
 * On success the decompressed data is in lbuf and the logical size that
 * worked is returned; on failure the return value is (uint64_t)-1. The
 * buf argument is unused.
 */
static uint64_t
zdb_decompress_block(abd_t *pabd, void *buf, void *lbuf, uint64_t lsize,
    uint64_t psize, int flags)
{
	(void) buf;
	uint64_t orig_lsize = lsize;
	boolean_t tryzle = ((getenv("ZDB_NO_ZLE") == NULL));
	boolean_t found = B_FALSE;
	/*
	 * We don't know how the data was compressed, so just try
	 * every decompress function at every inflated blocksize.
	 */
	void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
	/* zero-terminated list of candidates, in the order they are tried */
	int cfuncs[ZIO_COMPRESS_FUNCTIONS] = { 0 };
	int *cfuncp = cfuncs;
	uint64_t maxlsize = SPA_MAXBLOCKSIZE;
	/* functions the fill loop below must not append to cfuncs */
	uint64_t mask = ZIO_COMPRESS_MASK(ON) | ZIO_COMPRESS_MASK(OFF) |
	    ZIO_COMPRESS_MASK(INHERIT) | ZIO_COMPRESS_MASK(EMPTY) |
	    ZIO_COMPRESS_MASK(ZLE);
	/* LZ4 and LZJB go first; masking them keeps them from being re-added */
	*cfuncp++ = ZIO_COMPRESS_LZ4;
	*cfuncp++ = ZIO_COMPRESS_LZJB;
	mask |= ZIO_COMPRESS_MASK(LZ4) | ZIO_COMPRESS_MASK(LZJB);
	/*
	 * Every gzip level has the same decompressor, no need to
	 * run it 9 times per bruteforce attempt.
	 */
	mask |= ZIO_COMPRESS_MASK(GZIP_2) | ZIO_COMPRESS_MASK(GZIP_3);
	mask |= ZIO_COMPRESS_MASK(GZIP_4) | ZIO_COMPRESS_MASK(GZIP_5);
	mask |= ZIO_COMPRESS_MASK(GZIP_6) | ZIO_COMPRESS_MASK(GZIP_7);
	mask |= ZIO_COMPRESS_MASK(GZIP_8) | ZIO_COMPRESS_MASK(GZIP_9);
	/* append every function that is not masked out */
	for (int c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++)
		if (((1ULL << c) & mask) == 0)
			*cfuncp++ = c;

	/*
	 * On the one hand, with SPA_MAXBLOCKSIZE at 16MB, this
	 * could take a while and we should let the user know
	 * we are not stuck.  On the other hand, printing progress
	 * info gets old after a while.  User can specify 'v' flag
	 * to see the progression.
	 */
	if (lsize == psize)
		lsize += SPA_MINBLOCKSIZE;
	else
		maxlsize = lsize;

	for (; lsize <= maxlsize; lsize += SPA_MINBLOCKSIZE) {
		for (cfuncp = cfuncs; *cfuncp; cfuncp++) {
			if (try_decompress_block(pabd, lsize, psize, flags,
			    *cfuncp, lbuf, lbuf2)) {
				found = B_TRUE;
				break;
			}
		}
		/*
		 * The inner loop leaves cfuncp on the terminating 0 unless
		 * it stopped early on a match.
		 */
		if (*cfuncp != 0)
			break;
	}
	if (!found && tryzle) {
		for (lsize = orig_lsize; lsize <= maxlsize;
		    lsize += SPA_MINBLOCKSIZE) {
			if (try_decompress_block(pabd, lsize, psize, flags,
			    ZIO_COMPRESS_ZLE, lbuf, lbuf2)) {
				/* record the match for the check below */
				*cfuncp = ZIO_COMPRESS_ZLE;
				found = B_TRUE;
				break;
			}
		}
	}
	umem_free(lbuf2, SPA_MAXBLOCKSIZE);

	if (*cfuncp == ZIO_COMPRESS_ZLE) {
		printf("\nZLE decompression was selected. If you "
		    "suspect the results are wrong,\ntry avoiding ZLE "
		    "by setting and exporting ZDB_NO_ZLE=\"true\"\n");
	}

	/* a search that found nothing has run lsize past maxlsize */
	return (lsize > maxlsize ? -1 : lsize);
}
8600 
8601 /*
8602  * Read a block from a pool and print it out.  The syntax of the
8603  * block descriptor is:
8604  *
8605  *	pool:vdev_specifier:offset:[lsize/]psize[:flags]
8606  *
8607  *	pool           - The name of the pool you wish to read from
8608  *	vdev_specifier - Which vdev (see comment for zdb_vdev_lookup)
8609  *	offset         - offset, in hex, in bytes
 *	[lsize/]psize  - Logical size and physical size (the amount of data
 *			 to read), in hex, in bytes; a lone value sets both
8611  *	flags          - A string of characters specifying options
8612  *		 b: Decode a blkptr at given offset within block
8613  *		 c: Calculate and display checksums
8614  *		 d: Decompress data before dumping
8615  *		 e: Byteswap data before dumping
8616  *		 g: Display data as a gang block header
8617  *		 i: Display as an indirect block
8618  *		 r: Dump raw data to stdout
8619  *		 v: Verbose
8620  *
8621  */
8622 static void
8623 zdb_read_block(char *thing, spa_t *spa)
8624 {
8625 	blkptr_t blk, *bp = &blk;
8626 	dva_t *dva = bp->blk_dva;
8627 	int flags = 0;
8628 	uint64_t offset = 0, psize = 0, lsize = 0, blkptr_offset = 0;
8629 	zio_t *zio;
8630 	vdev_t *vd;
8631 	abd_t *pabd;
8632 	void *lbuf, *buf;
8633 	char *s, *p, *dup, *flagstr, *sizes, *tmp = NULL;
8634 	const char *vdev, *errmsg = NULL;
8635 	int i, error;
8636 	boolean_t borrowed = B_FALSE, found = B_FALSE;
8637 
8638 	dup = strdup(thing);
8639 	s = strtok_r(dup, ":", &tmp);
8640 	vdev = s ?: "";
8641 	s = strtok_r(NULL, ":", &tmp);
8642 	offset = strtoull(s ? s : "", NULL, 16);
8643 	sizes = strtok_r(NULL, ":", &tmp);
8644 	s = strtok_r(NULL, ":", &tmp);
8645 	flagstr = strdup(s ?: "");
8646 
8647 	if (!zdb_parse_block_sizes(sizes, &lsize, &psize))
8648 		errmsg = "invalid size(s)";
8649 	if (!IS_P2ALIGNED(psize, DEV_BSIZE) || !IS_P2ALIGNED(lsize, DEV_BSIZE))
8650 		errmsg = "size must be a multiple of sector size";
8651 	if (!IS_P2ALIGNED(offset, DEV_BSIZE))
8652 		errmsg = "offset must be a multiple of sector size";
8653 	if (errmsg) {
8654 		(void) printf("Invalid block specifier: %s  - %s\n",
8655 		    thing, errmsg);
8656 		goto done;
8657 	}
8658 
8659 	tmp = NULL;
8660 	for (s = strtok_r(flagstr, ":", &tmp);
8661 	    s != NULL;
8662 	    s = strtok_r(NULL, ":", &tmp)) {
8663 		for (i = 0; i < strlen(flagstr); i++) {
8664 			int bit = flagbits[(uchar_t)flagstr[i]];
8665 
8666 			if (bit == 0) {
8667 				(void) printf("***Ignoring flag: %c\n",
8668 				    (uchar_t)flagstr[i]);
8669 				continue;
8670 			}
8671 			found = B_TRUE;
8672 			flags |= bit;
8673 
8674 			p = &flagstr[i + 1];
8675 			if (*p != ':' && *p != '\0') {
8676 				int j = 0, nextbit = flagbits[(uchar_t)*p];
8677 				char *end, offstr[8] = { 0 };
8678 				if ((bit == ZDB_FLAG_PRINT_BLKPTR) &&
8679 				    (nextbit == 0)) {
8680 					/* look ahead to isolate the offset */
8681 					while (nextbit == 0 &&
8682 					    strchr(flagbitstr, *p) == NULL) {
8683 						offstr[j] = *p;
8684 						j++;
8685 						if (i + j > strlen(flagstr))
8686 							break;
8687 						p++;
8688 						nextbit = flagbits[(uchar_t)*p];
8689 					}
8690 					blkptr_offset = strtoull(offstr, &end,
8691 					    16);
8692 					i += j;
8693 				} else if (nextbit == 0) {
8694 					(void) printf("***Ignoring flag arg:"
8695 					    " '%c'\n", (uchar_t)*p);
8696 				}
8697 			}
8698 		}
8699 	}
8700 	if (blkptr_offset % sizeof (blkptr_t)) {
8701 		printf("Block pointer offset 0x%llx "
8702 		    "must be divisible by 0x%x\n",
8703 		    (longlong_t)blkptr_offset, (int)sizeof (blkptr_t));
8704 		goto done;
8705 	}
8706 	if (found == B_FALSE && strlen(flagstr) > 0) {
8707 		printf("Invalid flag arg: '%s'\n", flagstr);
8708 		goto done;
8709 	}
8710 
8711 	vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev);
8712 	if (vd == NULL) {
8713 		(void) printf("***Invalid vdev: %s\n", vdev);
8714 		goto done;
8715 	} else {
8716 		if (vd->vdev_path)
8717 			(void) fprintf(stderr, "Found vdev: %s\n",
8718 			    vd->vdev_path);
8719 		else
8720 			(void) fprintf(stderr, "Found vdev type: %s\n",
8721 			    vd->vdev_ops->vdev_op_type);
8722 	}
8723 
8724 	pabd = abd_alloc_for_io(SPA_MAXBLOCKSIZE, B_FALSE);
8725 	lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
8726 
8727 	BP_ZERO(bp);
8728 
8729 	DVA_SET_VDEV(&dva[0], vd->vdev_id);
8730 	DVA_SET_OFFSET(&dva[0], offset);
8731 	DVA_SET_GANG(&dva[0], !!(flags & ZDB_FLAG_GBH));
8732 	DVA_SET_ASIZE(&dva[0], vdev_psize_to_asize(vd, psize));
8733 
8734 	BP_SET_BIRTH(bp, TXG_INITIAL, TXG_INITIAL);
8735 
8736 	BP_SET_LSIZE(bp, lsize);
8737 	BP_SET_PSIZE(bp, psize);
8738 	BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF);
8739 	BP_SET_CHECKSUM(bp, ZIO_CHECKSUM_OFF);
8740 	BP_SET_TYPE(bp, DMU_OT_NONE);
8741 	BP_SET_LEVEL(bp, 0);
8742 	BP_SET_DEDUP(bp, 0);
8743 	BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
8744 
8745 	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
8746 	zio = zio_root(spa, NULL, NULL, 0);
8747 
8748 	if (vd == vd->vdev_top) {
8749 		/*
8750 		 * Treat this as a normal block read.
8751 		 */
8752 		zio_nowait(zio_read(zio, spa, bp, pabd, psize, NULL, NULL,
8753 		    ZIO_PRIORITY_SYNC_READ,
8754 		    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL));
8755 	} else {
8756 		/*
8757 		 * Treat this as a vdev child I/O.
8758 		 */
8759 		zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pabd,
8760 		    psize, ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
8761 		    ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
8762 		    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW | ZIO_FLAG_OPTIONAL,
8763 		    NULL, NULL));
8764 	}
8765 
8766 	error = zio_wait(zio);
8767 	spa_config_exit(spa, SCL_STATE, FTAG);
8768 
8769 	if (error) {
8770 		(void) printf("Read of %s failed, error: %d\n", thing, error);
8771 		goto out;
8772 	}
8773 
8774 	uint64_t orig_lsize = lsize;
8775 	buf = lbuf;
8776 	if (flags & ZDB_FLAG_DECOMPRESS) {
8777 		lsize = zdb_decompress_block(pabd, buf, lbuf,
8778 		    lsize, psize, flags);
8779 		if (lsize == -1) {
8780 			(void) printf("Decompress of %s failed\n", thing);
8781 			goto out;
8782 		}
8783 	} else {
8784 		buf = abd_borrow_buf_copy(pabd, lsize);
8785 		borrowed = B_TRUE;
8786 	}
8787 	/*
8788 	 * Try to detect invalid block pointer.  If invalid, try
8789 	 * decompressing.
8790 	 */
8791 	if ((flags & ZDB_FLAG_PRINT_BLKPTR || flags & ZDB_FLAG_INDIRECT) &&
8792 	    !(flags & ZDB_FLAG_DECOMPRESS)) {
8793 		const blkptr_t *b = (const blkptr_t *)(void *)
8794 		    ((uintptr_t)buf + (uintptr_t)blkptr_offset);
8795 		if (zfs_blkptr_verify(spa, b,
8796 		    BLK_CONFIG_NEEDED, BLK_VERIFY_ONLY) == B_FALSE) {
8797 			abd_return_buf_copy(pabd, buf, lsize);
8798 			borrowed = B_FALSE;
8799 			buf = lbuf;
8800 			lsize = zdb_decompress_block(pabd, buf,
8801 			    lbuf, lsize, psize, flags);
8802 			b = (const blkptr_t *)(void *)
8803 			    ((uintptr_t)buf + (uintptr_t)blkptr_offset);
8804 			if (lsize == -1 || zfs_blkptr_verify(spa, b,
8805 			    BLK_CONFIG_NEEDED, BLK_VERIFY_LOG) == B_FALSE) {
8806 				printf("invalid block pointer at this DVA\n");
8807 				goto out;
8808 			}
8809 		}
8810 	}
8811 
8812 	if (flags & ZDB_FLAG_PRINT_BLKPTR)
8813 		zdb_print_blkptr((blkptr_t *)(void *)
8814 		    ((uintptr_t)buf + (uintptr_t)blkptr_offset), flags);
8815 	else if (flags & ZDB_FLAG_RAW)
8816 		zdb_dump_block_raw(buf, lsize, flags);
8817 	else if (flags & ZDB_FLAG_INDIRECT)
8818 		zdb_dump_indirect((blkptr_t *)buf,
8819 		    orig_lsize / sizeof (blkptr_t), flags);
8820 	else if (flags & ZDB_FLAG_GBH)
8821 		zdb_dump_gbh(buf, flags);
8822 	else
8823 		zdb_dump_block(thing, buf, lsize, flags);
8824 
8825 	/*
8826 	 * If :c was specified, iterate through the checksum table to
8827 	 * calculate and display each checksum for our specified
8828 	 * DVA and length.
8829 	 */
8830 	if ((flags & ZDB_FLAG_CHECKSUM) && !(flags & ZDB_FLAG_RAW) &&
8831 	    !(flags & ZDB_FLAG_GBH)) {
8832 		zio_t *czio;
8833 		(void) printf("\n");
8834 		for (enum zio_checksum ck = ZIO_CHECKSUM_LABEL;
8835 		    ck < ZIO_CHECKSUM_FUNCTIONS; ck++) {
8836 
8837 			if ((zio_checksum_table[ck].ci_flags &
8838 			    ZCHECKSUM_FLAG_EMBEDDED) ||
8839 			    ck == ZIO_CHECKSUM_NOPARITY) {
8840 				continue;
8841 			}
8842 			BP_SET_CHECKSUM(bp, ck);
8843 			spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
8844 			czio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL);
8845 			if (vd == vd->vdev_top) {
8846 				zio_nowait(zio_read(czio, spa, bp, pabd, psize,
8847 				    NULL, NULL,
8848 				    ZIO_PRIORITY_SYNC_READ,
8849 				    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW |
8850 				    ZIO_FLAG_DONT_RETRY, NULL));
8851 			} else {
8852 				zio_nowait(zio_vdev_child_io(czio, bp, vd,
8853 				    offset, pabd, psize, ZIO_TYPE_READ,
8854 				    ZIO_PRIORITY_SYNC_READ,
8855 				    ZIO_FLAG_DONT_PROPAGATE |
8856 				    ZIO_FLAG_DONT_RETRY |
8857 				    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW |
8858 				    ZIO_FLAG_SPECULATIVE |
8859 				    ZIO_FLAG_OPTIONAL, NULL, NULL));
8860 			}
8861 			error = zio_wait(czio);
8862 			if (error == 0 || error == ECKSUM) {
8863 				zio_t *ck_zio = zio_null(NULL, spa, NULL,
8864 				    NULL, NULL, 0);
8865 				ck_zio->io_offset =
8866 				    DVA_GET_OFFSET(&bp->blk_dva[0]);
8867 				ck_zio->io_bp = bp;
8868 				zio_checksum_compute(ck_zio, ck, pabd, lsize);
8869 				printf(
8870 				    "%12s\t"
8871 				    "cksum=%016llx:%016llx:%016llx:%016llx\n",
8872 				    zio_checksum_table[ck].ci_name,
8873 				    (u_longlong_t)bp->blk_cksum.zc_word[0],
8874 				    (u_longlong_t)bp->blk_cksum.zc_word[1],
8875 				    (u_longlong_t)bp->blk_cksum.zc_word[2],
8876 				    (u_longlong_t)bp->blk_cksum.zc_word[3]);
8877 				zio_wait(ck_zio);
8878 			} else {
8879 				printf("error %d reading block\n", error);
8880 			}
8881 			spa_config_exit(spa, SCL_STATE, FTAG);
8882 		}
8883 	}
8884 
8885 	if (borrowed)
8886 		abd_return_buf_copy(pabd, buf, lsize);
8887 
8888 out:
8889 	abd_free(pabd);
8890 	umem_free(lbuf, SPA_MAXBLOCKSIZE);
8891 done:
8892 	free(flagstr);
8893 	free(dup);
8894 }
8895 
8896 static void
8897 zdb_embedded_block(char *thing)
8898 {
8899 	blkptr_t bp = {{{{0}}}};
8900 	unsigned long long *words = (void *)&bp;
8901 	char *buf;
8902 	int err;
8903 
8904 	err = sscanf(thing, "%llx:%llx:%llx:%llx:%llx:%llx:%llx:%llx:"
8905 	    "%llx:%llx:%llx:%llx:%llx:%llx:%llx:%llx",
8906 	    words + 0, words + 1, words + 2, words + 3,
8907 	    words + 4, words + 5, words + 6, words + 7,
8908 	    words + 8, words + 9, words + 10, words + 11,
8909 	    words + 12, words + 13, words + 14, words + 15);
8910 	if (err != 16) {
8911 		(void) fprintf(stderr, "invalid input format\n");
8912 		exit(1);
8913 	}
8914 	ASSERT3U(BPE_GET_LSIZE(&bp), <=, SPA_MAXBLOCKSIZE);
8915 	buf = malloc(SPA_MAXBLOCKSIZE);
8916 	if (buf == NULL) {
8917 		(void) fprintf(stderr, "out of memory\n");
8918 		exit(1);
8919 	}
8920 	err = decode_embedded_bp(&bp, buf, BPE_GET_LSIZE(&bp));
8921 	if (err != 0) {
8922 		(void) fprintf(stderr, "decode failed: %u\n", err);
8923 		exit(1);
8924 	}
8925 	zdb_dump_block_raw(buf, BPE_GET_LSIZE(&bp), 0);
8926 	free(buf);
8927 }
8928 
8929 /* check for valid hex or decimal numeric string */
8930 static boolean_t
8931 zdb_numeric(char *str)
8932 {
8933 	int i = 0;
8934 
8935 	if (strlen(str) == 0)
8936 		return (B_FALSE);
8937 	if (strncmp(str, "0x", 2) == 0 || strncmp(str, "0X", 2) == 0)
8938 		i = 2;
8939 	for (; i < strlen(str); i++) {
8940 		if (!isxdigit(str[i]))
8941 			return (B_FALSE);
8942 	}
8943 	return (B_TRUE);
8944 }
8945 
8946 int
8947 main(int argc, char **argv)
8948 {
8949 	int c;
8950 	spa_t *spa = NULL;
8951 	objset_t *os = NULL;
8952 	int dump_all = 1;
8953 	int verbose = 0;
8954 	int error = 0;
8955 	char **searchdirs = NULL;
8956 	int nsearch = 0;
8957 	char *target, *target_pool, dsname[ZFS_MAX_DATASET_NAME_LEN];
8958 	nvlist_t *policy = NULL;
8959 	uint64_t max_txg = UINT64_MAX;
8960 	int64_t objset_id = -1;
8961 	uint64_t object;
8962 	int flags = ZFS_IMPORT_MISSING_LOG;
8963 	int rewind = ZPOOL_NEVER_REWIND;
8964 	char *spa_config_path_env, *objset_str;
8965 	boolean_t target_is_spa = B_TRUE, dataset_lookup = B_FALSE;
8966 	nvlist_t *cfg = NULL;
8967 
8968 	dprintf_setup(&argc, argv);
8969 
8970 	/*
8971 	 * If there is an environment variable SPA_CONFIG_PATH it overrides
8972 	 * default spa_config_path setting. If -U flag is specified it will
8973 	 * override this environment variable settings once again.
8974 	 */
8975 	spa_config_path_env = getenv("SPA_CONFIG_PATH");
8976 	if (spa_config_path_env != NULL)
8977 		spa_config_path = spa_config_path_env;
8978 
8979 	/*
8980 	 * For performance reasons, we set this tunable down. We do so before
8981 	 * the arg parsing section so that the user can override this value if
8982 	 * they choose.
8983 	 */
8984 	zfs_btree_verify_intensity = 3;
8985 
8986 	struct option long_options[] = {
8987 		{"ignore-assertions",	no_argument,		NULL, 'A'},
8988 		{"block-stats",		no_argument,		NULL, 'b'},
8989 		{"backup",		no_argument,		NULL, 'B'},
8990 		{"checksum",		no_argument,		NULL, 'c'},
8991 		{"config",		no_argument,		NULL, 'C'},
8992 		{"datasets",		no_argument,		NULL, 'd'},
8993 		{"dedup-stats",		no_argument,		NULL, 'D'},
8994 		{"exported",		no_argument,		NULL, 'e'},
8995 		{"embedded-block-pointer",	no_argument,	NULL, 'E'},
8996 		{"automatic-rewind",	no_argument,		NULL, 'F'},
8997 		{"dump-debug-msg",	no_argument,		NULL, 'G'},
8998 		{"history",		no_argument,		NULL, 'h'},
8999 		{"intent-logs",		no_argument,		NULL, 'i'},
9000 		{"inflight",		required_argument,	NULL, 'I'},
9001 		{"checkpointed-state",	no_argument,		NULL, 'k'},
9002 		{"key",			required_argument,	NULL, 'K'},
9003 		{"label",		no_argument,		NULL, 'l'},
9004 		{"disable-leak-tracking",	no_argument,	NULL, 'L'},
9005 		{"metaslabs",		no_argument,		NULL, 'm'},
9006 		{"metaslab-groups",	no_argument,		NULL, 'M'},
9007 		{"numeric",		no_argument,		NULL, 'N'},
9008 		{"option",		required_argument,	NULL, 'o'},
9009 		{"object-lookups",	no_argument,		NULL, 'O'},
9010 		{"path",		required_argument,	NULL, 'p'},
9011 		{"parseable",		no_argument,		NULL, 'P'},
9012 		{"skip-label",		no_argument,		NULL, 'q'},
9013 		{"copy-object",		no_argument,		NULL, 'r'},
9014 		{"read-block",		no_argument,		NULL, 'R'},
9015 		{"io-stats",		no_argument,		NULL, 's'},
9016 		{"simulate-dedup",	no_argument,		NULL, 'S'},
9017 		{"txg",			required_argument,	NULL, 't'},
9018 		{"brt-stats",		no_argument,		NULL, 'T'},
9019 		{"uberblock",		no_argument,		NULL, 'u'},
9020 		{"cachefile",		required_argument,	NULL, 'U'},
9021 		{"verbose",		no_argument,		NULL, 'v'},
9022 		{"verbatim",		no_argument,		NULL, 'V'},
9023 		{"dump-blocks",		required_argument,	NULL, 'x'},
9024 		{"extreme-rewind",	no_argument,		NULL, 'X'},
9025 		{"all-reconstruction",	no_argument,		NULL, 'Y'},
9026 		{"livelist",		no_argument,		NULL, 'y'},
9027 		{"zstd-headers",	no_argument,		NULL, 'Z'},
9028 		{0, 0, 0, 0}
9029 	};
9030 
9031 	while ((c = getopt_long(argc, argv,
9032 	    "AbBcCdDeEFGhiI:kK:lLmMNo:Op:PqrRsSt:TuU:vVx:XYyZ",
9033 	    long_options, NULL)) != -1) {
9034 		switch (c) {
9035 		case 'b':
9036 		case 'B':
9037 		case 'c':
9038 		case 'C':
9039 		case 'd':
9040 		case 'D':
9041 		case 'E':
9042 		case 'G':
9043 		case 'h':
9044 		case 'i':
9045 		case 'l':
9046 		case 'm':
9047 		case 'M':
9048 		case 'N':
9049 		case 'O':
9050 		case 'r':
9051 		case 'R':
9052 		case 's':
9053 		case 'S':
9054 		case 'T':
9055 		case 'u':
9056 		case 'y':
9057 		case 'Z':
9058 			dump_opt[c]++;
9059 			dump_all = 0;
9060 			break;
9061 		case 'A':
9062 		case 'e':
9063 		case 'F':
9064 		case 'k':
9065 		case 'L':
9066 		case 'P':
9067 		case 'q':
9068 		case 'X':
9069 			dump_opt[c]++;
9070 			break;
9071 		case 'Y':
9072 			zfs_reconstruct_indirect_combinations_max = INT_MAX;
9073 			zfs_deadman_enabled = 0;
9074 			break;
9075 		/* NB: Sort single match options below. */
9076 		case 'I':
9077 			max_inflight_bytes = strtoull(optarg, NULL, 0);
9078 			if (max_inflight_bytes == 0) {
9079 				(void) fprintf(stderr, "maximum number "
9080 				    "of inflight bytes must be greater "
9081 				    "than 0\n");
9082 				usage();
9083 			}
9084 			break;
9085 		case 'K':
9086 			dump_opt[c]++;
9087 			key_material = strdup(optarg);
9088 			/* redact key material in process table */
9089 			while (*optarg != '\0') { *optarg++ = '*'; }
9090 			break;
9091 		case 'o':
9092 			error = set_global_var(optarg);
9093 			if (error != 0)
9094 				usage();
9095 			break;
9096 		case 'p':
9097 			if (searchdirs == NULL) {
9098 				searchdirs = umem_alloc(sizeof (char *),
9099 				    UMEM_NOFAIL);
9100 			} else {
9101 				char **tmp = umem_alloc((nsearch + 1) *
9102 				    sizeof (char *), UMEM_NOFAIL);
9103 				memcpy(tmp, searchdirs, nsearch *
9104 				    sizeof (char *));
9105 				umem_free(searchdirs,
9106 				    nsearch * sizeof (char *));
9107 				searchdirs = tmp;
9108 			}
9109 			searchdirs[nsearch++] = optarg;
9110 			break;
9111 		case 't':
9112 			max_txg = strtoull(optarg, NULL, 0);
9113 			if (max_txg < TXG_INITIAL) {
9114 				(void) fprintf(stderr, "incorrect txg "
9115 				    "specified: %s\n", optarg);
9116 				usage();
9117 			}
9118 			break;
9119 		case 'U':
9120 			spa_config_path = optarg;
9121 			if (spa_config_path[0] != '/') {
9122 				(void) fprintf(stderr,
9123 				    "cachefile must be an absolute path "
9124 				    "(i.e. start with a slash)\n");
9125 				usage();
9126 			}
9127 			break;
9128 		case 'v':
9129 			verbose++;
9130 			break;
9131 		case 'V':
9132 			flags = ZFS_IMPORT_VERBATIM;
9133 			break;
9134 		case 'x':
9135 			vn_dumpdir = optarg;
9136 			break;
9137 		default:
9138 			usage();
9139 			break;
9140 		}
9141 	}
9142 
9143 	if (!dump_opt['e'] && searchdirs != NULL) {
9144 		(void) fprintf(stderr, "-p option requires use of -e\n");
9145 		usage();
9146 	}
9147 #if defined(_LP64)
9148 	/*
9149 	 * ZDB does not typically re-read blocks; therefore limit the ARC
9150 	 * to 256 MB, which can be used entirely for metadata.
9151 	 */
9152 	zfs_arc_min = 2ULL << SPA_MAXBLOCKSHIFT;
9153 	zfs_arc_max = 256 * 1024 * 1024;
9154 #endif
9155 
9156 	/*
9157 	 * "zdb -c" uses checksum-verifying scrub i/os which are async reads.
9158 	 * "zdb -b" uses traversal prefetch which uses async reads.
9159 	 * For good performance, let several of them be active at once.
9160 	 */
9161 	zfs_vdev_async_read_max_active = 10;
9162 
9163 	/*
9164 	 * Disable reference tracking for better performance.
9165 	 */
9166 	reference_tracking_enable = B_FALSE;
9167 
9168 	/*
9169 	 * Do not fail spa_load when spa_load_verify fails. This is needed
9170 	 * to load non-idle pools.
9171 	 */
9172 	spa_load_verify_dryrun = B_TRUE;
9173 
9174 	/*
9175 	 * ZDB should have ability to read spacemaps.
9176 	 */
9177 	spa_mode_readable_spacemaps = B_TRUE;
9178 
9179 	kernel_init(SPA_MODE_READ);
9180 
9181 	if (dump_all)
9182 		verbose = MAX(verbose, 1);
9183 
9184 	for (c = 0; c < 256; c++) {
9185 		if (dump_all && strchr("ABeEFkKlLNOPrRSXy", c) == NULL)
9186 			dump_opt[c] = 1;
9187 		if (dump_opt[c])
9188 			dump_opt[c] += verbose;
9189 	}
9190 
9191 	libspl_set_assert_ok((dump_opt['A'] == 1) || (dump_opt['A'] > 2));
9192 	zfs_recover = (dump_opt['A'] > 1);
9193 
9194 	argc -= optind;
9195 	argv += optind;
9196 	if (argc < 2 && dump_opt['R'])
9197 		usage();
9198 
9199 	if (dump_opt['E']) {
9200 		if (argc != 1)
9201 			usage();
9202 		zdb_embedded_block(argv[0]);
9203 		return (0);
9204 	}
9205 
9206 	if (argc < 1) {
9207 		if (!dump_opt['e'] && dump_opt['C']) {
9208 			dump_cachefile(spa_config_path);
9209 			return (0);
9210 		}
9211 		usage();
9212 	}
9213 
9214 	if (dump_opt['l'])
9215 		return (dump_label(argv[0]));
9216 
9217 	if (dump_opt['X'] || dump_opt['F'])
9218 		rewind = ZPOOL_DO_REWIND |
9219 		    (dump_opt['X'] ? ZPOOL_EXTREME_REWIND : 0);
9220 
9221 	/* -N implies -d */
9222 	if (dump_opt['N'] && dump_opt['d'] == 0)
9223 		dump_opt['d'] = dump_opt['N'];
9224 
9225 	if (nvlist_alloc(&policy, NV_UNIQUE_NAME_TYPE, 0) != 0 ||
9226 	    nvlist_add_uint64(policy, ZPOOL_LOAD_REQUEST_TXG, max_txg) != 0 ||
9227 	    nvlist_add_uint32(policy, ZPOOL_LOAD_REWIND_POLICY, rewind) != 0)
9228 		fatal("internal error: %s", strerror(ENOMEM));
9229 
9230 	error = 0;
9231 	target = argv[0];
9232 
9233 	if (strpbrk(target, "/@") != NULL) {
9234 		size_t targetlen;
9235 
9236 		target_pool = strdup(target);
9237 		*strpbrk(target_pool, "/@") = '\0';
9238 
9239 		target_is_spa = B_FALSE;
9240 		targetlen = strlen(target);
9241 		if (targetlen && target[targetlen - 1] == '/')
9242 			target[targetlen - 1] = '\0';
9243 
9244 		/*
9245 		 * See if an objset ID was supplied (-d <pool>/<objset ID>).
9246 		 * To disambiguate tank/100, consider the 100 as objsetID
9247 		 * if -N was given, otherwise 100 is an objsetID iff
9248 		 * tank/100 as a named dataset fails on lookup.
9249 		 */
9250 		objset_str = strchr(target, '/');
9251 		if (objset_str && strlen(objset_str) > 1 &&
9252 		    zdb_numeric(objset_str + 1)) {
9253 			char *endptr;
9254 			errno = 0;
9255 			objset_str++;
9256 			objset_id = strtoull(objset_str, &endptr, 0);
9257 			/* dataset 0 is the same as opening the pool */
9258 			if (errno == 0 && endptr != objset_str &&
9259 			    objset_id != 0) {
9260 				if (dump_opt['N'])
9261 					dataset_lookup = B_TRUE;
9262 			}
9263 			/* normal dataset name not an objset ID */
9264 			if (endptr == objset_str) {
9265 				objset_id = -1;
9266 			}
9267 		} else if (objset_str && !zdb_numeric(objset_str + 1) &&
9268 		    dump_opt['N']) {
9269 			printf("Supply a numeric objset ID with -N\n");
9270 			exit(1);
9271 		}
9272 	} else {
9273 		target_pool = target;
9274 	}
9275 
9276 	if (dump_opt['e']) {
9277 		importargs_t args = { 0 };
9278 
9279 		args.paths = nsearch;
9280 		args.path = searchdirs;
9281 		args.can_be_active = B_TRUE;
9282 
9283 		libpc_handle_t lpch = {
9284 			.lpc_lib_handle = NULL,
9285 			.lpc_ops = &libzpool_config_ops,
9286 			.lpc_printerr = B_TRUE
9287 		};
9288 		error = zpool_find_config(&lpch, target_pool, &cfg, &args);
9289 
9290 		if (error == 0) {
9291 
9292 			if (nvlist_add_nvlist(cfg,
9293 			    ZPOOL_LOAD_POLICY, policy) != 0) {
9294 				fatal("can't open '%s': %s",
9295 				    target, strerror(ENOMEM));
9296 			}
9297 
9298 			if (dump_opt['C'] > 1) {
9299 				(void) printf("\nConfiguration for import:\n");
9300 				dump_nvlist(cfg, 8);
9301 			}
9302 
9303 			/*
9304 			 * Disable the activity check to allow examination of
9305 			 * active pools.
9306 			 */
9307 			error = spa_import(target_pool, cfg, NULL,
9308 			    flags | ZFS_IMPORT_SKIP_MMP);
9309 		}
9310 	}
9311 
9312 	if (searchdirs != NULL) {
9313 		umem_free(searchdirs, nsearch * sizeof (char *));
9314 		searchdirs = NULL;
9315 	}
9316 
9317 	/*
9318 	 * We need to make sure to process -O option or call
9319 	 * dump_path after the -e option has been processed,
9320 	 * which imports the pool to the namespace if it's
9321 	 * not in the cachefile.
9322 	 */
9323 	if (dump_opt['O']) {
9324 		if (argc != 2)
9325 			usage();
9326 		dump_opt['v'] = verbose + 3;
9327 		return (dump_path(argv[0], argv[1], NULL));
9328 	}
9329 
9330 	if (dump_opt['r']) {
9331 		target_is_spa = B_FALSE;
9332 		if (argc != 3)
9333 			usage();
9334 		dump_opt['v'] = verbose;
9335 		error = dump_path(argv[0], argv[1], &object);
9336 		if (error != 0)
9337 			fatal("internal error: %s", strerror(error));
9338 	}
9339 
9340 	/*
9341 	 * import_checkpointed_state makes the assumption that the
9342 	 * target pool that we pass it is already part of the spa
9343 	 * namespace. Because of that we need to make sure to call
9344 	 * it always after the -e option has been processed, which
9345 	 * imports the pool to the namespace if it's not in the
9346 	 * cachefile.
9347 	 */
9348 	char *checkpoint_pool = NULL;
9349 	char *checkpoint_target = NULL;
9350 	if (dump_opt['k']) {
9351 		checkpoint_pool = import_checkpointed_state(target, cfg,
9352 		    &checkpoint_target);
9353 
9354 		if (checkpoint_target != NULL)
9355 			target = checkpoint_target;
9356 	}
9357 
9358 	if (cfg != NULL) {
9359 		nvlist_free(cfg);
9360 		cfg = NULL;
9361 	}
9362 
9363 	if (target_pool != target)
9364 		free(target_pool);
9365 
9366 	if (error == 0) {
9367 		if (dump_opt['k'] && (target_is_spa || dump_opt['R'])) {
9368 			ASSERT(checkpoint_pool != NULL);
9369 			ASSERT(checkpoint_target == NULL);
9370 
9371 			error = spa_open(checkpoint_pool, &spa, FTAG);
9372 			if (error != 0) {
9373 				fatal("Tried to open pool \"%s\" but "
9374 				    "spa_open() failed with error %d\n",
9375 				    checkpoint_pool, error);
9376 			}
9377 
9378 		} else if (target_is_spa || dump_opt['R'] || dump_opt['B'] ||
9379 		    objset_id == 0) {
9380 			zdb_set_skip_mmp(target);
9381 			error = spa_open_rewind(target, &spa, FTAG, policy,
9382 			    NULL);
9383 			if (error) {
9384 				/*
9385 				 * If we're missing the log device then
9386 				 * try opening the pool after clearing the
9387 				 * log state.
9388 				 */
9389 				mutex_enter(&spa_namespace_lock);
9390 				if ((spa = spa_lookup(target)) != NULL &&
9391 				    spa->spa_log_state == SPA_LOG_MISSING) {
9392 					spa->spa_log_state = SPA_LOG_CLEAR;
9393 					error = 0;
9394 				}
9395 				mutex_exit(&spa_namespace_lock);
9396 
9397 				if (!error) {
9398 					error = spa_open_rewind(target, &spa,
9399 					    FTAG, policy, NULL);
9400 				}
9401 			}
9402 		} else if (strpbrk(target, "#") != NULL) {
9403 			dsl_pool_t *dp;
9404 			error = dsl_pool_hold(target, FTAG, &dp);
9405 			if (error != 0) {
9406 				fatal("can't dump '%s': %s", target,
9407 				    strerror(error));
9408 			}
9409 			error = dump_bookmark(dp, target, B_TRUE, verbose > 1);
9410 			dsl_pool_rele(dp, FTAG);
9411 			if (error != 0) {
9412 				fatal("can't dump '%s': %s", target,
9413 				    strerror(error));
9414 			}
9415 			return (error);
9416 		} else {
9417 			target_pool = strdup(target);
9418 			if (strpbrk(target, "/@") != NULL)
9419 				*strpbrk(target_pool, "/@") = '\0';
9420 
9421 			zdb_set_skip_mmp(target);
9422 			/*
9423 			 * If -N was supplied, the user has indicated that
9424 			 * zdb -d <pool>/<objsetID> is in effect.  Otherwise
9425 			 * we first assume that the dataset string is the
9426 			 * dataset name.  If dmu_objset_hold fails with the
9427 			 * dataset string, and we have an objset_id, retry the
9428 			 * lookup with the objsetID.
9429 			 */
9430 			boolean_t retry = B_TRUE;
9431 retry_lookup:
9432 			if (dataset_lookup == B_TRUE) {
9433 				/*
9434 				 * Use the supplied id to get the name
9435 				 * for open_objset.
9436 				 */
9437 				error = spa_open(target_pool, &spa, FTAG);
9438 				if (error == 0) {
9439 					error = name_from_objset_id(spa,
9440 					    objset_id, dsname);
9441 					spa_close(spa, FTAG);
9442 					if (error == 0)
9443 						target = dsname;
9444 				}
9445 			}
9446 			if (error == 0) {
9447 				if (objset_id > 0 && retry) {
9448 					int err = dmu_objset_hold(target, FTAG,
9449 					    &os);
9450 					if (err) {
9451 						dataset_lookup = B_TRUE;
9452 						retry = B_FALSE;
9453 						goto retry_lookup;
9454 					} else {
9455 						dmu_objset_rele(os, FTAG);
9456 					}
9457 				}
9458 				error = open_objset(target, FTAG, &os);
9459 			}
9460 			if (error == 0)
9461 				spa = dmu_objset_spa(os);
9462 			free(target_pool);
9463 		}
9464 	}
9465 	nvlist_free(policy);
9466 
9467 	if (error)
9468 		fatal("can't open '%s': %s", target, strerror(error));
9469 
9470 	/*
9471 	 * Set the pool failure mode to panic in order to prevent the pool
9472 	 * from suspending.  A suspended I/O will have no way to resume and
9473 	 * can prevent the zdb(8) command from terminating as expected.
9474 	 */
9475 	if (spa != NULL)
9476 		spa->spa_failmode = ZIO_FAILURE_MODE_PANIC;
9477 
9478 	argv++;
9479 	argc--;
9480 	if (dump_opt['r']) {
9481 		error = zdb_copy_object(os, object, argv[1]);
9482 	} else if (!dump_opt['R']) {
9483 		flagbits['d'] = ZOR_FLAG_DIRECTORY;
9484 		flagbits['f'] = ZOR_FLAG_PLAIN_FILE;
9485 		flagbits['m'] = ZOR_FLAG_SPACE_MAP;
9486 		flagbits['z'] = ZOR_FLAG_ZAP;
9487 		flagbits['A'] = ZOR_FLAG_ALL_TYPES;
9488 
9489 		if (argc > 0 && dump_opt['d']) {
9490 			zopt_object_args = argc;
9491 			zopt_object_ranges = calloc(zopt_object_args,
9492 			    sizeof (zopt_object_range_t));
9493 			for (unsigned i = 0; i < zopt_object_args; i++) {
9494 				int err;
9495 				const char *msg = NULL;
9496 
9497 				err = parse_object_range(argv[i],
9498 				    &zopt_object_ranges[i], &msg);
9499 				if (err != 0)
9500 					fatal("Bad object or range: '%s': %s\n",
9501 					    argv[i], msg ?: "");
9502 			}
9503 		} else if (argc > 0 && dump_opt['m']) {
9504 			zopt_metaslab_args = argc;
9505 			zopt_metaslab = calloc(zopt_metaslab_args,
9506 			    sizeof (uint64_t));
9507 			for (unsigned i = 0; i < zopt_metaslab_args; i++) {
9508 				errno = 0;
9509 				zopt_metaslab[i] = strtoull(argv[i], NULL, 0);
9510 				if (zopt_metaslab[i] == 0 && errno != 0)
9511 					fatal("bad number %s: %s", argv[i],
9512 					    strerror(errno));
9513 			}
9514 		}
9515 		if (dump_opt['B']) {
9516 			dump_backup(target, objset_id,
9517 			    argc > 0 ? argv[0] : NULL);
9518 		} else if (os != NULL) {
9519 			dump_objset(os);
9520 		} else if (zopt_object_args > 0 && !dump_opt['m']) {
9521 			dump_objset(spa->spa_meta_objset);
9522 		} else {
9523 			dump_zpool(spa);
9524 		}
9525 	} else {
9526 		flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR;
9527 		flagbits['c'] = ZDB_FLAG_CHECKSUM;
9528 		flagbits['d'] = ZDB_FLAG_DECOMPRESS;
9529 		flagbits['e'] = ZDB_FLAG_BSWAP;
9530 		flagbits['g'] = ZDB_FLAG_GBH;
9531 		flagbits['i'] = ZDB_FLAG_INDIRECT;
9532 		flagbits['r'] = ZDB_FLAG_RAW;
9533 		flagbits['v'] = ZDB_FLAG_VERBOSE;
9534 
9535 		for (int i = 0; i < argc; i++)
9536 			zdb_read_block(argv[i], spa);
9537 	}
9538 
9539 	if (dump_opt['k']) {
9540 		free(checkpoint_pool);
9541 		if (!target_is_spa)
9542 			free(checkpoint_target);
9543 	}
9544 
9545 	if (os != NULL) {
9546 		close_objset(os, FTAG);
9547 	} else {
9548 		spa_close(spa, FTAG);
9549 	}
9550 
9551 	fuid_table_destroy();
9552 
9553 	dump_debug_buffer();
9554 
9555 	kernel_fini();
9556 
9557 	return (error);
9558 }
9559