1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright (c) 2011, 2019 by Delphix. All rights reserved. 25 * Copyright (c) 2014 Integros [integros.com] 26 * Copyright 2016 Nexenta Systems, Inc. 27 * Copyright (c) 2017, 2018 Lawrence Livermore National Security, LLC. 28 * Copyright (c) 2015, 2017, Intel Corporation. 29 * Copyright (c) 2020 Datto Inc. 30 * Copyright (c) 2020, The FreeBSD Foundation [1] 31 * 32 * [1] Portions of this software were developed by Allan Jude 33 * under sponsorship from the FreeBSD Foundation. 34 * Copyright (c) 2021 Allan Jude 35 * Copyright (c) 2021 Toomas Soome <tsoome@me.com> 36 * Copyright (c) 2023, Klara Inc. 
37 */ 38 39 #include <stdio.h> 40 #include <unistd.h> 41 #include <stdlib.h> 42 #include <ctype.h> 43 #include <getopt.h> 44 #include <openssl/evp.h> 45 #include <sys/zfs_context.h> 46 #include <sys/spa.h> 47 #include <sys/spa_impl.h> 48 #include <sys/dmu.h> 49 #include <sys/zap.h> 50 #include <sys/fs/zfs.h> 51 #include <sys/zfs_znode.h> 52 #include <sys/zfs_sa.h> 53 #include <sys/sa.h> 54 #include <sys/sa_impl.h> 55 #include <sys/vdev.h> 56 #include <sys/vdev_impl.h> 57 #include <sys/metaslab_impl.h> 58 #include <sys/dmu_objset.h> 59 #include <sys/dsl_dir.h> 60 #include <sys/dsl_dataset.h> 61 #include <sys/dsl_pool.h> 62 #include <sys/dsl_bookmark.h> 63 #include <sys/dbuf.h> 64 #include <sys/zil.h> 65 #include <sys/zil_impl.h> 66 #include <sys/stat.h> 67 #include <sys/resource.h> 68 #include <sys/dmu_send.h> 69 #include <sys/dmu_traverse.h> 70 #include <sys/zio_checksum.h> 71 #include <sys/zio_compress.h> 72 #include <sys/zfs_fuid.h> 73 #include <sys/arc.h> 74 #include <sys/arc_impl.h> 75 #include <sys/ddt.h> 76 #include <sys/zfeature.h> 77 #include <sys/abd.h> 78 #include <sys/blkptr.h> 79 #include <sys/dsl_crypt.h> 80 #include <sys/dsl_scan.h> 81 #include <sys/btree.h> 82 #include <zfs_comutil.h> 83 #include <sys/zstd/zstd.h> 84 85 #include <libnvpair.h> 86 #include <libzutil.h> 87 88 #include "zdb.h" 89 90 #define ZDB_COMPRESS_NAME(idx) ((idx) < ZIO_COMPRESS_FUNCTIONS ? \ 91 zio_compress_table[(idx)].ci_name : "UNKNOWN") 92 #define ZDB_CHECKSUM_NAME(idx) ((idx) < ZIO_CHECKSUM_FUNCTIONS ? \ 93 zio_checksum_table[(idx)].ci_name : "UNKNOWN") 94 #define ZDB_OT_TYPE(idx) ((idx) < DMU_OT_NUMTYPES ? (idx) : \ 95 (idx) == DMU_OTN_ZAP_DATA || (idx) == DMU_OTN_ZAP_METADATA ? \ 96 DMU_OT_ZAP_OTHER : \ 97 (idx) == DMU_OTN_UINT64_DATA || (idx) == DMU_OTN_UINT64_METADATA ? 
\ 98 DMU_OT_UINT64_OTHER : DMU_OT_NUMTYPES) 99 100 /* Some platforms require part of inode IDs to be remapped */ 101 #ifdef __APPLE__ 102 #define ZDB_MAP_OBJECT_ID(obj) INO_XNUTOZFS(obj, 2) 103 #else 104 #define ZDB_MAP_OBJECT_ID(obj) (obj) 105 #endif 106 107 static const char * 108 zdb_ot_name(dmu_object_type_t type) 109 { 110 if (type < DMU_OT_NUMTYPES) 111 return (dmu_ot[type].ot_name); 112 else if ((type & DMU_OT_NEWTYPE) && 113 ((type & DMU_OT_BYTESWAP_MASK) < DMU_BSWAP_NUMFUNCS)) 114 return (dmu_ot_byteswap[type & DMU_OT_BYTESWAP_MASK].ob_name); 115 else 116 return ("UNKNOWN"); 117 } 118 119 extern int reference_tracking_enable; 120 extern int zfs_recover; 121 extern uint_t zfs_vdev_async_read_max_active; 122 extern boolean_t spa_load_verify_dryrun; 123 extern boolean_t spa_mode_readable_spacemaps; 124 extern uint_t zfs_reconstruct_indirect_combinations_max; 125 extern uint_t zfs_btree_verify_intensity; 126 127 static const char cmdname[] = "zdb"; 128 uint8_t dump_opt[256]; 129 130 typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size); 131 132 static uint64_t *zopt_metaslab = NULL; 133 static unsigned zopt_metaslab_args = 0; 134 135 typedef struct zopt_object_range { 136 uint64_t zor_obj_start; 137 uint64_t zor_obj_end; 138 uint64_t zor_flags; 139 } zopt_object_range_t; 140 141 static zopt_object_range_t *zopt_object_ranges = NULL; 142 static unsigned zopt_object_args = 0; 143 144 static int flagbits[256]; 145 146 #define ZOR_FLAG_PLAIN_FILE 0x0001 147 #define ZOR_FLAG_DIRECTORY 0x0002 148 #define ZOR_FLAG_SPACE_MAP 0x0004 149 #define ZOR_FLAG_ZAP 0x0008 150 #define ZOR_FLAG_ALL_TYPES -1 151 #define ZOR_SUPPORTED_FLAGS (ZOR_FLAG_PLAIN_FILE | \ 152 ZOR_FLAG_DIRECTORY | \ 153 ZOR_FLAG_SPACE_MAP | \ 154 ZOR_FLAG_ZAP) 155 156 #define ZDB_FLAG_CHECKSUM 0x0001 157 #define ZDB_FLAG_DECOMPRESS 0x0002 158 #define ZDB_FLAG_BSWAP 0x0004 159 #define ZDB_FLAG_GBH 0x0008 160 #define ZDB_FLAG_INDIRECT 0x0010 161 #define ZDB_FLAG_RAW 0x0020 162 #define 
ZDB_FLAG_PRINT_BLKPTR 0x0040 163 #define ZDB_FLAG_VERBOSE 0x0080 164 165 static uint64_t max_inflight_bytes = 256 * 1024 * 1024; /* 256MB */ 166 static int leaked_objects = 0; 167 static range_tree_t *mos_refd_objs; 168 169 static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *, 170 boolean_t); 171 static void mos_obj_refd(uint64_t); 172 static void mos_obj_refd_multiple(uint64_t); 173 static int dump_bpobj_cb(void *arg, const blkptr_t *bp, boolean_t free, 174 dmu_tx_t *tx); 175 176 typedef struct sublivelist_verify { 177 /* FREE's that haven't yet matched to an ALLOC, in one sub-livelist */ 178 zfs_btree_t sv_pair; 179 180 /* ALLOC's without a matching FREE, accumulates across sub-livelists */ 181 zfs_btree_t sv_leftover; 182 } sublivelist_verify_t; 183 184 static int 185 livelist_compare(const void *larg, const void *rarg) 186 { 187 const blkptr_t *l = larg; 188 const blkptr_t *r = rarg; 189 190 /* Sort them according to dva[0] */ 191 uint64_t l_dva0_vdev, r_dva0_vdev; 192 l_dva0_vdev = DVA_GET_VDEV(&l->blk_dva[0]); 193 r_dva0_vdev = DVA_GET_VDEV(&r->blk_dva[0]); 194 if (l_dva0_vdev < r_dva0_vdev) 195 return (-1); 196 else if (l_dva0_vdev > r_dva0_vdev) 197 return (+1); 198 199 /* if vdevs are equal, sort by offsets. */ 200 uint64_t l_dva0_offset; 201 uint64_t r_dva0_offset; 202 l_dva0_offset = DVA_GET_OFFSET(&l->blk_dva[0]); 203 r_dva0_offset = DVA_GET_OFFSET(&r->blk_dva[0]); 204 if (l_dva0_offset < r_dva0_offset) { 205 return (-1); 206 } else if (l_dva0_offset > r_dva0_offset) { 207 return (+1); 208 } 209 210 /* 211 * Since we're storing blkptrs without cancelling FREE/ALLOC pairs, 212 * it's possible the offsets are equal. 
In that case, sort by txg 213 */ 214 if (l->blk_birth < r->blk_birth) { 215 return (-1); 216 } else if (l->blk_birth > r->blk_birth) { 217 return (+1); 218 } 219 return (0); 220 } 221 222 typedef struct sublivelist_verify_block { 223 dva_t svb_dva; 224 225 /* 226 * We need this to check if the block marked as allocated 227 * in the livelist was freed (and potentially reallocated) 228 * in the metaslab spacemaps at a later TXG. 229 */ 230 uint64_t svb_allocated_txg; 231 } sublivelist_verify_block_t; 232 233 static void zdb_print_blkptr(const blkptr_t *bp, int flags); 234 235 typedef struct sublivelist_verify_block_refcnt { 236 /* block pointer entry in livelist being verified */ 237 blkptr_t svbr_blk; 238 239 /* 240 * Refcount gets incremented to 1 when we encounter the first 241 * FREE entry for the svfbr block pointer and a node for it 242 * is created in our ZDB verification/tracking metadata. 243 * 244 * As we encounter more FREE entries we increment this counter 245 * and similarly decrement it whenever we find the respective 246 * ALLOC entries for this block. 247 * 248 * When the refcount gets to 0 it means that all the FREE and 249 * ALLOC entries of this block have paired up and we no longer 250 * need to track it in our verification logic (e.g. the node 251 * containing this struct in our verification data structure 252 * should be freed). 
253 * 254 * [refer to sublivelist_verify_blkptr() for the actual code] 255 */ 256 uint32_t svbr_refcnt; 257 } sublivelist_verify_block_refcnt_t; 258 259 static int 260 sublivelist_block_refcnt_compare(const void *larg, const void *rarg) 261 { 262 const sublivelist_verify_block_refcnt_t *l = larg; 263 const sublivelist_verify_block_refcnt_t *r = rarg; 264 return (livelist_compare(&l->svbr_blk, &r->svbr_blk)); 265 } 266 267 static int 268 sublivelist_verify_blkptr(void *arg, const blkptr_t *bp, boolean_t free, 269 dmu_tx_t *tx) 270 { 271 ASSERT3P(tx, ==, NULL); 272 struct sublivelist_verify *sv = arg; 273 sublivelist_verify_block_refcnt_t current = { 274 .svbr_blk = *bp, 275 276 /* 277 * Start with 1 in case this is the first free entry. 278 * This field is not used for our B-Tree comparisons 279 * anyway. 280 */ 281 .svbr_refcnt = 1, 282 }; 283 284 zfs_btree_index_t where; 285 sublivelist_verify_block_refcnt_t *pair = 286 zfs_btree_find(&sv->sv_pair, ¤t, &where); 287 if (free) { 288 if (pair == NULL) { 289 /* first free entry for this block pointer */ 290 zfs_btree_add(&sv->sv_pair, ¤t); 291 } else { 292 pair->svbr_refcnt++; 293 } 294 } else { 295 if (pair == NULL) { 296 /* block that is currently marked as allocated */ 297 for (int i = 0; i < SPA_DVAS_PER_BP; i++) { 298 if (DVA_IS_EMPTY(&bp->blk_dva[i])) 299 break; 300 sublivelist_verify_block_t svb = { 301 .svb_dva = bp->blk_dva[i], 302 .svb_allocated_txg = bp->blk_birth 303 }; 304 305 if (zfs_btree_find(&sv->sv_leftover, &svb, 306 &where) == NULL) { 307 zfs_btree_add_idx(&sv->sv_leftover, 308 &svb, &where); 309 } 310 } 311 } else { 312 /* alloc matches a free entry */ 313 pair->svbr_refcnt--; 314 if (pair->svbr_refcnt == 0) { 315 /* all allocs and frees have been matched */ 316 zfs_btree_remove_idx(&sv->sv_pair, &where); 317 } 318 } 319 } 320 321 return (0); 322 } 323 324 static int 325 sublivelist_verify_func(void *args, dsl_deadlist_entry_t *dle) 326 { 327 int err; 328 struct sublivelist_verify *sv = args; 329 
330 zfs_btree_create(&sv->sv_pair, sublivelist_block_refcnt_compare, NULL, 331 sizeof (sublivelist_verify_block_refcnt_t)); 332 333 err = bpobj_iterate_nofree(&dle->dle_bpobj, sublivelist_verify_blkptr, 334 sv, NULL); 335 336 sublivelist_verify_block_refcnt_t *e; 337 zfs_btree_index_t *cookie = NULL; 338 while ((e = zfs_btree_destroy_nodes(&sv->sv_pair, &cookie)) != NULL) { 339 char blkbuf[BP_SPRINTF_LEN]; 340 snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), 341 &e->svbr_blk, B_TRUE); 342 (void) printf("\tERROR: %d unmatched FREE(s): %s\n", 343 e->svbr_refcnt, blkbuf); 344 } 345 zfs_btree_destroy(&sv->sv_pair); 346 347 return (err); 348 } 349 350 static int 351 livelist_block_compare(const void *larg, const void *rarg) 352 { 353 const sublivelist_verify_block_t *l = larg; 354 const sublivelist_verify_block_t *r = rarg; 355 356 if (DVA_GET_VDEV(&l->svb_dva) < DVA_GET_VDEV(&r->svb_dva)) 357 return (-1); 358 else if (DVA_GET_VDEV(&l->svb_dva) > DVA_GET_VDEV(&r->svb_dva)) 359 return (+1); 360 361 if (DVA_GET_OFFSET(&l->svb_dva) < DVA_GET_OFFSET(&r->svb_dva)) 362 return (-1); 363 else if (DVA_GET_OFFSET(&l->svb_dva) > DVA_GET_OFFSET(&r->svb_dva)) 364 return (+1); 365 366 if (DVA_GET_ASIZE(&l->svb_dva) < DVA_GET_ASIZE(&r->svb_dva)) 367 return (-1); 368 else if (DVA_GET_ASIZE(&l->svb_dva) > DVA_GET_ASIZE(&r->svb_dva)) 369 return (+1); 370 371 return (0); 372 } 373 374 /* 375 * Check for errors in a livelist while tracking all unfreed ALLOCs in the 376 * sublivelist_verify_t: sv->sv_leftover 377 */ 378 static void 379 livelist_verify(dsl_deadlist_t *dl, void *arg) 380 { 381 sublivelist_verify_t *sv = arg; 382 dsl_deadlist_iterate(dl, sublivelist_verify_func, sv); 383 } 384 385 /* 386 * Check for errors in the livelist entry and discard the intermediary 387 * data structures 388 */ 389 static int 390 sublivelist_verify_lightweight(void *args, dsl_deadlist_entry_t *dle) 391 { 392 (void) args; 393 sublivelist_verify_t sv; 394 zfs_btree_create(&sv.sv_leftover, 
livelist_block_compare, NULL, 395 sizeof (sublivelist_verify_block_t)); 396 int err = sublivelist_verify_func(&sv, dle); 397 zfs_btree_clear(&sv.sv_leftover); 398 zfs_btree_destroy(&sv.sv_leftover); 399 return (err); 400 } 401 402 typedef struct metaslab_verify { 403 /* 404 * Tree containing all the leftover ALLOCs from the livelists 405 * that are part of this metaslab. 406 */ 407 zfs_btree_t mv_livelist_allocs; 408 409 /* 410 * Metaslab information. 411 */ 412 uint64_t mv_vdid; 413 uint64_t mv_msid; 414 uint64_t mv_start; 415 uint64_t mv_end; 416 417 /* 418 * What's currently allocated for this metaslab. 419 */ 420 range_tree_t *mv_allocated; 421 } metaslab_verify_t; 422 423 typedef void ll_iter_t(dsl_deadlist_t *ll, void *arg); 424 425 typedef int (*zdb_log_sm_cb_t)(spa_t *spa, space_map_entry_t *sme, uint64_t txg, 426 void *arg); 427 428 typedef struct unflushed_iter_cb_arg { 429 spa_t *uic_spa; 430 uint64_t uic_txg; 431 void *uic_arg; 432 zdb_log_sm_cb_t uic_cb; 433 } unflushed_iter_cb_arg_t; 434 435 static int 436 iterate_through_spacemap_logs_cb(space_map_entry_t *sme, void *arg) 437 { 438 unflushed_iter_cb_arg_t *uic = arg; 439 return (uic->uic_cb(uic->uic_spa, sme, uic->uic_txg, uic->uic_arg)); 440 } 441 442 static void 443 iterate_through_spacemap_logs(spa_t *spa, zdb_log_sm_cb_t cb, void *arg) 444 { 445 if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP)) 446 return; 447 448 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 449 for (spa_log_sm_t *sls = avl_first(&spa->spa_sm_logs_by_txg); 450 sls; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls)) { 451 space_map_t *sm = NULL; 452 VERIFY0(space_map_open(&sm, spa_meta_objset(spa), 453 sls->sls_sm_obj, 0, UINT64_MAX, SPA_MINBLOCKSHIFT)); 454 455 unflushed_iter_cb_arg_t uic = { 456 .uic_spa = spa, 457 .uic_txg = sls->sls_txg, 458 .uic_arg = arg, 459 .uic_cb = cb 460 }; 461 VERIFY0(space_map_iterate(sm, space_map_length(sm), 462 iterate_through_spacemap_logs_cb, &uic)); 463 space_map_close(sm); 464 } 465 
spa_config_exit(spa, SCL_CONFIG, FTAG); 466 } 467 468 static void 469 verify_livelist_allocs(metaslab_verify_t *mv, uint64_t txg, 470 uint64_t offset, uint64_t size) 471 { 472 sublivelist_verify_block_t svb = {{{0}}}; 473 DVA_SET_VDEV(&svb.svb_dva, mv->mv_vdid); 474 DVA_SET_OFFSET(&svb.svb_dva, offset); 475 DVA_SET_ASIZE(&svb.svb_dva, size); 476 zfs_btree_index_t where; 477 uint64_t end_offset = offset + size; 478 479 /* 480 * Look for an exact match for spacemap entry in the livelist entries. 481 * Then, look for other livelist entries that fall within the range 482 * of the spacemap entry as it may have been condensed 483 */ 484 sublivelist_verify_block_t *found = 485 zfs_btree_find(&mv->mv_livelist_allocs, &svb, &where); 486 if (found == NULL) { 487 found = zfs_btree_next(&mv->mv_livelist_allocs, &where, &where); 488 } 489 for (; found != NULL && DVA_GET_VDEV(&found->svb_dva) == mv->mv_vdid && 490 DVA_GET_OFFSET(&found->svb_dva) < end_offset; 491 found = zfs_btree_next(&mv->mv_livelist_allocs, &where, &where)) { 492 if (found->svb_allocated_txg <= txg) { 493 (void) printf("ERROR: Livelist ALLOC [%llx:%llx] " 494 "from TXG %llx FREED at TXG %llx\n", 495 (u_longlong_t)DVA_GET_OFFSET(&found->svb_dva), 496 (u_longlong_t)DVA_GET_ASIZE(&found->svb_dva), 497 (u_longlong_t)found->svb_allocated_txg, 498 (u_longlong_t)txg); 499 } 500 } 501 } 502 503 static int 504 metaslab_spacemap_validation_cb(space_map_entry_t *sme, void *arg) 505 { 506 metaslab_verify_t *mv = arg; 507 uint64_t offset = sme->sme_offset; 508 uint64_t size = sme->sme_run; 509 uint64_t txg = sme->sme_txg; 510 511 if (sme->sme_type == SM_ALLOC) { 512 if (range_tree_contains(mv->mv_allocated, 513 offset, size)) { 514 (void) printf("ERROR: DOUBLE ALLOC: " 515 "%llu [%llx:%llx] " 516 "%llu:%llu LOG_SM\n", 517 (u_longlong_t)txg, (u_longlong_t)offset, 518 (u_longlong_t)size, (u_longlong_t)mv->mv_vdid, 519 (u_longlong_t)mv->mv_msid); 520 } else { 521 range_tree_add(mv->mv_allocated, 522 offset, size); 523 } 524 
} else { 525 if (!range_tree_contains(mv->mv_allocated, 526 offset, size)) { 527 (void) printf("ERROR: DOUBLE FREE: " 528 "%llu [%llx:%llx] " 529 "%llu:%llu LOG_SM\n", 530 (u_longlong_t)txg, (u_longlong_t)offset, 531 (u_longlong_t)size, (u_longlong_t)mv->mv_vdid, 532 (u_longlong_t)mv->mv_msid); 533 } else { 534 range_tree_remove(mv->mv_allocated, 535 offset, size); 536 } 537 } 538 539 if (sme->sme_type != SM_ALLOC) { 540 /* 541 * If something is freed in the spacemap, verify that 542 * it is not listed as allocated in the livelist. 543 */ 544 verify_livelist_allocs(mv, txg, offset, size); 545 } 546 return (0); 547 } 548 549 static int 550 spacemap_check_sm_log_cb(spa_t *spa, space_map_entry_t *sme, 551 uint64_t txg, void *arg) 552 { 553 metaslab_verify_t *mv = arg; 554 uint64_t offset = sme->sme_offset; 555 uint64_t vdev_id = sme->sme_vdev; 556 557 vdev_t *vd = vdev_lookup_top(spa, vdev_id); 558 559 /* skip indirect vdevs */ 560 if (!vdev_is_concrete(vd)) 561 return (0); 562 563 if (vdev_id != mv->mv_vdid) 564 return (0); 565 566 metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift]; 567 if (ms->ms_id != mv->mv_msid) 568 return (0); 569 570 if (txg < metaslab_unflushed_txg(ms)) 571 return (0); 572 573 574 ASSERT3U(txg, ==, sme->sme_txg); 575 return (metaslab_spacemap_validation_cb(sme, mv)); 576 } 577 578 static void 579 spacemap_check_sm_log(spa_t *spa, metaslab_verify_t *mv) 580 { 581 iterate_through_spacemap_logs(spa, spacemap_check_sm_log_cb, mv); 582 } 583 584 static void 585 spacemap_check_ms_sm(space_map_t *sm, metaslab_verify_t *mv) 586 { 587 if (sm == NULL) 588 return; 589 590 VERIFY0(space_map_iterate(sm, space_map_length(sm), 591 metaslab_spacemap_validation_cb, mv)); 592 } 593 594 static void iterate_deleted_livelists(spa_t *spa, ll_iter_t func, void *arg); 595 596 /* 597 * Transfer blocks from sv_leftover tree to the mv_livelist_allocs if 598 * they are part of that metaslab (mv_msid). 
599 */ 600 static void 601 mv_populate_livelist_allocs(metaslab_verify_t *mv, sublivelist_verify_t *sv) 602 { 603 zfs_btree_index_t where; 604 sublivelist_verify_block_t *svb; 605 ASSERT3U(zfs_btree_numnodes(&mv->mv_livelist_allocs), ==, 0); 606 for (svb = zfs_btree_first(&sv->sv_leftover, &where); 607 svb != NULL; 608 svb = zfs_btree_next(&sv->sv_leftover, &where, &where)) { 609 if (DVA_GET_VDEV(&svb->svb_dva) != mv->mv_vdid) 610 continue; 611 612 if (DVA_GET_OFFSET(&svb->svb_dva) < mv->mv_start && 613 (DVA_GET_OFFSET(&svb->svb_dva) + 614 DVA_GET_ASIZE(&svb->svb_dva)) > mv->mv_start) { 615 (void) printf("ERROR: Found block that crosses " 616 "metaslab boundary: <%llu:%llx:%llx>\n", 617 (u_longlong_t)DVA_GET_VDEV(&svb->svb_dva), 618 (u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva), 619 (u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva)); 620 continue; 621 } 622 623 if (DVA_GET_OFFSET(&svb->svb_dva) < mv->mv_start) 624 continue; 625 626 if (DVA_GET_OFFSET(&svb->svb_dva) >= mv->mv_end) 627 continue; 628 629 if ((DVA_GET_OFFSET(&svb->svb_dva) + 630 DVA_GET_ASIZE(&svb->svb_dva)) > mv->mv_end) { 631 (void) printf("ERROR: Found block that crosses " 632 "metaslab boundary: <%llu:%llx:%llx>\n", 633 (u_longlong_t)DVA_GET_VDEV(&svb->svb_dva), 634 (u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva), 635 (u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva)); 636 continue; 637 } 638 639 zfs_btree_add(&mv->mv_livelist_allocs, svb); 640 } 641 642 for (svb = zfs_btree_first(&mv->mv_livelist_allocs, &where); 643 svb != NULL; 644 svb = zfs_btree_next(&mv->mv_livelist_allocs, &where, &where)) { 645 zfs_btree_remove(&sv->sv_leftover, svb); 646 } 647 } 648 649 /* 650 * [Livelist Check] 651 * Iterate through all the sublivelists and: 652 * - report leftover frees (**) 653 * - record leftover ALLOCs together with their TXG [see Cross Check] 654 * 655 * (**) Note: Double ALLOCs are valid in datasets that have dedup 656 * enabled. 
 *       Similarly double FREEs are allowed as well but
 *       only if they pair up with a corresponding ALLOC entry once
 *       we are done with our sublivelist iteration.
 *
 * [Spacemap Check]
 * for each metaslab:
 * - iterate over spacemap and then the metaslab's entries in the
 *   spacemap log, then report any double FREEs and ALLOCs (do not
 *   blow up).
 *
 * [Cross Check]
 * After finishing the Livelist Check phase and while being in the
 * Spacemap Check phase, we find all the recorded leftover ALLOCs
 * of the livelist check that are part of the metaslab that we are
 * currently looking at in the Spacemap Check. We report any entries
 * that are marked as ALLOCs in the livelists but have been actually
 * freed (and potentially allocated again) after their TXG stamp in
 * the spacemaps. Also report any ALLOCs from the livelists that
 * belong to indirect vdevs (e.g. their vdev completed removal).
 *
 * Note that this will miss Log Spacemap entries that cancelled each other
 * out before being flushed to the metaslab, so we are not guaranteed
 * to match all erroneous ALLOCs.
679 */ 680 static void 681 livelist_metaslab_validate(spa_t *spa) 682 { 683 (void) printf("Verifying deleted livelist entries\n"); 684 685 sublivelist_verify_t sv; 686 zfs_btree_create(&sv.sv_leftover, livelist_block_compare, NULL, 687 sizeof (sublivelist_verify_block_t)); 688 iterate_deleted_livelists(spa, livelist_verify, &sv); 689 690 (void) printf("Verifying metaslab entries\n"); 691 vdev_t *rvd = spa->spa_root_vdev; 692 for (uint64_t c = 0; c < rvd->vdev_children; c++) { 693 vdev_t *vd = rvd->vdev_child[c]; 694 695 if (!vdev_is_concrete(vd)) 696 continue; 697 698 for (uint64_t mid = 0; mid < vd->vdev_ms_count; mid++) { 699 metaslab_t *m = vd->vdev_ms[mid]; 700 701 (void) fprintf(stderr, 702 "\rverifying concrete vdev %llu, " 703 "metaslab %llu of %llu ...", 704 (longlong_t)vd->vdev_id, 705 (longlong_t)mid, 706 (longlong_t)vd->vdev_ms_count); 707 708 uint64_t shift, start; 709 range_seg_type_t type = 710 metaslab_calculate_range_tree_type(vd, m, 711 &start, &shift); 712 metaslab_verify_t mv; 713 mv.mv_allocated = range_tree_create(NULL, 714 type, NULL, start, shift); 715 mv.mv_vdid = vd->vdev_id; 716 mv.mv_msid = m->ms_id; 717 mv.mv_start = m->ms_start; 718 mv.mv_end = m->ms_start + m->ms_size; 719 zfs_btree_create(&mv.mv_livelist_allocs, 720 livelist_block_compare, NULL, 721 sizeof (sublivelist_verify_block_t)); 722 723 mv_populate_livelist_allocs(&mv, &sv); 724 725 spacemap_check_ms_sm(m->ms_sm, &mv); 726 spacemap_check_sm_log(spa, &mv); 727 728 range_tree_vacate(mv.mv_allocated, NULL, NULL); 729 range_tree_destroy(mv.mv_allocated); 730 zfs_btree_clear(&mv.mv_livelist_allocs); 731 zfs_btree_destroy(&mv.mv_livelist_allocs); 732 } 733 } 734 (void) fprintf(stderr, "\n"); 735 736 /* 737 * If there are any segments in the leftover tree after we walked 738 * through all the metaslabs in the concrete vdevs then this means 739 * that we have segments in the livelists that belong to indirect 740 * vdevs and are marked as allocated. 
741 */ 742 if (zfs_btree_numnodes(&sv.sv_leftover) == 0) { 743 zfs_btree_destroy(&sv.sv_leftover); 744 return; 745 } 746 (void) printf("ERROR: Found livelist blocks marked as allocated " 747 "for indirect vdevs:\n"); 748 749 zfs_btree_index_t *where = NULL; 750 sublivelist_verify_block_t *svb; 751 while ((svb = zfs_btree_destroy_nodes(&sv.sv_leftover, &where)) != 752 NULL) { 753 int vdev_id = DVA_GET_VDEV(&svb->svb_dva); 754 ASSERT3U(vdev_id, <, rvd->vdev_children); 755 vdev_t *vd = rvd->vdev_child[vdev_id]; 756 ASSERT(!vdev_is_concrete(vd)); 757 (void) printf("<%d:%llx:%llx> TXG %llx\n", 758 vdev_id, (u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva), 759 (u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva), 760 (u_longlong_t)svb->svb_allocated_txg); 761 } 762 (void) printf("\n"); 763 zfs_btree_destroy(&sv.sv_leftover); 764 } 765 766 /* 767 * These libumem hooks provide a reasonable set of defaults for the allocator's 768 * debugging facilities. 769 */ 770 const char * 771 _umem_debug_init(void) 772 { 773 return ("default,verbose"); /* $UMEM_DEBUG setting */ 774 } 775 776 const char * 777 _umem_logging_init(void) 778 { 779 return ("fail,contents"); /* $UMEM_LOGGING setting */ 780 } 781 782 static void 783 usage(void) 784 { 785 (void) fprintf(stderr, 786 "Usage:\t%s [-AbcdDFGhikLMPsvXy] [-e [-V] [-p <path> ...]] " 787 "[-I <inflight I/Os>]\n" 788 "\t\t[-o <var>=<value>]... [-t <txg>] [-U <cache>] [-x <dumpdir>]\n" 789 "\t\t[-K <key>]\n" 790 "\t\t[<poolname>[/<dataset | objset id>] [<object | range> ...]]\n" 791 "\t%s [-AdiPv] [-e [-V] [-p <path> ...]] [-U <cache>] [-K <key>]\n" 792 "\t\t[<poolname>[/<dataset | objset id>] [<object | range> ...]\n" 793 "\t%s -B [-e [-V] [-p <path> ...]] [-I <inflight I/Os>]\n" 794 "\t\t[-o <var>=<value>]... 
[-t <txg>] [-U <cache>] [-x <dumpdir>]\n" 795 "\t\t[-K <key>] <poolname>/<objset id> [<backupflags>]\n" 796 "\t%s [-v] <bookmark>\n" 797 "\t%s -C [-A] [-U <cache>] [<poolname>]\n" 798 "\t%s -l [-Aqu] <device>\n" 799 "\t%s -m [-AFLPX] [-e [-V] [-p <path> ...]] [-t <txg>] " 800 "[-U <cache>]\n\t\t<poolname> [<vdev> [<metaslab> ...]]\n" 801 "\t%s -O [-K <key>] <dataset> <path>\n" 802 "\t%s -r [-K <key>] <dataset> <path> <destination>\n" 803 "\t%s -R [-A] [-e [-V] [-p <path> ...]] [-U <cache>]\n" 804 "\t\t<poolname> <vdev>:<offset>:<size>[:<flags>]\n" 805 "\t%s -E [-A] word0:word1:...:word15\n" 806 "\t%s -S [-AP] [-e [-V] [-p <path> ...]] [-U <cache>] " 807 "<poolname>\n\n", 808 cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, 809 cmdname, cmdname, cmdname, cmdname, cmdname); 810 811 (void) fprintf(stderr, " Dataset name must include at least one " 812 "separator character '/' or '@'\n"); 813 (void) fprintf(stderr, " If dataset name is specified, only that " 814 "dataset is dumped\n"); 815 (void) fprintf(stderr, " If object numbers or object number " 816 "ranges are specified, only those\n" 817 " objects or ranges are dumped.\n\n"); 818 (void) fprintf(stderr, 819 " Object ranges take the form <start>:<end>[:<flags>]\n" 820 " start Starting object number\n" 821 " end Ending object number, or -1 for no upper bound\n" 822 " flags Optional flags to select object types:\n" 823 " A All objects (this is the default)\n" 824 " d ZFS directories\n" 825 " f ZFS files \n" 826 " m SPA space maps\n" 827 " z ZAPs\n" 828 " - Negate effect of next flag\n\n"); 829 (void) fprintf(stderr, " Options to control amount of output:\n"); 830 (void) fprintf(stderr, " -b --block-stats " 831 "block statistics\n"); 832 (void) fprintf(stderr, " -B --backup " 833 "backup stream\n"); 834 (void) fprintf(stderr, " -c --checksum " 835 "checksum all metadata (twice for all data) blocks\n"); 836 (void) fprintf(stderr, " -C --config " 837 "config (or cachefile if alone)\n"); 838 (void) 
fprintf(stderr, " -d --datasets " 839 "dataset(s)\n"); 840 (void) fprintf(stderr, " -D --dedup-stats " 841 "dedup statistics\n"); 842 (void) fprintf(stderr, " -E --embedded-block-pointer=INTEGER\n" 843 " decode and display block " 844 "from an embedded block pointer\n"); 845 (void) fprintf(stderr, " -h --history " 846 "pool history\n"); 847 (void) fprintf(stderr, " -i --intent-logs " 848 "intent logs\n"); 849 (void) fprintf(stderr, " -l --label " 850 "read label contents\n"); 851 (void) fprintf(stderr, " -k --checkpointed-state " 852 "examine the checkpointed state of the pool\n"); 853 (void) fprintf(stderr, " -L --disable-leak-tracking " 854 "disable leak tracking (do not load spacemaps)\n"); 855 (void) fprintf(stderr, " -m --metaslabs " 856 "metaslabs\n"); 857 (void) fprintf(stderr, " -M --metaslab-groups " 858 "metaslab groups\n"); 859 (void) fprintf(stderr, " -O --object-lookups " 860 "perform object lookups by path\n"); 861 (void) fprintf(stderr, " -r --copy-object " 862 "copy an object by path to file\n"); 863 (void) fprintf(stderr, " -R --read-block " 864 "read and display block from a device\n"); 865 (void) fprintf(stderr, " -s --io-stats " 866 "report stats on zdb's I/O\n"); 867 (void) fprintf(stderr, " -S --simulate-dedup " 868 "simulate dedup to measure effect\n"); 869 (void) fprintf(stderr, " -v --verbose " 870 "verbose (applies to all others)\n"); 871 (void) fprintf(stderr, " -y --livelist " 872 "perform livelist and metaslab validation on any livelists being " 873 "deleted\n\n"); 874 (void) fprintf(stderr, " Below options are intended for use " 875 "with other options:\n"); 876 (void) fprintf(stderr, " -A --ignore-assertions " 877 "ignore assertions (-A), enable panic recovery (-AA) or both " 878 "(-AAA)\n"); 879 (void) fprintf(stderr, " -e --exported " 880 "pool is exported/destroyed/has altroot/not in a cachefile\n"); 881 (void) fprintf(stderr, " -F --automatic-rewind " 882 "attempt automatic rewind within safe range of transaction " 883 
"groups\n"); 884 (void) fprintf(stderr, " -G --dump-debug-msg " 885 "dump zfs_dbgmsg buffer before exiting\n"); 886 (void) fprintf(stderr, " -I --inflight=INTEGER " 887 "specify the maximum number of checksumming I/Os " 888 "[default is 200]\n"); 889 (void) fprintf(stderr, " -K --key=KEY " 890 "decryption key for encrypted dataset\n"); 891 (void) fprintf(stderr, " -o --option=\"OPTION=INTEGER\" " 892 "set global variable to an unsigned 32-bit integer\n"); 893 (void) fprintf(stderr, " -p --path==PATH " 894 "use one or more with -e to specify path to vdev dir\n"); 895 (void) fprintf(stderr, " -P --parseable " 896 "print numbers in parseable form\n"); 897 (void) fprintf(stderr, " -q --skip-label " 898 "don't print label contents\n"); 899 (void) fprintf(stderr, " -t --txg=INTEGER " 900 "highest txg to use when searching for uberblocks\n"); 901 (void) fprintf(stderr, " -u --uberblock " 902 "uberblock\n"); 903 (void) fprintf(stderr, " -U --cachefile=PATH " 904 "use alternate cachefile\n"); 905 (void) fprintf(stderr, " -V --verbatim " 906 "do verbatim import\n"); 907 (void) fprintf(stderr, " -x --dump-blocks=PATH " 908 "dump all read blocks into specified directory\n"); 909 (void) fprintf(stderr, " -X --extreme-rewind " 910 "attempt extreme rewind (does not work with dataset)\n"); 911 (void) fprintf(stderr, " -Y --all-reconstruction " 912 "attempt all reconstruction combinations for split blocks\n"); 913 (void) fprintf(stderr, " -Z --zstd-headers " 914 "show ZSTD headers \n"); 915 (void) fprintf(stderr, "Specify an option more than once (e.g. 
-bb) " 916 "to make only that option verbose\n"); 917 (void) fprintf(stderr, "Default is to dump everything non-verbosely\n"); 918 exit(1); 919 } 920 921 static void 922 dump_debug_buffer(void) 923 { 924 if (dump_opt['G']) { 925 (void) printf("\n"); 926 (void) fflush(stdout); 927 zfs_dbgmsg_print("zdb"); 928 } 929 } 930 931 /* 932 * Called for usage errors that are discovered after a call to spa_open(), 933 * dmu_bonus_hold(), or pool_match(). abort() is called for other errors. 934 */ 935 936 static void 937 fatal(const char *fmt, ...) 938 { 939 va_list ap; 940 941 va_start(ap, fmt); 942 (void) fprintf(stderr, "%s: ", cmdname); 943 (void) vfprintf(stderr, fmt, ap); 944 va_end(ap); 945 (void) fprintf(stderr, "\n"); 946 947 dump_debug_buffer(); 948 949 exit(1); 950 } 951 952 static void 953 dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size) 954 { 955 (void) size; 956 nvlist_t *nv; 957 size_t nvsize = *(uint64_t *)data; 958 char *packed = umem_alloc(nvsize, UMEM_NOFAIL); 959 960 VERIFY(0 == dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH)); 961 962 VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0); 963 964 umem_free(packed, nvsize); 965 966 dump_nvlist(nv, 8); 967 968 nvlist_free(nv); 969 } 970 971 static void 972 dump_history_offsets(objset_t *os, uint64_t object, void *data, size_t size) 973 { 974 (void) os, (void) object, (void) size; 975 spa_history_phys_t *shp = data; 976 977 if (shp == NULL) 978 return; 979 980 (void) printf("\t\tpool_create_len = %llu\n", 981 (u_longlong_t)shp->sh_pool_create_len); 982 (void) printf("\t\tphys_max_off = %llu\n", 983 (u_longlong_t)shp->sh_phys_max_off); 984 (void) printf("\t\tbof = %llu\n", 985 (u_longlong_t)shp->sh_bof); 986 (void) printf("\t\teof = %llu\n", 987 (u_longlong_t)shp->sh_eof); 988 (void) printf("\t\trecords_lost = %llu\n", 989 (u_longlong_t)shp->sh_records_lost); 990 } 991 992 static void 993 zdb_nicenum(uint64_t num, char *buf, size_t buflen) 994 { 995 if (dump_opt['P']) 996 
(void) snprintf(buf, buflen, "%llu", (longlong_t)num); 997 else 998 nicenum(num, buf, buflen); 999 } 1000 1001 static const char histo_stars[] = "****************************************"; 1002 static const uint64_t histo_width = sizeof (histo_stars) - 1; 1003 1004 static void 1005 dump_histogram(const uint64_t *histo, int size, int offset) 1006 { 1007 int i; 1008 int minidx = size - 1; 1009 int maxidx = 0; 1010 uint64_t max = 0; 1011 1012 for (i = 0; i < size; i++) { 1013 if (histo[i] == 0) 1014 continue; 1015 if (histo[i] > max) 1016 max = histo[i]; 1017 if (i > maxidx) 1018 maxidx = i; 1019 if (i < minidx) 1020 minidx = i; 1021 } 1022 1023 if (max < histo_width) 1024 max = histo_width; 1025 1026 for (i = minidx; i <= maxidx; i++) { 1027 (void) printf("\t\t\t%3u: %6llu %s\n", 1028 i + offset, (u_longlong_t)histo[i], 1029 &histo_stars[(max - histo[i]) * histo_width / max]); 1030 } 1031 } 1032 1033 static void 1034 dump_zap_stats(objset_t *os, uint64_t object) 1035 { 1036 int error; 1037 zap_stats_t zs; 1038 1039 error = zap_get_stats(os, object, &zs); 1040 if (error) 1041 return; 1042 1043 if (zs.zs_ptrtbl_len == 0) { 1044 ASSERT(zs.zs_num_blocks == 1); 1045 (void) printf("\tmicrozap: %llu bytes, %llu entries\n", 1046 (u_longlong_t)zs.zs_blocksize, 1047 (u_longlong_t)zs.zs_num_entries); 1048 return; 1049 } 1050 1051 (void) printf("\tFat ZAP stats:\n"); 1052 1053 (void) printf("\t\tPointer table:\n"); 1054 (void) printf("\t\t\t%llu elements\n", 1055 (u_longlong_t)zs.zs_ptrtbl_len); 1056 (void) printf("\t\t\tzt_blk: %llu\n", 1057 (u_longlong_t)zs.zs_ptrtbl_zt_blk); 1058 (void) printf("\t\t\tzt_numblks: %llu\n", 1059 (u_longlong_t)zs.zs_ptrtbl_zt_numblks); 1060 (void) printf("\t\t\tzt_shift: %llu\n", 1061 (u_longlong_t)zs.zs_ptrtbl_zt_shift); 1062 (void) printf("\t\t\tzt_blks_copied: %llu\n", 1063 (u_longlong_t)zs.zs_ptrtbl_blks_copied); 1064 (void) printf("\t\t\tzt_nextblk: %llu\n", 1065 (u_longlong_t)zs.zs_ptrtbl_nextblk); 1066 1067 (void) printf("\t\tZAP entries: 
%llu\n", 1068 (u_longlong_t)zs.zs_num_entries); 1069 (void) printf("\t\tLeaf blocks: %llu\n", 1070 (u_longlong_t)zs.zs_num_leafs); 1071 (void) printf("\t\tTotal blocks: %llu\n", 1072 (u_longlong_t)zs.zs_num_blocks); 1073 (void) printf("\t\tzap_block_type: 0x%llx\n", 1074 (u_longlong_t)zs.zs_block_type); 1075 (void) printf("\t\tzap_magic: 0x%llx\n", 1076 (u_longlong_t)zs.zs_magic); 1077 (void) printf("\t\tzap_salt: 0x%llx\n", 1078 (u_longlong_t)zs.zs_salt); 1079 1080 (void) printf("\t\tLeafs with 2^n pointers:\n"); 1081 dump_histogram(zs.zs_leafs_with_2n_pointers, ZAP_HISTOGRAM_SIZE, 0); 1082 1083 (void) printf("\t\tBlocks with n*5 entries:\n"); 1084 dump_histogram(zs.zs_blocks_with_n5_entries, ZAP_HISTOGRAM_SIZE, 0); 1085 1086 (void) printf("\t\tBlocks n/10 full:\n"); 1087 dump_histogram(zs.zs_blocks_n_tenths_full, ZAP_HISTOGRAM_SIZE, 0); 1088 1089 (void) printf("\t\tEntries with n chunks:\n"); 1090 dump_histogram(zs.zs_entries_using_n_chunks, ZAP_HISTOGRAM_SIZE, 0); 1091 1092 (void) printf("\t\tBuckets with n entries:\n"); 1093 dump_histogram(zs.zs_buckets_with_n_entries, ZAP_HISTOGRAM_SIZE, 0); 1094 } 1095 1096 static void 1097 dump_none(objset_t *os, uint64_t object, void *data, size_t size) 1098 { 1099 (void) os, (void) object, (void) data, (void) size; 1100 } 1101 1102 static void 1103 dump_unknown(objset_t *os, uint64_t object, void *data, size_t size) 1104 { 1105 (void) os, (void) object, (void) data, (void) size; 1106 (void) printf("\tUNKNOWN OBJECT TYPE\n"); 1107 } 1108 1109 static void 1110 dump_uint8(objset_t *os, uint64_t object, void *data, size_t size) 1111 { 1112 (void) os, (void) object, (void) data, (void) size; 1113 } 1114 1115 static void 1116 dump_uint64(objset_t *os, uint64_t object, void *data, size_t size) 1117 { 1118 uint64_t *arr; 1119 uint64_t oursize; 1120 if (dump_opt['d'] < 6) 1121 return; 1122 1123 if (data == NULL) { 1124 dmu_object_info_t doi; 1125 1126 VERIFY0(dmu_object_info(os, object, &doi)); 1127 size = doi.doi_max_offset; 1128 
/* 1129 * We cap the size at 1 mebibyte here to prevent 1130 * allocation failures and nigh-infinite printing if the 1131 * object is extremely large. 1132 */ 1133 oursize = MIN(size, 1 << 20); 1134 arr = kmem_alloc(oursize, KM_SLEEP); 1135 1136 int err = dmu_read(os, object, 0, oursize, arr, 0); 1137 if (err != 0) { 1138 (void) printf("got error %u from dmu_read\n", err); 1139 kmem_free(arr, oursize); 1140 return; 1141 } 1142 } else { 1143 /* 1144 * Even though the allocation is already done in this code path, 1145 * we still cap the size to prevent excessive printing. 1146 */ 1147 oursize = MIN(size, 1 << 20); 1148 arr = data; 1149 } 1150 1151 if (size == 0) { 1152 if (data == NULL) 1153 kmem_free(arr, oursize); 1154 (void) printf("\t\t[]\n"); 1155 return; 1156 } 1157 1158 (void) printf("\t\t[%0llx", (u_longlong_t)arr[0]); 1159 for (size_t i = 1; i * sizeof (uint64_t) < oursize; i++) { 1160 if (i % 4 != 0) 1161 (void) printf(", %0llx", (u_longlong_t)arr[i]); 1162 else 1163 (void) printf(",\n\t\t%0llx", (u_longlong_t)arr[i]); 1164 } 1165 if (oursize != size) 1166 (void) printf(", ... 
"); 1167 (void) printf("]\n"); 1168 1169 if (data == NULL) 1170 kmem_free(arr, oursize); 1171 } 1172 1173 static void 1174 dump_zap(objset_t *os, uint64_t object, void *data, size_t size) 1175 { 1176 (void) data, (void) size; 1177 zap_cursor_t zc; 1178 zap_attribute_t attr; 1179 void *prop; 1180 unsigned i; 1181 1182 dump_zap_stats(os, object); 1183 (void) printf("\n"); 1184 1185 for (zap_cursor_init(&zc, os, object); 1186 zap_cursor_retrieve(&zc, &attr) == 0; 1187 zap_cursor_advance(&zc)) { 1188 (void) printf("\t\t%s = ", attr.za_name); 1189 if (attr.za_num_integers == 0) { 1190 (void) printf("\n"); 1191 continue; 1192 } 1193 prop = umem_zalloc(attr.za_num_integers * 1194 attr.za_integer_length, UMEM_NOFAIL); 1195 (void) zap_lookup(os, object, attr.za_name, 1196 attr.za_integer_length, attr.za_num_integers, prop); 1197 if (attr.za_integer_length == 1) { 1198 if (strcmp(attr.za_name, 1199 DSL_CRYPTO_KEY_MASTER_KEY) == 0 || 1200 strcmp(attr.za_name, 1201 DSL_CRYPTO_KEY_HMAC_KEY) == 0 || 1202 strcmp(attr.za_name, DSL_CRYPTO_KEY_IV) == 0 || 1203 strcmp(attr.za_name, DSL_CRYPTO_KEY_MAC) == 0 || 1204 strcmp(attr.za_name, DMU_POOL_CHECKSUM_SALT) == 0) { 1205 uint8_t *u8 = prop; 1206 1207 for (i = 0; i < attr.za_num_integers; i++) { 1208 (void) printf("%02x", u8[i]); 1209 } 1210 } else { 1211 (void) printf("%s", (char *)prop); 1212 } 1213 } else { 1214 for (i = 0; i < attr.za_num_integers; i++) { 1215 switch (attr.za_integer_length) { 1216 case 2: 1217 (void) printf("%u ", 1218 ((uint16_t *)prop)[i]); 1219 break; 1220 case 4: 1221 (void) printf("%u ", 1222 ((uint32_t *)prop)[i]); 1223 break; 1224 case 8: 1225 (void) printf("%lld ", 1226 (u_longlong_t)((int64_t *)prop)[i]); 1227 break; 1228 } 1229 } 1230 } 1231 (void) printf("\n"); 1232 umem_free(prop, attr.za_num_integers * attr.za_integer_length); 1233 } 1234 zap_cursor_fini(&zc); 1235 } 1236 1237 static void 1238 dump_bpobj(objset_t *os, uint64_t object, void *data, size_t size) 1239 { 1240 bpobj_phys_t *bpop = data; 
1241 uint64_t i; 1242 char bytes[32], comp[32], uncomp[32]; 1243 1244 /* make sure the output won't get truncated */ 1245 _Static_assert(sizeof (bytes) >= NN_NUMBUF_SZ, "bytes truncated"); 1246 _Static_assert(sizeof (comp) >= NN_NUMBUF_SZ, "comp truncated"); 1247 _Static_assert(sizeof (uncomp) >= NN_NUMBUF_SZ, "uncomp truncated"); 1248 1249 if (bpop == NULL) 1250 return; 1251 1252 zdb_nicenum(bpop->bpo_bytes, bytes, sizeof (bytes)); 1253 zdb_nicenum(bpop->bpo_comp, comp, sizeof (comp)); 1254 zdb_nicenum(bpop->bpo_uncomp, uncomp, sizeof (uncomp)); 1255 1256 (void) printf("\t\tnum_blkptrs = %llu\n", 1257 (u_longlong_t)bpop->bpo_num_blkptrs); 1258 (void) printf("\t\tbytes = %s\n", bytes); 1259 if (size >= BPOBJ_SIZE_V1) { 1260 (void) printf("\t\tcomp = %s\n", comp); 1261 (void) printf("\t\tuncomp = %s\n", uncomp); 1262 } 1263 if (size >= BPOBJ_SIZE_V2) { 1264 (void) printf("\t\tsubobjs = %llu\n", 1265 (u_longlong_t)bpop->bpo_subobjs); 1266 (void) printf("\t\tnum_subobjs = %llu\n", 1267 (u_longlong_t)bpop->bpo_num_subobjs); 1268 } 1269 if (size >= sizeof (*bpop)) { 1270 (void) printf("\t\tnum_freed = %llu\n", 1271 (u_longlong_t)bpop->bpo_num_freed); 1272 } 1273 1274 if (dump_opt['d'] < 5) 1275 return; 1276 1277 for (i = 0; i < bpop->bpo_num_blkptrs; i++) { 1278 char blkbuf[BP_SPRINTF_LEN]; 1279 blkptr_t bp; 1280 1281 int err = dmu_read(os, object, 1282 i * sizeof (bp), sizeof (bp), &bp, 0); 1283 if (err != 0) { 1284 (void) printf("got error %u from dmu_read\n", err); 1285 break; 1286 } 1287 snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), &bp, 1288 BP_GET_FREE(&bp)); 1289 (void) printf("\t%s\n", blkbuf); 1290 } 1291 } 1292 1293 static void 1294 dump_bpobj_subobjs(objset_t *os, uint64_t object, void *data, size_t size) 1295 { 1296 (void) data, (void) size; 1297 dmu_object_info_t doi; 1298 int64_t i; 1299 1300 VERIFY0(dmu_object_info(os, object, &doi)); 1301 uint64_t *subobjs = kmem_alloc(doi.doi_max_offset, KM_SLEEP); 1302 1303 int err = dmu_read(os, object, 0, 
doi.doi_max_offset, subobjs, 0); 1304 if (err != 0) { 1305 (void) printf("got error %u from dmu_read\n", err); 1306 kmem_free(subobjs, doi.doi_max_offset); 1307 return; 1308 } 1309 1310 int64_t last_nonzero = -1; 1311 for (i = 0; i < doi.doi_max_offset / 8; i++) { 1312 if (subobjs[i] != 0) 1313 last_nonzero = i; 1314 } 1315 1316 for (i = 0; i <= last_nonzero; i++) { 1317 (void) printf("\t%llu\n", (u_longlong_t)subobjs[i]); 1318 } 1319 kmem_free(subobjs, doi.doi_max_offset); 1320 } 1321 1322 static void 1323 dump_ddt_zap(objset_t *os, uint64_t object, void *data, size_t size) 1324 { 1325 (void) data, (void) size; 1326 dump_zap_stats(os, object); 1327 /* contents are printed elsewhere, properly decoded */ 1328 } 1329 1330 static void 1331 dump_sa_attrs(objset_t *os, uint64_t object, void *data, size_t size) 1332 { 1333 (void) data, (void) size; 1334 zap_cursor_t zc; 1335 zap_attribute_t attr; 1336 1337 dump_zap_stats(os, object); 1338 (void) printf("\n"); 1339 1340 for (zap_cursor_init(&zc, os, object); 1341 zap_cursor_retrieve(&zc, &attr) == 0; 1342 zap_cursor_advance(&zc)) { 1343 (void) printf("\t\t%s = ", attr.za_name); 1344 if (attr.za_num_integers == 0) { 1345 (void) printf("\n"); 1346 continue; 1347 } 1348 (void) printf(" %llx : [%d:%d:%d]\n", 1349 (u_longlong_t)attr.za_first_integer, 1350 (int)ATTR_LENGTH(attr.za_first_integer), 1351 (int)ATTR_BSWAP(attr.za_first_integer), 1352 (int)ATTR_NUM(attr.za_first_integer)); 1353 } 1354 zap_cursor_fini(&zc); 1355 } 1356 1357 static void 1358 dump_sa_layouts(objset_t *os, uint64_t object, void *data, size_t size) 1359 { 1360 (void) data, (void) size; 1361 zap_cursor_t zc; 1362 zap_attribute_t attr; 1363 uint16_t *layout_attrs; 1364 unsigned i; 1365 1366 dump_zap_stats(os, object); 1367 (void) printf("\n"); 1368 1369 for (zap_cursor_init(&zc, os, object); 1370 zap_cursor_retrieve(&zc, &attr) == 0; 1371 zap_cursor_advance(&zc)) { 1372 (void) printf("\t\t%s = [", attr.za_name); 1373 if (attr.za_num_integers == 0) { 1374 
(void) printf("\n"); 1375 continue; 1376 } 1377 1378 VERIFY(attr.za_integer_length == 2); 1379 layout_attrs = umem_zalloc(attr.za_num_integers * 1380 attr.za_integer_length, UMEM_NOFAIL); 1381 1382 VERIFY(zap_lookup(os, object, attr.za_name, 1383 attr.za_integer_length, 1384 attr.za_num_integers, layout_attrs) == 0); 1385 1386 for (i = 0; i != attr.za_num_integers; i++) 1387 (void) printf(" %d ", (int)layout_attrs[i]); 1388 (void) printf("]\n"); 1389 umem_free(layout_attrs, 1390 attr.za_num_integers * attr.za_integer_length); 1391 } 1392 zap_cursor_fini(&zc); 1393 } 1394 1395 static void 1396 dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size) 1397 { 1398 (void) data, (void) size; 1399 zap_cursor_t zc; 1400 zap_attribute_t attr; 1401 const char *typenames[] = { 1402 /* 0 */ "not specified", 1403 /* 1 */ "FIFO", 1404 /* 2 */ "Character Device", 1405 /* 3 */ "3 (invalid)", 1406 /* 4 */ "Directory", 1407 /* 5 */ "5 (invalid)", 1408 /* 6 */ "Block Device", 1409 /* 7 */ "7 (invalid)", 1410 /* 8 */ "Regular File", 1411 /* 9 */ "9 (invalid)", 1412 /* 10 */ "Symbolic Link", 1413 /* 11 */ "11 (invalid)", 1414 /* 12 */ "Socket", 1415 /* 13 */ "Door", 1416 /* 14 */ "Event Port", 1417 /* 15 */ "15 (invalid)", 1418 }; 1419 1420 dump_zap_stats(os, object); 1421 (void) printf("\n"); 1422 1423 for (zap_cursor_init(&zc, os, object); 1424 zap_cursor_retrieve(&zc, &attr) == 0; 1425 zap_cursor_advance(&zc)) { 1426 (void) printf("\t\t%s = %lld (type: %s)\n", 1427 attr.za_name, ZFS_DIRENT_OBJ(attr.za_first_integer), 1428 typenames[ZFS_DIRENT_TYPE(attr.za_first_integer)]); 1429 } 1430 zap_cursor_fini(&zc); 1431 } 1432 1433 static int 1434 get_dtl_refcount(vdev_t *vd) 1435 { 1436 int refcount = 0; 1437 1438 if (vd->vdev_ops->vdev_op_leaf) { 1439 space_map_t *sm = vd->vdev_dtl_sm; 1440 1441 if (sm != NULL && 1442 sm->sm_dbuf->db_size == sizeof (space_map_phys_t)) 1443 return (1); 1444 return (0); 1445 } 1446 1447 for (unsigned c = 0; c < vd->vdev_children; c++) 1448 
refcount += get_dtl_refcount(vd->vdev_child[c]); 1449 return (refcount); 1450 } 1451 1452 static int 1453 get_metaslab_refcount(vdev_t *vd) 1454 { 1455 int refcount = 0; 1456 1457 if (vd->vdev_top == vd) { 1458 for (uint64_t m = 0; m < vd->vdev_ms_count; m++) { 1459 space_map_t *sm = vd->vdev_ms[m]->ms_sm; 1460 1461 if (sm != NULL && 1462 sm->sm_dbuf->db_size == sizeof (space_map_phys_t)) 1463 refcount++; 1464 } 1465 } 1466 for (unsigned c = 0; c < vd->vdev_children; c++) 1467 refcount += get_metaslab_refcount(vd->vdev_child[c]); 1468 1469 return (refcount); 1470 } 1471 1472 static int 1473 get_obsolete_refcount(vdev_t *vd) 1474 { 1475 uint64_t obsolete_sm_object; 1476 int refcount = 0; 1477 1478 VERIFY0(vdev_obsolete_sm_object(vd, &obsolete_sm_object)); 1479 if (vd->vdev_top == vd && obsolete_sm_object != 0) { 1480 dmu_object_info_t doi; 1481 VERIFY0(dmu_object_info(vd->vdev_spa->spa_meta_objset, 1482 obsolete_sm_object, &doi)); 1483 if (doi.doi_bonus_size == sizeof (space_map_phys_t)) { 1484 refcount++; 1485 } 1486 } else { 1487 ASSERT3P(vd->vdev_obsolete_sm, ==, NULL); 1488 ASSERT3U(obsolete_sm_object, ==, 0); 1489 } 1490 for (unsigned c = 0; c < vd->vdev_children; c++) { 1491 refcount += get_obsolete_refcount(vd->vdev_child[c]); 1492 } 1493 1494 return (refcount); 1495 } 1496 1497 static int 1498 get_prev_obsolete_spacemap_refcount(spa_t *spa) 1499 { 1500 uint64_t prev_obj = 1501 spa->spa_condensing_indirect_phys.scip_prev_obsolete_sm_object; 1502 if (prev_obj != 0) { 1503 dmu_object_info_t doi; 1504 VERIFY0(dmu_object_info(spa->spa_meta_objset, prev_obj, &doi)); 1505 if (doi.doi_bonus_size == sizeof (space_map_phys_t)) { 1506 return (1); 1507 } 1508 } 1509 return (0); 1510 } 1511 1512 static int 1513 get_checkpoint_refcount(vdev_t *vd) 1514 { 1515 int refcount = 0; 1516 1517 if (vd->vdev_top == vd && vd->vdev_top_zap != 0 && 1518 zap_contains(spa_meta_objset(vd->vdev_spa), 1519 vd->vdev_top_zap, VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) == 0) 1520 refcount++; 1521 1522 
for (uint64_t c = 0; c < vd->vdev_children; c++) 1523 refcount += get_checkpoint_refcount(vd->vdev_child[c]); 1524 1525 return (refcount); 1526 } 1527 1528 static int 1529 get_log_spacemap_refcount(spa_t *spa) 1530 { 1531 return (avl_numnodes(&spa->spa_sm_logs_by_txg)); 1532 } 1533 1534 static int 1535 verify_spacemap_refcounts(spa_t *spa) 1536 { 1537 uint64_t expected_refcount = 0; 1538 uint64_t actual_refcount; 1539 1540 (void) feature_get_refcount(spa, 1541 &spa_feature_table[SPA_FEATURE_SPACEMAP_HISTOGRAM], 1542 &expected_refcount); 1543 actual_refcount = get_dtl_refcount(spa->spa_root_vdev); 1544 actual_refcount += get_metaslab_refcount(spa->spa_root_vdev); 1545 actual_refcount += get_obsolete_refcount(spa->spa_root_vdev); 1546 actual_refcount += get_prev_obsolete_spacemap_refcount(spa); 1547 actual_refcount += get_checkpoint_refcount(spa->spa_root_vdev); 1548 actual_refcount += get_log_spacemap_refcount(spa); 1549 1550 if (expected_refcount != actual_refcount) { 1551 (void) printf("space map refcount mismatch: expected %lld != " 1552 "actual %lld\n", 1553 (longlong_t)expected_refcount, 1554 (longlong_t)actual_refcount); 1555 return (2); 1556 } 1557 return (0); 1558 } 1559 1560 static void 1561 dump_spacemap(objset_t *os, space_map_t *sm) 1562 { 1563 const char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID", 1564 "INVALID", "INVALID", "INVALID", "INVALID" }; 1565 1566 if (sm == NULL) 1567 return; 1568 1569 (void) printf("space map object %llu:\n", 1570 (longlong_t)sm->sm_object); 1571 (void) printf(" smp_length = 0x%llx\n", 1572 (longlong_t)sm->sm_phys->smp_length); 1573 (void) printf(" smp_alloc = 0x%llx\n", 1574 (longlong_t)sm->sm_phys->smp_alloc); 1575 1576 if (dump_opt['d'] < 6 && dump_opt['m'] < 4) 1577 return; 1578 1579 /* 1580 * Print out the freelist entries in both encoded and decoded form. 
1581 */ 1582 uint8_t mapshift = sm->sm_shift; 1583 int64_t alloc = 0; 1584 uint64_t word, entry_id = 0; 1585 for (uint64_t offset = 0; offset < space_map_length(sm); 1586 offset += sizeof (word)) { 1587 1588 VERIFY0(dmu_read(os, space_map_object(sm), offset, 1589 sizeof (word), &word, DMU_READ_PREFETCH)); 1590 1591 if (sm_entry_is_debug(word)) { 1592 uint64_t de_txg = SM_DEBUG_TXG_DECODE(word); 1593 uint64_t de_sync_pass = SM_DEBUG_SYNCPASS_DECODE(word); 1594 if (de_txg == 0) { 1595 (void) printf( 1596 "\t [%6llu] PADDING\n", 1597 (u_longlong_t)entry_id); 1598 } else { 1599 (void) printf( 1600 "\t [%6llu] %s: txg %llu pass %llu\n", 1601 (u_longlong_t)entry_id, 1602 ddata[SM_DEBUG_ACTION_DECODE(word)], 1603 (u_longlong_t)de_txg, 1604 (u_longlong_t)de_sync_pass); 1605 } 1606 entry_id++; 1607 continue; 1608 } 1609 1610 uint8_t words; 1611 char entry_type; 1612 uint64_t entry_off, entry_run, entry_vdev = SM_NO_VDEVID; 1613 1614 if (sm_entry_is_single_word(word)) { 1615 entry_type = (SM_TYPE_DECODE(word) == SM_ALLOC) ? 1616 'A' : 'F'; 1617 entry_off = (SM_OFFSET_DECODE(word) << mapshift) + 1618 sm->sm_start; 1619 entry_run = SM_RUN_DECODE(word) << mapshift; 1620 words = 1; 1621 } else { 1622 /* it is a two-word entry so we read another word */ 1623 ASSERT(sm_entry_is_double_word(word)); 1624 1625 uint64_t extra_word; 1626 offset += sizeof (extra_word); 1627 VERIFY0(dmu_read(os, space_map_object(sm), offset, 1628 sizeof (extra_word), &extra_word, 1629 DMU_READ_PREFETCH)); 1630 1631 ASSERT3U(offset, <=, space_map_length(sm)); 1632 1633 entry_run = SM2_RUN_DECODE(word) << mapshift; 1634 entry_vdev = SM2_VDEV_DECODE(word); 1635 entry_type = (SM2_TYPE_DECODE(extra_word) == SM_ALLOC) ? 
1636 'A' : 'F'; 1637 entry_off = (SM2_OFFSET_DECODE(extra_word) << 1638 mapshift) + sm->sm_start; 1639 words = 2; 1640 } 1641 1642 (void) printf("\t [%6llu] %c range:" 1643 " %010llx-%010llx size: %06llx vdev: %06llu words: %u\n", 1644 (u_longlong_t)entry_id, 1645 entry_type, (u_longlong_t)entry_off, 1646 (u_longlong_t)(entry_off + entry_run), 1647 (u_longlong_t)entry_run, 1648 (u_longlong_t)entry_vdev, words); 1649 1650 if (entry_type == 'A') 1651 alloc += entry_run; 1652 else 1653 alloc -= entry_run; 1654 entry_id++; 1655 } 1656 if (alloc != space_map_allocated(sm)) { 1657 (void) printf("space_map_object alloc (%lld) INCONSISTENT " 1658 "with space map summary (%lld)\n", 1659 (longlong_t)space_map_allocated(sm), (longlong_t)alloc); 1660 } 1661 } 1662 1663 static void 1664 dump_metaslab_stats(metaslab_t *msp) 1665 { 1666 char maxbuf[32]; 1667 range_tree_t *rt = msp->ms_allocatable; 1668 zfs_btree_t *t = &msp->ms_allocatable_by_size; 1669 int free_pct = range_tree_space(rt) * 100 / msp->ms_size; 1670 1671 /* max sure nicenum has enough space */ 1672 _Static_assert(sizeof (maxbuf) >= NN_NUMBUF_SZ, "maxbuf truncated"); 1673 1674 zdb_nicenum(metaslab_largest_allocatable(msp), maxbuf, sizeof (maxbuf)); 1675 1676 (void) printf("\t %25s %10lu %7s %6s %4s %4d%%\n", 1677 "segments", zfs_btree_numnodes(t), "maxsize", maxbuf, 1678 "freepct", free_pct); 1679 (void) printf("\tIn-memory histogram:\n"); 1680 dump_histogram(rt->rt_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0); 1681 } 1682 1683 static void 1684 dump_metaslab(metaslab_t *msp) 1685 { 1686 vdev_t *vd = msp->ms_group->mg_vd; 1687 spa_t *spa = vd->vdev_spa; 1688 space_map_t *sm = msp->ms_sm; 1689 char freebuf[32]; 1690 1691 zdb_nicenum(msp->ms_size - space_map_allocated(sm), freebuf, 1692 sizeof (freebuf)); 1693 1694 (void) printf( 1695 "\tmetaslab %6llu offset %12llx spacemap %6llu free %5s\n", 1696 (u_longlong_t)msp->ms_id, (u_longlong_t)msp->ms_start, 1697 (u_longlong_t)space_map_object(sm), freebuf); 1698 1699 if 
(dump_opt['m'] > 2 && !dump_opt['L']) { 1700 mutex_enter(&msp->ms_lock); 1701 VERIFY0(metaslab_load(msp)); 1702 range_tree_stat_verify(msp->ms_allocatable); 1703 dump_metaslab_stats(msp); 1704 metaslab_unload(msp); 1705 mutex_exit(&msp->ms_lock); 1706 } 1707 1708 if (dump_opt['m'] > 1 && sm != NULL && 1709 spa_feature_is_active(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) { 1710 /* 1711 * The space map histogram represents free space in chunks 1712 * of sm_shift (i.e. bucket 0 refers to 2^sm_shift). 1713 */ 1714 (void) printf("\tOn-disk histogram:\t\tfragmentation %llu\n", 1715 (u_longlong_t)msp->ms_fragmentation); 1716 dump_histogram(sm->sm_phys->smp_histogram, 1717 SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift); 1718 } 1719 1720 if (vd->vdev_ops == &vdev_draid_ops) 1721 ASSERT3U(msp->ms_size, <=, 1ULL << vd->vdev_ms_shift); 1722 else 1723 ASSERT3U(msp->ms_size, ==, 1ULL << vd->vdev_ms_shift); 1724 1725 dump_spacemap(spa->spa_meta_objset, msp->ms_sm); 1726 1727 if (spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP)) { 1728 (void) printf("\tFlush data:\n\tunflushed txg=%llu\n\n", 1729 (u_longlong_t)metaslab_unflushed_txg(msp)); 1730 } 1731 } 1732 1733 static void 1734 print_vdev_metaslab_header(vdev_t *vd) 1735 { 1736 vdev_alloc_bias_t alloc_bias = vd->vdev_alloc_bias; 1737 const char *bias_str = ""; 1738 if (alloc_bias == VDEV_BIAS_LOG || vd->vdev_islog) { 1739 bias_str = VDEV_ALLOC_BIAS_LOG; 1740 } else if (alloc_bias == VDEV_BIAS_SPECIAL) { 1741 bias_str = VDEV_ALLOC_BIAS_SPECIAL; 1742 } else if (alloc_bias == VDEV_BIAS_DEDUP) { 1743 bias_str = VDEV_ALLOC_BIAS_DEDUP; 1744 } 1745 1746 uint64_t ms_flush_data_obj = 0; 1747 if (vd->vdev_top_zap != 0) { 1748 int error = zap_lookup(spa_meta_objset(vd->vdev_spa), 1749 vd->vdev_top_zap, VDEV_TOP_ZAP_MS_UNFLUSHED_PHYS_TXGS, 1750 sizeof (uint64_t), 1, &ms_flush_data_obj); 1751 if (error != ENOENT) { 1752 ASSERT0(error); 1753 } 1754 } 1755 1756 (void) printf("\tvdev %10llu %s", 1757 (u_longlong_t)vd->vdev_id, bias_str); 1758 1759 if 
(ms_flush_data_obj != 0) { 1760 (void) printf(" ms_unflushed_phys object %llu", 1761 (u_longlong_t)ms_flush_data_obj); 1762 } 1763 1764 (void) printf("\n\t%-10s%5llu %-19s %-15s %-12s\n", 1765 "metaslabs", (u_longlong_t)vd->vdev_ms_count, 1766 "offset", "spacemap", "free"); 1767 (void) printf("\t%15s %19s %15s %12s\n", 1768 "---------------", "-------------------", 1769 "---------------", "------------"); 1770 } 1771 1772 static void 1773 dump_metaslab_groups(spa_t *spa, boolean_t show_special) 1774 { 1775 vdev_t *rvd = spa->spa_root_vdev; 1776 metaslab_class_t *mc = spa_normal_class(spa); 1777 metaslab_class_t *smc = spa_special_class(spa); 1778 uint64_t fragmentation; 1779 1780 metaslab_class_histogram_verify(mc); 1781 1782 for (unsigned c = 0; c < rvd->vdev_children; c++) { 1783 vdev_t *tvd = rvd->vdev_child[c]; 1784 metaslab_group_t *mg = tvd->vdev_mg; 1785 1786 if (mg == NULL || (mg->mg_class != mc && 1787 (!show_special || mg->mg_class != smc))) 1788 continue; 1789 1790 metaslab_group_histogram_verify(mg); 1791 mg->mg_fragmentation = metaslab_group_fragmentation(mg); 1792 1793 (void) printf("\tvdev %10llu\t\tmetaslabs%5llu\t\t" 1794 "fragmentation", 1795 (u_longlong_t)tvd->vdev_id, 1796 (u_longlong_t)tvd->vdev_ms_count); 1797 if (mg->mg_fragmentation == ZFS_FRAG_INVALID) { 1798 (void) printf("%3s\n", "-"); 1799 } else { 1800 (void) printf("%3llu%%\n", 1801 (u_longlong_t)mg->mg_fragmentation); 1802 } 1803 dump_histogram(mg->mg_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0); 1804 } 1805 1806 (void) printf("\tpool %s\tfragmentation", spa_name(spa)); 1807 fragmentation = metaslab_class_fragmentation(mc); 1808 if (fragmentation == ZFS_FRAG_INVALID) 1809 (void) printf("\t%3s\n", "-"); 1810 else 1811 (void) printf("\t%3llu%%\n", (u_longlong_t)fragmentation); 1812 dump_histogram(mc->mc_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0); 1813 } 1814 1815 static void 1816 print_vdev_indirect(vdev_t *vd) 1817 { 1818 vdev_indirect_config_t *vic = &vd->vdev_indirect_config; 1819 
vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; 1820 vdev_indirect_births_t *vib = vd->vdev_indirect_births; 1821 1822 if (vim == NULL) { 1823 ASSERT3P(vib, ==, NULL); 1824 return; 1825 } 1826 1827 ASSERT3U(vdev_indirect_mapping_object(vim), ==, 1828 vic->vic_mapping_object); 1829 ASSERT3U(vdev_indirect_births_object(vib), ==, 1830 vic->vic_births_object); 1831 1832 (void) printf("indirect births obj %llu:\n", 1833 (longlong_t)vic->vic_births_object); 1834 (void) printf(" vib_count = %llu\n", 1835 (longlong_t)vdev_indirect_births_count(vib)); 1836 for (uint64_t i = 0; i < vdev_indirect_births_count(vib); i++) { 1837 vdev_indirect_birth_entry_phys_t *cur_vibe = 1838 &vib->vib_entries[i]; 1839 (void) printf("\toffset %llx -> txg %llu\n", 1840 (longlong_t)cur_vibe->vibe_offset, 1841 (longlong_t)cur_vibe->vibe_phys_birth_txg); 1842 } 1843 (void) printf("\n"); 1844 1845 (void) printf("indirect mapping obj %llu:\n", 1846 (longlong_t)vic->vic_mapping_object); 1847 (void) printf(" vim_max_offset = 0x%llx\n", 1848 (longlong_t)vdev_indirect_mapping_max_offset(vim)); 1849 (void) printf(" vim_bytes_mapped = 0x%llx\n", 1850 (longlong_t)vdev_indirect_mapping_bytes_mapped(vim)); 1851 (void) printf(" vim_count = %llu\n", 1852 (longlong_t)vdev_indirect_mapping_num_entries(vim)); 1853 1854 if (dump_opt['d'] <= 5 && dump_opt['m'] <= 3) 1855 return; 1856 1857 uint32_t *counts = vdev_indirect_mapping_load_obsolete_counts(vim); 1858 1859 for (uint64_t i = 0; i < vdev_indirect_mapping_num_entries(vim); i++) { 1860 vdev_indirect_mapping_entry_phys_t *vimep = 1861 &vim->vim_entries[i]; 1862 (void) printf("\t<%llx:%llx:%llx> -> " 1863 "<%llx:%llx:%llx> (%x obsolete)\n", 1864 (longlong_t)vd->vdev_id, 1865 (longlong_t)DVA_MAPPING_GET_SRC_OFFSET(vimep), 1866 (longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst), 1867 (longlong_t)DVA_GET_VDEV(&vimep->vimep_dst), 1868 (longlong_t)DVA_GET_OFFSET(&vimep->vimep_dst), 1869 (longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst), 1870 counts[i]); 1871 } 1872 
(void) printf("\n"); 1873 1874 uint64_t obsolete_sm_object; 1875 VERIFY0(vdev_obsolete_sm_object(vd, &obsolete_sm_object)); 1876 if (obsolete_sm_object != 0) { 1877 objset_t *mos = vd->vdev_spa->spa_meta_objset; 1878 (void) printf("obsolete space map object %llu:\n", 1879 (u_longlong_t)obsolete_sm_object); 1880 ASSERT(vd->vdev_obsolete_sm != NULL); 1881 ASSERT3U(space_map_object(vd->vdev_obsolete_sm), ==, 1882 obsolete_sm_object); 1883 dump_spacemap(mos, vd->vdev_obsolete_sm); 1884 (void) printf("\n"); 1885 } 1886 } 1887 1888 static void 1889 dump_metaslabs(spa_t *spa) 1890 { 1891 vdev_t *vd, *rvd = spa->spa_root_vdev; 1892 uint64_t m, c = 0, children = rvd->vdev_children; 1893 1894 (void) printf("\nMetaslabs:\n"); 1895 1896 if (!dump_opt['d'] && zopt_metaslab_args > 0) { 1897 c = zopt_metaslab[0]; 1898 1899 if (c >= children) 1900 (void) fatal("bad vdev id: %llu", (u_longlong_t)c); 1901 1902 if (zopt_metaslab_args > 1) { 1903 vd = rvd->vdev_child[c]; 1904 print_vdev_metaslab_header(vd); 1905 1906 for (m = 1; m < zopt_metaslab_args; m++) { 1907 if (zopt_metaslab[m] < vd->vdev_ms_count) 1908 dump_metaslab( 1909 vd->vdev_ms[zopt_metaslab[m]]); 1910 else 1911 (void) fprintf(stderr, "bad metaslab " 1912 "number %llu\n", 1913 (u_longlong_t)zopt_metaslab[m]); 1914 } 1915 (void) printf("\n"); 1916 return; 1917 } 1918 children = c + 1; 1919 } 1920 for (; c < children; c++) { 1921 vd = rvd->vdev_child[c]; 1922 print_vdev_metaslab_header(vd); 1923 1924 print_vdev_indirect(vd); 1925 1926 for (m = 0; m < vd->vdev_ms_count; m++) 1927 dump_metaslab(vd->vdev_ms[m]); 1928 (void) printf("\n"); 1929 } 1930 } 1931 1932 static void 1933 dump_log_spacemaps(spa_t *spa) 1934 { 1935 if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP)) 1936 return; 1937 1938 (void) printf("\nLog Space Maps in Pool:\n"); 1939 for (spa_log_sm_t *sls = avl_first(&spa->spa_sm_logs_by_txg); 1940 sls; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls)) { 1941 space_map_t *sm = NULL; 1942 
VERIFY0(space_map_open(&sm, spa_meta_objset(spa), 1943 sls->sls_sm_obj, 0, UINT64_MAX, SPA_MINBLOCKSHIFT)); 1944 1945 (void) printf("Log Spacemap object %llu txg %llu\n", 1946 (u_longlong_t)sls->sls_sm_obj, (u_longlong_t)sls->sls_txg); 1947 dump_spacemap(spa->spa_meta_objset, sm); 1948 space_map_close(sm); 1949 } 1950 (void) printf("\n"); 1951 } 1952 1953 static void 1954 dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index) 1955 { 1956 const ddt_phys_t *ddp = dde->dde_phys; 1957 const ddt_key_t *ddk = &dde->dde_key; 1958 const char *types[4] = { "ditto", "single", "double", "triple" }; 1959 char blkbuf[BP_SPRINTF_LEN]; 1960 blkptr_t blk; 1961 int p; 1962 1963 for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { 1964 if (ddp->ddp_phys_birth == 0) 1965 continue; 1966 ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk); 1967 snprintf_blkptr(blkbuf, sizeof (blkbuf), &blk); 1968 (void) printf("index %llx refcnt %llu %s %s\n", 1969 (u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt, 1970 types[p], blkbuf); 1971 } 1972 } 1973 1974 static void 1975 dump_dedup_ratio(const ddt_stat_t *dds) 1976 { 1977 double rL, rP, rD, D, dedup, compress, copies; 1978 1979 if (dds->dds_blocks == 0) 1980 return; 1981 1982 rL = (double)dds->dds_ref_lsize; 1983 rP = (double)dds->dds_ref_psize; 1984 rD = (double)dds->dds_ref_dsize; 1985 D = (double)dds->dds_dsize; 1986 1987 dedup = rD / D; 1988 compress = rL / rP; 1989 copies = rD / rP; 1990 1991 (void) printf("dedup = %.2f, compress = %.2f, copies = %.2f, " 1992 "dedup * compress / copies = %.2f\n\n", 1993 dedup, compress, copies, dedup * compress / copies); 1994 } 1995 1996 static void 1997 dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class) 1998 { 1999 char name[DDT_NAMELEN]; 2000 ddt_entry_t dde; 2001 uint64_t walk = 0; 2002 dmu_object_info_t doi; 2003 uint64_t count, dspace, mspace; 2004 int error; 2005 2006 error = ddt_object_info(ddt, type, class, &doi); 2007 2008 if (error == ENOENT) 2009 return; 2010 ASSERT(error == 0); 
2011 2012 error = ddt_object_count(ddt, type, class, &count); 2013 ASSERT(error == 0); 2014 if (count == 0) 2015 return; 2016 2017 dspace = doi.doi_physical_blocks_512 << 9; 2018 mspace = doi.doi_fill_count * doi.doi_data_block_size; 2019 2020 ddt_object_name(ddt, type, class, name); 2021 2022 (void) printf("%s: %llu entries, size %llu on disk, %llu in core\n", 2023 name, 2024 (u_longlong_t)count, 2025 (u_longlong_t)(dspace / count), 2026 (u_longlong_t)(mspace / count)); 2027 2028 if (dump_opt['D'] < 3) 2029 return; 2030 2031 zpool_dump_ddt(NULL, &ddt->ddt_histogram[type][class]); 2032 2033 if (dump_opt['D'] < 4) 2034 return; 2035 2036 if (dump_opt['D'] < 5 && class == DDT_CLASS_UNIQUE) 2037 return; 2038 2039 (void) printf("%s contents:\n\n", name); 2040 2041 while ((error = ddt_object_walk(ddt, type, class, &walk, &dde)) == 0) 2042 dump_dde(ddt, &dde, walk); 2043 2044 ASSERT3U(error, ==, ENOENT); 2045 2046 (void) printf("\n"); 2047 } 2048 2049 static void 2050 dump_all_ddts(spa_t *spa) 2051 { 2052 ddt_histogram_t ddh_total = {{{0}}}; 2053 ddt_stat_t dds_total = {0}; 2054 2055 for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { 2056 ddt_t *ddt = spa->spa_ddt[c]; 2057 for (enum ddt_type type = 0; type < DDT_TYPES; type++) { 2058 for (enum ddt_class class = 0; class < DDT_CLASSES; 2059 class++) { 2060 dump_ddt(ddt, type, class); 2061 } 2062 } 2063 } 2064 2065 ddt_get_dedup_stats(spa, &dds_total); 2066 2067 if (dds_total.dds_blocks == 0) { 2068 (void) printf("All DDTs are empty\n"); 2069 return; 2070 } 2071 2072 (void) printf("\n"); 2073 2074 if (dump_opt['D'] > 1) { 2075 (void) printf("DDT histogram (aggregated over all DDTs):\n"); 2076 ddt_get_dedup_histogram(spa, &ddh_total); 2077 zpool_dump_ddt(&dds_total, &ddh_total); 2078 } 2079 2080 dump_dedup_ratio(&dds_total); 2081 } 2082 2083 static void 2084 dump_dtl_seg(void *arg, uint64_t start, uint64_t size) 2085 { 2086 char *prefix = arg; 2087 2088 (void) printf("%s [%llu,%llu) length %llu\n", 2089 prefix, 
2090 (u_longlong_t)start, 2091 (u_longlong_t)(start + size), 2092 (u_longlong_t)(size)); 2093 } 2094 2095 static void 2096 dump_dtl(vdev_t *vd, int indent) 2097 { 2098 spa_t *spa = vd->vdev_spa; 2099 boolean_t required; 2100 const char *name[DTL_TYPES] = { "missing", "partial", "scrub", 2101 "outage" }; 2102 char prefix[256]; 2103 2104 spa_vdev_state_enter(spa, SCL_NONE); 2105 required = vdev_dtl_required(vd); 2106 (void) spa_vdev_state_exit(spa, NULL, 0); 2107 2108 if (indent == 0) 2109 (void) printf("\nDirty time logs:\n\n"); 2110 2111 (void) printf("\t%*s%s [%s]\n", indent, "", 2112 vd->vdev_path ? vd->vdev_path : 2113 vd->vdev_parent ? vd->vdev_ops->vdev_op_type : spa_name(spa), 2114 required ? "DTL-required" : "DTL-expendable"); 2115 2116 for (int t = 0; t < DTL_TYPES; t++) { 2117 range_tree_t *rt = vd->vdev_dtl[t]; 2118 if (range_tree_space(rt) == 0) 2119 continue; 2120 (void) snprintf(prefix, sizeof (prefix), "\t%*s%s", 2121 indent + 2, "", name[t]); 2122 range_tree_walk(rt, dump_dtl_seg, prefix); 2123 if (dump_opt['d'] > 5 && vd->vdev_children == 0) 2124 dump_spacemap(spa->spa_meta_objset, 2125 vd->vdev_dtl_sm); 2126 } 2127 2128 for (unsigned c = 0; c < vd->vdev_children; c++) 2129 dump_dtl(vd->vdev_child[c], indent + 4); 2130 } 2131 2132 static void 2133 dump_history(spa_t *spa) 2134 { 2135 nvlist_t **events = NULL; 2136 char *buf; 2137 uint64_t resid, len, off = 0; 2138 uint_t num = 0; 2139 int error; 2140 char tbuf[30]; 2141 2142 if ((buf = malloc(SPA_OLD_MAXBLOCKSIZE)) == NULL) { 2143 (void) fprintf(stderr, "%s: unable to allocate I/O buffer\n", 2144 __func__); 2145 return; 2146 } 2147 2148 do { 2149 len = SPA_OLD_MAXBLOCKSIZE; 2150 2151 if ((error = spa_history_get(spa, &off, &len, buf)) != 0) { 2152 (void) fprintf(stderr, "Unable to read history: " 2153 "error %d\n", error); 2154 free(buf); 2155 return; 2156 } 2157 2158 if (zpool_history_unpack(buf, len, &resid, &events, &num) != 0) 2159 break; 2160 2161 off -= resid; 2162 } while (len != 0); 2163 
2164 (void) printf("\nHistory:\n"); 2165 for (unsigned i = 0; i < num; i++) { 2166 boolean_t printed = B_FALSE; 2167 2168 if (nvlist_exists(events[i], ZPOOL_HIST_TIME)) { 2169 time_t tsec; 2170 struct tm t; 2171 2172 tsec = fnvlist_lookup_uint64(events[i], 2173 ZPOOL_HIST_TIME); 2174 (void) localtime_r(&tsec, &t); 2175 (void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t); 2176 } else { 2177 tbuf[0] = '\0'; 2178 } 2179 2180 if (nvlist_exists(events[i], ZPOOL_HIST_CMD)) { 2181 (void) printf("%s %s\n", tbuf, 2182 fnvlist_lookup_string(events[i], ZPOOL_HIST_CMD)); 2183 } else if (nvlist_exists(events[i], ZPOOL_HIST_INT_EVENT)) { 2184 uint64_t ievent; 2185 2186 ievent = fnvlist_lookup_uint64(events[i], 2187 ZPOOL_HIST_INT_EVENT); 2188 if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS) 2189 goto next; 2190 2191 (void) printf(" %s [internal %s txg:%ju] %s\n", 2192 tbuf, 2193 zfs_history_event_names[ievent], 2194 fnvlist_lookup_uint64(events[i], 2195 ZPOOL_HIST_TXG), 2196 fnvlist_lookup_string(events[i], 2197 ZPOOL_HIST_INT_STR)); 2198 } else if (nvlist_exists(events[i], ZPOOL_HIST_INT_NAME)) { 2199 (void) printf("%s [txg:%ju] %s", tbuf, 2200 fnvlist_lookup_uint64(events[i], 2201 ZPOOL_HIST_TXG), 2202 fnvlist_lookup_string(events[i], 2203 ZPOOL_HIST_INT_NAME)); 2204 2205 if (nvlist_exists(events[i], ZPOOL_HIST_DSNAME)) { 2206 (void) printf(" %s (%llu)", 2207 fnvlist_lookup_string(events[i], 2208 ZPOOL_HIST_DSNAME), 2209 (u_longlong_t)fnvlist_lookup_uint64( 2210 events[i], 2211 ZPOOL_HIST_DSID)); 2212 } 2213 2214 (void) printf(" %s\n", fnvlist_lookup_string(events[i], 2215 ZPOOL_HIST_INT_STR)); 2216 } else if (nvlist_exists(events[i], ZPOOL_HIST_IOCTL)) { 2217 (void) printf("%s ioctl %s\n", tbuf, 2218 fnvlist_lookup_string(events[i], 2219 ZPOOL_HIST_IOCTL)); 2220 2221 if (nvlist_exists(events[i], ZPOOL_HIST_INPUT_NVL)) { 2222 (void) printf(" input:\n"); 2223 dump_nvlist(fnvlist_lookup_nvlist(events[i], 2224 ZPOOL_HIST_INPUT_NVL), 8); 2225 } 2226 if (nvlist_exists(events[i], 
ZPOOL_HIST_OUTPUT_NVL)) { 2227 (void) printf(" output:\n"); 2228 dump_nvlist(fnvlist_lookup_nvlist(events[i], 2229 ZPOOL_HIST_OUTPUT_NVL), 8); 2230 } 2231 if (nvlist_exists(events[i], ZPOOL_HIST_ERRNO)) { 2232 (void) printf(" errno: %lld\n", 2233 (longlong_t)fnvlist_lookup_int64(events[i], 2234 ZPOOL_HIST_ERRNO)); 2235 } 2236 } else { 2237 goto next; 2238 } 2239 2240 printed = B_TRUE; 2241 next: 2242 if (dump_opt['h'] > 1) { 2243 if (!printed) 2244 (void) printf("unrecognized record:\n"); 2245 dump_nvlist(events[i], 2); 2246 } 2247 } 2248 free(buf); 2249 } 2250 2251 static void 2252 dump_dnode(objset_t *os, uint64_t object, void *data, size_t size) 2253 { 2254 (void) os, (void) object, (void) data, (void) size; 2255 } 2256 2257 static uint64_t 2258 blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp, 2259 const zbookmark_phys_t *zb) 2260 { 2261 if (dnp == NULL) { 2262 ASSERT(zb->zb_level < 0); 2263 if (zb->zb_object == 0) 2264 return (zb->zb_blkid); 2265 return (zb->zb_blkid * BP_GET_LSIZE(bp)); 2266 } 2267 2268 ASSERT(zb->zb_level >= 0); 2269 2270 return ((zb->zb_blkid << 2271 (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) * 2272 dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT); 2273 } 2274 2275 static void 2276 snprintf_zstd_header(spa_t *spa, char *blkbuf, size_t buflen, 2277 const blkptr_t *bp) 2278 { 2279 abd_t *pabd; 2280 void *buf; 2281 zio_t *zio; 2282 zfs_zstdhdr_t zstd_hdr; 2283 int error; 2284 2285 if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_ZSTD) 2286 return; 2287 2288 if (BP_IS_HOLE(bp)) 2289 return; 2290 2291 if (BP_IS_EMBEDDED(bp)) { 2292 buf = malloc(SPA_MAXBLOCKSIZE); 2293 if (buf == NULL) { 2294 (void) fprintf(stderr, "out of memory\n"); 2295 exit(1); 2296 } 2297 decode_embedded_bp_compressed(bp, buf); 2298 memcpy(&zstd_hdr, buf, sizeof (zstd_hdr)); 2299 free(buf); 2300 zstd_hdr.c_len = BE_32(zstd_hdr.c_len); 2301 zstd_hdr.raw_version_level = BE_32(zstd_hdr.raw_version_level); 2302 (void) snprintf(blkbuf + strlen(blkbuf), 2303 buflen - 
strlen(blkbuf), 2304 " ZSTD:size=%u:version=%u:level=%u:EMBEDDED", 2305 zstd_hdr.c_len, zfs_get_hdrversion(&zstd_hdr), 2306 zfs_get_hdrlevel(&zstd_hdr)); 2307 return; 2308 } 2309 2310 pabd = abd_alloc_for_io(SPA_MAXBLOCKSIZE, B_FALSE); 2311 zio = zio_root(spa, NULL, NULL, 0); 2312 2313 /* Decrypt but don't decompress so we can read the compression header */ 2314 zio_nowait(zio_read(zio, spa, bp, pabd, BP_GET_PSIZE(bp), NULL, NULL, 2315 ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW_COMPRESS, 2316 NULL)); 2317 error = zio_wait(zio); 2318 if (error) { 2319 (void) fprintf(stderr, "read failed: %d\n", error); 2320 return; 2321 } 2322 buf = abd_borrow_buf_copy(pabd, BP_GET_LSIZE(bp)); 2323 memcpy(&zstd_hdr, buf, sizeof (zstd_hdr)); 2324 zstd_hdr.c_len = BE_32(zstd_hdr.c_len); 2325 zstd_hdr.raw_version_level = BE_32(zstd_hdr.raw_version_level); 2326 2327 (void) snprintf(blkbuf + strlen(blkbuf), 2328 buflen - strlen(blkbuf), 2329 " ZSTD:size=%u:version=%u:level=%u:NORMAL", 2330 zstd_hdr.c_len, zfs_get_hdrversion(&zstd_hdr), 2331 zfs_get_hdrlevel(&zstd_hdr)); 2332 2333 abd_return_buf_copy(pabd, buf, BP_GET_LSIZE(bp)); 2334 } 2335 2336 static void 2337 snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp, 2338 boolean_t bp_freed) 2339 { 2340 const dva_t *dva = bp->blk_dva; 2341 int ndvas = dump_opt['d'] > 5 ? 
BP_GET_NDVAS(bp) : 1; 2342 int i; 2343 2344 if (dump_opt['b'] >= 6) { 2345 snprintf_blkptr(blkbuf, buflen, bp); 2346 if (bp_freed) { 2347 (void) snprintf(blkbuf + strlen(blkbuf), 2348 buflen - strlen(blkbuf), " %s", "FREE"); 2349 } 2350 return; 2351 } 2352 2353 if (BP_IS_EMBEDDED(bp)) { 2354 (void) sprintf(blkbuf, 2355 "EMBEDDED et=%u %llxL/%llxP B=%llu", 2356 (int)BPE_GET_ETYPE(bp), 2357 (u_longlong_t)BPE_GET_LSIZE(bp), 2358 (u_longlong_t)BPE_GET_PSIZE(bp), 2359 (u_longlong_t)bp->blk_birth); 2360 return; 2361 } 2362 2363 blkbuf[0] = '\0'; 2364 2365 for (i = 0; i < ndvas; i++) 2366 (void) snprintf(blkbuf + strlen(blkbuf), 2367 buflen - strlen(blkbuf), "%llu:%llx:%llx ", 2368 (u_longlong_t)DVA_GET_VDEV(&dva[i]), 2369 (u_longlong_t)DVA_GET_OFFSET(&dva[i]), 2370 (u_longlong_t)DVA_GET_ASIZE(&dva[i])); 2371 2372 if (BP_IS_HOLE(bp)) { 2373 (void) snprintf(blkbuf + strlen(blkbuf), 2374 buflen - strlen(blkbuf), 2375 "%llxL B=%llu", 2376 (u_longlong_t)BP_GET_LSIZE(bp), 2377 (u_longlong_t)bp->blk_birth); 2378 } else { 2379 (void) snprintf(blkbuf + strlen(blkbuf), 2380 buflen - strlen(blkbuf), 2381 "%llxL/%llxP F=%llu B=%llu/%llu", 2382 (u_longlong_t)BP_GET_LSIZE(bp), 2383 (u_longlong_t)BP_GET_PSIZE(bp), 2384 (u_longlong_t)BP_GET_FILL(bp), 2385 (u_longlong_t)bp->blk_birth, 2386 (u_longlong_t)BP_PHYSICAL_BIRTH(bp)); 2387 if (bp_freed) 2388 (void) snprintf(blkbuf + strlen(blkbuf), 2389 buflen - strlen(blkbuf), " %s", "FREE"); 2390 (void) snprintf(blkbuf + strlen(blkbuf), 2391 buflen - strlen(blkbuf), 2392 " cksum=%016llx:%016llx:%016llx:%016llx", 2393 (u_longlong_t)bp->blk_cksum.zc_word[0], 2394 (u_longlong_t)bp->blk_cksum.zc_word[1], 2395 (u_longlong_t)bp->blk_cksum.zc_word[2], 2396 (u_longlong_t)bp->blk_cksum.zc_word[3]); 2397 } 2398 } 2399 2400 static void 2401 print_indirect(spa_t *spa, blkptr_t *bp, const zbookmark_phys_t *zb, 2402 const dnode_phys_t *dnp) 2403 { 2404 char blkbuf[BP_SPRINTF_LEN]; 2405 int l; 2406 2407 if (!BP_IS_EMBEDDED(bp)) { 2408 
ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
		ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
	}

	(void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb));

	ASSERT(zb->zb_level >= 0);

	/*
	 * One column per level from the top of the tree down to -1; only
	 * the column matching this block's level is filled in.
	 */
	for (l = dnp->dn_nlevels - 1; l >= -1; l--) {
		if (l == zb->zb_level) {
			(void) printf("L%llx", (u_longlong_t)zb->zb_level);
		} else {
			(void) printf(" ");
		}
	}

	snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp, B_FALSE);
	if (dump_opt['Z'] && BP_GET_COMPRESS(bp) == ZIO_COMPRESS_ZSTD)
		snprintf_zstd_header(spa, blkbuf, sizeof (blkbuf), bp);
	(void) printf("%s\n", blkbuf);
}

/*
 * Print block 'bp' and, if it is a non-hole indirect block, read it and
 * recurse into each of the block pointers it contains.
 *
 * Block pointers with a zero birth txg are skipped.  Returns 0 on success
 * or the first error returned by arc_read() or by a recursive call.
 */
static int
visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
    blkptr_t *bp, const zbookmark_phys_t *zb)
{
	int err = 0;

	if (bp->blk_birth == 0)
		return (0);

	print_indirect(spa, bp, zb, dnp);

	if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) {
		arc_flags_t flags = ARC_FLAG_WAIT;
		int i;
		blkptr_t *cbp;
		/* Number of block pointers held by this indirect block. */
		int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
		arc_buf_t *buf;
		uint64_t fill = 0;
		ASSERT(!BP_IS_REDACTED(bp));

		err = arc_read(NULL, spa, bp, arc_getbuf_func, &buf,
		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
		if (err)
			return (err);
		ASSERT(buf->b_data);

		/* recursively visit blocks below this */
		cbp = buf->b_data;
		for (i = 0; i < epb; i++, cbp++) {
			zbookmark_phys_t czb;

			SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
			    zb->zb_level - 1,
			    zb->zb_blkid * epb + i);
			err = visit_indirect(spa, dnp, cbp, &czb);
			if (err)
				break;
			fill += BP_GET_FILL(cbp);
		}
		/* The children's fill counts must add up to the parent's. */
		if (!err)
			ASSERT3U(fill, ==, BP_GET_FILL(bp));
		arc_buf_destroy(buf, &buf);
	}

	return (err);
}

/*
 * Print the whole block tree of dnode 'dn', starting from each of the
 * block pointers stored in the dnode itself.  Errors from
 * visit_indirect() are ignored.
 */
static void
dump_indirect(dnode_t *dn)
{
	dnode_phys_t *dnp = dn->dn_phys;
	zbookmark_phys_t czb;

	(void) printf("Indirect blocks:\n");

	/* The dnode's own block pointers sit at the top level of the tree. */
	SET_BOOKMARK(&czb, dmu_objset_id(dn->dn_objset),
	    dn->dn_object, dnp->dn_nlevels - 1, 0);
	for (int j = 0; j < dnp->dn_nblkptr; j++) {
		czb.zb_blkid = j;
		(void) visit_indirect(dmu_objset_spa(dn->dn_objset), dnp,
		    &dnp->dn_blkptr[j], &czb);
	}

	(void) printf("\n");
}

/*
 * Object viewer for DSL directories: prints the fields of the
 * dsl_dir_phys_t found in 'data' ('size' bytes).  Does nothing when 'data'
 * is NULL.  'os' and 'object' are unused.
 */
static void
dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
{
	(void) os, (void) object;
	dsl_dir_phys_t *dd = data;
	time_t crtime;
	char nice[32];

	/* make sure nicenum has enough space */
	_Static_assert(sizeof (nice) >= NN_NUMBUF_SZ, "nice truncated");

	if (dd == NULL)
		return;

	ASSERT3U(size, >=, sizeof (dsl_dir_phys_t));

	crtime = dd->dd_creation_time;
	/* No "\n" in the format: the ctime() string supplies it. */
	(void) printf("\t\tcreation_time = %s", ctime(&crtime));
	(void) printf("\t\thead_dataset_obj = %llu\n",
	    (u_longlong_t)dd->dd_head_dataset_obj);
	(void) printf("\t\tparent_dir_obj = %llu\n",
	    (u_longlong_t)dd->dd_parent_obj);
	(void) printf("\t\torigin_obj = %llu\n",
	    (u_longlong_t)dd->dd_origin_obj);
	(void) printf("\t\tchild_dir_zapobj = %llu\n",
	    (u_longlong_t)dd->dd_child_dir_zapobj);
	/* The single 'nice' buffer is reused for every size below. */
	zdb_nicenum(dd->dd_used_bytes, nice, sizeof (nice));
	(void) printf("\t\tused_bytes = %s\n", nice);
	zdb_nicenum(dd->dd_compressed_bytes, nice, sizeof (nice));
	(void) printf("\t\tcompressed_bytes = %s\n", nice);
	zdb_nicenum(dd->dd_uncompressed_bytes, nice, sizeof (nice));
	(void) printf("\t\tuncompressed_bytes = %s\n", nice);
	zdb_nicenum(dd->dd_quota, nice, sizeof (nice));
	(void) printf("\t\tquota = %s\n", nice);
	zdb_nicenum(dd->dd_reserved, nice, sizeof (nice));
	(void) printf("\t\treserved = %s\n", nice);
	(void) printf("\t\tprops_zapobj = %llu\n",
	    (u_longlong_t)dd->dd_props_zapobj);
	(void) printf("\t\tdeleg_zapobj = %llu\n",
	    (u_longlong_t)dd->dd_deleg_zapobj);
	(void)
printf("\t\tflags = %llx\n",
	    (u_longlong_t)dd->dd_flags);

/* Print one dd_used_breakdown[] slot, labelled with the slot's name. */
#define	DO(which) \
	zdb_nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice, \
	    sizeof (nice)); \
	(void) printf("\t\tused_breakdown[" #which "] = %s\n", nice)
	DO(HEAD);
	DO(SNAP);
	DO(CHILD);
	DO(CHILD_RSRV);
	DO(REFRSRV);
#undef DO
	(void) printf("\t\tclones = %llu\n",
	    (u_longlong_t)dd->dd_clones);
}

/*
 * Object viewer for DSL datasets: prints the fields of the
 * dsl_dataset_phys_t found in 'data', including its block pointer.
 * Does nothing when 'data' is NULL.  'os' and 'object' are unused.
 */
static void
dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
{
	(void) os, (void) object;
	dsl_dataset_phys_t *ds = data;
	time_t crtime;
	char used[32], compressed[32], uncompressed[32], unique[32];
	char blkbuf[BP_SPRINTF_LEN];

	/* make sure nicenum has enough space */
	_Static_assert(sizeof (used) >= NN_NUMBUF_SZ, "used truncated");
	_Static_assert(sizeof (compressed) >= NN_NUMBUF_SZ,
	    "compressed truncated");
	_Static_assert(sizeof (uncompressed) >= NN_NUMBUF_SZ,
	    "uncompressed truncated");
	_Static_assert(sizeof (unique) >= NN_NUMBUF_SZ, "unique truncated");

	if (ds == NULL)
		return;

	ASSERT(size == sizeof (*ds));
	crtime = ds->ds_creation_time;
	/* Format all derived strings first, then print the fields. */
	zdb_nicenum(ds->ds_referenced_bytes, used, sizeof (used));
	zdb_nicenum(ds->ds_compressed_bytes, compressed, sizeof (compressed));
	zdb_nicenum(ds->ds_uncompressed_bytes, uncompressed,
	    sizeof (uncompressed));
	zdb_nicenum(ds->ds_unique_bytes, unique, sizeof (unique));
	snprintf_blkptr(blkbuf, sizeof (blkbuf), &ds->ds_bp);

	(void) printf("\t\tdir_obj = %llu\n",
	    (u_longlong_t)ds->ds_dir_obj);
	(void) printf("\t\tprev_snap_obj = %llu\n",
	    (u_longlong_t)ds->ds_prev_snap_obj);
	(void) printf("\t\tprev_snap_txg = %llu\n",
	    (u_longlong_t)ds->ds_prev_snap_txg);
	(void) printf("\t\tnext_snap_obj = %llu\n",
	    (u_longlong_t)ds->ds_next_snap_obj);
	(void) printf("\t\tsnapnames_zapobj = %llu\n",
	    (u_longlong_t)ds->ds_snapnames_zapobj);
	(void) printf("\t\tnum_children = %llu\n",
	    (u_longlong_t)ds->ds_num_children);
	(void) printf("\t\tuserrefs_obj = %llu\n",
	    (u_longlong_t)ds->ds_userrefs_obj);
	/* No "\n" in the format: the ctime() string supplies it. */
	(void) printf("\t\tcreation_time = %s", ctime(&crtime));
	(void) printf("\t\tcreation_txg = %llu\n",
	    (u_longlong_t)ds->ds_creation_txg);
	(void) printf("\t\tdeadlist_obj = %llu\n",
	    (u_longlong_t)ds->ds_deadlist_obj);
	/* "used_bytes" shows ds_referenced_bytes (formatted above). */
	(void) printf("\t\tused_bytes = %s\n", used);
	(void) printf("\t\tcompressed_bytes = %s\n", compressed);
	(void) printf("\t\tuncompressed_bytes = %s\n", uncompressed);
	(void) printf("\t\tunique = %s\n", unique);
	(void) printf("\t\tfsid_guid = %llu\n",
	    (u_longlong_t)ds->ds_fsid_guid);
	(void) printf("\t\tguid = %llu\n",
	    (u_longlong_t)ds->ds_guid);
	(void) printf("\t\tflags = %llx\n",
	    (u_longlong_t)ds->ds_flags);
	(void) printf("\t\tnext_clones_obj = %llu\n",
	    (u_longlong_t)ds->ds_next_clones_obj);
	(void) printf("\t\tprops_obj = %llu\n",
	    (u_longlong_t)ds->ds_props_obj);
	(void) printf("\t\tbp = %s\n", blkbuf);
}

/*
 * bptree_iterate() callback: print 'bp' in full blkptr format unless its
 * birth txg is zero.  Always returns 0.  'arg' and 'tx' are unused.
 */
static int
dump_bptree_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
	(void) arg, (void) tx;
	char blkbuf[BP_SPRINTF_LEN];

	if (bp->blk_birth != 0) {
		snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
		(void) printf("\t%s\n", blkbuf);
	}
	return (0);
}

/*
 * Print a summary of bptree object 'obj' in 'os', labelled 'name'.
 * Nothing is printed when dump_opt['d'] < 3; the individual block pointers
 * are listed only when dump_opt['d'] >= 5.
 */
static void
dump_bptree(objset_t *os, uint64_t obj, const char *name)
{
	char bytes[32];
	bptree_phys_t *bt;
	dmu_buf_t *db;

	/* make sure nicenum has enough space */
	_Static_assert(sizeof (bytes) >= NN_NUMBUF_SZ, "bytes truncated");

	if (dump_opt['d'] < 3)
		return;

	/* The bptree_phys_t is read from the object's bonus buffer. */
	VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
	bt = db->db_data;
	zdb_nicenum(bt->bt_bytes, bytes, sizeof (bytes));
	(void) printf("\n %s: %llu datasets, %s\n",
	    name, (unsigned
long long)(bt->bt_end - bt->bt_begin), bytes); 2649 dmu_buf_rele(db, FTAG); 2650 2651 if (dump_opt['d'] < 5) 2652 return; 2653 2654 (void) printf("\n"); 2655 2656 (void) bptree_iterate(os, obj, B_FALSE, dump_bptree_cb, NULL, NULL); 2657 } 2658 2659 static int 2660 dump_bpobj_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, dmu_tx_t *tx) 2661 { 2662 (void) arg, (void) tx; 2663 char blkbuf[BP_SPRINTF_LEN]; 2664 2665 ASSERT(bp->blk_birth != 0); 2666 snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp, bp_freed); 2667 (void) printf("\t%s\n", blkbuf); 2668 return (0); 2669 } 2670 2671 static void 2672 dump_full_bpobj(bpobj_t *bpo, const char *name, int indent) 2673 { 2674 char bytes[32]; 2675 char comp[32]; 2676 char uncomp[32]; 2677 uint64_t i; 2678 2679 /* make sure nicenum has enough space */ 2680 _Static_assert(sizeof (bytes) >= NN_NUMBUF_SZ, "bytes truncated"); 2681 _Static_assert(sizeof (comp) >= NN_NUMBUF_SZ, "comp truncated"); 2682 _Static_assert(sizeof (uncomp) >= NN_NUMBUF_SZ, "uncomp truncated"); 2683 2684 if (dump_opt['d'] < 3) 2685 return; 2686 2687 zdb_nicenum(bpo->bpo_phys->bpo_bytes, bytes, sizeof (bytes)); 2688 if (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_subobjs != 0) { 2689 zdb_nicenum(bpo->bpo_phys->bpo_comp, comp, sizeof (comp)); 2690 zdb_nicenum(bpo->bpo_phys->bpo_uncomp, uncomp, sizeof (uncomp)); 2691 if (bpo->bpo_havefreed) { 2692 (void) printf(" %*s: object %llu, %llu local " 2693 "blkptrs, %llu freed, %llu subobjs in object %llu, " 2694 "%s (%s/%s comp)\n", 2695 indent * 8, name, 2696 (u_longlong_t)bpo->bpo_object, 2697 (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs, 2698 (u_longlong_t)bpo->bpo_phys->bpo_num_freed, 2699 (u_longlong_t)bpo->bpo_phys->bpo_num_subobjs, 2700 (u_longlong_t)bpo->bpo_phys->bpo_subobjs, 2701 bytes, comp, uncomp); 2702 } else { 2703 (void) printf(" %*s: object %llu, %llu local " 2704 "blkptrs, %llu subobjs in object %llu, " 2705 "%s (%s/%s comp)\n", 2706 indent * 8, name, 2707 (u_longlong_t)bpo->bpo_object, 2708 
(u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs, 2709 (u_longlong_t)bpo->bpo_phys->bpo_num_subobjs, 2710 (u_longlong_t)bpo->bpo_phys->bpo_subobjs, 2711 bytes, comp, uncomp); 2712 } 2713 2714 for (i = 0; i < bpo->bpo_phys->bpo_num_subobjs; i++) { 2715 uint64_t subobj; 2716 bpobj_t subbpo; 2717 int error; 2718 VERIFY0(dmu_read(bpo->bpo_os, 2719 bpo->bpo_phys->bpo_subobjs, 2720 i * sizeof (subobj), sizeof (subobj), &subobj, 0)); 2721 error = bpobj_open(&subbpo, bpo->bpo_os, subobj); 2722 if (error != 0) { 2723 (void) printf("ERROR %u while trying to open " 2724 "subobj id %llu\n", 2725 error, (u_longlong_t)subobj); 2726 continue; 2727 } 2728 dump_full_bpobj(&subbpo, "subobj", indent + 1); 2729 bpobj_close(&subbpo); 2730 } 2731 } else { 2732 if (bpo->bpo_havefreed) { 2733 (void) printf(" %*s: object %llu, %llu blkptrs, " 2734 "%llu freed, %s\n", 2735 indent * 8, name, 2736 (u_longlong_t)bpo->bpo_object, 2737 (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs, 2738 (u_longlong_t)bpo->bpo_phys->bpo_num_freed, 2739 bytes); 2740 } else { 2741 (void) printf(" %*s: object %llu, %llu blkptrs, " 2742 "%s\n", 2743 indent * 8, name, 2744 (u_longlong_t)bpo->bpo_object, 2745 (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs, 2746 bytes); 2747 } 2748 } 2749 2750 if (dump_opt['d'] < 5) 2751 return; 2752 2753 2754 if (indent == 0) { 2755 (void) bpobj_iterate_nofree(bpo, dump_bpobj_cb, NULL, NULL); 2756 (void) printf("\n"); 2757 } 2758 } 2759 2760 static int 2761 dump_bookmark(dsl_pool_t *dp, char *name, boolean_t print_redact, 2762 boolean_t print_list) 2763 { 2764 int err = 0; 2765 zfs_bookmark_phys_t prop; 2766 objset_t *mos = dp->dp_spa->spa_meta_objset; 2767 err = dsl_bookmark_lookup(dp, name, NULL, &prop); 2768 2769 if (err != 0) { 2770 return (err); 2771 } 2772 2773 (void) printf("\t#%s: ", strchr(name, '#') + 1); 2774 (void) printf("{guid: %llx creation_txg: %llu creation_time: " 2775 "%llu redaction_obj: %llu}\n", (u_longlong_t)prop.zbm_guid, 2776 (u_longlong_t)prop.zbm_creation_txg, 2777 
(u_longlong_t)prop.zbm_creation_time, 2778 (u_longlong_t)prop.zbm_redaction_obj); 2779 2780 IMPLY(print_list, print_redact); 2781 if (!print_redact || prop.zbm_redaction_obj == 0) 2782 return (0); 2783 2784 redaction_list_t *rl; 2785 VERIFY0(dsl_redaction_list_hold_obj(dp, 2786 prop.zbm_redaction_obj, FTAG, &rl)); 2787 2788 redaction_list_phys_t *rlp = rl->rl_phys; 2789 (void) printf("\tRedacted:\n\t\tProgress: "); 2790 if (rlp->rlp_last_object != UINT64_MAX || 2791 rlp->rlp_last_blkid != UINT64_MAX) { 2792 (void) printf("%llu %llu (incomplete)\n", 2793 (u_longlong_t)rlp->rlp_last_object, 2794 (u_longlong_t)rlp->rlp_last_blkid); 2795 } else { 2796 (void) printf("complete\n"); 2797 } 2798 (void) printf("\t\tSnapshots: ["); 2799 for (unsigned int i = 0; i < rlp->rlp_num_snaps; i++) { 2800 if (i > 0) 2801 (void) printf(", "); 2802 (void) printf("%0llu", 2803 (u_longlong_t)rlp->rlp_snaps[i]); 2804 } 2805 (void) printf("]\n\t\tLength: %llu\n", 2806 (u_longlong_t)rlp->rlp_num_entries); 2807 2808 if (!print_list) { 2809 dsl_redaction_list_rele(rl, FTAG); 2810 return (0); 2811 } 2812 2813 if (rlp->rlp_num_entries == 0) { 2814 dsl_redaction_list_rele(rl, FTAG); 2815 (void) printf("\t\tRedaction List: []\n\n"); 2816 return (0); 2817 } 2818 2819 redact_block_phys_t *rbp_buf; 2820 uint64_t size; 2821 dmu_object_info_t doi; 2822 2823 VERIFY0(dmu_object_info(mos, prop.zbm_redaction_obj, &doi)); 2824 size = doi.doi_max_offset; 2825 rbp_buf = kmem_alloc(size, KM_SLEEP); 2826 2827 err = dmu_read(mos, prop.zbm_redaction_obj, 0, size, 2828 rbp_buf, 0); 2829 if (err != 0) { 2830 dsl_redaction_list_rele(rl, FTAG); 2831 kmem_free(rbp_buf, size); 2832 return (err); 2833 } 2834 2835 (void) printf("\t\tRedaction List: [{object: %llx, offset: " 2836 "%llx, blksz: %x, count: %llx}", 2837 (u_longlong_t)rbp_buf[0].rbp_object, 2838 (u_longlong_t)rbp_buf[0].rbp_blkid, 2839 (uint_t)(redact_block_get_size(&rbp_buf[0])), 2840 (u_longlong_t)redact_block_get_count(&rbp_buf[0])); 2841 2842 for (size_t 
i = 1; i < rlp->rlp_num_entries; i++) { 2843 (void) printf(",\n\t\t{object: %llx, offset: %llx, " 2844 "blksz: %x, count: %llx}", 2845 (u_longlong_t)rbp_buf[i].rbp_object, 2846 (u_longlong_t)rbp_buf[i].rbp_blkid, 2847 (uint_t)(redact_block_get_size(&rbp_buf[i])), 2848 (u_longlong_t)redact_block_get_count(&rbp_buf[i])); 2849 } 2850 dsl_redaction_list_rele(rl, FTAG); 2851 kmem_free(rbp_buf, size); 2852 (void) printf("]\n\n"); 2853 return (0); 2854 } 2855 2856 static void 2857 dump_bookmarks(objset_t *os, int verbosity) 2858 { 2859 zap_cursor_t zc; 2860 zap_attribute_t attr; 2861 dsl_dataset_t *ds = dmu_objset_ds(os); 2862 dsl_pool_t *dp = spa_get_dsl(os->os_spa); 2863 objset_t *mos = os->os_spa->spa_meta_objset; 2864 if (verbosity < 4) 2865 return; 2866 dsl_pool_config_enter(dp, FTAG); 2867 2868 for (zap_cursor_init(&zc, mos, ds->ds_bookmarks_obj); 2869 zap_cursor_retrieve(&zc, &attr) == 0; 2870 zap_cursor_advance(&zc)) { 2871 char osname[ZFS_MAX_DATASET_NAME_LEN]; 2872 char buf[ZFS_MAX_DATASET_NAME_LEN]; 2873 int len; 2874 dmu_objset_name(os, osname); 2875 len = snprintf(buf, sizeof (buf), "%s#%s", osname, 2876 attr.za_name); 2877 VERIFY3S(len, <, ZFS_MAX_DATASET_NAME_LEN); 2878 (void) dump_bookmark(dp, buf, verbosity >= 5, verbosity >= 6); 2879 } 2880 zap_cursor_fini(&zc); 2881 dsl_pool_config_exit(dp, FTAG); 2882 } 2883 2884 static void 2885 bpobj_count_refd(bpobj_t *bpo) 2886 { 2887 mos_obj_refd(bpo->bpo_object); 2888 2889 if (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_subobjs != 0) { 2890 mos_obj_refd(bpo->bpo_phys->bpo_subobjs); 2891 for (uint64_t i = 0; i < bpo->bpo_phys->bpo_num_subobjs; i++) { 2892 uint64_t subobj; 2893 bpobj_t subbpo; 2894 int error; 2895 VERIFY0(dmu_read(bpo->bpo_os, 2896 bpo->bpo_phys->bpo_subobjs, 2897 i * sizeof (subobj), sizeof (subobj), &subobj, 0)); 2898 error = bpobj_open(&subbpo, bpo->bpo_os, subobj); 2899 if (error != 0) { 2900 (void) printf("ERROR %u while trying to open " 2901 "subobj id %llu\n", 2902 error, 
(u_longlong_t)subobj); 2903 continue; 2904 } 2905 bpobj_count_refd(&subbpo); 2906 bpobj_close(&subbpo); 2907 } 2908 } 2909 } 2910 2911 static int 2912 dsl_deadlist_entry_count_refd(void *arg, dsl_deadlist_entry_t *dle) 2913 { 2914 spa_t *spa = arg; 2915 uint64_t empty_bpobj = spa->spa_dsl_pool->dp_empty_bpobj; 2916 if (dle->dle_bpobj.bpo_object != empty_bpobj) 2917 bpobj_count_refd(&dle->dle_bpobj); 2918 return (0); 2919 } 2920 2921 static int 2922 dsl_deadlist_entry_dump(void *arg, dsl_deadlist_entry_t *dle) 2923 { 2924 ASSERT(arg == NULL); 2925 if (dump_opt['d'] >= 5) { 2926 char buf[128]; 2927 (void) snprintf(buf, sizeof (buf), 2928 "mintxg %llu -> obj %llu", 2929 (longlong_t)dle->dle_mintxg, 2930 (longlong_t)dle->dle_bpobj.bpo_object); 2931 2932 dump_full_bpobj(&dle->dle_bpobj, buf, 0); 2933 } else { 2934 (void) printf("mintxg %llu -> obj %llu\n", 2935 (longlong_t)dle->dle_mintxg, 2936 (longlong_t)dle->dle_bpobj.bpo_object); 2937 } 2938 return (0); 2939 } 2940 2941 static void 2942 dump_blkptr_list(dsl_deadlist_t *dl, const char *name) 2943 { 2944 char bytes[32]; 2945 char comp[32]; 2946 char uncomp[32]; 2947 char entries[32]; 2948 spa_t *spa = dmu_objset_spa(dl->dl_os); 2949 uint64_t empty_bpobj = spa->spa_dsl_pool->dp_empty_bpobj; 2950 2951 if (dl->dl_oldfmt) { 2952 if (dl->dl_bpobj.bpo_object != empty_bpobj) 2953 bpobj_count_refd(&dl->dl_bpobj); 2954 } else { 2955 mos_obj_refd(dl->dl_object); 2956 dsl_deadlist_iterate(dl, dsl_deadlist_entry_count_refd, spa); 2957 } 2958 2959 /* make sure nicenum has enough space */ 2960 _Static_assert(sizeof (bytes) >= NN_NUMBUF_SZ, "bytes truncated"); 2961 _Static_assert(sizeof (comp) >= NN_NUMBUF_SZ, "comp truncated"); 2962 _Static_assert(sizeof (uncomp) >= NN_NUMBUF_SZ, "uncomp truncated"); 2963 _Static_assert(sizeof (entries) >= NN_NUMBUF_SZ, "entries truncated"); 2964 2965 if (dump_opt['d'] < 3) 2966 return; 2967 2968 if (dl->dl_oldfmt) { 2969 dump_full_bpobj(&dl->dl_bpobj, "old-format deadlist", 0); 2970 return; 2971 } 
2972 2973 zdb_nicenum(dl->dl_phys->dl_used, bytes, sizeof (bytes)); 2974 zdb_nicenum(dl->dl_phys->dl_comp, comp, sizeof (comp)); 2975 zdb_nicenum(dl->dl_phys->dl_uncomp, uncomp, sizeof (uncomp)); 2976 zdb_nicenum(avl_numnodes(&dl->dl_tree), entries, sizeof (entries)); 2977 (void) printf("\n %s: %s (%s/%s comp), %s entries\n", 2978 name, bytes, comp, uncomp, entries); 2979 2980 if (dump_opt['d'] < 4) 2981 return; 2982 2983 (void) putchar('\n'); 2984 2985 dsl_deadlist_iterate(dl, dsl_deadlist_entry_dump, NULL); 2986 } 2987 2988 static int 2989 verify_dd_livelist(objset_t *os) 2990 { 2991 uint64_t ll_used, used, ll_comp, comp, ll_uncomp, uncomp; 2992 dsl_pool_t *dp = spa_get_dsl(os->os_spa); 2993 dsl_dir_t *dd = os->os_dsl_dataset->ds_dir; 2994 2995 ASSERT(!dmu_objset_is_snapshot(os)); 2996 if (!dsl_deadlist_is_open(&dd->dd_livelist)) 2997 return (0); 2998 2999 /* Iterate through the livelist to check for duplicates */ 3000 dsl_deadlist_iterate(&dd->dd_livelist, sublivelist_verify_lightweight, 3001 NULL); 3002 3003 dsl_pool_config_enter(dp, FTAG); 3004 dsl_deadlist_space(&dd->dd_livelist, &ll_used, 3005 &ll_comp, &ll_uncomp); 3006 3007 dsl_dataset_t *origin_ds; 3008 ASSERT(dsl_pool_config_held(dp)); 3009 VERIFY0(dsl_dataset_hold_obj(dp, 3010 dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin_ds)); 3011 VERIFY0(dsl_dataset_space_written(origin_ds, os->os_dsl_dataset, 3012 &used, &comp, &uncomp)); 3013 dsl_dataset_rele(origin_ds, FTAG); 3014 dsl_pool_config_exit(dp, FTAG); 3015 /* 3016 * It's possible that the dataset's uncomp space is larger than the 3017 * livelist's because livelists do not track embedded block pointers 3018 */ 3019 if (used != ll_used || comp != ll_comp || uncomp < ll_uncomp) { 3020 char nice_used[32], nice_comp[32], nice_uncomp[32]; 3021 (void) printf("Discrepancy in space accounting:\n"); 3022 zdb_nicenum(used, nice_used, sizeof (nice_used)); 3023 zdb_nicenum(comp, nice_comp, sizeof (nice_comp)); 3024 zdb_nicenum(uncomp, nice_uncomp, sizeof 
(nice_uncomp)); 3025 (void) printf("dir: used %s, comp %s, uncomp %s\n", 3026 nice_used, nice_comp, nice_uncomp); 3027 zdb_nicenum(ll_used, nice_used, sizeof (nice_used)); 3028 zdb_nicenum(ll_comp, nice_comp, sizeof (nice_comp)); 3029 zdb_nicenum(ll_uncomp, nice_uncomp, sizeof (nice_uncomp)); 3030 (void) printf("livelist: used %s, comp %s, uncomp %s\n", 3031 nice_used, nice_comp, nice_uncomp); 3032 return (1); 3033 } 3034 return (0); 3035 } 3036 3037 static char *key_material = NULL; 3038 3039 static boolean_t 3040 zdb_derive_key(dsl_dir_t *dd, uint8_t *key_out) 3041 { 3042 uint64_t keyformat, salt, iters; 3043 int i; 3044 unsigned char c; 3045 3046 VERIFY0(zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj, 3047 zfs_prop_to_name(ZFS_PROP_KEYFORMAT), sizeof (uint64_t), 3048 1, &keyformat)); 3049 3050 switch (keyformat) { 3051 case ZFS_KEYFORMAT_HEX: 3052 for (i = 0; i < WRAPPING_KEY_LEN * 2; i += 2) { 3053 if (!isxdigit(key_material[i]) || 3054 !isxdigit(key_material[i+1])) 3055 return (B_FALSE); 3056 if (sscanf(&key_material[i], "%02hhx", &c) != 1) 3057 return (B_FALSE); 3058 key_out[i / 2] = c; 3059 } 3060 break; 3061 3062 case ZFS_KEYFORMAT_PASSPHRASE: 3063 VERIFY0(zap_lookup(dd->dd_pool->dp_meta_objset, 3064 dd->dd_crypto_obj, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), 3065 sizeof (uint64_t), 1, &salt)); 3066 VERIFY0(zap_lookup(dd->dd_pool->dp_meta_objset, 3067 dd->dd_crypto_obj, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), 3068 sizeof (uint64_t), 1, &iters)); 3069 3070 if (PKCS5_PBKDF2_HMAC_SHA1(key_material, strlen(key_material), 3071 ((uint8_t *)&salt), sizeof (uint64_t), iters, 3072 WRAPPING_KEY_LEN, key_out) != 1) 3073 return (B_FALSE); 3074 3075 break; 3076 3077 default: 3078 fatal("no support for key format %u\n", 3079 (unsigned int) keyformat); 3080 } 3081 3082 return (B_TRUE); 3083 } 3084 3085 static char encroot[ZFS_MAX_DATASET_NAME_LEN]; 3086 static boolean_t key_loaded = B_FALSE; 3087 3088 static void 3089 zdb_load_key(objset_t *os) 3090 { 3091 
dsl_pool_t *dp;
	dsl_dir_t *dd, *rdd;
	uint8_t key[WRAPPING_KEY_LEN];
	uint64_t rddobj;
	int err;

	dp = spa_get_dsl(os->os_spa);
	dd = os->os_dsl_dataset->ds_dir;

	/*
	 * Look up the dsl_dir object recorded under
	 * DSL_CRYPTO_KEY_ROOT_DDOBJ and remember its name in 'encroot'.
	 */
	dsl_pool_config_enter(dp, FTAG);
	VERIFY0(zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj,
	    DSL_CRYPTO_KEY_ROOT_DDOBJ, sizeof (uint64_t), 1, &rddobj));
	VERIFY0(dsl_dir_hold_obj(dd->dd_pool, rddobj, NULL, FTAG, &rdd));
	dsl_dir_name(rdd, encroot);
	dsl_dir_rele(rdd, FTAG);

	if (!zdb_derive_key(dd, key))
		fatal("couldn't derive encryption key");

	dsl_pool_config_exit(dp, FTAG);

	ASSERT3U(dsl_dataset_get_keystatus(dd), ==, ZFS_KEYSTATUS_UNAVAILABLE);

	dsl_crypto_params_t *dcp;
	nvlist_t *crypto_args;

	/* The raw key is handed over as the "wkeydata" nvlist entry. */
	crypto_args = fnvlist_alloc();
	fnvlist_add_uint8_array(crypto_args, "wkeydata",
	    (uint8_t *)key, WRAPPING_KEY_LEN);
	VERIFY0(dsl_crypto_params_create_nvlist(DCP_CMD_NONE,
	    NULL, crypto_args, &dcp));
	err = spa_keystore_load_wkey(encroot, dcp, B_FALSE);

	/* The second argument tells the free routine whether the load failed. */
	dsl_crypto_params_free(dcp, (err != 0));
	fnvlist_free(crypto_args);

	if (err != 0)
		fatal(
		    "couldn't load encryption key for %s: %s",
		    encroot, err == ZFS_ERR_CRYPTO_NOTSUP ?
		    "crypto params not supported" : strerror(err));

	ASSERT3U(dsl_dataset_get_keystatus(dd), ==, ZFS_KEYSTATUS_AVAILABLE);

	printf("Unlocked encryption root: %s\n", encroot);
	key_loaded = B_TRUE;
}

/*
 * Unload the wrapping key loaded by zdb_load_key(), if any, and clear
 * key_loaded.  Does nothing when no key is loaded.
 */
static void
zdb_unload_key(void)
{
	if (!key_loaded)
		return;

	VERIFY0(spa_keystore_unload_wkey(encroot));
	key_loaded = B_FALSE;
}

/* FUID lookup trees; torn down by fuid_table_destroy(). */
static avl_tree_t idx_tree;
static avl_tree_t domain_tree;
static boolean_t fuid_table_loaded;
/* The objset currently opened through open_objset(), or NULL. */
static objset_t *sa_os = NULL;
/* Attribute table filled in by sa_setup() in open_objset(). */
static sa_attr_type_t *sa_attr_table = NULL;

/*
 * Hold the objset named 'path' on behalf of 'tag' and return it in '*osp'.
 *
 * When dump_opt['K'] is set the dataset's encryption key is loaded first
 * and the final hold is taken with DS_HOLD_FLAG_DECRYPT.  For ZFS
 * filesystems that are unencrypted or have their key loaded, the system
 * attribute layer is set up as well.
 *
 * Only one objset may be open at a time (sa_os must be NULL on entry).
 * Returns 0 on success or an errno value; failures are reported on stderr.
 */
static int
open_objset(const char *path, const void *tag, objset_t **osp)
{
	int err;
	uint64_t sa_attrs = 0;
	uint64_t version = 0;

	VERIFY3P(sa_os, ==, NULL);

	/*
	 * We can't own an objset if it's redacted.  Therefore, we do this
	 * dance: hold the objset, then acquire a long hold on its dataset, then
	 * release the pool (which is held as part of holding the objset).
	 */

	if (dump_opt['K']) {
		/* decryption requested, try to load keys */
		err = dmu_objset_hold(path, tag, osp);
		if (err != 0) {
			(void) fprintf(stderr, "failed to hold dataset "
			    "'%s': %s\n",
			    path, strerror(err));
			return (err);
		}
		dsl_dataset_long_hold(dmu_objset_ds(*osp), tag);
		dsl_pool_rele(dmu_objset_pool(*osp), tag);

		/* succeeds or dies */
		zdb_load_key(*osp);

		/* release it all */
		dsl_dataset_long_rele(dmu_objset_ds(*osp), tag);
		dsl_dataset_rele(dmu_objset_ds(*osp), tag);
	}

	/* Take the real hold, asking for decryption if a key was loaded. */
	int ds_hold_flags = key_loaded ?
DS_HOLD_FLAG_DECRYPT : 0; 3191 3192 err = dmu_objset_hold_flags(path, ds_hold_flags, tag, osp); 3193 if (err != 0) { 3194 (void) fprintf(stderr, "failed to hold dataset '%s': %s\n", 3195 path, strerror(err)); 3196 return (err); 3197 } 3198 dsl_dataset_long_hold(dmu_objset_ds(*osp), tag); 3199 dsl_pool_rele(dmu_objset_pool(*osp), tag); 3200 3201 if (dmu_objset_type(*osp) == DMU_OST_ZFS && 3202 (key_loaded || !(*osp)->os_encrypted)) { 3203 (void) zap_lookup(*osp, MASTER_NODE_OBJ, ZPL_VERSION_STR, 3204 8, 1, &version); 3205 if (version >= ZPL_VERSION_SA) { 3206 (void) zap_lookup(*osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 3207 8, 1, &sa_attrs); 3208 } 3209 err = sa_setup(*osp, sa_attrs, zfs_attr_table, ZPL_END, 3210 &sa_attr_table); 3211 if (err != 0) { 3212 (void) fprintf(stderr, "sa_setup failed: %s\n", 3213 strerror(err)); 3214 dsl_dataset_long_rele(dmu_objset_ds(*osp), tag); 3215 dsl_dataset_rele_flags(dmu_objset_ds(*osp), 3216 ds_hold_flags, tag); 3217 *osp = NULL; 3218 } 3219 } 3220 sa_os = *osp; 3221 3222 return (err); 3223 } 3224 3225 static void 3226 close_objset(objset_t *os, const void *tag) 3227 { 3228 VERIFY3P(os, ==, sa_os); 3229 if (os->os_sa != NULL) 3230 sa_tear_down(os); 3231 dsl_dataset_long_rele(dmu_objset_ds(os), tag); 3232 dsl_dataset_rele_flags(dmu_objset_ds(os), 3233 key_loaded ? DS_HOLD_FLAG_DECRYPT : 0, tag); 3234 sa_attr_table = NULL; 3235 sa_os = NULL; 3236 3237 zdb_unload_key(); 3238 } 3239 3240 static void 3241 fuid_table_destroy(void) 3242 { 3243 if (fuid_table_loaded) { 3244 zfs_fuid_table_destroy(&idx_tree, &domain_tree); 3245 fuid_table_loaded = B_FALSE; 3246 } 3247 } 3248 3249 /* 3250 * print uid or gid information. 3251 * For normal POSIX id just the id is printed in decimal format. 3252 * For CIFS files with FUID the fuid is printed in hex followed by 3253 * the domain-rid string. 
3254 */ 3255 static void 3256 print_idstr(uint64_t id, const char *id_type) 3257 { 3258 if (FUID_INDEX(id)) { 3259 const char *domain = 3260 zfs_fuid_idx_domain(&idx_tree, FUID_INDEX(id)); 3261 (void) printf("\t%s %llx [%s-%d]\n", id_type, 3262 (u_longlong_t)id, domain, (int)FUID_RID(id)); 3263 } else { 3264 (void) printf("\t%s %llu\n", id_type, (u_longlong_t)id); 3265 } 3266 3267 } 3268 3269 static void 3270 dump_uidgid(objset_t *os, uint64_t uid, uint64_t gid) 3271 { 3272 uint32_t uid_idx, gid_idx; 3273 3274 uid_idx = FUID_INDEX(uid); 3275 gid_idx = FUID_INDEX(gid); 3276 3277 /* Load domain table, if not already loaded */ 3278 if (!fuid_table_loaded && (uid_idx || gid_idx)) { 3279 uint64_t fuid_obj; 3280 3281 /* first find the fuid object. It lives in the master node */ 3282 VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 3283 8, 1, &fuid_obj) == 0); 3284 zfs_fuid_avl_tree_create(&idx_tree, &domain_tree); 3285 (void) zfs_fuid_table_load(os, fuid_obj, 3286 &idx_tree, &domain_tree); 3287 fuid_table_loaded = B_TRUE; 3288 } 3289 3290 print_idstr(uid, "uid"); 3291 print_idstr(gid, "gid"); 3292 } 3293 3294 static void 3295 dump_znode_sa_xattr(sa_handle_t *hdl) 3296 { 3297 nvlist_t *sa_xattr; 3298 nvpair_t *elem = NULL; 3299 int sa_xattr_size = 0; 3300 int sa_xattr_entries = 0; 3301 int error; 3302 char *sa_xattr_packed; 3303 3304 error = sa_size(hdl, sa_attr_table[ZPL_DXATTR], &sa_xattr_size); 3305 if (error || sa_xattr_size == 0) 3306 return; 3307 3308 sa_xattr_packed = malloc(sa_xattr_size); 3309 if (sa_xattr_packed == NULL) 3310 return; 3311 3312 error = sa_lookup(hdl, sa_attr_table[ZPL_DXATTR], 3313 sa_xattr_packed, sa_xattr_size); 3314 if (error) { 3315 free(sa_xattr_packed); 3316 return; 3317 } 3318 3319 error = nvlist_unpack(sa_xattr_packed, sa_xattr_size, &sa_xattr, 0); 3320 if (error) { 3321 free(sa_xattr_packed); 3322 return; 3323 } 3324 3325 while ((elem = nvlist_next_nvpair(sa_xattr, elem)) != NULL) 3326 sa_xattr_entries++; 3327 3328 (void) 
printf("\tSA xattrs: %d bytes, %d entries\n\n", 3329 sa_xattr_size, sa_xattr_entries); 3330 while ((elem = nvlist_next_nvpair(sa_xattr, elem)) != NULL) { 3331 boolean_t can_print = !dump_opt['P']; 3332 uchar_t *value; 3333 uint_t cnt, idx; 3334 3335 (void) printf("\t\t%s = ", nvpair_name(elem)); 3336 nvpair_value_byte_array(elem, &value, &cnt); 3337 3338 for (idx = 0; idx < cnt; ++idx) { 3339 if (!isprint(value[idx])) { 3340 can_print = B_FALSE; 3341 break; 3342 } 3343 } 3344 3345 for (idx = 0; idx < cnt; ++idx) { 3346 if (can_print) 3347 (void) putchar(value[idx]); 3348 else 3349 (void) printf("\\%3.3o", value[idx]); 3350 } 3351 (void) putchar('\n'); 3352 } 3353 3354 nvlist_free(sa_xattr); 3355 free(sa_xattr_packed); 3356 } 3357 3358 static void 3359 dump_znode_symlink(sa_handle_t *hdl) 3360 { 3361 int sa_symlink_size = 0; 3362 char linktarget[MAXPATHLEN]; 3363 int error; 3364 3365 error = sa_size(hdl, sa_attr_table[ZPL_SYMLINK], &sa_symlink_size); 3366 if (error || sa_symlink_size == 0) { 3367 return; 3368 } 3369 if (sa_symlink_size >= sizeof (linktarget)) { 3370 (void) printf("symlink size %d is too large\n", 3371 sa_symlink_size); 3372 return; 3373 } 3374 linktarget[sa_symlink_size] = '\0'; 3375 if (sa_lookup(hdl, sa_attr_table[ZPL_SYMLINK], 3376 &linktarget, sa_symlink_size) == 0) 3377 (void) printf("\ttarget %s\n", linktarget); 3378 } 3379 3380 static void 3381 dump_znode(objset_t *os, uint64_t object, void *data, size_t size) 3382 { 3383 (void) data, (void) size; 3384 char path[MAXPATHLEN * 2]; /* allow for xattr and failure prefix */ 3385 sa_handle_t *hdl; 3386 uint64_t xattr, rdev, gen; 3387 uint64_t uid, gid, mode, fsize, parent, links; 3388 uint64_t pflags; 3389 uint64_t acctm[2], modtm[2], chgtm[2], crtm[2]; 3390 time_t z_crtime, z_atime, z_mtime, z_ctime; 3391 sa_bulk_attr_t bulk[12]; 3392 int idx = 0; 3393 int error; 3394 3395 VERIFY3P(os, ==, sa_os); 3396 if (sa_handle_get(os, object, NULL, SA_HDL_PRIVATE, &hdl)) { 3397 (void) printf("Failed to get 
handle for SA znode\n"); 3398 return; 3399 } 3400 3401 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_UID], NULL, &uid, 8); 3402 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GID], NULL, &gid, 8); 3403 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_LINKS], NULL, 3404 &links, 8); 3405 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GEN], NULL, &gen, 8); 3406 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MODE], NULL, 3407 &mode, 8); 3408 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_PARENT], 3409 NULL, &parent, 8); 3410 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_SIZE], NULL, 3411 &fsize, 8); 3412 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_ATIME], NULL, 3413 acctm, 16); 3414 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MTIME], NULL, 3415 modtm, 16); 3416 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CRTIME], NULL, 3417 crtm, 16); 3418 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CTIME], NULL, 3419 chgtm, 16); 3420 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_FLAGS], NULL, 3421 &pflags, 8); 3422 3423 if (sa_bulk_lookup(hdl, bulk, idx)) { 3424 (void) sa_handle_destroy(hdl); 3425 return; 3426 } 3427 3428 z_crtime = (time_t)crtm[0]; 3429 z_atime = (time_t)acctm[0]; 3430 z_mtime = (time_t)modtm[0]; 3431 z_ctime = (time_t)chgtm[0]; 3432 3433 if (dump_opt['d'] > 4) { 3434 error = zfs_obj_to_path(os, object, path, sizeof (path)); 3435 if (error == ESTALE) { 3436 (void) snprintf(path, sizeof (path), "on delete queue"); 3437 } else if (error != 0) { 3438 leaked_objects++; 3439 (void) snprintf(path, sizeof (path), 3440 "path not found, possibly leaked"); 3441 } 3442 (void) printf("\tpath %s\n", path); 3443 } 3444 3445 if (S_ISLNK(mode)) 3446 dump_znode_symlink(hdl); 3447 dump_uidgid(os, uid, gid); 3448 (void) printf("\tatime %s", ctime(&z_atime)); 3449 (void) printf("\tmtime %s", ctime(&z_mtime)); 3450 (void) printf("\tctime %s", ctime(&z_ctime)); 3451 (void) printf("\tcrtime %s", ctime(&z_crtime)); 3452 (void) printf("\tgen %llu\n", (u_longlong_t)gen); 3453 (void) 
/*
 * Object viewer for ACL objects. Intentionally prints nothing; it exists
 * so the object_viewer[] table has an entry for these types.
 */
static void
dump_acl(objset_t *os, uint64_t object, void *data, size_t size)
{
	/* All parameters are unused. */
	(void) os, (void) object, (void) data, (void) size;
}

/*
 * Object viewer for DMU objset objects. Intentionally prints nothing.
 */
static void
dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size)
{
	/* All parameters are unused. */
	(void) os, (void) object, (void) data, (void) size;
}

/*
 * Per-object-type viewer functions. dump_object() indexes this table with
 * ZDB_OT_TYPE(type), for both the object type and its bonus type. The
 * entries are positional and the final slot handles unknown types.
 * NOTE(review): the order presumably has to track the dmu_object_type_t
 * enumeration, which is declared outside this file section; confirm
 * before adding or moving entries.
 */
static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = {
	dump_none,		/* unallocated			*/
	dump_zap,		/* object directory		*/
	dump_uint64,		/* object array			*/
	dump_none,		/* packed nvlist		*/
	dump_packed_nvlist,	/* packed nvlist size		*/
	dump_none,		/* bpobj			*/
	dump_bpobj,		/* bpobj header			*/
	dump_none,		/* SPA space map header		*/
	dump_none,		/* SPA space map		*/
	dump_none,		/* ZIL intent log		*/
	dump_dnode,		/* DMU dnode			*/
	dump_dmu_objset,	/* DMU objset			*/
	dump_dsl_dir,		/* DSL directory		*/
	dump_zap,		/* DSL directory child map	*/
	dump_zap,		/* DSL dataset snap map		*/
	dump_zap,		/* DSL props			*/
	dump_dsl_dataset,	/* DSL dataset			*/
	dump_znode,		/* ZFS znode			*/
	dump_acl,		/* ZFS V0 ACL			*/
	dump_uint8,		/* ZFS plain file		*/
	dump_zpldir,		/* ZFS directory		*/
	dump_zap,		/* ZFS master node		*/
	dump_zap,		/* ZFS delete queue		*/
	dump_uint8,		/* zvol object			*/
	dump_zap,		/* zvol prop			*/
	dump_uint8,		/* other uint8[]		*/
	dump_uint64,		/* other uint64[]		*/
	dump_zap,		/* other ZAP			*/
	dump_zap,		/* persistent error log		*/
	dump_uint8,		/* SPA history			*/
	dump_history_offsets,	/* SPA history offsets		*/
	dump_zap,		/* Pool properties		*/
	dump_zap,		/* DSL permissions		*/
	dump_acl,		/* ZFS ACL			*/
	dump_uint8,		/* ZFS SYSACL			*/
	dump_none,		/* FUID nvlist			*/
	dump_packed_nvlist,	/* FUID nvlist size		*/
	dump_zap,		/* DSL dataset next clones	*/
	dump_zap,		/* DSL scrub queue		*/
	dump_zap,		/* ZFS user/group/project used	*/
	dump_zap,		/* ZFS user/group/project quota	*/
	dump_zap,		/* snapshot refcount tags	*/
	dump_ddt_zap,		/* DDT ZAP object		*/
	dump_zap,		/* DDT statistics		*/
	dump_znode,		/* SA object			*/
	dump_zap,		/* SA Master Node		*/
	dump_sa_attrs,		/* SA attribute registration	*/
	dump_sa_layouts,	/* SA attribute layouts		*/
	dump_zap,		/* DSL scrub translations	*/
	dump_none,		/* fake dedup BP		*/
	dump_zap,		/* deadlist			*/
	dump_none,		/* deadlist hdr			*/
	dump_zap,		/* dsl clones			*/
	dump_bpobj_subobjs,	/* bpobj subobjs		*/
	dump_unknown,		/* Unknown type, must be last	*/
};
dump_acl, /* ZFS V0 ACL */ 3507 dump_uint8, /* ZFS plain file */ 3508 dump_zpldir, /* ZFS directory */ 3509 dump_zap, /* ZFS master node */ 3510 dump_zap, /* ZFS delete queue */ 3511 dump_uint8, /* zvol object */ 3512 dump_zap, /* zvol prop */ 3513 dump_uint8, /* other uint8[] */ 3514 dump_uint64, /* other uint64[] */ 3515 dump_zap, /* other ZAP */ 3516 dump_zap, /* persistent error log */ 3517 dump_uint8, /* SPA history */ 3518 dump_history_offsets, /* SPA history offsets */ 3519 dump_zap, /* Pool properties */ 3520 dump_zap, /* DSL permissions */ 3521 dump_acl, /* ZFS ACL */ 3522 dump_uint8, /* ZFS SYSACL */ 3523 dump_none, /* FUID nvlist */ 3524 dump_packed_nvlist, /* FUID nvlist size */ 3525 dump_zap, /* DSL dataset next clones */ 3526 dump_zap, /* DSL scrub queue */ 3527 dump_zap, /* ZFS user/group/project used */ 3528 dump_zap, /* ZFS user/group/project quota */ 3529 dump_zap, /* snapshot refcount tags */ 3530 dump_ddt_zap, /* DDT ZAP object */ 3531 dump_zap, /* DDT statistics */ 3532 dump_znode, /* SA object */ 3533 dump_zap, /* SA Master Node */ 3534 dump_sa_attrs, /* SA attribute registration */ 3535 dump_sa_layouts, /* SA attribute layouts */ 3536 dump_zap, /* DSL scrub translations */ 3537 dump_none, /* fake dedup BP */ 3538 dump_zap, /* deadlist */ 3539 dump_none, /* deadlist hdr */ 3540 dump_zap, /* dsl clones */ 3541 dump_bpobj_subobjs, /* bpobj subobjs */ 3542 dump_unknown, /* Unknown type, must be last */ 3543 }; 3544 3545 static boolean_t 3546 match_object_type(dmu_object_type_t obj_type, uint64_t flags) 3547 { 3548 boolean_t match = B_TRUE; 3549 3550 switch (obj_type) { 3551 case DMU_OT_DIRECTORY_CONTENTS: 3552 if (!(flags & ZOR_FLAG_DIRECTORY)) 3553 match = B_FALSE; 3554 break; 3555 case DMU_OT_PLAIN_FILE_CONTENTS: 3556 if (!(flags & ZOR_FLAG_PLAIN_FILE)) 3557 match = B_FALSE; 3558 break; 3559 case DMU_OT_SPACE_MAP: 3560 if (!(flags & ZOR_FLAG_SPACE_MAP)) 3561 match = B_FALSE; 3562 break; 3563 default: 3564 if (strcmp(zdb_ot_name(obj_type), 
"zap") == 0) { 3565 if (!(flags & ZOR_FLAG_ZAP)) 3566 match = B_FALSE; 3567 break; 3568 } 3569 3570 /* 3571 * If all bits except some of the supported flags are 3572 * set, the user combined the all-types flag (A) with 3573 * a negated flag to exclude some types (e.g. A-f to 3574 * show all object types except plain files). 3575 */ 3576 if ((flags | ZOR_SUPPORTED_FLAGS) != ZOR_FLAG_ALL_TYPES) 3577 match = B_FALSE; 3578 3579 break; 3580 } 3581 3582 return (match); 3583 } 3584 3585 static void 3586 dump_object(objset_t *os, uint64_t object, int verbosity, 3587 boolean_t *print_header, uint64_t *dnode_slots_used, uint64_t flags) 3588 { 3589 dmu_buf_t *db = NULL; 3590 dmu_object_info_t doi; 3591 dnode_t *dn; 3592 boolean_t dnode_held = B_FALSE; 3593 void *bonus = NULL; 3594 size_t bsize = 0; 3595 char iblk[32], dblk[32], lsize[32], asize[32], fill[32], dnsize[32]; 3596 char bonus_size[32]; 3597 char aux[50]; 3598 int error; 3599 3600 /* make sure nicenum has enough space */ 3601 _Static_assert(sizeof (iblk) >= NN_NUMBUF_SZ, "iblk truncated"); 3602 _Static_assert(sizeof (dblk) >= NN_NUMBUF_SZ, "dblk truncated"); 3603 _Static_assert(sizeof (lsize) >= NN_NUMBUF_SZ, "lsize truncated"); 3604 _Static_assert(sizeof (asize) >= NN_NUMBUF_SZ, "asize truncated"); 3605 _Static_assert(sizeof (bonus_size) >= NN_NUMBUF_SZ, 3606 "bonus_size truncated"); 3607 3608 if (*print_header) { 3609 (void) printf("\n%10s %3s %5s %5s %5s %6s %5s %6s %s\n", 3610 "Object", "lvl", "iblk", "dblk", "dsize", "dnsize", 3611 "lsize", "%full", "type"); 3612 *print_header = 0; 3613 } 3614 3615 if (object == 0) { 3616 dn = DMU_META_DNODE(os); 3617 dmu_object_info_from_dnode(dn, &doi); 3618 } else { 3619 /* 3620 * Encrypted datasets will have sensitive bonus buffers 3621 * encrypted. Therefore we cannot hold the bonus buffer and 3622 * must hold the dnode itself instead. 
3623 */ 3624 error = dmu_object_info(os, object, &doi); 3625 if (error) 3626 fatal("dmu_object_info() failed, errno %u", error); 3627 3628 if (!key_loaded && os->os_encrypted && 3629 DMU_OT_IS_ENCRYPTED(doi.doi_bonus_type)) { 3630 error = dnode_hold(os, object, FTAG, &dn); 3631 if (error) 3632 fatal("dnode_hold() failed, errno %u", error); 3633 dnode_held = B_TRUE; 3634 } else { 3635 error = dmu_bonus_hold(os, object, FTAG, &db); 3636 if (error) 3637 fatal("dmu_bonus_hold(%llu) failed, errno %u", 3638 object, error); 3639 bonus = db->db_data; 3640 bsize = db->db_size; 3641 dn = DB_DNODE((dmu_buf_impl_t *)db); 3642 } 3643 } 3644 3645 /* 3646 * Default to showing all object types if no flags were specified. 3647 */ 3648 if (flags != 0 && flags != ZOR_FLAG_ALL_TYPES && 3649 !match_object_type(doi.doi_type, flags)) 3650 goto out; 3651 3652 if (dnode_slots_used) 3653 *dnode_slots_used = doi.doi_dnodesize / DNODE_MIN_SIZE; 3654 3655 zdb_nicenum(doi.doi_metadata_block_size, iblk, sizeof (iblk)); 3656 zdb_nicenum(doi.doi_data_block_size, dblk, sizeof (dblk)); 3657 zdb_nicenum(doi.doi_max_offset, lsize, sizeof (lsize)); 3658 zdb_nicenum(doi.doi_physical_blocks_512 << 9, asize, sizeof (asize)); 3659 zdb_nicenum(doi.doi_bonus_size, bonus_size, sizeof (bonus_size)); 3660 zdb_nicenum(doi.doi_dnodesize, dnsize, sizeof (dnsize)); 3661 (void) snprintf(fill, sizeof (fill), "%6.2f", 100.0 * 3662 doi.doi_fill_count * doi.doi_data_block_size / (object == 0 ? 
3663 DNODES_PER_BLOCK : 1) / doi.doi_max_offset); 3664 3665 aux[0] = '\0'; 3666 3667 if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) { 3668 (void) snprintf(aux + strlen(aux), sizeof (aux) - strlen(aux), 3669 " (K=%s)", ZDB_CHECKSUM_NAME(doi.doi_checksum)); 3670 } 3671 3672 if (doi.doi_compress == ZIO_COMPRESS_INHERIT && 3673 ZIO_COMPRESS_HASLEVEL(os->os_compress) && verbosity >= 6) { 3674 const char *compname = NULL; 3675 if (zfs_prop_index_to_string(ZFS_PROP_COMPRESSION, 3676 ZIO_COMPRESS_RAW(os->os_compress, os->os_complevel), 3677 &compname) == 0) { 3678 (void) snprintf(aux + strlen(aux), 3679 sizeof (aux) - strlen(aux), " (Z=inherit=%s)", 3680 compname); 3681 } else { 3682 (void) snprintf(aux + strlen(aux), 3683 sizeof (aux) - strlen(aux), 3684 " (Z=inherit=%s-unknown)", 3685 ZDB_COMPRESS_NAME(os->os_compress)); 3686 } 3687 } else if (doi.doi_compress == ZIO_COMPRESS_INHERIT && verbosity >= 6) { 3688 (void) snprintf(aux + strlen(aux), sizeof (aux) - strlen(aux), 3689 " (Z=inherit=%s)", ZDB_COMPRESS_NAME(os->os_compress)); 3690 } else if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) { 3691 (void) snprintf(aux + strlen(aux), sizeof (aux) - strlen(aux), 3692 " (Z=%s)", ZDB_COMPRESS_NAME(doi.doi_compress)); 3693 } 3694 3695 (void) printf("%10lld %3u %5s %5s %5s %6s %5s %6s %s%s\n", 3696 (u_longlong_t)object, doi.doi_indirection, iblk, dblk, 3697 asize, dnsize, lsize, fill, zdb_ot_name(doi.doi_type), aux); 3698 3699 if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) { 3700 (void) printf("%10s %3s %5s %5s %5s %5s %5s %6s %s\n", 3701 "", "", "", "", "", "", bonus_size, "bonus", 3702 zdb_ot_name(doi.doi_bonus_type)); 3703 } 3704 3705 if (verbosity >= 4) { 3706 (void) printf("\tdnode flags: %s%s%s%s\n", 3707 (dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ? 3708 "USED_BYTES " : "", 3709 (dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ? 
3710 "USERUSED_ACCOUNTED " : "", 3711 (dn->dn_phys->dn_flags & DNODE_FLAG_USEROBJUSED_ACCOUNTED) ? 3712 "USEROBJUSED_ACCOUNTED " : "", 3713 (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ? 3714 "SPILL_BLKPTR" : ""); 3715 (void) printf("\tdnode maxblkid: %llu\n", 3716 (longlong_t)dn->dn_phys->dn_maxblkid); 3717 3718 if (!dnode_held) { 3719 object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os, 3720 object, bonus, bsize); 3721 } else { 3722 (void) printf("\t\t(bonus encrypted)\n"); 3723 } 3724 3725 if (key_loaded || 3726 (!os->os_encrypted || !DMU_OT_IS_ENCRYPTED(doi.doi_type))) { 3727 object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object, 3728 NULL, 0); 3729 } else { 3730 (void) printf("\t\t(object encrypted)\n"); 3731 } 3732 3733 *print_header = B_TRUE; 3734 } 3735 3736 if (verbosity >= 5) { 3737 if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { 3738 char blkbuf[BP_SPRINTF_LEN]; 3739 snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), 3740 DN_SPILL_BLKPTR(dn->dn_phys), B_FALSE); 3741 (void) printf("\nSpill block: %s\n", blkbuf); 3742 } 3743 dump_indirect(dn); 3744 } 3745 3746 if (verbosity >= 5) { 3747 /* 3748 * Report the list of segments that comprise the object. 
3749 */ 3750 uint64_t start = 0; 3751 uint64_t end; 3752 uint64_t blkfill = 1; 3753 int minlvl = 1; 3754 3755 if (dn->dn_type == DMU_OT_DNODE) { 3756 minlvl = 0; 3757 blkfill = DNODES_PER_BLOCK; 3758 } 3759 3760 for (;;) { 3761 char segsize[32]; 3762 /* make sure nicenum has enough space */ 3763 _Static_assert(sizeof (segsize) >= NN_NUMBUF_SZ, 3764 "segsize truncated"); 3765 error = dnode_next_offset(dn, 3766 0, &start, minlvl, blkfill, 0); 3767 if (error) 3768 break; 3769 end = start; 3770 error = dnode_next_offset(dn, 3771 DNODE_FIND_HOLE, &end, minlvl, blkfill, 0); 3772 zdb_nicenum(end - start, segsize, sizeof (segsize)); 3773 (void) printf("\t\tsegment [%016llx, %016llx)" 3774 " size %5s\n", (u_longlong_t)start, 3775 (u_longlong_t)end, segsize); 3776 if (error) 3777 break; 3778 start = end; 3779 } 3780 } 3781 3782 out: 3783 if (db != NULL) 3784 dmu_buf_rele(db, FTAG); 3785 if (dnode_held) 3786 dnode_rele(dn, FTAG); 3787 } 3788 3789 static void 3790 count_dir_mos_objects(dsl_dir_t *dd) 3791 { 3792 mos_obj_refd(dd->dd_object); 3793 mos_obj_refd(dsl_dir_phys(dd)->dd_child_dir_zapobj); 3794 mos_obj_refd(dsl_dir_phys(dd)->dd_deleg_zapobj); 3795 mos_obj_refd(dsl_dir_phys(dd)->dd_props_zapobj); 3796 mos_obj_refd(dsl_dir_phys(dd)->dd_clones); 3797 3798 /* 3799 * The dd_crypto_obj can be referenced by multiple dsl_dir's. 3800 * Ignore the references after the first one. 
3801 */ 3802 mos_obj_refd_multiple(dd->dd_crypto_obj); 3803 } 3804 3805 static void 3806 count_ds_mos_objects(dsl_dataset_t *ds) 3807 { 3808 mos_obj_refd(ds->ds_object); 3809 mos_obj_refd(dsl_dataset_phys(ds)->ds_next_clones_obj); 3810 mos_obj_refd(dsl_dataset_phys(ds)->ds_props_obj); 3811 mos_obj_refd(dsl_dataset_phys(ds)->ds_userrefs_obj); 3812 mos_obj_refd(dsl_dataset_phys(ds)->ds_snapnames_zapobj); 3813 mos_obj_refd(ds->ds_bookmarks_obj); 3814 3815 if (!dsl_dataset_is_snapshot(ds)) { 3816 count_dir_mos_objects(ds->ds_dir); 3817 } 3818 } 3819 3820 static const char *const objset_types[DMU_OST_NUMTYPES] = { 3821 "NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" }; 3822 3823 /* 3824 * Parse a string denoting a range of object IDs of the form 3825 * <start>[:<end>[:flags]], and store the results in zor. 3826 * Return 0 on success. On error, return 1 and update the msg 3827 * pointer to point to a descriptive error message. 3828 */ 3829 static int 3830 parse_object_range(char *range, zopt_object_range_t *zor, const char **msg) 3831 { 3832 uint64_t flags = 0; 3833 char *p, *s, *dup, *flagstr, *tmp = NULL; 3834 size_t len; 3835 int i; 3836 int rc = 0; 3837 3838 if (strchr(range, ':') == NULL) { 3839 zor->zor_obj_start = strtoull(range, &p, 0); 3840 if (*p != '\0') { 3841 *msg = "Invalid characters in object ID"; 3842 rc = 1; 3843 } 3844 zor->zor_obj_start = ZDB_MAP_OBJECT_ID(zor->zor_obj_start); 3845 zor->zor_obj_end = zor->zor_obj_start; 3846 return (rc); 3847 } 3848 3849 if (strchr(range, ':') == range) { 3850 *msg = "Invalid leading colon"; 3851 rc = 1; 3852 return (rc); 3853 } 3854 3855 len = strlen(range); 3856 if (range[len - 1] == ':') { 3857 *msg = "Invalid trailing colon"; 3858 rc = 1; 3859 return (rc); 3860 } 3861 3862 dup = strdup(range); 3863 s = strtok_r(dup, ":", &tmp); 3864 zor->zor_obj_start = strtoull(s, &p, 0); 3865 3866 if (*p != '\0') { 3867 *msg = "Invalid characters in start object ID"; 3868 rc = 1; 3869 goto out; 3870 } 3871 3872 s = strtok_r(NULL, 
":", &tmp); 3873 zor->zor_obj_end = strtoull(s, &p, 0); 3874 3875 if (*p != '\0') { 3876 *msg = "Invalid characters in end object ID"; 3877 rc = 1; 3878 goto out; 3879 } 3880 3881 if (zor->zor_obj_start > zor->zor_obj_end) { 3882 *msg = "Start object ID may not exceed end object ID"; 3883 rc = 1; 3884 goto out; 3885 } 3886 3887 s = strtok_r(NULL, ":", &tmp); 3888 if (s == NULL) { 3889 zor->zor_flags = ZOR_FLAG_ALL_TYPES; 3890 goto out; 3891 } else if (strtok_r(NULL, ":", &tmp) != NULL) { 3892 *msg = "Invalid colon-delimited field after flags"; 3893 rc = 1; 3894 goto out; 3895 } 3896 3897 flagstr = s; 3898 for (i = 0; flagstr[i]; i++) { 3899 int bit; 3900 boolean_t negation = (flagstr[i] == '-'); 3901 3902 if (negation) { 3903 i++; 3904 if (flagstr[i] == '\0') { 3905 *msg = "Invalid trailing negation operator"; 3906 rc = 1; 3907 goto out; 3908 } 3909 } 3910 bit = flagbits[(uchar_t)flagstr[i]]; 3911 if (bit == 0) { 3912 *msg = "Invalid flag"; 3913 rc = 1; 3914 goto out; 3915 } 3916 if (negation) 3917 flags &= ~bit; 3918 else 3919 flags |= bit; 3920 } 3921 zor->zor_flags = flags; 3922 3923 zor->zor_obj_start = ZDB_MAP_OBJECT_ID(zor->zor_obj_start); 3924 zor->zor_obj_end = ZDB_MAP_OBJECT_ID(zor->zor_obj_end); 3925 3926 out: 3927 free(dup); 3928 return (rc); 3929 } 3930 3931 static void 3932 dump_objset(objset_t *os) 3933 { 3934 dmu_objset_stats_t dds = { 0 }; 3935 uint64_t object, object_count; 3936 uint64_t refdbytes, usedobjs, scratch; 3937 char numbuf[32]; 3938 char blkbuf[BP_SPRINTF_LEN + 20]; 3939 char osname[ZFS_MAX_DATASET_NAME_LEN]; 3940 const char *type = "UNKNOWN"; 3941 int verbosity = dump_opt['d']; 3942 boolean_t print_header; 3943 unsigned i; 3944 int error; 3945 uint64_t total_slots_used = 0; 3946 uint64_t max_slot_used = 0; 3947 uint64_t dnode_slots; 3948 uint64_t obj_start; 3949 uint64_t obj_end; 3950 uint64_t flags; 3951 3952 /* make sure nicenum has enough space */ 3953 _Static_assert(sizeof (numbuf) >= NN_NUMBUF_SZ, "numbuf truncated"); 3954 3955 
dsl_pool_config_enter(dmu_objset_pool(os), FTAG); 3956 dmu_objset_fast_stat(os, &dds); 3957 dsl_pool_config_exit(dmu_objset_pool(os), FTAG); 3958 3959 print_header = B_TRUE; 3960 3961 if (dds.dds_type < DMU_OST_NUMTYPES) 3962 type = objset_types[dds.dds_type]; 3963 3964 if (dds.dds_type == DMU_OST_META) { 3965 dds.dds_creation_txg = TXG_INITIAL; 3966 usedobjs = BP_GET_FILL(os->os_rootbp); 3967 refdbytes = dsl_dir_phys(os->os_spa->spa_dsl_pool->dp_mos_dir)-> 3968 dd_used_bytes; 3969 } else { 3970 dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch); 3971 } 3972 3973 ASSERT3U(usedobjs, ==, BP_GET_FILL(os->os_rootbp)); 3974 3975 zdb_nicenum(refdbytes, numbuf, sizeof (numbuf)); 3976 3977 if (verbosity >= 4) { 3978 (void) snprintf(blkbuf, sizeof (blkbuf), ", rootbp "); 3979 (void) snprintf_blkptr(blkbuf + strlen(blkbuf), 3980 sizeof (blkbuf) - strlen(blkbuf), os->os_rootbp); 3981 } else { 3982 blkbuf[0] = '\0'; 3983 } 3984 3985 dmu_objset_name(os, osname); 3986 3987 (void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, " 3988 "%s, %llu objects%s%s\n", 3989 osname, type, (u_longlong_t)dmu_objset_id(os), 3990 (u_longlong_t)dds.dds_creation_txg, 3991 numbuf, (u_longlong_t)usedobjs, blkbuf, 3992 (dds.dds_inconsistent) ? 
" (inconsistent)" : ""); 3993 3994 for (i = 0; i < zopt_object_args; i++) { 3995 obj_start = zopt_object_ranges[i].zor_obj_start; 3996 obj_end = zopt_object_ranges[i].zor_obj_end; 3997 flags = zopt_object_ranges[i].zor_flags; 3998 3999 object = obj_start; 4000 if (object == 0 || obj_start == obj_end) 4001 dump_object(os, object, verbosity, &print_header, NULL, 4002 flags); 4003 else 4004 object--; 4005 4006 while ((dmu_object_next(os, &object, B_FALSE, 0) == 0) && 4007 object <= obj_end) { 4008 dump_object(os, object, verbosity, &print_header, NULL, 4009 flags); 4010 } 4011 } 4012 4013 if (zopt_object_args > 0) { 4014 (void) printf("\n"); 4015 return; 4016 } 4017 4018 if (dump_opt['i'] != 0 || verbosity >= 2) 4019 dump_intent_log(dmu_objset_zil(os)); 4020 4021 if (dmu_objset_ds(os) != NULL) { 4022 dsl_dataset_t *ds = dmu_objset_ds(os); 4023 dump_blkptr_list(&ds->ds_deadlist, "Deadlist"); 4024 if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist) && 4025 !dmu_objset_is_snapshot(os)) { 4026 dump_blkptr_list(&ds->ds_dir->dd_livelist, "Livelist"); 4027 if (verify_dd_livelist(os) != 0) 4028 fatal("livelist is incorrect"); 4029 } 4030 4031 if (dsl_dataset_remap_deadlist_exists(ds)) { 4032 (void) printf("ds_remap_deadlist:\n"); 4033 dump_blkptr_list(&ds->ds_remap_deadlist, "Deadlist"); 4034 } 4035 count_ds_mos_objects(ds); 4036 } 4037 4038 if (dmu_objset_ds(os) != NULL) 4039 dump_bookmarks(os, verbosity); 4040 4041 if (verbosity < 2) 4042 return; 4043 4044 if (BP_IS_HOLE(os->os_rootbp)) 4045 return; 4046 4047 dump_object(os, 0, verbosity, &print_header, NULL, 0); 4048 object_count = 0; 4049 if (DMU_USERUSED_DNODE(os) != NULL && 4050 DMU_USERUSED_DNODE(os)->dn_type != 0) { 4051 dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header, 4052 NULL, 0); 4053 dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header, 4054 NULL, 0); 4055 } 4056 4057 if (DMU_PROJECTUSED_DNODE(os) != NULL && 4058 DMU_PROJECTUSED_DNODE(os)->dn_type != 0) 4059 dump_object(os, 
DMU_PROJECTUSED_OBJECT, verbosity, 4060 &print_header, NULL, 0); 4061 4062 object = 0; 4063 while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) { 4064 dump_object(os, object, verbosity, &print_header, &dnode_slots, 4065 0); 4066 object_count++; 4067 total_slots_used += dnode_slots; 4068 max_slot_used = object + dnode_slots - 1; 4069 } 4070 4071 (void) printf("\n"); 4072 4073 (void) printf(" Dnode slots:\n"); 4074 (void) printf("\tTotal used: %10llu\n", 4075 (u_longlong_t)total_slots_used); 4076 (void) printf("\tMax used: %10llu\n", 4077 (u_longlong_t)max_slot_used); 4078 (void) printf("\tPercent empty: %10lf\n", 4079 (double)(max_slot_used - total_slots_used)*100 / 4080 (double)max_slot_used); 4081 (void) printf("\n"); 4082 4083 if (error != ESRCH) { 4084 (void) fprintf(stderr, "dmu_object_next() = %d\n", error); 4085 abort(); 4086 } 4087 4088 ASSERT3U(object_count, ==, usedobjs); 4089 4090 if (leaked_objects != 0) { 4091 (void) printf("%d potentially leaked objects detected\n", 4092 leaked_objects); 4093 leaked_objects = 0; 4094 } 4095 } 4096 4097 static void 4098 dump_uberblock(uberblock_t *ub, const char *header, const char *footer) 4099 { 4100 time_t timestamp = ub->ub_timestamp; 4101 4102 (void) printf("%s", header ? 
header : ""); 4103 (void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic); 4104 (void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version); 4105 (void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg); 4106 (void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum); 4107 (void) printf("\ttimestamp = %llu UTC = %s", 4108 (u_longlong_t)ub->ub_timestamp, ctime(×tamp)); 4109 4110 (void) printf("\tmmp_magic = %016llx\n", 4111 (u_longlong_t)ub->ub_mmp_magic); 4112 if (MMP_VALID(ub)) { 4113 (void) printf("\tmmp_delay = %0llu\n", 4114 (u_longlong_t)ub->ub_mmp_delay); 4115 if (MMP_SEQ_VALID(ub)) 4116 (void) printf("\tmmp_seq = %u\n", 4117 (unsigned int) MMP_SEQ(ub)); 4118 if (MMP_FAIL_INT_VALID(ub)) 4119 (void) printf("\tmmp_fail = %u\n", 4120 (unsigned int) MMP_FAIL_INT(ub)); 4121 if (MMP_INTERVAL_VALID(ub)) 4122 (void) printf("\tmmp_write = %u\n", 4123 (unsigned int) MMP_INTERVAL(ub)); 4124 /* After MMP_* to make summarize_uberblock_mmp cleaner */ 4125 (void) printf("\tmmp_valid = %x\n", 4126 (unsigned int) ub->ub_mmp_config & 0xFF); 4127 } 4128 4129 if (dump_opt['u'] >= 4) { 4130 char blkbuf[BP_SPRINTF_LEN]; 4131 snprintf_blkptr(blkbuf, sizeof (blkbuf), &ub->ub_rootbp); 4132 (void) printf("\trootbp = %s\n", blkbuf); 4133 } 4134 (void) printf("\tcheckpoint_txg = %llu\n", 4135 (u_longlong_t)ub->ub_checkpoint_txg); 4136 (void) printf("%s", footer ? 
footer : ""); 4137 } 4138 4139 static void 4140 dump_config(spa_t *spa) 4141 { 4142 dmu_buf_t *db; 4143 size_t nvsize = 0; 4144 int error = 0; 4145 4146 4147 error = dmu_bonus_hold(spa->spa_meta_objset, 4148 spa->spa_config_object, FTAG, &db); 4149 4150 if (error == 0) { 4151 nvsize = *(uint64_t *)db->db_data; 4152 dmu_buf_rele(db, FTAG); 4153 4154 (void) printf("\nMOS Configuration:\n"); 4155 dump_packed_nvlist(spa->spa_meta_objset, 4156 spa->spa_config_object, (void *)&nvsize, 1); 4157 } else { 4158 (void) fprintf(stderr, "dmu_bonus_hold(%llu) failed, errno %d", 4159 (u_longlong_t)spa->spa_config_object, error); 4160 } 4161 } 4162 4163 static void 4164 dump_cachefile(const char *cachefile) 4165 { 4166 int fd; 4167 struct stat64 statbuf; 4168 char *buf; 4169 nvlist_t *config; 4170 4171 if ((fd = open64(cachefile, O_RDONLY)) < 0) { 4172 (void) printf("cannot open '%s': %s\n", cachefile, 4173 strerror(errno)); 4174 exit(1); 4175 } 4176 4177 if (fstat64(fd, &statbuf) != 0) { 4178 (void) printf("failed to stat '%s': %s\n", cachefile, 4179 strerror(errno)); 4180 exit(1); 4181 } 4182 4183 if ((buf = malloc(statbuf.st_size)) == NULL) { 4184 (void) fprintf(stderr, "failed to allocate %llu bytes\n", 4185 (u_longlong_t)statbuf.st_size); 4186 exit(1); 4187 } 4188 4189 if (read(fd, buf, statbuf.st_size) != statbuf.st_size) { 4190 (void) fprintf(stderr, "failed to read %llu bytes\n", 4191 (u_longlong_t)statbuf.st_size); 4192 exit(1); 4193 } 4194 4195 (void) close(fd); 4196 4197 if (nvlist_unpack(buf, statbuf.st_size, &config, 0) != 0) { 4198 (void) fprintf(stderr, "failed to unpack nvlist\n"); 4199 exit(1); 4200 } 4201 4202 free(buf); 4203 4204 dump_nvlist(config, 0); 4205 4206 nvlist_free(config); 4207 } 4208 4209 /* 4210 * ZFS label nvlist stats 4211 */ 4212 typedef struct zdb_nvl_stats { 4213 int zns_list_count; 4214 int zns_leaf_count; 4215 size_t zns_leaf_largest; 4216 size_t zns_leaf_total; 4217 nvlist_t *zns_string; 4218 nvlist_t *zns_uint64; 4219 nvlist_t *zns_boolean; 
4220 } zdb_nvl_stats_t; 4221 4222 static void 4223 collect_nvlist_stats(nvlist_t *nvl, zdb_nvl_stats_t *stats) 4224 { 4225 nvlist_t *list, **array; 4226 nvpair_t *nvp = NULL; 4227 const char *name; 4228 uint_t i, items; 4229 4230 stats->zns_list_count++; 4231 4232 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 4233 name = nvpair_name(nvp); 4234 4235 switch (nvpair_type(nvp)) { 4236 case DATA_TYPE_STRING: 4237 fnvlist_add_string(stats->zns_string, name, 4238 fnvpair_value_string(nvp)); 4239 break; 4240 case DATA_TYPE_UINT64: 4241 fnvlist_add_uint64(stats->zns_uint64, name, 4242 fnvpair_value_uint64(nvp)); 4243 break; 4244 case DATA_TYPE_BOOLEAN: 4245 fnvlist_add_boolean(stats->zns_boolean, name); 4246 break; 4247 case DATA_TYPE_NVLIST: 4248 if (nvpair_value_nvlist(nvp, &list) == 0) 4249 collect_nvlist_stats(list, stats); 4250 break; 4251 case DATA_TYPE_NVLIST_ARRAY: 4252 if (nvpair_value_nvlist_array(nvp, &array, &items) != 0) 4253 break; 4254 4255 for (i = 0; i < items; i++) { 4256 collect_nvlist_stats(array[i], stats); 4257 4258 /* collect stats on leaf vdev */ 4259 if (strcmp(name, "children") == 0) { 4260 size_t size; 4261 4262 (void) nvlist_size(array[i], &size, 4263 NV_ENCODE_XDR); 4264 stats->zns_leaf_total += size; 4265 if (size > stats->zns_leaf_largest) 4266 stats->zns_leaf_largest = size; 4267 stats->zns_leaf_count++; 4268 } 4269 } 4270 break; 4271 default: 4272 (void) printf("skip type %d!\n", (int)nvpair_type(nvp)); 4273 } 4274 } 4275 } 4276 4277 static void 4278 dump_nvlist_stats(nvlist_t *nvl, size_t cap) 4279 { 4280 zdb_nvl_stats_t stats = { 0 }; 4281 size_t size, sum = 0, total; 4282 size_t noise; 4283 4284 /* requires nvlist with non-unique names for stat collection */ 4285 VERIFY0(nvlist_alloc(&stats.zns_string, 0, 0)); 4286 VERIFY0(nvlist_alloc(&stats.zns_uint64, 0, 0)); 4287 VERIFY0(nvlist_alloc(&stats.zns_boolean, 0, 0)); 4288 VERIFY0(nvlist_size(stats.zns_boolean, &noise, NV_ENCODE_XDR)); 4289 4290 (void) printf("\n\nZFS Label NVList 
Config Stats:\n"); 4291 4292 VERIFY0(nvlist_size(nvl, &total, NV_ENCODE_XDR)); 4293 (void) printf(" %d bytes used, %d bytes free (using %4.1f%%)\n\n", 4294 (int)total, (int)(cap - total), 100.0 * total / cap); 4295 4296 collect_nvlist_stats(nvl, &stats); 4297 4298 VERIFY0(nvlist_size(stats.zns_uint64, &size, NV_ENCODE_XDR)); 4299 size -= noise; 4300 sum += size; 4301 (void) printf("%12s %4d %6d bytes (%5.2f%%)\n", "integers:", 4302 (int)fnvlist_num_pairs(stats.zns_uint64), 4303 (int)size, 100.0 * size / total); 4304 4305 VERIFY0(nvlist_size(stats.zns_string, &size, NV_ENCODE_XDR)); 4306 size -= noise; 4307 sum += size; 4308 (void) printf("%12s %4d %6d bytes (%5.2f%%)\n", "strings:", 4309 (int)fnvlist_num_pairs(stats.zns_string), 4310 (int)size, 100.0 * size / total); 4311 4312 VERIFY0(nvlist_size(stats.zns_boolean, &size, NV_ENCODE_XDR)); 4313 size -= noise; 4314 sum += size; 4315 (void) printf("%12s %4d %6d bytes (%5.2f%%)\n", "booleans:", 4316 (int)fnvlist_num_pairs(stats.zns_boolean), 4317 (int)size, 100.0 * size / total); 4318 4319 size = total - sum; /* treat remainder as nvlist overhead */ 4320 (void) printf("%12s %4d %6d bytes (%5.2f%%)\n\n", "nvlists:", 4321 stats.zns_list_count, (int)size, 100.0 * size / total); 4322 4323 if (stats.zns_leaf_count > 0) { 4324 size_t average = stats.zns_leaf_total / stats.zns_leaf_count; 4325 4326 (void) printf("%12s %4d %6d bytes average\n", "leaf vdevs:", 4327 stats.zns_leaf_count, (int)average); 4328 (void) printf("%24d bytes largest\n", 4329 (int)stats.zns_leaf_largest); 4330 4331 if (dump_opt['l'] >= 3 && average > 0) 4332 (void) printf(" space for %d additional leaf vdevs\n", 4333 (int)((cap - total) / average)); 4334 } 4335 (void) printf("\n"); 4336 4337 nvlist_free(stats.zns_string); 4338 nvlist_free(stats.zns_uint64); 4339 nvlist_free(stats.zns_boolean); 4340 } 4341 4342 typedef struct cksum_record { 4343 zio_cksum_t cksum; 4344 boolean_t labels[VDEV_LABELS]; 4345 avl_node_t link; 4346 } cksum_record_t; 4347 4348 
static int 4349 cksum_record_compare(const void *x1, const void *x2) 4350 { 4351 const cksum_record_t *l = (cksum_record_t *)x1; 4352 const cksum_record_t *r = (cksum_record_t *)x2; 4353 int arraysize = ARRAY_SIZE(l->cksum.zc_word); 4354 int difference = 0; 4355 4356 for (int i = 0; i < arraysize; i++) { 4357 difference = TREE_CMP(l->cksum.zc_word[i], r->cksum.zc_word[i]); 4358 if (difference) 4359 break; 4360 } 4361 4362 return (difference); 4363 } 4364 4365 static cksum_record_t * 4366 cksum_record_alloc(zio_cksum_t *cksum, int l) 4367 { 4368 cksum_record_t *rec; 4369 4370 rec = umem_zalloc(sizeof (*rec), UMEM_NOFAIL); 4371 rec->cksum = *cksum; 4372 rec->labels[l] = B_TRUE; 4373 4374 return (rec); 4375 } 4376 4377 static cksum_record_t * 4378 cksum_record_lookup(avl_tree_t *tree, zio_cksum_t *cksum) 4379 { 4380 cksum_record_t lookup = { .cksum = *cksum }; 4381 avl_index_t where; 4382 4383 return (avl_find(tree, &lookup, &where)); 4384 } 4385 4386 static cksum_record_t * 4387 cksum_record_insert(avl_tree_t *tree, zio_cksum_t *cksum, int l) 4388 { 4389 cksum_record_t *rec; 4390 4391 rec = cksum_record_lookup(tree, cksum); 4392 if (rec) { 4393 rec->labels[l] = B_TRUE; 4394 } else { 4395 rec = cksum_record_alloc(cksum, l); 4396 avl_add(tree, rec); 4397 } 4398 4399 return (rec); 4400 } 4401 4402 static int 4403 first_label(cksum_record_t *rec) 4404 { 4405 for (int i = 0; i < VDEV_LABELS; i++) 4406 if (rec->labels[i]) 4407 return (i); 4408 4409 return (-1); 4410 } 4411 4412 static void 4413 print_label_numbers(const char *prefix, const cksum_record_t *rec) 4414 { 4415 fputs(prefix, stdout); 4416 for (int i = 0; i < VDEV_LABELS; i++) 4417 if (rec->labels[i] == B_TRUE) 4418 printf("%d ", i); 4419 putchar('\n'); 4420 } 4421 4422 #define MAX_UBERBLOCK_COUNT (VDEV_UBERBLOCK_RING >> UBERBLOCK_SHIFT) 4423 4424 typedef struct zdb_label { 4425 vdev_label_t label; 4426 uint64_t label_offset; 4427 nvlist_t *config_nv; 4428 cksum_record_t *config; 4429 cksum_record_t 
*uberblocks[MAX_UBERBLOCK_COUNT]; 4430 boolean_t header_printed; 4431 boolean_t read_failed; 4432 boolean_t cksum_valid; 4433 } zdb_label_t; 4434 4435 static void 4436 print_label_header(zdb_label_t *label, int l) 4437 { 4438 4439 if (dump_opt['q']) 4440 return; 4441 4442 if (label->header_printed == B_TRUE) 4443 return; 4444 4445 (void) printf("------------------------------------\n"); 4446 (void) printf("LABEL %d %s\n", l, 4447 label->cksum_valid ? "" : "(Bad label cksum)"); 4448 (void) printf("------------------------------------\n"); 4449 4450 label->header_printed = B_TRUE; 4451 } 4452 4453 static void 4454 print_l2arc_header(void) 4455 { 4456 (void) printf("------------------------------------\n"); 4457 (void) printf("L2ARC device header\n"); 4458 (void) printf("------------------------------------\n"); 4459 } 4460 4461 static void 4462 print_l2arc_log_blocks(void) 4463 { 4464 (void) printf("------------------------------------\n"); 4465 (void) printf("L2ARC device log blocks\n"); 4466 (void) printf("------------------------------------\n"); 4467 } 4468 4469 static void 4470 dump_l2arc_log_entries(uint64_t log_entries, 4471 l2arc_log_ent_phys_t *le, uint64_t i) 4472 { 4473 for (int j = 0; j < log_entries; j++) { 4474 dva_t dva = le[j].le_dva; 4475 (void) printf("lb[%4llu]\tle[%4d]\tDVA asize: %llu, " 4476 "vdev: %llu, offset: %llu\n", 4477 (u_longlong_t)i, j + 1, 4478 (u_longlong_t)DVA_GET_ASIZE(&dva), 4479 (u_longlong_t)DVA_GET_VDEV(&dva), 4480 (u_longlong_t)DVA_GET_OFFSET(&dva)); 4481 (void) printf("|\t\t\t\tbirth: %llu\n", 4482 (u_longlong_t)le[j].le_birth); 4483 (void) printf("|\t\t\t\tlsize: %llu\n", 4484 (u_longlong_t)L2BLK_GET_LSIZE((&le[j])->le_prop)); 4485 (void) printf("|\t\t\t\tpsize: %llu\n", 4486 (u_longlong_t)L2BLK_GET_PSIZE((&le[j])->le_prop)); 4487 (void) printf("|\t\t\t\tcompr: %llu\n", 4488 (u_longlong_t)L2BLK_GET_COMPRESS((&le[j])->le_prop)); 4489 (void) printf("|\t\t\t\tcomplevel: %llu\n", 4490 (u_longlong_t)(&le[j])->le_complevel); 4491 
(void) printf("|\t\t\t\ttype: %llu\n", 4492 (u_longlong_t)L2BLK_GET_TYPE((&le[j])->le_prop)); 4493 (void) printf("|\t\t\t\tprotected: %llu\n", 4494 (u_longlong_t)L2BLK_GET_PROTECTED((&le[j])->le_prop)); 4495 (void) printf("|\t\t\t\tprefetch: %llu\n", 4496 (u_longlong_t)L2BLK_GET_PREFETCH((&le[j])->le_prop)); 4497 (void) printf("|\t\t\t\taddress: %llu\n", 4498 (u_longlong_t)le[j].le_daddr); 4499 (void) printf("|\t\t\t\tARC state: %llu\n", 4500 (u_longlong_t)L2BLK_GET_STATE((&le[j])->le_prop)); 4501 (void) printf("|\n"); 4502 } 4503 (void) printf("\n"); 4504 } 4505 4506 static void 4507 dump_l2arc_log_blkptr(const l2arc_log_blkptr_t *lbps) 4508 { 4509 (void) printf("|\t\tdaddr: %llu\n", (u_longlong_t)lbps->lbp_daddr); 4510 (void) printf("|\t\tpayload_asize: %llu\n", 4511 (u_longlong_t)lbps->lbp_payload_asize); 4512 (void) printf("|\t\tpayload_start: %llu\n", 4513 (u_longlong_t)lbps->lbp_payload_start); 4514 (void) printf("|\t\tlsize: %llu\n", 4515 (u_longlong_t)L2BLK_GET_LSIZE(lbps->lbp_prop)); 4516 (void) printf("|\t\tasize: %llu\n", 4517 (u_longlong_t)L2BLK_GET_PSIZE(lbps->lbp_prop)); 4518 (void) printf("|\t\tcompralgo: %llu\n", 4519 (u_longlong_t)L2BLK_GET_COMPRESS(lbps->lbp_prop)); 4520 (void) printf("|\t\tcksumalgo: %llu\n", 4521 (u_longlong_t)L2BLK_GET_CHECKSUM(lbps->lbp_prop)); 4522 (void) printf("|\n\n"); 4523 } 4524 4525 static void 4526 dump_l2arc_log_blocks(int fd, const l2arc_dev_hdr_phys_t *l2dhdr, 4527 l2arc_dev_hdr_phys_t *rebuild) 4528 { 4529 l2arc_log_blk_phys_t this_lb; 4530 uint64_t asize; 4531 l2arc_log_blkptr_t lbps[2]; 4532 abd_t *abd; 4533 zio_cksum_t cksum; 4534 int failed = 0; 4535 l2arc_dev_t dev; 4536 4537 if (!dump_opt['q']) 4538 print_l2arc_log_blocks(); 4539 memcpy(lbps, l2dhdr->dh_start_lbps, sizeof (lbps)); 4540 4541 dev.l2ad_evict = l2dhdr->dh_evict; 4542 dev.l2ad_start = l2dhdr->dh_start; 4543 dev.l2ad_end = l2dhdr->dh_end; 4544 4545 if (l2dhdr->dh_start_lbps[0].lbp_daddr == 0) { 4546 /* no log blocks to read */ 4547 if 
(!dump_opt['q']) { 4548 (void) printf("No log blocks to read\n"); 4549 (void) printf("\n"); 4550 } 4551 return; 4552 } else { 4553 dev.l2ad_hand = lbps[0].lbp_daddr + 4554 L2BLK_GET_PSIZE((&lbps[0])->lbp_prop); 4555 } 4556 4557 dev.l2ad_first = !!(l2dhdr->dh_flags & L2ARC_DEV_HDR_EVICT_FIRST); 4558 4559 for (;;) { 4560 if (!l2arc_log_blkptr_valid(&dev, &lbps[0])) 4561 break; 4562 4563 /* L2BLK_GET_PSIZE returns aligned size for log blocks */ 4564 asize = L2BLK_GET_PSIZE((&lbps[0])->lbp_prop); 4565 if (pread64(fd, &this_lb, asize, lbps[0].lbp_daddr) != asize) { 4566 if (!dump_opt['q']) { 4567 (void) printf("Error while reading next log " 4568 "block\n\n"); 4569 } 4570 break; 4571 } 4572 4573 fletcher_4_native_varsize(&this_lb, asize, &cksum); 4574 if (!ZIO_CHECKSUM_EQUAL(cksum, lbps[0].lbp_cksum)) { 4575 failed++; 4576 if (!dump_opt['q']) { 4577 (void) printf("Invalid cksum\n"); 4578 dump_l2arc_log_blkptr(&lbps[0]); 4579 } 4580 break; 4581 } 4582 4583 switch (L2BLK_GET_COMPRESS((&lbps[0])->lbp_prop)) { 4584 case ZIO_COMPRESS_OFF: 4585 break; 4586 default: 4587 abd = abd_alloc_for_io(asize, B_TRUE); 4588 abd_copy_from_buf_off(abd, &this_lb, 0, asize); 4589 if (zio_decompress_data(L2BLK_GET_COMPRESS( 4590 (&lbps[0])->lbp_prop), abd, &this_lb, 4591 asize, sizeof (this_lb), NULL) != 0) { 4592 (void) printf("L2ARC block decompression " 4593 "failed\n"); 4594 abd_free(abd); 4595 goto out; 4596 } 4597 abd_free(abd); 4598 break; 4599 } 4600 4601 if (this_lb.lb_magic == BSWAP_64(L2ARC_LOG_BLK_MAGIC)) 4602 byteswap_uint64_array(&this_lb, sizeof (this_lb)); 4603 if (this_lb.lb_magic != L2ARC_LOG_BLK_MAGIC) { 4604 if (!dump_opt['q']) 4605 (void) printf("Invalid log block magic\n\n"); 4606 break; 4607 } 4608 4609 rebuild->dh_lb_count++; 4610 rebuild->dh_lb_asize += asize; 4611 if (dump_opt['l'] > 1 && !dump_opt['q']) { 4612 (void) printf("lb[%4llu]\tmagic: %llu\n", 4613 (u_longlong_t)rebuild->dh_lb_count, 4614 (u_longlong_t)this_lb.lb_magic); 4615 
dump_l2arc_log_blkptr(&lbps[0]); 4616 } 4617 4618 if (dump_opt['l'] > 2 && !dump_opt['q']) 4619 dump_l2arc_log_entries(l2dhdr->dh_log_entries, 4620 this_lb.lb_entries, 4621 rebuild->dh_lb_count); 4622 4623 if (l2arc_range_check_overlap(lbps[1].lbp_payload_start, 4624 lbps[0].lbp_payload_start, dev.l2ad_evict) && 4625 !dev.l2ad_first) 4626 break; 4627 4628 lbps[0] = lbps[1]; 4629 lbps[1] = this_lb.lb_prev_lbp; 4630 } 4631 out: 4632 if (!dump_opt['q']) { 4633 (void) printf("log_blk_count:\t %llu with valid cksum\n", 4634 (u_longlong_t)rebuild->dh_lb_count); 4635 (void) printf("\t\t %d with invalid cksum\n", failed); 4636 (void) printf("log_blk_asize:\t %llu\n\n", 4637 (u_longlong_t)rebuild->dh_lb_asize); 4638 } 4639 } 4640 4641 static int 4642 dump_l2arc_header(int fd) 4643 { 4644 l2arc_dev_hdr_phys_t l2dhdr = {0}, rebuild = {0}; 4645 int error = B_FALSE; 4646 4647 if (pread64(fd, &l2dhdr, sizeof (l2dhdr), 4648 VDEV_LABEL_START_SIZE) != sizeof (l2dhdr)) { 4649 error = B_TRUE; 4650 } else { 4651 if (l2dhdr.dh_magic == BSWAP_64(L2ARC_DEV_HDR_MAGIC)) 4652 byteswap_uint64_array(&l2dhdr, sizeof (l2dhdr)); 4653 4654 if (l2dhdr.dh_magic != L2ARC_DEV_HDR_MAGIC) 4655 error = B_TRUE; 4656 } 4657 4658 if (error) { 4659 (void) printf("L2ARC device header not found\n\n"); 4660 /* Do not return an error here for backward compatibility */ 4661 return (0); 4662 } else if (!dump_opt['q']) { 4663 print_l2arc_header(); 4664 4665 (void) printf(" magic: %llu\n", 4666 (u_longlong_t)l2dhdr.dh_magic); 4667 (void) printf(" version: %llu\n", 4668 (u_longlong_t)l2dhdr.dh_version); 4669 (void) printf(" pool_guid: %llu\n", 4670 (u_longlong_t)l2dhdr.dh_spa_guid); 4671 (void) printf(" flags: %llu\n", 4672 (u_longlong_t)l2dhdr.dh_flags); 4673 (void) printf(" start_lbps[0]: %llu\n", 4674 (u_longlong_t) 4675 l2dhdr.dh_start_lbps[0].lbp_daddr); 4676 (void) printf(" start_lbps[1]: %llu\n", 4677 (u_longlong_t) 4678 l2dhdr.dh_start_lbps[1].lbp_daddr); 4679 (void) printf(" log_blk_ent: %llu\n", 4680 
(u_longlong_t)l2dhdr.dh_log_entries); 4681 (void) printf(" start: %llu\n", 4682 (u_longlong_t)l2dhdr.dh_start); 4683 (void) printf(" end: %llu\n", 4684 (u_longlong_t)l2dhdr.dh_end); 4685 (void) printf(" evict: %llu\n", 4686 (u_longlong_t)l2dhdr.dh_evict); 4687 (void) printf(" lb_asize_refcount: %llu\n", 4688 (u_longlong_t)l2dhdr.dh_lb_asize); 4689 (void) printf(" lb_count_refcount: %llu\n", 4690 (u_longlong_t)l2dhdr.dh_lb_count); 4691 (void) printf(" trim_action_time: %llu\n", 4692 (u_longlong_t)l2dhdr.dh_trim_action_time); 4693 (void) printf(" trim_state: %llu\n\n", 4694 (u_longlong_t)l2dhdr.dh_trim_state); 4695 } 4696 4697 dump_l2arc_log_blocks(fd, &l2dhdr, &rebuild); 4698 /* 4699 * The total aligned size of log blocks and the number of log blocks 4700 * reported in the header of the device may be less than what zdb 4701 * reports by dump_l2arc_log_blocks() which emulates l2arc_rebuild(). 4702 * This happens because dump_l2arc_log_blocks() lacks the memory 4703 * pressure valve that l2arc_rebuild() has. Thus, if we are on a system 4704 * with low memory, l2arc_rebuild will exit prematurely and dh_lb_asize 4705 * and dh_lb_count will be lower to begin with than what exists on the 4706 * device. This is normal and zdb should not exit with an error. The 4707 * opposite case should never happen though, the values reported in the 4708 * header should never be higher than what dump_l2arc_log_blocks() and 4709 * l2arc_rebuild() report. If this happens there is a leak in the 4710 * accounting of log blocks. 
4711 */ 4712 if (l2dhdr.dh_lb_asize > rebuild.dh_lb_asize || 4713 l2dhdr.dh_lb_count > rebuild.dh_lb_count) 4714 return (1); 4715 4716 return (0); 4717 } 4718 4719 static void 4720 dump_config_from_label(zdb_label_t *label, size_t buflen, int l) 4721 { 4722 if (dump_opt['q']) 4723 return; 4724 4725 if ((dump_opt['l'] < 3) && (first_label(label->config) != l)) 4726 return; 4727 4728 print_label_header(label, l); 4729 dump_nvlist(label->config_nv, 4); 4730 print_label_numbers(" labels = ", label->config); 4731 4732 if (dump_opt['l'] >= 2) 4733 dump_nvlist_stats(label->config_nv, buflen); 4734 } 4735 4736 #define ZDB_MAX_UB_HEADER_SIZE 32 4737 4738 static void 4739 dump_label_uberblocks(zdb_label_t *label, uint64_t ashift, int label_num) 4740 { 4741 4742 vdev_t vd; 4743 char header[ZDB_MAX_UB_HEADER_SIZE]; 4744 4745 vd.vdev_ashift = ashift; 4746 vd.vdev_top = &vd; 4747 4748 for (int i = 0; i < VDEV_UBERBLOCK_COUNT(&vd); i++) { 4749 uint64_t uoff = VDEV_UBERBLOCK_OFFSET(&vd, i); 4750 uberblock_t *ub = (void *)((char *)&label->label + uoff); 4751 cksum_record_t *rec = label->uberblocks[i]; 4752 4753 if (rec == NULL) { 4754 if (dump_opt['u'] >= 2) { 4755 print_label_header(label, label_num); 4756 (void) printf(" Uberblock[%d] invalid\n", i); 4757 } 4758 continue; 4759 } 4760 4761 if ((dump_opt['u'] < 3) && (first_label(rec) != label_num)) 4762 continue; 4763 4764 if ((dump_opt['u'] < 4) && 4765 (ub->ub_mmp_magic == MMP_MAGIC) && ub->ub_mmp_delay && 4766 (i >= VDEV_UBERBLOCK_COUNT(&vd) - MMP_BLOCKS_PER_LABEL)) 4767 continue; 4768 4769 print_label_header(label, label_num); 4770 (void) snprintf(header, ZDB_MAX_UB_HEADER_SIZE, 4771 " Uberblock[%d]\n", i); 4772 dump_uberblock(ub, header, ""); 4773 print_label_numbers(" labels = ", rec); 4774 } 4775 } 4776 4777 static char curpath[PATH_MAX]; 4778 4779 /* 4780 * Iterate through the path components, recursively passing 4781 * current one's obj and remaining path until we find the obj 4782 * for the last one. 
4783 */ 4784 static int 4785 dump_path_impl(objset_t *os, uint64_t obj, char *name, uint64_t *retobj) 4786 { 4787 int err; 4788 boolean_t header = B_TRUE; 4789 uint64_t child_obj; 4790 char *s; 4791 dmu_buf_t *db; 4792 dmu_object_info_t doi; 4793 4794 if ((s = strchr(name, '/')) != NULL) 4795 *s = '\0'; 4796 err = zap_lookup(os, obj, name, 8, 1, &child_obj); 4797 4798 (void) strlcat(curpath, name, sizeof (curpath)); 4799 4800 if (err != 0) { 4801 (void) fprintf(stderr, "failed to lookup %s: %s\n", 4802 curpath, strerror(err)); 4803 return (err); 4804 } 4805 4806 child_obj = ZFS_DIRENT_OBJ(child_obj); 4807 err = sa_buf_hold(os, child_obj, FTAG, &db); 4808 if (err != 0) { 4809 (void) fprintf(stderr, 4810 "failed to get SA dbuf for obj %llu: %s\n", 4811 (u_longlong_t)child_obj, strerror(err)); 4812 return (EINVAL); 4813 } 4814 dmu_object_info_from_db(db, &doi); 4815 sa_buf_rele(db, FTAG); 4816 4817 if (doi.doi_bonus_type != DMU_OT_SA && 4818 doi.doi_bonus_type != DMU_OT_ZNODE) { 4819 (void) fprintf(stderr, "invalid bonus type %d for obj %llu\n", 4820 doi.doi_bonus_type, (u_longlong_t)child_obj); 4821 return (EINVAL); 4822 } 4823 4824 if (dump_opt['v'] > 6) { 4825 (void) printf("obj=%llu %s type=%d bonustype=%d\n", 4826 (u_longlong_t)child_obj, curpath, doi.doi_type, 4827 doi.doi_bonus_type); 4828 } 4829 4830 (void) strlcat(curpath, "/", sizeof (curpath)); 4831 4832 switch (doi.doi_type) { 4833 case DMU_OT_DIRECTORY_CONTENTS: 4834 if (s != NULL && *(s + 1) != '\0') 4835 return (dump_path_impl(os, child_obj, s + 1, retobj)); 4836 zfs_fallthrough; 4837 case DMU_OT_PLAIN_FILE_CONTENTS: 4838 if (retobj != NULL) { 4839 *retobj = child_obj; 4840 } else { 4841 dump_object(os, child_obj, dump_opt['v'], &header, 4842 NULL, 0); 4843 } 4844 return (0); 4845 default: 4846 (void) fprintf(stderr, "object %llu has non-file/directory " 4847 "type %d\n", (u_longlong_t)obj, doi.doi_type); 4848 break; 4849 } 4850 4851 return (EINVAL); 4852 } 4853 4854 /* 4855 * Dump the blocks for the 
object specified by path inside the dataset. 4856 */ 4857 static int 4858 dump_path(char *ds, char *path, uint64_t *retobj) 4859 { 4860 int err; 4861 objset_t *os; 4862 uint64_t root_obj; 4863 4864 err = open_objset(ds, FTAG, &os); 4865 if (err != 0) 4866 return (err); 4867 4868 err = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, &root_obj); 4869 if (err != 0) { 4870 (void) fprintf(stderr, "can't lookup root znode: %s\n", 4871 strerror(err)); 4872 close_objset(os, FTAG); 4873 return (EINVAL); 4874 } 4875 4876 (void) snprintf(curpath, sizeof (curpath), "dataset=%s path=/", ds); 4877 4878 err = dump_path_impl(os, root_obj, path, retobj); 4879 4880 close_objset(os, FTAG); 4881 return (err); 4882 } 4883 4884 static int 4885 dump_backup_bytes(objset_t *os, void *buf, int len, void *arg) 4886 { 4887 const char *p = (const char *)buf; 4888 ssize_t nwritten; 4889 4890 (void) os; 4891 (void) arg; 4892 4893 /* Write the data out, handling short writes and signals. */ 4894 while ((nwritten = write(STDOUT_FILENO, p, len)) < len) { 4895 if (nwritten < 0) { 4896 if (errno == EINTR) 4897 continue; 4898 return (errno); 4899 } 4900 p += nwritten; 4901 len -= nwritten; 4902 } 4903 4904 return (0); 4905 } 4906 4907 static void 4908 dump_backup(const char *pool, uint64_t objset_id, const char *flagstr) 4909 { 4910 boolean_t embed = B_FALSE; 4911 boolean_t large_block = B_FALSE; 4912 boolean_t compress = B_FALSE; 4913 boolean_t raw = B_FALSE; 4914 4915 const char *c; 4916 for (c = flagstr; c != NULL && *c != '\0'; c++) { 4917 switch (*c) { 4918 case 'e': 4919 embed = B_TRUE; 4920 break; 4921 case 'L': 4922 large_block = B_TRUE; 4923 break; 4924 case 'c': 4925 compress = B_TRUE; 4926 break; 4927 case 'w': 4928 raw = B_TRUE; 4929 break; 4930 default: 4931 fprintf(stderr, "dump_backup: invalid flag " 4932 "'%c'\n", *c); 4933 return; 4934 } 4935 } 4936 4937 if (isatty(STDOUT_FILENO)) { 4938 fprintf(stderr, "dump_backup: stream cannot be written " 4939 "to a terminal\n"); 4940 return; 
4941 } 4942 4943 offset_t off = 0; 4944 dmu_send_outparams_t out = { 4945 .dso_outfunc = dump_backup_bytes, 4946 .dso_dryrun = B_FALSE, 4947 }; 4948 4949 int err = dmu_send_obj(pool, objset_id, /* fromsnap */0, embed, 4950 large_block, compress, raw, /* saved */ B_FALSE, STDOUT_FILENO, 4951 &off, &out); 4952 if (err != 0) { 4953 fprintf(stderr, "dump_backup: dmu_send_obj: %s\n", 4954 strerror(err)); 4955 return; 4956 } 4957 } 4958 4959 static int 4960 zdb_copy_object(objset_t *os, uint64_t srcobj, char *destfile) 4961 { 4962 int err = 0; 4963 uint64_t size, readsize, oursize, offset; 4964 ssize_t writesize; 4965 sa_handle_t *hdl; 4966 4967 (void) printf("Copying object %" PRIu64 " to file %s\n", srcobj, 4968 destfile); 4969 4970 VERIFY3P(os, ==, sa_os); 4971 if ((err = sa_handle_get(os, srcobj, NULL, SA_HDL_PRIVATE, &hdl))) { 4972 (void) printf("Failed to get handle for SA znode\n"); 4973 return (err); 4974 } 4975 if ((err = sa_lookup(hdl, sa_attr_table[ZPL_SIZE], &size, 8))) { 4976 (void) sa_handle_destroy(hdl); 4977 return (err); 4978 } 4979 (void) sa_handle_destroy(hdl); 4980 4981 (void) printf("Object %" PRIu64 " is %" PRIu64 " bytes\n", srcobj, 4982 size); 4983 if (size == 0) { 4984 return (EINVAL); 4985 } 4986 4987 int fd = open(destfile, O_WRONLY | O_CREAT | O_TRUNC, 0644); 4988 if (fd == -1) 4989 return (errno); 4990 /* 4991 * We cap the size at 1 mebibyte here to prevent 4992 * allocation failures and nigh-infinite printing if the 4993 * object is extremely large. 
4994 */ 4995 oursize = MIN(size, 1 << 20); 4996 offset = 0; 4997 char *buf = kmem_alloc(oursize, KM_NOSLEEP); 4998 if (buf == NULL) { 4999 (void) close(fd); 5000 return (ENOMEM); 5001 } 5002 5003 while (offset < size) { 5004 readsize = MIN(size - offset, 1 << 20); 5005 err = dmu_read(os, srcobj, offset, readsize, buf, 0); 5006 if (err != 0) { 5007 (void) printf("got error %u from dmu_read\n", err); 5008 kmem_free(buf, oursize); 5009 (void) close(fd); 5010 return (err); 5011 } 5012 if (dump_opt['v'] > 3) { 5013 (void) printf("Read offset=%" PRIu64 " size=%" PRIu64 5014 " error=%d\n", offset, readsize, err); 5015 } 5016 5017 writesize = write(fd, buf, readsize); 5018 if (writesize < 0) { 5019 err = errno; 5020 break; 5021 } else if (writesize != readsize) { 5022 /* Incomplete write */ 5023 (void) fprintf(stderr, "Short write, only wrote %llu of" 5024 " %" PRIu64 " bytes, exiting...\n", 5025 (u_longlong_t)writesize, readsize); 5026 break; 5027 } 5028 5029 offset += readsize; 5030 } 5031 5032 (void) close(fd); 5033 5034 if (buf != NULL) 5035 kmem_free(buf, oursize); 5036 5037 return (err); 5038 } 5039 5040 static boolean_t 5041 label_cksum_valid(vdev_label_t *label, uint64_t offset) 5042 { 5043 zio_checksum_info_t *ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL]; 5044 zio_cksum_t expected_cksum; 5045 zio_cksum_t actual_cksum; 5046 zio_cksum_t verifier; 5047 zio_eck_t *eck; 5048 int byteswap; 5049 5050 void *data = (char *)label + offsetof(vdev_label_t, vl_vdev_phys); 5051 eck = (zio_eck_t *)((char *)(data) + VDEV_PHYS_SIZE) - 1; 5052 5053 offset += offsetof(vdev_label_t, vl_vdev_phys); 5054 ZIO_SET_CHECKSUM(&verifier, offset, 0, 0, 0); 5055 5056 byteswap = (eck->zec_magic == BSWAP_64(ZEC_MAGIC)); 5057 if (byteswap) 5058 byteswap_uint64_array(&verifier, sizeof (zio_cksum_t)); 5059 5060 expected_cksum = eck->zec_cksum; 5061 eck->zec_cksum = verifier; 5062 5063 abd_t *abd = abd_get_from_buf(data, VDEV_PHYS_SIZE); 5064 ci->ci_func[byteswap](abd, VDEV_PHYS_SIZE, NULL, 
&actual_cksum); 5065 abd_free(abd); 5066 5067 if (byteswap) 5068 byteswap_uint64_array(&expected_cksum, sizeof (zio_cksum_t)); 5069 5070 if (ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) 5071 return (B_TRUE); 5072 5073 return (B_FALSE); 5074 } 5075 5076 static int 5077 dump_label(const char *dev) 5078 { 5079 char path[MAXPATHLEN]; 5080 zdb_label_t labels[VDEV_LABELS] = {{{{0}}}}; 5081 uint64_t psize, ashift, l2cache; 5082 struct stat64 statbuf; 5083 boolean_t config_found = B_FALSE; 5084 boolean_t error = B_FALSE; 5085 boolean_t read_l2arc_header = B_FALSE; 5086 avl_tree_t config_tree; 5087 avl_tree_t uberblock_tree; 5088 void *node, *cookie; 5089 int fd; 5090 5091 /* 5092 * Check if we were given absolute path and use it as is. 5093 * Otherwise if the provided vdev name doesn't point to a file, 5094 * try prepending expected disk paths and partition numbers. 5095 */ 5096 (void) strlcpy(path, dev, sizeof (path)); 5097 if (dev[0] != '/' && stat64(path, &statbuf) != 0) { 5098 int error; 5099 5100 error = zfs_resolve_shortname(dev, path, MAXPATHLEN); 5101 if (error == 0 && zfs_dev_is_whole_disk(path)) { 5102 if (zfs_append_partition(path, MAXPATHLEN) == -1) 5103 error = ENOENT; 5104 } 5105 5106 if (error || (stat64(path, &statbuf) != 0)) { 5107 (void) printf("failed to find device %s, try " 5108 "specifying absolute path instead\n", dev); 5109 return (1); 5110 } 5111 } 5112 5113 if ((fd = open64(path, O_RDONLY)) < 0) { 5114 (void) printf("cannot open '%s': %s\n", path, strerror(errno)); 5115 exit(1); 5116 } 5117 5118 if (fstat64_blk(fd, &statbuf) != 0) { 5119 (void) printf("failed to stat '%s': %s\n", path, 5120 strerror(errno)); 5121 (void) close(fd); 5122 exit(1); 5123 } 5124 5125 if (S_ISBLK(statbuf.st_mode) && zfs_dev_flush(fd) != 0) 5126 (void) printf("failed to invalidate cache '%s' : %s\n", path, 5127 strerror(errno)); 5128 5129 avl_create(&config_tree, cksum_record_compare, 5130 sizeof (cksum_record_t), offsetof(cksum_record_t, link)); 5131 
avl_create(&uberblock_tree, cksum_record_compare, 5132 sizeof (cksum_record_t), offsetof(cksum_record_t, link)); 5133 5134 psize = statbuf.st_size; 5135 psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t)); 5136 ashift = SPA_MINBLOCKSHIFT; 5137 5138 /* 5139 * 1. Read the label from disk 5140 * 2. Verify label cksum 5141 * 3. Unpack the configuration and insert in config tree. 5142 * 4. Traverse all uberblocks and insert in uberblock tree. 5143 */ 5144 for (int l = 0; l < VDEV_LABELS; l++) { 5145 zdb_label_t *label = &labels[l]; 5146 char *buf = label->label.vl_vdev_phys.vp_nvlist; 5147 size_t buflen = sizeof (label->label.vl_vdev_phys.vp_nvlist); 5148 nvlist_t *config; 5149 cksum_record_t *rec; 5150 zio_cksum_t cksum; 5151 vdev_t vd; 5152 5153 label->label_offset = vdev_label_offset(psize, l, 0); 5154 5155 if (pread64(fd, &label->label, sizeof (label->label), 5156 label->label_offset) != sizeof (label->label)) { 5157 if (!dump_opt['q']) 5158 (void) printf("failed to read label %d\n", l); 5159 label->read_failed = B_TRUE; 5160 error = B_TRUE; 5161 continue; 5162 } 5163 5164 label->read_failed = B_FALSE; 5165 label->cksum_valid = label_cksum_valid(&label->label, 5166 label->label_offset); 5167 5168 if (nvlist_unpack(buf, buflen, &config, 0) == 0) { 5169 nvlist_t *vdev_tree = NULL; 5170 size_t size; 5171 5172 if ((nvlist_lookup_nvlist(config, 5173 ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) || 5174 (nvlist_lookup_uint64(vdev_tree, 5175 ZPOOL_CONFIG_ASHIFT, &ashift) != 0)) 5176 ashift = SPA_MINBLOCKSHIFT; 5177 5178 if (nvlist_size(config, &size, NV_ENCODE_XDR) != 0) 5179 size = buflen; 5180 5181 /* If the device is a cache device clear the header. 
*/ 5182 if (!read_l2arc_header) { 5183 if (nvlist_lookup_uint64(config, 5184 ZPOOL_CONFIG_POOL_STATE, &l2cache) == 0 && 5185 l2cache == POOL_STATE_L2CACHE) { 5186 read_l2arc_header = B_TRUE; 5187 } 5188 } 5189 5190 fletcher_4_native_varsize(buf, size, &cksum); 5191 rec = cksum_record_insert(&config_tree, &cksum, l); 5192 5193 label->config = rec; 5194 label->config_nv = config; 5195 config_found = B_TRUE; 5196 } else { 5197 error = B_TRUE; 5198 } 5199 5200 vd.vdev_ashift = ashift; 5201 vd.vdev_top = &vd; 5202 5203 for (int i = 0; i < VDEV_UBERBLOCK_COUNT(&vd); i++) { 5204 uint64_t uoff = VDEV_UBERBLOCK_OFFSET(&vd, i); 5205 uberblock_t *ub = (void *)((char *)label + uoff); 5206 5207 if (uberblock_verify(ub)) 5208 continue; 5209 5210 fletcher_4_native_varsize(ub, sizeof (*ub), &cksum); 5211 rec = cksum_record_insert(&uberblock_tree, &cksum, l); 5212 5213 label->uberblocks[i] = rec; 5214 } 5215 } 5216 5217 /* 5218 * Dump the label and uberblocks. 5219 */ 5220 for (int l = 0; l < VDEV_LABELS; l++) { 5221 zdb_label_t *label = &labels[l]; 5222 size_t buflen = sizeof (label->label.vl_vdev_phys.vp_nvlist); 5223 5224 if (label->read_failed == B_TRUE) 5225 continue; 5226 5227 if (label->config_nv) { 5228 dump_config_from_label(label, buflen, l); 5229 } else { 5230 if (!dump_opt['q']) 5231 (void) printf("failed to unpack label %d\n", l); 5232 } 5233 5234 if (dump_opt['u']) 5235 dump_label_uberblocks(label, ashift, l); 5236 5237 nvlist_free(label->config_nv); 5238 } 5239 5240 /* 5241 * Dump the L2ARC header, if existent. 
5242 */ 5243 if (read_l2arc_header) 5244 error |= dump_l2arc_header(fd); 5245 5246 cookie = NULL; 5247 while ((node = avl_destroy_nodes(&config_tree, &cookie)) != NULL) 5248 umem_free(node, sizeof (cksum_record_t)); 5249 5250 cookie = NULL; 5251 while ((node = avl_destroy_nodes(&uberblock_tree, &cookie)) != NULL) 5252 umem_free(node, sizeof (cksum_record_t)); 5253 5254 avl_destroy(&config_tree); 5255 avl_destroy(&uberblock_tree); 5256 5257 (void) close(fd); 5258 5259 return (config_found == B_FALSE ? 2 : 5260 (error == B_TRUE ? 1 : 0)); 5261 } 5262 5263 static uint64_t dataset_feature_count[SPA_FEATURES]; 5264 static uint64_t global_feature_count[SPA_FEATURES]; 5265 static uint64_t remap_deadlist_count = 0; 5266 5267 static int 5268 dump_one_objset(const char *dsname, void *arg) 5269 { 5270 (void) arg; 5271 int error; 5272 objset_t *os; 5273 spa_feature_t f; 5274 5275 error = open_objset(dsname, FTAG, &os); 5276 if (error != 0) 5277 return (0); 5278 5279 for (f = 0; f < SPA_FEATURES; f++) { 5280 if (!dsl_dataset_feature_is_active(dmu_objset_ds(os), f)) 5281 continue; 5282 ASSERT(spa_feature_table[f].fi_flags & 5283 ZFEATURE_FLAG_PER_DATASET); 5284 dataset_feature_count[f]++; 5285 } 5286 5287 if (dsl_dataset_remap_deadlist_exists(dmu_objset_ds(os))) { 5288 remap_deadlist_count++; 5289 } 5290 5291 for (dsl_bookmark_node_t *dbn = 5292 avl_first(&dmu_objset_ds(os)->ds_bookmarks); dbn != NULL; 5293 dbn = AVL_NEXT(&dmu_objset_ds(os)->ds_bookmarks, dbn)) { 5294 mos_obj_refd(dbn->dbn_phys.zbm_redaction_obj); 5295 if (dbn->dbn_phys.zbm_redaction_obj != 0) 5296 global_feature_count[SPA_FEATURE_REDACTION_BOOKMARKS]++; 5297 if (dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN) 5298 global_feature_count[SPA_FEATURE_BOOKMARK_WRITTEN]++; 5299 } 5300 5301 if (dsl_deadlist_is_open(&dmu_objset_ds(os)->ds_dir->dd_livelist) && 5302 !dmu_objset_is_snapshot(os)) { 5303 global_feature_count[SPA_FEATURE_LIVELIST]++; 5304 } 5305 5306 dump_objset(os); 5307 close_objset(os, FTAG); 5308 
fuid_table_destroy(); 5309 return (0); 5310 } 5311 5312 /* 5313 * Block statistics. 5314 */ 5315 #define PSIZE_HISTO_SIZE (SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 2) 5316 typedef struct zdb_blkstats { 5317 uint64_t zb_asize; 5318 uint64_t zb_lsize; 5319 uint64_t zb_psize; 5320 uint64_t zb_count; 5321 uint64_t zb_gangs; 5322 uint64_t zb_ditto_samevdev; 5323 uint64_t zb_ditto_same_ms; 5324 uint64_t zb_psize_histogram[PSIZE_HISTO_SIZE]; 5325 } zdb_blkstats_t; 5326 5327 /* 5328 * Extended object types to report deferred frees and dedup auto-ditto blocks. 5329 */ 5330 #define ZDB_OT_DEFERRED (DMU_OT_NUMTYPES + 0) 5331 #define ZDB_OT_DITTO (DMU_OT_NUMTYPES + 1) 5332 #define ZDB_OT_OTHER (DMU_OT_NUMTYPES + 2) 5333 #define ZDB_OT_TOTAL (DMU_OT_NUMTYPES + 3) 5334 5335 static const char *zdb_ot_extname[] = { 5336 "deferred free", 5337 "dedup ditto", 5338 "other", 5339 "Total", 5340 }; 5341 5342 #define ZB_TOTAL DN_MAX_LEVELS 5343 #define SPA_MAX_FOR_16M (SPA_MAXBLOCKSHIFT+1) 5344 5345 typedef struct zdb_cb { 5346 zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1]; 5347 uint64_t zcb_removing_size; 5348 uint64_t zcb_checkpoint_size; 5349 uint64_t zcb_dedup_asize; 5350 uint64_t zcb_dedup_blocks; 5351 uint64_t zcb_psize_count[SPA_MAX_FOR_16M]; 5352 uint64_t zcb_lsize_count[SPA_MAX_FOR_16M]; 5353 uint64_t zcb_asize_count[SPA_MAX_FOR_16M]; 5354 uint64_t zcb_psize_len[SPA_MAX_FOR_16M]; 5355 uint64_t zcb_lsize_len[SPA_MAX_FOR_16M]; 5356 uint64_t zcb_asize_len[SPA_MAX_FOR_16M]; 5357 uint64_t zcb_psize_total; 5358 uint64_t zcb_lsize_total; 5359 uint64_t zcb_asize_total; 5360 uint64_t zcb_embedded_blocks[NUM_BP_EMBEDDED_TYPES]; 5361 uint64_t zcb_embedded_histogram[NUM_BP_EMBEDDED_TYPES] 5362 [BPE_PAYLOAD_SIZE + 1]; 5363 uint64_t zcb_start; 5364 hrtime_t zcb_lastprint; 5365 uint64_t zcb_totalasize; 5366 uint64_t zcb_errors[256]; 5367 int zcb_readfails; 5368 int zcb_haderrors; 5369 spa_t *zcb_spa; 5370 uint32_t **zcb_vd_obsolete_counts; 5371 } zdb_cb_t; 5372 5373 /* test if two 
DVA offsets from same vdev are within the same metaslab */ 5374 static boolean_t 5375 same_metaslab(spa_t *spa, uint64_t vdev, uint64_t off1, uint64_t off2) 5376 { 5377 vdev_t *vd = vdev_lookup_top(spa, vdev); 5378 uint64_t ms_shift = vd->vdev_ms_shift; 5379 5380 return ((off1 >> ms_shift) == (off2 >> ms_shift)); 5381 } 5382 5383 /* 5384 * Used to simplify reporting of the histogram data. 5385 */ 5386 typedef struct one_histo { 5387 const char *name; 5388 uint64_t *count; 5389 uint64_t *len; 5390 uint64_t cumulative; 5391 } one_histo_t; 5392 5393 /* 5394 * The number of separate histograms processed for psize, lsize and asize. 5395 */ 5396 #define NUM_HISTO 3 5397 5398 /* 5399 * This routine will create a fixed column size output of three different 5400 * histograms showing by blocksize of 512 - 2^ SPA_MAX_FOR_16M 5401 * the count, length and cumulative length of the psize, lsize and 5402 * asize blocks. 5403 * 5404 * All three types of blocks are listed on a single line 5405 * 5406 * By default the table is printed in nicenumber format (e.g. 123K) but 5407 * if the '-P' parameter is specified then the full raw number (parseable) 5408 * is printed out. 5409 */ 5410 static void 5411 dump_size_histograms(zdb_cb_t *zcb) 5412 { 5413 /* 5414 * A temporary buffer that allows us to convert a number into 5415 * a string using zdb_nicenumber to allow either raw or human 5416 * readable numbers to be output. 5417 */ 5418 char numbuf[32]; 5419 5420 /* 5421 * Define titles which are used in the headers of the tables 5422 * printed by this routine. 5423 */ 5424 const char blocksize_title1[] = "block"; 5425 const char blocksize_title2[] = "size"; 5426 const char count_title[] = "Count"; 5427 const char length_title[] = "Size"; 5428 const char cumulative_title[] = "Cum."; 5429 5430 /* 5431 * Setup the histogram arrays (psize, lsize, and asize). 
5432 */ 5433 one_histo_t parm_histo[NUM_HISTO]; 5434 5435 parm_histo[0].name = "psize"; 5436 parm_histo[0].count = zcb->zcb_psize_count; 5437 parm_histo[0].len = zcb->zcb_psize_len; 5438 parm_histo[0].cumulative = 0; 5439 5440 parm_histo[1].name = "lsize"; 5441 parm_histo[1].count = zcb->zcb_lsize_count; 5442 parm_histo[1].len = zcb->zcb_lsize_len; 5443 parm_histo[1].cumulative = 0; 5444 5445 parm_histo[2].name = "asize"; 5446 parm_histo[2].count = zcb->zcb_asize_count; 5447 parm_histo[2].len = zcb->zcb_asize_len; 5448 parm_histo[2].cumulative = 0; 5449 5450 5451 (void) printf("\nBlock Size Histogram\n"); 5452 /* 5453 * Print the first line titles 5454 */ 5455 if (dump_opt['P']) 5456 (void) printf("\n%s\t", blocksize_title1); 5457 else 5458 (void) printf("\n%7s ", blocksize_title1); 5459 5460 for (int j = 0; j < NUM_HISTO; j++) { 5461 if (dump_opt['P']) { 5462 if (j < NUM_HISTO - 1) { 5463 (void) printf("%s\t\t\t", parm_histo[j].name); 5464 } else { 5465 /* Don't print trailing spaces */ 5466 (void) printf(" %s", parm_histo[j].name); 5467 } 5468 } else { 5469 if (j < NUM_HISTO - 1) { 5470 /* Left aligned strings in the output */ 5471 (void) printf("%-7s ", 5472 parm_histo[j].name); 5473 } else { 5474 /* Don't print trailing spaces */ 5475 (void) printf("%s", parm_histo[j].name); 5476 } 5477 } 5478 } 5479 (void) printf("\n"); 5480 5481 /* 5482 * Print the second line titles 5483 */ 5484 if (dump_opt['P']) { 5485 (void) printf("%s\t", blocksize_title2); 5486 } else { 5487 (void) printf("%7s ", blocksize_title2); 5488 } 5489 5490 for (int i = 0; i < NUM_HISTO; i++) { 5491 if (dump_opt['P']) { 5492 (void) printf("%s\t%s\t%s\t", 5493 count_title, length_title, cumulative_title); 5494 } else { 5495 (void) printf("%7s%7s%7s", 5496 count_title, length_title, cumulative_title); 5497 } 5498 } 5499 (void) printf("\n"); 5500 5501 /* 5502 * Print the rows 5503 */ 5504 for (int i = SPA_MINBLOCKSHIFT; i < SPA_MAX_FOR_16M; i++) { 5505 5506 /* 5507 * Print the first column showing 
the blocksize 5508 */ 5509 zdb_nicenum((1ULL << i), numbuf, sizeof (numbuf)); 5510 5511 if (dump_opt['P']) { 5512 printf("%s", numbuf); 5513 } else { 5514 printf("%7s:", numbuf); 5515 } 5516 5517 /* 5518 * Print the remaining set of 3 columns per size: 5519 * for psize, lsize and asize 5520 */ 5521 for (int j = 0; j < NUM_HISTO; j++) { 5522 parm_histo[j].cumulative += parm_histo[j].len[i]; 5523 5524 zdb_nicenum(parm_histo[j].count[i], 5525 numbuf, sizeof (numbuf)); 5526 if (dump_opt['P']) 5527 (void) printf("\t%s", numbuf); 5528 else 5529 (void) printf("%7s", numbuf); 5530 5531 zdb_nicenum(parm_histo[j].len[i], 5532 numbuf, sizeof (numbuf)); 5533 if (dump_opt['P']) 5534 (void) printf("\t%s", numbuf); 5535 else 5536 (void) printf("%7s", numbuf); 5537 5538 zdb_nicenum(parm_histo[j].cumulative, 5539 numbuf, sizeof (numbuf)); 5540 if (dump_opt['P']) 5541 (void) printf("\t%s", numbuf); 5542 else 5543 (void) printf("%7s", numbuf); 5544 } 5545 (void) printf("\n"); 5546 } 5547 } 5548 5549 static void 5550 zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp, 5551 dmu_object_type_t type) 5552 { 5553 uint64_t refcnt = 0; 5554 int i; 5555 5556 ASSERT(type < ZDB_OT_TOTAL); 5557 5558 if (zilog && zil_bp_tree_add(zilog, bp) != 0) 5559 return; 5560 5561 spa_config_enter(zcb->zcb_spa, SCL_CONFIG, FTAG, RW_READER); 5562 5563 for (i = 0; i < 4; i++) { 5564 int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL; 5565 int t = (i & 1) ? type : ZDB_OT_TOTAL; 5566 int equal; 5567 zdb_blkstats_t *zb = &zcb->zcb_type[l][t]; 5568 5569 zb->zb_asize += BP_GET_ASIZE(bp); 5570 zb->zb_lsize += BP_GET_LSIZE(bp); 5571 zb->zb_psize += BP_GET_PSIZE(bp); 5572 zb->zb_count++; 5573 5574 /* 5575 * The histogram is only big enough to record blocks up to 5576 * SPA_OLD_MAXBLOCKSIZE; larger blocks go into the last, 5577 * "other", bucket. 
5578 */ 5579 unsigned idx = BP_GET_PSIZE(bp) >> SPA_MINBLOCKSHIFT; 5580 idx = MIN(idx, SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 1); 5581 zb->zb_psize_histogram[idx]++; 5582 5583 zb->zb_gangs += BP_COUNT_GANG(bp); 5584 5585 switch (BP_GET_NDVAS(bp)) { 5586 case 2: 5587 if (DVA_GET_VDEV(&bp->blk_dva[0]) == 5588 DVA_GET_VDEV(&bp->blk_dva[1])) { 5589 zb->zb_ditto_samevdev++; 5590 5591 if (same_metaslab(zcb->zcb_spa, 5592 DVA_GET_VDEV(&bp->blk_dva[0]), 5593 DVA_GET_OFFSET(&bp->blk_dva[0]), 5594 DVA_GET_OFFSET(&bp->blk_dva[1]))) 5595 zb->zb_ditto_same_ms++; 5596 } 5597 break; 5598 case 3: 5599 equal = (DVA_GET_VDEV(&bp->blk_dva[0]) == 5600 DVA_GET_VDEV(&bp->blk_dva[1])) + 5601 (DVA_GET_VDEV(&bp->blk_dva[0]) == 5602 DVA_GET_VDEV(&bp->blk_dva[2])) + 5603 (DVA_GET_VDEV(&bp->blk_dva[1]) == 5604 DVA_GET_VDEV(&bp->blk_dva[2])); 5605 if (equal != 0) { 5606 zb->zb_ditto_samevdev++; 5607 5608 if (DVA_GET_VDEV(&bp->blk_dva[0]) == 5609 DVA_GET_VDEV(&bp->blk_dva[1]) && 5610 same_metaslab(zcb->zcb_spa, 5611 DVA_GET_VDEV(&bp->blk_dva[0]), 5612 DVA_GET_OFFSET(&bp->blk_dva[0]), 5613 DVA_GET_OFFSET(&bp->blk_dva[1]))) 5614 zb->zb_ditto_same_ms++; 5615 else if (DVA_GET_VDEV(&bp->blk_dva[0]) == 5616 DVA_GET_VDEV(&bp->blk_dva[2]) && 5617 same_metaslab(zcb->zcb_spa, 5618 DVA_GET_VDEV(&bp->blk_dva[0]), 5619 DVA_GET_OFFSET(&bp->blk_dva[0]), 5620 DVA_GET_OFFSET(&bp->blk_dva[2]))) 5621 zb->zb_ditto_same_ms++; 5622 else if (DVA_GET_VDEV(&bp->blk_dva[1]) == 5623 DVA_GET_VDEV(&bp->blk_dva[2]) && 5624 same_metaslab(zcb->zcb_spa, 5625 DVA_GET_VDEV(&bp->blk_dva[1]), 5626 DVA_GET_OFFSET(&bp->blk_dva[1]), 5627 DVA_GET_OFFSET(&bp->blk_dva[2]))) 5628 zb->zb_ditto_same_ms++; 5629 } 5630 break; 5631 } 5632 } 5633 5634 spa_config_exit(zcb->zcb_spa, SCL_CONFIG, FTAG); 5635 5636 if (BP_IS_EMBEDDED(bp)) { 5637 zcb->zcb_embedded_blocks[BPE_GET_ETYPE(bp)]++; 5638 zcb->zcb_embedded_histogram[BPE_GET_ETYPE(bp)] 5639 [BPE_GET_PSIZE(bp)]++; 5640 return; 5641 } 5642 /* 5643 * The binning histogram bins by powers of 
two up to 5644 * SPA_MAXBLOCKSIZE rather than creating bins for 5645 * every possible blocksize found in the pool. 5646 */ 5647 int bin = highbit64(BP_GET_PSIZE(bp)) - 1; 5648 5649 zcb->zcb_psize_count[bin]++; 5650 zcb->zcb_psize_len[bin] += BP_GET_PSIZE(bp); 5651 zcb->zcb_psize_total += BP_GET_PSIZE(bp); 5652 5653 bin = highbit64(BP_GET_LSIZE(bp)) - 1; 5654 5655 zcb->zcb_lsize_count[bin]++; 5656 zcb->zcb_lsize_len[bin] += BP_GET_LSIZE(bp); 5657 zcb->zcb_lsize_total += BP_GET_LSIZE(bp); 5658 5659 bin = highbit64(BP_GET_ASIZE(bp)) - 1; 5660 5661 zcb->zcb_asize_count[bin]++; 5662 zcb->zcb_asize_len[bin] += BP_GET_ASIZE(bp); 5663 zcb->zcb_asize_total += BP_GET_ASIZE(bp); 5664 5665 if (dump_opt['L']) 5666 return; 5667 5668 if (BP_GET_DEDUP(bp)) { 5669 ddt_t *ddt; 5670 ddt_entry_t *dde; 5671 5672 ddt = ddt_select(zcb->zcb_spa, bp); 5673 ddt_enter(ddt); 5674 dde = ddt_lookup(ddt, bp, B_FALSE); 5675 5676 if (dde == NULL) { 5677 refcnt = 0; 5678 } else { 5679 ddt_phys_t *ddp = ddt_phys_select(dde, bp); 5680 ddt_phys_decref(ddp); 5681 refcnt = ddp->ddp_refcnt; 5682 if (ddt_phys_total_refcnt(dde) == 0) 5683 ddt_remove(ddt, dde); 5684 } 5685 ddt_exit(ddt); 5686 } 5687 5688 VERIFY3U(zio_wait(zio_claim(NULL, zcb->zcb_spa, 5689 refcnt ? 
0 : spa_min_claim_txg(zcb->zcb_spa), 5690 bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0); 5691 } 5692 5693 static void 5694 zdb_blkptr_done(zio_t *zio) 5695 { 5696 spa_t *spa = zio->io_spa; 5697 blkptr_t *bp = zio->io_bp; 5698 int ioerr = zio->io_error; 5699 zdb_cb_t *zcb = zio->io_private; 5700 zbookmark_phys_t *zb = &zio->io_bookmark; 5701 5702 mutex_enter(&spa->spa_scrub_lock); 5703 spa->spa_load_verify_bytes -= BP_GET_PSIZE(bp); 5704 cv_broadcast(&spa->spa_scrub_io_cv); 5705 5706 if (ioerr && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { 5707 char blkbuf[BP_SPRINTF_LEN]; 5708 5709 zcb->zcb_haderrors = 1; 5710 zcb->zcb_errors[ioerr]++; 5711 5712 if (dump_opt['b'] >= 2) 5713 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); 5714 else 5715 blkbuf[0] = '\0'; 5716 5717 (void) printf("zdb_blkptr_cb: " 5718 "Got error %d reading " 5719 "<%llu, %llu, %lld, %llx> %s -- skipping\n", 5720 ioerr, 5721 (u_longlong_t)zb->zb_objset, 5722 (u_longlong_t)zb->zb_object, 5723 (u_longlong_t)zb->zb_level, 5724 (u_longlong_t)zb->zb_blkid, 5725 blkbuf); 5726 } 5727 mutex_exit(&spa->spa_scrub_lock); 5728 5729 abd_free(zio->io_abd); 5730 } 5731 5732 static int 5733 zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, 5734 const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) 5735 { 5736 zdb_cb_t *zcb = arg; 5737 dmu_object_type_t type; 5738 boolean_t is_metadata; 5739 5740 if (zb->zb_level == ZB_DNODE_LEVEL) 5741 return (0); 5742 5743 if (dump_opt['b'] >= 5 && bp->blk_birth > 0) { 5744 char blkbuf[BP_SPRINTF_LEN]; 5745 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); 5746 (void) printf("objset %llu object %llu " 5747 "level %lld offset 0x%llx %s\n", 5748 (u_longlong_t)zb->zb_objset, 5749 (u_longlong_t)zb->zb_object, 5750 (longlong_t)zb->zb_level, 5751 (u_longlong_t)blkid2offset(dnp, bp, zb), 5752 blkbuf); 5753 } 5754 5755 if (BP_IS_HOLE(bp) || BP_IS_REDACTED(bp)) 5756 return (0); 5757 5758 type = BP_GET_TYPE(bp); 5759 5760 zdb_count_block(zcb, zilog, bp, 5761 (type & 
DMU_OT_NEWTYPE) ? ZDB_OT_OTHER : type); 5762 5763 is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)); 5764 5765 if (!BP_IS_EMBEDDED(bp) && 5766 (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata))) { 5767 size_t size = BP_GET_PSIZE(bp); 5768 abd_t *abd = abd_alloc(size, B_FALSE); 5769 int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW; 5770 5771 /* If it's an intent log block, failure is expected. */ 5772 if (zb->zb_level == ZB_ZIL_LEVEL) 5773 flags |= ZIO_FLAG_SPECULATIVE; 5774 5775 mutex_enter(&spa->spa_scrub_lock); 5776 while (spa->spa_load_verify_bytes > max_inflight_bytes) 5777 cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); 5778 spa->spa_load_verify_bytes += size; 5779 mutex_exit(&spa->spa_scrub_lock); 5780 5781 zio_nowait(zio_read(NULL, spa, bp, abd, size, 5782 zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb)); 5783 } 5784 5785 zcb->zcb_readfails = 0; 5786 5787 /* only call gethrtime() every 100 blocks */ 5788 static int iters; 5789 if (++iters > 100) 5790 iters = 0; 5791 else 5792 return (0); 5793 5794 if (dump_opt['b'] < 5 && gethrtime() > zcb->zcb_lastprint + NANOSEC) { 5795 uint64_t now = gethrtime(); 5796 char buf[10]; 5797 uint64_t bytes = zcb->zcb_type[ZB_TOTAL][ZDB_OT_TOTAL].zb_asize; 5798 uint64_t kb_per_sec = 5799 1 + bytes / (1 + ((now - zcb->zcb_start) / 1000 / 1000)); 5800 uint64_t sec_remaining = 5801 (zcb->zcb_totalasize - bytes) / 1024 / kb_per_sec; 5802 5803 /* make sure nicenum has enough space */ 5804 _Static_assert(sizeof (buf) >= NN_NUMBUF_SZ, "buf truncated"); 5805 5806 zfs_nicebytes(bytes, buf, sizeof (buf)); 5807 (void) fprintf(stderr, 5808 "\r%5s completed (%4"PRIu64"MB/s) " 5809 "estimated time remaining: " 5810 "%"PRIu64"hr %02"PRIu64"min %02"PRIu64"sec ", 5811 buf, kb_per_sec / 1024, 5812 sec_remaining / 60 / 60, 5813 sec_remaining / 60 % 60, 5814 sec_remaining % 60); 5815 5816 zcb->zcb_lastprint = now; 5817 } 5818 5819 return (0); 5820 } 5821 5822 static void 5823 zdb_leak(void *arg, 
uint64_t start, uint64_t size) 5824 { 5825 vdev_t *vd = arg; 5826 5827 (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n", 5828 (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size); 5829 } 5830 5831 static metaslab_ops_t zdb_metaslab_ops = { 5832 NULL /* alloc */ 5833 }; 5834 5835 static int 5836 load_unflushed_svr_segs_cb(spa_t *spa, space_map_entry_t *sme, 5837 uint64_t txg, void *arg) 5838 { 5839 spa_vdev_removal_t *svr = arg; 5840 5841 uint64_t offset = sme->sme_offset; 5842 uint64_t size = sme->sme_run; 5843 5844 /* skip vdevs we don't care about */ 5845 if (sme->sme_vdev != svr->svr_vdev_id) 5846 return (0); 5847 5848 vdev_t *vd = vdev_lookup_top(spa, sme->sme_vdev); 5849 metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift]; 5850 ASSERT(sme->sme_type == SM_ALLOC || sme->sme_type == SM_FREE); 5851 5852 if (txg < metaslab_unflushed_txg(ms)) 5853 return (0); 5854 5855 if (sme->sme_type == SM_ALLOC) 5856 range_tree_add(svr->svr_allocd_segs, offset, size); 5857 else 5858 range_tree_remove(svr->svr_allocd_segs, offset, size); 5859 5860 return (0); 5861 } 5862 5863 static void 5864 claim_segment_impl_cb(uint64_t inner_offset, vdev_t *vd, uint64_t offset, 5865 uint64_t size, void *arg) 5866 { 5867 (void) inner_offset, (void) arg; 5868 5869 /* 5870 * This callback was called through a remap from 5871 * a device being removed. Therefore, the vdev that 5872 * this callback is applied to is a concrete 5873 * vdev. 
5874 */ 5875 ASSERT(vdev_is_concrete(vd)); 5876 5877 VERIFY0(metaslab_claim_impl(vd, offset, size, 5878 spa_min_claim_txg(vd->vdev_spa))); 5879 } 5880 5881 static void 5882 claim_segment_cb(void *arg, uint64_t offset, uint64_t size) 5883 { 5884 vdev_t *vd = arg; 5885 5886 vdev_indirect_ops.vdev_op_remap(vd, offset, size, 5887 claim_segment_impl_cb, NULL); 5888 } 5889 5890 /* 5891 * After accounting for all allocated blocks that are directly referenced, 5892 * we might have missed a reference to a block from a partially complete 5893 * (and thus unused) indirect mapping object. We perform a secondary pass 5894 * through the metaslabs we have already mapped and claim the destination 5895 * blocks. 5896 */ 5897 static void 5898 zdb_claim_removing(spa_t *spa, zdb_cb_t *zcb) 5899 { 5900 if (dump_opt['L']) 5901 return; 5902 5903 if (spa->spa_vdev_removal == NULL) 5904 return; 5905 5906 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 5907 5908 spa_vdev_removal_t *svr = spa->spa_vdev_removal; 5909 vdev_t *vd = vdev_lookup_top(spa, svr->svr_vdev_id); 5910 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; 5911 5912 ASSERT0(range_tree_space(svr->svr_allocd_segs)); 5913 5914 range_tree_t *allocs = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0); 5915 for (uint64_t msi = 0; msi < vd->vdev_ms_count; msi++) { 5916 metaslab_t *msp = vd->vdev_ms[msi]; 5917 5918 ASSERT0(range_tree_space(allocs)); 5919 if (msp->ms_sm != NULL) 5920 VERIFY0(space_map_load(msp->ms_sm, allocs, SM_ALLOC)); 5921 range_tree_vacate(allocs, range_tree_add, svr->svr_allocd_segs); 5922 } 5923 range_tree_destroy(allocs); 5924 5925 iterate_through_spacemap_logs(spa, load_unflushed_svr_segs_cb, svr); 5926 5927 /* 5928 * Clear everything past what has been synced, 5929 * because we have not allocated mappings for 5930 * it yet. 
5931 */ 5932 range_tree_clear(svr->svr_allocd_segs, 5933 vdev_indirect_mapping_max_offset(vim), 5934 vd->vdev_asize - vdev_indirect_mapping_max_offset(vim)); 5935 5936 zcb->zcb_removing_size += range_tree_space(svr->svr_allocd_segs); 5937 range_tree_vacate(svr->svr_allocd_segs, claim_segment_cb, vd); 5938 5939 spa_config_exit(spa, SCL_CONFIG, FTAG); 5940 } 5941 5942 static int 5943 increment_indirect_mapping_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, 5944 dmu_tx_t *tx) 5945 { 5946 (void) tx; 5947 zdb_cb_t *zcb = arg; 5948 spa_t *spa = zcb->zcb_spa; 5949 vdev_t *vd; 5950 const dva_t *dva = &bp->blk_dva[0]; 5951 5952 ASSERT(!bp_freed); 5953 ASSERT(!dump_opt['L']); 5954 ASSERT3U(BP_GET_NDVAS(bp), ==, 1); 5955 5956 spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 5957 vd = vdev_lookup_top(zcb->zcb_spa, DVA_GET_VDEV(dva)); 5958 ASSERT3P(vd, !=, NULL); 5959 spa_config_exit(spa, SCL_VDEV, FTAG); 5960 5961 ASSERT(vd->vdev_indirect_config.vic_mapping_object != 0); 5962 ASSERT3P(zcb->zcb_vd_obsolete_counts[vd->vdev_id], !=, NULL); 5963 5964 vdev_indirect_mapping_increment_obsolete_count( 5965 vd->vdev_indirect_mapping, 5966 DVA_GET_OFFSET(dva), DVA_GET_ASIZE(dva), 5967 zcb->zcb_vd_obsolete_counts[vd->vdev_id]); 5968 5969 return (0); 5970 } 5971 5972 static uint32_t * 5973 zdb_load_obsolete_counts(vdev_t *vd) 5974 { 5975 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; 5976 spa_t *spa = vd->vdev_spa; 5977 spa_condensing_indirect_phys_t *scip = 5978 &spa->spa_condensing_indirect_phys; 5979 uint64_t obsolete_sm_object; 5980 uint32_t *counts; 5981 5982 VERIFY0(vdev_obsolete_sm_object(vd, &obsolete_sm_object)); 5983 EQUIV(obsolete_sm_object != 0, vd->vdev_obsolete_sm != NULL); 5984 counts = vdev_indirect_mapping_load_obsolete_counts(vim); 5985 if (vd->vdev_obsolete_sm != NULL) { 5986 vdev_indirect_mapping_load_obsolete_spacemap(vim, counts, 5987 vd->vdev_obsolete_sm); 5988 } 5989 if (scip->scip_vdev == vd->vdev_id && 5990 scip->scip_prev_obsolete_sm_object != 
0) { 5991 space_map_t *prev_obsolete_sm = NULL; 5992 VERIFY0(space_map_open(&prev_obsolete_sm, spa->spa_meta_objset, 5993 scip->scip_prev_obsolete_sm_object, 0, vd->vdev_asize, 0)); 5994 vdev_indirect_mapping_load_obsolete_spacemap(vim, counts, 5995 prev_obsolete_sm); 5996 space_map_close(prev_obsolete_sm); 5997 } 5998 return (counts); 5999 } 6000 6001 static void 6002 zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb) 6003 { 6004 ddt_bookmark_t ddb = {0}; 6005 ddt_entry_t dde; 6006 int error; 6007 int p; 6008 6009 ASSERT(!dump_opt['L']); 6010 6011 while ((error = ddt_walk(spa, &ddb, &dde)) == 0) { 6012 blkptr_t blk; 6013 ddt_phys_t *ddp = dde.dde_phys; 6014 6015 if (ddb.ddb_class == DDT_CLASS_UNIQUE) 6016 return; 6017 6018 ASSERT(ddt_phys_total_refcnt(&dde) > 1); 6019 6020 for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { 6021 if (ddp->ddp_phys_birth == 0) 6022 continue; 6023 ddt_bp_create(ddb.ddb_checksum, 6024 &dde.dde_key, ddp, &blk); 6025 if (p == DDT_PHYS_DITTO) { 6026 zdb_count_block(zcb, NULL, &blk, ZDB_OT_DITTO); 6027 } else { 6028 zcb->zcb_dedup_asize += 6029 BP_GET_ASIZE(&blk) * (ddp->ddp_refcnt - 1); 6030 zcb->zcb_dedup_blocks++; 6031 } 6032 } 6033 ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum]; 6034 ddt_enter(ddt); 6035 VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL); 6036 ddt_exit(ddt); 6037 } 6038 6039 ASSERT(error == ENOENT); 6040 } 6041 6042 typedef struct checkpoint_sm_exclude_entry_arg { 6043 vdev_t *cseea_vd; 6044 uint64_t cseea_checkpoint_size; 6045 } checkpoint_sm_exclude_entry_arg_t; 6046 6047 static int 6048 checkpoint_sm_exclude_entry_cb(space_map_entry_t *sme, void *arg) 6049 { 6050 checkpoint_sm_exclude_entry_arg_t *cseea = arg; 6051 vdev_t *vd = cseea->cseea_vd; 6052 metaslab_t *ms = vd->vdev_ms[sme->sme_offset >> vd->vdev_ms_shift]; 6053 uint64_t end = sme->sme_offset + sme->sme_run; 6054 6055 ASSERT(sme->sme_type == SM_FREE); 6056 6057 /* 6058 * Since the vdev_checkpoint_sm exists in the vdev level 6059 * and the ms_sm space maps exist in the 
metaslab level, 6060 * an entry in the checkpoint space map could theoretically 6061 * cross the boundaries of the metaslab that it belongs. 6062 * 6063 * In reality, because of the way that we populate and 6064 * manipulate the checkpoint's space maps currently, 6065 * there shouldn't be any entries that cross metaslabs. 6066 * Hence the assertion below. 6067 * 6068 * That said, there is no fundamental requirement that 6069 * the checkpoint's space map entries should not cross 6070 * metaslab boundaries. So if needed we could add code 6071 * that handles metaslab-crossing segments in the future. 6072 */ 6073 VERIFY3U(sme->sme_offset, >=, ms->ms_start); 6074 VERIFY3U(end, <=, ms->ms_start + ms->ms_size); 6075 6076 /* 6077 * By removing the entry from the allocated segments we 6078 * also verify that the entry is there to begin with. 6079 */ 6080 mutex_enter(&ms->ms_lock); 6081 range_tree_remove(ms->ms_allocatable, sme->sme_offset, sme->sme_run); 6082 mutex_exit(&ms->ms_lock); 6083 6084 cseea->cseea_checkpoint_size += sme->sme_run; 6085 return (0); 6086 } 6087 6088 static void 6089 zdb_leak_init_vdev_exclude_checkpoint(vdev_t *vd, zdb_cb_t *zcb) 6090 { 6091 spa_t *spa = vd->vdev_spa; 6092 space_map_t *checkpoint_sm = NULL; 6093 uint64_t checkpoint_sm_obj; 6094 6095 /* 6096 * If there is no vdev_top_zap, we are in a pool whose 6097 * version predates the pool checkpoint feature. 6098 */ 6099 if (vd->vdev_top_zap == 0) 6100 return; 6101 6102 /* 6103 * If there is no reference of the vdev_checkpoint_sm in 6104 * the vdev_top_zap, then one of the following scenarios 6105 * is true: 6106 * 6107 * 1] There is no checkpoint 6108 * 2] There is a checkpoint, but no checkpointed blocks 6109 * have been freed yet 6110 * 3] The current vdev is indirect 6111 * 6112 * In these cases we return immediately. 
6113 */ 6114 if (zap_contains(spa_meta_objset(spa), vd->vdev_top_zap, 6115 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0) 6116 return; 6117 6118 VERIFY0(zap_lookup(spa_meta_objset(spa), vd->vdev_top_zap, 6119 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM, sizeof (uint64_t), 1, 6120 &checkpoint_sm_obj)); 6121 6122 checkpoint_sm_exclude_entry_arg_t cseea; 6123 cseea.cseea_vd = vd; 6124 cseea.cseea_checkpoint_size = 0; 6125 6126 VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(spa), 6127 checkpoint_sm_obj, 0, vd->vdev_asize, vd->vdev_ashift)); 6128 6129 VERIFY0(space_map_iterate(checkpoint_sm, 6130 space_map_length(checkpoint_sm), 6131 checkpoint_sm_exclude_entry_cb, &cseea)); 6132 space_map_close(checkpoint_sm); 6133 6134 zcb->zcb_checkpoint_size += cseea.cseea_checkpoint_size; 6135 } 6136 6137 static void 6138 zdb_leak_init_exclude_checkpoint(spa_t *spa, zdb_cb_t *zcb) 6139 { 6140 ASSERT(!dump_opt['L']); 6141 6142 vdev_t *rvd = spa->spa_root_vdev; 6143 for (uint64_t c = 0; c < rvd->vdev_children; c++) { 6144 ASSERT3U(c, ==, rvd->vdev_child[c]->vdev_id); 6145 zdb_leak_init_vdev_exclude_checkpoint(rvd->vdev_child[c], zcb); 6146 } 6147 } 6148 6149 static int 6150 count_unflushed_space_cb(spa_t *spa, space_map_entry_t *sme, 6151 uint64_t txg, void *arg) 6152 { 6153 int64_t *ualloc_space = arg; 6154 6155 uint64_t offset = sme->sme_offset; 6156 uint64_t vdev_id = sme->sme_vdev; 6157 6158 vdev_t *vd = vdev_lookup_top(spa, vdev_id); 6159 if (!vdev_is_concrete(vd)) 6160 return (0); 6161 6162 metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift]; 6163 ASSERT(sme->sme_type == SM_ALLOC || sme->sme_type == SM_FREE); 6164 6165 if (txg < metaslab_unflushed_txg(ms)) 6166 return (0); 6167 6168 if (sme->sme_type == SM_ALLOC) 6169 *ualloc_space += sme->sme_run; 6170 else 6171 *ualloc_space -= sme->sme_run; 6172 6173 return (0); 6174 } 6175 6176 static int64_t 6177 get_unflushed_alloc_space(spa_t *spa) 6178 { 6179 if (dump_opt['L']) 6180 return (0); 6181 6182 int64_t ualloc_space = 0; 6183 
iterate_through_spacemap_logs(spa, count_unflushed_space_cb, 6184 &ualloc_space); 6185 return (ualloc_space); 6186 } 6187 6188 static int 6189 load_unflushed_cb(spa_t *spa, space_map_entry_t *sme, uint64_t txg, void *arg) 6190 { 6191 maptype_t *uic_maptype = arg; 6192 6193 uint64_t offset = sme->sme_offset; 6194 uint64_t size = sme->sme_run; 6195 uint64_t vdev_id = sme->sme_vdev; 6196 6197 vdev_t *vd = vdev_lookup_top(spa, vdev_id); 6198 6199 /* skip indirect vdevs */ 6200 if (!vdev_is_concrete(vd)) 6201 return (0); 6202 6203 metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift]; 6204 6205 ASSERT(sme->sme_type == SM_ALLOC || sme->sme_type == SM_FREE); 6206 ASSERT(*uic_maptype == SM_ALLOC || *uic_maptype == SM_FREE); 6207 6208 if (txg < metaslab_unflushed_txg(ms)) 6209 return (0); 6210 6211 if (*uic_maptype == sme->sme_type) 6212 range_tree_add(ms->ms_allocatable, offset, size); 6213 else 6214 range_tree_remove(ms->ms_allocatable, offset, size); 6215 6216 return (0); 6217 } 6218 6219 static void 6220 load_unflushed_to_ms_allocatables(spa_t *spa, maptype_t maptype) 6221 { 6222 iterate_through_spacemap_logs(spa, load_unflushed_cb, &maptype); 6223 } 6224 6225 static void 6226 load_concrete_ms_allocatable_trees(spa_t *spa, maptype_t maptype) 6227 { 6228 vdev_t *rvd = spa->spa_root_vdev; 6229 for (uint64_t i = 0; i < rvd->vdev_children; i++) { 6230 vdev_t *vd = rvd->vdev_child[i]; 6231 6232 ASSERT3U(i, ==, vd->vdev_id); 6233 6234 if (vd->vdev_ops == &vdev_indirect_ops) 6235 continue; 6236 6237 for (uint64_t m = 0; m < vd->vdev_ms_count; m++) { 6238 metaslab_t *msp = vd->vdev_ms[m]; 6239 6240 (void) fprintf(stderr, 6241 "\rloading concrete vdev %llu, " 6242 "metaslab %llu of %llu ...", 6243 (longlong_t)vd->vdev_id, 6244 (longlong_t)msp->ms_id, 6245 (longlong_t)vd->vdev_ms_count); 6246 6247 mutex_enter(&msp->ms_lock); 6248 range_tree_vacate(msp->ms_allocatable, NULL, NULL); 6249 6250 /* 6251 * We don't want to spend the CPU manipulating the 6252 * size-ordered tree, so 
clear the range_tree ops. 6253 */ 6254 msp->ms_allocatable->rt_ops = NULL; 6255 6256 if (msp->ms_sm != NULL) { 6257 VERIFY0(space_map_load(msp->ms_sm, 6258 msp->ms_allocatable, maptype)); 6259 } 6260 if (!msp->ms_loaded) 6261 msp->ms_loaded = B_TRUE; 6262 mutex_exit(&msp->ms_lock); 6263 } 6264 } 6265 6266 load_unflushed_to_ms_allocatables(spa, maptype); 6267 } 6268 6269 /* 6270 * vm_idxp is an in-out parameter which (for indirect vdevs) is the 6271 * index in vim_entries that has the first entry in this metaslab. 6272 * On return, it will be set to the first entry after this metaslab. 6273 */ 6274 static void 6275 load_indirect_ms_allocatable_tree(vdev_t *vd, metaslab_t *msp, 6276 uint64_t *vim_idxp) 6277 { 6278 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; 6279 6280 mutex_enter(&msp->ms_lock); 6281 range_tree_vacate(msp->ms_allocatable, NULL, NULL); 6282 6283 /* 6284 * We don't want to spend the CPU manipulating the 6285 * size-ordered tree, so clear the range_tree ops. 6286 */ 6287 msp->ms_allocatable->rt_ops = NULL; 6288 6289 for (; *vim_idxp < vdev_indirect_mapping_num_entries(vim); 6290 (*vim_idxp)++) { 6291 vdev_indirect_mapping_entry_phys_t *vimep = 6292 &vim->vim_entries[*vim_idxp]; 6293 uint64_t ent_offset = DVA_MAPPING_GET_SRC_OFFSET(vimep); 6294 uint64_t ent_len = DVA_GET_ASIZE(&vimep->vimep_dst); 6295 ASSERT3U(ent_offset, >=, msp->ms_start); 6296 if (ent_offset >= msp->ms_start + msp->ms_size) 6297 break; 6298 6299 /* 6300 * Mappings do not cross metaslab boundaries, 6301 * because we create them by walking the metaslabs. 
6302 */ 6303 ASSERT3U(ent_offset + ent_len, <=, 6304 msp->ms_start + msp->ms_size); 6305 range_tree_add(msp->ms_allocatable, ent_offset, ent_len); 6306 } 6307 6308 if (!msp->ms_loaded) 6309 msp->ms_loaded = B_TRUE; 6310 mutex_exit(&msp->ms_lock); 6311 } 6312 6313 static void 6314 zdb_leak_init_prepare_indirect_vdevs(spa_t *spa, zdb_cb_t *zcb) 6315 { 6316 ASSERT(!dump_opt['L']); 6317 6318 vdev_t *rvd = spa->spa_root_vdev; 6319 for (uint64_t c = 0; c < rvd->vdev_children; c++) { 6320 vdev_t *vd = rvd->vdev_child[c]; 6321 6322 ASSERT3U(c, ==, vd->vdev_id); 6323 6324 if (vd->vdev_ops != &vdev_indirect_ops) 6325 continue; 6326 6327 /* 6328 * Note: we don't check for mapping leaks on 6329 * removing vdevs because their ms_allocatable's 6330 * are used to look for leaks in allocated space. 6331 */ 6332 zcb->zcb_vd_obsolete_counts[c] = zdb_load_obsolete_counts(vd); 6333 6334 /* 6335 * Normally, indirect vdevs don't have any 6336 * metaslabs. We want to set them up for 6337 * zio_claim(). 6338 */ 6339 vdev_metaslab_group_create(vd); 6340 VERIFY0(vdev_metaslab_init(vd, 0)); 6341 6342 vdev_indirect_mapping_t *vim __maybe_unused = 6343 vd->vdev_indirect_mapping; 6344 uint64_t vim_idx = 0; 6345 for (uint64_t m = 0; m < vd->vdev_ms_count; m++) { 6346 6347 (void) fprintf(stderr, 6348 "\rloading indirect vdev %llu, " 6349 "metaslab %llu of %llu ...", 6350 (longlong_t)vd->vdev_id, 6351 (longlong_t)vd->vdev_ms[m]->ms_id, 6352 (longlong_t)vd->vdev_ms_count); 6353 6354 load_indirect_ms_allocatable_tree(vd, vd->vdev_ms[m], 6355 &vim_idx); 6356 } 6357 ASSERT3U(vim_idx, ==, vdev_indirect_mapping_num_entries(vim)); 6358 } 6359 } 6360 6361 static void 6362 zdb_leak_init(spa_t *spa, zdb_cb_t *zcb) 6363 { 6364 zcb->zcb_spa = spa; 6365 6366 if (dump_opt['L']) 6367 return; 6368 6369 dsl_pool_t *dp = spa->spa_dsl_pool; 6370 vdev_t *rvd = spa->spa_root_vdev; 6371 6372 /* 6373 * We are going to be changing the meaning of the metaslab's 6374 * ms_allocatable. 
Ensure that the allocator doesn't try to 6375 * use the tree. 6376 */ 6377 spa->spa_normal_class->mc_ops = &zdb_metaslab_ops; 6378 spa->spa_log_class->mc_ops = &zdb_metaslab_ops; 6379 spa->spa_embedded_log_class->mc_ops = &zdb_metaslab_ops; 6380 6381 zcb->zcb_vd_obsolete_counts = 6382 umem_zalloc(rvd->vdev_children * sizeof (uint32_t *), 6383 UMEM_NOFAIL); 6384 6385 /* 6386 * For leak detection, we overload the ms_allocatable trees 6387 * to contain allocated segments instead of free segments. 6388 * As a result, we can't use the normal metaslab_load/unload 6389 * interfaces. 6390 */ 6391 zdb_leak_init_prepare_indirect_vdevs(spa, zcb); 6392 load_concrete_ms_allocatable_trees(spa, SM_ALLOC); 6393 6394 /* 6395 * On load_concrete_ms_allocatable_trees() we loaded all the 6396 * allocated entries from the ms_sm to the ms_allocatable for 6397 * each metaslab. If the pool has a checkpoint or is in the 6398 * middle of discarding a checkpoint, some of these blocks 6399 * may have been freed but their ms_sm may not have been 6400 * updated because they are referenced by the checkpoint. In 6401 * order to avoid false-positives during leak-detection, we 6402 * go through the vdev's checkpoint space map and exclude all 6403 * its entries from their relevant ms_allocatable. 6404 * 6405 * We also aggregate the space held by the checkpoint and add 6406 * it to zcb_checkpoint_size. 6407 * 6408 * Note that at this point we are also verifying that all the 6409 * entries on the checkpoint_sm are marked as allocated in 6410 * the ms_sm of their relevant metaslab. 
6411 * [see comment in checkpoint_sm_exclude_entry_cb()] 6412 */ 6413 zdb_leak_init_exclude_checkpoint(spa, zcb); 6414 ASSERT3U(zcb->zcb_checkpoint_size, ==, spa_get_checkpoint_space(spa)); 6415 6416 /* for cleaner progress output */ 6417 (void) fprintf(stderr, "\n"); 6418 6419 if (bpobj_is_open(&dp->dp_obsolete_bpobj)) { 6420 ASSERT(spa_feature_is_enabled(spa, 6421 SPA_FEATURE_DEVICE_REMOVAL)); 6422 (void) bpobj_iterate_nofree(&dp->dp_obsolete_bpobj, 6423 increment_indirect_mapping_cb, zcb, NULL); 6424 } 6425 6426 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 6427 zdb_ddt_leak_init(spa, zcb); 6428 spa_config_exit(spa, SCL_CONFIG, FTAG); 6429 } 6430 6431 static boolean_t 6432 zdb_check_for_obsolete_leaks(vdev_t *vd, zdb_cb_t *zcb) 6433 { 6434 boolean_t leaks = B_FALSE; 6435 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; 6436 uint64_t total_leaked = 0; 6437 boolean_t are_precise = B_FALSE; 6438 6439 ASSERT(vim != NULL); 6440 6441 for (uint64_t i = 0; i < vdev_indirect_mapping_num_entries(vim); i++) { 6442 vdev_indirect_mapping_entry_phys_t *vimep = 6443 &vim->vim_entries[i]; 6444 uint64_t obsolete_bytes = 0; 6445 uint64_t offset = DVA_MAPPING_GET_SRC_OFFSET(vimep); 6446 metaslab_t *msp = vd->vdev_ms[offset >> vd->vdev_ms_shift]; 6447 6448 /* 6449 * This is not very efficient but it's easy to 6450 * verify correctness. 
6451 */ 6452 for (uint64_t inner_offset = 0; 6453 inner_offset < DVA_GET_ASIZE(&vimep->vimep_dst); 6454 inner_offset += 1ULL << vd->vdev_ashift) { 6455 if (range_tree_contains(msp->ms_allocatable, 6456 offset + inner_offset, 1ULL << vd->vdev_ashift)) { 6457 obsolete_bytes += 1ULL << vd->vdev_ashift; 6458 } 6459 } 6460 6461 int64_t bytes_leaked = obsolete_bytes - 6462 zcb->zcb_vd_obsolete_counts[vd->vdev_id][i]; 6463 ASSERT3U(DVA_GET_ASIZE(&vimep->vimep_dst), >=, 6464 zcb->zcb_vd_obsolete_counts[vd->vdev_id][i]); 6465 6466 VERIFY0(vdev_obsolete_counts_are_precise(vd, &are_precise)); 6467 if (bytes_leaked != 0 && (are_precise || dump_opt['d'] >= 5)) { 6468 (void) printf("obsolete indirect mapping count " 6469 "mismatch on %llu:%llx:%llx : %llx bytes leaked\n", 6470 (u_longlong_t)vd->vdev_id, 6471 (u_longlong_t)DVA_MAPPING_GET_SRC_OFFSET(vimep), 6472 (u_longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst), 6473 (u_longlong_t)bytes_leaked); 6474 } 6475 total_leaked += ABS(bytes_leaked); 6476 } 6477 6478 VERIFY0(vdev_obsolete_counts_are_precise(vd, &are_precise)); 6479 if (!are_precise && total_leaked > 0) { 6480 int pct_leaked = total_leaked * 100 / 6481 vdev_indirect_mapping_bytes_mapped(vim); 6482 (void) printf("cannot verify obsolete indirect mapping " 6483 "counts of vdev %llu because precise feature was not " 6484 "enabled when it was removed: %d%% (%llx bytes) of mapping" 6485 "unreferenced\n", 6486 (u_longlong_t)vd->vdev_id, pct_leaked, 6487 (u_longlong_t)total_leaked); 6488 } else if (total_leaked > 0) { 6489 (void) printf("obsolete indirect mapping count mismatch " 6490 "for vdev %llu -- %llx total bytes mismatched\n", 6491 (u_longlong_t)vd->vdev_id, 6492 (u_longlong_t)total_leaked); 6493 leaks |= B_TRUE; 6494 } 6495 6496 vdev_indirect_mapping_free_obsolete_counts(vim, 6497 zcb->zcb_vd_obsolete_counts[vd->vdev_id]); 6498 zcb->zcb_vd_obsolete_counts[vd->vdev_id] = NULL; 6499 6500 return (leaks); 6501 } 6502 6503 static boolean_t 6504 zdb_leak_fini(spa_t *spa, zdb_cb_t 
*zcb) 6505 { 6506 if (dump_opt['L']) 6507 return (B_FALSE); 6508 6509 boolean_t leaks = B_FALSE; 6510 vdev_t *rvd = spa->spa_root_vdev; 6511 for (unsigned c = 0; c < rvd->vdev_children; c++) { 6512 vdev_t *vd = rvd->vdev_child[c]; 6513 6514 if (zcb->zcb_vd_obsolete_counts[c] != NULL) { 6515 leaks |= zdb_check_for_obsolete_leaks(vd, zcb); 6516 } 6517 6518 for (uint64_t m = 0; m < vd->vdev_ms_count; m++) { 6519 metaslab_t *msp = vd->vdev_ms[m]; 6520 ASSERT3P(msp->ms_group, ==, (msp->ms_group->mg_class == 6521 spa_embedded_log_class(spa)) ? 6522 vd->vdev_log_mg : vd->vdev_mg); 6523 6524 /* 6525 * ms_allocatable has been overloaded 6526 * to contain allocated segments. Now that 6527 * we finished traversing all blocks, any 6528 * block that remains in the ms_allocatable 6529 * represents an allocated block that we 6530 * did not claim during the traversal. 6531 * Claimed blocks would have been removed 6532 * from the ms_allocatable. For indirect 6533 * vdevs, space remaining in the tree 6534 * represents parts of the mapping that are 6535 * not referenced, which is not a bug. 
6536 */ 6537 if (vd->vdev_ops == &vdev_indirect_ops) { 6538 range_tree_vacate(msp->ms_allocatable, 6539 NULL, NULL); 6540 } else { 6541 range_tree_vacate(msp->ms_allocatable, 6542 zdb_leak, vd); 6543 } 6544 if (msp->ms_loaded) { 6545 msp->ms_loaded = B_FALSE; 6546 } 6547 } 6548 } 6549 6550 umem_free(zcb->zcb_vd_obsolete_counts, 6551 rvd->vdev_children * sizeof (uint32_t *)); 6552 zcb->zcb_vd_obsolete_counts = NULL; 6553 6554 return (leaks); 6555 } 6556 6557 static int 6558 count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) 6559 { 6560 (void) tx; 6561 zdb_cb_t *zcb = arg; 6562 6563 if (dump_opt['b'] >= 5) { 6564 char blkbuf[BP_SPRINTF_LEN]; 6565 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); 6566 (void) printf("[%s] %s\n", 6567 "deferred free", blkbuf); 6568 } 6569 zdb_count_block(zcb, NULL, bp, ZDB_OT_DEFERRED); 6570 return (0); 6571 } 6572 6573 /* 6574 * Iterate over livelists which have been destroyed by the user but 6575 * are still present in the MOS, waiting to be freed 6576 */ 6577 static void 6578 iterate_deleted_livelists(spa_t *spa, ll_iter_t func, void *arg) 6579 { 6580 objset_t *mos = spa->spa_meta_objset; 6581 uint64_t zap_obj; 6582 int err = zap_lookup(mos, DMU_POOL_DIRECTORY_OBJECT, 6583 DMU_POOL_DELETED_CLONES, sizeof (uint64_t), 1, &zap_obj); 6584 if (err == ENOENT) 6585 return; 6586 ASSERT0(err); 6587 6588 zap_cursor_t zc; 6589 zap_attribute_t attr; 6590 dsl_deadlist_t ll; 6591 /* NULL out os prior to dsl_deadlist_open in case it's garbage */ 6592 ll.dl_os = NULL; 6593 for (zap_cursor_init(&zc, mos, zap_obj); 6594 zap_cursor_retrieve(&zc, &attr) == 0; 6595 (void) zap_cursor_advance(&zc)) { 6596 dsl_deadlist_open(&ll, mos, attr.za_first_integer); 6597 func(&ll, arg); 6598 dsl_deadlist_close(&ll); 6599 } 6600 zap_cursor_fini(&zc); 6601 } 6602 6603 static int 6604 bpobj_count_block_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, 6605 dmu_tx_t *tx) 6606 { 6607 ASSERT(!bp_freed); 6608 return (count_block_cb(arg, bp, tx)); 6609 } 6610 6611 
static int 6612 livelist_entry_count_blocks_cb(void *args, dsl_deadlist_entry_t *dle) 6613 { 6614 zdb_cb_t *zbc = args; 6615 bplist_t blks; 6616 bplist_create(&blks); 6617 /* determine which blocks have been alloc'd but not freed */ 6618 VERIFY0(dsl_process_sub_livelist(&dle->dle_bpobj, &blks, NULL, NULL)); 6619 /* count those blocks */ 6620 (void) bplist_iterate(&blks, count_block_cb, zbc, NULL); 6621 bplist_destroy(&blks); 6622 return (0); 6623 } 6624 6625 static void 6626 livelist_count_blocks(dsl_deadlist_t *ll, void *arg) 6627 { 6628 dsl_deadlist_iterate(ll, livelist_entry_count_blocks_cb, arg); 6629 } 6630 6631 /* 6632 * Count the blocks in the livelists that have been destroyed by the user 6633 * but haven't yet been freed. 6634 */ 6635 static void 6636 deleted_livelists_count_blocks(spa_t *spa, zdb_cb_t *zbc) 6637 { 6638 iterate_deleted_livelists(spa, livelist_count_blocks, zbc); 6639 } 6640 6641 static void 6642 dump_livelist_cb(dsl_deadlist_t *ll, void *arg) 6643 { 6644 ASSERT3P(arg, ==, NULL); 6645 global_feature_count[SPA_FEATURE_LIVELIST]++; 6646 dump_blkptr_list(ll, "Deleted Livelist"); 6647 dsl_deadlist_iterate(ll, sublivelist_verify_lightweight, NULL); 6648 } 6649 6650 /* 6651 * Print out, register object references to, and increment feature counts for 6652 * livelists that have been destroyed by the user but haven't yet been freed. 
6653 */ 6654 static void 6655 deleted_livelists_dump_mos(spa_t *spa) 6656 { 6657 uint64_t zap_obj; 6658 objset_t *mos = spa->spa_meta_objset; 6659 int err = zap_lookup(mos, DMU_POOL_DIRECTORY_OBJECT, 6660 DMU_POOL_DELETED_CLONES, sizeof (uint64_t), 1, &zap_obj); 6661 if (err == ENOENT) 6662 return; 6663 mos_obj_refd(zap_obj); 6664 iterate_deleted_livelists(spa, dump_livelist_cb, NULL); 6665 } 6666 6667 static int 6668 dump_block_stats(spa_t *spa) 6669 { 6670 zdb_cb_t *zcb; 6671 zdb_blkstats_t *zb, *tzb; 6672 uint64_t norm_alloc, norm_space, total_alloc, total_found; 6673 int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | 6674 TRAVERSE_NO_DECRYPT | TRAVERSE_HARD; 6675 boolean_t leaks = B_FALSE; 6676 int e, c, err; 6677 bp_embedded_type_t i; 6678 6679 zcb = umem_zalloc(sizeof (zdb_cb_t), UMEM_NOFAIL); 6680 6681 (void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n", 6682 (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "", 6683 (dump_opt['c'] == 1) ? "metadata " : "", 6684 dump_opt['c'] ? "checksums " : "", 6685 (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "", 6686 !dump_opt['L'] ? "nothing leaked " : ""); 6687 6688 /* 6689 * When leak detection is enabled we load all space maps as SM_ALLOC 6690 * maps, then traverse the pool claiming each block we discover. If 6691 * the pool is perfectly consistent, the segment trees will be empty 6692 * when we're done. Anything left over is a leak; any block we can't 6693 * claim (because it's not part of any space map) is a double 6694 * allocation, reference to a freed block, or an unclaimed log block. 6695 * 6696 * When leak detection is disabled (-L option) we still traverse the 6697 * pool claiming each block we discover, but we skip opening any space 6698 * maps. 6699 */ 6700 zdb_leak_init(spa, zcb); 6701 6702 /* 6703 * If there's a deferred-free bplist, process that first. 
6704 */ 6705 (void) bpobj_iterate_nofree(&spa->spa_deferred_bpobj, 6706 bpobj_count_block_cb, zcb, NULL); 6707 6708 if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { 6709 (void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj, 6710 bpobj_count_block_cb, zcb, NULL); 6711 } 6712 6713 zdb_claim_removing(spa, zcb); 6714 6715 if (spa_feature_is_active(spa, SPA_FEATURE_ASYNC_DESTROY)) { 6716 VERIFY3U(0, ==, bptree_iterate(spa->spa_meta_objset, 6717 spa->spa_dsl_pool->dp_bptree_obj, B_FALSE, count_block_cb, 6718 zcb, NULL)); 6719 } 6720 6721 deleted_livelists_count_blocks(spa, zcb); 6722 6723 if (dump_opt['c'] > 1) 6724 flags |= TRAVERSE_PREFETCH_DATA; 6725 6726 zcb->zcb_totalasize = metaslab_class_get_alloc(spa_normal_class(spa)); 6727 zcb->zcb_totalasize += metaslab_class_get_alloc(spa_special_class(spa)); 6728 zcb->zcb_totalasize += metaslab_class_get_alloc(spa_dedup_class(spa)); 6729 zcb->zcb_totalasize += 6730 metaslab_class_get_alloc(spa_embedded_log_class(spa)); 6731 zcb->zcb_start = zcb->zcb_lastprint = gethrtime(); 6732 err = traverse_pool(spa, 0, flags, zdb_blkptr_cb, zcb); 6733 6734 /* 6735 * If we've traversed the data blocks then we need to wait for those 6736 * I/Os to complete. We leverage "The Godfather" zio to wait on 6737 * all async I/Os to complete. 
6738 */ 6739 if (dump_opt['c']) { 6740 for (c = 0; c < max_ncpus; c++) { 6741 (void) zio_wait(spa->spa_async_zio_root[c]); 6742 spa->spa_async_zio_root[c] = zio_root(spa, NULL, NULL, 6743 ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | 6744 ZIO_FLAG_GODFATHER); 6745 } 6746 } 6747 ASSERT0(spa->spa_load_verify_bytes); 6748 6749 /* 6750 * Done after zio_wait() since zcb_haderrors is modified in 6751 * zdb_blkptr_done() 6752 */ 6753 zcb->zcb_haderrors |= err; 6754 6755 if (zcb->zcb_haderrors) { 6756 (void) printf("\nError counts:\n\n"); 6757 (void) printf("\t%5s %s\n", "errno", "count"); 6758 for (e = 0; e < 256; e++) { 6759 if (zcb->zcb_errors[e] != 0) { 6760 (void) printf("\t%5d %llu\n", 6761 e, (u_longlong_t)zcb->zcb_errors[e]); 6762 } 6763 } 6764 } 6765 6766 /* 6767 * Report any leaked segments. 6768 */ 6769 leaks |= zdb_leak_fini(spa, zcb); 6770 6771 tzb = &zcb->zcb_type[ZB_TOTAL][ZDB_OT_TOTAL]; 6772 6773 norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa)); 6774 norm_space = metaslab_class_get_space(spa_normal_class(spa)); 6775 6776 total_alloc = norm_alloc + 6777 metaslab_class_get_alloc(spa_log_class(spa)) + 6778 metaslab_class_get_alloc(spa_embedded_log_class(spa)) + 6779 metaslab_class_get_alloc(spa_special_class(spa)) + 6780 metaslab_class_get_alloc(spa_dedup_class(spa)) + 6781 get_unflushed_alloc_space(spa); 6782 total_found = tzb->zb_asize - zcb->zcb_dedup_asize + 6783 zcb->zcb_removing_size + zcb->zcb_checkpoint_size; 6784 6785 if (total_found == total_alloc && !dump_opt['L']) { 6786 (void) printf("\n\tNo leaks (block sum matches space" 6787 " maps exactly)\n"); 6788 } else if (!dump_opt['L']) { 6789 (void) printf("block traversal size %llu != alloc %llu " 6790 "(%s %lld)\n", 6791 (u_longlong_t)total_found, 6792 (u_longlong_t)total_alloc, 6793 (dump_opt['L']) ? 
"unreachable" : "leaked", 6794 (longlong_t)(total_alloc - total_found)); 6795 leaks = B_TRUE; 6796 } 6797 6798 if (tzb->zb_count == 0) { 6799 umem_free(zcb, sizeof (zdb_cb_t)); 6800 return (2); 6801 } 6802 6803 (void) printf("\n"); 6804 (void) printf("\t%-16s %14llu\n", "bp count:", 6805 (u_longlong_t)tzb->zb_count); 6806 (void) printf("\t%-16s %14llu\n", "ganged count:", 6807 (longlong_t)tzb->zb_gangs); 6808 (void) printf("\t%-16s %14llu avg: %6llu\n", "bp logical:", 6809 (u_longlong_t)tzb->zb_lsize, 6810 (u_longlong_t)(tzb->zb_lsize / tzb->zb_count)); 6811 (void) printf("\t%-16s %14llu avg: %6llu compression: %6.2f\n", 6812 "bp physical:", (u_longlong_t)tzb->zb_psize, 6813 (u_longlong_t)(tzb->zb_psize / tzb->zb_count), 6814 (double)tzb->zb_lsize / tzb->zb_psize); 6815 (void) printf("\t%-16s %14llu avg: %6llu compression: %6.2f\n", 6816 "bp allocated:", (u_longlong_t)tzb->zb_asize, 6817 (u_longlong_t)(tzb->zb_asize / tzb->zb_count), 6818 (double)tzb->zb_lsize / tzb->zb_asize); 6819 (void) printf("\t%-16s %14llu ref>1: %6llu deduplication: %6.2f\n", 6820 "bp deduped:", (u_longlong_t)zcb->zcb_dedup_asize, 6821 (u_longlong_t)zcb->zcb_dedup_blocks, 6822 (double)zcb->zcb_dedup_asize / tzb->zb_asize + 1.0); 6823 (void) printf("\t%-16s %14llu used: %5.2f%%\n", "Normal class:", 6824 (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space); 6825 6826 if (spa_special_class(spa)->mc_allocator[0].mca_rotor != NULL) { 6827 uint64_t alloc = metaslab_class_get_alloc( 6828 spa_special_class(spa)); 6829 uint64_t space = metaslab_class_get_space( 6830 spa_special_class(spa)); 6831 6832 (void) printf("\t%-16s %14llu used: %5.2f%%\n", 6833 "Special class", (u_longlong_t)alloc, 6834 100.0 * alloc / space); 6835 } 6836 6837 if (spa_dedup_class(spa)->mc_allocator[0].mca_rotor != NULL) { 6838 uint64_t alloc = metaslab_class_get_alloc( 6839 spa_dedup_class(spa)); 6840 uint64_t space = metaslab_class_get_space( 6841 spa_dedup_class(spa)); 6842 6843 (void) printf("\t%-16s %14llu used: 
%5.2f%%\n", 6844 "Dedup class", (u_longlong_t)alloc, 6845 100.0 * alloc / space); 6846 } 6847 6848 if (spa_embedded_log_class(spa)->mc_allocator[0].mca_rotor != NULL) { 6849 uint64_t alloc = metaslab_class_get_alloc( 6850 spa_embedded_log_class(spa)); 6851 uint64_t space = metaslab_class_get_space( 6852 spa_embedded_log_class(spa)); 6853 6854 (void) printf("\t%-16s %14llu used: %5.2f%%\n", 6855 "Embedded log class", (u_longlong_t)alloc, 6856 100.0 * alloc / space); 6857 } 6858 6859 for (i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) { 6860 if (zcb->zcb_embedded_blocks[i] == 0) 6861 continue; 6862 (void) printf("\n"); 6863 (void) printf("\tadditional, non-pointer bps of type %u: " 6864 "%10llu\n", 6865 i, (u_longlong_t)zcb->zcb_embedded_blocks[i]); 6866 6867 if (dump_opt['b'] >= 3) { 6868 (void) printf("\t number of (compressed) bytes: " 6869 "number of bps\n"); 6870 dump_histogram(zcb->zcb_embedded_histogram[i], 6871 sizeof (zcb->zcb_embedded_histogram[i]) / 6872 sizeof (zcb->zcb_embedded_histogram[i][0]), 0); 6873 } 6874 } 6875 6876 if (tzb->zb_ditto_samevdev != 0) { 6877 (void) printf("\tDittoed blocks on same vdev: %llu\n", 6878 (longlong_t)tzb->zb_ditto_samevdev); 6879 } 6880 if (tzb->zb_ditto_same_ms != 0) { 6881 (void) printf("\tDittoed blocks in same metaslab: %llu\n", 6882 (longlong_t)tzb->zb_ditto_same_ms); 6883 } 6884 6885 for (uint64_t v = 0; v < spa->spa_root_vdev->vdev_children; v++) { 6886 vdev_t *vd = spa->spa_root_vdev->vdev_child[v]; 6887 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; 6888 6889 if (vim == NULL) { 6890 continue; 6891 } 6892 6893 char mem[32]; 6894 zdb_nicenum(vdev_indirect_mapping_num_entries(vim), 6895 mem, vdev_indirect_mapping_size(vim)); 6896 6897 (void) printf("\tindirect vdev id %llu has %llu segments " 6898 "(%s in memory)\n", 6899 (longlong_t)vd->vdev_id, 6900 (longlong_t)vdev_indirect_mapping_num_entries(vim), mem); 6901 } 6902 6903 if (dump_opt['b'] >= 2) { 6904 int l, t, level; 6905 char csize[32], lsize[32], 
psize[32], asize[32]; 6906 char avg[32], gang[32]; 6907 (void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE" 6908 "\t avg\t comp\t%%Total\tType\n"); 6909 6910 zfs_blkstat_t *mdstats = umem_zalloc(sizeof (zfs_blkstat_t), 6911 UMEM_NOFAIL); 6912 6913 for (t = 0; t <= ZDB_OT_TOTAL; t++) { 6914 const char *typename; 6915 6916 /* make sure nicenum has enough space */ 6917 _Static_assert(sizeof (csize) >= NN_NUMBUF_SZ, 6918 "csize truncated"); 6919 _Static_assert(sizeof (lsize) >= NN_NUMBUF_SZ, 6920 "lsize truncated"); 6921 _Static_assert(sizeof (psize) >= NN_NUMBUF_SZ, 6922 "psize truncated"); 6923 _Static_assert(sizeof (asize) >= NN_NUMBUF_SZ, 6924 "asize truncated"); 6925 _Static_assert(sizeof (avg) >= NN_NUMBUF_SZ, 6926 "avg truncated"); 6927 _Static_assert(sizeof (gang) >= NN_NUMBUF_SZ, 6928 "gang truncated"); 6929 6930 if (t < DMU_OT_NUMTYPES) 6931 typename = dmu_ot[t].ot_name; 6932 else 6933 typename = zdb_ot_extname[t - DMU_OT_NUMTYPES]; 6934 6935 if (zcb->zcb_type[ZB_TOTAL][t].zb_asize == 0) { 6936 (void) printf("%6s\t%5s\t%5s\t%5s" 6937 "\t%5s\t%5s\t%6s\t%s\n", 6938 "-", 6939 "-", 6940 "-", 6941 "-", 6942 "-", 6943 "-", 6944 "-", 6945 typename); 6946 continue; 6947 } 6948 6949 for (l = ZB_TOTAL - 1; l >= -1; l--) { 6950 level = (l == -1 ? 
ZB_TOTAL : l); 6951 zb = &zcb->zcb_type[level][t]; 6952 6953 if (zb->zb_asize == 0) 6954 continue; 6955 6956 if (level != ZB_TOTAL && t < DMU_OT_NUMTYPES && 6957 (level > 0 || DMU_OT_IS_METADATA(t))) { 6958 mdstats->zb_count += zb->zb_count; 6959 mdstats->zb_lsize += zb->zb_lsize; 6960 mdstats->zb_psize += zb->zb_psize; 6961 mdstats->zb_asize += zb->zb_asize; 6962 mdstats->zb_gangs += zb->zb_gangs; 6963 } 6964 6965 if (dump_opt['b'] < 3 && level != ZB_TOTAL) 6966 continue; 6967 6968 if (level == 0 && zb->zb_asize == 6969 zcb->zcb_type[ZB_TOTAL][t].zb_asize) 6970 continue; 6971 6972 zdb_nicenum(zb->zb_count, csize, 6973 sizeof (csize)); 6974 zdb_nicenum(zb->zb_lsize, lsize, 6975 sizeof (lsize)); 6976 zdb_nicenum(zb->zb_psize, psize, 6977 sizeof (psize)); 6978 zdb_nicenum(zb->zb_asize, asize, 6979 sizeof (asize)); 6980 zdb_nicenum(zb->zb_asize / zb->zb_count, avg, 6981 sizeof (avg)); 6982 zdb_nicenum(zb->zb_gangs, gang, sizeof (gang)); 6983 6984 (void) printf("%6s\t%5s\t%5s\t%5s\t%5s" 6985 "\t%5.2f\t%6.2f\t", 6986 csize, lsize, psize, asize, avg, 6987 (double)zb->zb_lsize / zb->zb_psize, 6988 100.0 * zb->zb_asize / tzb->zb_asize); 6989 6990 if (level == ZB_TOTAL) 6991 (void) printf("%s\n", typename); 6992 else 6993 (void) printf(" L%d %s\n", 6994 level, typename); 6995 6996 if (dump_opt['b'] >= 3 && zb->zb_gangs > 0) { 6997 (void) printf("\t number of ganged " 6998 "blocks: %s\n", gang); 6999 } 7000 7001 if (dump_opt['b'] >= 4) { 7002 (void) printf("psize " 7003 "(in 512-byte sectors): " 7004 "number of blocks\n"); 7005 dump_histogram(zb->zb_psize_histogram, 7006 PSIZE_HISTO_SIZE, 0); 7007 } 7008 } 7009 } 7010 zdb_nicenum(mdstats->zb_count, csize, 7011 sizeof (csize)); 7012 zdb_nicenum(mdstats->zb_lsize, lsize, 7013 sizeof (lsize)); 7014 zdb_nicenum(mdstats->zb_psize, psize, 7015 sizeof (psize)); 7016 zdb_nicenum(mdstats->zb_asize, asize, 7017 sizeof (asize)); 7018 zdb_nicenum(mdstats->zb_asize / mdstats->zb_count, avg, 7019 sizeof (avg)); 7020 
zdb_nicenum(mdstats->zb_gangs, gang, sizeof (gang)); 7021 7022 (void) printf("%6s\t%5s\t%5s\t%5s\t%5s" 7023 "\t%5.2f\t%6.2f\t", 7024 csize, lsize, psize, asize, avg, 7025 (double)mdstats->zb_lsize / mdstats->zb_psize, 7026 100.0 * mdstats->zb_asize / tzb->zb_asize); 7027 (void) printf("%s\n", "Metadata Total"); 7028 7029 /* Output a table summarizing block sizes in the pool */ 7030 if (dump_opt['b'] >= 2) { 7031 dump_size_histograms(zcb); 7032 } 7033 7034 umem_free(mdstats, sizeof (zfs_blkstat_t)); 7035 } 7036 7037 (void) printf("\n"); 7038 7039 if (leaks) { 7040 umem_free(zcb, sizeof (zdb_cb_t)); 7041 return (2); 7042 } 7043 7044 if (zcb->zcb_haderrors) { 7045 umem_free(zcb, sizeof (zdb_cb_t)); 7046 return (3); 7047 } 7048 7049 umem_free(zcb, sizeof (zdb_cb_t)); 7050 return (0); 7051 } 7052 7053 typedef struct zdb_ddt_entry { 7054 ddt_key_t zdde_key; 7055 uint64_t zdde_ref_blocks; 7056 uint64_t zdde_ref_lsize; 7057 uint64_t zdde_ref_psize; 7058 uint64_t zdde_ref_dsize; 7059 avl_node_t zdde_node; 7060 } zdb_ddt_entry_t; 7061 7062 static int 7063 zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, 7064 const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) 7065 { 7066 (void) zilog, (void) dnp; 7067 avl_tree_t *t = arg; 7068 avl_index_t where; 7069 zdb_ddt_entry_t *zdde, zdde_search; 7070 7071 if (zb->zb_level == ZB_DNODE_LEVEL || BP_IS_HOLE(bp) || 7072 BP_IS_EMBEDDED(bp)) 7073 return (0); 7074 7075 if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) { 7076 (void) printf("traversing objset %llu, %llu objects, " 7077 "%lu blocks so far\n", 7078 (u_longlong_t)zb->zb_objset, 7079 (u_longlong_t)BP_GET_FILL(bp), 7080 avl_numnodes(t)); 7081 } 7082 7083 if (BP_IS_HOLE(bp) || BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_OFF || 7084 BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp))) 7085 return (0); 7086 7087 ddt_key_fill(&zdde_search.zdde_key, bp); 7088 7089 zdde = avl_find(t, &zdde_search, &where); 7090 7091 if (zdde == NULL) { 7092 zdde = 
umem_zalloc(sizeof (*zdde), UMEM_NOFAIL); 7093 zdde->zdde_key = zdde_search.zdde_key; 7094 avl_insert(t, zdde, where); 7095 } 7096 7097 zdde->zdde_ref_blocks += 1; 7098 zdde->zdde_ref_lsize += BP_GET_LSIZE(bp); 7099 zdde->zdde_ref_psize += BP_GET_PSIZE(bp); 7100 zdde->zdde_ref_dsize += bp_get_dsize_sync(spa, bp); 7101 7102 return (0); 7103 } 7104 7105 static void 7106 dump_simulated_ddt(spa_t *spa) 7107 { 7108 avl_tree_t t; 7109 void *cookie = NULL; 7110 zdb_ddt_entry_t *zdde; 7111 ddt_histogram_t ddh_total = {{{0}}}; 7112 ddt_stat_t dds_total = {0}; 7113 7114 avl_create(&t, ddt_entry_compare, 7115 sizeof (zdb_ddt_entry_t), offsetof(zdb_ddt_entry_t, zdde_node)); 7116 7117 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 7118 7119 (void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | 7120 TRAVERSE_NO_DECRYPT, zdb_ddt_add_cb, &t); 7121 7122 spa_config_exit(spa, SCL_CONFIG, FTAG); 7123 7124 while ((zdde = avl_destroy_nodes(&t, &cookie)) != NULL) { 7125 ddt_stat_t dds; 7126 uint64_t refcnt = zdde->zdde_ref_blocks; 7127 ASSERT(refcnt != 0); 7128 7129 dds.dds_blocks = zdde->zdde_ref_blocks / refcnt; 7130 dds.dds_lsize = zdde->zdde_ref_lsize / refcnt; 7131 dds.dds_psize = zdde->zdde_ref_psize / refcnt; 7132 dds.dds_dsize = zdde->zdde_ref_dsize / refcnt; 7133 7134 dds.dds_ref_blocks = zdde->zdde_ref_blocks; 7135 dds.dds_ref_lsize = zdde->zdde_ref_lsize; 7136 dds.dds_ref_psize = zdde->zdde_ref_psize; 7137 dds.dds_ref_dsize = zdde->zdde_ref_dsize; 7138 7139 ddt_stat_add(&ddh_total.ddh_stat[highbit64(refcnt) - 1], 7140 &dds, 0); 7141 7142 umem_free(zdde, sizeof (*zdde)); 7143 } 7144 7145 avl_destroy(&t); 7146 7147 ddt_histogram_stat(&dds_total, &ddh_total); 7148 7149 (void) printf("Simulated DDT histogram:\n"); 7150 7151 zpool_dump_ddt(&dds_total, &ddh_total); 7152 7153 dump_dedup_ratio(&dds_total); 7154 } 7155 7156 static int 7157 verify_device_removal_feature_counts(spa_t *spa) 7158 { 7159 uint64_t dr_feature_refcount = 0; 7160 uint64_t 
oc_feature_refcount = 0; 7161 uint64_t indirect_vdev_count = 0; 7162 uint64_t precise_vdev_count = 0; 7163 uint64_t obsolete_counts_object_count = 0; 7164 uint64_t obsolete_sm_count = 0; 7165 uint64_t obsolete_counts_count = 0; 7166 uint64_t scip_count = 0; 7167 uint64_t obsolete_bpobj_count = 0; 7168 int ret = 0; 7169 7170 spa_condensing_indirect_phys_t *scip = 7171 &spa->spa_condensing_indirect_phys; 7172 if (scip->scip_next_mapping_object != 0) { 7173 vdev_t *vd = spa->spa_root_vdev->vdev_child[scip->scip_vdev]; 7174 ASSERT(scip->scip_prev_obsolete_sm_object != 0); 7175 ASSERT3P(vd->vdev_ops, ==, &vdev_indirect_ops); 7176 7177 (void) printf("Condensing indirect vdev %llu: new mapping " 7178 "object %llu, prev obsolete sm %llu\n", 7179 (u_longlong_t)scip->scip_vdev, 7180 (u_longlong_t)scip->scip_next_mapping_object, 7181 (u_longlong_t)scip->scip_prev_obsolete_sm_object); 7182 if (scip->scip_prev_obsolete_sm_object != 0) { 7183 space_map_t *prev_obsolete_sm = NULL; 7184 VERIFY0(space_map_open(&prev_obsolete_sm, 7185 spa->spa_meta_objset, 7186 scip->scip_prev_obsolete_sm_object, 7187 0, vd->vdev_asize, 0)); 7188 dump_spacemap(spa->spa_meta_objset, prev_obsolete_sm); 7189 (void) printf("\n"); 7190 space_map_close(prev_obsolete_sm); 7191 } 7192 7193 scip_count += 2; 7194 } 7195 7196 for (uint64_t i = 0; i < spa->spa_root_vdev->vdev_children; i++) { 7197 vdev_t *vd = spa->spa_root_vdev->vdev_child[i]; 7198 vdev_indirect_config_t *vic = &vd->vdev_indirect_config; 7199 7200 if (vic->vic_mapping_object != 0) { 7201 ASSERT(vd->vdev_ops == &vdev_indirect_ops || 7202 vd->vdev_removing); 7203 indirect_vdev_count++; 7204 7205 if (vd->vdev_indirect_mapping->vim_havecounts) { 7206 obsolete_counts_count++; 7207 } 7208 } 7209 7210 boolean_t are_precise; 7211 VERIFY0(vdev_obsolete_counts_are_precise(vd, &are_precise)); 7212 if (are_precise) { 7213 ASSERT(vic->vic_mapping_object != 0); 7214 precise_vdev_count++; 7215 } 7216 7217 uint64_t obsolete_sm_object; 7218 
VERIFY0(vdev_obsolete_sm_object(vd, &obsolete_sm_object)); 7219 if (obsolete_sm_object != 0) { 7220 ASSERT(vic->vic_mapping_object != 0); 7221 obsolete_sm_count++; 7222 } 7223 } 7224 7225 (void) feature_get_refcount(spa, 7226 &spa_feature_table[SPA_FEATURE_DEVICE_REMOVAL], 7227 &dr_feature_refcount); 7228 (void) feature_get_refcount(spa, 7229 &spa_feature_table[SPA_FEATURE_OBSOLETE_COUNTS], 7230 &oc_feature_refcount); 7231 7232 if (dr_feature_refcount != indirect_vdev_count) { 7233 ret = 1; 7234 (void) printf("Number of indirect vdevs (%llu) " \ 7235 "does not match feature count (%llu)\n", 7236 (u_longlong_t)indirect_vdev_count, 7237 (u_longlong_t)dr_feature_refcount); 7238 } else { 7239 (void) printf("Verified device_removal feature refcount " \ 7240 "of %llu is correct\n", 7241 (u_longlong_t)dr_feature_refcount); 7242 } 7243 7244 if (zap_contains(spa_meta_objset(spa), DMU_POOL_DIRECTORY_OBJECT, 7245 DMU_POOL_OBSOLETE_BPOBJ) == 0) { 7246 obsolete_bpobj_count++; 7247 } 7248 7249 7250 obsolete_counts_object_count = precise_vdev_count; 7251 obsolete_counts_object_count += obsolete_sm_count; 7252 obsolete_counts_object_count += obsolete_counts_count; 7253 obsolete_counts_object_count += scip_count; 7254 obsolete_counts_object_count += obsolete_bpobj_count; 7255 obsolete_counts_object_count += remap_deadlist_count; 7256 7257 if (oc_feature_refcount != obsolete_counts_object_count) { 7258 ret = 1; 7259 (void) printf("Number of obsolete counts objects (%llu) " \ 7260 "does not match feature count (%llu)\n", 7261 (u_longlong_t)obsolete_counts_object_count, 7262 (u_longlong_t)oc_feature_refcount); 7263 (void) printf("pv:%llu os:%llu oc:%llu sc:%llu " 7264 "ob:%llu rd:%llu\n", 7265 (u_longlong_t)precise_vdev_count, 7266 (u_longlong_t)obsolete_sm_count, 7267 (u_longlong_t)obsolete_counts_count, 7268 (u_longlong_t)scip_count, 7269 (u_longlong_t)obsolete_bpobj_count, 7270 (u_longlong_t)remap_deadlist_count); 7271 } else { 7272 (void) printf("Verified indirect_refcount feature 
refcount " \ 7273 "of %llu is correct\n", 7274 (u_longlong_t)oc_feature_refcount); 7275 } 7276 return (ret); 7277 } 7278 7279 static void 7280 zdb_set_skip_mmp(char *target) 7281 { 7282 spa_t *spa; 7283 7284 /* 7285 * Disable the activity check to allow examination of 7286 * active pools. 7287 */ 7288 mutex_enter(&spa_namespace_lock); 7289 if ((spa = spa_lookup(target)) != NULL) { 7290 spa->spa_import_flags |= ZFS_IMPORT_SKIP_MMP; 7291 } 7292 mutex_exit(&spa_namespace_lock); 7293 } 7294 7295 #define BOGUS_SUFFIX "_CHECKPOINTED_UNIVERSE" 7296 /* 7297 * Import the checkpointed state of the pool specified by the target 7298 * parameter as readonly. The function also accepts a pool config 7299 * as an optional parameter, else it attempts to infer the config by 7300 * the name of the target pool. 7301 * 7302 * Note that the checkpointed state's pool name will be the name of 7303 * the original pool with the above suffix appended to it. In addition, 7304 * if the target is not a pool name (e.g. a path to a dataset) then 7305 * the new_path parameter is populated with the updated path to 7306 * reflect the fact that we are looking into the checkpointed state. 7307 * 7308 * The function returns a newly-allocated copy of the name of the 7309 * pool containing the checkpointed state. When this copy is no 7310 * longer needed it should be freed with free(3C). Same thing 7311 * applies to the new_path parameter if allocated. 
7312 */ 7313 static char * 7314 import_checkpointed_state(char *target, nvlist_t *cfg, char **new_path) 7315 { 7316 int error = 0; 7317 char *poolname, *bogus_name = NULL; 7318 boolean_t freecfg = B_FALSE; 7319 7320 /* If the target is not a pool, the extract the pool name */ 7321 char *path_start = strchr(target, '/'); 7322 if (path_start != NULL) { 7323 size_t poolname_len = path_start - target; 7324 poolname = strndup(target, poolname_len); 7325 } else { 7326 poolname = target; 7327 } 7328 7329 if (cfg == NULL) { 7330 zdb_set_skip_mmp(poolname); 7331 error = spa_get_stats(poolname, &cfg, NULL, 0); 7332 if (error != 0) { 7333 fatal("Tried to read config of pool \"%s\" but " 7334 "spa_get_stats() failed with error %d\n", 7335 poolname, error); 7336 } 7337 freecfg = B_TRUE; 7338 } 7339 7340 if (asprintf(&bogus_name, "%s%s", poolname, BOGUS_SUFFIX) == -1) { 7341 if (target != poolname) 7342 free(poolname); 7343 return (NULL); 7344 } 7345 fnvlist_add_string(cfg, ZPOOL_CONFIG_POOL_NAME, bogus_name); 7346 7347 error = spa_import(bogus_name, cfg, NULL, 7348 ZFS_IMPORT_MISSING_LOG | ZFS_IMPORT_CHECKPOINT | 7349 ZFS_IMPORT_SKIP_MMP); 7350 if (freecfg) 7351 nvlist_free(cfg); 7352 if (error != 0) { 7353 fatal("Tried to import pool \"%s\" but spa_import() failed " 7354 "with error %d\n", bogus_name, error); 7355 } 7356 7357 if (new_path != NULL && path_start != NULL) { 7358 if (asprintf(new_path, "%s%s", bogus_name, path_start) == -1) { 7359 free(bogus_name); 7360 if (path_start != NULL) 7361 free(poolname); 7362 return (NULL); 7363 } 7364 } 7365 7366 if (target != poolname) 7367 free(poolname); 7368 7369 return (bogus_name); 7370 } 7371 7372 typedef struct verify_checkpoint_sm_entry_cb_arg { 7373 vdev_t *vcsec_vd; 7374 7375 /* the following fields are only used for printing progress */ 7376 uint64_t vcsec_entryid; 7377 uint64_t vcsec_num_entries; 7378 } verify_checkpoint_sm_entry_cb_arg_t; 7379 7380 #define ENTRIES_PER_PROGRESS_UPDATE 10000 7381 7382 static int 7383 
verify_checkpoint_sm_entry_cb(space_map_entry_t *sme, void *arg) 7384 { 7385 verify_checkpoint_sm_entry_cb_arg_t *vcsec = arg; 7386 vdev_t *vd = vcsec->vcsec_vd; 7387 metaslab_t *ms = vd->vdev_ms[sme->sme_offset >> vd->vdev_ms_shift]; 7388 uint64_t end = sme->sme_offset + sme->sme_run; 7389 7390 ASSERT(sme->sme_type == SM_FREE); 7391 7392 if ((vcsec->vcsec_entryid % ENTRIES_PER_PROGRESS_UPDATE) == 0) { 7393 (void) fprintf(stderr, 7394 "\rverifying vdev %llu, space map entry %llu of %llu ...", 7395 (longlong_t)vd->vdev_id, 7396 (longlong_t)vcsec->vcsec_entryid, 7397 (longlong_t)vcsec->vcsec_num_entries); 7398 } 7399 vcsec->vcsec_entryid++; 7400 7401 /* 7402 * See comment in checkpoint_sm_exclude_entry_cb() 7403 */ 7404 VERIFY3U(sme->sme_offset, >=, ms->ms_start); 7405 VERIFY3U(end, <=, ms->ms_start + ms->ms_size); 7406 7407 /* 7408 * The entries in the vdev_checkpoint_sm should be marked as 7409 * allocated in the checkpointed state of the pool, therefore 7410 * their respective ms_allocateable trees should not contain them. 7411 */ 7412 mutex_enter(&ms->ms_lock); 7413 range_tree_verify_not_present(ms->ms_allocatable, 7414 sme->sme_offset, sme->sme_run); 7415 mutex_exit(&ms->ms_lock); 7416 7417 return (0); 7418 } 7419 7420 /* 7421 * Verify that all segments in the vdev_checkpoint_sm are allocated 7422 * according to the checkpoint's ms_sm (i.e. are not in the checkpoint's 7423 * ms_allocatable). 7424 * 7425 * Do so by comparing the checkpoint space maps (vdev_checkpoint_sm) of 7426 * each vdev in the current state of the pool to the metaslab space maps 7427 * (ms_sm) of the checkpointed state of the pool. 7428 * 7429 * Note that the function changes the state of the ms_allocatable 7430 * trees of the current spa_t. The entries of these ms_allocatable 7431 * trees are cleared out and then repopulated from with the free 7432 * entries of their respective ms_sm space maps. 
7433 */ 7434 static void 7435 verify_checkpoint_vdev_spacemaps(spa_t *checkpoint, spa_t *current) 7436 { 7437 vdev_t *ckpoint_rvd = checkpoint->spa_root_vdev; 7438 vdev_t *current_rvd = current->spa_root_vdev; 7439 7440 load_concrete_ms_allocatable_trees(checkpoint, SM_FREE); 7441 7442 for (uint64_t c = 0; c < ckpoint_rvd->vdev_children; c++) { 7443 vdev_t *ckpoint_vd = ckpoint_rvd->vdev_child[c]; 7444 vdev_t *current_vd = current_rvd->vdev_child[c]; 7445 7446 space_map_t *checkpoint_sm = NULL; 7447 uint64_t checkpoint_sm_obj; 7448 7449 if (ckpoint_vd->vdev_ops == &vdev_indirect_ops) { 7450 /* 7451 * Since we don't allow device removal in a pool 7452 * that has a checkpoint, we expect that all removed 7453 * vdevs were removed from the pool before the 7454 * checkpoint. 7455 */ 7456 ASSERT3P(current_vd->vdev_ops, ==, &vdev_indirect_ops); 7457 continue; 7458 } 7459 7460 /* 7461 * If the checkpoint space map doesn't exist, then nothing 7462 * here is checkpointed so there's nothing to verify. 
7463 */ 7464 if (current_vd->vdev_top_zap == 0 || 7465 zap_contains(spa_meta_objset(current), 7466 current_vd->vdev_top_zap, 7467 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0) 7468 continue; 7469 7470 VERIFY0(zap_lookup(spa_meta_objset(current), 7471 current_vd->vdev_top_zap, VDEV_TOP_ZAP_POOL_CHECKPOINT_SM, 7472 sizeof (uint64_t), 1, &checkpoint_sm_obj)); 7473 7474 VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(current), 7475 checkpoint_sm_obj, 0, current_vd->vdev_asize, 7476 current_vd->vdev_ashift)); 7477 7478 verify_checkpoint_sm_entry_cb_arg_t vcsec; 7479 vcsec.vcsec_vd = ckpoint_vd; 7480 vcsec.vcsec_entryid = 0; 7481 vcsec.vcsec_num_entries = 7482 space_map_length(checkpoint_sm) / sizeof (uint64_t); 7483 VERIFY0(space_map_iterate(checkpoint_sm, 7484 space_map_length(checkpoint_sm), 7485 verify_checkpoint_sm_entry_cb, &vcsec)); 7486 if (dump_opt['m'] > 3) 7487 dump_spacemap(current->spa_meta_objset, checkpoint_sm); 7488 space_map_close(checkpoint_sm); 7489 } 7490 7491 /* 7492 * If we've added vdevs since we took the checkpoint, ensure 7493 * that their checkpoint space maps are empty. 7494 */ 7495 if (ckpoint_rvd->vdev_children < current_rvd->vdev_children) { 7496 for (uint64_t c = ckpoint_rvd->vdev_children; 7497 c < current_rvd->vdev_children; c++) { 7498 vdev_t *current_vd = current_rvd->vdev_child[c]; 7499 VERIFY3P(current_vd->vdev_checkpoint_sm, ==, NULL); 7500 } 7501 } 7502 7503 /* for cleaner progress output */ 7504 (void) fprintf(stderr, "\n"); 7505 } 7506 7507 /* 7508 * Verifies that all space that's allocated in the checkpoint is 7509 * still allocated in the current version, by checking that everything 7510 * in checkpoint's ms_allocatable (which is actually allocated, not 7511 * allocatable/free) is not present in current's ms_allocatable. 7512 * 7513 * Note that the function changes the state of the ms_allocatable 7514 * trees of both spas when called. 
The entries of all ms_allocatable 7515 * trees are cleared out and then repopulated from their respective 7516 * ms_sm space maps. In the checkpointed state we load the allocated 7517 * entries, and in the current state we load the free entries. 7518 */ 7519 static void 7520 verify_checkpoint_ms_spacemaps(spa_t *checkpoint, spa_t *current) 7521 { 7522 vdev_t *ckpoint_rvd = checkpoint->spa_root_vdev; 7523 vdev_t *current_rvd = current->spa_root_vdev; 7524 7525 load_concrete_ms_allocatable_trees(checkpoint, SM_ALLOC); 7526 load_concrete_ms_allocatable_trees(current, SM_FREE); 7527 7528 for (uint64_t i = 0; i < ckpoint_rvd->vdev_children; i++) { 7529 vdev_t *ckpoint_vd = ckpoint_rvd->vdev_child[i]; 7530 vdev_t *current_vd = current_rvd->vdev_child[i]; 7531 7532 if (ckpoint_vd->vdev_ops == &vdev_indirect_ops) { 7533 /* 7534 * See comment in verify_checkpoint_vdev_spacemaps() 7535 */ 7536 ASSERT3P(current_vd->vdev_ops, ==, &vdev_indirect_ops); 7537 continue; 7538 } 7539 7540 for (uint64_t m = 0; m < ckpoint_vd->vdev_ms_count; m++) { 7541 metaslab_t *ckpoint_msp = ckpoint_vd->vdev_ms[m]; 7542 metaslab_t *current_msp = current_vd->vdev_ms[m]; 7543 7544 (void) fprintf(stderr, 7545 "\rverifying vdev %llu of %llu, " 7546 "metaslab %llu of %llu ...", 7547 (longlong_t)current_vd->vdev_id, 7548 (longlong_t)current_rvd->vdev_children, 7549 (longlong_t)current_vd->vdev_ms[m]->ms_id, 7550 (longlong_t)current_vd->vdev_ms_count); 7551 7552 /* 7553 * We walk through the ms_allocatable trees that 7554 * are loaded with the allocated blocks from the 7555 * ms_sm spacemaps of the checkpoint. For each 7556 * one of these ranges we ensure that none of them 7557 * exists in the ms_allocatable trees of the 7558 * current state which are loaded with the ranges 7559 * that are currently free. 7560 * 7561 * This way we ensure that none of the blocks that 7562 * are part of the checkpoint were freed by mistake. 
7563 */ 7564 range_tree_walk(ckpoint_msp->ms_allocatable, 7565 (range_tree_func_t *)range_tree_verify_not_present, 7566 current_msp->ms_allocatable); 7567 } 7568 } 7569 7570 /* for cleaner progress output */ 7571 (void) fprintf(stderr, "\n"); 7572 } 7573 7574 static void 7575 verify_checkpoint_blocks(spa_t *spa) 7576 { 7577 ASSERT(!dump_opt['L']); 7578 7579 spa_t *checkpoint_spa; 7580 char *checkpoint_pool; 7581 int error = 0; 7582 7583 /* 7584 * We import the checkpointed state of the pool (under a different 7585 * name) so we can do verification on it against the current state 7586 * of the pool. 7587 */ 7588 checkpoint_pool = import_checkpointed_state(spa->spa_name, NULL, 7589 NULL); 7590 ASSERT(strcmp(spa->spa_name, checkpoint_pool) != 0); 7591 7592 error = spa_open(checkpoint_pool, &checkpoint_spa, FTAG); 7593 if (error != 0) { 7594 fatal("Tried to open pool \"%s\" but spa_open() failed with " 7595 "error %d\n", checkpoint_pool, error); 7596 } 7597 7598 /* 7599 * Ensure that ranges in the checkpoint space maps of each vdev 7600 * are allocated according to the checkpointed state's metaslab 7601 * space maps. 7602 */ 7603 verify_checkpoint_vdev_spacemaps(checkpoint_spa, spa); 7604 7605 /* 7606 * Ensure that allocated ranges in the checkpoint's metaslab 7607 * space maps remain allocated in the metaslab space maps of 7608 * the current state. 7609 */ 7610 verify_checkpoint_ms_spacemaps(checkpoint_spa, spa); 7611 7612 /* 7613 * Once we are done, we get rid of the checkpointed state. 
7614 */ 7615 spa_close(checkpoint_spa, FTAG); 7616 free(checkpoint_pool); 7617 } 7618 7619 static void 7620 dump_leftover_checkpoint_blocks(spa_t *spa) 7621 { 7622 vdev_t *rvd = spa->spa_root_vdev; 7623 7624 for (uint64_t i = 0; i < rvd->vdev_children; i++) { 7625 vdev_t *vd = rvd->vdev_child[i]; 7626 7627 space_map_t *checkpoint_sm = NULL; 7628 uint64_t checkpoint_sm_obj; 7629 7630 if (vd->vdev_top_zap == 0) 7631 continue; 7632 7633 if (zap_contains(spa_meta_objset(spa), vd->vdev_top_zap, 7634 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0) 7635 continue; 7636 7637 VERIFY0(zap_lookup(spa_meta_objset(spa), vd->vdev_top_zap, 7638 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM, 7639 sizeof (uint64_t), 1, &checkpoint_sm_obj)); 7640 7641 VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(spa), 7642 checkpoint_sm_obj, 0, vd->vdev_asize, vd->vdev_ashift)); 7643 dump_spacemap(spa->spa_meta_objset, checkpoint_sm); 7644 space_map_close(checkpoint_sm); 7645 } 7646 } 7647 7648 static int 7649 verify_checkpoint(spa_t *spa) 7650 { 7651 uberblock_t checkpoint; 7652 int error; 7653 7654 if (!spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) 7655 return (0); 7656 7657 error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 7658 DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t), 7659 sizeof (uberblock_t) / sizeof (uint64_t), &checkpoint); 7660 7661 if (error == ENOENT && !dump_opt['L']) { 7662 /* 7663 * If the feature is active but the uberblock is missing 7664 * then we must be in the middle of discarding the 7665 * checkpoint. 
7666 */ 7667 (void) printf("\nPartially discarded checkpoint " 7668 "state found:\n"); 7669 if (dump_opt['m'] > 3) 7670 dump_leftover_checkpoint_blocks(spa); 7671 return (0); 7672 } else if (error != 0) { 7673 (void) printf("lookup error %d when looking for " 7674 "checkpointed uberblock in MOS\n", error); 7675 return (error); 7676 } 7677 dump_uberblock(&checkpoint, "\nCheckpointed uberblock found:\n", "\n"); 7678 7679 if (checkpoint.ub_checkpoint_txg == 0) { 7680 (void) printf("\nub_checkpoint_txg not set in checkpointed " 7681 "uberblock\n"); 7682 error = 3; 7683 } 7684 7685 if (error == 0 && !dump_opt['L']) 7686 verify_checkpoint_blocks(spa); 7687 7688 return (error); 7689 } 7690 7691 static void 7692 mos_leaks_cb(void *arg, uint64_t start, uint64_t size) 7693 { 7694 (void) arg; 7695 for (uint64_t i = start; i < size; i++) { 7696 (void) printf("MOS object %llu referenced but not allocated\n", 7697 (u_longlong_t)i); 7698 } 7699 } 7700 7701 static void 7702 mos_obj_refd(uint64_t obj) 7703 { 7704 if (obj != 0 && mos_refd_objs != NULL) 7705 range_tree_add(mos_refd_objs, obj, 1); 7706 } 7707 7708 /* 7709 * Call on a MOS object that may already have been referenced. 
7710 */ 7711 static void 7712 mos_obj_refd_multiple(uint64_t obj) 7713 { 7714 if (obj != 0 && mos_refd_objs != NULL && 7715 !range_tree_contains(mos_refd_objs, obj, 1)) 7716 range_tree_add(mos_refd_objs, obj, 1); 7717 } 7718 7719 static void 7720 mos_leak_vdev_top_zap(vdev_t *vd) 7721 { 7722 uint64_t ms_flush_data_obj; 7723 int error = zap_lookup(spa_meta_objset(vd->vdev_spa), 7724 vd->vdev_top_zap, VDEV_TOP_ZAP_MS_UNFLUSHED_PHYS_TXGS, 7725 sizeof (ms_flush_data_obj), 1, &ms_flush_data_obj); 7726 if (error == ENOENT) 7727 return; 7728 ASSERT0(error); 7729 7730 mos_obj_refd(ms_flush_data_obj); 7731 } 7732 7733 static void 7734 mos_leak_vdev(vdev_t *vd) 7735 { 7736 mos_obj_refd(vd->vdev_dtl_object); 7737 mos_obj_refd(vd->vdev_ms_array); 7738 mos_obj_refd(vd->vdev_indirect_config.vic_births_object); 7739 mos_obj_refd(vd->vdev_indirect_config.vic_mapping_object); 7740 mos_obj_refd(vd->vdev_leaf_zap); 7741 if (vd->vdev_checkpoint_sm != NULL) 7742 mos_obj_refd(vd->vdev_checkpoint_sm->sm_object); 7743 if (vd->vdev_indirect_mapping != NULL) { 7744 mos_obj_refd(vd->vdev_indirect_mapping-> 7745 vim_phys->vimp_counts_object); 7746 } 7747 if (vd->vdev_obsolete_sm != NULL) 7748 mos_obj_refd(vd->vdev_obsolete_sm->sm_object); 7749 7750 for (uint64_t m = 0; m < vd->vdev_ms_count; m++) { 7751 metaslab_t *ms = vd->vdev_ms[m]; 7752 mos_obj_refd(space_map_object(ms->ms_sm)); 7753 } 7754 7755 if (vd->vdev_root_zap != 0) 7756 mos_obj_refd(vd->vdev_root_zap); 7757 7758 if (vd->vdev_top_zap != 0) { 7759 mos_obj_refd(vd->vdev_top_zap); 7760 mos_leak_vdev_top_zap(vd); 7761 } 7762 7763 for (uint64_t c = 0; c < vd->vdev_children; c++) { 7764 mos_leak_vdev(vd->vdev_child[c]); 7765 } 7766 } 7767 7768 static void 7769 mos_leak_log_spacemaps(spa_t *spa) 7770 { 7771 uint64_t spacemap_zap; 7772 int error = zap_lookup(spa_meta_objset(spa), 7773 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_LOG_SPACEMAP_ZAP, 7774 sizeof (spacemap_zap), 1, &spacemap_zap); 7775 if (error == ENOENT) 7776 return; 7777 
ASSERT0(error); 7778 7779 mos_obj_refd(spacemap_zap); 7780 for (spa_log_sm_t *sls = avl_first(&spa->spa_sm_logs_by_txg); 7781 sls; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls)) 7782 mos_obj_refd(sls->sls_sm_obj); 7783 } 7784 7785 static void 7786 errorlog_count_refd(objset_t *mos, uint64_t errlog) 7787 { 7788 zap_cursor_t zc; 7789 zap_attribute_t za; 7790 for (zap_cursor_init(&zc, mos, errlog); 7791 zap_cursor_retrieve(&zc, &za) == 0; 7792 zap_cursor_advance(&zc)) { 7793 mos_obj_refd(za.za_first_integer); 7794 } 7795 zap_cursor_fini(&zc); 7796 } 7797 7798 static int 7799 dump_mos_leaks(spa_t *spa) 7800 { 7801 int rv = 0; 7802 objset_t *mos = spa->spa_meta_objset; 7803 dsl_pool_t *dp = spa->spa_dsl_pool; 7804 7805 /* Visit and mark all referenced objects in the MOS */ 7806 7807 mos_obj_refd(DMU_POOL_DIRECTORY_OBJECT); 7808 mos_obj_refd(spa->spa_pool_props_object); 7809 mos_obj_refd(spa->spa_config_object); 7810 mos_obj_refd(spa->spa_ddt_stat_object); 7811 mos_obj_refd(spa->spa_feat_desc_obj); 7812 mos_obj_refd(spa->spa_feat_enabled_txg_obj); 7813 mos_obj_refd(spa->spa_feat_for_read_obj); 7814 mos_obj_refd(spa->spa_feat_for_write_obj); 7815 mos_obj_refd(spa->spa_history); 7816 mos_obj_refd(spa->spa_errlog_last); 7817 mos_obj_refd(spa->spa_errlog_scrub); 7818 7819 if (spa_feature_is_enabled(spa, SPA_FEATURE_HEAD_ERRLOG)) { 7820 errorlog_count_refd(mos, spa->spa_errlog_last); 7821 errorlog_count_refd(mos, spa->spa_errlog_scrub); 7822 } 7823 7824 mos_obj_refd(spa->spa_all_vdev_zaps); 7825 mos_obj_refd(spa->spa_dsl_pool->dp_bptree_obj); 7826 mos_obj_refd(spa->spa_dsl_pool->dp_tmp_userrefs_obj); 7827 mos_obj_refd(spa->spa_dsl_pool->dp_scan->scn_phys.scn_queue_obj); 7828 bpobj_count_refd(&spa->spa_deferred_bpobj); 7829 mos_obj_refd(dp->dp_empty_bpobj); 7830 bpobj_count_refd(&dp->dp_obsolete_bpobj); 7831 bpobj_count_refd(&dp->dp_free_bpobj); 7832 mos_obj_refd(spa->spa_l2cache.sav_object); 7833 mos_obj_refd(spa->spa_spares.sav_object); 7834 7835 if (spa->spa_syncing_log_sm 
!= NULL) 7836 mos_obj_refd(spa->spa_syncing_log_sm->sm_object); 7837 mos_leak_log_spacemaps(spa); 7838 7839 mos_obj_refd(spa->spa_condensing_indirect_phys. 7840 scip_next_mapping_object); 7841 mos_obj_refd(spa->spa_condensing_indirect_phys. 7842 scip_prev_obsolete_sm_object); 7843 if (spa->spa_condensing_indirect_phys.scip_next_mapping_object != 0) { 7844 vdev_indirect_mapping_t *vim = 7845 vdev_indirect_mapping_open(mos, 7846 spa->spa_condensing_indirect_phys.scip_next_mapping_object); 7847 mos_obj_refd(vim->vim_phys->vimp_counts_object); 7848 vdev_indirect_mapping_close(vim); 7849 } 7850 deleted_livelists_dump_mos(spa); 7851 7852 if (dp->dp_origin_snap != NULL) { 7853 dsl_dataset_t *ds; 7854 7855 dsl_pool_config_enter(dp, FTAG); 7856 VERIFY0(dsl_dataset_hold_obj(dp, 7857 dsl_dataset_phys(dp->dp_origin_snap)->ds_next_snap_obj, 7858 FTAG, &ds)); 7859 count_ds_mos_objects(ds); 7860 dump_blkptr_list(&ds->ds_deadlist, "Deadlist"); 7861 dsl_dataset_rele(ds, FTAG); 7862 dsl_pool_config_exit(dp, FTAG); 7863 7864 count_ds_mos_objects(dp->dp_origin_snap); 7865 dump_blkptr_list(&dp->dp_origin_snap->ds_deadlist, "Deadlist"); 7866 } 7867 count_dir_mos_objects(dp->dp_mos_dir); 7868 if (dp->dp_free_dir != NULL) 7869 count_dir_mos_objects(dp->dp_free_dir); 7870 if (dp->dp_leak_dir != NULL) 7871 count_dir_mos_objects(dp->dp_leak_dir); 7872 7873 mos_leak_vdev(spa->spa_root_vdev); 7874 7875 for (uint64_t class = 0; class < DDT_CLASSES; class++) { 7876 for (uint64_t type = 0; type < DDT_TYPES; type++) { 7877 for (uint64_t cksum = 0; 7878 cksum < ZIO_CHECKSUM_FUNCTIONS; cksum++) { 7879 ddt_t *ddt = spa->spa_ddt[cksum]; 7880 mos_obj_refd(ddt->ddt_object[type][class]); 7881 } 7882 } 7883 } 7884 7885 /* 7886 * Visit all allocated objects and make sure they are referenced. 
7887 */ 7888 uint64_t object = 0; 7889 while (dmu_object_next(mos, &object, B_FALSE, 0) == 0) { 7890 if (range_tree_contains(mos_refd_objs, object, 1)) { 7891 range_tree_remove(mos_refd_objs, object, 1); 7892 } else { 7893 dmu_object_info_t doi; 7894 const char *name; 7895 VERIFY0(dmu_object_info(mos, object, &doi)); 7896 if (doi.doi_type & DMU_OT_NEWTYPE) { 7897 dmu_object_byteswap_t bswap = 7898 DMU_OT_BYTESWAP(doi.doi_type); 7899 name = dmu_ot_byteswap[bswap].ob_name; 7900 } else { 7901 name = dmu_ot[doi.doi_type].ot_name; 7902 } 7903 7904 (void) printf("MOS object %llu (%s) leaked\n", 7905 (u_longlong_t)object, name); 7906 rv = 2; 7907 } 7908 } 7909 (void) range_tree_walk(mos_refd_objs, mos_leaks_cb, NULL); 7910 if (!range_tree_is_empty(mos_refd_objs)) 7911 rv = 2; 7912 range_tree_vacate(mos_refd_objs, NULL, NULL); 7913 range_tree_destroy(mos_refd_objs); 7914 return (rv); 7915 } 7916 7917 typedef struct log_sm_obsolete_stats_arg { 7918 uint64_t lsos_current_txg; 7919 7920 uint64_t lsos_total_entries; 7921 uint64_t lsos_valid_entries; 7922 7923 uint64_t lsos_sm_entries; 7924 uint64_t lsos_valid_sm_entries; 7925 } log_sm_obsolete_stats_arg_t; 7926 7927 static int 7928 log_spacemap_obsolete_stats_cb(spa_t *spa, space_map_entry_t *sme, 7929 uint64_t txg, void *arg) 7930 { 7931 log_sm_obsolete_stats_arg_t *lsos = arg; 7932 7933 uint64_t offset = sme->sme_offset; 7934 uint64_t vdev_id = sme->sme_vdev; 7935 7936 if (lsos->lsos_current_txg == 0) { 7937 /* this is the first log */ 7938 lsos->lsos_current_txg = txg; 7939 } else if (lsos->lsos_current_txg < txg) { 7940 /* we just changed log - print stats and reset */ 7941 (void) printf("%-8llu valid entries out of %-8llu - txg %llu\n", 7942 (u_longlong_t)lsos->lsos_valid_sm_entries, 7943 (u_longlong_t)lsos->lsos_sm_entries, 7944 (u_longlong_t)lsos->lsos_current_txg); 7945 lsos->lsos_valid_sm_entries = 0; 7946 lsos->lsos_sm_entries = 0; 7947 lsos->lsos_current_txg = txg; 7948 } 7949 ASSERT3U(lsos->lsos_current_txg, ==, 
txg); 7950 7951 lsos->lsos_sm_entries++; 7952 lsos->lsos_total_entries++; 7953 7954 vdev_t *vd = vdev_lookup_top(spa, vdev_id); 7955 if (!vdev_is_concrete(vd)) 7956 return (0); 7957 7958 metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift]; 7959 ASSERT(sme->sme_type == SM_ALLOC || sme->sme_type == SM_FREE); 7960 7961 if (txg < metaslab_unflushed_txg(ms)) 7962 return (0); 7963 lsos->lsos_valid_sm_entries++; 7964 lsos->lsos_valid_entries++; 7965 return (0); 7966 } 7967 7968 static void 7969 dump_log_spacemap_obsolete_stats(spa_t *spa) 7970 { 7971 if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP)) 7972 return; 7973 7974 log_sm_obsolete_stats_arg_t lsos = {0}; 7975 7976 (void) printf("Log Space Map Obsolete Entry Statistics:\n"); 7977 7978 iterate_through_spacemap_logs(spa, 7979 log_spacemap_obsolete_stats_cb, &lsos); 7980 7981 /* print stats for latest log */ 7982 (void) printf("%-8llu valid entries out of %-8llu - txg %llu\n", 7983 (u_longlong_t)lsos.lsos_valid_sm_entries, 7984 (u_longlong_t)lsos.lsos_sm_entries, 7985 (u_longlong_t)lsos.lsos_current_txg); 7986 7987 (void) printf("%-8llu valid entries out of %-8llu - total\n\n", 7988 (u_longlong_t)lsos.lsos_valid_entries, 7989 (u_longlong_t)lsos.lsos_total_entries); 7990 } 7991 7992 static void 7993 dump_zpool(spa_t *spa) 7994 { 7995 dsl_pool_t *dp = spa_get_dsl(spa); 7996 int rc = 0; 7997 7998 if (dump_opt['y']) { 7999 livelist_metaslab_validate(spa); 8000 } 8001 8002 if (dump_opt['S']) { 8003 dump_simulated_ddt(spa); 8004 return; 8005 } 8006 8007 if (!dump_opt['e'] && dump_opt['C'] > 1) { 8008 (void) printf("\nCached configuration:\n"); 8009 dump_nvlist(spa->spa_config, 8); 8010 } 8011 8012 if (dump_opt['C']) 8013 dump_config(spa); 8014 8015 if (dump_opt['u']) 8016 dump_uberblock(&spa->spa_uberblock, "\nUberblock:\n", "\n"); 8017 8018 if (dump_opt['D']) 8019 dump_all_ddts(spa); 8020 8021 if (dump_opt['d'] > 2 || dump_opt['m']) 8022 dump_metaslabs(spa); 8023 if (dump_opt['M']) 8024 
dump_metaslab_groups(spa, dump_opt['M'] > 1); 8025 if (dump_opt['d'] > 2 || dump_opt['m']) { 8026 dump_log_spacemaps(spa); 8027 dump_log_spacemap_obsolete_stats(spa); 8028 } 8029 8030 if (dump_opt['d'] || dump_opt['i']) { 8031 spa_feature_t f; 8032 mos_refd_objs = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 8033 0); 8034 dump_objset(dp->dp_meta_objset); 8035 8036 if (dump_opt['d'] >= 3) { 8037 dsl_pool_t *dp = spa->spa_dsl_pool; 8038 dump_full_bpobj(&spa->spa_deferred_bpobj, 8039 "Deferred frees", 0); 8040 if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { 8041 dump_full_bpobj(&dp->dp_free_bpobj, 8042 "Pool snapshot frees", 0); 8043 } 8044 if (bpobj_is_open(&dp->dp_obsolete_bpobj)) { 8045 ASSERT(spa_feature_is_enabled(spa, 8046 SPA_FEATURE_DEVICE_REMOVAL)); 8047 dump_full_bpobj(&dp->dp_obsolete_bpobj, 8048 "Pool obsolete blocks", 0); 8049 } 8050 8051 if (spa_feature_is_active(spa, 8052 SPA_FEATURE_ASYNC_DESTROY)) { 8053 dump_bptree(spa->spa_meta_objset, 8054 dp->dp_bptree_obj, 8055 "Pool dataset frees"); 8056 } 8057 dump_dtl(spa->spa_root_vdev, 0); 8058 } 8059 8060 for (spa_feature_t f = 0; f < SPA_FEATURES; f++) 8061 global_feature_count[f] = UINT64_MAX; 8062 global_feature_count[SPA_FEATURE_REDACTION_BOOKMARKS] = 0; 8063 global_feature_count[SPA_FEATURE_BOOKMARK_WRITTEN] = 0; 8064 global_feature_count[SPA_FEATURE_LIVELIST] = 0; 8065 8066 (void) dmu_objset_find(spa_name(spa), dump_one_objset, 8067 NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN); 8068 8069 if (rc == 0 && !dump_opt['L']) 8070 rc = dump_mos_leaks(spa); 8071 8072 for (f = 0; f < SPA_FEATURES; f++) { 8073 uint64_t refcount; 8074 8075 uint64_t *arr; 8076 if (!(spa_feature_table[f].fi_flags & 8077 ZFEATURE_FLAG_PER_DATASET)) { 8078 if (global_feature_count[f] == UINT64_MAX) 8079 continue; 8080 if (!spa_feature_is_enabled(spa, f)) { 8081 ASSERT0(global_feature_count[f]); 8082 continue; 8083 } 8084 arr = global_feature_count; 8085 } else { 8086 if (!spa_feature_is_enabled(spa, f)) { 8087 
ASSERT0(dataset_feature_count[f]); 8088 continue; 8089 } 8090 arr = dataset_feature_count; 8091 } 8092 if (feature_get_refcount(spa, &spa_feature_table[f], 8093 &refcount) == ENOTSUP) 8094 continue; 8095 if (arr[f] != refcount) { 8096 (void) printf("%s feature refcount mismatch: " 8097 "%lld consumers != %lld refcount\n", 8098 spa_feature_table[f].fi_uname, 8099 (longlong_t)arr[f], (longlong_t)refcount); 8100 rc = 2; 8101 } else { 8102 (void) printf("Verified %s feature refcount " 8103 "of %llu is correct\n", 8104 spa_feature_table[f].fi_uname, 8105 (longlong_t)refcount); 8106 } 8107 } 8108 8109 if (rc == 0) 8110 rc = verify_device_removal_feature_counts(spa); 8111 } 8112 8113 if (rc == 0 && (dump_opt['b'] || dump_opt['c'])) 8114 rc = dump_block_stats(spa); 8115 8116 if (rc == 0) 8117 rc = verify_spacemap_refcounts(spa); 8118 8119 if (dump_opt['s']) 8120 show_pool_stats(spa); 8121 8122 if (dump_opt['h']) 8123 dump_history(spa); 8124 8125 if (rc == 0) 8126 rc = verify_checkpoint(spa); 8127 8128 if (rc != 0) { 8129 dump_debug_buffer(); 8130 exit(rc); 8131 } 8132 } 8133 8134 #define ZDB_FLAG_CHECKSUM 0x0001 8135 #define ZDB_FLAG_DECOMPRESS 0x0002 8136 #define ZDB_FLAG_BSWAP 0x0004 8137 #define ZDB_FLAG_GBH 0x0008 8138 #define ZDB_FLAG_INDIRECT 0x0010 8139 #define ZDB_FLAG_RAW 0x0020 8140 #define ZDB_FLAG_PRINT_BLKPTR 0x0040 8141 #define ZDB_FLAG_VERBOSE 0x0080 8142 8143 static int flagbits[256]; 8144 static char flagbitstr[16]; 8145 8146 static void 8147 zdb_print_blkptr(const blkptr_t *bp, int flags) 8148 { 8149 char blkbuf[BP_SPRINTF_LEN]; 8150 8151 if (flags & ZDB_FLAG_BSWAP) 8152 byteswap_uint64_array((void *)bp, sizeof (blkptr_t)); 8153 8154 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); 8155 (void) printf("%s\n", blkbuf); 8156 } 8157 8158 static void 8159 zdb_dump_indirect(blkptr_t *bp, int nbps, int flags) 8160 { 8161 int i; 8162 8163 for (i = 0; i < nbps; i++) 8164 zdb_print_blkptr(&bp[i], flags); 8165 } 8166 8167 static void 8168 zdb_dump_gbh(void *buf, int 
flags) 8169 { 8170 zdb_dump_indirect((blkptr_t *)buf, SPA_GBH_NBLKPTRS, flags); 8171 } 8172 8173 static void 8174 zdb_dump_block_raw(void *buf, uint64_t size, int flags) 8175 { 8176 if (flags & ZDB_FLAG_BSWAP) 8177 byteswap_uint64_array(buf, size); 8178 VERIFY(write(fileno(stdout), buf, size) == size); 8179 } 8180 8181 static void 8182 zdb_dump_block(char *label, void *buf, uint64_t size, int flags) 8183 { 8184 uint64_t *d = (uint64_t *)buf; 8185 unsigned nwords = size / sizeof (uint64_t); 8186 int do_bswap = !!(flags & ZDB_FLAG_BSWAP); 8187 unsigned i, j; 8188 const char *hdr; 8189 char *c; 8190 8191 8192 if (do_bswap) 8193 hdr = " 7 6 5 4 3 2 1 0 f e d c b a 9 8"; 8194 else 8195 hdr = " 0 1 2 3 4 5 6 7 8 9 a b c d e f"; 8196 8197 (void) printf("\n%s\n%6s %s 0123456789abcdef\n", label, "", hdr); 8198 8199 #ifdef _LITTLE_ENDIAN 8200 /* correct the endianness */ 8201 do_bswap = !do_bswap; 8202 #endif 8203 for (i = 0; i < nwords; i += 2) { 8204 (void) printf("%06llx: %016llx %016llx ", 8205 (u_longlong_t)(i * sizeof (uint64_t)), 8206 (u_longlong_t)(do_bswap ? BSWAP_64(d[i]) : d[i]), 8207 (u_longlong_t)(do_bswap ? BSWAP_64(d[i + 1]) : d[i + 1])); 8208 8209 c = (char *)&d[i]; 8210 for (j = 0; j < 2 * sizeof (uint64_t); j++) 8211 (void) printf("%c", isprint(c[j]) ? c[j] : '.'); 8212 (void) printf("\n"); 8213 } 8214 } 8215 8216 /* 8217 * There are two acceptable formats: 8218 * leaf_name - For example: c1t0d0 or /tmp/ztest.0a 8219 * child[.child]* - For example: 0.1.1 8220 * 8221 * The second form can be used to specify arbitrary vdevs anywhere 8222 * in the hierarchy. For example, in a pool with a mirror of 8223 * RAID-Zs, you can specify either RAID-Z vdev with 0.0 or 0.1 . 8224 */ 8225 static vdev_t * 8226 zdb_vdev_lookup(vdev_t *vdev, const char *path) 8227 { 8228 char *s, *p, *q; 8229 unsigned i; 8230 8231 if (vdev == NULL) 8232 return (NULL); 8233 8234 /* First, assume the x.x.x.x format */ 8235 i = strtoul(path, &s, 10); 8236 if (s == path || (s && *s != '.' 
&& *s != '\0')) 8237 goto name; 8238 if (i >= vdev->vdev_children) 8239 return (NULL); 8240 8241 vdev = vdev->vdev_child[i]; 8242 if (s && *s == '\0') 8243 return (vdev); 8244 return (zdb_vdev_lookup(vdev, s+1)); 8245 8246 name: 8247 for (i = 0; i < vdev->vdev_children; i++) { 8248 vdev_t *vc = vdev->vdev_child[i]; 8249 8250 if (vc->vdev_path == NULL) { 8251 vc = zdb_vdev_lookup(vc, path); 8252 if (vc == NULL) 8253 continue; 8254 else 8255 return (vc); 8256 } 8257 8258 p = strrchr(vc->vdev_path, '/'); 8259 p = p ? p + 1 : vc->vdev_path; 8260 q = &vc->vdev_path[strlen(vc->vdev_path) - 2]; 8261 8262 if (strcmp(vc->vdev_path, path) == 0) 8263 return (vc); 8264 if (strcmp(p, path) == 0) 8265 return (vc); 8266 if (strcmp(q, "s0") == 0 && strncmp(p, path, q - p) == 0) 8267 return (vc); 8268 } 8269 8270 return (NULL); 8271 } 8272 8273 static int 8274 name_from_objset_id(spa_t *spa, uint64_t objset_id, char *outstr) 8275 { 8276 dsl_dataset_t *ds; 8277 8278 dsl_pool_config_enter(spa->spa_dsl_pool, FTAG); 8279 int error = dsl_dataset_hold_obj(spa->spa_dsl_pool, objset_id, 8280 NULL, &ds); 8281 if (error != 0) { 8282 (void) fprintf(stderr, "failed to hold objset %llu: %s\n", 8283 (u_longlong_t)objset_id, strerror(error)); 8284 dsl_pool_config_exit(spa->spa_dsl_pool, FTAG); 8285 return (error); 8286 } 8287 dsl_dataset_name(ds, outstr); 8288 dsl_dataset_rele(ds, NULL); 8289 dsl_pool_config_exit(spa->spa_dsl_pool, FTAG); 8290 return (0); 8291 } 8292 8293 static boolean_t 8294 zdb_parse_block_sizes(char *sizes, uint64_t *lsize, uint64_t *psize) 8295 { 8296 char *s0, *s1, *tmp = NULL; 8297 8298 if (sizes == NULL) 8299 return (B_FALSE); 8300 8301 s0 = strtok_r(sizes, "/", &tmp); 8302 if (s0 == NULL) 8303 return (B_FALSE); 8304 s1 = strtok_r(NULL, "/", &tmp); 8305 *lsize = strtoull(s0, NULL, 16); 8306 *psize = s1 ? 
strtoull(s1, NULL, 16) : *lsize; 8307 return (*lsize >= *psize && *psize > 0); 8308 } 8309 8310 #define ZIO_COMPRESS_MASK(alg) (1ULL << (ZIO_COMPRESS_##alg)) 8311 8312 static boolean_t 8313 zdb_decompress_block(abd_t *pabd, void *buf, void *lbuf, uint64_t lsize, 8314 uint64_t psize, int flags) 8315 { 8316 (void) buf; 8317 boolean_t exceeded = B_FALSE; 8318 /* 8319 * We don't know how the data was compressed, so just try 8320 * every decompress function at every inflated blocksize. 8321 */ 8322 void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); 8323 int cfuncs[ZIO_COMPRESS_FUNCTIONS] = { 0 }; 8324 int *cfuncp = cfuncs; 8325 uint64_t maxlsize = SPA_MAXBLOCKSIZE; 8326 uint64_t mask = ZIO_COMPRESS_MASK(ON) | ZIO_COMPRESS_MASK(OFF) | 8327 ZIO_COMPRESS_MASK(INHERIT) | ZIO_COMPRESS_MASK(EMPTY) | 8328 (getenv("ZDB_NO_ZLE") ? ZIO_COMPRESS_MASK(ZLE) : 0); 8329 *cfuncp++ = ZIO_COMPRESS_LZ4; 8330 *cfuncp++ = ZIO_COMPRESS_LZJB; 8331 mask |= ZIO_COMPRESS_MASK(LZ4) | ZIO_COMPRESS_MASK(LZJB); 8332 for (int c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) 8333 if (((1ULL << c) & mask) == 0) 8334 *cfuncp++ = c; 8335 8336 /* 8337 * On the one hand, with SPA_MAXBLOCKSIZE at 16MB, this 8338 * could take a while and we should let the user know 8339 * we are not stuck. On the other hand, printing progress 8340 * info gets old after a while. User can specify 'v' flag 8341 * to see the progression. 8342 */ 8343 if (lsize == psize) 8344 lsize += SPA_MINBLOCKSIZE; 8345 else 8346 maxlsize = lsize; 8347 for (; lsize <= maxlsize; lsize += SPA_MINBLOCKSIZE) { 8348 for (cfuncp = cfuncs; *cfuncp; cfuncp++) { 8349 if (flags & ZDB_FLAG_VERBOSE) { 8350 (void) fprintf(stderr, 8351 "Trying %05llx -> %05llx (%s)\n", 8352 (u_longlong_t)psize, 8353 (u_longlong_t)lsize, 8354 zio_compress_table[*cfuncp].\ 8355 ci_name); 8356 } 8357 8358 /* 8359 * We randomize lbuf2, and decompress to both 8360 * lbuf and lbuf2. This way, we will know if 8361 * decompression fill exactly to lsize. 
8362 */ 8363 VERIFY0(random_get_pseudo_bytes(lbuf2, lsize)); 8364 8365 if (zio_decompress_data(*cfuncp, pabd, 8366 lbuf, psize, lsize, NULL) == 0 && 8367 zio_decompress_data(*cfuncp, pabd, 8368 lbuf2, psize, lsize, NULL) == 0 && 8369 memcmp(lbuf, lbuf2, lsize) == 0) 8370 break; 8371 } 8372 if (*cfuncp != 0) 8373 break; 8374 } 8375 umem_free(lbuf2, SPA_MAXBLOCKSIZE); 8376 8377 if (lsize > maxlsize) { 8378 exceeded = B_TRUE; 8379 } 8380 if (*cfuncp == ZIO_COMPRESS_ZLE) { 8381 printf("\nZLE decompression was selected. If you " 8382 "suspect the results are wrong,\ntry avoiding ZLE " 8383 "by setting and exporting ZDB_NO_ZLE=\"true\"\n"); 8384 } 8385 8386 return (exceeded); 8387 } 8388 8389 /* 8390 * Read a block from a pool and print it out. The syntax of the 8391 * block descriptor is: 8392 * 8393 * pool:vdev_specifier:offset:[lsize/]psize[:flags] 8394 * 8395 * pool - The name of the pool you wish to read from 8396 * vdev_specifier - Which vdev (see comment for zdb_vdev_lookup) 8397 * offset - offset, in hex, in bytes 8398 * size - Amount of data to read, in hex, in bytes 8399 * flags - A string of characters specifying options 8400 * b: Decode a blkptr at given offset within block 8401 * c: Calculate and display checksums 8402 * d: Decompress data before dumping 8403 * e: Byteswap data before dumping 8404 * g: Display data as a gang block header 8405 * i: Display as an indirect block 8406 * r: Dump raw data to stdout 8407 * v: Verbose 8408 * 8409 */ 8410 static void 8411 zdb_read_block(char *thing, spa_t *spa) 8412 { 8413 blkptr_t blk, *bp = &blk; 8414 dva_t *dva = bp->blk_dva; 8415 int flags = 0; 8416 uint64_t offset = 0, psize = 0, lsize = 0, blkptr_offset = 0; 8417 zio_t *zio; 8418 vdev_t *vd; 8419 abd_t *pabd; 8420 void *lbuf, *buf; 8421 char *s, *p, *dup, *flagstr, *sizes, *tmp = NULL; 8422 const char *vdev, *errmsg = NULL; 8423 int i, error; 8424 boolean_t borrowed = B_FALSE, found = B_FALSE; 8425 8426 dup = strdup(thing); 8427 s = strtok_r(dup, ":", &tmp); 
8428 vdev = s ?: ""; 8429 s = strtok_r(NULL, ":", &tmp); 8430 offset = strtoull(s ? s : "", NULL, 16); 8431 sizes = strtok_r(NULL, ":", &tmp); 8432 s = strtok_r(NULL, ":", &tmp); 8433 flagstr = strdup(s ?: ""); 8434 8435 if (!zdb_parse_block_sizes(sizes, &lsize, &psize)) 8436 errmsg = "invalid size(s)"; 8437 if (!IS_P2ALIGNED(psize, DEV_BSIZE) || !IS_P2ALIGNED(lsize, DEV_BSIZE)) 8438 errmsg = "size must be a multiple of sector size"; 8439 if (!IS_P2ALIGNED(offset, DEV_BSIZE)) 8440 errmsg = "offset must be a multiple of sector size"; 8441 if (errmsg) { 8442 (void) printf("Invalid block specifier: %s - %s\n", 8443 thing, errmsg); 8444 goto done; 8445 } 8446 8447 tmp = NULL; 8448 for (s = strtok_r(flagstr, ":", &tmp); 8449 s != NULL; 8450 s = strtok_r(NULL, ":", &tmp)) { 8451 for (i = 0; i < strlen(flagstr); i++) { 8452 int bit = flagbits[(uchar_t)flagstr[i]]; 8453 8454 if (bit == 0) { 8455 (void) printf("***Ignoring flag: %c\n", 8456 (uchar_t)flagstr[i]); 8457 continue; 8458 } 8459 found = B_TRUE; 8460 flags |= bit; 8461 8462 p = &flagstr[i + 1]; 8463 if (*p != ':' && *p != '\0') { 8464 int j = 0, nextbit = flagbits[(uchar_t)*p]; 8465 char *end, offstr[8] = { 0 }; 8466 if ((bit == ZDB_FLAG_PRINT_BLKPTR) && 8467 (nextbit == 0)) { 8468 /* look ahead to isolate the offset */ 8469 while (nextbit == 0 && 8470 strchr(flagbitstr, *p) == NULL) { 8471 offstr[j] = *p; 8472 j++; 8473 if (i + j > strlen(flagstr)) 8474 break; 8475 p++; 8476 nextbit = flagbits[(uchar_t)*p]; 8477 } 8478 blkptr_offset = strtoull(offstr, &end, 8479 16); 8480 i += j; 8481 } else if (nextbit == 0) { 8482 (void) printf("***Ignoring flag arg:" 8483 " '%c'\n", (uchar_t)*p); 8484 } 8485 } 8486 } 8487 } 8488 if (blkptr_offset % sizeof (blkptr_t)) { 8489 printf("Block pointer offset 0x%llx " 8490 "must be divisible by 0x%x\n", 8491 (longlong_t)blkptr_offset, (int)sizeof (blkptr_t)); 8492 goto done; 8493 } 8494 if (found == B_FALSE && strlen(flagstr) > 0) { 8495 printf("Invalid flag arg: '%s'\n", flagstr); 
8496 goto done; 8497 } 8498 8499 vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev); 8500 if (vd == NULL) { 8501 (void) printf("***Invalid vdev: %s\n", vdev); 8502 goto done; 8503 } else { 8504 if (vd->vdev_path) 8505 (void) fprintf(stderr, "Found vdev: %s\n", 8506 vd->vdev_path); 8507 else 8508 (void) fprintf(stderr, "Found vdev type: %s\n", 8509 vd->vdev_ops->vdev_op_type); 8510 } 8511 8512 pabd = abd_alloc_for_io(SPA_MAXBLOCKSIZE, B_FALSE); 8513 lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); 8514 8515 BP_ZERO(bp); 8516 8517 DVA_SET_VDEV(&dva[0], vd->vdev_id); 8518 DVA_SET_OFFSET(&dva[0], offset); 8519 DVA_SET_GANG(&dva[0], !!(flags & ZDB_FLAG_GBH)); 8520 DVA_SET_ASIZE(&dva[0], vdev_psize_to_asize(vd, psize)); 8521 8522 BP_SET_BIRTH(bp, TXG_INITIAL, TXG_INITIAL); 8523 8524 BP_SET_LSIZE(bp, lsize); 8525 BP_SET_PSIZE(bp, psize); 8526 BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF); 8527 BP_SET_CHECKSUM(bp, ZIO_CHECKSUM_OFF); 8528 BP_SET_TYPE(bp, DMU_OT_NONE); 8529 BP_SET_LEVEL(bp, 0); 8530 BP_SET_DEDUP(bp, 0); 8531 BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER); 8532 8533 spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 8534 zio = zio_root(spa, NULL, NULL, 0); 8535 8536 if (vd == vd->vdev_top) { 8537 /* 8538 * Treat this as a normal block read. 8539 */ 8540 zio_nowait(zio_read(zio, spa, bp, pabd, psize, NULL, NULL, 8541 ZIO_PRIORITY_SYNC_READ, 8542 ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL)); 8543 } else { 8544 /* 8545 * Treat this as a vdev child I/O. 
8546 */ 8547 zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pabd, 8548 psize, ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ, 8549 ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY | 8550 ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW | ZIO_FLAG_OPTIONAL, 8551 NULL, NULL)); 8552 } 8553 8554 error = zio_wait(zio); 8555 spa_config_exit(spa, SCL_STATE, FTAG); 8556 8557 if (error) { 8558 (void) printf("Read of %s failed, error: %d\n", thing, error); 8559 goto out; 8560 } 8561 8562 uint64_t orig_lsize = lsize; 8563 buf = lbuf; 8564 if (flags & ZDB_FLAG_DECOMPRESS) { 8565 boolean_t failed = zdb_decompress_block(pabd, buf, lbuf, 8566 lsize, psize, flags); 8567 if (failed) { 8568 (void) printf("Decompress of %s failed\n", thing); 8569 goto out; 8570 } 8571 } else { 8572 buf = abd_borrow_buf_copy(pabd, lsize); 8573 borrowed = B_TRUE; 8574 } 8575 /* 8576 * Try to detect invalid block pointer. If invalid, try 8577 * decompressing. 8578 */ 8579 if ((flags & ZDB_FLAG_PRINT_BLKPTR || flags & ZDB_FLAG_INDIRECT) && 8580 !(flags & ZDB_FLAG_DECOMPRESS)) { 8581 const blkptr_t *b = (const blkptr_t *)(void *) 8582 ((uintptr_t)buf + (uintptr_t)blkptr_offset); 8583 if (zfs_blkptr_verify(spa, b, 8584 BLK_CONFIG_NEEDED, BLK_VERIFY_ONLY) == B_FALSE) { 8585 abd_return_buf_copy(pabd, buf, lsize); 8586 borrowed = B_FALSE; 8587 buf = lbuf; 8588 boolean_t failed = zdb_decompress_block(pabd, buf, 8589 lbuf, lsize, psize, flags); 8590 b = (const blkptr_t *)(void *) 8591 ((uintptr_t)buf + (uintptr_t)blkptr_offset); 8592 if (failed || zfs_blkptr_verify(spa, b, 8593 BLK_CONFIG_NEEDED, BLK_VERIFY_LOG) == B_FALSE) { 8594 printf("invalid block pointer at this DVA\n"); 8595 goto out; 8596 } 8597 } 8598 } 8599 8600 if (flags & ZDB_FLAG_PRINT_BLKPTR) 8601 zdb_print_blkptr((blkptr_t *)(void *) 8602 ((uintptr_t)buf + (uintptr_t)blkptr_offset), flags); 8603 else if (flags & ZDB_FLAG_RAW) 8604 zdb_dump_block_raw(buf, lsize, flags); 8605 else if (flags & ZDB_FLAG_INDIRECT) 8606 zdb_dump_indirect((blkptr_t *)buf, 8607 orig_lsize / 
sizeof (blkptr_t), flags); 8608 else if (flags & ZDB_FLAG_GBH) 8609 zdb_dump_gbh(buf, flags); 8610 else 8611 zdb_dump_block(thing, buf, lsize, flags); 8612 8613 /* 8614 * If :c was specified, iterate through the checksum table to 8615 * calculate and display each checksum for our specified 8616 * DVA and length. 8617 */ 8618 if ((flags & ZDB_FLAG_CHECKSUM) && !(flags & ZDB_FLAG_RAW) && 8619 !(flags & ZDB_FLAG_GBH)) { 8620 zio_t *czio; 8621 (void) printf("\n"); 8622 for (enum zio_checksum ck = ZIO_CHECKSUM_LABEL; 8623 ck < ZIO_CHECKSUM_FUNCTIONS; ck++) { 8624 8625 if ((zio_checksum_table[ck].ci_flags & 8626 ZCHECKSUM_FLAG_EMBEDDED) || 8627 ck == ZIO_CHECKSUM_NOPARITY) { 8628 continue; 8629 } 8630 BP_SET_CHECKSUM(bp, ck); 8631 spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 8632 czio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); 8633 czio->io_bp = bp; 8634 8635 if (vd == vd->vdev_top) { 8636 zio_nowait(zio_read(czio, spa, bp, pabd, psize, 8637 NULL, NULL, 8638 ZIO_PRIORITY_SYNC_READ, 8639 ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW | 8640 ZIO_FLAG_DONT_RETRY, NULL)); 8641 } else { 8642 zio_nowait(zio_vdev_child_io(czio, bp, vd, 8643 offset, pabd, psize, ZIO_TYPE_READ, 8644 ZIO_PRIORITY_SYNC_READ, 8645 ZIO_FLAG_DONT_PROPAGATE | 8646 ZIO_FLAG_DONT_RETRY | 8647 ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW | 8648 ZIO_FLAG_SPECULATIVE | 8649 ZIO_FLAG_OPTIONAL, NULL, NULL)); 8650 } 8651 error = zio_wait(czio); 8652 if (error == 0 || error == ECKSUM) { 8653 zio_t *ck_zio = zio_root(spa, NULL, NULL, 0); 8654 ck_zio->io_offset = 8655 DVA_GET_OFFSET(&bp->blk_dva[0]); 8656 ck_zio->io_bp = bp; 8657 zio_checksum_compute(ck_zio, ck, pabd, lsize); 8658 printf( 8659 "%12s\t" 8660 "cksum=%016llx:%016llx:%016llx:%016llx\n", 8661 zio_checksum_table[ck].ci_name, 8662 (u_longlong_t)bp->blk_cksum.zc_word[0], 8663 (u_longlong_t)bp->blk_cksum.zc_word[1], 8664 (u_longlong_t)bp->blk_cksum.zc_word[2], 8665 (u_longlong_t)bp->blk_cksum.zc_word[3]); 8666 zio_wait(ck_zio); 8667 } else { 8668 printf("error %d reading 
block\n", error); 8669 } 8670 spa_config_exit(spa, SCL_STATE, FTAG); 8671 } 8672 } 8673 8674 if (borrowed) 8675 abd_return_buf_copy(pabd, buf, lsize); 8676 8677 out: 8678 abd_free(pabd); 8679 umem_free(lbuf, SPA_MAXBLOCKSIZE); 8680 done: 8681 free(flagstr); 8682 free(dup); 8683 } 8684 8685 static void 8686 zdb_embedded_block(char *thing) 8687 { 8688 blkptr_t bp = {{{{0}}}}; 8689 unsigned long long *words = (void *)&bp; 8690 char *buf; 8691 int err; 8692 8693 err = sscanf(thing, "%llx:%llx:%llx:%llx:%llx:%llx:%llx:%llx:" 8694 "%llx:%llx:%llx:%llx:%llx:%llx:%llx:%llx", 8695 words + 0, words + 1, words + 2, words + 3, 8696 words + 4, words + 5, words + 6, words + 7, 8697 words + 8, words + 9, words + 10, words + 11, 8698 words + 12, words + 13, words + 14, words + 15); 8699 if (err != 16) { 8700 (void) fprintf(stderr, "invalid input format\n"); 8701 exit(1); 8702 } 8703 ASSERT3U(BPE_GET_LSIZE(&bp), <=, SPA_MAXBLOCKSIZE); 8704 buf = malloc(SPA_MAXBLOCKSIZE); 8705 if (buf == NULL) { 8706 (void) fprintf(stderr, "out of memory\n"); 8707 exit(1); 8708 } 8709 err = decode_embedded_bp(&bp, buf, BPE_GET_LSIZE(&bp)); 8710 if (err != 0) { 8711 (void) fprintf(stderr, "decode failed: %u\n", err); 8712 exit(1); 8713 } 8714 zdb_dump_block_raw(buf, BPE_GET_LSIZE(&bp), 0); 8715 free(buf); 8716 } 8717 8718 /* check for valid hex or decimal numeric string */ 8719 static boolean_t 8720 zdb_numeric(char *str) 8721 { 8722 int i = 0; 8723 8724 if (strlen(str) == 0) 8725 return (B_FALSE); 8726 if (strncmp(str, "0x", 2) == 0 || strncmp(str, "0X", 2) == 0) 8727 i = 2; 8728 for (; i < strlen(str); i++) { 8729 if (!isxdigit(str[i])) 8730 return (B_FALSE); 8731 } 8732 return (B_TRUE); 8733 } 8734 8735 int 8736 main(int argc, char **argv) 8737 { 8738 int c; 8739 spa_t *spa = NULL; 8740 objset_t *os = NULL; 8741 int dump_all = 1; 8742 int verbose = 0; 8743 int error = 0; 8744 char **searchdirs = NULL; 8745 int nsearch = 0; 8746 char *target, *target_pool, dsname[ZFS_MAX_DATASET_NAME_LEN]; 8747 
nvlist_t *policy = NULL; 8748 uint64_t max_txg = UINT64_MAX; 8749 int64_t objset_id = -1; 8750 uint64_t object; 8751 int flags = ZFS_IMPORT_MISSING_LOG; 8752 int rewind = ZPOOL_NEVER_REWIND; 8753 char *spa_config_path_env, *objset_str; 8754 boolean_t target_is_spa = B_TRUE, dataset_lookup = B_FALSE; 8755 nvlist_t *cfg = NULL; 8756 8757 dprintf_setup(&argc, argv); 8758 8759 /* 8760 * If there is an environment variable SPA_CONFIG_PATH it overrides 8761 * default spa_config_path setting. If -U flag is specified it will 8762 * override this environment variable settings once again. 8763 */ 8764 spa_config_path_env = getenv("SPA_CONFIG_PATH"); 8765 if (spa_config_path_env != NULL) 8766 spa_config_path = spa_config_path_env; 8767 8768 /* 8769 * For performance reasons, we set this tunable down. We do so before 8770 * the arg parsing section so that the user can override this value if 8771 * they choose. 8772 */ 8773 zfs_btree_verify_intensity = 3; 8774 8775 struct option long_options[] = { 8776 {"ignore-assertions", no_argument, NULL, 'A'}, 8777 {"block-stats", no_argument, NULL, 'b'}, 8778 {"backup", no_argument, NULL, 'B'}, 8779 {"checksum", no_argument, NULL, 'c'}, 8780 {"config", no_argument, NULL, 'C'}, 8781 {"datasets", no_argument, NULL, 'd'}, 8782 {"dedup-stats", no_argument, NULL, 'D'}, 8783 {"exported", no_argument, NULL, 'e'}, 8784 {"embedded-block-pointer", no_argument, NULL, 'E'}, 8785 {"automatic-rewind", no_argument, NULL, 'F'}, 8786 {"dump-debug-msg", no_argument, NULL, 'G'}, 8787 {"history", no_argument, NULL, 'h'}, 8788 {"intent-logs", no_argument, NULL, 'i'}, 8789 {"inflight", required_argument, NULL, 'I'}, 8790 {"checkpointed-state", no_argument, NULL, 'k'}, 8791 {"key", required_argument, NULL, 'K'}, 8792 {"label", no_argument, NULL, 'l'}, 8793 {"disable-leak-tracking", no_argument, NULL, 'L'}, 8794 {"metaslabs", no_argument, NULL, 'm'}, 8795 {"metaslab-groups", no_argument, NULL, 'M'}, 8796 {"numeric", no_argument, NULL, 'N'}, 8797 {"option", 
required_argument, NULL, 'o'}, 8798 {"object-lookups", no_argument, NULL, 'O'}, 8799 {"path", required_argument, NULL, 'p'}, 8800 {"parseable", no_argument, NULL, 'P'}, 8801 {"skip-label", no_argument, NULL, 'q'}, 8802 {"copy-object", no_argument, NULL, 'r'}, 8803 {"read-block", no_argument, NULL, 'R'}, 8804 {"io-stats", no_argument, NULL, 's'}, 8805 {"simulate-dedup", no_argument, NULL, 'S'}, 8806 {"txg", required_argument, NULL, 't'}, 8807 {"uberblock", no_argument, NULL, 'u'}, 8808 {"cachefile", required_argument, NULL, 'U'}, 8809 {"verbose", no_argument, NULL, 'v'}, 8810 {"verbatim", no_argument, NULL, 'V'}, 8811 {"dump-blocks", required_argument, NULL, 'x'}, 8812 {"extreme-rewind", no_argument, NULL, 'X'}, 8813 {"all-reconstruction", no_argument, NULL, 'Y'}, 8814 {"livelist", no_argument, NULL, 'y'}, 8815 {"zstd-headers", no_argument, NULL, 'Z'}, 8816 {0, 0, 0, 0} 8817 }; 8818 8819 while ((c = getopt_long(argc, argv, 8820 "AbBcCdDeEFGhiI:kK:lLmMNo:Op:PqrRsSt:uU:vVx:XYyZ", 8821 long_options, NULL)) != -1) { 8822 switch (c) { 8823 case 'b': 8824 case 'B': 8825 case 'c': 8826 case 'C': 8827 case 'd': 8828 case 'D': 8829 case 'E': 8830 case 'G': 8831 case 'h': 8832 case 'i': 8833 case 'l': 8834 case 'm': 8835 case 'M': 8836 case 'N': 8837 case 'O': 8838 case 'r': 8839 case 'R': 8840 case 's': 8841 case 'S': 8842 case 'u': 8843 case 'y': 8844 case 'Z': 8845 dump_opt[c]++; 8846 dump_all = 0; 8847 break; 8848 case 'A': 8849 case 'e': 8850 case 'F': 8851 case 'k': 8852 case 'L': 8853 case 'P': 8854 case 'q': 8855 case 'X': 8856 dump_opt[c]++; 8857 break; 8858 case 'Y': 8859 zfs_reconstruct_indirect_combinations_max = INT_MAX; 8860 zfs_deadman_enabled = 0; 8861 break; 8862 /* NB: Sort single match options below. 
*/ 8863 case 'I': 8864 max_inflight_bytes = strtoull(optarg, NULL, 0); 8865 if (max_inflight_bytes == 0) { 8866 (void) fprintf(stderr, "maximum number " 8867 "of inflight bytes must be greater " 8868 "than 0\n"); 8869 usage(); 8870 } 8871 break; 8872 case 'K': 8873 dump_opt[c]++; 8874 key_material = strdup(optarg); 8875 /* redact key material in process table */ 8876 while (*optarg != '\0') { *optarg++ = '*'; } 8877 break; 8878 case 'o': 8879 error = set_global_var(optarg); 8880 if (error != 0) 8881 usage(); 8882 break; 8883 case 'p': 8884 if (searchdirs == NULL) { 8885 searchdirs = umem_alloc(sizeof (char *), 8886 UMEM_NOFAIL); 8887 } else { 8888 char **tmp = umem_alloc((nsearch + 1) * 8889 sizeof (char *), UMEM_NOFAIL); 8890 memcpy(tmp, searchdirs, nsearch * 8891 sizeof (char *)); 8892 umem_free(searchdirs, 8893 nsearch * sizeof (char *)); 8894 searchdirs = tmp; 8895 } 8896 searchdirs[nsearch++] = optarg; 8897 break; 8898 case 't': 8899 max_txg = strtoull(optarg, NULL, 0); 8900 if (max_txg < TXG_INITIAL) { 8901 (void) fprintf(stderr, "incorrect txg " 8902 "specified: %s\n", optarg); 8903 usage(); 8904 } 8905 break; 8906 case 'U': 8907 spa_config_path = optarg; 8908 if (spa_config_path[0] != '/') { 8909 (void) fprintf(stderr, 8910 "cachefile must be an absolute path " 8911 "(i.e. start with a slash)\n"); 8912 usage(); 8913 } 8914 break; 8915 case 'v': 8916 verbose++; 8917 break; 8918 case 'V': 8919 flags = ZFS_IMPORT_VERBATIM; 8920 break; 8921 case 'x': 8922 vn_dumpdir = optarg; 8923 break; 8924 default: 8925 usage(); 8926 break; 8927 } 8928 } 8929 8930 if (!dump_opt['e'] && searchdirs != NULL) { 8931 (void) fprintf(stderr, "-p option requires use of -e\n"); 8932 usage(); 8933 } 8934 #if defined(_LP64) 8935 /* 8936 * ZDB does not typically re-read blocks; therefore limit the ARC 8937 * to 256 MB, which can be used entirely for metadata. 
8938 */ 8939 zfs_arc_min = 2ULL << SPA_MAXBLOCKSHIFT; 8940 zfs_arc_max = 256 * 1024 * 1024; 8941 #endif 8942 8943 /* 8944 * "zdb -c" uses checksum-verifying scrub i/os which are async reads. 8945 * "zdb -b" uses traversal prefetch which uses async reads. 8946 * For good performance, let several of them be active at once. 8947 */ 8948 zfs_vdev_async_read_max_active = 10; 8949 8950 /* 8951 * Disable reference tracking for better performance. 8952 */ 8953 reference_tracking_enable = B_FALSE; 8954 8955 /* 8956 * Do not fail spa_load when spa_load_verify fails. This is needed 8957 * to load non-idle pools. 8958 */ 8959 spa_load_verify_dryrun = B_TRUE; 8960 8961 /* 8962 * ZDB should have ability to read spacemaps. 8963 */ 8964 spa_mode_readable_spacemaps = B_TRUE; 8965 8966 kernel_init(SPA_MODE_READ); 8967 8968 if (dump_all) 8969 verbose = MAX(verbose, 1); 8970 8971 for (c = 0; c < 256; c++) { 8972 if (dump_all && strchr("ABeEFkKlLNOPrRSXy", c) == NULL) 8973 dump_opt[c] = 1; 8974 if (dump_opt[c]) 8975 dump_opt[c] += verbose; 8976 } 8977 8978 libspl_set_assert_ok((dump_opt['A'] == 1) || (dump_opt['A'] > 2)); 8979 zfs_recover = (dump_opt['A'] > 1); 8980 8981 argc -= optind; 8982 argv += optind; 8983 if (argc < 2 && dump_opt['R']) 8984 usage(); 8985 8986 if (dump_opt['E']) { 8987 if (argc != 1) 8988 usage(); 8989 zdb_embedded_block(argv[0]); 8990 return (0); 8991 } 8992 8993 if (argc < 1) { 8994 if (!dump_opt['e'] && dump_opt['C']) { 8995 dump_cachefile(spa_config_path); 8996 return (0); 8997 } 8998 usage(); 8999 } 9000 9001 if (dump_opt['l']) 9002 return (dump_label(argv[0])); 9003 9004 if (dump_opt['O']) { 9005 if (argc != 2) 9006 usage(); 9007 dump_opt['v'] = verbose + 3; 9008 return (dump_path(argv[0], argv[1], NULL)); 9009 } 9010 if (dump_opt['r']) { 9011 target_is_spa = B_FALSE; 9012 if (argc != 3) 9013 usage(); 9014 dump_opt['v'] = verbose; 9015 error = dump_path(argv[0], argv[1], &object); 9016 if (error != 0) 9017 fatal("internal error: %s", strerror(error)); 9018 
} 9019 9020 if (dump_opt['X'] || dump_opt['F']) 9021 rewind = ZPOOL_DO_REWIND | 9022 (dump_opt['X'] ? ZPOOL_EXTREME_REWIND : 0); 9023 9024 /* -N implies -d */ 9025 if (dump_opt['N'] && dump_opt['d'] == 0) 9026 dump_opt['d'] = dump_opt['N']; 9027 9028 if (nvlist_alloc(&policy, NV_UNIQUE_NAME_TYPE, 0) != 0 || 9029 nvlist_add_uint64(policy, ZPOOL_LOAD_REQUEST_TXG, max_txg) != 0 || 9030 nvlist_add_uint32(policy, ZPOOL_LOAD_REWIND_POLICY, rewind) != 0) 9031 fatal("internal error: %s", strerror(ENOMEM)); 9032 9033 error = 0; 9034 target = argv[0]; 9035 9036 if (strpbrk(target, "/@") != NULL) { 9037 size_t targetlen; 9038 9039 target_pool = strdup(target); 9040 *strpbrk(target_pool, "/@") = '\0'; 9041 9042 target_is_spa = B_FALSE; 9043 targetlen = strlen(target); 9044 if (targetlen && target[targetlen - 1] == '/') 9045 target[targetlen - 1] = '\0'; 9046 9047 /* 9048 * See if an objset ID was supplied (-d <pool>/<objset ID>). 9049 * To disambiguate tank/100, consider the 100 as objsetID 9050 * if -N was given, otherwise 100 is an objsetID iff 9051 * tank/100 as a named dataset fails on lookup. 
9052 */ 9053 objset_str = strchr(target, '/'); 9054 if (objset_str && strlen(objset_str) > 1 && 9055 zdb_numeric(objset_str + 1)) { 9056 char *endptr; 9057 errno = 0; 9058 objset_str++; 9059 objset_id = strtoull(objset_str, &endptr, 0); 9060 /* dataset 0 is the same as opening the pool */ 9061 if (errno == 0 && endptr != objset_str && 9062 objset_id != 0) { 9063 if (dump_opt['N']) 9064 dataset_lookup = B_TRUE; 9065 } 9066 /* normal dataset name not an objset ID */ 9067 if (endptr == objset_str) { 9068 objset_id = -1; 9069 } 9070 } else if (objset_str && !zdb_numeric(objset_str + 1) && 9071 dump_opt['N']) { 9072 printf("Supply a numeric objset ID with -N\n"); 9073 exit(1); 9074 } 9075 } else { 9076 target_pool = target; 9077 } 9078 9079 if (dump_opt['e']) { 9080 importargs_t args = { 0 }; 9081 9082 args.paths = nsearch; 9083 args.path = searchdirs; 9084 args.can_be_active = B_TRUE; 9085 9086 libpc_handle_t lpch = { 9087 .lpc_lib_handle = NULL, 9088 .lpc_ops = &libzpool_config_ops, 9089 .lpc_printerr = B_TRUE 9090 }; 9091 error = zpool_find_config(&lpch, target_pool, &cfg, &args); 9092 9093 if (error == 0) { 9094 9095 if (nvlist_add_nvlist(cfg, 9096 ZPOOL_LOAD_POLICY, policy) != 0) { 9097 fatal("can't open '%s': %s", 9098 target, strerror(ENOMEM)); 9099 } 9100 9101 if (dump_opt['C'] > 1) { 9102 (void) printf("\nConfiguration for import:\n"); 9103 dump_nvlist(cfg, 8); 9104 } 9105 9106 /* 9107 * Disable the activity check to allow examination of 9108 * active pools. 9109 */ 9110 error = spa_import(target_pool, cfg, NULL, 9111 flags | ZFS_IMPORT_SKIP_MMP); 9112 } 9113 } 9114 9115 if (searchdirs != NULL) { 9116 umem_free(searchdirs, nsearch * sizeof (char *)); 9117 searchdirs = NULL; 9118 } 9119 9120 /* 9121 * import_checkpointed_state makes the assumption that the 9122 * target pool that we pass it is already part of the spa 9123 * namespace. 
Because of that we need to make sure to call 9124 * it always after the -e option has been processed, which 9125 * imports the pool to the namespace if it's not in the 9126 * cachefile. 9127 */ 9128 char *checkpoint_pool = NULL; 9129 char *checkpoint_target = NULL; 9130 if (dump_opt['k']) { 9131 checkpoint_pool = import_checkpointed_state(target, cfg, 9132 &checkpoint_target); 9133 9134 if (checkpoint_target != NULL) 9135 target = checkpoint_target; 9136 } 9137 9138 if (cfg != NULL) { 9139 nvlist_free(cfg); 9140 cfg = NULL; 9141 } 9142 9143 if (target_pool != target) 9144 free(target_pool); 9145 9146 if (error == 0) { 9147 if (dump_opt['k'] && (target_is_spa || dump_opt['R'])) { 9148 ASSERT(checkpoint_pool != NULL); 9149 ASSERT(checkpoint_target == NULL); 9150 9151 error = spa_open(checkpoint_pool, &spa, FTAG); 9152 if (error != 0) { 9153 fatal("Tried to open pool \"%s\" but " 9154 "spa_open() failed with error %d\n", 9155 checkpoint_pool, error); 9156 } 9157 9158 } else if (target_is_spa || dump_opt['R'] || dump_opt['B'] || 9159 objset_id == 0) { 9160 zdb_set_skip_mmp(target); 9161 error = spa_open_rewind(target, &spa, FTAG, policy, 9162 NULL); 9163 if (error) { 9164 /* 9165 * If we're missing the log device then 9166 * try opening the pool after clearing the 9167 * log state. 
9168 */ 9169 mutex_enter(&spa_namespace_lock); 9170 if ((spa = spa_lookup(target)) != NULL && 9171 spa->spa_log_state == SPA_LOG_MISSING) { 9172 spa->spa_log_state = SPA_LOG_CLEAR; 9173 error = 0; 9174 } 9175 mutex_exit(&spa_namespace_lock); 9176 9177 if (!error) { 9178 error = spa_open_rewind(target, &spa, 9179 FTAG, policy, NULL); 9180 } 9181 } 9182 } else if (strpbrk(target, "#") != NULL) { 9183 dsl_pool_t *dp; 9184 error = dsl_pool_hold(target, FTAG, &dp); 9185 if (error != 0) { 9186 fatal("can't dump '%s': %s", target, 9187 strerror(error)); 9188 } 9189 error = dump_bookmark(dp, target, B_TRUE, verbose > 1); 9190 dsl_pool_rele(dp, FTAG); 9191 if (error != 0) { 9192 fatal("can't dump '%s': %s", target, 9193 strerror(error)); 9194 } 9195 return (error); 9196 } else { 9197 target_pool = strdup(target); 9198 if (strpbrk(target, "/@") != NULL) 9199 *strpbrk(target_pool, "/@") = '\0'; 9200 9201 zdb_set_skip_mmp(target); 9202 /* 9203 * If -N was supplied, the user has indicated that 9204 * zdb -d <pool>/<objsetID> is in effect. Otherwise 9205 * we first assume that the dataset string is the 9206 * dataset name. If dmu_objset_hold fails with the 9207 * dataset string, and we have an objset_id, retry the 9208 * lookup with the objsetID. 9209 */ 9210 boolean_t retry = B_TRUE; 9211 retry_lookup: 9212 if (dataset_lookup == B_TRUE) { 9213 /* 9214 * Use the supplied id to get the name 9215 * for open_objset. 
9216 */ 9217 error = spa_open(target_pool, &spa, FTAG); 9218 if (error == 0) { 9219 error = name_from_objset_id(spa, 9220 objset_id, dsname); 9221 spa_close(spa, FTAG); 9222 if (error == 0) 9223 target = dsname; 9224 } 9225 } 9226 if (error == 0) { 9227 if (objset_id > 0 && retry) { 9228 int err = dmu_objset_hold(target, FTAG, 9229 &os); 9230 if (err) { 9231 dataset_lookup = B_TRUE; 9232 retry = B_FALSE; 9233 goto retry_lookup; 9234 } else { 9235 dmu_objset_rele(os, FTAG); 9236 } 9237 } 9238 error = open_objset(target, FTAG, &os); 9239 } 9240 if (error == 0) 9241 spa = dmu_objset_spa(os); 9242 free(target_pool); 9243 } 9244 } 9245 nvlist_free(policy); 9246 9247 if (error) 9248 fatal("can't open '%s': %s", target, strerror(error)); 9249 9250 /* 9251 * Set the pool failure mode to panic in order to prevent the pool 9252 * from suspending. A suspended I/O will have no way to resume and 9253 * can prevent the zdb(8) command from terminating as expected. 9254 */ 9255 if (spa != NULL) 9256 spa->spa_failmode = ZIO_FAILURE_MODE_PANIC; 9257 9258 argv++; 9259 argc--; 9260 if (dump_opt['r']) { 9261 error = zdb_copy_object(os, object, argv[1]); 9262 } else if (!dump_opt['R']) { 9263 flagbits['d'] = ZOR_FLAG_DIRECTORY; 9264 flagbits['f'] = ZOR_FLAG_PLAIN_FILE; 9265 flagbits['m'] = ZOR_FLAG_SPACE_MAP; 9266 flagbits['z'] = ZOR_FLAG_ZAP; 9267 flagbits['A'] = ZOR_FLAG_ALL_TYPES; 9268 9269 if (argc > 0 && dump_opt['d']) { 9270 zopt_object_args = argc; 9271 zopt_object_ranges = calloc(zopt_object_args, 9272 sizeof (zopt_object_range_t)); 9273 for (unsigned i = 0; i < zopt_object_args; i++) { 9274 int err; 9275 const char *msg = NULL; 9276 9277 err = parse_object_range(argv[i], 9278 &zopt_object_ranges[i], &msg); 9279 if (err != 0) 9280 fatal("Bad object or range: '%s': %s\n", 9281 argv[i], msg ?: ""); 9282 } 9283 } else if (argc > 0 && dump_opt['m']) { 9284 zopt_metaslab_args = argc; 9285 zopt_metaslab = calloc(zopt_metaslab_args, 9286 sizeof (uint64_t)); 9287 for (unsigned i = 0; i 
< zopt_metaslab_args; i++) { 9288 errno = 0; 9289 zopt_metaslab[i] = strtoull(argv[i], NULL, 0); 9290 if (zopt_metaslab[i] == 0 && errno != 0) 9291 fatal("bad number %s: %s", argv[i], 9292 strerror(errno)); 9293 } 9294 } 9295 if (dump_opt['B']) { 9296 dump_backup(target, objset_id, 9297 argc > 0 ? argv[0] : NULL); 9298 } else if (os != NULL) { 9299 dump_objset(os); 9300 } else if (zopt_object_args > 0 && !dump_opt['m']) { 9301 dump_objset(spa->spa_meta_objset); 9302 } else { 9303 dump_zpool(spa); 9304 } 9305 } else { 9306 flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR; 9307 flagbits['c'] = ZDB_FLAG_CHECKSUM; 9308 flagbits['d'] = ZDB_FLAG_DECOMPRESS; 9309 flagbits['e'] = ZDB_FLAG_BSWAP; 9310 flagbits['g'] = ZDB_FLAG_GBH; 9311 flagbits['i'] = ZDB_FLAG_INDIRECT; 9312 flagbits['r'] = ZDB_FLAG_RAW; 9313 flagbits['v'] = ZDB_FLAG_VERBOSE; 9314 9315 for (int i = 0; i < argc; i++) 9316 zdb_read_block(argv[i], spa); 9317 } 9318 9319 if (dump_opt['k']) { 9320 free(checkpoint_pool); 9321 if (!target_is_spa) 9322 free(checkpoint_target); 9323 } 9324 9325 if (os != NULL) { 9326 close_objset(os, FTAG); 9327 } else { 9328 spa_close(spa, FTAG); 9329 } 9330 9331 fuid_table_destroy(); 9332 9333 dump_debug_buffer(); 9334 9335 kernel_fini(); 9336 9337 return (error); 9338 } 9339