/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
 * Copyright (c) 2014 Integros [integros.com]
 * Copyright 2016 Nexenta Systems, Inc.
 * Copyright (c) 2017, 2018 Lawrence Livermore National Security, LLC.
 * Copyright (c) 2015, 2017, Intel Corporation.
 * Copyright (c) 2020 Datto Inc.
 * Copyright (c) 2020, The FreeBSD Foundation [1]
 *
 * [1] Portions of this software were developed by Allan Jude
 *     under sponsorship from the FreeBSD Foundation.
 * Copyright (c) 2021 Allan Jude
 * Copyright (c) 2021 Toomas Soome <tsoome@me.com>
 */

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <ctype.h>
#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/dmu.h>
#include <sys/zap.h>
#include <sys/fs/zfs.h>
#include <sys/zfs_znode.h>
#include <sys/zfs_sa.h>
#include <sys/sa.h>
#include <sys/sa_impl.h>
#include <sys/vdev.h>
#include <sys/vdev_impl.h>
#include <sys/metaslab_impl.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_bookmark.h>
#include <sys/dbuf.h>
#include <sys/zil.h>
#include <sys/zil_impl.h>
#include <sys/stat.h>
#include <sys/resource.h>
#include <sys/dmu_send.h>
#include <sys/dmu_traverse.h>
#include <sys/zio_checksum.h>
#include <sys/zio_compress.h>
#include <sys/zfs_fuid.h>
#include <sys/arc.h>
#include <sys/arc_impl.h>
#include <sys/ddt.h>
#include <sys/zfeature.h>
#include <sys/abd.h>
#include <sys/blkptr.h>
#include <sys/dsl_crypt.h>
#include <sys/dsl_scan.h>
#include <sys/btree.h>
#include <zfs_comutil.h>
#include <sys/zstd/zstd.h>

#include <libnvpair.h>
#include <libzutil.h>

#include "zdb.h"

#define	ZDB_COMPRESS_NAME(idx) ((idx) < ZIO_COMPRESS_FUNCTIONS ? \
	zio_compress_table[(idx)].ci_name : "UNKNOWN")
#define	ZDB_CHECKSUM_NAME(idx) ((idx) < ZIO_CHECKSUM_FUNCTIONS ? \
	zio_checksum_table[(idx)].ci_name : "UNKNOWN")
#define	ZDB_OT_TYPE(idx) ((idx) < DMU_OT_NUMTYPES ? (idx) : \
	(idx) == DMU_OTN_ZAP_DATA || (idx) == DMU_OTN_ZAP_METADATA ? \
	DMU_OT_ZAP_OTHER : \
	(idx) == DMU_OTN_UINT64_DATA || (idx) == DMU_OTN_UINT64_METADATA ? \
	DMU_OT_UINT64_OTHER : DMU_OT_NUMTYPES)

/* Some platforms require part of inode IDs to be remapped */
#ifdef __APPLE__
#define	ZDB_MAP_OBJECT_ID(obj)	INO_XNUTOZFS(obj, 2)
#else
#define	ZDB_MAP_OBJECT_ID(obj)	(obj)
#endif

static const char *
zdb_ot_name(dmu_object_type_t type)
{
	if (type < DMU_OT_NUMTYPES)
		return (dmu_ot[type].ot_name);
	else if ((type & DMU_OT_NEWTYPE) &&
	    ((type & DMU_OT_BYTESWAP_MASK) < DMU_BSWAP_NUMFUNCS))
		return (dmu_ot_byteswap[type & DMU_OT_BYTESWAP_MASK].ob_name);
	else
		return ("UNKNOWN");
}

extern int reference_tracking_enable;
extern int zfs_recover;
extern unsigned long zfs_arc_meta_min, zfs_arc_meta_limit;
extern int zfs_vdev_async_read_max_active;
extern boolean_t spa_load_verify_dryrun;
extern int zfs_reconstruct_indirect_combinations_max;
extern int zfs_btree_verify_intensity;

static const char cmdname[] = "zdb";
uint8_t dump_opt[256];

typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);

uint64_t *zopt_metaslab = NULL;
static unsigned zopt_metaslab_args = 0;

typedef struct zopt_object_range {
	uint64_t zor_obj_start;
	uint64_t zor_obj_end;
	uint64_t zor_flags;
} zopt_object_range_t;
zopt_object_range_t *zopt_object_ranges = NULL;
static unsigned zopt_object_args = 0;

static int flagbits[256];

#define	ZOR_FLAG_PLAIN_FILE	0x0001
#define	ZOR_FLAG_DIRECTORY	0x0002
#define	ZOR_FLAG_SPACE_MAP	0x0004
#define	ZOR_FLAG_ZAP		0x0008
#define	ZOR_FLAG_ALL_TYPES	-1
#define	ZOR_SUPPORTED_FLAGS	(ZOR_FLAG_PLAIN_FILE | \
				ZOR_FLAG_DIRECTORY | \
				ZOR_FLAG_SPACE_MAP | \
				ZOR_FLAG_ZAP)

#define	ZDB_FLAG_CHECKSUM	0x0001
#define	ZDB_FLAG_DECOMPRESS	0x0002
#define	ZDB_FLAG_BSWAP		0x0004
#define	ZDB_FLAG_GBH		0x0008
#define	ZDB_FLAG_INDIRECT	0x0010
#define	ZDB_FLAG_RAW		0x0020
#define	ZDB_FLAG_PRINT_BLKPTR	0x0040
#define	ZDB_FLAG_VERBOSE	0x0080

uint64_t max_inflight_bytes = 256 * 1024 * 1024; /* 256MB */
static int leaked_objects = 0;
static range_tree_t *mos_refd_objs;

static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *,
    boolean_t);
static void mos_obj_refd(uint64_t);
static void mos_obj_refd_multiple(uint64_t);
static int dump_bpobj_cb(void *arg, const blkptr_t *bp, boolean_t free,
    dmu_tx_t *tx);

typedef struct sublivelist_verify {
	/* FREE's that haven't yet matched to an ALLOC, in one sub-livelist */
	zfs_btree_t sv_pair;

	/* ALLOC's without a matching FREE, accumulates across sub-livelists */
	zfs_btree_t sv_leftover;
} sublivelist_verify_t;
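/*
 * Ordering for the livelist B-trees: sort block pointers by DVA[0]
 * vdev, then by offset, and finally by birth txg. Equal offsets are
 * possible because FREE/ALLOC pairs are stored without being
 * cancelled against each other.
 */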
static int
livelist_compare(const void *larg, const void *rarg)
{
	const blkptr_t *l = larg;
	const blkptr_t *r = rarg;

	/* Sort them according to dva[0] */
	uint64_t l_dva0_vdev, r_dva0_vdev;
	l_dva0_vdev = DVA_GET_VDEV(&l->blk_dva[0]);
	r_dva0_vdev = DVA_GET_VDEV(&r->blk_dva[0]);
	if (l_dva0_vdev < r_dva0_vdev)
		return (-1);
	else if (l_dva0_vdev > r_dva0_vdev)
		return (+1);

	/* if vdevs are equal, sort by offsets. */
	uint64_t l_dva0_offset;
	uint64_t r_dva0_offset;
	l_dva0_offset = DVA_GET_OFFSET(&l->blk_dva[0]);
	r_dva0_offset = DVA_GET_OFFSET(&r->blk_dva[0]);
	if (l_dva0_offset < r_dva0_offset) {
		return (-1);
	} else if (l_dva0_offset > r_dva0_offset) {
		return (+1);
	}

	/*
	 * Since we're storing blkptrs without cancelling FREE/ALLOC pairs,
	 * it's possible the offsets are equal. In that case, sort by txg
	 */
	if (l->blk_birth < r->blk_birth) {
		return (-1);
	} else if (l->blk_birth > r->blk_birth) {
		return (+1);
	}
	return (0);
}

typedef struct sublivelist_verify_block {
	dva_t svb_dva;

	/*
	 * We need this to check if the block marked as allocated
	 * in the livelist was freed (and potentially reallocated)
	 * in the metaslab spacemaps at a later TXG.
	 */
	uint64_t svb_allocated_txg;
} sublivelist_verify_block_t;

static void zdb_print_blkptr(const blkptr_t *bp, int flags);

typedef struct sublivelist_verify_block_refcnt {
	/* block pointer entry in livelist being verified */
	blkptr_t svbr_blk;

	/*
	 * Refcount gets incremented to 1 when we encounter the first
	 * FREE entry for the svbr block pointer and a node for it
	 * is created in our ZDB verification/tracking metadata.
	 *
	 * As we encounter more FREE entries we increment this counter
	 * and similarly decrement it whenever we find the respective
	 * ALLOC entries for this block.
	 *
	 * When the refcount gets to 0 it means that all the FREE and
	 * ALLOC entries of this block have paired up and we no longer
	 * need to track it in our verification logic (e.g. the node
	 * containing this struct in our verification data structure
	 * should be freed).
	 *
	 * [refer to sublivelist_verify_blkptr() for the actual code]
	 */
	uint32_t svbr_refcnt;
} sublivelist_verify_block_refcnt_t;

static int
sublivelist_block_refcnt_compare(const void *larg, const void *rarg)
{
	const sublivelist_verify_block_refcnt_t *l = larg;
	const sublivelist_verify_block_refcnt_t *r = rarg;
	return (livelist_compare(&l->svbr_blk, &r->svbr_blk));
}
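/*
 * bpobj_iterate_nofree() callback: match each FREE entry in a
 * sub-livelist against its ALLOC entries (tracked by refcount in
 * sv_pair), and record any ALLOC that never pairs with a FREE in
 * sv_leftover, one node per DVA.
 */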
static int
sublivelist_verify_blkptr(void *arg, const blkptr_t *bp, boolean_t free,
    dmu_tx_t *tx)
{
	ASSERT3P(tx, ==, NULL);
	struct sublivelist_verify *sv = arg;
	sublivelist_verify_block_refcnt_t current = {
		.svbr_blk = *bp,

		/*
		 * Start with 1 in case this is the first free entry.
		 * This field is not used for our B-Tree comparisons
		 * anyway.
		 */
		.svbr_refcnt = 1,
	};

	zfs_btree_index_t where;
	sublivelist_verify_block_refcnt_t *pair =
	    zfs_btree_find(&sv->sv_pair, &current, &where);
	if (free) {
		if (pair == NULL) {
			/* first free entry for this block pointer */
			zfs_btree_add(&sv->sv_pair, &current);
		} else {
			pair->svbr_refcnt++;
		}
	} else {
		if (pair == NULL) {
			/* block that is currently marked as allocated */
			for (int i = 0; i < SPA_DVAS_PER_BP; i++) {
				if (DVA_IS_EMPTY(&bp->blk_dva[i]))
					break;
				sublivelist_verify_block_t svb = {
				    .svb_dva = bp->blk_dva[i],
				    .svb_allocated_txg = bp->blk_birth
				};

				if (zfs_btree_find(&sv->sv_leftover, &svb,
				    &where) == NULL) {
					zfs_btree_add_idx(&sv->sv_leftover,
					    &svb, &where);
				}
			}
		} else {
			/* alloc matches a free entry */
			pair->svbr_refcnt--;
			if (pair->svbr_refcnt == 0) {
				/* all allocs and frees have been matched */
				zfs_btree_remove_idx(&sv->sv_pair, &where);
			}
		}
	}

	return (0);
}
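/*
 * Verify one sub-livelist (one dsl_deadlist entry): iterate its bpobj
 * with sublivelist_verify_blkptr() and report any FREE entries that
 * were never matched by an ALLOC.
 */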
static int
sublivelist_verify_func(void *args, dsl_deadlist_entry_t *dle)
{
	int err;
	struct sublivelist_verify *sv = args;

	zfs_btree_create(&sv->sv_pair, sublivelist_block_refcnt_compare,
	    sizeof (sublivelist_verify_block_refcnt_t));

	err = bpobj_iterate_nofree(&dle->dle_bpobj, sublivelist_verify_blkptr,
	    sv, NULL);

	sublivelist_verify_block_refcnt_t *e;
	zfs_btree_index_t *cookie = NULL;
	while ((e = zfs_btree_destroy_nodes(&sv->sv_pair, &cookie)) != NULL) {
		char blkbuf[BP_SPRINTF_LEN];
		snprintf_blkptr_compact(blkbuf, sizeof (blkbuf),
		    &e->svbr_blk, B_TRUE);
		(void) printf("\tERROR: %d unmatched FREE(s): %s\n",
		    e->svbr_refcnt, blkbuf);
	}
	zfs_btree_destroy(&sv->sv_pair);

	return (err);
}

static int
livelist_block_compare(const void *larg, const void *rarg)
{
	const sublivelist_verify_block_t *l = larg;
	const sublivelist_verify_block_t *r = rarg;

	if (DVA_GET_VDEV(&l->svb_dva) < DVA_GET_VDEV(&r->svb_dva))
		return (-1);
	else if (DVA_GET_VDEV(&l->svb_dva) > DVA_GET_VDEV(&r->svb_dva))
		return (+1);

	if (DVA_GET_OFFSET(&l->svb_dva) < DVA_GET_OFFSET(&r->svb_dva))
		return (-1);
	else if (DVA_GET_OFFSET(&l->svb_dva) > DVA_GET_OFFSET(&r->svb_dva))
		return (+1);

	if (DVA_GET_ASIZE(&l->svb_dva) < DVA_GET_ASIZE(&r->svb_dva))
		return (-1);
	else if (DVA_GET_ASIZE(&l->svb_dva) > DVA_GET_ASIZE(&r->svb_dva))
		return (+1);

	return (0);
}

/*
 * Check for errors in a livelist while tracking all unfreed ALLOCs in the
 * sublivelist_verify_t: sv->sv_leftover
 */
static void
livelist_verify(dsl_deadlist_t *dl, void *arg)
{
	sublivelist_verify_t *sv = arg;
	dsl_deadlist_iterate(dl, sublivelist_verify_func, sv);
}

/*
 * Check for errors in the livelist entry and discard the intermediary
 * data structures
 */
/* ARGSUSED */
static int
sublivelist_verify_lightweight(void *args, dsl_deadlist_entry_t *dle)
{
	sublivelist_verify_t sv;
	zfs_btree_create(&sv.sv_leftover, livelist_block_compare,
	    sizeof (sublivelist_verify_block_t));
	int err = sublivelist_verify_func(&sv, dle);
	zfs_btree_clear(&sv.sv_leftover);
	zfs_btree_destroy(&sv.sv_leftover);
	return (err);
}

typedef struct metaslab_verify {
	/*
	 * Tree containing all the leftover ALLOCs from the livelists
	 * that are part of this metaslab.
	 */
	zfs_btree_t mv_livelist_allocs;

	/*
	 * Metaslab information.
	 */
	uint64_t mv_vdid;
	uint64_t mv_msid;
	uint64_t mv_start;
	uint64_t mv_end;

	/*
	 * What's currently allocated for this metaslab.
	 */
	range_tree_t *mv_allocated;
} metaslab_verify_t;

typedef void ll_iter_t(dsl_deadlist_t *ll, void *arg);

typedef int (*zdb_log_sm_cb_t)(spa_t *spa, space_map_entry_t *sme, uint64_t txg,
    void *arg);

typedef struct unflushed_iter_cb_arg {
	spa_t *uic_spa;
	uint64_t uic_txg;
	void *uic_arg;
	zdb_log_sm_cb_t uic_cb;
} unflushed_iter_cb_arg_t;

static int
iterate_through_spacemap_logs_cb(space_map_entry_t *sme, void *arg)
{
	unflushed_iter_cb_arg_t *uic = arg;
	return (uic->uic_cb(uic->uic_spa, sme, uic->uic_txg, uic->uic_arg));
}
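/*
 * Walk every log spacemap in the pool (in txg order) and invoke the
 * given callback for each of its entries, tagging each entry with the
 * txg of the log it came from. This is a no-op when the log_spacemap
 * feature is not active.
 */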
static void
iterate_through_spacemap_logs(spa_t *spa, zdb_log_sm_cb_t cb, void *arg)
{
	if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP))
		return;

	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
	for (spa_log_sm_t *sls = avl_first(&spa->spa_sm_logs_by_txg);
	    sls; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls)) {
		space_map_t *sm = NULL;
		VERIFY0(space_map_open(&sm, spa_meta_objset(spa),
		    sls->sls_sm_obj, 0, UINT64_MAX, SPA_MINBLOCKSHIFT));

		unflushed_iter_cb_arg_t uic = {
			.uic_spa = spa,
			.uic_txg = sls->sls_txg,
			.uic_arg = arg,
			.uic_cb = cb
		};
		VERIFY0(space_map_iterate(sm, space_map_length(sm),
		    iterate_through_spacemap_logs_cb, &uic));
		space_map_close(sm);
	}
	spa_config_exit(spa, SCL_CONFIG, FTAG);
}

static void
verify_livelist_allocs(metaslab_verify_t *mv, uint64_t txg,
    uint64_t offset, uint64_t size)
{
	sublivelist_verify_block_t svb;
	DVA_SET_VDEV(&svb.svb_dva, mv->mv_vdid);
	DVA_SET_OFFSET(&svb.svb_dva, offset);
	DVA_SET_ASIZE(&svb.svb_dva, size);
	zfs_btree_index_t where;
	uint64_t end_offset = offset + size;

	/*
	 * Look for an exact match for spacemap entry in the livelist entries.
	 * Then, look for other livelist entries that fall within the range
	 * of the spacemap entry as it may have been condensed
	 */
	sublivelist_verify_block_t *found =
	    zfs_btree_find(&mv->mv_livelist_allocs, &svb, &where);
	if (found == NULL) {
		found = zfs_btree_next(&mv->mv_livelist_allocs, &where, &where);
	}
	for (; found != NULL && DVA_GET_VDEV(&found->svb_dva) == mv->mv_vdid &&
	    DVA_GET_OFFSET(&found->svb_dva) < end_offset;
	    found = zfs_btree_next(&mv->mv_livelist_allocs, &where, &where)) {
		if (found->svb_allocated_txg <= txg) {
			(void) printf("ERROR: Livelist ALLOC [%llx:%llx] "
			    "from TXG %llx FREED at TXG %llx\n",
			    (u_longlong_t)DVA_GET_OFFSET(&found->svb_dva),
			    (u_longlong_t)DVA_GET_ASIZE(&found->svb_dva),
			    (u_longlong_t)found->svb_allocated_txg,
			    (u_longlong_t)txg);
		}
	}
}
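/*
 * Replay one spacemap entry against mv_allocated, reporting double
 * ALLOCs and double FREEs. Every FREE is also cross-checked against
 * the leftover livelist ALLOCs via verify_livelist_allocs().
 */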
static int
metaslab_spacemap_validation_cb(space_map_entry_t *sme, void *arg)
{
	metaslab_verify_t *mv = arg;
	uint64_t offset = sme->sme_offset;
	uint64_t size = sme->sme_run;
	uint64_t txg = sme->sme_txg;

	if (sme->sme_type == SM_ALLOC) {
		if (range_tree_contains(mv->mv_allocated,
		    offset, size)) {
			(void) printf("ERROR: DOUBLE ALLOC: "
			    "%llu [%llx:%llx] "
			    "%llu:%llu LOG_SM\n",
			    (u_longlong_t)txg, (u_longlong_t)offset,
			    (u_longlong_t)size, (u_longlong_t)mv->mv_vdid,
			    (u_longlong_t)mv->mv_msid);
		} else {
			range_tree_add(mv->mv_allocated,
			    offset, size);
		}
	} else {
		if (!range_tree_contains(mv->mv_allocated,
		    offset, size)) {
			(void) printf("ERROR: DOUBLE FREE: "
			    "%llu [%llx:%llx] "
			    "%llu:%llu LOG_SM\n",
			    (u_longlong_t)txg, (u_longlong_t)offset,
			    (u_longlong_t)size, (u_longlong_t)mv->mv_vdid,
			    (u_longlong_t)mv->mv_msid);
		} else {
			range_tree_remove(mv->mv_allocated,
			    offset, size);
		}
	}

	if (sme->sme_type != SM_ALLOC) {
		/*
		 * If something is freed in the spacemap, verify that
		 * it is not listed as allocated in the livelist.
		 */
		verify_livelist_allocs(mv, txg, offset, size);
	}
	return (0);
}
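/*
 * iterate_through_spacemap_logs() callback: filter log entries down to
 * the one metaslab being verified (matching vdev and metaslab id), skip
 * anything that has already been flushed, and hand the rest to
 * metaslab_spacemap_validation_cb().
 */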
static int
spacemap_check_sm_log_cb(spa_t *spa, space_map_entry_t *sme,
    uint64_t txg, void *arg)
{
	metaslab_verify_t *mv = arg;
	uint64_t offset = sme->sme_offset;
	uint64_t vdev_id = sme->sme_vdev;

	vdev_t *vd = vdev_lookup_top(spa, vdev_id);

	/* skip indirect vdevs */
	if (!vdev_is_concrete(vd))
		return (0);

	if (vdev_id != mv->mv_vdid)
		return (0);

	metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift];
	if (ms->ms_id != mv->mv_msid)
		return (0);

	if (txg < metaslab_unflushed_txg(ms))
		return (0);

	ASSERT3U(txg, ==, sme->sme_txg);
	return (metaslab_spacemap_validation_cb(sme, mv));
}

static void
spacemap_check_sm_log(spa_t *spa, metaslab_verify_t *mv)
{
	iterate_through_spacemap_logs(spa, spacemap_check_sm_log_cb, mv);
}

static void
spacemap_check_ms_sm(space_map_t *sm, metaslab_verify_t *mv)
{
	if (sm == NULL)
		return;

	VERIFY0(space_map_iterate(sm, space_map_length(sm),
	    metaslab_spacemap_validation_cb, mv));
}

static void iterate_deleted_livelists(spa_t *spa, ll_iter_t func, void *arg);

/*
 * Transfer blocks from sv_leftover tree to the mv_livelist_allocs if
 * they are part of that metaslab (mv_msid).
 */
static void
mv_populate_livelist_allocs(metaslab_verify_t *mv, sublivelist_verify_t *sv)
{
	zfs_btree_index_t where;
	sublivelist_verify_block_t *svb;
	ASSERT3U(zfs_btree_numnodes(&mv->mv_livelist_allocs), ==, 0);
	for (svb = zfs_btree_first(&sv->sv_leftover, &where);
	    svb != NULL;
	    svb = zfs_btree_next(&sv->sv_leftover, &where, &where)) {
		if (DVA_GET_VDEV(&svb->svb_dva) != mv->mv_vdid)
			continue;

		if (DVA_GET_OFFSET(&svb->svb_dva) < mv->mv_start &&
		    (DVA_GET_OFFSET(&svb->svb_dva) +
		    DVA_GET_ASIZE(&svb->svb_dva)) > mv->mv_start) {
			(void) printf("ERROR: Found block that crosses "
			    "metaslab boundary: <%llu:%llx:%llx>\n",
			    (u_longlong_t)DVA_GET_VDEV(&svb->svb_dva),
			    (u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva),
			    (u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva));
			continue;
		}

		if (DVA_GET_OFFSET(&svb->svb_dva) < mv->mv_start)
			continue;

		if (DVA_GET_OFFSET(&svb->svb_dva) >= mv->mv_end)
			continue;

		if ((DVA_GET_OFFSET(&svb->svb_dva) +
		    DVA_GET_ASIZE(&svb->svb_dva)) > mv->mv_end) {
			(void) printf("ERROR: Found block that crosses "
			    "metaslab boundary: <%llu:%llx:%llx>\n",
			    (u_longlong_t)DVA_GET_VDEV(&svb->svb_dva),
			    (u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva),
			    (u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva));
			continue;
		}

		zfs_btree_add(&mv->mv_livelist_allocs, svb);
	}

	for (svb = zfs_btree_first(&mv->mv_livelist_allocs, &where);
	    svb != NULL;
	    svb = zfs_btree_next(&mv->mv_livelist_allocs, &where, &where)) {
		zfs_btree_remove(&sv->sv_leftover, svb);
	}
}

/*
 * [Livelist Check]
 * Iterate through all the sublivelists and:
 * - report leftover frees (**)
 * - record leftover ALLOCs together with their TXG [see Cross Check]
 *
 * (**) Note: Double ALLOCs are valid in datasets that have dedup
 *      enabled. Similarly double FREEs are allowed as well but
 *      only if they pair up with a corresponding ALLOC entry once
 *      we are done with our sublivelist iteration.
 *
 * [Spacemap Check]
 * for each metaslab:
 * - iterate over spacemap and then the metaslab's entries in the
 *   spacemap log, then report any double FREEs and ALLOCs (do not
 *   blow up).
 *
 * [Cross Check]
 * After finishing the Livelist Check phase and while being in the
 * Spacemap Check phase, we find all the recorded leftover ALLOCs
 * of the livelist check that are part of the metaslab that we are
 * currently looking at in the Spacemap Check. We report any entries
 * that are marked as ALLOCs in the livelists but have been actually
 * freed (and potentially allocated again) after their TXG stamp in
 * the spacemaps. Also report any ALLOCs from the livelists that
 * belong to indirect vdevs (e.g. their vdev completed removal).
 *
 * Note that this will miss Log Spacemap entries that cancelled each other
 * out before being flushed to the metaslab, so we are not guaranteed
 * to match all erroneous ALLOCs.
 */
static void
livelist_metaslab_validate(spa_t *spa)
{
	(void) printf("Verifying deleted livelist entries\n");

	sublivelist_verify_t sv;
	zfs_btree_create(&sv.sv_leftover, livelist_block_compare,
	    sizeof (sublivelist_verify_block_t));
	iterate_deleted_livelists(spa, livelist_verify, &sv);

	(void) printf("Verifying metaslab entries\n");
	vdev_t *rvd = spa->spa_root_vdev;
	for (uint64_t c = 0; c < rvd->vdev_children; c++) {
		vdev_t *vd = rvd->vdev_child[c];

		if (!vdev_is_concrete(vd))
			continue;

		for (uint64_t mid = 0; mid < vd->vdev_ms_count; mid++) {
			metaslab_t *m = vd->vdev_ms[mid];

			(void) fprintf(stderr,
			    "\rverifying concrete vdev %llu, "
			    "metaslab %llu of %llu ...",
			    (longlong_t)vd->vdev_id,
			    (longlong_t)mid,
			    (longlong_t)vd->vdev_ms_count);

			uint64_t shift, start;
			range_seg_type_t type =
			    metaslab_calculate_range_tree_type(vd, m,
			    &start, &shift);
			metaslab_verify_t mv;
			mv.mv_allocated = range_tree_create(NULL,
			    type, NULL, start, shift);
			mv.mv_vdid = vd->vdev_id;
			mv.mv_msid = m->ms_id;
			mv.mv_start = m->ms_start;
			mv.mv_end = m->ms_start + m->ms_size;
			zfs_btree_create(&mv.mv_livelist_allocs,
			    livelist_block_compare,
			    sizeof (sublivelist_verify_block_t));

			mv_populate_livelist_allocs(&mv, &sv);

			spacemap_check_ms_sm(m->ms_sm, &mv);
			spacemap_check_sm_log(spa, &mv);

			range_tree_vacate(mv.mv_allocated, NULL, NULL);
			range_tree_destroy(mv.mv_allocated);
			zfs_btree_clear(&mv.mv_livelist_allocs);
			zfs_btree_destroy(&mv.mv_livelist_allocs);
		}
	}
	(void) fprintf(stderr, "\n");

	/*
	 * If there are any segments in the leftover tree after we walked
	 * through all the metaslabs in the concrete vdevs then this means
	 * that we have segments in the livelists that belong to indirect
	 * vdevs and are marked as allocated.
	 */
	if (zfs_btree_numnodes(&sv.sv_leftover) == 0) {
		zfs_btree_destroy(&sv.sv_leftover);
		return;
	}
	(void) printf("ERROR: Found livelist blocks marked as allocated "
	    "for indirect vdevs:\n");

	zfs_btree_index_t *where = NULL;
	sublivelist_verify_block_t *svb;
	while ((svb = zfs_btree_destroy_nodes(&sv.sv_leftover, &where)) !=
	    NULL) {
		int vdev_id = DVA_GET_VDEV(&svb->svb_dva);
		ASSERT3U(vdev_id, <, rvd->vdev_children);
		vdev_t *vd = rvd->vdev_child[vdev_id];
		ASSERT(!vdev_is_concrete(vd));
		(void) printf("<%d:%llx:%llx> TXG %llx\n",
		    vdev_id, (u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva),
		    (u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva),
		    (u_longlong_t)svb->svb_allocated_txg);
	}
	(void) printf("\n");
	zfs_btree_destroy(&sv.sv_leftover);
}

/*
 * These libumem hooks provide a reasonable set of defaults for the allocator's
 * debugging facilities.
 */
const char *
_umem_debug_init(void)
{
	return ("default,verbose"); /* $UMEM_DEBUG setting */
}

const char *
_umem_logging_init(void)
{
	return ("fail,contents"); /* $UMEM_LOGGING setting */
}
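/*
 * Illustrative invocations (pool/device names are hypothetical; see the
 * option summary printed below for the authoritative list):
 *
 *	zdb mypool		dump everything, non-verbosely
 *	zdb -dd mypool/fs	dump that dataset, more verbosely
 *	zdb -l /dev/sda1	read the label contents of a device
 */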
static void
usage(void)
{
	(void) fprintf(stderr,
	    "Usage:\t%s [-AbcdDFGhikLMPsvXy] [-e [-V] [-p <path> ...]] "
	    "[-I <inflight I/Os>]\n"
	    "\t\t[-o <var>=<value>]... [-t <txg>] [-U <cache>] [-x <dumpdir>]\n"
	    "\t\t[<poolname>[/<dataset | objset id>] [<object | range> ...]]\n"
	    "\t%s [-AdiPv] [-e [-V] [-p <path> ...]] [-U <cache>]\n"
	    "\t\t[<poolname>[/<dataset | objset id>] [<object | range> ...]]\n"
	    "\t%s [-v] <bookmark>\n"
	    "\t%s -C [-A] [-U <cache>]\n"
	    "\t%s -l [-Aqu] <device>\n"
	    "\t%s -m [-AFLPX] [-e [-V] [-p <path> ...]] [-t <txg>] "
	    "[-U <cache>]\n\t\t<poolname> [<vdev> [<metaslab> ...]]\n"
	    "\t%s -O <dataset> <path>\n"
	    "\t%s -r <dataset> <path> <destination>\n"
	    "\t%s -R [-A] [-e [-V] [-p <path> ...]] [-U <cache>]\n"
	    "\t\t<poolname> <vdev>:<offset>:<size>[:<flags>]\n"
	    "\t%s -E [-A] word0:word1:...:word15\n"
	    "\t%s -S [-AP] [-e [-V] [-p <path> ...]] [-U <cache>] "
	    "<poolname>\n\n",
	    cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname,
	    cmdname, cmdname, cmdname, cmdname);

	(void) fprintf(stderr, "    Dataset name must include at least one "
	    "separator character '/' or '@'\n");
	(void) fprintf(stderr, "    If dataset name is specified, only that "
	    "dataset is dumped\n");
	(void) fprintf(stderr, "    If object numbers or object number "
	    "ranges are specified, only those\n"
	    "    objects or ranges are dumped.\n\n");
	(void) fprintf(stderr,
	    "    Object ranges take the form <start>:<end>[:<flags>]\n"
	    "        start    Starting object number\n"
	    "        end      Ending object number, or -1 for no upper bound\n"
	    "        flags    Optional flags to select object types:\n"
	    "            A     All objects (this is the default)\n"
	    "            d     ZFS directories\n"
	    "            f     ZFS files \n"
	    "            m     SPA space maps\n"
	    "            z     ZAPs\n"
	    "            -     Negate effect of next flag\n\n");
	(void) fprintf(stderr, "    Options to control amount of output:\n");
	(void) fprintf(stderr, "        -b block statistics\n");
	(void) fprintf(stderr, "        -c checksum all metadata (twice for "
	    "all data) blocks\n");
	(void) fprintf(stderr, "        -C config (or cachefile if alone)\n");
	(void) fprintf(stderr, "        -d dataset(s)\n");
	(void) fprintf(stderr, "        -D dedup statistics\n");
	(void) fprintf(stderr, "        -E decode and display block from an "
	    "embedded block pointer\n");
	(void) fprintf(stderr, "        -h pool history\n");
	(void) fprintf(stderr, "        -i intent logs\n");
	(void) fprintf(stderr, "        -l read label contents\n");
	(void) fprintf(stderr, "        -k examine the checkpointed state "
	    "of the pool\n");
	(void) fprintf(stderr, "        -L disable leak tracking (do not "
	    "load spacemaps)\n");
	(void) fprintf(stderr, "        -m metaslabs\n");
	(void) fprintf(stderr, "        -M metaslab groups\n");
	(void) fprintf(stderr, "        -O perform object lookups by path\n");
	(void) fprintf(stderr, "        -r copy an object by path to file\n");
	(void) fprintf(stderr, "        -R read and display block from a "
	    "device\n");
	(void) fprintf(stderr, "        -s report stats on zdb's I/O\n");
	(void) fprintf(stderr, "        -S simulate dedup to measure effect\n");
	(void) fprintf(stderr, "        -v verbose (applies to all "
	    "others)\n");
	(void) fprintf(stderr, "        -y perform livelist and metaslab "
	    "validation on any livelists being deleted\n\n");
	(void) fprintf(stderr, "    Below options are intended for use "
	    "with other options:\n");
	(void) fprintf(stderr, "        -A ignore assertions (-A), enable "
	    "panic recovery (-AA) or both (-AAA)\n");
	(void) fprintf(stderr, "        -e pool is exported/destroyed/"
	    "has altroot/not in a cachefile\n");
	(void) fprintf(stderr, "        -F attempt automatic rewind within "
	    "safe range of transaction groups\n");
	(void) fprintf(stderr, "        -G dump zfs_dbgmsg buffer before "
	    "exiting\n");
	(void) fprintf(stderr, "        -I <number of inflight I/Os> -- "
	    "specify the maximum number of\n        "
	    "checksumming I/Os [default is 200]\n");
	(void) fprintf(stderr, "        -o <variable>=<value> set global "
	    "variable to an unsigned 32-bit integer\n");
	(void) fprintf(stderr, "        -p <path> -- use one or more with "
	    "-e to specify path to vdev dir\n");
	(void) fprintf(stderr, "        -P print numbers in parseable form\n");
	(void) fprintf(stderr, "        -q don't print label contents\n");
	(void) fprintf(stderr, "        -t <txg> -- highest txg to use when "
	    "searching for uberblocks\n");
	(void) fprintf(stderr, "        -u uberblock\n");
	(void) fprintf(stderr, "        -U <cachefile_path> -- use alternate "
	    "cachefile\n");
	(void) fprintf(stderr, "        -V do verbatim import\n");
	(void) fprintf(stderr, "        -x <dumpdir> -- "
	    "dump all read blocks into specified directory\n");
	(void) fprintf(stderr, "        -X attempt extreme rewind (does not "
	    "work with dataset)\n");
	(void) fprintf(stderr, "        -Y attempt all reconstruction "
	    "combinations for split blocks\n");
	(void) fprintf(stderr, "        -Z show ZSTD headers \n");
	(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
	    "to make only that option verbose\n");
	(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
	exit(1);
}

static void
dump_debug_buffer(void)
{
	if (dump_opt['G']) {
		(void) printf("\n");
		(void) fflush(stdout);
		zfs_dbgmsg_print("zdb");
	}
}

/*
 * Called for usage errors that are discovered after a call to spa_open(),
 * dmu_bonus_hold(), or pool_match(). abort() is called for other errors.
 */

static void
fatal(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	(void) fprintf(stderr, "%s: ", cmdname);
	(void) vfprintf(stderr, fmt, ap);
	va_end(ap);
	(void) fprintf(stderr, "\n");

	dump_debug_buffer();

	exit(1);
}
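/*
 * Object viewer for DMU_OT_PACKED_NVLIST: the bonus buffer holds the
 * packed size, so read that many bytes from the object, unpack the
 * nvlist, and print it.
 */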
/* ARGSUSED */
static void
dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
{
	nvlist_t *nv;
	size_t nvsize = *(uint64_t *)data;
	char *packed = umem_alloc(nvsize, UMEM_NOFAIL);

	VERIFY(0 == dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH));

	VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0);

	umem_free(packed, nvsize);

	dump_nvlist(nv, 8);

	nvlist_free(nv);
}

/* ARGSUSED */
static void
dump_history_offsets(objset_t *os, uint64_t object, void *data, size_t size)
{
	spa_history_phys_t *shp = data;

	if (shp == NULL)
		return;

	(void) printf("\t\tpool_create_len = %llu\n",
	    (u_longlong_t)shp->sh_pool_create_len);
	(void) printf("\t\tphys_max_off = %llu\n",
	    (u_longlong_t)shp->sh_phys_max_off);
	(void) printf("\t\tbof = %llu\n",
	    (u_longlong_t)shp->sh_bof);
	(void) printf("\t\teof = %llu\n",
	    (u_longlong_t)shp->sh_eof);
	(void) printf("\t\trecords_lost = %llu\n",
	    (u_longlong_t)shp->sh_records_lost);
}

static void
zdb_nicenum(uint64_t num, char *buf, size_t buflen)
{
	if (dump_opt['P'])
		(void) snprintf(buf, buflen, "%llu", (longlong_t)num);
	else
		nicenum(num, buf, buflen);
}

static const char histo_stars[] = "****************************************";
static const uint64_t histo_width = sizeof (histo_stars) - 1;
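/*
 * Render a histogram as rows of stars, scaled so that the largest
 * bucket spans the full histo_width. Empty leading and trailing
 * buckets are skipped, and `offset` shifts the printed bucket indices
 * (e.g. by sm_shift for on-disk spacemap histograms). Example output
 * (values illustrative):
 *
 *			 12:   1024 ********************
 *			 13:    512 **********
 */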
static void
dump_histogram(const uint64_t *histo, int size, int offset)
{
	int i;
	int minidx = size - 1;
	int maxidx = 0;
	uint64_t max = 0;

	for (i = 0; i < size; i++) {
		if (histo[i] > max)
			max = histo[i];
		if (histo[i] > 0 && i > maxidx)
			maxidx = i;
		if (histo[i] > 0 && i < minidx)
			minidx = i;
	}

	if (max < histo_width)
		max = histo_width;

	for (i = minidx; i <= maxidx; i++) {
		(void) printf("\t\t\t%3u: %6llu %s\n",
		    i + offset, (u_longlong_t)histo[i],
		    &histo_stars[(max - histo[i]) * histo_width / max]);
	}
}

static void
dump_zap_stats(objset_t *os, uint64_t object)
{
	int error;
	zap_stats_t zs;

	error = zap_get_stats(os, object, &zs);
	if (error)
		return;

	if (zs.zs_ptrtbl_len == 0) {
		ASSERT(zs.zs_num_blocks == 1);
		(void) printf("\tmicrozap: %llu bytes, %llu entries\n",
		    (u_longlong_t)zs.zs_blocksize,
		    (u_longlong_t)zs.zs_num_entries);
		return;
	}

	(void) printf("\tFat ZAP stats:\n");

	(void) printf("\t\tPointer table:\n");
	(void) printf("\t\t\t%llu elements\n",
	    (u_longlong_t)zs.zs_ptrtbl_len);
	(void) printf("\t\t\tzt_blk: %llu\n",
	    (u_longlong_t)zs.zs_ptrtbl_zt_blk);
	(void) printf("\t\t\tzt_numblks: %llu\n",
	    (u_longlong_t)zs.zs_ptrtbl_zt_numblks);
	(void) printf("\t\t\tzt_shift: %llu\n",
	    (u_longlong_t)zs.zs_ptrtbl_zt_shift);
	(void) printf("\t\t\tzt_blks_copied: %llu\n",
	    (u_longlong_t)zs.zs_ptrtbl_blks_copied);
	(void) printf("\t\t\tzt_nextblk: %llu\n",
	    (u_longlong_t)zs.zs_ptrtbl_nextblk);

	(void) printf("\t\tZAP entries: %llu\n",
	    (u_longlong_t)zs.zs_num_entries);
	(void) printf("\t\tLeaf blocks: %llu\n",
	    (u_longlong_t)zs.zs_num_leafs);
	(void) printf("\t\tTotal blocks: %llu\n",
	    (u_longlong_t)zs.zs_num_blocks);
	(void) printf("\t\tzap_block_type: 0x%llx\n",
	    (u_longlong_t)zs.zs_block_type);
	(void) printf("\t\tzap_magic: 0x%llx\n",
	    (u_longlong_t)zs.zs_magic);
	(void) printf("\t\tzap_salt: 0x%llx\n",
	    (u_longlong_t)zs.zs_salt);

	(void) printf("\t\tLeafs with 2^n pointers:\n");
	dump_histogram(zs.zs_leafs_with_2n_pointers, ZAP_HISTOGRAM_SIZE, 0);

	(void) printf("\t\tBlocks with n*5 entries:\n");
	dump_histogram(zs.zs_blocks_with_n5_entries, ZAP_HISTOGRAM_SIZE, 0);

	(void) printf("\t\tBlocks n/10 full:\n");
	dump_histogram(zs.zs_blocks_n_tenths_full, ZAP_HISTOGRAM_SIZE, 0);

	(void) printf("\t\tEntries with n chunks:\n");
	dump_histogram(zs.zs_entries_using_n_chunks, ZAP_HISTOGRAM_SIZE, 0);

	(void) printf("\t\tBuckets with n entries:\n");
	dump_histogram(zs.zs_buckets_with_n_entries, ZAP_HISTOGRAM_SIZE, 0);
}

/*ARGSUSED*/
static void
dump_none(objset_t *os, uint64_t object, void *data, size_t size)
{
}

/*ARGSUSED*/
static void
dump_unknown(objset_t *os, uint64_t object, void *data, size_t size)
{
	(void) printf("\tUNKNOWN OBJECT TYPE\n");
}

/*ARGSUSED*/
static void
dump_uint8(objset_t *os, uint64_t object, void *data, size_t size)
{
}

/*ARGSUSED*/
static void
dump_uint64(objset_t *os, uint64_t object, void *data, size_t size)
{
	uint64_t *arr;
	uint64_t oursize;
	if (dump_opt['d'] < 6)
		return;

	if (data == NULL) {
		dmu_object_info_t doi;

		VERIFY0(dmu_object_info(os, object, &doi));
		size = doi.doi_max_offset;
		/*
		 * We cap the size at 1 mebibyte here to prevent
		 * allocation failures and nigh-infinite printing if the
		 * object is extremely large.
		 */
		oursize = MIN(size, 1 << 20);
		arr = kmem_alloc(oursize, KM_SLEEP);

		int err = dmu_read(os, object, 0, oursize, arr, 0);
		if (err != 0) {
			(void) printf("got error %u from dmu_read\n", err);
			kmem_free(arr, oursize);
			return;
		}
	} else {
		/*
		 * Even though the allocation is already done in this code path,
		 * we still cap the size to prevent excessive printing.
		 */
		oursize = MIN(size, 1 << 20);
		arr = data;
	}

	if (size == 0) {
		(void) printf("\t\t[]\n");
		return;
	}

	(void) printf("\t\t[%0llx", (u_longlong_t)arr[0]);
	for (size_t i = 1; i * sizeof (uint64_t) < oursize; i++) {
		if (i % 4 != 0)
			(void) printf(", %0llx", (u_longlong_t)arr[i]);
		else
			(void) printf(",\n\t\t%0llx", (u_longlong_t)arr[i]);
	}
	if (oursize != size)
		(void) printf(", ... ");
	(void) printf("]\n");

	if (data == NULL)
		kmem_free(arr, oursize);
}
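/*
 * Object viewer for ZAP objects: print the ZAP stats, then every
 * name/value pair. Known raw-byte attributes (crypto keys, checksum
 * salt) are printed as hex; other byte-width attributes are treated
 * as strings, and wider integers are printed element by element.
 */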
"); 1130 (void) printf("]\n"); 1131 1132 if (data == NULL) 1133 kmem_free(arr, oursize); 1134 } 1135 1136 /*ARGSUSED*/ 1137 static void 1138 dump_zap(objset_t *os, uint64_t object, void *data, size_t size) 1139 { 1140 zap_cursor_t zc; 1141 zap_attribute_t attr; 1142 void *prop; 1143 unsigned i; 1144 1145 dump_zap_stats(os, object); 1146 (void) printf("\n"); 1147 1148 for (zap_cursor_init(&zc, os, object); 1149 zap_cursor_retrieve(&zc, &attr) == 0; 1150 zap_cursor_advance(&zc)) { 1151 (void) printf("\t\t%s = ", attr.za_name); 1152 if (attr.za_num_integers == 0) { 1153 (void) printf("\n"); 1154 continue; 1155 } 1156 prop = umem_zalloc(attr.za_num_integers * 1157 attr.za_integer_length, UMEM_NOFAIL); 1158 (void) zap_lookup(os, object, attr.za_name, 1159 attr.za_integer_length, attr.za_num_integers, prop); 1160 if (attr.za_integer_length == 1) { 1161 if (strcmp(attr.za_name, 1162 DSL_CRYPTO_KEY_MASTER_KEY) == 0 || 1163 strcmp(attr.za_name, 1164 DSL_CRYPTO_KEY_HMAC_KEY) == 0 || 1165 strcmp(attr.za_name, DSL_CRYPTO_KEY_IV) == 0 || 1166 strcmp(attr.za_name, DSL_CRYPTO_KEY_MAC) == 0 || 1167 strcmp(attr.za_name, DMU_POOL_CHECKSUM_SALT) == 0) { 1168 uint8_t *u8 = prop; 1169 1170 for (i = 0; i < attr.za_num_integers; i++) { 1171 (void) printf("%02x", u8[i]); 1172 } 1173 } else { 1174 (void) printf("%s", (char *)prop); 1175 } 1176 } else { 1177 for (i = 0; i < attr.za_num_integers; i++) { 1178 switch (attr.za_integer_length) { 1179 case 2: 1180 (void) printf("%u ", 1181 ((uint16_t *)prop)[i]); 1182 break; 1183 case 4: 1184 (void) printf("%u ", 1185 ((uint32_t *)prop)[i]); 1186 break; 1187 case 8: 1188 (void) printf("%lld ", 1189 (u_longlong_t)((int64_t *)prop)[i]); 1190 break; 1191 } 1192 } 1193 } 1194 (void) printf("\n"); 1195 umem_free(prop, attr.za_num_integers * attr.za_integer_length); 1196 } 1197 zap_cursor_fini(&zc); 1198 } 1199 1200 static void 1201 dump_bpobj(objset_t *os, uint64_t object, void *data, size_t size) 1202 { 1203 bpobj_phys_t *bpop = data; 1204 uint64_t i; 1205 char bytes[32], comp[32], uncomp[32]; 1206 1207 /* make sure the output won't get truncated */ 1208 CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ); 1209 CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ); 1210 CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ); 1211 1212 if (bpop == NULL) 1213 return; 1214 1215 zdb_nicenum(bpop->bpo_bytes, bytes, sizeof (bytes)); 1216 zdb_nicenum(bpop->bpo_comp, comp, sizeof (comp)); 1217 zdb_nicenum(bpop->bpo_uncomp, uncomp, sizeof (uncomp)); 1218 1219 (void) printf("\t\tnum_blkptrs = %llu\n", 1220 (u_longlong_t)bpop->bpo_num_blkptrs); 1221 (void) printf("\t\tbytes = %s\n", bytes); 1222 if (size >= BPOBJ_SIZE_V1) { 1223 (void) printf("\t\tcomp = %s\n", comp); 1224 (void) printf("\t\tuncomp = %s\n", uncomp); 1225 } 1226 if (size >= BPOBJ_SIZE_V2) { 1227 (void) printf("\t\tsubobjs = %llu\n", 1228 (u_longlong_t)bpop->bpo_subobjs); 1229 (void) printf("\t\tnum_subobjs = %llu\n", 1230 (u_longlong_t)bpop->bpo_num_subobjs); 1231 } 1232 if (size >= sizeof (*bpop)) { 1233 (void) printf("\t\tnum_freed = %llu\n", 1234 (u_longlong_t)bpop->bpo_num_freed); 1235 } 1236 1237 if (dump_opt['d'] < 5) 1238 return; 1239 1240 for (i = 0; i < bpop->bpo_num_blkptrs; i++) { 1241 char blkbuf[BP_SPRINTF_LEN]; 1242 blkptr_t bp; 1243 1244 int err = dmu_read(os, object, 1245 i * sizeof (bp), sizeof (bp), &bp, 0); 1246 if (err != 0) { 1247 (void) printf("got error %u from dmu_read\n", err); 1248 break; 1249 } 1250 snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), &bp, 1251 BP_GET_FREE(&bp)); 1252 (void) printf("\t%s\n", blkbuf); 1253 } 1254 } 
/* ARGSUSED */
static void
dump_bpobj_subobjs(objset_t *os, uint64_t object, void *data, size_t size)
{
	dmu_object_info_t doi;
	int64_t i;

	VERIFY0(dmu_object_info(os, object, &doi));
	uint64_t *subobjs = kmem_alloc(doi.doi_max_offset, KM_SLEEP);

	int err = dmu_read(os, object, 0, doi.doi_max_offset, subobjs, 0);
	if (err != 0) {
		(void) printf("got error %u from dmu_read\n", err);
		kmem_free(subobjs, doi.doi_max_offset);
		return;
	}

	int64_t last_nonzero = -1;
	for (i = 0; i < doi.doi_max_offset / 8; i++) {
		if (subobjs[i] != 0)
			last_nonzero = i;
	}

	for (i = 0; i <= last_nonzero; i++) {
		(void) printf("\t%llu\n", (u_longlong_t)subobjs[i]);
	}
	kmem_free(subobjs, doi.doi_max_offset);
}

/*ARGSUSED*/
static void
dump_ddt_zap(objset_t *os, uint64_t object, void *data, size_t size)
{
	dump_zap_stats(os, object);
	/* contents are printed elsewhere, properly decoded */
}

/*ARGSUSED*/
static void
dump_sa_attrs(objset_t *os, uint64_t object, void *data, size_t size)
{
	zap_cursor_t zc;
	zap_attribute_t attr;

	dump_zap_stats(os, object);
	(void) printf("\n");

	for (zap_cursor_init(&zc, os, object);
	    zap_cursor_retrieve(&zc, &attr) == 0;
	    zap_cursor_advance(&zc)) {
		(void) printf("\t\t%s = ", attr.za_name);
		if (attr.za_num_integers == 0) {
			(void) printf("\n");
			continue;
		}
		(void) printf(" %llx : [%d:%d:%d]\n",
		    (u_longlong_t)attr.za_first_integer,
		    (int)ATTR_LENGTH(attr.za_first_integer),
		    (int)ATTR_BSWAP(attr.za_first_integer),
		    (int)ATTR_NUM(attr.za_first_integer));
	}
	zap_cursor_fini(&zc);
}

/*ARGSUSED*/
static void
dump_sa_layouts(objset_t *os, uint64_t object, void *data, size_t size)
{
	zap_cursor_t zc;
	zap_attribute_t attr;
	uint16_t *layout_attrs;
	unsigned i;

	dump_zap_stats(os, object);
	(void) printf("\n");

	for (zap_cursor_init(&zc, os, object);
	    zap_cursor_retrieve(&zc, &attr) == 0;
	    zap_cursor_advance(&zc)) {
		(void) printf("\t\t%s = [", attr.za_name);
		if (attr.za_num_integers == 0) {
			(void) printf("\n");
			continue;
		}

		VERIFY(attr.za_integer_length == 2);
		layout_attrs = umem_zalloc(attr.za_num_integers *
		    attr.za_integer_length, UMEM_NOFAIL);

		VERIFY(zap_lookup(os, object, attr.za_name,
		    attr.za_integer_length,
		    attr.za_num_integers, layout_attrs) == 0);

		for (i = 0; i != attr.za_num_integers; i++)
			(void) printf(" %d ", (int)layout_attrs[i]);
		(void) printf("]\n");
		umem_free(layout_attrs,
		    attr.za_num_integers * attr.za_integer_length);
	}
	zap_cursor_fini(&zc);
}

/*ARGSUSED*/
static void
dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size)
{
	zap_cursor_t zc;
	zap_attribute_t attr;
	const char *typenames[] = {
		/* 0 */ "not specified",
		/* 1 */ "FIFO",
		/* 2 */ "Character Device",
		/* 3 */ "3 (invalid)",
		/* 4 */ "Directory",
		/* 5 */ "5 (invalid)",
		/* 6 */ "Block Device",
		/* 7 */ "7 (invalid)",
		/* 8 */ "Regular File",
		/* 9 */ "9 (invalid)",
		/* 10 */ "Symbolic Link",
		/* 11 */ "11 (invalid)",
		/* 12 */ "Socket",
		/* 13 */ "Door",
		/* 14 */ "Event Port",
		/* 15 */ "15 (invalid)",
	};

	dump_zap_stats(os, object);
	(void) printf("\n");

	for (zap_cursor_init(&zc, os, object);
	    zap_cursor_retrieve(&zc, &attr) == 0;
	    zap_cursor_advance(&zc)) {
		(void) printf("\t\t%s = %lld (type: %s)\n",
		    attr.za_name, ZFS_DIRENT_OBJ(attr.za_first_integer),
		    typenames[ZFS_DIRENT_TYPE(attr.za_first_integer)]);
	}
	zap_cursor_fini(&zc);
}
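/*
 * The get_*_refcount() helpers below count how many space maps of each
 * kind use the histogram-capable (full-sized) space_map_phys_t, i.e.
 * how many consumers the SPA_FEATURE_SPACEMAP_HISTOGRAM feature should
 * have; verify_spacemap_refcounts() compares their sum against the
 * feature's actual refcount.
 */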
static int
get_dtl_refcount(vdev_t *vd)
{
	int refcount = 0;

	if (vd->vdev_ops->vdev_op_leaf) {
		space_map_t *sm = vd->vdev_dtl_sm;

		if (sm != NULL &&
		    sm->sm_dbuf->db_size == sizeof (space_map_phys_t))
			return (1);
		return (0);
	}

	for (unsigned c = 0; c < vd->vdev_children; c++)
		refcount += get_dtl_refcount(vd->vdev_child[c]);
	return (refcount);
}

static int
get_metaslab_refcount(vdev_t *vd)
{
	int refcount = 0;

	if (vd->vdev_top == vd) {
		for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
			space_map_t *sm = vd->vdev_ms[m]->ms_sm;

			if (sm != NULL &&
			    sm->sm_dbuf->db_size == sizeof (space_map_phys_t))
				refcount++;
		}
	}
	for (unsigned c = 0; c < vd->vdev_children; c++)
		refcount += get_metaslab_refcount(vd->vdev_child[c]);

	return (refcount);
}

static int
get_obsolete_refcount(vdev_t *vd)
{
	uint64_t obsolete_sm_object;
	int refcount = 0;

	VERIFY0(vdev_obsolete_sm_object(vd, &obsolete_sm_object));
	if (vd->vdev_top == vd && obsolete_sm_object != 0) {
		dmu_object_info_t doi;
		VERIFY0(dmu_object_info(vd->vdev_spa->spa_meta_objset,
		    obsolete_sm_object, &doi));
		if (doi.doi_bonus_size == sizeof (space_map_phys_t)) {
			refcount++;
		}
	} else {
		ASSERT3P(vd->vdev_obsolete_sm, ==, NULL);
		ASSERT3U(obsolete_sm_object, ==, 0);
	}
	for (unsigned c = 0; c < vd->vdev_children; c++) {
		refcount += get_obsolete_refcount(vd->vdev_child[c]);
	}

	return (refcount);
}

static int
get_prev_obsolete_spacemap_refcount(spa_t *spa)
{
	uint64_t prev_obj =
	    spa->spa_condensing_indirect_phys.scip_prev_obsolete_sm_object;
	if (prev_obj != 0) {
		dmu_object_info_t doi;
		VERIFY0(dmu_object_info(spa->spa_meta_objset, prev_obj, &doi));
		if (doi.doi_bonus_size == sizeof (space_map_phys_t)) {
			return (1);
		}
	}
	return (0);
}

static int
get_checkpoint_refcount(vdev_t *vd)
{
	int refcount = 0;

	if (vd->vdev_top == vd && vd->vdev_top_zap != 0 &&
	    zap_contains(spa_meta_objset(vd->vdev_spa),
	    vd->vdev_top_zap, VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) == 0)
		refcount++;

	for (uint64_t c = 0; c < vd->vdev_children; c++)
		refcount += get_checkpoint_refcount(vd->vdev_child[c]);

	return (refcount);
}

static int
get_log_spacemap_refcount(spa_t *spa)
{
	return (avl_numnodes(&spa->spa_sm_logs_by_txg));
}

static int
verify_spacemap_refcounts(spa_t *spa)
{
	uint64_t expected_refcount = 0;
	uint64_t actual_refcount;

	(void) feature_get_refcount(spa,
	    &spa_feature_table[SPA_FEATURE_SPACEMAP_HISTOGRAM],
	    &expected_refcount);
	actual_refcount = get_dtl_refcount(spa->spa_root_vdev);
	actual_refcount += get_metaslab_refcount(spa->spa_root_vdev);
	actual_refcount += get_obsolete_refcount(spa->spa_root_vdev);
	actual_refcount += get_prev_obsolete_spacemap_refcount(spa);
	actual_refcount += get_checkpoint_refcount(spa->spa_root_vdev);
	actual_refcount += get_log_spacemap_refcount(spa);

	if (expected_refcount != actual_refcount) {
		(void) printf("space map refcount mismatch: expected %lld != "
		    "actual %lld\n",
		    (longlong_t)expected_refcount,
		    (longlong_t)actual_refcount);
		return (2);
	}
	return (0);
}
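/*
 * Print a space map in both encoded and decoded form. Three entry
 * encodings exist: debug entries (txg/sync-pass markers and padding),
 * one-word entries (offset and run scaled by sm_shift), and two-word
 * entries (larger offsets/runs plus an explicit vdev id). A running
 * allocation total is kept and checked against the space map's own
 * summary at the end.
 */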
static void
dump_spacemap(objset_t *os, space_map_t *sm)
{
	const char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
	    "INVALID", "INVALID", "INVALID", "INVALID" };

	if (sm == NULL)
		return;

	(void) printf("space map object %llu:\n",
	    (longlong_t)sm->sm_object);
	(void) printf("  smp_length = 0x%llx\n",
	    (longlong_t)sm->sm_phys->smp_length);
	(void) printf("  smp_alloc = 0x%llx\n",
	    (longlong_t)sm->sm_phys->smp_alloc);

	if (dump_opt['d'] < 6 && dump_opt['m'] < 4)
		return;

	/*
	 * Print out the freelist entries in both encoded and decoded form.
	 */
	uint8_t mapshift = sm->sm_shift;
	int64_t alloc = 0;
	uint64_t word, entry_id = 0;
	for (uint64_t offset = 0; offset < space_map_length(sm);
	    offset += sizeof (word)) {

		VERIFY0(dmu_read(os, space_map_object(sm), offset,
		    sizeof (word), &word, DMU_READ_PREFETCH));

		if (sm_entry_is_debug(word)) {
			uint64_t de_txg = SM_DEBUG_TXG_DECODE(word);
			uint64_t de_sync_pass = SM_DEBUG_SYNCPASS_DECODE(word);
			if (de_txg == 0) {
				(void) printf(
				    "\t    [%6llu] PADDING\n",
				    (u_longlong_t)entry_id);
			} else {
				(void) printf(
				    "\t    [%6llu] %s: txg %llu pass %llu\n",
				    (u_longlong_t)entry_id,
				    ddata[SM_DEBUG_ACTION_DECODE(word)],
				    (u_longlong_t)de_txg,
				    (u_longlong_t)de_sync_pass);
			}
			entry_id++;
			continue;
		}

		uint8_t words;
		char entry_type;
		uint64_t entry_off, entry_run, entry_vdev = SM_NO_VDEVID;

		if (sm_entry_is_single_word(word)) {
			entry_type = (SM_TYPE_DECODE(word) == SM_ALLOC) ?
			    'A' : 'F';
			entry_off = (SM_OFFSET_DECODE(word) << mapshift) +
			    sm->sm_start;
			entry_run = SM_RUN_DECODE(word) << mapshift;
			words = 1;
		} else {
			/* it is a two-word entry so we read another word */
			ASSERT(sm_entry_is_double_word(word));

			uint64_t extra_word;
			offset += sizeof (extra_word);
			VERIFY0(dmu_read(os, space_map_object(sm), offset,
			    sizeof (extra_word), &extra_word,
			    DMU_READ_PREFETCH));

			ASSERT3U(offset, <=, space_map_length(sm));

			entry_run = SM2_RUN_DECODE(word) << mapshift;
			entry_vdev = SM2_VDEV_DECODE(word);
			entry_type = (SM2_TYPE_DECODE(extra_word) == SM_ALLOC) ?
			    'A' : 'F';
			entry_off = (SM2_OFFSET_DECODE(extra_word) <<
			    mapshift) + sm->sm_start;
			words = 2;
		}

		(void) printf("\t    [%6llu]    %c  range:"
		    " %010llx-%010llx  size: %06llx vdev: %06llu words: %u\n",
		    (u_longlong_t)entry_id,
		    entry_type, (u_longlong_t)entry_off,
		    (u_longlong_t)(entry_off + entry_run),
		    (u_longlong_t)entry_run,
		    (u_longlong_t)entry_vdev, words);

		if (entry_type == 'A')
			alloc += entry_run;
		else
			alloc -= entry_run;
		entry_id++;
	}
	if (alloc != space_map_allocated(sm)) {
		(void) printf("space_map_object alloc (%lld) INCONSISTENT "
		    "with space map summary (%lld)\n",
		    (longlong_t)space_map_allocated(sm), (longlong_t)alloc);
	}
}

static void
dump_metaslab_stats(metaslab_t *msp)
{
	char maxbuf[32];
	range_tree_t *rt = msp->ms_allocatable;
	zfs_btree_t *t = &msp->ms_allocatable_by_size;
	int free_pct = range_tree_space(rt) * 100 / msp->ms_size;

	/* make sure nicenum has enough space */
	CTASSERT(sizeof (maxbuf) >= NN_NUMBUF_SZ);

	zdb_nicenum(metaslab_largest_allocatable(msp), maxbuf, sizeof (maxbuf));

	(void) printf("\t %25s %10lu   %7s  %6s   %4s %4d%%\n",
	    "segments", zfs_btree_numnodes(t), "maxsize", maxbuf,
	    "freepct", free_pct);
	(void) printf("\tIn-memory histogram:\n");
	dump_histogram(rt->rt_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
}

static void
dump_metaslab(metaslab_t *msp)
{
	vdev_t *vd = msp->ms_group->mg_vd;
	spa_t *spa = vd->vdev_spa;
	space_map_t *sm = msp->ms_sm;
	char freebuf[32];

	zdb_nicenum(msp->ms_size - space_map_allocated(sm), freebuf,
	    sizeof (freebuf));

	(void) printf(
	    "\tmetaslab %6llu   offset %12llx   spacemap %6llu   free %5s\n",
	    (u_longlong_t)msp->ms_id, (u_longlong_t)msp->ms_start,
	    (u_longlong_t)space_map_object(sm), freebuf);

	if (dump_opt['m'] > 2 && !dump_opt['L']) {
		mutex_enter(&msp->ms_lock);
		VERIFY0(metaslab_load(msp));
		range_tree_stat_verify(msp->ms_allocatable);
		dump_metaslab_stats(msp);
		metaslab_unload(msp);
		mutex_exit(&msp->ms_lock);
	}

	if (dump_opt['m'] > 1 && sm != NULL &&
	    spa_feature_is_active(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) {
		/*
		 * The space map histogram represents free space in chunks
		 * of sm_shift (i.e. bucket 0 refers to 2^sm_shift).
		 */
		(void) printf("\tOn-disk histogram:\t\tfragmentation %llu\n",
		    (u_longlong_t)msp->ms_fragmentation);
		dump_histogram(sm->sm_phys->smp_histogram,
		    SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift);
	}

	if (vd->vdev_ops == &vdev_draid_ops)
		ASSERT3U(msp->ms_size, <=, 1ULL << vd->vdev_ms_shift);
	else
		ASSERT3U(msp->ms_size, ==, 1ULL << vd->vdev_ms_shift);

	dump_spacemap(spa->spa_meta_objset, msp->ms_sm);

	if (spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP)) {
		(void) printf("\tFlush data:\n\tunflushed txg=%llu\n\n",
		    (u_longlong_t)metaslab_unflushed_txg(msp));
	}
}
static void
print_vdev_metaslab_header(vdev_t *vd)
{
	vdev_alloc_bias_t alloc_bias = vd->vdev_alloc_bias;
	const char *bias_str = "";
	if (alloc_bias == VDEV_BIAS_LOG || vd->vdev_islog) {
		bias_str = VDEV_ALLOC_BIAS_LOG;
	} else if (alloc_bias == VDEV_BIAS_SPECIAL) {
		bias_str = VDEV_ALLOC_BIAS_SPECIAL;
	} else if (alloc_bias == VDEV_BIAS_DEDUP) {
		bias_str = VDEV_ALLOC_BIAS_DEDUP;
	}

	uint64_t ms_flush_data_obj = 0;
	if (vd->vdev_top_zap != 0) {
		int error = zap_lookup(spa_meta_objset(vd->vdev_spa),
		    vd->vdev_top_zap, VDEV_TOP_ZAP_MS_UNFLUSHED_PHYS_TXGS,
		    sizeof (uint64_t), 1, &ms_flush_data_obj);
		if (error != ENOENT) {
			ASSERT0(error);
		}
	}

	(void) printf("\tvdev %10llu   %s",
	    (u_longlong_t)vd->vdev_id, bias_str);

	if (ms_flush_data_obj != 0) {
		(void) printf("   ms_unflushed_phys object %llu",
		    (u_longlong_t)ms_flush_data_obj);
	}

	(void) printf("\n\t%-10s%5llu   %-19s   %-15s   %-12s\n",
	    "metaslabs", (u_longlong_t)vd->vdev_ms_count,
	    "offset", "spacemap", "free");
	(void) printf("\t%15s   %19s   %15s   %12s\n",
	    "---------------", "-------------------",
	    "---------------", "------------");
}

static void
dump_metaslab_groups(spa_t *spa)
{
	vdev_t *rvd = spa->spa_root_vdev;
	metaslab_class_t *mc = spa_normal_class(spa);
	uint64_t fragmentation;

	metaslab_class_histogram_verify(mc);

	for (unsigned c = 0; c < rvd->vdev_children; c++) {
		vdev_t *tvd = rvd->vdev_child[c];
		metaslab_group_t *mg = tvd->vdev_mg;

		if (mg == NULL || mg->mg_class != mc)
			continue;

		metaslab_group_histogram_verify(mg);
		mg->mg_fragmentation = metaslab_group_fragmentation(mg);

		(void) printf("\tvdev %10llu\t\tmetaslabs%5llu\t\t"
		    "fragmentation",
		    (u_longlong_t)tvd->vdev_id,
		    (u_longlong_t)tvd->vdev_ms_count);
		if (mg->mg_fragmentation == ZFS_FRAG_INVALID) {
			(void) printf("%3s\n", "-");
		} else {
			(void) printf("%3llu%%\n",
			    (u_longlong_t)mg->mg_fragmentation);
		}
		dump_histogram(mg->mg_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
	}

	(void) printf("\tpool %s\tfragmentation", spa_name(spa));
	fragmentation = metaslab_class_fragmentation(mc);
	if (fragmentation == ZFS_FRAG_INVALID)
		(void) printf("\t%3s\n", "-");
	else
		(void) printf("\t%3llu%%\n", (u_longlong_t)fragmentation);
	dump_histogram(mc->mc_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
}

static void
print_vdev_indirect(vdev_t *vd)
{
	vdev_indirect_config_t *vic = &vd->vdev_indirect_config;
	vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
	vdev_indirect_births_t *vib = vd->vdev_indirect_births;

	if (vim == NULL) {
		ASSERT3P(vib, ==, NULL);
		return;
	}

	ASSERT3U(vdev_indirect_mapping_object(vim), ==,
	    vic->vic_mapping_object);
	ASSERT3U(vdev_indirect_births_object(vib), ==,
	    vic->vic_births_object);

	(void) printf("indirect births obj %llu:\n",
	    (longlong_t)vic->vic_births_object);
	(void) printf("    vib_count = %llu\n",
	    (longlong_t)vdev_indirect_births_count(vib));
	for (uint64_t i = 0; i < vdev_indirect_births_count(vib); i++) {
		vdev_indirect_birth_entry_phys_t *cur_vibe =
		    &vib->vib_entries[i];
		(void) printf("\toffset %llx -> txg %llu\n",
		    (longlong_t)cur_vibe->vibe_offset,
		    (longlong_t)cur_vibe->vibe_phys_birth_txg);
	}
	(void) printf("\n");

	(void) printf("indirect mapping obj %llu:\n",
	    (longlong_t)vic->vic_mapping_object);
	(void) printf("    vim_max_offset = 0x%llx\n",
	    (longlong_t)vdev_indirect_mapping_max_offset(vim));
	(void) printf("    vim_bytes_mapped = 0x%llx\n",
	    (longlong_t)vdev_indirect_mapping_bytes_mapped(vim));
	(void) printf("    vim_count = %llu\n",
	    (longlong_t)vdev_indirect_mapping_num_entries(vim));

	if (dump_opt['d'] <= 5 && dump_opt['m'] <= 3)
		return;

	uint32_t *counts = vdev_indirect_mapping_load_obsolete_counts(vim);

	for (uint64_t i = 0; i < vdev_indirect_mapping_num_entries(vim); i++) {
		vdev_indirect_mapping_entry_phys_t *vimep =
		    &vim->vim_entries[i];
		(void) printf("\t<%llx:%llx:%llx> -> "
		    "<%llx:%llx:%llx> (%x obsolete)\n",
		    (longlong_t)vd->vdev_id,
		    (longlong_t)DVA_MAPPING_GET_SRC_OFFSET(vimep),
		    (longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst),
		    (longlong_t)DVA_GET_VDEV(&vimep->vimep_dst),
		    (longlong_t)DVA_GET_OFFSET(&vimep->vimep_dst),
		    (longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst),
		    counts[i]);
	}
	(void) printf("\n");

	uint64_t obsolete_sm_object;
	VERIFY0(vdev_obsolete_sm_object(vd, &obsolete_sm_object));
	if (obsolete_sm_object != 0) {
		objset_t *mos = vd->vdev_spa->spa_meta_objset;
		(void) printf("obsolete space map object %llu:\n",
		    (u_longlong_t)obsolete_sm_object);
		ASSERT(vd->vdev_obsolete_sm != NULL);
		ASSERT3U(space_map_object(vd->vdev_obsolete_sm), ==,
		    obsolete_sm_object);
		dump_spacemap(mos, vd->vdev_obsolete_sm);
		(void) printf("\n");
	}
}
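/*
 * Dump metaslabs for the pool. Without -d and with explicit metaslab
 * arguments, the first argument selects the vdev and any further
 * arguments select individual metaslabs on it; otherwise every
 * metaslab of every top-level vdev is dumped.
 */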
static void
dump_metaslabs(spa_t *spa)
{
	vdev_t *vd, *rvd = spa->spa_root_vdev;
	uint64_t m, c = 0, children = rvd->vdev_children;

	(void) printf("\nMetaslabs:\n");

	if (!dump_opt['d'] && zopt_metaslab_args > 0) {
		c = zopt_metaslab[0];

		if (c >= children)
			(void) fatal("bad vdev id: %llu", (u_longlong_t)c);

		if (zopt_metaslab_args > 1) {
			vd = rvd->vdev_child[c];
			print_vdev_metaslab_header(vd);

			for (m = 1; m < zopt_metaslab_args; m++) {
				if (zopt_metaslab[m] < vd->vdev_ms_count)
					dump_metaslab(
					    vd->vdev_ms[zopt_metaslab[m]]);
				else
					(void) fprintf(stderr, "bad metaslab "
					    "number %llu\n",
					    (u_longlong_t)zopt_metaslab[m]);
			}
			(void) printf("\n");
			return;
		}
		children = c + 1;
	}
	for (; c < children; c++) {
		vd = rvd->vdev_child[c];
		print_vdev_metaslab_header(vd);

		print_vdev_indirect(vd);

		for (m = 0; m < vd->vdev_ms_count; m++)
			dump_metaslab(vd->vdev_ms[m]);
		(void) printf("\n");
	}
}

static void
dump_log_spacemaps(spa_t *spa)
{
	if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP))
		return;

	(void) printf("\nLog Space Maps in Pool:\n");
	for (spa_log_sm_t *sls = avl_first(&spa->spa_sm_logs_by_txg);
	    sls; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls)) {
		space_map_t *sm = NULL;
		VERIFY0(space_map_open(&sm, spa_meta_objset(spa),
		    sls->sls_sm_obj, 0, UINT64_MAX, SPA_MINBLOCKSHIFT));

		(void) printf("Log Spacemap object %llu txg %llu\n",
		    (u_longlong_t)sls->sls_sm_obj, (u_longlong_t)sls->sls_txg);
		dump_spacemap(spa->spa_meta_objset, sm);
		space_map_close(sm);
	}
	(void) printf("\n");
}

static void
dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index)
{
	const ddt_phys_t *ddp = dde->dde_phys;
	const ddt_key_t *ddk = &dde->dde_key;
	const char *types[4] = { "ditto", "single", "double", "triple" };
	char blkbuf[BP_SPRINTF_LEN];
	blkptr_t blk;
	int p;

	for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
		if (ddp->ddp_phys_birth == 0)
			continue;
		ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
		snprintf_blkptr(blkbuf, sizeof (blkbuf), &blk);
		(void) printf("index %llx refcnt %llu %s %s\n",
		    (u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt,
		    types[p], blkbuf);
	}
}
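/*
 * Summarize a DDT's effect. With rL/rP/rD the logical, physical, and
 * allocated ("d") sizes summed over all references, and D the
 * allocated size actually stored once, the printed figures are:
 *
 *	dedup    = rD / D	(how often each stored block is referenced)
 *	compress = rL / rP	(logical-to-physical compression ratio)
 *	copies   = rD / rP	(roughly, DVA copies/allocation overhead)
 *
 * so dedup * compress / copies approximates the overall space saving.
 */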
SPA_FEATURE_LOG_SPACEMAP)) 1897 return; 1898 1899 (void) printf("\nLog Space Maps in Pool:\n"); 1900 for (spa_log_sm_t *sls = avl_first(&spa->spa_sm_logs_by_txg); 1901 sls; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls)) { 1902 space_map_t *sm = NULL; 1903 VERIFY0(space_map_open(&sm, spa_meta_objset(spa), 1904 sls->sls_sm_obj, 0, UINT64_MAX, SPA_MINBLOCKSHIFT)); 1905 1906 (void) printf("Log Spacemap object %llu txg %llu\n", 1907 (u_longlong_t)sls->sls_sm_obj, (u_longlong_t)sls->sls_txg); 1908 dump_spacemap(spa->spa_meta_objset, sm); 1909 space_map_close(sm); 1910 } 1911 (void) printf("\n"); 1912 } 1913 1914 static void 1915 dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index) 1916 { 1917 const ddt_phys_t *ddp = dde->dde_phys; 1918 const ddt_key_t *ddk = &dde->dde_key; 1919 const char *types[4] = { "ditto", "single", "double", "triple" }; 1920 char blkbuf[BP_SPRINTF_LEN]; 1921 blkptr_t blk; 1922 int p; 1923 1924 for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { 1925 if (ddp->ddp_phys_birth == 0) 1926 continue; 1927 ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk); 1928 snprintf_blkptr(blkbuf, sizeof (blkbuf), &blk); 1929 (void) printf("index %llx refcnt %llu %s %s\n", 1930 (u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt, 1931 types[p], blkbuf); 1932 } 1933 } 1934 1935 static void 1936 dump_dedup_ratio(const ddt_stat_t *dds) 1937 { 1938 double rL, rP, rD, D, dedup, compress, copies; 1939 1940 if (dds->dds_blocks == 0) 1941 return; 1942 1943 rL = (double)dds->dds_ref_lsize; 1944 rP = (double)dds->dds_ref_psize; 1945 rD = (double)dds->dds_ref_dsize; 1946 D = (double)dds->dds_dsize; 1947 1948 dedup = rD / D; 1949 compress = rL / rP; 1950 copies = rD / rP; 1951 1952 (void) printf("dedup = %.2f, compress = %.2f, copies = %.2f, " 1953 "dedup * compress / copies = %.2f\n\n", 1954 dedup, compress, copies, dedup * compress / copies); 1955 } 1956 1957 static void 1958 dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class) 1959 { 1960 char name[DDT_NAMELEN]; 1961 ddt_entry_t dde; 1962 uint64_t walk = 0; 1963 dmu_object_info_t doi; 1964 uint64_t count, dspace, mspace; 1965 int error; 1966 1967 error = ddt_object_info(ddt, type, class, &doi); 1968 1969 if (error == ENOENT) 1970 return; 1971 ASSERT(error == 0); 1972 1973 error = ddt_object_count(ddt, type, class, &count); 1974 ASSERT(error == 0); 1975 if (count == 0) 1976 return; 1977 1978 dspace = doi.doi_physical_blocks_512 << 9; 1979 mspace = doi.doi_fill_count * doi.doi_data_block_size; 1980 1981 ddt_object_name(ddt, type, class, name); 1982 1983 (void) printf("%s: %llu entries, size %llu on disk, %llu in core\n", 1984 name, 1985 (u_longlong_t)count, 1986 (u_longlong_t)(dspace / count), 1987 (u_longlong_t)(mspace / count)); 1988 1989 if (dump_opt['D'] < 3) 1990 return; 1991 1992 zpool_dump_ddt(NULL, &ddt->ddt_histogram[type][class]); 1993 1994 if (dump_opt['D'] < 4) 1995 return; 1996 1997 if (dump_opt['D'] < 5 && class == DDT_CLASS_UNIQUE) 1998 return; 1999 2000 (void) printf("%s contents:\n\n", name); 2001 2002 while ((error = ddt_object_walk(ddt, type, class, &walk, &dde)) == 0) 2003 dump_dde(ddt, &dde, walk); 2004 2005 ASSERT3U(error, ==, ENOENT); 2006 2007 (void) printf("\n"); 2008 } 2009 2010 static void 2011 dump_all_ddts(spa_t *spa) 2012 { 2013 ddt_histogram_t ddh_total; 2014 ddt_stat_t dds_total; 2015 2016 bzero(&ddh_total, sizeof (ddh_total)); 2017 bzero(&dds_total, sizeof (dds_total)); 2018 2019 for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { 2020 ddt_t *ddt = spa->spa_ddt[c]; 2021 for (enum ddt_type type 
= 0; type < DDT_TYPES; type++) { 2022 for (enum ddt_class class = 0; class < DDT_CLASSES; 2023 class++) { 2024 dump_ddt(ddt, type, class); 2025 } 2026 } 2027 } 2028 2029 ddt_get_dedup_stats(spa, &dds_total); 2030 2031 if (dds_total.dds_blocks == 0) { 2032 (void) printf("All DDTs are empty\n"); 2033 return; 2034 } 2035 2036 (void) printf("\n"); 2037 2038 if (dump_opt['D'] > 1) { 2039 (void) printf("DDT histogram (aggregated over all DDTs):\n"); 2040 ddt_get_dedup_histogram(spa, &ddh_total); 2041 zpool_dump_ddt(&dds_total, &ddh_total); 2042 } 2043 2044 dump_dedup_ratio(&dds_total); 2045 } 2046 2047 static void 2048 dump_dtl_seg(void *arg, uint64_t start, uint64_t size) 2049 { 2050 char *prefix = arg; 2051 2052 (void) printf("%s [%llu,%llu) length %llu\n", 2053 prefix, 2054 (u_longlong_t)start, 2055 (u_longlong_t)(start + size), 2056 (u_longlong_t)(size)); 2057 } 2058 2059 static void 2060 dump_dtl(vdev_t *vd, int indent) 2061 { 2062 spa_t *spa = vd->vdev_spa; 2063 boolean_t required; 2064 const char *name[DTL_TYPES] = { "missing", "partial", "scrub", 2065 "outage" }; 2066 char prefix[256]; 2067 2068 spa_vdev_state_enter(spa, SCL_NONE); 2069 required = vdev_dtl_required(vd); 2070 (void) spa_vdev_state_exit(spa, NULL, 0); 2071 2072 if (indent == 0) 2073 (void) printf("\nDirty time logs:\n\n"); 2074 2075 (void) printf("\t%*s%s [%s]\n", indent, "", 2076 vd->vdev_path ? vd->vdev_path : 2077 vd->vdev_parent ? vd->vdev_ops->vdev_op_type : spa_name(spa), 2078 required ? "DTL-required" : "DTL-expendable"); 2079 2080 for (int t = 0; t < DTL_TYPES; t++) { 2081 range_tree_t *rt = vd->vdev_dtl[t]; 2082 if (range_tree_space(rt) == 0) 2083 continue; 2084 (void) snprintf(prefix, sizeof (prefix), "\t%*s%s", 2085 indent + 2, "", name[t]); 2086 range_tree_walk(rt, dump_dtl_seg, prefix); 2087 if (dump_opt['d'] > 5 && vd->vdev_children == 0) 2088 dump_spacemap(spa->spa_meta_objset, 2089 vd->vdev_dtl_sm); 2090 } 2091 2092 for (unsigned c = 0; c < vd->vdev_children; c++) 2093 dump_dtl(vd->vdev_child[c], indent + 4); 2094 } 2095 2096 static void 2097 dump_history(spa_t *spa) 2098 { 2099 nvlist_t **events = NULL; 2100 char *buf; 2101 uint64_t resid, len, off = 0; 2102 uint_t num = 0; 2103 int error; 2104 char tbuf[30]; 2105 2106 if ((buf = malloc(SPA_OLD_MAXBLOCKSIZE)) == NULL) { 2107 (void) fprintf(stderr, "%s: unable to allocate I/O buffer\n", 2108 __func__); 2109 return; 2110 } 2111 2112 do { 2113 len = SPA_OLD_MAXBLOCKSIZE; 2114 2115 if ((error = spa_history_get(spa, &off, &len, buf)) != 0) { 2116 (void) fprintf(stderr, "Unable to read history: " 2117 "error %d\n", error); 2118 free(buf); 2119 return; 2120 } 2121 2122 if (zpool_history_unpack(buf, len, &resid, &events, &num) != 0) 2123 break; 2124 2125 off -= resid; 2126 } while (len != 0); 2127 2128 (void) printf("\nHistory:\n"); 2129 for (unsigned i = 0; i < num; i++) { 2130 boolean_t printed = B_FALSE; 2131 2132 if (nvlist_exists(events[i], ZPOOL_HIST_TIME)) { 2133 time_t tsec; 2134 struct tm t; 2135 2136 tsec = fnvlist_lookup_uint64(events[i], 2137 ZPOOL_HIST_TIME); 2138 (void) localtime_r(&tsec, &t); 2139 (void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t); 2140 } else { 2141 tbuf[0] = '\0'; 2142 } 2143 2144 if (nvlist_exists(events[i], ZPOOL_HIST_CMD)) { 2145 (void) printf("%s %s\n", tbuf, 2146 fnvlist_lookup_string(events[i], ZPOOL_HIST_CMD)); 2147 } else if (nvlist_exists(events[i], ZPOOL_HIST_INT_EVENT)) { 2148 uint64_t ievent; 2149 2150 ievent = fnvlist_lookup_uint64(events[i], 2151 ZPOOL_HIST_INT_EVENT); 2152 if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS) 
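			/*
			 * Internal events newer than the legacy name table
			 * cannot be pretty-printed; with -hh the raw nvlist
			 * is still dumped via the "unrecognized record"
			 * path below.
			 */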
2153 goto next; 2154 2155 (void) printf(" %s [internal %s txg:%ju] %s\n", 2156 tbuf, 2157 zfs_history_event_names[ievent], 2158 fnvlist_lookup_uint64(events[i], 2159 ZPOOL_HIST_TXG), 2160 fnvlist_lookup_string(events[i], 2161 ZPOOL_HIST_INT_STR)); 2162 } else if (nvlist_exists(events[i], ZPOOL_HIST_INT_NAME)) { 2163 (void) printf("%s [txg:%ju] %s", tbuf, 2164 fnvlist_lookup_uint64(events[i], 2165 ZPOOL_HIST_TXG), 2166 fnvlist_lookup_string(events[i], 2167 ZPOOL_HIST_INT_NAME)); 2168 2169 if (nvlist_exists(events[i], ZPOOL_HIST_DSNAME)) { 2170 (void) printf(" %s (%llu)", 2171 fnvlist_lookup_string(events[i], 2172 ZPOOL_HIST_DSNAME), 2173 (u_longlong_t)fnvlist_lookup_uint64( 2174 events[i], 2175 ZPOOL_HIST_DSID)); 2176 } 2177 2178 (void) printf(" %s\n", fnvlist_lookup_string(events[i], 2179 ZPOOL_HIST_INT_STR)); 2180 } else if (nvlist_exists(events[i], ZPOOL_HIST_IOCTL)) { 2181 (void) printf("%s ioctl %s\n", tbuf, 2182 fnvlist_lookup_string(events[i], 2183 ZPOOL_HIST_IOCTL)); 2184 2185 if (nvlist_exists(events[i], ZPOOL_HIST_INPUT_NVL)) { 2186 (void) printf(" input:\n"); 2187 dump_nvlist(fnvlist_lookup_nvlist(events[i], 2188 ZPOOL_HIST_INPUT_NVL), 8); 2189 } 2190 if (nvlist_exists(events[i], ZPOOL_HIST_OUTPUT_NVL)) { 2191 (void) printf(" output:\n"); 2192 dump_nvlist(fnvlist_lookup_nvlist(events[i], 2193 ZPOOL_HIST_OUTPUT_NVL), 8); 2194 } 2195 if (nvlist_exists(events[i], ZPOOL_HIST_ERRNO)) { 2196 (void) printf(" errno: %lld\n", 2197 (longlong_t)fnvlist_lookup_int64(events[i], 2198 ZPOOL_HIST_ERRNO)); 2199 } 2200 } else { 2201 goto next; 2202 } 2203 2204 printed = B_TRUE; 2205 next: 2206 if (dump_opt['h'] > 1) { 2207 if (!printed) 2208 (void) printf("unrecognized record:\n"); 2209 dump_nvlist(events[i], 2); 2210 } 2211 } 2212 free(buf); 2213 } 2214 2215 /*ARGSUSED*/ 2216 static void 2217 dump_dnode(objset_t *os, uint64_t object, void *data, size_t size) 2218 { 2219 } 2220 2221 static uint64_t 2222 blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp, 2223 const zbookmark_phys_t *zb) 2224 { 2225 if (dnp == NULL) { 2226 ASSERT(zb->zb_level < 0); 2227 if (zb->zb_object == 0) 2228 return (zb->zb_blkid); 2229 return (zb->zb_blkid * BP_GET_LSIZE(bp)); 2230 } 2231 2232 ASSERT(zb->zb_level >= 0); 2233 2234 return ((zb->zb_blkid << 2235 (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) * 2236 dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT); 2237 } 2238 2239 static void 2240 snprintf_zstd_header(spa_t *spa, char *blkbuf, size_t buflen, 2241 const blkptr_t *bp) 2242 { 2243 abd_t *pabd; 2244 void *buf; 2245 zio_t *zio; 2246 zfs_zstdhdr_t zstd_hdr; 2247 int error; 2248 2249 if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_ZSTD) 2250 return; 2251 2252 if (BP_IS_HOLE(bp)) 2253 return; 2254 2255 if (BP_IS_EMBEDDED(bp)) { 2256 buf = malloc(SPA_MAXBLOCKSIZE); 2257 if (buf == NULL) { 2258 (void) fprintf(stderr, "out of memory\n"); 2259 exit(1); 2260 } 2261 decode_embedded_bp_compressed(bp, buf); 2262 memcpy(&zstd_hdr, buf, sizeof (zstd_hdr)); 2263 free(buf); 2264 zstd_hdr.c_len = BE_32(zstd_hdr.c_len); 2265 zstd_hdr.raw_version_level = BE_32(zstd_hdr.raw_version_level); 2266 (void) snprintf(blkbuf + strlen(blkbuf), 2267 buflen - strlen(blkbuf), 2268 " ZSTD:size=%u:version=%u:level=%u:EMBEDDED", 2269 zstd_hdr.c_len, zfs_get_hdrversion(&zstd_hdr), 2270 zfs_get_hdrlevel(&zstd_hdr)); 2271 return; 2272 } 2273 2274 pabd = abd_alloc_for_io(SPA_MAXBLOCKSIZE, B_FALSE); 2275 zio = zio_root(spa, NULL, NULL, 0); 2276 2277 /* Decrypt but don't decompress so we can read the compression header */ 2278 zio_nowait(zio_read(zio, 
spa, bp, pabd, BP_GET_PSIZE(bp), NULL, NULL, 2279 ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW_COMPRESS, 2280 NULL)); 2281 error = zio_wait(zio); 2282 if (error) { 2283 (void) fprintf(stderr, "read failed: %d\n", error); 2284 return; 2285 } 2286 buf = abd_borrow_buf_copy(pabd, BP_GET_LSIZE(bp)); 2287 memcpy(&zstd_hdr, buf, sizeof (zstd_hdr)); 2288 zstd_hdr.c_len = BE_32(zstd_hdr.c_len); 2289 zstd_hdr.raw_version_level = BE_32(zstd_hdr.raw_version_level); 2290 2291 (void) snprintf(blkbuf + strlen(blkbuf), 2292 buflen - strlen(blkbuf), 2293 " ZSTD:size=%u:version=%u:level=%u:NORMAL", 2294 zstd_hdr.c_len, zfs_get_hdrversion(&zstd_hdr), 2295 zfs_get_hdrlevel(&zstd_hdr)); 2296 2297 abd_return_buf_copy(pabd, buf, BP_GET_LSIZE(bp)); 2298 } 2299 2300 static void 2301 snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp, 2302 boolean_t bp_freed) 2303 { 2304 const dva_t *dva = bp->blk_dva; 2305 int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1; 2306 int i; 2307 2308 if (dump_opt['b'] >= 6) { 2309 snprintf_blkptr(blkbuf, buflen, bp); 2310 if (bp_freed) { 2311 (void) snprintf(blkbuf + strlen(blkbuf), 2312 buflen - strlen(blkbuf), " %s", "FREE"); 2313 } 2314 return; 2315 } 2316 2317 if (BP_IS_EMBEDDED(bp)) { 2318 (void) sprintf(blkbuf, 2319 "EMBEDDED et=%u %llxL/%llxP B=%llu", 2320 (int)BPE_GET_ETYPE(bp), 2321 (u_longlong_t)BPE_GET_LSIZE(bp), 2322 (u_longlong_t)BPE_GET_PSIZE(bp), 2323 (u_longlong_t)bp->blk_birth); 2324 return; 2325 } 2326 2327 blkbuf[0] = '\0'; 2328 2329 for (i = 0; i < ndvas; i++) 2330 (void) snprintf(blkbuf + strlen(blkbuf), 2331 buflen - strlen(blkbuf), "%llu:%llx:%llx ", 2332 (u_longlong_t)DVA_GET_VDEV(&dva[i]), 2333 (u_longlong_t)DVA_GET_OFFSET(&dva[i]), 2334 (u_longlong_t)DVA_GET_ASIZE(&dva[i])); 2335 2336 if (BP_IS_HOLE(bp)) { 2337 (void) snprintf(blkbuf + strlen(blkbuf), 2338 buflen - strlen(blkbuf), 2339 "%llxL B=%llu", 2340 (u_longlong_t)BP_GET_LSIZE(bp), 2341 (u_longlong_t)bp->blk_birth); 2342 } else { 2343 (void) snprintf(blkbuf + strlen(blkbuf), 2344 buflen - strlen(blkbuf), 2345 "%llxL/%llxP F=%llu B=%llu/%llu", 2346 (u_longlong_t)BP_GET_LSIZE(bp), 2347 (u_longlong_t)BP_GET_PSIZE(bp), 2348 (u_longlong_t)BP_GET_FILL(bp), 2349 (u_longlong_t)bp->blk_birth, 2350 (u_longlong_t)BP_PHYSICAL_BIRTH(bp)); 2351 if (bp_freed) 2352 (void) snprintf(blkbuf + strlen(blkbuf), 2353 buflen - strlen(blkbuf), " %s", "FREE"); 2354 (void) snprintf(blkbuf + strlen(blkbuf), 2355 buflen - strlen(blkbuf), " cksum=%llx:%llx:%llx:%llx", 2356 (u_longlong_t)bp->blk_cksum.zc_word[0], 2357 (u_longlong_t)bp->blk_cksum.zc_word[1], 2358 (u_longlong_t)bp->blk_cksum.zc_word[2], 2359 (u_longlong_t)bp->blk_cksum.zc_word[3]); 2360 } 2361 } 2362 2363 static void 2364 print_indirect(spa_t *spa, blkptr_t *bp, const zbookmark_phys_t *zb, 2365 const dnode_phys_t *dnp) 2366 { 2367 char blkbuf[BP_SPRINTF_LEN]; 2368 int l; 2369 2370 if (!BP_IS_EMBEDDED(bp)) { 2371 ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type); 2372 ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level); 2373 } 2374 2375 (void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb)); 2376 2377 ASSERT(zb->zb_level >= 0); 2378 2379 for (l = dnp->dn_nlevels - 1; l >= -1; l--) { 2380 if (l == zb->zb_level) { 2381 (void) printf("L%llx", (u_longlong_t)zb->zb_level); 2382 } else { 2383 (void) printf(" "); 2384 } 2385 } 2386 2387 snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp, B_FALSE); 2388 if (dump_opt['Z'] && BP_GET_COMPRESS(bp) == ZIO_COMPRESS_ZSTD) 2389 snprintf_zstd_header(spa, blkbuf, sizeof (blkbuf), bp); 2390 (void) 
printf("%s\n", blkbuf); 2391 } 2392 2393 static int 2394 visit_indirect(spa_t *spa, const dnode_phys_t *dnp, 2395 blkptr_t *bp, const zbookmark_phys_t *zb) 2396 { 2397 int err = 0; 2398 2399 if (bp->blk_birth == 0) 2400 return (0); 2401 2402 print_indirect(spa, bp, zb, dnp); 2403 2404 if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) { 2405 arc_flags_t flags = ARC_FLAG_WAIT; 2406 int i; 2407 blkptr_t *cbp; 2408 int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT; 2409 arc_buf_t *buf; 2410 uint64_t fill = 0; 2411 ASSERT(!BP_IS_REDACTED(bp)); 2412 2413 err = arc_read(NULL, spa, bp, arc_getbuf_func, &buf, 2414 ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); 2415 if (err) 2416 return (err); 2417 ASSERT(buf->b_data); 2418 2419 /* recursively visit blocks below this */ 2420 cbp = buf->b_data; 2421 for (i = 0; i < epb; i++, cbp++) { 2422 zbookmark_phys_t czb; 2423 2424 SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object, 2425 zb->zb_level - 1, 2426 zb->zb_blkid * epb + i); 2427 err = visit_indirect(spa, dnp, cbp, &czb); 2428 if (err) 2429 break; 2430 fill += BP_GET_FILL(cbp); 2431 } 2432 if (!err) 2433 ASSERT3U(fill, ==, BP_GET_FILL(bp)); 2434 arc_buf_destroy(buf, &buf); 2435 } 2436 2437 return (err); 2438 } 2439 2440 /*ARGSUSED*/ 2441 static void 2442 dump_indirect(dnode_t *dn) 2443 { 2444 dnode_phys_t *dnp = dn->dn_phys; 2445 int j; 2446 zbookmark_phys_t czb; 2447 2448 (void) printf("Indirect blocks:\n"); 2449 2450 SET_BOOKMARK(&czb, dmu_objset_id(dn->dn_objset), 2451 dn->dn_object, dnp->dn_nlevels - 1, 0); 2452 for (j = 0; j < dnp->dn_nblkptr; j++) { 2453 czb.zb_blkid = j; 2454 (void) visit_indirect(dmu_objset_spa(dn->dn_objset), dnp, 2455 &dnp->dn_blkptr[j], &czb); 2456 } 2457 2458 (void) printf("\n"); 2459 } 2460 2461 /*ARGSUSED*/ 2462 static void 2463 dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size) 2464 { 2465 dsl_dir_phys_t *dd = data; 2466 time_t crtime; 2467 char nice[32]; 2468 2469 /* make sure nicenum has enough space */ 2470 CTASSERT(sizeof (nice) >= NN_NUMBUF_SZ); 2471 2472 if (dd == NULL) 2473 return; 2474 2475 ASSERT3U(size, >=, sizeof (dsl_dir_phys_t)); 2476 2477 crtime = dd->dd_creation_time; 2478 (void) printf("\t\tcreation_time = %s", ctime(&crtime)); 2479 (void) printf("\t\thead_dataset_obj = %llu\n", 2480 (u_longlong_t)dd->dd_head_dataset_obj); 2481 (void) printf("\t\tparent_dir_obj = %llu\n", 2482 (u_longlong_t)dd->dd_parent_obj); 2483 (void) printf("\t\torigin_obj = %llu\n", 2484 (u_longlong_t)dd->dd_origin_obj); 2485 (void) printf("\t\tchild_dir_zapobj = %llu\n", 2486 (u_longlong_t)dd->dd_child_dir_zapobj); 2487 zdb_nicenum(dd->dd_used_bytes, nice, sizeof (nice)); 2488 (void) printf("\t\tused_bytes = %s\n", nice); 2489 zdb_nicenum(dd->dd_compressed_bytes, nice, sizeof (nice)); 2490 (void) printf("\t\tcompressed_bytes = %s\n", nice); 2491 zdb_nicenum(dd->dd_uncompressed_bytes, nice, sizeof (nice)); 2492 (void) printf("\t\tuncompressed_bytes = %s\n", nice); 2493 zdb_nicenum(dd->dd_quota, nice, sizeof (nice)); 2494 (void) printf("\t\tquota = %s\n", nice); 2495 zdb_nicenum(dd->dd_reserved, nice, sizeof (nice)); 2496 (void) printf("\t\treserved = %s\n", nice); 2497 (void) printf("\t\tprops_zapobj = %llu\n", 2498 (u_longlong_t)dd->dd_props_zapobj); 2499 (void) printf("\t\tdeleg_zapobj = %llu\n", 2500 (u_longlong_t)dd->dd_deleg_zapobj); 2501 (void) printf("\t\tflags = %llx\n", 2502 (u_longlong_t)dd->dd_flags); 2503 2504 #define DO(which) \ 2505 zdb_nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice, \ 2506 sizeof (nice)); \ 2507 (void) 
printf("\t\tused_breakdown[" #which "] = %s\n", nice) 2508 DO(HEAD); 2509 DO(SNAP); 2510 DO(CHILD); 2511 DO(CHILD_RSRV); 2512 DO(REFRSRV); 2513 #undef DO 2514 (void) printf("\t\tclones = %llu\n", 2515 (u_longlong_t)dd->dd_clones); 2516 } 2517 2518 /*ARGSUSED*/ 2519 static void 2520 dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size) 2521 { 2522 dsl_dataset_phys_t *ds = data; 2523 time_t crtime; 2524 char used[32], compressed[32], uncompressed[32], unique[32]; 2525 char blkbuf[BP_SPRINTF_LEN]; 2526 2527 /* make sure nicenum has enough space */ 2528 CTASSERT(sizeof (used) >= NN_NUMBUF_SZ); 2529 CTASSERT(sizeof (compressed) >= NN_NUMBUF_SZ); 2530 CTASSERT(sizeof (uncompressed) >= NN_NUMBUF_SZ); 2531 CTASSERT(sizeof (unique) >= NN_NUMBUF_SZ); 2532 2533 if (ds == NULL) 2534 return; 2535 2536 ASSERT(size == sizeof (*ds)); 2537 crtime = ds->ds_creation_time; 2538 zdb_nicenum(ds->ds_referenced_bytes, used, sizeof (used)); 2539 zdb_nicenum(ds->ds_compressed_bytes, compressed, sizeof (compressed)); 2540 zdb_nicenum(ds->ds_uncompressed_bytes, uncompressed, 2541 sizeof (uncompressed)); 2542 zdb_nicenum(ds->ds_unique_bytes, unique, sizeof (unique)); 2543 snprintf_blkptr(blkbuf, sizeof (blkbuf), &ds->ds_bp); 2544 2545 (void) printf("\t\tdir_obj = %llu\n", 2546 (u_longlong_t)ds->ds_dir_obj); 2547 (void) printf("\t\tprev_snap_obj = %llu\n", 2548 (u_longlong_t)ds->ds_prev_snap_obj); 2549 (void) printf("\t\tprev_snap_txg = %llu\n", 2550 (u_longlong_t)ds->ds_prev_snap_txg); 2551 (void) printf("\t\tnext_snap_obj = %llu\n", 2552 (u_longlong_t)ds->ds_next_snap_obj); 2553 (void) printf("\t\tsnapnames_zapobj = %llu\n", 2554 (u_longlong_t)ds->ds_snapnames_zapobj); 2555 (void) printf("\t\tnum_children = %llu\n", 2556 (u_longlong_t)ds->ds_num_children); 2557 (void) printf("\t\tuserrefs_obj = %llu\n", 2558 (u_longlong_t)ds->ds_userrefs_obj); 2559 (void) printf("\t\tcreation_time = %s", ctime(&crtime)); 2560 (void) printf("\t\tcreation_txg = %llu\n", 2561 (u_longlong_t)ds->ds_creation_txg); 2562 (void) printf("\t\tdeadlist_obj = %llu\n", 2563 (u_longlong_t)ds->ds_deadlist_obj); 2564 (void) printf("\t\tused_bytes = %s\n", used); 2565 (void) printf("\t\tcompressed_bytes = %s\n", compressed); 2566 (void) printf("\t\tuncompressed_bytes = %s\n", uncompressed); 2567 (void) printf("\t\tunique = %s\n", unique); 2568 (void) printf("\t\tfsid_guid = %llu\n", 2569 (u_longlong_t)ds->ds_fsid_guid); 2570 (void) printf("\t\tguid = %llu\n", 2571 (u_longlong_t)ds->ds_guid); 2572 (void) printf("\t\tflags = %llx\n", 2573 (u_longlong_t)ds->ds_flags); 2574 (void) printf("\t\tnext_clones_obj = %llu\n", 2575 (u_longlong_t)ds->ds_next_clones_obj); 2576 (void) printf("\t\tprops_obj = %llu\n", 2577 (u_longlong_t)ds->ds_props_obj); 2578 (void) printf("\t\tbp = %s\n", blkbuf); 2579 } 2580 2581 /* ARGSUSED */ 2582 static int 2583 dump_bptree_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) 2584 { 2585 char blkbuf[BP_SPRINTF_LEN]; 2586 2587 if (bp->blk_birth != 0) { 2588 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); 2589 (void) printf("\t%s\n", blkbuf); 2590 } 2591 return (0); 2592 } 2593 2594 static void 2595 dump_bptree(objset_t *os, uint64_t obj, const char *name) 2596 { 2597 char bytes[32]; 2598 bptree_phys_t *bt; 2599 dmu_buf_t *db; 2600 2601 /* make sure nicenum has enough space */ 2602 CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ); 2603 2604 if (dump_opt['d'] < 3) 2605 return; 2606 2607 VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db)); 2608 bt = db->db_data; 2609 zdb_nicenum(bt->bt_bytes, bytes, sizeof (bytes)); 2610 (void) 
printf("\n %s: %llu datasets, %s\n", 2611 name, (unsigned long long)(bt->bt_end - bt->bt_begin), bytes); 2612 dmu_buf_rele(db, FTAG); 2613 2614 if (dump_opt['d'] < 5) 2615 return; 2616 2617 (void) printf("\n"); 2618 2619 (void) bptree_iterate(os, obj, B_FALSE, dump_bptree_cb, NULL, NULL); 2620 } 2621 2622 /* ARGSUSED */ 2623 static int 2624 dump_bpobj_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, dmu_tx_t *tx) 2625 { 2626 char blkbuf[BP_SPRINTF_LEN]; 2627 2628 ASSERT(bp->blk_birth != 0); 2629 snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp, bp_freed); 2630 (void) printf("\t%s\n", blkbuf); 2631 return (0); 2632 } 2633 2634 static void 2635 dump_full_bpobj(bpobj_t *bpo, const char *name, int indent) 2636 { 2637 char bytes[32]; 2638 char comp[32]; 2639 char uncomp[32]; 2640 uint64_t i; 2641 2642 /* make sure nicenum has enough space */ 2643 CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ); 2644 CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ); 2645 CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ); 2646 2647 if (dump_opt['d'] < 3) 2648 return; 2649 2650 zdb_nicenum(bpo->bpo_phys->bpo_bytes, bytes, sizeof (bytes)); 2651 if (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_subobjs != 0) { 2652 zdb_nicenum(bpo->bpo_phys->bpo_comp, comp, sizeof (comp)); 2653 zdb_nicenum(bpo->bpo_phys->bpo_uncomp, uncomp, sizeof (uncomp)); 2654 if (bpo->bpo_havefreed) { 2655 (void) printf(" %*s: object %llu, %llu local " 2656 "blkptrs, %llu freed, %llu subobjs in object %llu, " 2657 "%s (%s/%s comp)\n", 2658 indent * 8, name, 2659 (u_longlong_t)bpo->bpo_object, 2660 (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs, 2661 (u_longlong_t)bpo->bpo_phys->bpo_num_freed, 2662 (u_longlong_t)bpo->bpo_phys->bpo_num_subobjs, 2663 (u_longlong_t)bpo->bpo_phys->bpo_subobjs, 2664 bytes, comp, uncomp); 2665 } else { 2666 (void) printf(" %*s: object %llu, %llu local " 2667 "blkptrs, %llu subobjs in object %llu, " 2668 "%s (%s/%s comp)\n", 2669 indent * 8, name, 2670 (u_longlong_t)bpo->bpo_object, 2671 (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs, 2672 (u_longlong_t)bpo->bpo_phys->bpo_num_subobjs, 2673 (u_longlong_t)bpo->bpo_phys->bpo_subobjs, 2674 bytes, comp, uncomp); 2675 } 2676 2677 for (i = 0; i < bpo->bpo_phys->bpo_num_subobjs; i++) { 2678 uint64_t subobj; 2679 bpobj_t subbpo; 2680 int error; 2681 VERIFY0(dmu_read(bpo->bpo_os, 2682 bpo->bpo_phys->bpo_subobjs, 2683 i * sizeof (subobj), sizeof (subobj), &subobj, 0)); 2684 error = bpobj_open(&subbpo, bpo->bpo_os, subobj); 2685 if (error != 0) { 2686 (void) printf("ERROR %u while trying to open " 2687 "subobj id %llu\n", 2688 error, (u_longlong_t)subobj); 2689 continue; 2690 } 2691 dump_full_bpobj(&subbpo, "subobj", indent + 1); 2692 bpobj_close(&subbpo); 2693 } 2694 } else { 2695 if (bpo->bpo_havefreed) { 2696 (void) printf(" %*s: object %llu, %llu blkptrs, " 2697 "%llu freed, %s\n", 2698 indent * 8, name, 2699 (u_longlong_t)bpo->bpo_object, 2700 (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs, 2701 (u_longlong_t)bpo->bpo_phys->bpo_num_freed, 2702 bytes); 2703 } else { 2704 (void) printf(" %*s: object %llu, %llu blkptrs, " 2705 "%s\n", 2706 indent * 8, name, 2707 (u_longlong_t)bpo->bpo_object, 2708 (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs, 2709 bytes); 2710 } 2711 } 2712 2713 if (dump_opt['d'] < 5) 2714 return; 2715 2716 2717 if (indent == 0) { 2718 (void) bpobj_iterate_nofree(bpo, dump_bpobj_cb, NULL, NULL); 2719 (void) printf("\n"); 2720 } 2721 } 2722 2723 static int 2724 dump_bookmark(dsl_pool_t *dp, char *name, boolean_t print_redact, 2725 boolean_t print_list) 2726 { 2727 int err = 0; 2728 
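	/*
	 * dsl_bookmark_lookup() expects the fully qualified
	 * "pool/dataset#bookmark" form and fills in the bookmark's stable
	 * identity (guid, creation txg/time) and its redaction object,
	 * which are printed below.
	 */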
zfs_bookmark_phys_t prop; 2729 objset_t *mos = dp->dp_spa->spa_meta_objset; 2730 err = dsl_bookmark_lookup(dp, name, NULL, &prop); 2731 2732 if (err != 0) { 2733 return (err); 2734 } 2735 2736 (void) printf("\t#%s: ", strchr(name, '#') + 1); 2737 (void) printf("{guid: %llx creation_txg: %llu creation_time: " 2738 "%llu redaction_obj: %llu}\n", (u_longlong_t)prop.zbm_guid, 2739 (u_longlong_t)prop.zbm_creation_txg, 2740 (u_longlong_t)prop.zbm_creation_time, 2741 (u_longlong_t)prop.zbm_redaction_obj); 2742 2743 IMPLY(print_list, print_redact); 2744 if (!print_redact || prop.zbm_redaction_obj == 0) 2745 return (0); 2746 2747 redaction_list_t *rl; 2748 VERIFY0(dsl_redaction_list_hold_obj(dp, 2749 prop.zbm_redaction_obj, FTAG, &rl)); 2750 2751 redaction_list_phys_t *rlp = rl->rl_phys; 2752 (void) printf("\tRedacted:\n\t\tProgress: "); 2753 if (rlp->rlp_last_object != UINT64_MAX || 2754 rlp->rlp_last_blkid != UINT64_MAX) { 2755 (void) printf("%llu %llu (incomplete)\n", 2756 (u_longlong_t)rlp->rlp_last_object, 2757 (u_longlong_t)rlp->rlp_last_blkid); 2758 } else { 2759 (void) printf("complete\n"); 2760 } 2761 (void) printf("\t\tSnapshots: ["); 2762 for (unsigned int i = 0; i < rlp->rlp_num_snaps; i++) { 2763 if (i > 0) 2764 (void) printf(", "); 2765 (void) printf("%0llu", 2766 (u_longlong_t)rlp->rlp_snaps[i]); 2767 } 2768 (void) printf("]\n\t\tLength: %llu\n", 2769 (u_longlong_t)rlp->rlp_num_entries); 2770 2771 if (!print_list) { 2772 dsl_redaction_list_rele(rl, FTAG); 2773 return (0); 2774 } 2775 2776 if (rlp->rlp_num_entries == 0) { 2777 dsl_redaction_list_rele(rl, FTAG); 2778 (void) printf("\t\tRedaction List: []\n\n"); 2779 return (0); 2780 } 2781 2782 redact_block_phys_t *rbp_buf; 2783 uint64_t size; 2784 dmu_object_info_t doi; 2785 2786 VERIFY0(dmu_object_info(mos, prop.zbm_redaction_obj, &doi)); 2787 size = doi.doi_max_offset; 2788 rbp_buf = kmem_alloc(size, KM_SLEEP); 2789 2790 err = dmu_read(mos, prop.zbm_redaction_obj, 0, size, 2791 rbp_buf, 0); 2792 if (err != 0) { 2793 dsl_redaction_list_rele(rl, FTAG); 2794 kmem_free(rbp_buf, size); 2795 return (err); 2796 } 2797 2798 (void) printf("\t\tRedaction List: [{object: %llx, offset: " 2799 "%llx, blksz: %x, count: %llx}", 2800 (u_longlong_t)rbp_buf[0].rbp_object, 2801 (u_longlong_t)rbp_buf[0].rbp_blkid, 2802 (uint_t)(redact_block_get_size(&rbp_buf[0])), 2803 (u_longlong_t)redact_block_get_count(&rbp_buf[0])); 2804 2805 for (size_t i = 1; i < rlp->rlp_num_entries; i++) { 2806 (void) printf(",\n\t\t{object: %llx, offset: %llx, " 2807 "blksz: %x, count: %llx}", 2808 (u_longlong_t)rbp_buf[i].rbp_object, 2809 (u_longlong_t)rbp_buf[i].rbp_blkid, 2810 (uint_t)(redact_block_get_size(&rbp_buf[i])), 2811 (u_longlong_t)redact_block_get_count(&rbp_buf[i])); 2812 } 2813 dsl_redaction_list_rele(rl, FTAG); 2814 kmem_free(rbp_buf, size); 2815 (void) printf("]\n\n"); 2816 return (0); 2817 } 2818 2819 static void 2820 dump_bookmarks(objset_t *os, int verbosity) 2821 { 2822 zap_cursor_t zc; 2823 zap_attribute_t attr; 2824 dsl_dataset_t *ds = dmu_objset_ds(os); 2825 dsl_pool_t *dp = spa_get_dsl(os->os_spa); 2826 objset_t *mos = os->os_spa->spa_meta_objset; 2827 if (verbosity < 4) 2828 return; 2829 dsl_pool_config_enter(dp, FTAG); 2830 2831 for (zap_cursor_init(&zc, mos, ds->ds_bookmarks_obj); 2832 zap_cursor_retrieve(&zc, &attr) == 0; 2833 zap_cursor_advance(&zc)) { 2834 char osname[ZFS_MAX_DATASET_NAME_LEN]; 2835 char buf[ZFS_MAX_DATASET_NAME_LEN]; 2836 dmu_objset_name(os, osname); 2837 VERIFY3S(0, <=, snprintf(buf, sizeof (buf), "%s#%s", osname, 2838 
attr.za_name)); 2839 (void) dump_bookmark(dp, buf, verbosity >= 5, verbosity >= 6); 2840 } 2841 zap_cursor_fini(&zc); 2842 dsl_pool_config_exit(dp, FTAG); 2843 } 2844 2845 static void 2846 bpobj_count_refd(bpobj_t *bpo) 2847 { 2848 mos_obj_refd(bpo->bpo_object); 2849 2850 if (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_subobjs != 0) { 2851 mos_obj_refd(bpo->bpo_phys->bpo_subobjs); 2852 for (uint64_t i = 0; i < bpo->bpo_phys->bpo_num_subobjs; i++) { 2853 uint64_t subobj; 2854 bpobj_t subbpo; 2855 int error; 2856 VERIFY0(dmu_read(bpo->bpo_os, 2857 bpo->bpo_phys->bpo_subobjs, 2858 i * sizeof (subobj), sizeof (subobj), &subobj, 0)); 2859 error = bpobj_open(&subbpo, bpo->bpo_os, subobj); 2860 if (error != 0) { 2861 (void) printf("ERROR %u while trying to open " 2862 "subobj id %llu\n", 2863 error, (u_longlong_t)subobj); 2864 continue; 2865 } 2866 bpobj_count_refd(&subbpo); 2867 bpobj_close(&subbpo); 2868 } 2869 } 2870 } 2871 2872 static int 2873 dsl_deadlist_entry_count_refd(void *arg, dsl_deadlist_entry_t *dle) 2874 { 2875 spa_t *spa = arg; 2876 uint64_t empty_bpobj = spa->spa_dsl_pool->dp_empty_bpobj; 2877 if (dle->dle_bpobj.bpo_object != empty_bpobj) 2878 bpobj_count_refd(&dle->dle_bpobj); 2879 return (0); 2880 } 2881 2882 static int 2883 dsl_deadlist_entry_dump(void *arg, dsl_deadlist_entry_t *dle) 2884 { 2885 ASSERT(arg == NULL); 2886 if (dump_opt['d'] >= 5) { 2887 char buf[128]; 2888 (void) snprintf(buf, sizeof (buf), 2889 "mintxg %llu -> obj %llu", 2890 (longlong_t)dle->dle_mintxg, 2891 (longlong_t)dle->dle_bpobj.bpo_object); 2892 2893 dump_full_bpobj(&dle->dle_bpobj, buf, 0); 2894 } else { 2895 (void) printf("mintxg %llu -> obj %llu\n", 2896 (longlong_t)dle->dle_mintxg, 2897 (longlong_t)dle->dle_bpobj.bpo_object); 2898 } 2899 return (0); 2900 } 2901 2902 static void 2903 dump_blkptr_list(dsl_deadlist_t *dl, char *name) 2904 { 2905 char bytes[32]; 2906 char comp[32]; 2907 char uncomp[32]; 2908 char entries[32]; 2909 spa_t *spa = dmu_objset_spa(dl->dl_os); 2910 uint64_t empty_bpobj = spa->spa_dsl_pool->dp_empty_bpobj; 2911 2912 if (dl->dl_oldfmt) { 2913 if (dl->dl_bpobj.bpo_object != empty_bpobj) 2914 bpobj_count_refd(&dl->dl_bpobj); 2915 } else { 2916 mos_obj_refd(dl->dl_object); 2917 dsl_deadlist_iterate(dl, dsl_deadlist_entry_count_refd, spa); 2918 } 2919 2920 /* make sure nicenum has enough space */ 2921 CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ); 2922 CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ); 2923 CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ); 2924 CTASSERT(sizeof (entries) >= NN_NUMBUF_SZ); 2925 2926 if (dump_opt['d'] < 3) 2927 return; 2928 2929 if (dl->dl_oldfmt) { 2930 dump_full_bpobj(&dl->dl_bpobj, "old-format deadlist", 0); 2931 return; 2932 } 2933 2934 zdb_nicenum(dl->dl_phys->dl_used, bytes, sizeof (bytes)); 2935 zdb_nicenum(dl->dl_phys->dl_comp, comp, sizeof (comp)); 2936 zdb_nicenum(dl->dl_phys->dl_uncomp, uncomp, sizeof (uncomp)); 2937 zdb_nicenum(avl_numnodes(&dl->dl_tree), entries, sizeof (entries)); 2938 (void) printf("\n %s: %s (%s/%s comp), %s entries\n", 2939 name, bytes, comp, uncomp, entries); 2940 2941 if (dump_opt['d'] < 4) 2942 return; 2943 2944 (void) printf("\n"); 2945 2946 dsl_deadlist_iterate(dl, dsl_deadlist_entry_dump, NULL); 2947 } 2948 2949 static int 2950 verify_dd_livelist(objset_t *os) 2951 { 2952 uint64_t ll_used, used, ll_comp, comp, ll_uncomp, uncomp; 2953 dsl_pool_t *dp = spa_get_dsl(os->os_spa); 2954 dsl_dir_t *dd = os->os_dsl_dataset->ds_dir; 2955 2956 ASSERT(!dmu_objset_is_snapshot(os)); 2957 if (!dsl_deadlist_is_open(&dd->dd_livelist)) 2958 return (0); 
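	/*
	 * Below we compare two independent accountings of the clone's
	 * space: what its livelist tracks versus what
	 * dsl_dataset_space_written() computes for the origin/clone pair;
	 * apart from embedded blocks (see below), the two must agree.
	 */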
2959 2960 /* Iterate through the livelist to check for duplicates */ 2961 dsl_deadlist_iterate(&dd->dd_livelist, sublivelist_verify_lightweight, 2962 NULL); 2963 2964 dsl_pool_config_enter(dp, FTAG); 2965 dsl_deadlist_space(&dd->dd_livelist, &ll_used, 2966 &ll_comp, &ll_uncomp); 2967 2968 dsl_dataset_t *origin_ds; 2969 ASSERT(dsl_pool_config_held(dp)); 2970 VERIFY0(dsl_dataset_hold_obj(dp, 2971 dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin_ds)); 2972 VERIFY0(dsl_dataset_space_written(origin_ds, os->os_dsl_dataset, 2973 &used, &comp, &uncomp)); 2974 dsl_dataset_rele(origin_ds, FTAG); 2975 dsl_pool_config_exit(dp, FTAG); 2976 /* 2977 * It's possible that the dataset's uncomp space is larger than the 2978 * livelist's because livelists do not track embedded block pointers 2979 */ 2980 if (used != ll_used || comp != ll_comp || uncomp < ll_uncomp) { 2981 char nice_used[32], nice_comp[32], nice_uncomp[32]; 2982 (void) printf("Discrepancy in space accounting:\n"); 2983 zdb_nicenum(used, nice_used, sizeof (nice_used)); 2984 zdb_nicenum(comp, nice_comp, sizeof (nice_comp)); 2985 zdb_nicenum(uncomp, nice_uncomp, sizeof (nice_uncomp)); 2986 (void) printf("dir: used %s, comp %s, uncomp %s\n", 2987 nice_used, nice_comp, nice_uncomp); 2988 zdb_nicenum(ll_used, nice_used, sizeof (nice_used)); 2989 zdb_nicenum(ll_comp, nice_comp, sizeof (nice_comp)); 2990 zdb_nicenum(ll_uncomp, nice_uncomp, sizeof (nice_uncomp)); 2991 (void) printf("livelist: used %s, comp %s, uncomp %s\n", 2992 nice_used, nice_comp, nice_uncomp); 2993 return (1); 2994 } 2995 return (0); 2996 } 2997 2998 static avl_tree_t idx_tree; 2999 static avl_tree_t domain_tree; 3000 static boolean_t fuid_table_loaded; 3001 static objset_t *sa_os = NULL; 3002 static sa_attr_type_t *sa_attr_table = NULL; 3003 3004 static int 3005 open_objset(const char *path, void *tag, objset_t **osp) 3006 { 3007 int err; 3008 uint64_t sa_attrs = 0; 3009 uint64_t version = 0; 3010 3011 VERIFY3P(sa_os, ==, NULL); 3012 /* 3013 * We can't own an objset if it's redacted. Therefore, we do this 3014 * dance: hold the objset, then acquire a long hold on its dataset, then 3015 * release the pool (which is held as part of holding the objset). 
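	 *
	 * A minimal sketch of the intended call pattern (illustrative
	 * only; "pool/fs" stands in for a real dataset name):
	 *
	 *	objset_t *os;
	 *	if (open_objset("pool/fs", FTAG, &os) == 0) {
	 *		...examine os...
	 *		close_objset(os, FTAG);
	 *	}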
3016 	 */
3017 	err = dmu_objset_hold(path, tag, osp);
3018 	if (err != 0) {
3019 		(void) fprintf(stderr, "failed to hold dataset '%s': %s\n",
3020 		    path, strerror(err));
3021 		return (err);
3022 	}
3023 	dsl_dataset_long_hold(dmu_objset_ds(*osp), tag);
3024 	dsl_pool_rele(dmu_objset_pool(*osp), tag);
3025 
3026 	if (dmu_objset_type(*osp) == DMU_OST_ZFS && !(*osp)->os_encrypted) {
3027 		(void) zap_lookup(*osp, MASTER_NODE_OBJ, ZPL_VERSION_STR,
3028 		    8, 1, &version);
3029 		if (version >= ZPL_VERSION_SA) {
3030 			(void) zap_lookup(*osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS,
3031 			    8, 1, &sa_attrs);
3032 		}
3033 		err = sa_setup(*osp, sa_attrs, zfs_attr_table, ZPL_END,
3034 		    &sa_attr_table);
3035 		if (err != 0) {
3036 			(void) fprintf(stderr, "sa_setup failed: %s\n",
3037 			    strerror(err));
3038 			dsl_dataset_long_rele(dmu_objset_ds(*osp), tag);
3039 			dsl_dataset_rele(dmu_objset_ds(*osp), tag);
3040 			*osp = NULL;
3041 		}
3042 	}
3043 	sa_os = *osp;
3044 
3045 	return (err);
3046 }
3047 
3048 static void
3049 close_objset(objset_t *os, void *tag)
3050 {
3051 	VERIFY3P(os, ==, sa_os);
3052 	if (os->os_sa != NULL)
3053 		sa_tear_down(os);
3054 	dsl_dataset_long_rele(dmu_objset_ds(os), tag);
3055 	dsl_dataset_rele(dmu_objset_ds(os), tag);
3056 	sa_attr_table = NULL;
3057 	sa_os = NULL;
3058 }
3059 
3060 static void
3061 fuid_table_destroy(void)
3062 {
3063 	if (fuid_table_loaded) {
3064 		zfs_fuid_table_destroy(&idx_tree, &domain_tree);
3065 		fuid_table_loaded = B_FALSE;
3066 	}
3067 }
3068 
3069 /*
3070  * Print uid or gid information.
3071  * For a normal POSIX id, the id alone is printed in decimal.
3072  * For CIFS files with FUIDs, the fuid is printed in hex, followed by
3073  * the domain-rid string.
3074  */
3075 static void
3076 print_idstr(uint64_t id, const char *id_type)
3077 {
3078 	if (FUID_INDEX(id)) {
3079 		char *domain;
3080 
3081 		domain = zfs_fuid_idx_domain(&idx_tree, FUID_INDEX(id));
3082 		(void) printf("\t%s %llx [%s-%d]\n", id_type,
3083 		    (u_longlong_t)id, domain, (int)FUID_RID(id));
3084 	} else {
3085 		(void) printf("\t%s %llu\n", id_type, (u_longlong_t)id);
3086 	}
3087 
3088 }
3089 
3090 static void
3091 dump_uidgid(objset_t *os, uint64_t uid, uint64_t gid)
3092 {
3093 	uint32_t uid_idx, gid_idx;
3094 
3095 	uid_idx = FUID_INDEX(uid);
3096 	gid_idx = FUID_INDEX(gid);
3097 
3098 	/* Load the domain table, if not already loaded */
3099 	if (!fuid_table_loaded && (uid_idx || gid_idx)) {
3100 		uint64_t fuid_obj;
3101 
3102 		/* first find the fuid object.
It lives in the master node */
3103 		VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES,
3104 		    8, 1, &fuid_obj) == 0);
3105 		zfs_fuid_avl_tree_create(&idx_tree, &domain_tree);
3106 		(void) zfs_fuid_table_load(os, fuid_obj,
3107 		    &idx_tree, &domain_tree);
3108 		fuid_table_loaded = B_TRUE;
3109 	}
3110 
3111 	print_idstr(uid, "uid");
3112 	print_idstr(gid, "gid");
3113 }
3114 
3115 static void
3116 dump_znode_sa_xattr(sa_handle_t *hdl)
3117 {
3118 	nvlist_t *sa_xattr;
3119 	nvpair_t *elem = NULL;
3120 	int sa_xattr_size = 0;
3121 	int sa_xattr_entries = 0;
3122 	int error;
3123 	char *sa_xattr_packed;
3124 
3125 	error = sa_size(hdl, sa_attr_table[ZPL_DXATTR], &sa_xattr_size);
3126 	if (error || sa_xattr_size == 0)
3127 		return;
3128 
3129 	sa_xattr_packed = malloc(sa_xattr_size);
3130 	if (sa_xattr_packed == NULL)
3131 		return;
3132 
3133 	error = sa_lookup(hdl, sa_attr_table[ZPL_DXATTR],
3134 	    sa_xattr_packed, sa_xattr_size);
3135 	if (error) {
3136 		free(sa_xattr_packed);
3137 		return;
3138 	}
3139 
3140 	error = nvlist_unpack(sa_xattr_packed, sa_xattr_size, &sa_xattr, 0);
3141 	if (error) {
3142 		free(sa_xattr_packed);
3143 		return;
3144 	}
3145 
3146 	while ((elem = nvlist_next_nvpair(sa_xattr, elem)) != NULL)
3147 		sa_xattr_entries++;
3148 
3149 	(void) printf("\tSA xattrs: %d bytes, %d entries\n\n",
3150 	    sa_xattr_size, sa_xattr_entries);
3151 	while ((elem = nvlist_next_nvpair(sa_xattr, elem)) != NULL) {
3152 		uchar_t *value;
3153 		uint_t cnt, idx;
3154 
3155 		(void) printf("\t\t%s = ", nvpair_name(elem));
3156 		nvpair_value_byte_array(elem, &value, &cnt);
3157 		for (idx = 0; idx < cnt; ++idx) {
3158 			if (isprint(value[idx]))
3159 				(void) putchar(value[idx]);
3160 			else
3161 				(void) printf("\\%3.3o", value[idx]);
3162 		}
3163 		(void) putchar('\n');
3164 	}
3165 
3166 	nvlist_free(sa_xattr);
3167 	free(sa_xattr_packed);
3168 }
3169 
3170 static void
3171 dump_znode_symlink(sa_handle_t *hdl)
3172 {
3173 	int sa_symlink_size = 0;
3174 	char linktarget[MAXPATHLEN] = { 0 };
3175 	int error;
3176 
3177 	error = sa_size(hdl, sa_attr_table[ZPL_SYMLINK], &sa_symlink_size);
3178 	/* An oversized target would overflow linktarget; skip it. */
3179 	if (error || sa_symlink_size == 0 || sa_symlink_size >= MAXPATHLEN) {
3180 		return;
3181 	}
3182 	if (sa_lookup(hdl, sa_attr_table[ZPL_SYMLINK],
3183 	    &linktarget, sa_symlink_size) == 0)
3184 		(void) printf("\ttarget %s\n", linktarget);
3185 }
3186 
3187 /*ARGSUSED*/
3188 static void
3189 dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
3190 {
3191 	char path[MAXPATHLEN * 2];	/* allow for xattr and failure prefix */
3192 	sa_handle_t *hdl;
3193 	uint64_t xattr, rdev, gen;
3194 	uint64_t uid, gid, mode, fsize, parent, links;
3195 	uint64_t pflags;
3196 	uint64_t acctm[2], modtm[2], chgtm[2], crtm[2];
3197 	time_t z_crtime, z_atime, z_mtime, z_ctime;
3198 	sa_bulk_attr_t bulk[12];
3199 	int idx = 0;
3200 	int error;
3201 
3202 	VERIFY3P(os, ==, sa_os);
3203 	if (sa_handle_get(os, object, NULL, SA_HDL_PRIVATE, &hdl)) {
3204 		(void) printf("Failed to get handle for SA znode\n");
3205 		return;
3206 	}
3207 
3208 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_UID], NULL, &uid, 8);
3209 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GID], NULL, &gid, 8);
3210 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_LINKS], NULL,
3211 	    &links, 8);
3212 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GEN], NULL, &gen, 8);
3213 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MODE], NULL,
3214 	    &mode, 8);
3215 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_PARENT],
3216 	    NULL, &parent, 8);
3217 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_SIZE], NULL,
3218 	    &fsize, 8);
3219 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_ATIME],
NULL, 3220 acctm, 16); 3221 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MTIME], NULL, 3222 modtm, 16); 3223 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CRTIME], NULL, 3224 crtm, 16); 3225 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CTIME], NULL, 3226 chgtm, 16); 3227 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_FLAGS], NULL, 3228 &pflags, 8); 3229 3230 if (sa_bulk_lookup(hdl, bulk, idx)) { 3231 (void) sa_handle_destroy(hdl); 3232 return; 3233 } 3234 3235 z_crtime = (time_t)crtm[0]; 3236 z_atime = (time_t)acctm[0]; 3237 z_mtime = (time_t)modtm[0]; 3238 z_ctime = (time_t)chgtm[0]; 3239 3240 if (dump_opt['d'] > 4) { 3241 error = zfs_obj_to_path(os, object, path, sizeof (path)); 3242 if (error == ESTALE) { 3243 (void) snprintf(path, sizeof (path), "on delete queue"); 3244 } else if (error != 0) { 3245 leaked_objects++; 3246 (void) snprintf(path, sizeof (path), 3247 "path not found, possibly leaked"); 3248 } 3249 (void) printf("\tpath %s\n", path); 3250 } 3251 3252 if (S_ISLNK(mode)) 3253 dump_znode_symlink(hdl); 3254 dump_uidgid(os, uid, gid); 3255 (void) printf("\tatime %s", ctime(&z_atime)); 3256 (void) printf("\tmtime %s", ctime(&z_mtime)); 3257 (void) printf("\tctime %s", ctime(&z_ctime)); 3258 (void) printf("\tcrtime %s", ctime(&z_crtime)); 3259 (void) printf("\tgen %llu\n", (u_longlong_t)gen); 3260 (void) printf("\tmode %llo\n", (u_longlong_t)mode); 3261 (void) printf("\tsize %llu\n", (u_longlong_t)fsize); 3262 (void) printf("\tparent %llu\n", (u_longlong_t)parent); 3263 (void) printf("\tlinks %llu\n", (u_longlong_t)links); 3264 (void) printf("\tpflags %llx\n", (u_longlong_t)pflags); 3265 if (dmu_objset_projectquota_enabled(os) && (pflags & ZFS_PROJID)) { 3266 uint64_t projid; 3267 3268 if (sa_lookup(hdl, sa_attr_table[ZPL_PROJID], &projid, 3269 sizeof (uint64_t)) == 0) 3270 (void) printf("\tprojid %llu\n", (u_longlong_t)projid); 3271 } 3272 if (sa_lookup(hdl, sa_attr_table[ZPL_XATTR], &xattr, 3273 sizeof (uint64_t)) == 0) 3274 (void) printf("\txattr %llu\n", (u_longlong_t)xattr); 3275 if (sa_lookup(hdl, sa_attr_table[ZPL_RDEV], &rdev, 3276 sizeof (uint64_t)) == 0) 3277 (void) printf("\trdev 0x%016llx\n", (u_longlong_t)rdev); 3278 dump_znode_sa_xattr(hdl); 3279 sa_handle_destroy(hdl); 3280 } 3281 3282 /*ARGSUSED*/ 3283 static void 3284 dump_acl(objset_t *os, uint64_t object, void *data, size_t size) 3285 { 3286 } 3287 3288 /*ARGSUSED*/ 3289 static void 3290 dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size) 3291 { 3292 } 3293 3294 static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = { 3295 dump_none, /* unallocated */ 3296 dump_zap, /* object directory */ 3297 dump_uint64, /* object array */ 3298 dump_none, /* packed nvlist */ 3299 dump_packed_nvlist, /* packed nvlist size */ 3300 dump_none, /* bpobj */ 3301 dump_bpobj, /* bpobj header */ 3302 dump_none, /* SPA space map header */ 3303 dump_none, /* SPA space map */ 3304 dump_none, /* ZIL intent log */ 3305 dump_dnode, /* DMU dnode */ 3306 dump_dmu_objset, /* DMU objset */ 3307 dump_dsl_dir, /* DSL directory */ 3308 dump_zap, /* DSL directory child map */ 3309 dump_zap, /* DSL dataset snap map */ 3310 dump_zap, /* DSL props */ 3311 dump_dsl_dataset, /* DSL dataset */ 3312 dump_znode, /* ZFS znode */ 3313 dump_acl, /* ZFS V0 ACL */ 3314 dump_uint8, /* ZFS plain file */ 3315 dump_zpldir, /* ZFS directory */ 3316 dump_zap, /* ZFS master node */ 3317 dump_zap, /* ZFS delete queue */ 3318 dump_uint8, /* zvol object */ 3319 dump_zap, /* zvol prop */ 3320 dump_uint8, /* other uint8[] */ 3321 dump_uint64, /* 
other uint64[] */ 3322 dump_zap, /* other ZAP */ 3323 dump_zap, /* persistent error log */ 3324 dump_uint8, /* SPA history */ 3325 dump_history_offsets, /* SPA history offsets */ 3326 dump_zap, /* Pool properties */ 3327 dump_zap, /* DSL permissions */ 3328 dump_acl, /* ZFS ACL */ 3329 dump_uint8, /* ZFS SYSACL */ 3330 dump_none, /* FUID nvlist */ 3331 dump_packed_nvlist, /* FUID nvlist size */ 3332 dump_zap, /* DSL dataset next clones */ 3333 dump_zap, /* DSL scrub queue */ 3334 dump_zap, /* ZFS user/group/project used */ 3335 dump_zap, /* ZFS user/group/project quota */ 3336 dump_zap, /* snapshot refcount tags */ 3337 dump_ddt_zap, /* DDT ZAP object */ 3338 dump_zap, /* DDT statistics */ 3339 dump_znode, /* SA object */ 3340 dump_zap, /* SA Master Node */ 3341 dump_sa_attrs, /* SA attribute registration */ 3342 dump_sa_layouts, /* SA attribute layouts */ 3343 dump_zap, /* DSL scrub translations */ 3344 dump_none, /* fake dedup BP */ 3345 dump_zap, /* deadlist */ 3346 dump_none, /* deadlist hdr */ 3347 dump_zap, /* dsl clones */ 3348 dump_bpobj_subobjs, /* bpobj subobjs */ 3349 dump_unknown, /* Unknown type, must be last */ 3350 }; 3351 3352 static boolean_t 3353 match_object_type(dmu_object_type_t obj_type, uint64_t flags) 3354 { 3355 boolean_t match = B_TRUE; 3356 3357 switch (obj_type) { 3358 case DMU_OT_DIRECTORY_CONTENTS: 3359 if (!(flags & ZOR_FLAG_DIRECTORY)) 3360 match = B_FALSE; 3361 break; 3362 case DMU_OT_PLAIN_FILE_CONTENTS: 3363 if (!(flags & ZOR_FLAG_PLAIN_FILE)) 3364 match = B_FALSE; 3365 break; 3366 case DMU_OT_SPACE_MAP: 3367 if (!(flags & ZOR_FLAG_SPACE_MAP)) 3368 match = B_FALSE; 3369 break; 3370 default: 3371 if (strcmp(zdb_ot_name(obj_type), "zap") == 0) { 3372 if (!(flags & ZOR_FLAG_ZAP)) 3373 match = B_FALSE; 3374 break; 3375 } 3376 3377 /* 3378 * If all bits except some of the supported flags are 3379 * set, the user combined the all-types flag (A) with 3380 * a negated flag to exclude some types (e.g. A-f to 3381 * show all object types except plain files). 3382 */ 3383 if ((flags | ZOR_SUPPORTED_FLAGS) != ZOR_FLAG_ALL_TYPES) 3384 match = B_FALSE; 3385 3386 break; 3387 } 3388 3389 return (match); 3390 } 3391 3392 static void 3393 dump_object(objset_t *os, uint64_t object, int verbosity, 3394 boolean_t *print_header, uint64_t *dnode_slots_used, uint64_t flags) 3395 { 3396 dmu_buf_t *db = NULL; 3397 dmu_object_info_t doi; 3398 dnode_t *dn; 3399 boolean_t dnode_held = B_FALSE; 3400 void *bonus = NULL; 3401 size_t bsize = 0; 3402 char iblk[32], dblk[32], lsize[32], asize[32], fill[32], dnsize[32]; 3403 char bonus_size[32]; 3404 char aux[50]; 3405 int error; 3406 3407 /* make sure nicenum has enough space */ 3408 CTASSERT(sizeof (iblk) >= NN_NUMBUF_SZ); 3409 CTASSERT(sizeof (dblk) >= NN_NUMBUF_SZ); 3410 CTASSERT(sizeof (lsize) >= NN_NUMBUF_SZ); 3411 CTASSERT(sizeof (asize) >= NN_NUMBUF_SZ); 3412 CTASSERT(sizeof (bonus_size) >= NN_NUMBUF_SZ); 3413 3414 if (*print_header) { 3415 (void) printf("\n%10s %3s %5s %5s %5s %6s %5s %6s %s\n", 3416 "Object", "lvl", "iblk", "dblk", "dsize", "dnsize", 3417 "lsize", "%full", "type"); 3418 *print_header = 0; 3419 } 3420 3421 if (object == 0) { 3422 dn = DMU_META_DNODE(os); 3423 dmu_object_info_from_dnode(dn, &doi); 3424 } else { 3425 /* 3426 * Encrypted datasets will have sensitive bonus buffers 3427 * encrypted. Therefore we cannot hold the bonus buffer and 3428 * must hold the dnode itself instead. 
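		 *
		 * Illustrative summary of the two paths taken below:
		 *
		 *	encrypted bonus:  dnode_hold(os, object, FTAG, &dn);
		 *	plaintext bonus:  dmu_bonus_hold(os, object, FTAG, &db);
		 *			  dn = DB_DNODE((dmu_buf_impl_t *)db);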
3429 */ 3430 error = dmu_object_info(os, object, &doi); 3431 if (error) 3432 fatal("dmu_object_info() failed, errno %u", error); 3433 3434 if (os->os_encrypted && 3435 DMU_OT_IS_ENCRYPTED(doi.doi_bonus_type)) { 3436 error = dnode_hold(os, object, FTAG, &dn); 3437 if (error) 3438 fatal("dnode_hold() failed, errno %u", error); 3439 dnode_held = B_TRUE; 3440 } else { 3441 error = dmu_bonus_hold(os, object, FTAG, &db); 3442 if (error) 3443 fatal("dmu_bonus_hold(%llu) failed, errno %u", 3444 object, error); 3445 bonus = db->db_data; 3446 bsize = db->db_size; 3447 dn = DB_DNODE((dmu_buf_impl_t *)db); 3448 } 3449 } 3450 3451 /* 3452 * Default to showing all object types if no flags were specified. 3453 */ 3454 if (flags != 0 && flags != ZOR_FLAG_ALL_TYPES && 3455 !match_object_type(doi.doi_type, flags)) 3456 goto out; 3457 3458 if (dnode_slots_used) 3459 *dnode_slots_used = doi.doi_dnodesize / DNODE_MIN_SIZE; 3460 3461 zdb_nicenum(doi.doi_metadata_block_size, iblk, sizeof (iblk)); 3462 zdb_nicenum(doi.doi_data_block_size, dblk, sizeof (dblk)); 3463 zdb_nicenum(doi.doi_max_offset, lsize, sizeof (lsize)); 3464 zdb_nicenum(doi.doi_physical_blocks_512 << 9, asize, sizeof (asize)); 3465 zdb_nicenum(doi.doi_bonus_size, bonus_size, sizeof (bonus_size)); 3466 zdb_nicenum(doi.doi_dnodesize, dnsize, sizeof (dnsize)); 3467 (void) sprintf(fill, "%6.2f", 100.0 * doi.doi_fill_count * 3468 doi.doi_data_block_size / (object == 0 ? DNODES_PER_BLOCK : 1) / 3469 doi.doi_max_offset); 3470 3471 aux[0] = '\0'; 3472 3473 if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) { 3474 (void) snprintf(aux + strlen(aux), sizeof (aux) - strlen(aux), 3475 " (K=%s)", ZDB_CHECKSUM_NAME(doi.doi_checksum)); 3476 } 3477 3478 if (doi.doi_compress == ZIO_COMPRESS_INHERIT && 3479 ZIO_COMPRESS_HASLEVEL(os->os_compress) && verbosity >= 6) { 3480 const char *compname = NULL; 3481 if (zfs_prop_index_to_string(ZFS_PROP_COMPRESSION, 3482 ZIO_COMPRESS_RAW(os->os_compress, os->os_complevel), 3483 &compname) == 0) { 3484 (void) snprintf(aux + strlen(aux), 3485 sizeof (aux) - strlen(aux), " (Z=inherit=%s)", 3486 compname); 3487 } else { 3488 (void) snprintf(aux + strlen(aux), 3489 sizeof (aux) - strlen(aux), 3490 " (Z=inherit=%s-unknown)", 3491 ZDB_COMPRESS_NAME(os->os_compress)); 3492 } 3493 } else if (doi.doi_compress == ZIO_COMPRESS_INHERIT && verbosity >= 6) { 3494 (void) snprintf(aux + strlen(aux), sizeof (aux) - strlen(aux), 3495 " (Z=inherit=%s)", ZDB_COMPRESS_NAME(os->os_compress)); 3496 } else if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) { 3497 (void) snprintf(aux + strlen(aux), sizeof (aux) - strlen(aux), 3498 " (Z=%s)", ZDB_COMPRESS_NAME(doi.doi_compress)); 3499 } 3500 3501 (void) printf("%10lld %3u %5s %5s %5s %6s %5s %6s %s%s\n", 3502 (u_longlong_t)object, doi.doi_indirection, iblk, dblk, 3503 asize, dnsize, lsize, fill, zdb_ot_name(doi.doi_type), aux); 3504 3505 if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) { 3506 (void) printf("%10s %3s %5s %5s %5s %5s %5s %6s %s\n", 3507 "", "", "", "", "", "", bonus_size, "bonus", 3508 zdb_ot_name(doi.doi_bonus_type)); 3509 } 3510 3511 if (verbosity >= 4) { 3512 (void) printf("\tdnode flags: %s%s%s%s\n", 3513 (dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ? 3514 "USED_BYTES " : "", 3515 (dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ? 3516 "USERUSED_ACCOUNTED " : "", 3517 (dn->dn_phys->dn_flags & DNODE_FLAG_USEROBJUSED_ACCOUNTED) ? 3518 "USEROBJUSED_ACCOUNTED " : "", 3519 (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ? 
3520 "SPILL_BLKPTR" : ""); 3521 (void) printf("\tdnode maxblkid: %llu\n", 3522 (longlong_t)dn->dn_phys->dn_maxblkid); 3523 3524 if (!dnode_held) { 3525 object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os, 3526 object, bonus, bsize); 3527 } else { 3528 (void) printf("\t\t(bonus encrypted)\n"); 3529 } 3530 3531 if (!os->os_encrypted || !DMU_OT_IS_ENCRYPTED(doi.doi_type)) { 3532 object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object, 3533 NULL, 0); 3534 } else { 3535 (void) printf("\t\t(object encrypted)\n"); 3536 } 3537 3538 *print_header = B_TRUE; 3539 } 3540 3541 if (verbosity >= 5) 3542 dump_indirect(dn); 3543 3544 if (verbosity >= 5) { 3545 /* 3546 * Report the list of segments that comprise the object. 3547 */ 3548 uint64_t start = 0; 3549 uint64_t end; 3550 uint64_t blkfill = 1; 3551 int minlvl = 1; 3552 3553 if (dn->dn_type == DMU_OT_DNODE) { 3554 minlvl = 0; 3555 blkfill = DNODES_PER_BLOCK; 3556 } 3557 3558 for (;;) { 3559 char segsize[32]; 3560 /* make sure nicenum has enough space */ 3561 CTASSERT(sizeof (segsize) >= NN_NUMBUF_SZ); 3562 error = dnode_next_offset(dn, 3563 0, &start, minlvl, blkfill, 0); 3564 if (error) 3565 break; 3566 end = start; 3567 error = dnode_next_offset(dn, 3568 DNODE_FIND_HOLE, &end, minlvl, blkfill, 0); 3569 zdb_nicenum(end - start, segsize, sizeof (segsize)); 3570 (void) printf("\t\tsegment [%016llx, %016llx)" 3571 " size %5s\n", (u_longlong_t)start, 3572 (u_longlong_t)end, segsize); 3573 if (error) 3574 break; 3575 start = end; 3576 } 3577 } 3578 3579 out: 3580 if (db != NULL) 3581 dmu_buf_rele(db, FTAG); 3582 if (dnode_held) 3583 dnode_rele(dn, FTAG); 3584 } 3585 3586 static void 3587 count_dir_mos_objects(dsl_dir_t *dd) 3588 { 3589 mos_obj_refd(dd->dd_object); 3590 mos_obj_refd(dsl_dir_phys(dd)->dd_child_dir_zapobj); 3591 mos_obj_refd(dsl_dir_phys(dd)->dd_deleg_zapobj); 3592 mos_obj_refd(dsl_dir_phys(dd)->dd_props_zapobj); 3593 mos_obj_refd(dsl_dir_phys(dd)->dd_clones); 3594 3595 /* 3596 * The dd_crypto_obj can be referenced by multiple dsl_dir's. 3597 * Ignore the references after the first one. 3598 */ 3599 mos_obj_refd_multiple(dd->dd_crypto_obj); 3600 } 3601 3602 static void 3603 count_ds_mos_objects(dsl_dataset_t *ds) 3604 { 3605 mos_obj_refd(ds->ds_object); 3606 mos_obj_refd(dsl_dataset_phys(ds)->ds_next_clones_obj); 3607 mos_obj_refd(dsl_dataset_phys(ds)->ds_props_obj); 3608 mos_obj_refd(dsl_dataset_phys(ds)->ds_userrefs_obj); 3609 mos_obj_refd(dsl_dataset_phys(ds)->ds_snapnames_zapobj); 3610 mos_obj_refd(ds->ds_bookmarks_obj); 3611 3612 if (!dsl_dataset_is_snapshot(ds)) { 3613 count_dir_mos_objects(ds->ds_dir); 3614 } 3615 } 3616 3617 static const char *objset_types[DMU_OST_NUMTYPES] = { 3618 "NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" }; 3619 3620 /* 3621 * Parse a string denoting a range of object IDs of the form 3622 * <start>[:<end>[:flags]], and store the results in zor. 3623 * Return 0 on success. On error, return 1 and update the msg 3624 * pointer to point to a descriptive error message. 
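 *
 * Illustrative inputs (flag letters as registered in flagbits[],
 * e.g. 'f' for plain files and 'A' for all object types):
 *
 *	"128"		object 128 only
 *	"1:1000"	objects 1 through 1000, all object types
 *	"1:1000:A-f"	the same range, every type except plain files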
3625 */ 3626 static int 3627 parse_object_range(char *range, zopt_object_range_t *zor, char **msg) 3628 { 3629 uint64_t flags = 0; 3630 char *p, *s, *dup, *flagstr, *tmp = NULL; 3631 size_t len; 3632 int i; 3633 int rc = 0; 3634 3635 if (strchr(range, ':') == NULL) { 3636 zor->zor_obj_start = strtoull(range, &p, 0); 3637 if (*p != '\0') { 3638 *msg = "Invalid characters in object ID"; 3639 rc = 1; 3640 } 3641 zor->zor_obj_start = ZDB_MAP_OBJECT_ID(zor->zor_obj_start); 3642 zor->zor_obj_end = zor->zor_obj_start; 3643 return (rc); 3644 } 3645 3646 if (strchr(range, ':') == range) { 3647 *msg = "Invalid leading colon"; 3648 rc = 1; 3649 return (rc); 3650 } 3651 3652 len = strlen(range); 3653 if (range[len - 1] == ':') { 3654 *msg = "Invalid trailing colon"; 3655 rc = 1; 3656 return (rc); 3657 } 3658 3659 dup = strdup(range); 3660 s = strtok_r(dup, ":", &tmp); 3661 zor->zor_obj_start = strtoull(s, &p, 0); 3662 3663 if (*p != '\0') { 3664 *msg = "Invalid characters in start object ID"; 3665 rc = 1; 3666 goto out; 3667 } 3668 3669 s = strtok_r(NULL, ":", &tmp); 3670 zor->zor_obj_end = strtoull(s, &p, 0); 3671 3672 if (*p != '\0') { 3673 *msg = "Invalid characters in end object ID"; 3674 rc = 1; 3675 goto out; 3676 } 3677 3678 if (zor->zor_obj_start > zor->zor_obj_end) { 3679 *msg = "Start object ID may not exceed end object ID"; 3680 rc = 1; 3681 goto out; 3682 } 3683 3684 s = strtok_r(NULL, ":", &tmp); 3685 if (s == NULL) { 3686 zor->zor_flags = ZOR_FLAG_ALL_TYPES; 3687 goto out; 3688 } else if (strtok_r(NULL, ":", &tmp) != NULL) { 3689 *msg = "Invalid colon-delimited field after flags"; 3690 rc = 1; 3691 goto out; 3692 } 3693 3694 flagstr = s; 3695 for (i = 0; flagstr[i]; i++) { 3696 int bit; 3697 boolean_t negation = (flagstr[i] == '-'); 3698 3699 if (negation) { 3700 i++; 3701 if (flagstr[i] == '\0') { 3702 *msg = "Invalid trailing negation operator"; 3703 rc = 1; 3704 goto out; 3705 } 3706 } 3707 bit = flagbits[(uchar_t)flagstr[i]]; 3708 if (bit == 0) { 3709 *msg = "Invalid flag"; 3710 rc = 1; 3711 goto out; 3712 } 3713 if (negation) 3714 flags &= ~bit; 3715 else 3716 flags |= bit; 3717 } 3718 zor->zor_flags = flags; 3719 3720 zor->zor_obj_start = ZDB_MAP_OBJECT_ID(zor->zor_obj_start); 3721 zor->zor_obj_end = ZDB_MAP_OBJECT_ID(zor->zor_obj_end); 3722 3723 out: 3724 free(dup); 3725 return (rc); 3726 } 3727 3728 static void 3729 dump_objset(objset_t *os) 3730 { 3731 dmu_objset_stats_t dds = { 0 }; 3732 uint64_t object, object_count; 3733 uint64_t refdbytes, usedobjs, scratch; 3734 char numbuf[32]; 3735 char blkbuf[BP_SPRINTF_LEN + 20]; 3736 char osname[ZFS_MAX_DATASET_NAME_LEN]; 3737 const char *type = "UNKNOWN"; 3738 int verbosity = dump_opt['d']; 3739 boolean_t print_header; 3740 unsigned i; 3741 int error; 3742 uint64_t total_slots_used = 0; 3743 uint64_t max_slot_used = 0; 3744 uint64_t dnode_slots; 3745 uint64_t obj_start; 3746 uint64_t obj_end; 3747 uint64_t flags; 3748 3749 /* make sure nicenum has enough space */ 3750 CTASSERT(sizeof (numbuf) >= NN_NUMBUF_SZ); 3751 3752 dsl_pool_config_enter(dmu_objset_pool(os), FTAG); 3753 dmu_objset_fast_stat(os, &dds); 3754 dsl_pool_config_exit(dmu_objset_pool(os), FTAG); 3755 3756 print_header = B_TRUE; 3757 3758 if (dds.dds_type < DMU_OST_NUMTYPES) 3759 type = objset_types[dds.dds_type]; 3760 3761 if (dds.dds_type == DMU_OST_META) { 3762 dds.dds_creation_txg = TXG_INITIAL; 3763 usedobjs = BP_GET_FILL(os->os_rootbp); 3764 refdbytes = dsl_dir_phys(os->os_spa->spa_dsl_pool->dp_mos_dir)-> 3765 dd_used_bytes; 3766 } else { 3767 dmu_objset_space(os, 
&refdbytes, &scratch, &usedobjs, &scratch); 3768 } 3769 3770 ASSERT3U(usedobjs, ==, BP_GET_FILL(os->os_rootbp)); 3771 3772 zdb_nicenum(refdbytes, numbuf, sizeof (numbuf)); 3773 3774 if (verbosity >= 4) { 3775 (void) snprintf(blkbuf, sizeof (blkbuf), ", rootbp "); 3776 (void) snprintf_blkptr(blkbuf + strlen(blkbuf), 3777 sizeof (blkbuf) - strlen(blkbuf), os->os_rootbp); 3778 } else { 3779 blkbuf[0] = '\0'; 3780 } 3781 3782 dmu_objset_name(os, osname); 3783 3784 (void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, " 3785 "%s, %llu objects%s%s\n", 3786 osname, type, (u_longlong_t)dmu_objset_id(os), 3787 (u_longlong_t)dds.dds_creation_txg, 3788 numbuf, (u_longlong_t)usedobjs, blkbuf, 3789 (dds.dds_inconsistent) ? " (inconsistent)" : ""); 3790 3791 for (i = 0; i < zopt_object_args; i++) { 3792 obj_start = zopt_object_ranges[i].zor_obj_start; 3793 obj_end = zopt_object_ranges[i].zor_obj_end; 3794 flags = zopt_object_ranges[i].zor_flags; 3795 3796 object = obj_start; 3797 if (object == 0 || obj_start == obj_end) 3798 dump_object(os, object, verbosity, &print_header, NULL, 3799 flags); 3800 else 3801 object--; 3802 3803 while ((dmu_object_next(os, &object, B_FALSE, 0) == 0) && 3804 object <= obj_end) { 3805 dump_object(os, object, verbosity, &print_header, NULL, 3806 flags); 3807 } 3808 } 3809 3810 if (zopt_object_args > 0) { 3811 (void) printf("\n"); 3812 return; 3813 } 3814 3815 if (dump_opt['i'] != 0 || verbosity >= 2) 3816 dump_intent_log(dmu_objset_zil(os)); 3817 3818 if (dmu_objset_ds(os) != NULL) { 3819 dsl_dataset_t *ds = dmu_objset_ds(os); 3820 dump_blkptr_list(&ds->ds_deadlist, "Deadlist"); 3821 if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist) && 3822 !dmu_objset_is_snapshot(os)) { 3823 dump_blkptr_list(&ds->ds_dir->dd_livelist, "Livelist"); 3824 if (verify_dd_livelist(os) != 0) 3825 fatal("livelist is incorrect"); 3826 } 3827 3828 if (dsl_dataset_remap_deadlist_exists(ds)) { 3829 (void) printf("ds_remap_deadlist:\n"); 3830 dump_blkptr_list(&ds->ds_remap_deadlist, "Deadlist"); 3831 } 3832 count_ds_mos_objects(ds); 3833 } 3834 3835 if (dmu_objset_ds(os) != NULL) 3836 dump_bookmarks(os, verbosity); 3837 3838 if (verbosity < 2) 3839 return; 3840 3841 if (BP_IS_HOLE(os->os_rootbp)) 3842 return; 3843 3844 dump_object(os, 0, verbosity, &print_header, NULL, 0); 3845 object_count = 0; 3846 if (DMU_USERUSED_DNODE(os) != NULL && 3847 DMU_USERUSED_DNODE(os)->dn_type != 0) { 3848 dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header, 3849 NULL, 0); 3850 dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header, 3851 NULL, 0); 3852 } 3853 3854 if (DMU_PROJECTUSED_DNODE(os) != NULL && 3855 DMU_PROJECTUSED_DNODE(os)->dn_type != 0) 3856 dump_object(os, DMU_PROJECTUSED_OBJECT, verbosity, 3857 &print_header, NULL, 0); 3858 3859 object = 0; 3860 while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) { 3861 dump_object(os, object, verbosity, &print_header, &dnode_slots, 3862 0); 3863 object_count++; 3864 total_slots_used += dnode_slots; 3865 max_slot_used = object + dnode_slots - 1; 3866 } 3867 3868 (void) printf("\n"); 3869 3870 (void) printf(" Dnode slots:\n"); 3871 (void) printf("\tTotal used: %10llu\n", 3872 (u_longlong_t)total_slots_used); 3873 (void) printf("\tMax used: %10llu\n", 3874 (u_longlong_t)max_slot_used); 3875 (void) printf("\tPercent empty: %10lf\n", 3876 (double)(max_slot_used - total_slots_used)*100 / 3877 (double)max_slot_used); 3878 (void) printf("\n"); 3879 3880 if (error != ESRCH) { 3881 (void) fprintf(stderr, "dmu_object_next() = %d\n", error); 3882 abort(); 
3883 }
3884
3885 	ASSERT3U(object_count, ==, usedobjs);
3886
3887 	if (leaked_objects != 0) {
3888 		(void) printf("%d potentially leaked objects detected\n",
3889 		    leaked_objects);
3890 		leaked_objects = 0;
3891 	}
3892 }
3893
3894 static void
3895 dump_uberblock(uberblock_t *ub, const char *header, const char *footer)
3896 {
3897 	time_t timestamp = ub->ub_timestamp;
3898
3899 	(void) printf("%s", header ? header : "");
3900 	(void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic);
3901 	(void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version);
3902 	(void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg);
3903 	(void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum);
3904 	(void) printf("\ttimestamp = %llu UTC = %s",
3905 	    (u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp)));
3906
3907 	(void) printf("\tmmp_magic = %016llx\n",
3908 	    (u_longlong_t)ub->ub_mmp_magic);
3909 	if (MMP_VALID(ub)) {
3910 		(void) printf("\tmmp_delay = %0llu\n",
3911 		    (u_longlong_t)ub->ub_mmp_delay);
3912 		if (MMP_SEQ_VALID(ub))
3913 			(void) printf("\tmmp_seq = %u\n",
3914 			    (unsigned int) MMP_SEQ(ub));
3915 		if (MMP_FAIL_INT_VALID(ub))
3916 			(void) printf("\tmmp_fail = %u\n",
3917 			    (unsigned int) MMP_FAIL_INT(ub));
3918 		if (MMP_INTERVAL_VALID(ub))
3919 			(void) printf("\tmmp_write = %u\n",
3920 			    (unsigned int) MMP_INTERVAL(ub));
3921 		/* After MMP_* to make summarize_uberblock_mmp cleaner */
3922 		(void) printf("\tmmp_valid = %x\n",
3923 		    (unsigned int) ub->ub_mmp_config & 0xFF);
3924 	}
3925
3926 	if (dump_opt['u'] >= 4) {
3927 		char blkbuf[BP_SPRINTF_LEN];
3928 		snprintf_blkptr(blkbuf, sizeof (blkbuf), &ub->ub_rootbp);
3929 		(void) printf("\trootbp = %s\n", blkbuf);
3930 	}
3931 	(void) printf("\tcheckpoint_txg = %llu\n",
3932 	    (u_longlong_t)ub->ub_checkpoint_txg);
3933 	(void) printf("%s", footer ?
footer : ""); 3934 } 3935 3936 static void 3937 dump_config(spa_t *spa) 3938 { 3939 dmu_buf_t *db; 3940 size_t nvsize = 0; 3941 int error = 0; 3942 3943 3944 error = dmu_bonus_hold(spa->spa_meta_objset, 3945 spa->spa_config_object, FTAG, &db); 3946 3947 if (error == 0) { 3948 nvsize = *(uint64_t *)db->db_data; 3949 dmu_buf_rele(db, FTAG); 3950 3951 (void) printf("\nMOS Configuration:\n"); 3952 dump_packed_nvlist(spa->spa_meta_objset, 3953 spa->spa_config_object, (void *)&nvsize, 1); 3954 } else { 3955 (void) fprintf(stderr, "dmu_bonus_hold(%llu) failed, errno %d", 3956 (u_longlong_t)spa->spa_config_object, error); 3957 } 3958 } 3959 3960 static void 3961 dump_cachefile(const char *cachefile) 3962 { 3963 int fd; 3964 struct stat64 statbuf; 3965 char *buf; 3966 nvlist_t *config; 3967 3968 if ((fd = open64(cachefile, O_RDONLY)) < 0) { 3969 (void) printf("cannot open '%s': %s\n", cachefile, 3970 strerror(errno)); 3971 exit(1); 3972 } 3973 3974 if (fstat64(fd, &statbuf) != 0) { 3975 (void) printf("failed to stat '%s': %s\n", cachefile, 3976 strerror(errno)); 3977 exit(1); 3978 } 3979 3980 if ((buf = malloc(statbuf.st_size)) == NULL) { 3981 (void) fprintf(stderr, "failed to allocate %llu bytes\n", 3982 (u_longlong_t)statbuf.st_size); 3983 exit(1); 3984 } 3985 3986 if (read(fd, buf, statbuf.st_size) != statbuf.st_size) { 3987 (void) fprintf(stderr, "failed to read %llu bytes\n", 3988 (u_longlong_t)statbuf.st_size); 3989 exit(1); 3990 } 3991 3992 (void) close(fd); 3993 3994 if (nvlist_unpack(buf, statbuf.st_size, &config, 0) != 0) { 3995 (void) fprintf(stderr, "failed to unpack nvlist\n"); 3996 exit(1); 3997 } 3998 3999 free(buf); 4000 4001 dump_nvlist(config, 0); 4002 4003 nvlist_free(config); 4004 } 4005 4006 /* 4007 * ZFS label nvlist stats 4008 */ 4009 typedef struct zdb_nvl_stats { 4010 int zns_list_count; 4011 int zns_leaf_count; 4012 size_t zns_leaf_largest; 4013 size_t zns_leaf_total; 4014 nvlist_t *zns_string; 4015 nvlist_t *zns_uint64; 4016 nvlist_t *zns_boolean; 4017 } zdb_nvl_stats_t; 4018 4019 static void 4020 collect_nvlist_stats(nvlist_t *nvl, zdb_nvl_stats_t *stats) 4021 { 4022 nvlist_t *list, **array; 4023 nvpair_t *nvp = NULL; 4024 char *name; 4025 uint_t i, items; 4026 4027 stats->zns_list_count++; 4028 4029 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 4030 name = nvpair_name(nvp); 4031 4032 switch (nvpair_type(nvp)) { 4033 case DATA_TYPE_STRING: 4034 fnvlist_add_string(stats->zns_string, name, 4035 fnvpair_value_string(nvp)); 4036 break; 4037 case DATA_TYPE_UINT64: 4038 fnvlist_add_uint64(stats->zns_uint64, name, 4039 fnvpair_value_uint64(nvp)); 4040 break; 4041 case DATA_TYPE_BOOLEAN: 4042 fnvlist_add_boolean(stats->zns_boolean, name); 4043 break; 4044 case DATA_TYPE_NVLIST: 4045 if (nvpair_value_nvlist(nvp, &list) == 0) 4046 collect_nvlist_stats(list, stats); 4047 break; 4048 case DATA_TYPE_NVLIST_ARRAY: 4049 if (nvpair_value_nvlist_array(nvp, &array, &items) != 0) 4050 break; 4051 4052 for (i = 0; i < items; i++) { 4053 collect_nvlist_stats(array[i], stats); 4054 4055 /* collect stats on leaf vdev */ 4056 if (strcmp(name, "children") == 0) { 4057 size_t size; 4058 4059 (void) nvlist_size(array[i], &size, 4060 NV_ENCODE_XDR); 4061 stats->zns_leaf_total += size; 4062 if (size > stats->zns_leaf_largest) 4063 stats->zns_leaf_largest = size; 4064 stats->zns_leaf_count++; 4065 } 4066 } 4067 break; 4068 default: 4069 (void) printf("skip type %d!\n", (int)nvpair_type(nvp)); 4070 } 4071 } 4072 } 4073 4074 static void 4075 dump_nvlist_stats(nvlist_t *nvl, size_t cap) 4076 { 
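	/*
	 * Summarize how the label nvlist's capacity is consumed: bytes
	 * used vs. free, a per-type breakdown for integers, strings and
	 * booleans (remaining bytes are counted as nvlist overhead), and
	 * the average and largest encoded sizes of leaf vdev entries.
	 */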
4077 zdb_nvl_stats_t stats = { 0 }; 4078 size_t size, sum = 0, total; 4079 size_t noise; 4080 4081 /* requires nvlist with non-unique names for stat collection */ 4082 VERIFY0(nvlist_alloc(&stats.zns_string, 0, 0)); 4083 VERIFY0(nvlist_alloc(&stats.zns_uint64, 0, 0)); 4084 VERIFY0(nvlist_alloc(&stats.zns_boolean, 0, 0)); 4085 VERIFY0(nvlist_size(stats.zns_boolean, &noise, NV_ENCODE_XDR)); 4086 4087 (void) printf("\n\nZFS Label NVList Config Stats:\n"); 4088 4089 VERIFY0(nvlist_size(nvl, &total, NV_ENCODE_XDR)); 4090 (void) printf(" %d bytes used, %d bytes free (using %4.1f%%)\n\n", 4091 (int)total, (int)(cap - total), 100.0 * total / cap); 4092 4093 collect_nvlist_stats(nvl, &stats); 4094 4095 VERIFY0(nvlist_size(stats.zns_uint64, &size, NV_ENCODE_XDR)); 4096 size -= noise; 4097 sum += size; 4098 (void) printf("%12s %4d %6d bytes (%5.2f%%)\n", "integers:", 4099 (int)fnvlist_num_pairs(stats.zns_uint64), 4100 (int)size, 100.0 * size / total); 4101 4102 VERIFY0(nvlist_size(stats.zns_string, &size, NV_ENCODE_XDR)); 4103 size -= noise; 4104 sum += size; 4105 (void) printf("%12s %4d %6d bytes (%5.2f%%)\n", "strings:", 4106 (int)fnvlist_num_pairs(stats.zns_string), 4107 (int)size, 100.0 * size / total); 4108 4109 VERIFY0(nvlist_size(stats.zns_boolean, &size, NV_ENCODE_XDR)); 4110 size -= noise; 4111 sum += size; 4112 (void) printf("%12s %4d %6d bytes (%5.2f%%)\n", "booleans:", 4113 (int)fnvlist_num_pairs(stats.zns_boolean), 4114 (int)size, 100.0 * size / total); 4115 4116 size = total - sum; /* treat remainder as nvlist overhead */ 4117 (void) printf("%12s %4d %6d bytes (%5.2f%%)\n\n", "nvlists:", 4118 stats.zns_list_count, (int)size, 100.0 * size / total); 4119 4120 if (stats.zns_leaf_count > 0) { 4121 size_t average = stats.zns_leaf_total / stats.zns_leaf_count; 4122 4123 (void) printf("%12s %4d %6d bytes average\n", "leaf vdevs:", 4124 stats.zns_leaf_count, (int)average); 4125 (void) printf("%24d bytes largest\n", 4126 (int)stats.zns_leaf_largest); 4127 4128 if (dump_opt['l'] >= 3 && average > 0) 4129 (void) printf(" space for %d additional leaf vdevs\n", 4130 (int)((cap - total) / average)); 4131 } 4132 (void) printf("\n"); 4133 4134 nvlist_free(stats.zns_string); 4135 nvlist_free(stats.zns_uint64); 4136 nvlist_free(stats.zns_boolean); 4137 } 4138 4139 typedef struct cksum_record { 4140 zio_cksum_t cksum; 4141 boolean_t labels[VDEV_LABELS]; 4142 avl_node_t link; 4143 } cksum_record_t; 4144 4145 static int 4146 cksum_record_compare(const void *x1, const void *x2) 4147 { 4148 const cksum_record_t *l = (cksum_record_t *)x1; 4149 const cksum_record_t *r = (cksum_record_t *)x2; 4150 int arraysize = ARRAY_SIZE(l->cksum.zc_word); 4151 int difference; 4152 4153 for (int i = 0; i < arraysize; i++) { 4154 difference = TREE_CMP(l->cksum.zc_word[i], r->cksum.zc_word[i]); 4155 if (difference) 4156 break; 4157 } 4158 4159 return (difference); 4160 } 4161 4162 static cksum_record_t * 4163 cksum_record_alloc(zio_cksum_t *cksum, int l) 4164 { 4165 cksum_record_t *rec; 4166 4167 rec = umem_zalloc(sizeof (*rec), UMEM_NOFAIL); 4168 rec->cksum = *cksum; 4169 rec->labels[l] = B_TRUE; 4170 4171 return (rec); 4172 } 4173 4174 static cksum_record_t * 4175 cksum_record_lookup(avl_tree_t *tree, zio_cksum_t *cksum) 4176 { 4177 cksum_record_t lookup = { .cksum = *cksum }; 4178 avl_index_t where; 4179 4180 return (avl_find(tree, &lookup, &where)); 4181 } 4182 4183 static cksum_record_t * 4184 cksum_record_insert(avl_tree_t *tree, zio_cksum_t *cksum, int l) 4185 { 4186 cksum_record_t *rec; 4187 4188 rec = 
cksum_record_lookup(tree, cksum); 4189 if (rec) { 4190 rec->labels[l] = B_TRUE; 4191 } else { 4192 rec = cksum_record_alloc(cksum, l); 4193 avl_add(tree, rec); 4194 } 4195 4196 return (rec); 4197 } 4198 4199 static int 4200 first_label(cksum_record_t *rec) 4201 { 4202 for (int i = 0; i < VDEV_LABELS; i++) 4203 if (rec->labels[i]) 4204 return (i); 4205 4206 return (-1); 4207 } 4208 4209 static void 4210 print_label_numbers(char *prefix, cksum_record_t *rec) 4211 { 4212 printf("%s", prefix); 4213 for (int i = 0; i < VDEV_LABELS; i++) 4214 if (rec->labels[i] == B_TRUE) 4215 printf("%d ", i); 4216 printf("\n"); 4217 } 4218 4219 #define MAX_UBERBLOCK_COUNT (VDEV_UBERBLOCK_RING >> UBERBLOCK_SHIFT) 4220 4221 typedef struct zdb_label { 4222 vdev_label_t label; 4223 nvlist_t *config_nv; 4224 cksum_record_t *config; 4225 cksum_record_t *uberblocks[MAX_UBERBLOCK_COUNT]; 4226 boolean_t header_printed; 4227 boolean_t read_failed; 4228 } zdb_label_t; 4229 4230 static void 4231 print_label_header(zdb_label_t *label, int l) 4232 { 4233 4234 if (dump_opt['q']) 4235 return; 4236 4237 if (label->header_printed == B_TRUE) 4238 return; 4239 4240 (void) printf("------------------------------------\n"); 4241 (void) printf("LABEL %d\n", l); 4242 (void) printf("------------------------------------\n"); 4243 4244 label->header_printed = B_TRUE; 4245 } 4246 4247 static void 4248 print_l2arc_header(void) 4249 { 4250 (void) printf("------------------------------------\n"); 4251 (void) printf("L2ARC device header\n"); 4252 (void) printf("------------------------------------\n"); 4253 } 4254 4255 static void 4256 print_l2arc_log_blocks(void) 4257 { 4258 (void) printf("------------------------------------\n"); 4259 (void) printf("L2ARC device log blocks\n"); 4260 (void) printf("------------------------------------\n"); 4261 } 4262 4263 static void 4264 dump_l2arc_log_entries(uint64_t log_entries, 4265 l2arc_log_ent_phys_t *le, uint64_t i) 4266 { 4267 for (int j = 0; j < log_entries; j++) { 4268 dva_t dva = le[j].le_dva; 4269 (void) printf("lb[%4llu]\tle[%4d]\tDVA asize: %llu, " 4270 "vdev: %llu, offset: %llu\n", 4271 (u_longlong_t)i, j + 1, 4272 (u_longlong_t)DVA_GET_ASIZE(&dva), 4273 (u_longlong_t)DVA_GET_VDEV(&dva), 4274 (u_longlong_t)DVA_GET_OFFSET(&dva)); 4275 (void) printf("|\t\t\t\tbirth: %llu\n", 4276 (u_longlong_t)le[j].le_birth); 4277 (void) printf("|\t\t\t\tlsize: %llu\n", 4278 (u_longlong_t)L2BLK_GET_LSIZE((&le[j])->le_prop)); 4279 (void) printf("|\t\t\t\tpsize: %llu\n", 4280 (u_longlong_t)L2BLK_GET_PSIZE((&le[j])->le_prop)); 4281 (void) printf("|\t\t\t\tcompr: %llu\n", 4282 (u_longlong_t)L2BLK_GET_COMPRESS((&le[j])->le_prop)); 4283 (void) printf("|\t\t\t\tcomplevel: %llu\n", 4284 (u_longlong_t)(&le[j])->le_complevel); 4285 (void) printf("|\t\t\t\ttype: %llu\n", 4286 (u_longlong_t)L2BLK_GET_TYPE((&le[j])->le_prop)); 4287 (void) printf("|\t\t\t\tprotected: %llu\n", 4288 (u_longlong_t)L2BLK_GET_PROTECTED((&le[j])->le_prop)); 4289 (void) printf("|\t\t\t\tprefetch: %llu\n", 4290 (u_longlong_t)L2BLK_GET_PREFETCH((&le[j])->le_prop)); 4291 (void) printf("|\t\t\t\taddress: %llu\n", 4292 (u_longlong_t)le[j].le_daddr); 4293 (void) printf("|\t\t\t\tARC state: %llu\n", 4294 (u_longlong_t)L2BLK_GET_STATE((&le[j])->le_prop)); 4295 (void) printf("|\n"); 4296 } 4297 (void) printf("\n"); 4298 } 4299 4300 static void 4301 dump_l2arc_log_blkptr(l2arc_log_blkptr_t lbps) 4302 { 4303 (void) printf("|\t\tdaddr: %llu\n", (u_longlong_t)lbps.lbp_daddr); 4304 (void) printf("|\t\tpayload_asize: %llu\n", 4305 
(u_longlong_t)lbps.lbp_payload_asize); 4306 (void) printf("|\t\tpayload_start: %llu\n", 4307 (u_longlong_t)lbps.lbp_payload_start); 4308 (void) printf("|\t\tlsize: %llu\n", 4309 (u_longlong_t)L2BLK_GET_LSIZE((&lbps)->lbp_prop)); 4310 (void) printf("|\t\tasize: %llu\n", 4311 (u_longlong_t)L2BLK_GET_PSIZE((&lbps)->lbp_prop)); 4312 (void) printf("|\t\tcompralgo: %llu\n", 4313 (u_longlong_t)L2BLK_GET_COMPRESS((&lbps)->lbp_prop)); 4314 (void) printf("|\t\tcksumalgo: %llu\n", 4315 (u_longlong_t)L2BLK_GET_CHECKSUM((&lbps)->lbp_prop)); 4316 (void) printf("|\n\n"); 4317 } 4318 4319 static void 4320 dump_l2arc_log_blocks(int fd, l2arc_dev_hdr_phys_t l2dhdr, 4321 l2arc_dev_hdr_phys_t *rebuild) 4322 { 4323 l2arc_log_blk_phys_t this_lb; 4324 uint64_t asize; 4325 l2arc_log_blkptr_t lbps[2]; 4326 abd_t *abd; 4327 zio_cksum_t cksum; 4328 int failed = 0; 4329 l2arc_dev_t dev; 4330 4331 if (!dump_opt['q']) 4332 print_l2arc_log_blocks(); 4333 bcopy((&l2dhdr)->dh_start_lbps, lbps, sizeof (lbps)); 4334 4335 dev.l2ad_evict = l2dhdr.dh_evict; 4336 dev.l2ad_start = l2dhdr.dh_start; 4337 dev.l2ad_end = l2dhdr.dh_end; 4338 4339 if (l2dhdr.dh_start_lbps[0].lbp_daddr == 0) { 4340 /* no log blocks to read */ 4341 if (!dump_opt['q']) { 4342 (void) printf("No log blocks to read\n"); 4343 (void) printf("\n"); 4344 } 4345 return; 4346 } else { 4347 dev.l2ad_hand = lbps[0].lbp_daddr + 4348 L2BLK_GET_PSIZE((&lbps[0])->lbp_prop); 4349 } 4350 4351 dev.l2ad_first = !!(l2dhdr.dh_flags & L2ARC_DEV_HDR_EVICT_FIRST); 4352 4353 for (;;) { 4354 if (!l2arc_log_blkptr_valid(&dev, &lbps[0])) 4355 break; 4356 4357 /* L2BLK_GET_PSIZE returns aligned size for log blocks */ 4358 asize = L2BLK_GET_PSIZE((&lbps[0])->lbp_prop); 4359 if (pread64(fd, &this_lb, asize, lbps[0].lbp_daddr) != asize) { 4360 if (!dump_opt['q']) { 4361 (void) printf("Error while reading next log " 4362 "block\n\n"); 4363 } 4364 break; 4365 } 4366 4367 fletcher_4_native_varsize(&this_lb, asize, &cksum); 4368 if (!ZIO_CHECKSUM_EQUAL(cksum, lbps[0].lbp_cksum)) { 4369 failed++; 4370 if (!dump_opt['q']) { 4371 (void) printf("Invalid cksum\n"); 4372 dump_l2arc_log_blkptr(lbps[0]); 4373 } 4374 break; 4375 } 4376 4377 switch (L2BLK_GET_COMPRESS((&lbps[0])->lbp_prop)) { 4378 case ZIO_COMPRESS_OFF: 4379 break; 4380 default: 4381 abd = abd_alloc_for_io(asize, B_TRUE); 4382 abd_copy_from_buf_off(abd, &this_lb, 0, asize); 4383 zio_decompress_data(L2BLK_GET_COMPRESS( 4384 (&lbps[0])->lbp_prop), abd, &this_lb, 4385 asize, sizeof (this_lb), NULL); 4386 abd_free(abd); 4387 break; 4388 } 4389 4390 if (this_lb.lb_magic == BSWAP_64(L2ARC_LOG_BLK_MAGIC)) 4391 byteswap_uint64_array(&this_lb, sizeof (this_lb)); 4392 if (this_lb.lb_magic != L2ARC_LOG_BLK_MAGIC) { 4393 if (!dump_opt['q']) 4394 (void) printf("Invalid log block magic\n\n"); 4395 break; 4396 } 4397 4398 rebuild->dh_lb_count++; 4399 rebuild->dh_lb_asize += asize; 4400 if (dump_opt['l'] > 1 && !dump_opt['q']) { 4401 (void) printf("lb[%4llu]\tmagic: %llu\n", 4402 (u_longlong_t)rebuild->dh_lb_count, 4403 (u_longlong_t)this_lb.lb_magic); 4404 dump_l2arc_log_blkptr(lbps[0]); 4405 } 4406 4407 if (dump_opt['l'] > 2 && !dump_opt['q']) 4408 dump_l2arc_log_entries(l2dhdr.dh_log_entries, 4409 this_lb.lb_entries, 4410 rebuild->dh_lb_count); 4411 4412 if (l2arc_range_check_overlap(lbps[1].lbp_payload_start, 4413 lbps[0].lbp_payload_start, dev.l2ad_evict) && 4414 !dev.l2ad_first) 4415 break; 4416 4417 lbps[0] = lbps[1]; 4418 lbps[1] = this_lb.lb_prev_lbp; 4419 } 4420 4421 if (!dump_opt['q']) { 4422 (void) printf("log_blk_count:\t %llu with 
valid cksum\n", 4423 (u_longlong_t)rebuild->dh_lb_count); 4424 (void) printf("\t\t %d with invalid cksum\n", failed); 4425 (void) printf("log_blk_asize:\t %llu\n\n", 4426 (u_longlong_t)rebuild->dh_lb_asize); 4427 } 4428 } 4429 4430 static int 4431 dump_l2arc_header(int fd) 4432 { 4433 l2arc_dev_hdr_phys_t l2dhdr, rebuild; 4434 int error = B_FALSE; 4435 4436 bzero(&l2dhdr, sizeof (l2dhdr)); 4437 bzero(&rebuild, sizeof (rebuild)); 4438 4439 if (pread64(fd, &l2dhdr, sizeof (l2dhdr), 4440 VDEV_LABEL_START_SIZE) != sizeof (l2dhdr)) { 4441 error = B_TRUE; 4442 } else { 4443 if (l2dhdr.dh_magic == BSWAP_64(L2ARC_DEV_HDR_MAGIC)) 4444 byteswap_uint64_array(&l2dhdr, sizeof (l2dhdr)); 4445 4446 if (l2dhdr.dh_magic != L2ARC_DEV_HDR_MAGIC) 4447 error = B_TRUE; 4448 } 4449 4450 if (error) { 4451 (void) printf("L2ARC device header not found\n\n"); 4452 /* Do not return an error here for backward compatibility */ 4453 return (0); 4454 } else if (!dump_opt['q']) { 4455 print_l2arc_header(); 4456 4457 (void) printf(" magic: %llu\n", 4458 (u_longlong_t)l2dhdr.dh_magic); 4459 (void) printf(" version: %llu\n", 4460 (u_longlong_t)l2dhdr.dh_version); 4461 (void) printf(" pool_guid: %llu\n", 4462 (u_longlong_t)l2dhdr.dh_spa_guid); 4463 (void) printf(" flags: %llu\n", 4464 (u_longlong_t)l2dhdr.dh_flags); 4465 (void) printf(" start_lbps[0]: %llu\n", 4466 (u_longlong_t) 4467 l2dhdr.dh_start_lbps[0].lbp_daddr); 4468 (void) printf(" start_lbps[1]: %llu\n", 4469 (u_longlong_t) 4470 l2dhdr.dh_start_lbps[1].lbp_daddr); 4471 (void) printf(" log_blk_ent: %llu\n", 4472 (u_longlong_t)l2dhdr.dh_log_entries); 4473 (void) printf(" start: %llu\n", 4474 (u_longlong_t)l2dhdr.dh_start); 4475 (void) printf(" end: %llu\n", 4476 (u_longlong_t)l2dhdr.dh_end); 4477 (void) printf(" evict: %llu\n", 4478 (u_longlong_t)l2dhdr.dh_evict); 4479 (void) printf(" lb_asize_refcount: %llu\n", 4480 (u_longlong_t)l2dhdr.dh_lb_asize); 4481 (void) printf(" lb_count_refcount: %llu\n", 4482 (u_longlong_t)l2dhdr.dh_lb_count); 4483 (void) printf(" trim_action_time: %llu\n", 4484 (u_longlong_t)l2dhdr.dh_trim_action_time); 4485 (void) printf(" trim_state: %llu\n\n", 4486 (u_longlong_t)l2dhdr.dh_trim_state); 4487 } 4488 4489 dump_l2arc_log_blocks(fd, l2dhdr, &rebuild); 4490 /* 4491 * The total aligned size of log blocks and the number of log blocks 4492 * reported in the header of the device may be less than what zdb 4493 * reports by dump_l2arc_log_blocks() which emulates l2arc_rebuild(). 4494 * This happens because dump_l2arc_log_blocks() lacks the memory 4495 * pressure valve that l2arc_rebuild() has. Thus, if we are on a system 4496 * with low memory, l2arc_rebuild will exit prematurely and dh_lb_asize 4497 * and dh_lb_count will be lower to begin with than what exists on the 4498 * device. This is normal and zdb should not exit with an error. The 4499 * opposite case should never happen though, the values reported in the 4500 * header should never be higher than what dump_l2arc_log_blocks() and 4501 * l2arc_rebuild() report. If this happens there is a leak in the 4502 * accounting of log blocks. 
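 * The check below treats that case as an error.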
4503 */ 4504 if (l2dhdr.dh_lb_asize > rebuild.dh_lb_asize || 4505 l2dhdr.dh_lb_count > rebuild.dh_lb_count) 4506 return (1); 4507 4508 return (0); 4509 } 4510 4511 static void 4512 dump_config_from_label(zdb_label_t *label, size_t buflen, int l) 4513 { 4514 if (dump_opt['q']) 4515 return; 4516 4517 if ((dump_opt['l'] < 3) && (first_label(label->config) != l)) 4518 return; 4519 4520 print_label_header(label, l); 4521 dump_nvlist(label->config_nv, 4); 4522 print_label_numbers(" labels = ", label->config); 4523 4524 if (dump_opt['l'] >= 2) 4525 dump_nvlist_stats(label->config_nv, buflen); 4526 } 4527 4528 #define ZDB_MAX_UB_HEADER_SIZE 32 4529 4530 static void 4531 dump_label_uberblocks(zdb_label_t *label, uint64_t ashift, int label_num) 4532 { 4533 4534 vdev_t vd; 4535 char header[ZDB_MAX_UB_HEADER_SIZE]; 4536 4537 vd.vdev_ashift = ashift; 4538 vd.vdev_top = &vd; 4539 4540 for (int i = 0; i < VDEV_UBERBLOCK_COUNT(&vd); i++) { 4541 uint64_t uoff = VDEV_UBERBLOCK_OFFSET(&vd, i); 4542 uberblock_t *ub = (void *)((char *)&label->label + uoff); 4543 cksum_record_t *rec = label->uberblocks[i]; 4544 4545 if (rec == NULL) { 4546 if (dump_opt['u'] >= 2) { 4547 print_label_header(label, label_num); 4548 (void) printf(" Uberblock[%d] invalid\n", i); 4549 } 4550 continue; 4551 } 4552 4553 if ((dump_opt['u'] < 3) && (first_label(rec) != label_num)) 4554 continue; 4555 4556 if ((dump_opt['u'] < 4) && 4557 (ub->ub_mmp_magic == MMP_MAGIC) && ub->ub_mmp_delay && 4558 (i >= VDEV_UBERBLOCK_COUNT(&vd) - MMP_BLOCKS_PER_LABEL)) 4559 continue; 4560 4561 print_label_header(label, label_num); 4562 (void) snprintf(header, ZDB_MAX_UB_HEADER_SIZE, 4563 " Uberblock[%d]\n", i); 4564 dump_uberblock(ub, header, ""); 4565 print_label_numbers(" labels = ", rec); 4566 } 4567 } 4568 4569 static char curpath[PATH_MAX]; 4570 4571 /* 4572 * Iterate through the path components, recursively passing 4573 * current one's obj and remaining path until we find the obj 4574 * for the last one. 
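 * For example, given "a/b/c" we look up "a" in the directory object
 * obj, then recurse on the remainder "b/c" until "c" resolves.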
4575 */ 4576 static int 4577 dump_path_impl(objset_t *os, uint64_t obj, char *name, uint64_t *retobj) 4578 { 4579 int err; 4580 boolean_t header = B_TRUE; 4581 uint64_t child_obj; 4582 char *s; 4583 dmu_buf_t *db; 4584 dmu_object_info_t doi; 4585 4586 if ((s = strchr(name, '/')) != NULL) 4587 *s = '\0'; 4588 err = zap_lookup(os, obj, name, 8, 1, &child_obj); 4589 4590 (void) strlcat(curpath, name, sizeof (curpath)); 4591 4592 if (err != 0) { 4593 (void) fprintf(stderr, "failed to lookup %s: %s\n", 4594 curpath, strerror(err)); 4595 return (err); 4596 } 4597 4598 child_obj = ZFS_DIRENT_OBJ(child_obj); 4599 err = sa_buf_hold(os, child_obj, FTAG, &db); 4600 if (err != 0) { 4601 (void) fprintf(stderr, 4602 "failed to get SA dbuf for obj %llu: %s\n", 4603 (u_longlong_t)child_obj, strerror(err)); 4604 return (EINVAL); 4605 } 4606 dmu_object_info_from_db(db, &doi); 4607 sa_buf_rele(db, FTAG); 4608 4609 if (doi.doi_bonus_type != DMU_OT_SA && 4610 doi.doi_bonus_type != DMU_OT_ZNODE) { 4611 (void) fprintf(stderr, "invalid bonus type %d for obj %llu\n", 4612 doi.doi_bonus_type, (u_longlong_t)child_obj); 4613 return (EINVAL); 4614 } 4615 4616 if (dump_opt['v'] > 6) { 4617 (void) printf("obj=%llu %s type=%d bonustype=%d\n", 4618 (u_longlong_t)child_obj, curpath, doi.doi_type, 4619 doi.doi_bonus_type); 4620 } 4621 4622 (void) strlcat(curpath, "/", sizeof (curpath)); 4623 4624 switch (doi.doi_type) { 4625 case DMU_OT_DIRECTORY_CONTENTS: 4626 if (s != NULL && *(s + 1) != '\0') 4627 return (dump_path_impl(os, child_obj, s + 1, retobj)); 4628 fallthrough; 4629 case DMU_OT_PLAIN_FILE_CONTENTS: 4630 if (retobj != NULL) { 4631 *retobj = child_obj; 4632 } else { 4633 dump_object(os, child_obj, dump_opt['v'], &header, 4634 NULL, 0); 4635 } 4636 return (0); 4637 default: 4638 (void) fprintf(stderr, "object %llu has non-file/directory " 4639 "type %d\n", (u_longlong_t)obj, doi.doi_type); 4640 break; 4641 } 4642 4643 return (EINVAL); 4644 } 4645 4646 /* 4647 * Dump the blocks for the object specified by path inside the dataset. 
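 * The walk starts at the dataset's root znode (ZFS_ROOT_OBJ); if
 * retobj is non-NULL the resolved object number is returned to the
 * caller instead of being dumped.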
4648  */
4649 static int
4650 dump_path(char *ds, char *path, uint64_t *retobj)
4651 {
4652 	int err;
4653 	objset_t *os;
4654 	uint64_t root_obj;
4655
4656 	err = open_objset(ds, FTAG, &os);
4657 	if (err != 0)
4658 		return (err);
4659
4660 	err = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, &root_obj);
4661 	if (err != 0) {
4662 		(void) fprintf(stderr, "can't lookup root znode: %s\n",
4663 		    strerror(err));
4664 		close_objset(os, FTAG);
4665 		return (EINVAL);
4666 	}
4667
4668 	(void) snprintf(curpath, sizeof (curpath), "dataset=%s path=/", ds);
4669
4670 	err = dump_path_impl(os, root_obj, path, retobj);
4671
4672 	close_objset(os, FTAG);
4673 	return (err);
4674 }
4675
4676 static int
4677 zdb_copy_object(objset_t *os, uint64_t srcobj, char *destfile)
4678 {
4679 	int err = 0;
4680 	uint64_t size, readsize, oursize, offset;
4681 	ssize_t writesize;
4682 	sa_handle_t *hdl;
4683
4684 	(void) printf("Copying object %" PRIu64 " to file %s\n", srcobj,
4685 	    destfile);
4686
4687 	VERIFY3P(os, ==, sa_os);
4688 	if ((err = sa_handle_get(os, srcobj, NULL, SA_HDL_PRIVATE, &hdl))) {
4689 		(void) printf("Failed to get handle for SA znode\n");
4690 		return (err);
4691 	}
4692 	if ((err = sa_lookup(hdl, sa_attr_table[ZPL_SIZE], &size, 8))) {
4693 		(void) sa_handle_destroy(hdl);
4694 		return (err);
4695 	}
4696 	(void) sa_handle_destroy(hdl);
4697
4698 	(void) printf("Object %" PRIu64 " is %" PRIu64 " bytes\n", srcobj,
4699 	    size);
4700 	if (size == 0) {
4701 		return (EINVAL);
4702 	}
4703
4704 	int fd = open(destfile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
	if (fd == -1) {
		(void) fprintf(stderr, "failed to open %s: %s\n", destfile,
		    strerror(errno));
		return (errno);
	}
4705 	/*
4706 	 * We cap the size at 1 mebibyte here to prevent
4707 	 * allocation failures and nigh-infinite printing if the
4708 	 * object is extremely large.
4709 	 */
4710 	oursize = MIN(size, 1 << 20);
4711 	offset = 0;
4712 	char *buf = kmem_alloc(oursize, KM_NOSLEEP);
4713 	if (buf == NULL) {
		(void) close(fd);
4714 		return (ENOMEM);
4715 	}
4716
4717 	while (offset < size) {
4718 		readsize = MIN(size - offset, 1 << 20);
4719 		err = dmu_read(os, srcobj, offset, readsize, buf, 0);
4720 		if (err != 0) {
4721 			(void) printf("got error %u from dmu_read\n", err);
4722 			kmem_free(buf, oursize);
			(void) close(fd);
4723 			return (err);
4724 		}
4725 		if (dump_opt['v'] > 3) {
4726 			(void) printf("Read offset=%" PRIu64 " size=%" PRIu64
4727 			    " error=%d\n", offset, readsize, err);
4728 		}
4729
4730 		writesize = write(fd, buf, readsize);
4731 		if (writesize < 0) {
4732 			err = errno;
4733 			break;
4734 		} else if (writesize != readsize) {
4735 			/* Incomplete write: report it and return an error */
4736 			(void) fprintf(stderr, "Short write, only wrote %llu of"
4737 			    " %" PRIu64 " bytes, exiting...\n",
4738 			    (u_longlong_t)writesize, readsize);
			err = EIO;
4739 			break;
4740 		}
4741
4742 		offset += readsize;
4743 	}
4744
4745 	(void) close(fd);
4746
4747 	if (buf != NULL)
4748 		kmem_free(buf, oursize);
4749
4750 	return (err);
4751 }
4752
4753 static int
4754 dump_label(const char *dev)
4755 {
4756 	char path[MAXPATHLEN];
4757 	zdb_label_t labels[VDEV_LABELS];
4758 	uint64_t psize, ashift, l2cache;
4759 	struct stat64 statbuf;
4760 	boolean_t config_found = B_FALSE;
4761 	boolean_t error = B_FALSE;
4762 	boolean_t read_l2arc_header = B_FALSE;
4763 	avl_tree_t config_tree;
4764 	avl_tree_t uberblock_tree;
4765 	void *node, *cookie;
4766 	int fd;
4767
4768 	bzero(labels, sizeof (labels));
4769
4770 	/*
4771 	 * Check if we were given an absolute path and use it as is.
4772 	 * Otherwise if the provided vdev name doesn't point to a file,
4773 	 * try prepending expected disk paths and partition numbers.
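 * (For example, a short name such as "sda" may be resolved by
 * zfs_resolve_shortname() and, for whole disks, have a partition
 * suffix appended by zfs_append_partition().)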
4774 	 */
4775 	(void) strlcpy(path, dev, sizeof (path));
4776 	if (dev[0] != '/' && stat64(path, &statbuf) != 0) {
4777 		int error;
4778
4779 		error = zfs_resolve_shortname(dev, path, MAXPATHLEN);
4780 		if (error == 0 && zfs_dev_is_whole_disk(path)) {
4781 			if (zfs_append_partition(path, MAXPATHLEN) == -1)
4782 				error = ENOENT;
4783 		}
4784
4785 		if (error || (stat64(path, &statbuf) != 0)) {
4786 			(void) printf("failed to find device %s, try "
4787 			    "specifying absolute path instead\n", dev);
4788 			return (1);
4789 		}
4790 	}
4791
4792 	if ((fd = open64(path, O_RDONLY)) < 0) {
4793 		(void) printf("cannot open '%s': %s\n", path, strerror(errno));
4794 		exit(1);
4795 	}
4796
4797 	if (fstat64_blk(fd, &statbuf) != 0) {
4798 		(void) printf("failed to stat '%s': %s\n", path,
4799 		    strerror(errno));
4800 		(void) close(fd);
4801 		exit(1);
4802 	}
4803
4804 	if (S_ISBLK(statbuf.st_mode) && zfs_dev_flush(fd) != 0)
4805 		(void) printf("failed to invalidate cache '%s': %s\n", path,
4806 		    strerror(errno));
4807
4808 	avl_create(&config_tree, cksum_record_compare,
4809 	    sizeof (cksum_record_t), offsetof(cksum_record_t, link));
4810 	avl_create(&uberblock_tree, cksum_record_compare,
4811 	    sizeof (cksum_record_t), offsetof(cksum_record_t, link));
4812
4813 	psize = statbuf.st_size;
4814 	psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t));
4815 	ashift = SPA_MINBLOCKSHIFT;
4816
4817 	/*
4818 	 * 1. Read the label from disk.
4819 	 * 2. Unpack the configuration and insert in config tree.
4820 	 * 3. Traverse all uberblocks and insert in uberblock tree.
4821 	 */
4822 	for (int l = 0; l < VDEV_LABELS; l++) {
4823 		zdb_label_t *label = &labels[l];
4824 		char *buf = label->label.vl_vdev_phys.vp_nvlist;
4825 		size_t buflen = sizeof (label->label.vl_vdev_phys.vp_nvlist);
4826 		nvlist_t *config;
4827 		cksum_record_t *rec;
4828 		zio_cksum_t cksum;
4829 		vdev_t vd;
4830
4831 		if (pread64(fd, &label->label, sizeof (label->label),
4832 		    vdev_label_offset(psize, l, 0)) != sizeof (label->label)) {
4833 			if (!dump_opt['q'])
4834 				(void) printf("failed to read label %d\n", l);
4835 			label->read_failed = B_TRUE;
4836 			error = B_TRUE;
4837 			continue;
4838 		}
4839
4840 		label->read_failed = B_FALSE;
4841
4842 		if (nvlist_unpack(buf, buflen, &config, 0) == 0) {
4843 			nvlist_t *vdev_tree = NULL;
4844 			size_t size;
4845
4846 			if ((nvlist_lookup_nvlist(config,
4847 			    ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) ||
4848 			    (nvlist_lookup_uint64(vdev_tree,
4849 			    ZPOOL_CONFIG_ASHIFT, &ashift) != 0))
4850 				ashift = SPA_MINBLOCKSHIFT;
4851
4852 			if (nvlist_size(config, &size, NV_ENCODE_XDR) != 0)
4853 				size = buflen;
4854
4855 			/* If the device is a cache device, read its L2ARC header later.
*/ 4856 if (!read_l2arc_header) { 4857 if (nvlist_lookup_uint64(config, 4858 ZPOOL_CONFIG_POOL_STATE, &l2cache) == 0 && 4859 l2cache == POOL_STATE_L2CACHE) { 4860 read_l2arc_header = B_TRUE; 4861 } 4862 } 4863 4864 fletcher_4_native_varsize(buf, size, &cksum); 4865 rec = cksum_record_insert(&config_tree, &cksum, l); 4866 4867 label->config = rec; 4868 label->config_nv = config; 4869 config_found = B_TRUE; 4870 } else { 4871 error = B_TRUE; 4872 } 4873 4874 vd.vdev_ashift = ashift; 4875 vd.vdev_top = &vd; 4876 4877 for (int i = 0; i < VDEV_UBERBLOCK_COUNT(&vd); i++) { 4878 uint64_t uoff = VDEV_UBERBLOCK_OFFSET(&vd, i); 4879 uberblock_t *ub = (void *)((char *)label + uoff); 4880 4881 if (uberblock_verify(ub)) 4882 continue; 4883 4884 fletcher_4_native_varsize(ub, sizeof (*ub), &cksum); 4885 rec = cksum_record_insert(&uberblock_tree, &cksum, l); 4886 4887 label->uberblocks[i] = rec; 4888 } 4889 } 4890 4891 /* 4892 * Dump the label and uberblocks. 4893 */ 4894 for (int l = 0; l < VDEV_LABELS; l++) { 4895 zdb_label_t *label = &labels[l]; 4896 size_t buflen = sizeof (label->label.vl_vdev_phys.vp_nvlist); 4897 4898 if (label->read_failed == B_TRUE) 4899 continue; 4900 4901 if (label->config_nv) { 4902 dump_config_from_label(label, buflen, l); 4903 } else { 4904 if (!dump_opt['q']) 4905 (void) printf("failed to unpack label %d\n", l); 4906 } 4907 4908 if (dump_opt['u']) 4909 dump_label_uberblocks(label, ashift, l); 4910 4911 nvlist_free(label->config_nv); 4912 } 4913 4914 /* 4915 * Dump the L2ARC header, if existent. 4916 */ 4917 if (read_l2arc_header) 4918 error |= dump_l2arc_header(fd); 4919 4920 cookie = NULL; 4921 while ((node = avl_destroy_nodes(&config_tree, &cookie)) != NULL) 4922 umem_free(node, sizeof (cksum_record_t)); 4923 4924 cookie = NULL; 4925 while ((node = avl_destroy_nodes(&uberblock_tree, &cookie)) != NULL) 4926 umem_free(node, sizeof (cksum_record_t)); 4927 4928 avl_destroy(&config_tree); 4929 avl_destroy(&uberblock_tree); 4930 4931 (void) close(fd); 4932 4933 return (config_found == B_FALSE ? 2 : 4934 (error == B_TRUE ? 
1 : 0)); 4935 } 4936 4937 static uint64_t dataset_feature_count[SPA_FEATURES]; 4938 static uint64_t global_feature_count[SPA_FEATURES]; 4939 static uint64_t remap_deadlist_count = 0; 4940 4941 /*ARGSUSED*/ 4942 static int 4943 dump_one_objset(const char *dsname, void *arg) 4944 { 4945 int error; 4946 objset_t *os; 4947 spa_feature_t f; 4948 4949 error = open_objset(dsname, FTAG, &os); 4950 if (error != 0) 4951 return (0); 4952 4953 for (f = 0; f < SPA_FEATURES; f++) { 4954 if (!dsl_dataset_feature_is_active(dmu_objset_ds(os), f)) 4955 continue; 4956 ASSERT(spa_feature_table[f].fi_flags & 4957 ZFEATURE_FLAG_PER_DATASET); 4958 dataset_feature_count[f]++; 4959 } 4960 4961 if (dsl_dataset_remap_deadlist_exists(dmu_objset_ds(os))) { 4962 remap_deadlist_count++; 4963 } 4964 4965 for (dsl_bookmark_node_t *dbn = 4966 avl_first(&dmu_objset_ds(os)->ds_bookmarks); dbn != NULL; 4967 dbn = AVL_NEXT(&dmu_objset_ds(os)->ds_bookmarks, dbn)) { 4968 mos_obj_refd(dbn->dbn_phys.zbm_redaction_obj); 4969 if (dbn->dbn_phys.zbm_redaction_obj != 0) 4970 global_feature_count[SPA_FEATURE_REDACTION_BOOKMARKS]++; 4971 if (dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN) 4972 global_feature_count[SPA_FEATURE_BOOKMARK_WRITTEN]++; 4973 } 4974 4975 if (dsl_deadlist_is_open(&dmu_objset_ds(os)->ds_dir->dd_livelist) && 4976 !dmu_objset_is_snapshot(os)) { 4977 global_feature_count[SPA_FEATURE_LIVELIST]++; 4978 } 4979 4980 dump_objset(os); 4981 close_objset(os, FTAG); 4982 fuid_table_destroy(); 4983 return (0); 4984 } 4985 4986 /* 4987 * Block statistics. 4988 */ 4989 #define PSIZE_HISTO_SIZE (SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 2) 4990 typedef struct zdb_blkstats { 4991 uint64_t zb_asize; 4992 uint64_t zb_lsize; 4993 uint64_t zb_psize; 4994 uint64_t zb_count; 4995 uint64_t zb_gangs; 4996 uint64_t zb_ditto_samevdev; 4997 uint64_t zb_ditto_same_ms; 4998 uint64_t zb_psize_histogram[PSIZE_HISTO_SIZE]; 4999 } zdb_blkstats_t; 5000 5001 /* 5002 * Extended object types to report deferred frees and dedup auto-ditto blocks. 
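 * They are numbered immediately past DMU_OT_NUMTYPES so they can share
 * the per-type statistics table in zdb_cb_t with the real object types.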
5003  */
5004 #define	ZDB_OT_DEFERRED	(DMU_OT_NUMTYPES + 0)
5005 #define	ZDB_OT_DITTO	(DMU_OT_NUMTYPES + 1)
5006 #define	ZDB_OT_OTHER	(DMU_OT_NUMTYPES + 2)
5007 #define	ZDB_OT_TOTAL	(DMU_OT_NUMTYPES + 3)
5008
5009 static const char *zdb_ot_extname[] = {
5010 	"deferred free",
5011 	"dedup ditto",
5012 	"other",
5013 	"Total",
5014 };
5015
5016 #define	ZB_TOTAL	DN_MAX_LEVELS
5017 #define	SPA_MAX_FOR_16M	(SPA_MAXBLOCKSHIFT+1)
5018
5019 typedef struct zdb_cb {
5020 	zdb_blkstats_t	zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
5021 	uint64_t	zcb_removing_size;
5022 	uint64_t	zcb_checkpoint_size;
5023 	uint64_t	zcb_dedup_asize;
5024 	uint64_t	zcb_dedup_blocks;
5025 	uint64_t	zcb_psize_count[SPA_MAX_FOR_16M];
5026 	uint64_t	zcb_lsize_count[SPA_MAX_FOR_16M];
5027 	uint64_t	zcb_asize_count[SPA_MAX_FOR_16M];
5028 	uint64_t	zcb_psize_len[SPA_MAX_FOR_16M];
5029 	uint64_t	zcb_lsize_len[SPA_MAX_FOR_16M];
5030 	uint64_t	zcb_asize_len[SPA_MAX_FOR_16M];
5031 	uint64_t	zcb_psize_total;
5032 	uint64_t	zcb_lsize_total;
5033 	uint64_t	zcb_asize_total;
5034 	uint64_t	zcb_embedded_blocks[NUM_BP_EMBEDDED_TYPES];
5035 	uint64_t	zcb_embedded_histogram[NUM_BP_EMBEDDED_TYPES]
5036 	    [BPE_PAYLOAD_SIZE + 1];
5037 	uint64_t	zcb_start;
5038 	hrtime_t	zcb_lastprint;
5039 	uint64_t	zcb_totalasize;
5040 	uint64_t	zcb_errors[256];
5041 	int		zcb_readfails;
5042 	int		zcb_haderrors;
5043 	spa_t		*zcb_spa;
5044 	uint32_t	**zcb_vd_obsolete_counts;
5045 } zdb_cb_t;
5046
5047 /* test if two DVA offsets from the same vdev are within the same metaslab */
5048 static boolean_t
5049 same_metaslab(spa_t *spa, uint64_t vdev, uint64_t off1, uint64_t off2)
5050 {
5051 	vdev_t *vd = vdev_lookup_top(spa, vdev);
5052 	uint64_t ms_shift = vd->vdev_ms_shift;
5053
5054 	return ((off1 >> ms_shift) == (off2 >> ms_shift));
5055 }
5056
5057 /*
5058  * Used to simplify reporting of the histogram data.
5059  */
5060 typedef struct one_histo {
5061 	char *name;
5062 	uint64_t *count;
5063 	uint64_t *len;
5064 	uint64_t cumulative;
5065 } one_histo_t;
5066
5067 /*
5068  * The number of separate histograms processed for psize, lsize and asize.
5069  */
5070 #define	NUM_HISTO 3
5071
5072 /*
5073  * This routine prints a fixed-column-width table of three histograms,
5074  * one row per power-of-two block size from 512 bytes up to
5075  * SPA_MAXBLOCKSIZE, showing the count, length and cumulative length
5076  * of the psize, lsize and asize blocks.
5077  *
5078  * All three types of blocks are listed on a single line.
5079  *
5080  * By default the table is printed in nicenum format (e.g. 123K) but
5081  * if the '-P' parameter is specified then the full raw number
5082  * (parseable) is printed out.
5083  */
5084 static void
5085 dump_size_histograms(zdb_cb_t *zcb)
5086 {
5087 	/*
5088 	 * A temporary buffer that allows us to convert a number into
5089 	 * a string using zdb_nicenum to allow either raw or human
5090 	 * readable numbers to be output.
5091 	 */
5092 	char numbuf[32];
5093
5094 	/*
5095 	 * Define titles which are used in the headers of the tables
5096 	 * printed by this routine.
5097 	 */
5098 	const char blocksize_title1[] = "block";
5099 	const char blocksize_title2[] = "size";
5100 	const char count_title[] = "Count";
5101 	const char length_title[] = "Size";
5102 	const char cumulative_title[] = "Cum.";
5103
5104 	/*
5105 	 * Setup the histogram arrays (psize, lsize, and asize).
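	 * Each one_histo_t entry pairs a name with the matching per-bin
	 * count and length arrays from the zdb_cb_t, plus a running
	 * cumulative total.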
5106 */ 5107 one_histo_t parm_histo[NUM_HISTO]; 5108 5109 parm_histo[0].name = "psize"; 5110 parm_histo[0].count = zcb->zcb_psize_count; 5111 parm_histo[0].len = zcb->zcb_psize_len; 5112 parm_histo[0].cumulative = 0; 5113 5114 parm_histo[1].name = "lsize"; 5115 parm_histo[1].count = zcb->zcb_lsize_count; 5116 parm_histo[1].len = zcb->zcb_lsize_len; 5117 parm_histo[1].cumulative = 0; 5118 5119 parm_histo[2].name = "asize"; 5120 parm_histo[2].count = zcb->zcb_asize_count; 5121 parm_histo[2].len = zcb->zcb_asize_len; 5122 parm_histo[2].cumulative = 0; 5123 5124 5125 (void) printf("\nBlock Size Histogram\n"); 5126 /* 5127 * Print the first line titles 5128 */ 5129 if (dump_opt['P']) 5130 (void) printf("\n%s\t", blocksize_title1); 5131 else 5132 (void) printf("\n%7s ", blocksize_title1); 5133 5134 for (int j = 0; j < NUM_HISTO; j++) { 5135 if (dump_opt['P']) { 5136 if (j < NUM_HISTO - 1) { 5137 (void) printf("%s\t\t\t", parm_histo[j].name); 5138 } else { 5139 /* Don't print trailing spaces */ 5140 (void) printf(" %s", parm_histo[j].name); 5141 } 5142 } else { 5143 if (j < NUM_HISTO - 1) { 5144 /* Left aligned strings in the output */ 5145 (void) printf("%-7s ", 5146 parm_histo[j].name); 5147 } else { 5148 /* Don't print trailing spaces */ 5149 (void) printf("%s", parm_histo[j].name); 5150 } 5151 } 5152 } 5153 (void) printf("\n"); 5154 5155 /* 5156 * Print the second line titles 5157 */ 5158 if (dump_opt['P']) { 5159 (void) printf("%s\t", blocksize_title2); 5160 } else { 5161 (void) printf("%7s ", blocksize_title2); 5162 } 5163 5164 for (int i = 0; i < NUM_HISTO; i++) { 5165 if (dump_opt['P']) { 5166 (void) printf("%s\t%s\t%s\t", 5167 count_title, length_title, cumulative_title); 5168 } else { 5169 (void) printf("%7s%7s%7s", 5170 count_title, length_title, cumulative_title); 5171 } 5172 } 5173 (void) printf("\n"); 5174 5175 /* 5176 * Print the rows 5177 */ 5178 for (int i = SPA_MINBLOCKSHIFT; i < SPA_MAX_FOR_16M; i++) { 5179 5180 /* 5181 * Print the first column showing the blocksize 5182 */ 5183 zdb_nicenum((1ULL << i), numbuf, sizeof (numbuf)); 5184 5185 if (dump_opt['P']) { 5186 printf("%s", numbuf); 5187 } else { 5188 printf("%7s:", numbuf); 5189 } 5190 5191 /* 5192 * Print the remaining set of 3 columns per size: 5193 * for psize, lsize and asize 5194 */ 5195 for (int j = 0; j < NUM_HISTO; j++) { 5196 parm_histo[j].cumulative += parm_histo[j].len[i]; 5197 5198 zdb_nicenum(parm_histo[j].count[i], 5199 numbuf, sizeof (numbuf)); 5200 if (dump_opt['P']) 5201 (void) printf("\t%s", numbuf); 5202 else 5203 (void) printf("%7s", numbuf); 5204 5205 zdb_nicenum(parm_histo[j].len[i], 5206 numbuf, sizeof (numbuf)); 5207 if (dump_opt['P']) 5208 (void) printf("\t%s", numbuf); 5209 else 5210 (void) printf("%7s", numbuf); 5211 5212 zdb_nicenum(parm_histo[j].cumulative, 5213 numbuf, sizeof (numbuf)); 5214 if (dump_opt['P']) 5215 (void) printf("\t%s", numbuf); 5216 else 5217 (void) printf("%7s", numbuf); 5218 } 5219 (void) printf("\n"); 5220 } 5221 } 5222 5223 static void 5224 zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp, 5225 dmu_object_type_t type) 5226 { 5227 uint64_t refcnt = 0; 5228 int i; 5229 5230 ASSERT(type < ZDB_OT_TOTAL); 5231 5232 if (zilog && zil_bp_tree_add(zilog, bp) != 0) 5233 return; 5234 5235 spa_config_enter(zcb->zcb_spa, SCL_CONFIG, FTAG, RW_READER); 5236 5237 for (i = 0; i < 4; i++) { 5238 int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL; 5239 int t = (i & 1) ? 
type : ZDB_OT_TOTAL; 5240 int equal; 5241 zdb_blkstats_t *zb = &zcb->zcb_type[l][t]; 5242 5243 zb->zb_asize += BP_GET_ASIZE(bp); 5244 zb->zb_lsize += BP_GET_LSIZE(bp); 5245 zb->zb_psize += BP_GET_PSIZE(bp); 5246 zb->zb_count++; 5247 5248 /* 5249 * The histogram is only big enough to record blocks up to 5250 * SPA_OLD_MAXBLOCKSIZE; larger blocks go into the last, 5251 * "other", bucket. 5252 */ 5253 unsigned idx = BP_GET_PSIZE(bp) >> SPA_MINBLOCKSHIFT; 5254 idx = MIN(idx, SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 1); 5255 zb->zb_psize_histogram[idx]++; 5256 5257 zb->zb_gangs += BP_COUNT_GANG(bp); 5258 5259 switch (BP_GET_NDVAS(bp)) { 5260 case 2: 5261 if (DVA_GET_VDEV(&bp->blk_dva[0]) == 5262 DVA_GET_VDEV(&bp->blk_dva[1])) { 5263 zb->zb_ditto_samevdev++; 5264 5265 if (same_metaslab(zcb->zcb_spa, 5266 DVA_GET_VDEV(&bp->blk_dva[0]), 5267 DVA_GET_OFFSET(&bp->blk_dva[0]), 5268 DVA_GET_OFFSET(&bp->blk_dva[1]))) 5269 zb->zb_ditto_same_ms++; 5270 } 5271 break; 5272 case 3: 5273 equal = (DVA_GET_VDEV(&bp->blk_dva[0]) == 5274 DVA_GET_VDEV(&bp->blk_dva[1])) + 5275 (DVA_GET_VDEV(&bp->blk_dva[0]) == 5276 DVA_GET_VDEV(&bp->blk_dva[2])) + 5277 (DVA_GET_VDEV(&bp->blk_dva[1]) == 5278 DVA_GET_VDEV(&bp->blk_dva[2])); 5279 if (equal != 0) { 5280 zb->zb_ditto_samevdev++; 5281 5282 if (DVA_GET_VDEV(&bp->blk_dva[0]) == 5283 DVA_GET_VDEV(&bp->blk_dva[1]) && 5284 same_metaslab(zcb->zcb_spa, 5285 DVA_GET_VDEV(&bp->blk_dva[0]), 5286 DVA_GET_OFFSET(&bp->blk_dva[0]), 5287 DVA_GET_OFFSET(&bp->blk_dva[1]))) 5288 zb->zb_ditto_same_ms++; 5289 else if (DVA_GET_VDEV(&bp->blk_dva[0]) == 5290 DVA_GET_VDEV(&bp->blk_dva[2]) && 5291 same_metaslab(zcb->zcb_spa, 5292 DVA_GET_VDEV(&bp->blk_dva[0]), 5293 DVA_GET_OFFSET(&bp->blk_dva[0]), 5294 DVA_GET_OFFSET(&bp->blk_dva[2]))) 5295 zb->zb_ditto_same_ms++; 5296 else if (DVA_GET_VDEV(&bp->blk_dva[1]) == 5297 DVA_GET_VDEV(&bp->blk_dva[2]) && 5298 same_metaslab(zcb->zcb_spa, 5299 DVA_GET_VDEV(&bp->blk_dva[1]), 5300 DVA_GET_OFFSET(&bp->blk_dva[1]), 5301 DVA_GET_OFFSET(&bp->blk_dva[2]))) 5302 zb->zb_ditto_same_ms++; 5303 } 5304 break; 5305 } 5306 } 5307 5308 spa_config_exit(zcb->zcb_spa, SCL_CONFIG, FTAG); 5309 5310 if (BP_IS_EMBEDDED(bp)) { 5311 zcb->zcb_embedded_blocks[BPE_GET_ETYPE(bp)]++; 5312 zcb->zcb_embedded_histogram[BPE_GET_ETYPE(bp)] 5313 [BPE_GET_PSIZE(bp)]++; 5314 return; 5315 } 5316 /* 5317 * The binning histogram bins by powers of two up to 5318 * SPA_MAXBLOCKSIZE rather than creating bins for 5319 * every possible blocksize found in the pool. 
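 * For example, a 4K (2^12) block is counted in bin
 * highbit64(4096) - 1 == 12.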
5320 */ 5321 int bin = highbit64(BP_GET_PSIZE(bp)) - 1; 5322 5323 zcb->zcb_psize_count[bin]++; 5324 zcb->zcb_psize_len[bin] += BP_GET_PSIZE(bp); 5325 zcb->zcb_psize_total += BP_GET_PSIZE(bp); 5326 5327 bin = highbit64(BP_GET_LSIZE(bp)) - 1; 5328 5329 zcb->zcb_lsize_count[bin]++; 5330 zcb->zcb_lsize_len[bin] += BP_GET_LSIZE(bp); 5331 zcb->zcb_lsize_total += BP_GET_LSIZE(bp); 5332 5333 bin = highbit64(BP_GET_ASIZE(bp)) - 1; 5334 5335 zcb->zcb_asize_count[bin]++; 5336 zcb->zcb_asize_len[bin] += BP_GET_ASIZE(bp); 5337 zcb->zcb_asize_total += BP_GET_ASIZE(bp); 5338 5339 if (dump_opt['L']) 5340 return; 5341 5342 if (BP_GET_DEDUP(bp)) { 5343 ddt_t *ddt; 5344 ddt_entry_t *dde; 5345 5346 ddt = ddt_select(zcb->zcb_spa, bp); 5347 ddt_enter(ddt); 5348 dde = ddt_lookup(ddt, bp, B_FALSE); 5349 5350 if (dde == NULL) { 5351 refcnt = 0; 5352 } else { 5353 ddt_phys_t *ddp = ddt_phys_select(dde, bp); 5354 ddt_phys_decref(ddp); 5355 refcnt = ddp->ddp_refcnt; 5356 if (ddt_phys_total_refcnt(dde) == 0) 5357 ddt_remove(ddt, dde); 5358 } 5359 ddt_exit(ddt); 5360 } 5361 5362 VERIFY3U(zio_wait(zio_claim(NULL, zcb->zcb_spa, 5363 refcnt ? 0 : spa_min_claim_txg(zcb->zcb_spa), 5364 bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0); 5365 } 5366 5367 static void 5368 zdb_blkptr_done(zio_t *zio) 5369 { 5370 spa_t *spa = zio->io_spa; 5371 blkptr_t *bp = zio->io_bp; 5372 int ioerr = zio->io_error; 5373 zdb_cb_t *zcb = zio->io_private; 5374 zbookmark_phys_t *zb = &zio->io_bookmark; 5375 5376 mutex_enter(&spa->spa_scrub_lock); 5377 spa->spa_load_verify_bytes -= BP_GET_PSIZE(bp); 5378 cv_broadcast(&spa->spa_scrub_io_cv); 5379 5380 if (ioerr && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { 5381 char blkbuf[BP_SPRINTF_LEN]; 5382 5383 zcb->zcb_haderrors = 1; 5384 zcb->zcb_errors[ioerr]++; 5385 5386 if (dump_opt['b'] >= 2) 5387 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); 5388 else 5389 blkbuf[0] = '\0'; 5390 5391 (void) printf("zdb_blkptr_cb: " 5392 "Got error %d reading " 5393 "<%llu, %llu, %lld, %llx> %s -- skipping\n", 5394 ioerr, 5395 (u_longlong_t)zb->zb_objset, 5396 (u_longlong_t)zb->zb_object, 5397 (u_longlong_t)zb->zb_level, 5398 (u_longlong_t)zb->zb_blkid, 5399 blkbuf); 5400 } 5401 mutex_exit(&spa->spa_scrub_lock); 5402 5403 abd_free(zio->io_abd); 5404 } 5405 5406 static int 5407 zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, 5408 const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) 5409 { 5410 zdb_cb_t *zcb = arg; 5411 dmu_object_type_t type; 5412 boolean_t is_metadata; 5413 5414 if (zb->zb_level == ZB_DNODE_LEVEL) 5415 return (0); 5416 5417 if (dump_opt['b'] >= 5 && bp->blk_birth > 0) { 5418 char blkbuf[BP_SPRINTF_LEN]; 5419 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); 5420 (void) printf("objset %llu object %llu " 5421 "level %lld offset 0x%llx %s\n", 5422 (u_longlong_t)zb->zb_objset, 5423 (u_longlong_t)zb->zb_object, 5424 (longlong_t)zb->zb_level, 5425 (u_longlong_t)blkid2offset(dnp, bp, zb), 5426 blkbuf); 5427 } 5428 5429 if (BP_IS_HOLE(bp) || BP_IS_REDACTED(bp)) 5430 return (0); 5431 5432 type = BP_GET_TYPE(bp); 5433 5434 zdb_count_block(zcb, zilog, bp, 5435 (type & DMU_OT_NEWTYPE) ? ZDB_OT_OTHER : type); 5436 5437 is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)); 5438 5439 if (!BP_IS_EMBEDDED(bp) && 5440 (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata))) { 5441 size_t size = BP_GET_PSIZE(bp); 5442 abd_t *abd = abd_alloc(size, B_FALSE); 5443 int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW; 5444 5445 /* If it's an intent log block, failure is expected. 
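		 * The block may never have been written, or may have been
		 * freed and reused since the log was claimed, so the read is
		 * flagged ZIO_FLAG_SPECULATIVE and zdb_blkptr_done() does not
		 * count its failure as an error.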
*/ 5446 if (zb->zb_level == ZB_ZIL_LEVEL) 5447 flags |= ZIO_FLAG_SPECULATIVE; 5448 5449 mutex_enter(&spa->spa_scrub_lock); 5450 while (spa->spa_load_verify_bytes > max_inflight_bytes) 5451 cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); 5452 spa->spa_load_verify_bytes += size; 5453 mutex_exit(&spa->spa_scrub_lock); 5454 5455 zio_nowait(zio_read(NULL, spa, bp, abd, size, 5456 zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb)); 5457 } 5458 5459 zcb->zcb_readfails = 0; 5460 5461 /* only call gethrtime() every 100 blocks */ 5462 static int iters; 5463 if (++iters > 100) 5464 iters = 0; 5465 else 5466 return (0); 5467 5468 if (dump_opt['b'] < 5 && gethrtime() > zcb->zcb_lastprint + NANOSEC) { 5469 uint64_t now = gethrtime(); 5470 char buf[10]; 5471 uint64_t bytes = zcb->zcb_type[ZB_TOTAL][ZDB_OT_TOTAL].zb_asize; 5472 int kb_per_sec = 5473 1 + bytes / (1 + ((now - zcb->zcb_start) / 1000 / 1000)); 5474 int sec_remaining = 5475 (zcb->zcb_totalasize - bytes) / 1024 / kb_per_sec; 5476 5477 /* make sure nicenum has enough space */ 5478 CTASSERT(sizeof (buf) >= NN_NUMBUF_SZ); 5479 5480 zfs_nicebytes(bytes, buf, sizeof (buf)); 5481 (void) fprintf(stderr, 5482 "\r%5s completed (%4dMB/s) " 5483 "estimated time remaining: %uhr %02umin %02usec ", 5484 buf, kb_per_sec / 1024, 5485 sec_remaining / 60 / 60, 5486 sec_remaining / 60 % 60, 5487 sec_remaining % 60); 5488 5489 zcb->zcb_lastprint = now; 5490 } 5491 5492 return (0); 5493 } 5494 5495 static void 5496 zdb_leak(void *arg, uint64_t start, uint64_t size) 5497 { 5498 vdev_t *vd = arg; 5499 5500 (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n", 5501 (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size); 5502 } 5503 5504 static metaslab_ops_t zdb_metaslab_ops = { 5505 NULL /* alloc */ 5506 }; 5507 5508 /* ARGSUSED */ 5509 static int 5510 load_unflushed_svr_segs_cb(spa_t *spa, space_map_entry_t *sme, 5511 uint64_t txg, void *arg) 5512 { 5513 spa_vdev_removal_t *svr = arg; 5514 5515 uint64_t offset = sme->sme_offset; 5516 uint64_t size = sme->sme_run; 5517 5518 /* skip vdevs we don't care about */ 5519 if (sme->sme_vdev != svr->svr_vdev_id) 5520 return (0); 5521 5522 vdev_t *vd = vdev_lookup_top(spa, sme->sme_vdev); 5523 metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift]; 5524 ASSERT(sme->sme_type == SM_ALLOC || sme->sme_type == SM_FREE); 5525 5526 if (txg < metaslab_unflushed_txg(ms)) 5527 return (0); 5528 5529 if (sme->sme_type == SM_ALLOC) 5530 range_tree_add(svr->svr_allocd_segs, offset, size); 5531 else 5532 range_tree_remove(svr->svr_allocd_segs, offset, size); 5533 5534 return (0); 5535 } 5536 5537 /* ARGSUSED */ 5538 static void 5539 claim_segment_impl_cb(uint64_t inner_offset, vdev_t *vd, uint64_t offset, 5540 uint64_t size, void *arg) 5541 { 5542 /* 5543 * This callback was called through a remap from 5544 * a device being removed. Therefore, the vdev that 5545 * this callback is applied to is a concrete 5546 * vdev. 5547 */ 5548 ASSERT(vdev_is_concrete(vd)); 5549 5550 VERIFY0(metaslab_claim_impl(vd, offset, size, 5551 spa_min_claim_txg(vd->vdev_spa))); 5552 } 5553 5554 static void 5555 claim_segment_cb(void *arg, uint64_t offset, uint64_t size) 5556 { 5557 vdev_t *vd = arg; 5558 5559 vdev_indirect_ops.vdev_op_remap(vd, offset, size, 5560 claim_segment_impl_cb, NULL); 5561 } 5562 5563 /* 5564 * After accounting for all allocated blocks that are directly referenced, 5565 * we might have missed a reference to a block from a partially complete 5566 * (and thus unused) indirect mapping object. 
We perform a secondary pass 5567 * through the metaslabs we have already mapped and claim the destination 5568 * blocks. 5569 */ 5570 static void 5571 zdb_claim_removing(spa_t *spa, zdb_cb_t *zcb) 5572 { 5573 if (dump_opt['L']) 5574 return; 5575 5576 if (spa->spa_vdev_removal == NULL) 5577 return; 5578 5579 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 5580 5581 spa_vdev_removal_t *svr = spa->spa_vdev_removal; 5582 vdev_t *vd = vdev_lookup_top(spa, svr->svr_vdev_id); 5583 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; 5584 5585 ASSERT0(range_tree_space(svr->svr_allocd_segs)); 5586 5587 range_tree_t *allocs = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0); 5588 for (uint64_t msi = 0; msi < vd->vdev_ms_count; msi++) { 5589 metaslab_t *msp = vd->vdev_ms[msi]; 5590 5591 ASSERT0(range_tree_space(allocs)); 5592 if (msp->ms_sm != NULL) 5593 VERIFY0(space_map_load(msp->ms_sm, allocs, SM_ALLOC)); 5594 range_tree_vacate(allocs, range_tree_add, svr->svr_allocd_segs); 5595 } 5596 range_tree_destroy(allocs); 5597 5598 iterate_through_spacemap_logs(spa, load_unflushed_svr_segs_cb, svr); 5599 5600 /* 5601 * Clear everything past what has been synced, 5602 * because we have not allocated mappings for 5603 * it yet. 5604 */ 5605 range_tree_clear(svr->svr_allocd_segs, 5606 vdev_indirect_mapping_max_offset(vim), 5607 vd->vdev_asize - vdev_indirect_mapping_max_offset(vim)); 5608 5609 zcb->zcb_removing_size += range_tree_space(svr->svr_allocd_segs); 5610 range_tree_vacate(svr->svr_allocd_segs, claim_segment_cb, vd); 5611 5612 spa_config_exit(spa, SCL_CONFIG, FTAG); 5613 } 5614 5615 /* ARGSUSED */ 5616 static int 5617 increment_indirect_mapping_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, 5618 dmu_tx_t *tx) 5619 { 5620 zdb_cb_t *zcb = arg; 5621 spa_t *spa = zcb->zcb_spa; 5622 vdev_t *vd; 5623 const dva_t *dva = &bp->blk_dva[0]; 5624 5625 ASSERT(!bp_freed); 5626 ASSERT(!dump_opt['L']); 5627 ASSERT3U(BP_GET_NDVAS(bp), ==, 1); 5628 5629 spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 5630 vd = vdev_lookup_top(zcb->zcb_spa, DVA_GET_VDEV(dva)); 5631 ASSERT3P(vd, !=, NULL); 5632 spa_config_exit(spa, SCL_VDEV, FTAG); 5633 5634 ASSERT(vd->vdev_indirect_config.vic_mapping_object != 0); 5635 ASSERT3P(zcb->zcb_vd_obsolete_counts[vd->vdev_id], !=, NULL); 5636 5637 vdev_indirect_mapping_increment_obsolete_count( 5638 vd->vdev_indirect_mapping, 5639 DVA_GET_OFFSET(dva), DVA_GET_ASIZE(dva), 5640 zcb->zcb_vd_obsolete_counts[vd->vdev_id]); 5641 5642 return (0); 5643 } 5644 5645 static uint32_t * 5646 zdb_load_obsolete_counts(vdev_t *vd) 5647 { 5648 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; 5649 spa_t *spa = vd->vdev_spa; 5650 spa_condensing_indirect_phys_t *scip = 5651 &spa->spa_condensing_indirect_phys; 5652 uint64_t obsolete_sm_object; 5653 uint32_t *counts; 5654 5655 VERIFY0(vdev_obsolete_sm_object(vd, &obsolete_sm_object)); 5656 EQUIV(obsolete_sm_object != 0, vd->vdev_obsolete_sm != NULL); 5657 counts = vdev_indirect_mapping_load_obsolete_counts(vim); 5658 if (vd->vdev_obsolete_sm != NULL) { 5659 vdev_indirect_mapping_load_obsolete_spacemap(vim, counts, 5660 vd->vdev_obsolete_sm); 5661 } 5662 if (scip->scip_vdev == vd->vdev_id && 5663 scip->scip_prev_obsolete_sm_object != 0) { 5664 space_map_t *prev_obsolete_sm = NULL; 5665 VERIFY0(space_map_open(&prev_obsolete_sm, spa->spa_meta_objset, 5666 scip->scip_prev_obsolete_sm_object, 0, vd->vdev_asize, 0)); 5667 vdev_indirect_mapping_load_obsolete_spacemap(vim, counts, 5668 prev_obsolete_sm); 5669 space_map_close(prev_obsolete_sm); 
5670 } 5671 return (counts); 5672 } 5673 5674 static void 5675 zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb) 5676 { 5677 ddt_bookmark_t ddb; 5678 ddt_entry_t dde; 5679 int error; 5680 int p; 5681 5682 ASSERT(!dump_opt['L']); 5683 5684 bzero(&ddb, sizeof (ddb)); 5685 while ((error = ddt_walk(spa, &ddb, &dde)) == 0) { 5686 blkptr_t blk; 5687 ddt_phys_t *ddp = dde.dde_phys; 5688 5689 if (ddb.ddb_class == DDT_CLASS_UNIQUE) 5690 return; 5691 5692 ASSERT(ddt_phys_total_refcnt(&dde) > 1); 5693 5694 for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { 5695 if (ddp->ddp_phys_birth == 0) 5696 continue; 5697 ddt_bp_create(ddb.ddb_checksum, 5698 &dde.dde_key, ddp, &blk); 5699 if (p == DDT_PHYS_DITTO) { 5700 zdb_count_block(zcb, NULL, &blk, ZDB_OT_DITTO); 5701 } else { 5702 zcb->zcb_dedup_asize += 5703 BP_GET_ASIZE(&blk) * (ddp->ddp_refcnt - 1); 5704 zcb->zcb_dedup_blocks++; 5705 } 5706 } 5707 ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum]; 5708 ddt_enter(ddt); 5709 VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL); 5710 ddt_exit(ddt); 5711 } 5712 5713 ASSERT(error == ENOENT); 5714 } 5715 5716 typedef struct checkpoint_sm_exclude_entry_arg { 5717 vdev_t *cseea_vd; 5718 uint64_t cseea_checkpoint_size; 5719 } checkpoint_sm_exclude_entry_arg_t; 5720 5721 static int 5722 checkpoint_sm_exclude_entry_cb(space_map_entry_t *sme, void *arg) 5723 { 5724 checkpoint_sm_exclude_entry_arg_t *cseea = arg; 5725 vdev_t *vd = cseea->cseea_vd; 5726 metaslab_t *ms = vd->vdev_ms[sme->sme_offset >> vd->vdev_ms_shift]; 5727 uint64_t end = sme->sme_offset + sme->sme_run; 5728 5729 ASSERT(sme->sme_type == SM_FREE); 5730 5731 /* 5732 * Since the vdev_checkpoint_sm exists in the vdev level 5733 * and the ms_sm space maps exist in the metaslab level, 5734 * an entry in the checkpoint space map could theoretically 5735 * cross the boundaries of the metaslab that it belongs. 5736 * 5737 * In reality, because of the way that we populate and 5738 * manipulate the checkpoint's space maps currently, 5739 * there shouldn't be any entries that cross metaslabs. 5740 * Hence the assertion below. 5741 * 5742 * That said, there is no fundamental requirement that 5743 * the checkpoint's space map entries should not cross 5744 * metaslab boundaries. So if needed we could add code 5745 * that handles metaslab-crossing segments in the future. 5746 */ 5747 VERIFY3U(sme->sme_offset, >=, ms->ms_start); 5748 VERIFY3U(end, <=, ms->ms_start + ms->ms_size); 5749 5750 /* 5751 * By removing the entry from the allocated segments we 5752 * also verify that the entry is there to begin with. 5753 */ 5754 mutex_enter(&ms->ms_lock); 5755 range_tree_remove(ms->ms_allocatable, sme->sme_offset, sme->sme_run); 5756 mutex_exit(&ms->ms_lock); 5757 5758 cseea->cseea_checkpoint_size += sme->sme_run; 5759 return (0); 5760 } 5761 5762 static void 5763 zdb_leak_init_vdev_exclude_checkpoint(vdev_t *vd, zdb_cb_t *zcb) 5764 { 5765 spa_t *spa = vd->vdev_spa; 5766 space_map_t *checkpoint_sm = NULL; 5767 uint64_t checkpoint_sm_obj; 5768 5769 /* 5770 * If there is no vdev_top_zap, we are in a pool whose 5771 * version predates the pool checkpoint feature. 5772 */ 5773 if (vd->vdev_top_zap == 0) 5774 return; 5775 5776 /* 5777 * If there is no reference of the vdev_checkpoint_sm in 5778 * the vdev_top_zap, then one of the following scenarios 5779 * is true: 5780 * 5781 * 1] There is no checkpoint 5782 * 2] There is a checkpoint, but no checkpointed blocks 5783 * have been freed yet 5784 * 3] The current vdev is indirect 5785 * 5786 * In these cases we return immediately. 
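 * (Only concrete top-level vdevs from which checkpointed blocks have
 * already been freed carry a vdev_checkpoint_sm.)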
5787 */ 5788 if (zap_contains(spa_meta_objset(spa), vd->vdev_top_zap, 5789 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0) 5790 return; 5791 5792 VERIFY0(zap_lookup(spa_meta_objset(spa), vd->vdev_top_zap, 5793 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM, sizeof (uint64_t), 1, 5794 &checkpoint_sm_obj)); 5795 5796 checkpoint_sm_exclude_entry_arg_t cseea; 5797 cseea.cseea_vd = vd; 5798 cseea.cseea_checkpoint_size = 0; 5799 5800 VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(spa), 5801 checkpoint_sm_obj, 0, vd->vdev_asize, vd->vdev_ashift)); 5802 5803 VERIFY0(space_map_iterate(checkpoint_sm, 5804 space_map_length(checkpoint_sm), 5805 checkpoint_sm_exclude_entry_cb, &cseea)); 5806 space_map_close(checkpoint_sm); 5807 5808 zcb->zcb_checkpoint_size += cseea.cseea_checkpoint_size; 5809 } 5810 5811 static void 5812 zdb_leak_init_exclude_checkpoint(spa_t *spa, zdb_cb_t *zcb) 5813 { 5814 ASSERT(!dump_opt['L']); 5815 5816 vdev_t *rvd = spa->spa_root_vdev; 5817 for (uint64_t c = 0; c < rvd->vdev_children; c++) { 5818 ASSERT3U(c, ==, rvd->vdev_child[c]->vdev_id); 5819 zdb_leak_init_vdev_exclude_checkpoint(rvd->vdev_child[c], zcb); 5820 } 5821 } 5822 5823 static int 5824 count_unflushed_space_cb(spa_t *spa, space_map_entry_t *sme, 5825 uint64_t txg, void *arg) 5826 { 5827 int64_t *ualloc_space = arg; 5828 5829 uint64_t offset = sme->sme_offset; 5830 uint64_t vdev_id = sme->sme_vdev; 5831 5832 vdev_t *vd = vdev_lookup_top(spa, vdev_id); 5833 if (!vdev_is_concrete(vd)) 5834 return (0); 5835 5836 metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift]; 5837 ASSERT(sme->sme_type == SM_ALLOC || sme->sme_type == SM_FREE); 5838 5839 if (txg < metaslab_unflushed_txg(ms)) 5840 return (0); 5841 5842 if (sme->sme_type == SM_ALLOC) 5843 *ualloc_space += sme->sme_run; 5844 else 5845 *ualloc_space -= sme->sme_run; 5846 5847 return (0); 5848 } 5849 5850 static int64_t 5851 get_unflushed_alloc_space(spa_t *spa) 5852 { 5853 if (dump_opt['L']) 5854 return (0); 5855 5856 int64_t ualloc_space = 0; 5857 iterate_through_spacemap_logs(spa, count_unflushed_space_cb, 5858 &ualloc_space); 5859 return (ualloc_space); 5860 } 5861 5862 static int 5863 load_unflushed_cb(spa_t *spa, space_map_entry_t *sme, uint64_t txg, void *arg) 5864 { 5865 maptype_t *uic_maptype = arg; 5866 5867 uint64_t offset = sme->sme_offset; 5868 uint64_t size = sme->sme_run; 5869 uint64_t vdev_id = sme->sme_vdev; 5870 5871 vdev_t *vd = vdev_lookup_top(spa, vdev_id); 5872 5873 /* skip indirect vdevs */ 5874 if (!vdev_is_concrete(vd)) 5875 return (0); 5876 5877 metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift]; 5878 5879 ASSERT(sme->sme_type == SM_ALLOC || sme->sme_type == SM_FREE); 5880 ASSERT(*uic_maptype == SM_ALLOC || *uic_maptype == SM_FREE); 5881 5882 if (txg < metaslab_unflushed_txg(ms)) 5883 return (0); 5884 5885 if (*uic_maptype == sme->sme_type) 5886 range_tree_add(ms->ms_allocatable, offset, size); 5887 else 5888 range_tree_remove(ms->ms_allocatable, offset, size); 5889 5890 return (0); 5891 } 5892 5893 static void 5894 load_unflushed_to_ms_allocatables(spa_t *spa, maptype_t maptype) 5895 { 5896 iterate_through_spacemap_logs(spa, load_unflushed_cb, &maptype); 5897 } 5898 5899 static void 5900 load_concrete_ms_allocatable_trees(spa_t *spa, maptype_t maptype) 5901 { 5902 vdev_t *rvd = spa->spa_root_vdev; 5903 for (uint64_t i = 0; i < rvd->vdev_children; i++) { 5904 vdev_t *vd = rvd->vdev_child[i]; 5905 5906 ASSERT3U(i, ==, vd->vdev_id); 5907 5908 if (vd->vdev_ops == &vdev_indirect_ops) 5909 continue; 5910 5911 for (uint64_t m = 0; m < vd->vdev_ms_count; 
m++) { 5912 metaslab_t *msp = vd->vdev_ms[m]; 5913 5914 (void) fprintf(stderr, 5915 "\rloading concrete vdev %llu, " 5916 "metaslab %llu of %llu ...", 5917 (longlong_t)vd->vdev_id, 5918 (longlong_t)msp->ms_id, 5919 (longlong_t)vd->vdev_ms_count); 5920 5921 mutex_enter(&msp->ms_lock); 5922 range_tree_vacate(msp->ms_allocatable, NULL, NULL); 5923 5924 /* 5925 * We don't want to spend the CPU manipulating the 5926 * size-ordered tree, so clear the range_tree ops. 5927 */ 5928 msp->ms_allocatable->rt_ops = NULL; 5929 5930 if (msp->ms_sm != NULL) { 5931 VERIFY0(space_map_load(msp->ms_sm, 5932 msp->ms_allocatable, maptype)); 5933 } 5934 if (!msp->ms_loaded) 5935 msp->ms_loaded = B_TRUE; 5936 mutex_exit(&msp->ms_lock); 5937 } 5938 } 5939 5940 load_unflushed_to_ms_allocatables(spa, maptype); 5941 } 5942 5943 /* 5944 * vim_idxp is an in-out parameter which (for indirect vdevs) is the 5945 * index in vim_entries that has the first entry in this metaslab. 5946 * On return, it will be set to the first entry after this metaslab. 5947 */ 5948 static void 5949 load_indirect_ms_allocatable_tree(vdev_t *vd, metaslab_t *msp, 5950 uint64_t *vim_idxp) 5951 { 5952 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; 5953 5954 mutex_enter(&msp->ms_lock); 5955 range_tree_vacate(msp->ms_allocatable, NULL, NULL); 5956 5957 /* 5958 * We don't want to spend the CPU manipulating the 5959 * size-ordered tree, so clear the range_tree ops. 5960 */ 5961 msp->ms_allocatable->rt_ops = NULL; 5962 5963 for (; *vim_idxp < vdev_indirect_mapping_num_entries(vim); 5964 (*vim_idxp)++) { 5965 vdev_indirect_mapping_entry_phys_t *vimep = 5966 &vim->vim_entries[*vim_idxp]; 5967 uint64_t ent_offset = DVA_MAPPING_GET_SRC_OFFSET(vimep); 5968 uint64_t ent_len = DVA_GET_ASIZE(&vimep->vimep_dst); 5969 ASSERT3U(ent_offset, >=, msp->ms_start); 5970 if (ent_offset >= msp->ms_start + msp->ms_size) 5971 break; 5972 5973 /* 5974 * Mappings do not cross metaslab boundaries, 5975 * because we create them by walking the metaslabs. 5976 */ 5977 ASSERT3U(ent_offset + ent_len, <=, 5978 msp->ms_start + msp->ms_size); 5979 range_tree_add(msp->ms_allocatable, ent_offset, ent_len); 5980 } 5981 5982 if (!msp->ms_loaded) 5983 msp->ms_loaded = B_TRUE; 5984 mutex_exit(&msp->ms_lock); 5985 } 5986 5987 static void 5988 zdb_leak_init_prepare_indirect_vdevs(spa_t *spa, zdb_cb_t *zcb) 5989 { 5990 ASSERT(!dump_opt['L']); 5991 5992 vdev_t *rvd = spa->spa_root_vdev; 5993 for (uint64_t c = 0; c < rvd->vdev_children; c++) { 5994 vdev_t *vd = rvd->vdev_child[c]; 5995 5996 ASSERT3U(c, ==, vd->vdev_id); 5997 5998 if (vd->vdev_ops != &vdev_indirect_ops) 5999 continue; 6000 6001 /* 6002 * Note: we don't check for mapping leaks on 6003 * removing vdevs because their ms_allocatable's 6004 * are used to look for leaks in allocated space. 6005 */ 6006 zcb->zcb_vd_obsolete_counts[c] = zdb_load_obsolete_counts(vd); 6007 6008 /* 6009 * Normally, indirect vdevs don't have any 6010 * metaslabs. We want to set them up for 6011 * zio_claim().
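 * The traversal in dump_block_stats() claims every block it
 * visits; claims that resolve to an indirect vdev land in these
 * synthetic metaslabs, whose ms_allocatable trees are populated
 * from the indirect mapping (via load_indirect_ms_allocatable_tree()
 * below) rather than from space maps.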
6012 */ 6013 vdev_metaslab_group_create(vd); 6014 VERIFY0(vdev_metaslab_init(vd, 0)); 6015 6016 vdev_indirect_mapping_t *vim __maybe_unused = 6017 vd->vdev_indirect_mapping; 6018 uint64_t vim_idx = 0; 6019 for (uint64_t m = 0; m < vd->vdev_ms_count; m++) { 6020 6021 (void) fprintf(stderr, 6022 "\rloading indirect vdev %llu, " 6023 "metaslab %llu of %llu ...", 6024 (longlong_t)vd->vdev_id, 6025 (longlong_t)vd->vdev_ms[m]->ms_id, 6026 (longlong_t)vd->vdev_ms_count); 6027 6028 load_indirect_ms_allocatable_tree(vd, vd->vdev_ms[m], 6029 &vim_idx); 6030 } 6031 ASSERT3U(vim_idx, ==, vdev_indirect_mapping_num_entries(vim)); 6032 } 6033 } 6034 6035 static void 6036 zdb_leak_init(spa_t *spa, zdb_cb_t *zcb) 6037 { 6038 zcb->zcb_spa = spa; 6039 6040 if (dump_opt['L']) 6041 return; 6042 6043 dsl_pool_t *dp = spa->spa_dsl_pool; 6044 vdev_t *rvd = spa->spa_root_vdev; 6045 6046 /* 6047 * We are going to be changing the meaning of the metaslab's 6048 * ms_allocatable. Ensure that the allocator doesn't try to 6049 * use the tree. 6050 */ 6051 spa->spa_normal_class->mc_ops = &zdb_metaslab_ops; 6052 spa->spa_log_class->mc_ops = &zdb_metaslab_ops; 6053 spa->spa_embedded_log_class->mc_ops = &zdb_metaslab_ops; 6054 6055 zcb->zcb_vd_obsolete_counts = 6056 umem_zalloc(rvd->vdev_children * sizeof (uint32_t *), 6057 UMEM_NOFAIL); 6058 6059 /* 6060 * For leak detection, we overload the ms_allocatable trees 6061 * to contain allocated segments instead of free segments. 6062 * As a result, we can't use the normal metaslab_load/unload 6063 * interfaces. 6064 */ 6065 zdb_leak_init_prepare_indirect_vdevs(spa, zcb); 6066 load_concrete_ms_allocatable_trees(spa, SM_ALLOC); 6067 6068 /* 6069 * On load_concrete_ms_allocatable_trees() we loaded all the 6070 * allocated entries from the ms_sm to the ms_allocatable for 6071 * each metaslab. If the pool has a checkpoint or is in the 6072 * middle of discarding a checkpoint, some of these blocks 6073 * may have been freed but their ms_sm may not have been 6074 * updated because they are referenced by the checkpoint. In 6075 * order to avoid false-positives during leak-detection, we 6076 * go through the vdev's checkpoint space map and exclude all 6077 * its entries from their relevant ms_allocatable. 6078 * 6079 * We also aggregate the space held by the checkpoint and add 6080 * it to zcb_checkpoint_size. 6081 * 6082 * Note that at this point we are also verifying that all the 6083 * entries on the checkpoint_sm are marked as allocated in 6084 * the ms_sm of their relevant metaslab. 
* [see comment in checkpoint_sm_exclude_entry_cb()] 6086 */ 6087 zdb_leak_init_exclude_checkpoint(spa, zcb); 6088 ASSERT3U(zcb->zcb_checkpoint_size, ==, spa_get_checkpoint_space(spa)); 6089 6090 /* for cleaner progress output */ 6091 (void) fprintf(stderr, "\n"); 6092 6093 if (bpobj_is_open(&dp->dp_obsolete_bpobj)) { 6094 ASSERT(spa_feature_is_enabled(spa, 6095 SPA_FEATURE_DEVICE_REMOVAL)); 6096 (void) bpobj_iterate_nofree(&dp->dp_obsolete_bpobj, 6097 increment_indirect_mapping_cb, zcb, NULL); 6098 } 6099 6100 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 6101 zdb_ddt_leak_init(spa, zcb); 6102 spa_config_exit(spa, SCL_CONFIG, FTAG); 6103 } 6104 6105 static boolean_t 6106 zdb_check_for_obsolete_leaks(vdev_t *vd, zdb_cb_t *zcb) 6107 { 6108 boolean_t leaks = B_FALSE; 6109 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; 6110 uint64_t total_leaked = 0; 6111 boolean_t are_precise = B_FALSE; 6112 6113 ASSERT(vim != NULL); 6114 6115 for (uint64_t i = 0; i < vdev_indirect_mapping_num_entries(vim); i++) { 6116 vdev_indirect_mapping_entry_phys_t *vimep = 6117 &vim->vim_entries[i]; 6118 uint64_t obsolete_bytes = 0; 6119 uint64_t offset = DVA_MAPPING_GET_SRC_OFFSET(vimep); 6120 metaslab_t *msp = vd->vdev_ms[offset >> vd->vdev_ms_shift]; 6121 6122 /* 6123 * This is not very efficient but it's easy to 6124 * verify correctness. 6125 */ 6126 for (uint64_t inner_offset = 0; 6127 inner_offset < DVA_GET_ASIZE(&vimep->vimep_dst); 6128 inner_offset += 1 << vd->vdev_ashift) { 6129 if (range_tree_contains(msp->ms_allocatable, 6130 offset + inner_offset, 1 << vd->vdev_ashift)) { 6131 obsolete_bytes += 1 << vd->vdev_ashift; 6132 } 6133 } 6134 6135 int64_t bytes_leaked = obsolete_bytes - 6136 zcb->zcb_vd_obsolete_counts[vd->vdev_id][i]; 6137 ASSERT3U(DVA_GET_ASIZE(&vimep->vimep_dst), >=, 6138 zcb->zcb_vd_obsolete_counts[vd->vdev_id][i]); 6139 6140 VERIFY0(vdev_obsolete_counts_are_precise(vd, &are_precise)); 6141 if (bytes_leaked != 0 && (are_precise || dump_opt['d'] >= 5)) { 6142 (void) printf("obsolete indirect mapping count " 6143 "mismatch on %llu:%llx:%llx : %llx bytes leaked\n", 6144 (u_longlong_t)vd->vdev_id, 6145 (u_longlong_t)DVA_MAPPING_GET_SRC_OFFSET(vimep), 6146 (u_longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst), 6147 (u_longlong_t)bytes_leaked); 6148 } 6149 total_leaked += ABS(bytes_leaked); 6150 } 6151 6152 VERIFY0(vdev_obsolete_counts_are_precise(vd, &are_precise)); 6153 if (!are_precise && total_leaked > 0) { 6154 int pct_leaked = total_leaked * 100 / 6155 vdev_indirect_mapping_bytes_mapped(vim); 6156 (void) printf("cannot verify obsolete indirect mapping " 6157 "counts of vdev %llu because precise feature was not " 6158 "enabled when it was removed: %d%% (%llx bytes) of mapping " 6159 "unreferenced\n", 6160 (u_longlong_t)vd->vdev_id, pct_leaked, 6161 (u_longlong_t)total_leaked); 6162 } else if (total_leaked > 0) { 6163 (void) printf("obsolete indirect mapping count mismatch " 6164 "for vdev %llu -- %llx total bytes mismatched\n", 6165 (u_longlong_t)vd->vdev_id, 6166 (u_longlong_t)total_leaked); 6167 leaks |= B_TRUE; 6168 } 6169 6170 vdev_indirect_mapping_free_obsolete_counts(vim, 6171 zcb->zcb_vd_obsolete_counts[vd->vdev_id]); 6172 zcb->zcb_vd_obsolete_counts[vd->vdev_id] = NULL; 6173 6174 return (leaks); 6175 } 6176 6177 static boolean_t 6178 zdb_leak_fini(spa_t *spa, zdb_cb_t *zcb) 6179 { 6180 if (dump_opt['L']) 6181 return (B_FALSE); 6182 6183 boolean_t leaks = B_FALSE; 6184 vdev_t *rvd = spa->spa_root_vdev; 6185 for (unsigned c = 0; c < rvd->vdev_children; c++) { 6186 vdev_t *vd
= rvd->vdev_child[c]; 6187 6188 if (zcb->zcb_vd_obsolete_counts[c] != NULL) { 6189 leaks |= zdb_check_for_obsolete_leaks(vd, zcb); 6190 } 6191 6192 for (uint64_t m = 0; m < vd->vdev_ms_count; m++) { 6193 metaslab_t *msp = vd->vdev_ms[m]; 6194 ASSERT3P(msp->ms_group, ==, (msp->ms_group->mg_class == 6195 spa_embedded_log_class(spa)) ? 6196 vd->vdev_log_mg : vd->vdev_mg); 6197 6198 /* 6199 * ms_allocatable has been overloaded 6200 * to contain allocated segments. Now that 6201 * we finished traversing all blocks, any 6202 * block that remains in the ms_allocatable 6203 * represents an allocated block that we 6204 * did not claim during the traversal. 6205 * Claimed blocks would have been removed 6206 * from the ms_allocatable. For indirect 6207 * vdevs, space remaining in the tree 6208 * represents parts of the mapping that are 6209 * not referenced, which is not a bug. 6210 */ 6211 if (vd->vdev_ops == &vdev_indirect_ops) { 6212 range_tree_vacate(msp->ms_allocatable, 6213 NULL, NULL); 6214 } else { 6215 range_tree_vacate(msp->ms_allocatable, 6216 zdb_leak, vd); 6217 } 6218 if (msp->ms_loaded) { 6219 msp->ms_loaded = B_FALSE; 6220 } 6221 } 6222 } 6223 6224 umem_free(zcb->zcb_vd_obsolete_counts, 6225 rvd->vdev_children * sizeof (uint32_t *)); 6226 zcb->zcb_vd_obsolete_counts = NULL; 6227 6228 return (leaks); 6229 } 6230 6231 /* ARGSUSED */ 6232 static int 6233 count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) 6234 { 6235 zdb_cb_t *zcb = arg; 6236 6237 if (dump_opt['b'] >= 5) { 6238 char blkbuf[BP_SPRINTF_LEN]; 6239 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); 6240 (void) printf("[%s] %s\n", 6241 "deferred free", blkbuf); 6242 } 6243 zdb_count_block(zcb, NULL, bp, ZDB_OT_DEFERRED); 6244 return (0); 6245 } 6246 6247 /* 6248 * Iterate over livelists which have been destroyed by the user but 6249 * are still present in the MOS, waiting to be freed 6250 */ 6251 static void 6252 iterate_deleted_livelists(spa_t *spa, ll_iter_t func, void *arg) 6253 { 6254 objset_t *mos = spa->spa_meta_objset; 6255 uint64_t zap_obj; 6256 int err = zap_lookup(mos, DMU_POOL_DIRECTORY_OBJECT, 6257 DMU_POOL_DELETED_CLONES, sizeof (uint64_t), 1, &zap_obj); 6258 if (err == ENOENT) 6259 return; 6260 ASSERT0(err); 6261 6262 zap_cursor_t zc; 6263 zap_attribute_t attr; 6264 dsl_deadlist_t ll; 6265 /* NULL out os prior to dsl_deadlist_open in case it's garbage */ 6266 ll.dl_os = NULL; 6267 for (zap_cursor_init(&zc, mos, zap_obj); 6268 zap_cursor_retrieve(&zc, &attr) == 0; 6269 (void) zap_cursor_advance(&zc)) { 6270 dsl_deadlist_open(&ll, mos, attr.za_first_integer); 6271 func(&ll, arg); 6272 dsl_deadlist_close(&ll); 6273 } 6274 zap_cursor_fini(&zc); 6275 } 6276 6277 static int 6278 bpobj_count_block_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, 6279 dmu_tx_t *tx) 6280 { 6281 ASSERT(!bp_freed); 6282 return (count_block_cb(arg, bp, tx)); 6283 } 6284 6285 static int 6286 livelist_entry_count_blocks_cb(void *args, dsl_deadlist_entry_t *dle) 6287 { 6288 zdb_cb_t *zbc = args; 6289 bplist_t blks; 6290 bplist_create(&blks); 6291 /* determine which blocks have been alloc'd but not freed */ 6292 VERIFY0(dsl_process_sub_livelist(&dle->dle_bpobj, &blks, NULL, NULL)); 6293 /* count those blocks */ 6294 (void) bplist_iterate(&blks, count_block_cb, zbc, NULL); 6295 bplist_destroy(&blks); 6296 return (0); 6297 } 6298 6299 static void 6300 livelist_count_blocks(dsl_deadlist_t *ll, void *arg) 6301 { 6302 dsl_deadlist_iterate(ll, livelist_entry_count_blocks_cb, arg); 6303 } 6304 6305 /* 6306 * Count the blocks in the livelists 
that have been destroyed by the user 6307 * but haven't yet been freed. 6308 */ 6309 static void 6310 deleted_livelists_count_blocks(spa_t *spa, zdb_cb_t *zbc) 6311 { 6312 iterate_deleted_livelists(spa, livelist_count_blocks, zbc); 6313 } 6314 6315 static void 6316 dump_livelist_cb(dsl_deadlist_t *ll, void *arg) 6317 { 6318 ASSERT3P(arg, ==, NULL); 6319 global_feature_count[SPA_FEATURE_LIVELIST]++; 6320 dump_blkptr_list(ll, "Deleted Livelist"); 6321 dsl_deadlist_iterate(ll, sublivelist_verify_lightweight, NULL); 6322 } 6323 6324 /* 6325 * Print out, register object references to, and increment feature counts for 6326 * livelists that have been destroyed by the user but haven't yet been freed. 6327 */ 6328 static void 6329 deleted_livelists_dump_mos(spa_t *spa) 6330 { 6331 uint64_t zap_obj; 6332 objset_t *mos = spa->spa_meta_objset; 6333 int err = zap_lookup(mos, DMU_POOL_DIRECTORY_OBJECT, 6334 DMU_POOL_DELETED_CLONES, sizeof (uint64_t), 1, &zap_obj); 6335 if (err == ENOENT) 6336 return; 6337 mos_obj_refd(zap_obj); 6338 iterate_deleted_livelists(spa, dump_livelist_cb, NULL); 6339 } 6340 6341 static int 6342 dump_block_stats(spa_t *spa) 6343 { 6344 zdb_cb_t zcb; 6345 zdb_blkstats_t *zb, *tzb; 6346 uint64_t norm_alloc, norm_space, total_alloc, total_found; 6347 int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | 6348 TRAVERSE_NO_DECRYPT | TRAVERSE_HARD; 6349 boolean_t leaks = B_FALSE; 6350 int e, c, err; 6351 bp_embedded_type_t i; 6352 6353 bzero(&zcb, sizeof (zcb)); 6354 (void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n", 6355 (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "", 6356 (dump_opt['c'] == 1) ? "metadata " : "", 6357 dump_opt['c'] ? "checksums " : "", 6358 (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "", 6359 !dump_opt['L'] ? "nothing leaked " : ""); 6360 6361 /* 6362 * When leak detection is enabled we load all space maps as SM_ALLOC 6363 * maps, then traverse the pool claiming each block we discover. If 6364 * the pool is perfectly consistent, the segment trees will be empty 6365 * when we're done. Anything left over is a leak; any block we can't 6366 * claim (because it's not part of any space map) is a double 6367 * allocation, reference to a freed block, or an unclaimed log block. 6368 * 6369 * When leak detection is disabled (-L option) we still traverse the 6370 * pool claiming each block we discover, but we skip opening any space 6371 * maps. 6372 */ 6373 6374 zdb_leak_init(spa, &zcb); 6375 6376 /* 6377 * If there's a deferred-free bplist, process that first.
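 * Blocks on that list are still charged to the space maps, so
 * they are counted here (as ZDB_OT_DEFERRED) to keep the
 * accounting identity verified at the end of this function
 * balanced:
 *
 *	total_alloc = allocations of the normal, log, embedded log,
 *	    special and dedup classes + unflushed log space
 *	total_found = tzb->zb_asize - zcb_dedup_asize +
 *	    zcb_removing_size + zcb_checkpoint_size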
6378 */ 6379 (void) bpobj_iterate_nofree(&spa->spa_deferred_bpobj, 6380 bpobj_count_block_cb, &zcb, NULL); 6381 6382 if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { 6383 (void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj, 6384 bpobj_count_block_cb, &zcb, NULL); 6385 } 6386 6387 zdb_claim_removing(spa, &zcb); 6388 6389 if (spa_feature_is_active(spa, SPA_FEATURE_ASYNC_DESTROY)) { 6390 VERIFY3U(0, ==, bptree_iterate(spa->spa_meta_objset, 6391 spa->spa_dsl_pool->dp_bptree_obj, B_FALSE, count_block_cb, 6392 &zcb, NULL)); 6393 } 6394 6395 deleted_livelists_count_blocks(spa, &zcb); 6396 6397 if (dump_opt['c'] > 1) 6398 flags |= TRAVERSE_PREFETCH_DATA; 6399 6400 zcb.zcb_totalasize = metaslab_class_get_alloc(spa_normal_class(spa)); 6401 zcb.zcb_totalasize += metaslab_class_get_alloc(spa_special_class(spa)); 6402 zcb.zcb_totalasize += metaslab_class_get_alloc(spa_dedup_class(spa)); 6403 zcb.zcb_totalasize += 6404 metaslab_class_get_alloc(spa_embedded_log_class(spa)); 6405 zcb.zcb_start = zcb.zcb_lastprint = gethrtime(); 6406 err = traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb); 6407 6408 /* 6409 * If we've traversed the data blocks then we need to wait for those 6410 * I/Os to complete. We leverage "The Godfather" zio to wait on 6411 * all async I/Os to complete. 6412 */ 6413 if (dump_opt['c']) { 6414 for (c = 0; c < max_ncpus; c++) { 6415 (void) zio_wait(spa->spa_async_zio_root[c]); 6416 spa->spa_async_zio_root[c] = zio_root(spa, NULL, NULL, 6417 ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | 6418 ZIO_FLAG_GODFATHER); 6419 } 6420 } 6421 ASSERT0(spa->spa_load_verify_bytes); 6422 6423 /* 6424 * Done after zio_wait() since zcb_haderrors is modified in 6425 * zdb_blkptr_done() 6426 */ 6427 zcb.zcb_haderrors |= err; 6428 6429 if (zcb.zcb_haderrors) { 6430 (void) printf("\nError counts:\n\n"); 6431 (void) printf("\t%5s %s\n", "errno", "count"); 6432 for (e = 0; e < 256; e++) { 6433 if (zcb.zcb_errors[e] != 0) { 6434 (void) printf("\t%5d %llu\n", 6435 e, (u_longlong_t)zcb.zcb_errors[e]); 6436 } 6437 } 6438 } 6439 6440 /* 6441 * Report any leaked segments. 6442 */ 6443 leaks |= zdb_leak_fini(spa, &zcb); 6444 6445 tzb = &zcb.zcb_type[ZB_TOTAL][ZDB_OT_TOTAL]; 6446 6447 norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa)); 6448 norm_space = metaslab_class_get_space(spa_normal_class(spa)); 6449 6450 total_alloc = norm_alloc + 6451 metaslab_class_get_alloc(spa_log_class(spa)) + 6452 metaslab_class_get_alloc(spa_embedded_log_class(spa)) + 6453 metaslab_class_get_alloc(spa_special_class(spa)) + 6454 metaslab_class_get_alloc(spa_dedup_class(spa)) + 6455 get_unflushed_alloc_space(spa); 6456 total_found = tzb->zb_asize - zcb.zcb_dedup_asize + 6457 zcb.zcb_removing_size + zcb.zcb_checkpoint_size; 6458 6459 if (total_found == total_alloc && !dump_opt['L']) { 6460 (void) printf("\n\tNo leaks (block sum matches space" 6461 " maps exactly)\n"); 6462 } else if (!dump_opt['L']) { 6463 (void) printf("block traversal size %llu != alloc %llu " 6464 "(%s %lld)\n", 6465 (u_longlong_t)total_found, 6466 (u_longlong_t)total_alloc, 6467 (dump_opt['L']) ? 
"unreachable" : "leaked", 6468 (longlong_t)(total_alloc - total_found)); 6469 leaks = B_TRUE; 6470 } 6471 6472 if (tzb->zb_count == 0) 6473 return (2); 6474 6475 (void) printf("\n"); 6476 (void) printf("\t%-16s %14llu\n", "bp count:", 6477 (u_longlong_t)tzb->zb_count); 6478 (void) printf("\t%-16s %14llu\n", "ganged count:", 6479 (longlong_t)tzb->zb_gangs); 6480 (void) printf("\t%-16s %14llu avg: %6llu\n", "bp logical:", 6481 (u_longlong_t)tzb->zb_lsize, 6482 (u_longlong_t)(tzb->zb_lsize / tzb->zb_count)); 6483 (void) printf("\t%-16s %14llu avg: %6llu compression: %6.2f\n", 6484 "bp physical:", (u_longlong_t)tzb->zb_psize, 6485 (u_longlong_t)(tzb->zb_psize / tzb->zb_count), 6486 (double)tzb->zb_lsize / tzb->zb_psize); 6487 (void) printf("\t%-16s %14llu avg: %6llu compression: %6.2f\n", 6488 "bp allocated:", (u_longlong_t)tzb->zb_asize, 6489 (u_longlong_t)(tzb->zb_asize / tzb->zb_count), 6490 (double)tzb->zb_lsize / tzb->zb_asize); 6491 (void) printf("\t%-16s %14llu ref>1: %6llu deduplication: %6.2f\n", 6492 "bp deduped:", (u_longlong_t)zcb.zcb_dedup_asize, 6493 (u_longlong_t)zcb.zcb_dedup_blocks, 6494 (double)zcb.zcb_dedup_asize / tzb->zb_asize + 1.0); 6495 (void) printf("\t%-16s %14llu used: %5.2f%%\n", "Normal class:", 6496 (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space); 6497 6498 if (spa_special_class(spa)->mc_allocator[0].mca_rotor != NULL) { 6499 uint64_t alloc = metaslab_class_get_alloc( 6500 spa_special_class(spa)); 6501 uint64_t space = metaslab_class_get_space( 6502 spa_special_class(spa)); 6503 6504 (void) printf("\t%-16s %14llu used: %5.2f%%\n", 6505 "Special class", (u_longlong_t)alloc, 6506 100.0 * alloc / space); 6507 } 6508 6509 if (spa_dedup_class(spa)->mc_allocator[0].mca_rotor != NULL) { 6510 uint64_t alloc = metaslab_class_get_alloc( 6511 spa_dedup_class(spa)); 6512 uint64_t space = metaslab_class_get_space( 6513 spa_dedup_class(spa)); 6514 6515 (void) printf("\t%-16s %14llu used: %5.2f%%\n", 6516 "Dedup class", (u_longlong_t)alloc, 6517 100.0 * alloc / space); 6518 } 6519 6520 if (spa_embedded_log_class(spa)->mc_allocator[0].mca_rotor != NULL) { 6521 uint64_t alloc = metaslab_class_get_alloc( 6522 spa_embedded_log_class(spa)); 6523 uint64_t space = metaslab_class_get_space( 6524 spa_embedded_log_class(spa)); 6525 6526 (void) printf("\t%-16s %14llu used: %5.2f%%\n", 6527 "Embedded log class", (u_longlong_t)alloc, 6528 100.0 * alloc / space); 6529 } 6530 6531 for (i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) { 6532 if (zcb.zcb_embedded_blocks[i] == 0) 6533 continue; 6534 (void) printf("\n"); 6535 (void) printf("\tadditional, non-pointer bps of type %u: " 6536 "%10llu\n", 6537 i, (u_longlong_t)zcb.zcb_embedded_blocks[i]); 6538 6539 if (dump_opt['b'] >= 3) { 6540 (void) printf("\t number of (compressed) bytes: " 6541 "number of bps\n"); 6542 dump_histogram(zcb.zcb_embedded_histogram[i], 6543 sizeof (zcb.zcb_embedded_histogram[i]) / 6544 sizeof (zcb.zcb_embedded_histogram[i][0]), 0); 6545 } 6546 } 6547 6548 if (tzb->zb_ditto_samevdev != 0) { 6549 (void) printf("\tDittoed blocks on same vdev: %llu\n", 6550 (longlong_t)tzb->zb_ditto_samevdev); 6551 } 6552 if (tzb->zb_ditto_same_ms != 0) { 6553 (void) printf("\tDittoed blocks in same metaslab: %llu\n", 6554 (longlong_t)tzb->zb_ditto_same_ms); 6555 } 6556 6557 for (uint64_t v = 0; v < spa->spa_root_vdev->vdev_children; v++) { 6558 vdev_t *vd = spa->spa_root_vdev->vdev_child[v]; 6559 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; 6560 6561 if (vim == NULL) { 6562 continue; 6563 } 6564 6565 char mem[32]; 6566 
zdb_nicenum(vdev_indirect_mapping_size(vim), 6567 mem, sizeof (mem)); 6568 6569 (void) printf("\tindirect vdev id %llu has %llu segments " 6570 "(%s in memory)\n", 6571 (longlong_t)vd->vdev_id, 6572 (longlong_t)vdev_indirect_mapping_num_entries(vim), mem); 6573 } 6574 6575 if (dump_opt['b'] >= 2) { 6576 int l, t, level; 6577 (void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE" 6578 "\t avg\t comp\t%%Total\tType\n"); 6579 6580 for (t = 0; t <= ZDB_OT_TOTAL; t++) { 6581 char csize[32], lsize[32], psize[32], asize[32]; 6582 char avg[32], gang[32]; 6583 const char *typename; 6584 6585 /* make sure nicenum has enough space */ 6586 CTASSERT(sizeof (csize) >= NN_NUMBUF_SZ); 6587 CTASSERT(sizeof (lsize) >= NN_NUMBUF_SZ); 6588 CTASSERT(sizeof (psize) >= NN_NUMBUF_SZ); 6589 CTASSERT(sizeof (asize) >= NN_NUMBUF_SZ); 6590 CTASSERT(sizeof (avg) >= NN_NUMBUF_SZ); 6591 CTASSERT(sizeof (gang) >= NN_NUMBUF_SZ); 6592 6593 if (t < DMU_OT_NUMTYPES) 6594 typename = dmu_ot[t].ot_name; 6595 else 6596 typename = zdb_ot_extname[t - DMU_OT_NUMTYPES]; 6597 6598 if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) { 6599 (void) printf("%6s\t%5s\t%5s\t%5s" 6600 "\t%5s\t%5s\t%6s\t%s\n", 6601 "-", 6602 "-", 6603 "-", 6604 "-", 6605 "-", 6606 "-", 6607 "-", 6608 typename); 6609 continue; 6610 } 6611 6612 for (l = ZB_TOTAL - 1; l >= -1; l--) { 6613 level = (l == -1 ? ZB_TOTAL : l); 6614 zb = &zcb.zcb_type[level][t]; 6615 6616 if (zb->zb_asize == 0) 6617 continue; 6618 6619 if (dump_opt['b'] < 3 && level != ZB_TOTAL) 6620 continue; 6621 6622 if (level == 0 && zb->zb_asize == 6623 zcb.zcb_type[ZB_TOTAL][t].zb_asize) 6624 continue; 6625 6626 zdb_nicenum(zb->zb_count, csize, 6627 sizeof (csize)); 6628 zdb_nicenum(zb->zb_lsize, lsize, 6629 sizeof (lsize)); 6630 zdb_nicenum(zb->zb_psize, psize, 6631 sizeof (psize)); 6632 zdb_nicenum(zb->zb_asize, asize, 6633 sizeof (asize)); 6634 zdb_nicenum(zb->zb_asize / zb->zb_count, avg, 6635 sizeof (avg)); 6636 zdb_nicenum(zb->zb_gangs, gang, sizeof (gang)); 6637 6638 (void) printf("%6s\t%5s\t%5s\t%5s\t%5s" 6639 "\t%5.2f\t%6.2f\t", 6640 csize, lsize, psize, asize, avg, 6641 (double)zb->zb_lsize / zb->zb_psize, 6642 100.0 * zb->zb_asize / tzb->zb_asize); 6643 6644 if (level == ZB_TOTAL) 6645 (void) printf("%s\n", typename); 6646 else 6647 (void) printf(" L%d %s\n", 6648 level, typename); 6649 6650 if (dump_opt['b'] >= 3 && zb->zb_gangs > 0) { 6651 (void) printf("\t number of ganged " 6652 "blocks: %s\n", gang); 6653 } 6654 6655 if (dump_opt['b'] >= 4) { 6656 (void) printf("psize " 6657 "(in 512-byte sectors): " 6658 "number of blocks\n"); 6659 dump_histogram(zb->zb_psize_histogram, 6660 PSIZE_HISTO_SIZE, 0); 6661 } 6662 } 6663 } 6664 6665 /* Output a table summarizing block sizes in the pool */ 6666 if (dump_opt['b'] >= 2) { 6667 dump_size_histograms(&zcb); 6668 } 6669 } 6670 6671 (void) printf("\n"); 6672 6673 if (leaks) 6674 return (2); 6675 6676 if (zcb.zcb_haderrors) 6677 return (3); 6678 6679 return (0); 6680 } 6681 6682 typedef struct zdb_ddt_entry { 6683 ddt_key_t zdde_key; 6684 uint64_t zdde_ref_blocks; 6685 uint64_t zdde_ref_lsize; 6686 uint64_t zdde_ref_psize; 6687 uint64_t zdde_ref_dsize; 6688 avl_node_t zdde_node; 6689 } zdb_ddt_entry_t; 6690 6691 /* ARGSUSED */ 6692 static int 6693 zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, 6694 const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) 6695 { 6696 avl_tree_t *t = arg; 6697 avl_index_t where; 6698 zdb_ddt_entry_t *zdde, zdde_search; 6699 6700 if (zb->zb_level == ZB_DNODE_LEVEL ||
BP_IS_HOLE(bp) || 6701 BP_IS_EMBEDDED(bp)) 6702 return (0); 6703 6704 if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) { 6705 (void) printf("traversing objset %llu, %llu objects, " 6706 "%lu blocks so far\n", 6707 (u_longlong_t)zb->zb_objset, 6708 (u_longlong_t)BP_GET_FILL(bp), 6709 avl_numnodes(t)); 6710 } 6711 6712 if (BP_IS_HOLE(bp) || BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_OFF || 6713 BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp))) 6714 return (0); 6715 6716 ddt_key_fill(&zdde_search.zdde_key, bp); 6717 6718 zdde = avl_find(t, &zdde_search, &where); 6719 6720 if (zdde == NULL) { 6721 zdde = umem_zalloc(sizeof (*zdde), UMEM_NOFAIL); 6722 zdde->zdde_key = zdde_search.zdde_key; 6723 avl_insert(t, zdde, where); 6724 } 6725 6726 zdde->zdde_ref_blocks += 1; 6727 zdde->zdde_ref_lsize += BP_GET_LSIZE(bp); 6728 zdde->zdde_ref_psize += BP_GET_PSIZE(bp); 6729 zdde->zdde_ref_dsize += bp_get_dsize_sync(spa, bp); 6730 6731 return (0); 6732 } 6733 6734 static void 6735 dump_simulated_ddt(spa_t *spa) 6736 { 6737 avl_tree_t t; 6738 void *cookie = NULL; 6739 zdb_ddt_entry_t *zdde; 6740 ddt_histogram_t ddh_total; 6741 ddt_stat_t dds_total; 6742 6743 bzero(&ddh_total, sizeof (ddh_total)); 6744 bzero(&dds_total, sizeof (dds_total)); 6745 avl_create(&t, ddt_entry_compare, 6746 sizeof (zdb_ddt_entry_t), offsetof(zdb_ddt_entry_t, zdde_node)); 6747 6748 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 6749 6750 (void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | 6751 TRAVERSE_NO_DECRYPT, zdb_ddt_add_cb, &t); 6752 6753 spa_config_exit(spa, SCL_CONFIG, FTAG); 6754 6755 while ((zdde = avl_destroy_nodes(&t, &cookie)) != NULL) { 6756 ddt_stat_t dds; 6757 uint64_t refcnt = zdde->zdde_ref_blocks; 6758 ASSERT(refcnt != 0); 6759 6760 dds.dds_blocks = zdde->zdde_ref_blocks / refcnt; 6761 dds.dds_lsize = zdde->zdde_ref_lsize / refcnt; 6762 dds.dds_psize = zdde->zdde_ref_psize / refcnt; 6763 dds.dds_dsize = zdde->zdde_ref_dsize / refcnt; 6764 6765 dds.dds_ref_blocks = zdde->zdde_ref_blocks; 6766 dds.dds_ref_lsize = zdde->zdde_ref_lsize; 6767 dds.dds_ref_psize = zdde->zdde_ref_psize; 6768 dds.dds_ref_dsize = zdde->zdde_ref_dsize; 6769 6770 ddt_stat_add(&ddh_total.ddh_stat[highbit64(refcnt) - 1], 6771 &dds, 0); 6772 6773 umem_free(zdde, sizeof (*zdde)); 6774 } 6775 6776 avl_destroy(&t); 6777 6778 ddt_histogram_stat(&dds_total, &ddh_total); 6779 6780 (void) printf("Simulated DDT histogram:\n"); 6781 6782 zpool_dump_ddt(&dds_total, &ddh_total); 6783 6784 dump_dedup_ratio(&dds_total); 6785 } 6786 6787 static int 6788 verify_device_removal_feature_counts(spa_t *spa) 6789 { 6790 uint64_t dr_feature_refcount = 0; 6791 uint64_t oc_feature_refcount = 0; 6792 uint64_t indirect_vdev_count = 0; 6793 uint64_t precise_vdev_count = 0; 6794 uint64_t obsolete_counts_object_count = 0; 6795 uint64_t obsolete_sm_count = 0; 6796 uint64_t obsolete_counts_count = 0; 6797 uint64_t scip_count = 0; 6798 uint64_t obsolete_bpobj_count = 0; 6799 int ret = 0; 6800 6801 spa_condensing_indirect_phys_t *scip = 6802 &spa->spa_condensing_indirect_phys; 6803 if (scip->scip_next_mapping_object != 0) { 6804 vdev_t *vd = spa->spa_root_vdev->vdev_child[scip->scip_vdev]; 6805 ASSERT(scip->scip_prev_obsolete_sm_object != 0); 6806 ASSERT3P(vd->vdev_ops, ==, &vdev_indirect_ops); 6807 6808 (void) printf("Condensing indirect vdev %llu: new mapping " 6809 "object %llu, prev obsolete sm %llu\n", 6810 (u_longlong_t)scip->scip_vdev, 6811 (u_longlong_t)scip->scip_next_mapping_object, 6812 
(u_longlong_t)scip->scip_prev_obsolete_sm_object); 6813 if (scip->scip_prev_obsolete_sm_object != 0) { 6814 space_map_t *prev_obsolete_sm = NULL; 6815 VERIFY0(space_map_open(&prev_obsolete_sm, 6816 spa->spa_meta_objset, 6817 scip->scip_prev_obsolete_sm_object, 6818 0, vd->vdev_asize, 0)); 6819 dump_spacemap(spa->spa_meta_objset, prev_obsolete_sm); 6820 (void) printf("\n"); 6821 space_map_close(prev_obsolete_sm); 6822 } 6823 6824 scip_count += 2; 6825 } 6826 6827 for (uint64_t i = 0; i < spa->spa_root_vdev->vdev_children; i++) { 6828 vdev_t *vd = spa->spa_root_vdev->vdev_child[i]; 6829 vdev_indirect_config_t *vic = &vd->vdev_indirect_config; 6830 6831 if (vic->vic_mapping_object != 0) { 6832 ASSERT(vd->vdev_ops == &vdev_indirect_ops || 6833 vd->vdev_removing); 6834 indirect_vdev_count++; 6835 6836 if (vd->vdev_indirect_mapping->vim_havecounts) { 6837 obsolete_counts_count++; 6838 } 6839 } 6840 6841 boolean_t are_precise; 6842 VERIFY0(vdev_obsolete_counts_are_precise(vd, &are_precise)); 6843 if (are_precise) { 6844 ASSERT(vic->vic_mapping_object != 0); 6845 precise_vdev_count++; 6846 } 6847 6848 uint64_t obsolete_sm_object; 6849 VERIFY0(vdev_obsolete_sm_object(vd, &obsolete_sm_object)); 6850 if (obsolete_sm_object != 0) { 6851 ASSERT(vic->vic_mapping_object != 0); 6852 obsolete_sm_count++; 6853 } 6854 } 6855 6856 (void) feature_get_refcount(spa, 6857 &spa_feature_table[SPA_FEATURE_DEVICE_REMOVAL], 6858 &dr_feature_refcount); 6859 (void) feature_get_refcount(spa, 6860 &spa_feature_table[SPA_FEATURE_OBSOLETE_COUNTS], 6861 &oc_feature_refcount); 6862 6863 if (dr_feature_refcount != indirect_vdev_count) { 6864 ret = 1; 6865 (void) printf("Number of indirect vdevs (%llu) " \ 6866 "does not match feature count (%llu)\n", 6867 (u_longlong_t)indirect_vdev_count, 6868 (u_longlong_t)dr_feature_refcount); 6869 } else { 6870 (void) printf("Verified device_removal feature refcount " \ 6871 "of %llu is correct\n", 6872 (u_longlong_t)dr_feature_refcount); 6873 } 6874 6875 if (zap_contains(spa_meta_objset(spa), DMU_POOL_DIRECTORY_OBJECT, 6876 DMU_POOL_OBSOLETE_BPOBJ) == 0) { 6877 obsolete_bpobj_count++; 6878 } 6879 6880 6881 obsolete_counts_object_count = precise_vdev_count; 6882 obsolete_counts_object_count += obsolete_sm_count; 6883 obsolete_counts_object_count += obsolete_counts_count; 6884 obsolete_counts_object_count += scip_count; 6885 obsolete_counts_object_count += obsolete_bpobj_count; 6886 obsolete_counts_object_count += remap_deadlist_count; 6887 6888 if (oc_feature_refcount != obsolete_counts_object_count) { 6889 ret = 1; 6890 (void) printf("Number of obsolete counts objects (%llu) " \ 6891 "does not match feature count (%llu)\n", 6892 (u_longlong_t)obsolete_counts_object_count, 6893 (u_longlong_t)oc_feature_refcount); 6894 (void) printf("pv:%llu os:%llu oc:%llu sc:%llu " 6895 "ob:%llu rd:%llu\n", 6896 (u_longlong_t)precise_vdev_count, 6897 (u_longlong_t)obsolete_sm_count, 6898 (u_longlong_t)obsolete_counts_count, 6899 (u_longlong_t)scip_count, 6900 (u_longlong_t)obsolete_bpobj_count, 6901 (u_longlong_t)remap_deadlist_count); 6902 } else { 6903 (void) printf("Verified indirect_refcount feature refcount " \ 6904 "of %llu is correct\n", 6905 (u_longlong_t)oc_feature_refcount); 6906 } 6907 return (ret); 6908 } 6909 6910 static void 6911 zdb_set_skip_mmp(char *target) 6912 { 6913 spa_t *spa; 6914 6915 /* 6916 * Disable the activity check to allow examination of 6917 * active pools. 
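 * (With multihost enabled, an import would otherwise fail the
 * activity check when the pool appears to be in use by another
 * host. zdb only reads from the pool, so skipping the check is
 * presumed safe here.)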
6918 */ 6919 mutex_enter(&spa_namespace_lock); 6920 if ((spa = spa_lookup(target)) != NULL) { 6921 spa->spa_import_flags |= ZFS_IMPORT_SKIP_MMP; 6922 } 6923 mutex_exit(&spa_namespace_lock); 6924 } 6925 6926 #define BOGUS_SUFFIX "_CHECKPOINTED_UNIVERSE" 6927 /* 6928 * Import the checkpointed state of the pool specified by the target 6929 * parameter as readonly. The function also accepts a pool config 6930 * as an optional parameter, else it attempts to infer the config by 6931 * the name of the target pool. 6932 * 6933 * Note that the checkpointed state's pool name will be the name of 6934 * the original pool with the above suffix appended to it. In addition, 6935 * if the target is not a pool name (e.g. a path to a dataset) then 6936 * the new_path parameter is populated with the updated path to 6937 * reflect the fact that we are looking into the checkpointed state. 6938 * 6939 * The function returns a newly-allocated copy of the name of the 6940 * pool containing the checkpointed state. When this copy is no 6941 * longer needed it should be freed with free(3C). Same thing 6942 * applies to the new_path parameter if allocated. 6943 */ 6944 static char * 6945 import_checkpointed_state(char *target, nvlist_t *cfg, char **new_path) 6946 { 6947 int error = 0; 6948 char *poolname, *bogus_name = NULL; 6949 boolean_t freecfg = B_FALSE; 6950 6951 /* If the target is not a pool, the extract the pool name */ 6952 char *path_start = strchr(target, '/'); 6953 if (path_start != NULL) { 6954 size_t poolname_len = path_start - target; 6955 poolname = strndup(target, poolname_len); 6956 } else { 6957 poolname = target; 6958 } 6959 6960 if (cfg == NULL) { 6961 zdb_set_skip_mmp(poolname); 6962 error = spa_get_stats(poolname, &cfg, NULL, 0); 6963 if (error != 0) { 6964 fatal("Tried to read config of pool \"%s\" but " 6965 "spa_get_stats() failed with error %d\n", 6966 poolname, error); 6967 } 6968 freecfg = B_TRUE; 6969 } 6970 6971 if (asprintf(&bogus_name, "%s%s", poolname, BOGUS_SUFFIX) == -1) 6972 return (NULL); 6973 fnvlist_add_string(cfg, ZPOOL_CONFIG_POOL_NAME, bogus_name); 6974 6975 error = spa_import(bogus_name, cfg, NULL, 6976 ZFS_IMPORT_MISSING_LOG | ZFS_IMPORT_CHECKPOINT | 6977 ZFS_IMPORT_SKIP_MMP); 6978 if (freecfg) 6979 nvlist_free(cfg); 6980 if (error != 0) { 6981 fatal("Tried to import pool \"%s\" but spa_import() failed " 6982 "with error %d\n", bogus_name, error); 6983 } 6984 6985 if (new_path != NULL && path_start != NULL) { 6986 if (asprintf(new_path, "%s%s", bogus_name, path_start) == -1) { 6987 if (path_start != NULL) 6988 free(poolname); 6989 return (NULL); 6990 } 6991 } 6992 6993 if (target != poolname) 6994 free(poolname); 6995 6996 return (bogus_name); 6997 } 6998 6999 typedef struct verify_checkpoint_sm_entry_cb_arg { 7000 vdev_t *vcsec_vd; 7001 7002 /* the following fields are only used for printing progress */ 7003 uint64_t vcsec_entryid; 7004 uint64_t vcsec_num_entries; 7005 } verify_checkpoint_sm_entry_cb_arg_t; 7006 7007 #define ENTRIES_PER_PROGRESS_UPDATE 10000 7008 7009 static int 7010 verify_checkpoint_sm_entry_cb(space_map_entry_t *sme, void *arg) 7011 { 7012 verify_checkpoint_sm_entry_cb_arg_t *vcsec = arg; 7013 vdev_t *vd = vcsec->vcsec_vd; 7014 metaslab_t *ms = vd->vdev_ms[sme->sme_offset >> vd->vdev_ms_shift]; 7015 uint64_t end = sme->sme_offset + sme->sme_run; 7016 7017 ASSERT(sme->sme_type == SM_FREE); 7018 7019 if ((vcsec->vcsec_entryid % ENTRIES_PER_PROGRESS_UPDATE) == 0) { 7020 (void) fprintf(stderr, 7021 "\rverifying vdev %llu, space map entry %llu of %llu ...", 
7022 (longlong_t)vd->vdev_id, 7023 (longlong_t)vcsec->vcsec_entryid, 7024 (longlong_t)vcsec->vcsec_num_entries); 7025 } 7026 vcsec->vcsec_entryid++; 7027 7028 /* 7029 * See comment in checkpoint_sm_exclude_entry_cb() 7030 */ 7031 VERIFY3U(sme->sme_offset, >=, ms->ms_start); 7032 VERIFY3U(end, <=, ms->ms_start + ms->ms_size); 7033 7034 /* 7035 * The entries in the vdev_checkpoint_sm should be marked as 7036 * allocated in the checkpointed state of the pool, therefore 7037 * their respective ms_allocatable trees should not contain them. 7038 */ 7039 mutex_enter(&ms->ms_lock); 7040 range_tree_verify_not_present(ms->ms_allocatable, 7041 sme->sme_offset, sme->sme_run); 7042 mutex_exit(&ms->ms_lock); 7043 7044 return (0); 7045 } 7046 7047 /* 7048 * Verify that all segments in the vdev_checkpoint_sm are allocated 7049 * according to the checkpoint's ms_sm (i.e. are not in the checkpoint's 7050 * ms_allocatable). 7051 * 7052 * Do so by comparing the checkpoint space maps (vdev_checkpoint_sm) of 7053 * each vdev in the current state of the pool to the metaslab space maps 7054 * (ms_sm) of the checkpointed state of the pool. 7055 * 7056 * Note that the function changes the state of the ms_allocatable 7057 * trees of the current spa_t. The entries of these ms_allocatable 7058 * trees are cleared out and then repopulated with the free 7059 * entries of their respective ms_sm space maps. 7060 */ 7061 static void 7062 verify_checkpoint_vdev_spacemaps(spa_t *checkpoint, spa_t *current) 7063 { 7064 vdev_t *ckpoint_rvd = checkpoint->spa_root_vdev; 7065 vdev_t *current_rvd = current->spa_root_vdev; 7066 7067 load_concrete_ms_allocatable_trees(checkpoint, SM_FREE); 7068 7069 for (uint64_t c = 0; c < ckpoint_rvd->vdev_children; c++) { 7070 vdev_t *ckpoint_vd = ckpoint_rvd->vdev_child[c]; 7071 vdev_t *current_vd = current_rvd->vdev_child[c]; 7072 7073 space_map_t *checkpoint_sm = NULL; 7074 uint64_t checkpoint_sm_obj; 7075 7076 if (ckpoint_vd->vdev_ops == &vdev_indirect_ops) { 7077 /* 7078 * Since we don't allow device removal in a pool 7079 * that has a checkpoint, we expect that all removed 7080 * vdevs were removed from the pool before the 7081 * checkpoint. 7082 */ 7083 ASSERT3P(current_vd->vdev_ops, ==, &vdev_indirect_ops); 7084 continue; 7085 } 7086 7087 /* 7088 * If the checkpoint space map doesn't exist, then nothing 7089 * here is checkpointed so there's nothing to verify. 7090 */ 7091 if (current_vd->vdev_top_zap == 0 || 7092 zap_contains(spa_meta_objset(current), 7093 current_vd->vdev_top_zap, 7094 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0) 7095 continue; 7096 7097 VERIFY0(zap_lookup(spa_meta_objset(current), 7098 current_vd->vdev_top_zap, VDEV_TOP_ZAP_POOL_CHECKPOINT_SM, 7099 sizeof (uint64_t), 1, &checkpoint_sm_obj)); 7100 7101 VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(current), 7102 checkpoint_sm_obj, 0, current_vd->vdev_asize, 7103 current_vd->vdev_ashift)); 7104 7105 verify_checkpoint_sm_entry_cb_arg_t vcsec; 7106 vcsec.vcsec_vd = ckpoint_vd; 7107 vcsec.vcsec_entryid = 0; 7108 vcsec.vcsec_num_entries = 7109 space_map_length(checkpoint_sm) / sizeof (uint64_t); 7110 VERIFY0(space_map_iterate(checkpoint_sm, 7111 space_map_length(checkpoint_sm), 7112 verify_checkpoint_sm_entry_cb, &vcsec)); 7113 if (dump_opt['m'] > 3) 7114 dump_spacemap(current->spa_meta_objset, checkpoint_sm); 7115 space_map_close(checkpoint_sm); 7116 } 7117 7118 /* 7119 * If we've added vdevs since we took the checkpoint, ensure 7120 * that their checkpoint space maps are empty.
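 * (A vdev added after the checkpoint was taken can hold no
 * checkpointed data, so a non-NULL vdev_checkpoint_sm on such a
 * vdev would itself indicate a problem; the VERIFY below checks
 * exactly that.)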
7121 */ 7122 if (ckpoint_rvd->vdev_children < current_rvd->vdev_children) { 7123 for (uint64_t c = ckpoint_rvd->vdev_children; 7124 c < current_rvd->vdev_children; c++) { 7125 vdev_t *current_vd = current_rvd->vdev_child[c]; 7126 VERIFY3P(current_vd->vdev_checkpoint_sm, ==, NULL); 7127 } 7128 } 7129 7130 /* for cleaner progress output */ 7131 (void) fprintf(stderr, "\n"); 7132 } 7133 7134 /* 7135 * Verifies that all space that's allocated in the checkpoint is 7136 * still allocated in the current version, by checking that everything 7137 * in checkpoint's ms_allocatable (which is actually allocated, not 7138 * allocatable/free) is not present in current's ms_allocatable. 7139 * 7140 * Note that the function changes the state of the ms_allocatable 7141 * trees of both spas when called. The entries of all ms_allocatable 7142 * trees are cleared out and then repopulated from their respective 7143 * ms_sm space maps. In the checkpointed state we load the allocated 7144 * entries, and in the current state we load the free entries. 7145 */ 7146 static void 7147 verify_checkpoint_ms_spacemaps(spa_t *checkpoint, spa_t *current) 7148 { 7149 vdev_t *ckpoint_rvd = checkpoint->spa_root_vdev; 7150 vdev_t *current_rvd = current->spa_root_vdev; 7151 7152 load_concrete_ms_allocatable_trees(checkpoint, SM_ALLOC); 7153 load_concrete_ms_allocatable_trees(current, SM_FREE); 7154 7155 for (uint64_t i = 0; i < ckpoint_rvd->vdev_children; i++) { 7156 vdev_t *ckpoint_vd = ckpoint_rvd->vdev_child[i]; 7157 vdev_t *current_vd = current_rvd->vdev_child[i]; 7158 7159 if (ckpoint_vd->vdev_ops == &vdev_indirect_ops) { 7160 /* 7161 * See comment in verify_checkpoint_vdev_spacemaps() 7162 */ 7163 ASSERT3P(current_vd->vdev_ops, ==, &vdev_indirect_ops); 7164 continue; 7165 } 7166 7167 for (uint64_t m = 0; m < ckpoint_vd->vdev_ms_count; m++) { 7168 metaslab_t *ckpoint_msp = ckpoint_vd->vdev_ms[m]; 7169 metaslab_t *current_msp = current_vd->vdev_ms[m]; 7170 7171 (void) fprintf(stderr, 7172 "\rverifying vdev %llu of %llu, " 7173 "metaslab %llu of %llu ...", 7174 (longlong_t)current_vd->vdev_id, 7175 (longlong_t)current_rvd->vdev_children, 7176 (longlong_t)current_vd->vdev_ms[m]->ms_id, 7177 (longlong_t)current_vd->vdev_ms_count); 7178 7179 /* 7180 * We walk through the ms_allocatable trees that 7181 * are loaded with the allocated blocks from the 7182 * ms_sm spacemaps of the checkpoint. For each 7183 * one of these ranges we ensure that none of them 7184 * exists in the ms_allocatable trees of the 7185 * current state which are loaded with the ranges 7186 * that are currently free. 7187 * 7188 * This way we ensure that none of the blocks that 7189 * are part of the checkpoint were freed by mistake. 7190 */ 7191 range_tree_walk(ckpoint_msp->ms_allocatable, 7192 (range_tree_func_t *)range_tree_verify_not_present, 7193 current_msp->ms_allocatable); 7194 } 7195 } 7196 7197 /* for cleaner progress output */ 7198 (void) fprintf(stderr, "\n"); 7199 } 7200 7201 static void 7202 verify_checkpoint_blocks(spa_t *spa) 7203 { 7204 ASSERT(!dump_opt['L']); 7205 7206 spa_t *checkpoint_spa; 7207 char *checkpoint_pool; 7208 int error = 0; 7209 7210 /* 7211 * We import the checkpointed state of the pool (under a different 7212 * name) so we can do verification on it against the current state 7213 * of the pool. 
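 * The checkpointed state is imported under the pool's own name
 * with BOGUS_SUFFIX appended (see import_checkpointed_state()
 * above), which is what allows both states to be open at once.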
7214 */ 7215 checkpoint_pool = import_checkpointed_state(spa->spa_name, NULL, 7216 NULL); 7217 ASSERT(strcmp(spa->spa_name, checkpoint_pool) != 0); 7218 7219 error = spa_open(checkpoint_pool, &checkpoint_spa, FTAG); 7220 if (error != 0) { 7221 fatal("Tried to open pool \"%s\" but spa_open() failed with " 7222 "error %d\n", checkpoint_pool, error); 7223 } 7224 7225 /* 7226 * Ensure that ranges in the checkpoint space maps of each vdev 7227 * are allocated according to the checkpointed state's metaslab 7228 * space maps. 7229 */ 7230 verify_checkpoint_vdev_spacemaps(checkpoint_spa, spa); 7231 7232 /* 7233 * Ensure that allocated ranges in the checkpoint's metaslab 7234 * space maps remain allocated in the metaslab space maps of 7235 * the current state. 7236 */ 7237 verify_checkpoint_ms_spacemaps(checkpoint_spa, spa); 7238 7239 /* 7240 * Once we are done, we get rid of the checkpointed state. 7241 */ 7242 spa_close(checkpoint_spa, FTAG); 7243 free(checkpoint_pool); 7244 } 7245 7246 static void 7247 dump_leftover_checkpoint_blocks(spa_t *spa) 7248 { 7249 vdev_t *rvd = spa->spa_root_vdev; 7250 7251 for (uint64_t i = 0; i < rvd->vdev_children; i++) { 7252 vdev_t *vd = rvd->vdev_child[i]; 7253 7254 space_map_t *checkpoint_sm = NULL; 7255 uint64_t checkpoint_sm_obj; 7256 7257 if (vd->vdev_top_zap == 0) 7258 continue; 7259 7260 if (zap_contains(spa_meta_objset(spa), vd->vdev_top_zap, 7261 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0) 7262 continue; 7263 7264 VERIFY0(zap_lookup(spa_meta_objset(spa), vd->vdev_top_zap, 7265 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM, 7266 sizeof (uint64_t), 1, &checkpoint_sm_obj)); 7267 7268 VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(spa), 7269 checkpoint_sm_obj, 0, vd->vdev_asize, vd->vdev_ashift)); 7270 dump_spacemap(spa->spa_meta_objset, checkpoint_sm); 7271 space_map_close(checkpoint_sm); 7272 } 7273 } 7274 7275 static int 7276 verify_checkpoint(spa_t *spa) 7277 { 7278 uberblock_t checkpoint; 7279 int error; 7280 7281 if (!spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) 7282 return (0); 7283 7284 error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 7285 DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t), 7286 sizeof (uberblock_t) / sizeof (uint64_t), &checkpoint); 7287 7288 if (error == ENOENT && !dump_opt['L']) { 7289 /* 7290 * If the feature is active but the uberblock is missing 7291 * then we must be in the middle of discarding the 7292 * checkpoint. 
7293 */ 7294 (void) printf("\nPartially discarded checkpoint " 7295 "state found:\n"); 7296 if (dump_opt['m'] > 3) 7297 dump_leftover_checkpoint_blocks(spa); 7298 return (0); 7299 } else if (error != 0) { 7300 (void) printf("lookup error %d when looking for " 7301 "checkpointed uberblock in MOS\n", error); 7302 return (error); 7303 } 7304 dump_uberblock(&checkpoint, "\nCheckpointed uberblock found:\n", "\n"); 7305 7306 if (checkpoint.ub_checkpoint_txg == 0) { 7307 (void) printf("\nub_checkpoint_txg not set in checkpointed " 7308 "uberblock\n"); 7309 error = 3; 7310 } 7311 7312 if (error == 0 && !dump_opt['L']) 7313 verify_checkpoint_blocks(spa); 7314 7315 return (error); 7316 } 7317 7318 /* ARGSUSED */ 7319 static void 7320 mos_leaks_cb(void *arg, uint64_t start, uint64_t size) 7321 { 7322 for (uint64_t i = start; i < size; i++) { 7323 (void) printf("MOS object %llu referenced but not allocated\n", 7324 (u_longlong_t)i); 7325 } 7326 } 7327 7328 static void 7329 mos_obj_refd(uint64_t obj) 7330 { 7331 if (obj != 0 && mos_refd_objs != NULL) 7332 range_tree_add(mos_refd_objs, obj, 1); 7333 } 7334 7335 /* 7336 * Call on a MOS object that may already have been referenced. 7337 */ 7338 static void 7339 mos_obj_refd_multiple(uint64_t obj) 7340 { 7341 if (obj != 0 && mos_refd_objs != NULL && 7342 !range_tree_contains(mos_refd_objs, obj, 1)) 7343 range_tree_add(mos_refd_objs, obj, 1); 7344 } 7345 7346 static void 7347 mos_leak_vdev_top_zap(vdev_t *vd) 7348 { 7349 uint64_t ms_flush_data_obj; 7350 int error = zap_lookup(spa_meta_objset(vd->vdev_spa), 7351 vd->vdev_top_zap, VDEV_TOP_ZAP_MS_UNFLUSHED_PHYS_TXGS, 7352 sizeof (ms_flush_data_obj), 1, &ms_flush_data_obj); 7353 if (error == ENOENT) 7354 return; 7355 ASSERT0(error); 7356 7357 mos_obj_refd(ms_flush_data_obj); 7358 } 7359 7360 static void 7361 mos_leak_vdev(vdev_t *vd) 7362 { 7363 mos_obj_refd(vd->vdev_dtl_object); 7364 mos_obj_refd(vd->vdev_ms_array); 7365 mos_obj_refd(vd->vdev_indirect_config.vic_births_object); 7366 mos_obj_refd(vd->vdev_indirect_config.vic_mapping_object); 7367 mos_obj_refd(vd->vdev_leaf_zap); 7368 if (vd->vdev_checkpoint_sm != NULL) 7369 mos_obj_refd(vd->vdev_checkpoint_sm->sm_object); 7370 if (vd->vdev_indirect_mapping != NULL) { 7371 mos_obj_refd(vd->vdev_indirect_mapping-> 7372 vim_phys->vimp_counts_object); 7373 } 7374 if (vd->vdev_obsolete_sm != NULL) 7375 mos_obj_refd(vd->vdev_obsolete_sm->sm_object); 7376 7377 for (uint64_t m = 0; m < vd->vdev_ms_count; m++) { 7378 metaslab_t *ms = vd->vdev_ms[m]; 7379 mos_obj_refd(space_map_object(ms->ms_sm)); 7380 } 7381 7382 if (vd->vdev_top_zap != 0) { 7383 mos_obj_refd(vd->vdev_top_zap); 7384 mos_leak_vdev_top_zap(vd); 7385 } 7386 7387 for (uint64_t c = 0; c < vd->vdev_children; c++) { 7388 mos_leak_vdev(vd->vdev_child[c]); 7389 } 7390 } 7391 7392 static void 7393 mos_leak_log_spacemaps(spa_t *spa) 7394 { 7395 uint64_t spacemap_zap; 7396 int error = zap_lookup(spa_meta_objset(spa), 7397 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_LOG_SPACEMAP_ZAP, 7398 sizeof (spacemap_zap), 1, &spacemap_zap); 7399 if (error == ENOENT) 7400 return; 7401 ASSERT0(error); 7402 7403 mos_obj_refd(spacemap_zap); 7404 for (spa_log_sm_t *sls = avl_first(&spa->spa_sm_logs_by_txg); 7405 sls; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls)) 7406 mos_obj_refd(sls->sls_sm_obj); 7407 } 7408 7409 static int 7410 dump_mos_leaks(spa_t *spa) 7411 { 7412 int rv = 0; 7413 objset_t *mos = spa->spa_meta_objset; 7414 dsl_pool_t *dp = spa->spa_dsl_pool; 7415 7416 /* Visit and mark all referenced objects in the MOS */ 7417 7418 
mos_obj_refd(DMU_POOL_DIRECTORY_OBJECT); 7419 mos_obj_refd(spa->spa_pool_props_object); 7420 mos_obj_refd(spa->spa_config_object); 7421 mos_obj_refd(spa->spa_ddt_stat_object); 7422 mos_obj_refd(spa->spa_feat_desc_obj); 7423 mos_obj_refd(spa->spa_feat_enabled_txg_obj); 7424 mos_obj_refd(spa->spa_feat_for_read_obj); 7425 mos_obj_refd(spa->spa_feat_for_write_obj); 7426 mos_obj_refd(spa->spa_history); 7427 mos_obj_refd(spa->spa_errlog_last); 7428 mos_obj_refd(spa->spa_errlog_scrub); 7429 mos_obj_refd(spa->spa_all_vdev_zaps); 7430 mos_obj_refd(spa->spa_dsl_pool->dp_bptree_obj); 7431 mos_obj_refd(spa->spa_dsl_pool->dp_tmp_userrefs_obj); 7432 mos_obj_refd(spa->spa_dsl_pool->dp_scan->scn_phys.scn_queue_obj); 7433 bpobj_count_refd(&spa->spa_deferred_bpobj); 7434 mos_obj_refd(dp->dp_empty_bpobj); 7435 bpobj_count_refd(&dp->dp_obsolete_bpobj); 7436 bpobj_count_refd(&dp->dp_free_bpobj); 7437 mos_obj_refd(spa->spa_l2cache.sav_object); 7438 mos_obj_refd(spa->spa_spares.sav_object); 7439 7440 if (spa->spa_syncing_log_sm != NULL) 7441 mos_obj_refd(spa->spa_syncing_log_sm->sm_object); 7442 mos_leak_log_spacemaps(spa); 7443 7444 mos_obj_refd(spa->spa_condensing_indirect_phys. 7445 scip_next_mapping_object); 7446 mos_obj_refd(spa->spa_condensing_indirect_phys. 7447 scip_prev_obsolete_sm_object); 7448 if (spa->spa_condensing_indirect_phys.scip_next_mapping_object != 0) { 7449 vdev_indirect_mapping_t *vim = 7450 vdev_indirect_mapping_open(mos, 7451 spa->spa_condensing_indirect_phys.scip_next_mapping_object); 7452 mos_obj_refd(vim->vim_phys->vimp_counts_object); 7453 vdev_indirect_mapping_close(vim); 7454 } 7455 deleted_livelists_dump_mos(spa); 7456 7457 if (dp->dp_origin_snap != NULL) { 7458 dsl_dataset_t *ds; 7459 7460 dsl_pool_config_enter(dp, FTAG); 7461 VERIFY0(dsl_dataset_hold_obj(dp, 7462 dsl_dataset_phys(dp->dp_origin_snap)->ds_next_snap_obj, 7463 FTAG, &ds)); 7464 count_ds_mos_objects(ds); 7465 dump_blkptr_list(&ds->ds_deadlist, "Deadlist"); 7466 dsl_dataset_rele(ds, FTAG); 7467 dsl_pool_config_exit(dp, FTAG); 7468 7469 count_ds_mos_objects(dp->dp_origin_snap); 7470 dump_blkptr_list(&dp->dp_origin_snap->ds_deadlist, "Deadlist"); 7471 } 7472 count_dir_mos_objects(dp->dp_mos_dir); 7473 if (dp->dp_free_dir != NULL) 7474 count_dir_mos_objects(dp->dp_free_dir); 7475 if (dp->dp_leak_dir != NULL) 7476 count_dir_mos_objects(dp->dp_leak_dir); 7477 7478 mos_leak_vdev(spa->spa_root_vdev); 7479 7480 for (uint64_t class = 0; class < DDT_CLASSES; class++) { 7481 for (uint64_t type = 0; type < DDT_TYPES; type++) { 7482 for (uint64_t cksum = 0; 7483 cksum < ZIO_CHECKSUM_FUNCTIONS; cksum++) { 7484 ddt_t *ddt = spa->spa_ddt[cksum]; 7485 mos_obj_refd(ddt->ddt_object[type][class]); 7486 } 7487 } 7488 } 7489 7490 /* 7491 * Visit all allocated objects and make sure they are referenced. 
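 * Two kinds of mismatch are reported: an allocated object that
 * was never marked above is a leak, while a marked object that
 * is not allocated is printed by mos_leaks_cb() from whatever
 * remains in mos_refd_objs; either case makes this function
 * return 2.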
7492 */ 7493 uint64_t object = 0; 7494 while (dmu_object_next(mos, &object, B_FALSE, 0) == 0) { 7495 if (range_tree_contains(mos_refd_objs, object, 1)) { 7496 range_tree_remove(mos_refd_objs, object, 1); 7497 } else { 7498 dmu_object_info_t doi; 7499 const char *name; 7500 dmu_object_info(mos, object, &doi); 7501 if (doi.doi_type & DMU_OT_NEWTYPE) { 7502 dmu_object_byteswap_t bswap = 7503 DMU_OT_BYTESWAP(doi.doi_type); 7504 name = dmu_ot_byteswap[bswap].ob_name; 7505 } else { 7506 name = dmu_ot[doi.doi_type].ot_name; 7507 } 7508 7509 (void) printf("MOS object %llu (%s) leaked\n", 7510 (u_longlong_t)object, name); 7511 rv = 2; 7512 } 7513 } 7514 (void) range_tree_walk(mos_refd_objs, mos_leaks_cb, NULL); 7515 if (!range_tree_is_empty(mos_refd_objs)) 7516 rv = 2; 7517 range_tree_vacate(mos_refd_objs, NULL, NULL); 7518 range_tree_destroy(mos_refd_objs); 7519 return (rv); 7520 } 7521 7522 typedef struct log_sm_obsolete_stats_arg { 7523 uint64_t lsos_current_txg; 7524 7525 uint64_t lsos_total_entries; 7526 uint64_t lsos_valid_entries; 7527 7528 uint64_t lsos_sm_entries; 7529 uint64_t lsos_valid_sm_entries; 7530 } log_sm_obsolete_stats_arg_t; 7531 7532 static int 7533 log_spacemap_obsolete_stats_cb(spa_t *spa, space_map_entry_t *sme, 7534 uint64_t txg, void *arg) 7535 { 7536 log_sm_obsolete_stats_arg_t *lsos = arg; 7537 7538 uint64_t offset = sme->sme_offset; 7539 uint64_t vdev_id = sme->sme_vdev; 7540 7541 if (lsos->lsos_current_txg == 0) { 7542 /* this is the first log */ 7543 lsos->lsos_current_txg = txg; 7544 } else if (lsos->lsos_current_txg < txg) { 7545 /* we just changed log - print stats and reset */ 7546 (void) printf("%-8llu valid entries out of %-8llu - txg %llu\n", 7547 (u_longlong_t)lsos->lsos_valid_sm_entries, 7548 (u_longlong_t)lsos->lsos_sm_entries, 7549 (u_longlong_t)lsos->lsos_current_txg); 7550 lsos->lsos_valid_sm_entries = 0; 7551 lsos->lsos_sm_entries = 0; 7552 lsos->lsos_current_txg = txg; 7553 } 7554 ASSERT3U(lsos->lsos_current_txg, ==, txg); 7555 7556 lsos->lsos_sm_entries++; 7557 lsos->lsos_total_entries++; 7558 7559 vdev_t *vd = vdev_lookup_top(spa, vdev_id); 7560 if (!vdev_is_concrete(vd)) 7561 return (0); 7562 7563 metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift]; 7564 ASSERT(sme->sme_type == SM_ALLOC || sme->sme_type == SM_FREE); 7565 7566 if (txg < metaslab_unflushed_txg(ms)) 7567 return (0); 7568 lsos->lsos_valid_sm_entries++; 7569 lsos->lsos_valid_entries++; 7570 return (0); 7571 } 7572 7573 static void 7574 dump_log_spacemap_obsolete_stats(spa_t *spa) 7575 { 7576 if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP)) 7577 return; 7578 7579 log_sm_obsolete_stats_arg_t lsos; 7580 bzero(&lsos, sizeof (lsos)); 7581 7582 (void) printf("Log Space Map Obsolete Entry Statistics:\n"); 7583 7584 iterate_through_spacemap_logs(spa, 7585 log_spacemap_obsolete_stats_cb, &lsos); 7586 7587 /* print stats for latest log */ 7588 (void) printf("%-8llu valid entries out of %-8llu - txg %llu\n", 7589 (u_longlong_t)lsos.lsos_valid_sm_entries, 7590 (u_longlong_t)lsos.lsos_sm_entries, 7591 (u_longlong_t)lsos.lsos_current_txg); 7592 7593 (void) printf("%-8llu valid entries out of %-8llu - total\n\n", 7594 (u_longlong_t)lsos.lsos_valid_entries, 7595 (u_longlong_t)lsos.lsos_total_entries); 7596 } 7597 7598 static void 7599 dump_zpool(spa_t *spa) 7600 { 7601 dsl_pool_t *dp = spa_get_dsl(spa); 7602 int rc = 0; 7603 7604 if (dump_opt['y']) { 7605 livelist_metaslab_validate(spa); 7606 } 7607 7608 if (dump_opt['S']) { 7609 dump_simulated_ddt(spa); 7610 return; 7611 } 7612 7613 if 

static void
dump_zpool(spa_t *spa)
{
	dsl_pool_t *dp = spa_get_dsl(spa);
	int rc = 0;

	if (dump_opt['y']) {
		livelist_metaslab_validate(spa);
	}

	if (dump_opt['S']) {
		dump_simulated_ddt(spa);
		return;
	}

	if (!dump_opt['e'] && dump_opt['C'] > 1) {
		(void) printf("\nCached configuration:\n");
		dump_nvlist(spa->spa_config, 8);
	}

	if (dump_opt['C'])
		dump_config(spa);

	if (dump_opt['u'])
		dump_uberblock(&spa->spa_uberblock, "\nUberblock:\n", "\n");

	if (dump_opt['D'])
		dump_all_ddts(spa);

	if (dump_opt['d'] > 2 || dump_opt['m'])
		dump_metaslabs(spa);
	if (dump_opt['M'])
		dump_metaslab_groups(spa);
	if (dump_opt['d'] > 2 || dump_opt['m']) {
		dump_log_spacemaps(spa);
		dump_log_spacemap_obsolete_stats(spa);
	}

	if (dump_opt['d'] || dump_opt['i']) {
		spa_feature_t f;
		mos_refd_objs = range_tree_create(NULL, RANGE_SEG64, NULL,
		    0, 0);
		dump_objset(dp->dp_meta_objset);

		if (dump_opt['d'] >= 3) {
			dump_full_bpobj(&spa->spa_deferred_bpobj,
			    "Deferred frees", 0);
			if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
				dump_full_bpobj(&dp->dp_free_bpobj,
				    "Pool snapshot frees", 0);
			}
			if (bpobj_is_open(&dp->dp_obsolete_bpobj)) {
				ASSERT(spa_feature_is_enabled(spa,
				    SPA_FEATURE_DEVICE_REMOVAL));
				dump_full_bpobj(&dp->dp_obsolete_bpobj,
				    "Pool obsolete blocks", 0);
			}

			if (spa_feature_is_active(spa,
			    SPA_FEATURE_ASYNC_DESTROY)) {
				dump_bptree(spa->spa_meta_objset,
				    dp->dp_bptree_obj,
				    "Pool dataset frees");
			}
			dump_dtl(spa->spa_root_vdev, 0);
		}

		for (f = 0; f < SPA_FEATURES; f++)
			global_feature_count[f] = UINT64_MAX;
		global_feature_count[SPA_FEATURE_REDACTION_BOOKMARKS] = 0;
		global_feature_count[SPA_FEATURE_BOOKMARK_WRITTEN] = 0;
		global_feature_count[SPA_FEATURE_LIVELIST] = 0;

		(void) dmu_objset_find(spa_name(spa), dump_one_objset,
		    NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);

		if (rc == 0 && !dump_opt['L'])
			rc = dump_mos_leaks(spa);

		for (f = 0; f < SPA_FEATURES; f++) {
			uint64_t refcount;
			uint64_t *arr;

			if (!(spa_feature_table[f].fi_flags &
			    ZFEATURE_FLAG_PER_DATASET)) {
				if (global_feature_count[f] == UINT64_MAX)
					continue;
				if (!spa_feature_is_enabled(spa, f)) {
					ASSERT0(global_feature_count[f]);
					continue;
				}
				arr = global_feature_count;
			} else {
				if (!spa_feature_is_enabled(spa, f)) {
					ASSERT0(dataset_feature_count[f]);
					continue;
				}
				arr = dataset_feature_count;
			}
			if (feature_get_refcount(spa, &spa_feature_table[f],
			    &refcount) == ENOTSUP)
				continue;
			if (arr[f] != refcount) {
				(void) printf("%s feature refcount mismatch: "
				    "%lld consumers != %lld refcount\n",
				    spa_feature_table[f].fi_uname,
				    (longlong_t)arr[f], (longlong_t)refcount);
				rc = 2;
			} else {
				(void) printf("Verified %s feature refcount "
				    "of %llu is correct\n",
				    spa_feature_table[f].fi_uname,
				    (u_longlong_t)refcount);
			}
		}

		if (rc == 0)
			rc = verify_device_removal_feature_counts(spa);
	}

	if (rc == 0 && (dump_opt['b'] || dump_opt['c']))
		rc = dump_block_stats(spa);

	if (rc == 0)
		rc = verify_spacemap_refcounts(spa);

	if (dump_opt['s'])
		show_pool_stats(spa);

	if (dump_opt['h'])
		dump_history(spa);

	if (rc == 0)
		rc = verify_checkpoint(spa);

	if (rc != 0) {
		dump_debug_buffer();
		exit(rc);
	}
}
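
/*
 * Typical invocations dispatched above (illustrative): "zdb -u pool"
 * prints just the uberblock, "zdb -m pool" walks the metaslabs and log
 * space maps, and "zdb -b pool" ends with the block statistics pass
 * once the dataset dumps are done.
 */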

static char flagbitstr[16];

static void
zdb_print_blkptr(const blkptr_t *bp, int flags)
{
	char blkbuf[BP_SPRINTF_LEN];

	if (flags & ZDB_FLAG_BSWAP)
		byteswap_uint64_array((void *)bp, sizeof (blkptr_t));

	snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
	(void) printf("%s\n", blkbuf);
}

static void
zdb_dump_indirect(blkptr_t *bp, int nbps, int flags)
{
	int i;

	for (i = 0; i < nbps; i++)
		zdb_print_blkptr(&bp[i], flags);
}

static void
zdb_dump_gbh(void *buf, int flags)
{
	zdb_dump_indirect((blkptr_t *)buf, SPA_GBH_NBLKPTRS, flags);
}

static void
zdb_dump_block_raw(void *buf, uint64_t size, int flags)
{
	if (flags & ZDB_FLAG_BSWAP)
		byteswap_uint64_array(buf, size);
	VERIFY(write(fileno(stdout), buf, size) == size);
}

static void
zdb_dump_block(char *label, void *buf, uint64_t size, int flags)
{
	uint64_t *d = (uint64_t *)buf;
	unsigned nwords = size / sizeof (uint64_t);
	int do_bswap = !!(flags & ZDB_FLAG_BSWAP);
	unsigned i, j;
	const char *hdr;
	char *c;

	if (do_bswap)
		hdr = " 7 6 5 4 3 2 1 0   f e d c b a 9 8";
	else
		hdr = " 0 1 2 3 4 5 6 7   8 9 a b c d e f";

	(void) printf("\n%s\n%6s   %s  0123456789abcdef\n", label, "", hdr);

#ifdef _LITTLE_ENDIAN
	/* correct the endianness */
	do_bswap = !do_bswap;
#endif
	for (i = 0; i < nwords; i += 2) {
		(void) printf("%06llx:  %016llx  %016llx  ",
		    (u_longlong_t)(i * sizeof (uint64_t)),
		    (u_longlong_t)(do_bswap ? BSWAP_64(d[i]) : d[i]),
		    (u_longlong_t)(do_bswap ? BSWAP_64(d[i + 1]) : d[i + 1]));

		c = (char *)&d[i];
		for (j = 0; j < 2 * sizeof (uint64_t); j++)
			(void) printf("%c", isprint(c[j]) ? c[j] : '.');
		(void) printf("\n");
	}
}
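
/*
 * Each zdb_dump_block() line pairs the hex words with their ASCII
 * rendering, e.g. (illustrative):
 *
 *	000000:  0000000000000003  0000000000000001  ................
 */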

/*
 * There are two acceptable formats:
 *	leaf_name	- For example: c1t0d0 or /tmp/ztest.0a
 *	child[.child]*	- For example: 0.1.1
 *
 * The second form can be used to specify arbitrary vdevs anywhere
 * in the hierarchy.  For example, in a pool with a mirror of
 * RAID-Zs, you can specify either RAID-Z vdev with 0.0 or 0.1 .
 */
static vdev_t *
zdb_vdev_lookup(vdev_t *vdev, const char *path)
{
	char *s, *p, *q;
	unsigned i;

	if (vdev == NULL)
		return (NULL);

	/* First, assume the x.x.x.x format */
	i = strtoul(path, &s, 10);
	if (s == path || (s && *s != '.' && *s != '\0'))
		goto name;
	if (i >= vdev->vdev_children)
		return (NULL);

	vdev = vdev->vdev_child[i];
	if (s && *s == '\0')
		return (vdev);
	return (zdb_vdev_lookup(vdev, s+1));

name:
	for (i = 0; i < vdev->vdev_children; i++) {
		vdev_t *vc = vdev->vdev_child[i];

		if (vc->vdev_path == NULL) {
			vc = zdb_vdev_lookup(vc, path);
			if (vc == NULL)
				continue;
			else
				return (vc);
		}

		p = strrchr(vc->vdev_path, '/');
		p = p ? p + 1 : vc->vdev_path;
		q = &vc->vdev_path[strlen(vc->vdev_path) - 2];

		if (strcmp(vc->vdev_path, path) == 0)
			return (vc);
		if (strcmp(p, path) == 0)
			return (vc);
		if (strcmp(q, "s0") == 0 && strncmp(p, path, q - p) == 0)
			return (vc);
	}

	return (NULL);
}

static int
name_from_objset_id(spa_t *spa, uint64_t objset_id, char *outstr)
{
	dsl_dataset_t *ds;

	dsl_pool_config_enter(spa->spa_dsl_pool, FTAG);
	int error = dsl_dataset_hold_obj(spa->spa_dsl_pool, objset_id,
	    NULL, &ds);
	if (error != 0) {
		(void) fprintf(stderr, "failed to hold objset %llu: %s\n",
		    (u_longlong_t)objset_id, strerror(error));
		dsl_pool_config_exit(spa->spa_dsl_pool, FTAG);
		return (error);
	}
	dsl_dataset_name(ds, outstr);
	dsl_dataset_rele(ds, NULL);
	dsl_pool_config_exit(spa->spa_dsl_pool, FTAG);
	return (0);
}

static boolean_t
zdb_parse_block_sizes(char *sizes, uint64_t *lsize, uint64_t *psize)
{
	char *s0, *s1, *tmp = NULL;

	if (sizes == NULL)
		return (B_FALSE);

	s0 = strtok_r(sizes, "/", &tmp);
	if (s0 == NULL)
		return (B_FALSE);
	s1 = strtok_r(NULL, "/", &tmp);
	*lsize = strtoull(s0, NULL, 16);
	*psize = s1 ? strtoull(s1, NULL, 16) : *lsize;
	return (*lsize >= *psize && *psize > 0);
}
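
/*
 * Example (illustrative): a size argument of "4000/2000" parses as
 * lsize 0x4000 and psize 0x2000, while a bare "2000" sets both sizes
 * to 0x2000.  The parse fails if psize is zero or larger than lsize.
 */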

#define	ZIO_COMPRESS_MASK(alg)	(1ULL << (ZIO_COMPRESS_##alg))

static boolean_t
zdb_decompress_block(abd_t *pabd, void *buf, void *lbuf, uint64_t lsize,
    uint64_t psize, int flags)
{
	boolean_t exceeded = B_FALSE;
	/*
	 * We don't know how the data was compressed, so just try
	 * every decompress function at every inflated blocksize.
	 */
	void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
	int cfuncs[ZIO_COMPRESS_FUNCTIONS] = { 0 };
	int *cfuncp = cfuncs;
	uint64_t maxlsize = SPA_MAXBLOCKSIZE;
	uint64_t mask = ZIO_COMPRESS_MASK(ON) | ZIO_COMPRESS_MASK(OFF) |
	    ZIO_COMPRESS_MASK(INHERIT) | ZIO_COMPRESS_MASK(EMPTY) |
	    (getenv("ZDB_NO_ZLE") ? ZIO_COMPRESS_MASK(ZLE) : 0);
	*cfuncp++ = ZIO_COMPRESS_LZ4;
	*cfuncp++ = ZIO_COMPRESS_LZJB;
	mask |= ZIO_COMPRESS_MASK(LZ4) | ZIO_COMPRESS_MASK(LZJB);
	for (int c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++)
		if (((1ULL << c) & mask) == 0)
			*cfuncp++ = c;

	/*
	 * On the one hand, with SPA_MAXBLOCKSIZE at 16MB, this
	 * could take a while and we should let the user know
	 * we are not stuck.  On the other hand, printing progress
	 * info gets old after a while.  The user can specify the
	 * 'v' flag to see the progress.
	 */
	if (lsize == psize)
		lsize += SPA_MINBLOCKSIZE;
	else
		maxlsize = lsize;
	for (; lsize <= maxlsize; lsize += SPA_MINBLOCKSIZE) {
		for (cfuncp = cfuncs; *cfuncp; cfuncp++) {
			if (flags & ZDB_FLAG_VERBOSE) {
				(void) fprintf(stderr,
				    "Trying %05llx -> %05llx (%s)\n",
				    (u_longlong_t)psize,
				    (u_longlong_t)lsize,
				    zio_compress_table[*cfuncp].ci_name);
			}

			/*
			 * We randomize lbuf2, and decompress to both
			 * lbuf and lbuf2.  This way, we will know if
			 * decompression filled the buffer exactly to
			 * lsize.
			 */
			VERIFY0(random_get_pseudo_bytes(lbuf2, lsize));

			if (zio_decompress_data(*cfuncp, pabd,
			    lbuf, psize, lsize, NULL) == 0 &&
			    zio_decompress_data(*cfuncp, pabd,
			    lbuf2, psize, lsize, NULL) == 0 &&
			    bcmp(lbuf, lbuf2, lsize) == 0)
				break;
		}
		if (*cfuncp != 0)
			break;
	}
	umem_free(lbuf2, SPA_MAXBLOCKSIZE);

	if (lsize > maxlsize) {
		exceeded = B_TRUE;
	}
	if (*cfuncp == ZIO_COMPRESS_ZLE) {
		printf("\nZLE decompression was selected. If you "
		    "suspect the results are wrong,\ntry avoiding ZLE "
		    "by setting and exporting ZDB_NO_ZLE=\"true\"\n");
	}

	return (exceeded);
}

/*
 * Read a block from a pool and print it out.  The syntax of the
 * block descriptor is:
 *
 *	pool:vdev_specifier:offset:[lsize/]psize[:flags]
 *
 *	pool           - The name of the pool you wish to read from
 *	vdev_specifier - Which vdev (see comment for zdb_vdev_lookup)
 *	offset         - offset, in hex, in bytes
 *	size           - Amount of data to read, in hex, in bytes
 *	flags          - A string of characters specifying options
 *		 b: Decode a blkptr at given offset within block
 *		 c: Calculate and display checksums
 *		 d: Decompress data before dumping
 *		 e: Byteswap data before dumping
 *		 g: Display data as a gang block header
 *		 i: Display as an indirect block
 *		 r: Dump raw data to stdout
 *		 v: Verbose
 */
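/*
 * For example (illustrative):
 *
 *	zdb -R tank:0:400000:200:r
 *
 * reads 0x200 bytes at offset 0x400000 from the first top-level vdev
 * of pool "tank" and dumps them raw to stdout.
 */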
static void
zdb_read_block(char *thing, spa_t *spa)
{
	blkptr_t blk, *bp = &blk;
	dva_t *dva = bp->blk_dva;
	int flags = 0;
	uint64_t offset = 0, psize = 0, lsize = 0, blkptr_offset = 0;
	zio_t *zio;
	vdev_t *vd;
	abd_t *pabd;
	void *lbuf, *buf;
	char *s, *p, *dup, *vdev, *flagstr, *sizes, *tmp = NULL;
	int i, error;
	boolean_t borrowed = B_FALSE, found = B_FALSE;

	dup = strdup(thing);
	s = strtok_r(dup, ":", &tmp);
	vdev = s ? s : "";
	s = strtok_r(NULL, ":", &tmp);
	offset = strtoull(s ? s : "", NULL, 16);
	sizes = strtok_r(NULL, ":", &tmp);
	s = strtok_r(NULL, ":", &tmp);
	flagstr = strdup(s ? s : "");

	s = NULL;
	tmp = NULL;
	if (!zdb_parse_block_sizes(sizes, &lsize, &psize))
		s = "invalid size(s)";
	if (!IS_P2ALIGNED(psize, DEV_BSIZE) || !IS_P2ALIGNED(lsize, DEV_BSIZE))
		s = "size must be a multiple of sector size";
	if (!IS_P2ALIGNED(offset, DEV_BSIZE))
		s = "offset must be a multiple of sector size";
	if (s) {
		(void) printf("Invalid block specifier: %s  - %s\n", thing, s);
		goto done;
	}

	for (s = strtok_r(flagstr, ":", &tmp);
	    s != NULL;
	    s = strtok_r(NULL, ":", &tmp)) {
		for (i = 0; i < strlen(flagstr); i++) {
			int bit = flagbits[(uchar_t)flagstr[i]];

			if (bit == 0) {
				(void) printf("***Ignoring flag: %c\n",
				    (uchar_t)flagstr[i]);
				continue;
			}
			found = B_TRUE;
			flags |= bit;

			p = &flagstr[i + 1];
			if (*p != ':' && *p != '\0') {
				int j = 0, nextbit = flagbits[(uchar_t)*p];
				char *end, offstr[8] = { 0 };
				if ((bit == ZDB_FLAG_PRINT_BLKPTR) &&
				    (nextbit == 0)) {
					/* look ahead to isolate the offset */
					while (nextbit == 0 &&
					    strchr(flagbitstr, *p) == NULL) {
						offstr[j] = *p;
						j++;
						if (i + j > strlen(flagstr))
							break;
						p++;
						nextbit = flagbits[(uchar_t)*p];
					}
					blkptr_offset = strtoull(offstr, &end,
					    16);
					i += j;
				} else if (nextbit == 0) {
					(void) printf("***Ignoring flag arg:"
					    " '%c'\n", (uchar_t)*p);
				}
			}
		}
	}
	if (blkptr_offset % sizeof (blkptr_t)) {
		printf("Block pointer offset 0x%llx "
		    "must be divisible by 0x%x\n",
		    (longlong_t)blkptr_offset, (int)sizeof (blkptr_t));
		goto done;
	}
	if (found == B_FALSE && strlen(flagstr) > 0) {
		printf("Invalid flag arg: '%s'\n", flagstr);
		goto done;
	}
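
	/*
	 * Example (illustrative): a flag string of "b180" selects
	 * ZDB_FLAG_PRINT_BLKPTR with a blkptr_offset of 0x180, which
	 * passes the check above because it is 3 * sizeof (blkptr_t).
	 */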

	vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev);
	if (vd == NULL) {
		(void) printf("***Invalid vdev: %s\n", vdev);
		goto done;
	}
	if (vd->vdev_path)
		(void) fprintf(stderr, "Found vdev: %s\n",
		    vd->vdev_path);
	else
		(void) fprintf(stderr, "Found vdev type: %s\n",
		    vd->vdev_ops->vdev_op_type);

	pabd = abd_alloc_for_io(SPA_MAXBLOCKSIZE, B_FALSE);
	lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);

	BP_ZERO(bp);

	DVA_SET_VDEV(&dva[0], vd->vdev_id);
	DVA_SET_OFFSET(&dva[0], offset);
	DVA_SET_GANG(&dva[0], !!(flags & ZDB_FLAG_GBH));
	DVA_SET_ASIZE(&dva[0], vdev_psize_to_asize(vd, psize));

	BP_SET_BIRTH(bp, TXG_INITIAL, TXG_INITIAL);

	BP_SET_LSIZE(bp, lsize);
	BP_SET_PSIZE(bp, psize);
	BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF);
	BP_SET_CHECKSUM(bp, ZIO_CHECKSUM_OFF);
	BP_SET_TYPE(bp, DMU_OT_NONE);
	BP_SET_LEVEL(bp, 0);
	BP_SET_DEDUP(bp, 0);
	BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);

	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
	zio = zio_root(spa, NULL, NULL, 0);

	if (vd == vd->vdev_top) {
		/*
		 * Treat this as a normal block read.
		 */
		zio_nowait(zio_read(zio, spa, bp, pabd, psize, NULL, NULL,
		    ZIO_PRIORITY_SYNC_READ,
		    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL));
	} else {
		/*
		 * Treat this as a vdev child I/O.
		 */
		zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pabd,
		    psize, ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
		    ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_PROPAGATE |
		    ZIO_FLAG_DONT_RETRY | ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW |
		    ZIO_FLAG_OPTIONAL, NULL, NULL));
	}

	error = zio_wait(zio);
	spa_config_exit(spa, SCL_STATE, FTAG);

	if (error) {
		(void) printf("Read of %s failed, error: %d\n", thing, error);
		goto out;
	}

	uint64_t orig_lsize = lsize;
	buf = lbuf;
	if (flags & ZDB_FLAG_DECOMPRESS) {
		boolean_t failed = zdb_decompress_block(pabd, buf, lbuf,
		    lsize, psize, flags);
		if (failed) {
			(void) printf("Decompress of %s failed\n", thing);
			goto out;
		}
	} else {
		buf = abd_borrow_buf_copy(pabd, lsize);
		borrowed = B_TRUE;
	}
	/*
	 * Try to detect invalid block pointer.  If invalid, try
	 * decompressing.
	 */
	if ((flags & ZDB_FLAG_PRINT_BLKPTR || flags & ZDB_FLAG_INDIRECT) &&
	    !(flags & ZDB_FLAG_DECOMPRESS)) {
		const blkptr_t *b = (const blkptr_t *)(void *)
		    ((uintptr_t)buf + (uintptr_t)blkptr_offset);
		if (zfs_blkptr_verify(spa, b, B_FALSE, BLK_VERIFY_ONLY) ==
		    B_FALSE) {
			abd_return_buf_copy(pabd, buf, lsize);
			borrowed = B_FALSE;
			buf = lbuf;
			boolean_t failed = zdb_decompress_block(pabd, buf,
			    lbuf, lsize, psize, flags);
			b = (const blkptr_t *)(void *)
			    ((uintptr_t)buf + (uintptr_t)blkptr_offset);
			if (failed || zfs_blkptr_verify(spa, b, B_FALSE,
			    BLK_VERIFY_LOG) == B_FALSE) {
				printf("invalid block pointer at this DVA\n");
				goto out;
			}
		}
	}

	if (flags & ZDB_FLAG_PRINT_BLKPTR)
		zdb_print_blkptr((blkptr_t *)(void *)
		    ((uintptr_t)buf + (uintptr_t)blkptr_offset), flags);
	else if (flags & ZDB_FLAG_RAW)
		zdb_dump_block_raw(buf, lsize, flags);
	else if (flags & ZDB_FLAG_INDIRECT)
		zdb_dump_indirect((blkptr_t *)buf,
		    orig_lsize / sizeof (blkptr_t), flags);
	else if (flags & ZDB_FLAG_GBH)
		zdb_dump_gbh(buf, flags);
	else
		zdb_dump_block(thing, buf, lsize, flags);

	/*
	 * If :c was specified, iterate through the checksum table to
	 * calculate and display each checksum for our specified
	 * DVA and length.
	 */
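	/*
	 * Each table entry produces one line of the form (illustrative
	 * values):
	 *
	 *	fletcher4	cksum=2f9ae3b0c:5d1c29ff1c6:83ba94e22a9:913f2b7cd5a
	 */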
	if ((flags & ZDB_FLAG_CHECKSUM) && !(flags & ZDB_FLAG_RAW) &&
	    !(flags & ZDB_FLAG_GBH)) {
		zio_t *czio;
		(void) printf("\n");
		for (enum zio_checksum ck = ZIO_CHECKSUM_LABEL;
		    ck < ZIO_CHECKSUM_FUNCTIONS; ck++) {
			if ((zio_checksum_table[ck].ci_flags &
			    ZCHECKSUM_FLAG_EMBEDDED) ||
			    ck == ZIO_CHECKSUM_NOPARITY) {
				continue;
			}
			BP_SET_CHECKSUM(bp, ck);
			spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
			czio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL);
			czio->io_bp = bp;

			if (vd == vd->vdev_top) {
				zio_nowait(zio_read(czio, spa, bp, pabd, psize,
				    NULL, NULL,
				    ZIO_PRIORITY_SYNC_READ,
				    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW |
				    ZIO_FLAG_DONT_RETRY, NULL));
			} else {
				zio_nowait(zio_vdev_child_io(czio, bp, vd,
				    offset, pabd, psize, ZIO_TYPE_READ,
				    ZIO_PRIORITY_SYNC_READ,
				    ZIO_FLAG_DONT_CACHE |
				    ZIO_FLAG_DONT_PROPAGATE |
				    ZIO_FLAG_DONT_RETRY |
				    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW |
				    ZIO_FLAG_SPECULATIVE |
				    ZIO_FLAG_OPTIONAL, NULL, NULL));
			}
			error = zio_wait(czio);
			if (error == 0 || error == ECKSUM) {
				zio_t *ck_zio = zio_root(spa, NULL, NULL, 0);
				ck_zio->io_offset =
				    DVA_GET_OFFSET(&bp->blk_dva[0]);
				ck_zio->io_bp = bp;
				zio_checksum_compute(ck_zio, ck, pabd, lsize);
				printf("%12s\tcksum=%llx:%llx:%llx:%llx\n",
				    zio_checksum_table[ck].ci_name,
				    (u_longlong_t)bp->blk_cksum.zc_word[0],
				    (u_longlong_t)bp->blk_cksum.zc_word[1],
				    (u_longlong_t)bp->blk_cksum.zc_word[2],
				    (u_longlong_t)bp->blk_cksum.zc_word[3]);
				zio_wait(ck_zio);
			} else {
				printf("error %d reading block\n", error);
			}
			spa_config_exit(spa, SCL_STATE, FTAG);
		}
	}

	if (borrowed)
		abd_return_buf_copy(pabd, buf, lsize);

out:
	abd_free(pabd);
	umem_free(lbuf, SPA_MAXBLOCKSIZE);
done:
	free(flagstr);
	free(dup);
}

static void
zdb_embedded_block(char *thing)
{
	blkptr_t bp;
	unsigned long long *words = (void *)&bp;
	char *buf;
	int err;

	bzero(&bp, sizeof (bp));
	err = sscanf(thing, "%llx:%llx:%llx:%llx:%llx:%llx:%llx:%llx:"
	    "%llx:%llx:%llx:%llx:%llx:%llx:%llx:%llx",
	    words + 0, words + 1, words + 2, words + 3,
	    words + 4, words + 5, words + 6, words + 7,
	    words + 8, words + 9, words + 10, words + 11,
	    words + 12, words + 13, words + 14, words + 15);
	if (err != 16) {
		(void) fprintf(stderr, "invalid input format\n");
		exit(1);
	}
	ASSERT3U(BPE_GET_LSIZE(&bp), <=, SPA_MAXBLOCKSIZE);
	buf = malloc(SPA_MAXBLOCKSIZE);
	if (buf == NULL) {
		(void) fprintf(stderr, "out of memory\n");
		exit(1);
	}
	err = decode_embedded_bp(&bp, buf, BPE_GET_LSIZE(&bp));
	if (err != 0) {
		(void) fprintf(stderr, "decode failed: %u\n", err);
		exit(1);
	}
	zdb_dump_block_raw(buf, BPE_GET_LSIZE(&bp), 0);
	free(buf);
}
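
/*
 * The argument is the 16 colon-separated hex words of an embedded
 * block pointer, i.e. an invocation of the form (illustrative):
 *
 *	zdb -E word0:word1:...:word15
 */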

int
main(int argc, char **argv)
{
	int c;
	struct rlimit rl = { 1024, 1024 };
	spa_t *spa = NULL;
	objset_t *os = NULL;
	int dump_all = 1;
	int verbose = 0;
	int error = 0;
	char **searchdirs = NULL;
	int nsearch = 0;
	char *target, *target_pool, dsname[ZFS_MAX_DATASET_NAME_LEN];
	nvlist_t *policy = NULL;
	uint64_t max_txg = UINT64_MAX;
	int64_t objset_id = -1;
	uint64_t object;
	int flags = ZFS_IMPORT_MISSING_LOG;
	int rewind = ZPOOL_NEVER_REWIND;
	char *spa_config_path_env, *objset_str;
	boolean_t target_is_spa = B_TRUE, dataset_lookup = B_FALSE;
	nvlist_t *cfg = NULL;

	(void) setrlimit(RLIMIT_NOFILE, &rl);
	(void) enable_extended_FILE_stdio(-1, -1);

	dprintf_setup(&argc, argv);

	/*
	 * If the SPA_CONFIG_PATH environment variable is set, it overrides
	 * the default spa_config_path setting.  If -U is specified, it
	 * overrides the environment variable in turn.
	 */
	spa_config_path_env = getenv("SPA_CONFIG_PATH");
	if (spa_config_path_env != NULL)
		spa_config_path = spa_config_path_env;
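
	/*
	 * E.g. (illustrative), "SPA_CONFIG_PATH=/tmp/zpool.cache zdb tank"
	 * reads the cached configuration from /tmp/zpool.cache, unless
	 * "-U <cachefile>" is also given on the command line.
	 */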

	/*
	 * For performance reasons, we set this tunable down. We do so before
	 * the arg parsing section so that the user can override this value
	 * if they choose.
	 */
	zfs_btree_verify_intensity = 3;

	while ((c = getopt(argc, argv,
	    "AbcCdDeEFGhiI:klLmMo:Op:PqrRsSt:uU:vVx:XYyZ")) != -1) {
		switch (c) {
		case 'b':
		case 'c':
		case 'C':
		case 'd':
		case 'D':
		case 'E':
		case 'G':
		case 'h':
		case 'i':
		case 'l':
		case 'm':
		case 'M':
		case 'O':
		case 'r':
		case 'R':
		case 's':
		case 'S':
		case 'u':
		case 'y':
		case 'Z':
			dump_opt[c]++;
			dump_all = 0;
			break;
		case 'A':
		case 'e':
		case 'F':
		case 'k':
		case 'L':
		case 'P':
		case 'q':
		case 'X':
			dump_opt[c]++;
			break;
		case 'Y':
			zfs_reconstruct_indirect_combinations_max = INT_MAX;
			zfs_deadman_enabled = 0;
			break;
		/* NB: Sort single match options below. */
		case 'I':
			max_inflight_bytes = strtoull(optarg, NULL, 0);
			if (max_inflight_bytes == 0) {
				(void) fprintf(stderr, "maximum number "
				    "of inflight bytes must be greater "
				    "than 0\n");
				usage();
			}
			break;
		case 'o':
			error = set_global_var(optarg);
			if (error != 0)
				usage();
			break;
		case 'p':
			if (searchdirs == NULL) {
				searchdirs = umem_alloc(sizeof (char *),
				    UMEM_NOFAIL);
			} else {
				char **tmp = umem_alloc((nsearch + 1) *
				    sizeof (char *), UMEM_NOFAIL);
				bcopy(searchdirs, tmp, nsearch *
				    sizeof (char *));
				umem_free(searchdirs,
				    nsearch * sizeof (char *));
				searchdirs = tmp;
			}
			searchdirs[nsearch++] = optarg;
			break;
		case 't':
			max_txg = strtoull(optarg, NULL, 0);
			if (max_txg < TXG_INITIAL) {
				(void) fprintf(stderr, "incorrect txg "
				    "specified: %s\n", optarg);
				usage();
			}
			break;
		case 'U':
			spa_config_path = optarg;
			if (spa_config_path[0] != '/') {
				(void) fprintf(stderr,
				    "cachefile must be an absolute path "
				    "(i.e. start with a slash)\n");
				usage();
			}
			break;
		case 'v':
			verbose++;
			break;
		case 'V':
			flags = ZFS_IMPORT_VERBATIM;
			break;
		case 'x':
			vn_dumpdir = optarg;
			break;
		default:
			usage();
			break;
		}
	}

	if (!dump_opt['e'] && searchdirs != NULL) {
		(void) fprintf(stderr, "-p option requires use of -e\n");
		usage();
	}
	if (dump_opt['d'] || dump_opt['r']) {
		/* <pool>[/<dataset | objset id>] is accepted */
		if (argv[2] && (objset_str = strchr(argv[2], '/')) != NULL &&
		    objset_str++ != NULL) {
			char *endptr;
			errno = 0;
			objset_id = strtoull(objset_str, &endptr, 0);
			/* dataset 0 is the same as opening the pool */
			if (errno == 0 && endptr != objset_str &&
			    objset_id != 0) {
				target_is_spa = B_FALSE;
				dataset_lookup = B_TRUE;
			} else if (objset_id != 0) {
				printf("failed to open objset %s "
				    "%llu %s\n", objset_str,
				    (u_longlong_t)objset_id,
				    strerror(errno));
				exit(1);
			}
			/* normal dataset name not an objset ID */
			if (endptr == objset_str) {
				objset_id = -1;
			}
		}
	}
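
	/*
	 * Examples (illustrative): "zdb -d tank/fish" names the dataset
	 * directly, while "zdb -d tank/54" refers to the dataset whose
	 * objset ID is 54.
	 */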

#if defined(_LP64)
	/*
	 * ZDB does not typically re-read blocks; therefore limit the ARC
	 * to 256 MB, which can be used entirely for metadata.
	 */
	zfs_arc_min = zfs_arc_meta_min = 2ULL << SPA_MAXBLOCKSHIFT;
	zfs_arc_max = zfs_arc_meta_limit = 256 * 1024 * 1024;
#endif

	/*
	 * "zdb -c" uses checksum-verifying scrub i/os which are async reads.
	 * "zdb -b" uses traversal prefetch which uses async reads.
	 * For good performance, let several of them be active at once.
	 */
	zfs_vdev_async_read_max_active = 10;

	/*
	 * Disable reference tracking for better performance.
	 */
	reference_tracking_enable = B_FALSE;

	/*
	 * Do not fail spa_load when spa_load_verify fails. This is needed
	 * to load non-idle pools.
	 */
	spa_load_verify_dryrun = B_TRUE;

	kernel_init(SPA_MODE_READ);

	if (dump_all)
		verbose = MAX(verbose, 1);

	for (c = 0; c < 256; c++) {
		if (dump_all && strchr("AeEFklLOPrRSXy", c) == NULL)
			dump_opt[c] = 1;
		if (dump_opt[c])
			dump_opt[c] += verbose;
	}

	libspl_assert_ok = (dump_opt['A'] == 1) || (dump_opt['A'] > 2);
	zfs_recover = (dump_opt['A'] > 1);

	argc -= optind;
	argv += optind;
	if (argc < 2 && dump_opt['R'])
		usage();

	if (dump_opt['E']) {
		if (argc != 1)
			usage();
		zdb_embedded_block(argv[0]);
		return (0);
	}

	if (argc < 1) {
		if (!dump_opt['e'] && dump_opt['C']) {
			dump_cachefile(spa_config_path);
			return (0);
		}
		usage();
	}

	if (dump_opt['l'])
		return (dump_label(argv[0]));

	if (dump_opt['O']) {
		if (argc != 2)
			usage();
		dump_opt['v'] = verbose + 3;
		return (dump_path(argv[0], argv[1], NULL));
	}
	if (dump_opt['r']) {
		if (argc != 3)
			usage();
		dump_opt['v'] = verbose;
		error = dump_path(argv[0], argv[1], &object);
	}

	if (dump_opt['X'] || dump_opt['F'])
		rewind = ZPOOL_DO_REWIND |
		    (dump_opt['X'] ? ZPOOL_EXTREME_REWIND : 0);

	if (nvlist_alloc(&policy, NV_UNIQUE_NAME_TYPE, 0) != 0 ||
	    nvlist_add_uint64(policy, ZPOOL_LOAD_REQUEST_TXG, max_txg) != 0 ||
	    nvlist_add_uint32(policy, ZPOOL_LOAD_REWIND_POLICY, rewind) != 0)
		fatal("internal error: %s", strerror(ENOMEM));

	error = 0;
	target = argv[0];

	if (strpbrk(target, "/@") != NULL) {
		size_t targetlen;

		target_pool = strdup(target);
		*strpbrk(target_pool, "/@") = '\0';

		target_is_spa = B_FALSE;
		targetlen = strlen(target);
		if (targetlen && target[targetlen - 1] == '/')
			target[targetlen - 1] = '\0';
	} else {
		target_pool = target;
	}

	if (dump_opt['e']) {
		importargs_t args = { 0 };

		args.paths = nsearch;
		args.path = searchdirs;
		args.can_be_active = B_TRUE;

		error = zpool_find_config(NULL, target_pool, &cfg, &args,
		    &libzpool_config_ops);

		if (error == 0) {
			if (nvlist_add_nvlist(cfg,
			    ZPOOL_LOAD_POLICY, policy) != 0) {
				fatal("can't open '%s': %s",
				    target, strerror(ENOMEM));
			}

			if (dump_opt['C'] > 1) {
				(void) printf("\nConfiguration for import:\n");
				dump_nvlist(cfg, 8);
			}

			/*
			 * Disable the activity check to allow examination of
			 * active pools.
			 */
			error = spa_import(target_pool, cfg, NULL,
			    flags | ZFS_IMPORT_SKIP_MMP);
		}
	}

	if (searchdirs != NULL) {
		umem_free(searchdirs, nsearch * sizeof (char *));
		searchdirs = NULL;
	}
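
	/*
	 * E.g. (illustrative), "zdb -e -p /var/tmp tank" reconstructs the
	 * configuration of pool "tank" from device labels found under
	 * /var/tmp instead of using the cachefile.
	 */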

	/*
	 * import_checkpointed_state makes the assumption that the
	 * target pool that we pass it is already part of the spa
	 * namespace. Because of that we need to make sure to call
	 * it always after the -e option has been processed, which
	 * imports the pool to the namespace if it's not in the
	 * cachefile.
	 */
	char *checkpoint_pool = NULL;
	char *checkpoint_target = NULL;
	if (dump_opt['k']) {
		checkpoint_pool = import_checkpointed_state(target, cfg,
		    &checkpoint_target);

		if (checkpoint_target != NULL)
			target = checkpoint_target;
	}

	if (cfg != NULL) {
		nvlist_free(cfg);
		cfg = NULL;
	}

	if (target_pool != target)
		free(target_pool);

	if (error == 0) {
		if (dump_opt['k'] && (target_is_spa || dump_opt['R'])) {
			ASSERT(checkpoint_pool != NULL);
			ASSERT(checkpoint_target == NULL);

			error = spa_open(checkpoint_pool, &spa, FTAG);
			if (error != 0) {
				fatal("Tried to open pool \"%s\" but "
				    "spa_open() failed with error %d\n",
				    checkpoint_pool, error);
			}
		} else if (target_is_spa || dump_opt['R'] || objset_id == 0) {
			zdb_set_skip_mmp(target);
			error = spa_open_rewind(target, &spa, FTAG, policy,
			    NULL);
			if (error) {
				/*
				 * If we're missing the log device then
				 * try opening the pool after clearing the
				 * log state.
				 */
				mutex_enter(&spa_namespace_lock);
				if ((spa = spa_lookup(target)) != NULL &&
				    spa->spa_log_state == SPA_LOG_MISSING) {
					spa->spa_log_state = SPA_LOG_CLEAR;
					error = 0;
				}
				mutex_exit(&spa_namespace_lock);

				if (!error) {
					error = spa_open_rewind(target, &spa,
					    FTAG, policy, NULL);
				}
			}
		} else if (strpbrk(target, "#") != NULL) {
			dsl_pool_t *dp;
			error = dsl_pool_hold(target, FTAG, &dp);
			if (error != 0) {
				fatal("can't dump '%s': %s", target,
				    strerror(error));
			}
			error = dump_bookmark(dp, target, B_TRUE, verbose > 1);
			dsl_pool_rele(dp, FTAG);
			if (error != 0) {
				fatal("can't dump '%s': %s", target,
				    strerror(error));
			}
			return (error);
		} else {
			zdb_set_skip_mmp(target);
			if (dataset_lookup == B_TRUE) {
				/*
				 * Use the supplied id to get the name
				 * for open_objset.
				 */
				error = spa_open(target, &spa, FTAG);
				if (error == 0) {
					error = name_from_objset_id(spa,
					    objset_id, dsname);
					spa_close(spa, FTAG);
					if (error == 0)
						target = dsname;
				}
			}
			if (error == 0)
				error = open_objset(target, FTAG, &os);
			if (error == 0)
				spa = dmu_objset_spa(os);
		}
	}
	nvlist_free(policy);

	if (error)
		fatal("can't open '%s': %s", target, strerror(error));

	/*
	 * Set the pool failure mode to panic in order to prevent the pool
	 * from suspending.  A suspended I/O will have no way to resume and
	 * can prevent the zdb(8) command from terminating as expected.
	 */
	if (spa != NULL)
		spa->spa_failmode = ZIO_FAILURE_MODE_PANIC;

	argv++;
	argc--;
	if (dump_opt['r']) {
		error = zdb_copy_object(os, object, argv[1]);
	} else if (!dump_opt['R']) {
		flagbits['d'] = ZOR_FLAG_DIRECTORY;
		flagbits['f'] = ZOR_FLAG_PLAIN_FILE;
		flagbits['m'] = ZOR_FLAG_SPACE_MAP;
		flagbits['z'] = ZOR_FLAG_ZAP;
		flagbits['A'] = ZOR_FLAG_ALL_TYPES;
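
		/*
		 * Example (illustrative): "zdb -d tank 128:256:f" limits
		 * the dump to plain-file objects with IDs between 128 and
		 * 256; the trailing flag characters map through flagbits[]
		 * to the ZOR_FLAG_* values above.
		 */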
		if (argc > 0 && dump_opt['d']) {
			zopt_object_args = argc;
			zopt_object_ranges = calloc(zopt_object_args,
			    sizeof (zopt_object_range_t));
			for (unsigned i = 0; i < zopt_object_args; i++) {
				int err;
				char *msg = NULL;

				err = parse_object_range(argv[i],
				    &zopt_object_ranges[i], &msg);
				if (err != 0)
					fatal("Bad object or range: '%s': %s\n",
					    argv[i], msg ? msg : "");
			}
		} else if (argc > 0 && dump_opt['m']) {
			zopt_metaslab_args = argc;
			zopt_metaslab = calloc(zopt_metaslab_args,
			    sizeof (uint64_t));
			for (unsigned i = 0; i < zopt_metaslab_args; i++) {
				errno = 0;
				zopt_metaslab[i] = strtoull(argv[i], NULL, 0);
				if (zopt_metaslab[i] == 0 && errno != 0)
					fatal("bad number %s: %s", argv[i],
					    strerror(errno));
			}
		}
		if (os != NULL) {
			dump_objset(os);
		} else if (zopt_object_args > 0 && !dump_opt['m']) {
			dump_objset(spa->spa_meta_objset);
		} else {
			dump_zpool(spa);
		}
	} else {
		flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR;
		flagbits['c'] = ZDB_FLAG_CHECKSUM;
		flagbits['d'] = ZDB_FLAG_DECOMPRESS;
		flagbits['e'] = ZDB_FLAG_BSWAP;
		flagbits['g'] = ZDB_FLAG_GBH;
		flagbits['i'] = ZDB_FLAG_INDIRECT;
		flagbits['r'] = ZDB_FLAG_RAW;
		flagbits['v'] = ZDB_FLAG_VERBOSE;

		for (int i = 0; i < argc; i++)
			zdb_read_block(argv[i], spa);
	}

	if (dump_opt['k']) {
		free(checkpoint_pool);
		if (!target_is_spa)
			free(checkpoint_target);
	}

	if (os != NULL) {
		close_objset(os, FTAG);
	} else {
		spa_close(spa, FTAG);
	}

	fuid_table_destroy();

	dump_debug_buffer();

	kernel_fini();

	return (error);
}