// SPDX-License-Identifier: CDDL-1.0
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
 * Copyright (c) 2014 Integros [integros.com]
 * Copyright 2016 Nexenta Systems, Inc.
 * Copyright (c) 2017, 2018 Lawrence Livermore National Security, LLC.
 * Copyright (c) 2015, 2017, Intel Corporation.
 * Copyright (c) 2020 Datto Inc.
 * Copyright (c) 2020, The FreeBSD Foundation [1]
 *
 * [1] Portions of this software were developed by Allan Jude
 *     under sponsorship from the FreeBSD Foundation.
 * Copyright (c) 2021 Allan Jude
 * Copyright (c) 2021 Toomas Soome <tsoome@me.com>
 * Copyright (c) 2023, 2024, Klara Inc.
 * Copyright (c) 2023, Rob Norris <robn@despairlabs.com>
 */

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <ctype.h>
#include <getopt.h>
#include <openssl/evp.h>
#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/dmu.h>
#include <sys/zap.h>
#include <sys/zap_impl.h>
#include <sys/fs/zfs.h>
#include <sys/zfs_znode.h>
#include <sys/zfs_sa.h>
#include <sys/sa.h>
#include <sys/sa_impl.h>
#include <sys/vdev.h>
#include <sys/vdev_impl.h>
#include <sys/metaslab_impl.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_bookmark.h>
#include <sys/dbuf.h>
#include <sys/zil.h>
#include <sys/zil_impl.h>
#include <sys/stat.h>
#include <sys/resource.h>
#include <sys/dmu_send.h>
#include <sys/dmu_traverse.h>
#include <sys/zio_checksum.h>
#include <sys/zio_compress.h>
#include <sys/zfs_fuid.h>
#include <sys/arc.h>
#include <sys/arc_impl.h>
#include <sys/ddt.h>
#include <sys/ddt_impl.h>
#include <sys/zfeature.h>
#include <sys/abd.h>
#include <sys/blkptr.h>
#include <sys/dsl_crypt.h>
#include <sys/dsl_scan.h>
#include <sys/btree.h>
#include <sys/brt.h>
#include <sys/brt_impl.h>
#include <zfs_comutil.h>
#include <sys/zstd/zstd.h>
#include <sys/backtrace.h>

#include <libnvpair.h>
#include <libzutil.h>
#include <libzfs_core.h>

#include <libzdb.h>

#include "zdb.h"

extern int reference_tracking_enable;
extern int zfs_recover;
extern uint_t zfs_vdev_async_read_max_active;
extern boolean_t spa_load_verify_dryrun;
extern boolean_t spa_mode_readable_spacemaps;
extern uint_t zfs_reconstruct_indirect_combinations_max;
extern uint_t zfs_btree_verify_intensity;

static const char cmdname[] = "zdb";
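
/*
 * Per-option verbosity counters, indexed by option character; each
 * occurrence of a flag on the command line (e.g. -bb) bumps its count.
 */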
uint8_t dump_opt[256];

typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);

static uint64_t *zopt_metaslab = NULL;
static unsigned zopt_metaslab_args = 0;

static zopt_object_range_t *zopt_object_ranges = NULL;
static unsigned zopt_object_args = 0;

static int flagbits[256];

static uint64_t max_inflight_bytes = 256 * 1024 * 1024; /* 256MB */
static int leaked_objects = 0;
static zfs_range_tree_t *mos_refd_objs;
static spa_t *spa;
static objset_t *os;
static boolean_t kernel_init_done;

static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *,
    boolean_t);
static void mos_obj_refd(uint64_t);
static void mos_obj_refd_multiple(uint64_t);
static int dump_bpobj_cb(void *arg, const blkptr_t *bp, boolean_t free,
    dmu_tx_t *tx);

static void zdb_print_blkptr(const blkptr_t *bp, int flags);
static void zdb_exit(int reason);

typedef struct sublivelist_verify_block_refcnt {
	/* block pointer entry in livelist being verified */
	blkptr_t svbr_blk;

	/*
	 * Refcount gets incremented to 1 when we encounter the first
	 * FREE entry for the svbr block pointer and a node for it
	 * is created in our ZDB verification/tracking metadata.
	 *
	 * As we encounter more FREE entries we increment this counter
	 * and similarly decrement it whenever we find the respective
	 * ALLOC entries for this block.
	 *
	 * When the refcount gets to 0 it means that all the FREE and
	 * ALLOC entries of this block have paired up and we no longer
	 * need to track it in our verification logic (e.g. the node
	 * containing this struct in our verification data structure
	 * should be freed).
	 *
	 * [refer to sublivelist_verify_blkptr() for the actual code]
	 */
	uint32_t svbr_refcnt;
} sublivelist_verify_block_refcnt_t;

static int
sublivelist_block_refcnt_compare(const void *larg, const void *rarg)
{
	const sublivelist_verify_block_refcnt_t *l = larg;
	const sublivelist_verify_block_refcnt_t *r = rarg;
	return (livelist_compare(&l->svbr_blk, &r->svbr_blk));
}
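
/*
 * bpobj_iterate_nofree() callback for a single sublivelist: pair up
 * FREE and ALLOC entries via sv_pair, and record any ALLOC without a
 * matching FREE in sv_leftover for the later cross check.
 */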
static int
sublivelist_verify_blkptr(void *arg, const blkptr_t *bp, boolean_t free,
    dmu_tx_t *tx)
{
	ASSERT3P(tx, ==, NULL);
	struct sublivelist_verify *sv = arg;
	sublivelist_verify_block_refcnt_t current = {
		.svbr_blk = *bp,

		/*
		 * Start with 1 in case this is the first free entry.
		 * This field is not used for our B-Tree comparisons
		 * anyway.
		 */
		.svbr_refcnt = 1,
	};

	zfs_btree_index_t where;
	sublivelist_verify_block_refcnt_t *pair =
	    zfs_btree_find(&sv->sv_pair, &current, &where);
	if (free) {
		if (pair == NULL) {
			/* first free entry for this block pointer */
			zfs_btree_add(&sv->sv_pair, &current);
		} else {
			pair->svbr_refcnt++;
		}
	} else {
		if (pair == NULL) {
			/* block that is currently marked as allocated */
			for (int i = 0; i < SPA_DVAS_PER_BP; i++) {
				if (DVA_IS_EMPTY(&bp->blk_dva[i]))
					break;
				sublivelist_verify_block_t svb = {
					.svb_dva = bp->blk_dva[i],
					.svb_allocated_txg =
					    BP_GET_BIRTH(bp)
				};

				if (zfs_btree_find(&sv->sv_leftover, &svb,
				    &where) == NULL) {
					zfs_btree_add_idx(&sv->sv_leftover,
					    &svb, &where);
				}
			}
		} else {
			/* alloc matches a free entry */
			pair->svbr_refcnt--;
			if (pair->svbr_refcnt == 0) {
				/* all allocs and frees have been matched */
				zfs_btree_remove_idx(&sv->sv_pair, &where);
			}
		}
	}

	return (0);
}

static int
sublivelist_verify_func(void *args, dsl_deadlist_entry_t *dle)
{
	int err;
	struct sublivelist_verify *sv = args;

	zfs_btree_create(&sv->sv_pair, sublivelist_block_refcnt_compare, NULL,
	    sizeof (sublivelist_verify_block_refcnt_t));

	err = bpobj_iterate_nofree(&dle->dle_bpobj, sublivelist_verify_blkptr,
	    sv, NULL);

	sublivelist_verify_block_refcnt_t *e;
	zfs_btree_index_t *cookie = NULL;
	while ((e = zfs_btree_destroy_nodes(&sv->sv_pair, &cookie)) != NULL) {
		char blkbuf[BP_SPRINTF_LEN];
		snprintf_blkptr_compact(blkbuf, sizeof (blkbuf),
		    &e->svbr_blk, B_TRUE);
		(void) printf("\tERROR: %d unmatched FREE(s): %s\n",
		    e->svbr_refcnt, blkbuf);
	}
	zfs_btree_destroy(&sv->sv_pair);

	return (err);
}

static int
livelist_block_compare(const void *larg, const void *rarg)
{
	const sublivelist_verify_block_t *l = larg;
	const sublivelist_verify_block_t *r = rarg;

	if (DVA_GET_VDEV(&l->svb_dva) < DVA_GET_VDEV(&r->svb_dva))
		return (-1);
	else if (DVA_GET_VDEV(&l->svb_dva) > DVA_GET_VDEV(&r->svb_dva))
		return (+1);

	if (DVA_GET_OFFSET(&l->svb_dva) < DVA_GET_OFFSET(&r->svb_dva))
		return (-1);
	else if (DVA_GET_OFFSET(&l->svb_dva) > DVA_GET_OFFSET(&r->svb_dva))
		return (+1);

	if (DVA_GET_ASIZE(&l->svb_dva) < DVA_GET_ASIZE(&r->svb_dva))
		return (-1);
	else if (DVA_GET_ASIZE(&l->svb_dva) > DVA_GET_ASIZE(&r->svb_dva))
		return (+1);

	return (0);
}

/*
 * Check for errors in a livelist while tracking all unfreed ALLOCs in the
 * sublivelist_verify_t: sv->sv_leftover
 */
static void
livelist_verify(dsl_deadlist_t *dl, void *arg)
{
	sublivelist_verify_t *sv = arg;
	dsl_deadlist_iterate(dl, sublivelist_verify_func, sv);
}

/*
 * Check for errors in the livelist entry and discard the intermediary
 * data structures
 */
static int
sublivelist_verify_lightweight(void *args, dsl_deadlist_entry_t *dle)
{
	(void) args;
	sublivelist_verify_t sv;
	zfs_btree_create(&sv.sv_leftover, livelist_block_compare, NULL,
	    sizeof (sublivelist_verify_block_t));
	int err = sublivelist_verify_func(&sv, dle);
	zfs_btree_clear(&sv.sv_leftover);
	zfs_btree_destroy(&sv.sv_leftover);
	return (err);
}

typedef struct metaslab_verify {
	/*
	 * Tree containing all the leftover ALLOCs from the livelists
	 * that are part of this metaslab.
	 */
	zfs_btree_t mv_livelist_allocs;

	/*
	 * Metaslab information.
	 */
	uint64_t mv_vdid;
	uint64_t mv_msid;
	uint64_t mv_start;
	uint64_t mv_end;

	/*
	 * What's currently allocated for this metaslab.
	 */
	zfs_range_tree_t *mv_allocated;
} metaslab_verify_t;

typedef void ll_iter_t(dsl_deadlist_t *ll, void *arg);

typedef int (*zdb_log_sm_cb_t)(spa_t *spa, space_map_entry_t *sme, uint64_t txg,
    void *arg);

typedef struct unflushed_iter_cb_arg {
	spa_t *uic_spa;
	uint64_t uic_txg;
	void *uic_arg;
	zdb_log_sm_cb_t uic_cb;
} unflushed_iter_cb_arg_t;

static int
iterate_through_spacemap_logs_cb(space_map_entry_t *sme, void *arg)
{
	unflushed_iter_cb_arg_t *uic = arg;
	return (uic->uic_cb(uic->uic_spa, sme, uic->uic_txg, uic->uic_arg));
}

static void
iterate_through_spacemap_logs(spa_t *spa, zdb_log_sm_cb_t cb, void *arg)
{
	if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP))
		return;

	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
	for (spa_log_sm_t *sls = avl_first(&spa->spa_sm_logs_by_txg);
	    sls; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls)) {
		space_map_t *sm = NULL;
		VERIFY0(space_map_open(&sm, spa_meta_objset(spa),
		    sls->sls_sm_obj, 0, UINT64_MAX, SPA_MINBLOCKSHIFT));

		unflushed_iter_cb_arg_t uic = {
			.uic_spa = spa,
			.uic_txg = sls->sls_txg,
			.uic_arg = arg,
			.uic_cb = cb
		};
		VERIFY0(space_map_iterate(sm, space_map_length(sm),
		    iterate_through_spacemap_logs_cb, &uic));
		space_map_close(sm);
	}
	spa_config_exit(spa, SCL_CONFIG, FTAG);
}

static void
verify_livelist_allocs(metaslab_verify_t *mv, uint64_t txg,
    uint64_t offset, uint64_t size)
{
	sublivelist_verify_block_t svb = {{{0}}};
	DVA_SET_VDEV(&svb.svb_dva, mv->mv_vdid);
	DVA_SET_OFFSET(&svb.svb_dva, offset);
	DVA_SET_ASIZE(&svb.svb_dva, size);
	zfs_btree_index_t where;
	uint64_t end_offset = offset + size;

	/*
	 * Look for an exact match for spacemap entry in the livelist entries.
	 * Then, look for other livelist entries that fall within the range
	 * of the spacemap entry as it may have been condensed
	 */
	sublivelist_verify_block_t *found =
	    zfs_btree_find(&mv->mv_livelist_allocs, &svb, &where);
	if (found == NULL) {
		found = zfs_btree_next(&mv->mv_livelist_allocs, &where, &where);
	}
	for (; found != NULL && DVA_GET_VDEV(&found->svb_dva) == mv->mv_vdid &&
	    DVA_GET_OFFSET(&found->svb_dva) < end_offset;
	    found = zfs_btree_next(&mv->mv_livelist_allocs, &where, &where)) {
		if (found->svb_allocated_txg <= txg) {
			(void) printf("ERROR: Livelist ALLOC [%llx:%llx] "
			    "from TXG %llx FREED at TXG %llx\n",
			    (u_longlong_t)DVA_GET_OFFSET(&found->svb_dva),
			    (u_longlong_t)DVA_GET_ASIZE(&found->svb_dva),
			    (u_longlong_t)found->svb_allocated_txg,
			    (u_longlong_t)txg);
		}
	}
}
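
/*
 * Validate one spacemap entry against mv_allocated: an ALLOC over an
 * already-allocated range is a double ALLOC, and a FREE over a range
 * that is not allocated is a double FREE.  Each FREE is additionally
 * cross-checked against the leftover livelist ALLOCs for this metaslab.
 */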
static int
metaslab_spacemap_validation_cb(space_map_entry_t *sme, void *arg)
{
	metaslab_verify_t *mv = arg;
	uint64_t offset = sme->sme_offset;
	uint64_t size = sme->sme_run;
	uint64_t txg = sme->sme_txg;

	if (sme->sme_type == SM_ALLOC) {
		if (zfs_range_tree_contains(mv->mv_allocated,
		    offset, size)) {
			(void) printf("ERROR: DOUBLE ALLOC: "
			    "%llu [%llx:%llx] "
			    "%llu:%llu LOG_SM\n",
			    (u_longlong_t)txg, (u_longlong_t)offset,
			    (u_longlong_t)size, (u_longlong_t)mv->mv_vdid,
			    (u_longlong_t)mv->mv_msid);
		} else {
			zfs_range_tree_add(mv->mv_allocated,
			    offset, size);
		}
	} else {
		if (!zfs_range_tree_contains(mv->mv_allocated,
		    offset, size)) {
			(void) printf("ERROR: DOUBLE FREE: "
			    "%llu [%llx:%llx] "
			    "%llu:%llu LOG_SM\n",
			    (u_longlong_t)txg, (u_longlong_t)offset,
			    (u_longlong_t)size, (u_longlong_t)mv->mv_vdid,
			    (u_longlong_t)mv->mv_msid);
		} else {
			zfs_range_tree_remove(mv->mv_allocated,
			    offset, size);
		}
	}

	if (sme->sme_type != SM_ALLOC) {
		/*
		 * If something is freed in the spacemap, verify that
		 * it is not listed as allocated in the livelist.
		 */
		verify_livelist_allocs(mv, txg, offset, size);
	}
	return (0);
}
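
/*
 * Log-spacemap callback for one metaslab: entries with a txg below
 * metaslab_unflushed_txg() have already been flushed into the
 * metaslab's own spacemap and are skipped to avoid double-counting.
 */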
static int
spacemap_check_sm_log_cb(spa_t *spa, space_map_entry_t *sme,
    uint64_t txg, void *arg)
{
	metaslab_verify_t *mv = arg;
	uint64_t offset = sme->sme_offset;
	uint64_t vdev_id = sme->sme_vdev;

	vdev_t *vd = vdev_lookup_top(spa, vdev_id);

	/* skip indirect vdevs */
	if (!vdev_is_concrete(vd))
		return (0);

	if (vdev_id != mv->mv_vdid)
		return (0);

	metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift];
	if (ms->ms_id != mv->mv_msid)
		return (0);

	if (txg < metaslab_unflushed_txg(ms))
		return (0);

	ASSERT3U(txg, ==, sme->sme_txg);
	return (metaslab_spacemap_validation_cb(sme, mv));
}

static void
spacemap_check_sm_log(spa_t *spa, metaslab_verify_t *mv)
{
	iterate_through_spacemap_logs(spa, spacemap_check_sm_log_cb, mv);
}

static void
spacemap_check_ms_sm(space_map_t *sm, metaslab_verify_t *mv)
{
	if (sm == NULL)
		return;

	VERIFY0(space_map_iterate(sm, space_map_length(sm),
	    metaslab_spacemap_validation_cb, mv));
}

static void iterate_deleted_livelists(spa_t *spa, ll_iter_t func, void *arg);

/*
 * Transfer blocks from sv_leftover tree to the mv_livelist_allocs if
 * they are part of that metaslab (mv_msid).
 */
static void
mv_populate_livelist_allocs(metaslab_verify_t *mv, sublivelist_verify_t *sv)
{
	zfs_btree_index_t where;
	sublivelist_verify_block_t *svb;
	ASSERT3U(zfs_btree_numnodes(&mv->mv_livelist_allocs), ==, 0);
	for (svb = zfs_btree_first(&sv->sv_leftover, &where);
	    svb != NULL;
	    svb = zfs_btree_next(&sv->sv_leftover, &where, &where)) {
		if (DVA_GET_VDEV(&svb->svb_dva) != mv->mv_vdid)
			continue;

		if (DVA_GET_OFFSET(&svb->svb_dva) < mv->mv_start &&
		    (DVA_GET_OFFSET(&svb->svb_dva) +
		    DVA_GET_ASIZE(&svb->svb_dva)) > mv->mv_start) {
			(void) printf("ERROR: Found block that crosses "
			    "metaslab boundary: <%llu:%llx:%llx>\n",
			    (u_longlong_t)DVA_GET_VDEV(&svb->svb_dva),
			    (u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva),
			    (u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva));
			continue;
		}

		if (DVA_GET_OFFSET(&svb->svb_dva) < mv->mv_start)
			continue;

		if (DVA_GET_OFFSET(&svb->svb_dva) >= mv->mv_end)
			continue;

		if ((DVA_GET_OFFSET(&svb->svb_dva) +
		    DVA_GET_ASIZE(&svb->svb_dva)) > mv->mv_end) {
			(void) printf("ERROR: Found block that crosses "
			    "metaslab boundary: <%llu:%llx:%llx>\n",
			    (u_longlong_t)DVA_GET_VDEV(&svb->svb_dva),
			    (u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva),
			    (u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva));
			continue;
		}

		zfs_btree_add(&mv->mv_livelist_allocs, svb);
	}

	for (svb = zfs_btree_first(&mv->mv_livelist_allocs, &where);
	    svb != NULL;
	    svb = zfs_btree_next(&mv->mv_livelist_allocs, &where, &where)) {
		zfs_btree_remove(&sv->sv_leftover, svb);
	}
}

/*
 * [Livelist Check]
 * Iterate through all the sublivelists and:
 * - report leftover frees (**)
 * - record leftover ALLOCs together with their TXG [see Cross Check]
 *
 * (**) Note: Double ALLOCs are valid in datasets that have dedup
 *      enabled. Similarly double FREEs are allowed as well but
 *      only if they pair up with a corresponding ALLOC entry once
 *      we are done with our sublivelist iteration.
 *
 * [Spacemap Check]
 * for each metaslab:
 * - iterate over spacemap and then the metaslab's entries in the
 *   spacemap log, then report any double FREEs and ALLOCs (do not
 *   blow up).
 *
 * [Cross Check]
 * After finishing the Livelist Check phase and while being in the
 * Spacemap Check phase, we find all the recorded leftover ALLOCs
 * of the livelist check that are part of the metaslab that we are
 * currently looking at in the Spacemap Check. We report any entries
 * that are marked as ALLOCs in the livelists but have been actually
 * freed (and potentially allocated again) after their TXG stamp in
 * the spacemaps. Also report any ALLOCs from the livelists that
 * belong to indirect vdevs (e.g. their vdev completed removal).
 *
 * Note that this will miss Log Spacemap entries that cancelled each other
 * out before being flushed to the metaslab, so we are not guaranteed
 * to match all erroneous ALLOCs.
 */
static void
livelist_metaslab_validate(spa_t *spa)
{
	(void) printf("Verifying deleted livelist entries\n");

	sublivelist_verify_t sv;
	zfs_btree_create(&sv.sv_leftover, livelist_block_compare, NULL,
	    sizeof (sublivelist_verify_block_t));
	iterate_deleted_livelists(spa, livelist_verify, &sv);

	(void) printf("Verifying metaslab entries\n");
	vdev_t *rvd = spa->spa_root_vdev;
	for (uint64_t c = 0; c < rvd->vdev_children; c++) {
		vdev_t *vd = rvd->vdev_child[c];

		if (!vdev_is_concrete(vd))
			continue;

		for (uint64_t mid = 0; mid < vd->vdev_ms_count; mid++) {
			metaslab_t *m = vd->vdev_ms[mid];

			(void) fprintf(stderr,
			    "\rverifying concrete vdev %llu, "
			    "metaslab %llu of %llu ...",
			    (longlong_t)vd->vdev_id,
			    (longlong_t)mid,
			    (longlong_t)vd->vdev_ms_count);

			uint64_t shift, start;
			zfs_range_seg_type_t type =
			    metaslab_calculate_range_tree_type(vd, m,
			    &start, &shift);
			metaslab_verify_t mv;
			mv.mv_allocated = zfs_range_tree_create_flags(
			    NULL, type, NULL, start, shift,
			    0, "livelist_metaslab_validate:mv_allocated");
			mv.mv_vdid = vd->vdev_id;
			mv.mv_msid = m->ms_id;
			mv.mv_start = m->ms_start;
			mv.mv_end = m->ms_start + m->ms_size;
			zfs_btree_create(&mv.mv_livelist_allocs,
			    livelist_block_compare, NULL,
			    sizeof (sublivelist_verify_block_t));

			mv_populate_livelist_allocs(&mv, &sv);

			spacemap_check_ms_sm(m->ms_sm, &mv);
			spacemap_check_sm_log(spa, &mv);

			zfs_range_tree_vacate(mv.mv_allocated, NULL, NULL);
			zfs_range_tree_destroy(mv.mv_allocated);
			zfs_btree_clear(&mv.mv_livelist_allocs);
			zfs_btree_destroy(&mv.mv_livelist_allocs);
		}
	}
	(void) fprintf(stderr, "\n");

	/*
	 * If there are any segments in the leftover tree after we walked
	 * through all the metaslabs in the concrete vdevs then this means
	 * that we have segments in the livelists that belong to indirect
	 * vdevs and are marked as allocated.
	 */
	if (zfs_btree_numnodes(&sv.sv_leftover) == 0) {
		zfs_btree_destroy(&sv.sv_leftover);
		return;
	}
	(void) printf("ERROR: Found livelist blocks marked as allocated "
	    "for indirect vdevs:\n");

	zfs_btree_index_t *where = NULL;
	sublivelist_verify_block_t *svb;
	while ((svb = zfs_btree_destroy_nodes(&sv.sv_leftover, &where)) !=
	    NULL) {
		int vdev_id = DVA_GET_VDEV(&svb->svb_dva);
		ASSERT3U(vdev_id, <, rvd->vdev_children);
		vdev_t *vd = rvd->vdev_child[vdev_id];
		ASSERT(!vdev_is_concrete(vd));
		(void) printf("<%d:%llx:%llx> TXG %llx\n",
		    vdev_id, (u_longlong_t)DVA_GET_OFFSET(&svb->svb_dva),
		    (u_longlong_t)DVA_GET_ASIZE(&svb->svb_dva),
		    (u_longlong_t)svb->svb_allocated_txg);
	}
	(void) printf("\n");
	zfs_btree_destroy(&sv.sv_leftover);
}

/*
 * These libumem hooks provide a reasonable set of defaults for the allocator's
 * debugging facilities.
 */
const char *
_umem_debug_init(void)
{
	return ("default,verbose"); /* $UMEM_DEBUG setting */
}

const char *
_umem_logging_init(void)
{
	return ("fail,contents"); /* $UMEM_LOGGING setting */
}

static void
usage(void)
{
	(void) fprintf(stderr,
	    "Usage:\t%s [-AbcdDFGhikLMPsvXy] [-e [-V] [-p <path> ...]] "
	    "[-I <inflight I/Os>]\n"
	    "\t\t[-o <var>=<value>]... [-t <txg>] [-U <cache>] [-x <dumpdir>]\n"
	    "\t\t[-K <key>]\n"
	    "\t\t[<poolname>[/<dataset | objset id>] [<object | range> ...]]\n"
	    "\t%s [-AdiPv] [-e [-V] [-p <path> ...]] [-U <cache>] [-K <key>]\n"
	    "\t\t[<poolname>[/<dataset | objset id>] [<object | range> ...]]\n"
	    "\t%s -B [-e [-V] [-p <path> ...]] [-I <inflight I/Os>]\n"
	    "\t\t[-o <var>=<value>]... [-t <txg>] [-U <cache>] [-x <dumpdir>]\n"
	    "\t\t[-K <key>] <poolname>/<objset id> [<backupflags>]\n"
	    "\t%s [-v] <bookmark>\n"
	    "\t%s -C [-A] [-U <cache>] [<poolname>]\n"
	    "\t%s -l [-Aqu] <device>\n"
	    "\t%s -m [-AFLPX] [-e [-V] [-p <path> ...]] [-t <txg>] "
	    "[-U <cache>]\n\t\t<poolname> [<vdev> [<metaslab> ...]]\n"
	    "\t%s -O [-K <key>] <dataset> <path>\n"
	    "\t%s -r [-K <key>] <dataset> <path> <destination>\n"
	    "\t%s -R [-A] [-e [-V] [-p <path> ...]] [-U <cache>]\n"
	    "\t\t<poolname> <vdev>:<offset>:<size>[:<flags>]\n"
	    "\t%s -E [-A] word0:word1:...:word15\n"
	    "\t%s -S [-AP] [-e [-V] [-p <path> ...]] [-U <cache>] "
	    "<poolname>\n\n",
	    cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname,
	    cmdname, cmdname, cmdname, cmdname, cmdname);

	(void) fprintf(stderr, "    Dataset name must include at least one "
	    "separator character '/' or '@'\n");
	(void) fprintf(stderr, "    If dataset name is specified, only that "
	    "dataset is dumped\n");
	(void) fprintf(stderr, "    If object numbers or object number "
	    "ranges are specified, only those\n"
	    "    objects or ranges are dumped.\n\n");
	(void) fprintf(stderr,
	    "    Object ranges take the form <start>:<end>[:<flags>]\n"
	    "        start    Starting object number\n"
	    "        end      Ending object number, or -1 for no upper bound\n"
	    "        flags    Optional flags to select object types:\n"
	    "            A     All objects (this is the default)\n"
	    "            d     ZFS directories\n"
	    "            f     ZFS files\n"
	    "            m     SPA space maps\n"
	    "            z     ZAPs\n"
	    "            -     Negate effect of next flag\n\n");
	(void) fprintf(stderr, "    Options to control amount of output:\n");
	(void) fprintf(stderr, "        -b --block-stats             "
	    "block statistics\n");
	(void) fprintf(stderr, "        -B --backup                  "
	    "backup stream\n");
	(void) fprintf(stderr, "        -c --checksum                "
	    "checksum all metadata (twice for all data) blocks\n");
	(void) fprintf(stderr, "        -C --config                  "
	    "config (or cachefile if alone)\n");
	(void) fprintf(stderr, "        -d --datasets                "
	    "dataset(s)\n");
	(void) fprintf(stderr, "        -D --dedup-stats             "
	    "dedup statistics\n");
	(void) fprintf(stderr, "        -E --embedded-block-pointer=INTEGER\n"
	    "                                     decode and display block "
	    "from an embedded block pointer\n");
	(void) fprintf(stderr, "        -h --history                 "
	    "pool history\n");
	(void) fprintf(stderr, "        -i --intent-logs             "
	    "intent logs\n");
	(void) fprintf(stderr, "        -l --label                   "
	    "read label contents\n");
	(void) fprintf(stderr, "        -k --checkpointed-state      "
	    "examine the checkpointed state of the pool\n");
	(void) fprintf(stderr, "        -L --disable-leak-tracking   "
	    "disable leak tracking (do not load spacemaps)\n");
	(void) fprintf(stderr, "        -m --metaslabs               "
	    "metaslabs\n");
	(void) fprintf(stderr, "        -M --metaslab-groups         "
	    "metaslab groups\n");
	(void) fprintf(stderr, "        -O --object-lookups          "
	    "perform object lookups by path\n");
	(void) fprintf(stderr, "        -r --copy-object             "
	    "copy an object by path to file\n");
	(void) fprintf(stderr, "        -R --read-block              "
	    "read and display block from a device\n");
	(void) fprintf(stderr, "        -s --io-stats                "
	    "report stats on zdb's I/O\n");
	(void) fprintf(stderr, "        -S --simulate-dedup          "
	    "simulate dedup to measure effect\n");
	(void) fprintf(stderr, "        -v --verbose                 "
	    "verbose (applies to all others)\n");
	(void) fprintf(stderr, "        -y --livelist                "
	    "perform livelist and metaslab validation on any livelists being "
	    "deleted\n\n");
	(void) fprintf(stderr, "    Below options are intended for use "
	    "with other options:\n");
	(void) fprintf(stderr, "        -A --ignore-assertions       "
	    "ignore assertions (-A), enable panic recovery (-AA) or both "
	    "(-AAA)\n");
	(void) fprintf(stderr, "        -e --exported                "
	    "pool is exported/destroyed/has altroot/not in a cachefile\n");
	(void) fprintf(stderr, "        -F --automatic-rewind        "
	    "attempt automatic rewind within safe range of transaction "
	    "groups\n");
	(void) fprintf(stderr, "        -G --dump-debug-msg          "
	    "dump zfs_dbgmsg buffer before exiting\n");
	(void) fprintf(stderr, "        -I --inflight=INTEGER        "
	    "specify the maximum number of checksumming I/Os "
	    "[default is 200]\n");
	(void) fprintf(stderr, "        -K --key=KEY                 "
	    "decryption key for encrypted dataset\n");
	(void) fprintf(stderr, "        -o --option=\"NAME=VALUE\"     "
	    "set the named tunable to the given value\n");
	(void) fprintf(stderr, "        -p --path=PATH               "
	    "use one or more with -e to specify path to vdev dir\n");
	(void) fprintf(stderr, "        -P --parseable               "
	    "print numbers in parseable form\n");
	(void) fprintf(stderr, "        -q --skip-label              "
	    "don't print label contents\n");
	(void) fprintf(stderr, "        -t --txg=INTEGER             "
	    "highest txg to use when searching for uberblocks\n");
	(void) fprintf(stderr, "        -T --brt-stats               "
	    "BRT statistics\n");
	(void) fprintf(stderr, "        -u --uberblock               "
	    "uberblock\n");
	(void) fprintf(stderr, "        -U --cachefile=PATH          "
	    "use alternate cachefile\n");
	(void) fprintf(stderr, "        -V --verbatim                "
	    "do verbatim import\n");
	(void) fprintf(stderr, "        -x --dump-blocks=PATH        "
	    "dump all read blocks into specified directory\n");
	(void) fprintf(stderr, "        -X --extreme-rewind          "
	    "attempt extreme rewind (does not work with dataset)\n");
	(void) fprintf(stderr, "        -Y --all-reconstruction      "
	    "attempt all reconstruction combinations for split blocks\n");
	(void) fprintf(stderr, "        -Z --zstd-headers            "
	    "show ZSTD headers\n");
	(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
	    "to make only that option verbose\n");
	(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
	zdb_exit(1);
}

static void
dump_debug_buffer(void)
{
	ssize_t ret __attribute__((unused));

	if (!dump_opt['G'])
		return;
	/*
	 * We use write() instead of printf() so that this function
	 * is safe to call from a signal handler.
	 */
	ret = write(STDERR_FILENO, "\n", 1);
	zfs_dbgmsg_print(STDERR_FILENO, "zdb");
}

static void
sig_handler(int signo)
{
	struct sigaction action;

	libspl_backtrace(STDERR_FILENO);
	dump_debug_buffer();

	/*
	 * Restore default action and re-raise signal so SIGSEGV and
	 * SIGABRT can trigger a core dump.
	 */
	action.sa_handler = SIG_DFL;
	sigemptyset(&action.sa_mask);
	action.sa_flags = 0;
	(void) sigaction(signo, &action, NULL);
	raise(signo);
}

/*
 * Called for usage errors that are discovered after a call to spa_open(),
 * dmu_bonus_hold(), or pool_match(). abort() is called for other errors.
 */
static void
fatal(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	(void) fprintf(stderr, "%s: ", cmdname);
	(void) vfprintf(stderr, fmt, ap);
	va_end(ap);
	(void) fprintf(stderr, "\n");

	dump_debug_buffer();

	zdb_exit(1);
}

static void
dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
{
	(void) size;
	nvlist_t *nv;
	size_t nvsize = *(uint64_t *)data;
	char *packed = umem_alloc(nvsize, UMEM_NOFAIL);

	VERIFY(0 == dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH));

	VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0);

	umem_free(packed, nvsize);

	dump_nvlist(nv, 8);

	nvlist_free(nv);
}

static void
dump_history_offsets(objset_t *os, uint64_t object, void *data, size_t size)
{
	(void) os, (void) object, (void) size;
	spa_history_phys_t *shp = data;

	if (shp == NULL)
		return;

	(void) printf("\t\tpool_create_len = %llu\n",
	    (u_longlong_t)shp->sh_pool_create_len);
	(void) printf("\t\tphys_max_off = %llu\n",
	    (u_longlong_t)shp->sh_phys_max_off);
	(void) printf("\t\tbof = %llu\n",
	    (u_longlong_t)shp->sh_bof);
	(void) printf("\t\teof = %llu\n",
	    (u_longlong_t)shp->sh_eof);
	(void) printf("\t\trecords_lost = %llu\n",
	    (u_longlong_t)shp->sh_records_lost);
}

static void
zdb_nicenum(uint64_t num, char *buf, size_t buflen)
{
	if (dump_opt['P'])
		(void) snprintf(buf, buflen, "%llu", (longlong_t)num);
	else
		nicenum(num, buf, buflen);
}

static void
zdb_nicebytes(uint64_t bytes, char *buf, size_t buflen)
{
	if (dump_opt['P'])
		(void) snprintf(buf, buflen, "%llu", (longlong_t)bytes);
	else
		zfs_nicebytes(bytes, buf, buflen);
}

static const char histo_stars[] = "****************************************";
static const uint64_t histo_width = sizeof (histo_stars) - 1;
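
/*
 * Print one row per bucket with a bar of stars scaled so the largest
 * bucket fills histo_width columns; when the largest count is below
 * the bar width, one star represents one count.
 */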
static void
dump_histogram(const uint64_t *histo, int size, int offset)
{
	int i;
	int minidx = size - 1;
	int maxidx = 0;
	uint64_t max = 0;

	for (i = 0; i < size; i++) {
		if (histo[i] == 0)
			continue;
		if (histo[i] > max)
			max = histo[i];
		if (i > maxidx)
			maxidx = i;
		if (i < minidx)
			minidx = i;
	}

	if (max < histo_width)
		max = histo_width;

	for (i = minidx; i <= maxidx; i++) {
		(void) printf("\t\t\t%3u: %6llu %s\n",
		    i + offset, (u_longlong_t)histo[i],
		    &histo_stars[(max - histo[i]) * histo_width / max]);
	}
}

static void
dump_zap_stats(objset_t *os, uint64_t object)
{
	int error;
	zap_stats_t zs;

	error = zap_get_stats(os, object, &zs);
	if (error)
		return;

	if (zs.zs_ptrtbl_len == 0) {
		ASSERT(zs.zs_num_blocks == 1);
		(void) printf("\tmicrozap: %llu bytes, %llu entries\n",
		    (u_longlong_t)zs.zs_blocksize,
		    (u_longlong_t)zs.zs_num_entries);
		return;
	}

	(void) printf("\tFat ZAP stats:\n");

	(void) printf("\t\tPointer table:\n");
	(void) printf("\t\t\t%llu elements\n",
	    (u_longlong_t)zs.zs_ptrtbl_len);
	(void) printf("\t\t\tzt_blk: %llu\n",
	    (u_longlong_t)zs.zs_ptrtbl_zt_blk);
	(void) printf("\t\t\tzt_numblks: %llu\n",
	    (u_longlong_t)zs.zs_ptrtbl_zt_numblks);
	(void) printf("\t\t\tzt_shift: %llu\n",
	    (u_longlong_t)zs.zs_ptrtbl_zt_shift);
	(void) printf("\t\t\tzt_blks_copied: %llu\n",
	    (u_longlong_t)zs.zs_ptrtbl_blks_copied);
	(void) printf("\t\t\tzt_nextblk: %llu\n",
	    (u_longlong_t)zs.zs_ptrtbl_nextblk);

	(void) printf("\t\tZAP entries: %llu\n",
	    (u_longlong_t)zs.zs_num_entries);
	(void) printf("\t\tLeaf blocks: %llu\n",
	    (u_longlong_t)zs.zs_num_leafs);
	(void) printf("\t\tTotal blocks: %llu\n",
	    (u_longlong_t)zs.zs_num_blocks);
	(void) printf("\t\tzap_block_type: 0x%llx\n",
	    (u_longlong_t)zs.zs_block_type);
	(void) printf("\t\tzap_magic: 0x%llx\n",
	    (u_longlong_t)zs.zs_magic);
	(void) printf("\t\tzap_salt: 0x%llx\n",
	    (u_longlong_t)zs.zs_salt);

	(void) printf("\t\tLeafs with 2^n pointers:\n");
	dump_histogram(zs.zs_leafs_with_2n_pointers, ZAP_HISTOGRAM_SIZE, 0);

	(void) printf("\t\tBlocks with n*5 entries:\n");
	dump_histogram(zs.zs_blocks_with_n5_entries, ZAP_HISTOGRAM_SIZE, 0);

	(void) printf("\t\tBlocks n/10 full:\n");
	dump_histogram(zs.zs_blocks_n_tenths_full, ZAP_HISTOGRAM_SIZE, 0);

	(void) printf("\t\tEntries with n chunks:\n");
	dump_histogram(zs.zs_entries_using_n_chunks, ZAP_HISTOGRAM_SIZE, 0);

	(void) printf("\t\tBuckets with n entries:\n");
	dump_histogram(zs.zs_buckets_with_n_entries, ZAP_HISTOGRAM_SIZE, 0);
}

static void
dump_none(objset_t *os, uint64_t object, void *data, size_t size)
{
	(void) os, (void) object, (void) data, (void) size;
}

static void
dump_unknown(objset_t *os, uint64_t object, void *data, size_t size)
{
	(void) os, (void) object, (void) data, (void) size;
	(void) printf("\tUNKNOWN OBJECT TYPE\n");
}

static void
dump_uint8(objset_t *os, uint64_t object, void *data, size_t size)
{
	(void) os, (void) object, (void) data, (void) size;
}

static void
dump_uint64(objset_t *os, uint64_t object, void *data, size_t size)
{
	uint64_t *arr;
	uint64_t oursize;
	if (dump_opt['d'] < 6)
		return;

	if (data == NULL) {
		dmu_object_info_t doi;

		VERIFY0(dmu_object_info(os, object, &doi));
		size = doi.doi_max_offset;
		/*
		 * We cap the size at 1 mebibyte here to prevent
		 * allocation failures and nigh-infinite printing if the
		 * object is extremely large.
		 */
		oursize = MIN(size, 1 << 20);
		arr = kmem_alloc(oursize, KM_SLEEP);

		int err = dmu_read(os, object, 0, oursize, arr, 0);
		if (err != 0) {
			(void) printf("got error %u from dmu_read\n", err);
			kmem_free(arr, oursize);
			return;
		}
	} else {
		/*
		 * Even though the allocation is already done in this code path,
		 * we still cap the size to prevent excessive printing.
		 */
		oursize = MIN(size, 1 << 20);
		arr = data;
	}

	if (size == 0) {
		if (data == NULL)
			kmem_free(arr, oursize);
		(void) printf("\t\t[]\n");
		return;
	}

	(void) printf("\t\t[%0llx", (u_longlong_t)arr[0]);
	for (size_t i = 1; i * sizeof (uint64_t) < oursize; i++) {
		if (i % 4 != 0)
			(void) printf(", %0llx", (u_longlong_t)arr[i]);
		else
			(void) printf(",\n\t\t%0llx", (u_longlong_t)arr[i]);
	}
	if (oursize != size)
		(void) printf(", ... ");
	(void) printf("]\n");

	if (data == NULL)
		kmem_free(arr, oursize);
}
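
/*
 * Print every attribute of a ZAP object.  ZAPs with 64-bit keys
 * (ZAP_FLAG_UINT64_KEY) have their keys printed as hex, and the
 * well-known crypto key/MAC/salt attributes are rendered as hex byte
 * strings rather than text.
 */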
"); 1111 (void) printf("]\n"); 1112 1113 if (data == NULL) 1114 kmem_free(arr, oursize); 1115 } 1116 1117 static void 1118 dump_zap(objset_t *os, uint64_t object, void *data, size_t size) 1119 { 1120 (void) data, (void) size; 1121 zap_cursor_t zc; 1122 zap_attribute_t *attrp = zap_attribute_long_alloc(); 1123 void *prop; 1124 unsigned i; 1125 1126 dump_zap_stats(os, object); 1127 (void) printf("\n"); 1128 1129 for (zap_cursor_init(&zc, os, object); 1130 zap_cursor_retrieve(&zc, attrp) == 0; 1131 zap_cursor_advance(&zc)) { 1132 boolean_t key64 = 1133 !!(zap_getflags(zc.zc_zap) & ZAP_FLAG_UINT64_KEY); 1134 1135 if (key64) 1136 (void) printf("\t\t0x%010" PRIu64 "x = ", 1137 *(uint64_t *)attrp->za_name); 1138 else 1139 (void) printf("\t\t%s = ", attrp->za_name); 1140 1141 if (attrp->za_num_integers == 0) { 1142 (void) printf("\n"); 1143 continue; 1144 } 1145 prop = umem_zalloc(attrp->za_num_integers * 1146 attrp->za_integer_length, UMEM_NOFAIL); 1147 1148 if (key64) 1149 (void) zap_lookup_uint64(os, object, 1150 (const uint64_t *)attrp->za_name, 1, 1151 attrp->za_integer_length, attrp->za_num_integers, 1152 prop); 1153 else 1154 (void) zap_lookup(os, object, attrp->za_name, 1155 attrp->za_integer_length, attrp->za_num_integers, 1156 prop); 1157 1158 if (attrp->za_integer_length == 1 && !key64) { 1159 if (strcmp(attrp->za_name, 1160 DSL_CRYPTO_KEY_MASTER_KEY) == 0 || 1161 strcmp(attrp->za_name, 1162 DSL_CRYPTO_KEY_HMAC_KEY) == 0 || 1163 strcmp(attrp->za_name, DSL_CRYPTO_KEY_IV) == 0 || 1164 strcmp(attrp->za_name, DSL_CRYPTO_KEY_MAC) == 0 || 1165 strcmp(attrp->za_name, 1166 DMU_POOL_CHECKSUM_SALT) == 0) { 1167 uint8_t *u8 = prop; 1168 1169 for (i = 0; i < attrp->za_num_integers; i++) { 1170 (void) printf("%02x", u8[i]); 1171 } 1172 } else { 1173 (void) printf("%s", (char *)prop); 1174 } 1175 } else { 1176 for (i = 0; i < attrp->za_num_integers; i++) { 1177 switch (attrp->za_integer_length) { 1178 case 1: 1179 (void) printf("%u ", 1180 ((uint8_t *)prop)[i]); 1181 break; 1182 case 2: 1183 (void) printf("%u ", 1184 ((uint16_t *)prop)[i]); 1185 break; 1186 case 4: 1187 (void) printf("%u ", 1188 ((uint32_t *)prop)[i]); 1189 break; 1190 case 8: 1191 (void) printf("%lld ", 1192 (u_longlong_t)((int64_t *)prop)[i]); 1193 break; 1194 } 1195 } 1196 } 1197 (void) printf("\n"); 1198 umem_free(prop, 1199 attrp->za_num_integers * attrp->za_integer_length); 1200 } 1201 zap_cursor_fini(&zc); 1202 zap_attribute_free(attrp); 1203 } 1204 1205 static void 1206 dump_bpobj(objset_t *os, uint64_t object, void *data, size_t size) 1207 { 1208 bpobj_phys_t *bpop = data; 1209 uint64_t i; 1210 char bytes[32], comp[32], uncomp[32]; 1211 1212 /* make sure the output won't get truncated */ 1213 _Static_assert(sizeof (bytes) >= NN_NUMBUF_SZ, "bytes truncated"); 1214 _Static_assert(sizeof (comp) >= NN_NUMBUF_SZ, "comp truncated"); 1215 _Static_assert(sizeof (uncomp) >= NN_NUMBUF_SZ, "uncomp truncated"); 1216 1217 if (bpop == NULL) 1218 return; 1219 1220 zdb_nicenum(bpop->bpo_bytes, bytes, sizeof (bytes)); 1221 zdb_nicenum(bpop->bpo_comp, comp, sizeof (comp)); 1222 zdb_nicenum(bpop->bpo_uncomp, uncomp, sizeof (uncomp)); 1223 1224 (void) printf("\t\tnum_blkptrs = %llu\n", 1225 (u_longlong_t)bpop->bpo_num_blkptrs); 1226 (void) printf("\t\tbytes = %s\n", bytes); 1227 if (size >= BPOBJ_SIZE_V1) { 1228 (void) printf("\t\tcomp = %s\n", comp); 1229 (void) printf("\t\tuncomp = %s\n", uncomp); 1230 } 1231 if (size >= BPOBJ_SIZE_V2) { 1232 (void) printf("\t\tsubobjs = %llu\n", 1233 (u_longlong_t)bpop->bpo_subobjs); 1234 (void) 
printf("\t\tnum_subobjs = %llu\n", 1235 (u_longlong_t)bpop->bpo_num_subobjs); 1236 } 1237 if (size >= sizeof (*bpop)) { 1238 (void) printf("\t\tnum_freed = %llu\n", 1239 (u_longlong_t)bpop->bpo_num_freed); 1240 } 1241 1242 if (dump_opt['d'] < 5) 1243 return; 1244 1245 for (i = 0; i < bpop->bpo_num_blkptrs; i++) { 1246 char blkbuf[BP_SPRINTF_LEN]; 1247 blkptr_t bp; 1248 1249 int err = dmu_read(os, object, 1250 i * sizeof (bp), sizeof (bp), &bp, 0); 1251 if (err != 0) { 1252 (void) printf("got error %u from dmu_read\n", err); 1253 break; 1254 } 1255 snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), &bp, 1256 BP_GET_FREE(&bp)); 1257 (void) printf("\t%s\n", blkbuf); 1258 } 1259 } 1260 1261 static void 1262 dump_bpobj_subobjs(objset_t *os, uint64_t object, void *data, size_t size) 1263 { 1264 (void) data, (void) size; 1265 dmu_object_info_t doi; 1266 int64_t i; 1267 1268 VERIFY0(dmu_object_info(os, object, &doi)); 1269 uint64_t *subobjs = kmem_alloc(doi.doi_max_offset, KM_SLEEP); 1270 1271 int err = dmu_read(os, object, 0, doi.doi_max_offset, subobjs, 0); 1272 if (err != 0) { 1273 (void) printf("got error %u from dmu_read\n", err); 1274 kmem_free(subobjs, doi.doi_max_offset); 1275 return; 1276 } 1277 1278 int64_t last_nonzero = -1; 1279 for (i = 0; i < doi.doi_max_offset / 8; i++) { 1280 if (subobjs[i] != 0) 1281 last_nonzero = i; 1282 } 1283 1284 for (i = 0; i <= last_nonzero; i++) { 1285 (void) printf("\t%llu\n", (u_longlong_t)subobjs[i]); 1286 } 1287 kmem_free(subobjs, doi.doi_max_offset); 1288 } 1289 1290 static void 1291 dump_ddt_zap(objset_t *os, uint64_t object, void *data, size_t size) 1292 { 1293 (void) data, (void) size; 1294 dump_zap_stats(os, object); 1295 /* contents are printed elsewhere, properly decoded */ 1296 } 1297 1298 static void 1299 dump_sa_attrs(objset_t *os, uint64_t object, void *data, size_t size) 1300 { 1301 (void) data, (void) size; 1302 zap_cursor_t zc; 1303 zap_attribute_t *attrp = zap_attribute_alloc(); 1304 1305 dump_zap_stats(os, object); 1306 (void) printf("\n"); 1307 1308 for (zap_cursor_init(&zc, os, object); 1309 zap_cursor_retrieve(&zc, attrp) == 0; 1310 zap_cursor_advance(&zc)) { 1311 (void) printf("\t\t%s = ", attrp->za_name); 1312 if (attrp->za_num_integers == 0) { 1313 (void) printf("\n"); 1314 continue; 1315 } 1316 (void) printf(" %llx : [%d:%d:%d]\n", 1317 (u_longlong_t)attrp->za_first_integer, 1318 (int)ATTR_LENGTH(attrp->za_first_integer), 1319 (int)ATTR_BSWAP(attrp->za_first_integer), 1320 (int)ATTR_NUM(attrp->za_first_integer)); 1321 } 1322 zap_cursor_fini(&zc); 1323 zap_attribute_free(attrp); 1324 } 1325 1326 static void 1327 dump_sa_layouts(objset_t *os, uint64_t object, void *data, size_t size) 1328 { 1329 (void) data, (void) size; 1330 zap_cursor_t zc; 1331 zap_attribute_t *attrp = zap_attribute_alloc(); 1332 uint16_t *layout_attrs; 1333 unsigned i; 1334 1335 dump_zap_stats(os, object); 1336 (void) printf("\n"); 1337 1338 for (zap_cursor_init(&zc, os, object); 1339 zap_cursor_retrieve(&zc, attrp) == 0; 1340 zap_cursor_advance(&zc)) { 1341 (void) printf("\t\t%s = [", attrp->za_name); 1342 if (attrp->za_num_integers == 0) { 1343 (void) printf("\n"); 1344 continue; 1345 } 1346 1347 VERIFY(attrp->za_integer_length == 2); 1348 layout_attrs = umem_zalloc(attrp->za_num_integers * 1349 attrp->za_integer_length, UMEM_NOFAIL); 1350 1351 VERIFY(zap_lookup(os, object, attrp->za_name, 1352 attrp->za_integer_length, 1353 attrp->za_num_integers, layout_attrs) == 0); 1354 1355 for (i = 0; i != attrp->za_num_integers; i++) 1356 (void) printf(" %d ", 
static void
dump_sa_layouts(objset_t *os, uint64_t object, void *data, size_t size)
{
	(void) data, (void) size;
	zap_cursor_t zc;
	zap_attribute_t *attrp = zap_attribute_alloc();
	uint16_t *layout_attrs;
	unsigned i;

	dump_zap_stats(os, object);
	(void) printf("\n");

	for (zap_cursor_init(&zc, os, object);
	    zap_cursor_retrieve(&zc, attrp) == 0;
	    zap_cursor_advance(&zc)) {
		(void) printf("\t\t%s = [", attrp->za_name);
		if (attrp->za_num_integers == 0) {
			(void) printf("\n");
			continue;
		}

		VERIFY(attrp->za_integer_length == 2);
		layout_attrs = umem_zalloc(attrp->za_num_integers *
		    attrp->za_integer_length, UMEM_NOFAIL);

		VERIFY(zap_lookup(os, object, attrp->za_name,
		    attrp->za_integer_length,
		    attrp->za_num_integers, layout_attrs) == 0);

		for (i = 0; i != attrp->za_num_integers; i++)
			(void) printf(" %d ", (int)layout_attrs[i]);
		(void) printf("]\n");
		umem_free(layout_attrs,
		    attrp->za_num_integers * attrp->za_integer_length);
	}
	zap_cursor_fini(&zc);
	zap_attribute_free(attrp);
}

static void
dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size)
{
	(void) data, (void) size;
	zap_cursor_t zc;
	zap_attribute_t *attrp = zap_attribute_long_alloc();
	const char *typenames[] = {
		/* 0 */ "not specified",
		/* 1 */ "FIFO",
		/* 2 */ "Character Device",
		/* 3 */ "3 (invalid)",
		/* 4 */ "Directory",
		/* 5 */ "5 (invalid)",
		/* 6 */ "Block Device",
		/* 7 */ "7 (invalid)",
		/* 8 */ "Regular File",
		/* 9 */ "9 (invalid)",
		/* 10 */ "Symbolic Link",
		/* 11 */ "11 (invalid)",
		/* 12 */ "Socket",
		/* 13 */ "Door",
		/* 14 */ "Event Port",
		/* 15 */ "15 (invalid)",
	};

	dump_zap_stats(os, object);
	(void) printf("\n");

	for (zap_cursor_init(&zc, os, object);
	    zap_cursor_retrieve(&zc, attrp) == 0;
	    zap_cursor_advance(&zc)) {
		(void) printf("\t\t%s = %lld (type: %s)\n",
		    attrp->za_name, ZFS_DIRENT_OBJ(attrp->za_first_integer),
		    typenames[ZFS_DIRENT_TYPE(attrp->za_first_integer)]);
	}
	zap_cursor_fini(&zc);
	zap_attribute_free(attrp);
}
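
/*
 * The get_*_refcount() helpers below count space maps that use the new
 * (histogram-capable) on-disk format; verify_spacemap_refcounts()
 * compares their sum against the refcount of the SPACEMAP_HISTOGRAM
 * feature flag.
 */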
static int
get_dtl_refcount(vdev_t *vd)
{
	int refcount = 0;

	if (vd->vdev_ops->vdev_op_leaf) {
		space_map_t *sm = vd->vdev_dtl_sm;

		if (sm != NULL &&
		    sm->sm_dbuf->db_size == sizeof (space_map_phys_t))
			return (1);
		return (0);
	}

	for (unsigned c = 0; c < vd->vdev_children; c++)
		refcount += get_dtl_refcount(vd->vdev_child[c]);
	return (refcount);
}

static int
get_metaslab_refcount(vdev_t *vd)
{
	int refcount = 0;

	if (vd->vdev_top == vd) {
		for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
			space_map_t *sm = vd->vdev_ms[m]->ms_sm;

			if (sm != NULL &&
			    sm->sm_dbuf->db_size == sizeof (space_map_phys_t))
				refcount++;
		}
	}
	for (unsigned c = 0; c < vd->vdev_children; c++)
		refcount += get_metaslab_refcount(vd->vdev_child[c]);

	return (refcount);
}

static int
get_obsolete_refcount(vdev_t *vd)
{
	uint64_t obsolete_sm_object;
	int refcount = 0;

	VERIFY0(vdev_obsolete_sm_object(vd, &obsolete_sm_object));
	if (vd->vdev_top == vd && obsolete_sm_object != 0) {
		dmu_object_info_t doi;
		VERIFY0(dmu_object_info(vd->vdev_spa->spa_meta_objset,
		    obsolete_sm_object, &doi));
		if (doi.doi_bonus_size == sizeof (space_map_phys_t)) {
			refcount++;
		}
	} else {
		ASSERT3P(vd->vdev_obsolete_sm, ==, NULL);
		ASSERT3U(obsolete_sm_object, ==, 0);
	}
	for (unsigned c = 0; c < vd->vdev_children; c++) {
		refcount += get_obsolete_refcount(vd->vdev_child[c]);
	}

	return (refcount);
}

static int
get_prev_obsolete_spacemap_refcount(spa_t *spa)
{
	uint64_t prev_obj =
	    spa->spa_condensing_indirect_phys.scip_prev_obsolete_sm_object;
	if (prev_obj != 0) {
		dmu_object_info_t doi;
		VERIFY0(dmu_object_info(spa->spa_meta_objset, prev_obj, &doi));
		if (doi.doi_bonus_size == sizeof (space_map_phys_t)) {
			return (1);
		}
	}
	return (0);
}

static int
get_checkpoint_refcount(vdev_t *vd)
{
	int refcount = 0;

	if (vd->vdev_top == vd && vd->vdev_top_zap != 0 &&
	    zap_contains(spa_meta_objset(vd->vdev_spa),
	    vd->vdev_top_zap, VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) == 0)
		refcount++;

	for (uint64_t c = 0; c < vd->vdev_children; c++)
		refcount += get_checkpoint_refcount(vd->vdev_child[c]);

	return (refcount);
}

static int
get_log_spacemap_refcount(spa_t *spa)
{
	return (avl_numnodes(&spa->spa_sm_logs_by_txg));
}

static int
verify_spacemap_refcounts(spa_t *spa)
{
	uint64_t expected_refcount = 0;
	uint64_t actual_refcount;

	(void) feature_get_refcount(spa,
	    &spa_feature_table[SPA_FEATURE_SPACEMAP_HISTOGRAM],
	    &expected_refcount);
	actual_refcount = get_dtl_refcount(spa->spa_root_vdev);
	actual_refcount += get_metaslab_refcount(spa->spa_root_vdev);
	actual_refcount += get_obsolete_refcount(spa->spa_root_vdev);
	actual_refcount += get_prev_obsolete_spacemap_refcount(spa);
	actual_refcount += get_checkpoint_refcount(spa->spa_root_vdev);
	actual_refcount += get_log_spacemap_refcount(spa);

	if (expected_refcount != actual_refcount) {
		(void) printf("space map refcount mismatch: expected %lld != "
		    "actual %lld\n",
		    (longlong_t)expected_refcount,
		    (longlong_t)actual_refcount);
		return (2);
	}
	return (0);
}
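
/*
 * Decode and print every space map entry: debug entries, one-word
 * entries, and two-word entries.  A running allocated-space total is
 * kept and checked against the space map's own summary at the end.
 */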
static void
dump_spacemap(objset_t *os, space_map_t *sm)
{
	const char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
	    "INVALID", "INVALID", "INVALID", "INVALID" };

	if (sm == NULL)
		return;

	(void) printf("space map object %llu:\n",
	    (longlong_t)sm->sm_object);
	(void) printf("  smp_length = 0x%llx\n",
	    (longlong_t)sm->sm_phys->smp_length);
	(void) printf("  smp_alloc = 0x%llx\n",
	    (longlong_t)sm->sm_phys->smp_alloc);

	if (dump_opt['d'] < 6 && dump_opt['m'] < 4)
		return;

	/*
	 * Print out the freelist entries in both encoded and decoded form.
	 */
	uint8_t mapshift = sm->sm_shift;
	int64_t alloc = 0;
	uint64_t word, entry_id = 0;
	for (uint64_t offset = 0; offset < space_map_length(sm);
	    offset += sizeof (word)) {

		VERIFY0(dmu_read(os, space_map_object(sm), offset,
		    sizeof (word), &word, DMU_READ_PREFETCH));

		if (sm_entry_is_debug(word)) {
			uint64_t de_txg = SM_DEBUG_TXG_DECODE(word);
			uint64_t de_sync_pass = SM_DEBUG_SYNCPASS_DECODE(word);
			if (de_txg == 0) {
				(void) printf(
				    "\t    [%6llu] PADDING\n",
				    (u_longlong_t)entry_id);
			} else {
				(void) printf(
				    "\t    [%6llu] %s: txg %llu pass %llu\n",
				    (u_longlong_t)entry_id,
				    ddata[SM_DEBUG_ACTION_DECODE(word)],
				    (u_longlong_t)de_txg,
				    (u_longlong_t)de_sync_pass);
			}
			entry_id++;
			continue;
		}

		uint8_t words;
		char entry_type;
		uint64_t entry_off, entry_run, entry_vdev = SM_NO_VDEVID;

		if (sm_entry_is_single_word(word)) {
			entry_type = (SM_TYPE_DECODE(word) == SM_ALLOC) ?
			    'A' : 'F';
			entry_off = (SM_OFFSET_DECODE(word) << mapshift) +
			    sm->sm_start;
			entry_run = SM_RUN_DECODE(word) << mapshift;
			words = 1;
		} else {
			/* it is a two-word entry so we read another word */
			ASSERT(sm_entry_is_double_word(word));

			uint64_t extra_word;
			offset += sizeof (extra_word);
			VERIFY0(dmu_read(os, space_map_object(sm), offset,
			    sizeof (extra_word), &extra_word,
			    DMU_READ_PREFETCH));

			ASSERT3U(offset, <=, space_map_length(sm));

			entry_run = SM2_RUN_DECODE(word) << mapshift;
			entry_vdev = SM2_VDEV_DECODE(word);
			entry_type = (SM2_TYPE_DECODE(extra_word) == SM_ALLOC) ?
			    'A' : 'F';
			entry_off = (SM2_OFFSET_DECODE(extra_word) <<
			    mapshift) + sm->sm_start;
			words = 2;
		}

		(void) printf("\t    [%6llu]    %c  range:"
		    " %010llx-%010llx  size: %06llx vdev: %06llu words: %u\n",
		    (u_longlong_t)entry_id,
		    entry_type, (u_longlong_t)entry_off,
		    (u_longlong_t)(entry_off + entry_run),
		    (u_longlong_t)entry_run,
		    (u_longlong_t)entry_vdev, words);

		if (entry_type == 'A')
			alloc += entry_run;
		else
			alloc -= entry_run;
		entry_id++;
	}
	if (alloc != space_map_allocated(sm)) {
		(void) printf("space_map_object alloc (%lld) INCONSISTENT "
		    "with space map summary (%lld)\n",
		    (longlong_t)space_map_allocated(sm), (longlong_t)alloc);
	}
}

static void
dump_metaslab_stats(metaslab_t *msp)
{
	char maxbuf[32];
	zfs_range_tree_t *rt = msp->ms_allocatable;
	zfs_btree_t *t = &msp->ms_allocatable_by_size;
	int free_pct = zfs_range_tree_space(rt) * 100 / msp->ms_size;

	/* make sure nicenum has enough space */
	_Static_assert(sizeof (maxbuf) >= NN_NUMBUF_SZ, "maxbuf truncated");

	zdb_nicenum(metaslab_largest_allocatable(msp), maxbuf, sizeof (maxbuf));

	(void) printf("\t %25s %10lu   %7s  %6s   %4s %4d%%\n",
	    "segments", zfs_btree_numnodes(t), "maxsize", maxbuf,
	    "freepct", free_pct);
	(void) printf("\tIn-memory histogram:\n");
	dump_histogram(rt->rt_histogram, ZFS_RANGE_TREE_HISTOGRAM_SIZE, 0);
}

static void
dump_metaslab(metaslab_t *msp)
{
	vdev_t *vd = msp->ms_group->mg_vd;
	spa_t *spa = vd->vdev_spa;
	space_map_t *sm = msp->ms_sm;
	char freebuf[32];

	zdb_nicenum(msp->ms_size - space_map_allocated(sm), freebuf,
	    sizeof (freebuf));

	(void) printf(
	    "\tmetaslab %6llu   offset %12llx   spacemap %6llu   free    %5s\n",
	    (u_longlong_t)msp->ms_id, (u_longlong_t)msp->ms_start,
	    (u_longlong_t)space_map_object(sm), freebuf);

	if (dump_opt['m'] > 2 && !dump_opt['L']) {
		mutex_enter(&msp->ms_lock);
		VERIFY0(metaslab_load(msp));
		zfs_range_tree_stat_verify(msp->ms_allocatable);
		dump_metaslab_stats(msp);
		metaslab_unload(msp);
		mutex_exit(&msp->ms_lock);
	}

	if (dump_opt['m'] > 1 && sm != NULL &&
	    spa_feature_is_active(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) {
		/*
		 * The space map histogram represents free space in chunks
		 * of sm_shift (i.e. bucket 0 refers to 2^sm_shift).
		 */
		(void) printf("\tOn-disk histogram:\t\tfragmentation %llu\n",
		    (u_longlong_t)msp->ms_fragmentation);
		dump_histogram(sm->sm_phys->smp_histogram,
		    SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift);
	}

	if (vd->vdev_ops == &vdev_draid_ops)
		ASSERT3U(msp->ms_size, <=, 1ULL << vd->vdev_ms_shift);
	else
		ASSERT3U(msp->ms_size, ==, 1ULL << vd->vdev_ms_shift);

	dump_spacemap(spa->spa_meta_objset, msp->ms_sm);

	if (spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP)) {
		(void) printf("\tFlush data:\n\tunflushed txg=%llu\n\n",
		    (u_longlong_t)metaslab_unflushed_txg(msp));
	}
}

static void
print_vdev_metaslab_header(vdev_t *vd)
{
	vdev_alloc_bias_t alloc_bias = vd->vdev_alloc_bias;
	const char *bias_str = "";
	if (alloc_bias == VDEV_BIAS_LOG || vd->vdev_islog) {
		bias_str = VDEV_ALLOC_BIAS_LOG;
	} else if (alloc_bias == VDEV_BIAS_SPECIAL) {
		bias_str = VDEV_ALLOC_BIAS_SPECIAL;
	} else if (alloc_bias == VDEV_BIAS_DEDUP) {
		bias_str = VDEV_ALLOC_BIAS_DEDUP;
	}

	uint64_t ms_flush_data_obj = 0;
	if (vd->vdev_top_zap != 0) {
		int error = zap_lookup(spa_meta_objset(vd->vdev_spa),
		    vd->vdev_top_zap, VDEV_TOP_ZAP_MS_UNFLUSHED_PHYS_TXGS,
		    sizeof (uint64_t), 1, &ms_flush_data_obj);
		if (error != ENOENT) {
			ASSERT0(error);
		}
	}

	(void) printf("\tvdev %10llu   %s",
	    (u_longlong_t)vd->vdev_id, bias_str);

	if (ms_flush_data_obj != 0) {
		(void) printf("   ms_unflushed_phys object %llu",
		    (u_longlong_t)ms_flush_data_obj);
	}

	(void) printf("\n\t%-10s%5llu   %-19s   %-15s   %-12s\n",
	    "metaslabs", (u_longlong_t)vd->vdev_ms_count,
	    "offset", "spacemap", "free");
	(void) printf("\t%15s   %19s   %15s   %12s\n",
	    "---------------", "-------------------",
	    "---------------", "------------");
}

static void
dump_metaslab_groups(spa_t *spa, boolean_t show_special)
{
	vdev_t *rvd = spa->spa_root_vdev;
	metaslab_class_t *mc = spa_normal_class(spa);
	metaslab_class_t *smc = spa_special_class(spa);
	uint64_t fragmentation;

	metaslab_class_histogram_verify(mc);

	for (unsigned c = 0; c < rvd->vdev_children; c++) {
		vdev_t *tvd = rvd->vdev_child[c];
		metaslab_group_t *mg = tvd->vdev_mg;

		if (mg == NULL || (mg->mg_class != mc &&
		    (!show_special || mg->mg_class != smc)))
			continue;

		metaslab_group_histogram_verify(mg);
		mg->mg_fragmentation = metaslab_group_fragmentation(mg);

		(void) printf("\tvdev %10llu\t\tmetaslabs%5llu\t\t"
		    "fragmentation",
		    (u_longlong_t)tvd->vdev_id,
		    (u_longlong_t)tvd->vdev_ms_count);
		if (mg->mg_fragmentation == ZFS_FRAG_INVALID) {
			(void) printf("%3s\n", "-");
		} else {
			(void) printf("%3llu%%\n",
			    (u_longlong_t)mg->mg_fragmentation);
		}
		dump_histogram(mg->mg_histogram,
		    ZFS_RANGE_TREE_HISTOGRAM_SIZE, 0);
	}

	(void) printf("\tpool %s\tfragmentation", spa_name(spa));
	fragmentation = metaslab_class_fragmentation(mc);
	if (fragmentation == ZFS_FRAG_INVALID)
		(void) printf("\t%3s\n", "-");
	else
		(void) printf("\t%3llu%%\n", (u_longlong_t)fragmentation);
	dump_histogram(mc->mc_histogram, ZFS_RANGE_TREE_HISTOGRAM_SIZE, 0);
}
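
/*
 * For an indirect (removed) vdev, print the birth-txg table and the
 * mapping from old offsets to their new locations, including per-entry
 * obsolete counts, followed by the obsolete space map if one exists.
 */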
static void
print_vdev_indirect(vdev_t *vd)
{
	vdev_indirect_config_t *vic = &vd->vdev_indirect_config;
	vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
	vdev_indirect_births_t *vib = vd->vdev_indirect_births;

	if (vim == NULL) {
		ASSERT3P(vib, ==, NULL);
		return;
	}

	ASSERT3U(vdev_indirect_mapping_object(vim), ==,
	    vic->vic_mapping_object);
	ASSERT3U(vdev_indirect_births_object(vib), ==,
	    vic->vic_births_object);

	(void) printf("indirect births obj %llu:\n",
	    (longlong_t)vic->vic_births_object);
	(void) printf("    vib_count = %llu\n",
	    (longlong_t)vdev_indirect_births_count(vib));
	for (uint64_t i = 0; i < vdev_indirect_births_count(vib); i++) {
		vdev_indirect_birth_entry_phys_t *cur_vibe =
		    &vib->vib_entries[i];
		(void) printf("\toffset %llx -> txg %llu\n",
		    (longlong_t)cur_vibe->vibe_offset,
		    (longlong_t)cur_vibe->vibe_phys_birth_txg);
	}
	(void) printf("\n");

	(void) printf("indirect mapping obj %llu:\n",
	    (longlong_t)vic->vic_mapping_object);
	(void) printf("    vim_max_offset = 0x%llx\n",
	    (longlong_t)vdev_indirect_mapping_max_offset(vim));
	(void) printf("    vim_bytes_mapped = 0x%llx\n",
	    (longlong_t)vdev_indirect_mapping_bytes_mapped(vim));
	(void) printf("    vim_count = %llu\n",
	    (longlong_t)vdev_indirect_mapping_num_entries(vim));

	if (dump_opt['d'] <= 5 && dump_opt['m'] <= 3)
		return;

	uint32_t *counts = vdev_indirect_mapping_load_obsolete_counts(vim);

	for (uint64_t i = 0; i < vdev_indirect_mapping_num_entries(vim); i++) {
		vdev_indirect_mapping_entry_phys_t *vimep =
		    &vim->vim_entries[i];
		(void) printf("\t<%llx:%llx:%llx> -> "
		    "<%llx:%llx:%llx> (%x obsolete)\n",
		    (longlong_t)vd->vdev_id,
		    (longlong_t)DVA_MAPPING_GET_SRC_OFFSET(vimep),
		    (longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst),
		    (longlong_t)DVA_GET_VDEV(&vimep->vimep_dst),
		    (longlong_t)DVA_GET_OFFSET(&vimep->vimep_dst),
		    (longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst),
		    counts[i]);
	}
	(void) printf("\n");

	uint64_t obsolete_sm_object;
	VERIFY0(vdev_obsolete_sm_object(vd, &obsolete_sm_object));
	if (obsolete_sm_object != 0) {
		objset_t *mos = vd->vdev_spa->spa_meta_objset;
		(void) printf("obsolete space map object %llu:\n",
		    (u_longlong_t)obsolete_sm_object);
		ASSERT(vd->vdev_obsolete_sm != NULL);
		ASSERT3U(space_map_object(vd->vdev_obsolete_sm), ==,
		    obsolete_sm_object);
		dump_spacemap(mos, vd->vdev_obsolete_sm);
		(void) printf("\n");
	}
}
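
/*
 * With -m, a first extra argument selects a single vdev; any further
 * arguments name individual metaslabs within it.  Without arguments,
 * every metaslab of every top-level vdev is dumped.
 */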
(void) printf("\n"); 1901 } 1902 } 1903 1904 static void 1905 dump_log_spacemaps(spa_t *spa) 1906 { 1907 if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP)) 1908 return; 1909 1910 (void) printf("\nLog Space Maps in Pool:\n"); 1911 for (spa_log_sm_t *sls = avl_first(&spa->spa_sm_logs_by_txg); 1912 sls; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls)) { 1913 space_map_t *sm = NULL; 1914 VERIFY0(space_map_open(&sm, spa_meta_objset(spa), 1915 sls->sls_sm_obj, 0, UINT64_MAX, SPA_MINBLOCKSHIFT)); 1916 1917 (void) printf("Log Spacemap object %llu txg %llu\n", 1918 (u_longlong_t)sls->sls_sm_obj, (u_longlong_t)sls->sls_txg); 1919 dump_spacemap(spa->spa_meta_objset, sm); 1920 space_map_close(sm); 1921 } 1922 (void) printf("\n"); 1923 } 1924 1925 static void 1926 dump_ddt_entry(const ddt_t *ddt, const ddt_lightweight_entry_t *ddlwe, 1927 uint64_t index) 1928 { 1929 const ddt_key_t *ddk = &ddlwe->ddlwe_key; 1930 char blkbuf[BP_SPRINTF_LEN]; 1931 blkptr_t blk; 1932 int p; 1933 1934 for (p = 0; p < DDT_NPHYS(ddt); p++) { 1935 const ddt_univ_phys_t *ddp = &ddlwe->ddlwe_phys; 1936 ddt_phys_variant_t v = DDT_PHYS_VARIANT(ddt, p); 1937 1938 if (ddt_phys_birth(ddp, v) == 0) 1939 continue; 1940 ddt_bp_create(ddt->ddt_checksum, ddk, ddp, v, &blk); 1941 snprintf_blkptr(blkbuf, sizeof (blkbuf), &blk); 1942 (void) printf("index %llx refcnt %llu phys %d %s\n", 1943 (u_longlong_t)index, (u_longlong_t)ddt_phys_refcnt(ddp, v), 1944 p, blkbuf); 1945 } 1946 } 1947 1948 static void 1949 dump_dedup_ratio(const ddt_stat_t *dds) 1950 { 1951 double rL, rP, rD, D, dedup, compress, copies; 1952 1953 if (dds->dds_blocks == 0) 1954 return; 1955 1956 rL = (double)dds->dds_ref_lsize; 1957 rP = (double)dds->dds_ref_psize; 1958 rD = (double)dds->dds_ref_dsize; 1959 D = (double)dds->dds_dsize; 1960 1961 dedup = rD / D; 1962 compress = rL / rP; 1963 copies = rD / rP; 1964 1965 (void) printf("dedup = %.2f, compress = %.2f, copies = %.2f, " 1966 "dedup * compress / copies = %.2f\n\n", 1967 dedup, compress, copies, dedup * compress / copies); 1968 } 1969 1970 static void 1971 dump_ddt_log(ddt_t *ddt) 1972 { 1973 if (ddt->ddt_version != DDT_VERSION_FDT || 1974 !(ddt->ddt_flags & DDT_FLAG_LOG)) 1975 return; 1976 1977 for (int n = 0; n < 2; n++) { 1978 ddt_log_t *ddl = &ddt->ddt_log[n]; 1979 1980 char flagstr[64] = {0}; 1981 if (ddl->ddl_flags > 0) { 1982 flagstr[0] = ' '; 1983 int c = 1; 1984 if (ddl->ddl_flags & DDL_FLAG_FLUSHING) 1985 c += strlcpy(&flagstr[c], " FLUSHING", 1986 sizeof (flagstr) - c); 1987 if (ddl->ddl_flags & DDL_FLAG_CHECKPOINT) 1988 c += strlcpy(&flagstr[c], " CHECKPOINT", 1989 sizeof (flagstr) - c); 1990 if (ddl->ddl_flags & 1991 ~(DDL_FLAG_FLUSHING|DDL_FLAG_CHECKPOINT)) 1992 c += strlcpy(&flagstr[c], " UNKNOWN", 1993 sizeof (flagstr) - c); 1994 flagstr[1] = '['; 1995 flagstr[c] = ']'; 1996 } 1997 1998 uint64_t count = avl_numnodes(&ddl->ddl_tree); 1999 2000 printf(DMU_POOL_DDT_LOG ": flags=0x%02x%s; obj=%llu; " 2001 "len=%llu; txg=%llu; entries=%llu\n", 2002 zio_checksum_table[ddt->ddt_checksum].ci_name, n, 2003 ddl->ddl_flags, flagstr, 2004 (u_longlong_t)ddl->ddl_object, 2005 (u_longlong_t)ddl->ddl_length, 2006 (u_longlong_t)ddl->ddl_first_txg, (u_longlong_t)count); 2007 2008 if (ddl->ddl_flags & DDL_FLAG_CHECKPOINT) { 2009 const ddt_key_t *ddk = &ddl->ddl_checkpoint; 2010 printf(" checkpoint: " 2011 "%016llx:%016llx:%016llx:%016llx:%016llx\n", 2012 (u_longlong_t)ddk->ddk_cksum.zc_word[0], 2013 (u_longlong_t)ddk->ddk_cksum.zc_word[1], 2014 (u_longlong_t)ddk->ddk_cksum.zc_word[2], 2015 
(u_longlong_t)ddk->ddk_cksum.zc_word[3], 2016 (u_longlong_t)ddk->ddk_prop); 2017 } 2018 2019 if (count == 0 || dump_opt['D'] < 4) 2020 continue; 2021 2022 ddt_lightweight_entry_t ddlwe; 2023 uint64_t index = 0; 2024 for (ddt_log_entry_t *ddle = avl_first(&ddl->ddl_tree); 2025 ddle; ddle = AVL_NEXT(&ddl->ddl_tree, ddle)) { 2026 DDT_LOG_ENTRY_TO_LIGHTWEIGHT(ddt, ddle, &ddlwe); 2027 dump_ddt_entry(ddt, &ddlwe, index++); 2028 } 2029 } 2030 } 2031 2032 static void 2033 dump_ddt_object(ddt_t *ddt, ddt_type_t type, ddt_class_t class) 2034 { 2035 char name[DDT_NAMELEN]; 2036 ddt_lightweight_entry_t ddlwe; 2037 uint64_t walk = 0; 2038 dmu_object_info_t doi; 2039 uint64_t count, dspace, mspace; 2040 int error; 2041 2042 error = ddt_object_info(ddt, type, class, &doi); 2043 2044 if (error == ENOENT) 2045 return; 2046 ASSERT(error == 0); 2047 2048 error = ddt_object_count(ddt, type, class, &count); 2049 ASSERT(error == 0); 2050 if (count == 0) 2051 return; 2052 2053 dspace = doi.doi_physical_blocks_512 << 9; 2054 mspace = doi.doi_fill_count * doi.doi_data_block_size; 2055 2056 ddt_object_name(ddt, type, class, name); 2057 2058 (void) printf("%s: dspace=%llu; mspace=%llu; entries=%llu\n", name, 2059 (u_longlong_t)dspace, (u_longlong_t)mspace, (u_longlong_t)count); 2060 2061 if (dump_opt['D'] < 3) 2062 return; 2063 2064 (void) printf("%s: object=%llu\n", name, 2065 (u_longlong_t)ddt->ddt_object[type][class]); 2066 zpool_dump_ddt(NULL, &ddt->ddt_histogram[type][class]); 2067 2068 if (dump_opt['D'] < 4) 2069 return; 2070 2071 if (dump_opt['D'] < 5 && class == DDT_CLASS_UNIQUE) 2072 return; 2073 2074 (void) printf("%s contents:\n\n", name); 2075 2076 while ((error = ddt_object_walk(ddt, type, class, &walk, &ddlwe)) == 0) 2077 dump_ddt_entry(ddt, &ddlwe, walk); 2078 2079 ASSERT3U(error, ==, ENOENT); 2080 2081 (void) printf("\n"); 2082 } 2083 2084 static void 2085 dump_ddt(ddt_t *ddt) 2086 { 2087 if (!ddt || ddt->ddt_version == DDT_VERSION_UNCONFIGURED) 2088 return; 2089 2090 char flagstr[64] = {0}; 2091 if (ddt->ddt_flags > 0) { 2092 flagstr[0] = ' '; 2093 int c = 1; 2094 if (ddt->ddt_flags & DDT_FLAG_FLAT) 2095 c += strlcpy(&flagstr[c], " FLAT", 2096 sizeof (flagstr) - c); 2097 if (ddt->ddt_flags & DDT_FLAG_LOG) 2098 c += strlcpy(&flagstr[c], " LOG", 2099 sizeof (flagstr) - c); 2100 if (ddt->ddt_flags & ~DDT_FLAG_MASK) 2101 c += strlcpy(&flagstr[c], " UNKNOWN", 2102 sizeof (flagstr) - c); 2103 flagstr[1] = '['; 2104 flagstr[c] = ']'; 2105 } 2106 2107 printf("DDT-%s: version=%llu [%s]; flags=0x%02llx%s; rootobj=%llu\n", 2108 zio_checksum_table[ddt->ddt_checksum].ci_name, 2109 (u_longlong_t)ddt->ddt_version, 2110 (ddt->ddt_version == 0) ? "LEGACY" : 2111 (ddt->ddt_version == 1) ? 
"FDT" : "UNKNOWN", 2112 (u_longlong_t)ddt->ddt_flags, flagstr, 2113 (u_longlong_t)ddt->ddt_dir_object); 2114 2115 for (ddt_type_t type = 0; type < DDT_TYPES; type++) 2116 for (ddt_class_t class = 0; class < DDT_CLASSES; class++) 2117 dump_ddt_object(ddt, type, class); 2118 2119 dump_ddt_log(ddt); 2120 } 2121 2122 static void 2123 dump_all_ddts(spa_t *spa) 2124 { 2125 ddt_histogram_t ddh_total = {{{0}}}; 2126 ddt_stat_t dds_total = {0}; 2127 2128 for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) 2129 dump_ddt(spa->spa_ddt[c]); 2130 2131 ddt_get_dedup_stats(spa, &dds_total); 2132 2133 if (dds_total.dds_blocks == 0) { 2134 (void) printf("All DDTs are empty\n"); 2135 return; 2136 } 2137 2138 (void) printf("\n"); 2139 2140 if (dump_opt['D'] > 1) { 2141 (void) printf("DDT histogram (aggregated over all DDTs):\n"); 2142 ddt_get_dedup_histogram(spa, &ddh_total); 2143 zpool_dump_ddt(&dds_total, &ddh_total); 2144 } 2145 2146 dump_dedup_ratio(&dds_total); 2147 2148 /* 2149 * Dump a histogram of unique class entry age 2150 */ 2151 if (dump_opt['D'] == 3 && getenv("ZDB_DDT_UNIQUE_AGE_HIST") != NULL) { 2152 ddt_age_histo_t histogram; 2153 2154 (void) printf("DDT walk unique, building age histogram...\n"); 2155 ddt_prune_walk(spa, 0, &histogram); 2156 2157 /* 2158 * print out histogram for unique entry class birth 2159 */ 2160 if (histogram.dah_entries > 0) { 2161 (void) printf("%5s %9s %4s\n", 2162 "age", "blocks", "amnt"); 2163 (void) printf("%5s %9s %4s\n", 2164 "-----", "---------", "----"); 2165 for (int i = 0; i < HIST_BINS; i++) { 2166 (void) printf("%5d %9d %4d%%\n", 1 << i, 2167 (int)histogram.dah_age_histo[i], 2168 (int)((histogram.dah_age_histo[i] * 100) / 2169 histogram.dah_entries)); 2170 } 2171 } 2172 } 2173 } 2174 2175 static void 2176 dump_brt(spa_t *spa) 2177 { 2178 if (!spa_feature_is_enabled(spa, SPA_FEATURE_BLOCK_CLONING)) { 2179 printf("BRT: unsupported on this pool\n"); 2180 return; 2181 } 2182 2183 if (!spa_feature_is_active(spa, SPA_FEATURE_BLOCK_CLONING)) { 2184 printf("BRT: empty\n"); 2185 return; 2186 } 2187 2188 char count[32], used[32], saved[32]; 2189 zdb_nicebytes(brt_get_used(spa), used, sizeof (used)); 2190 zdb_nicebytes(brt_get_saved(spa), saved, sizeof (saved)); 2191 uint64_t ratio = brt_get_ratio(spa); 2192 printf("BRT: used %s; saved %s; ratio %llu.%02llux\n", used, saved, 2193 (u_longlong_t)(ratio / 100), (u_longlong_t)(ratio % 100)); 2194 2195 if (dump_opt['T'] < 2) 2196 return; 2197 2198 for (uint64_t vdevid = 0; vdevid < spa->spa_brt_nvdevs; vdevid++) { 2199 brt_vdev_t *brtvd = spa->spa_brt_vdevs[vdevid]; 2200 if (!brtvd->bv_initiated) { 2201 printf("BRT: vdev %" PRIu64 ": empty\n", vdevid); 2202 continue; 2203 } 2204 2205 zdb_nicenum(brtvd->bv_totalcount, count, sizeof (count)); 2206 zdb_nicebytes(brtvd->bv_usedspace, used, sizeof (used)); 2207 zdb_nicebytes(brtvd->bv_savedspace, saved, sizeof (saved)); 2208 printf("BRT: vdev %" PRIu64 ": refcnt %s; used %s; saved %s\n", 2209 vdevid, count, used, saved); 2210 } 2211 2212 if (dump_opt['T'] < 3) 2213 return; 2214 2215 /* -TTT shows a per-vdev histograms; -TTTT shows all entries */ 2216 boolean_t do_histo = dump_opt['T'] == 3; 2217 2218 char dva[64]; 2219 2220 if (!do_histo) 2221 printf("\n%-16s %-10s\n", "DVA", "REFCNT"); 2222 2223 for (uint64_t vdevid = 0; vdevid < spa->spa_brt_nvdevs; vdevid++) { 2224 brt_vdev_t *brtvd = spa->spa_brt_vdevs[vdevid]; 2225 if (!brtvd->bv_initiated) 2226 continue; 2227 2228 uint64_t counts[64] = {}; 2229 2230 zap_cursor_t zc; 2231 zap_attribute_t *za = 
zap_attribute_alloc(); 2232 for (zap_cursor_init(&zc, spa->spa_meta_objset, 2233 brtvd->bv_mos_entries); 2234 zap_cursor_retrieve(&zc, za) == 0; 2235 zap_cursor_advance(&zc)) { 2236 uint64_t refcnt; 2237 VERIFY0(zap_lookup_uint64(spa->spa_meta_objset, 2238 brtvd->bv_mos_entries, 2239 (const uint64_t *)za->za_name, 1, 2240 za->za_integer_length, za->za_num_integers, 2241 &refcnt)); 2242 2243 if (do_histo) 2244 counts[highbit64(refcnt)]++; 2245 else { 2246 uint64_t offset = 2247 *(const uint64_t *)za->za_name; 2248 2249 snprintf(dva, sizeof (dva), "%" PRIu64 ":%llx", 2250 vdevid, (u_longlong_t)offset); 2251 printf("%-16s %-10llu\n", dva, 2252 (u_longlong_t)refcnt); 2253 } 2254 } 2255 zap_cursor_fini(&zc); 2256 zap_attribute_free(za); 2257 2258 if (do_histo) { 2259 printf("\nBRT: vdev %" PRIu64 2260 ": DVAs with 2^n refcnts:\n", vdevid); 2261 dump_histogram(counts, 64, 0); 2262 } 2263 } 2264 } 2265 2266 static void 2267 dump_dtl_seg(void *arg, uint64_t start, uint64_t size) 2268 { 2269 char *prefix = arg; 2270 2271 (void) printf("%s [%llu,%llu) length %llu\n", 2272 prefix, 2273 (u_longlong_t)start, 2274 (u_longlong_t)(start + size), 2275 (u_longlong_t)(size)); 2276 } 2277 2278 static void 2279 dump_dtl(vdev_t *vd, int indent) 2280 { 2281 spa_t *spa = vd->vdev_spa; 2282 boolean_t required; 2283 const char *name[DTL_TYPES] = { "missing", "partial", "scrub", 2284 "outage" }; 2285 char prefix[256]; 2286 2287 spa_vdev_state_enter(spa, SCL_NONE); 2288 required = vdev_dtl_required(vd); 2289 (void) spa_vdev_state_exit(spa, NULL, 0); 2290 2291 if (indent == 0) 2292 (void) printf("\nDirty time logs:\n\n"); 2293 2294 (void) printf("\t%*s%s [%s]\n", indent, "", 2295 vd->vdev_path ? vd->vdev_path : 2296 vd->vdev_parent ? vd->vdev_ops->vdev_op_type : spa_name(spa), 2297 required ? 
"DTL-required" : "DTL-expendable"); 2298 2299 for (int t = 0; t < DTL_TYPES; t++) { 2300 zfs_range_tree_t *rt = vd->vdev_dtl[t]; 2301 if (zfs_range_tree_space(rt) == 0) 2302 continue; 2303 (void) snprintf(prefix, sizeof (prefix), "\t%*s%s", 2304 indent + 2, "", name[t]); 2305 zfs_range_tree_walk(rt, dump_dtl_seg, prefix); 2306 if (dump_opt['d'] > 5 && vd->vdev_children == 0) 2307 dump_spacemap(spa->spa_meta_objset, 2308 vd->vdev_dtl_sm); 2309 } 2310 2311 for (unsigned c = 0; c < vd->vdev_children; c++) 2312 dump_dtl(vd->vdev_child[c], indent + 4); 2313 } 2314 2315 static void 2316 dump_history(spa_t *spa) 2317 { 2318 nvlist_t **events = NULL; 2319 char *buf; 2320 uint64_t resid, len, off = 0; 2321 uint_t num = 0; 2322 int error; 2323 char tbuf[30]; 2324 2325 if ((buf = malloc(SPA_OLD_MAXBLOCKSIZE)) == NULL) { 2326 (void) fprintf(stderr, "%s: unable to allocate I/O buffer\n", 2327 __func__); 2328 return; 2329 } 2330 2331 do { 2332 len = SPA_OLD_MAXBLOCKSIZE; 2333 2334 if ((error = spa_history_get(spa, &off, &len, buf)) != 0) { 2335 (void) fprintf(stderr, "Unable to read history: " 2336 "error %d\n", error); 2337 free(buf); 2338 return; 2339 } 2340 2341 if (zpool_history_unpack(buf, len, &resid, &events, &num) != 0) 2342 break; 2343 2344 off -= resid; 2345 } while (len != 0); 2346 2347 (void) printf("\nHistory:\n"); 2348 for (unsigned i = 0; i < num; i++) { 2349 boolean_t printed = B_FALSE; 2350 2351 if (nvlist_exists(events[i], ZPOOL_HIST_TIME)) { 2352 time_t tsec; 2353 struct tm t; 2354 2355 tsec = fnvlist_lookup_uint64(events[i], 2356 ZPOOL_HIST_TIME); 2357 (void) localtime_r(&tsec, &t); 2358 (void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t); 2359 } else { 2360 tbuf[0] = '\0'; 2361 } 2362 2363 if (nvlist_exists(events[i], ZPOOL_HIST_CMD)) { 2364 (void) printf("%s %s\n", tbuf, 2365 fnvlist_lookup_string(events[i], ZPOOL_HIST_CMD)); 2366 } else if (nvlist_exists(events[i], ZPOOL_HIST_INT_EVENT)) { 2367 uint64_t ievent; 2368 2369 ievent = fnvlist_lookup_uint64(events[i], 2370 ZPOOL_HIST_INT_EVENT); 2371 if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS) 2372 goto next; 2373 2374 (void) printf(" %s [internal %s txg:%ju] %s\n", 2375 tbuf, 2376 zfs_history_event_names[ievent], 2377 fnvlist_lookup_uint64(events[i], 2378 ZPOOL_HIST_TXG), 2379 fnvlist_lookup_string(events[i], 2380 ZPOOL_HIST_INT_STR)); 2381 } else if (nvlist_exists(events[i], ZPOOL_HIST_INT_NAME)) { 2382 (void) printf("%s [txg:%ju] %s", tbuf, 2383 fnvlist_lookup_uint64(events[i], 2384 ZPOOL_HIST_TXG), 2385 fnvlist_lookup_string(events[i], 2386 ZPOOL_HIST_INT_NAME)); 2387 2388 if (nvlist_exists(events[i], ZPOOL_HIST_DSNAME)) { 2389 (void) printf(" %s (%llu)", 2390 fnvlist_lookup_string(events[i], 2391 ZPOOL_HIST_DSNAME), 2392 (u_longlong_t)fnvlist_lookup_uint64( 2393 events[i], 2394 ZPOOL_HIST_DSID)); 2395 } 2396 2397 (void) printf(" %s\n", fnvlist_lookup_string(events[i], 2398 ZPOOL_HIST_INT_STR)); 2399 } else if (nvlist_exists(events[i], ZPOOL_HIST_IOCTL)) { 2400 (void) printf("%s ioctl %s\n", tbuf, 2401 fnvlist_lookup_string(events[i], 2402 ZPOOL_HIST_IOCTL)); 2403 2404 if (nvlist_exists(events[i], ZPOOL_HIST_INPUT_NVL)) { 2405 (void) printf(" input:\n"); 2406 dump_nvlist(fnvlist_lookup_nvlist(events[i], 2407 ZPOOL_HIST_INPUT_NVL), 8); 2408 } 2409 if (nvlist_exists(events[i], ZPOOL_HIST_OUTPUT_NVL)) { 2410 (void) printf(" output:\n"); 2411 dump_nvlist(fnvlist_lookup_nvlist(events[i], 2412 ZPOOL_HIST_OUTPUT_NVL), 8); 2413 } 2414 if (nvlist_exists(events[i], ZPOOL_HIST_ERRNO)) { 2415 (void) printf(" errno: %lld\n", 2416 
(longlong_t)fnvlist_lookup_int64(events[i], 2417 ZPOOL_HIST_ERRNO)); 2418 } 2419 } else { 2420 goto next; 2421 } 2422 2423 printed = B_TRUE; 2424 next: 2425 if (dump_opt['h'] > 1) { 2426 if (!printed) 2427 (void) printf("unrecognized record:\n"); 2428 dump_nvlist(events[i], 2); 2429 } 2430 } 2431 free(buf); 2432 } 2433 2434 static void 2435 dump_dnode(objset_t *os, uint64_t object, void *data, size_t size) 2436 { 2437 (void) os, (void) object, (void) data, (void) size; 2438 } 2439 2440 static uint64_t 2441 blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp, 2442 const zbookmark_phys_t *zb) 2443 { 2444 if (dnp == NULL) { 2445 ASSERT(zb->zb_level < 0); 2446 if (zb->zb_object == 0) 2447 return (zb->zb_blkid); 2448 return (zb->zb_blkid * BP_GET_LSIZE(bp)); 2449 } 2450 2451 ASSERT(zb->zb_level >= 0); 2452 2453 return ((zb->zb_blkid << 2454 (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) * 2455 dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT); 2456 } 2457 2458 static void 2459 snprintf_zstd_header(spa_t *spa, char *blkbuf, size_t buflen, 2460 const blkptr_t *bp) 2461 { 2462 static abd_t *pabd = NULL; 2463 void *buf; 2464 zio_t *zio; 2465 zfs_zstdhdr_t zstd_hdr; 2466 int error; 2467 2468 if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_ZSTD) 2469 return; 2470 2471 if (BP_IS_HOLE(bp)) 2472 return; 2473 2474 if (BP_IS_EMBEDDED(bp)) { 2475 buf = malloc(SPA_MAXBLOCKSIZE); 2476 if (buf == NULL) { 2477 (void) fprintf(stderr, "out of memory\n"); 2478 zdb_exit(1); 2479 } 2480 decode_embedded_bp_compressed(bp, buf); 2481 memcpy(&zstd_hdr, buf, sizeof (zstd_hdr)); 2482 free(buf); 2483 zstd_hdr.c_len = BE_32(zstd_hdr.c_len); 2484 zstd_hdr.raw_version_level = BE_32(zstd_hdr.raw_version_level); 2485 (void) snprintf(blkbuf + strlen(blkbuf), 2486 buflen - strlen(blkbuf), 2487 " ZSTD:size=%u:version=%u:level=%u:EMBEDDED", 2488 zstd_hdr.c_len, zfs_get_hdrversion(&zstd_hdr), 2489 zfs_get_hdrlevel(&zstd_hdr)); 2490 return; 2491 } 2492 2493 if (!pabd) 2494 pabd = abd_alloc_for_io(SPA_MAXBLOCKSIZE, B_FALSE); 2495 zio = zio_root(spa, NULL, NULL, 0); 2496 2497 /* Decrypt but don't decompress so we can read the compression header */ 2498 zio_nowait(zio_read(zio, spa, bp, pabd, BP_GET_PSIZE(bp), NULL, NULL, 2499 ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW_COMPRESS, 2500 NULL)); 2501 error = zio_wait(zio); 2502 if (error) { 2503 (void) fprintf(stderr, "read failed: %d\n", error); 2504 return; 2505 } 2506 buf = abd_borrow_buf_copy(pabd, BP_GET_LSIZE(bp)); 2507 memcpy(&zstd_hdr, buf, sizeof (zstd_hdr)); 2508 zstd_hdr.c_len = BE_32(zstd_hdr.c_len); 2509 zstd_hdr.raw_version_level = BE_32(zstd_hdr.raw_version_level); 2510 2511 (void) snprintf(blkbuf + strlen(blkbuf), 2512 buflen - strlen(blkbuf), 2513 " ZSTD:size=%u:version=%u:level=%u:NORMAL", 2514 zstd_hdr.c_len, zfs_get_hdrversion(&zstd_hdr), 2515 zfs_get_hdrlevel(&zstd_hdr)); 2516 2517 abd_return_buf_copy(pabd, buf, BP_GET_LSIZE(bp)); 2518 } 2519 2520 static void 2521 snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp, 2522 boolean_t bp_freed) 2523 { 2524 const dva_t *dva = bp->blk_dva; 2525 int ndvas = dump_opt['d'] > 5 ? 
BP_GET_NDVAS(bp) : 1; 2526 int i; 2527 2528 if (dump_opt['b'] >= 6) { 2529 snprintf_blkptr(blkbuf, buflen, bp); 2530 if (bp_freed) { 2531 (void) snprintf(blkbuf + strlen(blkbuf), 2532 buflen - strlen(blkbuf), " %s", "FREE"); 2533 } 2534 return; 2535 } 2536 2537 if (BP_IS_EMBEDDED(bp)) { 2538 (void) sprintf(blkbuf, 2539 "EMBEDDED et=%u %llxL/%llxP B=%llu", 2540 (int)BPE_GET_ETYPE(bp), 2541 (u_longlong_t)BPE_GET_LSIZE(bp), 2542 (u_longlong_t)BPE_GET_PSIZE(bp), 2543 (u_longlong_t)BP_GET_LOGICAL_BIRTH(bp)); 2544 return; 2545 } 2546 2547 blkbuf[0] = '\0'; 2548 2549 for (i = 0; i < ndvas; i++) { 2550 (void) snprintf(blkbuf + strlen(blkbuf), 2551 buflen - strlen(blkbuf), "%llu:%llx:%llx%s ", 2552 (u_longlong_t)DVA_GET_VDEV(&dva[i]), 2553 (u_longlong_t)DVA_GET_OFFSET(&dva[i]), 2554 (u_longlong_t)DVA_GET_ASIZE(&dva[i]), 2555 (DVA_GET_GANG(&dva[i]) ? "G" : "")); 2556 } 2557 2558 if (BP_IS_HOLE(bp)) { 2559 (void) snprintf(blkbuf + strlen(blkbuf), 2560 buflen - strlen(blkbuf), 2561 "%llxL B=%llu", 2562 (u_longlong_t)BP_GET_LSIZE(bp), 2563 (u_longlong_t)BP_GET_LOGICAL_BIRTH(bp)); 2564 } else { 2565 (void) snprintf(blkbuf + strlen(blkbuf), 2566 buflen - strlen(blkbuf), 2567 "%llxL/%llxP F=%llu B=%llu/%llu", 2568 (u_longlong_t)BP_GET_LSIZE(bp), 2569 (u_longlong_t)BP_GET_PSIZE(bp), 2570 (u_longlong_t)BP_GET_FILL(bp), 2571 (u_longlong_t)BP_GET_LOGICAL_BIRTH(bp), 2572 (u_longlong_t)BP_GET_PHYSICAL_BIRTH(bp)); 2573 if (bp_freed) 2574 (void) snprintf(blkbuf + strlen(blkbuf), 2575 buflen - strlen(blkbuf), " %s", "FREE"); 2576 (void) snprintf(blkbuf + strlen(blkbuf), 2577 buflen - strlen(blkbuf), 2578 " cksum=%016llx:%016llx:%016llx:%016llx", 2579 (u_longlong_t)bp->blk_cksum.zc_word[0], 2580 (u_longlong_t)bp->blk_cksum.zc_word[1], 2581 (u_longlong_t)bp->blk_cksum.zc_word[2], 2582 (u_longlong_t)bp->blk_cksum.zc_word[3]); 2583 } 2584 } 2585 2586 static void 2587 print_indirect(spa_t *spa, blkptr_t *bp, const zbookmark_phys_t *zb, 2588 const dnode_phys_t *dnp) 2589 { 2590 char blkbuf[BP_SPRINTF_LEN]; 2591 int l; 2592 2593 if (!BP_IS_EMBEDDED(bp)) { 2594 ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type); 2595 ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level); 2596 } 2597 2598 (void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb)); 2599 2600 ASSERT(zb->zb_level >= 0); 2601 2602 for (l = dnp->dn_nlevels - 1; l >= -1; l--) { 2603 if (l == zb->zb_level) { 2604 (void) printf("L%llx", (u_longlong_t)zb->zb_level); 2605 } else { 2606 (void) printf(" "); 2607 } 2608 } 2609 2610 snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp, B_FALSE); 2611 if (dump_opt['Z'] && BP_GET_COMPRESS(bp) == ZIO_COMPRESS_ZSTD) 2612 snprintf_zstd_header(spa, blkbuf, sizeof (blkbuf), bp); 2613 (void) printf("%s\n", blkbuf); 2614 } 2615 2616 static int 2617 visit_indirect(spa_t *spa, const dnode_phys_t *dnp, 2618 blkptr_t *bp, const zbookmark_phys_t *zb) 2619 { 2620 int err = 0; 2621 2622 if (BP_GET_BIRTH(bp) == 0) 2623 return (0); 2624 2625 print_indirect(spa, bp, zb, dnp); 2626 2627 if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) { 2628 arc_flags_t flags = ARC_FLAG_WAIT; 2629 int i; 2630 blkptr_t *cbp; 2631 int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT; 2632 arc_buf_t *buf; 2633 uint64_t fill = 0; 2634 ASSERT(!BP_IS_REDACTED(bp)); 2635 2636 err = arc_read(NULL, spa, bp, arc_getbuf_func, &buf, 2637 ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); 2638 if (err) 2639 return (err); 2640 ASSERT(buf->b_data); 2641 2642 /* recursively visit blocks below this */ 2643 cbp = buf->b_data; 2644 for (i = 0; i < epb; i++, cbp++) { 2645 zbookmark_phys_t 
czb; 2646 2647 SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object, 2648 zb->zb_level - 1, 2649 zb->zb_blkid * epb + i); 2650 err = visit_indirect(spa, dnp, cbp, &czb); 2651 if (err) 2652 break; 2653 fill += BP_GET_FILL(cbp); 2654 } 2655 if (!err) 2656 ASSERT3U(fill, ==, BP_GET_FILL(bp)); 2657 arc_buf_destroy(buf, &buf); 2658 } 2659 2660 return (err); 2661 } 2662 2663 static void 2664 dump_indirect(dnode_t *dn) 2665 { 2666 dnode_phys_t *dnp = dn->dn_phys; 2667 zbookmark_phys_t czb; 2668 2669 (void) printf("Indirect blocks:\n"); 2670 2671 SET_BOOKMARK(&czb, dmu_objset_id(dn->dn_objset), 2672 dn->dn_object, dnp->dn_nlevels - 1, 0); 2673 for (int j = 0; j < dnp->dn_nblkptr; j++) { 2674 czb.zb_blkid = j; 2675 (void) visit_indirect(dmu_objset_spa(dn->dn_objset), dnp, 2676 &dnp->dn_blkptr[j], &czb); 2677 } 2678 2679 (void) printf("\n"); 2680 } 2681 2682 static void 2683 dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size) 2684 { 2685 (void) os, (void) object; 2686 dsl_dir_phys_t *dd = data; 2687 time_t crtime; 2688 char nice[32]; 2689 2690 /* make sure nicenum has enough space */ 2691 _Static_assert(sizeof (nice) >= NN_NUMBUF_SZ, "nice truncated"); 2692 2693 if (dd == NULL) 2694 return; 2695 2696 ASSERT3U(size, >=, sizeof (dsl_dir_phys_t)); 2697 2698 crtime = dd->dd_creation_time; 2699 (void) printf("\t\tcreation_time = %s", ctime(&crtime)); 2700 (void) printf("\t\thead_dataset_obj = %llu\n", 2701 (u_longlong_t)dd->dd_head_dataset_obj); 2702 (void) printf("\t\tparent_dir_obj = %llu\n", 2703 (u_longlong_t)dd->dd_parent_obj); 2704 (void) printf("\t\torigin_obj = %llu\n", 2705 (u_longlong_t)dd->dd_origin_obj); 2706 (void) printf("\t\tchild_dir_zapobj = %llu\n", 2707 (u_longlong_t)dd->dd_child_dir_zapobj); 2708 zdb_nicenum(dd->dd_used_bytes, nice, sizeof (nice)); 2709 (void) printf("\t\tused_bytes = %s\n", nice); 2710 zdb_nicenum(dd->dd_compressed_bytes, nice, sizeof (nice)); 2711 (void) printf("\t\tcompressed_bytes = %s\n", nice); 2712 zdb_nicenum(dd->dd_uncompressed_bytes, nice, sizeof (nice)); 2713 (void) printf("\t\tuncompressed_bytes = %s\n", nice); 2714 zdb_nicenum(dd->dd_quota, nice, sizeof (nice)); 2715 (void) printf("\t\tquota = %s\n", nice); 2716 zdb_nicenum(dd->dd_reserved, nice, sizeof (nice)); 2717 (void) printf("\t\treserved = %s\n", nice); 2718 (void) printf("\t\tprops_zapobj = %llu\n", 2719 (u_longlong_t)dd->dd_props_zapobj); 2720 (void) printf("\t\tdeleg_zapobj = %llu\n", 2721 (u_longlong_t)dd->dd_deleg_zapobj); 2722 (void) printf("\t\tflags = %llx\n", 2723 (u_longlong_t)dd->dd_flags); 2724 2725 #define DO(which) \ 2726 zdb_nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice, \ 2727 sizeof (nice)); \ 2728 (void) printf("\t\tused_breakdown[" #which "] = %s\n", nice) 2729 DO(HEAD); 2730 DO(SNAP); 2731 DO(CHILD); 2732 DO(CHILD_RSRV); 2733 DO(REFRSRV); 2734 #undef DO 2735 (void) printf("\t\tclones = %llu\n", 2736 (u_longlong_t)dd->dd_clones); 2737 } 2738 2739 static void 2740 dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size) 2741 { 2742 (void) os, (void) object; 2743 dsl_dataset_phys_t *ds = data; 2744 time_t crtime; 2745 char used[32], compressed[32], uncompressed[32], unique[32]; 2746 char blkbuf[BP_SPRINTF_LEN]; 2747 2748 /* make sure nicenum has enough space */ 2749 _Static_assert(sizeof (used) >= NN_NUMBUF_SZ, "used truncated"); 2750 _Static_assert(sizeof (compressed) >= NN_NUMBUF_SZ, 2751 "compressed truncated"); 2752 _Static_assert(sizeof (uncompressed) >= NN_NUMBUF_SZ, 2753 "uncompressed truncated"); 2754 _Static_assert(sizeof (unique) >= 
NN_NUMBUF_SZ, "unique truncated"); 2755 2756 if (ds == NULL) 2757 return; 2758 2759 ASSERT(size == sizeof (*ds)); 2760 crtime = ds->ds_creation_time; 2761 zdb_nicenum(ds->ds_referenced_bytes, used, sizeof (used)); 2762 zdb_nicenum(ds->ds_compressed_bytes, compressed, sizeof (compressed)); 2763 zdb_nicenum(ds->ds_uncompressed_bytes, uncompressed, 2764 sizeof (uncompressed)); 2765 zdb_nicenum(ds->ds_unique_bytes, unique, sizeof (unique)); 2766 snprintf_blkptr(blkbuf, sizeof (blkbuf), &ds->ds_bp); 2767 2768 (void) printf("\t\tdir_obj = %llu\n", 2769 (u_longlong_t)ds->ds_dir_obj); 2770 (void) printf("\t\tprev_snap_obj = %llu\n", 2771 (u_longlong_t)ds->ds_prev_snap_obj); 2772 (void) printf("\t\tprev_snap_txg = %llu\n", 2773 (u_longlong_t)ds->ds_prev_snap_txg); 2774 (void) printf("\t\tnext_snap_obj = %llu\n", 2775 (u_longlong_t)ds->ds_next_snap_obj); 2776 (void) printf("\t\tsnapnames_zapobj = %llu\n", 2777 (u_longlong_t)ds->ds_snapnames_zapobj); 2778 (void) printf("\t\tnum_children = %llu\n", 2779 (u_longlong_t)ds->ds_num_children); 2780 (void) printf("\t\tuserrefs_obj = %llu\n", 2781 (u_longlong_t)ds->ds_userrefs_obj); 2782 (void) printf("\t\tcreation_time = %s", ctime(&crtime)); 2783 (void) printf("\t\tcreation_txg = %llu\n", 2784 (u_longlong_t)ds->ds_creation_txg); 2785 (void) printf("\t\tdeadlist_obj = %llu\n", 2786 (u_longlong_t)ds->ds_deadlist_obj); 2787 (void) printf("\t\tused_bytes = %s\n", used); 2788 (void) printf("\t\tcompressed_bytes = %s\n", compressed); 2789 (void) printf("\t\tuncompressed_bytes = %s\n", uncompressed); 2790 (void) printf("\t\tunique = %s\n", unique); 2791 (void) printf("\t\tfsid_guid = %llu\n", 2792 (u_longlong_t)ds->ds_fsid_guid); 2793 (void) printf("\t\tguid = %llu\n", 2794 (u_longlong_t)ds->ds_guid); 2795 (void) printf("\t\tflags = %llx\n", 2796 (u_longlong_t)ds->ds_flags); 2797 (void) printf("\t\tnext_clones_obj = %llu\n", 2798 (u_longlong_t)ds->ds_next_clones_obj); 2799 (void) printf("\t\tprops_obj = %llu\n", 2800 (u_longlong_t)ds->ds_props_obj); 2801 (void) printf("\t\tbp = %s\n", blkbuf); 2802 } 2803 2804 static int 2805 dump_bptree_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) 2806 { 2807 (void) arg, (void) tx; 2808 char blkbuf[BP_SPRINTF_LEN]; 2809 2810 if (BP_GET_BIRTH(bp) != 0) { 2811 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); 2812 (void) printf("\t%s\n", blkbuf); 2813 } 2814 return (0); 2815 } 2816 2817 static void 2818 dump_bptree(objset_t *os, uint64_t obj, const char *name) 2819 { 2820 char bytes[32]; 2821 bptree_phys_t *bt; 2822 dmu_buf_t *db; 2823 2824 /* make sure nicenum has enough space */ 2825 _Static_assert(sizeof (bytes) >= NN_NUMBUF_SZ, "bytes truncated"); 2826 2827 if (dump_opt['d'] < 3) 2828 return; 2829 2830 VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db)); 2831 bt = db->db_data; 2832 zdb_nicenum(bt->bt_bytes, bytes, sizeof (bytes)); 2833 (void) printf("\n %s: %llu datasets, %s\n", 2834 name, (unsigned long long)(bt->bt_end - bt->bt_begin), bytes); 2835 dmu_buf_rele(db, FTAG); 2836 2837 if (dump_opt['d'] < 5) 2838 return; 2839 2840 (void) printf("\n"); 2841 2842 (void) bptree_iterate(os, obj, B_FALSE, dump_bptree_cb, NULL, NULL); 2843 } 2844 2845 static int 2846 dump_bpobj_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, dmu_tx_t *tx) 2847 { 2848 (void) arg, (void) tx; 2849 char blkbuf[BP_SPRINTF_LEN]; 2850 2851 ASSERT(BP_GET_BIRTH(bp) != 0); 2852 snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp, bp_freed); 2853 (void) printf("\t%s\n", blkbuf); 2854 return (0); 2855 } 2856 2857 static void 2858 dump_full_bpobj(bpobj_t 
*bpo, const char *name, int indent) 2859 { 2860 char bytes[32]; 2861 char comp[32]; 2862 char uncomp[32]; 2863 uint64_t i; 2864 2865 /* make sure nicenum has enough space */ 2866 _Static_assert(sizeof (bytes) >= NN_NUMBUF_SZ, "bytes truncated"); 2867 _Static_assert(sizeof (comp) >= NN_NUMBUF_SZ, "comp truncated"); 2868 _Static_assert(sizeof (uncomp) >= NN_NUMBUF_SZ, "uncomp truncated"); 2869 2870 if (dump_opt['d'] < 3) 2871 return; 2872 2873 zdb_nicenum(bpo->bpo_phys->bpo_bytes, bytes, sizeof (bytes)); 2874 if (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_subobjs != 0) { 2875 zdb_nicenum(bpo->bpo_phys->bpo_comp, comp, sizeof (comp)); 2876 zdb_nicenum(bpo->bpo_phys->bpo_uncomp, uncomp, sizeof (uncomp)); 2877 if (bpo->bpo_havefreed) { 2878 (void) printf(" %*s: object %llu, %llu local " 2879 "blkptrs, %llu freed, %llu subobjs in object %llu, " 2880 "%s (%s/%s comp)\n", 2881 indent * 8, name, 2882 (u_longlong_t)bpo->bpo_object, 2883 (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs, 2884 (u_longlong_t)bpo->bpo_phys->bpo_num_freed, 2885 (u_longlong_t)bpo->bpo_phys->bpo_num_subobjs, 2886 (u_longlong_t)bpo->bpo_phys->bpo_subobjs, 2887 bytes, comp, uncomp); 2888 } else { 2889 (void) printf(" %*s: object %llu, %llu local " 2890 "blkptrs, %llu subobjs in object %llu, " 2891 "%s (%s/%s comp)\n", 2892 indent * 8, name, 2893 (u_longlong_t)bpo->bpo_object, 2894 (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs, 2895 (u_longlong_t)bpo->bpo_phys->bpo_num_subobjs, 2896 (u_longlong_t)bpo->bpo_phys->bpo_subobjs, 2897 bytes, comp, uncomp); 2898 } 2899 2900 for (i = 0; i < bpo->bpo_phys->bpo_num_subobjs; i++) { 2901 uint64_t subobj; 2902 bpobj_t subbpo; 2903 int error; 2904 VERIFY0(dmu_read(bpo->bpo_os, 2905 bpo->bpo_phys->bpo_subobjs, 2906 i * sizeof (subobj), sizeof (subobj), &subobj, 0)); 2907 error = bpobj_open(&subbpo, bpo->bpo_os, subobj); 2908 if (error != 0) { 2909 (void) printf("ERROR %u while trying to open " 2910 "subobj id %llu\n", 2911 error, (u_longlong_t)subobj); 2912 continue; 2913 } 2914 dump_full_bpobj(&subbpo, "subobj", indent + 1); 2915 bpobj_close(&subbpo); 2916 } 2917 } else { 2918 if (bpo->bpo_havefreed) { 2919 (void) printf(" %*s: object %llu, %llu blkptrs, " 2920 "%llu freed, %s\n", 2921 indent * 8, name, 2922 (u_longlong_t)bpo->bpo_object, 2923 (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs, 2924 (u_longlong_t)bpo->bpo_phys->bpo_num_freed, 2925 bytes); 2926 } else { 2927 (void) printf(" %*s: object %llu, %llu blkptrs, " 2928 "%s\n", 2929 indent * 8, name, 2930 (u_longlong_t)bpo->bpo_object, 2931 (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs, 2932 bytes); 2933 } 2934 } 2935 2936 if (dump_opt['d'] < 5) 2937 return; 2938 2939 2940 if (indent == 0) { 2941 (void) bpobj_iterate_nofree(bpo, dump_bpobj_cb, NULL, NULL); 2942 (void) printf("\n"); 2943 } 2944 } 2945 2946 static int 2947 dump_bookmark(dsl_pool_t *dp, char *name, boolean_t print_redact, 2948 boolean_t print_list) 2949 { 2950 int err = 0; 2951 zfs_bookmark_phys_t prop; 2952 objset_t *mos = dp->dp_spa->spa_meta_objset; 2953 err = dsl_bookmark_lookup(dp, name, NULL, &prop); 2954 2955 if (err != 0) { 2956 return (err); 2957 } 2958 2959 (void) printf("\t#%s: ", strchr(name, '#') + 1); 2960 (void) printf("{guid: %llx creation_txg: %llu creation_time: " 2961 "%llu redaction_obj: %llu}\n", (u_longlong_t)prop.zbm_guid, 2962 (u_longlong_t)prop.zbm_creation_txg, 2963 (u_longlong_t)prop.zbm_creation_time, 2964 (u_longlong_t)prop.zbm_redaction_obj); 2965 2966 IMPLY(print_list, print_redact); 2967 if (!print_redact || prop.zbm_redaction_obj == 0) 2968 return 
(0); 2969 2970 redaction_list_t *rl; 2971 VERIFY0(dsl_redaction_list_hold_obj(dp, 2972 prop.zbm_redaction_obj, FTAG, &rl)); 2973 2974 redaction_list_phys_t *rlp = rl->rl_phys; 2975 (void) printf("\tRedacted:\n\t\tProgress: "); 2976 if (rlp->rlp_last_object != UINT64_MAX || 2977 rlp->rlp_last_blkid != UINT64_MAX) { 2978 (void) printf("%llu %llu (incomplete)\n", 2979 (u_longlong_t)rlp->rlp_last_object, 2980 (u_longlong_t)rlp->rlp_last_blkid); 2981 } else { 2982 (void) printf("complete\n"); 2983 } 2984 (void) printf("\t\tSnapshots: ["); 2985 for (unsigned int i = 0; i < rlp->rlp_num_snaps; i++) { 2986 if (i > 0) 2987 (void) printf(", "); 2988 (void) printf("%0llu", 2989 (u_longlong_t)rlp->rlp_snaps[i]); 2990 } 2991 (void) printf("]\n\t\tLength: %llu\n", 2992 (u_longlong_t)rlp->rlp_num_entries); 2993 2994 if (!print_list) { 2995 dsl_redaction_list_rele(rl, FTAG); 2996 return (0); 2997 } 2998 2999 if (rlp->rlp_num_entries == 0) { 3000 dsl_redaction_list_rele(rl, FTAG); 3001 (void) printf("\t\tRedaction List: []\n\n"); 3002 return (0); 3003 } 3004 3005 redact_block_phys_t *rbp_buf; 3006 uint64_t size; 3007 dmu_object_info_t doi; 3008 3009 VERIFY0(dmu_object_info(mos, prop.zbm_redaction_obj, &doi)); 3010 size = doi.doi_max_offset; 3011 rbp_buf = kmem_alloc(size, KM_SLEEP); 3012 3013 err = dmu_read(mos, prop.zbm_redaction_obj, 0, size, 3014 rbp_buf, 0); 3015 if (err != 0) { 3016 dsl_redaction_list_rele(rl, FTAG); 3017 kmem_free(rbp_buf, size); 3018 return (err); 3019 } 3020 3021 (void) printf("\t\tRedaction List: [{object: %llx, offset: " 3022 "%llx, blksz: %x, count: %llx}", 3023 (u_longlong_t)rbp_buf[0].rbp_object, 3024 (u_longlong_t)rbp_buf[0].rbp_blkid, 3025 (uint_t)(redact_block_get_size(&rbp_buf[0])), 3026 (u_longlong_t)redact_block_get_count(&rbp_buf[0])); 3027 3028 for (size_t i = 1; i < rlp->rlp_num_entries; i++) { 3029 (void) printf(",\n\t\t{object: %llx, offset: %llx, " 3030 "blksz: %x, count: %llx}", 3031 (u_longlong_t)rbp_buf[i].rbp_object, 3032 (u_longlong_t)rbp_buf[i].rbp_blkid, 3033 (uint_t)(redact_block_get_size(&rbp_buf[i])), 3034 (u_longlong_t)redact_block_get_count(&rbp_buf[i])); 3035 } 3036 dsl_redaction_list_rele(rl, FTAG); 3037 kmem_free(rbp_buf, size); 3038 (void) printf("]\n\n"); 3039 return (0); 3040 } 3041 3042 static void 3043 dump_bookmarks(objset_t *os, int verbosity) 3044 { 3045 zap_cursor_t zc; 3046 zap_attribute_t *attrp; 3047 dsl_dataset_t *ds = dmu_objset_ds(os); 3048 dsl_pool_t *dp = spa_get_dsl(os->os_spa); 3049 objset_t *mos = os->os_spa->spa_meta_objset; 3050 if (verbosity < 4) 3051 return; 3052 attrp = zap_attribute_alloc(); 3053 dsl_pool_config_enter(dp, FTAG); 3054 3055 for (zap_cursor_init(&zc, mos, ds->ds_bookmarks_obj); 3056 zap_cursor_retrieve(&zc, attrp) == 0; 3057 zap_cursor_advance(&zc)) { 3058 char osname[ZFS_MAX_DATASET_NAME_LEN]; 3059 char buf[ZFS_MAX_DATASET_NAME_LEN]; 3060 int len; 3061 dmu_objset_name(os, osname); 3062 len = snprintf(buf, sizeof (buf), "%s#%s", osname, 3063 attrp->za_name); 3064 VERIFY3S(len, <, ZFS_MAX_DATASET_NAME_LEN); 3065 (void) dump_bookmark(dp, buf, verbosity >= 5, verbosity >= 6); 3066 } 3067 zap_cursor_fini(&zc); 3068 dsl_pool_config_exit(dp, FTAG); 3069 zap_attribute_free(attrp); 3070 } 3071 3072 static void 3073 bpobj_count_refd(bpobj_t *bpo) 3074 { 3075 mos_obj_refd(bpo->bpo_object); 3076 3077 if (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_subobjs != 0) { 3078 mos_obj_refd(bpo->bpo_phys->bpo_subobjs); 3079 for (uint64_t i = 0; i < bpo->bpo_phys->bpo_num_subobjs; i++) { 3080 uint64_t subobj; 3081 bpobj_t subbpo; 3082 
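			/*
			 * Each id read from bpo_subobjs names another
			 * bpobj; open it and count its objects
			 * recursively.
			 */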
int error; 3083 VERIFY0(dmu_read(bpo->bpo_os, 3084 bpo->bpo_phys->bpo_subobjs, 3085 i * sizeof (subobj), sizeof (subobj), &subobj, 0)); 3086 error = bpobj_open(&subbpo, bpo->bpo_os, subobj); 3087 if (error != 0) { 3088 (void) printf("ERROR %u while trying to open " 3089 "subobj id %llu\n", 3090 error, (u_longlong_t)subobj); 3091 continue; 3092 } 3093 bpobj_count_refd(&subbpo); 3094 bpobj_close(&subbpo); 3095 } 3096 } 3097 } 3098 3099 static int 3100 dsl_deadlist_entry_count_refd(void *arg, dsl_deadlist_entry_t *dle) 3101 { 3102 spa_t *spa = arg; 3103 uint64_t empty_bpobj = spa->spa_dsl_pool->dp_empty_bpobj; 3104 if (dle->dle_bpobj.bpo_object != empty_bpobj) 3105 bpobj_count_refd(&dle->dle_bpobj); 3106 return (0); 3107 } 3108 3109 static int 3110 dsl_deadlist_entry_dump(void *arg, dsl_deadlist_entry_t *dle) 3111 { 3112 ASSERT(arg == NULL); 3113 if (dump_opt['d'] >= 5) { 3114 char buf[128]; 3115 (void) snprintf(buf, sizeof (buf), 3116 "mintxg %llu -> obj %llu", 3117 (longlong_t)dle->dle_mintxg, 3118 (longlong_t)dle->dle_bpobj.bpo_object); 3119 3120 dump_full_bpobj(&dle->dle_bpobj, buf, 0); 3121 } else { 3122 (void) printf("mintxg %llu -> obj %llu\n", 3123 (longlong_t)dle->dle_mintxg, 3124 (longlong_t)dle->dle_bpobj.bpo_object); 3125 } 3126 return (0); 3127 } 3128 3129 static void 3130 dump_blkptr_list(dsl_deadlist_t *dl, const char *name) 3131 { 3132 char bytes[32]; 3133 char comp[32]; 3134 char uncomp[32]; 3135 char entries[32]; 3136 spa_t *spa = dmu_objset_spa(dl->dl_os); 3137 uint64_t empty_bpobj = spa->spa_dsl_pool->dp_empty_bpobj; 3138 3139 if (dl->dl_oldfmt) { 3140 if (dl->dl_bpobj.bpo_object != empty_bpobj) 3141 bpobj_count_refd(&dl->dl_bpobj); 3142 } else { 3143 mos_obj_refd(dl->dl_object); 3144 dsl_deadlist_iterate(dl, dsl_deadlist_entry_count_refd, spa); 3145 } 3146 3147 /* make sure nicenum has enough space */ 3148 _Static_assert(sizeof (bytes) >= NN_NUMBUF_SZ, "bytes truncated"); 3149 _Static_assert(sizeof (comp) >= NN_NUMBUF_SZ, "comp truncated"); 3150 _Static_assert(sizeof (uncomp) >= NN_NUMBUF_SZ, "uncomp truncated"); 3151 _Static_assert(sizeof (entries) >= NN_NUMBUF_SZ, "entries truncated"); 3152 3153 if (dump_opt['d'] < 3) 3154 return; 3155 3156 if (dl->dl_oldfmt) { 3157 dump_full_bpobj(&dl->dl_bpobj, "old-format deadlist", 0); 3158 return; 3159 } 3160 3161 zdb_nicenum(dl->dl_phys->dl_used, bytes, sizeof (bytes)); 3162 zdb_nicenum(dl->dl_phys->dl_comp, comp, sizeof (comp)); 3163 zdb_nicenum(dl->dl_phys->dl_uncomp, uncomp, sizeof (uncomp)); 3164 zdb_nicenum(avl_numnodes(&dl->dl_tree), entries, sizeof (entries)); 3165 (void) printf("\n %s: %s (%s/%s comp), %s entries\n", 3166 name, bytes, comp, uncomp, entries); 3167 3168 if (dump_opt['d'] < 4) 3169 return; 3170 3171 (void) putchar('\n'); 3172 3173 dsl_deadlist_iterate(dl, dsl_deadlist_entry_dump, NULL); 3174 } 3175 3176 static int 3177 verify_dd_livelist(objset_t *os) 3178 { 3179 uint64_t ll_used, used, ll_comp, comp, ll_uncomp, uncomp; 3180 dsl_pool_t *dp = spa_get_dsl(os->os_spa); 3181 dsl_dir_t *dd = os->os_dsl_dataset->ds_dir; 3182 3183 ASSERT(!dmu_objset_is_snapshot(os)); 3184 if (!dsl_deadlist_is_open(&dd->dd_livelist)) 3185 return (0); 3186 3187 /* Iterate through the livelist to check for duplicates */ 3188 dsl_deadlist_iterate(&dd->dd_livelist, sublivelist_verify_lightweight, 3189 NULL); 3190 3191 dsl_pool_config_enter(dp, FTAG); 3192 dsl_deadlist_space(&dd->dd_livelist, &ll_used, 3193 &ll_comp, &ll_uncomp); 3194 3195 dsl_dataset_t *origin_ds; 3196 ASSERT(dsl_pool_config_held(dp)); 3197 
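	/*
	 * Cross-check the livelist totals against the space written
	 * since the origin snapshot, as computed from the dataset
	 * itself by dsl_dataset_space_written().
	 */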
VERIFY0(dsl_dataset_hold_obj(dp, 3198 dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin_ds)); 3199 VERIFY0(dsl_dataset_space_written(origin_ds, os->os_dsl_dataset, 3200 &used, &comp, &uncomp)); 3201 dsl_dataset_rele(origin_ds, FTAG); 3202 dsl_pool_config_exit(dp, FTAG); 3203 /* 3204 * It's possible that the dataset's uncomp space is larger than the 3205 * livelist's because livelists do not track embedded block pointers 3206 */ 3207 if (used != ll_used || comp != ll_comp || uncomp < ll_uncomp) { 3208 char nice_used[32], nice_comp[32], nice_uncomp[32]; 3209 (void) printf("Discrepancy in space accounting:\n"); 3210 zdb_nicenum(used, nice_used, sizeof (nice_used)); 3211 zdb_nicenum(comp, nice_comp, sizeof (nice_comp)); 3212 zdb_nicenum(uncomp, nice_uncomp, sizeof (nice_uncomp)); 3213 (void) printf("dir: used %s, comp %s, uncomp %s\n", 3214 nice_used, nice_comp, nice_uncomp); 3215 zdb_nicenum(ll_used, nice_used, sizeof (nice_used)); 3216 zdb_nicenum(ll_comp, nice_comp, sizeof (nice_comp)); 3217 zdb_nicenum(ll_uncomp, nice_uncomp, sizeof (nice_uncomp)); 3218 (void) printf("livelist: used %s, comp %s, uncomp %s\n", 3219 nice_used, nice_comp, nice_uncomp); 3220 return (1); 3221 } 3222 return (0); 3223 } 3224 3225 static char *key_material = NULL; 3226 3227 static boolean_t 3228 zdb_derive_key(dsl_dir_t *dd, uint8_t *key_out) 3229 { 3230 uint64_t keyformat, salt, iters; 3231 int i; 3232 unsigned char c; 3233 3234 VERIFY0(zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj, 3235 zfs_prop_to_name(ZFS_PROP_KEYFORMAT), sizeof (uint64_t), 3236 1, &keyformat)); 3237 3238 switch (keyformat) { 3239 case ZFS_KEYFORMAT_HEX: 3240 for (i = 0; i < WRAPPING_KEY_LEN * 2; i += 2) { 3241 if (!isxdigit(key_material[i]) || 3242 !isxdigit(key_material[i+1])) 3243 return (B_FALSE); 3244 if (sscanf(&key_material[i], "%02hhx", &c) != 1) 3245 return (B_FALSE); 3246 key_out[i / 2] = c; 3247 } 3248 break; 3249 3250 case ZFS_KEYFORMAT_PASSPHRASE: 3251 VERIFY0(zap_lookup(dd->dd_pool->dp_meta_objset, 3252 dd->dd_crypto_obj, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), 3253 sizeof (uint64_t), 1, &salt)); 3254 VERIFY0(zap_lookup(dd->dd_pool->dp_meta_objset, 3255 dd->dd_crypto_obj, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), 3256 sizeof (uint64_t), 1, &iters)); 3257 3258 if (PKCS5_PBKDF2_HMAC_SHA1(key_material, strlen(key_material), 3259 ((uint8_t *)&salt), sizeof (uint64_t), iters, 3260 WRAPPING_KEY_LEN, key_out) != 1) 3261 return (B_FALSE); 3262 3263 break; 3264 3265 default: 3266 fatal("no support for key format %u\n", 3267 (unsigned int) keyformat); 3268 } 3269 3270 return (B_TRUE); 3271 } 3272 3273 static char encroot[ZFS_MAX_DATASET_NAME_LEN]; 3274 static boolean_t key_loaded = B_FALSE; 3275 3276 static void 3277 zdb_load_key(objset_t *os) 3278 { 3279 dsl_pool_t *dp; 3280 dsl_dir_t *dd, *rdd; 3281 uint8_t key[WRAPPING_KEY_LEN]; 3282 uint64_t rddobj; 3283 int err; 3284 3285 dp = spa_get_dsl(os->os_spa); 3286 dd = os->os_dsl_dataset->ds_dir; 3287 3288 dsl_pool_config_enter(dp, FTAG); 3289 VERIFY0(zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj, 3290 DSL_CRYPTO_KEY_ROOT_DDOBJ, sizeof (uint64_t), 1, &rddobj)); 3291 VERIFY0(dsl_dir_hold_obj(dd->dd_pool, rddobj, NULL, FTAG, &rdd)); 3292 dsl_dir_name(rdd, encroot); 3293 dsl_dir_rele(rdd, FTAG); 3294 3295 if (!zdb_derive_key(dd, key)) 3296 fatal("couldn't derive encryption key"); 3297 3298 dsl_pool_config_exit(dp, FTAG); 3299 3300 ASSERT3U(dsl_dataset_get_keystatus(dd), ==, ZFS_KEYSTATUS_UNAVAILABLE); 3301 3302 dsl_crypto_params_t *dcp; 3303 nvlist_t *crypto_args; 3304 3305 
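	/*
	 * Package the derived wrapping key as the "wkeydata" entry of a
	 * crypto-args nvlist, the form dsl_crypto_params_create_nvlist()
	 * and spa_keystore_load_wkey() consume below.
	 */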
crypto_args = fnvlist_alloc(); 3306 fnvlist_add_uint8_array(crypto_args, "wkeydata", 3307 (uint8_t *)key, WRAPPING_KEY_LEN); 3308 VERIFY0(dsl_crypto_params_create_nvlist(DCP_CMD_NONE, 3309 NULL, crypto_args, &dcp)); 3310 err = spa_keystore_load_wkey(encroot, dcp, B_FALSE); 3311 3312 dsl_crypto_params_free(dcp, (err != 0)); 3313 fnvlist_free(crypto_args); 3314 3315 if (err != 0) 3316 fatal( 3317 "couldn't load encryption key for %s: %s", 3318 encroot, err == ZFS_ERR_CRYPTO_NOTSUP ? 3319 "crypto params not supported" : strerror(err)); 3320 3321 ASSERT3U(dsl_dataset_get_keystatus(dd), ==, ZFS_KEYSTATUS_AVAILABLE); 3322 3323 printf("Unlocked encryption root: %s\n", encroot); 3324 key_loaded = B_TRUE; 3325 } 3326 3327 static void 3328 zdb_unload_key(void) 3329 { 3330 if (!key_loaded) 3331 return; 3332 3333 VERIFY0(spa_keystore_unload_wkey(encroot)); 3334 key_loaded = B_FALSE; 3335 } 3336 3337 static avl_tree_t idx_tree; 3338 static avl_tree_t domain_tree; 3339 static boolean_t fuid_table_loaded; 3340 static objset_t *sa_os = NULL; 3341 static sa_attr_type_t *sa_attr_table = NULL; 3342 3343 static int 3344 open_objset(const char *path, const void *tag, objset_t **osp) 3345 { 3346 int err; 3347 uint64_t sa_attrs = 0; 3348 uint64_t version = 0; 3349 3350 VERIFY3P(sa_os, ==, NULL); 3351 3352 /* 3353 * We can't own an objset if it's redacted. Therefore, we do this 3354 * dance: hold the objset, then acquire a long hold on its dataset, then 3355 * release the pool (which is held as part of holding the objset). 3356 */ 3357 3358 if (dump_opt['K']) { 3359 /* decryption requested, try to load keys */ 3360 err = dmu_objset_hold(path, tag, osp); 3361 if (err != 0) { 3362 (void) fprintf(stderr, "failed to hold dataset " 3363 "'%s': %s\n", 3364 path, strerror(err)); 3365 return (err); 3366 } 3367 dsl_dataset_long_hold(dmu_objset_ds(*osp), tag); 3368 dsl_pool_rele(dmu_objset_pool(*osp), tag); 3369 3370 /* succeeds or dies */ 3371 zdb_load_key(*osp); 3372 3373 /* release it all */ 3374 dsl_dataset_long_rele(dmu_objset_ds(*osp), tag); 3375 dsl_dataset_rele(dmu_objset_ds(*osp), tag); 3376 } 3377 3378 int ds_hold_flags = key_loaded ? DS_HOLD_FLAG_DECRYPT : 0; 3379 3380 err = dmu_objset_hold_flags(path, ds_hold_flags, tag, osp); 3381 if (err != 0) { 3382 (void) fprintf(stderr, "failed to hold dataset '%s': %s\n", 3383 path, strerror(err)); 3384 return (err); 3385 } 3386 dsl_dataset_long_hold(dmu_objset_ds(*osp), tag); 3387 dsl_pool_rele(dmu_objset_pool(*osp), tag); 3388 3389 if (dmu_objset_type(*osp) == DMU_OST_ZFS && 3390 (key_loaded || !(*osp)->os_encrypted)) { 3391 (void) zap_lookup(*osp, MASTER_NODE_OBJ, ZPL_VERSION_STR, 3392 8, 1, &version); 3393 if (version >= ZPL_VERSION_SA) { 3394 (void) zap_lookup(*osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 3395 8, 1, &sa_attrs); 3396 } 3397 err = sa_setup(*osp, sa_attrs, zfs_attr_table, ZPL_END, 3398 &sa_attr_table); 3399 if (err != 0) { 3400 (void) fprintf(stderr, "sa_setup failed: %s\n", 3401 strerror(err)); 3402 dsl_dataset_long_rele(dmu_objset_ds(*osp), tag); 3403 dsl_dataset_rele_flags(dmu_objset_ds(*osp), 3404 ds_hold_flags, tag); 3405 *osp = NULL; 3406 } 3407 } 3408 sa_os = *osp; 3409 3410 return (err); 3411 } 3412 3413 static void 3414 close_objset(objset_t *os, const void *tag) 3415 { 3416 VERIFY3P(os, ==, sa_os); 3417 if (os->os_sa != NULL) 3418 sa_tear_down(os); 3419 dsl_dataset_long_rele(dmu_objset_ds(os), tag); 3420 dsl_dataset_rele_flags(dmu_objset_ds(os), 3421 key_loaded ? 
DS_HOLD_FLAG_DECRYPT : 0, tag);
	sa_attr_table = NULL;
	sa_os = NULL;

	zdb_unload_key();
}

static void
fuid_table_destroy(void)
{
	if (fuid_table_loaded) {
		zfs_fuid_table_destroy(&idx_tree, &domain_tree);
		fuid_table_loaded = B_FALSE;
	}
}

/*
 * Clean up DDT internal state. ddt_lookup() adds entries to ddt_tree, which on
 * a live pool are normally cleaned up during ddt_sync(). We can't do that (and
 * wouldn't want to anyway), but if we don't clean up, the presence of stuff on
 * ddt_tree will trip asserts in ddt_table_free(). So, we clean up ourselves.
 *
 * Note that this is not a particularly efficient way to do this, but
 * ddt_remove() is the only public method that can do the work we need, and it
 * requires the right locks and so on to do the job. This is only ever called
 * during zdb shutdown so efficiency is not especially important.
 */
static void
zdb_ddt_cleanup(spa_t *spa)
{
	for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
		ddt_t *ddt = spa->spa_ddt[c];
		if (!ddt)
			continue;

		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
		ddt_enter(ddt);
		ddt_entry_t *dde = avl_first(&ddt->ddt_tree), *next;
		while (dde) {
			next = AVL_NEXT(&ddt->ddt_tree, dde);
			dde->dde_io = NULL;
			ddt_remove(ddt, dde);
			dde = next;
		}
		ddt_exit(ddt);
		spa_config_exit(spa, SCL_CONFIG, FTAG);
	}
}

static void
zdb_exit(int reason)
{
	if (spa != NULL)
		zdb_ddt_cleanup(spa);

	if (os != NULL) {
		close_objset(os, FTAG);
	} else if (spa != NULL) {
		spa_close(spa, FTAG);
	}

	fuid_table_destroy();

	if (kernel_init_done)
		kernel_fini();

	exit(reason);
}

/*
 * Print uid or gid information.
 * For a normal POSIX id, just the id is printed in decimal format.
 * For CIFS files with a FUID, the fuid is printed in hex followed by
 * the domain-rid string.
 */
static void
print_idstr(uint64_t id, const char *id_type)
{
	if (FUID_INDEX(id)) {
		const char *domain =
		    zfs_fuid_idx_domain(&idx_tree, FUID_INDEX(id));
		(void) printf("\t%s %llx [%s-%d]\n", id_type,
		    (u_longlong_t)id, domain, (int)FUID_RID(id));
	} else {
		(void) printf("\t%s %llu\n", id_type, (u_longlong_t)id);
	}
}

static void
dump_uidgid(objset_t *os, uint64_t uid, uint64_t gid)
{
	uint32_t uid_idx, gid_idx;

	uid_idx = FUID_INDEX(uid);
	gid_idx = FUID_INDEX(gid);

	/* Load domain table, if not already loaded */
	if (!fuid_table_loaded && (uid_idx || gid_idx)) {
		uint64_t fuid_obj;

		/* first find the fuid object.
It lives in the master node */ 3523 VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 3524 8, 1, &fuid_obj) == 0); 3525 zfs_fuid_avl_tree_create(&idx_tree, &domain_tree); 3526 (void) zfs_fuid_table_load(os, fuid_obj, 3527 &idx_tree, &domain_tree); 3528 fuid_table_loaded = B_TRUE; 3529 } 3530 3531 print_idstr(uid, "uid"); 3532 print_idstr(gid, "gid"); 3533 } 3534 3535 static void 3536 dump_znode_sa_xattr(sa_handle_t *hdl) 3537 { 3538 nvlist_t *sa_xattr; 3539 nvpair_t *elem = NULL; 3540 int sa_xattr_size = 0; 3541 int sa_xattr_entries = 0; 3542 int error; 3543 char *sa_xattr_packed; 3544 3545 error = sa_size(hdl, sa_attr_table[ZPL_DXATTR], &sa_xattr_size); 3546 if (error || sa_xattr_size == 0) 3547 return; 3548 3549 sa_xattr_packed = malloc(sa_xattr_size); 3550 if (sa_xattr_packed == NULL) 3551 return; 3552 3553 error = sa_lookup(hdl, sa_attr_table[ZPL_DXATTR], 3554 sa_xattr_packed, sa_xattr_size); 3555 if (error) { 3556 free(sa_xattr_packed); 3557 return; 3558 } 3559 3560 error = nvlist_unpack(sa_xattr_packed, sa_xattr_size, &sa_xattr, 0); 3561 if (error) { 3562 free(sa_xattr_packed); 3563 return; 3564 } 3565 3566 while ((elem = nvlist_next_nvpair(sa_xattr, elem)) != NULL) 3567 sa_xattr_entries++; 3568 3569 (void) printf("\tSA xattrs: %d bytes, %d entries\n\n", 3570 sa_xattr_size, sa_xattr_entries); 3571 while ((elem = nvlist_next_nvpair(sa_xattr, elem)) != NULL) { 3572 boolean_t can_print = !dump_opt['P']; 3573 uchar_t *value; 3574 uint_t cnt, idx; 3575 3576 (void) printf("\t\t%s = ", nvpair_name(elem)); 3577 nvpair_value_byte_array(elem, &value, &cnt); 3578 3579 for (idx = 0; idx < cnt; ++idx) { 3580 if (!isprint(value[idx])) { 3581 can_print = B_FALSE; 3582 break; 3583 } 3584 } 3585 3586 for (idx = 0; idx < cnt; ++idx) { 3587 if (can_print) 3588 (void) putchar(value[idx]); 3589 else 3590 (void) printf("\\%3.3o", value[idx]); 3591 } 3592 (void) putchar('\n'); 3593 } 3594 3595 nvlist_free(sa_xattr); 3596 free(sa_xattr_packed); 3597 } 3598 3599 static void 3600 dump_znode_symlink(sa_handle_t *hdl) 3601 { 3602 int sa_symlink_size = 0; 3603 char linktarget[MAXPATHLEN]; 3604 int error; 3605 3606 error = sa_size(hdl, sa_attr_table[ZPL_SYMLINK], &sa_symlink_size); 3607 if (error || sa_symlink_size == 0) { 3608 return; 3609 } 3610 if (sa_symlink_size >= sizeof (linktarget)) { 3611 (void) printf("symlink size %d is too large\n", 3612 sa_symlink_size); 3613 return; 3614 } 3615 linktarget[sa_symlink_size] = '\0'; 3616 if (sa_lookup(hdl, sa_attr_table[ZPL_SYMLINK], 3617 &linktarget, sa_symlink_size) == 0) 3618 (void) printf("\ttarget %s\n", linktarget); 3619 } 3620 3621 static void 3622 dump_znode(objset_t *os, uint64_t object, void *data, size_t size) 3623 { 3624 (void) data, (void) size; 3625 char path[MAXPATHLEN * 2]; /* allow for xattr and failure prefix */ 3626 sa_handle_t *hdl; 3627 uint64_t xattr, rdev, gen; 3628 uint64_t uid, gid, mode, fsize, parent, links; 3629 uint64_t pflags; 3630 uint64_t acctm[2], modtm[2], chgtm[2], crtm[2]; 3631 time_t z_crtime, z_atime, z_mtime, z_ctime; 3632 sa_bulk_attr_t bulk[12]; 3633 int idx = 0; 3634 int error; 3635 3636 VERIFY3P(os, ==, sa_os); 3637 if (sa_handle_get(os, object, NULL, SA_HDL_PRIVATE, &hdl)) { 3638 (void) printf("Failed to get handle for SA znode\n"); 3639 return; 3640 } 3641 3642 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_UID], NULL, &uid, 8); 3643 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GID], NULL, &gid, 8); 3644 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_LINKS], NULL, 3645 &links, 8); 3646 SA_ADD_BULK_ATTR(bulk, idx, 
sa_attr_table[ZPL_GEN], NULL, &gen, 8); 3647 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MODE], NULL, 3648 &mode, 8); 3649 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_PARENT], 3650 NULL, &parent, 8); 3651 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_SIZE], NULL, 3652 &fsize, 8); 3653 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_ATIME], NULL, 3654 acctm, 16); 3655 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MTIME], NULL, 3656 modtm, 16); 3657 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CRTIME], NULL, 3658 crtm, 16); 3659 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CTIME], NULL, 3660 chgtm, 16); 3661 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_FLAGS], NULL, 3662 &pflags, 8); 3663 3664 if (sa_bulk_lookup(hdl, bulk, idx)) { 3665 (void) sa_handle_destroy(hdl); 3666 return; 3667 } 3668 3669 z_crtime = (time_t)crtm[0]; 3670 z_atime = (time_t)acctm[0]; 3671 z_mtime = (time_t)modtm[0]; 3672 z_ctime = (time_t)chgtm[0]; 3673 3674 if (dump_opt['d'] > 4) { 3675 error = zfs_obj_to_path(os, object, path, sizeof (path)); 3676 if (error == ESTALE) { 3677 (void) snprintf(path, sizeof (path), "on delete queue"); 3678 } else if (error != 0) { 3679 leaked_objects++; 3680 (void) snprintf(path, sizeof (path), 3681 "path not found, possibly leaked"); 3682 } 3683 (void) printf("\tpath %s\n", path); 3684 } 3685 3686 if (S_ISLNK(mode)) 3687 dump_znode_symlink(hdl); 3688 dump_uidgid(os, uid, gid); 3689 (void) printf("\tatime %s", ctime(&z_atime)); 3690 (void) printf("\tmtime %s", ctime(&z_mtime)); 3691 (void) printf("\tctime %s", ctime(&z_ctime)); 3692 (void) printf("\tcrtime %s", ctime(&z_crtime)); 3693 (void) printf("\tgen %llu\n", (u_longlong_t)gen); 3694 (void) printf("\tmode %llo\n", (u_longlong_t)mode); 3695 (void) printf("\tsize %llu\n", (u_longlong_t)fsize); 3696 (void) printf("\tparent %llu\n", (u_longlong_t)parent); 3697 (void) printf("\tlinks %llu\n", (u_longlong_t)links); 3698 (void) printf("\tpflags %llx\n", (u_longlong_t)pflags); 3699 if (dmu_objset_projectquota_enabled(os) && (pflags & ZFS_PROJID)) { 3700 uint64_t projid; 3701 3702 if (sa_lookup(hdl, sa_attr_table[ZPL_PROJID], &projid, 3703 sizeof (uint64_t)) == 0) 3704 (void) printf("\tprojid %llu\n", (u_longlong_t)projid); 3705 } 3706 if (sa_lookup(hdl, sa_attr_table[ZPL_XATTR], &xattr, 3707 sizeof (uint64_t)) == 0) 3708 (void) printf("\txattr %llu\n", (u_longlong_t)xattr); 3709 if (sa_lookup(hdl, sa_attr_table[ZPL_RDEV], &rdev, 3710 sizeof (uint64_t)) == 0) 3711 (void) printf("\trdev 0x%016llx\n", (u_longlong_t)rdev); 3712 dump_znode_sa_xattr(hdl); 3713 sa_handle_destroy(hdl); 3714 } 3715 3716 static void 3717 dump_acl(objset_t *os, uint64_t object, void *data, size_t size) 3718 { 3719 (void) os, (void) object, (void) data, (void) size; 3720 } 3721 3722 static void 3723 dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size) 3724 { 3725 (void) os, (void) object, (void) data, (void) size; 3726 } 3727 3728 static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = { 3729 dump_none, /* unallocated */ 3730 dump_zap, /* object directory */ 3731 dump_uint64, /* object array */ 3732 dump_none, /* packed nvlist */ 3733 dump_packed_nvlist, /* packed nvlist size */ 3734 dump_none, /* bpobj */ 3735 dump_bpobj, /* bpobj header */ 3736 dump_none, /* SPA space map header */ 3737 dump_none, /* SPA space map */ 3738 dump_none, /* ZIL intent log */ 3739 dump_dnode, /* DMU dnode */ 3740 dump_dmu_objset, /* DMU objset */ 3741 dump_dsl_dir, /* DSL directory */ 3742 dump_zap, /* DSL directory child map */ 3743 dump_zap, /* DSL dataset 
snap map */ 3744 dump_zap, /* DSL props */ 3745 dump_dsl_dataset, /* DSL dataset */ 3746 dump_znode, /* ZFS znode */ 3747 dump_acl, /* ZFS V0 ACL */ 3748 dump_uint8, /* ZFS plain file */ 3749 dump_zpldir, /* ZFS directory */ 3750 dump_zap, /* ZFS master node */ 3751 dump_zap, /* ZFS delete queue */ 3752 dump_uint8, /* zvol object */ 3753 dump_zap, /* zvol prop */ 3754 dump_uint8, /* other uint8[] */ 3755 dump_uint64, /* other uint64[] */ 3756 dump_zap, /* other ZAP */ 3757 dump_zap, /* persistent error log */ 3758 dump_uint8, /* SPA history */ 3759 dump_history_offsets, /* SPA history offsets */ 3760 dump_zap, /* Pool properties */ 3761 dump_zap, /* DSL permissions */ 3762 dump_acl, /* ZFS ACL */ 3763 dump_uint8, /* ZFS SYSACL */ 3764 dump_none, /* FUID nvlist */ 3765 dump_packed_nvlist, /* FUID nvlist size */ 3766 dump_zap, /* DSL dataset next clones */ 3767 dump_zap, /* DSL scrub queue */ 3768 dump_zap, /* ZFS user/group/project used */ 3769 dump_zap, /* ZFS user/group/project quota */ 3770 dump_zap, /* snapshot refcount tags */ 3771 dump_ddt_zap, /* DDT ZAP object */ 3772 dump_zap, /* DDT statistics */ 3773 dump_znode, /* SA object */ 3774 dump_zap, /* SA Master Node */ 3775 dump_sa_attrs, /* SA attribute registration */ 3776 dump_sa_layouts, /* SA attribute layouts */ 3777 dump_zap, /* DSL scrub translations */ 3778 dump_none, /* fake dedup BP */ 3779 dump_zap, /* deadlist */ 3780 dump_none, /* deadlist hdr */ 3781 dump_zap, /* dsl clones */ 3782 dump_bpobj_subobjs, /* bpobj subobjs */ 3783 dump_unknown, /* Unknown type, must be last */ 3784 }; 3785 3786 static boolean_t 3787 match_object_type(dmu_object_type_t obj_type, uint64_t flags) 3788 { 3789 boolean_t match = B_TRUE; 3790 3791 switch (obj_type) { 3792 case DMU_OT_DIRECTORY_CONTENTS: 3793 if (!(flags & ZOR_FLAG_DIRECTORY)) 3794 match = B_FALSE; 3795 break; 3796 case DMU_OT_PLAIN_FILE_CONTENTS: 3797 if (!(flags & ZOR_FLAG_PLAIN_FILE)) 3798 match = B_FALSE; 3799 break; 3800 case DMU_OT_SPACE_MAP: 3801 if (!(flags & ZOR_FLAG_SPACE_MAP)) 3802 match = B_FALSE; 3803 break; 3804 default: 3805 if (strcmp(zdb_ot_name(obj_type), "zap") == 0) { 3806 if (!(flags & ZOR_FLAG_ZAP)) 3807 match = B_FALSE; 3808 break; 3809 } 3810 3811 /* 3812 * If all bits except some of the supported flags are 3813 * set, the user combined the all-types flag (A) with 3814 * a negated flag to exclude some types (e.g. A-f to 3815 * show all object types except plain files). 
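		 *
		 * A worked example, assuming (as in zdb.h) that
		 * ZOR_FLAG_ALL_TYPES is the all-ones mask and
		 * ZOR_SUPPORTED_FLAGS is the union of the flags handled
		 * above: for "A-f", flags is ZOR_FLAG_ALL_TYPES with
		 * ZOR_FLAG_PLAIN_FILE cleared, so OR-ing in
		 * ZOR_SUPPORTED_FLAGS restores ZOR_FLAG_ALL_TYPES and
		 * this "other" type still matches. For a bare "-f",
		 * the OR yields just ZOR_SUPPORTED_FLAGS, the compare
		 * fails, and the type is excluded.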
3816 */ 3817 if ((flags | ZOR_SUPPORTED_FLAGS) != ZOR_FLAG_ALL_TYPES) 3818 match = B_FALSE; 3819 3820 break; 3821 } 3822 3823 return (match); 3824 } 3825 3826 static void 3827 dump_object(objset_t *os, uint64_t object, int verbosity, 3828 boolean_t *print_header, uint64_t *dnode_slots_used, uint64_t flags) 3829 { 3830 dmu_buf_t *db = NULL; 3831 dmu_object_info_t doi; 3832 dnode_t *dn; 3833 boolean_t dnode_held = B_FALSE; 3834 void *bonus = NULL; 3835 size_t bsize = 0; 3836 char iblk[32], dblk[32], lsize[32], asize[32], fill[32], dnsize[32]; 3837 char bonus_size[32]; 3838 char aux[50]; 3839 int error; 3840 3841 /* make sure nicenum has enough space */ 3842 _Static_assert(sizeof (iblk) >= NN_NUMBUF_SZ, "iblk truncated"); 3843 _Static_assert(sizeof (dblk) >= NN_NUMBUF_SZ, "dblk truncated"); 3844 _Static_assert(sizeof (lsize) >= NN_NUMBUF_SZ, "lsize truncated"); 3845 _Static_assert(sizeof (asize) >= NN_NUMBUF_SZ, "asize truncated"); 3846 _Static_assert(sizeof (bonus_size) >= NN_NUMBUF_SZ, 3847 "bonus_size truncated"); 3848 3849 if (*print_header) { 3850 (void) printf("\n%10s %3s %5s %5s %5s %6s %5s %6s %s\n", 3851 "Object", "lvl", "iblk", "dblk", "dsize", "dnsize", 3852 "lsize", "%full", "type"); 3853 *print_header = 0; 3854 } 3855 3856 if (object == 0) { 3857 dn = DMU_META_DNODE(os); 3858 dmu_object_info_from_dnode(dn, &doi); 3859 } else { 3860 /* 3861 * Encrypted datasets will have sensitive bonus buffers 3862 * encrypted. Therefore we cannot hold the bonus buffer and 3863 * must hold the dnode itself instead. 3864 */ 3865 error = dmu_object_info(os, object, &doi); 3866 if (error) 3867 fatal("dmu_object_info() failed, errno %u", error); 3868 3869 if (!key_loaded && os->os_encrypted && 3870 DMU_OT_IS_ENCRYPTED(doi.doi_bonus_type)) { 3871 error = dnode_hold(os, object, FTAG, &dn); 3872 if (error) 3873 fatal("dnode_hold() failed, errno %u", error); 3874 dnode_held = B_TRUE; 3875 } else { 3876 error = dmu_bonus_hold(os, object, FTAG, &db); 3877 if (error) 3878 fatal("dmu_bonus_hold(%llu) failed, errno %u", 3879 object, error); 3880 bonus = db->db_data; 3881 bsize = db->db_size; 3882 dn = DB_DNODE((dmu_buf_impl_t *)db); 3883 } 3884 } 3885 3886 /* 3887 * Default to showing all object types if no flags were specified. 3888 */ 3889 if (flags != 0 && flags != ZOR_FLAG_ALL_TYPES && 3890 !match_object_type(doi.doi_type, flags)) 3891 goto out; 3892 3893 if (dnode_slots_used) 3894 *dnode_slots_used = doi.doi_dnodesize / DNODE_MIN_SIZE; 3895 3896 zdb_nicenum(doi.doi_metadata_block_size, iblk, sizeof (iblk)); 3897 zdb_nicenum(doi.doi_data_block_size, dblk, sizeof (dblk)); 3898 zdb_nicenum(doi.doi_max_offset, lsize, sizeof (lsize)); 3899 zdb_nicenum(doi.doi_physical_blocks_512 << 9, asize, sizeof (asize)); 3900 zdb_nicenum(doi.doi_bonus_size, bonus_size, sizeof (bonus_size)); 3901 zdb_nicenum(doi.doi_dnodesize, dnsize, sizeof (dnsize)); 3902 (void) snprintf(fill, sizeof (fill), "%6.2f", 100.0 * 3903 doi.doi_fill_count * doi.doi_data_block_size / (object == 0 ? 
3904 DNODES_PER_BLOCK : 1) / doi.doi_max_offset); 3905 3906 aux[0] = '\0'; 3907 3908 if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) { 3909 (void) snprintf(aux + strlen(aux), sizeof (aux) - strlen(aux), 3910 " (K=%s)", ZDB_CHECKSUM_NAME(doi.doi_checksum)); 3911 } 3912 3913 if (doi.doi_compress == ZIO_COMPRESS_INHERIT && 3914 ZIO_COMPRESS_HASLEVEL(os->os_compress) && verbosity >= 6) { 3915 const char *compname = NULL; 3916 if (zfs_prop_index_to_string(ZFS_PROP_COMPRESSION, 3917 ZIO_COMPRESS_RAW(os->os_compress, os->os_complevel), 3918 &compname) == 0) { 3919 (void) snprintf(aux + strlen(aux), 3920 sizeof (aux) - strlen(aux), " (Z=inherit=%s)", 3921 compname); 3922 } else { 3923 (void) snprintf(aux + strlen(aux), 3924 sizeof (aux) - strlen(aux), 3925 " (Z=inherit=%s-unknown)", 3926 ZDB_COMPRESS_NAME(os->os_compress)); 3927 } 3928 } else if (doi.doi_compress == ZIO_COMPRESS_INHERIT && verbosity >= 6) { 3929 (void) snprintf(aux + strlen(aux), sizeof (aux) - strlen(aux), 3930 " (Z=inherit=%s)", ZDB_COMPRESS_NAME(os->os_compress)); 3931 } else if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) { 3932 (void) snprintf(aux + strlen(aux), sizeof (aux) - strlen(aux), 3933 " (Z=%s)", ZDB_COMPRESS_NAME(doi.doi_compress)); 3934 } 3935 3936 (void) printf("%10lld %3u %5s %5s %5s %6s %5s %6s %s%s\n", 3937 (u_longlong_t)object, doi.doi_indirection, iblk, dblk, 3938 asize, dnsize, lsize, fill, zdb_ot_name(doi.doi_type), aux); 3939 3940 if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) { 3941 (void) printf("%10s %3s %5s %5s %5s %5s %5s %6s %s\n", 3942 "", "", "", "", "", "", bonus_size, "bonus", 3943 zdb_ot_name(doi.doi_bonus_type)); 3944 } 3945 3946 if (verbosity >= 4) { 3947 (void) printf("\tdnode flags: %s%s%s%s\n", 3948 (dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ? 3949 "USED_BYTES " : "", 3950 (dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ? 3951 "USERUSED_ACCOUNTED " : "", 3952 (dn->dn_phys->dn_flags & DNODE_FLAG_USEROBJUSED_ACCOUNTED) ? 3953 "USEROBJUSED_ACCOUNTED " : "", 3954 (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ? 3955 "SPILL_BLKPTR" : ""); 3956 (void) printf("\tdnode maxblkid: %llu\n", 3957 (longlong_t)dn->dn_phys->dn_maxblkid); 3958 3959 if (!dnode_held) { 3960 object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os, 3961 object, bonus, bsize); 3962 } else { 3963 (void) printf("\t\t(bonus encrypted)\n"); 3964 } 3965 3966 if (key_loaded || 3967 (!os->os_encrypted || !DMU_OT_IS_ENCRYPTED(doi.doi_type))) { 3968 object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object, 3969 NULL, 0); 3970 } else { 3971 (void) printf("\t\t(object encrypted)\n"); 3972 } 3973 3974 *print_header = B_TRUE; 3975 } 3976 3977 if (verbosity >= 5) { 3978 if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { 3979 char blkbuf[BP_SPRINTF_LEN]; 3980 snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), 3981 DN_SPILL_BLKPTR(dn->dn_phys), B_FALSE); 3982 (void) printf("\nSpill block: %s\n", blkbuf); 3983 } 3984 dump_indirect(dn); 3985 } 3986 3987 if (verbosity >= 5) { 3988 /* 3989 * Report the list of segments that comprise the object. 
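 *
 * (Illustrative) each pass of the loop below locates the next
 * allocated offset and the hole that follows it, and prints one
 * line per contiguous segment, along the lines of:
 *
 *   segment [0000000000000000, 0000000000020000) size  128K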
3990 */ 3991 uint64_t start = 0; 3992 uint64_t end; 3993 uint64_t blkfill = 1; 3994 int minlvl = 1; 3995 3996 if (dn->dn_type == DMU_OT_DNODE) { 3997 minlvl = 0; 3998 blkfill = DNODES_PER_BLOCK; 3999 } 4000 4001 for (;;) { 4002 char segsize[32]; 4003 /* make sure nicenum has enough space */ 4004 _Static_assert(sizeof (segsize) >= NN_NUMBUF_SZ, 4005 "segsize truncated"); 4006 error = dnode_next_offset(dn, 4007 0, &start, minlvl, blkfill, 0); 4008 if (error) 4009 break; 4010 end = start; 4011 error = dnode_next_offset(dn, 4012 DNODE_FIND_HOLE, &end, minlvl, blkfill, 0); 4013 zdb_nicenum(end - start, segsize, sizeof (segsize)); 4014 (void) printf("\t\tsegment [%016llx, %016llx)" 4015 " size %5s\n", (u_longlong_t)start, 4016 (u_longlong_t)end, segsize); 4017 if (error) 4018 break; 4019 start = end; 4020 } 4021 } 4022 4023 out: 4024 if (db != NULL) 4025 dmu_buf_rele(db, FTAG); 4026 if (dnode_held) 4027 dnode_rele(dn, FTAG); 4028 } 4029 4030 static void 4031 count_dir_mos_objects(dsl_dir_t *dd) 4032 { 4033 mos_obj_refd(dd->dd_object); 4034 mos_obj_refd(dsl_dir_phys(dd)->dd_child_dir_zapobj); 4035 mos_obj_refd(dsl_dir_phys(dd)->dd_deleg_zapobj); 4036 mos_obj_refd(dsl_dir_phys(dd)->dd_props_zapobj); 4037 mos_obj_refd(dsl_dir_phys(dd)->dd_clones); 4038 4039 /* 4040 * The dd_crypto_obj can be referenced by multiple dsl_dir's. 4041 * Ignore the references after the first one. 4042 */ 4043 mos_obj_refd_multiple(dd->dd_crypto_obj); 4044 } 4045 4046 static void 4047 count_ds_mos_objects(dsl_dataset_t *ds) 4048 { 4049 mos_obj_refd(ds->ds_object); 4050 mos_obj_refd(dsl_dataset_phys(ds)->ds_next_clones_obj); 4051 mos_obj_refd(dsl_dataset_phys(ds)->ds_props_obj); 4052 mos_obj_refd(dsl_dataset_phys(ds)->ds_userrefs_obj); 4053 mos_obj_refd(dsl_dataset_phys(ds)->ds_snapnames_zapobj); 4054 mos_obj_refd(ds->ds_bookmarks_obj); 4055 4056 if (!dsl_dataset_is_snapshot(ds)) { 4057 count_dir_mos_objects(ds->ds_dir); 4058 } 4059 } 4060 4061 static const char *const objset_types[DMU_OST_NUMTYPES] = { 4062 "NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" }; 4063 4064 /* 4065 * Parse a string denoting a range of object IDs of the form 4066 * <start>[:<end>[:flags]], and store the results in zor. 4067 * Return 0 on success. On error, return 1 and update the msg 4068 * pointer to point to a descriptive error message. 
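 *
 * For example (illustrative; the accepted flag letters are whatever
 * the caller registered in flagbits[]):
 *
 *   "128"          object 128 only
 *   "10:200"       objects 10 through 200, all types
 *   "10:200:A-f"   objects 10 through 200, everything but plain files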
4069 */ 4070 static int 4071 parse_object_range(char *range, zopt_object_range_t *zor, const char **msg) 4072 { 4073 uint64_t flags = 0; 4074 char *p, *s, *dup, *flagstr, *tmp = NULL; 4075 size_t len; 4076 int i; 4077 int rc = 0; 4078 4079 if (strchr(range, ':') == NULL) { 4080 zor->zor_obj_start = strtoull(range, &p, 0); 4081 if (*p != '\0') { 4082 *msg = "Invalid characters in object ID"; 4083 rc = 1; 4084 } 4085 zor->zor_obj_start = ZDB_MAP_OBJECT_ID(zor->zor_obj_start); 4086 zor->zor_obj_end = zor->zor_obj_start; 4087 return (rc); 4088 } 4089 4090 if (strchr(range, ':') == range) { 4091 *msg = "Invalid leading colon"; 4092 rc = 1; 4093 return (rc); 4094 } 4095 4096 len = strlen(range); 4097 if (range[len - 1] == ':') { 4098 *msg = "Invalid trailing colon"; 4099 rc = 1; 4100 return (rc); 4101 } 4102 4103 dup = strdup(range); 4104 s = strtok_r(dup, ":", &tmp); 4105 zor->zor_obj_start = strtoull(s, &p, 0); 4106 4107 if (*p != '\0') { 4108 *msg = "Invalid characters in start object ID"; 4109 rc = 1; 4110 goto out; 4111 } 4112 4113 s = strtok_r(NULL, ":", &tmp); 4114 zor->zor_obj_end = strtoull(s, &p, 0); 4115 4116 if (*p != '\0') { 4117 *msg = "Invalid characters in end object ID"; 4118 rc = 1; 4119 goto out; 4120 } 4121 4122 if (zor->zor_obj_start > zor->zor_obj_end) { 4123 *msg = "Start object ID may not exceed end object ID"; 4124 rc = 1; 4125 goto out; 4126 } 4127 4128 s = strtok_r(NULL, ":", &tmp); 4129 if (s == NULL) { 4130 zor->zor_flags = ZOR_FLAG_ALL_TYPES; 4131 goto out; 4132 } else if (strtok_r(NULL, ":", &tmp) != NULL) { 4133 *msg = "Invalid colon-delimited field after flags"; 4134 rc = 1; 4135 goto out; 4136 } 4137 4138 flagstr = s; 4139 for (i = 0; flagstr[i]; i++) { 4140 int bit; 4141 boolean_t negation = (flagstr[i] == '-'); 4142 4143 if (negation) { 4144 i++; 4145 if (flagstr[i] == '\0') { 4146 *msg = "Invalid trailing negation operator"; 4147 rc = 1; 4148 goto out; 4149 } 4150 } 4151 bit = flagbits[(uchar_t)flagstr[i]]; 4152 if (bit == 0) { 4153 *msg = "Invalid flag"; 4154 rc = 1; 4155 goto out; 4156 } 4157 if (negation) 4158 flags &= ~bit; 4159 else 4160 flags |= bit; 4161 } 4162 zor->zor_flags = flags; 4163 4164 zor->zor_obj_start = ZDB_MAP_OBJECT_ID(zor->zor_obj_start); 4165 zor->zor_obj_end = ZDB_MAP_OBJECT_ID(zor->zor_obj_end); 4166 4167 out: 4168 free(dup); 4169 return (rc); 4170 } 4171 4172 static void 4173 dump_objset(objset_t *os) 4174 { 4175 dmu_objset_stats_t dds = { 0 }; 4176 uint64_t object, object_count; 4177 uint64_t refdbytes, usedobjs, scratch; 4178 char numbuf[32]; 4179 char blkbuf[BP_SPRINTF_LEN + 20]; 4180 char osname[ZFS_MAX_DATASET_NAME_LEN]; 4181 const char *type = "UNKNOWN"; 4182 int verbosity = dump_opt['d']; 4183 boolean_t print_header; 4184 unsigned i; 4185 int error; 4186 uint64_t total_slots_used = 0; 4187 uint64_t max_slot_used = 0; 4188 uint64_t dnode_slots; 4189 uint64_t obj_start; 4190 uint64_t obj_end; 4191 uint64_t flags; 4192 4193 /* make sure nicenum has enough space */ 4194 _Static_assert(sizeof (numbuf) >= NN_NUMBUF_SZ, "numbuf truncated"); 4195 4196 dsl_pool_config_enter(dmu_objset_pool(os), FTAG); 4197 dmu_objset_fast_stat(os, &dds); 4198 dsl_pool_config_exit(dmu_objset_pool(os), FTAG); 4199 4200 print_header = B_TRUE; 4201 4202 if (dds.dds_type < DMU_OST_NUMTYPES) 4203 type = objset_types[dds.dds_type]; 4204 4205 if (dds.dds_type == DMU_OST_META) { 4206 dds.dds_creation_txg = TXG_INITIAL; 4207 usedobjs = BP_GET_FILL(os->os_rootbp); 4208 refdbytes = dsl_dir_phys(os->os_spa->spa_dsl_pool->dp_mos_dir)-> 4209 dd_used_bytes; 4210 } 
else { 4211 dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch); 4212 } 4213 4214 ASSERT3U(usedobjs, ==, BP_GET_FILL(os->os_rootbp)); 4215 4216 zdb_nicenum(refdbytes, numbuf, sizeof (numbuf)); 4217 4218 if (verbosity >= 4) { 4219 (void) snprintf(blkbuf, sizeof (blkbuf), ", rootbp "); 4220 (void) snprintf_blkptr(blkbuf + strlen(blkbuf), 4221 sizeof (blkbuf) - strlen(blkbuf), os->os_rootbp); 4222 } else { 4223 blkbuf[0] = '\0'; 4224 } 4225 4226 dmu_objset_name(os, osname); 4227 4228 (void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, " 4229 "%s, %llu objects%s%s\n", 4230 osname, type, (u_longlong_t)dmu_objset_id(os), 4231 (u_longlong_t)dds.dds_creation_txg, 4232 numbuf, (u_longlong_t)usedobjs, blkbuf, 4233 (dds.dds_inconsistent) ? " (inconsistent)" : ""); 4234 4235 for (i = 0; i < zopt_object_args; i++) { 4236 obj_start = zopt_object_ranges[i].zor_obj_start; 4237 obj_end = zopt_object_ranges[i].zor_obj_end; 4238 flags = zopt_object_ranges[i].zor_flags; 4239 4240 object = obj_start; 4241 if (object == 0 || obj_start == obj_end) 4242 dump_object(os, object, verbosity, &print_header, NULL, 4243 flags); 4244 else 4245 object--; 4246 4247 while ((dmu_object_next(os, &object, B_FALSE, 0) == 0) && 4248 object <= obj_end) { 4249 dump_object(os, object, verbosity, &print_header, NULL, 4250 flags); 4251 } 4252 } 4253 4254 if (zopt_object_args > 0) { 4255 (void) printf("\n"); 4256 return; 4257 } 4258 4259 if (dump_opt['i'] != 0 || verbosity >= 2) 4260 dump_intent_log(dmu_objset_zil(os)); 4261 4262 if (dmu_objset_ds(os) != NULL) { 4263 dsl_dataset_t *ds = dmu_objset_ds(os); 4264 dump_blkptr_list(&ds->ds_deadlist, "Deadlist"); 4265 if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist) && 4266 !dmu_objset_is_snapshot(os)) { 4267 dump_blkptr_list(&ds->ds_dir->dd_livelist, "Livelist"); 4268 if (verify_dd_livelist(os) != 0) 4269 fatal("livelist is incorrect"); 4270 } 4271 4272 if (dsl_dataset_remap_deadlist_exists(ds)) { 4273 (void) printf("ds_remap_deadlist:\n"); 4274 dump_blkptr_list(&ds->ds_remap_deadlist, "Deadlist"); 4275 } 4276 count_ds_mos_objects(ds); 4277 } 4278 4279 if (dmu_objset_ds(os) != NULL) 4280 dump_bookmarks(os, verbosity); 4281 4282 if (verbosity < 2) 4283 return; 4284 4285 if (BP_IS_HOLE(os->os_rootbp)) 4286 return; 4287 4288 dump_object(os, 0, verbosity, &print_header, NULL, 0); 4289 object_count = 0; 4290 if (DMU_USERUSED_DNODE(os) != NULL && 4291 DMU_USERUSED_DNODE(os)->dn_type != 0) { 4292 dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header, 4293 NULL, 0); 4294 dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header, 4295 NULL, 0); 4296 } 4297 4298 if (DMU_PROJECTUSED_DNODE(os) != NULL && 4299 DMU_PROJECTUSED_DNODE(os)->dn_type != 0) 4300 dump_object(os, DMU_PROJECTUSED_OBJECT, verbosity, 4301 &print_header, NULL, 0); 4302 4303 object = 0; 4304 while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) { 4305 dump_object(os, object, verbosity, &print_header, &dnode_slots, 4306 0); 4307 object_count++; 4308 total_slots_used += dnode_slots; 4309 max_slot_used = object + dnode_slots - 1; 4310 } 4311 4312 (void) printf("\n"); 4313 4314 (void) printf(" Dnode slots:\n"); 4315 (void) printf("\tTotal used: %10llu\n", 4316 (u_longlong_t)total_slots_used); 4317 (void) printf("\tMax used: %10llu\n", 4318 (u_longlong_t)max_slot_used); 4319 (void) printf("\tPercent empty: %10lf\n", 4320 (double)(max_slot_used - total_slots_used)*100 / 4321 (double)max_slot_used); 4322 (void) printf("\n"); 4323 4324 if (error != ESRCH) { 4325 (void) fprintf(stderr, 
"dmu_object_next() = %d\n", error); 4326 abort(); 4327 } 4328 4329 ASSERT3U(object_count, ==, usedobjs); 4330 4331 if (leaked_objects != 0) { 4332 (void) printf("%d potentially leaked objects detected\n", 4333 leaked_objects); 4334 leaked_objects = 0; 4335 } 4336 } 4337 4338 static void 4339 dump_uberblock(uberblock_t *ub, const char *header, const char *footer) 4340 { 4341 time_t timestamp = ub->ub_timestamp; 4342 4343 (void) printf("%s", header ? header : ""); 4344 (void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic); 4345 (void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version); 4346 (void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg); 4347 (void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum); 4348 (void) printf("\ttimestamp = %llu UTC = %s", 4349 (u_longlong_t)ub->ub_timestamp, ctime(×tamp)); 4350 4351 char blkbuf[BP_SPRINTF_LEN]; 4352 snprintf_blkptr(blkbuf, sizeof (blkbuf), &ub->ub_rootbp); 4353 (void) printf("\tbp = %s\n", blkbuf); 4354 4355 (void) printf("\tmmp_magic = %016llx\n", 4356 (u_longlong_t)ub->ub_mmp_magic); 4357 if (MMP_VALID(ub)) { 4358 (void) printf("\tmmp_delay = %0llu\n", 4359 (u_longlong_t)ub->ub_mmp_delay); 4360 if (MMP_SEQ_VALID(ub)) 4361 (void) printf("\tmmp_seq = %u\n", 4362 (unsigned int) MMP_SEQ(ub)); 4363 if (MMP_FAIL_INT_VALID(ub)) 4364 (void) printf("\tmmp_fail = %u\n", 4365 (unsigned int) MMP_FAIL_INT(ub)); 4366 if (MMP_INTERVAL_VALID(ub)) 4367 (void) printf("\tmmp_write = %u\n", 4368 (unsigned int) MMP_INTERVAL(ub)); 4369 /* After MMP_* to make summarize_uberblock_mmp cleaner */ 4370 (void) printf("\tmmp_valid = %x\n", 4371 (unsigned int) ub->ub_mmp_config & 0xFF); 4372 } 4373 4374 if (dump_opt['u'] >= 4) { 4375 char blkbuf[BP_SPRINTF_LEN]; 4376 snprintf_blkptr(blkbuf, sizeof (blkbuf), &ub->ub_rootbp); 4377 (void) printf("\trootbp = %s\n", blkbuf); 4378 } 4379 (void) printf("\tcheckpoint_txg = %llu\n", 4380 (u_longlong_t)ub->ub_checkpoint_txg); 4381 4382 (void) printf("\traidz_reflow state=%u off=%llu\n", 4383 (int)RRSS_GET_STATE(ub), 4384 (u_longlong_t)RRSS_GET_OFFSET(ub)); 4385 4386 (void) printf("%s", footer ? 
footer : ""); 4387 } 4388 4389 static void 4390 dump_config(spa_t *spa) 4391 { 4392 dmu_buf_t *db; 4393 size_t nvsize = 0; 4394 int error = 0; 4395 4396 4397 error = dmu_bonus_hold(spa->spa_meta_objset, 4398 spa->spa_config_object, FTAG, &db); 4399 4400 if (error == 0) { 4401 nvsize = *(uint64_t *)db->db_data; 4402 dmu_buf_rele(db, FTAG); 4403 4404 (void) printf("\nMOS Configuration:\n"); 4405 dump_packed_nvlist(spa->spa_meta_objset, 4406 spa->spa_config_object, (void *)&nvsize, 1); 4407 } else { 4408 (void) fprintf(stderr, "dmu_bonus_hold(%llu) failed, errno %d", 4409 (u_longlong_t)spa->spa_config_object, error); 4410 } 4411 } 4412 4413 static void 4414 dump_cachefile(const char *cachefile) 4415 { 4416 int fd; 4417 struct stat64 statbuf; 4418 char *buf; 4419 nvlist_t *config; 4420 4421 if ((fd = open64(cachefile, O_RDONLY)) < 0) { 4422 (void) printf("cannot open '%s': %s\n", cachefile, 4423 strerror(errno)); 4424 zdb_exit(1); 4425 } 4426 4427 if (fstat64(fd, &statbuf) != 0) { 4428 (void) printf("failed to stat '%s': %s\n", cachefile, 4429 strerror(errno)); 4430 zdb_exit(1); 4431 } 4432 4433 if ((buf = malloc(statbuf.st_size)) == NULL) { 4434 (void) fprintf(stderr, "failed to allocate %llu bytes\n", 4435 (u_longlong_t)statbuf.st_size); 4436 zdb_exit(1); 4437 } 4438 4439 if (read(fd, buf, statbuf.st_size) != statbuf.st_size) { 4440 (void) fprintf(stderr, "failed to read %llu bytes\n", 4441 (u_longlong_t)statbuf.st_size); 4442 zdb_exit(1); 4443 } 4444 4445 (void) close(fd); 4446 4447 if (nvlist_unpack(buf, statbuf.st_size, &config, 0) != 0) { 4448 (void) fprintf(stderr, "failed to unpack nvlist\n"); 4449 zdb_exit(1); 4450 } 4451 4452 free(buf); 4453 4454 dump_nvlist(config, 0); 4455 4456 nvlist_free(config); 4457 } 4458 4459 /* 4460 * ZFS label nvlist stats 4461 */ 4462 typedef struct zdb_nvl_stats { 4463 int zns_list_count; 4464 int zns_leaf_count; 4465 size_t zns_leaf_largest; 4466 size_t zns_leaf_total; 4467 nvlist_t *zns_string; 4468 nvlist_t *zns_uint64; 4469 nvlist_t *zns_boolean; 4470 } zdb_nvl_stats_t; 4471 4472 static void 4473 collect_nvlist_stats(nvlist_t *nvl, zdb_nvl_stats_t *stats) 4474 { 4475 nvlist_t *list, **array; 4476 nvpair_t *nvp = NULL; 4477 const char *name; 4478 uint_t i, items; 4479 4480 stats->zns_list_count++; 4481 4482 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 4483 name = nvpair_name(nvp); 4484 4485 switch (nvpair_type(nvp)) { 4486 case DATA_TYPE_STRING: 4487 fnvlist_add_string(stats->zns_string, name, 4488 fnvpair_value_string(nvp)); 4489 break; 4490 case DATA_TYPE_UINT64: 4491 fnvlist_add_uint64(stats->zns_uint64, name, 4492 fnvpair_value_uint64(nvp)); 4493 break; 4494 case DATA_TYPE_BOOLEAN: 4495 fnvlist_add_boolean(stats->zns_boolean, name); 4496 break; 4497 case DATA_TYPE_NVLIST: 4498 if (nvpair_value_nvlist(nvp, &list) == 0) 4499 collect_nvlist_stats(list, stats); 4500 break; 4501 case DATA_TYPE_NVLIST_ARRAY: 4502 if (nvpair_value_nvlist_array(nvp, &array, &items) != 0) 4503 break; 4504 4505 for (i = 0; i < items; i++) { 4506 collect_nvlist_stats(array[i], stats); 4507 4508 /* collect stats on leaf vdev */ 4509 if (strcmp(name, "children") == 0) { 4510 size_t size; 4511 4512 (void) nvlist_size(array[i], &size, 4513 NV_ENCODE_XDR); 4514 stats->zns_leaf_total += size; 4515 if (size > stats->zns_leaf_largest) 4516 stats->zns_leaf_largest = size; 4517 stats->zns_leaf_count++; 4518 } 4519 } 4520 break; 4521 default: 4522 (void) printf("skip type %d!\n", (int)nvpair_type(nvp)); 4523 } 4524 } 4525 } 4526 4527 static void 4528 dump_nvlist_stats(nvlist_t 
*nvl, size_t cap) 4529 { 4530 zdb_nvl_stats_t stats = { 0 }; 4531 size_t size, sum = 0, total; 4532 size_t noise; 4533 4534 /* requires nvlist with non-unique names for stat collection */ 4535 VERIFY0(nvlist_alloc(&stats.zns_string, 0, 0)); 4536 VERIFY0(nvlist_alloc(&stats.zns_uint64, 0, 0)); 4537 VERIFY0(nvlist_alloc(&stats.zns_boolean, 0, 0)); 4538 VERIFY0(nvlist_size(stats.zns_boolean, &noise, NV_ENCODE_XDR)); 4539 4540 (void) printf("\n\nZFS Label NVList Config Stats:\n"); 4541 4542 VERIFY0(nvlist_size(nvl, &total, NV_ENCODE_XDR)); 4543 (void) printf(" %d bytes used, %d bytes free (using %4.1f%%)\n\n", 4544 (int)total, (int)(cap - total), 100.0 * total / cap); 4545 4546 collect_nvlist_stats(nvl, &stats); 4547 4548 VERIFY0(nvlist_size(stats.zns_uint64, &size, NV_ENCODE_XDR)); 4549 size -= noise; 4550 sum += size; 4551 (void) printf("%12s %4d %6d bytes (%5.2f%%)\n", "integers:", 4552 (int)fnvlist_num_pairs(stats.zns_uint64), 4553 (int)size, 100.0 * size / total); 4554 4555 VERIFY0(nvlist_size(stats.zns_string, &size, NV_ENCODE_XDR)); 4556 size -= noise; 4557 sum += size; 4558 (void) printf("%12s %4d %6d bytes (%5.2f%%)\n", "strings:", 4559 (int)fnvlist_num_pairs(stats.zns_string), 4560 (int)size, 100.0 * size / total); 4561 4562 VERIFY0(nvlist_size(stats.zns_boolean, &size, NV_ENCODE_XDR)); 4563 size -= noise; 4564 sum += size; 4565 (void) printf("%12s %4d %6d bytes (%5.2f%%)\n", "booleans:", 4566 (int)fnvlist_num_pairs(stats.zns_boolean), 4567 (int)size, 100.0 * size / total); 4568 4569 size = total - sum; /* treat remainder as nvlist overhead */ 4570 (void) printf("%12s %4d %6d bytes (%5.2f%%)\n\n", "nvlists:", 4571 stats.zns_list_count, (int)size, 100.0 * size / total); 4572 4573 if (stats.zns_leaf_count > 0) { 4574 size_t average = stats.zns_leaf_total / stats.zns_leaf_count; 4575 4576 (void) printf("%12s %4d %6d bytes average\n", "leaf vdevs:", 4577 stats.zns_leaf_count, (int)average); 4578 (void) printf("%24d bytes largest\n", 4579 (int)stats.zns_leaf_largest); 4580 4581 if (dump_opt['l'] >= 3 && average > 0) 4582 (void) printf(" space for %d additional leaf vdevs\n", 4583 (int)((cap - total) / average)); 4584 } 4585 (void) printf("\n"); 4586 4587 nvlist_free(stats.zns_string); 4588 nvlist_free(stats.zns_uint64); 4589 nvlist_free(stats.zns_boolean); 4590 } 4591 4592 typedef struct cksum_record { 4593 zio_cksum_t cksum; 4594 boolean_t labels[VDEV_LABELS]; 4595 avl_node_t link; 4596 } cksum_record_t; 4597 4598 static int 4599 cksum_record_compare(const void *x1, const void *x2) 4600 { 4601 const cksum_record_t *l = (cksum_record_t *)x1; 4602 const cksum_record_t *r = (cksum_record_t *)x2; 4603 int arraysize = ARRAY_SIZE(l->cksum.zc_word); 4604 int difference = 0; 4605 4606 for (int i = 0; i < arraysize; i++) { 4607 difference = TREE_CMP(l->cksum.zc_word[i], r->cksum.zc_word[i]); 4608 if (difference) 4609 break; 4610 } 4611 4612 return (difference); 4613 } 4614 4615 static cksum_record_t * 4616 cksum_record_alloc(zio_cksum_t *cksum, int l) 4617 { 4618 cksum_record_t *rec; 4619 4620 rec = umem_zalloc(sizeof (*rec), UMEM_NOFAIL); 4621 rec->cksum = *cksum; 4622 rec->labels[l] = B_TRUE; 4623 4624 return (rec); 4625 } 4626 4627 static cksum_record_t * 4628 cksum_record_lookup(avl_tree_t *tree, zio_cksum_t *cksum) 4629 { 4630 cksum_record_t lookup = { .cksum = *cksum }; 4631 avl_index_t where; 4632 4633 return (avl_find(tree, &lookup, &where)); 4634 } 4635 4636 static cksum_record_t * 4637 cksum_record_insert(avl_tree_t *tree, zio_cksum_t *cksum, int l) 4638 { 4639 cksum_record_t *rec; 4640 
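	/*
	 * Find-or-create: if a record with this checksum is already in
	 * the tree, just mark it as also seen in label l; otherwise
	 * allocate a fresh record and add it.
	 */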
4641 rec = cksum_record_lookup(tree, cksum); 4642 if (rec) { 4643 rec->labels[l] = B_TRUE; 4644 } else { 4645 rec = cksum_record_alloc(cksum, l); 4646 avl_add(tree, rec); 4647 } 4648 4649 return (rec); 4650 } 4651 4652 static int 4653 first_label(cksum_record_t *rec) 4654 { 4655 for (int i = 0; i < VDEV_LABELS; i++) 4656 if (rec->labels[i]) 4657 return (i); 4658 4659 return (-1); 4660 } 4661 4662 static void 4663 print_label_numbers(const char *prefix, const cksum_record_t *rec) 4664 { 4665 fputs(prefix, stdout); 4666 for (int i = 0; i < VDEV_LABELS; i++) 4667 if (rec->labels[i] == B_TRUE) 4668 printf("%d ", i); 4669 putchar('\n'); 4670 } 4671 4672 #define MAX_UBERBLOCK_COUNT (VDEV_UBERBLOCK_RING >> UBERBLOCK_SHIFT) 4673 4674 typedef struct zdb_label { 4675 vdev_label_t label; 4676 uint64_t label_offset; 4677 nvlist_t *config_nv; 4678 cksum_record_t *config; 4679 cksum_record_t *uberblocks[MAX_UBERBLOCK_COUNT]; 4680 boolean_t header_printed; 4681 boolean_t read_failed; 4682 boolean_t cksum_valid; 4683 } zdb_label_t; 4684 4685 static void 4686 print_label_header(zdb_label_t *label, int l) 4687 { 4688 4689 if (dump_opt['q']) 4690 return; 4691 4692 if (label->header_printed == B_TRUE) 4693 return; 4694 4695 (void) printf("------------------------------------\n"); 4696 (void) printf("LABEL %d %s\n", l, 4697 label->cksum_valid ? "" : "(Bad label cksum)"); 4698 (void) printf("------------------------------------\n"); 4699 4700 label->header_printed = B_TRUE; 4701 } 4702 4703 static void 4704 print_l2arc_header(void) 4705 { 4706 (void) printf("------------------------------------\n"); 4707 (void) printf("L2ARC device header\n"); 4708 (void) printf("------------------------------------\n"); 4709 } 4710 4711 static void 4712 print_l2arc_log_blocks(void) 4713 { 4714 (void) printf("------------------------------------\n"); 4715 (void) printf("L2ARC device log blocks\n"); 4716 (void) printf("------------------------------------\n"); 4717 } 4718 4719 static void 4720 dump_l2arc_log_entries(uint64_t log_entries, 4721 l2arc_log_ent_phys_t *le, uint64_t i) 4722 { 4723 for (int j = 0; j < log_entries; j++) { 4724 dva_t dva = le[j].le_dva; 4725 (void) printf("lb[%4llu]\tle[%4d]\tDVA asize: %llu, " 4726 "vdev: %llu, offset: %llu\n", 4727 (u_longlong_t)i, j + 1, 4728 (u_longlong_t)DVA_GET_ASIZE(&dva), 4729 (u_longlong_t)DVA_GET_VDEV(&dva), 4730 (u_longlong_t)DVA_GET_OFFSET(&dva)); 4731 (void) printf("|\t\t\t\tbirth: %llu\n", 4732 (u_longlong_t)le[j].le_birth); 4733 (void) printf("|\t\t\t\tlsize: %llu\n", 4734 (u_longlong_t)L2BLK_GET_LSIZE((&le[j])->le_prop)); 4735 (void) printf("|\t\t\t\tpsize: %llu\n", 4736 (u_longlong_t)L2BLK_GET_PSIZE((&le[j])->le_prop)); 4737 (void) printf("|\t\t\t\tcompr: %llu\n", 4738 (u_longlong_t)L2BLK_GET_COMPRESS((&le[j])->le_prop)); 4739 (void) printf("|\t\t\t\tcomplevel: %llu\n", 4740 (u_longlong_t)(&le[j])->le_complevel); 4741 (void) printf("|\t\t\t\ttype: %llu\n", 4742 (u_longlong_t)L2BLK_GET_TYPE((&le[j])->le_prop)); 4743 (void) printf("|\t\t\t\tprotected: %llu\n", 4744 (u_longlong_t)L2BLK_GET_PROTECTED((&le[j])->le_prop)); 4745 (void) printf("|\t\t\t\tprefetch: %llu\n", 4746 (u_longlong_t)L2BLK_GET_PREFETCH((&le[j])->le_prop)); 4747 (void) printf("|\t\t\t\taddress: %llu\n", 4748 (u_longlong_t)le[j].le_daddr); 4749 (void) printf("|\t\t\t\tARC state: %llu\n", 4750 (u_longlong_t)L2BLK_GET_STATE((&le[j])->le_prop)); 4751 (void) printf("|\n"); 4752 } 4753 (void) printf("\n"); 4754 } 4755 4756 static void 4757 dump_l2arc_log_blkptr(const l2arc_log_blkptr_t *lbps) 4758 { 4759 (void) 
printf("|\t\tdaddr: %llu\n", (u_longlong_t)lbps->lbp_daddr); 4760 (void) printf("|\t\tpayload_asize: %llu\n", 4761 (u_longlong_t)lbps->lbp_payload_asize); 4762 (void) printf("|\t\tpayload_start: %llu\n", 4763 (u_longlong_t)lbps->lbp_payload_start); 4764 (void) printf("|\t\tlsize: %llu\n", 4765 (u_longlong_t)L2BLK_GET_LSIZE(lbps->lbp_prop)); 4766 (void) printf("|\t\tasize: %llu\n", 4767 (u_longlong_t)L2BLK_GET_PSIZE(lbps->lbp_prop)); 4768 (void) printf("|\t\tcompralgo: %llu\n", 4769 (u_longlong_t)L2BLK_GET_COMPRESS(lbps->lbp_prop)); 4770 (void) printf("|\t\tcksumalgo: %llu\n", 4771 (u_longlong_t)L2BLK_GET_CHECKSUM(lbps->lbp_prop)); 4772 (void) printf("|\n\n"); 4773 } 4774 4775 static void 4776 dump_l2arc_log_blocks(int fd, const l2arc_dev_hdr_phys_t *l2dhdr, 4777 l2arc_dev_hdr_phys_t *rebuild) 4778 { 4779 l2arc_log_blk_phys_t this_lb; 4780 uint64_t asize; 4781 l2arc_log_blkptr_t lbps[2]; 4782 zio_cksum_t cksum; 4783 int failed = 0; 4784 l2arc_dev_t dev; 4785 4786 if (!dump_opt['q']) 4787 print_l2arc_log_blocks(); 4788 memcpy(lbps, l2dhdr->dh_start_lbps, sizeof (lbps)); 4789 4790 dev.l2ad_evict = l2dhdr->dh_evict; 4791 dev.l2ad_start = l2dhdr->dh_start; 4792 dev.l2ad_end = l2dhdr->dh_end; 4793 4794 if (l2dhdr->dh_start_lbps[0].lbp_daddr == 0) { 4795 /* no log blocks to read */ 4796 if (!dump_opt['q']) { 4797 (void) printf("No log blocks to read\n"); 4798 (void) printf("\n"); 4799 } 4800 return; 4801 } else { 4802 dev.l2ad_hand = lbps[0].lbp_daddr + 4803 L2BLK_GET_PSIZE((&lbps[0])->lbp_prop); 4804 } 4805 4806 dev.l2ad_first = !!(l2dhdr->dh_flags & L2ARC_DEV_HDR_EVICT_FIRST); 4807 4808 for (;;) { 4809 if (!l2arc_log_blkptr_valid(&dev, &lbps[0])) 4810 break; 4811 4812 /* L2BLK_GET_PSIZE returns aligned size for log blocks */ 4813 asize = L2BLK_GET_PSIZE((&lbps[0])->lbp_prop); 4814 if (pread64(fd, &this_lb, asize, lbps[0].lbp_daddr) != asize) { 4815 if (!dump_opt['q']) { 4816 (void) printf("Error while reading next log " 4817 "block\n\n"); 4818 } 4819 break; 4820 } 4821 4822 fletcher_4_native_varsize(&this_lb, asize, &cksum); 4823 if (!ZIO_CHECKSUM_EQUAL(cksum, lbps[0].lbp_cksum)) { 4824 failed++; 4825 if (!dump_opt['q']) { 4826 (void) printf("Invalid cksum\n"); 4827 dump_l2arc_log_blkptr(&lbps[0]); 4828 } 4829 break; 4830 } 4831 4832 switch (L2BLK_GET_COMPRESS((&lbps[0])->lbp_prop)) { 4833 case ZIO_COMPRESS_OFF: 4834 break; 4835 default: { 4836 abd_t *abd = abd_alloc_linear(asize, B_TRUE); 4837 abd_copy_from_buf_off(abd, &this_lb, 0, asize); 4838 abd_t dabd; 4839 abd_get_from_buf_struct(&dabd, &this_lb, 4840 sizeof (this_lb)); 4841 int err = zio_decompress_data(L2BLK_GET_COMPRESS( 4842 (&lbps[0])->lbp_prop), abd, &dabd, 4843 asize, sizeof (this_lb), NULL); 4844 abd_free(&dabd); 4845 abd_free(abd); 4846 if (err != 0) { 4847 (void) printf("L2ARC block decompression " 4848 "failed\n"); 4849 goto out; 4850 } 4851 break; 4852 } 4853 } 4854 4855 if (this_lb.lb_magic == BSWAP_64(L2ARC_LOG_BLK_MAGIC)) 4856 byteswap_uint64_array(&this_lb, sizeof (this_lb)); 4857 if (this_lb.lb_magic != L2ARC_LOG_BLK_MAGIC) { 4858 if (!dump_opt['q']) 4859 (void) printf("Invalid log block magic\n\n"); 4860 break; 4861 } 4862 4863 rebuild->dh_lb_count++; 4864 rebuild->dh_lb_asize += asize; 4865 if (dump_opt['l'] > 1 && !dump_opt['q']) { 4866 (void) printf("lb[%4llu]\tmagic: %llu\n", 4867 (u_longlong_t)rebuild->dh_lb_count, 4868 (u_longlong_t)this_lb.lb_magic); 4869 dump_l2arc_log_blkptr(&lbps[0]); 4870 } 4871 4872 if (dump_opt['l'] > 2 && !dump_opt['q']) 4873 dump_l2arc_log_entries(l2dhdr->dh_log_entries, 4874 
this_lb.lb_entries, 4875 rebuild->dh_lb_count); 4876 4877 if (l2arc_range_check_overlap(lbps[1].lbp_payload_start, 4878 lbps[0].lbp_payload_start, dev.l2ad_evict) && 4879 !dev.l2ad_first) 4880 break; 4881 4882 lbps[0] = lbps[1]; 4883 lbps[1] = this_lb.lb_prev_lbp; 4884 } 4885 out: 4886 if (!dump_opt['q']) { 4887 (void) printf("log_blk_count:\t %llu with valid cksum\n", 4888 (u_longlong_t)rebuild->dh_lb_count); 4889 (void) printf("\t\t %d with invalid cksum\n", failed); 4890 (void) printf("log_blk_asize:\t %llu\n\n", 4891 (u_longlong_t)rebuild->dh_lb_asize); 4892 } 4893 } 4894 4895 static int 4896 dump_l2arc_header(int fd) 4897 { 4898 l2arc_dev_hdr_phys_t l2dhdr = {0}, rebuild = {0}; 4899 int error = B_FALSE; 4900 4901 if (pread64(fd, &l2dhdr, sizeof (l2dhdr), 4902 VDEV_LABEL_START_SIZE) != sizeof (l2dhdr)) { 4903 error = B_TRUE; 4904 } else { 4905 if (l2dhdr.dh_magic == BSWAP_64(L2ARC_DEV_HDR_MAGIC)) 4906 byteswap_uint64_array(&l2dhdr, sizeof (l2dhdr)); 4907 4908 if (l2dhdr.dh_magic != L2ARC_DEV_HDR_MAGIC) 4909 error = B_TRUE; 4910 } 4911 4912 if (error) { 4913 (void) printf("L2ARC device header not found\n\n"); 4914 /* Do not return an error here for backward compatibility */ 4915 return (0); 4916 } else if (!dump_opt['q']) { 4917 print_l2arc_header(); 4918 4919 (void) printf(" magic: %llu\n", 4920 (u_longlong_t)l2dhdr.dh_magic); 4921 (void) printf(" version: %llu\n", 4922 (u_longlong_t)l2dhdr.dh_version); 4923 (void) printf(" pool_guid: %llu\n", 4924 (u_longlong_t)l2dhdr.dh_spa_guid); 4925 (void) printf(" flags: %llu\n", 4926 (u_longlong_t)l2dhdr.dh_flags); 4927 (void) printf(" start_lbps[0]: %llu\n", 4928 (u_longlong_t) 4929 l2dhdr.dh_start_lbps[0].lbp_daddr); 4930 (void) printf(" start_lbps[1]: %llu\n", 4931 (u_longlong_t) 4932 l2dhdr.dh_start_lbps[1].lbp_daddr); 4933 (void) printf(" log_blk_ent: %llu\n", 4934 (u_longlong_t)l2dhdr.dh_log_entries); 4935 (void) printf(" start: %llu\n", 4936 (u_longlong_t)l2dhdr.dh_start); 4937 (void) printf(" end: %llu\n", 4938 (u_longlong_t)l2dhdr.dh_end); 4939 (void) printf(" evict: %llu\n", 4940 (u_longlong_t)l2dhdr.dh_evict); 4941 (void) printf(" lb_asize_refcount: %llu\n", 4942 (u_longlong_t)l2dhdr.dh_lb_asize); 4943 (void) printf(" lb_count_refcount: %llu\n", 4944 (u_longlong_t)l2dhdr.dh_lb_count); 4945 (void) printf(" trim_action_time: %llu\n", 4946 (u_longlong_t)l2dhdr.dh_trim_action_time); 4947 (void) printf(" trim_state: %llu\n\n", 4948 (u_longlong_t)l2dhdr.dh_trim_state); 4949 } 4950 4951 dump_l2arc_log_blocks(fd, &l2dhdr, &rebuild); 4952 /* 4953 * The total aligned size of log blocks and the number of log blocks 4954 * reported in the header of the device may be less than what zdb 4955 * reports by dump_l2arc_log_blocks() which emulates l2arc_rebuild(). 4956 * This happens because dump_l2arc_log_blocks() lacks the memory 4957 * pressure valve that l2arc_rebuild() has. Thus, if we are on a system 4958 * with low memory, l2arc_rebuild will exit prematurely and dh_lb_asize 4959 * and dh_lb_count will be lower to begin with than what exists on the 4960 * device. This is normal and zdb should not exit with an error. The 4961 * opposite case should never happen though, the values reported in the 4962 * header should never be higher than what dump_l2arc_log_blocks() and 4963 * l2arc_rebuild() report. If this happens there is a leak in the 4964 * accounting of log blocks. 
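 *
 * In short: header counters that are lower than the rebuilt ones are
 * acceptable, but header counters that exceed them (the check just
 * below) indicate leaked log-block accounting and yield a non-zero
 * return.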
4965 */ 4966 if (l2dhdr.dh_lb_asize > rebuild.dh_lb_asize || 4967 l2dhdr.dh_lb_count > rebuild.dh_lb_count) 4968 return (1); 4969 4970 return (0); 4971 } 4972 4973 static void 4974 dump_config_from_label(zdb_label_t *label, size_t buflen, int l) 4975 { 4976 if (dump_opt['q']) 4977 return; 4978 4979 if ((dump_opt['l'] < 3) && (first_label(label->config) != l)) 4980 return; 4981 4982 print_label_header(label, l); 4983 dump_nvlist(label->config_nv, 4); 4984 print_label_numbers(" labels = ", label->config); 4985 4986 if (dump_opt['l'] >= 2) 4987 dump_nvlist_stats(label->config_nv, buflen); 4988 } 4989 4990 #define ZDB_MAX_UB_HEADER_SIZE 32 4991 4992 static void 4993 dump_label_uberblocks(zdb_label_t *label, uint64_t ashift, int label_num) 4994 { 4995 4996 vdev_t vd; 4997 char header[ZDB_MAX_UB_HEADER_SIZE]; 4998 4999 vd.vdev_ashift = ashift; 5000 vd.vdev_top = &vd; 5001 5002 for (int i = 0; i < VDEV_UBERBLOCK_COUNT(&vd); i++) { 5003 uint64_t uoff = VDEV_UBERBLOCK_OFFSET(&vd, i); 5004 uberblock_t *ub = (void *)((char *)&label->label + uoff); 5005 cksum_record_t *rec = label->uberblocks[i]; 5006 5007 if (rec == NULL) { 5008 if (dump_opt['u'] >= 2) { 5009 print_label_header(label, label_num); 5010 (void) printf(" Uberblock[%d] invalid\n", i); 5011 } 5012 continue; 5013 } 5014 5015 if ((dump_opt['u'] < 3) && (first_label(rec) != label_num)) 5016 continue; 5017 5018 if ((dump_opt['u'] < 4) && 5019 (ub->ub_mmp_magic == MMP_MAGIC) && ub->ub_mmp_delay && 5020 (i >= VDEV_UBERBLOCK_COUNT(&vd) - MMP_BLOCKS_PER_LABEL)) 5021 continue; 5022 5023 print_label_header(label, label_num); 5024 (void) snprintf(header, ZDB_MAX_UB_HEADER_SIZE, 5025 " Uberblock[%d]\n", i); 5026 dump_uberblock(ub, header, ""); 5027 print_label_numbers(" labels = ", rec); 5028 } 5029 } 5030 5031 static char curpath[PATH_MAX]; 5032 5033 /* 5034 * Iterate through the path components, recursively passing 5035 * current one's obj and remaining path until we find the obj 5036 * for the last one. 
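 *
 * For example, given "a/b/c" we split off "a", look it up in the
 * current directory's ZAP to get its object number, then recurse on
 * "b/c" from that object until only the final component is left.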
5037 */ 5038 static int 5039 dump_path_impl(objset_t *os, uint64_t obj, char *name, uint64_t *retobj) 5040 { 5041 int err; 5042 boolean_t header = B_TRUE; 5043 uint64_t child_obj; 5044 char *s; 5045 dmu_buf_t *db; 5046 dmu_object_info_t doi; 5047 5048 if ((s = strchr(name, '/')) != NULL) 5049 *s = '\0'; 5050 err = zap_lookup(os, obj, name, 8, 1, &child_obj); 5051 5052 (void) strlcat(curpath, name, sizeof (curpath)); 5053 5054 if (err != 0) { 5055 (void) fprintf(stderr, "failed to lookup %s: %s\n", 5056 curpath, strerror(err)); 5057 return (err); 5058 } 5059 5060 child_obj = ZFS_DIRENT_OBJ(child_obj); 5061 err = sa_buf_hold(os, child_obj, FTAG, &db); 5062 if (err != 0) { 5063 (void) fprintf(stderr, 5064 "failed to get SA dbuf for obj %llu: %s\n", 5065 (u_longlong_t)child_obj, strerror(err)); 5066 return (EINVAL); 5067 } 5068 dmu_object_info_from_db(db, &doi); 5069 sa_buf_rele(db, FTAG); 5070 5071 if (doi.doi_bonus_type != DMU_OT_SA && 5072 doi.doi_bonus_type != DMU_OT_ZNODE) { 5073 (void) fprintf(stderr, "invalid bonus type %d for obj %llu\n", 5074 doi.doi_bonus_type, (u_longlong_t)child_obj); 5075 return (EINVAL); 5076 } 5077 5078 if (dump_opt['v'] > 6) { 5079 (void) printf("obj=%llu %s type=%d bonustype=%d\n", 5080 (u_longlong_t)child_obj, curpath, doi.doi_type, 5081 doi.doi_bonus_type); 5082 } 5083 5084 (void) strlcat(curpath, "/", sizeof (curpath)); 5085 5086 switch (doi.doi_type) { 5087 case DMU_OT_DIRECTORY_CONTENTS: 5088 if (s != NULL && *(s + 1) != '\0') 5089 return (dump_path_impl(os, child_obj, s + 1, retobj)); 5090 zfs_fallthrough; 5091 case DMU_OT_PLAIN_FILE_CONTENTS: 5092 if (retobj != NULL) { 5093 *retobj = child_obj; 5094 } else { 5095 dump_object(os, child_obj, dump_opt['v'], &header, 5096 NULL, 0); 5097 } 5098 return (0); 5099 default: 5100 (void) fprintf(stderr, "object %llu has non-file/directory " 5101 "type %d\n", (u_longlong_t)obj, doi.doi_type); 5102 break; 5103 } 5104 5105 return (EINVAL); 5106 } 5107 5108 /* 5109 * Dump the blocks for the object specified by path inside the dataset. 5110 */ 5111 static int 5112 dump_path(char *ds, char *path, uint64_t *retobj) 5113 { 5114 int err; 5115 objset_t *os; 5116 uint64_t root_obj; 5117 5118 err = open_objset(ds, FTAG, &os); 5119 if (err != 0) 5120 return (err); 5121 5122 err = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, &root_obj); 5123 if (err != 0) { 5124 (void) fprintf(stderr, "can't lookup root znode: %s\n", 5125 strerror(err)); 5126 close_objset(os, FTAG); 5127 return (EINVAL); 5128 } 5129 5130 (void) snprintf(curpath, sizeof (curpath), "dataset=%s path=/", ds); 5131 5132 err = dump_path_impl(os, root_obj, path, retobj); 5133 5134 close_objset(os, FTAG); 5135 return (err); 5136 } 5137 5138 static int 5139 dump_backup_bytes(objset_t *os, void *buf, int len, void *arg) 5140 { 5141 const char *p = (const char *)buf; 5142 ssize_t nwritten; 5143 5144 (void) os; 5145 (void) arg; 5146 5147 /* Write the data out, handling short writes and signals. 
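A write(2) to a pipe may transfer fewer than len bytes; on EINTR we simply retry, while any other error number is returned to the caller.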
*/ 5148 while ((nwritten = write(STDOUT_FILENO, p, len)) < len) { 5149 if (nwritten < 0) { 5150 if (errno == EINTR) 5151 continue; 5152 return (errno); 5153 } 5154 p += nwritten; 5155 len -= nwritten; 5156 } 5157 5158 return (0); 5159 } 5160 5161 static void 5162 dump_backup(const char *pool, uint64_t objset_id, const char *flagstr) 5163 { 5164 boolean_t embed = B_FALSE; 5165 boolean_t large_block = B_FALSE; 5166 boolean_t compress = B_FALSE; 5167 boolean_t raw = B_FALSE; 5168 5169 const char *c; 5170 for (c = flagstr; c != NULL && *c != '\0'; c++) { 5171 switch (*c) { 5172 case 'e': 5173 embed = B_TRUE; 5174 break; 5175 case 'L': 5176 large_block = B_TRUE; 5177 break; 5178 case 'c': 5179 compress = B_TRUE; 5180 break; 5181 case 'w': 5182 raw = B_TRUE; 5183 break; 5184 default: 5185 fprintf(stderr, "dump_backup: invalid flag " 5186 "'%c'\n", *c); 5187 return; 5188 } 5189 } 5190 5191 if (isatty(STDOUT_FILENO)) { 5192 fprintf(stderr, "dump_backup: stream cannot be written " 5193 "to a terminal\n"); 5194 return; 5195 } 5196 5197 offset_t off = 0; 5198 dmu_send_outparams_t out = { 5199 .dso_outfunc = dump_backup_bytes, 5200 .dso_dryrun = B_FALSE, 5201 }; 5202 5203 int err = dmu_send_obj(pool, objset_id, /* fromsnap */0, embed, 5204 large_block, compress, raw, /* saved */ B_FALSE, STDOUT_FILENO, 5205 &off, &out); 5206 if (err != 0) { 5207 fprintf(stderr, "dump_backup: dmu_send_obj: %s\n", 5208 strerror(err)); 5209 return; 5210 } 5211 } 5212 5213 static int 5214 zdb_copy_object(objset_t *os, uint64_t srcobj, char *destfile) 5215 { 5216 int err = 0; 5217 uint64_t size, readsize, oursize, offset; 5218 ssize_t writesize; 5219 sa_handle_t *hdl; 5220 5221 (void) printf("Copying object %" PRIu64 " to file %s\n", srcobj, 5222 destfile); 5223 5224 VERIFY3P(os, ==, sa_os); 5225 if ((err = sa_handle_get(os, srcobj, NULL, SA_HDL_PRIVATE, &hdl))) { 5226 (void) printf("Failed to get handle for SA znode\n"); 5227 return (err); 5228 } 5229 if ((err = sa_lookup(hdl, sa_attr_table[ZPL_SIZE], &size, 8))) { 5230 (void) sa_handle_destroy(hdl); 5231 return (err); 5232 } 5233 (void) sa_handle_destroy(hdl); 5234 5235 (void) printf("Object %" PRIu64 " is %" PRIu64 " bytes\n", srcobj, 5236 size); 5237 if (size == 0) { 5238 return (EINVAL); 5239 } 5240 5241 int fd = open(destfile, O_WRONLY | O_CREAT | O_TRUNC, 0644); 5242 if (fd == -1) 5243 return (errno); 5244 /* 5245 * We cap the size at 1 mebibyte here to prevent 5246 * allocation failures and nigh-infinite printing if the 5247 * object is extremely large. 
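 *
 * For example, a 10 GiB object streams through this single reused
 * 1 MiB buffer in 10240 read/write round trips rather than one
 * enormous allocation.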
5248 */ 5249 oursize = MIN(size, 1 << 20); 5250 offset = 0; 5251 char *buf = kmem_alloc(oursize, KM_NOSLEEP); 5252 if (buf == NULL) { 5253 (void) close(fd); 5254 return (ENOMEM); 5255 } 5256 5257 while (offset < size) { 5258 readsize = MIN(size - offset, 1 << 20); 5259 err = dmu_read(os, srcobj, offset, readsize, buf, 0); 5260 if (err != 0) { 5261 (void) printf("got error %u from dmu_read\n", err); 5262 kmem_free(buf, oursize); 5263 (void) close(fd); 5264 return (err); 5265 } 5266 if (dump_opt['v'] > 3) { 5267 (void) printf("Read offset=%" PRIu64 " size=%" PRIu64 5268 " error=%d\n", offset, readsize, err); 5269 } 5270 5271 writesize = write(fd, buf, readsize); 5272 if (writesize < 0) { 5273 err = errno; 5274 break; 5275 } else if (writesize != readsize) { 5276 /* Incomplete write */ 5277 (void) fprintf(stderr, "Short write, only wrote %llu of" 5278 " %" PRIu64 " bytes, exiting...\n", 5279 (u_longlong_t)writesize, readsize); 5280 break; 5281 } 5282 5283 offset += readsize; 5284 } 5285 5286 (void) close(fd); 5287 5288 if (buf != NULL) 5289 kmem_free(buf, oursize); 5290 5291 return (err); 5292 } 5293 5294 static boolean_t 5295 label_cksum_valid(vdev_label_t *label, uint64_t offset) 5296 { 5297 zio_checksum_info_t *ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL]; 5298 zio_cksum_t expected_cksum; 5299 zio_cksum_t actual_cksum; 5300 zio_cksum_t verifier; 5301 zio_eck_t *eck; 5302 int byteswap; 5303 5304 void *data = (char *)label + offsetof(vdev_label_t, vl_vdev_phys); 5305 eck = (zio_eck_t *)((char *)(data) + VDEV_PHYS_SIZE) - 1; 5306 5307 offset += offsetof(vdev_label_t, vl_vdev_phys); 5308 ZIO_SET_CHECKSUM(&verifier, offset, 0, 0, 0); 5309 5310 byteswap = (eck->zec_magic == BSWAP_64(ZEC_MAGIC)); 5311 if (byteswap) 5312 byteswap_uint64_array(&verifier, sizeof (zio_cksum_t)); 5313 5314 expected_cksum = eck->zec_cksum; 5315 eck->zec_cksum = verifier; 5316 5317 abd_t *abd = abd_get_from_buf(data, VDEV_PHYS_SIZE); 5318 ci->ci_func[byteswap](abd, VDEV_PHYS_SIZE, NULL, &actual_cksum); 5319 abd_free(abd); 5320 5321 if (byteswap) 5322 byteswap_uint64_array(&expected_cksum, sizeof (zio_cksum_t)); 5323 5324 if (ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) 5325 return (B_TRUE); 5326 5327 return (B_FALSE); 5328 } 5329 5330 static int 5331 dump_label(const char *dev) 5332 { 5333 char path[MAXPATHLEN]; 5334 zdb_label_t labels[VDEV_LABELS] = {{{{0}}}}; 5335 uint64_t psize, ashift, l2cache; 5336 struct stat64 statbuf; 5337 boolean_t config_found = B_FALSE; 5338 boolean_t error = B_FALSE; 5339 boolean_t read_l2arc_header = B_FALSE; 5340 avl_tree_t config_tree; 5341 avl_tree_t uberblock_tree; 5342 void *node, *cookie; 5343 int fd; 5344 5345 /* 5346 * Check if we were given absolute path and use it as is. 5347 * Otherwise if the provided vdev name doesn't point to a file, 5348 * try prepending expected disk paths and partition numbers. 
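 *
 * For example (illustrative), a bare "sda" may resolve to "/dev/sda",
 * and if that names a whole disk a partition suffix is appended to
 * arrive at something like "/dev/sda1".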
5349 */ 5350 (void) strlcpy(path, dev, sizeof (path)); 5351 if (dev[0] != '/' && stat64(path, &statbuf) != 0) { 5352 int error; 5353 5354 error = zfs_resolve_shortname(dev, path, MAXPATHLEN); 5355 if (error == 0 && zfs_dev_is_whole_disk(path)) { 5356 if (zfs_append_partition(path, MAXPATHLEN) == -1) 5357 error = ENOENT; 5358 } 5359 5360 if (error || (stat64(path, &statbuf) != 0)) { 5361 (void) printf("failed to find device %s, try " 5362 "specifying absolute path instead\n", dev); 5363 return (1); 5364 } 5365 } 5366 5367 if ((fd = open64(path, O_RDONLY)) < 0) { 5368 (void) printf("cannot open '%s': %s\n", path, strerror(errno)); 5369 zdb_exit(1); 5370 } 5371 5372 if (fstat64_blk(fd, &statbuf) != 0) { 5373 (void) printf("failed to stat '%s': %s\n", path, 5374 strerror(errno)); 5375 (void) close(fd); 5376 zdb_exit(1); 5377 } 5378 5379 if (S_ISBLK(statbuf.st_mode) && zfs_dev_flush(fd) != 0) 5380 (void) printf("failed to invalidate cache '%s' : %s\n", path, 5381 strerror(errno)); 5382 5383 avl_create(&config_tree, cksum_record_compare, 5384 sizeof (cksum_record_t), offsetof(cksum_record_t, link)); 5385 avl_create(&uberblock_tree, cksum_record_compare, 5386 sizeof (cksum_record_t), offsetof(cksum_record_t, link)); 5387 5388 psize = statbuf.st_size; 5389 psize = P2ALIGN_TYPED(psize, sizeof (vdev_label_t), uint64_t); 5390 ashift = SPA_MINBLOCKSHIFT; 5391 5392 /* 5393 * 1. Read the label from disk 5394 * 2. Verify label cksum 5395 * 3. Unpack the configuration and insert in config tree. 5396 * 4. Traverse all uberblocks and insert in uberblock tree. 5397 */ 5398 for (int l = 0; l < VDEV_LABELS; l++) { 5399 zdb_label_t *label = &labels[l]; 5400 char *buf = label->label.vl_vdev_phys.vp_nvlist; 5401 size_t buflen = sizeof (label->label.vl_vdev_phys.vp_nvlist); 5402 nvlist_t *config; 5403 cksum_record_t *rec; 5404 zio_cksum_t cksum; 5405 vdev_t vd; 5406 5407 label->label_offset = vdev_label_offset(psize, l, 0); 5408 5409 if (pread64(fd, &label->label, sizeof (label->label), 5410 label->label_offset) != sizeof (label->label)) { 5411 if (!dump_opt['q']) 5412 (void) printf("failed to read label %d\n", l); 5413 label->read_failed = B_TRUE; 5414 error = B_TRUE; 5415 continue; 5416 } 5417 5418 label->read_failed = B_FALSE; 5419 label->cksum_valid = label_cksum_valid(&label->label, 5420 label->label_offset); 5421 5422 if (nvlist_unpack(buf, buflen, &config, 0) == 0) { 5423 nvlist_t *vdev_tree = NULL; 5424 size_t size; 5425 5426 if ((nvlist_lookup_nvlist(config, 5427 ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) || 5428 (nvlist_lookup_uint64(vdev_tree, 5429 ZPOOL_CONFIG_ASHIFT, &ashift) != 0)) 5430 ashift = SPA_MINBLOCKSHIFT; 5431 5432 if (nvlist_size(config, &size, NV_ENCODE_XDR) != 0) 5433 size = buflen; 5434 5435 /* If the device is a cache device read the header. 
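A cache device is recognized by a pool state of POOL_STATE_L2CACHE in the label nvlist.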
*/ 5436 if (!read_l2arc_header) { 5437 if (nvlist_lookup_uint64(config, 5438 ZPOOL_CONFIG_POOL_STATE, &l2cache) == 0 && 5439 l2cache == POOL_STATE_L2CACHE) { 5440 read_l2arc_header = B_TRUE; 5441 } 5442 } 5443 5444 fletcher_4_native_varsize(buf, size, &cksum); 5445 rec = cksum_record_insert(&config_tree, &cksum, l); 5446 5447 label->config = rec; 5448 label->config_nv = config; 5449 config_found = B_TRUE; 5450 } else { 5451 error = B_TRUE; 5452 } 5453 5454 vd.vdev_ashift = ashift; 5455 vd.vdev_top = &vd; 5456 5457 for (int i = 0; i < VDEV_UBERBLOCK_COUNT(&vd); i++) { 5458 uint64_t uoff = VDEV_UBERBLOCK_OFFSET(&vd, i); 5459 uberblock_t *ub = (void *)((char *)label + uoff); 5460 5461 if (uberblock_verify(ub)) 5462 continue; 5463 5464 fletcher_4_native_varsize(ub, sizeof (*ub), &cksum); 5465 rec = cksum_record_insert(&uberblock_tree, &cksum, l); 5466 5467 label->uberblocks[i] = rec; 5468 } 5469 } 5470 5471 /* 5472 * Dump the label and uberblocks. 5473 */ 5474 for (int l = 0; l < VDEV_LABELS; l++) { 5475 zdb_label_t *label = &labels[l]; 5476 size_t buflen = sizeof (label->label.vl_vdev_phys.vp_nvlist); 5477 5478 if (label->read_failed == B_TRUE) 5479 continue; 5480 5481 if (label->config_nv) { 5482 dump_config_from_label(label, buflen, l); 5483 } else { 5484 if (!dump_opt['q']) 5485 (void) printf("failed to unpack label %d\n", l); 5486 } 5487 5488 if (dump_opt['u']) 5489 dump_label_uberblocks(label, ashift, l); 5490 5491 nvlist_free(label->config_nv); 5492 } 5493 5494 /* 5495 * Dump the L2ARC header, if present. 5496 */ 5497 if (read_l2arc_header) 5498 error |= dump_l2arc_header(fd); 5499 5500 cookie = NULL; 5501 while ((node = avl_destroy_nodes(&config_tree, &cookie)) != NULL) 5502 umem_free(node, sizeof (cksum_record_t)); 5503 5504 cookie = NULL; 5505 while ((node = avl_destroy_nodes(&uberblock_tree, &cookie)) != NULL) 5506 umem_free(node, sizeof (cksum_record_t)); 5507 5508 avl_destroy(&config_tree); 5509 avl_destroy(&uberblock_tree); 5510 5511 (void) close(fd); 5512 5513 return (config_found == B_FALSE ? 2 : 5514 (error == B_TRUE ?
1 : 0)); 5515 } 5516 5517 static uint64_t dataset_feature_count[SPA_FEATURES]; 5518 static uint64_t global_feature_count[SPA_FEATURES]; 5519 static uint64_t remap_deadlist_count = 0; 5520 5521 static int 5522 dump_one_objset(const char *dsname, void *arg) 5523 { 5524 (void) arg; 5525 int error; 5526 objset_t *os; 5527 spa_feature_t f; 5528 5529 error = open_objset(dsname, FTAG, &os); 5530 if (error != 0) 5531 return (0); 5532 5533 for (f = 0; f < SPA_FEATURES; f++) { 5534 if (!dsl_dataset_feature_is_active(dmu_objset_ds(os), f)) 5535 continue; 5536 ASSERT(spa_feature_table[f].fi_flags & 5537 ZFEATURE_FLAG_PER_DATASET); 5538 dataset_feature_count[f]++; 5539 } 5540 5541 if (dsl_dataset_remap_deadlist_exists(dmu_objset_ds(os))) { 5542 remap_deadlist_count++; 5543 } 5544 5545 for (dsl_bookmark_node_t *dbn = 5546 avl_first(&dmu_objset_ds(os)->ds_bookmarks); dbn != NULL; 5547 dbn = AVL_NEXT(&dmu_objset_ds(os)->ds_bookmarks, dbn)) { 5548 mos_obj_refd(dbn->dbn_phys.zbm_redaction_obj); 5549 if (dbn->dbn_phys.zbm_redaction_obj != 0) { 5550 global_feature_count[ 5551 SPA_FEATURE_REDACTION_BOOKMARKS]++; 5552 objset_t *mos = os->os_spa->spa_meta_objset; 5553 dnode_t *rl; 5554 VERIFY0(dnode_hold(mos, 5555 dbn->dbn_phys.zbm_redaction_obj, FTAG, &rl)); 5556 if (rl->dn_have_spill) { 5557 global_feature_count[ 5558 SPA_FEATURE_REDACTION_LIST_SPILL]++; 5559 } 5560 } 5561 if (dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN) 5562 global_feature_count[SPA_FEATURE_BOOKMARK_WRITTEN]++; 5563 } 5564 5565 if (dsl_deadlist_is_open(&dmu_objset_ds(os)->ds_dir->dd_livelist) && 5566 !dmu_objset_is_snapshot(os)) { 5567 global_feature_count[SPA_FEATURE_LIVELIST]++; 5568 } 5569 5570 dump_objset(os); 5571 close_objset(os, FTAG); 5572 fuid_table_destroy(); 5573 return (0); 5574 } 5575 5576 /* 5577 * Block statistics. 5578 */ 5579 #define PSIZE_HISTO_SIZE (SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 2) 5580 typedef struct zdb_blkstats { 5581 uint64_t zb_asize; 5582 uint64_t zb_lsize; 5583 uint64_t zb_psize; 5584 uint64_t zb_count; 5585 uint64_t zb_gangs; 5586 uint64_t zb_ditto_samevdev; 5587 uint64_t zb_ditto_same_ms; 5588 uint64_t zb_psize_histogram[PSIZE_HISTO_SIZE]; 5589 } zdb_blkstats_t; 5590 5591 /* 5592 * Extended object types to report deferred frees and dedup auto-ditto blocks. 
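 *
 * These pseudo-types sit just past DMU_OT_NUMTYPES so the
 * zcb_type[][] stats array can track them (plus a grand "Total" row)
 * alongside the real object types; zdb_ot_extname[] below supplies
 * their printable names.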
5593 */ 5594 #define ZDB_OT_DEFERRED (DMU_OT_NUMTYPES + 0) 5595 #define ZDB_OT_DITTO (DMU_OT_NUMTYPES + 1) 5596 #define ZDB_OT_OTHER (DMU_OT_NUMTYPES + 2) 5597 #define ZDB_OT_TOTAL (DMU_OT_NUMTYPES + 3) 5598 5599 static const char *zdb_ot_extname[] = { 5600 "deferred free", 5601 "dedup ditto", 5602 "other", 5603 "Total", 5604 }; 5605 5606 #define ZB_TOTAL DN_MAX_LEVELS 5607 #define SPA_MAX_FOR_16M (SPA_MAXBLOCKSHIFT+1) 5608 5609 typedef struct zdb_brt_entry { 5610 dva_t zbre_dva; 5611 uint64_t zbre_refcount; 5612 avl_node_t zbre_node; 5613 } zdb_brt_entry_t; 5614 5615 typedef struct zdb_cb { 5616 zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1]; 5617 uint64_t zcb_removing_size; 5618 uint64_t zcb_checkpoint_size; 5619 uint64_t zcb_dedup_asize; 5620 uint64_t zcb_dedup_blocks; 5621 uint64_t zcb_clone_asize; 5622 uint64_t zcb_clone_blocks; 5623 uint64_t zcb_psize_count[SPA_MAX_FOR_16M]; 5624 uint64_t zcb_lsize_count[SPA_MAX_FOR_16M]; 5625 uint64_t zcb_asize_count[SPA_MAX_FOR_16M]; 5626 uint64_t zcb_psize_len[SPA_MAX_FOR_16M]; 5627 uint64_t zcb_lsize_len[SPA_MAX_FOR_16M]; 5628 uint64_t zcb_asize_len[SPA_MAX_FOR_16M]; 5629 uint64_t zcb_psize_total; 5630 uint64_t zcb_lsize_total; 5631 uint64_t zcb_asize_total; 5632 uint64_t zcb_embedded_blocks[NUM_BP_EMBEDDED_TYPES]; 5633 uint64_t zcb_embedded_histogram[NUM_BP_EMBEDDED_TYPES] 5634 [BPE_PAYLOAD_SIZE + 1]; 5635 uint64_t zcb_start; 5636 hrtime_t zcb_lastprint; 5637 uint64_t zcb_totalasize; 5638 uint64_t zcb_errors[256]; 5639 int zcb_readfails; 5640 int zcb_haderrors; 5641 spa_t *zcb_spa; 5642 uint32_t **zcb_vd_obsolete_counts; 5643 avl_tree_t zcb_brt; 5644 boolean_t zcb_brt_is_active; 5645 } zdb_cb_t; 5646 5647 /* test if two DVA offsets from same vdev are within the same metaslab */ 5648 static boolean_t 5649 same_metaslab(spa_t *spa, uint64_t vdev, uint64_t off1, uint64_t off2) 5650 { 5651 vdev_t *vd = vdev_lookup_top(spa, vdev); 5652 uint64_t ms_shift = vd->vdev_ms_shift; 5653 5654 return ((off1 >> ms_shift) == (off2 >> ms_shift)); 5655 } 5656 5657 /* 5658 * Used to simplify reporting of the histogram data. 5659 */ 5660 typedef struct one_histo { 5661 const char *name; 5662 uint64_t *count; 5663 uint64_t *len; 5664 uint64_t cumulative; 5665 } one_histo_t; 5666 5667 /* 5668 * The number of separate histograms processed for psize, lsize and asize. 5669 */ 5670 #define NUM_HISTO 3 5671 5672 /* 5673 * This routine creates fixed-width column output for three histograms, 5674 * showing, for each block size from 512 up to 2^SPA_MAX_FOR_16M, 5675 * the count, length and cumulative length of the psize, lsize and 5676 * asize blocks. 5677 * 5678 * All three types of blocks are listed on a single line. 5679 * 5680 * By default the table is printed in nicenum format (e.g. 123K), but 5681 * if the '-P' parameter is specified then the full raw (parseable) number 5682 * is printed out. 5683 */ 5684 static void 5685 dump_size_histograms(zdb_cb_t *zcb) 5686 { 5687 /* 5688 * A temporary buffer that allows us to convert a number into 5689 * a string using zdb_nicenum to allow either raw or human- 5690 * readable numbers to be output. 5691 */ 5692 char numbuf[32]; 5693 5694 /* 5695 * Define titles which are used in the headers of the tables 5696 * printed by this routine.
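 * In the human-readable layout this yields a two-line header: the
 * histogram names (psize, lsize, asize) on the first line, and a
 * Count/Size/Cum. triple beneath each of them on the second.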
5697 */ 5698 const char blocksize_title1[] = "block"; 5699 const char blocksize_title2[] = "size"; 5700 const char count_title[] = "Count"; 5701 const char length_title[] = "Size"; 5702 const char cumulative_title[] = "Cum."; 5703 5704 /* 5705 * Setup the histogram arrays (psize, lsize, and asize). 5706 */ 5707 one_histo_t parm_histo[NUM_HISTO]; 5708 5709 parm_histo[0].name = "psize"; 5710 parm_histo[0].count = zcb->zcb_psize_count; 5711 parm_histo[0].len = zcb->zcb_psize_len; 5712 parm_histo[0].cumulative = 0; 5713 5714 parm_histo[1].name = "lsize"; 5715 parm_histo[1].count = zcb->zcb_lsize_count; 5716 parm_histo[1].len = zcb->zcb_lsize_len; 5717 parm_histo[1].cumulative = 0; 5718 5719 parm_histo[2].name = "asize"; 5720 parm_histo[2].count = zcb->zcb_asize_count; 5721 parm_histo[2].len = zcb->zcb_asize_len; 5722 parm_histo[2].cumulative = 0; 5723 5724 5725 (void) printf("\nBlock Size Histogram\n"); 5726 /* 5727 * Print the first line titles 5728 */ 5729 if (dump_opt['P']) 5730 (void) printf("\n%s\t", blocksize_title1); 5731 else 5732 (void) printf("\n%7s ", blocksize_title1); 5733 5734 for (int j = 0; j < NUM_HISTO; j++) { 5735 if (dump_opt['P']) { 5736 if (j < NUM_HISTO - 1) { 5737 (void) printf("%s\t\t\t", parm_histo[j].name); 5738 } else { 5739 /* Don't print trailing spaces */ 5740 (void) printf(" %s", parm_histo[j].name); 5741 } 5742 } else { 5743 if (j < NUM_HISTO - 1) { 5744 /* Left aligned strings in the output */ 5745 (void) printf("%-7s ", 5746 parm_histo[j].name); 5747 } else { 5748 /* Don't print trailing spaces */ 5749 (void) printf("%s", parm_histo[j].name); 5750 } 5751 } 5752 } 5753 (void) printf("\n"); 5754 5755 /* 5756 * Print the second line titles 5757 */ 5758 if (dump_opt['P']) { 5759 (void) printf("%s\t", blocksize_title2); 5760 } else { 5761 (void) printf("%7s ", blocksize_title2); 5762 } 5763 5764 for (int i = 0; i < NUM_HISTO; i++) { 5765 if (dump_opt['P']) { 5766 (void) printf("%s\t%s\t%s\t", 5767 count_title, length_title, cumulative_title); 5768 } else { 5769 (void) printf("%7s%7s%7s", 5770 count_title, length_title, cumulative_title); 5771 } 5772 } 5773 (void) printf("\n"); 5774 5775 /* 5776 * Print the rows 5777 */ 5778 for (int i = SPA_MINBLOCKSHIFT; i < SPA_MAX_FOR_16M; i++) { 5779 5780 /* 5781 * Print the first column showing the blocksize 5782 */ 5783 zdb_nicenum((1ULL << i), numbuf, sizeof (numbuf)); 5784 5785 if (dump_opt['P']) { 5786 printf("%s", numbuf); 5787 } else { 5788 printf("%7s:", numbuf); 5789 } 5790 5791 /* 5792 * Print the remaining set of 3 columns per size: 5793 * for psize, lsize and asize 5794 */ 5795 for (int j = 0; j < NUM_HISTO; j++) { 5796 parm_histo[j].cumulative += parm_histo[j].len[i]; 5797 5798 zdb_nicenum(parm_histo[j].count[i], 5799 numbuf, sizeof (numbuf)); 5800 if (dump_opt['P']) 5801 (void) printf("\t%s", numbuf); 5802 else 5803 (void) printf("%7s", numbuf); 5804 5805 zdb_nicenum(parm_histo[j].len[i], 5806 numbuf, sizeof (numbuf)); 5807 if (dump_opt['P']) 5808 (void) printf("\t%s", numbuf); 5809 else 5810 (void) printf("%7s", numbuf); 5811 5812 zdb_nicenum(parm_histo[j].cumulative, 5813 numbuf, sizeof (numbuf)); 5814 if (dump_opt['P']) 5815 (void) printf("\t%s", numbuf); 5816 else 5817 (void) printf("%7s", numbuf); 5818 } 5819 (void) printf("\n"); 5820 } 5821 } 5822 5823 static void 5824 zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp, 5825 dmu_object_type_t type) 5826 { 5827 int i; 5828 5829 ASSERT(type < ZDB_OT_TOTAL); 5830 5831 if (zilog && zil_bp_tree_add(zilog, bp) != 0) 5832 return; 5833 5834 /* 5835 * 
This flag controls whether we will issue a claim for the block while
	 * counting it, to ensure that all blocks are referenced in space maps.
	 * We don't issue claims if we're not doing leak tracking, because it's
	 * expensive if the user isn't interested. We also don't claim the
	 * second or later occurrences of cloned or dedup'd blocks, because we
	 * already claimed them the first time.
	 */
	boolean_t do_claim = !dump_opt['L'];

	spa_config_enter(zcb->zcb_spa, SCL_CONFIG, FTAG, RW_READER);

	blkptr_t tempbp;
	if (BP_GET_DEDUP(bp)) {
		/*
		 * Dedup'd blocks are special. We need to count them, so we can
		 * later uncount them when reporting leaked space, and we must
		 * only claim them once.
		 *
		 * We use the existing dedup system to track what we've seen.
		 * The first time we see a block, we do a ddt_lookup() to see
		 * if it exists in the DDT. If we're doing leak tracking, we
		 * claim the block at this time.
		 *
		 * Each time we see a block, we reduce the refcount in the
		 * entry by one, and add to the size and count of dedup'd
		 * blocks to report at the end.
		 */

		ddt_t *ddt = ddt_select(zcb->zcb_spa, bp);

		ddt_enter(ddt);

		/*
		 * Find the block. This will create the entry in memory, but
		 * we'll know if that happened by its refcount.
		 */
		ddt_entry_t *dde = ddt_lookup(ddt, bp, B_TRUE);

		/*
		 * ddt_lookup() can return NULL if this block didn't exist
		 * in the DDT and creating it would take the DDT over its
		 * quota. Since we got the block from disk, it must exist in
		 * the DDT, so this can't happen. However, when unique entries
		 * are pruned, the dedup bit can be set with no corresponding
		 * entry in the DDT.
		 */
		if (dde == NULL) {
			ddt_exit(ddt);
			goto skipped;
		}

		/* Get the phys for this variant */
		ddt_phys_variant_t v = ddt_phys_select(ddt, dde, bp);

		/*
		 * This entry may have multiple sets of DVAs. We must claim
		 * each set the first time we see them in a real block on disk,
		 * or count them on subsequent occurrences. We don't have a
		 * convenient way to track the first time we see each variant,
		 * so we repurpose dde_io as a set of "seen" flag bits. We can
		 * do this safely in zdb because it never writes, so it will
		 * never have a writing zio for this block in that pointer.
		 */
		boolean_t seen = !!(((uintptr_t)dde->dde_io) & (1 << v));
		if (!seen)
			dde->dde_io =
			    (void *)(((uintptr_t)dde->dde_io) | (1 << v));

		/* Consume a reference for this block. */
		if (ddt_phys_total_refcnt(ddt, dde->dde_phys) > 0)
			ddt_phys_decref(dde->dde_phys, v);

		/*
		 * If this entry has a single flat phys, it may have been
		 * extended with additional DVAs at some time in its life.
		 * This block might be from before it was fully extended, and
		 * so have fewer DVAs.
		 *
		 * If this is the first time we've seen this block, and we
		 * claimed it as-is, then we would miss the claim on some
		 * number of DVAs, which would then be seen as leaked.
		 *
		 * In all cases, if we've had fewer DVAs, then the asize would
		 * be too small, and would lead to the pool apparently using
		 * more space than allocated.
		 *
		 * To handle this, we copy the canonical set of DVAs from the
		 * entry back to the block pointer before we claim it.
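		 * (One way this can happen: a block first written with
		 * copies=1 whose data is later rewritten with copies=2
		 * extends the entry to two DVAs, while the older BPs on
		 * disk still carry only one.)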
		 */
		if (v == DDT_PHYS_FLAT) {
			ASSERT3U(BP_GET_PHYSICAL_BIRTH(bp), ==,
			    ddt_phys_birth(dde->dde_phys, v));
			tempbp = *bp;
			ddt_bp_fill(dde->dde_phys, v, &tempbp,
			    BP_GET_PHYSICAL_BIRTH(bp));
			bp = &tempbp;
		}

		if (seen) {
			/*
			 * The second or later time we see this block,
			 * it's a duplicate and we count it.
			 */
			zcb->zcb_dedup_asize += BP_GET_ASIZE(bp);
			zcb->zcb_dedup_blocks++;

			/* Already claimed, don't do it again. */
			do_claim = B_FALSE;
		}

		ddt_exit(ddt);
	} else if (zcb->zcb_brt_is_active &&
	    brt_maybe_exists(zcb->zcb_spa, bp)) {
		/*
		 * Cloned blocks are special. We need to count them, so we can
		 * later uncount them when reporting leaked space, and we must
		 * only claim them once.
		 *
		 * To do this, we keep our own in-memory BRT. For each block
		 * we haven't seen before, we look it up in the real BRT and
		 * if it's there, we note it and its refcount, then proceed as
		 * normal. If we see the block again, we count it as a clone
		 * and then give it no further consideration.
		 */
		zdb_brt_entry_t zbre_search, *zbre;
		avl_index_t where;

		zbre_search.zbre_dva = bp->blk_dva[0];
		zbre = avl_find(&zcb->zcb_brt, &zbre_search, &where);
		if (zbre == NULL) {
			/* Not seen before; track it */
			uint64_t refcnt =
			    brt_entry_get_refcount(zcb->zcb_spa, bp);
			if (refcnt > 0) {
				zbre = umem_zalloc(sizeof (zdb_brt_entry_t),
				    UMEM_NOFAIL);
				zbre->zbre_dva = bp->blk_dva[0];
				zbre->zbre_refcount = refcnt;
				avl_insert(&zcb->zcb_brt, zbre, where);
			}
		} else {
			/*
			 * Second or later occurrence, count it and take a
			 * refcount.
			 */
			zcb->zcb_clone_asize += BP_GET_ASIZE(bp);
			zcb->zcb_clone_blocks++;

			zbre->zbre_refcount--;
			if (zbre->zbre_refcount == 0) {
				avl_remove(&zcb->zcb_brt, zbre);
				umem_free(zbre, sizeof (zdb_brt_entry_t));
			}

			/* Already claimed, don't do it again. */
			do_claim = B_FALSE;
		}
	}

skipped:
	for (i = 0; i < 4; i++) {
		int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
		int t = (i & 1) ? type : ZDB_OT_TOTAL;
		int equal;
		zdb_blkstats_t *zb = &zcb->zcb_type[l][t];

		zb->zb_asize += BP_GET_ASIZE(bp);
		zb->zb_lsize += BP_GET_LSIZE(bp);
		zb->zb_psize += BP_GET_PSIZE(bp);
		zb->zb_count++;

		/*
		 * The histogram is only big enough to record blocks up to
		 * SPA_OLD_MAXBLOCKSIZE; larger blocks go into the last,
		 * "other", bucket.
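		 * For example, with SPA_MINBLOCKSHIFT == 9, a 512-byte block
		 * increments bucket 1 and a 128K block increments bucket 256;
		 * anything larger collapses into the final "other" bucket,
		 * index 257.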
6010 */ 6011 unsigned idx = BP_GET_PSIZE(bp) >> SPA_MINBLOCKSHIFT; 6012 idx = MIN(idx, SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 1); 6013 zb->zb_psize_histogram[idx]++; 6014 6015 zb->zb_gangs += BP_COUNT_GANG(bp); 6016 6017 switch (BP_GET_NDVAS(bp)) { 6018 case 2: 6019 if (DVA_GET_VDEV(&bp->blk_dva[0]) == 6020 DVA_GET_VDEV(&bp->blk_dva[1])) { 6021 zb->zb_ditto_samevdev++; 6022 6023 if (same_metaslab(zcb->zcb_spa, 6024 DVA_GET_VDEV(&bp->blk_dva[0]), 6025 DVA_GET_OFFSET(&bp->blk_dva[0]), 6026 DVA_GET_OFFSET(&bp->blk_dva[1]))) 6027 zb->zb_ditto_same_ms++; 6028 } 6029 break; 6030 case 3: 6031 equal = (DVA_GET_VDEV(&bp->blk_dva[0]) == 6032 DVA_GET_VDEV(&bp->blk_dva[1])) + 6033 (DVA_GET_VDEV(&bp->blk_dva[0]) == 6034 DVA_GET_VDEV(&bp->blk_dva[2])) + 6035 (DVA_GET_VDEV(&bp->blk_dva[1]) == 6036 DVA_GET_VDEV(&bp->blk_dva[2])); 6037 if (equal != 0) { 6038 zb->zb_ditto_samevdev++; 6039 6040 if (DVA_GET_VDEV(&bp->blk_dva[0]) == 6041 DVA_GET_VDEV(&bp->blk_dva[1]) && 6042 same_metaslab(zcb->zcb_spa, 6043 DVA_GET_VDEV(&bp->blk_dva[0]), 6044 DVA_GET_OFFSET(&bp->blk_dva[0]), 6045 DVA_GET_OFFSET(&bp->blk_dva[1]))) 6046 zb->zb_ditto_same_ms++; 6047 else if (DVA_GET_VDEV(&bp->blk_dva[0]) == 6048 DVA_GET_VDEV(&bp->blk_dva[2]) && 6049 same_metaslab(zcb->zcb_spa, 6050 DVA_GET_VDEV(&bp->blk_dva[0]), 6051 DVA_GET_OFFSET(&bp->blk_dva[0]), 6052 DVA_GET_OFFSET(&bp->blk_dva[2]))) 6053 zb->zb_ditto_same_ms++; 6054 else if (DVA_GET_VDEV(&bp->blk_dva[1]) == 6055 DVA_GET_VDEV(&bp->blk_dva[2]) && 6056 same_metaslab(zcb->zcb_spa, 6057 DVA_GET_VDEV(&bp->blk_dva[1]), 6058 DVA_GET_OFFSET(&bp->blk_dva[1]), 6059 DVA_GET_OFFSET(&bp->blk_dva[2]))) 6060 zb->zb_ditto_same_ms++; 6061 } 6062 break; 6063 } 6064 } 6065 6066 spa_config_exit(zcb->zcb_spa, SCL_CONFIG, FTAG); 6067 6068 if (BP_IS_EMBEDDED(bp)) { 6069 zcb->zcb_embedded_blocks[BPE_GET_ETYPE(bp)]++; 6070 zcb->zcb_embedded_histogram[BPE_GET_ETYPE(bp)] 6071 [BPE_GET_PSIZE(bp)]++; 6072 return; 6073 } 6074 /* 6075 * The binning histogram bins by powers of two up to 6076 * SPA_MAXBLOCKSIZE rather than creating bins for 6077 * every possible blocksize found in the pool. 
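	 * Since highbit64(x) returns the 1-based position of the highest set
	 * bit, highbit64(x) - 1 is floor(log2(x)); e.g. a 128K (2^17 byte)
	 * block is counted in bin 17.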
6078 */ 6079 int bin = highbit64(BP_GET_PSIZE(bp)) - 1; 6080 6081 zcb->zcb_psize_count[bin]++; 6082 zcb->zcb_psize_len[bin] += BP_GET_PSIZE(bp); 6083 zcb->zcb_psize_total += BP_GET_PSIZE(bp); 6084 6085 bin = highbit64(BP_GET_LSIZE(bp)) - 1; 6086 6087 zcb->zcb_lsize_count[bin]++; 6088 zcb->zcb_lsize_len[bin] += BP_GET_LSIZE(bp); 6089 zcb->zcb_lsize_total += BP_GET_LSIZE(bp); 6090 6091 bin = highbit64(BP_GET_ASIZE(bp)) - 1; 6092 6093 zcb->zcb_asize_count[bin]++; 6094 zcb->zcb_asize_len[bin] += BP_GET_ASIZE(bp); 6095 zcb->zcb_asize_total += BP_GET_ASIZE(bp); 6096 6097 if (!do_claim) 6098 return; 6099 6100 VERIFY0(zio_wait(zio_claim(NULL, zcb->zcb_spa, 6101 spa_min_claim_txg(zcb->zcb_spa), bp, NULL, NULL, 6102 ZIO_FLAG_CANFAIL))); 6103 } 6104 6105 static void 6106 zdb_blkptr_done(zio_t *zio) 6107 { 6108 spa_t *spa = zio->io_spa; 6109 blkptr_t *bp = zio->io_bp; 6110 int ioerr = zio->io_error; 6111 zdb_cb_t *zcb = zio->io_private; 6112 zbookmark_phys_t *zb = &zio->io_bookmark; 6113 6114 mutex_enter(&spa->spa_scrub_lock); 6115 spa->spa_load_verify_bytes -= BP_GET_PSIZE(bp); 6116 cv_broadcast(&spa->spa_scrub_io_cv); 6117 6118 if (ioerr && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { 6119 char blkbuf[BP_SPRINTF_LEN]; 6120 6121 zcb->zcb_haderrors = 1; 6122 zcb->zcb_errors[ioerr]++; 6123 6124 if (dump_opt['b'] >= 2) 6125 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); 6126 else 6127 blkbuf[0] = '\0'; 6128 6129 (void) printf("zdb_blkptr_cb: " 6130 "Got error %d reading " 6131 "<%llu, %llu, %lld, %llx> %s -- skipping\n", 6132 ioerr, 6133 (u_longlong_t)zb->zb_objset, 6134 (u_longlong_t)zb->zb_object, 6135 (u_longlong_t)zb->zb_level, 6136 (u_longlong_t)zb->zb_blkid, 6137 blkbuf); 6138 } 6139 mutex_exit(&spa->spa_scrub_lock); 6140 6141 abd_free(zio->io_abd); 6142 } 6143 6144 static int 6145 zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, 6146 const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) 6147 { 6148 zdb_cb_t *zcb = arg; 6149 dmu_object_type_t type; 6150 boolean_t is_metadata; 6151 6152 if (zb->zb_level == ZB_DNODE_LEVEL) 6153 return (0); 6154 6155 if (dump_opt['b'] >= 5 && BP_GET_BIRTH(bp) > 0) { 6156 char blkbuf[BP_SPRINTF_LEN]; 6157 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); 6158 (void) printf("objset %llu object %llu " 6159 "level %lld offset 0x%llx %s\n", 6160 (u_longlong_t)zb->zb_objset, 6161 (u_longlong_t)zb->zb_object, 6162 (longlong_t)zb->zb_level, 6163 (u_longlong_t)blkid2offset(dnp, bp, zb), 6164 blkbuf); 6165 } 6166 6167 if (BP_IS_HOLE(bp) || BP_IS_REDACTED(bp)) 6168 return (0); 6169 6170 type = BP_GET_TYPE(bp); 6171 6172 zdb_count_block(zcb, zilog, bp, 6173 (type & DMU_OT_NEWTYPE) ? ZDB_OT_OTHER : type); 6174 6175 is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)); 6176 6177 if (!BP_IS_EMBEDDED(bp) && 6178 (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata))) { 6179 size_t size = BP_GET_PSIZE(bp); 6180 abd_t *abd = abd_alloc(size, B_FALSE); 6181 int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW; 6182 6183 /* If it's an intent log block, failure is expected. 
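		 * The ZIL chain is only valid up to its logical end, so a
		 * block past the end may contain stale data that fails
		 * checksum; the speculative flag keeps zdb_blkptr_done()
		 * from reporting such reads as errors.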
*/ 6184 if (zb->zb_level == ZB_ZIL_LEVEL) 6185 flags |= ZIO_FLAG_SPECULATIVE; 6186 6187 mutex_enter(&spa->spa_scrub_lock); 6188 while (spa->spa_load_verify_bytes > max_inflight_bytes) 6189 cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); 6190 spa->spa_load_verify_bytes += size; 6191 mutex_exit(&spa->spa_scrub_lock); 6192 6193 zio_nowait(zio_read(NULL, spa, bp, abd, size, 6194 zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb)); 6195 } 6196 6197 zcb->zcb_readfails = 0; 6198 6199 /* only call gethrtime() every 100 blocks */ 6200 static int iters; 6201 if (++iters > 100) 6202 iters = 0; 6203 else 6204 return (0); 6205 6206 if (dump_opt['b'] < 5 && gethrtime() > zcb->zcb_lastprint + NANOSEC) { 6207 uint64_t now = gethrtime(); 6208 char buf[10]; 6209 uint64_t bytes = zcb->zcb_type[ZB_TOTAL][ZDB_OT_TOTAL].zb_asize; 6210 uint64_t kb_per_sec = 6211 1 + bytes / (1 + ((now - zcb->zcb_start) / 1000 / 1000)); 6212 uint64_t sec_remaining = 6213 (zcb->zcb_totalasize - bytes) / 1024 / kb_per_sec; 6214 6215 /* make sure nicenum has enough space */ 6216 _Static_assert(sizeof (buf) >= NN_NUMBUF_SZ, "buf truncated"); 6217 6218 zfs_nicebytes(bytes, buf, sizeof (buf)); 6219 (void) fprintf(stderr, 6220 "\r%5s completed (%4"PRIu64"MB/s) " 6221 "estimated time remaining: " 6222 "%"PRIu64"hr %02"PRIu64"min %02"PRIu64"sec ", 6223 buf, kb_per_sec / 1024, 6224 sec_remaining / 60 / 60, 6225 sec_remaining / 60 % 60, 6226 sec_remaining % 60); 6227 6228 zcb->zcb_lastprint = now; 6229 } 6230 6231 return (0); 6232 } 6233 6234 static void 6235 zdb_leak(void *arg, uint64_t start, uint64_t size) 6236 { 6237 vdev_t *vd = arg; 6238 6239 (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n", 6240 (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size); 6241 } 6242 6243 static metaslab_ops_t zdb_metaslab_ops = { 6244 NULL /* alloc */ 6245 }; 6246 6247 static int 6248 load_unflushed_svr_segs_cb(spa_t *spa, space_map_entry_t *sme, 6249 uint64_t txg, void *arg) 6250 { 6251 spa_vdev_removal_t *svr = arg; 6252 6253 uint64_t offset = sme->sme_offset; 6254 uint64_t size = sme->sme_run; 6255 6256 /* skip vdevs we don't care about */ 6257 if (sme->sme_vdev != svr->svr_vdev_id) 6258 return (0); 6259 6260 vdev_t *vd = vdev_lookup_top(spa, sme->sme_vdev); 6261 metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift]; 6262 ASSERT(sme->sme_type == SM_ALLOC || sme->sme_type == SM_FREE); 6263 6264 if (txg < metaslab_unflushed_txg(ms)) 6265 return (0); 6266 6267 if (sme->sme_type == SM_ALLOC) 6268 zfs_range_tree_add(svr->svr_allocd_segs, offset, size); 6269 else 6270 zfs_range_tree_remove(svr->svr_allocd_segs, offset, size); 6271 6272 return (0); 6273 } 6274 6275 static void 6276 claim_segment_impl_cb(uint64_t inner_offset, vdev_t *vd, uint64_t offset, 6277 uint64_t size, void *arg) 6278 { 6279 (void) inner_offset, (void) arg; 6280 6281 /* 6282 * This callback was called through a remap from 6283 * a device being removed. Therefore, the vdev that 6284 * this callback is applied to is a concrete 6285 * vdev. 
6286 */ 6287 ASSERT(vdev_is_concrete(vd)); 6288 6289 VERIFY0(metaslab_claim_impl(vd, offset, size, 6290 spa_min_claim_txg(vd->vdev_spa))); 6291 } 6292 6293 static void 6294 claim_segment_cb(void *arg, uint64_t offset, uint64_t size) 6295 { 6296 vdev_t *vd = arg; 6297 6298 vdev_indirect_ops.vdev_op_remap(vd, offset, size, 6299 claim_segment_impl_cb, NULL); 6300 } 6301 6302 /* 6303 * After accounting for all allocated blocks that are directly referenced, 6304 * we might have missed a reference to a block from a partially complete 6305 * (and thus unused) indirect mapping object. We perform a secondary pass 6306 * through the metaslabs we have already mapped and claim the destination 6307 * blocks. 6308 */ 6309 static void 6310 zdb_claim_removing(spa_t *spa, zdb_cb_t *zcb) 6311 { 6312 if (dump_opt['L']) 6313 return; 6314 6315 if (spa->spa_vdev_removal == NULL) 6316 return; 6317 6318 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 6319 6320 spa_vdev_removal_t *svr = spa->spa_vdev_removal; 6321 vdev_t *vd = vdev_lookup_top(spa, svr->svr_vdev_id); 6322 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; 6323 6324 ASSERT0(zfs_range_tree_space(svr->svr_allocd_segs)); 6325 6326 zfs_range_tree_t *allocs = zfs_range_tree_create_flags( 6327 NULL, ZFS_RANGE_SEG64, NULL, 0, 0, 6328 0, "zdb_claim_removing:allocs"); 6329 for (uint64_t msi = 0; msi < vd->vdev_ms_count; msi++) { 6330 metaslab_t *msp = vd->vdev_ms[msi]; 6331 6332 ASSERT0(zfs_range_tree_space(allocs)); 6333 if (msp->ms_sm != NULL) 6334 VERIFY0(space_map_load(msp->ms_sm, allocs, SM_ALLOC)); 6335 zfs_range_tree_vacate(allocs, zfs_range_tree_add, 6336 svr->svr_allocd_segs); 6337 } 6338 zfs_range_tree_destroy(allocs); 6339 6340 iterate_through_spacemap_logs(spa, load_unflushed_svr_segs_cb, svr); 6341 6342 /* 6343 * Clear everything past what has been synced, 6344 * because we have not allocated mappings for 6345 * it yet. 
	 */
	zfs_range_tree_clear(svr->svr_allocd_segs,
	    vdev_indirect_mapping_max_offset(vim),
	    vd->vdev_asize - vdev_indirect_mapping_max_offset(vim));

	zcb->zcb_removing_size += zfs_range_tree_space(svr->svr_allocd_segs);
	zfs_range_tree_vacate(svr->svr_allocd_segs, claim_segment_cb, vd);

	spa_config_exit(spa, SCL_CONFIG, FTAG);
}

static int
increment_indirect_mapping_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed,
    dmu_tx_t *tx)
{
	(void) tx;
	zdb_cb_t *zcb = arg;
	spa_t *spa = zcb->zcb_spa;
	vdev_t *vd;
	const dva_t *dva = &bp->blk_dva[0];

	ASSERT(!bp_freed);
	ASSERT(!dump_opt['L']);
	ASSERT3U(BP_GET_NDVAS(bp), ==, 1);

	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
	vd = vdev_lookup_top(zcb->zcb_spa, DVA_GET_VDEV(dva));
	ASSERT3P(vd, !=, NULL);
	spa_config_exit(spa, SCL_VDEV, FTAG);

	ASSERT(vd->vdev_indirect_config.vic_mapping_object != 0);
	ASSERT3P(zcb->zcb_vd_obsolete_counts[vd->vdev_id], !=, NULL);

	vdev_indirect_mapping_increment_obsolete_count(
	    vd->vdev_indirect_mapping,
	    DVA_GET_OFFSET(dva), DVA_GET_ASIZE(dva),
	    zcb->zcb_vd_obsolete_counts[vd->vdev_id]);

	return (0);
}

static uint32_t *
zdb_load_obsolete_counts(vdev_t *vd)
{
	vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
	spa_t *spa = vd->vdev_spa;
	spa_condensing_indirect_phys_t *scip =
	    &spa->spa_condensing_indirect_phys;
	uint64_t obsolete_sm_object;
	uint32_t *counts;

	VERIFY0(vdev_obsolete_sm_object(vd, &obsolete_sm_object));
	EQUIV(obsolete_sm_object != 0, vd->vdev_obsolete_sm != NULL);
	counts = vdev_indirect_mapping_load_obsolete_counts(vim);
	if (vd->vdev_obsolete_sm != NULL) {
		vdev_indirect_mapping_load_obsolete_spacemap(vim, counts,
		    vd->vdev_obsolete_sm);
	}
	if (scip->scip_vdev == vd->vdev_id &&
	    scip->scip_prev_obsolete_sm_object != 0) {
		space_map_t *prev_obsolete_sm = NULL;
		VERIFY0(space_map_open(&prev_obsolete_sm, spa->spa_meta_objset,
		    scip->scip_prev_obsolete_sm_object, 0, vd->vdev_asize, 0));
		vdev_indirect_mapping_load_obsolete_spacemap(vim, counts,
		    prev_obsolete_sm);
		space_map_close(prev_obsolete_sm);
	}
	return (counts);
}

typedef struct checkpoint_sm_exclude_entry_arg {
	vdev_t *cseea_vd;
	uint64_t cseea_checkpoint_size;
} checkpoint_sm_exclude_entry_arg_t;

static int
checkpoint_sm_exclude_entry_cb(space_map_entry_t *sme, void *arg)
{
	checkpoint_sm_exclude_entry_arg_t *cseea = arg;
	vdev_t *vd = cseea->cseea_vd;
	metaslab_t *ms = vd->vdev_ms[sme->sme_offset >> vd->vdev_ms_shift];
	uint64_t end = sme->sme_offset + sme->sme_run;

	ASSERT(sme->sme_type == SM_FREE);

	/*
	 * Since the vdev_checkpoint_sm exists at the vdev level
	 * and the ms_sm space maps exist at the metaslab level,
	 * an entry in the checkpoint space map could theoretically
	 * cross the boundaries of the metaslab that it belongs to.
	 *
	 * In reality, because of the way that we populate and
	 * manipulate the checkpoint's space maps currently,
	 * there shouldn't be any entries that cross metaslabs.
	 * Hence the assertion below.
	 *
	 * That said, there is no fundamental requirement that
	 * the checkpoint's space map entries should not cross
	 * metaslab boundaries.
	 * So if needed we could add code that handles metaslab-crossing
	 * segments in the future.
	 */
	VERIFY3U(sme->sme_offset, >=, ms->ms_start);
	VERIFY3U(end, <=, ms->ms_start + ms->ms_size);

	/*
	 * By removing the entry from the allocated segments we
	 * also verify that the entry is there to begin with.
	 */
	mutex_enter(&ms->ms_lock);
	zfs_range_tree_remove(ms->ms_allocatable, sme->sme_offset,
	    sme->sme_run);
	mutex_exit(&ms->ms_lock);

	cseea->cseea_checkpoint_size += sme->sme_run;
	return (0);
}

static void
zdb_leak_init_vdev_exclude_checkpoint(vdev_t *vd, zdb_cb_t *zcb)
{
	spa_t *spa = vd->vdev_spa;
	space_map_t *checkpoint_sm = NULL;
	uint64_t checkpoint_sm_obj;

	/*
	 * If there is no vdev_top_zap, we are in a pool whose
	 * version predates the pool checkpoint feature.
	 */
	if (vd->vdev_top_zap == 0)
		return;

	/*
	 * If there is no reference to the vdev_checkpoint_sm in
	 * the vdev_top_zap, then one of the following scenarios
	 * is true:
	 *
	 * 1] There is no checkpoint
	 * 2] There is a checkpoint, but no checkpointed blocks
	 *    have been freed yet
	 * 3] The current vdev is indirect
	 *
	 * In these cases we return immediately.
	 */
	if (zap_contains(spa_meta_objset(spa), vd->vdev_top_zap,
	    VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0)
		return;

	VERIFY0(zap_lookup(spa_meta_objset(spa), vd->vdev_top_zap,
	    VDEV_TOP_ZAP_POOL_CHECKPOINT_SM, sizeof (uint64_t), 1,
	    &checkpoint_sm_obj));

	checkpoint_sm_exclude_entry_arg_t cseea;
	cseea.cseea_vd = vd;
	cseea.cseea_checkpoint_size = 0;

	VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(spa),
	    checkpoint_sm_obj, 0, vd->vdev_asize, vd->vdev_ashift));

	VERIFY0(space_map_iterate(checkpoint_sm,
	    space_map_length(checkpoint_sm),
	    checkpoint_sm_exclude_entry_cb, &cseea));
	space_map_close(checkpoint_sm);

	zcb->zcb_checkpoint_size += cseea.cseea_checkpoint_size;
}

static void
zdb_leak_init_exclude_checkpoint(spa_t *spa, zdb_cb_t *zcb)
{
	ASSERT(!dump_opt['L']);

	vdev_t *rvd = spa->spa_root_vdev;
	for (uint64_t c = 0; c < rvd->vdev_children; c++) {
		ASSERT3U(c, ==, rvd->vdev_child[c]->vdev_id);
		zdb_leak_init_vdev_exclude_checkpoint(rvd->vdev_child[c], zcb);
	}
}

static int
count_unflushed_space_cb(spa_t *spa, space_map_entry_t *sme,
    uint64_t txg, void *arg)
{
	int64_t *ualloc_space = arg;

	uint64_t offset = sme->sme_offset;
	uint64_t vdev_id = sme->sme_vdev;

	vdev_t *vd = vdev_lookup_top(spa, vdev_id);
	if (!vdev_is_concrete(vd))
		return (0);

	metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift];
	ASSERT(sme->sme_type == SM_ALLOC || sme->sme_type == SM_FREE);

	if (txg < metaslab_unflushed_txg(ms))
		return (0);

	if (sme->sme_type == SM_ALLOC)
		*ualloc_space += sme->sme_run;
	else
		*ualloc_space -= sme->sme_run;

	return (0);
}

static int64_t
get_unflushed_alloc_space(spa_t *spa)
{
	if (dump_opt['L'])
		return (0);

	int64_t ualloc_space = 0;
	iterate_through_spacemap_logs(spa, count_unflushed_space_cb,
	    &ualloc_space);
	return (ualloc_space);
}

static int
load_unflushed_cb(spa_t *spa, space_map_entry_t *sme, uint64_t txg, void *arg)
{
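	/*
	 * arg carries the maptype (SM_ALLOC or SM_FREE) being loaded into
	 * the ms_allocatable trees; see the ASSERT below.
	 */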
maptype_t *uic_maptype = arg; 6567 6568 uint64_t offset = sme->sme_offset; 6569 uint64_t size = sme->sme_run; 6570 uint64_t vdev_id = sme->sme_vdev; 6571 6572 vdev_t *vd = vdev_lookup_top(spa, vdev_id); 6573 6574 /* skip indirect vdevs */ 6575 if (!vdev_is_concrete(vd)) 6576 return (0); 6577 6578 metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift]; 6579 6580 ASSERT(sme->sme_type == SM_ALLOC || sme->sme_type == SM_FREE); 6581 ASSERT(*uic_maptype == SM_ALLOC || *uic_maptype == SM_FREE); 6582 6583 if (txg < metaslab_unflushed_txg(ms)) 6584 return (0); 6585 6586 if (*uic_maptype == sme->sme_type) 6587 zfs_range_tree_add(ms->ms_allocatable, offset, size); 6588 else 6589 zfs_range_tree_remove(ms->ms_allocatable, offset, size); 6590 6591 return (0); 6592 } 6593 6594 static void 6595 load_unflushed_to_ms_allocatables(spa_t *spa, maptype_t maptype) 6596 { 6597 iterate_through_spacemap_logs(spa, load_unflushed_cb, &maptype); 6598 } 6599 6600 static void 6601 load_concrete_ms_allocatable_trees(spa_t *spa, maptype_t maptype) 6602 { 6603 vdev_t *rvd = spa->spa_root_vdev; 6604 for (uint64_t i = 0; i < rvd->vdev_children; i++) { 6605 vdev_t *vd = rvd->vdev_child[i]; 6606 6607 ASSERT3U(i, ==, vd->vdev_id); 6608 6609 if (vd->vdev_ops == &vdev_indirect_ops) 6610 continue; 6611 6612 for (uint64_t m = 0; m < vd->vdev_ms_count; m++) { 6613 metaslab_t *msp = vd->vdev_ms[m]; 6614 6615 (void) fprintf(stderr, 6616 "\rloading concrete vdev %llu, " 6617 "metaslab %llu of %llu ...", 6618 (longlong_t)vd->vdev_id, 6619 (longlong_t)msp->ms_id, 6620 (longlong_t)vd->vdev_ms_count); 6621 6622 mutex_enter(&msp->ms_lock); 6623 zfs_range_tree_vacate(msp->ms_allocatable, NULL, NULL); 6624 6625 /* 6626 * We don't want to spend the CPU manipulating the 6627 * size-ordered tree, so clear the range_tree ops. 6628 */ 6629 msp->ms_allocatable->rt_ops = NULL; 6630 6631 if (msp->ms_sm != NULL) { 6632 VERIFY0(space_map_load(msp->ms_sm, 6633 msp->ms_allocatable, maptype)); 6634 } 6635 if (!msp->ms_loaded) 6636 msp->ms_loaded = B_TRUE; 6637 mutex_exit(&msp->ms_lock); 6638 } 6639 } 6640 6641 load_unflushed_to_ms_allocatables(spa, maptype); 6642 } 6643 6644 /* 6645 * vm_idxp is an in-out parameter which (for indirect vdevs) is the 6646 * index in vim_entries that has the first entry in this metaslab. 6647 * On return, it will be set to the first entry after this metaslab. 6648 */ 6649 static void 6650 load_indirect_ms_allocatable_tree(vdev_t *vd, metaslab_t *msp, 6651 uint64_t *vim_idxp) 6652 { 6653 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; 6654 6655 mutex_enter(&msp->ms_lock); 6656 zfs_range_tree_vacate(msp->ms_allocatable, NULL, NULL); 6657 6658 /* 6659 * We don't want to spend the CPU manipulating the 6660 * size-ordered tree, so clear the range_tree ops. 6661 */ 6662 msp->ms_allocatable->rt_ops = NULL; 6663 6664 for (; *vim_idxp < vdev_indirect_mapping_num_entries(vim); 6665 (*vim_idxp)++) { 6666 vdev_indirect_mapping_entry_phys_t *vimep = 6667 &vim->vim_entries[*vim_idxp]; 6668 uint64_t ent_offset = DVA_MAPPING_GET_SRC_OFFSET(vimep); 6669 uint64_t ent_len = DVA_GET_ASIZE(&vimep->vimep_dst); 6670 ASSERT3U(ent_offset, >=, msp->ms_start); 6671 if (ent_offset >= msp->ms_start + msp->ms_size) 6672 break; 6673 6674 /* 6675 * Mappings do not cross metaslab boundaries, 6676 * because we create them by walking the metaslabs. 
6677 */ 6678 ASSERT3U(ent_offset + ent_len, <=, 6679 msp->ms_start + msp->ms_size); 6680 zfs_range_tree_add(msp->ms_allocatable, ent_offset, ent_len); 6681 } 6682 6683 if (!msp->ms_loaded) 6684 msp->ms_loaded = B_TRUE; 6685 mutex_exit(&msp->ms_lock); 6686 } 6687 6688 static void 6689 zdb_leak_init_prepare_indirect_vdevs(spa_t *spa, zdb_cb_t *zcb) 6690 { 6691 ASSERT(!dump_opt['L']); 6692 6693 vdev_t *rvd = spa->spa_root_vdev; 6694 for (uint64_t c = 0; c < rvd->vdev_children; c++) { 6695 vdev_t *vd = rvd->vdev_child[c]; 6696 6697 ASSERT3U(c, ==, vd->vdev_id); 6698 6699 if (vd->vdev_ops != &vdev_indirect_ops) 6700 continue; 6701 6702 /* 6703 * Note: we don't check for mapping leaks on 6704 * removing vdevs because their ms_allocatable's 6705 * are used to look for leaks in allocated space. 6706 */ 6707 zcb->zcb_vd_obsolete_counts[c] = zdb_load_obsolete_counts(vd); 6708 6709 /* 6710 * Normally, indirect vdevs don't have any 6711 * metaslabs. We want to set them up for 6712 * zio_claim(). 6713 */ 6714 vdev_metaslab_group_create(vd); 6715 VERIFY0(vdev_metaslab_init(vd, 0)); 6716 6717 vdev_indirect_mapping_t *vim __maybe_unused = 6718 vd->vdev_indirect_mapping; 6719 uint64_t vim_idx = 0; 6720 for (uint64_t m = 0; m < vd->vdev_ms_count; m++) { 6721 6722 (void) fprintf(stderr, 6723 "\rloading indirect vdev %llu, " 6724 "metaslab %llu of %llu ...", 6725 (longlong_t)vd->vdev_id, 6726 (longlong_t)vd->vdev_ms[m]->ms_id, 6727 (longlong_t)vd->vdev_ms_count); 6728 6729 load_indirect_ms_allocatable_tree(vd, vd->vdev_ms[m], 6730 &vim_idx); 6731 } 6732 ASSERT3U(vim_idx, ==, vdev_indirect_mapping_num_entries(vim)); 6733 } 6734 } 6735 6736 static void 6737 zdb_leak_init(spa_t *spa, zdb_cb_t *zcb) 6738 { 6739 zcb->zcb_spa = spa; 6740 6741 if (dump_opt['L']) 6742 return; 6743 6744 dsl_pool_t *dp = spa->spa_dsl_pool; 6745 vdev_t *rvd = spa->spa_root_vdev; 6746 6747 /* 6748 * We are going to be changing the meaning of the metaslab's 6749 * ms_allocatable. Ensure that the allocator doesn't try to 6750 * use the tree. 6751 */ 6752 spa->spa_normal_class->mc_ops = &zdb_metaslab_ops; 6753 spa->spa_log_class->mc_ops = &zdb_metaslab_ops; 6754 spa->spa_embedded_log_class->mc_ops = &zdb_metaslab_ops; 6755 spa->spa_special_embedded_log_class->mc_ops = &zdb_metaslab_ops; 6756 6757 zcb->zcb_vd_obsolete_counts = 6758 umem_zalloc(rvd->vdev_children * sizeof (uint32_t *), 6759 UMEM_NOFAIL); 6760 6761 /* 6762 * For leak detection, we overload the ms_allocatable trees 6763 * to contain allocated segments instead of free segments. 6764 * As a result, we can't use the normal metaslab_load/unload 6765 * interfaces. 6766 */ 6767 zdb_leak_init_prepare_indirect_vdevs(spa, zcb); 6768 load_concrete_ms_allocatable_trees(spa, SM_ALLOC); 6769 6770 /* 6771 * On load_concrete_ms_allocatable_trees() we loaded all the 6772 * allocated entries from the ms_sm to the ms_allocatable for 6773 * each metaslab. If the pool has a checkpoint or is in the 6774 * middle of discarding a checkpoint, some of these blocks 6775 * may have been freed but their ms_sm may not have been 6776 * updated because they are referenced by the checkpoint. In 6777 * order to avoid false-positives during leak-detection, we 6778 * go through the vdev's checkpoint space map and exclude all 6779 * its entries from their relevant ms_allocatable. 6780 * 6781 * We also aggregate the space held by the checkpoint and add 6782 * it to zcb_checkpoint_size. 
	 *
	 * Note that at this point we are also verifying that all the
	 * entries on the checkpoint_sm are marked as allocated in
	 * the ms_sm of their relevant metaslab.
	 * [see comment in checkpoint_sm_exclude_entry_cb()]
	 */
	zdb_leak_init_exclude_checkpoint(spa, zcb);
	ASSERT3U(zcb->zcb_checkpoint_size, ==, spa_get_checkpoint_space(spa));

	/* for cleaner progress output */
	(void) fprintf(stderr, "\n");

	if (bpobj_is_open(&dp->dp_obsolete_bpobj)) {
		ASSERT(spa_feature_is_enabled(spa,
		    SPA_FEATURE_DEVICE_REMOVAL));
		(void) bpobj_iterate_nofree(&dp->dp_obsolete_bpobj,
		    increment_indirect_mapping_cb, zcb, NULL);
	}
}

static boolean_t
zdb_check_for_obsolete_leaks(vdev_t *vd, zdb_cb_t *zcb)
{
	boolean_t leaks = B_FALSE;
	vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
	uint64_t total_leaked = 0;
	boolean_t are_precise = B_FALSE;

	ASSERT(vim != NULL);

	for (uint64_t i = 0; i < vdev_indirect_mapping_num_entries(vim); i++) {
		vdev_indirect_mapping_entry_phys_t *vimep =
		    &vim->vim_entries[i];
		uint64_t obsolete_bytes = 0;
		uint64_t offset = DVA_MAPPING_GET_SRC_OFFSET(vimep);
		metaslab_t *msp = vd->vdev_ms[offset >> vd->vdev_ms_shift];

		/*
		 * This is not very efficient but it's easy to
		 * verify correctness.
		 */
		for (uint64_t inner_offset = 0;
		    inner_offset < DVA_GET_ASIZE(&vimep->vimep_dst);
		    inner_offset += 1ULL << vd->vdev_ashift) {
			if (zfs_range_tree_contains(msp->ms_allocatable,
			    offset + inner_offset, 1ULL << vd->vdev_ashift)) {
				obsolete_bytes += 1ULL << vd->vdev_ashift;
			}
		}

		int64_t bytes_leaked = obsolete_bytes -
		    zcb->zcb_vd_obsolete_counts[vd->vdev_id][i];
		ASSERT3U(DVA_GET_ASIZE(&vimep->vimep_dst), >=,
		    zcb->zcb_vd_obsolete_counts[vd->vdev_id][i]);

		VERIFY0(vdev_obsolete_counts_are_precise(vd, &are_precise));
		if (bytes_leaked != 0 && (are_precise || dump_opt['d'] >= 5)) {
			(void) printf("obsolete indirect mapping count "
			    "mismatch on %llu:%llx:%llx : %llx bytes leaked\n",
			    (u_longlong_t)vd->vdev_id,
			    (u_longlong_t)DVA_MAPPING_GET_SRC_OFFSET(vimep),
			    (u_longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst),
			    (u_longlong_t)bytes_leaked);
		}
		total_leaked += ABS(bytes_leaked);
	}

	VERIFY0(vdev_obsolete_counts_are_precise(vd, &are_precise));
	if (!are_precise && total_leaked > 0) {
		int pct_leaked = total_leaked * 100 /
		    vdev_indirect_mapping_bytes_mapped(vim);
		(void) printf("cannot verify obsolete indirect mapping "
		    "counts of vdev %llu because precise feature was not "
		    "enabled when it was removed: %d%% (%llx bytes) of mapping "
		    "unreferenced\n",
		    (u_longlong_t)vd->vdev_id, pct_leaked,
		    (u_longlong_t)total_leaked);
	} else if (total_leaked > 0) {
		(void) printf("obsolete indirect mapping count mismatch "
		    "for vdev %llu -- %llx total bytes mismatched\n",
		    (u_longlong_t)vd->vdev_id,
		    (u_longlong_t)total_leaked);
		leaks |= B_TRUE;
	}

	vdev_indirect_mapping_free_obsolete_counts(vim,
	    zcb->zcb_vd_obsolete_counts[vd->vdev_id]);
	zcb->zcb_vd_obsolete_counts[vd->vdev_id] = NULL;

	return (leaks);
}

static boolean_t
zdb_leak_fini(spa_t *spa, zdb_cb_t *zcb)
{
	if (dump_opt['L'])
		return (B_FALSE);

	boolean_t leaks = B_FALSE;
	vdev_t *rvd = spa->spa_root_vdev;
	for (unsigned c
= 0; c < rvd->vdev_children; c++) { 6884 vdev_t *vd = rvd->vdev_child[c]; 6885 6886 if (zcb->zcb_vd_obsolete_counts[c] != NULL) { 6887 leaks |= zdb_check_for_obsolete_leaks(vd, zcb); 6888 } 6889 6890 for (uint64_t m = 0; m < vd->vdev_ms_count; m++) { 6891 metaslab_t *msp = vd->vdev_ms[m]; 6892 ASSERT3P(msp->ms_group, ==, (msp->ms_group->mg_class == 6893 spa_embedded_log_class(spa) || 6894 msp->ms_group->mg_class == 6895 spa_special_embedded_log_class(spa)) ? 6896 vd->vdev_log_mg : vd->vdev_mg); 6897 6898 /* 6899 * ms_allocatable has been overloaded 6900 * to contain allocated segments. Now that 6901 * we finished traversing all blocks, any 6902 * block that remains in the ms_allocatable 6903 * represents an allocated block that we 6904 * did not claim during the traversal. 6905 * Claimed blocks would have been removed 6906 * from the ms_allocatable. For indirect 6907 * vdevs, space remaining in the tree 6908 * represents parts of the mapping that are 6909 * not referenced, which is not a bug. 6910 */ 6911 if (vd->vdev_ops == &vdev_indirect_ops) { 6912 zfs_range_tree_vacate(msp->ms_allocatable, 6913 NULL, NULL); 6914 } else { 6915 zfs_range_tree_vacate(msp->ms_allocatable, 6916 zdb_leak, vd); 6917 } 6918 if (msp->ms_loaded) { 6919 msp->ms_loaded = B_FALSE; 6920 } 6921 } 6922 } 6923 6924 umem_free(zcb->zcb_vd_obsolete_counts, 6925 rvd->vdev_children * sizeof (uint32_t *)); 6926 zcb->zcb_vd_obsolete_counts = NULL; 6927 6928 return (leaks); 6929 } 6930 6931 static int 6932 count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) 6933 { 6934 (void) tx; 6935 zdb_cb_t *zcb = arg; 6936 6937 if (dump_opt['b'] >= 5) { 6938 char blkbuf[BP_SPRINTF_LEN]; 6939 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); 6940 (void) printf("[%s] %s\n", 6941 "deferred free", blkbuf); 6942 } 6943 zdb_count_block(zcb, NULL, bp, ZDB_OT_DEFERRED); 6944 return (0); 6945 } 6946 6947 /* 6948 * Iterate over livelists which have been destroyed by the user but 6949 * are still present in the MOS, waiting to be freed 6950 */ 6951 static void 6952 iterate_deleted_livelists(spa_t *spa, ll_iter_t func, void *arg) 6953 { 6954 objset_t *mos = spa->spa_meta_objset; 6955 uint64_t zap_obj; 6956 int err = zap_lookup(mos, DMU_POOL_DIRECTORY_OBJECT, 6957 DMU_POOL_DELETED_CLONES, sizeof (uint64_t), 1, &zap_obj); 6958 if (err == ENOENT) 6959 return; 6960 ASSERT0(err); 6961 6962 zap_cursor_t zc; 6963 zap_attribute_t *attrp = zap_attribute_alloc(); 6964 dsl_deadlist_t ll; 6965 /* NULL out os prior to dsl_deadlist_open in case it's garbage */ 6966 ll.dl_os = NULL; 6967 for (zap_cursor_init(&zc, mos, zap_obj); 6968 zap_cursor_retrieve(&zc, attrp) == 0; 6969 (void) zap_cursor_advance(&zc)) { 6970 VERIFY0(dsl_deadlist_open(&ll, mos, attrp->za_first_integer)); 6971 func(&ll, arg); 6972 dsl_deadlist_close(&ll); 6973 } 6974 zap_cursor_fini(&zc); 6975 zap_attribute_free(attrp); 6976 } 6977 6978 static int 6979 bpobj_count_block_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, 6980 dmu_tx_t *tx) 6981 { 6982 ASSERT(!bp_freed); 6983 return (count_block_cb(arg, bp, tx)); 6984 } 6985 6986 static int 6987 livelist_entry_count_blocks_cb(void *args, dsl_deadlist_entry_t *dle) 6988 { 6989 zdb_cb_t *zbc = args; 6990 bplist_t blks; 6991 bplist_create(&blks); 6992 /* determine which blocks have been alloc'd but not freed */ 6993 VERIFY0(dsl_process_sub_livelist(&dle->dle_bpobj, &blks, NULL, NULL)); 6994 /* count those blocks */ 6995 (void) bplist_iterate(&blks, count_block_cb, zbc, NULL); 6996 bplist_destroy(&blks); 6997 return (0); 6998 } 6999 7000 static 
void 7001 livelist_count_blocks(dsl_deadlist_t *ll, void *arg) 7002 { 7003 dsl_deadlist_iterate(ll, livelist_entry_count_blocks_cb, arg); 7004 } 7005 7006 /* 7007 * Count the blocks in the livelists that have been destroyed by the user 7008 * but haven't yet been freed. 7009 */ 7010 static void 7011 deleted_livelists_count_blocks(spa_t *spa, zdb_cb_t *zbc) 7012 { 7013 iterate_deleted_livelists(spa, livelist_count_blocks, zbc); 7014 } 7015 7016 static void 7017 dump_livelist_cb(dsl_deadlist_t *ll, void *arg) 7018 { 7019 ASSERT3P(arg, ==, NULL); 7020 global_feature_count[SPA_FEATURE_LIVELIST]++; 7021 dump_blkptr_list(ll, "Deleted Livelist"); 7022 dsl_deadlist_iterate(ll, sublivelist_verify_lightweight, NULL); 7023 } 7024 7025 /* 7026 * Print out, register object references to, and increment feature counts for 7027 * livelists that have been destroyed by the user but haven't yet been freed. 7028 */ 7029 static void 7030 deleted_livelists_dump_mos(spa_t *spa) 7031 { 7032 uint64_t zap_obj; 7033 objset_t *mos = spa->spa_meta_objset; 7034 int err = zap_lookup(mos, DMU_POOL_DIRECTORY_OBJECT, 7035 DMU_POOL_DELETED_CLONES, sizeof (uint64_t), 1, &zap_obj); 7036 if (err == ENOENT) 7037 return; 7038 mos_obj_refd(zap_obj); 7039 iterate_deleted_livelists(spa, dump_livelist_cb, NULL); 7040 } 7041 7042 static int 7043 zdb_brt_entry_compare(const void *zcn1, const void *zcn2) 7044 { 7045 const dva_t *dva1 = &((const zdb_brt_entry_t *)zcn1)->zbre_dva; 7046 const dva_t *dva2 = &((const zdb_brt_entry_t *)zcn2)->zbre_dva; 7047 int cmp; 7048 7049 cmp = TREE_CMP(DVA_GET_VDEV(dva1), DVA_GET_VDEV(dva2)); 7050 if (cmp == 0) 7051 cmp = TREE_CMP(DVA_GET_OFFSET(dva1), DVA_GET_OFFSET(dva2)); 7052 7053 return (cmp); 7054 } 7055 7056 static int 7057 dump_block_stats(spa_t *spa) 7058 { 7059 zdb_cb_t *zcb; 7060 zdb_blkstats_t *zb, *tzb; 7061 uint64_t norm_alloc, norm_space, total_alloc, total_found; 7062 int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | 7063 TRAVERSE_NO_DECRYPT | TRAVERSE_HARD; 7064 boolean_t leaks = B_FALSE; 7065 int e, c, err; 7066 bp_embedded_type_t i; 7067 7068 ddt_prefetch_all(spa); 7069 7070 zcb = umem_zalloc(sizeof (zdb_cb_t), UMEM_NOFAIL); 7071 7072 if (spa_feature_is_active(spa, SPA_FEATURE_BLOCK_CLONING)) { 7073 avl_create(&zcb->zcb_brt, zdb_brt_entry_compare, 7074 sizeof (zdb_brt_entry_t), 7075 offsetof(zdb_brt_entry_t, zbre_node)); 7076 zcb->zcb_brt_is_active = B_TRUE; 7077 } 7078 7079 (void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n", 7080 (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "", 7081 (dump_opt['c'] == 1) ? "metadata " : "", 7082 dump_opt['c'] ? "checksums " : "", 7083 (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "", 7084 !dump_opt['L'] ? "nothing leaked " : ""); 7085 7086 /* 7087 * When leak detection is enabled we load all space maps as SM_ALLOC 7088 * maps, then traverse the pool claiming each block we discover. If 7089 * the pool is perfectly consistent, the segment trees will be empty 7090 * when we're done. Anything left over is a leak; any block we can't 7091 * claim (because it's not part of any space map) is a double 7092 * allocation, reference to a freed block, or an unclaimed log block. 7093 * 7094 * When leak detection is disabled (-L option) we still traverse the 7095 * pool claiming each block we discover, but we skip opening any space 7096 * maps. 7097 */ 7098 zdb_leak_init(spa, zcb); 7099 7100 /* 7101 * If there's a deferred-free bplist, process that first. 
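	 * These blocks are still allocated in the space maps, so they are
	 * counted (as ZDB_OT_DEFERRED) to keep the block accounting in
	 * balance with the space maps.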
7102 */ 7103 (void) bpobj_iterate_nofree(&spa->spa_deferred_bpobj, 7104 bpobj_count_block_cb, zcb, NULL); 7105 7106 if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { 7107 (void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj, 7108 bpobj_count_block_cb, zcb, NULL); 7109 } 7110 7111 zdb_claim_removing(spa, zcb); 7112 7113 if (spa_feature_is_active(spa, SPA_FEATURE_ASYNC_DESTROY)) { 7114 VERIFY3U(0, ==, bptree_iterate(spa->spa_meta_objset, 7115 spa->spa_dsl_pool->dp_bptree_obj, B_FALSE, count_block_cb, 7116 zcb, NULL)); 7117 } 7118 7119 deleted_livelists_count_blocks(spa, zcb); 7120 7121 if (dump_opt['c'] > 1) 7122 flags |= TRAVERSE_PREFETCH_DATA; 7123 7124 zcb->zcb_totalasize = metaslab_class_get_alloc(spa_normal_class(spa)); 7125 zcb->zcb_totalasize += metaslab_class_get_alloc(spa_special_class(spa)); 7126 zcb->zcb_totalasize += metaslab_class_get_alloc(spa_dedup_class(spa)); 7127 zcb->zcb_totalasize += 7128 metaslab_class_get_alloc(spa_embedded_log_class(spa)); 7129 zcb->zcb_totalasize += 7130 metaslab_class_get_alloc(spa_special_embedded_log_class(spa)); 7131 zcb->zcb_start = zcb->zcb_lastprint = gethrtime(); 7132 err = traverse_pool(spa, 0, flags, zdb_blkptr_cb, zcb); 7133 7134 /* 7135 * If we've traversed the data blocks then we need to wait for those 7136 * I/Os to complete. We leverage "The Godfather" zio to wait on 7137 * all async I/Os to complete. 7138 */ 7139 if (dump_opt['c']) { 7140 for (c = 0; c < max_ncpus; c++) { 7141 (void) zio_wait(spa->spa_async_zio_root[c]); 7142 spa->spa_async_zio_root[c] = zio_root(spa, NULL, NULL, 7143 ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | 7144 ZIO_FLAG_GODFATHER); 7145 } 7146 } 7147 ASSERT0(spa->spa_load_verify_bytes); 7148 7149 /* 7150 * Done after zio_wait() since zcb_haderrors is modified in 7151 * zdb_blkptr_done() 7152 */ 7153 zcb->zcb_haderrors |= err; 7154 7155 if (zcb->zcb_haderrors) { 7156 (void) printf("\nError counts:\n\n"); 7157 (void) printf("\t%5s %s\n", "errno", "count"); 7158 for (e = 0; e < 256; e++) { 7159 if (zcb->zcb_errors[e] != 0) { 7160 (void) printf("\t%5d %llu\n", 7161 e, (u_longlong_t)zcb->zcb_errors[e]); 7162 } 7163 } 7164 } 7165 7166 /* 7167 * Report any leaked segments. 7168 */ 7169 leaks |= zdb_leak_fini(spa, zcb); 7170 7171 tzb = &zcb->zcb_type[ZB_TOTAL][ZDB_OT_TOTAL]; 7172 7173 norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa)); 7174 norm_space = metaslab_class_get_space(spa_normal_class(spa)); 7175 7176 total_alloc = norm_alloc + 7177 metaslab_class_get_alloc(spa_log_class(spa)) + 7178 metaslab_class_get_alloc(spa_embedded_log_class(spa)) + 7179 metaslab_class_get_alloc(spa_special_embedded_log_class(spa)) + 7180 metaslab_class_get_alloc(spa_special_class(spa)) + 7181 metaslab_class_get_alloc(spa_dedup_class(spa)) + 7182 get_unflushed_alloc_space(spa); 7183 total_found = 7184 tzb->zb_asize - zcb->zcb_dedup_asize - zcb->zcb_clone_asize + 7185 zcb->zcb_removing_size + zcb->zcb_checkpoint_size; 7186 7187 if (total_found == total_alloc && !dump_opt['L']) { 7188 (void) printf("\n\tNo leaks (block sum matches space" 7189 " maps exactly)\n"); 7190 } else if (!dump_opt['L']) { 7191 (void) printf("block traversal size %llu != alloc %llu " 7192 "(%s %lld)\n", 7193 (u_longlong_t)total_found, 7194 (u_longlong_t)total_alloc, 7195 (dump_opt['L']) ? 
"unreachable" : "leaked", 7196 (longlong_t)(total_alloc - total_found)); 7197 } 7198 7199 if (tzb->zb_count == 0) { 7200 umem_free(zcb, sizeof (zdb_cb_t)); 7201 return (2); 7202 } 7203 7204 (void) printf("\n"); 7205 (void) printf("\t%-16s %14llu\n", "bp count:", 7206 (u_longlong_t)tzb->zb_count); 7207 (void) printf("\t%-16s %14llu\n", "ganged count:", 7208 (longlong_t)tzb->zb_gangs); 7209 (void) printf("\t%-16s %14llu avg: %6llu\n", "bp logical:", 7210 (u_longlong_t)tzb->zb_lsize, 7211 (u_longlong_t)(tzb->zb_lsize / tzb->zb_count)); 7212 (void) printf("\t%-16s %14llu avg: %6llu compression: %6.2f\n", 7213 "bp physical:", (u_longlong_t)tzb->zb_psize, 7214 (u_longlong_t)(tzb->zb_psize / tzb->zb_count), 7215 (double)tzb->zb_lsize / tzb->zb_psize); 7216 (void) printf("\t%-16s %14llu avg: %6llu compression: %6.2f\n", 7217 "bp allocated:", (u_longlong_t)tzb->zb_asize, 7218 (u_longlong_t)(tzb->zb_asize / tzb->zb_count), 7219 (double)tzb->zb_lsize / tzb->zb_asize); 7220 (void) printf("\t%-16s %14llu ref>1: %6llu deduplication: %6.2f\n", 7221 "bp deduped:", (u_longlong_t)zcb->zcb_dedup_asize, 7222 (u_longlong_t)zcb->zcb_dedup_blocks, 7223 (double)zcb->zcb_dedup_asize / tzb->zb_asize + 1.0); 7224 (void) printf("\t%-16s %14llu count: %6llu\n", 7225 "bp cloned:", (u_longlong_t)zcb->zcb_clone_asize, 7226 (u_longlong_t)zcb->zcb_clone_blocks); 7227 (void) printf("\t%-16s %14llu used: %5.2f%%\n", "Normal class:", 7228 (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space); 7229 7230 if (spa_special_class(spa)->mc_allocator[0].mca_rotor != NULL) { 7231 uint64_t alloc = metaslab_class_get_alloc( 7232 spa_special_class(spa)); 7233 uint64_t space = metaslab_class_get_space( 7234 spa_special_class(spa)); 7235 7236 (void) printf("\t%-16s %14llu used: %5.2f%%\n", 7237 "Special class", (u_longlong_t)alloc, 7238 100.0 * alloc / space); 7239 } 7240 7241 if (spa_dedup_class(spa)->mc_allocator[0].mca_rotor != NULL) { 7242 uint64_t alloc = metaslab_class_get_alloc( 7243 spa_dedup_class(spa)); 7244 uint64_t space = metaslab_class_get_space( 7245 spa_dedup_class(spa)); 7246 7247 (void) printf("\t%-16s %14llu used: %5.2f%%\n", 7248 "Dedup class", (u_longlong_t)alloc, 7249 100.0 * alloc / space); 7250 } 7251 7252 if (spa_embedded_log_class(spa)->mc_allocator[0].mca_rotor != NULL) { 7253 uint64_t alloc = metaslab_class_get_alloc( 7254 spa_embedded_log_class(spa)); 7255 uint64_t space = metaslab_class_get_space( 7256 spa_embedded_log_class(spa)); 7257 7258 (void) printf("\t%-16s %14llu used: %5.2f%%\n", 7259 "Embedded log class", (u_longlong_t)alloc, 7260 100.0 * alloc / space); 7261 } 7262 7263 if (spa_special_embedded_log_class(spa)->mc_allocator[0].mca_rotor 7264 != NULL) { 7265 uint64_t alloc = metaslab_class_get_alloc( 7266 spa_special_embedded_log_class(spa)); 7267 uint64_t space = metaslab_class_get_space( 7268 spa_special_embedded_log_class(spa)); 7269 7270 (void) printf("\t%-16s %14llu used: %5.2f%%\n", 7271 "Special embedded log", (u_longlong_t)alloc, 7272 100.0 * alloc / space); 7273 } 7274 7275 for (i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) { 7276 if (zcb->zcb_embedded_blocks[i] == 0) 7277 continue; 7278 (void) printf("\n"); 7279 (void) printf("\tadditional, non-pointer bps of type %u: " 7280 "%10llu\n", 7281 i, (u_longlong_t)zcb->zcb_embedded_blocks[i]); 7282 7283 if (dump_opt['b'] >= 3) { 7284 (void) printf("\t number of (compressed) bytes: " 7285 "number of bps\n"); 7286 dump_histogram(zcb->zcb_embedded_histogram[i], 7287 sizeof (zcb->zcb_embedded_histogram[i]) / 7288 sizeof 
(zcb->zcb_embedded_histogram[i][0]), 0); 7289 } 7290 } 7291 7292 if (tzb->zb_ditto_samevdev != 0) { 7293 (void) printf("\tDittoed blocks on same vdev: %llu\n", 7294 (longlong_t)tzb->zb_ditto_samevdev); 7295 } 7296 if (tzb->zb_ditto_same_ms != 0) { 7297 (void) printf("\tDittoed blocks in same metaslab: %llu\n", 7298 (longlong_t)tzb->zb_ditto_same_ms); 7299 } 7300 7301 for (uint64_t v = 0; v < spa->spa_root_vdev->vdev_children; v++) { 7302 vdev_t *vd = spa->spa_root_vdev->vdev_child[v]; 7303 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; 7304 7305 if (vim == NULL) { 7306 continue; 7307 } 7308 7309 char mem[32]; 7310 zdb_nicenum(vdev_indirect_mapping_num_entries(vim), 7311 mem, vdev_indirect_mapping_size(vim)); 7312 7313 (void) printf("\tindirect vdev id %llu has %llu segments " 7314 "(%s in memory)\n", 7315 (longlong_t)vd->vdev_id, 7316 (longlong_t)vdev_indirect_mapping_num_entries(vim), mem); 7317 } 7318 7319 if (dump_opt['b'] >= 2) { 7320 int l, t, level; 7321 char csize[32], lsize[32], psize[32], asize[32]; 7322 char avg[32], gang[32]; 7323 (void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE" 7324 "\t avg\t comp\t%%Total\tType\n"); 7325 7326 zfs_blkstat_t *mdstats = umem_zalloc(sizeof (zfs_blkstat_t), 7327 UMEM_NOFAIL); 7328 7329 for (t = 0; t <= ZDB_OT_TOTAL; t++) { 7330 const char *typename; 7331 7332 /* make sure nicenum has enough space */ 7333 _Static_assert(sizeof (csize) >= NN_NUMBUF_SZ, 7334 "csize truncated"); 7335 _Static_assert(sizeof (lsize) >= NN_NUMBUF_SZ, 7336 "lsize truncated"); 7337 _Static_assert(sizeof (psize) >= NN_NUMBUF_SZ, 7338 "psize truncated"); 7339 _Static_assert(sizeof (asize) >= NN_NUMBUF_SZ, 7340 "asize truncated"); 7341 _Static_assert(sizeof (avg) >= NN_NUMBUF_SZ, 7342 "avg truncated"); 7343 _Static_assert(sizeof (gang) >= NN_NUMBUF_SZ, 7344 "gang truncated"); 7345 7346 if (t < DMU_OT_NUMTYPES) 7347 typename = dmu_ot[t].ot_name; 7348 else 7349 typename = zdb_ot_extname[t - DMU_OT_NUMTYPES]; 7350 7351 if (zcb->zcb_type[ZB_TOTAL][t].zb_asize == 0) { 7352 (void) printf("%6s\t%5s\t%5s\t%5s" 7353 "\t%5s\t%5s\t%6s\t%s\n", 7354 "-", 7355 "-", 7356 "-", 7357 "-", 7358 "-", 7359 "-", 7360 "-", 7361 typename); 7362 continue; 7363 } 7364 7365 for (l = ZB_TOTAL - 1; l >= -1; l--) { 7366 level = (l == -1 ? 
ZB_TOTAL : l); 7367 zb = &zcb->zcb_type[level][t]; 7368 7369 if (zb->zb_asize == 0) 7370 continue; 7371 7372 if (level != ZB_TOTAL && t < DMU_OT_NUMTYPES && 7373 (level > 0 || DMU_OT_IS_METADATA(t))) { 7374 mdstats->zb_count += zb->zb_count; 7375 mdstats->zb_lsize += zb->zb_lsize; 7376 mdstats->zb_psize += zb->zb_psize; 7377 mdstats->zb_asize += zb->zb_asize; 7378 mdstats->zb_gangs += zb->zb_gangs; 7379 } 7380 7381 if (dump_opt['b'] < 3 && level != ZB_TOTAL) 7382 continue; 7383 7384 if (level == 0 && zb->zb_asize == 7385 zcb->zcb_type[ZB_TOTAL][t].zb_asize) 7386 continue; 7387 7388 zdb_nicenum(zb->zb_count, csize, 7389 sizeof (csize)); 7390 zdb_nicenum(zb->zb_lsize, lsize, 7391 sizeof (lsize)); 7392 zdb_nicenum(zb->zb_psize, psize, 7393 sizeof (psize)); 7394 zdb_nicenum(zb->zb_asize, asize, 7395 sizeof (asize)); 7396 zdb_nicenum(zb->zb_asize / zb->zb_count, avg, 7397 sizeof (avg)); 7398 zdb_nicenum(zb->zb_gangs, gang, sizeof (gang)); 7399 7400 (void) printf("%6s\t%5s\t%5s\t%5s\t%5s" 7401 "\t%5.2f\t%6.2f\t", 7402 csize, lsize, psize, asize, avg, 7403 (double)zb->zb_lsize / zb->zb_psize, 7404 100.0 * zb->zb_asize / tzb->zb_asize); 7405 7406 if (level == ZB_TOTAL) 7407 (void) printf("%s\n", typename); 7408 else 7409 (void) printf(" L%d %s\n", 7410 level, typename); 7411 7412 if (dump_opt['b'] >= 3 && zb->zb_gangs > 0) { 7413 (void) printf("\t number of ganged " 7414 "blocks: %s\n", gang); 7415 } 7416 7417 if (dump_opt['b'] >= 4) { 7418 (void) printf("psize " 7419 "(in 512-byte sectors): " 7420 "number of blocks\n"); 7421 dump_histogram(zb->zb_psize_histogram, 7422 PSIZE_HISTO_SIZE, 0); 7423 } 7424 } 7425 } 7426 zdb_nicenum(mdstats->zb_count, csize, 7427 sizeof (csize)); 7428 zdb_nicenum(mdstats->zb_lsize, lsize, 7429 sizeof (lsize)); 7430 zdb_nicenum(mdstats->zb_psize, psize, 7431 sizeof (psize)); 7432 zdb_nicenum(mdstats->zb_asize, asize, 7433 sizeof (asize)); 7434 zdb_nicenum(mdstats->zb_asize / mdstats->zb_count, avg, 7435 sizeof (avg)); 7436 zdb_nicenum(mdstats->zb_gangs, gang, sizeof (gang)); 7437 7438 (void) printf("%6s\t%5s\t%5s\t%5s\t%5s" 7439 "\t%5.2f\t%6.2f\t", 7440 csize, lsize, psize, asize, avg, 7441 (double)mdstats->zb_lsize / mdstats->zb_psize, 7442 100.0 * mdstats->zb_asize / tzb->zb_asize); 7443 (void) printf("%s\n", "Metadata Total"); 7444 7445 /* Output a table summarizing block sizes in the pool */ 7446 if (dump_opt['b'] >= 2) { 7447 dump_size_histograms(zcb); 7448 } 7449 7450 umem_free(mdstats, sizeof (zfs_blkstat_t)); 7451 } 7452 7453 (void) printf("\n"); 7454 7455 if (leaks) { 7456 umem_free(zcb, sizeof (zdb_cb_t)); 7457 return (2); 7458 } 7459 7460 if (zcb->zcb_haderrors) { 7461 umem_free(zcb, sizeof (zdb_cb_t)); 7462 return (3); 7463 } 7464 7465 umem_free(zcb, sizeof (zdb_cb_t)); 7466 return (0); 7467 } 7468 7469 typedef struct zdb_ddt_entry { 7470 /* key must be first for ddt_key_compare */ 7471 ddt_key_t zdde_key; 7472 uint64_t zdde_ref_blocks; 7473 uint64_t zdde_ref_lsize; 7474 uint64_t zdde_ref_psize; 7475 uint64_t zdde_ref_dsize; 7476 avl_node_t zdde_node; 7477 } zdb_ddt_entry_t; 7478 7479 static int 7480 zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, 7481 const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) 7482 { 7483 (void) zilog, (void) dnp; 7484 avl_tree_t *t = arg; 7485 avl_index_t where; 7486 zdb_ddt_entry_t *zdde, zdde_search; 7487 7488 if (zb->zb_level == ZB_DNODE_LEVEL || BP_IS_HOLE(bp) || 7489 BP_IS_EMBEDDED(bp)) 7490 return (0); 7491 7492 if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) { 7493 (void) 
printf("traversing objset %llu, %llu objects, " 7494 "%lu blocks so far\n", 7495 (u_longlong_t)zb->zb_objset, 7496 (u_longlong_t)BP_GET_FILL(bp), 7497 avl_numnodes(t)); 7498 } 7499 7500 if (BP_IS_HOLE(bp) || BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_OFF || 7501 BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp))) 7502 return (0); 7503 7504 ddt_key_fill(&zdde_search.zdde_key, bp); 7505 7506 zdde = avl_find(t, &zdde_search, &where); 7507 7508 if (zdde == NULL) { 7509 zdde = umem_zalloc(sizeof (*zdde), UMEM_NOFAIL); 7510 zdde->zdde_key = zdde_search.zdde_key; 7511 avl_insert(t, zdde, where); 7512 } 7513 7514 zdde->zdde_ref_blocks += 1; 7515 zdde->zdde_ref_lsize += BP_GET_LSIZE(bp); 7516 zdde->zdde_ref_psize += BP_GET_PSIZE(bp); 7517 zdde->zdde_ref_dsize += bp_get_dsize_sync(spa, bp); 7518 7519 return (0); 7520 } 7521 7522 static void 7523 dump_simulated_ddt(spa_t *spa) 7524 { 7525 avl_tree_t t; 7526 void *cookie = NULL; 7527 zdb_ddt_entry_t *zdde; 7528 ddt_histogram_t ddh_total = {{{0}}}; 7529 ddt_stat_t dds_total = {0}; 7530 7531 avl_create(&t, ddt_key_compare, 7532 sizeof (zdb_ddt_entry_t), offsetof(zdb_ddt_entry_t, zdde_node)); 7533 7534 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 7535 7536 (void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | 7537 TRAVERSE_NO_DECRYPT, zdb_ddt_add_cb, &t); 7538 7539 spa_config_exit(spa, SCL_CONFIG, FTAG); 7540 7541 while ((zdde = avl_destroy_nodes(&t, &cookie)) != NULL) { 7542 uint64_t refcnt = zdde->zdde_ref_blocks; 7543 ASSERT(refcnt != 0); 7544 7545 ddt_stat_t *dds = &ddh_total.ddh_stat[highbit64(refcnt) - 1]; 7546 7547 dds->dds_blocks += zdde->zdde_ref_blocks / refcnt; 7548 dds->dds_lsize += zdde->zdde_ref_lsize / refcnt; 7549 dds->dds_psize += zdde->zdde_ref_psize / refcnt; 7550 dds->dds_dsize += zdde->zdde_ref_dsize / refcnt; 7551 7552 dds->dds_ref_blocks += zdde->zdde_ref_blocks; 7553 dds->dds_ref_lsize += zdde->zdde_ref_lsize; 7554 dds->dds_ref_psize += zdde->zdde_ref_psize; 7555 dds->dds_ref_dsize += zdde->zdde_ref_dsize; 7556 7557 umem_free(zdde, sizeof (*zdde)); 7558 } 7559 7560 avl_destroy(&t); 7561 7562 ddt_histogram_total(&dds_total, &ddh_total); 7563 7564 (void) printf("Simulated DDT histogram:\n"); 7565 7566 zpool_dump_ddt(&dds_total, &ddh_total); 7567 7568 dump_dedup_ratio(&dds_total); 7569 } 7570 7571 static int 7572 verify_device_removal_feature_counts(spa_t *spa) 7573 { 7574 uint64_t dr_feature_refcount = 0; 7575 uint64_t oc_feature_refcount = 0; 7576 uint64_t indirect_vdev_count = 0; 7577 uint64_t precise_vdev_count = 0; 7578 uint64_t obsolete_counts_object_count = 0; 7579 uint64_t obsolete_sm_count = 0; 7580 uint64_t obsolete_counts_count = 0; 7581 uint64_t scip_count = 0; 7582 uint64_t obsolete_bpobj_count = 0; 7583 int ret = 0; 7584 7585 spa_condensing_indirect_phys_t *scip = 7586 &spa->spa_condensing_indirect_phys; 7587 if (scip->scip_next_mapping_object != 0) { 7588 vdev_t *vd = spa->spa_root_vdev->vdev_child[scip->scip_vdev]; 7589 ASSERT(scip->scip_prev_obsolete_sm_object != 0); 7590 ASSERT3P(vd->vdev_ops, ==, &vdev_indirect_ops); 7591 7592 (void) printf("Condensing indirect vdev %llu: new mapping " 7593 "object %llu, prev obsolete sm %llu\n", 7594 (u_longlong_t)scip->scip_vdev, 7595 (u_longlong_t)scip->scip_next_mapping_object, 7596 (u_longlong_t)scip->scip_prev_obsolete_sm_object); 7597 if (scip->scip_prev_obsolete_sm_object != 0) { 7598 space_map_t *prev_obsolete_sm = NULL; 7599 VERIFY0(space_map_open(&prev_obsolete_sm, 7600 spa->spa_meta_objset, 7601 scip->scip_prev_obsolete_sm_object, 7602 0, 
vd->vdev_asize, 0)); 7603 dump_spacemap(spa->spa_meta_objset, prev_obsolete_sm); 7604 (void) printf("\n"); 7605 space_map_close(prev_obsolete_sm); 7606 } 7607 7608 scip_count += 2; 7609 } 7610 7611 for (uint64_t i = 0; i < spa->spa_root_vdev->vdev_children; i++) { 7612 vdev_t *vd = spa->spa_root_vdev->vdev_child[i]; 7613 vdev_indirect_config_t *vic = &vd->vdev_indirect_config; 7614 7615 if (vic->vic_mapping_object != 0) { 7616 ASSERT(vd->vdev_ops == &vdev_indirect_ops || 7617 vd->vdev_removing); 7618 indirect_vdev_count++; 7619 7620 if (vd->vdev_indirect_mapping->vim_havecounts) { 7621 obsolete_counts_count++; 7622 } 7623 } 7624 7625 boolean_t are_precise; 7626 VERIFY0(vdev_obsolete_counts_are_precise(vd, &are_precise)); 7627 if (are_precise) { 7628 ASSERT(vic->vic_mapping_object != 0); 7629 precise_vdev_count++; 7630 } 7631 7632 uint64_t obsolete_sm_object; 7633 VERIFY0(vdev_obsolete_sm_object(vd, &obsolete_sm_object)); 7634 if (obsolete_sm_object != 0) { 7635 ASSERT(vic->vic_mapping_object != 0); 7636 obsolete_sm_count++; 7637 } 7638 } 7639 7640 (void) feature_get_refcount(spa, 7641 &spa_feature_table[SPA_FEATURE_DEVICE_REMOVAL], 7642 &dr_feature_refcount); 7643 (void) feature_get_refcount(spa, 7644 &spa_feature_table[SPA_FEATURE_OBSOLETE_COUNTS], 7645 &oc_feature_refcount); 7646 7647 if (dr_feature_refcount != indirect_vdev_count) { 7648 ret = 1; 7649 (void) printf("Number of indirect vdevs (%llu) " \ 7650 "does not match feature count (%llu)\n", 7651 (u_longlong_t)indirect_vdev_count, 7652 (u_longlong_t)dr_feature_refcount); 7653 } else { 7654 (void) printf("Verified device_removal feature refcount " \ 7655 "of %llu is correct\n", 7656 (u_longlong_t)dr_feature_refcount); 7657 } 7658 7659 if (zap_contains(spa_meta_objset(spa), DMU_POOL_DIRECTORY_OBJECT, 7660 DMU_POOL_OBSOLETE_BPOBJ) == 0) { 7661 obsolete_bpobj_count++; 7662 } 7663 7664 7665 obsolete_counts_object_count = precise_vdev_count; 7666 obsolete_counts_object_count += obsolete_sm_count; 7667 obsolete_counts_object_count += obsolete_counts_count; 7668 obsolete_counts_object_count += scip_count; 7669 obsolete_counts_object_count += obsolete_bpobj_count; 7670 obsolete_counts_object_count += remap_deadlist_count; 7671 7672 if (oc_feature_refcount != obsolete_counts_object_count) { 7673 ret = 1; 7674 (void) printf("Number of obsolete counts objects (%llu) " \ 7675 "does not match feature count (%llu)\n", 7676 (u_longlong_t)obsolete_counts_object_count, 7677 (u_longlong_t)oc_feature_refcount); 7678 (void) printf("pv:%llu os:%llu oc:%llu sc:%llu " 7679 "ob:%llu rd:%llu\n", 7680 (u_longlong_t)precise_vdev_count, 7681 (u_longlong_t)obsolete_sm_count, 7682 (u_longlong_t)obsolete_counts_count, 7683 (u_longlong_t)scip_count, 7684 (u_longlong_t)obsolete_bpobj_count, 7685 (u_longlong_t)remap_deadlist_count); 7686 } else { 7687 (void) printf("Verified indirect_refcount feature refcount " \ 7688 "of %llu is correct\n", 7689 (u_longlong_t)oc_feature_refcount); 7690 } 7691 return (ret); 7692 } 7693 7694 static void 7695 zdb_set_skip_mmp(char *target) 7696 { 7697 spa_t *spa; 7698 7699 /* 7700 * Disable the activity check to allow examination of 7701 * active pools. 7702 */ 7703 mutex_enter(&spa_namespace_lock); 7704 if ((spa = spa_lookup(target)) != NULL) { 7705 spa->spa_import_flags |= ZFS_IMPORT_SKIP_MMP; 7706 } 7707 mutex_exit(&spa_namespace_lock); 7708 } 7709 7710 #define BOGUS_SUFFIX "_CHECKPOINTED_UNIVERSE" 7711 /* 7712 * Import the checkpointed state of the pool specified by the target 7713 * parameter as readonly. 
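Internally the checkpointed state is imported under
 * a new name: the pool name with BOGUS_SUFFIX appended; e.g. the
 * target "tank/fs" is accessed as "tank_CHECKPOINTED_UNIVERSE/fs".
 *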
7714  * as an optional parameter; otherwise it attempts to infer the config
7715  * from the name of the target pool.
7716  *
7717  * Note that the checkpointed state's pool name will be the name of
7718  * the original pool with the above suffix appended to it. In addition,
7719  * if the target is not a pool name (e.g. a path to a dataset) then
7720  * the new_path parameter is populated with the updated path to
7721  * reflect the fact that we are looking into the checkpointed state.
7722  *
7723  * The function returns a newly-allocated copy of the name of the
7724  * pool containing the checkpointed state. When this copy is no
7725  * longer needed it should be freed with free(3C). The same
7726  * applies to the new_path parameter if allocated.
7727  */
7728 static char *
7729 import_checkpointed_state(char *target, nvlist_t *cfg, boolean_t target_is_spa,
7730     char **new_path)
7731 {
7732 	int error = 0;
7733 	char *poolname, *bogus_name = NULL;
7734 	boolean_t freecfg = B_FALSE;
7735 
7736 	/* If the target is not a pool, extract the pool name */
7737 	char *path_start = strchr(target, '/');
7738 	if (target_is_spa || path_start == NULL) {
7739 		poolname = target;
7740 	} else {
7741 		size_t poolname_len = path_start - target;
7742 		poolname = strndup(target, poolname_len);
7743 	}
7744 
7745 	if (cfg == NULL) {
7746 		zdb_set_skip_mmp(poolname);
7747 		error = spa_get_stats(poolname, &cfg, NULL, 0);
7748 		if (error != 0) {
7749 			fatal("Tried to read config of pool \"%s\" but "
7750 			    "spa_get_stats() failed with error %d\n",
7751 			    poolname, error);
7752 		}
7753 		freecfg = B_TRUE;
7754 	}
7755 
7756 	if (asprintf(&bogus_name, "%s%s", poolname, BOGUS_SUFFIX) == -1) {
7757 		if (target != poolname)
7758 			free(poolname);
7759 		return (NULL);
7760 	}
7761 	fnvlist_add_string(cfg, ZPOOL_CONFIG_POOL_NAME, bogus_name);
7762 
7763 	error = spa_import(bogus_name, cfg, NULL,
7764 	    ZFS_IMPORT_MISSING_LOG | ZFS_IMPORT_CHECKPOINT |
7765 	    ZFS_IMPORT_SKIP_MMP);
7766 	if (freecfg)
7767 		nvlist_free(cfg);
7768 	if (error != 0) {
7769 		fatal("Tried to import pool \"%s\" but spa_import() failed "
7770 		    "with error %d\n", bogus_name, error);
7771 	}
7772 
7773 	if (new_path != NULL && !target_is_spa) {
7774 		if (asprintf(new_path, "%s%s", bogus_name,
7775 		    path_start != NULL ? path_start : "") == -1) {
path_start : "") == -1) { 7776 free(bogus_name); 7777 if (!target_is_spa && path_start != NULL) 7778 free(poolname); 7779 return (NULL); 7780 } 7781 } 7782 7783 if (target != poolname) 7784 free(poolname); 7785 7786 return (bogus_name); 7787 } 7788 7789 typedef struct verify_checkpoint_sm_entry_cb_arg { 7790 vdev_t *vcsec_vd; 7791 7792 /* the following fields are only used for printing progress */ 7793 uint64_t vcsec_entryid; 7794 uint64_t vcsec_num_entries; 7795 } verify_checkpoint_sm_entry_cb_arg_t; 7796 7797 #define ENTRIES_PER_PROGRESS_UPDATE 10000 7798 7799 static int 7800 verify_checkpoint_sm_entry_cb(space_map_entry_t *sme, void *arg) 7801 { 7802 verify_checkpoint_sm_entry_cb_arg_t *vcsec = arg; 7803 vdev_t *vd = vcsec->vcsec_vd; 7804 metaslab_t *ms = vd->vdev_ms[sme->sme_offset >> vd->vdev_ms_shift]; 7805 uint64_t end = sme->sme_offset + sme->sme_run; 7806 7807 ASSERT(sme->sme_type == SM_FREE); 7808 7809 if ((vcsec->vcsec_entryid % ENTRIES_PER_PROGRESS_UPDATE) == 0) { 7810 (void) fprintf(stderr, 7811 "\rverifying vdev %llu, space map entry %llu of %llu ...", 7812 (longlong_t)vd->vdev_id, 7813 (longlong_t)vcsec->vcsec_entryid, 7814 (longlong_t)vcsec->vcsec_num_entries); 7815 } 7816 vcsec->vcsec_entryid++; 7817 7818 /* 7819 * See comment in checkpoint_sm_exclude_entry_cb() 7820 */ 7821 VERIFY3U(sme->sme_offset, >=, ms->ms_start); 7822 VERIFY3U(end, <=, ms->ms_start + ms->ms_size); 7823 7824 /* 7825 * The entries in the vdev_checkpoint_sm should be marked as 7826 * allocated in the checkpointed state of the pool, therefore 7827 * their respective ms_allocateable trees should not contain them. 7828 */ 7829 mutex_enter(&ms->ms_lock); 7830 zfs_range_tree_verify_not_present(ms->ms_allocatable, 7831 sme->sme_offset, sme->sme_run); 7832 mutex_exit(&ms->ms_lock); 7833 7834 return (0); 7835 } 7836 7837 /* 7838 * Verify that all segments in the vdev_checkpoint_sm are allocated 7839 * according to the checkpoint's ms_sm (i.e. are not in the checkpoint's 7840 * ms_allocatable). 7841 * 7842 * Do so by comparing the checkpoint space maps (vdev_checkpoint_sm) of 7843 * each vdev in the current state of the pool to the metaslab space maps 7844 * (ms_sm) of the checkpointed state of the pool. 7845 * 7846 * Note that the function changes the state of the ms_allocatable 7847 * trees of the current spa_t. The entries of these ms_allocatable 7848 * trees are cleared out and then repopulated from with the free 7849 * entries of their respective ms_sm space maps. 7850 */ 7851 static void 7852 verify_checkpoint_vdev_spacemaps(spa_t *checkpoint, spa_t *current) 7853 { 7854 vdev_t *ckpoint_rvd = checkpoint->spa_root_vdev; 7855 vdev_t *current_rvd = current->spa_root_vdev; 7856 7857 load_concrete_ms_allocatable_trees(checkpoint, SM_FREE); 7858 7859 for (uint64_t c = 0; c < ckpoint_rvd->vdev_children; c++) { 7860 vdev_t *ckpoint_vd = ckpoint_rvd->vdev_child[c]; 7861 vdev_t *current_vd = current_rvd->vdev_child[c]; 7862 7863 space_map_t *checkpoint_sm = NULL; 7864 uint64_t checkpoint_sm_obj; 7865 7866 if (ckpoint_vd->vdev_ops == &vdev_indirect_ops) { 7867 /* 7868 * Since we don't allow device removal in a pool 7869 * that has a checkpoint, we expect that all removed 7870 * vdevs were removed from the pool before the 7871 * checkpoint. 7872 */ 7873 ASSERT3P(current_vd->vdev_ops, ==, &vdev_indirect_ops); 7874 continue; 7875 } 7876 7877 /* 7878 * If the checkpoint space map doesn't exist, then nothing 7879 * here is checkpointed so there's nothing to verify. 
7880 */ 7881 if (current_vd->vdev_top_zap == 0 || 7882 zap_contains(spa_meta_objset(current), 7883 current_vd->vdev_top_zap, 7884 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0) 7885 continue; 7886 7887 VERIFY0(zap_lookup(spa_meta_objset(current), 7888 current_vd->vdev_top_zap, VDEV_TOP_ZAP_POOL_CHECKPOINT_SM, 7889 sizeof (uint64_t), 1, &checkpoint_sm_obj)); 7890 7891 VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(current), 7892 checkpoint_sm_obj, 0, current_vd->vdev_asize, 7893 current_vd->vdev_ashift)); 7894 7895 verify_checkpoint_sm_entry_cb_arg_t vcsec; 7896 vcsec.vcsec_vd = ckpoint_vd; 7897 vcsec.vcsec_entryid = 0; 7898 vcsec.vcsec_num_entries = 7899 space_map_length(checkpoint_sm) / sizeof (uint64_t); 7900 VERIFY0(space_map_iterate(checkpoint_sm, 7901 space_map_length(checkpoint_sm), 7902 verify_checkpoint_sm_entry_cb, &vcsec)); 7903 if (dump_opt['m'] > 3) 7904 dump_spacemap(current->spa_meta_objset, checkpoint_sm); 7905 space_map_close(checkpoint_sm); 7906 } 7907 7908 /* 7909 * If we've added vdevs since we took the checkpoint, ensure 7910 * that their checkpoint space maps are empty. 7911 */ 7912 if (ckpoint_rvd->vdev_children < current_rvd->vdev_children) { 7913 for (uint64_t c = ckpoint_rvd->vdev_children; 7914 c < current_rvd->vdev_children; c++) { 7915 vdev_t *current_vd = current_rvd->vdev_child[c]; 7916 VERIFY3P(current_vd->vdev_checkpoint_sm, ==, NULL); 7917 } 7918 } 7919 7920 /* for cleaner progress output */ 7921 (void) fprintf(stderr, "\n"); 7922 } 7923 7924 /* 7925 * Verifies that all space that's allocated in the checkpoint is 7926 * still allocated in the current version, by checking that everything 7927 * in checkpoint's ms_allocatable (which is actually allocated, not 7928 * allocatable/free) is not present in current's ms_allocatable. 7929 * 7930 * Note that the function changes the state of the ms_allocatable 7931 * trees of both spas when called. The entries of all ms_allocatable 7932 * trees are cleared out and then repopulated from their respective 7933 * ms_sm space maps. In the checkpointed state we load the allocated 7934 * entries, and in the current state we load the free entries. 7935 */ 7936 static void 7937 verify_checkpoint_ms_spacemaps(spa_t *checkpoint, spa_t *current) 7938 { 7939 vdev_t *ckpoint_rvd = checkpoint->spa_root_vdev; 7940 vdev_t *current_rvd = current->spa_root_vdev; 7941 7942 load_concrete_ms_allocatable_trees(checkpoint, SM_ALLOC); 7943 load_concrete_ms_allocatable_trees(current, SM_FREE); 7944 7945 for (uint64_t i = 0; i < ckpoint_rvd->vdev_children; i++) { 7946 vdev_t *ckpoint_vd = ckpoint_rvd->vdev_child[i]; 7947 vdev_t *current_vd = current_rvd->vdev_child[i]; 7948 7949 if (ckpoint_vd->vdev_ops == &vdev_indirect_ops) { 7950 /* 7951 * See comment in verify_checkpoint_vdev_spacemaps() 7952 */ 7953 ASSERT3P(current_vd->vdev_ops, ==, &vdev_indirect_ops); 7954 continue; 7955 } 7956 7957 for (uint64_t m = 0; m < ckpoint_vd->vdev_ms_count; m++) { 7958 metaslab_t *ckpoint_msp = ckpoint_vd->vdev_ms[m]; 7959 metaslab_t *current_msp = current_vd->vdev_ms[m]; 7960 7961 (void) fprintf(stderr, 7962 "\rverifying vdev %llu of %llu, " 7963 "metaslab %llu of %llu ...", 7964 (longlong_t)current_vd->vdev_id, 7965 (longlong_t)current_rvd->vdev_children, 7966 (longlong_t)current_vd->vdev_ms[m]->ms_id, 7967 (longlong_t)current_vd->vdev_ms_count); 7968 7969 /* 7970 * We walk through the ms_allocatable trees that 7971 * are loaded with the allocated blocks from the 7972 * ms_sm spacemaps of the checkpoint. 
For each 7973 * one of these ranges we ensure that none of them 7974 * exists in the ms_allocatable trees of the 7975 * current state which are loaded with the ranges 7976 * that are currently free. 7977 * 7978 * This way we ensure that none of the blocks that 7979 * are part of the checkpoint were freed by mistake. 7980 */ 7981 zfs_range_tree_walk(ckpoint_msp->ms_allocatable, 7982 (zfs_range_tree_func_t *) 7983 zfs_range_tree_verify_not_present, 7984 current_msp->ms_allocatable); 7985 } 7986 } 7987 7988 /* for cleaner progress output */ 7989 (void) fprintf(stderr, "\n"); 7990 } 7991 7992 static void 7993 verify_checkpoint_blocks(spa_t *spa) 7994 { 7995 ASSERT(!dump_opt['L']); 7996 7997 spa_t *checkpoint_spa; 7998 char *checkpoint_pool; 7999 int error = 0; 8000 8001 /* 8002 * We import the checkpointed state of the pool (under a different 8003 * name) so we can do verification on it against the current state 8004 * of the pool. 8005 */ 8006 checkpoint_pool = import_checkpointed_state(spa->spa_name, NULL, B_TRUE, 8007 NULL); 8008 ASSERT(strcmp(spa->spa_name, checkpoint_pool) != 0); 8009 8010 error = spa_open(checkpoint_pool, &checkpoint_spa, FTAG); 8011 if (error != 0) { 8012 fatal("Tried to open pool \"%s\" but spa_open() failed with " 8013 "error %d\n", checkpoint_pool, error); 8014 } 8015 8016 /* 8017 * Ensure that ranges in the checkpoint space maps of each vdev 8018 * are allocated according to the checkpointed state's metaslab 8019 * space maps. 8020 */ 8021 verify_checkpoint_vdev_spacemaps(checkpoint_spa, spa); 8022 8023 /* 8024 * Ensure that allocated ranges in the checkpoint's metaslab 8025 * space maps remain allocated in the metaslab space maps of 8026 * the current state. 8027 */ 8028 verify_checkpoint_ms_spacemaps(checkpoint_spa, spa); 8029 8030 /* 8031 * Once we are done, we get rid of the checkpointed state. 8032 */ 8033 spa_close(checkpoint_spa, FTAG); 8034 free(checkpoint_pool); 8035 } 8036 8037 static void 8038 dump_leftover_checkpoint_blocks(spa_t *spa) 8039 { 8040 vdev_t *rvd = spa->spa_root_vdev; 8041 8042 for (uint64_t i = 0; i < rvd->vdev_children; i++) { 8043 vdev_t *vd = rvd->vdev_child[i]; 8044 8045 space_map_t *checkpoint_sm = NULL; 8046 uint64_t checkpoint_sm_obj; 8047 8048 if (vd->vdev_top_zap == 0) 8049 continue; 8050 8051 if (zap_contains(spa_meta_objset(spa), vd->vdev_top_zap, 8052 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0) 8053 continue; 8054 8055 VERIFY0(zap_lookup(spa_meta_objset(spa), vd->vdev_top_zap, 8056 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM, 8057 sizeof (uint64_t), 1, &checkpoint_sm_obj)); 8058 8059 VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(spa), 8060 checkpoint_sm_obj, 0, vd->vdev_asize, vd->vdev_ashift)); 8061 dump_spacemap(spa->spa_meta_objset, checkpoint_sm); 8062 space_map_close(checkpoint_sm); 8063 } 8064 } 8065 8066 static int 8067 verify_checkpoint(spa_t *spa) 8068 { 8069 uberblock_t checkpoint; 8070 int error; 8071 8072 if (!spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) 8073 return (0); 8074 8075 error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 8076 DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t), 8077 sizeof (uberblock_t) / sizeof (uint64_t), &checkpoint); 8078 8079 if (error == ENOENT && !dump_opt['L']) { 8080 /* 8081 * If the feature is active but the uberblock is missing 8082 * then we must be in the middle of discarding the 8083 * checkpoint. 
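		 * (Discarding proceeds asynchronously over many txgs, so
		 * observing this intermediate state is expected rather
		 * than a sign of corruption.)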
8084 		 */
8085 		(void) printf("\nPartially discarded checkpoint "
8086 		    "state found:\n");
8087 		if (dump_opt['m'] > 3)
8088 			dump_leftover_checkpoint_blocks(spa);
8089 		return (0);
8090 	} else if (error != 0) {
8091 		(void) printf("lookup error %d when looking for "
8092 		    "checkpointed uberblock in MOS\n", error);
8093 		return (error);
8094 	}
8095 	dump_uberblock(&checkpoint, "\nCheckpointed uberblock found:\n", "\n");
8096 
8097 	if (checkpoint.ub_checkpoint_txg == 0) {
8098 		(void) printf("\nub_checkpoint_txg not set in checkpointed "
8099 		    "uberblock\n");
8100 		error = 3;
8101 	}
8102 
8103 	if (error == 0 && !dump_opt['L'])
8104 		verify_checkpoint_blocks(spa);
8105 
8106 	return (error);
8107 }
8108 
8109 static void
8110 mos_leaks_cb(void *arg, uint64_t start, uint64_t size)
8111 {
8112 	(void) arg;
8113 	for (uint64_t i = start; i < start + size; i++) {
8114 		(void) printf("MOS object %llu referenced but not allocated\n",
8115 		    (u_longlong_t)i);
8116 	}
8117 }
8118 
8119 static void
8120 mos_obj_refd(uint64_t obj)
8121 {
8122 	if (obj != 0 && mos_refd_objs != NULL)
8123 		zfs_range_tree_add(mos_refd_objs, obj, 1);
8124 }
8125 
8126 /*
8127  * Call on a MOS object that may already have been referenced.
8128  */
8129 static void
8130 mos_obj_refd_multiple(uint64_t obj)
8131 {
8132 	if (obj != 0 && mos_refd_objs != NULL &&
8133 	    !zfs_range_tree_contains(mos_refd_objs, obj, 1))
8134 		zfs_range_tree_add(mos_refd_objs, obj, 1);
8135 }
8136 
8137 static void
8138 mos_leak_vdev_top_zap(vdev_t *vd)
8139 {
8140 	uint64_t ms_flush_data_obj;
8141 	int error = zap_lookup(spa_meta_objset(vd->vdev_spa),
8142 	    vd->vdev_top_zap, VDEV_TOP_ZAP_MS_UNFLUSHED_PHYS_TXGS,
8143 	    sizeof (ms_flush_data_obj), 1, &ms_flush_data_obj);
8144 	if (error == ENOENT)
8145 		return;
8146 	ASSERT0(error);
8147 
8148 	mos_obj_refd(ms_flush_data_obj);
8149 }
8150 
8151 static void
8152 mos_leak_vdev(vdev_t *vd)
8153 {
8154 	mos_obj_refd(vd->vdev_dtl_object);
8155 	mos_obj_refd(vd->vdev_ms_array);
8156 	mos_obj_refd(vd->vdev_indirect_config.vic_births_object);
8157 	mos_obj_refd(vd->vdev_indirect_config.vic_mapping_object);
8158 	mos_obj_refd(vd->vdev_leaf_zap);
8159 	if (vd->vdev_checkpoint_sm != NULL)
8160 		mos_obj_refd(vd->vdev_checkpoint_sm->sm_object);
8161 	if (vd->vdev_indirect_mapping != NULL) {
8162 		mos_obj_refd(vd->vdev_indirect_mapping->
8163 		    vim_phys->vimp_counts_object);
8164 	}
8165 	if (vd->vdev_obsolete_sm != NULL)
8166 		mos_obj_refd(vd->vdev_obsolete_sm->sm_object);
8167 
8168 	for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
8169 		metaslab_t *ms = vd->vdev_ms[m];
8170 		mos_obj_refd(space_map_object(ms->ms_sm));
8171 	}
8172 
8173 	if (vd->vdev_root_zap != 0)
8174 		mos_obj_refd(vd->vdev_root_zap);
8175 
8176 	if (vd->vdev_top_zap != 0) {
8177 		mos_obj_refd(vd->vdev_top_zap);
8178 		mos_leak_vdev_top_zap(vd);
8179 	}
8180 
8181 	for (uint64_t c = 0; c < vd->vdev_children; c++) {
8182 		mos_leak_vdev(vd->vdev_child[c]);
8183 	}
8184 }
8185 
8186 static void
8187 mos_leak_log_spacemaps(spa_t *spa)
8188 {
8189 	uint64_t spacemap_zap;
8190 	int error = zap_lookup(spa_meta_objset(spa),
8191 	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_LOG_SPACEMAP_ZAP,
8192 	    sizeof (spacemap_zap), 1, &spacemap_zap);
8193 	if (error == ENOENT)
8194 		return;
8195 	ASSERT0(error);
8196 
8197 	mos_obj_refd(spacemap_zap);
8198 	for (spa_log_sm_t *sls = avl_first(&spa->spa_sm_logs_by_txg);
8199 	    sls; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls))
8200 		mos_obj_refd(sls->sls_sm_obj);
8201 }
8202 
8203 static void
8204 errorlog_count_refd(objset_t *mos, uint64_t errlog)
8205 {
8206 	zap_cursor_t zc;
8207 	zap_attribute_t *za =
zap_attribute_alloc(); 8208 for (zap_cursor_init(&zc, mos, errlog); 8209 zap_cursor_retrieve(&zc, za) == 0; 8210 zap_cursor_advance(&zc)) { 8211 mos_obj_refd(za->za_first_integer); 8212 } 8213 zap_cursor_fini(&zc); 8214 zap_attribute_free(za); 8215 } 8216 8217 static int 8218 dump_mos_leaks(spa_t *spa) 8219 { 8220 int rv = 0; 8221 objset_t *mos = spa->spa_meta_objset; 8222 dsl_pool_t *dp = spa->spa_dsl_pool; 8223 8224 /* Visit and mark all referenced objects in the MOS */ 8225 8226 mos_obj_refd(DMU_POOL_DIRECTORY_OBJECT); 8227 mos_obj_refd(spa->spa_pool_props_object); 8228 mos_obj_refd(spa->spa_config_object); 8229 mos_obj_refd(spa->spa_ddt_stat_object); 8230 mos_obj_refd(spa->spa_feat_desc_obj); 8231 mos_obj_refd(spa->spa_feat_enabled_txg_obj); 8232 mos_obj_refd(spa->spa_feat_for_read_obj); 8233 mos_obj_refd(spa->spa_feat_for_write_obj); 8234 mos_obj_refd(spa->spa_history); 8235 mos_obj_refd(spa->spa_errlog_last); 8236 mos_obj_refd(spa->spa_errlog_scrub); 8237 8238 if (spa_feature_is_enabled(spa, SPA_FEATURE_HEAD_ERRLOG)) { 8239 errorlog_count_refd(mos, spa->spa_errlog_last); 8240 errorlog_count_refd(mos, spa->spa_errlog_scrub); 8241 } 8242 8243 mos_obj_refd(spa->spa_all_vdev_zaps); 8244 mos_obj_refd(spa->spa_dsl_pool->dp_bptree_obj); 8245 mos_obj_refd(spa->spa_dsl_pool->dp_tmp_userrefs_obj); 8246 mos_obj_refd(spa->spa_dsl_pool->dp_scan->scn_phys.scn_queue_obj); 8247 bpobj_count_refd(&spa->spa_deferred_bpobj); 8248 mos_obj_refd(dp->dp_empty_bpobj); 8249 bpobj_count_refd(&dp->dp_obsolete_bpobj); 8250 bpobj_count_refd(&dp->dp_free_bpobj); 8251 mos_obj_refd(spa->spa_l2cache.sav_object); 8252 mos_obj_refd(spa->spa_spares.sav_object); 8253 8254 if (spa->spa_syncing_log_sm != NULL) 8255 mos_obj_refd(spa->spa_syncing_log_sm->sm_object); 8256 mos_leak_log_spacemaps(spa); 8257 8258 mos_obj_refd(spa->spa_condensing_indirect_phys. 8259 scip_next_mapping_object); 8260 mos_obj_refd(spa->spa_condensing_indirect_phys. 
8261 scip_prev_obsolete_sm_object); 8262 if (spa->spa_condensing_indirect_phys.scip_next_mapping_object != 0) { 8263 vdev_indirect_mapping_t *vim = 8264 vdev_indirect_mapping_open(mos, 8265 spa->spa_condensing_indirect_phys.scip_next_mapping_object); 8266 mos_obj_refd(vim->vim_phys->vimp_counts_object); 8267 vdev_indirect_mapping_close(vim); 8268 } 8269 deleted_livelists_dump_mos(spa); 8270 8271 if (dp->dp_origin_snap != NULL) { 8272 dsl_dataset_t *ds; 8273 8274 dsl_pool_config_enter(dp, FTAG); 8275 VERIFY0(dsl_dataset_hold_obj(dp, 8276 dsl_dataset_phys(dp->dp_origin_snap)->ds_next_snap_obj, 8277 FTAG, &ds)); 8278 count_ds_mos_objects(ds); 8279 dump_blkptr_list(&ds->ds_deadlist, "Deadlist"); 8280 dsl_dataset_rele(ds, FTAG); 8281 dsl_pool_config_exit(dp, FTAG); 8282 8283 count_ds_mos_objects(dp->dp_origin_snap); 8284 dump_blkptr_list(&dp->dp_origin_snap->ds_deadlist, "Deadlist"); 8285 } 8286 count_dir_mos_objects(dp->dp_mos_dir); 8287 if (dp->dp_free_dir != NULL) 8288 count_dir_mos_objects(dp->dp_free_dir); 8289 if (dp->dp_leak_dir != NULL) 8290 count_dir_mos_objects(dp->dp_leak_dir); 8291 8292 mos_leak_vdev(spa->spa_root_vdev); 8293 8294 for (uint64_t c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { 8295 ddt_t *ddt = spa->spa_ddt[c]; 8296 if (!ddt || ddt->ddt_version == DDT_VERSION_UNCONFIGURED) 8297 continue; 8298 8299 /* DDT store objects */ 8300 for (ddt_type_t type = 0; type < DDT_TYPES; type++) { 8301 for (ddt_class_t class = 0; class < DDT_CLASSES; 8302 class++) { 8303 mos_obj_refd(ddt->ddt_object[type][class]); 8304 } 8305 } 8306 8307 /* FDT container */ 8308 if (ddt->ddt_version == DDT_VERSION_FDT) 8309 mos_obj_refd(ddt->ddt_dir_object); 8310 8311 /* FDT log objects */ 8312 if (ddt->ddt_flags & DDT_FLAG_LOG) { 8313 mos_obj_refd(ddt->ddt_log[0].ddl_object); 8314 mos_obj_refd(ddt->ddt_log[1].ddl_object); 8315 } 8316 } 8317 8318 for (uint64_t vdevid = 0; vdevid < spa->spa_brt_nvdevs; vdevid++) { 8319 brt_vdev_t *brtvd = spa->spa_brt_vdevs[vdevid]; 8320 if (brtvd->bv_initiated) { 8321 mos_obj_refd(brtvd->bv_mos_brtvdev); 8322 mos_obj_refd(brtvd->bv_mos_entries); 8323 } 8324 } 8325 8326 /* 8327 * Visit all allocated objects and make sure they are referenced. 
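	 *
	 * A sketch of the three possible outcomes for each object:
	 *
	 *   allocated and in mos_refd_objs -> OK; drop it from the tree
	 *   allocated, not in the tree     -> reported below as leaked
	 *   left in the tree afterwards    -> reported by mos_leaks_cb()
	 *                                     as referenced but not
	 *                                     allocated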
8328 */ 8329 uint64_t object = 0; 8330 while (dmu_object_next(mos, &object, B_FALSE, 0) == 0) { 8331 if (zfs_range_tree_contains(mos_refd_objs, object, 1)) { 8332 zfs_range_tree_remove(mos_refd_objs, object, 1); 8333 } else { 8334 dmu_object_info_t doi; 8335 const char *name; 8336 VERIFY0(dmu_object_info(mos, object, &doi)); 8337 if (doi.doi_type & DMU_OT_NEWTYPE) { 8338 dmu_object_byteswap_t bswap = 8339 DMU_OT_BYTESWAP(doi.doi_type); 8340 name = dmu_ot_byteswap[bswap].ob_name; 8341 } else { 8342 name = dmu_ot[doi.doi_type].ot_name; 8343 } 8344 8345 (void) printf("MOS object %llu (%s) leaked\n", 8346 (u_longlong_t)object, name); 8347 rv = 2; 8348 } 8349 } 8350 (void) zfs_range_tree_walk(mos_refd_objs, mos_leaks_cb, NULL); 8351 if (!zfs_range_tree_is_empty(mos_refd_objs)) 8352 rv = 2; 8353 zfs_range_tree_vacate(mos_refd_objs, NULL, NULL); 8354 zfs_range_tree_destroy(mos_refd_objs); 8355 return (rv); 8356 } 8357 8358 typedef struct log_sm_obsolete_stats_arg { 8359 uint64_t lsos_current_txg; 8360 8361 uint64_t lsos_total_entries; 8362 uint64_t lsos_valid_entries; 8363 8364 uint64_t lsos_sm_entries; 8365 uint64_t lsos_valid_sm_entries; 8366 } log_sm_obsolete_stats_arg_t; 8367 8368 static int 8369 log_spacemap_obsolete_stats_cb(spa_t *spa, space_map_entry_t *sme, 8370 uint64_t txg, void *arg) 8371 { 8372 log_sm_obsolete_stats_arg_t *lsos = arg; 8373 8374 uint64_t offset = sme->sme_offset; 8375 uint64_t vdev_id = sme->sme_vdev; 8376 8377 if (lsos->lsos_current_txg == 0) { 8378 /* this is the first log */ 8379 lsos->lsos_current_txg = txg; 8380 } else if (lsos->lsos_current_txg < txg) { 8381 /* we just changed log - print stats and reset */ 8382 (void) printf("%-8llu valid entries out of %-8llu - txg %llu\n", 8383 (u_longlong_t)lsos->lsos_valid_sm_entries, 8384 (u_longlong_t)lsos->lsos_sm_entries, 8385 (u_longlong_t)lsos->lsos_current_txg); 8386 lsos->lsos_valid_sm_entries = 0; 8387 lsos->lsos_sm_entries = 0; 8388 lsos->lsos_current_txg = txg; 8389 } 8390 ASSERT3U(lsos->lsos_current_txg, ==, txg); 8391 8392 lsos->lsos_sm_entries++; 8393 lsos->lsos_total_entries++; 8394 8395 vdev_t *vd = vdev_lookup_top(spa, vdev_id); 8396 if (!vdev_is_concrete(vd)) 8397 return (0); 8398 8399 metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift]; 8400 ASSERT(sme->sme_type == SM_ALLOC || sme->sme_type == SM_FREE); 8401 8402 if (txg < metaslab_unflushed_txg(ms)) 8403 return (0); 8404 lsos->lsos_valid_sm_entries++; 8405 lsos->lsos_valid_entries++; 8406 return (0); 8407 } 8408 8409 static void 8410 dump_log_spacemap_obsolete_stats(spa_t *spa) 8411 { 8412 if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP)) 8413 return; 8414 8415 log_sm_obsolete_stats_arg_t lsos = {0}; 8416 8417 (void) printf("Log Space Map Obsolete Entry Statistics:\n"); 8418 8419 iterate_through_spacemap_logs(spa, 8420 log_spacemap_obsolete_stats_cb, &lsos); 8421 8422 /* print stats for latest log */ 8423 (void) printf("%-8llu valid entries out of %-8llu - txg %llu\n", 8424 (u_longlong_t)lsos.lsos_valid_sm_entries, 8425 (u_longlong_t)lsos.lsos_sm_entries, 8426 (u_longlong_t)lsos.lsos_current_txg); 8427 8428 (void) printf("%-8llu valid entries out of %-8llu - total\n\n", 8429 (u_longlong_t)lsos.lsos_valid_entries, 8430 (u_longlong_t)lsos.lsos_total_entries); 8431 } 8432 8433 static void 8434 dump_zpool(spa_t *spa) 8435 { 8436 dsl_pool_t *dp = spa_get_dsl(spa); 8437 int rc = 0; 8438 8439 if (dump_opt['y']) { 8440 livelist_metaslab_validate(spa); 8441 } 8442 8443 if (dump_opt['S']) { 8444 dump_simulated_ddt(spa); 8445 return; 8446 } 8447 8448 
if (!dump_opt['e'] && dump_opt['C'] > 1) { 8449 (void) printf("\nCached configuration:\n"); 8450 dump_nvlist(spa->spa_config, 8); 8451 } 8452 8453 if (dump_opt['C']) 8454 dump_config(spa); 8455 8456 if (dump_opt['u']) 8457 dump_uberblock(&spa->spa_uberblock, "\nUberblock:\n", "\n"); 8458 8459 if (dump_opt['D']) 8460 dump_all_ddts(spa); 8461 8462 if (dump_opt['T']) 8463 dump_brt(spa); 8464 8465 if (dump_opt['d'] > 2 || dump_opt['m']) 8466 dump_metaslabs(spa); 8467 if (dump_opt['M']) 8468 dump_metaslab_groups(spa, dump_opt['M'] > 1); 8469 if (dump_opt['d'] > 2 || dump_opt['m']) { 8470 dump_log_spacemaps(spa); 8471 dump_log_spacemap_obsolete_stats(spa); 8472 } 8473 8474 if (dump_opt['d'] || dump_opt['i']) { 8475 spa_feature_t f; 8476 mos_refd_objs = zfs_range_tree_create_flags( 8477 NULL, ZFS_RANGE_SEG64, NULL, 0, 0, 8478 0, "dump_zpool:mos_refd_objs"); 8479 dump_objset(dp->dp_meta_objset); 8480 8481 if (dump_opt['d'] >= 3) { 8482 dsl_pool_t *dp = spa->spa_dsl_pool; 8483 dump_full_bpobj(&spa->spa_deferred_bpobj, 8484 "Deferred frees", 0); 8485 if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { 8486 dump_full_bpobj(&dp->dp_free_bpobj, 8487 "Pool snapshot frees", 0); 8488 } 8489 if (bpobj_is_open(&dp->dp_obsolete_bpobj)) { 8490 ASSERT(spa_feature_is_enabled(spa, 8491 SPA_FEATURE_DEVICE_REMOVAL)); 8492 dump_full_bpobj(&dp->dp_obsolete_bpobj, 8493 "Pool obsolete blocks", 0); 8494 } 8495 8496 if (spa_feature_is_active(spa, 8497 SPA_FEATURE_ASYNC_DESTROY)) { 8498 dump_bptree(spa->spa_meta_objset, 8499 dp->dp_bptree_obj, 8500 "Pool dataset frees"); 8501 } 8502 dump_dtl(spa->spa_root_vdev, 0); 8503 } 8504 8505 for (spa_feature_t f = 0; f < SPA_FEATURES; f++) 8506 global_feature_count[f] = UINT64_MAX; 8507 global_feature_count[SPA_FEATURE_REDACTION_BOOKMARKS] = 0; 8508 global_feature_count[SPA_FEATURE_REDACTION_LIST_SPILL] = 0; 8509 global_feature_count[SPA_FEATURE_BOOKMARK_WRITTEN] = 0; 8510 global_feature_count[SPA_FEATURE_LIVELIST] = 0; 8511 8512 (void) dmu_objset_find(spa_name(spa), dump_one_objset, 8513 NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN); 8514 8515 if (rc == 0 && !dump_opt['L']) 8516 rc = dump_mos_leaks(spa); 8517 8518 for (f = 0; f < SPA_FEATURES; f++) { 8519 uint64_t refcount; 8520 8521 uint64_t *arr; 8522 if (!(spa_feature_table[f].fi_flags & 8523 ZFEATURE_FLAG_PER_DATASET)) { 8524 if (global_feature_count[f] == UINT64_MAX) 8525 continue; 8526 if (!spa_feature_is_enabled(spa, f)) { 8527 ASSERT0(global_feature_count[f]); 8528 continue; 8529 } 8530 arr = global_feature_count; 8531 } else { 8532 if (!spa_feature_is_enabled(spa, f)) { 8533 ASSERT0(dataset_feature_count[f]); 8534 continue; 8535 } 8536 arr = dataset_feature_count; 8537 } 8538 if (feature_get_refcount(spa, &spa_feature_table[f], 8539 &refcount) == ENOTSUP) 8540 continue; 8541 if (arr[f] != refcount) { 8542 (void) printf("%s feature refcount mismatch: " 8543 "%lld consumers != %lld refcount\n", 8544 spa_feature_table[f].fi_uname, 8545 (longlong_t)arr[f], (longlong_t)refcount); 8546 rc = 2; 8547 } else { 8548 (void) printf("Verified %s feature refcount " 8549 "of %llu is correct\n", 8550 spa_feature_table[f].fi_uname, 8551 (longlong_t)refcount); 8552 } 8553 } 8554 8555 if (rc == 0) 8556 rc = verify_device_removal_feature_counts(spa); 8557 } 8558 8559 if (rc == 0 && (dump_opt['b'] || dump_opt['c'])) 8560 rc = dump_block_stats(spa); 8561 8562 if (rc == 0) 8563 rc = verify_spacemap_refcounts(spa); 8564 8565 if (dump_opt['s']) 8566 show_pool_stats(spa); 8567 8568 if (dump_opt['h']) 8569 dump_history(spa); 8570 8571 if (rc == 0) 8572 
rc = verify_checkpoint(spa); 8573 8574 if (rc != 0) { 8575 dump_debug_buffer(); 8576 zdb_exit(rc); 8577 } 8578 } 8579 8580 #define ZDB_FLAG_CHECKSUM 0x0001 8581 #define ZDB_FLAG_DECOMPRESS 0x0002 8582 #define ZDB_FLAG_BSWAP 0x0004 8583 #define ZDB_FLAG_GBH 0x0008 8584 #define ZDB_FLAG_INDIRECT 0x0010 8585 #define ZDB_FLAG_RAW 0x0020 8586 #define ZDB_FLAG_PRINT_BLKPTR 0x0040 8587 #define ZDB_FLAG_VERBOSE 0x0080 8588 8589 static int flagbits[256]; 8590 static char flagbitstr[16]; 8591 8592 static void 8593 zdb_print_blkptr(const blkptr_t *bp, int flags) 8594 { 8595 char blkbuf[BP_SPRINTF_LEN]; 8596 8597 if (flags & ZDB_FLAG_BSWAP) 8598 byteswap_uint64_array((void *)bp, sizeof (blkptr_t)); 8599 8600 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); 8601 (void) printf("%s\n", blkbuf); 8602 } 8603 8604 static void 8605 zdb_dump_indirect(blkptr_t *bp, int nbps, int flags) 8606 { 8607 int i; 8608 8609 for (i = 0; i < nbps; i++) 8610 zdb_print_blkptr(&bp[i], flags); 8611 } 8612 8613 static void 8614 zdb_dump_gbh(void *buf, uint64_t size, int flags) 8615 { 8616 zdb_dump_indirect((blkptr_t *)buf, gbh_nblkptrs(size), flags); 8617 } 8618 8619 static void 8620 zdb_dump_block_raw(void *buf, uint64_t size, int flags) 8621 { 8622 if (flags & ZDB_FLAG_BSWAP) 8623 byteswap_uint64_array(buf, size); 8624 VERIFY(write(fileno(stdout), buf, size) == size); 8625 } 8626 8627 static void 8628 zdb_dump_block(char *label, void *buf, uint64_t size, int flags) 8629 { 8630 uint64_t *d = (uint64_t *)buf; 8631 unsigned nwords = size / sizeof (uint64_t); 8632 int do_bswap = !!(flags & ZDB_FLAG_BSWAP); 8633 unsigned i, j; 8634 const char *hdr; 8635 char *c; 8636 8637 8638 if (do_bswap) 8639 hdr = " 7 6 5 4 3 2 1 0 f e d c b a 9 8"; 8640 else 8641 hdr = " 0 1 2 3 4 5 6 7 8 9 a b c d e f"; 8642 8643 (void) printf("\n%s\n%6s %s 0123456789abcdef\n", label, "", hdr); 8644 8645 #ifdef _ZFS_LITTLE_ENDIAN 8646 /* correct the endianness */ 8647 do_bswap = !do_bswap; 8648 #endif 8649 for (i = 0; i < nwords; i += 2) { 8650 (void) printf("%06llx: %016llx %016llx ", 8651 (u_longlong_t)(i * sizeof (uint64_t)), 8652 (u_longlong_t)(do_bswap ? BSWAP_64(d[i]) : d[i]), 8653 (u_longlong_t)(do_bswap ? BSWAP_64(d[i + 1]) : d[i + 1])); 8654 8655 c = (char *)&d[i]; 8656 for (j = 0; j < 2 * sizeof (uint64_t); j++) 8657 (void) printf("%c", isprint(c[j]) ? c[j] : '.'); 8658 (void) printf("\n"); 8659 } 8660 } 8661 8662 /* 8663 * There are two acceptable formats: 8664 * leaf_name - For example: c1t0d0 or /tmp/ztest.0a 8665 * child[.child]* - For example: 0.1.1 8666 * 8667 * The second form can be used to specify arbitrary vdevs anywhere 8668 * in the hierarchy. For example, in a pool with a mirror of 8669 * RAID-Zs, you can specify either RAID-Z vdev with 0.0 or 0.1 . 8670 */ 8671 static vdev_t * 8672 zdb_vdev_lookup(vdev_t *vdev, const char *path) 8673 { 8674 char *s, *p, *q; 8675 unsigned i; 8676 8677 if (vdev == NULL) 8678 return (NULL); 8679 8680 /* First, assume the x.x.x.x format */ 8681 i = strtoul(path, &s, 10); 8682 if (s == path || (s && *s != '.' 
&& *s != '\0')) 8683 goto name; 8684 if (i >= vdev->vdev_children) 8685 return (NULL); 8686 8687 vdev = vdev->vdev_child[i]; 8688 if (s && *s == '\0') 8689 return (vdev); 8690 return (zdb_vdev_lookup(vdev, s+1)); 8691 8692 name: 8693 for (i = 0; i < vdev->vdev_children; i++) { 8694 vdev_t *vc = vdev->vdev_child[i]; 8695 8696 if (vc->vdev_path == NULL) { 8697 vc = zdb_vdev_lookup(vc, path); 8698 if (vc == NULL) 8699 continue; 8700 else 8701 return (vc); 8702 } 8703 8704 p = strrchr(vc->vdev_path, '/'); 8705 p = p ? p + 1 : vc->vdev_path; 8706 q = &vc->vdev_path[strlen(vc->vdev_path) - 2]; 8707 8708 if (strcmp(vc->vdev_path, path) == 0) 8709 return (vc); 8710 if (strcmp(p, path) == 0) 8711 return (vc); 8712 if (strcmp(q, "s0") == 0 && strncmp(p, path, q - p) == 0) 8713 return (vc); 8714 } 8715 8716 return (NULL); 8717 } 8718 8719 static int 8720 name_from_objset_id(spa_t *spa, uint64_t objset_id, char *outstr) 8721 { 8722 dsl_dataset_t *ds; 8723 8724 dsl_pool_config_enter(spa->spa_dsl_pool, FTAG); 8725 int error = dsl_dataset_hold_obj(spa->spa_dsl_pool, objset_id, 8726 NULL, &ds); 8727 if (error != 0) { 8728 (void) fprintf(stderr, "failed to hold objset %llu: %s\n", 8729 (u_longlong_t)objset_id, strerror(error)); 8730 dsl_pool_config_exit(spa->spa_dsl_pool, FTAG); 8731 return (error); 8732 } 8733 dsl_dataset_name(ds, outstr); 8734 dsl_dataset_rele(ds, NULL); 8735 dsl_pool_config_exit(spa->spa_dsl_pool, FTAG); 8736 return (0); 8737 } 8738 8739 static boolean_t 8740 zdb_parse_block_sizes(char *sizes, uint64_t *lsize, uint64_t *psize) 8741 { 8742 char *s0, *s1, *tmp = NULL; 8743 8744 if (sizes == NULL) 8745 return (B_FALSE); 8746 8747 s0 = strtok_r(sizes, "/", &tmp); 8748 if (s0 == NULL) 8749 return (B_FALSE); 8750 s1 = strtok_r(NULL, "/", &tmp); 8751 *lsize = strtoull(s0, NULL, 16); 8752 *psize = s1 ? strtoull(s1, NULL, 16) : *lsize; 8753 return (*lsize >= *psize && *psize > 0); 8754 } 8755 8756 #define ZIO_COMPRESS_MASK(alg) (1ULL << (ZIO_COMPRESS_##alg)) 8757 8758 static boolean_t 8759 try_decompress_block(abd_t *pabd, uint64_t lsize, uint64_t psize, 8760 int flags, int cfunc, void *lbuf, void *lbuf2) 8761 { 8762 if (flags & ZDB_FLAG_VERBOSE) { 8763 (void) fprintf(stderr, 8764 "Trying %05llx -> %05llx (%s)\n", 8765 (u_longlong_t)psize, 8766 (u_longlong_t)lsize, 8767 zio_compress_table[cfunc].ci_name); 8768 } 8769 8770 /* 8771 * We set lbuf to all zeros and lbuf2 to all 8772 * ones, then decompress to both buffers and 8773 * compare their contents. This way we can 8774 * know if decompression filled exactly to 8775 * lsize or if it left some bytes unwritten. 8776 */ 8777 8778 memset(lbuf, 0x00, lsize); 8779 memset(lbuf2, 0xff, lsize); 8780 8781 abd_t labd, labd2; 8782 abd_get_from_buf_struct(&labd, lbuf, lsize); 8783 abd_get_from_buf_struct(&labd2, lbuf2, lsize); 8784 8785 boolean_t ret = B_FALSE; 8786 if (zio_decompress_data(cfunc, pabd, 8787 &labd, psize, lsize, NULL) == 0 && 8788 zio_decompress_data(cfunc, pabd, 8789 &labd2, psize, lsize, NULL) == 0 && 8790 memcmp(lbuf, lbuf2, lsize) == 0) 8791 ret = B_TRUE; 8792 8793 abd_free(&labd2); 8794 abd_free(&labd); 8795 8796 return (ret); 8797 } 8798 8799 static uint64_t 8800 zdb_decompress_block(abd_t *pabd, void *buf, void *lbuf, uint64_t lsize, 8801 uint64_t psize, int flags) 8802 { 8803 (void) buf; 8804 uint64_t orig_lsize = lsize; 8805 boolean_t tryzle = ((getenv("ZDB_NO_ZLE") == NULL)); 8806 /* 8807 * We don't know how the data was compressed, so just try 8808 * every decompress function at every inflated blocksize. 
8809 */ 8810 void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); 8811 int cfuncs[ZIO_COMPRESS_FUNCTIONS] = { 0 }; 8812 int *cfuncp = cfuncs; 8813 uint64_t maxlsize = SPA_MAXBLOCKSIZE; 8814 uint64_t mask = ZIO_COMPRESS_MASK(ON) | ZIO_COMPRESS_MASK(OFF) | 8815 ZIO_COMPRESS_MASK(INHERIT) | ZIO_COMPRESS_MASK(EMPTY) | 8816 ZIO_COMPRESS_MASK(ZLE); 8817 *cfuncp++ = ZIO_COMPRESS_LZ4; 8818 *cfuncp++ = ZIO_COMPRESS_LZJB; 8819 mask |= ZIO_COMPRESS_MASK(LZ4) | ZIO_COMPRESS_MASK(LZJB); 8820 /* 8821 * Every gzip level has the same decompressor, no need to 8822 * run it 9 times per bruteforce attempt. 8823 */ 8824 mask |= ZIO_COMPRESS_MASK(GZIP_2) | ZIO_COMPRESS_MASK(GZIP_3); 8825 mask |= ZIO_COMPRESS_MASK(GZIP_4) | ZIO_COMPRESS_MASK(GZIP_5); 8826 mask |= ZIO_COMPRESS_MASK(GZIP_6) | ZIO_COMPRESS_MASK(GZIP_7); 8827 mask |= ZIO_COMPRESS_MASK(GZIP_8) | ZIO_COMPRESS_MASK(GZIP_9); 8828 for (int c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) 8829 if (((1ULL << c) & mask) == 0) 8830 *cfuncp++ = c; 8831 8832 /* 8833 * On the one hand, with SPA_MAXBLOCKSIZE at 16MB, this 8834 * could take a while and we should let the user know 8835 * we are not stuck. On the other hand, printing progress 8836 * info gets old after a while. User can specify 'v' flag 8837 * to see the progression. 8838 */ 8839 if (lsize == psize) 8840 lsize += SPA_MINBLOCKSIZE; 8841 else 8842 maxlsize = lsize; 8843 8844 for (; lsize <= maxlsize; lsize += SPA_MINBLOCKSIZE) { 8845 for (cfuncp = cfuncs; *cfuncp; cfuncp++) { 8846 if (try_decompress_block(pabd, lsize, psize, flags, 8847 *cfuncp, lbuf, lbuf2)) { 8848 tryzle = B_FALSE; 8849 break; 8850 } 8851 } 8852 if (*cfuncp != 0) 8853 break; 8854 } 8855 if (tryzle) { 8856 for (lsize = orig_lsize; lsize <= maxlsize; 8857 lsize += SPA_MINBLOCKSIZE) { 8858 if (try_decompress_block(pabd, lsize, psize, flags, 8859 ZIO_COMPRESS_ZLE, lbuf, lbuf2)) { 8860 *cfuncp = ZIO_COMPRESS_ZLE; 8861 break; 8862 } 8863 } 8864 } 8865 umem_free(lbuf2, SPA_MAXBLOCKSIZE); 8866 8867 if (*cfuncp == ZIO_COMPRESS_ZLE) { 8868 printf("\nZLE decompression was selected. If you " 8869 "suspect the results are wrong,\ntry avoiding ZLE " 8870 "by setting and exporting ZDB_NO_ZLE=\"true\"\n"); 8871 } 8872 8873 return (lsize > maxlsize ? -1 : lsize); 8874 } 8875 8876 /* 8877 * Read a block from a pool and print it out. 
The syntax of the 8878 * block descriptor is: 8879 * 8880 * pool:vdev_specifier:offset:[lsize/]psize[:flags] 8881 * 8882 * pool - The name of the pool you wish to read from 8883 * vdev_specifier - Which vdev (see comment for zdb_vdev_lookup) 8884 * offset - offset, in hex, in bytes 8885 * size - Amount of data to read, in hex, in bytes 8886 * flags - A string of characters specifying options 8887 * b: Decode a blkptr at given offset within block 8888 * c: Calculate and display checksums 8889 * d: Decompress data before dumping 8890 * e: Byteswap data before dumping 8891 * g: Display data as a gang block header 8892 * i: Display as an indirect block 8893 * r: Dump raw data to stdout 8894 * v: Verbose 8895 * 8896 */ 8897 static void 8898 zdb_read_block(char *thing, spa_t *spa) 8899 { 8900 blkptr_t blk, *bp = &blk; 8901 dva_t *dva = bp->blk_dva; 8902 int flags = 0; 8903 uint64_t offset = 0, psize = 0, lsize = 0, blkptr_offset = 0; 8904 zio_t *zio; 8905 vdev_t *vd; 8906 abd_t *pabd; 8907 void *lbuf, *buf; 8908 char *s, *p, *dup, *flagstr, *sizes, *tmp = NULL; 8909 const char *vdev, *errmsg = NULL; 8910 int i, len, error; 8911 boolean_t borrowed = B_FALSE, found = B_FALSE; 8912 8913 dup = strdup(thing); 8914 s = strtok_r(dup, ":", &tmp); 8915 vdev = s ?: ""; 8916 s = strtok_r(NULL, ":", &tmp); 8917 offset = strtoull(s ? s : "", NULL, 16); 8918 sizes = strtok_r(NULL, ":", &tmp); 8919 s = strtok_r(NULL, ":", &tmp); 8920 flagstr = strdup(s ?: ""); 8921 8922 if (!zdb_parse_block_sizes(sizes, &lsize, &psize)) 8923 errmsg = "invalid size(s)"; 8924 if (!IS_P2ALIGNED(psize, DEV_BSIZE) || !IS_P2ALIGNED(lsize, DEV_BSIZE)) 8925 errmsg = "size must be a multiple of sector size"; 8926 if (!IS_P2ALIGNED(offset, DEV_BSIZE)) 8927 errmsg = "offset must be a multiple of sector size"; 8928 if (errmsg) { 8929 (void) printf("Invalid block specifier: %s - %s\n", 8930 thing, errmsg); 8931 goto done; 8932 } 8933 8934 tmp = NULL; 8935 for (s = strtok_r(flagstr, ":", &tmp); 8936 s != NULL; 8937 s = strtok_r(NULL, ":", &tmp)) { 8938 len = strlen(flagstr); 8939 for (i = 0; i < len; i++) { 8940 int bit = flagbits[(uchar_t)flagstr[i]]; 8941 8942 if (bit == 0) { 8943 (void) printf("***Ignoring flag: %c\n", 8944 (uchar_t)flagstr[i]); 8945 continue; 8946 } 8947 found = B_TRUE; 8948 flags |= bit; 8949 8950 p = &flagstr[i + 1]; 8951 if (*p != ':' && *p != '\0') { 8952 int j = 0, nextbit = flagbits[(uchar_t)*p]; 8953 char *end, offstr[8] = { 0 }; 8954 if ((bit == ZDB_FLAG_PRINT_BLKPTR) && 8955 (nextbit == 0)) { 8956 /* look ahead to isolate the offset */ 8957 while (nextbit == 0 && 8958 strchr(flagbitstr, *p) == NULL) { 8959 offstr[j] = *p; 8960 j++; 8961 if (i + j > strlen(flagstr)) 8962 break; 8963 p++; 8964 nextbit = flagbits[(uchar_t)*p]; 8965 } 8966 blkptr_offset = strtoull(offstr, &end, 8967 16); 8968 i += j; 8969 } else if (nextbit == 0) { 8970 (void) printf("***Ignoring flag arg:" 8971 " '%c'\n", (uchar_t)*p); 8972 } 8973 } 8974 } 8975 } 8976 if (blkptr_offset % sizeof (blkptr_t)) { 8977 printf("Block pointer offset 0x%llx " 8978 "must be divisible by 0x%x\n", 8979 (longlong_t)blkptr_offset, (int)sizeof (blkptr_t)); 8980 goto done; 8981 } 8982 if (found == B_FALSE && strlen(flagstr) > 0) { 8983 printf("Invalid flag arg: '%s'\n", flagstr); 8984 goto done; 8985 } 8986 8987 vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev); 8988 if (vd == NULL) { 8989 (void) printf("***Invalid vdev: %s\n", vdev); 8990 goto done; 8991 } else { 8992 if (vd->vdev_path) 8993 (void) fprintf(stderr, "Found vdev: %s\n", 8994 vd->vdev_path); 8995 else 
8996 (void) fprintf(stderr, "Found vdev type: %s\n", 8997 vd->vdev_ops->vdev_op_type); 8998 } 8999 9000 pabd = abd_alloc_for_io(SPA_MAXBLOCKSIZE, B_FALSE); 9001 lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); 9002 9003 BP_ZERO(bp); 9004 9005 DVA_SET_VDEV(&dva[0], vd->vdev_id); 9006 DVA_SET_OFFSET(&dva[0], offset); 9007 DVA_SET_GANG(&dva[0], 0); 9008 DVA_SET_ASIZE(&dva[0], vdev_psize_to_asize(vd, psize)); 9009 9010 BP_SET_BIRTH(bp, TXG_INITIAL, TXG_INITIAL); 9011 9012 BP_SET_LSIZE(bp, lsize); 9013 BP_SET_PSIZE(bp, psize); 9014 BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF); 9015 BP_SET_CHECKSUM(bp, ZIO_CHECKSUM_OFF); 9016 BP_SET_TYPE(bp, DMU_OT_NONE); 9017 BP_SET_LEVEL(bp, 0); 9018 BP_SET_DEDUP(bp, 0); 9019 BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER); 9020 9021 spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 9022 zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); 9023 9024 if (vd == vd->vdev_top) { 9025 /* 9026 * Treat this as a normal block read. 9027 */ 9028 zio_nowait(zio_read(zio, spa, bp, pabd, psize, NULL, NULL, 9029 ZIO_PRIORITY_SYNC_READ, 9030 ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL)); 9031 } else { 9032 /* 9033 * Treat this as a vdev child I/O. 9034 */ 9035 zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pabd, 9036 psize, ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ, 9037 ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY | 9038 ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW | ZIO_FLAG_OPTIONAL, 9039 NULL, NULL)); 9040 } 9041 9042 error = zio_wait(zio); 9043 spa_config_exit(spa, SCL_STATE, FTAG); 9044 9045 if (error) { 9046 (void) printf("Read of %s failed, error: %d\n", thing, error); 9047 goto out; 9048 } 9049 9050 uint64_t orig_lsize = lsize; 9051 buf = lbuf; 9052 if (flags & ZDB_FLAG_DECOMPRESS) { 9053 lsize = zdb_decompress_block(pabd, buf, lbuf, 9054 lsize, psize, flags); 9055 if (lsize == -1) { 9056 (void) printf("Decompress of %s failed\n", thing); 9057 goto out; 9058 } 9059 } else { 9060 buf = abd_borrow_buf_copy(pabd, lsize); 9061 borrowed = B_TRUE; 9062 } 9063 /* 9064 * Try to detect invalid block pointer. If invalid, try 9065 * decompressing. 9066 */ 9067 if ((flags & ZDB_FLAG_PRINT_BLKPTR || flags & ZDB_FLAG_INDIRECT) && 9068 !(flags & ZDB_FLAG_DECOMPRESS)) { 9069 const blkptr_t *b = (const blkptr_t *)(void *) 9070 ((uintptr_t)buf + (uintptr_t)blkptr_offset); 9071 if (zfs_blkptr_verify(spa, b, 9072 BLK_CONFIG_NEEDED, BLK_VERIFY_ONLY)) { 9073 abd_return_buf_copy(pabd, buf, lsize); 9074 borrowed = B_FALSE; 9075 buf = lbuf; 9076 lsize = zdb_decompress_block(pabd, buf, 9077 lbuf, lsize, psize, flags); 9078 b = (const blkptr_t *)(void *) 9079 ((uintptr_t)buf + (uintptr_t)blkptr_offset); 9080 if (lsize == -1 || zfs_blkptr_verify(spa, b, 9081 BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) { 9082 printf("invalid block pointer at this DVA\n"); 9083 goto out; 9084 } 9085 } 9086 } 9087 9088 if (flags & ZDB_FLAG_PRINT_BLKPTR) 9089 zdb_print_blkptr((blkptr_t *)(void *) 9090 ((uintptr_t)buf + (uintptr_t)blkptr_offset), flags); 9091 else if (flags & ZDB_FLAG_RAW) 9092 zdb_dump_block_raw(buf, lsize, flags); 9093 else if (flags & ZDB_FLAG_INDIRECT) 9094 zdb_dump_indirect((blkptr_t *)buf, 9095 orig_lsize / sizeof (blkptr_t), flags); 9096 else if (flags & ZDB_FLAG_GBH) 9097 zdb_dump_gbh(buf, lsize, flags); 9098 else 9099 zdb_dump_block(thing, buf, lsize, flags); 9100 9101 /* 9102 * If :c was specified, iterate through the checksum table to 9103 * calculate and display each checksum for our specified 9104 * DVA and length. 
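	 *
	 * For example (hypothetical pool and offset):
	 *
	 *	zdb -R tank 0:400000:20000:c
	 *
	 * would print one cksum line per supported algorithm for the
	 * 0x20000 bytes at offset 0x400000 on vdev 0.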
9105 */ 9106 if ((flags & ZDB_FLAG_CHECKSUM) && !(flags & ZDB_FLAG_RAW) && 9107 !(flags & ZDB_FLAG_GBH)) { 9108 zio_t *czio; 9109 (void) printf("\n"); 9110 for (enum zio_checksum ck = ZIO_CHECKSUM_LABEL; 9111 ck < ZIO_CHECKSUM_FUNCTIONS; ck++) { 9112 9113 if ((zio_checksum_table[ck].ci_flags & 9114 ZCHECKSUM_FLAG_EMBEDDED) || 9115 ck == ZIO_CHECKSUM_NOPARITY) { 9116 continue; 9117 } 9118 BP_SET_CHECKSUM(bp, ck); 9119 spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 9120 czio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); 9121 if (vd == vd->vdev_top) { 9122 zio_nowait(zio_read(czio, spa, bp, pabd, psize, 9123 NULL, NULL, 9124 ZIO_PRIORITY_SYNC_READ, 9125 ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW | 9126 ZIO_FLAG_DONT_RETRY, NULL)); 9127 } else { 9128 zio_nowait(zio_vdev_child_io(czio, bp, vd, 9129 offset, pabd, psize, ZIO_TYPE_READ, 9130 ZIO_PRIORITY_SYNC_READ, 9131 ZIO_FLAG_DONT_PROPAGATE | 9132 ZIO_FLAG_DONT_RETRY | 9133 ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW | 9134 ZIO_FLAG_SPECULATIVE | 9135 ZIO_FLAG_OPTIONAL, NULL, NULL)); 9136 } 9137 error = zio_wait(czio); 9138 if (error == 0 || error == ECKSUM) { 9139 zio_t *ck_zio = zio_null(NULL, spa, NULL, 9140 NULL, NULL, 0); 9141 ck_zio->io_offset = 9142 DVA_GET_OFFSET(&bp->blk_dva[0]); 9143 ck_zio->io_bp = bp; 9144 zio_checksum_compute(ck_zio, ck, pabd, psize); 9145 printf( 9146 "%12s\t" 9147 "cksum=%016llx:%016llx:%016llx:%016llx\n", 9148 zio_checksum_table[ck].ci_name, 9149 (u_longlong_t)bp->blk_cksum.zc_word[0], 9150 (u_longlong_t)bp->blk_cksum.zc_word[1], 9151 (u_longlong_t)bp->blk_cksum.zc_word[2], 9152 (u_longlong_t)bp->blk_cksum.zc_word[3]); 9153 zio_wait(ck_zio); 9154 } else { 9155 printf("error %d reading block\n", error); 9156 } 9157 spa_config_exit(spa, SCL_STATE, FTAG); 9158 } 9159 } 9160 9161 if (borrowed) 9162 abd_return_buf_copy(pabd, buf, lsize); 9163 9164 out: 9165 abd_free(pabd); 9166 umem_free(lbuf, SPA_MAXBLOCKSIZE); 9167 done: 9168 free(flagstr); 9169 free(dup); 9170 } 9171 9172 static void 9173 zdb_embedded_block(char *thing) 9174 { 9175 blkptr_t bp = {{{{0}}}}; 9176 unsigned long long *words = (void *)&bp; 9177 char *buf; 9178 int err; 9179 9180 err = sscanf(thing, "%llx:%llx:%llx:%llx:%llx:%llx:%llx:%llx:" 9181 "%llx:%llx:%llx:%llx:%llx:%llx:%llx:%llx", 9182 words + 0, words + 1, words + 2, words + 3, 9183 words + 4, words + 5, words + 6, words + 7, 9184 words + 8, words + 9, words + 10, words + 11, 9185 words + 12, words + 13, words + 14, words + 15); 9186 if (err != 16) { 9187 (void) fprintf(stderr, "invalid input format\n"); 9188 zdb_exit(1); 9189 } 9190 ASSERT3U(BPE_GET_LSIZE(&bp), <=, SPA_MAXBLOCKSIZE); 9191 buf = malloc(SPA_MAXBLOCKSIZE); 9192 if (buf == NULL) { 9193 (void) fprintf(stderr, "out of memory\n"); 9194 zdb_exit(1); 9195 } 9196 err = decode_embedded_bp(&bp, buf, BPE_GET_LSIZE(&bp)); 9197 if (err != 0) { 9198 (void) fprintf(stderr, "decode failed: %u\n", err); 9199 zdb_exit(1); 9200 } 9201 zdb_dump_block_raw(buf, BPE_GET_LSIZE(&bp), 0); 9202 free(buf); 9203 } 9204 9205 /* check for valid hex or decimal numeric string */ 9206 static boolean_t 9207 zdb_numeric(char *str) 9208 { 9209 int i = 0, len; 9210 9211 len = strlen(str); 9212 if (len == 0) 9213 return (B_FALSE); 9214 if (strncmp(str, "0x", 2) == 0 || strncmp(str, "0X", 2) == 0) 9215 i = 2; 9216 for (; i < len; i++) { 9217 if (!isxdigit(str[i])) 9218 return (B_FALSE); 9219 } 9220 return (B_TRUE); 9221 } 9222 9223 static int 9224 dummy_get_file_info(dmu_object_type_t bonustype, const void *data, 9225 zfs_file_info_t *zoi) 9226 { 9227 (void) data, (void) zoi; 9228 9229 
if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA) 9230 return (ENOENT); 9231 9232 (void) fprintf(stderr, "dummy_get_file_info: not implemented"); 9233 abort(); 9234 } 9235 9236 int 9237 main(int argc, char **argv) 9238 { 9239 int c; 9240 int dump_all = 1; 9241 int verbose = 0; 9242 int error = 0; 9243 char **searchdirs = NULL; 9244 int nsearch = 0; 9245 char *target, *target_pool, dsname[ZFS_MAX_DATASET_NAME_LEN]; 9246 nvlist_t *policy = NULL; 9247 uint64_t max_txg = UINT64_MAX; 9248 int64_t objset_id = -1; 9249 uint64_t object; 9250 int flags = ZFS_IMPORT_MISSING_LOG; 9251 int rewind = ZPOOL_NEVER_REWIND; 9252 char *spa_config_path_env, *objset_str; 9253 boolean_t target_is_spa = B_TRUE, dataset_lookup = B_FALSE; 9254 nvlist_t *cfg = NULL; 9255 struct sigaction action; 9256 boolean_t force_import = B_FALSE; 9257 boolean_t config_path_console = B_FALSE; 9258 char pbuf[MAXPATHLEN]; 9259 9260 dprintf_setup(&argc, argv); 9261 9262 /* 9263 * Set up signal handlers, so if we crash due to bad on-disk data we 9264 * can get more info. Unlike ztest, we don't bail out if we can't set 9265 * up signal handlers, because zdb is very useful without them. 9266 */ 9267 action.sa_handler = sig_handler; 9268 sigemptyset(&action.sa_mask); 9269 action.sa_flags = 0; 9270 if (sigaction(SIGSEGV, &action, NULL) < 0) { 9271 (void) fprintf(stderr, "zdb: cannot catch SIGSEGV: %s\n", 9272 strerror(errno)); 9273 } 9274 if (sigaction(SIGABRT, &action, NULL) < 0) { 9275 (void) fprintf(stderr, "zdb: cannot catch SIGABRT: %s\n", 9276 strerror(errno)); 9277 } 9278 9279 /* 9280 * If there is an environment variable SPA_CONFIG_PATH it overrides 9281 * default spa_config_path setting. If -U flag is specified it will 9282 * override this environment variable settings once again. 9283 */ 9284 spa_config_path_env = getenv("SPA_CONFIG_PATH"); 9285 if (spa_config_path_env != NULL) 9286 spa_config_path = spa_config_path_env; 9287 9288 /* 9289 * For performance reasons, we set this tunable down. We do so before 9290 * the arg parsing section so that the user can override this value if 9291 * they choose. 
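	 * (For example, 'zdb -o zfs_btree_verify_intensity=5 <pool>' is
	 * a sketch of how the stricter btree checks could be turned back
	 * on; -o accepts any name=value pair understood by
	 * handle_tunable_option().)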
9292 */ 9293 zfs_btree_verify_intensity = 3; 9294 9295 struct option long_options[] = { 9296 {"ignore-assertions", no_argument, NULL, 'A'}, 9297 {"block-stats", no_argument, NULL, 'b'}, 9298 {"backup", no_argument, NULL, 'B'}, 9299 {"checksum", no_argument, NULL, 'c'}, 9300 {"config", no_argument, NULL, 'C'}, 9301 {"datasets", no_argument, NULL, 'd'}, 9302 {"dedup-stats", no_argument, NULL, 'D'}, 9303 {"exported", no_argument, NULL, 'e'}, 9304 {"embedded-block-pointer", no_argument, NULL, 'E'}, 9305 {"automatic-rewind", no_argument, NULL, 'F'}, 9306 {"dump-debug-msg", no_argument, NULL, 'G'}, 9307 {"history", no_argument, NULL, 'h'}, 9308 {"intent-logs", no_argument, NULL, 'i'}, 9309 {"inflight", required_argument, NULL, 'I'}, 9310 {"checkpointed-state", no_argument, NULL, 'k'}, 9311 {"key", required_argument, NULL, 'K'}, 9312 {"label", no_argument, NULL, 'l'}, 9313 {"disable-leak-tracking", no_argument, NULL, 'L'}, 9314 {"metaslabs", no_argument, NULL, 'm'}, 9315 {"metaslab-groups", no_argument, NULL, 'M'}, 9316 {"numeric", no_argument, NULL, 'N'}, 9317 {"option", required_argument, NULL, 'o'}, 9318 {"object-lookups", no_argument, NULL, 'O'}, 9319 {"path", required_argument, NULL, 'p'}, 9320 {"parseable", no_argument, NULL, 'P'}, 9321 {"skip-label", no_argument, NULL, 'q'}, 9322 {"copy-object", no_argument, NULL, 'r'}, 9323 {"read-block", no_argument, NULL, 'R'}, 9324 {"io-stats", no_argument, NULL, 's'}, 9325 {"simulate-dedup", no_argument, NULL, 'S'}, 9326 {"txg", required_argument, NULL, 't'}, 9327 {"brt-stats", no_argument, NULL, 'T'}, 9328 {"uberblock", no_argument, NULL, 'u'}, 9329 {"cachefile", required_argument, NULL, 'U'}, 9330 {"verbose", no_argument, NULL, 'v'}, 9331 {"verbatim", no_argument, NULL, 'V'}, 9332 {"dump-blocks", required_argument, NULL, 'x'}, 9333 {"extreme-rewind", no_argument, NULL, 'X'}, 9334 {"all-reconstruction", no_argument, NULL, 'Y'}, 9335 {"livelist", no_argument, NULL, 'y'}, 9336 {"zstd-headers", no_argument, NULL, 'Z'}, 9337 {0, 0, 0, 0} 9338 }; 9339 9340 while ((c = getopt_long(argc, argv, 9341 "AbBcCdDeEFGhiI:kK:lLmMNo:Op:PqrRsSt:TuU:vVx:XYyZ", 9342 long_options, NULL)) != -1) { 9343 switch (c) { 9344 case 'b': 9345 case 'B': 9346 case 'c': 9347 case 'C': 9348 case 'd': 9349 case 'D': 9350 case 'E': 9351 case 'G': 9352 case 'h': 9353 case 'i': 9354 case 'l': 9355 case 'm': 9356 case 'M': 9357 case 'N': 9358 case 'O': 9359 case 'r': 9360 case 'R': 9361 case 's': 9362 case 'S': 9363 case 'T': 9364 case 'u': 9365 case 'y': 9366 case 'Z': 9367 dump_opt[c]++; 9368 dump_all = 0; 9369 break; 9370 case 'A': 9371 case 'e': 9372 case 'F': 9373 case 'k': 9374 case 'L': 9375 case 'P': 9376 case 'q': 9377 case 'X': 9378 dump_opt[c]++; 9379 break; 9380 case 'Y': 9381 zfs_reconstruct_indirect_combinations_max = INT_MAX; 9382 zfs_deadman_enabled = 0; 9383 break; 9384 /* NB: Sort single match options below. 
*/ 9385 case 'I': 9386 max_inflight_bytes = strtoull(optarg, NULL, 0); 9387 if (max_inflight_bytes == 0) { 9388 (void) fprintf(stderr, "maximum number " 9389 "of inflight bytes must be greater " 9390 "than 0\n"); 9391 usage(); 9392 } 9393 break; 9394 case 'K': 9395 dump_opt[c]++; 9396 key_material = strdup(optarg); 9397 /* redact key material in process table */ 9398 while (*optarg != '\0') { *optarg++ = '*'; } 9399 break; 9400 case 'o': 9401 dump_opt[c]++; 9402 dump_all = 0; 9403 error = handle_tunable_option(optarg, B_FALSE); 9404 if (error != 0) 9405 zdb_exit(1); 9406 break; 9407 case 'p': 9408 if (searchdirs == NULL) { 9409 searchdirs = umem_alloc(sizeof (char *), 9410 UMEM_NOFAIL); 9411 } else { 9412 char **tmp = umem_alloc((nsearch + 1) * 9413 sizeof (char *), UMEM_NOFAIL); 9414 memcpy(tmp, searchdirs, nsearch * 9415 sizeof (char *)); 9416 umem_free(searchdirs, 9417 nsearch * sizeof (char *)); 9418 searchdirs = tmp; 9419 } 9420 searchdirs[nsearch++] = optarg; 9421 break; 9422 case 't': 9423 max_txg = strtoull(optarg, NULL, 0); 9424 if (max_txg < TXG_INITIAL) { 9425 (void) fprintf(stderr, "incorrect txg " 9426 "specified: %s\n", optarg); 9427 usage(); 9428 } 9429 break; 9430 case 'U': 9431 config_path_console = B_TRUE; 9432 spa_config_path = optarg; 9433 if (spa_config_path[0] != '/') { 9434 (void) fprintf(stderr, 9435 "cachefile must be an absolute path " 9436 "(i.e. start with a slash)\n"); 9437 usage(); 9438 } 9439 break; 9440 case 'v': 9441 verbose++; 9442 break; 9443 case 'V': 9444 flags = ZFS_IMPORT_VERBATIM; 9445 break; 9446 case 'x': 9447 vn_dumpdir = optarg; 9448 break; 9449 default: 9450 usage(); 9451 break; 9452 } 9453 } 9454 9455 if (!dump_opt['e'] && searchdirs != NULL) { 9456 (void) fprintf(stderr, "-p option requires use of -e\n"); 9457 usage(); 9458 } 9459 #if defined(_LP64) 9460 /* 9461 * ZDB does not typically re-read blocks; therefore limit the ARC 9462 * to 256 MB, which can be used entirely for metadata. 9463 */ 9464 zfs_arc_min = 2ULL << SPA_MAXBLOCKSHIFT; 9465 zfs_arc_max = 256 * 1024 * 1024; 9466 #endif 9467 9468 /* 9469 * "zdb -c" uses checksum-verifying scrub i/os which are async reads. 9470 * "zdb -b" uses traversal prefetch which uses async reads. 9471 * For good performance, let several of them be active at once. 9472 */ 9473 zfs_vdev_async_read_max_active = 10; 9474 9475 /* 9476 * Disable reference tracking for better performance. 9477 */ 9478 reference_tracking_enable = B_FALSE; 9479 9480 /* 9481 * Do not fail spa_load when spa_load_verify fails. This is needed 9482 * to load non-idle pools. 9483 */ 9484 spa_load_verify_dryrun = B_TRUE; 9485 9486 /* 9487 * ZDB should have ability to read spacemaps. 
9488 */ 9489 spa_mode_readable_spacemaps = B_TRUE; 9490 9491 if (dump_all) 9492 verbose = MAX(verbose, 1); 9493 9494 for (c = 0; c < 256; c++) { 9495 if (dump_all && strchr("ABeEFkKlLNOPrRSXy", c) == NULL) 9496 dump_opt[c] = 1; 9497 if (dump_opt[c]) 9498 dump_opt[c] += verbose; 9499 } 9500 9501 libspl_set_assert_ok((dump_opt['A'] == 1) || (dump_opt['A'] > 2)); 9502 zfs_recover = (dump_opt['A'] > 1); 9503 9504 argc -= optind; 9505 argv += optind; 9506 if (argc < 2 && dump_opt['R']) 9507 usage(); 9508 9509 target = argv[0]; 9510 9511 /* 9512 * Automate cachefile 9513 */ 9514 if (!spa_config_path_env && !config_path_console && target && 9515 libzfs_core_init() == 0) { 9516 char *pname = strdup(target); 9517 const char *value; 9518 nvlist_t *pnvl = NULL; 9519 nvlist_t *vnvl = NULL; 9520 9521 if (strpbrk(pname, "/@") != NULL) 9522 *strpbrk(pname, "/@") = '\0'; 9523 9524 if (pname && lzc_get_props(pname, &pnvl) == 0) { 9525 if (nvlist_lookup_nvlist(pnvl, "cachefile", 9526 &vnvl) == 0) { 9527 value = fnvlist_lookup_string(vnvl, 9528 ZPROP_VALUE); 9529 } else { 9530 value = "-"; 9531 } 9532 strlcpy(pbuf, value, sizeof (pbuf)); 9533 if (pbuf[0] != '\0') { 9534 if (pbuf[0] == '/') { 9535 if (access(pbuf, F_OK) == 0) 9536 spa_config_path = pbuf; 9537 else 9538 force_import = B_TRUE; 9539 } else if ((strcmp(pbuf, "-") == 0 && 9540 access(ZPOOL_CACHE, F_OK) != 0) || 9541 strcmp(pbuf, "none") == 0) { 9542 force_import = B_TRUE; 9543 } 9544 } 9545 nvlist_free(vnvl); 9546 } 9547 9548 free(pname); 9549 nvlist_free(pnvl); 9550 libzfs_core_fini(); 9551 } 9552 9553 dmu_objset_register_type(DMU_OST_ZFS, dummy_get_file_info); 9554 kernel_init(SPA_MODE_READ); 9555 kernel_init_done = B_TRUE; 9556 9557 if (dump_opt['E']) { 9558 if (argc != 1) 9559 usage(); 9560 zdb_embedded_block(argv[0]); 9561 error = 0; 9562 goto fini; 9563 } 9564 9565 if (argc < 1) { 9566 if (!dump_opt['e'] && dump_opt['C']) { 9567 dump_cachefile(spa_config_path); 9568 error = 0; 9569 goto fini; 9570 } 9571 if (dump_opt['o']) 9572 /* 9573 * Avoid blasting tunable options off the top of the 9574 * screen. 9575 */ 9576 zdb_exit(1); 9577 usage(); 9578 } 9579 9580 if (dump_opt['l']) { 9581 error = dump_label(argv[0]); 9582 goto fini; 9583 } 9584 9585 if (dump_opt['X'] || dump_opt['F']) 9586 rewind = ZPOOL_DO_REWIND | 9587 (dump_opt['X'] ? ZPOOL_EXTREME_REWIND : 0); 9588 9589 /* -N implies -d */ 9590 if (dump_opt['N'] && dump_opt['d'] == 0) 9591 dump_opt['d'] = dump_opt['N']; 9592 9593 if (nvlist_alloc(&policy, NV_UNIQUE_NAME_TYPE, 0) != 0 || 9594 nvlist_add_uint64(policy, ZPOOL_LOAD_REQUEST_TXG, max_txg) != 0 || 9595 nvlist_add_uint32(policy, ZPOOL_LOAD_REWIND_POLICY, rewind) != 0) 9596 fatal("internal error: %s", strerror(ENOMEM)); 9597 9598 error = 0; 9599 9600 if (strpbrk(target, "/@") != NULL) { 9601 size_t targetlen; 9602 9603 target_pool = strdup(target); 9604 *strpbrk(target_pool, "/@") = '\0'; 9605 9606 target_is_spa = B_FALSE; 9607 targetlen = strlen(target); 9608 if (targetlen && target[targetlen - 1] == '/') 9609 target[targetlen - 1] = '\0'; 9610 9611 /* 9612 * See if an objset ID was supplied (-d <pool>/<objset ID>). 9613 * To disambiguate tank/100, consider the 100 as objsetID 9614 * if -N was given, otherwise 100 is an objsetID iff 9615 * tank/100 as a named dataset fails on lookup. 
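		 * For example, 'zdb -d tank/100' first tries to open a
		 * dataset literally named "100"; only if that lookup
		 * fails is 100 retried as an objset ID. With -N, 100 is
		 * treated as an objset ID unconditionally.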
		objset_str = strchr(target, '/');
		if (objset_str && strlen(objset_str) > 1 &&
		    zdb_numeric(objset_str + 1)) {
			char *endptr;
			errno = 0;
			objset_str++;
			objset_id = strtoull(objset_str, &endptr, 0);
			/* dataset 0 is the same as opening the pool */
			if (errno == 0 && endptr != objset_str &&
			    objset_id != 0) {
				if (dump_opt['N'])
					dataset_lookup = B_TRUE;
			}
			/* a normal dataset name, not an objset ID */
			if (endptr == objset_str) {
				objset_id = -1;
			}
		} else if (objset_str && !zdb_numeric(objset_str + 1) &&
		    dump_opt['N']) {
			printf("Supply a numeric objset ID with -N\n");
			error = 1;
			goto fini;
		}
	} else {
		target_pool = target;
	}

	if (dump_opt['e'] || force_import) {
		importargs_t args = { 0 };

		/*
		 * If no device search path was provided with -p, search
		 * in /dev.
		 */
		if (searchdirs == NULL) {
			searchdirs = umem_alloc(sizeof (char *), UMEM_NOFAIL);
			searchdirs[nsearch++] = (char *)ZFS_DEVDIR;
		}

		args.paths = nsearch;
		args.path = searchdirs;
		args.can_be_active = B_TRUE;

		libpc_handle_t lpch = {
			.lpc_lib_handle = NULL,
			.lpc_ops = &libzpool_config_ops,
			.lpc_printerr = B_TRUE
		};
		error = zpool_find_config(&lpch, target_pool, &cfg, &args);

		if (error == 0) {
			if (nvlist_add_nvlist(cfg,
			    ZPOOL_LOAD_POLICY, policy) != 0) {
				fatal("can't open '%s': %s",
				    target, strerror(ENOMEM));
			}

			if (dump_opt['C'] > 1) {
				(void) printf("\nConfiguration for import:\n");
				dump_nvlist(cfg, 8);
			}

			/*
			 * Disable the activity check to allow examination of
			 * active pools.
			 */
			error = spa_import(target_pool, cfg, NULL,
			    flags | ZFS_IMPORT_SKIP_MMP);
		}
	}

	if (searchdirs != NULL) {
		umem_free(searchdirs, nsearch * sizeof (char *));
		searchdirs = NULL;
	}

	/*
	 * We must process the -O option, or call dump_path, only after the
	 * -e option has been processed, since -e imports the pool to the
	 * namespace if it's not already in the cachefile.
	 */
	if (dump_opt['O']) {
		if (argc != 2)
			usage();
		dump_opt['v'] = verbose + 3;
		error = dump_path(argv[0], argv[1], NULL);
		goto fini;
	}

	if (dump_opt['r']) {
		target_is_spa = B_FALSE;
		if (argc != 3)
			usage();
		dump_opt['v'] = verbose;
		error = dump_path(argv[0], argv[1], &object);
		if (error != 0)
			fatal("internal error: %s", strerror(error));
	}

	/*
	 * import_checkpointed_state assumes that the target pool we pass it
	 * is already part of the spa namespace. Because of that we must
	 * always call it after the -e option has been processed, which
	 * imports the pool to the namespace if it's not already in the
	 * cachefile.
	 */
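	/*
	 * Note: checkpoint_pool is allocated by import_checkpointed_state()
	 * and checkpoint_target may be returned through its last argument;
	 * both are freed near the end of main() when -k was given.
	 */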
	char *checkpoint_pool = NULL;
	char *checkpoint_target = NULL;
	if (dump_opt['k']) {
		checkpoint_pool = import_checkpointed_state(target, cfg,
		    target_is_spa, &checkpoint_target);

		if (checkpoint_target != NULL)
			target = checkpoint_target;
	}

	if (cfg != NULL) {
		nvlist_free(cfg);
		cfg = NULL;
	}

	if (target_pool != target)
		free(target_pool);

	if (error == 0) {
		if (dump_opt['k'] && (target_is_spa || dump_opt['R'])) {
			ASSERT(checkpoint_pool != NULL);
			ASSERT(checkpoint_target == NULL);

			error = spa_open(checkpoint_pool, &spa, FTAG);
			if (error != 0) {
				fatal("Tried to open pool \"%s\" but "
				    "spa_open() failed with error %d\n",
				    checkpoint_pool, error);
			}

		} else if (target_is_spa || dump_opt['R'] || dump_opt['B'] ||
		    objset_id == 0) {
			zdb_set_skip_mmp(target);
			error = spa_open_rewind(target, &spa, FTAG, policy,
			    NULL);
			if (error) {
				/*
				 * If we're missing the log device then
				 * try opening the pool after clearing the
				 * log state.
				 */
				mutex_enter(&spa_namespace_lock);
				if ((spa = spa_lookup(target)) != NULL &&
				    spa->spa_log_state == SPA_LOG_MISSING) {
					spa->spa_log_state = SPA_LOG_CLEAR;
					error = 0;
				}
				mutex_exit(&spa_namespace_lock);

				if (!error) {
					error = spa_open_rewind(target, &spa,
					    FTAG, policy, NULL);
				}
			}
		} else if (strpbrk(target, "#") != NULL) {
			dsl_pool_t *dp;
			error = dsl_pool_hold(target, FTAG, &dp);
			if (error != 0) {
				fatal("can't dump '%s': %s", target,
				    strerror(error));
			}
			error = dump_bookmark(dp, target, B_TRUE, verbose > 1);
			dsl_pool_rele(dp, FTAG);
			if (error != 0) {
				fatal("can't dump '%s': %s", target,
				    strerror(error));
			}
			goto fini;
		} else {
			target_pool = strdup(target);
			if (strpbrk(target, "/@") != NULL)
				*strpbrk(target_pool, "/@") = '\0';

			zdb_set_skip_mmp(target);
			/*
			 * If -N was supplied, the user has indicated that
			 * zdb -d <pool>/<objsetID> is in effect. Otherwise
			 * we first assume that the dataset string is the
			 * dataset name. If dmu_objset_hold fails with the
			 * dataset string, and we have an objset_id, retry
			 * the lookup with the objset ID.
			 */
			boolean_t retry = B_TRUE;
retry_lookup:
			if (dataset_lookup == B_TRUE) {
				/*
				 * Use the supplied ID to get the name
				 * for open_objset.
				 */
				error = spa_open(target_pool, &spa, FTAG);
				if (error == 0) {
					error = name_from_objset_id(spa,
					    objset_id, dsname);
					spa_close(spa, FTAG);
					if (error == 0)
						target = dsname;
				}
			}
			if (error == 0) {
				if (objset_id > 0 && retry) {
					int err = dmu_objset_hold(target, FTAG,
					    &os);
					if (err) {
						dataset_lookup = B_TRUE;
						retry = B_FALSE;
						goto retry_lookup;
					} else {
						dmu_objset_rele(os, FTAG);
					}
				}
				error = open_objset(target, FTAG, &os);
			}
			if (error == 0)
				spa = dmu_objset_spa(os);
			free(target_pool);
		}
	}
	nvlist_free(policy);

	if (error)
		fatal("can't open '%s': %s", target, strerror(error));

	/*
	 * Set the pool failure mode to panic in order to prevent the pool
	 * from suspending. A suspended I/O will have no way to resume and
	 * can prevent the zdb(8) command from terminating as expected.
	 */
	if (spa != NULL)
		spa->spa_failmode = ZIO_FAILURE_MODE_PANIC;
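
	/*
	 * Note that this changes only the in-core spa_t; zdb runs in
	 * userspace with the pool opened read-only, so the on-disk
	 * "failmode" pool property is left untouched.
	 */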

	argv++;
	argc--;
	if (dump_opt['r']) {
		error = zdb_copy_object(os, object, argv[1]);
	} else if (!dump_opt['R']) {
		flagbits['d'] = ZOR_FLAG_DIRECTORY;
		flagbits['f'] = ZOR_FLAG_PLAIN_FILE;
		flagbits['m'] = ZOR_FLAG_SPACE_MAP;
		flagbits['z'] = ZOR_FLAG_ZAP;
		flagbits['A'] = ZOR_FLAG_ALL_TYPES;

		if (argc > 0 && dump_opt['d']) {
			zopt_object_args = argc;
			zopt_object_ranges = calloc(zopt_object_args,
			    sizeof (zopt_object_range_t));
			for (unsigned i = 0; i < zopt_object_args; i++) {
				int err;
				const char *msg = NULL;

				err = parse_object_range(argv[i],
				    &zopt_object_ranges[i], &msg);
				if (err != 0)
					fatal("Bad object or range: '%s': %s\n",
					    argv[i], msg ?: "");
			}
		} else if (argc > 0 && dump_opt['m']) {
			zopt_metaslab_args = argc;
			zopt_metaslab = calloc(zopt_metaslab_args,
			    sizeof (uint64_t));
			for (unsigned i = 0; i < zopt_metaslab_args; i++) {
				errno = 0;
				zopt_metaslab[i] = strtoull(argv[i], NULL, 0);
				if (zopt_metaslab[i] == 0 && errno != 0)
					fatal("bad number %s: %s", argv[i],
					    strerror(errno));
			}
		}
		if (dump_opt['B']) {
			dump_backup(target, objset_id,
			    argc > 0 ? argv[0] : NULL);
		} else if (os != NULL) {
			dump_objset(os);
		} else if (zopt_object_args > 0 && !dump_opt['m']) {
			dump_objset(spa->spa_meta_objset);
		} else {
			dump_zpool(spa);
		}
	} else {
		flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR;
		flagbits['c'] = ZDB_FLAG_CHECKSUM;
		flagbits['d'] = ZDB_FLAG_DECOMPRESS;
		flagbits['e'] = ZDB_FLAG_BSWAP;
		flagbits['g'] = ZDB_FLAG_GBH;
		flagbits['i'] = ZDB_FLAG_INDIRECT;
		flagbits['r'] = ZDB_FLAG_RAW;
		flagbits['v'] = ZDB_FLAG_VERBOSE;

		for (int i = 0; i < argc; i++)
			zdb_read_block(argv[i], spa);
	}

	if (dump_opt['k']) {
		free(checkpoint_pool);
		if (!target_is_spa)
			free(checkpoint_target);
	}

fini:
	if (spa != NULL)
		zdb_ddt_cleanup(spa);

	if (os != NULL) {
		close_objset(os, FTAG);
	} else if (spa != NULL) {
		spa_close(spa, FTAG);
	}

	fuid_table_destroy();

	dump_debug_buffer();

	if (kernel_init_done)
		kernel_fini();

	return (error);
}