/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
 * Copyright (c) 2014 Integros [integros.com]
 * Copyright 2017 Nexenta Systems, Inc.
 * Copyright (c) 2017, 2018 Lawrence Livermore National Security, LLC.
 * Copyright 2017 RackTop Systems.
 */

#include <stdio.h>
#include <unistd.h>
#include <stdio_ext.h>
#include <stdlib.h>
#include <ctype.h>
#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/dmu.h>
#include <sys/zap.h>
#include <sys/fs/zfs.h>
#include <sys/zfs_znode.h>
#include <sys/zfs_sa.h>
#include <sys/sa.h>
#include <sys/sa_impl.h>
#include <sys/vdev.h>
#include <sys/vdev_impl.h>
#include <sys/metaslab_impl.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_pool.h>
#include <sys/dbuf.h>
#include <sys/zil.h>
#include <sys/zil_impl.h>
#include <sys/stat.h>
#include <sys/resource.h>
#include <sys/dmu_traverse.h>
#include <sys/zio_checksum.h>
#include <sys/zio_compress.h>
#include <sys/zfs_fuid.h>
#include <sys/arc.h>
#include <sys/ddt.h>
#include <sys/zfeature.h>
#include <sys/abd.h>
#include <sys/blkptr.h>
#include <sys/dsl_scan.h>
#include <sys/dsl_crypt.h>
#include <zfs_comutil.h>
#include <libcmdutils.h>
#undef verify
#include <libzfs.h>

#include <libnvpair.h>
#include <libzutil.h>

#include "zdb.h"

#define	ZDB_COMPRESS_NAME(idx) ((idx) < ZIO_COMPRESS_FUNCTIONS ?	\
	zio_compress_table[(idx)].ci_name : "UNKNOWN")
#define	ZDB_CHECKSUM_NAME(idx) ((idx) < ZIO_CHECKSUM_FUNCTIONS ?	\
	zio_checksum_table[(idx)].ci_name : "UNKNOWN")
#define	ZDB_OT_NAME(idx) ((idx) < DMU_OT_NUMTYPES ?	\
	dmu_ot[(idx)].ot_name : DMU_OT_IS_VALID(idx) ?	\
	dmu_ot_byteswap[DMU_OT_BYTESWAP(idx)].ob_name : "UNKNOWN")
#define	ZDB_OT_TYPE(idx) ((idx) < DMU_OT_NUMTYPES ? (idx) :	\
	(idx) == DMU_OTN_ZAP_DATA || (idx) == DMU_OTN_ZAP_METADATA ?	\
	DMU_OT_ZAP_OTHER :	\
	(idx) == DMU_OTN_UINT64_DATA || (idx) == DMU_OTN_UINT64_METADATA ? \
	DMU_OT_UINT64_OTHER : DMU_OT_NUMTYPES)
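/*
 * Illustrative note (not part of the original source): ZDB_OT_NAME maps an
 * object type index to a printable name.  A core type indexes dmu_ot[]
 * directly, while the newer flag-encoded types fall back to the name of
 * their byteswap class, e.g.:
 *
 *	ZDB_OT_NAME(DMU_OT_DNODE)		-> "DMU dnode"
 *	ZDB_OT_NAME(DMU_OTN_ZAP_METADATA)	-> "zap"
 */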
extern int reference_tracking_enable;
extern boolean_t zfs_recover;
extern uint64_t zfs_arc_max, zfs_arc_meta_limit;
extern int zfs_vdev_async_read_max_active;
extern int aok;
extern boolean_t spa_load_verify_dryrun;
extern int zfs_btree_verify_intensity;

static const char cmdname[] = "zdb";
uint8_t dump_opt[256];

typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);

uint64_t *zopt_object = NULL;
static unsigned zopt_objects = 0;
uint64_t max_inflight = 1000;
static int leaked_objects = 0;

static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *);
static void mos_obj_refd(uint64_t);

/*
 * These libumem hooks provide a reasonable set of defaults for the allocator's
 * debugging facilities.
 */
const char *
_umem_debug_init()
{
	return ("default,verbose"); /* $UMEM_DEBUG setting */
}

const char *
_umem_logging_init(void)
{
	return ("fail,contents"); /* $UMEM_LOGGING setting */
}

static void
usage(void)
{
	(void) fprintf(stderr,
	    "Usage:\t%s [-AbcdDFGhikLMPsvX] [-e [-V] [-p <path> ...]] "
	    "[-I <inflight I/Os>]\n"
	    "\t\t[-o <var>=<value>]... [-t <txg>] [-U <cache>] [-x <dumpdir>]\n"
	    "\t\t[<poolname> [<object> ...]]\n"
	    "\t%s [-AdiPv] [-e [-V] [-p <path> ...]] [-U <cache>] <dataset> "
	    "[<object> ...]\n"
	    "\t%s -C [-A] [-U <cache>]\n"
	    "\t%s -l [-Aqu] <device>\n"
	    "\t%s -m [-AFLPX] [-e [-V] [-p <path> ...]] [-t <txg>] "
	    "[-U <cache>]\n\t\t<poolname> [<vdev> [<metaslab> ...]]\n"
	    "\t%s -O <dataset> <path>\n"
	    "\t%s -R [-A] [-e [-V] [-p <path> ...]] [-U <cache>]\n"
	    "\t\t<poolname> <vdev>:<offset>:<size>[:<flags>]\n"
	    "\t%s -E [-A] word0:word1:...:word15\n"
	    "\t%s -S [-AP] [-e [-V] [-p <path> ...]] [-U <cache>] "
	    "<poolname>\n\n",
	    cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname,
	    cmdname, cmdname);

	(void) fprintf(stderr, "    Dataset name must include at least one "
	    "separator character '/' or '@'\n");
	(void) fprintf(stderr, "    If dataset name is specified, only that "
	    "dataset is dumped\n");
	(void) fprintf(stderr, "    If object numbers are specified, only "
	    "those objects are dumped\n\n");
	(void) fprintf(stderr, "    Options to control amount of output:\n");
	(void) fprintf(stderr, "        -b block statistics\n");
	(void) fprintf(stderr, "        -c checksum all metadata (twice for "
	    "all data) blocks\n");
	(void) fprintf(stderr, "        -C config (or cachefile if alone)\n");
	(void) fprintf(stderr, "        -d dataset(s)\n");
	(void) fprintf(stderr, "        -D dedup statistics\n");
	(void) fprintf(stderr, "        -E decode and display block from an "
	    "embedded block pointer\n");
	(void) fprintf(stderr, "        -h pool history\n");
	(void) fprintf(stderr, "        -i intent logs\n");
	(void) fprintf(stderr, "        -l read label contents\n");
	(void) fprintf(stderr, "        -k examine the checkpointed state "
	    "of the pool\n");
	(void) fprintf(stderr, "        -L disable leak tracking (do not "
	    "load spacemaps)\n");
	(void) fprintf(stderr, "        -m metaslabs\n");
	(void) fprintf(stderr, "        -M metaslab groups\n");
	(void) fprintf(stderr, "        -O perform object lookups by path\n");
	(void) fprintf(stderr, "        -R read and display block from a "
	    "device\n");
	(void) fprintf(stderr, "        -s report stats on zdb's I/O\n");
	(void) fprintf(stderr, "        -S simulate dedup to measure effect\n");
	(void) fprintf(stderr, "        -v verbose (applies to all "
	    "others)\n\n");
	(void) fprintf(stderr, "    Below options are intended for use "
	    "with other options:\n");
	(void) fprintf(stderr, "        -A ignore assertions (-A), enable "
	    "panic recovery (-AA) or both (-AAA)\n");
	(void) fprintf(stderr, "        -e pool is exported/destroyed/"
	    "has altroot/not in a cachefile\n");
	(void) fprintf(stderr, "        -F attempt automatic rewind within "
	    "safe range of transaction groups\n");
	(void) fprintf(stderr, "        -G dump zfs_dbgmsg buffer before "
	    "exiting\n");
	(void) fprintf(stderr, "        -I <number of inflight I/Os> -- "
	    "specify the maximum number of "
	    "checksumming I/Os [default is 1000]\n");
	(void) fprintf(stderr, "        -o <variable>=<value> set global "
	    "variable to an unsigned 32-bit integer value\n");
	(void) fprintf(stderr, "        -p <path> -- use one or more with "
	    "-e to specify path to vdev dir\n");
	(void) fprintf(stderr, "        -P print numbers in parseable form\n");
	(void) fprintf(stderr, "        -q don't print label contents\n");
	(void) fprintf(stderr, "        -t <txg> -- highest txg to use when "
	    "searching for uberblocks\n");
	(void) fprintf(stderr, "        -u uberblock\n");
	(void) fprintf(stderr, "        -U <cachefile_path> -- use alternate "
	    "cachefile\n");
	(void) fprintf(stderr, "        -V do verbatim import\n");
	(void) fprintf(stderr, "        -x <dumpdir> -- "
	    "dump all read blocks into specified directory\n");
	(void) fprintf(stderr, "        -X attempt extreme rewind (does not "
	    "work with dataset)\n\n");
	(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
	    "to make only that option verbose\n");
	(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
	exit(1);
}
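/*
 * Illustrative invocations matching the usage text above (the pool,
 * dataset, and device names are examples only):
 *
 *	zdb tank			# dump everything, non-verbosely
 *	zdb -dd tank/fs			# dataset and per-object details
 *	zdb -l /dev/dsk/c0t0d0s0	# print vdev label contents
 *	zdb -R tank 0:1e000:2000	# read a raw block from vdev 0
 */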
(void) fprintf(stderr, " -v verbose (applies to all " 182 "others)\n\n"); 183 (void) fprintf(stderr, " Below options are intended for use " 184 "with other options:\n"); 185 (void) fprintf(stderr, " -A ignore assertions (-A), enable " 186 "panic recovery (-AA) or both (-AAA)\n"); 187 (void) fprintf(stderr, " -e pool is exported/destroyed/" 188 "has altroot/not in a cachefile\n"); 189 (void) fprintf(stderr, " -F attempt automatic rewind within " 190 "safe range of transaction groups\n"); 191 (void) fprintf(stderr, " -G dump zfs_dbgmsg buffer before " 192 "exiting\n"); 193 (void) fprintf(stderr, " -I <number of inflight I/Os> -- " 194 "specify the maximum number of " 195 "checksumming I/Os [default is 200]\n"); 196 (void) fprintf(stderr, " -o <variable>=<value> set global " 197 "variable to an unsigned 32-bit integer value\n"); 198 (void) fprintf(stderr, " -p <path> -- use one or more with " 199 "-e to specify path to vdev dir\n"); 200 (void) fprintf(stderr, " -P print numbers in parseable form\n"); 201 (void) fprintf(stderr, " -q don't print label contents\n"); 202 (void) fprintf(stderr, " -t <txg> -- highest txg to use when " 203 "searching for uberblocks\n"); 204 (void) fprintf(stderr, " -u uberblock\n"); 205 (void) fprintf(stderr, " -U <cachefile_path> -- use alternate " 206 "cachefile\n"); 207 (void) fprintf(stderr, " -V do verbatim import\n"); 208 (void) fprintf(stderr, " -x <dumpdir> -- " 209 "dump all read blocks into specified directory\n"); 210 (void) fprintf(stderr, " -X attempt extreme rewind (does not " 211 "work with dataset)\n\n"); 212 (void) fprintf(stderr, "Specify an option more than once (e.g. -bb) " 213 "to make only that option verbose\n"); 214 (void) fprintf(stderr, "Default is to dump everything non-verbosely\n"); 215 exit(1); 216 } 217 218 static void 219 dump_debug_buffer() 220 { 221 if (dump_opt['G']) { 222 (void) printf("\n"); 223 zfs_dbgmsg_print("zdb"); 224 } 225 } 226 227 /* 228 * Called for usage errors that are discovered after a call to spa_open(), 229 * dmu_bonus_hold(), or pool_match(). abort() is called for other errors. 230 */ 231 232 static void 233 fatal(const char *fmt, ...) 
{
	va_list ap;

	va_start(ap, fmt);
	(void) fprintf(stderr, "%s: ", cmdname);
	(void) vfprintf(stderr, fmt, ap);
	va_end(ap);
	(void) fprintf(stderr, "\n");

	dump_debug_buffer();

	exit(1);
}

/* ARGSUSED */
static void
dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
{
	nvlist_t *nv;
	size_t nvsize = *(uint64_t *)data;
	char *packed = umem_alloc(nvsize, UMEM_NOFAIL);

	VERIFY(0 == dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH));

	VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0);

	umem_free(packed, nvsize);

	dump_nvlist(nv, 8);

	nvlist_free(nv);
}

/* ARGSUSED */
static void
dump_history_offsets(objset_t *os, uint64_t object, void *data, size_t size)
{
	spa_history_phys_t *shp = data;

	if (shp == NULL)
		return;

	(void) printf("\t\tpool_create_len = %llu\n",
	    (u_longlong_t)shp->sh_pool_create_len);
	(void) printf("\t\tphys_max_off = %llu\n",
	    (u_longlong_t)shp->sh_phys_max_off);
	(void) printf("\t\tbof = %llu\n",
	    (u_longlong_t)shp->sh_bof);
	(void) printf("\t\teof = %llu\n",
	    (u_longlong_t)shp->sh_eof);
	(void) printf("\t\trecords_lost = %llu\n",
	    (u_longlong_t)shp->sh_records_lost);
}

static void
zdb_nicenum(uint64_t num, char *buf, size_t buflen)
{
	if (dump_opt['P'])
		(void) snprintf(buf, buflen, "%llu", (longlong_t)num);
	else
		nicenum(num, buf, buflen);
}

static const char histo_stars[] = "****************************************";
static const uint64_t histo_width = sizeof (histo_stars) - 1;

static void
dump_histogram(const uint64_t *histo, int size, int offset)
{
	int i;
	int minidx = size - 1;
	int maxidx = 0;
	uint64_t max = 0;

	for (i = 0; i < size; i++) {
		if (histo[i] > max)
			max = histo[i];
		if (histo[i] > 0 && i > maxidx)
			maxidx = i;
		if (histo[i] > 0 && i < minidx)
			minidx = i;
	}

	if (max < histo_width)
		max = histo_width;

	for (i = minidx; i <= maxidx; i++) {
		(void) printf("\t\t\t%3u: %6llu %s\n",
		    i + offset, (u_longlong_t)histo[i],
		    &histo_stars[(max - histo[i]) * histo_width / max]);
	}
}
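/*
 * Example (illustrative) of dump_histogram() output: one row per bucket
 * between the smallest and largest populated indices, with a star bar
 * scaled so the largest bucket fills histo_width columns:
 *
 *			 12:   4096 ****************************************
 *			 13:   1024 **********
 */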
static void
dump_zap_stats(objset_t *os, uint64_t object)
{
	int error;
	zap_stats_t zs;

	error = zap_get_stats(os, object, &zs);
	if (error)
		return;

	if (zs.zs_ptrtbl_len == 0) {
		ASSERT(zs.zs_num_blocks == 1);
		(void) printf("\tmicrozap: %llu bytes, %llu entries\n",
		    (u_longlong_t)zs.zs_blocksize,
		    (u_longlong_t)zs.zs_num_entries);
		return;
	}

	(void) printf("\tFat ZAP stats:\n");

	(void) printf("\t\tPointer table:\n");
	(void) printf("\t\t\t%llu elements\n",
	    (u_longlong_t)zs.zs_ptrtbl_len);
	(void) printf("\t\t\tzt_blk: %llu\n",
	    (u_longlong_t)zs.zs_ptrtbl_zt_blk);
	(void) printf("\t\t\tzt_numblks: %llu\n",
	    (u_longlong_t)zs.zs_ptrtbl_zt_numblks);
	(void) printf("\t\t\tzt_shift: %llu\n",
	    (u_longlong_t)zs.zs_ptrtbl_zt_shift);
	(void) printf("\t\t\tzt_blks_copied: %llu\n",
	    (u_longlong_t)zs.zs_ptrtbl_blks_copied);
	(void) printf("\t\t\tzt_nextblk: %llu\n",
	    (u_longlong_t)zs.zs_ptrtbl_nextblk);

	(void) printf("\t\tZAP entries: %llu\n",
	    (u_longlong_t)zs.zs_num_entries);
	(void) printf("\t\tLeaf blocks: %llu\n",
	    (u_longlong_t)zs.zs_num_leafs);
	(void) printf("\t\tTotal blocks: %llu\n",
	    (u_longlong_t)zs.zs_num_blocks);
	(void) printf("\t\tzap_block_type: 0x%llx\n",
	    (u_longlong_t)zs.zs_block_type);
	(void) printf("\t\tzap_magic: 0x%llx\n",
	    (u_longlong_t)zs.zs_magic);
	(void) printf("\t\tzap_salt: 0x%llx\n",
	    (u_longlong_t)zs.zs_salt);

	(void) printf("\t\tLeafs with 2^n pointers:\n");
	dump_histogram(zs.zs_leafs_with_2n_pointers, ZAP_HISTOGRAM_SIZE, 0);

	(void) printf("\t\tBlocks with n*5 entries:\n");
	dump_histogram(zs.zs_blocks_with_n5_entries, ZAP_HISTOGRAM_SIZE, 0);

	(void) printf("\t\tBlocks n/10 full:\n");
	dump_histogram(zs.zs_blocks_n_tenths_full, ZAP_HISTOGRAM_SIZE, 0);

	(void) printf("\t\tEntries with n chunks:\n");
	dump_histogram(zs.zs_entries_using_n_chunks, ZAP_HISTOGRAM_SIZE, 0);

	(void) printf("\t\tBuckets with n entries:\n");
	dump_histogram(zs.zs_buckets_with_n_entries, ZAP_HISTOGRAM_SIZE, 0);
}
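/*
 * Note (illustrative): a microzap fits in a single block and gets the
 * one-line summary above; a fat ZAP gets the full pointer-table and leaf
 * breakdown.  Sample microzap output:
 *
 *	microzap: 512 bytes, 3 entries
 */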
/*ARGSUSED*/
static void
dump_none(objset_t *os, uint64_t object, void *data, size_t size)
{
}

/*ARGSUSED*/
static void
dump_unknown(objset_t *os, uint64_t object, void *data, size_t size)
{
	(void) printf("\tUNKNOWN OBJECT TYPE\n");
}

/*ARGSUSED*/
static void
dump_uint8(objset_t *os, uint64_t object, void *data, size_t size)
{
}

/*ARGSUSED*/
static void
dump_uint64(objset_t *os, uint64_t object, void *data, size_t size)
{
}

/*ARGSUSED*/
static void
dump_zap(objset_t *os, uint64_t object, void *data, size_t size)
{
	zap_cursor_t zc;
	zap_attribute_t attr;
	void *prop;
	unsigned i;

	dump_zap_stats(os, object);
	(void) printf("\n");

	for (zap_cursor_init(&zc, os, object);
	    zap_cursor_retrieve(&zc, &attr) == 0;
	    zap_cursor_advance(&zc)) {
		(void) printf("\t\t%s = ", attr.za_name);
		if (attr.za_num_integers == 0) {
			(void) printf("\n");
			continue;
		}
		prop = umem_zalloc(attr.za_num_integers *
		    attr.za_integer_length, UMEM_NOFAIL);
		(void) zap_lookup(os, object, attr.za_name,
		    attr.za_integer_length, attr.za_num_integers, prop);
		if (attr.za_integer_length == 1) {
			if (strcmp(attr.za_name,
			    DSL_CRYPTO_KEY_MASTER_KEY) == 0 ||
			    strcmp(attr.za_name,
			    DSL_CRYPTO_KEY_HMAC_KEY) == 0 ||
			    strcmp(attr.za_name, DSL_CRYPTO_KEY_IV) == 0 ||
			    strcmp(attr.za_name, DSL_CRYPTO_KEY_MAC) == 0 ||
			    strcmp(attr.za_name, DMU_POOL_CHECKSUM_SALT) == 0) {
				uint8_t *u8 = prop;

				for (i = 0; i < attr.za_num_integers; i++) {
					(void) printf("%02x", u8[i]);
				}
			} else {
				(void) printf("%s", (char *)prop);
			}
		} else {
			for (i = 0; i < attr.za_num_integers; i++) {
				switch (attr.za_integer_length) {
				case 2:
					(void) printf("%u ",
					    ((uint16_t *)prop)[i]);
					break;
				case 4:
					(void) printf("%u ",
					    ((uint32_t *)prop)[i]);
					break;
				case 8:
					(void) printf("%lld ",
					    (u_longlong_t)((int64_t *)prop)[i]);
					break;
				}
			}
		}
		(void) printf("\n");
		umem_free(prop, attr.za_num_integers * attr.za_integer_length);
	}
	zap_cursor_fini(&zc);
}

static void
dump_bpobj(objset_t *os, uint64_t object, void *data, size_t size)
{
	bpobj_phys_t *bpop = data;
	char bytes[32], comp[32], uncomp[32];

	/* make sure the output won't get truncated */
	CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ);
	CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ);
	CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ);

	if (bpop == NULL)
		return;

	zdb_nicenum(bpop->bpo_bytes, bytes, sizeof (bytes));
	zdb_nicenum(bpop->bpo_comp, comp, sizeof (comp));
	zdb_nicenum(bpop->bpo_uncomp, uncomp, sizeof (uncomp));

	(void) printf("\t\tnum_blkptrs = %llu\n",
	    (u_longlong_t)bpop->bpo_num_blkptrs);
	(void) printf("\t\tbytes = %s\n", bytes);
	if (size >= BPOBJ_SIZE_V1) {
		(void) printf("\t\tcomp = %s\n", comp);
		(void) printf("\t\tuncomp = %s\n", uncomp);
	}
	if (size >= sizeof (*bpop)) {
		(void) printf("\t\tsubobjs = %llu\n",
		    (u_longlong_t)bpop->bpo_subobjs);
		(void) printf("\t\tnum_subobjs = %llu\n",
		    (u_longlong_t)bpop->bpo_num_subobjs);
	}

	if (dump_opt['d'] < 5)
		return;

	for (uint64_t i = 0; i < bpop->bpo_num_blkptrs; i++) {
		char blkbuf[BP_SPRINTF_LEN];
		blkptr_t bp;

		int err = dmu_read(os, object,
		    i * sizeof (bp), sizeof (bp), &bp, 0);
		if (err != 0) {
			(void) printf("got error %u from dmu_read\n", err);
			break;
		}
		snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), &bp);
		(void) printf("\t%s\n", blkbuf);
	}
}
/* ARGSUSED */
static void
dump_bpobj_subobjs(objset_t *os, uint64_t object, void *data, size_t size)
{
	dmu_object_info_t doi;

	VERIFY0(dmu_object_info(os, object, &doi));
	uint64_t *subobjs = kmem_alloc(doi.doi_max_offset, KM_SLEEP);

	int err = dmu_read(os, object, 0, doi.doi_max_offset, subobjs, 0);
	if (err != 0) {
		(void) printf("got error %u from dmu_read\n", err);
		kmem_free(subobjs, doi.doi_max_offset);
		return;
	}

	int64_t last_nonzero = -1;
	for (uint64_t i = 0; i < doi.doi_max_offset / 8; i++) {
		if (subobjs[i] != 0)
			last_nonzero = i;
	}

	for (int64_t i = 0; i <= last_nonzero; i++) {
		(void) printf("\t%llu\n", (longlong_t)subobjs[i]);
	}
	kmem_free(subobjs, doi.doi_max_offset);
}

/*ARGSUSED*/
static void
dump_ddt_zap(objset_t *os, uint64_t object, void *data, size_t size)
{
	dump_zap_stats(os, object);
	/* contents are printed elsewhere, properly decoded */
}

/*ARGSUSED*/
static void
dump_sa_attrs(objset_t *os, uint64_t object, void *data, size_t size)
{
	zap_cursor_t zc;
	zap_attribute_t attr;

	dump_zap_stats(os, object);
	(void) printf("\n");

	for (zap_cursor_init(&zc, os, object);
	    zap_cursor_retrieve(&zc, &attr) == 0;
	    zap_cursor_advance(&zc)) {
		(void) printf("\t\t%s = ", attr.za_name);
		if (attr.za_num_integers == 0) {
			(void) printf("\n");
			continue;
		}
		(void) printf(" %llx : [%d:%d:%d]\n",
		    (u_longlong_t)attr.za_first_integer,
		    (int)ATTR_LENGTH(attr.za_first_integer),
		    (int)ATTR_BSWAP(attr.za_first_integer),
		    (int)ATTR_NUM(attr.za_first_integer));
	}
	zap_cursor_fini(&zc);
}

/*ARGSUSED*/
static void
dump_sa_layouts(objset_t *os, uint64_t object, void *data, size_t size)
{
	zap_cursor_t zc;
	zap_attribute_t attr;
	uint16_t *layout_attrs;
	unsigned i;

	dump_zap_stats(os, object);
	(void) printf("\n");

	for (zap_cursor_init(&zc, os, object);
	    zap_cursor_retrieve(&zc, &attr) == 0;
	    zap_cursor_advance(&zc)) {
		(void) printf("\t\t%s = [", attr.za_name);
		if (attr.za_num_integers == 0) {
			(void) printf("\n");
			continue;
		}

		VERIFY(attr.za_integer_length == 2);
		layout_attrs = umem_zalloc(attr.za_num_integers *
		    attr.za_integer_length, UMEM_NOFAIL);

		VERIFY(zap_lookup(os, object, attr.za_name,
		    attr.za_integer_length,
		    attr.za_num_integers, layout_attrs) == 0);

		for (i = 0; i != attr.za_num_integers; i++)
			(void) printf(" %d ", (int)layout_attrs[i]);
		(void) printf("]\n");
		umem_free(layout_attrs,
		    attr.za_num_integers * attr.za_integer_length);
	}
	zap_cursor_fini(&zc);
}
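/*
 * Note (illustrative): dump_sa_attrs() prints each registered system
 * attribute as "value : [length:byteswap:attr-number]" decoded with the
 * ATTR_* macros above; a 16-byte attribute registered as attribute 0
 * with byteswap class 0 would appear as "[16:0:0]".
 */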
/*ARGSUSED*/
static void
dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size)
{
	zap_cursor_t zc;
	zap_attribute_t attr;
	const char *typenames[] = {
		/* 0 */ "not specified",
		/* 1 */ "FIFO",
		/* 2 */ "Character Device",
		/* 3 */ "3 (invalid)",
		/* 4 */ "Directory",
		/* 5 */ "5 (invalid)",
		/* 6 */ "Block Device",
		/* 7 */ "7 (invalid)",
		/* 8 */ "Regular File",
		/* 9 */ "9 (invalid)",
		/* 10 */ "Symbolic Link",
		/* 11 */ "11 (invalid)",
		/* 12 */ "Socket",
		/* 13 */ "Door",
		/* 14 */ "Event Port",
		/* 15 */ "15 (invalid)",
	};

	dump_zap_stats(os, object);
	(void) printf("\n");

	for (zap_cursor_init(&zc, os, object);
	    zap_cursor_retrieve(&zc, &attr) == 0;
	    zap_cursor_advance(&zc)) {
		(void) printf("\t\t%s = %lld (type: %s)\n",
		    attr.za_name, ZFS_DIRENT_OBJ(attr.za_first_integer),
		    typenames[ZFS_DIRENT_TYPE(attr.za_first_integer)]);
	}
	zap_cursor_fini(&zc);
}

static int
get_dtl_refcount(vdev_t *vd)
{
	int refcount = 0;

	if (vd->vdev_ops->vdev_op_leaf) {
		space_map_t *sm = vd->vdev_dtl_sm;

		if (sm != NULL &&
		    sm->sm_dbuf->db_size == sizeof (space_map_phys_t))
			return (1);
		return (0);
	}

	for (unsigned c = 0; c < vd->vdev_children; c++)
		refcount += get_dtl_refcount(vd->vdev_child[c]);
	return (refcount);
}

static int
get_metaslab_refcount(vdev_t *vd)
{
	int refcount = 0;

	if (vd->vdev_top == vd) {
		for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
			space_map_t *sm = vd->vdev_ms[m]->ms_sm;

			if (sm != NULL &&
			    sm->sm_dbuf->db_size == sizeof (space_map_phys_t))
				refcount++;
		}
	}
	for (unsigned c = 0; c < vd->vdev_children; c++)
		refcount += get_metaslab_refcount(vd->vdev_child[c]);

	return (refcount);
}

static int
get_obsolete_refcount(vdev_t *vd)
{
	int refcount = 0;

	uint64_t obsolete_sm_obj = vdev_obsolete_sm_object(vd);
	if (vd->vdev_top == vd && obsolete_sm_obj != 0) {
		dmu_object_info_t doi;
		VERIFY0(dmu_object_info(vd->vdev_spa->spa_meta_objset,
		    obsolete_sm_obj, &doi));
		if (doi.doi_bonus_size == sizeof (space_map_phys_t)) {
			refcount++;
		}
	} else {
		ASSERT3P(vd->vdev_obsolete_sm, ==, NULL);
		ASSERT3U(obsolete_sm_obj, ==, 0);
	}
	for (unsigned c = 0; c < vd->vdev_children; c++) {
		refcount += get_obsolete_refcount(vd->vdev_child[c]);
	}

	return (refcount);
}

static int
get_prev_obsolete_spacemap_refcount(spa_t *spa)
{
	uint64_t prev_obj =
	    spa->spa_condensing_indirect_phys.scip_prev_obsolete_sm_object;
	if (prev_obj != 0) {
		dmu_object_info_t doi;
		VERIFY0(dmu_object_info(spa->spa_meta_objset, prev_obj, &doi));
		if (doi.doi_bonus_size == sizeof (space_map_phys_t)) {
			return (1);
		}
	}
	return (0);
}

static int
get_checkpoint_refcount(vdev_t *vd)
{
	int refcount = 0;

	if (vd->vdev_top == vd && vd->vdev_top_zap != 0 &&
	    zap_contains(spa_meta_objset(vd->vdev_spa),
	    vd->vdev_top_zap, VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) == 0)
		refcount++;

	for (uint64_t c = 0; c < vd->vdev_children; c++)
		refcount += get_checkpoint_refcount(vd->vdev_child[c]);

	return (refcount);
}

static int
get_log_spacemap_refcount(spa_t *spa)
{
	return (avl_numnodes(&spa->spa_sm_logs_by_txg));
}

static int
verify_spacemap_refcounts(spa_t *spa)
{
	uint64_t expected_refcount = 0;
	uint64_t actual_refcount;

	(void) feature_get_refcount(spa,
	    &spa_feature_table[SPA_FEATURE_SPACEMAP_HISTOGRAM],
	    &expected_refcount);
	actual_refcount = get_dtl_refcount(spa->spa_root_vdev);
	actual_refcount += get_metaslab_refcount(spa->spa_root_vdev);
	actual_refcount += get_obsolete_refcount(spa->spa_root_vdev);
	actual_refcount += get_prev_obsolete_spacemap_refcount(spa);
	actual_refcount += get_checkpoint_refcount(spa->spa_root_vdev);
	actual_refcount += get_log_spacemap_refcount(spa);

	if (expected_refcount != actual_refcount) {
		(void) printf("space map refcount mismatch: expected %lld != "
		    "actual %lld\n",
		    (longlong_t)expected_refcount,
		    (longlong_t)actual_refcount);
		return (2);
	}
	return (0);
}
static void
dump_spacemap(objset_t *os, space_map_t *sm)
{
	char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
	    "INVALID", "INVALID", "INVALID", "INVALID" };

	if (sm == NULL)
		return;

	(void) printf("space map object %llu:\n",
	    (longlong_t)sm->sm_object);
	(void) printf("  smp_length = 0x%llx\n",
	    (longlong_t)sm->sm_phys->smp_length);
	(void) printf("  smp_alloc = 0x%llx\n",
	    (longlong_t)sm->sm_phys->smp_alloc);

	if (dump_opt['d'] < 6 && dump_opt['m'] < 4)
		return;

	/*
	 * Print out the freelist entries in both encoded and decoded form.
	 */
	uint8_t mapshift = sm->sm_shift;
	int64_t alloc = 0;
	uint64_t word, entry_id = 0;
	for (uint64_t offset = 0; offset < space_map_length(sm);
	    offset += sizeof (word)) {

		VERIFY0(dmu_read(os, space_map_object(sm), offset,
		    sizeof (word), &word, DMU_READ_PREFETCH));

		if (sm_entry_is_debug(word)) {
			(void) printf("\t    [%6llu] %s: txg %llu pass %llu\n",
			    (u_longlong_t)entry_id,
			    ddata[SM_DEBUG_ACTION_DECODE(word)],
			    (u_longlong_t)SM_DEBUG_TXG_DECODE(word),
			    (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(word));
			entry_id++;
			continue;
		}

		uint8_t words;
		char entry_type;
		uint64_t entry_off, entry_run, entry_vdev = SM_NO_VDEVID;

		if (sm_entry_is_single_word(word)) {
			entry_type = (SM_TYPE_DECODE(word) == SM_ALLOC) ?
			    'A' : 'F';
			entry_off = (SM_OFFSET_DECODE(word) << mapshift) +
			    sm->sm_start;
			entry_run = SM_RUN_DECODE(word) << mapshift;
			words = 1;
		} else {
			/* it is a two-word entry so we read another word */
			ASSERT(sm_entry_is_double_word(word));

			uint64_t extra_word;
			offset += sizeof (extra_word);
			VERIFY0(dmu_read(os, space_map_object(sm), offset,
			    sizeof (extra_word), &extra_word,
			    DMU_READ_PREFETCH));

			ASSERT3U(offset, <=, space_map_length(sm));

			entry_run = SM2_RUN_DECODE(word) << mapshift;
			entry_vdev = SM2_VDEV_DECODE(word);
			entry_type = (SM2_TYPE_DECODE(extra_word) == SM_ALLOC) ?
			    'A' : 'F';
			entry_off = (SM2_OFFSET_DECODE(extra_word) <<
			    mapshift) + sm->sm_start;
			words = 2;
		}

		(void) printf("\t    [%6llu]    %c  range:"
		    " %010llx-%010llx  size: %06llx vdev: %06llu words: %u\n",
		    (u_longlong_t)entry_id,
		    entry_type, (u_longlong_t)entry_off,
		    (u_longlong_t)(entry_off + entry_run),
		    (u_longlong_t)entry_run,
		    (u_longlong_t)entry_vdev, words);

		if (entry_type == 'A')
			alloc += entry_run;
		else
			alloc -= entry_run;
		entry_id++;
	}
	if (alloc != space_map_allocated(sm)) {
		(void) printf("space_map_object alloc (%lld) INCONSISTENT "
		    "with space map summary (%lld)\n",
		    (longlong_t)space_map_allocated(sm), (longlong_t)alloc);
	}
}
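/*
 * Worked example (illustrative, not from the original source): with
 * mapshift = 9, a one-word entry decoding to SM_TYPE = SM_ALLOC,
 * SM_OFFSET = 0x10 and SM_RUN = 4 prints as an 'A' range starting at
 * sm_start + (0x10 << 9) = sm_start + 0x2000, of size 4 << 9 = 0x800
 * bytes.  Debug entries carry no space; they only record the txg and
 * sync pass of the action.
 */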
static void
dump_metaslab_stats(metaslab_t *msp)
{
	char maxbuf[32];
	range_tree_t *rt = msp->ms_allocatable;
	zfs_btree_t *t = &msp->ms_allocatable_by_size;
	int free_pct = range_tree_space(rt) * 100 / msp->ms_size;

	/* make sure nicenum has enough space */
	CTASSERT(sizeof (maxbuf) >= NN_NUMBUF_SZ);

	zdb_nicenum(metaslab_largest_allocatable(msp), maxbuf, sizeof (maxbuf));

	(void) printf("\t %25s %10lu   %7s  %6s   %4s %4d%%\n",
	    "segments", zfs_btree_numnodes(t), "maxsize", maxbuf,
	    "freepct", free_pct);
	(void) printf("\tIn-memory histogram:\n");
	dump_histogram(rt->rt_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
}

static void
dump_metaslab(metaslab_t *msp)
{
	vdev_t *vd = msp->ms_group->mg_vd;
	spa_t *spa = vd->vdev_spa;
	space_map_t *sm = msp->ms_sm;
	char freebuf[32];

	zdb_nicenum(msp->ms_size - space_map_allocated(sm), freebuf,
	    sizeof (freebuf));

	(void) printf(
	    "\tmetaslab %6llu   offset %12llx   spacemap %6llu   free %5s\n",
	    (u_longlong_t)msp->ms_id, (u_longlong_t)msp->ms_start,
	    (u_longlong_t)space_map_object(sm), freebuf);

	if (dump_opt['m'] > 2 && !dump_opt['L']) {
		mutex_enter(&msp->ms_lock);
		VERIFY0(metaslab_load(msp));
		range_tree_stat_verify(msp->ms_allocatable);
		dump_metaslab_stats(msp);
		metaslab_unload(msp);
		mutex_exit(&msp->ms_lock);
	}

	if (dump_opt['m'] > 1 && sm != NULL &&
	    spa_feature_is_active(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) {
		/*
		 * The space map histogram represents free space in chunks
		 * of sm_shift (i.e. bucket 0 refers to 2^sm_shift).
		 */
		(void) printf("\tOn-disk histogram:\t\tfragmentation %llu\n",
		    (u_longlong_t)msp->ms_fragmentation);
		dump_histogram(sm->sm_phys->smp_histogram,
		    SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift);
	}

	ASSERT(msp->ms_size == (1ULL << vd->vdev_ms_shift));
	dump_spacemap(spa->spa_meta_objset, msp->ms_sm);

	if (spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP)) {
		(void) printf("\tFlush data:\n\tunflushed txg=%llu\n\n",
		    (u_longlong_t)metaslab_unflushed_txg(msp));
	}
}
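/*
 * Sample dump_metaslab() summary line (illustrative):
 *
 *	metaslab      0   offset            0   spacemap     37   free  511M
 *
 * With -mmm (and no -L) the metaslab is also loaded so its in-memory
 * allocatable range tree can be verified and its histogram printed.
 */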
static void
print_vdev_metaslab_header(vdev_t *vd)
{
	vdev_alloc_bias_t alloc_bias = vd->vdev_alloc_bias;
	const char *bias_str = "";

	if (alloc_bias == VDEV_BIAS_LOG || vd->vdev_islog) {
		bias_str = VDEV_ALLOC_BIAS_LOG;
	} else if (alloc_bias == VDEV_BIAS_SPECIAL) {
		bias_str = VDEV_ALLOC_BIAS_SPECIAL;
	} else if (alloc_bias == VDEV_BIAS_DEDUP) {
		bias_str = VDEV_ALLOC_BIAS_DEDUP;
	}

	uint64_t ms_flush_data_obj = 0;
	if (vd->vdev_top_zap != 0) {
		int error = zap_lookup(spa_meta_objset(vd->vdev_spa),
		    vd->vdev_top_zap, VDEV_TOP_ZAP_MS_UNFLUSHED_PHYS_TXGS,
		    sizeof (uint64_t), 1, &ms_flush_data_obj);
		if (error != ENOENT) {
			ASSERT0(error);
		}
	}

	(void) printf("\tvdev %10llu   %s",
	    (u_longlong_t)vd->vdev_id, bias_str);

	if (ms_flush_data_obj != 0) {
		(void) printf("   ms_unflushed_phys object %llu",
		    (u_longlong_t)ms_flush_data_obj);
	}

	(void) printf("\n\t%-10s%5llu   %-19s   %-15s   %-12s\n",
	    "metaslabs", (u_longlong_t)vd->vdev_ms_count,
	    "offset", "spacemap", "free");
	(void) printf("\t%15s   %19s   %15s   %12s\n",
	    "---------------", "-------------------",
	    "---------------", "------------");
}

static void
dump_metaslab_groups(spa_t *spa)
{
	vdev_t *rvd = spa->spa_root_vdev;
	metaslab_class_t *mc = spa_normal_class(spa);
	uint64_t fragmentation;

	metaslab_class_histogram_verify(mc);

	for (unsigned c = 0; c < rvd->vdev_children; c++) {
		vdev_t *tvd = rvd->vdev_child[c];
		metaslab_group_t *mg = tvd->vdev_mg;

		if (mg == NULL || mg->mg_class != mc)
			continue;

		metaslab_group_histogram_verify(mg);
		mg->mg_fragmentation = metaslab_group_fragmentation(mg);

		(void) printf("\tvdev %10llu\t\tmetaslabs%5llu\t\t"
		    "fragmentation",
		    (u_longlong_t)tvd->vdev_id,
		    (u_longlong_t)tvd->vdev_ms_count);
		if (mg->mg_fragmentation == ZFS_FRAG_INVALID) {
			(void) printf("%3s\n", "-");
		} else {
			(void) printf("%3llu%%\n",
			    (u_longlong_t)mg->mg_fragmentation);
		}
		dump_histogram(mg->mg_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
	}

	(void) printf("\tpool %s\tfragmentation", spa_name(spa));
	fragmentation = metaslab_class_fragmentation(mc);
	if (fragmentation == ZFS_FRAG_INVALID)
		(void) printf("\t%3s\n", "-");
	else
		(void) printf("\t%3llu%%\n", (u_longlong_t)fragmentation);
	dump_histogram(mc->mc_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
}
static void
print_vdev_indirect(vdev_t *vd)
{
	vdev_indirect_config_t *vic = &vd->vdev_indirect_config;
	vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
	vdev_indirect_births_t *vib = vd->vdev_indirect_births;

	if (vim == NULL) {
		ASSERT3P(vib, ==, NULL);
		return;
	}

	ASSERT3U(vdev_indirect_mapping_object(vim), ==,
	    vic->vic_mapping_object);
	ASSERT3U(vdev_indirect_births_object(vib), ==,
	    vic->vic_births_object);

	(void) printf("indirect births obj %llu:\n",
	    (longlong_t)vic->vic_births_object);
	(void) printf("    vib_count = %llu\n",
	    (longlong_t)vdev_indirect_births_count(vib));
	for (uint64_t i = 0; i < vdev_indirect_births_count(vib); i++) {
		vdev_indirect_birth_entry_phys_t *cur_vibe =
		    &vib->vib_entries[i];
		(void) printf("\toffset %llx -> txg %llu\n",
		    (longlong_t)cur_vibe->vibe_offset,
		    (longlong_t)cur_vibe->vibe_phys_birth_txg);
	}
	(void) printf("\n");

	(void) printf("indirect mapping obj %llu:\n",
	    (longlong_t)vic->vic_mapping_object);
	(void) printf("    vim_max_offset = 0x%llx\n",
	    (longlong_t)vdev_indirect_mapping_max_offset(vim));
	(void) printf("    vim_bytes_mapped = 0x%llx\n",
	    (longlong_t)vdev_indirect_mapping_bytes_mapped(vim));
	(void) printf("    vim_count = %llu\n",
	    (longlong_t)vdev_indirect_mapping_num_entries(vim));

	if (dump_opt['d'] <= 5 && dump_opt['m'] <= 3)
		return;

	uint32_t *counts = vdev_indirect_mapping_load_obsolete_counts(vim);

	for (uint64_t i = 0; i < vdev_indirect_mapping_num_entries(vim); i++) {
		vdev_indirect_mapping_entry_phys_t *vimep =
		    &vim->vim_entries[i];
		(void) printf("\t<%llx:%llx:%llx> -> "
		    "<%llx:%llx:%llx> (%x obsolete)\n",
		    (longlong_t)vd->vdev_id,
		    (longlong_t)DVA_MAPPING_GET_SRC_OFFSET(vimep),
		    (longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst),
		    (longlong_t)DVA_GET_VDEV(&vimep->vimep_dst),
		    (longlong_t)DVA_GET_OFFSET(&vimep->vimep_dst),
		    (longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst),
		    counts[i]);
	}
	(void) printf("\n");

	uint64_t obsolete_sm_object = vdev_obsolete_sm_object(vd);
	if (obsolete_sm_object != 0) {
		objset_t *mos = vd->vdev_spa->spa_meta_objset;
		(void) printf("obsolete space map object %llu:\n",
		    (u_longlong_t)obsolete_sm_object);
		ASSERT(vd->vdev_obsolete_sm != NULL);
		ASSERT3U(space_map_object(vd->vdev_obsolete_sm), ==,
		    obsolete_sm_object);
		dump_spacemap(mos, vd->vdev_obsolete_sm);
		(void) printf("\n");
	}
}

static void
dump_metaslabs(spa_t *spa)
{
	vdev_t *vd, *rvd = spa->spa_root_vdev;
	uint64_t m, c = 0, children = rvd->vdev_children;

	(void) printf("\nMetaslabs:\n");

	if (!dump_opt['d'] && zopt_objects > 0) {
		c = zopt_object[0];

		if (c >= children)
			(void) fatal("bad vdev id: %llu", (u_longlong_t)c);

		if (zopt_objects > 1) {
			vd = rvd->vdev_child[c];
			print_vdev_metaslab_header(vd);

			for (m = 1; m < zopt_objects; m++) {
				if (zopt_object[m] < vd->vdev_ms_count)
					dump_metaslab(
					    vd->vdev_ms[zopt_object[m]]);
				else
					(void) fprintf(stderr, "bad metaslab "
					    "number %llu\n",
					    (u_longlong_t)zopt_object[m]);
			}
			(void) printf("\n");
			return;
		}
		children = c + 1;
	}
	for (; c < children; c++) {
		vd = rvd->vdev_child[c];
		print_vdev_metaslab_header(vd);

		print_vdev_indirect(vd);

		for (m = 0; m < vd->vdev_ms_count; m++)
			dump_metaslab(vd->vdev_ms[m]);
		(void) printf("\n");
	}
}
empty\n"); 1288 return; 1289 } 1290 1291 (void) printf("\n"); 1292 1293 if (dump_opt['D'] > 1) { 1294 (void) printf("DDT histogram (aggregated over all DDTs):\n"); 1295 ddt_get_dedup_histogram(spa, &ddh_total); 1296 zpool_dump_ddt(&dds_total, &ddh_total); 1297 } 1298 1299 dump_dedup_ratio(&dds_total); 1300 } 1301 1302 static void 1303 dump_dtl_seg(void *arg, uint64_t start, uint64_t size) 1304 { 1305 char *prefix = arg; 1306 1307 (void) printf("%s [%llu,%llu) length %llu\n", 1308 prefix, 1309 (u_longlong_t)start, 1310 (u_longlong_t)(start + size), 1311 (u_longlong_t)(size)); 1312 } 1313 1314 static void 1315 dump_dtl(vdev_t *vd, int indent) 1316 { 1317 spa_t *spa = vd->vdev_spa; 1318 boolean_t required; 1319 const char *name[DTL_TYPES] = { "missing", "partial", "scrub", 1320 "outage" }; 1321 char prefix[256]; 1322 1323 spa_vdev_state_enter(spa, SCL_NONE); 1324 required = vdev_dtl_required(vd); 1325 (void) spa_vdev_state_exit(spa, NULL, 0); 1326 1327 if (indent == 0) 1328 (void) printf("\nDirty time logs:\n\n"); 1329 1330 (void) printf("\t%*s%s [%s]\n", indent, "", 1331 vd->vdev_path ? vd->vdev_path : 1332 vd->vdev_parent ? vd->vdev_ops->vdev_op_type : spa_name(spa), 1333 required ? "DTL-required" : "DTL-expendable"); 1334 1335 for (int t = 0; t < DTL_TYPES; t++) { 1336 range_tree_t *rt = vd->vdev_dtl[t]; 1337 if (range_tree_space(rt) == 0) 1338 continue; 1339 (void) snprintf(prefix, sizeof (prefix), "\t%*s%s", 1340 indent + 2, "", name[t]); 1341 range_tree_walk(rt, dump_dtl_seg, prefix); 1342 if (dump_opt['d'] > 5 && vd->vdev_children == 0) 1343 dump_spacemap(spa->spa_meta_objset, vd->vdev_dtl_sm); 1344 } 1345 1346 for (unsigned c = 0; c < vd->vdev_children; c++) 1347 dump_dtl(vd->vdev_child[c], indent + 4); 1348 } 1349 1350 static void 1351 dump_history(spa_t *spa) 1352 { 1353 nvlist_t **events = NULL; 1354 uint64_t resid, len, off = 0; 1355 uint_t num = 0; 1356 int error; 1357 time_t tsec; 1358 struct tm t; 1359 char tbuf[30]; 1360 char internalstr[MAXPATHLEN]; 1361 1362 char *buf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); 1363 do { 1364 len = SPA_MAXBLOCKSIZE; 1365 1366 if ((error = spa_history_get(spa, &off, &len, buf)) != 0) { 1367 (void) fprintf(stderr, "Unable to read history: " 1368 "error %d\n", error); 1369 umem_free(buf, SPA_MAXBLOCKSIZE); 1370 return; 1371 } 1372 1373 if (zpool_history_unpack(buf, len, &resid, &events, &num) != 0) 1374 break; 1375 1376 off -= resid; 1377 } while (len != 0); 1378 umem_free(buf, SPA_MAXBLOCKSIZE); 1379 1380 (void) printf("\nHistory:\n"); 1381 for (unsigned i = 0; i < num; i++) { 1382 uint64_t time, txg, ievent; 1383 char *cmd, *intstr; 1384 boolean_t printed = B_FALSE; 1385 1386 if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_TIME, 1387 &time) != 0) 1388 goto next; 1389 if (nvlist_lookup_string(events[i], ZPOOL_HIST_CMD, 1390 &cmd) != 0) { 1391 if (nvlist_lookup_uint64(events[i], 1392 ZPOOL_HIST_INT_EVENT, &ievent) != 0) 1393 goto next; 1394 verify(nvlist_lookup_uint64(events[i], 1395 ZPOOL_HIST_TXG, &txg) == 0); 1396 verify(nvlist_lookup_string(events[i], 1397 ZPOOL_HIST_INT_STR, &intstr) == 0); 1398 if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS) 1399 goto next; 1400 1401 (void) snprintf(internalstr, 1402 sizeof (internalstr), 1403 "[internal %s txg:%ju] %s", 1404 zfs_history_event_names[ievent], (uintmax_t)txg, 1405 intstr); 1406 cmd = internalstr; 1407 } 1408 tsec = time; 1409 (void) localtime_r(&tsec, &t); 1410 (void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t); 1411 (void) printf("%s %s\n", tbuf, cmd); 1412 printed = B_TRUE; 1413 1414 next: 
static void
dump_all_ddts(spa_t *spa)
{
	ddt_histogram_t ddh_total;
	ddt_stat_t dds_total;

	bzero(&ddh_total, sizeof (ddh_total));
	bzero(&dds_total, sizeof (dds_total));

	for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
		ddt_t *ddt = spa->spa_ddt[c];
		for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
			for (enum ddt_class class = 0; class < DDT_CLASSES;
			    class++) {
				dump_ddt(ddt, type, class);
			}
		}
	}

	ddt_get_dedup_stats(spa, &dds_total);

	if (dds_total.dds_blocks == 0) {
		(void) printf("All DDTs are empty\n");
		return;
	}

	(void) printf("\n");

	if (dump_opt['D'] > 1) {
		(void) printf("DDT histogram (aggregated over all DDTs):\n");
		ddt_get_dedup_histogram(spa, &ddh_total);
		zpool_dump_ddt(&dds_total, &ddh_total);
	}

	dump_dedup_ratio(&dds_total);
}

static void
dump_dtl_seg(void *arg, uint64_t start, uint64_t size)
{
	char *prefix = arg;

	(void) printf("%s [%llu,%llu) length %llu\n",
	    prefix,
	    (u_longlong_t)start,
	    (u_longlong_t)(start + size),
	    (u_longlong_t)(size));
}

static void
dump_dtl(vdev_t *vd, int indent)
{
	spa_t *spa = vd->vdev_spa;
	boolean_t required;
	const char *name[DTL_TYPES] = { "missing", "partial", "scrub",
	    "outage" };
	char prefix[256];

	spa_vdev_state_enter(spa, SCL_NONE);
	required = vdev_dtl_required(vd);
	(void) spa_vdev_state_exit(spa, NULL, 0);

	if (indent == 0)
		(void) printf("\nDirty time logs:\n\n");

	(void) printf("\t%*s%s [%s]\n", indent, "",
	    vd->vdev_path ? vd->vdev_path :
	    vd->vdev_parent ? vd->vdev_ops->vdev_op_type : spa_name(spa),
	    required ? "DTL-required" : "DTL-expendable");

	for (int t = 0; t < DTL_TYPES; t++) {
		range_tree_t *rt = vd->vdev_dtl[t];
		if (range_tree_space(rt) == 0)
			continue;
		(void) snprintf(prefix, sizeof (prefix), "\t%*s%s",
		    indent + 2, "", name[t]);
		range_tree_walk(rt, dump_dtl_seg, prefix);
		if (dump_opt['d'] > 5 && vd->vdev_children == 0)
			dump_spacemap(spa->spa_meta_objset, vd->vdev_dtl_sm);
	}

	for (unsigned c = 0; c < vd->vdev_children; c++)
		dump_dtl(vd->vdev_child[c], indent + 4);
}

static void
dump_history(spa_t *spa)
{
	nvlist_t **events = NULL;
	uint64_t resid, len, off = 0;
	uint_t num = 0;
	int error;
	time_t tsec;
	struct tm t;
	char tbuf[30];
	char internalstr[MAXPATHLEN];

	char *buf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
	do {
		len = SPA_MAXBLOCKSIZE;

		if ((error = spa_history_get(spa, &off, &len, buf)) != 0) {
			(void) fprintf(stderr, "Unable to read history: "
			    "error %d\n", error);
			umem_free(buf, SPA_MAXBLOCKSIZE);
			return;
		}

		if (zpool_history_unpack(buf, len, &resid, &events, &num) != 0)
			break;

		off -= resid;
	} while (len != 0);
	umem_free(buf, SPA_MAXBLOCKSIZE);

	(void) printf("\nHistory:\n");
	for (unsigned i = 0; i < num; i++) {
		uint64_t time, txg, ievent;
		char *cmd, *intstr;
		boolean_t printed = B_FALSE;

		if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_TIME,
		    &time) != 0)
			goto next;
		if (nvlist_lookup_string(events[i], ZPOOL_HIST_CMD,
		    &cmd) != 0) {
			if (nvlist_lookup_uint64(events[i],
			    ZPOOL_HIST_INT_EVENT, &ievent) != 0)
				goto next;
			verify(nvlist_lookup_uint64(events[i],
			    ZPOOL_HIST_TXG, &txg) == 0);
			verify(nvlist_lookup_string(events[i],
			    ZPOOL_HIST_INT_STR, &intstr) == 0);
			if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS)
				goto next;

			(void) snprintf(internalstr,
			    sizeof (internalstr),
			    "[internal %s txg:%ju] %s",
			    zfs_history_event_names[ievent], (uintmax_t)txg,
			    intstr);
			cmd = internalstr;
		}
		tsec = time;
		(void) localtime_r(&tsec, &t);
		(void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t);
		(void) printf("%s %s\n", tbuf, cmd);
		printed = B_TRUE;

next:
		if (dump_opt['h'] > 1) {
			if (!printed)
				(void) printf("unrecognized record:\n");
			dump_nvlist(events[i], 2);
		}
	}
}
/*ARGSUSED*/
static void
dump_dnode(objset_t *os, uint64_t object, void *data, size_t size)
{
}

static uint64_t
blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp,
    const zbookmark_phys_t *zb)
{
	if (dnp == NULL) {
		ASSERT(zb->zb_level < 0);
		if (zb->zb_object == 0)
			return (zb->zb_blkid);
		return (zb->zb_blkid * BP_GET_LSIZE(bp));
	}

	ASSERT(zb->zb_level >= 0);

	return ((zb->zb_blkid <<
	    (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
	    dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
}

static void
snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp)
{
	const dva_t *dva = bp->blk_dva;
	unsigned int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1;

	if (dump_opt['b'] >= 6) {
		snprintf_blkptr(blkbuf, buflen, bp);
		return;
	}

	if (BP_IS_EMBEDDED(bp)) {
		(void) sprintf(blkbuf,
		    "EMBEDDED et=%u %llxL/%llxP B=%llu",
		    (int)BPE_GET_ETYPE(bp),
		    (u_longlong_t)BPE_GET_LSIZE(bp),
		    (u_longlong_t)BPE_GET_PSIZE(bp),
		    (u_longlong_t)bp->blk_birth);
		return;
	}

	blkbuf[0] = '\0';
	for (unsigned int i = 0; i < ndvas; i++)
		(void) snprintf(blkbuf + strlen(blkbuf),
		    buflen - strlen(blkbuf), "%llu:%llx:%llx ",
		    (u_longlong_t)DVA_GET_VDEV(&dva[i]),
		    (u_longlong_t)DVA_GET_OFFSET(&dva[i]),
		    (u_longlong_t)DVA_GET_ASIZE(&dva[i]));

	if (BP_IS_HOLE(bp)) {
		(void) snprintf(blkbuf + strlen(blkbuf),
		    buflen - strlen(blkbuf),
		    "%llxL B=%llu",
		    (u_longlong_t)BP_GET_LSIZE(bp),
		    (u_longlong_t)bp->blk_birth);
	} else {
		(void) snprintf(blkbuf + strlen(blkbuf),
		    buflen - strlen(blkbuf),
		    "%llxL/%llxP F=%llu B=%llu/%llu",
		    (u_longlong_t)BP_GET_LSIZE(bp),
		    (u_longlong_t)BP_GET_PSIZE(bp),
		    (u_longlong_t)BP_GET_FILL(bp),
		    (u_longlong_t)bp->blk_birth,
		    (u_longlong_t)BP_PHYSICAL_BIRTH(bp));
	}
}

static void
print_indirect(blkptr_t *bp, const zbookmark_phys_t *zb,
    const dnode_phys_t *dnp)
{
	char blkbuf[BP_SPRINTF_LEN];
	int l;

	if (!BP_IS_EMBEDDED(bp)) {
		ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
		ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
	}

	(void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb));

	ASSERT(zb->zb_level >= 0);

	for (l = dnp->dn_nlevels - 1; l >= -1; l--) {
		if (l == zb->zb_level) {
			(void) printf("L%llx", (u_longlong_t)zb->zb_level);
		} else {
			(void) printf(" ");
		}
	}

	snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp);
	(void) printf("%s\n", blkbuf);
}

static int
visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
    blkptr_t *bp, const zbookmark_phys_t *zb)
{
	int err = 0;

	if (bp->blk_birth == 0)
		return (0);

	print_indirect(bp, zb, dnp);

	if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) {
		arc_flags_t flags = ARC_FLAG_WAIT;
		int i;
		blkptr_t *cbp;
		int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
		arc_buf_t *buf;
		uint64_t fill = 0;

		err = arc_read(NULL, spa, bp, arc_getbuf_func, &buf,
		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
		if (err)
			return (err);
		ASSERT(buf->b_data);

		/* recursively visit blocks below this */
		cbp = buf->b_data;
		for (i = 0; i < epb; i++, cbp++) {
			zbookmark_phys_t czb;

			SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
			    zb->zb_level - 1,
			    zb->zb_blkid * epb + i);
			err = visit_indirect(spa, dnp, cbp, &czb);
			if (err)
				break;
			fill += BP_GET_FILL(cbp);
		}
		if (!err)
			ASSERT3U(fill, ==, BP_GET_FILL(bp));
		arc_buf_destroy(buf, &buf);
	}

	return (err);
}
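/*
 * Note (illustrative): snprintf_blkptr_compact() renders each DVA as
 * vdev:offset:asize followed by sizes and birth txgs, e.g.
 *
 *	0:4e00:200 4000L/200P F=1 B=10/10
 *
 * is DVA 0:0x4e00:0x200, 16K logical / 512 physical, fill count 1,
 * logical and physical birth txg 10.
 */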
/*ARGSUSED*/
static void
dump_indirect(dnode_t *dn)
{
	dnode_phys_t *dnp = dn->dn_phys;
	int j;
	zbookmark_phys_t czb;

	(void) printf("Indirect blocks:\n");

	SET_BOOKMARK(&czb, dmu_objset_id(dn->dn_objset),
	    dn->dn_object, dnp->dn_nlevels - 1, 0);
	for (j = 0; j < dnp->dn_nblkptr; j++) {
		czb.zb_blkid = j;
		(void) visit_indirect(dmu_objset_spa(dn->dn_objset), dnp,
		    &dnp->dn_blkptr[j], &czb);
	}

	(void) printf("\n");
}

/*ARGSUSED*/
static void
dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
{
	dsl_dir_phys_t *dd = data;
	time_t crtime;
	char nice[32];

	/* make sure nicenum has enough space */
	CTASSERT(sizeof (nice) >= NN_NUMBUF_SZ);

	if (dd == NULL)
		return;

	ASSERT3U(size, >=, sizeof (dsl_dir_phys_t));

	crtime = dd->dd_creation_time;
	(void) printf("\t\tcreation_time = %s", ctime(&crtime));
	(void) printf("\t\thead_dataset_obj = %llu\n",
	    (u_longlong_t)dd->dd_head_dataset_obj);
	(void) printf("\t\tparent_dir_obj = %llu\n",
	    (u_longlong_t)dd->dd_parent_obj);
	(void) printf("\t\torigin_obj = %llu\n",
	    (u_longlong_t)dd->dd_origin_obj);
	(void) printf("\t\tchild_dir_zapobj = %llu\n",
	    (u_longlong_t)dd->dd_child_dir_zapobj);
	zdb_nicenum(dd->dd_used_bytes, nice, sizeof (nice));
	(void) printf("\t\tused_bytes = %s\n", nice);
	zdb_nicenum(dd->dd_compressed_bytes, nice, sizeof (nice));
	(void) printf("\t\tcompressed_bytes = %s\n", nice);
	zdb_nicenum(dd->dd_uncompressed_bytes, nice, sizeof (nice));
	(void) printf("\t\tuncompressed_bytes = %s\n", nice);
	zdb_nicenum(dd->dd_quota, nice, sizeof (nice));
	(void) printf("\t\tquota = %s\n", nice);
	zdb_nicenum(dd->dd_reserved, nice, sizeof (nice));
	(void) printf("\t\treserved = %s\n", nice);
	(void) printf("\t\tprops_zapobj = %llu\n",
	    (u_longlong_t)dd->dd_props_zapobj);
	(void) printf("\t\tdeleg_zapobj = %llu\n",
	    (u_longlong_t)dd->dd_deleg_zapobj);
	(void) printf("\t\tflags = %llx\n",
	    (u_longlong_t)dd->dd_flags);

#define	DO(which) \
	zdb_nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice, \
	    sizeof (nice)); \
	(void) printf("\t\tused_breakdown[" #which "] = %s\n", nice)
	DO(HEAD);
	DO(SNAP);
	DO(CHILD);
	DO(CHILD_RSRV);
	DO(REFRSRV);
#undef DO
	(void) printf("\t\tclones = %llu\n",
	    (u_longlong_t)dd->dd_clones);
}

/*ARGSUSED*/
static void
dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
{
	dsl_dataset_phys_t *ds = data;
	time_t crtime;
	char used[32], compressed[32], uncompressed[32], unique[32];
	char blkbuf[BP_SPRINTF_LEN];

	/* make sure nicenum has enough space */
	CTASSERT(sizeof (used) >= NN_NUMBUF_SZ);
	CTASSERT(sizeof (compressed) >= NN_NUMBUF_SZ);
	CTASSERT(sizeof (uncompressed) >= NN_NUMBUF_SZ);
	CTASSERT(sizeof (unique) >= NN_NUMBUF_SZ);

	if (ds == NULL)
		return;

	ASSERT(size == sizeof (*ds));
	crtime = ds->ds_creation_time;
	zdb_nicenum(ds->ds_referenced_bytes, used, sizeof (used));
	zdb_nicenum(ds->ds_compressed_bytes, compressed, sizeof (compressed));
	zdb_nicenum(ds->ds_uncompressed_bytes, uncompressed,
	    sizeof (uncompressed));
	zdb_nicenum(ds->ds_unique_bytes, unique, sizeof (unique));
	snprintf_blkptr(blkbuf, sizeof (blkbuf), &ds->ds_bp);

	(void) printf("\t\tdir_obj = %llu\n",
	    (u_longlong_t)ds->ds_dir_obj);
	(void) printf("\t\tprev_snap_obj = %llu\n",
	    (u_longlong_t)ds->ds_prev_snap_obj);
	(void) printf("\t\tprev_snap_txg = %llu\n",
	    (u_longlong_t)ds->ds_prev_snap_txg);
	(void) printf("\t\tnext_snap_obj = %llu\n",
	    (u_longlong_t)ds->ds_next_snap_obj);
	(void) printf("\t\tsnapnames_zapobj = %llu\n",
	    (u_longlong_t)ds->ds_snapnames_zapobj);
	(void) printf("\t\tnum_children = %llu\n",
	    (u_longlong_t)ds->ds_num_children);
	(void) printf("\t\tuserrefs_obj = %llu\n",
	    (u_longlong_t)ds->ds_userrefs_obj);
	(void) printf("\t\tcreation_time = %s", ctime(&crtime));
	(void) printf("\t\tcreation_txg = %llu\n",
	    (u_longlong_t)ds->ds_creation_txg);
	(void) printf("\t\tdeadlist_obj = %llu\n",
	    (u_longlong_t)ds->ds_deadlist_obj);
	(void) printf("\t\tused_bytes = %s\n", used);
	(void) printf("\t\tcompressed_bytes = %s\n", compressed);
	(void) printf("\t\tuncompressed_bytes = %s\n", uncompressed);
	(void) printf("\t\tunique = %s\n", unique);
	(void) printf("\t\tfsid_guid = %llu\n",
	    (u_longlong_t)ds->ds_fsid_guid);
	(void) printf("\t\tguid = %llu\n",
	    (u_longlong_t)ds->ds_guid);
	(void) printf("\t\tflags = %llx\n",
	    (u_longlong_t)ds->ds_flags);
	(void) printf("\t\tnext_clones_obj = %llu\n",
	    (u_longlong_t)ds->ds_next_clones_obj);
	(void) printf("\t\tprops_obj = %llu\n",
	    (u_longlong_t)ds->ds_props_obj);
	(void) printf("\t\tbp = %s\n", blkbuf);
}
/* ARGSUSED */
static int
dump_bptree_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
	char blkbuf[BP_SPRINTF_LEN];

	if (bp->blk_birth != 0) {
		snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
		(void) printf("\t%s\n", blkbuf);
	}
	return (0);
}

static void
dump_bptree(objset_t *os, uint64_t obj, const char *name)
{
	char bytes[32];
	bptree_phys_t *bt;
	dmu_buf_t *db;

	/* make sure nicenum has enough space */
	CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ);

	if (dump_opt['d'] < 3)
		return;

	VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
	bt = db->db_data;
	zdb_nicenum(bt->bt_bytes, bytes, sizeof (bytes));
	(void) printf("\n    %s: %llu datasets, %s\n",
	    name, (unsigned long long)(bt->bt_end - bt->bt_begin), bytes);
	dmu_buf_rele(db, FTAG);

	if (dump_opt['d'] < 5)
		return;

	(void) printf("\n");

	(void) bptree_iterate(os, obj, B_FALSE, dump_bptree_cb, NULL, NULL);
}

/* ARGSUSED */
static int
dump_bpobj_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
	char blkbuf[BP_SPRINTF_LEN];

	ASSERT(bp->blk_birth != 0);
	snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp);
	(void) printf("\t%s\n", blkbuf);
	return (0);
}

static void
dump_full_bpobj(bpobj_t *bpo, const char *name, int indent)
{
	char bytes[32];
	char comp[32];
	char uncomp[32];

	/* make sure nicenum has enough space */
	CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ);
	CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ);
	CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ);

	if (dump_opt['d'] < 3)
		return;

	zdb_nicenum(bpo->bpo_phys->bpo_bytes, bytes, sizeof (bytes));
	if (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_subobjs != 0) {
		zdb_nicenum(bpo->bpo_phys->bpo_comp, comp, sizeof (comp));
		zdb_nicenum(bpo->bpo_phys->bpo_uncomp, uncomp, sizeof (uncomp));
		(void) printf("    %*s: object %llu, %llu local blkptrs, "
		    "%llu subobjs in object %llu, %s (%s/%s comp)\n",
		    indent * 8, name,
		    (u_longlong_t)bpo->bpo_object,
		    (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
		    (u_longlong_t)bpo->bpo_phys->bpo_num_subobjs,
		    (u_longlong_t)bpo->bpo_phys->bpo_subobjs,
		    bytes, comp, uncomp);

		for (uint64_t i = 0; i < bpo->bpo_phys->bpo_num_subobjs; i++) {
			uint64_t subobj;
			bpobj_t subbpo;
			int error;
			VERIFY0(dmu_read(bpo->bpo_os,
			    bpo->bpo_phys->bpo_subobjs,
			    i * sizeof (subobj), sizeof (subobj), &subobj, 0));
			error = bpobj_open(&subbpo, bpo->bpo_os, subobj);
			if (error != 0) {
				(void) printf("ERROR %u while trying to open "
				    "subobj id %llu\n",
				    error, (u_longlong_t)subobj);
				continue;
			}
			dump_full_bpobj(&subbpo, "subobj", indent + 1);
			bpobj_close(&subbpo);
		}
	} else {
		(void) printf("    %*s: object %llu, %llu blkptrs, %s\n",
		    indent * 8, name,
		    (u_longlong_t)bpo->bpo_object,
		    (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
		    bytes);
	}

	if (dump_opt['d'] < 5)
		return;

	if (indent == 0) {
		(void) bpobj_iterate_nofree(bpo, dump_bpobj_cb, NULL, NULL);
		(void) printf("\n");
	}
}
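/*
 * Note (illustrative): a bpobj with subobjs is printed recursively, each
 * nesting level indented by 8 more columns via the %*s width, e.g.:
 *
 *	name: object 66, 0 local blkptrs, 2 subobjs in object 67, ...
 *	        subobj: object 68, 12 blkptrs, 1.5M
 */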
(dump_opt['d'] < 3) 1884 return; 1885 1886 if (dl->dl_oldfmt) { 1887 dump_full_bpobj(&dl->dl_bpobj, "old-format deadlist", 0); 1888 return; 1889 } 1890 1891 zdb_nicenum(dl->dl_phys->dl_used, bytes, sizeof (bytes)); 1892 zdb_nicenum(dl->dl_phys->dl_comp, comp, sizeof (comp)); 1893 zdb_nicenum(dl->dl_phys->dl_uncomp, uncomp, sizeof (uncomp)); 1894 (void) printf("\n Deadlist: %s (%s/%s comp)\n", 1895 bytes, comp, uncomp); 1896 1897 if (dump_opt['d'] < 4) 1898 return; 1899 1900 (void) printf("\n"); 1901 1902 for (dle = avl_first(&dl->dl_tree); dle; 1903 dle = AVL_NEXT(&dl->dl_tree, dle)) { 1904 if (dump_opt['d'] >= 5) { 1905 char buf[128]; 1906 (void) snprintf(buf, sizeof (buf), 1907 "mintxg %llu -> obj %llu", 1908 (longlong_t)dle->dle_mintxg, 1909 (longlong_t)dle->dle_bpobj.bpo_object); 1910 1911 dump_full_bpobj(&dle->dle_bpobj, buf, 0); 1912 } else { 1913 (void) printf("mintxg %llu -> obj %llu\n", 1914 (longlong_t)dle->dle_mintxg, 1915 (longlong_t)dle->dle_bpobj.bpo_object); 1916 } 1917 } 1918 } 1919 1920 static avl_tree_t idx_tree; 1921 static avl_tree_t domain_tree; 1922 static boolean_t fuid_table_loaded; 1923 static objset_t *sa_os = NULL; 1924 static sa_attr_type_t *sa_attr_table = NULL; 1925 1926 static int 1927 open_objset(const char *path, dmu_objset_type_t type, void *tag, objset_t **osp) 1928 { 1929 int err; 1930 uint64_t sa_attrs = 0; 1931 uint64_t version = 0; 1932 1933 VERIFY3P(sa_os, ==, NULL); 1934 err = dmu_objset_own(path, type, B_TRUE, B_FALSE, tag, osp); 1935 if (err != 0) { 1936 (void) fprintf(stderr, "failed to own dataset '%s': %s\n", path, 1937 strerror(err)); 1938 return (err); 1939 } 1940 1941 if (dmu_objset_type(*osp) == DMU_OST_ZFS && !(*osp)->os_encrypted) { 1942 (void) zap_lookup(*osp, MASTER_NODE_OBJ, ZPL_VERSION_STR, 1943 8, 1, &version); 1944 if (version >= ZPL_VERSION_SA) { 1945 (void) zap_lookup(*osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 1946 8, 1, &sa_attrs); 1947 } 1948 err = sa_setup(*osp, sa_attrs, zfs_attr_table, ZPL_END, 1949 &sa_attr_table); 1950 if (err != 0) { 1951 (void) fprintf(stderr, "sa_setup failed: %s\n", 1952 strerror(err)); 1953 dmu_objset_disown(*osp, B_FALSE, tag); 1954 *osp = NULL; 1955 } 1956 } 1957 sa_os = *osp; 1958 1959 return (err); /* propagate sa_setup() failure so callers don't use a NULL objset */ 1960 } 1961 1962 static void 1963 close_objset(objset_t *os, void *tag) 1964 { 1965 VERIFY3P(os, ==, sa_os); 1966 if (os->os_sa != NULL) 1967 sa_tear_down(os); 1968 dmu_objset_disown(os, B_FALSE, tag); 1969 sa_attr_table = NULL; 1970 sa_os = NULL; 1971 } 1972 1973 static void 1974 fuid_table_destroy() 1975 { 1976 if (fuid_table_loaded) { 1977 zfs_fuid_table_destroy(&idx_tree, &domain_tree); 1978 fuid_table_loaded = B_FALSE; 1979 } 1980 } 1981 1982 /* 1983 * print uid or gid information. 1984 * For normal POSIX id just the id is printed in decimal format. 1985 * For CIFS files with FUID the fuid is printed in hex followed by 1986 * the domain-rid string.
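 * For example (values hypothetical), a plain POSIX owner prints as
 *	uid 1001
 * while a FUID owner prints the 64-bit fuid in hex plus its domain-rid:
 *	uid 100000003 [S-1-5-21-111-222-333-1003]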
1987 */ 1988 static void 1989 print_idstr(uint64_t id, const char *id_type) 1990 { 1991 if (FUID_INDEX(id)) { 1992 char *domain; 1993 1994 domain = zfs_fuid_idx_domain(&idx_tree, FUID_INDEX(id)); 1995 (void) printf("\t%s %llx [%s-%d]\n", id_type, 1996 (u_longlong_t)id, domain, (int)FUID_RID(id)); 1997 } else { 1998 (void) printf("\t%s %llu\n", id_type, (u_longlong_t)id); 1999 } 2000 2001 } 2002 2003 static void 2004 dump_uidgid(objset_t *os, uint64_t uid, uint64_t gid) 2005 { 2006 uint32_t uid_idx, gid_idx; 2007 2008 uid_idx = FUID_INDEX(uid); 2009 gid_idx = FUID_INDEX(gid); 2010 2011 /* Load domain table, if not already loaded */ 2012 if (!fuid_table_loaded && (uid_idx || gid_idx)) { 2013 uint64_t fuid_obj; 2014 2015 /* first find the fuid object. It lives in the master node */ 2016 VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 2017 8, 1, &fuid_obj) == 0); 2018 zfs_fuid_avl_tree_create(&idx_tree, &domain_tree); 2019 (void) zfs_fuid_table_load(os, fuid_obj, 2020 &idx_tree, &domain_tree); 2021 fuid_table_loaded = B_TRUE; 2022 } 2023 2024 print_idstr(uid, "uid"); 2025 print_idstr(gid, "gid"); 2026 } 2027 2028 /*ARGSUSED*/ 2029 static void 2030 dump_znode(objset_t *os, uint64_t object, void *data, size_t size) 2031 { 2032 char path[MAXPATHLEN * 2]; /* allow for xattr and failure prefix */ 2033 sa_handle_t *hdl; 2034 uint64_t xattr, rdev, gen; 2035 uint64_t uid, gid, mode, fsize, parent, links; 2036 uint64_t pflags; 2037 uint64_t acctm[2], modtm[2], chgtm[2], crtm[2]; 2038 time_t z_crtime, z_atime, z_mtime, z_ctime; 2039 sa_bulk_attr_t bulk[12]; 2040 int idx = 0; 2041 int error; 2042 2043 VERIFY3P(os, ==, sa_os); 2044 if (sa_handle_get(os, object, NULL, SA_HDL_PRIVATE, &hdl)) { 2045 (void) printf("Failed to get handle for SA znode\n"); 2046 return; 2047 } 2048 2049 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_UID], NULL, &uid, 8); 2050 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GID], NULL, &gid, 8); 2051 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_LINKS], NULL, 2052 &links, 8); 2053 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GEN], NULL, &gen, 8); 2054 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MODE], NULL, 2055 &mode, 8); 2056 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_PARENT], 2057 NULL, &parent, 8); 2058 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_SIZE], NULL, 2059 &fsize, 8); 2060 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_ATIME], NULL, 2061 acctm, 16); 2062 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MTIME], NULL, 2063 modtm, 16); 2064 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CRTIME], NULL, 2065 crtm, 16); 2066 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CTIME], NULL, 2067 chgtm, 16); 2068 SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_FLAGS], NULL, 2069 &pflags, 8); 2070 2071 if (sa_bulk_lookup(hdl, bulk, idx)) { 2072 (void) sa_handle_destroy(hdl); 2073 return; 2074 } 2075 2076 z_crtime = (time_t)crtm[0]; 2077 z_atime = (time_t)acctm[0]; 2078 z_mtime = (time_t)modtm[0]; 2079 z_ctime = (time_t)chgtm[0]; 2080 2081 if (dump_opt['d'] > 4) { 2082 error = zfs_obj_to_path(os, object, path, sizeof (path)); 2083 if (error == ESTALE) { 2084 (void) snprintf(path, sizeof (path), "on delete queue"); 2085 } else if (error != 0) { 2086 leaked_objects++; 2087 (void) snprintf(path, sizeof (path), 2088 "path not found, possibly leaked"); 2089 } 2090 (void) printf("\tpath %s\n", path); 2091 } 2092 dump_uidgid(os, uid, gid); 2093 (void) printf("\tatime %s", ctime(&z_atime)); 2094 (void) printf("\tmtime %s", ctime(&z_mtime)); 2095 (void) printf("\tctime %s", 
ctime(&z_ctime)); 2096 (void) printf("\tcrtime %s", ctime(&z_crtime)); 2097 (void) printf("\tgen %llu\n", (u_longlong_t)gen); 2098 (void) printf("\tmode %llo\n", (u_longlong_t)mode); 2099 (void) printf("\tsize %llu\n", (u_longlong_t)fsize); 2100 (void) printf("\tparent %llu\n", (u_longlong_t)parent); 2101 (void) printf("\tlinks %llu\n", (u_longlong_t)links); 2102 (void) printf("\tpflags %llx\n", (u_longlong_t)pflags); 2103 if (dmu_objset_projectquota_enabled(os) && (pflags & ZFS_PROJID)) { 2104 uint64_t projid; 2105 2106 if (sa_lookup(hdl, sa_attr_table[ZPL_PROJID], &projid, 2107 sizeof (uint64_t)) == 0) 2108 (void) printf("\tprojid %llu\n", (u_longlong_t)projid); 2109 } 2110 if (sa_lookup(hdl, sa_attr_table[ZPL_XATTR], &xattr, 2111 sizeof (uint64_t)) == 0) 2112 (void) printf("\txattr %llu\n", (u_longlong_t)xattr); 2113 if (sa_lookup(hdl, sa_attr_table[ZPL_RDEV], &rdev, 2114 sizeof (uint64_t)) == 0) 2115 (void) printf("\trdev 0x%016llx\n", (u_longlong_t)rdev); 2116 sa_handle_destroy(hdl); 2117 } 2118 2119 /*ARGSUSED*/ 2120 static void 2121 dump_acl(objset_t *os, uint64_t object, void *data, size_t size) 2122 { 2123 } 2124 2125 /*ARGSUSED*/ 2126 static void 2127 dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size) 2128 { 2129 } 2130 2131 2132 static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = { 2133 dump_none, /* unallocated */ 2134 dump_zap, /* object directory */ 2135 dump_uint64, /* object array */ 2136 dump_none, /* packed nvlist */ 2137 dump_packed_nvlist, /* packed nvlist size */ 2138 dump_none, /* bpobj */ 2139 dump_bpobj, /* bpobj header */ 2140 dump_none, /* SPA space map header */ 2141 dump_none, /* SPA space map */ 2142 dump_none, /* ZIL intent log */ 2143 dump_dnode, /* DMU dnode */ 2144 dump_dmu_objset, /* DMU objset */ 2145 dump_dsl_dir, /* DSL directory */ 2146 dump_zap, /* DSL directory child map */ 2147 dump_zap, /* DSL dataset snap map */ 2148 dump_zap, /* DSL props */ 2149 dump_dsl_dataset, /* DSL dataset */ 2150 dump_znode, /* ZFS znode */ 2151 dump_acl, /* ZFS V0 ACL */ 2152 dump_uint8, /* ZFS plain file */ 2153 dump_zpldir, /* ZFS directory */ 2154 dump_zap, /* ZFS master node */ 2155 dump_zap, /* ZFS delete queue */ 2156 dump_uint8, /* zvol object */ 2157 dump_zap, /* zvol prop */ 2158 dump_uint8, /* other uint8[] */ 2159 dump_uint64, /* other uint64[] */ 2160 dump_zap, /* other ZAP */ 2161 dump_zap, /* persistent error log */ 2162 dump_uint8, /* SPA history */ 2163 dump_history_offsets, /* SPA history offsets */ 2164 dump_zap, /* Pool properties */ 2165 dump_zap, /* DSL permissions */ 2166 dump_acl, /* ZFS ACL */ 2167 dump_uint8, /* ZFS SYSACL */ 2168 dump_none, /* FUID nvlist */ 2169 dump_packed_nvlist, /* FUID nvlist size */ 2170 dump_zap, /* DSL dataset next clones */ 2171 dump_zap, /* DSL scrub queue */ 2172 dump_zap, /* ZFS user/group/project used */ 2173 dump_zap, /* ZFS user/group/project quota */ 2174 dump_zap, /* snapshot refcount tags */ 2175 dump_ddt_zap, /* DDT ZAP object */ 2176 dump_zap, /* DDT statistics */ 2177 dump_znode, /* SA object */ 2178 dump_zap, /* SA Master Node */ 2179 dump_sa_attrs, /* SA attribute registration */ 2180 dump_sa_layouts, /* SA attribute layouts */ 2181 dump_zap, /* DSL scrub translations */ 2182 dump_none, /* fake dedup BP */ 2183 dump_zap, /* deadlist */ 2184 dump_none, /* deadlist hdr */ 2185 dump_zap, /* dsl clones */ 2186 dump_bpobj_subobjs, /* bpobj subobjs */ 2187 dump_unknown, /* Unknown type, must be last */ 2188 }; 2189 2190 static void 2191 dump_object(objset_t *os, uint64_t object, int 
verbosity, int *print_header, 2192 uint64_t *dnode_slots_used) 2193 { 2194 dmu_buf_t *db = NULL; 2195 dmu_object_info_t doi; 2196 dnode_t *dn; 2197 boolean_t dnode_held = B_FALSE; 2198 void *bonus = NULL; 2199 size_t bsize = 0; 2200 char iblk[32], dblk[32], lsize[32], asize[32], fill[32], dnsize[32]; 2201 char bonus_size[32]; 2202 char aux[50]; 2203 int error; 2204 2205 /* make sure nicenum has enough space */ 2206 CTASSERT(sizeof (iblk) >= NN_NUMBUF_SZ); 2207 CTASSERT(sizeof (dblk) >= NN_NUMBUF_SZ); 2208 CTASSERT(sizeof (lsize) >= NN_NUMBUF_SZ); 2209 CTASSERT(sizeof (asize) >= NN_NUMBUF_SZ); 2210 CTASSERT(sizeof (bonus_size) >= NN_NUMBUF_SZ); 2211 2212 if (*print_header) { 2213 (void) printf("\n%10s %3s %5s %5s %5s %6s %5s %6s %s\n", 2214 "Object", "lvl", "iblk", "dblk", "dsize", "dnsize", 2215 "lsize", "%full", "type"); 2216 *print_header = 0; 2217 } 2218 2219 if (object == 0) { 2220 dn = DMU_META_DNODE(os); 2221 dmu_object_info_from_dnode(dn, &doi); 2222 } else { 2223 /* 2224 * Encrypted datasets will have sensitive bonus buffers 2225 * encrypted. Therefore we cannot hold the bonus buffer and 2226 * must hold the dnode itself instead. 2227 */ 2228 error = dmu_object_info(os, object, &doi); 2229 if (error) 2230 fatal("dmu_object_info() failed, errno %u", error); 2231 2232 if (os->os_encrypted && 2233 DMU_OT_IS_ENCRYPTED(doi.doi_bonus_type)) { 2234 error = dnode_hold(os, object, FTAG, &dn); 2235 if (error) 2236 fatal("dnode_hold() failed, errno %u", error); 2237 dnode_held = B_TRUE; 2238 } else { 2239 error = dmu_bonus_hold(os, object, FTAG, &db); 2240 if (error) 2241 fatal("dmu_bonus_hold(%llu) failed, errno %u", 2242 object, error); 2243 bonus = db->db_data; 2244 bsize = db->db_size; 2245 dn = DB_DNODE((dmu_buf_impl_t *)db); 2246 } 2247 } 2248 2249 if (dnode_slots_used != NULL) 2250 *dnode_slots_used = doi.doi_dnodesize / DNODE_MIN_SIZE; 2251 2252 zdb_nicenum(doi.doi_metadata_block_size, iblk, sizeof (iblk)); 2253 zdb_nicenum(doi.doi_data_block_size, dblk, sizeof (dblk)); 2254 zdb_nicenum(doi.doi_max_offset, lsize, sizeof (lsize)); 2255 zdb_nicenum(doi.doi_physical_blocks_512 << 9, asize, sizeof (asize)); 2256 zdb_nicenum(doi.doi_bonus_size, bonus_size, sizeof (bonus_size)); 2257 zdb_nicenum(doi.doi_dnodesize, dnsize, sizeof (dnsize)); 2258 (void) sprintf(fill, "%6.2f", 100.0 * doi.doi_fill_count * 2259 doi.doi_data_block_size / (object == 0 ? DNODES_PER_BLOCK : 1) / 2260 doi.doi_max_offset); 2261 2262 aux[0] = '\0'; 2263 2264 if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) { 2265 (void) snprintf(aux + strlen(aux), sizeof (aux), " (K=%s)", 2266 ZDB_CHECKSUM_NAME(doi.doi_checksum)); 2267 } 2268 2269 if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) { 2270 (void) snprintf(aux + strlen(aux), sizeof (aux), " (Z=%s)", 2271 ZDB_COMPRESS_NAME(doi.doi_compress)); 2272 } 2273 2274 (void) printf("%10" PRIu64 2275 " %3u %5s %5s %5s %5s %5s %6s %s%s\n", 2276 object, doi.doi_indirection, iblk, dblk, 2277 asize, dnsize, lsize, fill, ZDB_OT_NAME(doi.doi_type), aux); 2278 2279 if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) { 2280 (void) printf("%10s %3s %5s %5s %5s %5s %5s %6s %s\n", 2281 "", "", "", "", "", "", bonus_size, "bonus", 2282 ZDB_OT_NAME(doi.doi_bonus_type)); 2283 } 2284 2285 if (verbosity >= 4) { 2286 (void) printf("\tdnode flags: %s%s%s%s\n", 2287 (dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ? 2288 "USED_BYTES " : "", 2289 (dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ? 
2290 "USERUSED_ACCOUNTED " : "", 2291 (dn->dn_phys->dn_flags & DNODE_FLAG_USEROBJUSED_ACCOUNTED) ? 2292 "USEROBJUSED_ACCOUNTED " : "", 2293 (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ? 2294 "SPILL_BLKPTR" : ""); 2295 (void) printf("\tdnode maxblkid: %llu\n", 2296 (longlong_t)dn->dn_phys->dn_maxblkid); 2297 2298 if (!dnode_held) { 2299 object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os, 2300 object, bonus, bsize); 2301 } else { 2302 (void) printf("\t\t(bonus encrypted)\n"); 2303 } 2304 2305 if (!os->os_encrypted || !DMU_OT_IS_ENCRYPTED(doi.doi_type)) { 2306 object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object, 2307 NULL, 0); 2308 } else { 2309 (void) printf("\t\t(object encrypted)\n"); 2310 } 2311 2312 *print_header = 1; 2313 } 2314 2315 if (verbosity >= 5) 2316 dump_indirect(dn); 2317 2318 if (verbosity >= 5) { 2319 /* 2320 * Report the list of segments that comprise the object. 2321 */ 2322 uint64_t start = 0; 2323 uint64_t end; 2324 uint64_t blkfill = 1; 2325 int minlvl = 1; 2326 2327 if (dn->dn_type == DMU_OT_DNODE) { 2328 minlvl = 0; 2329 blkfill = DNODES_PER_BLOCK; 2330 } 2331 2332 for (;;) { 2333 char segsize[32]; 2334 /* make sure nicenum has enough space */ 2335 CTASSERT(sizeof (segsize) >= NN_NUMBUF_SZ); 2336 error = dnode_next_offset(dn, 2337 0, &start, minlvl, blkfill, 0); 2338 if (error) 2339 break; 2340 end = start; 2341 error = dnode_next_offset(dn, 2342 DNODE_FIND_HOLE, &end, minlvl, blkfill, 0); 2343 zdb_nicenum(end - start, segsize, sizeof (segsize)); 2344 (void) printf("\t\tsegment [%016llx, %016llx)" 2345 " size %5s\n", (u_longlong_t)start, 2346 (u_longlong_t)end, segsize); 2347 if (error) 2348 break; 2349 start = end; 2350 } 2351 } 2352 2353 if (db != NULL) 2354 dmu_buf_rele(db, FTAG); 2355 if (dnode_held) 2356 dnode_rele(dn, FTAG); 2357 } 2358 2359 static void 2360 count_dir_mos_objects(dsl_dir_t *dd) 2361 { 2362 mos_obj_refd(dd->dd_object); 2363 mos_obj_refd(dsl_dir_phys(dd)->dd_child_dir_zapobj); 2364 mos_obj_refd(dsl_dir_phys(dd)->dd_deleg_zapobj); 2365 mos_obj_refd(dsl_dir_phys(dd)->dd_props_zapobj); 2366 mos_obj_refd(dsl_dir_phys(dd)->dd_clones); 2367 } 2368 2369 static void 2370 count_ds_mos_objects(dsl_dataset_t *ds) 2371 { 2372 mos_obj_refd(ds->ds_object); 2373 mos_obj_refd(dsl_dataset_phys(ds)->ds_next_clones_obj); 2374 mos_obj_refd(dsl_dataset_phys(ds)->ds_props_obj); 2375 mos_obj_refd(dsl_dataset_phys(ds)->ds_userrefs_obj); 2376 mos_obj_refd(dsl_dataset_phys(ds)->ds_snapnames_zapobj); 2377 2378 if (!dsl_dataset_is_snapshot(ds)) { 2379 count_dir_mos_objects(ds->ds_dir); 2380 } 2381 } 2382 2383 static const char *objset_types[DMU_OST_NUMTYPES] = { 2384 "NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" }; 2385 2386 static void 2387 dump_dir(objset_t *os) 2388 { 2389 dmu_objset_stats_t dds; 2390 uint64_t object, object_count; 2391 uint64_t refdbytes, usedobjs, scratch; 2392 char numbuf[32]; 2393 char blkbuf[BP_SPRINTF_LEN + 20]; 2394 char osname[ZFS_MAX_DATASET_NAME_LEN]; 2395 const char *type = "UNKNOWN"; 2396 int verbosity = dump_opt['d']; 2397 int print_header = 1; 2398 unsigned i; 2399 int error; 2400 uint64_t total_slots_used = 0; 2401 uint64_t max_slot_used = 0; 2402 uint64_t dnode_slots; 2403 2404 /* make sure nicenum has enough space */ 2405 CTASSERT(sizeof (numbuf) >= NN_NUMBUF_SZ); 2406 2407 dsl_pool_config_enter(dmu_objset_pool(os), FTAG); 2408 dmu_objset_fast_stat(os, &dds); 2409 dsl_pool_config_exit(dmu_objset_pool(os), FTAG); 2410 2411 if (dds.dds_type < DMU_OST_NUMTYPES) 2412 type = objset_types[dds.dds_type]; 2413 2414 if (dds.dds_type == 
DMU_OST_META) { 2415 dds.dds_creation_txg = TXG_INITIAL; 2416 usedobjs = BP_GET_FILL(os->os_rootbp); 2417 refdbytes = dsl_dir_phys(os->os_spa->spa_dsl_pool->dp_mos_dir)-> 2418 dd_used_bytes; 2419 } else { 2420 dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch); 2421 } 2422 2423 ASSERT3U(usedobjs, ==, BP_GET_FILL(os->os_rootbp)); 2424 2425 zdb_nicenum(refdbytes, numbuf, sizeof (numbuf)); 2426 2427 if (verbosity >= 4) { 2428 (void) snprintf(blkbuf, sizeof (blkbuf), ", rootbp "); 2429 (void) snprintf_blkptr(blkbuf + strlen(blkbuf), 2430 sizeof (blkbuf) - strlen(blkbuf), os->os_rootbp); 2431 } else { 2432 blkbuf[0] = '\0'; 2433 } 2434 2435 dmu_objset_name(os, osname); 2436 2437 (void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, " 2438 "%s, %llu objects%s%s\n", 2439 osname, type, (u_longlong_t)dmu_objset_id(os), 2440 (u_longlong_t)dds.dds_creation_txg, 2441 numbuf, (u_longlong_t)usedobjs, blkbuf, 2442 (dds.dds_inconsistent) ? " (inconsistent)" : ""); 2443 2444 if (zopt_objects != 0) { 2445 for (i = 0; i < zopt_objects; i++) 2446 dump_object(os, zopt_object[i], verbosity, 2447 &print_header, NULL); 2448 (void) printf("\n"); 2449 return; 2450 } 2451 2452 if (dump_opt['i'] != 0 || verbosity >= 2) 2453 dump_intent_log(dmu_objset_zil(os)); 2454 2455 if (dmu_objset_ds(os) != NULL) { 2456 dsl_dataset_t *ds = dmu_objset_ds(os); 2457 dump_deadlist(&ds->ds_deadlist); 2458 2459 if (dsl_dataset_remap_deadlist_exists(ds)) { 2460 (void) printf("ds_remap_deadlist:\n"); 2461 dump_deadlist(&ds->ds_remap_deadlist); 2462 } 2463 count_ds_mos_objects(ds); 2464 } 2465 2466 if (verbosity < 2) 2467 return; 2468 2469 if (BP_IS_HOLE(os->os_rootbp)) 2470 return; 2471 2472 dump_object(os, 0, verbosity, &print_header, NULL); 2473 object_count = 0; 2474 if (DMU_USERUSED_DNODE(os) != NULL && 2475 DMU_USERUSED_DNODE(os)->dn_type != 0) { 2476 dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header, 2477 NULL); 2478 dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header, 2479 NULL); 2480 } 2481 2482 if (DMU_PROJECTUSED_DNODE(os) != NULL && 2483 DMU_PROJECTUSED_DNODE(os)->dn_type != 0) 2484 dump_object(os, DMU_PROJECTUSED_OBJECT, verbosity, 2485 &print_header, NULL); 2486 2487 object = 0; 2488 while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) { 2489 dump_object(os, object, verbosity, &print_header, &dnode_slots); 2490 object_count++; 2491 total_slots_used += dnode_slots; 2492 max_slot_used = object + dnode_slots - 1; 2493 } 2494 2495 (void) printf("\n"); 2496 2497 (void) printf(" Dnode slots:\n"); 2498 (void) printf("\tTotal used: %10llu\n", 2499 (u_longlong_t)total_slots_used); 2500 (void) printf("\tMax used: %10llu\n", 2501 (u_longlong_t)max_slot_used); 2502 (void) printf("\tPercent empty: %10lf\n", 2503 (double)(max_slot_used - total_slots_used)*100 / 2504 (double)max_slot_used); 2505 2506 (void) printf("\n"); 2507 2508 if (error != ESRCH) { 2509 (void) fprintf(stderr, "dmu_object_next() = %d\n", error); 2510 abort(); 2511 } 2512 if (leaked_objects != 0) { 2513 (void) printf("%d potentially leaked objects detected\n", 2514 leaked_objects); 2515 leaked_objects = 0; 2516 } 2517 2518 ASSERT3U(object_count, ==, usedobjs); 2519 } 2520 2521 static void 2522 dump_uberblock(uberblock_t *ub, const char *header, const char *footer) 2523 { 2524 time_t timestamp = ub->ub_timestamp; 2525 2526 (void) printf("%s", header ? 
header : ""); 2527 (void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic); 2528 (void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version); 2529 (void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg); 2530 (void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum); 2531 (void) printf("\ttimestamp = %llu UTC = %s", 2532 (u_longlong_t)ub->ub_timestamp, asctime(localtime(×tamp))); 2533 2534 (void) printf("\tmmp_magic = %016llx\n", 2535 (u_longlong_t)ub->ub_mmp_magic); 2536 if (MMP_VALID(ub)) { 2537 (void) printf("\tmmp_delay = %0llu\n", 2538 (u_longlong_t)ub->ub_mmp_delay); 2539 if (MMP_SEQ_VALID(ub)) 2540 (void) printf("\tmmp_seq = %u\n", 2541 (unsigned int) MMP_SEQ(ub)); 2542 if (MMP_FAIL_INT_VALID(ub)) 2543 (void) printf("\tmmp_fail = %u\n", 2544 (unsigned int) MMP_FAIL_INT(ub)); 2545 if (MMP_INTERVAL_VALID(ub)) 2546 (void) printf("\tmmp_write = %u\n", 2547 (unsigned int) MMP_INTERVAL(ub)); 2548 /* After MMP_* to make summarize_uberblock_mmp cleaner */ 2549 (void) printf("\tmmp_valid = %x\n", 2550 (unsigned int) ub->ub_mmp_config & 0xFF); 2551 } 2552 2553 if (dump_opt['u'] >= 3) { 2554 char blkbuf[BP_SPRINTF_LEN]; 2555 snprintf_blkptr(blkbuf, sizeof (blkbuf), &ub->ub_rootbp); 2556 (void) printf("\trootbp = %s\n", blkbuf); 2557 } 2558 (void) printf("\tcheckpoint_txg = %llu\n", 2559 (u_longlong_t)ub->ub_checkpoint_txg); 2560 (void) printf("%s", footer ? footer : ""); 2561 } 2562 2563 static void 2564 dump_config(spa_t *spa) 2565 { 2566 dmu_buf_t *db; 2567 size_t nvsize = 0; 2568 int error = 0; 2569 2570 2571 error = dmu_bonus_hold(spa->spa_meta_objset, 2572 spa->spa_config_object, FTAG, &db); 2573 2574 if (error == 0) { 2575 nvsize = *(uint64_t *)db->db_data; 2576 dmu_buf_rele(db, FTAG); 2577 2578 (void) printf("\nMOS Configuration:\n"); 2579 dump_packed_nvlist(spa->spa_meta_objset, 2580 spa->spa_config_object, (void *)&nvsize, 1); 2581 } else { 2582 (void) fprintf(stderr, "dmu_bonus_hold(%llu) failed, errno %d", 2583 (u_longlong_t)spa->spa_config_object, error); 2584 } 2585 } 2586 2587 static void 2588 dump_cachefile(const char *cachefile) 2589 { 2590 int fd; 2591 struct stat64 statbuf; 2592 char *buf; 2593 nvlist_t *config; 2594 2595 if ((fd = open64(cachefile, O_RDONLY)) < 0) { 2596 (void) printf("cannot open '%s': %s\n", cachefile, 2597 strerror(errno)); 2598 exit(1); 2599 } 2600 2601 if (fstat64(fd, &statbuf) != 0) { 2602 (void) printf("failed to stat '%s': %s\n", cachefile, 2603 strerror(errno)); 2604 exit(1); 2605 } 2606 2607 if ((buf = malloc(statbuf.st_size)) == NULL) { 2608 (void) fprintf(stderr, "failed to allocate %llu bytes\n", 2609 (u_longlong_t)statbuf.st_size); 2610 exit(1); 2611 } 2612 2613 if (read(fd, buf, statbuf.st_size) != statbuf.st_size) { 2614 (void) fprintf(stderr, "failed to read %llu bytes\n", 2615 (u_longlong_t)statbuf.st_size); 2616 exit(1); 2617 } 2618 2619 (void) close(fd); 2620 2621 if (nvlist_unpack(buf, statbuf.st_size, &config, 0) != 0) { 2622 (void) fprintf(stderr, "failed to unpack nvlist\n"); 2623 exit(1); 2624 } 2625 2626 free(buf); 2627 2628 dump_nvlist(config, 0); 2629 2630 nvlist_free(config); 2631 } 2632 2633 #define ZDB_MAX_UB_HEADER_SIZE 32 2634 2635 static void 2636 dump_label_uberblocks(vdev_label_t *lbl, uint64_t ashift) 2637 { 2638 vdev_t vd; 2639 vdev_t *vdp = &vd; 2640 char header[ZDB_MAX_UB_HEADER_SIZE]; 2641 2642 vd.vdev_ashift = ashift; 2643 vdp->vdev_top = vdp; 2644 2645 for (int i = 0; i < VDEV_UBERBLOCK_COUNT(vdp); i++) { 2646 uint64_t uoff = VDEV_UBERBLOCK_OFFSET(vdp, i); 2647 uberblock_t *ub = (void 
*)((char *)lbl + uoff); 2648 2649 if (uberblock_verify(ub)) 2650 continue; 2651 2652 if ((dump_opt['u'] < 4) && 2653 (ub->ub_mmp_magic == MMP_MAGIC) && ub->ub_mmp_delay && 2654 (i >= VDEV_UBERBLOCK_COUNT(&vd) - MMP_BLOCKS_PER_LABEL)) 2655 continue; 2656 2657 (void) snprintf(header, ZDB_MAX_UB_HEADER_SIZE, 2658 "Uberblock[%d]\n", i); 2659 dump_uberblock(ub, header, ""); 2660 } 2661 } 2662 2663 static char curpath[PATH_MAX]; 2664 2665 /* 2666 * Iterate through the path components, recursively passing 2667 * current one's obj and remaining path until we find the obj 2668 * for the last one. 2669 */ 2670 static int 2671 dump_path_impl(objset_t *os, uint64_t obj, char *name) 2672 { 2673 int err; 2674 int header = 1; 2675 uint64_t child_obj; 2676 char *s; 2677 dmu_buf_t *db; 2678 dmu_object_info_t doi; 2679 2680 if ((s = strchr(name, '/')) != NULL) 2681 *s = '\0'; 2682 err = zap_lookup(os, obj, name, 8, 1, &child_obj); 2683 2684 (void) strlcat(curpath, name, sizeof (curpath)); 2685 2686 if (err != 0) { 2687 (void) fprintf(stderr, "failed to lookup %s: %s\n", 2688 curpath, strerror(err)); 2689 return (err); 2690 } 2691 2692 child_obj = ZFS_DIRENT_OBJ(child_obj); 2693 err = sa_buf_hold(os, child_obj, FTAG, &db); 2694 if (err != 0) { 2695 (void) fprintf(stderr, 2696 "failed to get SA dbuf for obj %llu: %s\n", 2697 (u_longlong_t)child_obj, strerror(err)); 2698 return (EINVAL); 2699 } 2700 dmu_object_info_from_db(db, &doi); 2701 sa_buf_rele(db, FTAG); 2702 2703 if (doi.doi_bonus_type != DMU_OT_SA && 2704 doi.doi_bonus_type != DMU_OT_ZNODE) { 2705 (void) fprintf(stderr, "invalid bonus type %d for obj %llu\n", 2706 doi.doi_bonus_type, (u_longlong_t)child_obj); 2707 return (EINVAL); 2708 } 2709 2710 if (dump_opt['v'] > 6) { 2711 (void) printf("obj=%llu %s type=%d bonustype=%d\n", 2712 (u_longlong_t)child_obj, curpath, doi.doi_type, 2713 doi.doi_bonus_type); 2714 } 2715 2716 (void) strlcat(curpath, "/", sizeof (curpath)); 2717 2718 switch (doi.doi_type) { 2719 case DMU_OT_DIRECTORY_CONTENTS: 2720 if (s != NULL && *(s + 1) != '\0') 2721 return (dump_path_impl(os, child_obj, s + 1)); 2722 /*FALLTHROUGH*/ 2723 case DMU_OT_PLAIN_FILE_CONTENTS: 2724 dump_object(os, child_obj, dump_opt['v'], &header, NULL); 2725 return (0); 2726 default: 2727 (void) fprintf(stderr, "object %llu has non-file/directory " 2728 "type %d\n", (u_longlong_t)obj, doi.doi_type); 2729 break; 2730 } 2731 2732 return (EINVAL); 2733 } 2734 2735 /* 2736 * Dump the blocks for the object specified by path inside the dataset. 
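 * This is the workhorse of the -O option, e.g.
 *	zdb -O tank/fs some/dir/file
 * (dataset and path above are illustrative); each component is resolved
 * with a zap_lookup() on the enclosing directory's ZAP object.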
2737 */ 2738 static int 2739 dump_path(char *ds, char *path) 2740 { 2741 int err; 2742 objset_t *os; 2743 uint64_t root_obj; 2744 2745 err = open_objset(ds, DMU_OST_ZFS, FTAG, &os); 2746 if (err != 0) 2747 return (err); 2748 2749 err = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, &root_obj); 2750 if (err != 0) { 2751 (void) fprintf(stderr, "can't lookup root znode: %s\n", 2752 strerror(err)); 2753 dmu_objset_disown(os, B_FALSE, FTAG); 2754 return (EINVAL); 2755 } 2756 2757 (void) snprintf(curpath, sizeof (curpath), "dataset=%s path=/", ds); 2758 2759 err = dump_path_impl(os, root_obj, path); 2760 2761 close_objset(os, FTAG); 2762 return (err); 2763 } 2764 2765 static int 2766 dump_label(const char *dev) 2767 { 2768 int fd; 2769 vdev_label_t label; 2770 char path[MAXPATHLEN]; 2771 char *buf = label.vl_vdev_phys.vp_nvlist; 2772 size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist); 2773 struct stat64 statbuf; 2774 uint64_t psize, ashift; 2775 boolean_t label_found = B_FALSE; 2776 2777 (void) strlcpy(path, dev, sizeof (path)); 2778 if (dev[0] == '/') { 2779 if (strncmp(dev, ZFS_DISK_ROOTD, 2780 strlen(ZFS_DISK_ROOTD)) == 0) { 2781 (void) snprintf(path, sizeof (path), "%s%s", 2782 ZFS_RDISK_ROOTD, dev + strlen(ZFS_DISK_ROOTD)); 2783 } 2784 } else if (stat64(path, &statbuf) != 0) { 2785 char *s; 2786 2787 (void) snprintf(path, sizeof (path), "%s%s", ZFS_RDISK_ROOTD, 2788 dev); 2789 if (((s = strrchr(dev, 's')) == NULL && 2790 (s = strchr(dev, 'p')) == NULL) || 2791 !isdigit(*(s + 1))) 2792 (void) strlcat(path, "s0", sizeof (path)); 2793 } 2794 2795 if ((fd = open64(path, O_RDONLY)) < 0) { 2796 (void) fprintf(stderr, "cannot open '%s': %s\n", path, 2797 strerror(errno)); 2798 exit(1); 2799 } 2800 2801 if (fstat64(fd, &statbuf) != 0) { 2802 (void) fprintf(stderr, "failed to stat '%s': %s\n", path, 2803 strerror(errno)); 2804 (void) close(fd); 2805 exit(1); 2806 } 2807 2808 if (S_ISBLK(statbuf.st_mode)) { 2809 (void) fprintf(stderr, 2810 "cannot use '%s': character device required\n", path); 2811 (void) close(fd); 2812 exit(1); 2813 } 2814 2815 psize = statbuf.st_size; 2816 psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t)); 2817 2818 for (int l = 0; l < VDEV_LABELS; l++) { 2819 nvlist_t *config = NULL; 2820 2821 if (!dump_opt['q']) { 2822 (void) printf("------------------------------------\n"); 2823 (void) printf("LABEL %d\n", l); 2824 (void) printf("------------------------------------\n"); 2825 } 2826 2827 if (pread64(fd, &label, sizeof (label), 2828 vdev_label_offset(psize, l, 0)) != sizeof (label)) { 2829 if (!dump_opt['q']) 2830 (void) printf("failed to read label %d\n", l); 2831 continue; 2832 } 2833 2834 if (nvlist_unpack(buf, buflen, &config, 0) != 0) { 2835 if (!dump_opt['q']) 2836 (void) printf("failed to unpack label %d\n", l); 2837 ashift = SPA_MINBLOCKSHIFT; 2838 } else { 2839 nvlist_t *vdev_tree = NULL; 2840 2841 if (!dump_opt['q']) 2842 dump_nvlist(config, 4); 2843 if ((nvlist_lookup_nvlist(config, 2844 ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) || 2845 (nvlist_lookup_uint64(vdev_tree, 2846 ZPOOL_CONFIG_ASHIFT, &ashift) != 0)) 2847 ashift = SPA_MINBLOCKSHIFT; 2848 nvlist_free(config); 2849 label_found = B_TRUE; 2850 } 2851 if (dump_opt['u']) 2852 dump_label_uberblocks(&label, ashift); 2853 } 2854 2855 (void) close(fd); 2856 2857 return (label_found ? 
0 : 2); 2858 } 2859 2860 static uint64_t dataset_feature_count[SPA_FEATURES]; 2861 static uint64_t remap_deadlist_count = 0; 2862 2863 /*ARGSUSED*/ 2864 static int 2865 dump_one_dir(const char *dsname, void *arg) 2866 { 2867 int error; 2868 objset_t *os; 2869 2870 error = open_objset(dsname, DMU_OST_ANY, FTAG, &os); 2871 if (error != 0) 2872 return (0); 2873 2874 for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { 2875 if (!dmu_objset_ds(os)->ds_feature_inuse[f]) 2876 continue; 2877 ASSERT(spa_feature_table[f].fi_flags & 2878 ZFEATURE_FLAG_PER_DATASET); 2879 dataset_feature_count[f]++; 2880 } 2881 2882 if (dsl_dataset_remap_deadlist_exists(dmu_objset_ds(os))) { 2883 remap_deadlist_count++; 2884 } 2885 2886 dump_dir(os); 2887 close_objset(os, FTAG); 2888 fuid_table_destroy(); 2889 return (0); 2890 } 2891 2892 /* 2893 * Block statistics. 2894 */ 2895 #define PSIZE_HISTO_SIZE (SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 2) 2896 typedef struct zdb_blkstats { 2897 uint64_t zb_asize; 2898 uint64_t zb_lsize; 2899 uint64_t zb_psize; 2900 uint64_t zb_count; 2901 uint64_t zb_gangs; 2902 uint64_t zb_ditto_samevdev; 2903 uint64_t zb_ditto_same_ms; 2904 uint64_t zb_psize_histogram[PSIZE_HISTO_SIZE]; 2905 } zdb_blkstats_t; 2906 2907 /* 2908 * Extended object types to report deferred frees and dedup auto-ditto blocks. 2909 */ 2910 #define ZDB_OT_DEFERRED (DMU_OT_NUMTYPES + 0) 2911 #define ZDB_OT_DITTO (DMU_OT_NUMTYPES + 1) 2912 #define ZDB_OT_OTHER (DMU_OT_NUMTYPES + 2) 2913 #define ZDB_OT_TOTAL (DMU_OT_NUMTYPES + 3) 2914 2915 static const char *zdb_ot_extname[] = { 2916 "deferred free", 2917 "dedup ditto", 2918 "other", 2919 "Total", 2920 }; 2921 2922 #define ZB_TOTAL DN_MAX_LEVELS 2923 2924 typedef struct zdb_cb { 2925 zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1]; 2926 uint64_t zcb_removing_size; 2927 uint64_t zcb_checkpoint_size; 2928 uint64_t zcb_dedup_asize; 2929 uint64_t zcb_dedup_blocks; 2930 uint64_t zcb_embedded_blocks[NUM_BP_EMBEDDED_TYPES]; 2931 uint64_t zcb_embedded_histogram[NUM_BP_EMBEDDED_TYPES] 2932 [BPE_PAYLOAD_SIZE]; 2933 uint64_t zcb_start; 2934 hrtime_t zcb_lastprint; 2935 uint64_t zcb_totalasize; 2936 uint64_t zcb_errors[256]; 2937 int zcb_readfails; 2938 int zcb_haderrors; 2939 spa_t *zcb_spa; 2940 uint32_t **zcb_vd_obsolete_counts; 2941 } zdb_cb_t; 2942 2943 /* test if two DVA offsets from same vdev are within the same metaslab */ 2944 static boolean_t 2945 same_metaslab(spa_t *spa, uint64_t vdev, uint64_t off1, uint64_t off2) 2946 { 2947 vdev_t *vd = vdev_lookup_top(spa, vdev); 2948 uint64_t ms_shift = vd->vdev_ms_shift; 2949 2950 return ((off1 >> ms_shift) == (off2 >> ms_shift)); 2951 } 2952 2953 static void 2954 zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp, 2955 dmu_object_type_t type) 2956 { 2957 uint64_t refcnt = 0; 2958 2959 ASSERT(type < ZDB_OT_TOTAL); 2960 2961 if (zilog && zil_bp_tree_add(zilog, bp) != 0) 2962 return; 2963 2964 spa_config_enter(zcb->zcb_spa, SCL_CONFIG, FTAG, RW_READER); 2965 2966 for (int i = 0; i < 4; i++) { 2967 int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL; 2968 int t = (i & 1) ? type : ZDB_OT_TOTAL; 2969 int equal; 2970 zdb_blkstats_t *zb = &zcb->zcb_type[l][t]; 2971 2972 zb->zb_asize += BP_GET_ASIZE(bp); 2973 zb->zb_lsize += BP_GET_LSIZE(bp); 2974 zb->zb_psize += BP_GET_PSIZE(bp); 2975 zb->zb_count++; 2976 2977 /* 2978 * The histogram is only big enough to record blocks up to 2979 * SPA_OLD_MAXBLOCKSIZE; larger blocks go into the last, 2980 * "other", bucket. 
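 * That last bucket is index SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 1
 * (PSIZE_HISTO_SIZE - 1), which is what the MIN() below clamps to.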
2981 */ 2982 unsigned idx = BP_GET_PSIZE(bp) >> SPA_MINBLOCKSHIFT; 2983 idx = MIN(idx, SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 1); 2984 zb->zb_psize_histogram[idx]++; 2985 2986 zb->zb_gangs += BP_COUNT_GANG(bp); 2987 2988 switch (BP_GET_NDVAS(bp)) { 2989 case 2: 2990 if (DVA_GET_VDEV(&bp->blk_dva[0]) == 2991 DVA_GET_VDEV(&bp->blk_dva[1])) { 2992 zb->zb_ditto_samevdev++; 2993 2994 if (same_metaslab(zcb->zcb_spa, 2995 DVA_GET_VDEV(&bp->blk_dva[0]), 2996 DVA_GET_OFFSET(&bp->blk_dva[0]), 2997 DVA_GET_OFFSET(&bp->blk_dva[1]))) 2998 zb->zb_ditto_same_ms++; 2999 } 3000 break; 3001 case 3: 3002 equal = (DVA_GET_VDEV(&bp->blk_dva[0]) == 3003 DVA_GET_VDEV(&bp->blk_dva[1])) + 3004 (DVA_GET_VDEV(&bp->blk_dva[0]) == 3005 DVA_GET_VDEV(&bp->blk_dva[2])) + 3006 (DVA_GET_VDEV(&bp->blk_dva[1]) == 3007 DVA_GET_VDEV(&bp->blk_dva[2])); 3008 if (equal != 0) { 3009 zb->zb_ditto_samevdev++; 3010 3011 if (DVA_GET_VDEV(&bp->blk_dva[0]) == 3012 DVA_GET_VDEV(&bp->blk_dva[1]) && 3013 same_metaslab(zcb->zcb_spa, 3014 DVA_GET_VDEV(&bp->blk_dva[0]), 3015 DVA_GET_OFFSET(&bp->blk_dva[0]), 3016 DVA_GET_OFFSET(&bp->blk_dva[1]))) 3017 zb->zb_ditto_same_ms++; 3018 else if (DVA_GET_VDEV(&bp->blk_dva[0]) == 3019 DVA_GET_VDEV(&bp->blk_dva[2]) && 3020 same_metaslab(zcb->zcb_spa, 3021 DVA_GET_VDEV(&bp->blk_dva[0]), 3022 DVA_GET_OFFSET(&bp->blk_dva[0]), 3023 DVA_GET_OFFSET(&bp->blk_dva[2]))) 3024 zb->zb_ditto_same_ms++; 3025 else if (DVA_GET_VDEV(&bp->blk_dva[1]) == 3026 DVA_GET_VDEV(&bp->blk_dva[2]) && 3027 same_metaslab(zcb->zcb_spa, 3028 DVA_GET_VDEV(&bp->blk_dva[1]), 3029 DVA_GET_OFFSET(&bp->blk_dva[1]), 3030 DVA_GET_OFFSET(&bp->blk_dva[2]))) 3031 zb->zb_ditto_same_ms++; 3032 } 3033 break; 3034 } 3035 } 3036 3037 spa_config_exit(zcb->zcb_spa, SCL_CONFIG, FTAG); 3038 3039 if (BP_IS_EMBEDDED(bp)) { 3040 zcb->zcb_embedded_blocks[BPE_GET_ETYPE(bp)]++; 3041 zcb->zcb_embedded_histogram[BPE_GET_ETYPE(bp)] 3042 [BPE_GET_PSIZE(bp)]++; 3043 return; 3044 } 3045 3046 if (dump_opt['L']) 3047 return; 3048 3049 if (BP_GET_DEDUP(bp)) { 3050 ddt_t *ddt; 3051 ddt_entry_t *dde; 3052 3053 ddt = ddt_select(zcb->zcb_spa, bp); 3054 ddt_enter(ddt); 3055 dde = ddt_lookup(ddt, bp, B_FALSE); 3056 3057 if (dde == NULL) { 3058 refcnt = 0; 3059 } else { 3060 ddt_phys_t *ddp = ddt_phys_select(dde, bp); 3061 ddt_phys_decref(ddp); 3062 refcnt = ddp->ddp_refcnt; 3063 if (ddt_phys_total_refcnt(dde) == 0) 3064 ddt_remove(ddt, dde); 3065 } 3066 ddt_exit(ddt); 3067 } 3068 3069 VERIFY3U(zio_wait(zio_claim(NULL, zcb->zcb_spa, 3070 refcnt ? 
0 : spa_min_claim_txg(zcb->zcb_spa), 3071 bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0); 3072 } 3073 3074 static void 3075 zdb_blkptr_done(zio_t *zio) 3076 { 3077 spa_t *spa = zio->io_spa; 3078 blkptr_t *bp = zio->io_bp; 3079 int ioerr = zio->io_error; 3080 zdb_cb_t *zcb = zio->io_private; 3081 zbookmark_phys_t *zb = &zio->io_bookmark; 3082 3083 abd_free(zio->io_abd); 3084 3085 mutex_enter(&spa->spa_scrub_lock); 3086 spa->spa_load_verify_ios--; 3087 cv_broadcast(&spa->spa_scrub_io_cv); 3088 3089 if (ioerr && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { 3090 char blkbuf[BP_SPRINTF_LEN]; 3091 3092 zcb->zcb_haderrors = 1; 3093 zcb->zcb_errors[ioerr]++; 3094 3095 if (dump_opt['b'] >= 2) 3096 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); 3097 else 3098 blkbuf[0] = '\0'; 3099 3100 (void) printf("zdb_blkptr_cb: " 3101 "Got error %d reading " 3102 "<%llu, %llu, %lld, %llx> %s -- skipping\n", 3103 ioerr, 3104 (u_longlong_t)zb->zb_objset, 3105 (u_longlong_t)zb->zb_object, 3106 (u_longlong_t)zb->zb_level, 3107 (u_longlong_t)zb->zb_blkid, 3108 blkbuf); 3109 } 3110 mutex_exit(&spa->spa_scrub_lock); 3111 } 3112 3113 static int 3114 zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, 3115 const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) 3116 { 3117 zdb_cb_t *zcb = arg; 3118 dmu_object_type_t type; 3119 boolean_t is_metadata; 3120 3121 if (bp == NULL) 3122 return (0); 3123 3124 if (dump_opt['b'] >= 5 && bp->blk_birth > 0) { 3125 char blkbuf[BP_SPRINTF_LEN]; 3126 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); 3127 (void) printf("objset %llu object %llu " 3128 "level %lld offset 0x%llx %s\n", 3129 (u_longlong_t)zb->zb_objset, 3130 (u_longlong_t)zb->zb_object, 3131 (longlong_t)zb->zb_level, 3132 (u_longlong_t)blkid2offset(dnp, bp, zb), 3133 blkbuf); 3134 } 3135 3136 if (BP_IS_HOLE(bp)) 3137 return (0); 3138 3139 type = BP_GET_TYPE(bp); 3140 3141 zdb_count_block(zcb, zilog, bp, 3142 (type & DMU_OT_NEWTYPE) ? ZDB_OT_OTHER : type); 3143 3144 is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)); 3145 3146 if (!BP_IS_EMBEDDED(bp) && 3147 (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata))) { 3148 size_t size = BP_GET_PSIZE(bp); 3149 abd_t *abd = abd_alloc(size, B_FALSE); 3150 int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW; 3151 3152 /* If it's an intent log block, failure is expected. 
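 * ZIL blocks may already have been freed and reused, so the read is
 * issued with ZIO_FLAG_SPECULATIVE and zdb_blkptr_done() above does
 * not count its errors against zcb_errors.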
*/ 3153 if (zb->zb_level == ZB_ZIL_LEVEL) 3154 flags |= ZIO_FLAG_SPECULATIVE; 3155 3156 mutex_enter(&spa->spa_scrub_lock); 3157 while (spa->spa_load_verify_ios > max_inflight) 3158 cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); 3159 spa->spa_load_verify_ios++; 3160 mutex_exit(&spa->spa_scrub_lock); 3161 3162 zio_nowait(zio_read(NULL, spa, bp, abd, size, 3163 zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb)); 3164 } 3165 3166 zcb->zcb_readfails = 0; 3167 3168 /* only call gethrtime() every 100 blocks */ 3169 static int iters; 3170 if (++iters > 100) 3171 iters = 0; 3172 else 3173 return (0); 3174 3175 if (dump_opt['b'] < 5 && gethrtime() > zcb->zcb_lastprint + NANOSEC) { 3176 uint64_t now = gethrtime(); 3177 char buf[10]; 3178 uint64_t bytes = zcb->zcb_type[ZB_TOTAL][ZDB_OT_TOTAL].zb_asize; 3179 int kb_per_sec = 3180 1 + bytes / (1 + ((now - zcb->zcb_start) / 1000 / 1000)); 3181 int sec_remaining = 3182 (zcb->zcb_totalasize - bytes) / 1024 / kb_per_sec; 3183 3184 /* make sure nicenum has enough space */ 3185 CTASSERT(sizeof (buf) >= NN_NUMBUF_SZ); 3186 3187 zfs_nicenum(bytes, buf, sizeof (buf)); 3188 (void) fprintf(stderr, 3189 "\r%5s completed (%4dMB/s) " 3190 "estimated time remaining: %uhr %02umin %02usec ", 3191 buf, kb_per_sec / 1024, 3192 sec_remaining / 60 / 60, 3193 sec_remaining / 60 % 60, 3194 sec_remaining % 60); 3195 3196 zcb->zcb_lastprint = now; 3197 } 3198 3199 return (0); 3200 } 3201 3202 static void 3203 zdb_leak(void *arg, uint64_t start, uint64_t size) 3204 { 3205 vdev_t *vd = arg; 3206 3207 (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n", 3208 (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size); 3209 } 3210 3211 static metaslab_ops_t zdb_metaslab_ops = { 3212 NULL /* alloc */ 3213 }; 3214 3215 typedef int (*zdb_log_sm_cb_t)(spa_t *spa, space_map_entry_t *sme, 3216 uint64_t txg, void *arg); 3217 3218 typedef struct unflushed_iter_cb_arg { 3219 spa_t *uic_spa; 3220 uint64_t uic_txg; 3221 void *uic_arg; 3222 zdb_log_sm_cb_t uic_cb; 3223 } unflushed_iter_cb_arg_t; 3224 3225 static int 3226 iterate_through_spacemap_logs_cb(space_map_entry_t *sme, void *arg) 3227 { 3228 unflushed_iter_cb_arg_t *uic = arg; 3229 3230 return (uic->uic_cb(uic->uic_spa, sme, uic->uic_txg, uic->uic_arg)); 3231 } 3232 3233 static void 3234 iterate_through_spacemap_logs(spa_t *spa, zdb_log_sm_cb_t cb, void *arg) 3235 { 3236 if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP)) 3237 return; 3238 3239 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 3240 for (spa_log_sm_t *sls = avl_first(&spa->spa_sm_logs_by_txg); 3241 sls; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls)) { 3242 space_map_t *sm = NULL; 3243 VERIFY0(space_map_open(&sm, spa_meta_objset(spa), 3244 sls->sls_sm_obj, 0, UINT64_MAX, SPA_MINBLOCKSHIFT)); 3245 3246 unflushed_iter_cb_arg_t uic = { 3247 .uic_spa = spa, 3248 .uic_txg = sls->sls_txg, 3249 .uic_arg = arg, 3250 .uic_cb = cb 3251 }; 3252 3253 VERIFY0(space_map_iterate(sm, space_map_length(sm), 3254 iterate_through_spacemap_logs_cb, &uic)); 3255 space_map_close(sm); 3256 } 3257 spa_config_exit(spa, SCL_CONFIG, FTAG); 3258 } 3259 3260 /* ARGSUSED */ 3261 static int 3262 load_unflushed_svr_segs_cb(spa_t *spa, space_map_entry_t *sme, 3263 uint64_t txg, void *arg) 3264 { 3265 spa_vdev_removal_t *svr = arg; 3266 3267 uint64_t offset = sme->sme_offset; 3268 uint64_t size = sme->sme_run; 3269 3270 /* skip vdevs we don't care about */ 3271 if (sme->sme_vdev != svr->svr_vdev_id) 3272 return (0); 3273 3274 vdev_t *vd = 
vdev_lookup_top(spa, sme->sme_vdev); 3275 metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift]; 3276 ASSERT(sme->sme_type == SM_ALLOC || sme->sme_type == SM_FREE); 3277 3278 if (txg < metaslab_unflushed_txg(ms)) 3279 return (0); 3280 3281 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; 3282 ASSERT(vim != NULL); 3283 if (offset >= vdev_indirect_mapping_max_offset(vim)) 3284 return (0); 3285 3286 if (sme->sme_type == SM_ALLOC) 3287 range_tree_add(svr->svr_allocd_segs, offset, size); 3288 else 3289 range_tree_remove(svr->svr_allocd_segs, offset, size); 3290 3291 return (0); 3292 } 3293 3294 static void 3295 zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb) 3296 { 3297 ddt_bookmark_t ddb; 3298 ddt_entry_t dde; 3299 int error; 3300 3301 ASSERT(!dump_opt['L']); 3302 3303 bzero(&ddb, sizeof (ddb)); 3304 while ((error = ddt_walk(spa, &ddb, &dde)) == 0) { 3305 blkptr_t blk; 3306 ddt_phys_t *ddp = dde.dde_phys; 3307 3308 if (ddb.ddb_class == DDT_CLASS_UNIQUE) 3309 return; 3310 3311 ASSERT(ddt_phys_total_refcnt(&dde) > 1); 3312 3313 for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { 3314 if (ddp->ddp_phys_birth == 0) 3315 continue; 3316 ddt_bp_create(ddb.ddb_checksum, 3317 &dde.dde_key, ddp, &blk); 3318 if (p == DDT_PHYS_DITTO) { 3319 zdb_count_block(zcb, NULL, &blk, ZDB_OT_DITTO); 3320 } else { 3321 zcb->zcb_dedup_asize += 3322 BP_GET_ASIZE(&blk) * (ddp->ddp_refcnt - 1); 3323 zcb->zcb_dedup_blocks++; 3324 } 3325 } 3326 ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum]; 3327 ddt_enter(ddt); 3328 VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL); 3329 ddt_exit(ddt); 3330 } 3331 3332 ASSERT(error == ENOENT); 3333 } 3334 3335 /* ARGSUSED */ 3336 static void 3337 claim_segment_impl_cb(uint64_t inner_offset, vdev_t *vd, uint64_t offset, 3338 uint64_t size, void *arg) 3339 { 3340 /* 3341 * This callback was called through a remap from 3342 * a device being removed. Therefore, the vdev that 3343 * this callback is applied to is a concrete 3344 * vdev. 3345 */ 3346 ASSERT(vdev_is_concrete(vd)); 3347 3348 VERIFY0(metaslab_claim_impl(vd, offset, size, 3349 spa_min_claim_txg(vd->vdev_spa))); 3350 } 3351 3352 static void 3353 claim_segment_cb(void *arg, uint64_t offset, uint64_t size) 3354 { 3355 vdev_t *vd = arg; 3356 3357 vdev_indirect_ops.vdev_op_remap(vd, offset, size, 3358 claim_segment_impl_cb, NULL); 3359 } 3360 3361 /* 3362 * After accounting for all allocated blocks that are directly referenced, 3363 * we might have missed a reference to a block from a partially complete 3364 * (and thus unused) indirect mapping object. We perform a secondary pass 3365 * through the metaslabs we have already mapped and claim the destination 3366 * blocks. 
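 * Claiming a block also removes it from the overloaded ms_allocatable
 * tree, so these destination blocks will not later be reported as
 * leaked by zdb_leak_fini().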
3367 */ 3368 static void 3369 zdb_claim_removing(spa_t *spa, zdb_cb_t *zcb) 3370 { 3371 if (dump_opt['L']) 3372 return; 3373 3374 if (spa->spa_vdev_removal == NULL) 3375 return; 3376 3377 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 3378 3379 spa_vdev_removal_t *svr = spa->spa_vdev_removal; 3380 vdev_t *vd = vdev_lookup_top(spa, svr->svr_vdev_id); 3381 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; 3382 3383 ASSERT0(range_tree_space(svr->svr_allocd_segs)); 3384 3385 range_tree_t *allocs = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0); 3386 for (uint64_t msi = 0; msi < vd->vdev_ms_count; msi++) { 3387 metaslab_t *msp = vd->vdev_ms[msi]; 3388 3389 if (msp->ms_start >= vdev_indirect_mapping_max_offset(vim)) 3390 break; 3391 3392 ASSERT0(range_tree_space(allocs)); 3393 if (msp->ms_sm != NULL) 3394 VERIFY0(space_map_load(msp->ms_sm, allocs, SM_ALLOC)); 3395 range_tree_vacate(allocs, range_tree_add, svr->svr_allocd_segs); 3396 } 3397 range_tree_destroy(allocs); 3398 3399 iterate_through_spacemap_logs(spa, load_unflushed_svr_segs_cb, svr); 3400 3401 /* 3402 * Clear everything past what has been synced, 3403 * because we have not allocated mappings for 3404 * it yet. 3405 */ 3406 range_tree_clear(svr->svr_allocd_segs, 3407 vdev_indirect_mapping_max_offset(vim), 3408 vd->vdev_asize - vdev_indirect_mapping_max_offset(vim)); 3409 3410 zcb->zcb_removing_size += range_tree_space(svr->svr_allocd_segs); 3411 range_tree_vacate(svr->svr_allocd_segs, claim_segment_cb, vd); 3412 3413 spa_config_exit(spa, SCL_CONFIG, FTAG); 3414 } 3415 3416 /* ARGSUSED */ 3417 static int 3418 increment_indirect_mapping_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) 3419 { 3420 zdb_cb_t *zcb = arg; 3421 spa_t *spa = zcb->zcb_spa; 3422 vdev_t *vd; 3423 const dva_t *dva = &bp->blk_dva[0]; 3424 3425 ASSERT(!dump_opt['L']); 3426 ASSERT3U(BP_GET_NDVAS(bp), ==, 1); 3427 3428 spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 3429 vd = vdev_lookup_top(zcb->zcb_spa, DVA_GET_VDEV(dva)); 3430 ASSERT3P(vd, !=, NULL); 3431 spa_config_exit(spa, SCL_VDEV, FTAG); 3432 3433 ASSERT(vd->vdev_indirect_config.vic_mapping_object != 0); 3434 ASSERT3P(zcb->zcb_vd_obsolete_counts[vd->vdev_id], !=, NULL); 3435 3436 vdev_indirect_mapping_increment_obsolete_count( 3437 vd->vdev_indirect_mapping, 3438 DVA_GET_OFFSET(dva), DVA_GET_ASIZE(dva), 3439 zcb->zcb_vd_obsolete_counts[vd->vdev_id]); 3440 3441 return (0); 3442 } 3443 3444 static uint32_t * 3445 zdb_load_obsolete_counts(vdev_t *vd) 3446 { 3447 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; 3448 spa_t *spa = vd->vdev_spa; 3449 spa_condensing_indirect_phys_t *scip = 3450 &spa->spa_condensing_indirect_phys; 3451 uint32_t *counts; 3452 3453 EQUIV(vdev_obsolete_sm_object(vd) != 0, vd->vdev_obsolete_sm != NULL); 3454 counts = vdev_indirect_mapping_load_obsolete_counts(vim); 3455 if (vd->vdev_obsolete_sm != NULL) { 3456 vdev_indirect_mapping_load_obsolete_spacemap(vim, counts, 3457 vd->vdev_obsolete_sm); 3458 } 3459 if (scip->scip_vdev == vd->vdev_id && 3460 scip->scip_prev_obsolete_sm_object != 0) { 3461 space_map_t *prev_obsolete_sm = NULL; 3462 VERIFY0(space_map_open(&prev_obsolete_sm, spa->spa_meta_objset, 3463 scip->scip_prev_obsolete_sm_object, 0, vd->vdev_asize, 0)); 3464 vdev_indirect_mapping_load_obsolete_spacemap(vim, counts, 3465 prev_obsolete_sm); 3466 space_map_close(prev_obsolete_sm); 3467 } 3468 return (counts); 3469 } 3470 3471 typedef struct checkpoint_sm_exclude_entry_arg { 3472 vdev_t *cseea_vd; 3473 uint64_t cseea_checkpoint_size; 3474 } 
checkpoint_sm_exclude_entry_arg_t; 3475 3476 static int 3477 checkpoint_sm_exclude_entry_cb(space_map_entry_t *sme, void *arg) 3478 { 3479 checkpoint_sm_exclude_entry_arg_t *cseea = arg; 3480 vdev_t *vd = cseea->cseea_vd; 3481 metaslab_t *ms = vd->vdev_ms[sme->sme_offset >> vd->vdev_ms_shift]; 3482 uint64_t end = sme->sme_offset + sme->sme_run; 3483 3484 ASSERT(sme->sme_type == SM_FREE); 3485 3486 /* 3487 * Since the vdev_checkpoint_sm exists in the vdev level 3488 * and the ms_sm space maps exist in the metaslab level, 3489 * an entry in the checkpoint space map could theoretically 3490 * cross the boundaries of the metaslab to which it belongs. 3491 * 3492 * In reality, because of the way that we populate and 3493 * manipulate the checkpoint's space maps currently, 3494 * there shouldn't be any entries that cross metaslabs. 3495 * Hence the assertion below. 3496 * 3497 * That said, there is no fundamental requirement that 3498 * the checkpoint's space map entries should not cross 3499 * metaslab boundaries. So if needed we could add code 3500 * that handles metaslab-crossing segments in the future. 3501 */ 3502 VERIFY3U(sme->sme_offset, >=, ms->ms_start); 3503 VERIFY3U(end, <=, ms->ms_start + ms->ms_size); 3504 3505 /* 3506 * By removing the entry from the allocated segments we 3507 * also verify that the entry is there to begin with. 3508 */ 3509 mutex_enter(&ms->ms_lock); 3510 range_tree_remove(ms->ms_allocatable, sme->sme_offset, sme->sme_run); 3511 mutex_exit(&ms->ms_lock); 3512 3513 cseea->cseea_checkpoint_size += sme->sme_run; 3514 return (0); 3515 } 3516 3517 static void 3518 zdb_leak_init_vdev_exclude_checkpoint(vdev_t *vd, zdb_cb_t *zcb) 3519 { 3520 spa_t *spa = vd->vdev_spa; 3521 space_map_t *checkpoint_sm = NULL; 3522 uint64_t checkpoint_sm_obj; 3523 3524 /* 3525 * If there is no vdev_top_zap, we are in a pool whose 3526 * version predates the pool checkpoint feature. 3527 */ 3528 if (vd->vdev_top_zap == 0) 3529 return; 3530 3531 /* 3532 * If there is no reference to the vdev_checkpoint_sm in 3533 * the vdev_top_zap, then one of the following scenarios 3534 * is true: 3535 * 3536 * 1] There is no checkpoint 3537 * 2] There is a checkpoint, but no checkpointed blocks 3538 * have been freed yet 3539 * 3] The current vdev is indirect 3540 * 3541 * In these cases we return immediately.
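 * The zap_contains() check below folds all three cases into one
 * early return.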
3542 */ 3543 if (zap_contains(spa_meta_objset(spa), vd->vdev_top_zap, 3544 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0) 3545 return; 3546 3547 VERIFY0(zap_lookup(spa_meta_objset(spa), vd->vdev_top_zap, 3548 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM, sizeof (uint64_t), 1, 3549 &checkpoint_sm_obj)); 3550 3551 checkpoint_sm_exclude_entry_arg_t cseea; 3552 cseea.cseea_vd = vd; 3553 cseea.cseea_checkpoint_size = 0; 3554 3555 VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(spa), 3556 checkpoint_sm_obj, 0, vd->vdev_asize, vd->vdev_ashift)); 3557 3558 VERIFY0(space_map_iterate(checkpoint_sm, 3559 space_map_length(checkpoint_sm), 3560 checkpoint_sm_exclude_entry_cb, &cseea)); 3561 space_map_close(checkpoint_sm); 3562 3563 zcb->zcb_checkpoint_size += cseea.cseea_checkpoint_size; 3564 } 3565 3566 static void 3567 zdb_leak_init_exclude_checkpoint(spa_t *spa, zdb_cb_t *zcb) 3568 { 3569 ASSERT(!dump_opt['L']); 3570 3571 vdev_t *rvd = spa->spa_root_vdev; 3572 for (uint64_t c = 0; c < rvd->vdev_children; c++) { 3573 ASSERT3U(c, ==, rvd->vdev_child[c]->vdev_id); 3574 zdb_leak_init_vdev_exclude_checkpoint(rvd->vdev_child[c], zcb); 3575 } 3576 } 3577 3578 static int 3579 count_unflushed_space_cb(spa_t *spa, space_map_entry_t *sme, 3580 uint64_t txg, void *arg) 3581 { 3582 int64_t *ualloc_space = arg; 3583 uint64_t offset = sme->sme_offset; 3584 uint64_t vdev_id = sme->sme_vdev; 3585 3586 vdev_t *vd = vdev_lookup_top(spa, vdev_id); 3587 if (!vdev_is_concrete(vd)) 3588 return (0); 3589 3590 metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift]; 3591 ASSERT(sme->sme_type == SM_ALLOC || sme->sme_type == SM_FREE); 3592 3593 if (txg < metaslab_unflushed_txg(ms)) 3594 return (0); 3595 3596 if (sme->sme_type == SM_ALLOC) 3597 *ualloc_space += sme->sme_run; 3598 else 3599 *ualloc_space -= sme->sme_run; 3600 3601 return (0); 3602 } 3603 3604 static int64_t 3605 get_unflushed_alloc_space(spa_t *spa) 3606 { 3607 if (dump_opt['L']) 3608 return (0); 3609 3610 int64_t ualloc_space = 0; 3611 iterate_through_spacemap_logs(spa, count_unflushed_space_cb, 3612 &ualloc_space); 3613 return (ualloc_space); 3614 } 3615 3616 static int 3617 load_unflushed_cb(spa_t *spa, space_map_entry_t *sme, uint64_t txg, void *arg) 3618 { 3619 maptype_t *uic_maptype = arg; 3620 uint64_t offset = sme->sme_offset; 3621 uint64_t size = sme->sme_run; 3622 uint64_t vdev_id = sme->sme_vdev; 3623 vdev_t *vd = vdev_lookup_top(spa, vdev_id); 3624 3625 /* skip indirect vdevs */ 3626 if (!vdev_is_concrete(vd)) 3627 return (0); 3628 3629 metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift]; 3630 3631 ASSERT(sme->sme_type == SM_ALLOC || sme->sme_type == SM_FREE); 3632 ASSERT(*uic_maptype == SM_ALLOC || *uic_maptype == SM_FREE); 3633 3634 if (txg < metaslab_unflushed_txg(ms)) 3635 return (0); 3636 3637 if (*uic_maptype == sme->sme_type) 3638 range_tree_add(ms->ms_allocatable, offset, size); 3639 else 3640 range_tree_remove(ms->ms_allocatable, offset, size); 3641 3642 return (0); 3643 } 3644 3645 static void 3646 load_unflushed_to_ms_allocatables(spa_t *spa, maptype_t maptype) 3647 { 3648 iterate_through_spacemap_logs(spa, load_unflushed_cb, &maptype); 3649 } 3650 3651 static void 3652 load_concrete_ms_allocatable_trees(spa_t *spa, maptype_t maptype) 3653 { 3654 vdev_t *rvd = spa->spa_root_vdev; 3655 for (uint64_t i = 0; i < rvd->vdev_children; i++) { 3656 vdev_t *vd = rvd->vdev_child[i]; 3657 3658 ASSERT3U(i, ==, vd->vdev_id); 3659 3660 if (vd->vdev_ops == &vdev_indirect_ops) 3661 continue; 3662 3663 for (uint64_t m = 0; m < vd->vdev_ms_count; m++) { 3664 
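/* Overload this metaslab's ms_allocatable with the requested maptype. */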
metaslab_t *msp = vd->vdev_ms[m]; 3665 3666 (void) fprintf(stderr, 3667 "\rloading concrete vdev %llu, " 3668 "metaslab %llu of %llu ...", 3669 (longlong_t)vd->vdev_id, 3670 (longlong_t)msp->ms_id, 3671 (longlong_t)vd->vdev_ms_count); 3672 3673 mutex_enter(&msp->ms_lock); 3674 range_tree_vacate(msp->ms_allocatable, NULL, NULL); 3675 3676 /* 3677 * We don't want to spend the CPU manipulating the 3678 * size-ordered tree, so clear the range_tree ops. 3679 */ 3680 msp->ms_allocatable->rt_ops = NULL; 3681 3682 if (msp->ms_sm != NULL) { 3683 VERIFY0(space_map_load(msp->ms_sm, 3684 msp->ms_allocatable, maptype)); 3685 } 3686 if (!msp->ms_loaded) 3687 msp->ms_loaded = B_TRUE; 3688 mutex_exit(&msp->ms_lock); 3689 } 3690 } 3691 3692 load_unflushed_to_ms_allocatables(spa, maptype); 3693 } 3694 3695 /* 3696 * vm_idxp is an in-out parameter which (for indirect vdevs) is the 3697 * index in vim_entries that has the first entry in this metaslab. 3698 * On return, it will be set to the first entry after this metaslab. 3699 */ 3700 static void 3701 load_indirect_ms_allocatable_tree(vdev_t *vd, metaslab_t *msp, 3702 uint64_t *vim_idxp) 3703 { 3704 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; 3705 3706 mutex_enter(&msp->ms_lock); 3707 range_tree_vacate(msp->ms_allocatable, NULL, NULL); 3708 3709 /* 3710 * We don't want to spend the CPU manipulating the 3711 * size-ordered tree, so clear the range_tree ops. 3712 */ 3713 msp->ms_allocatable->rt_ops = NULL; 3714 3715 for (; *vim_idxp < vdev_indirect_mapping_num_entries(vim); 3716 (*vim_idxp)++) { 3717 vdev_indirect_mapping_entry_phys_t *vimep = 3718 &vim->vim_entries[*vim_idxp]; 3719 uint64_t ent_offset = DVA_MAPPING_GET_SRC_OFFSET(vimep); 3720 uint64_t ent_len = DVA_GET_ASIZE(&vimep->vimep_dst); 3721 ASSERT3U(ent_offset, >=, msp->ms_start); 3722 if (ent_offset >= msp->ms_start + msp->ms_size) 3723 break; 3724 3725 /* 3726 * Mappings do not cross metaslab boundaries, 3727 * because we create them by walking the metaslabs. 3728 */ 3729 ASSERT3U(ent_offset + ent_len, <=, 3730 msp->ms_start + msp->ms_size); 3731 range_tree_add(msp->ms_allocatable, ent_offset, ent_len); 3732 } 3733 3734 if (!msp->ms_loaded) 3735 msp->ms_loaded = B_TRUE; 3736 mutex_exit(&msp->ms_lock); 3737 } 3738 3739 static void 3740 zdb_leak_init_prepare_indirect_vdevs(spa_t *spa, zdb_cb_t *zcb) 3741 { 3742 ASSERT(!dump_opt['L']); 3743 3744 vdev_t *rvd = spa->spa_root_vdev; 3745 for (uint64_t c = 0; c < rvd->vdev_children; c++) { 3746 vdev_t *vd = rvd->vdev_child[c]; 3747 3748 ASSERT3U(c, ==, vd->vdev_id); 3749 3750 if (vd->vdev_ops != &vdev_indirect_ops) 3751 continue; 3752 3753 /* 3754 * Note: we don't check for mapping leaks on 3755 * removing vdevs because their ms_allocatable's 3756 * are used to look for leaks in allocated space. 3757 */ 3758 zcb->zcb_vd_obsolete_counts[c] = zdb_load_obsolete_counts(vd); 3759 3760 /* 3761 * Normally, indirect vdevs don't have any 3762 * metaslabs. We want to set them up for 3763 * zio_claim(). 
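 * vdev_metaslab_init() below gives the indirect vdev an (initially
 * empty) metaslab array; each metaslab is then filled in from the
 * indirect mapping by load_indirect_ms_allocatable_tree().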
3764 */ 3765 VERIFY0(vdev_metaslab_init(vd, 0)); 3766 3767 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; 3768 uint64_t vim_idx = 0; 3769 for (uint64_t m = 0; m < vd->vdev_ms_count; m++) { 3770 3771 (void) fprintf(stderr, 3772 "\rloading indirect vdev %llu, " 3773 "metaslab %llu of %llu ...", 3774 (longlong_t)vd->vdev_id, 3775 (longlong_t)vd->vdev_ms[m]->ms_id, 3776 (longlong_t)vd->vdev_ms_count); 3777 3778 load_indirect_ms_allocatable_tree(vd, vd->vdev_ms[m], 3779 &vim_idx); 3780 } 3781 ASSERT3U(vim_idx, ==, vdev_indirect_mapping_num_entries(vim)); 3782 } 3783 } 3784 3785 static void 3786 zdb_leak_init(spa_t *spa, zdb_cb_t *zcb) 3787 { 3788 zcb->zcb_spa = spa; 3789 3790 if (dump_opt['L']) 3791 return; 3792 3793 dsl_pool_t *dp = spa->spa_dsl_pool; 3794 vdev_t *rvd = spa->spa_root_vdev; 3795 3796 /* 3797 * We are going to be changing the meaning of the metaslab's 3798 * ms_allocatable. Ensure that the allocator doesn't try to 3799 * use the tree. 3800 */ 3801 spa->spa_normal_class->mc_ops = &zdb_metaslab_ops; 3802 spa->spa_log_class->mc_ops = &zdb_metaslab_ops; 3803 3804 zcb->zcb_vd_obsolete_counts = 3805 umem_zalloc(rvd->vdev_children * sizeof (uint32_t *), 3806 UMEM_NOFAIL); 3807 3808 /* 3809 * For leak detection, we overload the ms_allocatable trees 3810 * to contain allocated segments instead of free segments. 3811 * As a result, we can't use the normal metaslab_load/unload 3812 * interfaces. 3813 */ 3814 zdb_leak_init_prepare_indirect_vdevs(spa, zcb); 3815 load_concrete_ms_allocatable_trees(spa, SM_ALLOC); 3816 3817 /* 3818 * On load_concrete_ms_allocatable_trees() we loaded all the 3819 * allocated entries from the ms_sm to the ms_allocatable for 3820 * each metaslab. If the pool has a checkpoint or is in the 3821 * middle of discarding a checkpoint, some of these blocks 3822 * may have been freed but their ms_sm may not have been 3823 * updated because they are referenced by the checkpoint. In 3824 * order to avoid false-positives during leak-detection, we 3825 * go through the vdev's checkpoint space map and exclude all 3826 * its entries from their relevant ms_allocatable. 3827 * 3828 * We also aggregate the space held by the checkpoint and add 3829 * it to zcb_checkpoint_size. 3830 * 3831 * Note that at this point we are also verifying that all the 3832 * entries on the checkpoint_sm are marked as allocated in 3833 * the ms_sm of their relevant metaslab. 
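 * (an entry that was not marked allocated there would trip the
 * range_tree_remove() in that callback)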
* [see comment in checkpoint_sm_exclude_entry_cb()] 3834 */ 3835 3836 zdb_leak_init_exclude_checkpoint(spa, zcb); 3837 ASSERT3U(zcb->zcb_checkpoint_size, ==, spa_get_checkpoint_space(spa)); 3838 3839 /* for cleaner progress output */ 3840 (void) fprintf(stderr, "\n"); 3841 3842 if (bpobj_is_open(&dp->dp_obsolete_bpobj)) { 3843 ASSERT(spa_feature_is_enabled(spa, 3844 SPA_FEATURE_DEVICE_REMOVAL)); 3845 (void) bpobj_iterate_nofree(&dp->dp_obsolete_bpobj, 3846 increment_indirect_mapping_cb, zcb, NULL); 3847 } 3848 3849 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 3850 zdb_ddt_leak_init(spa, zcb); 3851 spa_config_exit(spa, SCL_CONFIG, FTAG); 3852 } 3853 3854 static boolean_t 3855 zdb_check_for_obsolete_leaks(vdev_t *vd, zdb_cb_t *zcb) 3856 { 3857 boolean_t leaks = B_FALSE; 3858 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; 3859 uint64_t total_leaked = 0; 3860 3861 ASSERT(vim != NULL); 3862 3863 for (uint64_t i = 0; i < vdev_indirect_mapping_num_entries(vim); i++) { 3864 vdev_indirect_mapping_entry_phys_t *vimep = 3865 &vim->vim_entries[i]; 3866 uint64_t obsolete_bytes = 0; 3867 uint64_t offset = DVA_MAPPING_GET_SRC_OFFSET(vimep); 3868 metaslab_t *msp = vd->vdev_ms[offset >> vd->vdev_ms_shift]; 3869 3870 /* 3871 * This is not very efficient but it's easy to 3872 * verify correctness. 3873 */ 3874 for (uint64_t inner_offset = 0; 3875 inner_offset < DVA_GET_ASIZE(&vimep->vimep_dst); 3876 inner_offset += 1 << vd->vdev_ashift) { 3877 if (range_tree_contains(msp->ms_allocatable, 3878 offset + inner_offset, 1 << vd->vdev_ashift)) { 3879 obsolete_bytes += 1 << vd->vdev_ashift; 3880 } 3881 } 3882 3883 int64_t bytes_leaked = obsolete_bytes - 3884 zcb->zcb_vd_obsolete_counts[vd->vdev_id][i]; 3885 ASSERT3U(DVA_GET_ASIZE(&vimep->vimep_dst), >=, 3886 zcb->zcb_vd_obsolete_counts[vd->vdev_id][i]); 3887 if (bytes_leaked != 0 && 3888 (vdev_obsolete_counts_are_precise(vd) || 3889 dump_opt['d'] >= 5)) { 3890 (void) printf("obsolete indirect mapping count " 3891 "mismatch on %llu:%llx:%llx : %llx bytes leaked\n", 3892 (u_longlong_t)vd->vdev_id, 3893 (u_longlong_t)DVA_MAPPING_GET_SRC_OFFSET(vimep), 3894 (u_longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst), 3895 (u_longlong_t)bytes_leaked); 3896 } 3897 total_leaked += ABS(bytes_leaked); 3898 } 3899 3900 if (!vdev_obsolete_counts_are_precise(vd) && total_leaked > 0) { 3901 int pct_leaked = total_leaked * 100 / 3902 vdev_indirect_mapping_bytes_mapped(vim); 3903 (void) printf("cannot verify obsolete indirect mapping " 3904 "counts of vdev %llu because precise feature was not " 3905 "enabled when it was removed: %d%% (%llx bytes) of mapping " 3906 "unreferenced\n", 3907 (u_longlong_t)vd->vdev_id, pct_leaked, 3908 (u_longlong_t)total_leaked); 3909 } else if (total_leaked > 0) { 3910 (void) printf("obsolete indirect mapping count mismatch " 3911 "for vdev %llu -- %llx total bytes mismatched\n", 3912 (u_longlong_t)vd->vdev_id, 3913 (u_longlong_t)total_leaked); 3914 leaks |= B_TRUE; 3915 } 3916 3917 vdev_indirect_mapping_free_obsolete_counts(vim, 3918 zcb->zcb_vd_obsolete_counts[vd->vdev_id]); 3919 zcb->zcb_vd_obsolete_counts[vd->vdev_id] = NULL; 3920 3921 return (leaks); 3922 } 3923 3924 static boolean_t 3925 zdb_leak_fini(spa_t *spa, zdb_cb_t *zcb) 3926 { 3927 if (dump_opt['L']) 3928 return (B_FALSE); 3929 3930 boolean_t leaks = B_FALSE; 3931 3932 vdev_t *rvd = spa->spa_root_vdev; 3933 for (unsigned c = 0; c < rvd->vdev_children; c++) { 3934 vdev_t *vd = rvd->vdev_child[c]; 3935 #if DEBUG 3936 metaslab_group_t *mg = vd->vdev_mg; 3937 #endif 3938 3939 if
(zcb->zcb_vd_obsolete_counts[c] != NULL) { 3940 leaks |= zdb_check_for_obsolete_leaks(vd, zcb); 3941 } 3942 3943 for (uint64_t m = 0; m < vd->vdev_ms_count; m++) { 3944 metaslab_t *msp = vd->vdev_ms[m]; 3945 ASSERT3P(mg, ==, msp->ms_group); 3946 3947 /* 3948 * ms_allocatable has been overloaded 3949 * to contain allocated segments. Now that 3950 * we finished traversing all blocks, any 3951 * block that remains in the ms_allocatable 3952 * represents an allocated block that we 3953 * did not claim during the traversal. 3954 * Claimed blocks would have been removed 3955 * from the ms_allocatable. For indirect 3956 * vdevs, space remaining in the tree 3957 * represents parts of the mapping that are 3958 * not referenced, which is not a bug. 3959 */ 3960 if (vd->vdev_ops == &vdev_indirect_ops) { 3961 range_tree_vacate(msp->ms_allocatable, 3962 NULL, NULL); 3963 } else { 3964 range_tree_vacate(msp->ms_allocatable, 3965 zdb_leak, vd); 3966 } 3967 if (msp->ms_loaded) { 3968 msp->ms_loaded = B_FALSE; 3969 } 3970 } 3971 3972 } 3973 3974 umem_free(zcb->zcb_vd_obsolete_counts, 3975 rvd->vdev_children * sizeof (uint32_t *)); 3976 zcb->zcb_vd_obsolete_counts = NULL; 3977 3978 return (leaks); 3979 } 3980 3981 /* ARGSUSED */ 3982 static int 3983 count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) 3984 { 3985 zdb_cb_t *zcb = arg; 3986 3987 if (dump_opt['b'] >= 5) { 3988 char blkbuf[BP_SPRINTF_LEN]; 3989 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); 3990 (void) printf("[%s] %s\n", 3991 "deferred free", blkbuf); 3992 } 3993 zdb_count_block(zcb, NULL, bp, ZDB_OT_DEFERRED); 3994 return (0); 3995 } 3996 3997 static int 3998 dump_block_stats(spa_t *spa) 3999 { 4000 zdb_cb_t zcb; 4001 zdb_blkstats_t *zb, *tzb; 4002 uint64_t norm_alloc, norm_space, total_alloc, total_found; 4003 int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | 4004 TRAVERSE_NO_DECRYPT | TRAVERSE_HARD; 4005 boolean_t leaks = B_FALSE; 4006 int err; 4007 4008 bzero(&zcb, sizeof (zcb)); 4009 (void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n", 4010 (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "", 4011 (dump_opt['c'] == 1) ? "metadata " : "", 4012 dump_opt['c'] ? "checksums " : "", 4013 (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "", 4014 !dump_opt['L'] ? "nothing leaked " : ""); 4015 4016 /* 4017 * When leak detection is enabled we load all space maps as SM_ALLOC 4018 * maps, then traverse the pool claiming each block we discover. If 4019 * the pool is perfectly consistent, the segment trees will be empty 4020 * when we're done. Anything left over is a leak; any block we can't 4021 * claim (because it's not part of any space map) is a double 4022 * allocation, reference to a freed block, or an unclaimed log block. 4023 * 4024 * When leak detection is disabled (-L option) we still traverse the 4025 * pool claiming each block we discover, but we skip opening any space 4026 * maps. 4027 */ 4028 bzero(&zcb, sizeof (zdb_cb_t)); 4029 zdb_leak_init(spa, &zcb); 4030 4031 /* 4032 * If there's a deferred-free bplist, process that first. 
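 * These are blocks whose frees have been deferred to a later txg; count_block_cb() tallies each one under ZDB_OT_DEFERRED and claims it, so deferred-free space is not mistaken for a leak.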
4033 */ 4034 (void) bpobj_iterate_nofree(&spa->spa_deferred_bpobj, 4035 count_block_cb, &zcb, NULL); 4036 4037 if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { 4038 (void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj, 4039 count_block_cb, &zcb, NULL); 4040 } 4041 4042 zdb_claim_removing(spa, &zcb); 4043 4044 if (spa_feature_is_active(spa, SPA_FEATURE_ASYNC_DESTROY)) { 4045 VERIFY3U(0, ==, bptree_iterate(spa->spa_meta_objset, 4046 spa->spa_dsl_pool->dp_bptree_obj, B_FALSE, count_block_cb, 4047 &zcb, NULL)); 4048 } 4049 4050 if (dump_opt['c'] > 1) 4051 flags |= TRAVERSE_PREFETCH_DATA; 4052 4053 zcb.zcb_totalasize = metaslab_class_get_alloc(spa_normal_class(spa)); 4054 zcb.zcb_totalasize += metaslab_class_get_alloc(spa_special_class(spa)); 4055 zcb.zcb_totalasize += metaslab_class_get_alloc(spa_dedup_class(spa)); 4056 zcb.zcb_start = zcb.zcb_lastprint = gethrtime(); 4057 err = traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb); 4058 4059 /* 4060 * If we've traversed the data blocks then we need to wait for those 4061 * I/Os to complete. We leverage "The Godfather" zio to wait on 4062 * all async I/Os to complete. 4063 */ 4064 if (dump_opt['c']) { 4065 for (int i = 0; i < max_ncpus; i++) { 4066 (void) zio_wait(spa->spa_async_zio_root[i]); 4067 spa->spa_async_zio_root[i] = zio_root(spa, NULL, NULL, 4068 ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | 4069 ZIO_FLAG_GODFATHER); 4070 } 4071 } 4072 4073 /* 4074 * Done after zio_wait() since zcb_haderrors is modified in 4075 * zdb_blkptr_done() 4076 */ 4077 zcb.zcb_haderrors |= err; 4078 4079 if (zcb.zcb_haderrors) { 4080 (void) printf("\nError counts:\n\n"); 4081 (void) printf("\t%5s %s\n", "errno", "count"); 4082 for (int e = 0; e < 256; e++) { 4083 if (zcb.zcb_errors[e] != 0) { 4084 (void) printf("\t%5d %llu\n", 4085 e, (u_longlong_t)zcb.zcb_errors[e]); 4086 } 4087 } 4088 } 4089 4090 /* 4091 * Report any leaked segments. 4092 */ 4093 leaks |= zdb_leak_fini(spa, &zcb); 4094 4095 tzb = &zcb.zcb_type[ZB_TOTAL][ZDB_OT_TOTAL]; 4096 4097 norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa)); 4098 norm_space = metaslab_class_get_space(spa_normal_class(spa)); 4099 4100 total_alloc = norm_alloc + 4101 metaslab_class_get_alloc(spa_log_class(spa)) + 4102 metaslab_class_get_alloc(spa_special_class(spa)) + 4103 metaslab_class_get_alloc(spa_dedup_class(spa)) + 4104 get_unflushed_alloc_space(spa); 4105 total_found = tzb->zb_asize - zcb.zcb_dedup_asize + 4106 zcb.zcb_removing_size + zcb.zcb_checkpoint_size; 4107 4108 if (total_found == total_alloc && !dump_opt['L']) { 4109 (void) printf("\n\tNo leaks (block sum matches space" 4110 " maps exactly)\n"); 4111 } else if (!dump_opt['L']) { 4112 (void) printf("block traversal size %llu != alloc %llu " 4113 "(%s %lld)\n", 4114 (u_longlong_t)total_found, 4115 (u_longlong_t)total_alloc, 4116 (dump_opt['L']) ? 
"unreachable" : "leaked", 4117 (longlong_t)(total_alloc - total_found)); 4118 leaks = B_TRUE; 4119 } 4120 4121 if (tzb->zb_count == 0) 4122 return (2); 4123 4124 (void) printf("\n"); 4125 (void) printf("\t%-16s %14llu\n", "bp count:", 4126 (u_longlong_t)tzb->zb_count); 4127 (void) printf("\t%-16s %14llu\n", "ganged count:", 4128 (longlong_t)tzb->zb_gangs); 4129 (void) printf("\t%-16s %14llu avg: %6llu\n", "bp logical:", 4130 (u_longlong_t)tzb->zb_lsize, 4131 (u_longlong_t)(tzb->zb_lsize / tzb->zb_count)); 4132 (void) printf("\t%-16s %14llu avg: %6llu compression: %6.2f\n", 4133 "bp physical:", (u_longlong_t)tzb->zb_psize, 4134 (u_longlong_t)(tzb->zb_psize / tzb->zb_count), 4135 (double)tzb->zb_lsize / tzb->zb_psize); 4136 (void) printf("\t%-16s %14llu avg: %6llu compression: %6.2f\n", 4137 "bp allocated:", (u_longlong_t)tzb->zb_asize, 4138 (u_longlong_t)(tzb->zb_asize / tzb->zb_count), 4139 (double)tzb->zb_lsize / tzb->zb_asize); 4140 (void) printf("\t%-16s %14llu ref>1: %6llu deduplication: %6.2f\n", 4141 "bp deduped:", (u_longlong_t)zcb.zcb_dedup_asize, 4142 (u_longlong_t)zcb.zcb_dedup_blocks, 4143 (double)zcb.zcb_dedup_asize / tzb->zb_asize + 1.0); 4144 (void) printf("\t%-16s %14llu used: %5.2f%%\n", "Normal class:", 4145 (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space); 4146 4147 if (spa_special_class(spa)->mc_rotor != NULL) { 4148 uint64_t alloc = metaslab_class_get_alloc( 4149 spa_special_class(spa)); 4150 uint64_t space = metaslab_class_get_space( 4151 spa_special_class(spa)); 4152 4153 (void) printf("\t%-16s %14llu used: %5.2f%%\n", 4154 "Special class", (u_longlong_t)alloc, 4155 100.0 * alloc / space); 4156 } 4157 4158 if (spa_dedup_class(spa)->mc_rotor != NULL) { 4159 uint64_t alloc = metaslab_class_get_alloc( 4160 spa_dedup_class(spa)); 4161 uint64_t space = metaslab_class_get_space( 4162 spa_dedup_class(spa)); 4163 4164 (void) printf("\t%-16s %14llu used: %5.2f%%\n", 4165 "Dedup class", (u_longlong_t)alloc, 4166 100.0 * alloc / space); 4167 } 4168 4169 for (bp_embedded_type_t i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) { 4170 if (zcb.zcb_embedded_blocks[i] == 0) 4171 continue; 4172 (void) printf("\n"); 4173 (void) printf("\tadditional, non-pointer bps of type %u: " 4174 "%10llu\n", 4175 i, (u_longlong_t)zcb.zcb_embedded_blocks[i]); 4176 4177 if (dump_opt['b'] >= 3) { 4178 (void) printf("\t number of (compressed) bytes: " 4179 "number of bps\n"); 4180 dump_histogram(zcb.zcb_embedded_histogram[i], 4181 sizeof (zcb.zcb_embedded_histogram[i]) / 4182 sizeof (zcb.zcb_embedded_histogram[i][0]), 0); 4183 } 4184 } 4185 4186 if (tzb->zb_ditto_samevdev != 0) { 4187 (void) printf("\tDittoed blocks on same vdev: %llu\n", 4188 (longlong_t)tzb->zb_ditto_samevdev); 4189 } 4190 if (tzb->zb_ditto_same_ms != 0) { 4191 (void) printf("\tDittoed blocks in same metaslab: %llu\n", 4192 (longlong_t)tzb->zb_ditto_same_ms); 4193 } 4194 4195 for (uint64_t v = 0; v < spa->spa_root_vdev->vdev_children; v++) { 4196 vdev_t *vd = spa->spa_root_vdev->vdev_child[v]; 4197 vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; 4198 4199 if (vim == NULL) { 4200 continue; 4201 } 4202 4203 char mem[32]; 4204 zdb_nicenum(vdev_indirect_mapping_num_entries(vim), 4205 mem, vdev_indirect_mapping_size(vim)); 4206 4207 (void) printf("\tindirect vdev id %llu has %llu segments " 4208 "(%s in memory)\n", 4209 (longlong_t)vd->vdev_id, 4210 (longlong_t)vdev_indirect_mapping_num_entries(vim), mem); 4211 } 4212 4213 if (dump_opt['b'] >= 2) { 4214 int l, t, level; 4215 (void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE" 4216 "\t 
avg\t comp\t%%Total\tType\n"); 4217 4218 for (t = 0; t <= ZDB_OT_TOTAL; t++) { 4219 char csize[32], lsize[32], psize[32], asize[32]; 4220 char avg[32], gang[32]; 4221 const char *typename; 4222 4223 /* make sure nicenum has enough space */ 4224 CTASSERT(sizeof (csize) >= NN_NUMBUF_SZ); 4225 CTASSERT(sizeof (lsize) >= NN_NUMBUF_SZ); 4226 CTASSERT(sizeof (psize) >= NN_NUMBUF_SZ); 4227 CTASSERT(sizeof (asize) >= NN_NUMBUF_SZ); 4228 CTASSERT(sizeof (avg) >= NN_NUMBUF_SZ); 4229 CTASSERT(sizeof (gang) >= NN_NUMBUF_SZ); 4230 4231 if (t < DMU_OT_NUMTYPES) 4232 typename = dmu_ot[t].ot_name; 4233 else 4234 typename = zdb_ot_extname[t - DMU_OT_NUMTYPES]; 4235 4236 if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) { 4237 (void) printf("%6s\t%5s\t%5s\t%5s" 4238 "\t%5s\t%5s\t%6s\t%s\n", 4239 "-", 4240 "-", 4241 "-", 4242 "-", 4243 "-", 4244 "-", 4245 "-", 4246 typename); 4247 continue; 4248 } 4249 4250 for (l = ZB_TOTAL - 1; l >= -1; l--) { 4251 level = (l == -1 ? ZB_TOTAL : l); 4252 zb = &zcb.zcb_type[level][t]; 4253 4254 if (zb->zb_asize == 0) 4255 continue; 4256 4257 if (dump_opt['b'] < 3 && level != ZB_TOTAL) 4258 continue; 4259 4260 if (level == 0 && zb->zb_asize == 4261 zcb.zcb_type[ZB_TOTAL][t].zb_asize) 4262 continue; 4263 4264 zdb_nicenum(zb->zb_count, csize, 4265 sizeof (csize)); 4266 zdb_nicenum(zb->zb_lsize, lsize, 4267 sizeof (lsize)); 4268 zdb_nicenum(zb->zb_psize, psize, 4269 sizeof (psize)); 4270 zdb_nicenum(zb->zb_asize, asize, 4271 sizeof (asize)); 4272 zdb_nicenum(zb->zb_asize / zb->zb_count, avg, 4273 sizeof (avg)); 4274 zdb_nicenum(zb->zb_gangs, gang, sizeof (gang)); 4275 4276 (void) printf("%6s\t%5s\t%5s\t%5s\t%5s" 4277 "\t%5.2f\t%6.2f\t", 4278 csize, lsize, psize, asize, avg, 4279 (double)zb->zb_lsize / zb->zb_psize, 4280 100.0 * zb->zb_asize / tzb->zb_asize); 4281 4282 if (level == ZB_TOTAL) 4283 (void) printf("%s\n", typename); 4284 else 4285 (void) printf(" L%d %s\n", 4286 level, typename); 4287 4288 if (dump_opt['b'] >= 3 && zb->zb_gangs > 0) { 4289 (void) printf("\t number of ganged " 4290 "blocks: %s\n", gang); 4291 } 4292 4293 if (dump_opt['b'] >= 4) { 4294 (void) printf("psize " 4295 "(in 512-byte sectors): " 4296 "number of blocks\n"); 4297 dump_histogram(zb->zb_psize_histogram, 4298 PSIZE_HISTO_SIZE, 0); 4299 } 4300 } 4301 } 4302 } 4303 4304 (void) printf("\n"); 4305 4306 if (leaks) 4307 return (2); 4308 4309 if (zcb.zcb_haderrors) 4310 return (3); 4311 4312 return (0); 4313 } 4314 4315 typedef struct zdb_ddt_entry { 4316 ddt_key_t zdde_key; 4317 uint64_t zdde_ref_blocks; 4318 uint64_t zdde_ref_lsize; 4319 uint64_t zdde_ref_psize; 4320 uint64_t zdde_ref_dsize; 4321 avl_node_t zdde_node; 4322 } zdb_ddt_entry_t; 4323 4324 /* ARGSUSED */ 4325 static int 4326 zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, 4327 const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) 4328 { 4329 avl_tree_t *t = arg; 4330 avl_index_t where; 4331 zdb_ddt_entry_t *zdde, zdde_search; 4332 4333 if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) 4334 return (0); 4335 4336 if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) { 4337 (void) printf("traversing objset %llu, %llu objects, " 4338 "%lu blocks so far\n", 4339 (u_longlong_t)zb->zb_objset, 4340 (u_longlong_t)BP_GET_FILL(bp), 4341 avl_numnodes(t)); 4342 } 4343 4344 if (BP_IS_HOLE(bp) || BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_OFF || 4345 BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp))) 4346 return (0); 4347 4348 ddt_key_fill(&zdde_search.zdde_key, bp); 4349 4350 zdde = avl_find(t, &zdde_search, &where); 4351 
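/* First time this dedup key is seen, insert a zeroed entry at the position avl_find() just computed; either way, the block's reference counts and sizes are accumulated into the entry below. */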
4352 if (zdde == NULL) { 4353 zdde = umem_zalloc(sizeof (*zdde), UMEM_NOFAIL); 4354 zdde->zdde_key = zdde_search.zdde_key; 4355 avl_insert(t, zdde, where); 4356 } 4357 4358 zdde->zdde_ref_blocks += 1; 4359 zdde->zdde_ref_lsize += BP_GET_LSIZE(bp); 4360 zdde->zdde_ref_psize += BP_GET_PSIZE(bp); 4361 zdde->zdde_ref_dsize += bp_get_dsize_sync(spa, bp); 4362 4363 return (0); 4364 } 4365 4366 static void 4367 dump_simulated_ddt(spa_t *spa) 4368 { 4369 avl_tree_t t; 4370 void *cookie = NULL; 4371 zdb_ddt_entry_t *zdde; 4372 ddt_histogram_t ddh_total; 4373 ddt_stat_t dds_total; 4374 4375 bzero(&ddh_total, sizeof (ddh_total)); 4376 bzero(&dds_total, sizeof (dds_total)); 4377 avl_create(&t, ddt_entry_compare, 4378 sizeof (zdb_ddt_entry_t), offsetof(zdb_ddt_entry_t, zdde_node)); 4379 4380 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 4381 4382 (void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | 4383 TRAVERSE_NO_DECRYPT, zdb_ddt_add_cb, &t); 4384 4385 spa_config_exit(spa, SCL_CONFIG, FTAG); 4386 4387 while ((zdde = avl_destroy_nodes(&t, &cookie)) != NULL) { 4388 ddt_stat_t dds; 4389 uint64_t refcnt = zdde->zdde_ref_blocks; 4390 ASSERT(refcnt != 0); 4391 4392 dds.dds_blocks = zdde->zdde_ref_blocks / refcnt; 4393 dds.dds_lsize = zdde->zdde_ref_lsize / refcnt; 4394 dds.dds_psize = zdde->zdde_ref_psize / refcnt; 4395 dds.dds_dsize = zdde->zdde_ref_dsize / refcnt; 4396 4397 dds.dds_ref_blocks = zdde->zdde_ref_blocks; 4398 dds.dds_ref_lsize = zdde->zdde_ref_lsize; 4399 dds.dds_ref_psize = zdde->zdde_ref_psize; 4400 dds.dds_ref_dsize = zdde->zdde_ref_dsize; 4401 4402 ddt_stat_add(&ddh_total.ddh_stat[highbit64(refcnt) - 1], 4403 &dds, 0); 4404 4405 umem_free(zdde, sizeof (*zdde)); 4406 } 4407 4408 avl_destroy(&t); 4409 4410 ddt_histogram_stat(&dds_total, &ddh_total); 4411 4412 (void) printf("Simulated DDT histogram:\n"); 4413 4414 zpool_dump_ddt(&dds_total, &ddh_total); 4415 4416 dump_dedup_ratio(&dds_total); 4417 } 4418 4419 static int 4420 verify_device_removal_feature_counts(spa_t *spa) 4421 { 4422 uint64_t dr_feature_refcount = 0; 4423 uint64_t oc_feature_refcount = 0; 4424 uint64_t indirect_vdev_count = 0; 4425 uint64_t precise_vdev_count = 0; 4426 uint64_t obsolete_counts_object_count = 0; 4427 uint64_t obsolete_sm_count = 0; 4428 uint64_t obsolete_counts_count = 0; 4429 uint64_t scip_count = 0; 4430 uint64_t obsolete_bpobj_count = 0; 4431 int ret = 0; 4432 4433 spa_condensing_indirect_phys_t *scip = 4434 &spa->spa_condensing_indirect_phys; 4435 if (scip->scip_next_mapping_object != 0) { 4436 vdev_t *vd = spa->spa_root_vdev->vdev_child[scip->scip_vdev]; 4437 ASSERT(scip->scip_prev_obsolete_sm_object != 0); 4438 ASSERT3P(vd->vdev_ops, ==, &vdev_indirect_ops); 4439 4440 (void) printf("Condensing indirect vdev %llu: new mapping " 4441 "object %llu, prev obsolete sm %llu\n", 4442 (u_longlong_t)scip->scip_vdev, 4443 (u_longlong_t)scip->scip_next_mapping_object, 4444 (u_longlong_t)scip->scip_prev_obsolete_sm_object); 4445 if (scip->scip_prev_obsolete_sm_object != 0) { 4446 space_map_t *prev_obsolete_sm = NULL; 4447 VERIFY0(space_map_open(&prev_obsolete_sm, 4448 spa->spa_meta_objset, 4449 scip->scip_prev_obsolete_sm_object, 4450 0, vd->vdev_asize, 0)); 4451 dump_spacemap(spa->spa_meta_objset, prev_obsolete_sm); 4452 (void) printf("\n"); 4453 space_map_close(prev_obsolete_sm); 4454 } 4455 4456 scip_count += 2; 4457 } 4458 4459 for (uint64_t i = 0; i < spa->spa_root_vdev->vdev_children; i++) { 4460 vdev_t *vd = spa->spa_root_vdev->vdev_child[i]; 4461 vdev_indirect_config_t *vic = 
&vd->vdev_indirect_config; 4462 4463 if (vic->vic_mapping_object != 0) { 4464 ASSERT(vd->vdev_ops == &vdev_indirect_ops || 4465 vd->vdev_removing); 4466 indirect_vdev_count++; 4467 4468 if (vd->vdev_indirect_mapping->vim_havecounts) { 4469 obsolete_counts_count++; 4470 } 4471 } 4472 if (vdev_obsolete_counts_are_precise(vd)) { 4473 ASSERT(vic->vic_mapping_object != 0); 4474 precise_vdev_count++; 4475 } 4476 if (vdev_obsolete_sm_object(vd) != 0) { 4477 ASSERT(vic->vic_mapping_object != 0); 4478 obsolete_sm_count++; 4479 } 4480 } 4481 4482 (void) feature_get_refcount(spa, 4483 &spa_feature_table[SPA_FEATURE_DEVICE_REMOVAL], 4484 &dr_feature_refcount); 4485 (void) feature_get_refcount(spa, 4486 &spa_feature_table[SPA_FEATURE_OBSOLETE_COUNTS], 4487 &oc_feature_refcount); 4488 4489 if (dr_feature_refcount != indirect_vdev_count) { 4490 ret = 1; 4491 (void) printf("Number of indirect vdevs (%llu) " \ 4492 "does not match feature count (%llu)\n", 4493 (u_longlong_t)indirect_vdev_count, 4494 (u_longlong_t)dr_feature_refcount); 4495 } else { 4496 (void) printf("Verified device_removal feature refcount " \ 4497 "of %llu is correct\n", 4498 (u_longlong_t)dr_feature_refcount); 4499 } 4500 4501 if (zap_contains(spa_meta_objset(spa), DMU_POOL_DIRECTORY_OBJECT, 4502 DMU_POOL_OBSOLETE_BPOBJ) == 0) { 4503 obsolete_bpobj_count++; 4504 } 4505 4506 4507 obsolete_counts_object_count = precise_vdev_count; 4508 obsolete_counts_object_count += obsolete_sm_count; 4509 obsolete_counts_object_count += obsolete_counts_count; 4510 obsolete_counts_object_count += scip_count; 4511 obsolete_counts_object_count += obsolete_bpobj_count; 4512 obsolete_counts_object_count += remap_deadlist_count; 4513 4514 if (oc_feature_refcount != obsolete_counts_object_count) { 4515 ret = 1; 4516 (void) printf("Number of obsolete counts objects (%llu) " \ 4517 "does not match feature count (%llu)\n", 4518 (u_longlong_t)obsolete_counts_object_count, 4519 (u_longlong_t)oc_feature_refcount); 4520 (void) printf("pv:%llu os:%llu oc:%llu sc:%llu " 4521 "ob:%llu rd:%llu\n", 4522 (u_longlong_t)precise_vdev_count, 4523 (u_longlong_t)obsolete_sm_count, 4524 (u_longlong_t)obsolete_counts_count, 4525 (u_longlong_t)scip_count, 4526 (u_longlong_t)obsolete_bpobj_count, 4527 (u_longlong_t)remap_deadlist_count); 4528 } else { 4529 (void) printf("Verified indirect_refcount feature refcount " \ 4530 "of %llu is correct\n", 4531 (u_longlong_t)oc_feature_refcount); 4532 } 4533 return (ret); 4534 } 4535 4536 static void 4537 zdb_set_skip_mmp(char *target) 4538 { 4539 spa_t *spa; 4540 4541 /* 4542 * Disable the activity check to allow examination of 4543 * active pools. 4544 */ 4545 mutex_enter(&spa_namespace_lock); 4546 if ((spa = spa_lookup(target)) != NULL) { 4547 spa->spa_import_flags |= ZFS_IMPORT_SKIP_MMP; 4548 } 4549 mutex_exit(&spa_namespace_lock); 4550 } 4551 4552 #define BOGUS_SUFFIX "_CHECKPOINTED_UNIVERSE" 4553 /* 4554 * Import the checkpointed state of the pool specified by the target 4555 * parameter as readonly. The function also accepts a pool config 4556 * as an optional parameter, else it attempts to infer the config from 4557 * the name of the target pool. 4558 * 4559 * Note that the checkpointed state's pool name will be the name of 4560 * the original pool with the above suffix appended to it. In addition, 4561 * if the target is not a pool name (e.g. a path to a dataset) then 4562 * the new_path parameter is populated with the updated path to 4563 * reflect the fact that we are looking into the checkpointed state.
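 * For example, given a (hypothetical) target "tank/fs", the checkpointed state is imported as the pool "tank_CHECKPOINTED_UNIVERSE" and *new_path is set to "tank_CHECKPOINTED_UNIVERSE/fs".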
* 4565 * The function returns a newly-allocated copy of the name of the 4566 * pool containing the checkpointed state. When this copy is no 4567 * longer needed it should be freed with free(3C). Same thing 4568 * applies to the new_path parameter if allocated. 4569 */ 4570 static char * 4571 import_checkpointed_state(char *target, nvlist_t *cfg, char **new_path) 4572 { 4573 int error = 0; 4574 char *poolname, *bogus_name; 4575 4576 /* If the target is not a pool, then extract the pool name */ 4577 char *path_start = strchr(target, '/'); 4578 if (path_start != NULL) { 4579 size_t poolname_len = path_start - target; 4580 poolname = strndup(target, poolname_len); 4581 } else { 4582 poolname = target; 4583 } 4584 4585 if (cfg == NULL) { 4586 zdb_set_skip_mmp(poolname); 4587 error = spa_get_stats(poolname, &cfg, NULL, 0); 4588 if (error != 0) { 4589 fatal("Tried to read config of pool \"%s\" but " 4590 "spa_get_stats() failed with error %d\n", 4591 poolname, error); 4592 } 4593 } 4594 4595 (void) asprintf(&bogus_name, "%s%s", poolname, BOGUS_SUFFIX); 4596 fnvlist_add_string(cfg, ZPOOL_CONFIG_POOL_NAME, bogus_name); 4597 4598 error = spa_import(bogus_name, cfg, NULL, 4599 ZFS_IMPORT_MISSING_LOG | ZFS_IMPORT_CHECKPOINT | 4600 ZFS_IMPORT_SKIP_MMP); 4601 if (error != 0) { 4602 fatal("Tried to import pool \"%s\" but spa_import() failed " 4603 "with error %d\n", bogus_name, error); 4604 } 4605 4606 if (new_path != NULL && path_start != NULL) 4607 (void) asprintf(new_path, "%s%s", bogus_name, path_start); 4608 4609 if (target != poolname) 4610 free(poolname); 4611 4612 return (bogus_name); 4613 } 4614 4615 typedef struct verify_checkpoint_sm_entry_cb_arg { 4616 vdev_t *vcsec_vd; 4617 4618 /* the following fields are only used for printing progress */ 4619 uint64_t vcsec_entryid; 4620 uint64_t vcsec_num_entries; 4621 } verify_checkpoint_sm_entry_cb_arg_t; 4622 4623 #define ENTRIES_PER_PROGRESS_UPDATE 10000 4624 4625 static int 4626 verify_checkpoint_sm_entry_cb(space_map_entry_t *sme, void *arg) 4627 { 4628 verify_checkpoint_sm_entry_cb_arg_t *vcsec = arg; 4629 vdev_t *vd = vcsec->vcsec_vd; 4630 metaslab_t *ms = vd->vdev_ms[sme->sme_offset >> vd->vdev_ms_shift]; 4631 uint64_t end = sme->sme_offset + sme->sme_run; 4632 4633 ASSERT(sme->sme_type == SM_FREE); 4634 4635 if ((vcsec->vcsec_entryid % ENTRIES_PER_PROGRESS_UPDATE) == 0) { 4636 (void) fprintf(stderr, 4637 "\rverifying vdev %llu, space map entry %llu of %llu ...", 4638 (longlong_t)vd->vdev_id, 4639 (longlong_t)vcsec->vcsec_entryid, 4640 (longlong_t)vcsec->vcsec_num_entries); 4641 } 4642 vcsec->vcsec_entryid++; 4643 4644 /* 4645 * See comment in checkpoint_sm_exclude_entry_cb() 4646 */ 4647 VERIFY3U(sme->sme_offset, >=, ms->ms_start); 4648 VERIFY3U(end, <=, ms->ms_start + ms->ms_size); 4649 4650 /* 4651 * The entries in the vdev_checkpoint_sm should be marked as 4652 * allocated in the checkpointed state of the pool, therefore 4653 * their respective ms_allocatable trees should not contain them. 4654 */ 4655 mutex_enter(&ms->ms_lock); 4656 range_tree_verify_not_present(ms->ms_allocatable, 4657 sme->sme_offset, sme->sme_run); 4658 mutex_exit(&ms->ms_lock); 4659 4660 return (0); 4661 } 4662 4663 /* 4664 * Verify that all segments in the vdev_checkpoint_sm are allocated 4665 * according to the checkpoint's ms_sm (i.e. are not in the checkpoint's 4666 * ms_allocatable).
* 4668 * Do so by comparing the checkpoint space maps (vdev_checkpoint_sm) of 4669 * each vdev in the current state of the pool to the metaslab space maps 4670 * (ms_sm) of the checkpointed state of the pool. 4671 * 4672 * Note that the function changes the state of the ms_allocatable 4673 * trees of the checkpointed spa_t. The entries of these ms_allocatable 4674 * trees are cleared out and then repopulated with the free 4675 * entries of their respective ms_sm space maps. 4676 */ 4677 static void 4678 verify_checkpoint_vdev_spacemaps(spa_t *checkpoint, spa_t *current) 4679 { 4680 vdev_t *ckpoint_rvd = checkpoint->spa_root_vdev; 4681 vdev_t *current_rvd = current->spa_root_vdev; 4682 4683 load_concrete_ms_allocatable_trees(checkpoint, SM_FREE); 4684 4685 for (uint64_t c = 0; c < ckpoint_rvd->vdev_children; c++) { 4686 vdev_t *ckpoint_vd = ckpoint_rvd->vdev_child[c]; 4687 vdev_t *current_vd = current_rvd->vdev_child[c]; 4688 4689 space_map_t *checkpoint_sm = NULL; 4690 uint64_t checkpoint_sm_obj; 4691 4692 if (ckpoint_vd->vdev_ops == &vdev_indirect_ops) { 4693 /* 4694 * Since we don't allow device removal in a pool 4695 * that has a checkpoint, we expect that all removed 4696 * vdevs were removed from the pool before the 4697 * checkpoint. 4698 */ 4699 ASSERT3P(current_vd->vdev_ops, ==, &vdev_indirect_ops); 4700 continue; 4701 } 4702 4703 /* 4704 * If the checkpoint space map doesn't exist, then nothing 4705 * here is checkpointed so there's nothing to verify. 4706 */ 4707 if (current_vd->vdev_top_zap == 0 || 4708 zap_contains(spa_meta_objset(current), 4709 current_vd->vdev_top_zap, 4710 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0) 4711 continue; 4712 4713 VERIFY0(zap_lookup(spa_meta_objset(current), 4714 current_vd->vdev_top_zap, VDEV_TOP_ZAP_POOL_CHECKPOINT_SM, 4715 sizeof (uint64_t), 1, &checkpoint_sm_obj)); 4716 4717 VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(current), 4718 checkpoint_sm_obj, 0, current_vd->vdev_asize, 4719 current_vd->vdev_ashift)); 4720 4721 verify_checkpoint_sm_entry_cb_arg_t vcsec; 4722 vcsec.vcsec_vd = ckpoint_vd; 4723 vcsec.vcsec_entryid = 0; 4724 vcsec.vcsec_num_entries = 4725 space_map_length(checkpoint_sm) / sizeof (uint64_t); 4726 VERIFY0(space_map_iterate(checkpoint_sm, 4727 space_map_length(checkpoint_sm), 4728 verify_checkpoint_sm_entry_cb, &vcsec)); 4729 dump_spacemap(current->spa_meta_objset, checkpoint_sm); 4730 space_map_close(checkpoint_sm); 4731 } 4732 4733 /* 4734 * If we've added vdevs since we took the checkpoint, ensure 4735 * that their checkpoint space maps are empty. 4736 */ 4737 if (ckpoint_rvd->vdev_children < current_rvd->vdev_children) { 4738 for (uint64_t c = ckpoint_rvd->vdev_children; 4739 c < current_rvd->vdev_children; c++) { 4740 vdev_t *current_vd = current_rvd->vdev_child[c]; 4741 ASSERT3P(current_vd->vdev_checkpoint_sm, ==, NULL); 4742 } 4743 } 4744 4745 /* for cleaner progress output */ 4746 (void) fprintf(stderr, "\n"); 4747 } 4748 4749 /* 4750 * Verifies that all space that's allocated in the checkpoint is 4751 * still allocated in the current version, by checking that everything 4752 * in checkpoint's ms_allocatable (which is actually allocated, not 4753 * allocatable/free) is not present in current's ms_allocatable. 4754 * 4755 * Note that the function changes the state of the ms_allocatable 4756 * trees of both spas when called. The entries of all ms_allocatable 4757 * trees are cleared out and then repopulated from their respective 4758 * ms_sm space maps.
In the checkpointed state we load the allocated 4759 * entries, and in the current state we load the free entries. 4760 */ 4761 static void 4762 verify_checkpoint_ms_spacemaps(spa_t *checkpoint, spa_t *current) 4763 { 4764 vdev_t *ckpoint_rvd = checkpoint->spa_root_vdev; 4765 vdev_t *current_rvd = current->spa_root_vdev; 4766 4767 load_concrete_ms_allocatable_trees(checkpoint, SM_ALLOC); 4768 load_concrete_ms_allocatable_trees(current, SM_FREE); 4769 4770 for (uint64_t i = 0; i < ckpoint_rvd->vdev_children; i++) { 4771 vdev_t *ckpoint_vd = ckpoint_rvd->vdev_child[i]; 4772 vdev_t *current_vd = current_rvd->vdev_child[i]; 4773 4774 if (ckpoint_vd->vdev_ops == &vdev_indirect_ops) { 4775 /* 4776 * See comment in verify_checkpoint_vdev_spacemaps() 4777 */ 4778 ASSERT3P(current_vd->vdev_ops, ==, &vdev_indirect_ops); 4779 continue; 4780 } 4781 4782 for (uint64_t m = 0; m < ckpoint_vd->vdev_ms_count; m++) { 4783 metaslab_t *ckpoint_msp = ckpoint_vd->vdev_ms[m]; 4784 metaslab_t *current_msp = current_vd->vdev_ms[m]; 4785 4786 (void) fprintf(stderr, 4787 "\rverifying vdev %llu of %llu, " 4788 "metaslab %llu of %llu ...", 4789 (longlong_t)current_vd->vdev_id, 4790 (longlong_t)current_rvd->vdev_children, 4791 (longlong_t)current_vd->vdev_ms[m]->ms_id, 4792 (longlong_t)current_vd->vdev_ms_count); 4793 4794 /* 4795 * We walk through the ms_allocatable trees that 4796 * are loaded with the allocated blocks from the 4797 * ms_sm spacemaps of the checkpoint. For each 4798 * one of these ranges we ensure that none of them 4799 * exists in the ms_allocatable trees of the 4800 * current state which are loaded with the ranges 4801 * that are currently free. 4802 * 4803 * This way we ensure that none of the blocks that 4804 * are part of the checkpoint were freed by mistake. 4805 */ 4806 range_tree_walk(ckpoint_msp->ms_allocatable, 4807 (range_tree_func_t *)range_tree_verify_not_present, 4808 current_msp->ms_allocatable); 4809 } 4810 } 4811 4812 /* for cleaner progress output */ 4813 (void) fprintf(stderr, "\n"); 4814 } 4815 4816 static void 4817 verify_checkpoint_blocks(spa_t *spa) 4818 { 4819 ASSERT(!dump_opt['L']); 4820 4821 spa_t *checkpoint_spa; 4822 char *checkpoint_pool; 4823 nvlist_t *config = NULL; 4824 int error = 0; 4825 4826 /* 4827 * We import the checkpointed state of the pool (under a different 4828 * name) so we can do verification on it against the current state 4829 * of the pool. 4830 */ 4831 checkpoint_pool = import_checkpointed_state(spa->spa_name, config, 4832 NULL); 4833 ASSERT(strcmp(spa->spa_name, checkpoint_pool) != 0); 4834 4835 error = spa_open(checkpoint_pool, &checkpoint_spa, FTAG); 4836 if (error != 0) { 4837 fatal("Tried to open pool \"%s\" but spa_open() failed with " 4838 "error %d\n", checkpoint_pool, error); 4839 } 4840 4841 /* 4842 * Ensure that ranges in the checkpoint space maps of each vdev 4843 * are allocated according to the checkpointed state's metaslab 4844 * space maps. 4845 */ 4846 verify_checkpoint_vdev_spacemaps(checkpoint_spa, spa); 4847 4848 /* 4849 * Ensure that allocated ranges in the checkpoint's metaslab 4850 * space maps remain allocated in the metaslab space maps of 4851 * the current state. 4852 */ 4853 verify_checkpoint_ms_spacemaps(checkpoint_spa, spa); 4854 4855 /* 4856 * Once we are done, we get rid of the checkpointed state. 
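 * Since zdb runs entirely in userland, the "import" above only created an in-memory spa_t; spa_close() below releases our hold on it, and free() releases the name string returned by import_checkpointed_state().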
4857 */ 4858 spa_close(checkpoint_spa, FTAG); 4859 free(checkpoint_pool); 4860 } 4861 4862 static void 4863 dump_leftover_checkpoint_blocks(spa_t *spa) 4864 { 4865 vdev_t *rvd = spa->spa_root_vdev; 4866 4867 for (uint64_t i = 0; i < rvd->vdev_children; i++) { 4868 vdev_t *vd = rvd->vdev_child[i]; 4869 4870 space_map_t *checkpoint_sm = NULL; 4871 uint64_t checkpoint_sm_obj; 4872 4873 if (vd->vdev_top_zap == 0) 4874 continue; 4875 4876 if (zap_contains(spa_meta_objset(spa), vd->vdev_top_zap, 4877 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM) != 0) 4878 continue; 4879 4880 VERIFY0(zap_lookup(spa_meta_objset(spa), vd->vdev_top_zap, 4881 VDEV_TOP_ZAP_POOL_CHECKPOINT_SM, 4882 sizeof (uint64_t), 1, &checkpoint_sm_obj)); 4883 4884 VERIFY0(space_map_open(&checkpoint_sm, spa_meta_objset(spa), 4885 checkpoint_sm_obj, 0, vd->vdev_asize, vd->vdev_ashift)); 4886 dump_spacemap(spa->spa_meta_objset, checkpoint_sm); 4887 space_map_close(checkpoint_sm); 4888 } 4889 } 4890 4891 static int 4892 verify_checkpoint(spa_t *spa) 4893 { 4894 uberblock_t checkpoint; 4895 int error; 4896 4897 if (!spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) 4898 return (0); 4899 4900 error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 4901 DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t), 4902 sizeof (uberblock_t) / sizeof (uint64_t), &checkpoint); 4903 4904 if (error == ENOENT && !dump_opt['L']) { 4905 /* 4906 * If the feature is active but the uberblock is missing 4907 * then we must be in the middle of discarding the 4908 * checkpoint. 4909 */ 4910 (void) printf("\nPartially discarded checkpoint " 4911 "state found:\n"); 4912 dump_leftover_checkpoint_blocks(spa); 4913 return (0); 4914 } else if (error != 0) { 4915 (void) printf("lookup error %d when looking for " 4916 "checkpointed uberblock in MOS\n", error); 4917 return (error); 4918 } 4919 dump_uberblock(&checkpoint, "\nCheckpointed uberblock found:\n", "\n"); 4920 4921 if (checkpoint.ub_checkpoint_txg == 0) { 4922 (void) printf("\nub_checkpoint_txg not set in checkpointed " 4923 "uberblock\n"); 4924 error = 3; 4925 } 4926 4927 if (error == 0 && !dump_opt['L']) 4928 verify_checkpoint_blocks(spa); 4929 4930 return (error); 4931 } 4932 4933 /* ARGSUSED */ 4934 static void 4935 mos_leaks_cb(void *arg, uint64_t start, uint64_t size) 4936 { 4937 for (uint64_t i = start; i < start + size; i++) { 4938 (void) printf("MOS object %llu referenced but not allocated\n", 4939 (u_longlong_t)i); 4940 } 4941 } 4942 4943 static range_tree_t *mos_refd_objs; 4944 4945 static void 4946 mos_obj_refd(uint64_t obj) 4947 { 4948 if (obj != 0 && mos_refd_objs != NULL) 4949 range_tree_add(mos_refd_objs, obj, 1); 4950 } 4951 4952 static void 4953 mos_leak_vdev_top_zap(vdev_t *vd) 4954 { 4955 uint64_t ms_flush_data_obj; 4956 4957 int error = zap_lookup(spa_meta_objset(vd->vdev_spa), 4958 vd->vdev_top_zap, VDEV_TOP_ZAP_MS_UNFLUSHED_PHYS_TXGS, 4959 sizeof (ms_flush_data_obj), 1, &ms_flush_data_obj); 4960 if (error == ENOENT) 4961 return; 4962 ASSERT0(error); 4963 4964 mos_obj_refd(ms_flush_data_obj); 4965 } 4966 4967 static void 4968 mos_leak_vdev(vdev_t *vd) 4969 { 4970 mos_obj_refd(vd->vdev_dtl_object); 4971 mos_obj_refd(vd->vdev_ms_array); 4972 mos_obj_refd(vd->vdev_indirect_config.vic_births_object); 4973 mos_obj_refd(vd->vdev_indirect_config.vic_mapping_object); 4974 mos_obj_refd(vd->vdev_leaf_zap); 4975 if (vd->vdev_checkpoint_sm != NULL) 4976 mos_obj_refd(vd->vdev_checkpoint_sm->sm_object); 4977 if (vd->vdev_indirect_mapping != NULL) { 4978 mos_obj_refd(vd->vdev_indirect_mapping-> 4979 
vim_phys->vimp_counts_object); 4980 } 4981 if (vd->vdev_obsolete_sm != NULL) 4982 mos_obj_refd(vd->vdev_obsolete_sm->sm_object); 4983 4984 for (uint64_t m = 0; m < vd->vdev_ms_count; m++) { 4985 metaslab_t *ms = vd->vdev_ms[m]; 4986 mos_obj_refd(space_map_object(ms->ms_sm)); 4987 } 4988 4989 if (vd->vdev_top_zap != 0) { 4990 mos_obj_refd(vd->vdev_top_zap); 4991 mos_leak_vdev_top_zap(vd); 4992 } 4993 4994 for (uint64_t c = 0; c < vd->vdev_children; c++) { 4995 mos_leak_vdev(vd->vdev_child[c]); 4996 } 4997 } 4998 4999 static void 5000 mos_leak_log_spacemaps(spa_t *spa) 5001 { 5002 uint64_t spacemap_zap; 5003 5004 int error = zap_lookup(spa_meta_objset(spa), 5005 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_LOG_SPACEMAP_ZAP, 5006 sizeof (spacemap_zap), 1, &spacemap_zap); 5007 if (error == ENOENT) 5008 return; 5009 ASSERT0(error); 5010 5011 mos_obj_refd(spacemap_zap); 5012 for (spa_log_sm_t *sls = avl_first(&spa->spa_sm_logs_by_txg); 5013 sls; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls)) 5014 mos_obj_refd(sls->sls_sm_obj); 5015 } 5016 5017 static int 5018 dump_mos_leaks(spa_t *spa) 5019 { 5020 int rv = 0; 5021 objset_t *mos = spa->spa_meta_objset; 5022 dsl_pool_t *dp = spa->spa_dsl_pool; 5023 5024 /* Visit and mark all referenced objects in the MOS */ 5025 5026 mos_obj_refd(DMU_POOL_DIRECTORY_OBJECT); 5027 mos_obj_refd(spa->spa_pool_props_object); 5028 mos_obj_refd(spa->spa_config_object); 5029 mos_obj_refd(spa->spa_ddt_stat_object); 5030 mos_obj_refd(spa->spa_feat_desc_obj); 5031 mos_obj_refd(spa->spa_feat_enabled_txg_obj); 5032 mos_obj_refd(spa->spa_feat_for_read_obj); 5033 mos_obj_refd(spa->spa_feat_for_write_obj); 5034 mos_obj_refd(spa->spa_history); 5035 mos_obj_refd(spa->spa_errlog_last); 5036 mos_obj_refd(spa->spa_errlog_scrub); 5037 mos_obj_refd(spa->spa_all_vdev_zaps); 5038 mos_obj_refd(spa->spa_dsl_pool->dp_bptree_obj); 5039 mos_obj_refd(spa->spa_dsl_pool->dp_tmp_userrefs_obj); 5040 mos_obj_refd(spa->spa_dsl_pool->dp_scan->scn_phys.scn_queue_obj); 5041 bpobj_count_refd(&spa->spa_deferred_bpobj); 5042 mos_obj_refd(dp->dp_empty_bpobj); 5043 bpobj_count_refd(&dp->dp_obsolete_bpobj); 5044 bpobj_count_refd(&dp->dp_free_bpobj); 5045 mos_obj_refd(spa->spa_l2cache.sav_object); 5046 mos_obj_refd(spa->spa_spares.sav_object); 5047 5048 if (spa->spa_syncing_log_sm != NULL) 5049 mos_obj_refd(spa->spa_syncing_log_sm->sm_object); 5050 mos_leak_log_spacemaps(spa); 5051 5052 mos_obj_refd(spa->spa_condensing_indirect_phys. 5053 scip_next_mapping_object); 5054 mos_obj_refd(spa->spa_condensing_indirect_phys. 
5055 scip_prev_obsolete_sm_object); 5056 if (spa->spa_condensing_indirect_phys.scip_next_mapping_object != 0) { 5057 vdev_indirect_mapping_t *vim = 5058 vdev_indirect_mapping_open(mos, 5059 spa->spa_condensing_indirect_phys.scip_next_mapping_object); 5060 mos_obj_refd(vim->vim_phys->vimp_counts_object); 5061 vdev_indirect_mapping_close(vim); 5062 } 5063 5064 if (dp->dp_origin_snap != NULL) { 5065 dsl_dataset_t *ds; 5066 5067 dsl_pool_config_enter(dp, FTAG); 5068 VERIFY0(dsl_dataset_hold_obj(dp, 5069 dsl_dataset_phys(dp->dp_origin_snap)->ds_next_snap_obj, 5070 FTAG, &ds)); 5071 count_ds_mos_objects(ds); 5072 dump_deadlist(&ds->ds_deadlist); 5073 dsl_dataset_rele(ds, FTAG); 5074 dsl_pool_config_exit(dp, FTAG); 5075 5076 count_ds_mos_objects(dp->dp_origin_snap); 5077 dump_deadlist(&dp->dp_origin_snap->ds_deadlist); 5078 } 5079 count_dir_mos_objects(dp->dp_mos_dir); 5080 if (dp->dp_free_dir != NULL) 5081 count_dir_mos_objects(dp->dp_free_dir); 5082 if (dp->dp_leak_dir != NULL) 5083 count_dir_mos_objects(dp->dp_leak_dir); 5084 5085 mos_leak_vdev(spa->spa_root_vdev); 5086 5087 for (uint64_t class = 0; class < DDT_CLASSES; class++) { 5088 for (uint64_t type = 0; type < DDT_TYPES; type++) { 5089 for (uint64_t cksum = 0; 5090 cksum < ZIO_CHECKSUM_FUNCTIONS; cksum++) { 5091 ddt_t *ddt = spa->spa_ddt[cksum]; 5092 mos_obj_refd(ddt->ddt_object[type][class]); 5093 } 5094 } 5095 } 5096 5097 /* 5098 * Visit all allocated objects and make sure they are referenced. 5099 */ 5100 uint64_t object = 0; 5101 while (dmu_object_next(mos, &object, B_FALSE, 0) == 0) { 5102 if (range_tree_contains(mos_refd_objs, object, 1)) { 5103 range_tree_remove(mos_refd_objs, object, 1); 5104 } else { 5105 dmu_object_info_t doi; 5106 const char *name; 5107 dmu_object_info(mos, object, &doi); 5108 if (doi.doi_type & DMU_OT_NEWTYPE) { 5109 dmu_object_byteswap_t bswap = 5110 DMU_OT_BYTESWAP(doi.doi_type); 5111 name = dmu_ot_byteswap[bswap].ob_name; 5112 } else { 5113 name = dmu_ot[doi.doi_type].ot_name; 5114 } 5115 5116 (void) printf("MOS object %llu (%s) leaked\n", 5117 (u_longlong_t)object, name); 5118 rv = 2; 5119 } 5120 } 5121 (void) range_tree_walk(mos_refd_objs, mos_leaks_cb, NULL); 5122 if (!range_tree_is_empty(mos_refd_objs)) 5123 rv = 2; 5124 range_tree_vacate(mos_refd_objs, NULL, NULL); 5125 range_tree_destroy(mos_refd_objs); 5126 return (rv); 5127 } 5128 5129 typedef struct log_sm_obsolete_stats_arg { 5130 uint64_t lsos_current_txg; 5131 5132 uint64_t lsos_total_entries; 5133 uint64_t lsos_valid_entries; 5134 5135 uint64_t lsos_sm_entries; 5136 uint64_t lsos_valid_sm_entries; 5137 } log_sm_obsolete_stats_arg_t; 5138 5139 static int 5140 log_spacemap_obsolete_stats_cb(spa_t *spa, space_map_entry_t *sme, 5141 uint64_t txg, void *arg) 5142 { 5143 log_sm_obsolete_stats_arg_t *lsos = arg; 5144 uint64_t offset = sme->sme_offset; 5145 uint64_t vdev_id = sme->sme_vdev; 5146 5147 if (lsos->lsos_current_txg == 0) { 5148 /* this is the first log */ 5149 lsos->lsos_current_txg = txg; 5150 } else if (lsos->lsos_current_txg < txg) { 5151 /* we just changed log - print stats and reset */ 5152 (void) printf("%-8llu valid entries out of %-8llu - txg %llu\n", 5153 (u_longlong_t)lsos->lsos_valid_sm_entries, 5154 (u_longlong_t)lsos->lsos_sm_entries, 5155 (u_longlong_t)lsos->lsos_current_txg); 5156 lsos->lsos_valid_sm_entries = 0; 5157 lsos->lsos_sm_entries = 0; 5158 lsos->lsos_current_txg = txg; 5159 } 5160 ASSERT3U(lsos->lsos_current_txg, ==, txg); 5161 5162 lsos->lsos_sm_entries++; 5163 lsos->lsos_total_entries++; 5164 5165 vdev_t *vd = 
vdev_lookup_top(spa, vdev_id); 5166 if (!vdev_is_concrete(vd)) 5167 return (0); 5168 5169 metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift]; 5170 ASSERT(sme->sme_type == SM_ALLOC || sme->sme_type == SM_FREE); 5171 5172 if (txg < metaslab_unflushed_txg(ms)) 5173 return (0); 5174 lsos->lsos_valid_sm_entries++; 5175 lsos->lsos_valid_entries++; 5176 return (0); 5177 } 5178 5179 static void 5180 dump_log_spacemap_obsolete_stats(spa_t *spa) 5181 { 5182 if (!spa_feature_is_active(spa, SPA_FEATURE_LOG_SPACEMAP)) 5183 return; 5184 5185 log_sm_obsolete_stats_arg_t lsos; 5186 bzero(&lsos, sizeof (lsos)); 5187 5188 (void) printf("Log Space Map Obsolete Entry Statistics:\n"); 5189 5190 iterate_through_spacemap_logs(spa, 5191 log_spacemap_obsolete_stats_cb, &lsos); 5192 5193 /* print stats for latest log */ 5194 (void) printf("%-8llu valid entries out of %-8llu - txg %llu\n", 5195 (u_longlong_t)lsos.lsos_valid_sm_entries, 5196 (u_longlong_t)lsos.lsos_sm_entries, 5197 (u_longlong_t)lsos.lsos_current_txg); 5198 5199 (void) printf("%-8llu valid entries out of %-8llu - total\n\n", 5200 (u_longlong_t)lsos.lsos_valid_entries, 5201 (u_longlong_t)lsos.lsos_total_entries); 5202 } 5203 5204 static void 5205 dump_zpool(spa_t *spa) 5206 { 5207 dsl_pool_t *dp = spa_get_dsl(spa); 5208 int rc = 0; 5209 5210 if (dump_opt['S']) { 5211 dump_simulated_ddt(spa); 5212 return; 5213 } 5214 5215 if (!dump_opt['e'] && dump_opt['C'] > 1) { 5216 (void) printf("\nCached configuration:\n"); 5217 dump_nvlist(spa->spa_config, 8); 5218 } 5219 5220 if (dump_opt['C']) 5221 dump_config(spa); 5222 5223 if (dump_opt['u']) 5224 dump_uberblock(&spa->spa_uberblock, "\nUberblock:\n", "\n"); 5225 5226 if (dump_opt['D']) 5227 dump_all_ddts(spa); 5228 5229 if (dump_opt['d'] > 2 || dump_opt['m']) 5230 dump_metaslabs(spa); 5231 if (dump_opt['M']) 5232 dump_metaslab_groups(spa); 5233 if (dump_opt['d'] > 2 || dump_opt['m']) { 5234 dump_log_spacemaps(spa); 5235 dump_log_spacemap_obsolete_stats(spa); 5236 } 5237 5238 if (dump_opt['d'] || dump_opt['i']) { 5239 mos_refd_objs = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 5240 0); 5241 dump_dir(dp->dp_meta_objset); 5242 5243 if (dump_opt['d'] >= 3) { 5244 dsl_pool_t *dp = spa->spa_dsl_pool; 5245 dump_full_bpobj(&spa->spa_deferred_bpobj, 5246 "Deferred frees", 0); 5247 if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { 5248 dump_full_bpobj(&dp->dp_free_bpobj, 5249 "Pool snapshot frees", 0); 5250 } 5251 if (bpobj_is_open(&dp->dp_obsolete_bpobj)) { 5252 ASSERT(spa_feature_is_enabled(spa, 5253 SPA_FEATURE_DEVICE_REMOVAL)); 5254 dump_full_bpobj(&dp->dp_obsolete_bpobj, 5255 "Pool obsolete blocks", 0); 5256 } 5257 5258 if (spa_feature_is_active(spa, 5259 SPA_FEATURE_ASYNC_DESTROY)) { 5260 dump_bptree(spa->spa_meta_objset, 5261 dp->dp_bptree_obj, 5262 "Pool dataset frees"); 5263 } 5264 dump_dtl(spa->spa_root_vdev, 0); 5265 } 5266 (void) dmu_objset_find(spa_name(spa), dump_one_dir, 5267 NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN); 5268 5269 if (rc == 0 && !dump_opt['L']) 5270 rc = dump_mos_leaks(spa); 5271 5272 for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { 5273 uint64_t refcount; 5274 5275 if (!(spa_feature_table[f].fi_flags & 5276 ZFEATURE_FLAG_PER_DATASET) || 5277 !spa_feature_is_enabled(spa, f)) { 5278 ASSERT0(dataset_feature_count[f]); 5279 continue; 5280 } 5281 (void) feature_get_refcount(spa, 5282 &spa_feature_table[f], &refcount); 5283 if (dataset_feature_count[f] != refcount) { 5284 (void) printf("%s feature refcount mismatch: " 5285 "%lld datasets != %lld refcount\n", 5286 
spa_feature_table[f].fi_uname, 5287 (longlong_t)dataset_feature_count[f], 5288 (longlong_t)refcount); 5289 rc = 2; 5290 } else { 5291 (void) printf("Verified %s feature refcount " 5292 "of %llu is correct\n", 5293 spa_feature_table[f].fi_uname, 5294 (longlong_t)refcount); 5295 } 5296 } 5297 5298 if (rc == 0) 5299 rc = verify_device_removal_feature_counts(spa); 5300 } 5301 5302 if (rc == 0 && (dump_opt['b'] || dump_opt['c'])) 5303 rc = dump_block_stats(spa); 5304 5305 if (rc == 0) 5306 rc = verify_spacemap_refcounts(spa); 5307 5308 if (dump_opt['s']) 5309 show_pool_stats(spa); 5310 5311 if (dump_opt['h']) 5312 dump_history(spa); 5313 5314 if (rc == 0) 5315 rc = verify_checkpoint(spa); 5316 5317 if (rc != 0) { 5318 dump_debug_buffer(); 5319 exit(rc); 5320 } 5321 } 5322 5323 #define ZDB_FLAG_CHECKSUM 0x0001 5324 #define ZDB_FLAG_DECOMPRESS 0x0002 5325 #define ZDB_FLAG_BSWAP 0x0004 5326 #define ZDB_FLAG_GBH 0x0008 5327 #define ZDB_FLAG_INDIRECT 0x0010 5328 #define ZDB_FLAG_PHYS 0x0020 5329 #define ZDB_FLAG_RAW 0x0040 5330 #define ZDB_FLAG_PRINT_BLKPTR 0x0080 5331 5332 static int flagbits[256]; 5333 5334 static void 5335 zdb_print_blkptr(blkptr_t *bp, int flags) 5336 { 5337 char blkbuf[BP_SPRINTF_LEN]; 5338 5339 if (flags & ZDB_FLAG_BSWAP) 5340 byteswap_uint64_array((void *)bp, sizeof (blkptr_t)); 5341 5342 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); 5343 (void) printf("%s\n", blkbuf); 5344 } 5345 5346 static void 5347 zdb_dump_indirect(blkptr_t *bp, int nbps, int flags) 5348 { 5349 int i; 5350 5351 for (i = 0; i < nbps; i++) 5352 zdb_print_blkptr(&bp[i], flags); 5353 } 5354 5355 static void 5356 zdb_dump_gbh(void *buf, int flags) 5357 { 5358 zdb_dump_indirect((blkptr_t *)buf, SPA_GBH_NBLKPTRS, flags); 5359 } 5360 5361 static void 5362 zdb_dump_block_raw(void *buf, uint64_t size, int flags) 5363 { 5364 if (flags & ZDB_FLAG_BSWAP) 5365 byteswap_uint64_array(buf, size); 5366 (void) write(1, buf, size); 5367 } 5368 5369 static void 5370 zdb_dump_block(char *label, void *buf, uint64_t size, int flags) 5371 { 5372 uint64_t *d = (uint64_t *)buf; 5373 unsigned nwords = size / sizeof (uint64_t); 5374 int do_bswap = !!(flags & ZDB_FLAG_BSWAP); 5375 unsigned i, j; 5376 const char *hdr; 5377 char *c; 5378 5379 5380 if (do_bswap) 5381 hdr = " 7 6 5 4 3 2 1 0 f e d c b a 9 8"; 5382 else 5383 hdr = " 0 1 2 3 4 5 6 7 8 9 a b c d e f"; 5384 5385 (void) printf("\n%s\n%6s %s 0123456789abcdef\n", label, "", hdr); 5386 5387 for (i = 0; i < nwords; i += 2) { 5388 (void) printf("%06llx: %016llx %016llx ", 5389 (u_longlong_t)(i * sizeof (uint64_t)), 5390 (u_longlong_t)(do_bswap ? BSWAP_64(d[i]) : d[i]), 5391 (u_longlong_t)(do_bswap ? BSWAP_64(d[i + 1]) : d[i + 1])); 5392 5393 c = (char *)&d[i]; 5394 for (j = 0; j < 2 * sizeof (uint64_t); j++) 5395 (void) printf("%c", isprint(c[j]) ? c[j] : '.'); 5396 (void) printf("\n"); 5397 } 5398 } 5399 5400 /* 5401 * There are two acceptable formats: 5402 * leaf_name - For example: c1t0d0 or /tmp/ztest.0a 5403 * child[.child]* - For example: 0.1.1 5404 * 5405 * The second form can be used to specify arbitrary vdevs anywhere 5406 * in the hierarchy. For example, in a pool with a mirror of 5407 * RAID-Zs, you can specify either RAID-Z vdev with 0.0 or 0.1 . 5408 */ 5409 static vdev_t * 5410 zdb_vdev_lookup(vdev_t *vdev, const char *path) 5411 { 5412 char *s, *p, *q; 5413 unsigned i; 5414 5415 if (vdev == NULL) 5416 return (NULL); 5417 5418 /* First, assume the x.x.x.x format */ 5419 i = strtoul(path, &s, 10); 5420 if (s == path || (s && *s != '.' 
&& *s != '\0')) 5421 goto name; 5422 if (i >= vdev->vdev_children) 5423 return (NULL); 5424 5425 vdev = vdev->vdev_child[i]; 5426 if (*s == '\0') 5427 return (vdev); 5428 return (zdb_vdev_lookup(vdev, s+1)); 5429 5430 name: 5431 for (i = 0; i < vdev->vdev_children; i++) { 5432 vdev_t *vc = vdev->vdev_child[i]; 5433 5434 if (vc->vdev_path == NULL) { 5435 vc = zdb_vdev_lookup(vc, path); 5436 if (vc == NULL) 5437 continue; 5438 else 5439 return (vc); 5440 } 5441 5442 p = strrchr(vc->vdev_path, '/'); 5443 p = p ? p + 1 : vc->vdev_path; 5444 q = &vc->vdev_path[strlen(vc->vdev_path) - 2]; 5445 5446 if (strcmp(vc->vdev_path, path) == 0) 5447 return (vc); 5448 if (strcmp(p, path) == 0) 5449 return (vc); 5450 if (strcmp(q, "s0") == 0 && strncmp(p, path, q - p) == 0) 5451 return (vc); 5452 } 5453 5454 return (NULL); 5455 } 5456 5457 /* ARGSUSED */ 5458 static int 5459 random_get_pseudo_bytes_cb(void *buf, size_t len, void *unused) 5460 { 5461 return (random_get_pseudo_bytes(buf, len)); 5462 } 5463 5464 /* 5465 * Read a block from a pool and print it out. The syntax of the 5466 * block descriptor is: 5467 * 5468 * pool:vdev_specifier:offset:size[:flags] 5469 * 5470 * pool - The name of the pool you wish to read from 5471 * vdev_specifier - Which vdev (see comment for zdb_vdev_lookup) 5472 * offset - offset, in hex, in bytes 5473 * size - Amount of data to read, in hex, in bytes 5474 * flags - A string of characters specifying options 5475 * b: Decode a blkptr at given offset within block 5476 * *c: Calculate and display checksums 5477 * d: Decompress data before dumping 5478 * e: Byteswap data before dumping 5479 * g: Display data as a gang block header 5480 * i: Display as an indirect block 5481 * p: Do I/O to physical offset 5482 * r: Dump raw data to stdout 5483 * 5484 * * = not yet implemented 5485 */ 5486 static void 5487 zdb_read_block(char *thing, spa_t *spa) 5488 { 5489 blkptr_t blk, *bp = &blk; 5490 dva_t *dva = bp->blk_dva; 5491 int flags = 0; 5492 uint64_t offset = 0, size = 0, psize = 0, lsize = 0, blkptr_offset = 0; 5493 zio_t *zio; 5494 vdev_t *vd; 5495 abd_t *pabd; 5496 void *lbuf, *buf; 5497 const char *s, *vdev; 5498 char *p, *dup, *flagstr; 5499 int i, error; 5500 5501 dup = strdup(thing); 5502 s = strtok(dup, ":"); 5503 vdev = s ? s : ""; 5504 s = strtok(NULL, ":"); 5505 offset = strtoull(s ? s : "", NULL, 16); 5506 s = strtok(NULL, ":"); 5507 size = strtoull(s ? 
s : "", NULL, 16); 5508 s = strtok(NULL, ":"); 5509 if (s) 5510 flagstr = strdup(s); 5511 else 5512 flagstr = strdup(""); 5513 5514 s = NULL; 5515 if (size == 0) 5516 s = "size must not be zero"; 5517 if (!IS_P2ALIGNED(size, DEV_BSIZE)) 5518 s = "size must be a multiple of sector size"; 5519 if (!IS_P2ALIGNED(offset, DEV_BSIZE)) 5520 s = "offset must be a multiple of sector size"; 5521 if (s) { 5522 (void) printf("Invalid block specifier: %s - %s\n", thing, s); 5523 free(dup); 5524 return; 5525 } 5526 5527 for (s = strtok(flagstr, ":"); s; s = strtok(NULL, ":")) { 5528 for (i = 0; flagstr[i]; i++) { 5529 int bit = flagbits[(uchar_t)flagstr[i]]; 5530 5531 if (bit == 0) { 5532 (void) printf("***Invalid flag: %c\n", 5533 flagstr[i]); 5534 continue; 5535 } 5536 flags |= bit; 5537 5538 /* If it's not something with an argument, keep going */ 5539 if ((bit & (ZDB_FLAG_CHECKSUM | 5540 ZDB_FLAG_PRINT_BLKPTR)) == 0) 5541 continue; 5542 5543 p = &flagstr[i + 1]; 5544 if (bit == ZDB_FLAG_PRINT_BLKPTR) 5545 blkptr_offset = strtoull(p, &p, 16); 5546 if (*p != ':' && *p != '\0') { 5547 (void) printf("***Invalid flag arg: '%s'\n", s); 5548 free(dup); 5549 return; 5550 } 5551 } 5552 } 5553 free(flagstr); 5554 5555 vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev); 5556 if (vd == NULL) { 5557 (void) printf("***Invalid vdev: %s\n", vdev); 5558 free(dup); 5559 return; 5560 } else { 5561 if (vd->vdev_path) 5562 (void) fprintf(stderr, "Found vdev: %s\n", 5563 vd->vdev_path); 5564 else 5565 (void) fprintf(stderr, "Found vdev type: %s\n", 5566 vd->vdev_ops->vdev_op_type); 5567 } 5568 5569 psize = size; 5570 lsize = size; 5571 5572 pabd = abd_alloc_linear(SPA_MAXBLOCKSIZE, B_FALSE); 5573 lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); 5574 5575 BP_ZERO(bp); 5576 5577 DVA_SET_VDEV(&dva[0], vd->vdev_id); 5578 DVA_SET_OFFSET(&dva[0], offset); 5579 DVA_SET_GANG(&dva[0], !!(flags & ZDB_FLAG_GBH)); 5580 DVA_SET_ASIZE(&dva[0], vdev_psize_to_asize(vd, psize)); 5581 5582 BP_SET_BIRTH(bp, TXG_INITIAL, TXG_INITIAL); 5583 5584 BP_SET_LSIZE(bp, lsize); 5585 BP_SET_PSIZE(bp, psize); 5586 BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF); 5587 BP_SET_CHECKSUM(bp, ZIO_CHECKSUM_OFF); 5588 BP_SET_TYPE(bp, DMU_OT_NONE); 5589 BP_SET_LEVEL(bp, 0); 5590 BP_SET_DEDUP(bp, 0); 5591 BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER); 5592 5593 spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 5594 zio = zio_root(spa, NULL, NULL, 0); 5595 5596 if (vd == vd->vdev_top) { 5597 /* 5598 * Treat this as a normal block read. 5599 */ 5600 zio_nowait(zio_read(zio, spa, bp, pabd, psize, NULL, NULL, 5601 ZIO_PRIORITY_SYNC_READ, 5602 ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL)); 5603 } else { 5604 /* 5605 * Treat this as a vdev child I/O. 5606 */ 5607 zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pabd, 5608 psize, ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ, 5609 ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE | 5610 ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY | 5611 ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW | ZIO_FLAG_OPTIONAL, 5612 NULL, NULL)); 5613 } 5614 5615 error = zio_wait(zio); 5616 spa_config_exit(spa, SCL_STATE, FTAG); 5617 5618 if (error) { 5619 (void) printf("Read of %s failed, error: %d\n", thing, error); 5620 goto out; 5621 } 5622 5623 if (flags & ZDB_FLAG_DECOMPRESS) { 5624 /* 5625 * We don't know how the data was compressed, so just try 5626 * every decompress function at every inflated blocksize. 
5627 */ 5628 enum zio_compress c; 5629 void *pbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); 5630 void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); 5631 5632 abd_copy_to_buf(pbuf2, pabd, psize); 5633 5634 VERIFY0(abd_iterate_func(pabd, psize, SPA_MAXBLOCKSIZE - psize, 5635 random_get_pseudo_bytes_cb, NULL)); 5636 5637 VERIFY0(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize, 5638 SPA_MAXBLOCKSIZE - psize)); 5639 5640 for (lsize = SPA_MAXBLOCKSIZE; lsize > psize; 5641 lsize -= SPA_MINBLOCKSIZE) { 5642 for (c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) { 5643 if (zio_decompress_data(c, pabd, 5644 lbuf, psize, lsize) == 0 && 5645 zio_decompress_data_buf(c, pbuf2, 5646 lbuf2, psize, lsize) == 0 && 5647 bcmp(lbuf, lbuf2, lsize) == 0) 5648 break; 5649 } 5650 if (c != ZIO_COMPRESS_FUNCTIONS) 5651 break; 5652 5653 } 5654 5655 umem_free(pbuf2, SPA_MAXBLOCKSIZE); 5656 umem_free(lbuf2, SPA_MAXBLOCKSIZE); 5657 5658 if (lsize <= psize) { 5659 (void) printf("Decompress of %s failed\n", thing); 5660 goto out; 5661 } 5662 buf = lbuf; 5663 size = lsize; 5664 } else { 5665 buf = abd_to_buf(pabd); 5666 size = psize; 5667 } 5668 5669 if (flags & ZDB_FLAG_PRINT_BLKPTR) 5670 zdb_print_blkptr((blkptr_t *)(void *) 5671 ((uintptr_t)buf + (uintptr_t)blkptr_offset), flags); 5672 else if (flags & ZDB_FLAG_RAW) 5673 zdb_dump_block_raw(buf, size, flags); 5674 else if (flags & ZDB_FLAG_INDIRECT) 5675 zdb_dump_indirect((blkptr_t *)buf, size / sizeof (blkptr_t), 5676 flags); 5677 else if (flags & ZDB_FLAG_GBH) 5678 zdb_dump_gbh(buf, flags); 5679 else 5680 zdb_dump_block(thing, buf, size, flags); 5681 5682 out: 5683 abd_free(pabd); 5684 umem_free(lbuf, SPA_MAXBLOCKSIZE); 5685 free(dup); 5686 } 5687 5688 static void 5689 zdb_embedded_block(char *thing) 5690 { 5691 blkptr_t bp; 5692 unsigned long long *words = (void *)&bp; 5693 char *buf; 5694 int err; 5695 5696 bzero(&bp, sizeof (bp)); 5697 err = sscanf(thing, "%llx:%llx:%llx:%llx:%llx:%llx:%llx:%llx:" 5698 "%llx:%llx:%llx:%llx:%llx:%llx:%llx:%llx", 5699 words + 0, words + 1, words + 2, words + 3, 5700 words + 4, words + 5, words + 6, words + 7, 5701 words + 8, words + 9, words + 10, words + 11, 5702 words + 12, words + 13, words + 14, words + 15); 5703 if (err != 16) { 5704 (void) fprintf(stderr, "invalid input format\n"); 5705 exit(1); 5706 } 5707 ASSERT3U(BPE_GET_LSIZE(&bp), <=, SPA_MAXBLOCKSIZE); 5708 buf = malloc(SPA_MAXBLOCKSIZE); 5709 if (buf == NULL) { 5710 (void) fprintf(stderr, "out of memory\n"); 5711 exit(1); 5712 } 5713 err = decode_embedded_bp(&bp, buf, BPE_GET_LSIZE(&bp)); 5714 if (err != 0) { 5715 (void) fprintf(stderr, "decode failed: %u\n", err); 5716 exit(1); 5717 } 5718 zdb_dump_block_raw(buf, BPE_GET_LSIZE(&bp), 0); 5719 free(buf); 5720 } 5721 5722 int 5723 main(int argc, char **argv) 5724 { 5725 int c; 5726 struct rlimit rl = { 1024, 1024 }; 5727 spa_t *spa = NULL; 5728 objset_t *os = NULL; 5729 int dump_all = 1; 5730 int verbose = 0; 5731 int error = 0; 5732 char **searchdirs = NULL; 5733 int nsearch = 0; 5734 char *target, *target_pool; 5735 nvlist_t *policy = NULL; 5736 uint64_t max_txg = UINT64_MAX; 5737 int flags = ZFS_IMPORT_MISSING_LOG; 5738 int rewind = ZPOOL_NEVER_REWIND; 5739 char *spa_config_path_env; 5740 boolean_t target_is_spa = B_TRUE; 5741 nvlist_t *cfg = NULL; 5742 5743 (void) setrlimit(RLIMIT_NOFILE, &rl); 5744 (void) enable_extended_FILE_stdio(-1, -1); 5745 5746 dprintf_setup(&argc, argv); 5747 5748 /* 5749 * If there is an environment variable SPA_CONFIG_PATH it overrides 5750 * default spa_config_path 
int
main(int argc, char **argv)
{
	int c;
	struct rlimit rl = { 1024, 1024 };
	spa_t *spa = NULL;
	objset_t *os = NULL;
	int dump_all = 1;
	int verbose = 0;
	int error = 0;
	char **searchdirs = NULL;
	int nsearch = 0;
	char *target, *target_pool;
	nvlist_t *policy = NULL;
	uint64_t max_txg = UINT64_MAX;
	int flags = ZFS_IMPORT_MISSING_LOG;
	int rewind = ZPOOL_NEVER_REWIND;
	char *spa_config_path_env;
	boolean_t target_is_spa = B_TRUE;
	nvlist_t *cfg = NULL;

	(void) setrlimit(RLIMIT_NOFILE, &rl);
	(void) enable_extended_FILE_stdio(-1, -1);

	dprintf_setup(&argc, argv);

	/*
	 * If the SPA_CONFIG_PATH environment variable is set, it overrides
	 * the default spa_config_path setting. If -U is specified, it in
	 * turn overrides the environment variable.
	 */
	spa_config_path_env = getenv("SPA_CONFIG_PATH");
	if (spa_config_path_env != NULL)
		spa_config_path = spa_config_path_env;

	/*
	 * For performance reasons, we set this tunable down. We do so before
	 * the arg parsing section so that the user can override this value
	 * if they choose.
	 */
	zfs_btree_verify_intensity = 3;

	while ((c = getopt(argc, argv,
	    "AbcCdDeEFGhiI:klLmMo:Op:PqRsSt:uU:vVx:X")) != -1) {
		switch (c) {
		case 'b':
		case 'c':
		case 'C':
		case 'd':
		case 'D':
		case 'E':
		case 'G':
		case 'h':
		case 'i':
		case 'l':
		case 'm':
		case 'M':
		case 'O':
		case 'R':
		case 's':
		case 'S':
		case 'u':
			dump_opt[c]++;
			dump_all = 0;
			break;
		case 'A':
		case 'e':
		case 'F':
		case 'k':
		case 'L':
		case 'P':
		case 'q':
		case 'X':
			dump_opt[c]++;
			break;
		/* NB: Sort single match options below. */
		case 'I':
			max_inflight = strtoull(optarg, NULL, 0);
			if (max_inflight == 0) {
				(void) fprintf(stderr, "maximum number "
				    "of inflight I/Os must be greater "
				    "than 0\n");
				usage();
			}
			break;
		case 'o':
			error = set_global_var(optarg);
			if (error != 0)
				usage();
			break;
		case 'p':
			if (searchdirs == NULL) {
				searchdirs = umem_alloc(sizeof (char *),
				    UMEM_NOFAIL);
			} else {
				char **tmp = umem_alloc((nsearch + 1) *
				    sizeof (char *), UMEM_NOFAIL);
				bcopy(searchdirs, tmp, nsearch *
				    sizeof (char *));
				umem_free(searchdirs,
				    nsearch * sizeof (char *));
				searchdirs = tmp;
			}
			searchdirs[nsearch++] = optarg;
			break;
		case 't':
			max_txg = strtoull(optarg, NULL, 0);
			if (max_txg < TXG_INITIAL) {
				(void) fprintf(stderr, "incorrect txg "
				    "specified: %s\n", optarg);
				usage();
			}
			break;
		case 'U':
			spa_config_path = optarg;
			if (spa_config_path[0] != '/') {
				(void) fprintf(stderr,
				    "cachefile must be an absolute path "
				    "(i.e. start with a slash)\n");
				usage();
			}
			break;
		case 'v':
			verbose++;
			break;
		case 'V':
			flags = ZFS_IMPORT_VERBATIM;
			break;
		case 'x':
			vn_dumpdir = optarg;
			break;
		default:
			usage();
			break;
		}
	}
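
	/*
	 * Illustrative combinations of the options parsed above; the pool
	 * name and search path are hypothetical:
	 *
	 *	zdb -e -p /var/tmp/devs tank	- search /var/tmp/devs for
	 *					  devices when importing tank
	 *	zdb -AAA -F tank		- ignore assertions, enable
	 *					  recovery, and attempt rewind
	 */
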
	if (!dump_opt['e'] && searchdirs != NULL) {
		(void) fprintf(stderr, "-p option requires use of -e\n");
		usage();
	}

	/*
	 * ZDB does not typically re-read blocks; therefore limit the ARC
	 * to 256 MB, which can be used entirely for metadata.
	 */
	zfs_arc_max = zfs_arc_meta_limit = 256 * 1024 * 1024;

	/*
	 * "zdb -c" uses checksum-verifying scrub i/os which are async reads.
	 * "zdb -b" uses traversal prefetch which uses async reads.
	 * For good performance, let several of them be active at once.
	 */
	zfs_vdev_async_read_max_active = 10;

	/*
	 * Disable reference tracking for better performance.
	 */
	reference_tracking_enable = B_FALSE;

	/*
	 * Do not fail spa_load when spa_load_verify fails. This is needed
	 * to load non-idle pools.
	 */
	spa_load_verify_dryrun = B_TRUE;

	kernel_init(FREAD);

	if (dump_all)
		verbose = MAX(verbose, 1);

	for (c = 0; c < 256; c++) {
		if (dump_all && strchr("AeEFklLOPRSX", c) == NULL)
			dump_opt[c] = 1;
		if (dump_opt[c])
			dump_opt[c] += verbose;
	}

	aok = (dump_opt['A'] == 1) || (dump_opt['A'] > 2);
	zfs_recover = (dump_opt['A'] > 1);

	argc -= optind;
	argv += optind;

	if (argc < 2 && dump_opt['R'])
		usage();

	if (dump_opt['E']) {
		if (argc != 1)
			usage();
		zdb_embedded_block(argv[0]);
		return (0);
	}

	if (argc < 1) {
		if (!dump_opt['e'] && dump_opt['C']) {
			dump_cachefile(spa_config_path);
			return (0);
		}
		usage();
	}

	if (dump_opt['l'])
		return (dump_label(argv[0]));

	if (dump_opt['O']) {
		if (argc != 2)
			usage();
		dump_opt['v'] = verbose + 3;
		return (dump_path(argv[0], argv[1]));
	}
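
	/*
	 * For example (dataset and path are hypothetical), the -O case
	 * above resolves a path to its object without mounting the
	 * dataset:
	 *
	 *	zdb -O tank/fs path/to/file
	 */
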
	if (dump_opt['X'] || dump_opt['F'])
		rewind = ZPOOL_DO_REWIND |
		    (dump_opt['X'] ? ZPOOL_EXTREME_REWIND : 0);

	if (nvlist_alloc(&policy, NV_UNIQUE_NAME_TYPE, 0) != 0 ||
	    nvlist_add_uint64(policy, ZPOOL_LOAD_REQUEST_TXG, max_txg) != 0 ||
	    nvlist_add_uint32(policy, ZPOOL_LOAD_REWIND_POLICY, rewind) != 0)
		fatal("internal error: %s", strerror(ENOMEM));

	error = 0;
	target = argv[0];

	if (strpbrk(target, "/@") != NULL) {
		size_t targetlen;

		target_pool = strdup(target);
		*strpbrk(target_pool, "/@") = '\0';

		target_is_spa = B_FALSE;
		targetlen = strlen(target);
		if (targetlen && target[targetlen - 1] == '/')
			target[targetlen - 1] = '\0';
	} else {
		target_pool = target;
	}

	if (dump_opt['e']) {
		importargs_t args = { 0 };

		args.paths = nsearch;
		args.path = searchdirs;
		args.can_be_active = B_TRUE;

		error = zpool_find_config(NULL, target_pool, &cfg, &args,
		    &libzpool_config_ops);

		if (error == 0) {

			if (nvlist_add_nvlist(cfg,
			    ZPOOL_LOAD_POLICY, policy) != 0) {
				fatal("can't open '%s': %s",
				    target, strerror(ENOMEM));
			}

			if (dump_opt['C'] > 1) {
				(void) printf("\nConfiguration for import:\n");
				dump_nvlist(cfg, 8);
			}

			/*
			 * Disable the activity check to allow examination of
			 * active pools.
			 */
			error = spa_import(target_pool, cfg, NULL,
			    flags | ZFS_IMPORT_SKIP_MMP);
		}
	}

	char *checkpoint_pool = NULL;
	char *checkpoint_target = NULL;
	if (dump_opt['k']) {
		checkpoint_pool = import_checkpointed_state(target, cfg,
		    &checkpoint_target);

		if (checkpoint_target != NULL)
			target = checkpoint_target;
	}

	if (error == 0) {
		if (dump_opt['k'] && (target_is_spa || dump_opt['R'])) {
			ASSERT(checkpoint_pool != NULL);
			ASSERT(checkpoint_target == NULL);

			error = spa_open(checkpoint_pool, &spa, FTAG);
			if (error != 0) {
				fatal("Tried to open pool \"%s\" but "
				    "spa_open() failed with error %d\n",
				    checkpoint_pool, error);
			}

		} else if (target_is_spa || dump_opt['R']) {
			zdb_set_skip_mmp(target);
			error = spa_open_rewind(target, &spa, FTAG, policy,
			    NULL);
			if (error) {
				/*
				 * If we're missing the log device then
				 * try opening the pool after clearing the
				 * log state.
				 */
				mutex_enter(&spa_namespace_lock);
				if ((spa = spa_lookup(target)) != NULL &&
				    spa->spa_log_state == SPA_LOG_MISSING) {
					spa->spa_log_state = SPA_LOG_CLEAR;
					error = 0;
				}
				mutex_exit(&spa_namespace_lock);

				if (!error) {
					error = spa_open_rewind(target, &spa,
					    FTAG, policy, NULL);
				}
			}
		} else {
			zdb_set_skip_mmp(target);
			error = open_objset(target, DMU_OST_ANY, FTAG, &os);
		}
	}
	nvlist_free(policy);

	if (error)
		fatal("can't open '%s': %s", target, strerror(error));

	argv++;
	argc--;
	if (!dump_opt['R']) {
		if (argc > 0) {
			zopt_objects = argc;
			zopt_object = calloc(zopt_objects, sizeof (uint64_t));
			for (unsigned i = 0; i < zopt_objects; i++) {
				errno = 0;
				zopt_object[i] = strtoull(argv[i], NULL, 0);
				if (zopt_object[i] == 0 && errno != 0)
					fatal("bad number %s: %s",
					    argv[i], strerror(errno));
			}
		}
		if (os != NULL) {
			dump_dir(os);
		} else if (zopt_objects > 0 && !dump_opt['m']) {
			dump_dir(spa->spa_meta_objset);
		} else {
			dump_zpool(spa);
		}
	} else {
		flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR;
		flagbits['c'] = ZDB_FLAG_CHECKSUM;
		flagbits['d'] = ZDB_FLAG_DECOMPRESS;
		flagbits['e'] = ZDB_FLAG_BSWAP;
		flagbits['g'] = ZDB_FLAG_GBH;
		flagbits['i'] = ZDB_FLAG_INDIRECT;
		flagbits['p'] = ZDB_FLAG_PHYS;
		flagbits['r'] = ZDB_FLAG_RAW;

		for (int i = 0; i < argc; i++)
			zdb_read_block(argv[i], spa);
	}

	if (dump_opt['k']) {
		free(checkpoint_pool);
		if (!target_is_spa)
			free(checkpoint_target);
	}

	if (os != NULL)
		close_objset(os, FTAG);
	else
		spa_close(spa, FTAG);

	fuid_table_destroy();

	dump_debug_buffer();

	kernel_fini();

	return (error);
}
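
/*
 * Illustrative -R requests, decoded via the flagbits table in main();
 * the pool name, vdev, offset, and size below are hypothetical:
 *
 *	zdb -R tank 0:1000000:20000	- display a block from vdev 0
 *	zdb -R tank 0:1000000:20000:d	- same, but attempt to decompress
 */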