1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <stdio.h> 29 #include <stdlib.h> 30 #include <sys/zfs_context.h> 31 #include <sys/spa.h> 32 #include <sys/spa_impl.h> 33 #include <sys/dmu.h> 34 #include <sys/zap.h> 35 #include <sys/fs/zfs.h> 36 #include <sys/zfs_znode.h> 37 #include <sys/vdev.h> 38 #include <sys/vdev_impl.h> 39 #include <sys/metaslab_impl.h> 40 #include <sys/dmu_objset.h> 41 #include <sys/dsl_dir.h> 42 #include <sys/dsl_dataset.h> 43 #include <sys/dsl_pool.h> 44 #include <sys/dbuf.h> 45 #include <sys/zil.h> 46 #include <sys/zil_impl.h> 47 #include <sys/stat.h> 48 #include <sys/resource.h> 49 #include <sys/dmu_traverse.h> 50 #include <sys/zio_checksum.h> 51 #include <sys/zio_compress.h> 52 53 const char cmdname[] = "zdb"; 54 uint8_t dump_opt[256]; 55 56 typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size); 57 58 extern void dump_intent_log(zilog_t *); 59 uint64_t *zopt_object = NULL; 60 int zopt_objects = 0; 61 int zdb_advance = ADVANCE_PRE; 62 zbookmark_t zdb_noread = { 0, 0, ZB_NO_LEVEL, 0 }; 63 64 /* 65 * These libumem hooks provide a reasonable set of defaults for the allocator's 66 * debugging facilities. 67 */ 68 const char * 69 _umem_debug_init() 70 { 71 return ("default,verbose"); /* $UMEM_DEBUG setting */ 72 } 73 74 const char * 75 _umem_logging_init(void) 76 { 77 return ("fail,contents"); /* $UMEM_LOGGING setting */ 78 } 79 80 static void 81 usage(void) 82 { 83 (void) fprintf(stderr, 84 "Usage: %s [-udibcsvLU] [-O order] [-B os:obj:level:blkid] " 85 "dataset [object...]\n" 86 " %s -C [pool]\n" 87 " %s -l dev\n", 88 cmdname, cmdname, cmdname); 89 90 (void) fprintf(stderr, " -u uberblock\n"); 91 (void) fprintf(stderr, " -d datasets\n"); 92 (void) fprintf(stderr, " -C cached pool configuration\n"); 93 (void) fprintf(stderr, " -i intent logs\n"); 94 (void) fprintf(stderr, " -b block statistics\n"); 95 (void) fprintf(stderr, " -c checksum all data blocks\n"); 96 (void) fprintf(stderr, " -s report stats on zdb's I/O\n"); 97 (void) fprintf(stderr, " -v verbose (applies to all others)\n"); 98 (void) fprintf(stderr, " -l dump label contents\n"); 99 (void) fprintf(stderr, " -L live pool (allows some errors)\n"); 100 (void) fprintf(stderr, " -O [!]<pre|post|prune|data|holes> " 101 "visitation order\n"); 102 (void) fprintf(stderr, " -U use zpool.cache in /tmp\n"); 103 (void) fprintf(stderr, " -B objset:object:level:blkid -- " 104 "simulate bad block\n"); 105 (void) fprintf(stderr, "Specify an option more than once (e.g. -bb) " 106 "to make only that option verbose\n"); 107 (void) fprintf(stderr, "Default is to dump everything non-verbosely\n"); 108 exit(1); 109 } 110 111 static void 112 fatal(const char *fmt, ...) 113 { 114 va_list ap; 115 116 va_start(ap, fmt); 117 (void) fprintf(stderr, "%s: ", cmdname); 118 (void) vfprintf(stderr, fmt, ap); 119 va_end(ap); 120 (void) fprintf(stderr, "\n"); 121 122 exit(1); 123 } 124 125 static void 126 dump_nvlist(nvlist_t *list, int indent) 127 { 128 nvpair_t *elem = NULL; 129 130 while ((elem = nvlist_next_nvpair(list, elem)) != NULL) { 131 switch (nvpair_type(elem)) { 132 case DATA_TYPE_STRING: 133 { 134 char *value; 135 136 VERIFY(nvpair_value_string(elem, &value) == 0); 137 (void) printf("%*s%s='%s'\n", indent, "", 138 nvpair_name(elem), value); 139 } 140 break; 141 142 case DATA_TYPE_UINT64: 143 { 144 uint64_t value; 145 146 VERIFY(nvpair_value_uint64(elem, &value) == 0); 147 (void) printf("%*s%s=%llu\n", indent, "", 148 nvpair_name(elem), (u_longlong_t)value); 149 } 150 break; 151 152 case DATA_TYPE_NVLIST: 153 { 154 nvlist_t *value; 155 156 VERIFY(nvpair_value_nvlist(elem, &value) == 0); 157 (void) printf("%*s%s\n", indent, "", 158 nvpair_name(elem)); 159 dump_nvlist(value, indent + 4); 160 } 161 break; 162 163 case DATA_TYPE_NVLIST_ARRAY: 164 { 165 nvlist_t **value; 166 uint_t c, count; 167 168 VERIFY(nvpair_value_nvlist_array(elem, &value, 169 &count) == 0); 170 171 for (c = 0; c < count; c++) { 172 (void) printf("%*s%s[%u]\n", indent, "", 173 nvpair_name(elem), c); 174 dump_nvlist(value[c], indent + 8); 175 } 176 } 177 break; 178 179 default: 180 181 (void) printf("bad config type %d for %s\n", 182 nvpair_type(elem), nvpair_name(elem)); 183 } 184 } 185 } 186 187 /* ARGSUSED */ 188 static void 189 dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size) 190 { 191 nvlist_t *nv; 192 size_t nvsize = *(uint64_t *)data; 193 char *packed = umem_alloc(nvsize, UMEM_NOFAIL); 194 195 VERIFY(0 == dmu_read(os, object, 0, nvsize, packed)); 196 197 VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0); 198 199 umem_free(packed, nvsize); 200 201 dump_nvlist(nv, 8); 202 203 nvlist_free(nv); 204 } 205 206 const char dump_zap_stars[] = "****************************************"; 207 const int dump_zap_width = sizeof (dump_zap_stars) - 1; 208 209 static void 210 dump_zap_histogram(uint64_t histo[ZAP_HISTOGRAM_SIZE]) 211 { 212 int i; 213 int minidx = ZAP_HISTOGRAM_SIZE - 1; 214 int maxidx = 0; 215 uint64_t max = 0; 216 217 for (i = 0; i < ZAP_HISTOGRAM_SIZE; i++) { 218 if (histo[i] > max) 219 max = histo[i]; 220 if (histo[i] > 0 && i > maxidx) 221 maxidx = i; 222 if (histo[i] > 0 && i < minidx) 223 minidx = i; 224 } 225 226 if (max < dump_zap_width) 227 max = dump_zap_width; 228 229 for (i = minidx; i <= maxidx; i++) 230 (void) printf("\t\t\t%u: %6llu %s\n", i, (u_longlong_t)histo[i], 231 &dump_zap_stars[(max - histo[i]) * dump_zap_width / max]); 232 } 233 234 static void 235 dump_zap_stats(objset_t *os, uint64_t object) 236 { 237 int error; 238 zap_stats_t zs; 239 240 error = zap_get_stats(os, object, &zs); 241 if (error) 242 return; 243 244 if (zs.zs_ptrtbl_len == 0) { 245 ASSERT(zs.zs_num_blocks == 1); 246 (void) printf("\tmicrozap: %llu bytes, %llu entries\n", 247 (u_longlong_t)zs.zs_blocksize, 248 (u_longlong_t)zs.zs_num_entries); 249 return; 250 } 251 252 (void) printf("\tFat ZAP stats:\n"); 253 254 (void) printf("\t\tPointer table:\n"); 255 (void) printf("\t\t\t%llu elements\n", 256 (u_longlong_t)zs.zs_ptrtbl_len); 257 (void) printf("\t\t\tzt_blk: %llu\n", 258 (u_longlong_t)zs.zs_ptrtbl_zt_blk); 259 (void) printf("\t\t\tzt_numblks: %llu\n", 260 (u_longlong_t)zs.zs_ptrtbl_zt_numblks); 261 (void) printf("\t\t\tzt_shift: %llu\n", 262 (u_longlong_t)zs.zs_ptrtbl_zt_shift); 263 (void) printf("\t\t\tzt_blks_copied: %llu\n", 264 (u_longlong_t)zs.zs_ptrtbl_blks_copied); 265 (void) printf("\t\t\tzt_nextblk: %llu\n", 266 (u_longlong_t)zs.zs_ptrtbl_nextblk); 267 268 (void) printf("\t\tZAP entries: %llu\n", 269 (u_longlong_t)zs.zs_num_entries); 270 (void) printf("\t\tLeaf blocks: %llu\n", 271 (u_longlong_t)zs.zs_num_leafs); 272 (void) printf("\t\tTotal blocks: %llu\n", 273 (u_longlong_t)zs.zs_num_blocks); 274 (void) printf("\t\tzap_block_type: 0x%llx\n", 275 (u_longlong_t)zs.zs_block_type); 276 (void) printf("\t\tzap_magic: 0x%llx\n", 277 (u_longlong_t)zs.zs_magic); 278 (void) printf("\t\tzap_salt: 0x%llx\n", 279 (u_longlong_t)zs.zs_salt); 280 281 (void) printf("\t\tLeafs with 2^n pointers:\n"); 282 dump_zap_histogram(zs.zs_leafs_with_2n_pointers); 283 284 (void) printf("\t\tBlocks with n*5 entries:\n"); 285 dump_zap_histogram(zs.zs_blocks_with_n5_entries); 286 287 (void) printf("\t\tBlocks n/10 full:\n"); 288 dump_zap_histogram(zs.zs_blocks_n_tenths_full); 289 290 (void) printf("\t\tEntries with n chunks:\n"); 291 dump_zap_histogram(zs.zs_entries_using_n_chunks); 292 293 (void) printf("\t\tBuckets with n entries:\n"); 294 dump_zap_histogram(zs.zs_buckets_with_n_entries); 295 } 296 297 /*ARGSUSED*/ 298 static void 299 dump_none(objset_t *os, uint64_t object, void *data, size_t size) 300 { 301 } 302 303 /*ARGSUSED*/ 304 void 305 dump_uint8(objset_t *os, uint64_t object, void *data, size_t size) 306 { 307 } 308 309 /*ARGSUSED*/ 310 static void 311 dump_uint64(objset_t *os, uint64_t object, void *data, size_t size) 312 { 313 } 314 315 /*ARGSUSED*/ 316 static void 317 dump_zap(objset_t *os, uint64_t object, void *data, size_t size) 318 { 319 zap_cursor_t zc; 320 zap_attribute_t attr; 321 void *prop; 322 int i; 323 324 dump_zap_stats(os, object); 325 (void) printf("\n"); 326 327 for (zap_cursor_init(&zc, os, object); 328 zap_cursor_retrieve(&zc, &attr) == 0; 329 zap_cursor_advance(&zc)) { 330 (void) printf("\t\t%s = ", attr.za_name); 331 if (attr.za_num_integers == 0) { 332 (void) printf("\n"); 333 continue; 334 } 335 prop = umem_zalloc(attr.za_num_integers * 336 attr.za_integer_length, UMEM_NOFAIL); 337 (void) zap_lookup(os, object, attr.za_name, 338 attr.za_integer_length, attr.za_num_integers, prop); 339 if (attr.za_integer_length == 1) { 340 (void) printf("%s", (char *)prop); 341 } else { 342 for (i = 0; i < attr.za_num_integers; i++) { 343 switch (attr.za_integer_length) { 344 case 2: 345 (void) printf("%u ", 346 ((uint16_t *)prop)[i]); 347 break; 348 case 4: 349 (void) printf("%u ", 350 ((uint32_t *)prop)[i]); 351 break; 352 case 8: 353 (void) printf("%lld ", 354 (u_longlong_t)((int64_t *)prop)[i]); 355 break; 356 } 357 } 358 } 359 (void) printf("\n"); 360 umem_free(prop, attr.za_num_integers * attr.za_integer_length); 361 } 362 zap_cursor_fini(&zc); 363 } 364 365 static void 366 dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm) 367 { 368 uint64_t alloc, offset, entry; 369 uint8_t mapshift = sm->sm_shift; 370 uint64_t mapstart = sm->sm_start; 371 char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID" }; 372 373 if (smo->smo_object == 0) 374 return; 375 376 /* 377 * Print out the freelist entries in both encoded and decoded form. 378 */ 379 alloc = 0; 380 for (offset = 0; offset < smo->smo_objsize; offset += sizeof (entry)) { 381 VERIFY(0 == dmu_read(os, smo->smo_object, offset, 382 sizeof (entry), &entry)); 383 if (SM_DEBUG_DECODE(entry)) { 384 (void) printf("\t\t[%4llu] %s: txg %llu, pass %llu\n", 385 (u_longlong_t)(offset / sizeof (entry)), 386 ddata[SM_DEBUG_ACTION_DECODE(entry)], 387 SM_DEBUG_TXG_DECODE(entry), 388 SM_DEBUG_SYNCPASS_DECODE(entry)); 389 } else { 390 (void) printf("\t\t[%4llu] %c range:" 391 " %08llx-%08llx size: %06llx\n", 392 (u_longlong_t)(offset / sizeof (entry)), 393 SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F', 394 (SM_OFFSET_DECODE(entry) << mapshift) + mapstart, 395 (SM_OFFSET_DECODE(entry) << mapshift) + mapstart + 396 (SM_RUN_DECODE(entry) << mapshift), 397 (SM_RUN_DECODE(entry) << mapshift)); 398 if (SM_TYPE_DECODE(entry) == SM_ALLOC) 399 alloc += SM_RUN_DECODE(entry) << mapshift; 400 else 401 alloc -= SM_RUN_DECODE(entry) << mapshift; 402 } 403 } 404 if (alloc != smo->smo_alloc) { 405 (void) printf("space_map_object alloc (%llu) INCONSISTENT " 406 "with space map summary (%llu)\n", 407 (u_longlong_t)smo->smo_alloc, (u_longlong_t)alloc); 408 } 409 } 410 411 static void 412 dump_metaslab(metaslab_t *msp) 413 { 414 char freebuf[5]; 415 space_map_obj_t *smo = &msp->ms_smo; 416 vdev_t *vd = msp->ms_group->mg_vd; 417 spa_t *spa = vd->vdev_spa; 418 419 nicenum(msp->ms_map.sm_size - smo->smo_alloc, freebuf); 420 421 if (dump_opt['d'] <= 5) { 422 (void) printf("\t%10llx %10llu %5s\n", 423 (u_longlong_t)msp->ms_map.sm_start, 424 (u_longlong_t)smo->smo_object, 425 freebuf); 426 return; 427 } 428 429 (void) printf( 430 "\tvdev %llu offset %08llx spacemap %4llu free %5s\n", 431 (u_longlong_t)vd->vdev_id, (u_longlong_t)msp->ms_map.sm_start, 432 (u_longlong_t)smo->smo_object, freebuf); 433 434 ASSERT(msp->ms_map.sm_size == (1ULL << vd->vdev_ms_shift)); 435 436 dump_spacemap(spa->spa_meta_objset, smo, &msp->ms_map); 437 } 438 439 static void 440 dump_metaslabs(spa_t *spa) 441 { 442 vdev_t *rvd = spa->spa_root_vdev; 443 vdev_t *vd; 444 int c, m; 445 446 (void) printf("\nMetaslabs:\n"); 447 448 for (c = 0; c < rvd->vdev_children; c++) { 449 vd = rvd->vdev_child[c]; 450 451 spa_config_enter(spa, RW_READER, FTAG); 452 (void) printf("\n vdev %llu = %s\n\n", 453 (u_longlong_t)vd->vdev_id, vdev_description(vd)); 454 spa_config_exit(spa, FTAG); 455 456 if (dump_opt['d'] <= 5) { 457 (void) printf("\t%10s %10s %5s\n", 458 "offset", "spacemap", "free"); 459 (void) printf("\t%10s %10s %5s\n", 460 "------", "--------", "----"); 461 } 462 for (m = 0; m < vd->vdev_ms_count; m++) 463 dump_metaslab(vd->vdev_ms[m]); 464 (void) printf("\n"); 465 } 466 } 467 468 static void 469 dump_dtl(vdev_t *vd, int indent) 470 { 471 avl_tree_t *t = &vd->vdev_dtl_map.sm_root; 472 spa_t *spa = vd->vdev_spa; 473 space_seg_t *ss; 474 vdev_t *pvd; 475 int c; 476 477 if (indent == 0) 478 (void) printf("\nDirty time logs:\n\n"); 479 480 spa_config_enter(spa, RW_READER, FTAG); 481 (void) printf("\t%*s%s\n", indent, "", vdev_description(vd)); 482 spa_config_exit(spa, FTAG); 483 484 for (ss = avl_first(t); ss; ss = AVL_NEXT(t, ss)) { 485 /* 486 * Everything in this DTL must appear in all parent DTL unions. 487 */ 488 for (pvd = vd; pvd; pvd = pvd->vdev_parent) 489 ASSERT(vdev_dtl_contains(&pvd->vdev_dtl_map, 490 ss->ss_start, ss->ss_end - ss->ss_start)); 491 (void) printf("\t%*soutage [%llu,%llu] length %llu\n", 492 indent, "", 493 (u_longlong_t)ss->ss_start, 494 (u_longlong_t)ss->ss_end - 1, 495 (u_longlong_t)ss->ss_end - ss->ss_start); 496 } 497 498 (void) printf("\n"); 499 500 if (dump_opt['d'] > 5 && vd->vdev_children == 0) { 501 dump_spacemap(vd->vdev_spa->spa_meta_objset, &vd->vdev_dtl, 502 &vd->vdev_dtl_map); 503 (void) printf("\n"); 504 } 505 506 for (c = 0; c < vd->vdev_children; c++) 507 dump_dtl(vd->vdev_child[c], indent + 4); 508 } 509 510 /*ARGSUSED*/ 511 static void 512 dump_dnode(objset_t *os, uint64_t object, void *data, size_t size) 513 { 514 } 515 516 static uint64_t 517 blkid2offset(dnode_phys_t *dnp, int level, uint64_t blkid) 518 { 519 if (level < 0) 520 return (blkid); 521 522 return ((blkid << (level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) * 523 dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT); 524 } 525 526 /* ARGSUSED */ 527 static int 528 zdb_indirect_cb(traverse_blk_cache_t *bc, spa_t *spa, void *a) 529 { 530 zbookmark_t *zb = &bc->bc_bookmark; 531 blkptr_t *bp = &bc->bc_blkptr; 532 dva_t *dva = &bp->blk_dva[0]; 533 void *data = bc->bc_data; 534 dnode_phys_t *dnp = bc->bc_dnode; 535 char buffer[300]; 536 int l; 537 538 if (bc->bc_errno) { 539 (void) sprintf(buffer, 540 "Error %d reading <%llu, %llu, %lld, %llu>: ", 541 bc->bc_errno, 542 (u_longlong_t)zb->zb_objset, 543 (u_longlong_t)zb->zb_object, 544 (u_longlong_t)zb->zb_level, 545 (u_longlong_t)zb->zb_blkid); 546 goto out; 547 } 548 549 if (zb->zb_level == -1) { 550 ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET); 551 ASSERT3U(BP_GET_LEVEL(bp), ==, 0); 552 } else { 553 ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type); 554 ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level); 555 } 556 557 if (zb->zb_level > 0) { 558 uint64_t fill = 0; 559 blkptr_t *bpx, *bpend; 560 561 for (bpx = data, bpend = bpx + BP_GET_LSIZE(bp) / sizeof (*bpx); 562 bpx < bpend; bpx++) { 563 if (bpx->blk_birth != 0) { 564 fill += bpx->blk_fill; 565 } else { 566 ASSERT(bpx->blk_fill == 0); 567 } 568 } 569 ASSERT3U(fill, ==, bp->blk_fill); 570 } 571 572 if (zb->zb_level == 0 && dnp->dn_type == DMU_OT_DNODE) { 573 uint64_t fill = 0; 574 dnode_phys_t *dnx, *dnend; 575 576 for (dnx = data, dnend = dnx + (BP_GET_LSIZE(bp)>>DNODE_SHIFT); 577 dnx < dnend; dnx++) { 578 if (dnx->dn_type != DMU_OT_NONE) 579 fill++; 580 } 581 ASSERT3U(fill, ==, bp->blk_fill); 582 } 583 584 (void) sprintf(buffer, "%16llx ", 585 (u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid)); 586 587 ASSERT(zb->zb_level >= 0); 588 589 for (l = dnp->dn_nlevels - 1; l >= -1; l--) { 590 if (l == zb->zb_level) { 591 (void) sprintf(buffer + strlen(buffer), "L%llx", 592 (u_longlong_t)zb->zb_level); 593 } else { 594 (void) sprintf(buffer + strlen(buffer), " "); 595 } 596 } 597 598 out: 599 if (bp->blk_birth == 0) { 600 (void) sprintf(buffer + strlen(buffer), "<hole>"); 601 (void) printf("%s\n", buffer); 602 } else { 603 // XXBP - Need to print number of active BPs here 604 (void) sprintf(buffer + strlen(buffer), 605 "vdev=%llu off=%llx %llxL/%llxP/%llxA F=%llu B=%llu", 606 (u_longlong_t)DVA_GET_VDEV(dva), 607 (u_longlong_t)DVA_GET_OFFSET(dva), 608 (u_longlong_t)BP_GET_LSIZE(bp), 609 (u_longlong_t)BP_GET_PSIZE(bp), 610 (u_longlong_t)DVA_GET_ASIZE(dva), 611 (u_longlong_t)bp->blk_fill, 612 (u_longlong_t)bp->blk_birth); 613 614 (void) printf("%s\n", buffer); 615 } 616 617 return (bc->bc_errno ? ERESTART : 0); 618 } 619 620 /*ARGSUSED*/ 621 static void 622 dump_indirect(objset_t *os, uint64_t object, void *data, size_t size) 623 { 624 traverse_handle_t *th; 625 uint64_t objset = dmu_objset_id(os); 626 int advance = zdb_advance; 627 628 (void) printf("Indirect blocks:\n"); 629 630 if (object == 0) 631 advance |= ADVANCE_DATA; 632 633 th = traverse_init(dmu_objset_spa(os), zdb_indirect_cb, NULL, advance, 634 ZIO_FLAG_CANFAIL); 635 th->th_noread = zdb_noread; 636 637 traverse_add_dnode(th, 0, -1ULL, objset, object); 638 639 while (traverse_more(th) == EAGAIN) 640 continue; 641 642 (void) printf("\n"); 643 644 traverse_fini(th); 645 } 646 647 /*ARGSUSED*/ 648 static void 649 dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size) 650 { 651 dsl_dir_phys_t *dd = data; 652 time_t crtime; 653 char used[6], compressed[6], uncompressed[6], quota[6], resv[6]; 654 655 if (dd == NULL) 656 return; 657 658 ASSERT(size == sizeof (*dd)); 659 660 crtime = dd->dd_creation_time; 661 nicenum(dd->dd_used_bytes, used); 662 nicenum(dd->dd_compressed_bytes, compressed); 663 nicenum(dd->dd_uncompressed_bytes, uncompressed); 664 nicenum(dd->dd_quota, quota); 665 nicenum(dd->dd_reserved, resv); 666 667 (void) printf("\t\tcreation_time = %s", ctime(&crtime)); 668 (void) printf("\t\thead_dataset_obj = %llu\n", 669 (u_longlong_t)dd->dd_head_dataset_obj); 670 (void) printf("\t\tparent_dir_obj = %llu\n", 671 (u_longlong_t)dd->dd_parent_obj); 672 (void) printf("\t\tclone_parent_obj = %llu\n", 673 (u_longlong_t)dd->dd_clone_parent_obj); 674 (void) printf("\t\tchild_dir_zapobj = %llu\n", 675 (u_longlong_t)dd->dd_child_dir_zapobj); 676 (void) printf("\t\tused_bytes = %s\n", used); 677 (void) printf("\t\tcompressed_bytes = %s\n", compressed); 678 (void) printf("\t\tuncompressed_bytes = %s\n", uncompressed); 679 (void) printf("\t\tquota = %s\n", quota); 680 (void) printf("\t\treserved = %s\n", resv); 681 (void) printf("\t\tprops_zapobj = %llu\n", 682 (u_longlong_t)dd->dd_props_zapobj); 683 } 684 685 /*ARGSUSED*/ 686 static void 687 dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size) 688 { 689 dsl_dataset_phys_t *ds = data; 690 time_t crtime; 691 char used[6], compressed[6], uncompressed[6], unique[6]; 692 char blkbuf[BP_SPRINTF_LEN]; 693 694 if (ds == NULL) 695 return; 696 697 ASSERT(size == sizeof (*ds)); 698 crtime = ds->ds_creation_time; 699 nicenum(ds->ds_used_bytes, used); 700 nicenum(ds->ds_compressed_bytes, compressed); 701 nicenum(ds->ds_uncompressed_bytes, uncompressed); 702 nicenum(ds->ds_unique_bytes, unique); 703 sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &ds->ds_bp); 704 705 (void) printf("\t\tdataset_obj = %llu\n", 706 (u_longlong_t)ds->ds_dir_obj); 707 (void) printf("\t\tprev_snap_obj = %llu\n", 708 (u_longlong_t)ds->ds_prev_snap_obj); 709 (void) printf("\t\tprev_snap_txg = %llu\n", 710 (u_longlong_t)ds->ds_prev_snap_txg); 711 (void) printf("\t\tnext_snap_obj = %llu\n", 712 (u_longlong_t)ds->ds_next_snap_obj); 713 (void) printf("\t\tsnapnames_zapobj = %llu\n", 714 (u_longlong_t)ds->ds_snapnames_zapobj); 715 (void) printf("\t\tnum_children = %llu\n", 716 (u_longlong_t)ds->ds_num_children); 717 (void) printf("\t\tcreation_time = %s", ctime(&crtime)); 718 (void) printf("\t\tcreation_txg = %llu\n", 719 (u_longlong_t)ds->ds_creation_txg); 720 (void) printf("\t\tdeadlist_obj = %llu\n", 721 (u_longlong_t)ds->ds_deadlist_obj); 722 (void) printf("\t\tused_bytes = %s\n", used); 723 (void) printf("\t\tcompressed_bytes = %s\n", compressed); 724 (void) printf("\t\tuncompressed_bytes = %s\n", uncompressed); 725 (void) printf("\t\tunique = %s\n", unique); 726 (void) printf("\t\tfsid_guid = %llu\n", 727 (u_longlong_t)ds->ds_fsid_guid); 728 (void) printf("\t\tguid = %llu\n", 729 (u_longlong_t)ds->ds_guid); 730 (void) printf("\t\tinconsistent = %llu\n", 731 (u_longlong_t)ds->ds_inconsistent); 732 (void) printf("\t\tbp = %s\n", blkbuf); 733 } 734 735 static void 736 dump_bplist(objset_t *mos, uint64_t object, char *name) 737 { 738 bplist_t bpl = { 0 }; 739 blkptr_t blk, *bp = &blk; 740 uint64_t itor = 0; 741 char numbuf[6]; 742 743 if (dump_opt['d'] < 3) 744 return; 745 746 VERIFY(0 == bplist_open(&bpl, mos, object)); 747 if (bplist_empty(&bpl)) { 748 bplist_close(&bpl); 749 return; 750 } 751 752 nicenum(bpl.bpl_phys->bpl_bytes, numbuf); 753 754 (void) printf("\n %s: %llu entries, %s\n", 755 name, (u_longlong_t)bpl.bpl_phys->bpl_entries, numbuf); 756 757 if (dump_opt['d'] < 5) { 758 bplist_close(&bpl); 759 return; 760 } 761 762 (void) printf("\n"); 763 764 while (bplist_iterate(&bpl, &itor, bp) == 0) { 765 ASSERT(bp->blk_birth != 0); 766 // XXBP - Do we want to see all DVAs, or just one? 767 (void) printf("\tItem %3llu: vdev=%llu off=%llx " 768 "%llxL/%llxP/%llxA F=%llu B=%llu\n", 769 (u_longlong_t)itor - 1, 770 (u_longlong_t)DVA_GET_VDEV(&bp->blk_dva[0]), 771 (u_longlong_t)DVA_GET_OFFSET(&bp->blk_dva[0]), 772 (u_longlong_t)BP_GET_LSIZE(bp), 773 (u_longlong_t)BP_GET_PSIZE(bp), 774 (u_longlong_t)DVA_GET_ASIZE(&bp->blk_dva[0]), 775 (u_longlong_t)bp->blk_fill, 776 (u_longlong_t)bp->blk_birth); 777 } 778 779 bplist_close(&bpl); 780 } 781 782 static char * 783 znode_path(objset_t *os, uint64_t object, char *pathbuf, size_t size) 784 { 785 dmu_buf_t *db; 786 dmu_object_info_t doi; 787 znode_phys_t *zp; 788 uint64_t parent = 0; 789 size_t complen; 790 char component[MAXNAMELEN + 1]; 791 char *path; 792 int error; 793 794 path = pathbuf + size; 795 *--path = '\0'; 796 797 for (;;) { 798 error = dmu_bonus_hold(os, object, FTAG, &db); 799 if (error) 800 break; 801 802 dmu_object_info_from_db(db, &doi); 803 zp = db->db_data; 804 parent = zp->zp_parent; 805 dmu_buf_rele(db, FTAG); 806 807 if (doi.doi_bonus_type != DMU_OT_ZNODE) 808 break; 809 810 if (parent == object) { 811 if (path[0] != '/') 812 *--path = '/'; 813 return (path); 814 } 815 816 if (zap_value_search(os, parent, object, component) != 0) 817 break; 818 819 complen = strlen(component); 820 path -= complen; 821 bcopy(component, path, complen); 822 *--path = '/'; 823 824 object = parent; 825 } 826 827 (void) sprintf(component, "???<object#%llu>", (u_longlong_t)object); 828 829 complen = strlen(component); 830 path -= complen; 831 bcopy(component, path, complen); 832 833 return (path); 834 } 835 836 /*ARGSUSED*/ 837 static void 838 dump_znode(objset_t *os, uint64_t object, void *data, size_t size) 839 { 840 znode_phys_t *zp = data; 841 time_t z_crtime, z_atime, z_mtime, z_ctime; 842 char path[MAXPATHLEN * 2]; /* allow for xattr and failure prefix */ 843 844 ASSERT(size >= sizeof (znode_phys_t)); 845 846 if (dump_opt['d'] < 3) { 847 (void) printf("\t%s\n", 848 znode_path(os, object, path, sizeof (path))); 849 return; 850 } 851 852 z_crtime = (time_t)zp->zp_crtime[0]; 853 z_atime = (time_t)zp->zp_atime[0]; 854 z_mtime = (time_t)zp->zp_mtime[0]; 855 z_ctime = (time_t)zp->zp_ctime[0]; 856 857 (void) printf("\tpath %s\n", 858 znode_path(os, object, path, sizeof (path))); 859 (void) printf("\tatime %s", ctime(&z_atime)); 860 (void) printf("\tmtime %s", ctime(&z_mtime)); 861 (void) printf("\tctime %s", ctime(&z_ctime)); 862 (void) printf("\tcrtime %s", ctime(&z_crtime)); 863 (void) printf("\tgen %llu\n", (u_longlong_t)zp->zp_gen); 864 (void) printf("\tmode %llo\n", (u_longlong_t)zp->zp_mode); 865 (void) printf("\tsize %llu\n", (u_longlong_t)zp->zp_size); 866 (void) printf("\tparent %llu\n", (u_longlong_t)zp->zp_parent); 867 (void) printf("\tlinks %llu\n", (u_longlong_t)zp->zp_links); 868 (void) printf("\txattr %llu\n", (u_longlong_t)zp->zp_xattr); 869 (void) printf("\trdev 0x%016llx\n", (u_longlong_t)zp->zp_rdev); 870 } 871 872 /*ARGSUSED*/ 873 static void 874 dump_acl(objset_t *os, uint64_t object, void *data, size_t size) 875 { 876 } 877 878 /*ARGSUSED*/ 879 static void 880 dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size) 881 { 882 } 883 884 static object_viewer_t *object_viewer[DMU_OT_NUMTYPES] = { 885 dump_none, /* unallocated */ 886 dump_zap, /* object directory */ 887 dump_uint64, /* object array */ 888 dump_none, /* packed nvlist */ 889 dump_packed_nvlist, /* packed nvlist size */ 890 dump_none, /* bplist */ 891 dump_none, /* bplist header */ 892 dump_none, /* SPA space map header */ 893 dump_none, /* SPA space map */ 894 dump_none, /* ZIL intent log */ 895 dump_dnode, /* DMU dnode */ 896 dump_dmu_objset, /* DMU objset */ 897 dump_dsl_dir, /* DSL directory */ 898 dump_zap, /* DSL directory child map */ 899 dump_zap, /* DSL dataset snap map */ 900 dump_zap, /* DSL props */ 901 dump_dsl_dataset, /* DSL dataset */ 902 dump_znode, /* ZFS znode */ 903 dump_acl, /* ZFS ACL */ 904 dump_uint8, /* ZFS plain file */ 905 dump_zap, /* ZFS directory */ 906 dump_zap, /* ZFS master node */ 907 dump_zap, /* ZFS delete queue */ 908 dump_uint8, /* zvol object */ 909 dump_zap, /* zvol prop */ 910 dump_uint8, /* other uint8[] */ 911 dump_uint64, /* other uint64[] */ 912 dump_zap, /* other ZAP */ 913 dump_zap, /* persistent error log */ 914 }; 915 916 static void 917 dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header) 918 { 919 dmu_buf_t *db = NULL; 920 dmu_object_info_t doi; 921 dnode_t *dn; 922 void *bonus = NULL; 923 size_t bsize = 0; 924 char iblk[6], dblk[6], lsize[6], asize[6], bonus_size[6], segsize[6]; 925 char aux[50]; 926 int error; 927 928 if (*print_header) { 929 (void) printf("\n Object lvl iblk dblk lsize" 930 " asize type\n"); 931 *print_header = 0; 932 } 933 934 if (object == 0) { 935 dn = os->os->os_meta_dnode; 936 } else { 937 error = dmu_bonus_hold(os, object, FTAG, &db); 938 if (error) 939 fatal("dmu_bonus_hold(%llu) failed, errno %u", 940 object, error); 941 bonus = db->db_data; 942 bsize = db->db_size; 943 dn = ((dmu_buf_impl_t *)db)->db_dnode; 944 } 945 dmu_object_info_from_dnode(dn, &doi); 946 947 nicenum(doi.doi_metadata_block_size, iblk); 948 nicenum(doi.doi_data_block_size, dblk); 949 nicenum(doi.doi_data_block_size * (doi.doi_max_block_offset + 1), 950 lsize); 951 nicenum(doi.doi_physical_blks << 9, asize); 952 nicenum(doi.doi_bonus_size, bonus_size); 953 954 aux[0] = '\0'; 955 956 if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) 957 (void) snprintf(aux + strlen(aux), sizeof (aux), " (K=%s)", 958 zio_checksum_table[doi.doi_checksum].ci_name); 959 960 if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) 961 (void) snprintf(aux + strlen(aux), sizeof (aux), " (Z=%s)", 962 zio_compress_table[doi.doi_compress].ci_name); 963 964 (void) printf("%10lld %3u %5s %5s %5s %5s %s%s\n", 965 (u_longlong_t)object, doi.doi_indirection, iblk, dblk, lsize, 966 asize, dmu_ot[doi.doi_type].ot_name, aux); 967 968 if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) { 969 (void) printf("%10s %3s %5s %5s %5s %5s %s\n", 970 "", "", "", "", bonus_size, "bonus", 971 dmu_ot[doi.doi_bonus_type].ot_name); 972 } 973 974 if (verbosity >= 4) { 975 object_viewer[doi.doi_bonus_type](os, object, bonus, bsize); 976 object_viewer[doi.doi_type](os, object, NULL, 0); 977 *print_header = 1; 978 } 979 980 if (verbosity >= 5) 981 dump_indirect(os, object, NULL, 0); 982 983 if (verbosity >= 5) { 984 /* 985 * Report the list of segments that comprise the object. 986 */ 987 uint64_t start = 0; 988 uint64_t end; 989 uint64_t blkfill = 1; 990 int minlvl = 1; 991 992 if (dn->dn_type == DMU_OT_DNODE) { 993 minlvl = 0; 994 blkfill = DNODES_PER_BLOCK; 995 } 996 997 for (;;) { 998 error = dnode_next_offset(dn, B_FALSE, &start, minlvl, 999 blkfill); 1000 if (error) 1001 break; 1002 end = start; 1003 error = dnode_next_offset(dn, B_TRUE, &end, minlvl, 1004 blkfill); 1005 nicenum(end - start, segsize); 1006 (void) printf("\t\tsegment [%016llx, %016llx)" 1007 " size %5s\n", (u_longlong_t)start, 1008 (u_longlong_t)end, segsize); 1009 if (error) 1010 break; 1011 start = end; 1012 } 1013 } 1014 1015 if (db != NULL) 1016 dmu_buf_rele(db, FTAG); 1017 } 1018 1019 static char *objset_types[DMU_OST_NUMTYPES] = { 1020 "NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" }; 1021 1022 /*ARGSUSED*/ 1023 static void 1024 dump_dir(objset_t *os) 1025 { 1026 dmu_objset_stats_t dds; 1027 uint64_t object, object_count; 1028 char numbuf[8]; 1029 char blkbuf[BP_SPRINTF_LEN]; 1030 char osname[MAXNAMELEN]; 1031 char *type = "UNKNOWN"; 1032 int verbosity = dump_opt['d']; 1033 int print_header = 1; 1034 int i, error; 1035 1036 dmu_objset_stats(os, &dds); 1037 1038 if (dds.dds_type < DMU_OST_NUMTYPES) 1039 type = objset_types[dds.dds_type]; 1040 1041 if (dds.dds_type == DMU_OST_META) { 1042 dds.dds_creation_txg = TXG_INITIAL; 1043 dds.dds_last_txg = os->os->os_rootbp.blk_birth; 1044 dds.dds_objects_used = os->os->os_rootbp.blk_fill; 1045 dds.dds_space_refd = 1046 os->os->os_spa->spa_dsl_pool->dp_mos_dir->dd_used_bytes; 1047 } 1048 1049 ASSERT3U(dds.dds_objects_used, ==, os->os->os_rootbp.blk_fill); 1050 1051 nicenum(dds.dds_space_refd, numbuf); 1052 1053 if (verbosity >= 4) { 1054 (void) strcpy(blkbuf, ", rootbp "); 1055 sprintf_blkptr(blkbuf + strlen(blkbuf), 1056 BP_SPRINTF_LEN - strlen(blkbuf), &os->os->os_rootbp); 1057 } else { 1058 blkbuf[0] = '\0'; 1059 } 1060 1061 dmu_objset_name(os, osname); 1062 1063 (void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, last_txg %llu, " 1064 "%s, %llu objects%s\n", 1065 osname, type, (u_longlong_t)dmu_objset_id(os), 1066 (u_longlong_t)dds.dds_creation_txg, 1067 (u_longlong_t)dds.dds_last_txg, 1068 numbuf, 1069 (u_longlong_t)dds.dds_objects_used, 1070 blkbuf); 1071 1072 dump_intent_log(dmu_objset_zil(os)); 1073 1074 if (dmu_objset_ds(os) != NULL) 1075 dump_bplist(dmu_objset_pool(os)->dp_meta_objset, 1076 dmu_objset_ds(os)->ds_phys->ds_deadlist_obj, "Deadlist"); 1077 1078 if (verbosity < 2) 1079 return; 1080 1081 if (zopt_objects != 0) { 1082 for (i = 0; i < zopt_objects; i++) 1083 dump_object(os, zopt_object[i], verbosity, 1084 &print_header); 1085 (void) printf("\n"); 1086 return; 1087 } 1088 1089 dump_object(os, 0, verbosity, &print_header); 1090 object_count = 1; 1091 1092 object = 0; 1093 while ((error = dmu_object_next(os, &object, B_FALSE)) == 0) { 1094 dump_object(os, object, verbosity, &print_header); 1095 object_count++; 1096 } 1097 1098 ASSERT3U(object_count, ==, dds.dds_objects_used); 1099 1100 (void) printf("\n"); 1101 1102 if (error != ESRCH) 1103 fatal("dmu_object_next() = %d", error); 1104 } 1105 1106 static void 1107 dump_uberblock(uberblock_t *ub) 1108 { 1109 time_t timestamp = ub->ub_timestamp; 1110 1111 (void) printf("Uberblock\n\n"); 1112 (void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic); 1113 (void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version); 1114 (void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg); 1115 (void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum); 1116 (void) printf("\ttimestamp = %llu UTC = %s", 1117 (u_longlong_t)ub->ub_timestamp, asctime(localtime(×tamp))); 1118 if (dump_opt['u'] >= 3) { 1119 char blkbuf[BP_SPRINTF_LEN]; 1120 sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &ub->ub_rootbp); 1121 (void) printf("\trootbp = %s\n", blkbuf); 1122 } 1123 (void) printf("\n"); 1124 } 1125 1126 static void 1127 dump_config(const char *pool) 1128 { 1129 spa_t *spa = NULL; 1130 1131 mutex_enter(&spa_namespace_lock); 1132 while ((spa = spa_next(spa)) != NULL) { 1133 if (pool == NULL) 1134 (void) printf("%s\n", spa_name(spa)); 1135 if (pool == NULL || strcmp(pool, spa_name(spa)) == 0) 1136 dump_nvlist(spa->spa_config, 4); 1137 } 1138 mutex_exit(&spa_namespace_lock); 1139 } 1140 1141 static void 1142 dump_label(const char *dev) 1143 { 1144 int fd; 1145 vdev_label_t label; 1146 char *buf = label.vl_vdev_phys.vp_nvlist; 1147 size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist); 1148 struct stat64 statbuf; 1149 uint64_t psize; 1150 int l; 1151 1152 if ((fd = open64(dev, O_RDONLY)) < 0) { 1153 (void) printf("cannot open '%s': %s\n", dev, strerror(errno)); 1154 exit(1); 1155 } 1156 1157 if (fstat64(fd, &statbuf) != 0) { 1158 (void) printf("failed to stat '%s': %s\n", dev, 1159 strerror(errno)); 1160 exit(1); 1161 } 1162 1163 psize = statbuf.st_size; 1164 psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t)); 1165 1166 for (l = 0; l < VDEV_LABELS; l++) { 1167 1168 nvlist_t *config = NULL; 1169 1170 (void) printf("--------------------------------------------\n"); 1171 (void) printf("LABEL %d\n", l); 1172 (void) printf("--------------------------------------------\n"); 1173 1174 if (pread64(fd, &label, sizeof (label), 1175 vdev_label_offset(psize, l, 0)) != sizeof (label)) { 1176 (void) printf("failed to read label %d\n", l); 1177 continue; 1178 } 1179 1180 if (nvlist_unpack(buf, buflen, &config, 0) != 0) { 1181 (void) printf("failed to unpack label %d\n", l); 1182 continue; 1183 } 1184 dump_nvlist(config, 4); 1185 nvlist_free(config); 1186 } 1187 } 1188 1189 /*ARGSUSED*/ 1190 static void 1191 dump_one_dir(char *dsname, void *arg) 1192 { 1193 int error; 1194 objset_t *os; 1195 1196 error = dmu_objset_open(dsname, DMU_OST_ANY, 1197 DS_MODE_STANDARD | DS_MODE_READONLY, &os); 1198 if (error) { 1199 (void) printf("Could not open %s\n", dsname); 1200 return; 1201 } 1202 dump_dir(os); 1203 dmu_objset_close(os); 1204 } 1205 1206 static void 1207 zdb_space_map_load(spa_t *spa) 1208 { 1209 vdev_t *rvd = spa->spa_root_vdev; 1210 vdev_t *vd; 1211 int c, m, error; 1212 1213 for (c = 0; c < rvd->vdev_children; c++) { 1214 vd = rvd->vdev_child[c]; 1215 for (m = 0; m < vd->vdev_ms_count; m++) { 1216 metaslab_t *msp = vd->vdev_ms[m]; 1217 mutex_enter(&msp->ms_lock); 1218 error = space_map_load(&msp->ms_allocmap[0], NULL, 1219 SM_ALLOC, &msp->ms_smo, spa->spa_meta_objset); 1220 mutex_exit(&msp->ms_lock); 1221 if (error) 1222 fatal("%s bad space map #%d, error %d", 1223 spa->spa_name, c, error); 1224 } 1225 } 1226 } 1227 1228 static int 1229 zdb_space_map_claim(spa_t *spa, blkptr_t *bp, zbookmark_t *zb) 1230 { 1231 dva_t *dva = &bp->blk_dva[0]; 1232 uint64_t vdev = DVA_GET_VDEV(dva); 1233 uint64_t offset = DVA_GET_OFFSET(dva); 1234 uint64_t size = DVA_GET_ASIZE(dva); 1235 vdev_t *vd; 1236 metaslab_t *msp; 1237 space_map_t *allocmap, *freemap; 1238 int error; 1239 1240 if ((vd = vdev_lookup_top(spa, vdev)) == NULL) 1241 return (ENXIO); 1242 1243 if ((offset >> vd->vdev_ms_shift) >= vd->vdev_ms_count) 1244 return (ENXIO); 1245 1246 if (DVA_GET_GANG(dva)) { 1247 zio_gbh_phys_t gbh; 1248 blkptr_t blk = *bp; 1249 int g; 1250 1251 /* LINTED - compile time assert */ 1252 ASSERT(sizeof (zio_gbh_phys_t) == SPA_GANGBLOCKSIZE); 1253 size = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE); 1254 DVA_SET_GANG(&blk.blk_dva[0], 0); 1255 DVA_SET_ASIZE(&blk.blk_dva[0], size); 1256 BP_SET_CHECKSUM(&blk, ZIO_CHECKSUM_GANG_HEADER); 1257 BP_SET_PSIZE(&blk, SPA_GANGBLOCKSIZE); 1258 BP_SET_LSIZE(&blk, SPA_GANGBLOCKSIZE); 1259 BP_SET_COMPRESS(&blk, ZIO_COMPRESS_OFF); 1260 error = zio_wait(zio_read(NULL, spa, &blk, 1261 &gbh, SPA_GANGBLOCKSIZE, NULL, NULL, 1262 ZIO_PRIORITY_SYNC_READ, 1263 ZIO_FLAG_CANFAIL | ZIO_FLAG_CONFIG_HELD, zb)); 1264 if (error) 1265 return (error); 1266 if (BP_SHOULD_BYTESWAP(&blk)) 1267 byteswap_uint64_array(&gbh, SPA_GANGBLOCKSIZE); 1268 for (g = 0; g < SPA_GBH_NBLKPTRS; g++) { 1269 if (gbh.zg_blkptr[g].blk_birth == 0) 1270 break; 1271 error = zdb_space_map_claim(spa, &gbh.zg_blkptr[g], zb); 1272 if (error) 1273 return (error); 1274 } 1275 } 1276 1277 msp = vd->vdev_ms[offset >> vd->vdev_ms_shift]; 1278 allocmap = &msp->ms_allocmap[0]; 1279 freemap = &msp->ms_freemap[0]; 1280 1281 mutex_enter(&msp->ms_lock); 1282 if (space_map_contains(freemap, offset, size)) { 1283 mutex_exit(&msp->ms_lock); 1284 return (EAGAIN); /* allocated more than once */ 1285 } 1286 1287 if (!space_map_contains(allocmap, offset, size)) { 1288 mutex_exit(&msp->ms_lock); 1289 return (ESTALE); /* not allocated at all */ 1290 } 1291 1292 space_map_remove(allocmap, offset, size); 1293 space_map_add(freemap, offset, size); 1294 1295 mutex_exit(&msp->ms_lock); 1296 1297 return (0); 1298 } 1299 1300 static void 1301 zdb_leak(space_map_t *sm, uint64_t start, uint64_t size) 1302 { 1303 metaslab_t *msp; 1304 1305 /* LINTED */ 1306 msp = (metaslab_t *)((char *)sm - offsetof(metaslab_t, ms_allocmap[0])); 1307 1308 (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n", 1309 (u_longlong_t)msp->ms_group->mg_vd->vdev_id, 1310 (u_longlong_t)start, 1311 (u_longlong_t)size); 1312 } 1313 1314 static void 1315 zdb_space_map_unload(spa_t *spa) 1316 { 1317 vdev_t *rvd = spa->spa_root_vdev; 1318 vdev_t *vd; 1319 int c, m; 1320 1321 for (c = 0; c < rvd->vdev_children; c++) { 1322 vd = rvd->vdev_child[c]; 1323 for (m = 0; m < vd->vdev_ms_count; m++) { 1324 metaslab_t *msp = vd->vdev_ms[m]; 1325 mutex_enter(&msp->ms_lock); 1326 space_map_vacate(&msp->ms_allocmap[0], zdb_leak, 1327 &msp->ms_allocmap[0]); 1328 space_map_unload(&msp->ms_allocmap[0]); 1329 space_map_vacate(&msp->ms_freemap[0], NULL, NULL); 1330 mutex_exit(&msp->ms_lock); 1331 } 1332 } 1333 } 1334 1335 static void 1336 zdb_refresh_ubsync(spa_t *spa) 1337 { 1338 uberblock_t ub = { 0 }; 1339 vdev_t *rvd = spa->spa_root_vdev; 1340 zio_t *zio; 1341 1342 /* 1343 * Reload the uberblock. 1344 */ 1345 zio = zio_root(spa, NULL, NULL, 1346 ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); 1347 vdev_uberblock_load(zio, rvd, &ub); 1348 (void) zio_wait(zio); 1349 1350 if (ub.ub_txg != 0) 1351 spa->spa_ubsync = ub; 1352 } 1353 1354 /* 1355 * Verify that the sum of the sizes of all blocks in the pool adds up 1356 * to the SPA's sa_alloc total. 1357 */ 1358 typedef struct zdb_blkstats { 1359 uint64_t zb_asize; 1360 uint64_t zb_lsize; 1361 uint64_t zb_psize; 1362 uint64_t zb_count; 1363 } zdb_blkstats_t; 1364 1365 #define DMU_OT_DEFERRED DMU_OT_NONE 1366 #define DMU_OT_TOTAL DMU_OT_NUMTYPES 1367 1368 #define ZB_TOTAL ZB_MAXLEVEL 1369 1370 typedef struct zdb_cb { 1371 zdb_blkstats_t zcb_type[ZB_TOTAL + 1][DMU_OT_TOTAL + 1]; 1372 uint64_t zcb_errors[256]; 1373 traverse_blk_cache_t *zcb_cache; 1374 int zcb_readfails; 1375 int zcb_haderrors; 1376 } zdb_cb_t; 1377 1378 static void 1379 zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, int type) 1380 { 1381 int i, error; 1382 1383 for (i = 0; i < 4; i++) { 1384 int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL; 1385 int t = (i & 1) ? type : DMU_OT_TOTAL; 1386 zdb_blkstats_t *zb = &zcb->zcb_type[l][t]; 1387 1388 zb->zb_asize += BP_GET_ASIZE(bp); 1389 zb->zb_lsize += BP_GET_LSIZE(bp); 1390 zb->zb_psize += BP_GET_PSIZE(bp); 1391 zb->zb_count++; 1392 } 1393 1394 if (dump_opt['L']) 1395 return; 1396 1397 error = zdb_space_map_claim(spa, bp, &zcb->zcb_cache->bc_bookmark); 1398 1399 if (error == 0) 1400 return; 1401 1402 if (error == EAGAIN) 1403 (void) fatal("double-allocation, bp=%p", bp); 1404 1405 if (error == ESTALE) 1406 (void) fatal("reference to freed block, bp=%p", bp); 1407 1408 (void) fatal("fatal error %d in bp %p", error, bp); 1409 } 1410 1411 static int 1412 zdb_blkptr_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg) 1413 { 1414 zbookmark_t *zb = &bc->bc_bookmark; 1415 zdb_cb_t *zcb = arg; 1416 blkptr_t *bp = &bc->bc_blkptr; 1417 dmu_object_type_t type = BP_GET_TYPE(bp); 1418 char blkbuf[BP_SPRINTF_LEN]; 1419 int error = 0; 1420 1421 if (bc->bc_errno) { 1422 if (zcb->zcb_readfails++ < 10 && dump_opt['L']) { 1423 zdb_refresh_ubsync(spa); 1424 error = EAGAIN; 1425 } else { 1426 zcb->zcb_haderrors = 1; 1427 zcb->zcb_errors[bc->bc_errno]++; 1428 error = ERESTART; 1429 } 1430 1431 if (dump_opt['b'] >= 3 || (dump_opt['b'] >= 2 && bc->bc_errno)) 1432 sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp); 1433 else 1434 blkbuf[0] = '\0'; 1435 1436 (void) printf("zdb_blkptr_cb: Got error %d reading " 1437 "<%llu, %llu, %lld, %llx> %s -- %s\n", 1438 bc->bc_errno, 1439 (u_longlong_t)zb->zb_objset, 1440 (u_longlong_t)zb->zb_object, 1441 (u_longlong_t)zb->zb_level, 1442 (u_longlong_t)zb->zb_blkid, 1443 blkbuf, 1444 error == EAGAIN ? "retrying" : "skipping"); 1445 1446 return (error); 1447 } 1448 1449 zcb->zcb_readfails = 0; 1450 1451 ASSERT(bp->blk_birth != 0); 1452 1453 zdb_count_block(spa, zcb, bp, type); 1454 1455 if (dump_opt['b'] >= 4) { 1456 sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp); 1457 (void) printf("objset %llu object %llu offset 0x%llx %s\n", 1458 (u_longlong_t)zb->zb_objset, 1459 (u_longlong_t)zb->zb_object, 1460 (u_longlong_t)blkid2offset(bc->bc_dnode, 1461 zb->zb_level, zb->zb_blkid), 1462 blkbuf); 1463 } 1464 1465 return (0); 1466 } 1467 1468 static int 1469 dump_block_stats(spa_t *spa) 1470 { 1471 traverse_handle_t *th; 1472 zdb_cb_t zcb = { 0 }; 1473 traverse_blk_cache_t dummy_cache = { 0 }; 1474 zdb_blkstats_t *zb, *tzb; 1475 uint64_t alloc, space; 1476 int leaks = 0; 1477 int advance = zdb_advance; 1478 int flags; 1479 int e; 1480 1481 zcb.zcb_cache = &dummy_cache; 1482 1483 if (dump_opt['c']) 1484 advance |= ADVANCE_DATA; 1485 1486 advance |= ADVANCE_PRUNE | ADVANCE_ZIL; 1487 1488 (void) printf("\nTraversing all blocks to %sverify" 1489 " nothing leaked ...\n", 1490 dump_opt['c'] ? "verify checksums and " : ""); 1491 1492 /* 1493 * Load all space maps. As we traverse the pool, if we find a block 1494 * that's not in its space map, that indicates a double-allocation, 1495 * reference to a freed block, or an unclaimed block. Otherwise we 1496 * remove the block from the space map. If the space maps are not 1497 * empty when we're done, that indicates leaked blocks. 1498 */ 1499 if (!dump_opt['L']) 1500 zdb_space_map_load(spa); 1501 1502 /* 1503 * If there's a deferred-free bplist, process that first. 1504 */ 1505 if (spa->spa_sync_bplist_obj != 0) { 1506 bplist_t *bpl = &spa->spa_sync_bplist; 1507 blkptr_t blk; 1508 uint64_t itor = 0; 1509 1510 VERIFY(0 == bplist_open(bpl, spa->spa_meta_objset, 1511 spa->spa_sync_bplist_obj)); 1512 1513 while (bplist_iterate(bpl, &itor, &blk) == 0) { 1514 zdb_count_block(spa, &zcb, &blk, DMU_OT_DEFERRED); 1515 if (dump_opt['b'] >= 4) { 1516 char blkbuf[BP_SPRINTF_LEN]; 1517 sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &blk); 1518 (void) printf("[%s] %s\n", 1519 "deferred free", blkbuf); 1520 } 1521 } 1522 1523 bplist_close(bpl); 1524 } 1525 1526 /* 1527 * Now traverse the pool. If we're reading all data to verify 1528 * checksums, do a scrubbing read so that we validate all copies. 1529 */ 1530 flags = ZIO_FLAG_CANFAIL; 1531 if (advance & ADVANCE_DATA) 1532 flags |= ZIO_FLAG_SCRUB; 1533 th = traverse_init(spa, zdb_blkptr_cb, &zcb, advance, flags); 1534 th->th_noread = zdb_noread; 1535 1536 traverse_add_pool(th, 0, spa_first_txg(spa) + TXG_CONCURRENT_STATES); 1537 1538 while (traverse_more(th) == EAGAIN) 1539 continue; 1540 1541 traverse_fini(th); 1542 1543 if (zcb.zcb_haderrors) { 1544 (void) printf("\nError counts:\n\n"); 1545 (void) printf("\t%5s %s\n", "errno", "count"); 1546 for (e = 0; e < 256; e++) { 1547 if (zcb.zcb_errors[e] != 0) { 1548 (void) printf("\t%5d %llu\n", 1549 e, (u_longlong_t)zcb.zcb_errors[e]); 1550 } 1551 } 1552 } 1553 1554 /* 1555 * Report any leaked segments. 1556 */ 1557 if (!dump_opt['L']) 1558 zdb_space_map_unload(spa); 1559 1560 if (dump_opt['L']) 1561 (void) printf("\n\n *** Live pool traversal; " 1562 "block counts are only approximate ***\n\n"); 1563 1564 alloc = spa_get_alloc(spa); 1565 space = spa_get_space(spa); 1566 1567 tzb = &zcb.zcb_type[ZB_TOTAL][DMU_OT_TOTAL]; 1568 1569 if (tzb->zb_asize == alloc) { 1570 (void) printf("\n\tNo leaks (block sum matches space" 1571 " maps exactly)\n"); 1572 } else { 1573 (void) printf("block traversal size %llu != alloc %llu " 1574 "(leaked %lld)\n", 1575 (u_longlong_t)tzb->zb_asize, 1576 (u_longlong_t)alloc, 1577 (u_longlong_t)(alloc - tzb->zb_asize)); 1578 leaks = 1; 1579 } 1580 1581 if (tzb->zb_count == 0) 1582 return (2); 1583 1584 (void) printf("\n"); 1585 (void) printf("\tbp count: %10llu\n", 1586 (u_longlong_t)tzb->zb_count); 1587 (void) printf("\tbp logical: %10llu\t avg: %6llu\n", 1588 (u_longlong_t)tzb->zb_lsize, 1589 (u_longlong_t)(tzb->zb_lsize / tzb->zb_count)); 1590 (void) printf("\tbp physical: %10llu\t avg:" 1591 " %6llu\tcompression: %6.2f\n", 1592 (u_longlong_t)tzb->zb_psize, 1593 (u_longlong_t)(tzb->zb_psize / tzb->zb_count), 1594 (double)tzb->zb_lsize / tzb->zb_psize); 1595 (void) printf("\tbp allocated: %10llu\t avg:" 1596 " %6llu\tcompression: %6.2f\n", 1597 (u_longlong_t)tzb->zb_asize, 1598 (u_longlong_t)(tzb->zb_asize / tzb->zb_count), 1599 (double)tzb->zb_lsize / tzb->zb_asize); 1600 (void) printf("\tSPA allocated: %10llu\tused: %5.2f%%\n", 1601 (u_longlong_t)alloc, 100.0 * alloc / space); 1602 1603 if (dump_opt['b'] >= 2) { 1604 int l, t, level; 1605 (void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE" 1606 "\t avg\t comp\t%%Total\tType\n"); 1607 1608 for (t = 0; t <= DMU_OT_NUMTYPES; t++) { 1609 char csize[6], lsize[6], psize[6], asize[6], avg[6]; 1610 char *typename; 1611 1612 typename = t == DMU_OT_DEFERRED ? "deferred free" : 1613 t == DMU_OT_TOTAL ? "Total" : dmu_ot[t].ot_name; 1614 1615 if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) { 1616 (void) printf("%6s\t%5s\t%5s\t%5s" 1617 "\t%5s\t%5s\t%6s\t%s\n", 1618 "-", 1619 "-", 1620 "-", 1621 "-", 1622 "-", 1623 "-", 1624 "-", 1625 typename); 1626 continue; 1627 } 1628 1629 for (l = ZB_TOTAL - 1; l >= -1; l--) { 1630 level = (l == -1 ? ZB_TOTAL : l); 1631 zb = &zcb.zcb_type[level][t]; 1632 1633 if (zb->zb_asize == 0) 1634 continue; 1635 1636 if (dump_opt['b'] < 3 && level != ZB_TOTAL) 1637 continue; 1638 1639 if (level == 0 && zb->zb_asize == 1640 zcb.zcb_type[ZB_TOTAL][t].zb_asize) 1641 continue; 1642 1643 nicenum(zb->zb_count, csize); 1644 nicenum(zb->zb_lsize, lsize); 1645 nicenum(zb->zb_psize, psize); 1646 nicenum(zb->zb_asize, asize); 1647 nicenum(zb->zb_asize / zb->zb_count, avg); 1648 1649 (void) printf("%6s\t%5s\t%5s\t%5s\t%5s" 1650 "\t%5.2f\t%6.2f\t", 1651 csize, lsize, psize, asize, avg, 1652 (double)zb->zb_lsize / zb->zb_psize, 1653 100.0 * zb->zb_asize / tzb->zb_asize); 1654 1655 if (level == ZB_TOTAL) 1656 (void) printf("%s\n", typename); 1657 else 1658 (void) printf(" L%d %s\n", 1659 level, typename); 1660 } 1661 } 1662 } 1663 1664 (void) printf("\n"); 1665 1666 if (leaks) 1667 return (2); 1668 1669 if (zcb.zcb_haderrors) 1670 return (3); 1671 1672 return (0); 1673 } 1674 1675 static void 1676 dump_zpool(spa_t *spa) 1677 { 1678 dsl_pool_t *dp = spa_get_dsl(spa); 1679 int rc = 0; 1680 1681 if (dump_opt['u']) 1682 dump_uberblock(&spa->spa_uberblock); 1683 1684 if (dump_opt['d'] || dump_opt['i']) { 1685 dump_dir(dp->dp_meta_objset); 1686 if (dump_opt['d'] >= 3) { 1687 dump_bplist(dp->dp_meta_objset, 1688 spa->spa_sync_bplist_obj, "Deferred frees"); 1689 dump_dtl(spa->spa_root_vdev, 0); 1690 dump_metaslabs(spa); 1691 } 1692 dmu_objset_find(spa->spa_name, dump_one_dir, NULL, 1693 DS_FIND_SNAPSHOTS); 1694 } 1695 1696 if (dump_opt['b'] || dump_opt['c']) 1697 rc = dump_block_stats(spa); 1698 1699 if (dump_opt['s']) 1700 show_pool_stats(spa); 1701 1702 if (rc != 0) 1703 exit(rc); 1704 } 1705 1706 int 1707 main(int argc, char **argv) 1708 { 1709 int i, c; 1710 struct rlimit rl = { 1024, 1024 }; 1711 spa_t *spa; 1712 objset_t *os = NULL; 1713 char *endstr; 1714 int dump_all = 1; 1715 int verbose = 0; 1716 int error; 1717 int flag, set; 1718 vdev_knob_t *vk; 1719 1720 (void) setrlimit(RLIMIT_NOFILE, &rl); 1721 1722 dprintf_setup(&argc, argv); 1723 1724 while ((c = getopt(argc, argv, "udibcsvCLO:B:Ul")) != -1) { 1725 switch (c) { 1726 case 'u': 1727 case 'd': 1728 case 'i': 1729 case 'b': 1730 case 'c': 1731 case 's': 1732 case 'C': 1733 case 'l': 1734 dump_opt[c]++; 1735 dump_all = 0; 1736 break; 1737 case 'L': 1738 dump_opt[c]++; 1739 break; 1740 case 'O': 1741 endstr = optarg; 1742 if (endstr[0] == '!') { 1743 endstr++; 1744 set = 0; 1745 } else { 1746 set = 1; 1747 } 1748 if (strcmp(endstr, "post") == 0) { 1749 flag = ADVANCE_PRE; 1750 set = !set; 1751 } else if (strcmp(endstr, "pre") == 0) { 1752 flag = ADVANCE_PRE; 1753 } else if (strcmp(endstr, "prune") == 0) { 1754 flag = ADVANCE_PRUNE; 1755 } else if (strcmp(endstr, "data") == 0) { 1756 flag = ADVANCE_DATA; 1757 } else if (strcmp(endstr, "holes") == 0) { 1758 flag = ADVANCE_HOLES; 1759 } else { 1760 usage(); 1761 } 1762 if (set) 1763 zdb_advance |= flag; 1764 else 1765 zdb_advance &= ~flag; 1766 break; 1767 case 'B': 1768 endstr = optarg - 1; 1769 zdb_noread.zb_objset = strtoull(endstr + 1, &endstr, 0); 1770 zdb_noread.zb_object = strtoull(endstr + 1, &endstr, 0); 1771 zdb_noread.zb_level = strtol(endstr + 1, &endstr, 0); 1772 zdb_noread.zb_blkid = strtoull(endstr + 1, &endstr, 16); 1773 (void) printf("simulating bad block " 1774 "<%llu, %llu, %lld, %llx>\n", 1775 (u_longlong_t)zdb_noread.zb_objset, 1776 (u_longlong_t)zdb_noread.zb_object, 1777 (u_longlong_t)zdb_noread.zb_level, 1778 (u_longlong_t)zdb_noread.zb_blkid); 1779 break; 1780 case 'v': 1781 verbose++; 1782 break; 1783 case 'U': 1784 spa_config_dir = "/tmp"; 1785 break; 1786 default: 1787 usage(); 1788 break; 1789 } 1790 } 1791 1792 kernel_init(FREAD); 1793 1794 /* 1795 * Disable vdev caching. If we don't do this, live pool traversal 1796 * won't make progress because it will never see disk updates. 1797 */ 1798 for (vk = vdev_knob_next(NULL); vk != NULL; vk = vdev_knob_next(vk)) { 1799 if (strcmp(vk->vk_name, "cache_size") == 0) 1800 vk->vk_default = 0; 1801 } 1802 1803 for (c = 0; c < 256; c++) { 1804 if (dump_all && c != 'L' && c != 'l') 1805 dump_opt[c] = 1; 1806 if (dump_opt[c]) 1807 dump_opt[c] += verbose; 1808 } 1809 1810 argc -= optind; 1811 argv += optind; 1812 1813 if (argc < 1) { 1814 if (dump_opt['C']) { 1815 dump_config(NULL); 1816 return (0); 1817 } 1818 usage(); 1819 } 1820 1821 if (dump_opt['l']) { 1822 dump_label(argv[0]); 1823 return (0); 1824 } 1825 1826 if (dump_opt['C']) 1827 dump_config(argv[0]); 1828 1829 if (strchr(argv[0], '/') != NULL) { 1830 error = dmu_objset_open(argv[0], DMU_OST_ANY, 1831 DS_MODE_STANDARD | DS_MODE_READONLY, &os); 1832 } else { 1833 error = spa_open(argv[0], &spa, FTAG); 1834 } 1835 1836 if (error) 1837 fatal("can't open %s: error %d", argv[0], error); 1838 1839 argv++; 1840 if (--argc > 0) { 1841 zopt_objects = argc; 1842 zopt_object = calloc(zopt_objects, sizeof (uint64_t)); 1843 for (i = 0; i < zopt_objects; i++) { 1844 errno = 0; 1845 zopt_object[i] = strtoull(argv[i], NULL, 0); 1846 if (zopt_object[i] == 0 && errno != 0) 1847 fatal("bad object number %s: %s", 1848 argv[i], strerror(errno)); 1849 } 1850 } 1851 1852 if (os != NULL) { 1853 dump_dir(os); 1854 dmu_objset_close(os); 1855 } else { 1856 dump_zpool(spa); 1857 spa_close(spa, FTAG); 1858 } 1859 1860 kernel_fini(); 1861 1862 return (0); 1863 } 1864