1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <stdio.h> 29 #include <stdlib.h> 30 #include <sys/zfs_context.h> 31 #include <sys/spa.h> 32 #include <sys/spa_impl.h> 33 #include <sys/dmu.h> 34 #include <sys/zap.h> 35 #include <sys/fs/zfs.h> 36 #include <sys/zfs_znode.h> 37 #include <sys/vdev.h> 38 #include <sys/vdev_impl.h> 39 #include <sys/metaslab_impl.h> 40 #include <sys/dmu_objset.h> 41 #include <sys/dsl_dir.h> 42 #include <sys/dsl_dataset.h> 43 #include <sys/dsl_pool.h> 44 #include <sys/dbuf.h> 45 #include <sys/zil.h> 46 #include <sys/zil_impl.h> 47 #include <sys/stat.h> 48 #include <sys/resource.h> 49 #include <sys/dmu_traverse.h> 50 #include <sys/zio_checksum.h> 51 #include <sys/zio_compress.h> 52 53 const char cmdname[] = "zdb"; 54 uint8_t dump_opt[256]; 55 56 typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size); 57 58 extern void dump_intent_log(zilog_t *); 59 uint64_t *zopt_object = NULL; 60 int zopt_objects = 0; 61 int zdb_advance = ADVANCE_PRE; 62 zbookmark_t zdb_noread = { 0, 0, ZB_NO_LEVEL, 0 }; 63 64 /* 65 * These libumem hooks provide a reasonable set of defaults for the allocator's 66 * debugging facilities. 67 */ 68 const char * 69 _umem_debug_init() 70 { 71 return ("default,verbose"); /* $UMEM_DEBUG setting */ 72 } 73 74 const char * 75 _umem_logging_init(void) 76 { 77 return ("fail,contents"); /* $UMEM_LOGGING setting */ 78 } 79 80 static void 81 usage(void) 82 { 83 (void) fprintf(stderr, 84 "Usage: %s [-udibcsvLU] [-O order] [-B os:obj:level:blkid] " 85 "dataset [object...]\n" 86 " %s -C [pool]\n" 87 " %s -l dev\n", 88 cmdname, cmdname, cmdname); 89 90 (void) fprintf(stderr, " -u uberblock\n"); 91 (void) fprintf(stderr, " -d datasets\n"); 92 (void) fprintf(stderr, " -C cached pool configuration\n"); 93 (void) fprintf(stderr, " -i intent logs\n"); 94 (void) fprintf(stderr, " -b block statistics\n"); 95 (void) fprintf(stderr, " -c checksum all data blocks\n"); 96 (void) fprintf(stderr, " -s report stats on zdb's I/O\n"); 97 (void) fprintf(stderr, " -v verbose (applies to all others)\n"); 98 (void) fprintf(stderr, " -l dump label contents\n"); 99 (void) fprintf(stderr, " -L live pool (allows some errors)\n"); 100 (void) fprintf(stderr, " -O [!]<pre|post|prune|data|holes> " 101 "visitation order\n"); 102 (void) fprintf(stderr, " -U use zpool.cache in /tmp\n"); 103 (void) fprintf(stderr, " -B objset:object:level:blkid -- " 104 "simulate bad block\n"); 105 (void) fprintf(stderr, "Specify an option more than once (e.g. -bb) " 106 "to make only that option verbose\n"); 107 (void) fprintf(stderr, "Default is to dump everything non-verbosely\n"); 108 exit(1); 109 } 110 111 static void 112 fatal(const char *fmt, ...) 113 { 114 va_list ap; 115 116 va_start(ap, fmt); 117 (void) fprintf(stderr, "%s: ", cmdname); 118 (void) vfprintf(stderr, fmt, ap); 119 va_end(ap); 120 (void) fprintf(stderr, "\n"); 121 122 exit(1); 123 } 124 125 static void 126 dump_nvlist(nvlist_t *list, int indent) 127 { 128 nvpair_t *elem = NULL; 129 130 while ((elem = nvlist_next_nvpair(list, elem)) != NULL) { 131 switch (nvpair_type(elem)) { 132 case DATA_TYPE_STRING: 133 { 134 char *value; 135 136 VERIFY(nvpair_value_string(elem, &value) == 0); 137 (void) printf("%*s%s='%s'\n", indent, "", 138 nvpair_name(elem), value); 139 } 140 break; 141 142 case DATA_TYPE_UINT64: 143 { 144 uint64_t value; 145 146 VERIFY(nvpair_value_uint64(elem, &value) == 0); 147 (void) printf("%*s%s=%llu\n", indent, "", 148 nvpair_name(elem), (u_longlong_t)value); 149 } 150 break; 151 152 case DATA_TYPE_NVLIST: 153 { 154 nvlist_t *value; 155 156 VERIFY(nvpair_value_nvlist(elem, &value) == 0); 157 (void) printf("%*s%s\n", indent, "", 158 nvpair_name(elem)); 159 dump_nvlist(value, indent + 4); 160 } 161 break; 162 163 case DATA_TYPE_NVLIST_ARRAY: 164 { 165 nvlist_t **value; 166 uint_t c, count; 167 168 VERIFY(nvpair_value_nvlist_array(elem, &value, 169 &count) == 0); 170 171 for (c = 0; c < count; c++) { 172 (void) printf("%*s%s[%u]\n", indent, "", 173 nvpair_name(elem), c); 174 dump_nvlist(value[c], indent + 8); 175 } 176 } 177 break; 178 179 default: 180 181 (void) printf("bad config type %d for %s\n", 182 nvpair_type(elem), nvpair_name(elem)); 183 } 184 } 185 } 186 187 /* ARGSUSED */ 188 static void 189 dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size) 190 { 191 nvlist_t *nv; 192 size_t nvsize = *(uint64_t *)data; 193 char *packed = umem_alloc(nvsize, UMEM_NOFAIL); 194 195 VERIFY(0 == dmu_read(os, object, 0, nvsize, packed)); 196 197 VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0); 198 199 umem_free(packed, nvsize); 200 201 dump_nvlist(nv, 8); 202 203 nvlist_free(nv); 204 } 205 206 const char dump_zap_stars[] = "****************************************"; 207 const int dump_zap_width = sizeof (dump_zap_stars) - 1; 208 209 static void 210 dump_zap_histogram(uint64_t histo[ZAP_HISTOGRAM_SIZE]) 211 { 212 int i; 213 int minidx = ZAP_HISTOGRAM_SIZE - 1; 214 int maxidx = 0; 215 uint64_t max = 0; 216 217 for (i = 0; i < ZAP_HISTOGRAM_SIZE; i++) { 218 if (histo[i] > max) 219 max = histo[i]; 220 if (histo[i] > 0 && i > maxidx) 221 maxidx = i; 222 if (histo[i] > 0 && i < minidx) 223 minidx = i; 224 } 225 226 if (max < dump_zap_width) 227 max = dump_zap_width; 228 229 for (i = minidx; i <= maxidx; i++) 230 (void) printf("\t\t\t%u: %6llu %s\n", i, (u_longlong_t)histo[i], 231 &dump_zap_stars[(max - histo[i]) * dump_zap_width / max]); 232 } 233 234 static void 235 dump_zap_stats(objset_t *os, uint64_t object) 236 { 237 int error; 238 zap_stats_t zs; 239 240 error = zap_get_stats(os, object, &zs); 241 if (error) 242 return; 243 244 if (zs.zs_ptrtbl_len == 0) { 245 ASSERT(zs.zs_num_blocks == 1); 246 (void) printf("\tmicrozap: %llu bytes, %llu entries\n", 247 (u_longlong_t)zs.zs_blocksize, 248 (u_longlong_t)zs.zs_num_entries); 249 return; 250 } 251 252 (void) printf("\tFat ZAP stats:\n"); 253 (void) printf("\t\tPointer table: %llu elements\n", 254 (u_longlong_t)zs.zs_ptrtbl_len); 255 (void) printf("\t\tZAP entries: %llu\n", 256 (u_longlong_t)zs.zs_num_entries); 257 (void) printf("\t\tLeaf blocks: %llu\n", 258 (u_longlong_t)zs.zs_num_leafs); 259 (void) printf("\t\tTotal blocks: %llu\n", 260 (u_longlong_t)zs.zs_num_blocks); 261 (void) printf("\t\tOversize blocks: %llu\n", 262 (u_longlong_t)zs.zs_num_blocks_large); 263 264 (void) printf("\t\tLeafs with 2^n pointers:\n"); 265 dump_zap_histogram(zs.zs_leafs_with_2n_pointers); 266 267 (void) printf("\t\tLeafs with n chained:\n"); 268 dump_zap_histogram(zs.zs_leafs_with_n_chained); 269 270 (void) printf("\t\tBlocks with n*5 entries:\n"); 271 dump_zap_histogram(zs.zs_blocks_with_n5_entries); 272 273 (void) printf("\t\tBlocks n/10 full:\n"); 274 dump_zap_histogram(zs.zs_blocks_n_tenths_full); 275 276 (void) printf("\t\tEntries with n chunks:\n"); 277 dump_zap_histogram(zs.zs_entries_using_n_chunks); 278 279 (void) printf("\t\tBuckets with n entries:\n"); 280 dump_zap_histogram(zs.zs_buckets_with_n_entries); 281 } 282 283 /*ARGSUSED*/ 284 static void 285 dump_none(objset_t *os, uint64_t object, void *data, size_t size) 286 { 287 } 288 289 /*ARGSUSED*/ 290 void 291 dump_uint8(objset_t *os, uint64_t object, void *data, size_t size) 292 { 293 } 294 295 /*ARGSUSED*/ 296 static void 297 dump_uint64(objset_t *os, uint64_t object, void *data, size_t size) 298 { 299 } 300 301 /*ARGSUSED*/ 302 static void 303 dump_zap(objset_t *os, uint64_t object, void *data, size_t size) 304 { 305 zap_cursor_t zc; 306 zap_attribute_t attr; 307 void *prop; 308 int i; 309 310 dump_zap_stats(os, object); 311 (void) printf("\n"); 312 313 for (zap_cursor_init(&zc, os, object); 314 zap_cursor_retrieve(&zc, &attr) == 0; 315 zap_cursor_advance(&zc)) { 316 (void) printf("\t\t%s = ", attr.za_name); 317 if (attr.za_num_integers == 0) { 318 (void) printf("\n"); 319 continue; 320 } 321 prop = umem_zalloc(attr.za_num_integers * 322 attr.za_integer_length, UMEM_NOFAIL); 323 (void) zap_lookup(os, object, attr.za_name, 324 attr.za_integer_length, attr.za_num_integers, prop); 325 if (attr.za_integer_length == 1) { 326 (void) printf("%s", (char *)prop); 327 } else { 328 for (i = 0; i < attr.za_num_integers; i++) { 329 switch (attr.za_integer_length) { 330 case 2: 331 (void) printf("%u ", 332 ((uint16_t *)prop)[i]); 333 break; 334 case 4: 335 (void) printf("%u ", 336 ((uint32_t *)prop)[i]); 337 break; 338 case 8: 339 (void) printf("%lld ", 340 (u_longlong_t)((int64_t *)prop)[i]); 341 break; 342 } 343 } 344 } 345 (void) printf("\n"); 346 umem_free(prop, attr.za_num_integers * attr.za_integer_length); 347 } 348 zap_cursor_fini(&zc); 349 } 350 351 static void 352 dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm) 353 { 354 uint64_t alloc, offset, entry; 355 int mapshift = sm->sm_shift; 356 uint64_t mapstart = sm->sm_start; 357 char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID" }; 358 359 if (smo->smo_object == 0) 360 return; 361 362 /* 363 * Print out the freelist entries in both encoded and decoded form. 364 */ 365 alloc = 0; 366 for (offset = 0; offset < smo->smo_objsize; offset += sizeof (entry)) { 367 VERIFY(0 == dmu_read(os, smo->smo_object, offset, 368 sizeof (entry), &entry)); 369 if (SM_DEBUG_DECODE(entry)) { 370 (void) printf("\t\t[%4llu] %s: txg %llu, pass %llu\n", 371 (u_longlong_t)(offset / sizeof (entry)), 372 ddata[SM_DEBUG_ACTION_DECODE(entry)], 373 SM_DEBUG_TXG_DECODE(entry), 374 SM_DEBUG_SYNCPASS_DECODE(entry)); 375 } else { 376 (void) printf("\t\t[%4llu] %c range:" 377 " %08llx-%08llx size: %06llx\n", 378 (u_longlong_t)(offset / sizeof (entry)), 379 SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F', 380 (SM_OFFSET_DECODE(entry) << mapshift) + mapstart, 381 (SM_OFFSET_DECODE(entry) << mapshift) + mapstart + 382 (SM_RUN_DECODE(entry) << mapshift), 383 (SM_RUN_DECODE(entry) << mapshift)); 384 if (SM_TYPE_DECODE(entry) == SM_ALLOC) 385 alloc += SM_RUN_DECODE(entry) << mapshift; 386 else 387 alloc -= SM_RUN_DECODE(entry) << mapshift; 388 } 389 } 390 if (alloc != smo->smo_alloc) { 391 (void) printf("space_map_object alloc (%llu) INCONSISTENT " 392 "with space map summary (%llu)\n", 393 (u_longlong_t)smo->smo_alloc, (u_longlong_t)alloc); 394 } 395 } 396 397 static void 398 dump_metaslab(metaslab_t *msp) 399 { 400 char freebuf[5]; 401 space_map_obj_t *smo = msp->ms_smo; 402 vdev_t *vd = msp->ms_group->mg_vd; 403 spa_t *spa = vd->vdev_spa; 404 405 nicenum(msp->ms_map.sm_size - smo->smo_alloc, freebuf); 406 407 if (dump_opt['d'] <= 5) { 408 (void) printf("\t%10llx %10llu %5s\n", 409 (u_longlong_t)msp->ms_map.sm_start, 410 (u_longlong_t)smo->smo_object, 411 freebuf); 412 return; 413 } 414 415 (void) printf( 416 "\tvdev %llu offset %08llx spacemap %4llu free %5s\n", 417 (u_longlong_t)vd->vdev_id, (u_longlong_t)msp->ms_map.sm_start, 418 (u_longlong_t)smo->smo_object, freebuf); 419 420 ASSERT(msp->ms_map.sm_size == (1ULL << vd->vdev_ms_shift)); 421 422 dump_spacemap(spa->spa_meta_objset, smo, &msp->ms_map); 423 } 424 425 static void 426 dump_metaslabs(spa_t *spa) 427 { 428 vdev_t *rvd = spa->spa_root_vdev; 429 vdev_t *vd; 430 int c, m; 431 432 (void) printf("\nMetaslabs:\n"); 433 434 for (c = 0; c < rvd->vdev_children; c++) { 435 vd = rvd->vdev_child[c]; 436 437 spa_config_enter(spa, RW_READER, FTAG); 438 (void) printf("\n vdev %llu = %s\n\n", 439 (u_longlong_t)vd->vdev_id, vdev_description(vd)); 440 spa_config_exit(spa, FTAG); 441 442 if (dump_opt['d'] <= 5) { 443 (void) printf("\t%10s %10s %5s\n", 444 "offset", "spacemap", "free"); 445 (void) printf("\t%10s %10s %5s\n", 446 "------", "--------", "----"); 447 } 448 for (m = 0; m < vd->vdev_ms_count; m++) 449 dump_metaslab(vd->vdev_ms[m]); 450 (void) printf("\n"); 451 } 452 } 453 454 static void 455 dump_dtl(vdev_t *vd, int indent) 456 { 457 avl_tree_t *t = &vd->vdev_dtl_map.sm_root; 458 spa_t *spa = vd->vdev_spa; 459 space_seg_t *ss; 460 vdev_t *pvd; 461 int c; 462 463 if (indent == 0) 464 (void) printf("\nDirty time logs:\n\n"); 465 466 spa_config_enter(spa, RW_READER, FTAG); 467 (void) printf("\t%*s%s\n", indent, "", vdev_description(vd)); 468 spa_config_exit(spa, FTAG); 469 470 for (ss = avl_first(t); ss; ss = AVL_NEXT(t, ss)) { 471 /* 472 * Everything in this DTL must appear in all parent DTL unions. 473 */ 474 for (pvd = vd; pvd; pvd = pvd->vdev_parent) 475 ASSERT(vdev_dtl_contains(&pvd->vdev_dtl_map, 476 ss->ss_start, ss->ss_end - ss->ss_start)); 477 (void) printf("\t%*soutage [%llu,%llu] length %llu\n", 478 indent, "", 479 (u_longlong_t)ss->ss_start, 480 (u_longlong_t)ss->ss_end - 1, 481 (u_longlong_t)ss->ss_end - ss->ss_start); 482 } 483 484 (void) printf("\n"); 485 486 if (dump_opt['d'] > 5 && vd->vdev_children == 0) { 487 dump_spacemap(vd->vdev_spa->spa_meta_objset, &vd->vdev_dtl, 488 &vd->vdev_dtl_map); 489 (void) printf("\n"); 490 } 491 492 for (c = 0; c < vd->vdev_children; c++) 493 dump_dtl(vd->vdev_child[c], indent + 4); 494 } 495 496 /*ARGSUSED*/ 497 static void 498 dump_dnode(objset_t *os, uint64_t object, void *data, size_t size) 499 { 500 } 501 502 static uint64_t 503 blkid2offset(dnode_phys_t *dnp, int level, uint64_t blkid) 504 { 505 if (level < 0) 506 return (blkid); 507 508 return ((blkid << (level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) * 509 dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT); 510 } 511 512 /* ARGSUSED */ 513 static int 514 zdb_indirect_cb(traverse_blk_cache_t *bc, spa_t *spa, void *a) 515 { 516 zbookmark_t *zb = &bc->bc_bookmark; 517 blkptr_t *bp = &bc->bc_blkptr; 518 dva_t *dva = &bp->blk_dva[0]; 519 void *data = bc->bc_data; 520 dnode_phys_t *dnp = bc->bc_dnode; 521 char buffer[300]; 522 int l; 523 524 if (bc->bc_errno) { 525 (void) sprintf(buffer, 526 "Error %d reading <%llu, %llu, %lld, %llu>: ", 527 bc->bc_errno, 528 (u_longlong_t)zb->zb_objset, 529 (u_longlong_t)zb->zb_object, 530 (u_longlong_t)zb->zb_level, 531 (u_longlong_t)zb->zb_blkid); 532 goto out; 533 } 534 535 if (zb->zb_level == -1) { 536 ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET); 537 ASSERT3U(BP_GET_LEVEL(bp), ==, 0); 538 } else { 539 ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type); 540 ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level); 541 } 542 543 if (zb->zb_level > 0) { 544 uint64_t fill = 0; 545 blkptr_t *bpx, *bpend; 546 547 for (bpx = data, bpend = bpx + BP_GET_LSIZE(bp) / sizeof (*bpx); 548 bpx < bpend; bpx++) { 549 if (bpx->blk_birth != 0) { 550 fill += bpx->blk_fill; 551 } else { 552 ASSERT(bpx->blk_fill == 0); 553 } 554 } 555 ASSERT3U(fill, ==, bp->blk_fill); 556 } 557 558 if (zb->zb_level == 0 && dnp->dn_type == DMU_OT_DNODE) { 559 uint64_t fill = 0; 560 dnode_phys_t *dnx, *dnend; 561 562 for (dnx = data, dnend = dnx + (BP_GET_LSIZE(bp)>>DNODE_SHIFT); 563 dnx < dnend; dnx++) { 564 if (dnx->dn_type != DMU_OT_NONE) 565 fill++; 566 } 567 ASSERT3U(fill, ==, bp->blk_fill); 568 } 569 570 (void) sprintf(buffer, "%16llx ", 571 (u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid)); 572 573 ASSERT(zb->zb_level >= 0); 574 575 for (l = dnp->dn_nlevels - 1; l >= -1; l--) { 576 if (l == zb->zb_level) { 577 (void) sprintf(buffer + strlen(buffer), "L%llx", 578 (u_longlong_t)zb->zb_level); 579 } else { 580 (void) sprintf(buffer + strlen(buffer), " "); 581 } 582 } 583 584 out: 585 if (bp->blk_birth == 0) { 586 (void) sprintf(buffer + strlen(buffer), "<hole>"); 587 (void) printf("%s\n", buffer); 588 } else { 589 // XXBP - Need to print number of active BPs here 590 (void) sprintf(buffer + strlen(buffer), 591 "vdev=%llu off=%llx %llxL/%llxP/%llxA F=%llu B=%llu", 592 (u_longlong_t)DVA_GET_VDEV(dva), 593 (u_longlong_t)DVA_GET_OFFSET(dva), 594 (u_longlong_t)BP_GET_LSIZE(bp), 595 (u_longlong_t)BP_GET_PSIZE(bp), 596 (u_longlong_t)DVA_GET_ASIZE(dva), 597 (u_longlong_t)bp->blk_fill, 598 (u_longlong_t)bp->blk_birth); 599 600 (void) printf("%s\n", buffer); 601 } 602 603 return (bc->bc_errno ? ERESTART : 0); 604 } 605 606 /*ARGSUSED*/ 607 static void 608 dump_indirect(objset_t *os, uint64_t object, void *data, size_t size) 609 { 610 traverse_handle_t *th; 611 uint64_t objset = dmu_objset_id(os); 612 int advance = zdb_advance; 613 614 (void) printf("Indirect blocks:\n"); 615 616 if (object == 0) 617 advance |= ADVANCE_DATA; 618 619 th = traverse_init(dmu_objset_spa(os), zdb_indirect_cb, NULL, advance, 620 ZIO_FLAG_CANFAIL); 621 th->th_noread = zdb_noread; 622 623 traverse_add_dnode(th, 0, -1ULL, objset, object); 624 625 while (traverse_more(th) == EAGAIN) 626 continue; 627 628 (void) printf("\n"); 629 630 traverse_fini(th); 631 } 632 633 /*ARGSUSED*/ 634 static void 635 dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size) 636 { 637 dsl_dir_phys_t *dd = data; 638 time_t crtime; 639 char used[6], compressed[6], uncompressed[6], quota[6], resv[6]; 640 641 if (dd == NULL) 642 return; 643 644 ASSERT(size == sizeof (*dd)); 645 646 crtime = dd->dd_creation_time; 647 nicenum(dd->dd_used_bytes, used); 648 nicenum(dd->dd_compressed_bytes, compressed); 649 nicenum(dd->dd_uncompressed_bytes, uncompressed); 650 nicenum(dd->dd_quota, quota); 651 nicenum(dd->dd_reserved, resv); 652 653 (void) printf("\t\tcreation_time = %s", ctime(&crtime)); 654 (void) printf("\t\thead_dataset_obj = %llu\n", 655 (u_longlong_t)dd->dd_head_dataset_obj); 656 (void) printf("\t\tparent_dir_obj = %llu\n", 657 (u_longlong_t)dd->dd_parent_obj); 658 (void) printf("\t\tclone_parent_obj = %llu\n", 659 (u_longlong_t)dd->dd_clone_parent_obj); 660 (void) printf("\t\tchild_dir_zapobj = %llu\n", 661 (u_longlong_t)dd->dd_child_dir_zapobj); 662 (void) printf("\t\tused_bytes = %s\n", used); 663 (void) printf("\t\tcompressed_bytes = %s\n", compressed); 664 (void) printf("\t\tuncompressed_bytes = %s\n", uncompressed); 665 (void) printf("\t\tquota = %s\n", quota); 666 (void) printf("\t\treserved = %s\n", resv); 667 (void) printf("\t\tprops_zapobj = %llu\n", 668 (u_longlong_t)dd->dd_props_zapobj); 669 } 670 671 /*ARGSUSED*/ 672 static void 673 dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size) 674 { 675 dsl_dataset_phys_t *ds = data; 676 time_t crtime; 677 char used[6], compressed[6], uncompressed[6], unique[6]; 678 char blkbuf[BP_SPRINTF_LEN]; 679 680 if (ds == NULL) 681 return; 682 683 ASSERT(size == sizeof (*ds)); 684 crtime = ds->ds_creation_time; 685 nicenum(ds->ds_used_bytes, used); 686 nicenum(ds->ds_compressed_bytes, compressed); 687 nicenum(ds->ds_uncompressed_bytes, uncompressed); 688 nicenum(ds->ds_unique_bytes, unique); 689 sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &ds->ds_bp); 690 691 (void) printf("\t\tdataset_obj = %llu\n", 692 (u_longlong_t)ds->ds_dir_obj); 693 (void) printf("\t\tprev_snap_obj = %llu\n", 694 (u_longlong_t)ds->ds_prev_snap_obj); 695 (void) printf("\t\tprev_snap_txg = %llu\n", 696 (u_longlong_t)ds->ds_prev_snap_txg); 697 (void) printf("\t\tnext_snap_obj = %llu\n", 698 (u_longlong_t)ds->ds_next_snap_obj); 699 (void) printf("\t\tsnapnames_zapobj = %llu\n", 700 (u_longlong_t)ds->ds_snapnames_zapobj); 701 (void) printf("\t\tnum_children = %llu\n", 702 (u_longlong_t)ds->ds_num_children); 703 (void) printf("\t\tcreation_time = %s", ctime(&crtime)); 704 (void) printf("\t\tcreation_txg = %llu\n", 705 (u_longlong_t)ds->ds_creation_txg); 706 (void) printf("\t\tdeadlist_obj = %llu\n", 707 (u_longlong_t)ds->ds_deadlist_obj); 708 (void) printf("\t\tused_bytes = %s\n", used); 709 (void) printf("\t\tcompressed_bytes = %s\n", compressed); 710 (void) printf("\t\tuncompressed_bytes = %s\n", uncompressed); 711 (void) printf("\t\tunique = %s\n", unique); 712 (void) printf("\t\tfsid_guid = %llu\n", 713 (u_longlong_t)ds->ds_fsid_guid); 714 (void) printf("\t\tguid = %llu\n", 715 (u_longlong_t)ds->ds_guid); 716 (void) printf("\t\trestoring = %llu\n", 717 (u_longlong_t)ds->ds_restoring); 718 (void) printf("\t\tbp = %s\n", blkbuf); 719 } 720 721 static void 722 dump_bplist(objset_t *mos, uint64_t object, char *name) 723 { 724 bplist_t bpl = { 0 }; 725 blkptr_t blk, *bp = &blk; 726 uint64_t itor = 0; 727 char numbuf[6]; 728 729 if (dump_opt['d'] < 3) 730 return; 731 732 VERIFY(0 == bplist_open(&bpl, mos, object)); 733 if (bplist_empty(&bpl)) { 734 bplist_close(&bpl); 735 return; 736 } 737 738 nicenum(bpl.bpl_phys->bpl_bytes, numbuf); 739 740 (void) printf("\n %s: %llu entries, %s\n", 741 name, (u_longlong_t)bpl.bpl_phys->bpl_entries, numbuf); 742 743 if (dump_opt['d'] < 5) { 744 bplist_close(&bpl); 745 return; 746 } 747 748 (void) printf("\n"); 749 750 while (bplist_iterate(&bpl, &itor, bp) == 0) { 751 ASSERT(bp->blk_birth != 0); 752 // XXBP - Do we want to see all DVAs, or just one? 753 (void) printf("\tItem %3llu: vdev=%llu off=%llx " 754 "%llxL/%llxP/%llxA F=%llu B=%llu\n", 755 (u_longlong_t)itor - 1, 756 (u_longlong_t)DVA_GET_VDEV(&bp->blk_dva[0]), 757 (u_longlong_t)DVA_GET_OFFSET(&bp->blk_dva[0]), 758 (u_longlong_t)BP_GET_LSIZE(bp), 759 (u_longlong_t)BP_GET_PSIZE(bp), 760 (u_longlong_t)DVA_GET_ASIZE(&bp->blk_dva[0]), 761 (u_longlong_t)bp->blk_fill, 762 (u_longlong_t)bp->blk_birth); 763 } 764 765 bplist_close(&bpl); 766 } 767 768 static char * 769 znode_path(objset_t *os, uint64_t object, char *pathbuf, size_t size) 770 { 771 dmu_buf_t *db; 772 dmu_object_info_t doi; 773 znode_phys_t *zp; 774 uint64_t parent = 0; 775 size_t complen; 776 char component[MAXNAMELEN + 1]; 777 char *path; 778 int error; 779 780 path = pathbuf + size; 781 *--path = '\0'; 782 783 for (;;) { 784 error = dmu_bonus_hold(os, object, FTAG, &db); 785 if (error) 786 break; 787 788 dmu_object_info_from_db(db, &doi); 789 zp = db->db_data; 790 parent = zp->zp_parent; 791 dmu_buf_rele(db, FTAG); 792 793 if (doi.doi_bonus_type != DMU_OT_ZNODE) 794 break; 795 796 if (parent == object) { 797 if (path[0] != '/') 798 *--path = '/'; 799 return (path); 800 } 801 802 if (zap_value_search(os, parent, object, component) != 0) 803 break; 804 805 complen = strlen(component); 806 path -= complen; 807 bcopy(component, path, complen); 808 *--path = '/'; 809 810 object = parent; 811 } 812 813 (void) sprintf(component, "???<object#%llu>", (u_longlong_t)object); 814 815 complen = strlen(component); 816 path -= complen; 817 bcopy(component, path, complen); 818 819 return (path); 820 } 821 822 /*ARGSUSED*/ 823 static void 824 dump_znode(objset_t *os, uint64_t object, void *data, size_t size) 825 { 826 znode_phys_t *zp = data; 827 time_t z_crtime, z_atime, z_mtime, z_ctime; 828 char path[MAXPATHLEN * 2]; /* allow for xattr and failure prefix */ 829 830 ASSERT(size >= sizeof (znode_phys_t)); 831 832 if (dump_opt['d'] < 3) { 833 (void) printf("\t%s\n", 834 znode_path(os, object, path, sizeof (path))); 835 return; 836 } 837 838 z_crtime = (time_t)zp->zp_crtime[0]; 839 z_atime = (time_t)zp->zp_atime[0]; 840 z_mtime = (time_t)zp->zp_mtime[0]; 841 z_ctime = (time_t)zp->zp_ctime[0]; 842 843 (void) printf("\tpath %s\n", 844 znode_path(os, object, path, sizeof (path))); 845 (void) printf("\tatime %s", ctime(&z_atime)); 846 (void) printf("\tmtime %s", ctime(&z_mtime)); 847 (void) printf("\tctime %s", ctime(&z_ctime)); 848 (void) printf("\tcrtime %s", ctime(&z_crtime)); 849 (void) printf("\tgen %llu\n", (u_longlong_t)zp->zp_gen); 850 (void) printf("\tmode %llo\n", (u_longlong_t)zp->zp_mode); 851 (void) printf("\tsize %llu\n", (u_longlong_t)zp->zp_size); 852 (void) printf("\tparent %llu\n", (u_longlong_t)zp->zp_parent); 853 (void) printf("\tlinks %llu\n", (u_longlong_t)zp->zp_links); 854 (void) printf("\txattr %llu\n", (u_longlong_t)zp->zp_xattr); 855 (void) printf("\trdev 0x%016llx\n", (u_longlong_t)zp->zp_rdev); 856 } 857 858 /*ARGSUSED*/ 859 static void 860 dump_acl(objset_t *os, uint64_t object, void *data, size_t size) 861 { 862 } 863 864 /*ARGSUSED*/ 865 static void 866 dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size) 867 { 868 } 869 870 static object_viewer_t *object_viewer[DMU_OT_NUMTYPES] = { 871 dump_none, /* unallocated */ 872 dump_zap, /* object directory */ 873 dump_uint64, /* object array */ 874 dump_none, /* packed nvlist */ 875 dump_packed_nvlist, /* packed nvlist size */ 876 dump_none, /* bplist */ 877 dump_none, /* bplist header */ 878 dump_none, /* SPA space map header */ 879 dump_none, /* SPA space map */ 880 dump_none, /* ZIL intent log */ 881 dump_dnode, /* DMU dnode */ 882 dump_dmu_objset, /* DMU objset */ 883 dump_dsl_dir, /* DSL directory */ 884 dump_zap, /* DSL directory child map */ 885 dump_zap, /* DSL dataset snap map */ 886 dump_zap, /* DSL props */ 887 dump_dsl_dataset, /* DSL dataset */ 888 dump_znode, /* ZFS znode */ 889 dump_acl, /* ZFS ACL */ 890 dump_uint8, /* ZFS plain file */ 891 dump_zap, /* ZFS directory */ 892 dump_zap, /* ZFS master node */ 893 dump_zap, /* ZFS delete queue */ 894 dump_uint8, /* zvol object */ 895 dump_zap, /* zvol prop */ 896 dump_uint8, /* other uint8[] */ 897 dump_uint64, /* other uint64[] */ 898 dump_zap, /* other ZAP */ 899 dump_zap, /* persistent error log */ 900 }; 901 902 static void 903 dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header) 904 { 905 dmu_buf_t *db = NULL; 906 dmu_object_info_t doi; 907 dnode_t *dn; 908 void *bonus = NULL; 909 size_t bsize = 0; 910 char iblk[6], dblk[6], lsize[6], psize[6], bonus_size[6], segsize[6]; 911 char aux[50]; 912 int error; 913 914 if (*print_header) { 915 (void) printf("\n Object lvl iblk dblk lsize" 916 " psize type\n"); 917 *print_header = 0; 918 } 919 920 if (object == 0) { 921 dn = os->os->os_meta_dnode; 922 } else { 923 error = dmu_bonus_hold(os, object, FTAG, &db); 924 if (error) 925 fatal("dmu_bonus_hold(%llu) failed, errno %u", 926 object, error); 927 bonus = db->db_data; 928 bsize = db->db_size; 929 dn = ((dmu_buf_impl_t *)db)->db_dnode; 930 } 931 dmu_object_info_from_dnode(dn, &doi); 932 933 nicenum(doi.doi_metadata_block_size, iblk); 934 nicenum(doi.doi_data_block_size, dblk); 935 nicenum(doi.doi_data_block_size * (doi.doi_max_block_offset + 1), 936 lsize); 937 nicenum(doi.doi_physical_blks << 9, psize); 938 nicenum(doi.doi_bonus_size, bonus_size); 939 940 aux[0] = '\0'; 941 942 if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) 943 (void) snprintf(aux + strlen(aux), sizeof (aux), " (K=%s)", 944 zio_checksum_table[doi.doi_checksum].ci_name); 945 946 if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) 947 (void) snprintf(aux + strlen(aux), sizeof (aux), " (Z=%s)", 948 zio_compress_table[doi.doi_compress].ci_name); 949 950 (void) printf("%10lld %3u %5s %5s %5s %5s %s%s\n", 951 (u_longlong_t)object, doi.doi_indirection, iblk, dblk, lsize, 952 psize, dmu_ot[doi.doi_type].ot_name, aux); 953 954 if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) { 955 (void) printf("%10s %3s %5s %5s %5s %5s %s\n", 956 "", "", "", "", bonus_size, "bonus", 957 dmu_ot[doi.doi_bonus_type].ot_name); 958 } 959 960 if (verbosity >= 4) { 961 object_viewer[doi.doi_bonus_type](os, object, bonus, bsize); 962 object_viewer[doi.doi_type](os, object, NULL, 0); 963 *print_header = 1; 964 } 965 966 if (verbosity >= 5) 967 dump_indirect(os, object, NULL, 0); 968 969 if (verbosity >= 5) { 970 /* 971 * Report the list of segments that comprise the object. 972 */ 973 uint64_t start = 0; 974 uint64_t end; 975 uint64_t blkfill = 1; 976 int minlvl = 1; 977 978 if (dn->dn_type == DMU_OT_DNODE) { 979 minlvl = 0; 980 blkfill = DNODES_PER_BLOCK; 981 } 982 983 for (;;) { 984 error = dnode_next_offset(dn, B_FALSE, &start, minlvl, 985 blkfill); 986 if (error) 987 break; 988 end = start; 989 error = dnode_next_offset(dn, B_TRUE, &end, minlvl, 990 blkfill); 991 nicenum(end - start, segsize); 992 (void) printf("\t\tsegment [%016llx, %016llx)" 993 " size %5s\n", (u_longlong_t)start, 994 (u_longlong_t)end, segsize); 995 if (error) 996 break; 997 start = end; 998 } 999 } 1000 1001 if (db != NULL) 1002 dmu_buf_rele(db, FTAG); 1003 } 1004 1005 static char *objset_types[DMU_OST_NUMTYPES] = { 1006 "NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" }; 1007 1008 /*ARGSUSED*/ 1009 static void 1010 dump_dir(objset_t *os) 1011 { 1012 dmu_objset_stats_t dds; 1013 uint64_t object, object_count; 1014 char numbuf[8]; 1015 char blkbuf[BP_SPRINTF_LEN]; 1016 char osname[MAXNAMELEN]; 1017 char *type = "UNKNOWN"; 1018 int verbosity = dump_opt['d']; 1019 int print_header = 1; 1020 int i, error; 1021 1022 dmu_objset_stats(os, &dds); 1023 1024 if (dds.dds_type < DMU_OST_NUMTYPES) 1025 type = objset_types[dds.dds_type]; 1026 1027 if (dds.dds_type == DMU_OST_META) { 1028 dds.dds_creation_txg = TXG_INITIAL; 1029 dds.dds_last_txg = os->os->os_rootbp.blk_birth; 1030 dds.dds_objects_used = os->os->os_rootbp.blk_fill; 1031 dds.dds_space_refd = 1032 os->os->os_spa->spa_dsl_pool->dp_mos_dir->dd_used_bytes; 1033 } 1034 1035 ASSERT3U(dds.dds_objects_used, ==, os->os->os_rootbp.blk_fill); 1036 1037 nicenum(dds.dds_space_refd, numbuf); 1038 1039 if (verbosity >= 4) { 1040 (void) strcpy(blkbuf, ", rootbp "); 1041 sprintf_blkptr(blkbuf + strlen(blkbuf), 1042 BP_SPRINTF_LEN - strlen(blkbuf), &os->os->os_rootbp); 1043 } else { 1044 blkbuf[0] = '\0'; 1045 } 1046 1047 dmu_objset_name(os, osname); 1048 1049 (void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, last_txg %llu, " 1050 "%s, %llu objects%s\n", 1051 osname, type, (u_longlong_t)dmu_objset_id(os), 1052 (u_longlong_t)dds.dds_creation_txg, 1053 (u_longlong_t)dds.dds_last_txg, 1054 numbuf, 1055 (u_longlong_t)dds.dds_objects_used, 1056 blkbuf); 1057 1058 dump_intent_log(dmu_objset_zil(os)); 1059 1060 if (dmu_objset_ds(os) != NULL) 1061 dump_bplist(dmu_objset_pool(os)->dp_meta_objset, 1062 dmu_objset_ds(os)->ds_phys->ds_deadlist_obj, "Deadlist"); 1063 1064 if (verbosity < 2) 1065 return; 1066 1067 if (zopt_objects != 0) { 1068 for (i = 0; i < zopt_objects; i++) 1069 dump_object(os, zopt_object[i], verbosity, 1070 &print_header); 1071 (void) printf("\n"); 1072 return; 1073 } 1074 1075 dump_object(os, 0, verbosity, &print_header); 1076 object_count = 1; 1077 1078 object = 0; 1079 while ((error = dmu_object_next(os, &object, B_FALSE)) == 0) { 1080 dump_object(os, object, verbosity, &print_header); 1081 object_count++; 1082 } 1083 1084 ASSERT3U(object_count, ==, dds.dds_objects_used); 1085 1086 (void) printf("\n"); 1087 1088 if (error != ESRCH) 1089 fatal("dmu_object_next() = %d", error); 1090 } 1091 1092 static void 1093 dump_uberblock(uberblock_t *ub) 1094 { 1095 time_t timestamp = ub->ub_timestamp; 1096 1097 (void) printf("Uberblock\n\n"); 1098 (void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic); 1099 (void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version); 1100 (void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg); 1101 (void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum); 1102 (void) printf("\ttimestamp = %llu UTC = %s", 1103 (u_longlong_t)ub->ub_timestamp, asctime(localtime(×tamp))); 1104 if (dump_opt['u'] >= 3) { 1105 char blkbuf[BP_SPRINTF_LEN]; 1106 sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &ub->ub_rootbp); 1107 (void) printf("\trootbp = %s\n", blkbuf); 1108 } 1109 (void) printf("\n"); 1110 } 1111 1112 static void 1113 dump_config(const char *pool) 1114 { 1115 spa_t *spa = NULL; 1116 1117 mutex_enter(&spa_namespace_lock); 1118 while ((spa = spa_next(spa)) != NULL) { 1119 if (pool == NULL) 1120 (void) printf("%s\n", spa_name(spa)); 1121 if (pool == NULL || strcmp(pool, spa_name(spa)) == 0) 1122 dump_nvlist(spa->spa_config, 4); 1123 } 1124 mutex_exit(&spa_namespace_lock); 1125 } 1126 1127 static void 1128 dump_label(const char *dev) 1129 { 1130 int fd; 1131 vdev_label_t label; 1132 char *buf = label.vl_vdev_phys.vp_nvlist; 1133 size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist); 1134 struct stat64 statbuf; 1135 uint64_t psize; 1136 int l; 1137 1138 if ((fd = open64(dev, O_RDONLY)) < 0) { 1139 (void) printf("cannot open '%s': %s\n", dev, strerror(errno)); 1140 exit(1); 1141 } 1142 1143 if (fstat64(fd, &statbuf) != 0) { 1144 (void) printf("failed to stat '%s': %s\n", dev, 1145 strerror(errno)); 1146 exit(1); 1147 } 1148 1149 psize = statbuf.st_size; 1150 psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t)); 1151 1152 for (l = 0; l < VDEV_LABELS; l++) { 1153 1154 nvlist_t *config = NULL; 1155 1156 (void) printf("--------------------------------------------\n"); 1157 (void) printf("LABEL %d\n", l); 1158 (void) printf("--------------------------------------------\n"); 1159 1160 if (pread64(fd, &label, sizeof (label), 1161 vdev_label_offset(psize, l, 0)) != sizeof (label)) { 1162 (void) printf("failed to read label %d\n", l); 1163 continue; 1164 } 1165 1166 if (nvlist_unpack(buf, buflen, &config, 0) != 0) { 1167 (void) printf("failed to unpack label %d\n", l); 1168 continue; 1169 } 1170 dump_nvlist(config, 4); 1171 nvlist_free(config); 1172 } 1173 } 1174 1175 /*ARGSUSED*/ 1176 static void 1177 dump_one_dir(char *dsname, void *arg) 1178 { 1179 int error; 1180 objset_t *os; 1181 1182 error = dmu_objset_open(dsname, DMU_OST_ANY, 1183 DS_MODE_STANDARD | DS_MODE_READONLY, &os); 1184 if (error) { 1185 (void) printf("Could not open %s\n", dsname); 1186 return; 1187 } 1188 dump_dir(os); 1189 dmu_objset_close(os); 1190 } 1191 1192 static void 1193 zdb_space_map_load(spa_t *spa) 1194 { 1195 vdev_t *rvd = spa->spa_root_vdev; 1196 vdev_t *vd; 1197 int c, m, error; 1198 1199 for (c = 0; c < rvd->vdev_children; c++) { 1200 vd = rvd->vdev_child[c]; 1201 for (m = 0; m < vd->vdev_ms_count; m++) { 1202 metaslab_t *msp = vd->vdev_ms[m]; 1203 space_map_t *sm = &msp->ms_allocmap[0]; 1204 mutex_enter(&msp->ms_lock); 1205 error = space_map_load(sm, msp->ms_smo, SM_ALLOC, 1206 spa->spa_meta_objset, msp->ms_usable_end, 1207 sm->sm_size - msp->ms_usable_space); 1208 mutex_exit(&msp->ms_lock); 1209 if (error) 1210 fatal("%s bad space map #%d, error %d", 1211 spa->spa_name, c, error); 1212 } 1213 } 1214 } 1215 1216 static int 1217 zdb_space_map_claim(spa_t *spa, blkptr_t *bp, zbookmark_t *zb) 1218 { 1219 dva_t *dva = &bp->blk_dva[0]; 1220 uint64_t vdev = DVA_GET_VDEV(dva); 1221 uint64_t offset = DVA_GET_OFFSET(dva); 1222 uint64_t size = DVA_GET_ASIZE(dva); 1223 vdev_t *vd; 1224 metaslab_t *msp; 1225 space_map_t *allocmap, *freemap; 1226 int error; 1227 1228 if ((vd = vdev_lookup_top(spa, vdev)) == NULL) 1229 return (ENXIO); 1230 1231 if ((offset >> vd->vdev_ms_shift) >= vd->vdev_ms_count) 1232 return (ENXIO); 1233 1234 if (DVA_GET_GANG(dva)) { 1235 zio_gbh_phys_t gbh; 1236 blkptr_t blk = *bp; 1237 int g; 1238 1239 /* LINTED - compile time assert */ 1240 ASSERT(sizeof (zio_gbh_phys_t) == SPA_GANGBLOCKSIZE); 1241 size = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE); 1242 DVA_SET_GANG(&blk.blk_dva[0], 0); 1243 DVA_SET_ASIZE(&blk.blk_dva[0], size); 1244 BP_SET_CHECKSUM(&blk, ZIO_CHECKSUM_GANG_HEADER); 1245 BP_SET_PSIZE(&blk, SPA_GANGBLOCKSIZE); 1246 BP_SET_LSIZE(&blk, SPA_GANGBLOCKSIZE); 1247 BP_SET_COMPRESS(&blk, ZIO_COMPRESS_OFF); 1248 error = zio_wait(zio_read(NULL, spa, &blk, 1249 &gbh, SPA_GANGBLOCKSIZE, NULL, NULL, 1250 ZIO_PRIORITY_SYNC_READ, 1251 ZIO_FLAG_CANFAIL | ZIO_FLAG_CONFIG_HELD, zb)); 1252 if (error) 1253 return (error); 1254 if (BP_SHOULD_BYTESWAP(&blk)) 1255 byteswap_uint64_array(&gbh, SPA_GANGBLOCKSIZE); 1256 for (g = 0; g < SPA_GBH_NBLKPTRS; g++) { 1257 if (gbh.zg_blkptr[g].blk_birth == 0) 1258 break; 1259 error = zdb_space_map_claim(spa, &gbh.zg_blkptr[g], zb); 1260 if (error) 1261 return (error); 1262 } 1263 } 1264 1265 msp = vd->vdev_ms[offset >> vd->vdev_ms_shift]; 1266 allocmap = &msp->ms_allocmap[0]; 1267 freemap = &msp->ms_freemap[0]; 1268 1269 mutex_enter(&msp->ms_lock); 1270 if (space_map_contains(freemap, offset, size)) { 1271 mutex_exit(&msp->ms_lock); 1272 return (EAGAIN); /* allocated more than once */ 1273 } 1274 1275 if (!space_map_contains(allocmap, offset, size)) { 1276 mutex_exit(&msp->ms_lock); 1277 return (ESTALE); /* not allocated at all */ 1278 } 1279 1280 space_map_remove(allocmap, offset, size); 1281 space_map_add(freemap, offset, size); 1282 1283 mutex_exit(&msp->ms_lock); 1284 1285 return (0); 1286 } 1287 1288 static void 1289 zdb_leak(space_map_t *sm, uint64_t start, uint64_t size) 1290 { 1291 metaslab_t *msp; 1292 1293 /* LINTED */ 1294 msp = (metaslab_t *)((char *)sm - offsetof(metaslab_t, ms_allocmap[0])); 1295 1296 (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n", 1297 (u_longlong_t)msp->ms_group->mg_vd->vdev_id, 1298 (u_longlong_t)start, 1299 (u_longlong_t)size); 1300 } 1301 1302 static void 1303 zdb_space_map_vacate(spa_t *spa) 1304 { 1305 vdev_t *rvd = spa->spa_root_vdev; 1306 vdev_t *vd; 1307 int c, m; 1308 1309 for (c = 0; c < rvd->vdev_children; c++) { 1310 vd = rvd->vdev_child[c]; 1311 for (m = 0; m < vd->vdev_ms_count; m++) { 1312 metaslab_t *msp = vd->vdev_ms[m]; 1313 mutex_enter(&msp->ms_lock); 1314 space_map_vacate(&msp->ms_allocmap[0], zdb_leak, 1315 &msp->ms_allocmap[0]); 1316 space_map_vacate(&msp->ms_freemap[0], NULL, NULL); 1317 mutex_exit(&msp->ms_lock); 1318 } 1319 } 1320 } 1321 1322 static void 1323 zdb_refresh_ubsync(spa_t *spa) 1324 { 1325 uberblock_t ub = { 0 }; 1326 vdev_t *rvd = spa->spa_root_vdev; 1327 zio_t *zio; 1328 1329 /* 1330 * Reload the uberblock. 1331 */ 1332 zio = zio_root(spa, NULL, NULL, 1333 ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); 1334 vdev_uberblock_load(zio, rvd, &ub); 1335 (void) zio_wait(zio); 1336 1337 if (ub.ub_txg != 0) 1338 spa->spa_ubsync = ub; 1339 } 1340 1341 /* 1342 * Verify that the sum of the sizes of all blocks in the pool adds up 1343 * to the SPA's sa_alloc total. 1344 */ 1345 typedef struct zdb_blkstats { 1346 uint64_t zb_asize; 1347 uint64_t zb_lsize; 1348 uint64_t zb_psize; 1349 uint64_t zb_count; 1350 } zdb_blkstats_t; 1351 1352 #define DMU_OT_DEFERRED DMU_OT_NONE 1353 #define DMU_OT_TOTAL DMU_OT_NUMTYPES 1354 1355 #define ZB_TOTAL ZB_MAXLEVEL 1356 1357 typedef struct zdb_cb { 1358 zdb_blkstats_t zcb_type[ZB_TOTAL + 1][DMU_OT_TOTAL + 1]; 1359 uint64_t zcb_errors[256]; 1360 traverse_blk_cache_t *zcb_cache; 1361 int zcb_readfails; 1362 int zcb_haderrors; 1363 } zdb_cb_t; 1364 1365 static void 1366 zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, int type) 1367 { 1368 int i, error; 1369 1370 for (i = 0; i < 4; i++) { 1371 int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL; 1372 int t = (i & 1) ? type : DMU_OT_TOTAL; 1373 zdb_blkstats_t *zb = &zcb->zcb_type[l][t]; 1374 1375 zb->zb_asize += BP_GET_ASIZE(bp); 1376 zb->zb_lsize += BP_GET_LSIZE(bp); 1377 zb->zb_psize += BP_GET_PSIZE(bp); 1378 zb->zb_count++; 1379 } 1380 1381 if (dump_opt['L']) 1382 return; 1383 1384 error = zdb_space_map_claim(spa, bp, &zcb->zcb_cache->bc_bookmark); 1385 1386 if (error == 0) 1387 return; 1388 1389 if (error == EAGAIN) 1390 (void) fatal("double-allocation, bp=%p", bp); 1391 1392 if (error == ESTALE) 1393 (void) fatal("reference to freed block, bp=%p", bp); 1394 1395 (void) fatal("fatal error %d in bp %p", error, bp); 1396 } 1397 1398 static int 1399 zdb_blkptr_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg) 1400 { 1401 zbookmark_t *zb = &bc->bc_bookmark; 1402 zdb_cb_t *zcb = arg; 1403 blkptr_t *bp = &bc->bc_blkptr; 1404 dmu_object_type_t type = BP_GET_TYPE(bp); 1405 char blkbuf[BP_SPRINTF_LEN]; 1406 int error = 0; 1407 1408 if (bc->bc_errno) { 1409 if (zcb->zcb_readfails++ < 10 && dump_opt['L']) { 1410 zdb_refresh_ubsync(spa); 1411 error = EAGAIN; 1412 } else { 1413 zcb->zcb_haderrors = 1; 1414 zcb->zcb_errors[bc->bc_errno]++; 1415 error = ERESTART; 1416 } 1417 1418 if (dump_opt['b'] >= 3 || (dump_opt['b'] >= 2 && bc->bc_errno)) 1419 sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp); 1420 else 1421 blkbuf[0] = '\0'; 1422 1423 (void) printf("zdb_blkptr_cb: Got error %d reading " 1424 "<%llu, %llu, %lld, %llx> %s -- %s\n", 1425 bc->bc_errno, 1426 (u_longlong_t)zb->zb_objset, 1427 (u_longlong_t)zb->zb_object, 1428 (u_longlong_t)zb->zb_level, 1429 (u_longlong_t)zb->zb_blkid, 1430 blkbuf, 1431 error == EAGAIN ? "retrying" : "skipping"); 1432 1433 return (error); 1434 } 1435 1436 zcb->zcb_readfails = 0; 1437 1438 ASSERT(bp->blk_birth != 0); 1439 1440 zdb_count_block(spa, zcb, bp, type); 1441 1442 if (dump_opt['b'] >= 4) { 1443 sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp); 1444 (void) printf("objset %llu object %llu offset 0x%llx %s\n", 1445 (u_longlong_t)zb->zb_objset, 1446 (u_longlong_t)zb->zb_object, 1447 (u_longlong_t)blkid2offset(bc->bc_dnode, 1448 zb->zb_level, zb->zb_blkid), 1449 blkbuf); 1450 } 1451 1452 return (0); 1453 } 1454 1455 static int 1456 dump_block_stats(spa_t *spa) 1457 { 1458 traverse_handle_t *th; 1459 zdb_cb_t zcb = { 0 }; 1460 traverse_blk_cache_t dummy_cache = { 0 }; 1461 zdb_blkstats_t *zb, *tzb; 1462 uint64_t alloc, space; 1463 int leaks = 0; 1464 int advance = zdb_advance; 1465 int flags; 1466 int e; 1467 1468 zcb.zcb_cache = &dummy_cache; 1469 1470 if (dump_opt['c']) 1471 advance |= ADVANCE_DATA; 1472 1473 advance |= ADVANCE_PRUNE | ADVANCE_ZIL; 1474 1475 (void) printf("\nTraversing all blocks to %sverify" 1476 " nothing leaked ...\n", 1477 dump_opt['c'] ? "verify checksums and " : ""); 1478 1479 /* 1480 * Load all space maps. As we traverse the pool, if we find a block 1481 * that's not in its space map, that indicates a double-allocation, 1482 * reference to a freed block, or an unclaimed block. Otherwise we 1483 * remove the block from the space map. If the space maps are not 1484 * empty when we're done, that indicates leaked blocks. 1485 */ 1486 if (!dump_opt['L']) 1487 zdb_space_map_load(spa); 1488 1489 /* 1490 * If there's a deferred-free bplist, process that first. 1491 */ 1492 if (spa->spa_sync_bplist_obj != 0) { 1493 bplist_t *bpl = &spa->spa_sync_bplist; 1494 blkptr_t blk; 1495 uint64_t itor = 0; 1496 1497 VERIFY(0 == bplist_open(bpl, spa->spa_meta_objset, 1498 spa->spa_sync_bplist_obj)); 1499 1500 while (bplist_iterate(bpl, &itor, &blk) == 0) { 1501 zdb_count_block(spa, &zcb, &blk, DMU_OT_DEFERRED); 1502 if (dump_opt['b'] >= 4) { 1503 char blkbuf[BP_SPRINTF_LEN]; 1504 sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &blk); 1505 (void) printf("[%s] %s\n", 1506 "deferred free", blkbuf); 1507 } 1508 } 1509 1510 bplist_close(bpl); 1511 } 1512 1513 /* 1514 * Now traverse the pool. If we're reading all data to verify 1515 * checksums, do a scrubbing read so that we validate all copies. 1516 */ 1517 flags = ZIO_FLAG_CANFAIL; 1518 if (advance & ADVANCE_DATA) 1519 flags |= ZIO_FLAG_SCRUB; 1520 th = traverse_init(spa, zdb_blkptr_cb, &zcb, advance, flags); 1521 th->th_noread = zdb_noread; 1522 1523 traverse_add_pool(th, 0, spa_first_txg(spa)); 1524 1525 while (traverse_more(th) == EAGAIN) 1526 continue; 1527 1528 traverse_fini(th); 1529 1530 if (zcb.zcb_haderrors) { 1531 (void) printf("\nError counts:\n\n"); 1532 (void) printf("\t%5s %s\n", "errno", "count"); 1533 for (e = 0; e < 256; e++) { 1534 if (zcb.zcb_errors[e] != 0) { 1535 (void) printf("\t%5d %llu\n", 1536 e, (u_longlong_t)zcb.zcb_errors[e]); 1537 } 1538 } 1539 } 1540 1541 /* 1542 * Report any leaked segments. 1543 */ 1544 if (!dump_opt['L']) 1545 zdb_space_map_vacate(spa); 1546 1547 if (dump_opt['L']) 1548 (void) printf("\n\n *** Live pool traversal; " 1549 "block counts are only approximate ***\n\n"); 1550 1551 alloc = spa_get_alloc(spa); 1552 space = spa_get_space(spa); 1553 1554 tzb = &zcb.zcb_type[ZB_TOTAL][DMU_OT_TOTAL]; 1555 1556 if (tzb->zb_asize == alloc) { 1557 (void) printf("\n\tNo leaks (block sum matches space" 1558 " maps exactly)\n"); 1559 } else { 1560 (void) printf("block traversal size %llu != alloc %llu " 1561 "(leaked %lld)\n", 1562 (u_longlong_t)tzb->zb_asize, 1563 (u_longlong_t)alloc, 1564 (u_longlong_t)(alloc - tzb->zb_asize)); 1565 leaks = 1; 1566 } 1567 1568 if (tzb->zb_count == 0) 1569 return (2); 1570 1571 (void) printf("\n"); 1572 (void) printf("\tbp count: %10llu\n", 1573 (u_longlong_t)tzb->zb_count); 1574 (void) printf("\tbp logical: %10llu\t avg: %6llu\n", 1575 (u_longlong_t)tzb->zb_lsize, 1576 (u_longlong_t)(tzb->zb_lsize / tzb->zb_count)); 1577 (void) printf("\tbp physical: %10llu\t avg:" 1578 " %6llu\tcompression: %6.2f\n", 1579 (u_longlong_t)tzb->zb_psize, 1580 (u_longlong_t)(tzb->zb_psize / tzb->zb_count), 1581 (double)tzb->zb_lsize / tzb->zb_psize); 1582 (void) printf("\tbp allocated: %10llu\t avg:" 1583 " %6llu\tcompression: %6.2f\n", 1584 (u_longlong_t)tzb->zb_asize, 1585 (u_longlong_t)(tzb->zb_asize / tzb->zb_count), 1586 (double)tzb->zb_lsize / tzb->zb_asize); 1587 (void) printf("\tSPA allocated: %10llu\tused: %5.2f%%\n", 1588 (u_longlong_t)alloc, 100.0 * alloc / space); 1589 1590 if (dump_opt['b'] >= 2) { 1591 int l, t, level; 1592 (void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE" 1593 "\t avg\t comp\t%%Total\tType\n"); 1594 1595 for (t = 0; t <= DMU_OT_NUMTYPES; t++) { 1596 char csize[6], lsize[6], psize[6], asize[6], avg[6]; 1597 char *typename; 1598 1599 typename = t == DMU_OT_DEFERRED ? "deferred free" : 1600 t == DMU_OT_TOTAL ? "Total" : dmu_ot[t].ot_name; 1601 1602 if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) { 1603 (void) printf("%6s\t%5s\t%5s\t%5s" 1604 "\t%5s\t%5s\t%6s\t%s\n", 1605 "-", 1606 "-", 1607 "-", 1608 "-", 1609 "-", 1610 "-", 1611 "-", 1612 typename); 1613 continue; 1614 } 1615 1616 for (l = ZB_TOTAL - 1; l >= -1; l--) { 1617 level = (l == -1 ? ZB_TOTAL : l); 1618 zb = &zcb.zcb_type[level][t]; 1619 1620 if (zb->zb_asize == 0) 1621 continue; 1622 1623 if (dump_opt['b'] < 3 && level != ZB_TOTAL) 1624 continue; 1625 1626 if (level == 0 && zb->zb_asize == 1627 zcb.zcb_type[ZB_TOTAL][t].zb_asize) 1628 continue; 1629 1630 nicenum(zb->zb_count, csize); 1631 nicenum(zb->zb_lsize, lsize); 1632 nicenum(zb->zb_psize, psize); 1633 nicenum(zb->zb_asize, asize); 1634 nicenum(zb->zb_asize / zb->zb_count, avg); 1635 1636 (void) printf("%6s\t%5s\t%5s\t%5s\t%5s" 1637 "\t%5.2f\t%6.2f\t", 1638 csize, lsize, psize, asize, avg, 1639 (double)zb->zb_lsize / zb->zb_psize, 1640 100.0 * zb->zb_asize / tzb->zb_asize); 1641 1642 if (level == ZB_TOTAL) 1643 (void) printf("%s\n", typename); 1644 else 1645 (void) printf(" L%d %s\n", 1646 level, typename); 1647 } 1648 } 1649 } 1650 1651 (void) printf("\n"); 1652 1653 if (leaks) 1654 return (2); 1655 1656 if (zcb.zcb_haderrors) 1657 return (3); 1658 1659 return (0); 1660 } 1661 1662 static void 1663 dump_zpool(spa_t *spa) 1664 { 1665 dsl_pool_t *dp = spa_get_dsl(spa); 1666 int rc = 0; 1667 1668 if (dump_opt['u']) 1669 dump_uberblock(&spa->spa_uberblock); 1670 1671 if (dump_opt['d'] || dump_opt['i']) { 1672 dump_dir(dp->dp_meta_objset); 1673 if (dump_opt['d'] >= 3) { 1674 dump_bplist(dp->dp_meta_objset, 1675 spa->spa_sync_bplist_obj, "Deferred frees"); 1676 dump_dtl(spa->spa_root_vdev, 0); 1677 dump_metaslabs(spa); 1678 } 1679 dmu_objset_find(spa->spa_name, dump_one_dir, NULL, 1680 DS_FIND_SNAPSHOTS); 1681 } 1682 1683 if (dump_opt['b'] || dump_opt['c']) 1684 rc = dump_block_stats(spa); 1685 1686 if (dump_opt['s']) 1687 show_pool_stats(spa); 1688 1689 if (rc != 0) 1690 exit(rc); 1691 } 1692 1693 int 1694 main(int argc, char **argv) 1695 { 1696 int i, c; 1697 struct rlimit rl = { 1024, 1024 }; 1698 spa_t *spa; 1699 objset_t *os = NULL; 1700 char *endstr; 1701 int dump_all = 1; 1702 int verbose = 0; 1703 int error; 1704 int flag, set; 1705 vdev_knob_t *vk; 1706 1707 (void) setrlimit(RLIMIT_NOFILE, &rl); 1708 1709 dprintf_setup(&argc, argv); 1710 1711 while ((c = getopt(argc, argv, "udibcsvCLO:B:Ul")) != -1) { 1712 switch (c) { 1713 case 'u': 1714 case 'd': 1715 case 'i': 1716 case 'b': 1717 case 'c': 1718 case 's': 1719 case 'C': 1720 case 'l': 1721 dump_opt[c]++; 1722 dump_all = 0; 1723 break; 1724 case 'L': 1725 dump_opt[c]++; 1726 break; 1727 case 'O': 1728 endstr = optarg; 1729 if (endstr[0] == '!') { 1730 endstr++; 1731 set = 0; 1732 } else { 1733 set = 1; 1734 } 1735 if (strcmp(endstr, "post") == 0) { 1736 flag = ADVANCE_PRE; 1737 set = !set; 1738 } else if (strcmp(endstr, "pre") == 0) { 1739 flag = ADVANCE_PRE; 1740 } else if (strcmp(endstr, "prune") == 0) { 1741 flag = ADVANCE_PRUNE; 1742 } else if (strcmp(endstr, "data") == 0) { 1743 flag = ADVANCE_DATA; 1744 } else if (strcmp(endstr, "holes") == 0) { 1745 flag = ADVANCE_HOLES; 1746 } else { 1747 usage(); 1748 } 1749 if (set) 1750 zdb_advance |= flag; 1751 else 1752 zdb_advance &= ~flag; 1753 break; 1754 case 'B': 1755 endstr = optarg - 1; 1756 zdb_noread.zb_objset = strtoull(endstr + 1, &endstr, 0); 1757 zdb_noread.zb_object = strtoull(endstr + 1, &endstr, 0); 1758 zdb_noread.zb_level = strtol(endstr + 1, &endstr, 0); 1759 zdb_noread.zb_blkid = strtoull(endstr + 1, &endstr, 16); 1760 (void) printf("simulating bad block " 1761 "<%llu, %llu, %lld, %llx>\n", 1762 (u_longlong_t)zdb_noread.zb_objset, 1763 (u_longlong_t)zdb_noread.zb_object, 1764 (u_longlong_t)zdb_noread.zb_level, 1765 (u_longlong_t)zdb_noread.zb_blkid); 1766 break; 1767 case 'v': 1768 verbose++; 1769 break; 1770 case 'U': 1771 spa_config_dir = "/tmp"; 1772 break; 1773 default: 1774 usage(); 1775 break; 1776 } 1777 } 1778 1779 kernel_init(FREAD); 1780 1781 /* 1782 * Disable vdev caching. If we don't do this, live pool traversal 1783 * won't make progress because it will never see disk updates. 1784 */ 1785 for (vk = vdev_knob_next(NULL); vk != NULL; vk = vdev_knob_next(vk)) { 1786 if (strcmp(vk->vk_name, "cache_size") == 0) 1787 vk->vk_default = 0; 1788 } 1789 1790 for (c = 0; c < 256; c++) { 1791 if (dump_all && c != 'L' && c != 'l') 1792 dump_opt[c] = 1; 1793 if (dump_opt[c]) 1794 dump_opt[c] += verbose; 1795 } 1796 1797 argc -= optind; 1798 argv += optind; 1799 1800 if (argc < 1) { 1801 if (dump_opt['C']) { 1802 dump_config(NULL); 1803 return (0); 1804 } 1805 usage(); 1806 } 1807 1808 if (dump_opt['l']) { 1809 dump_label(argv[0]); 1810 return (0); 1811 } 1812 1813 if (dump_opt['C']) 1814 dump_config(argv[0]); 1815 1816 if (strchr(argv[0], '/') != NULL) { 1817 error = dmu_objset_open(argv[0], DMU_OST_ANY, 1818 DS_MODE_STANDARD | DS_MODE_READONLY, &os); 1819 } else { 1820 error = spa_open(argv[0], &spa, FTAG); 1821 } 1822 1823 if (error) 1824 fatal("can't open %s: error %d", argv[0], error); 1825 1826 argv++; 1827 if (--argc > 0) { 1828 zopt_objects = argc; 1829 zopt_object = calloc(zopt_objects, sizeof (uint64_t)); 1830 for (i = 0; i < zopt_objects; i++) { 1831 errno = 0; 1832 zopt_object[i] = strtoull(argv[i], NULL, 0); 1833 if (zopt_object[i] == 0 && errno != 0) 1834 fatal("bad object number %s: %s", 1835 argv[i], strerror(errno)); 1836 } 1837 } 1838 1839 if (os != NULL) { 1840 dump_dir(os); 1841 dmu_objset_close(os); 1842 } else { 1843 dump_zpool(spa); 1844 spa_close(spa, FTAG); 1845 } 1846 1847 kernel_fini(); 1848 1849 return (0); 1850 } 1851