1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <stdio.h> 30 #include <stdlib.h> 31 #include <sys/zfs_context.h> 32 #include <sys/spa.h> 33 #include <sys/spa_impl.h> 34 #include <sys/dmu.h> 35 #include <sys/zap.h> 36 #include <sys/fs/zfs.h> 37 #include <sys/zfs_znode.h> 38 #include <sys/vdev.h> 39 #include <sys/vdev_impl.h> 40 #include <sys/metaslab_impl.h> 41 #include <sys/dmu_objset.h> 42 #include <sys/dsl_dir.h> 43 #include <sys/dsl_dataset.h> 44 #include <sys/dsl_pool.h> 45 #include <sys/dbuf.h> 46 #include <sys/zil.h> 47 #include <sys/zil_impl.h> 48 #include <sys/stat.h> 49 #include <sys/resource.h> 50 #include <sys/dmu_traverse.h> 51 #include <sys/zio_checksum.h> 52 #include <sys/zio_compress.h> 53 54 const char cmdname[] = "zdb"; 55 uint8_t dump_opt[256]; 56 57 typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size); 58 59 extern void dump_intent_log(zilog_t *); 60 uint64_t *zopt_object = NULL; 61 int zopt_objects = 0; 62 int zdb_advance = ADVANCE_PRE; 63 zbookmark_t zdb_noread = { 0, 0, ZB_NO_LEVEL, 0 }; 64 65 /* 66 * These libumem hooks provide a reasonable set of defaults for the allocator's 67 * debugging facilities. 68 */ 69 const char * 70 _umem_debug_init() 71 { 72 return ("default,verbose"); /* $UMEM_DEBUG setting */ 73 } 74 75 const char * 76 _umem_logging_init(void) 77 { 78 return ("fail,contents"); /* $UMEM_LOGGING setting */ 79 } 80 81 static void 82 usage(void) 83 { 84 (void) fprintf(stderr, 85 "Usage: %s [-udibcsvLU] [-O order] [-B os:obj:level:blkid] " 86 "dataset [object...]\n" 87 " %s -C [pool]\n" 88 " %s -l dev\n", 89 cmdname, cmdname, cmdname); 90 91 (void) fprintf(stderr, " -u uberblock\n"); 92 (void) fprintf(stderr, " -d datasets\n"); 93 (void) fprintf(stderr, " -C cached pool configuration\n"); 94 (void) fprintf(stderr, " -i intent logs\n"); 95 (void) fprintf(stderr, " -b block statistics\n"); 96 (void) fprintf(stderr, " -c checksum all data blocks\n"); 97 (void) fprintf(stderr, " -s report stats on zdb's I/O\n"); 98 (void) fprintf(stderr, " -v verbose (applies to all others)\n"); 99 (void) fprintf(stderr, " -l dump label contents\n"); 100 (void) fprintf(stderr, " -L live pool (allows some errors)\n"); 101 (void) fprintf(stderr, " -O [!]<pre|post|prune|data|holes> " 102 "visitation order\n"); 103 (void) fprintf(stderr, " -U use zpool.cache in /tmp\n"); 104 (void) fprintf(stderr, " -B objset:object:level:blkid -- " 105 "simulate bad block\n"); 106 (void) fprintf(stderr, "Specify an option more than once (e.g. -bb) " 107 "to make only that option verbose\n"); 108 (void) fprintf(stderr, "Default is to dump everything non-verbosely\n"); 109 exit(1); 110 } 111 112 static void 113 fatal(const char *fmt, ...) 114 { 115 va_list ap; 116 117 va_start(ap, fmt); 118 (void) fprintf(stderr, "%s: ", cmdname); 119 (void) vfprintf(stderr, fmt, ap); 120 va_end(ap); 121 (void) fprintf(stderr, "\n"); 122 123 exit(1); 124 } 125 126 static void 127 dump_nvlist(nvlist_t *list, int indent) 128 { 129 nvpair_t *elem = NULL; 130 131 while ((elem = nvlist_next_nvpair(list, elem)) != NULL) { 132 switch (nvpair_type(elem)) { 133 case DATA_TYPE_STRING: 134 { 135 char *value; 136 137 VERIFY(nvpair_value_string(elem, &value) == 0); 138 (void) printf("%*s%s='%s'\n", indent, "", 139 nvpair_name(elem), value); 140 } 141 break; 142 143 case DATA_TYPE_UINT64: 144 { 145 uint64_t value; 146 147 VERIFY(nvpair_value_uint64(elem, &value) == 0); 148 (void) printf("%*s%s=%llu\n", indent, "", 149 nvpair_name(elem), (u_longlong_t)value); 150 } 151 break; 152 153 case DATA_TYPE_NVLIST: 154 { 155 nvlist_t *value; 156 157 VERIFY(nvpair_value_nvlist(elem, &value) == 0); 158 (void) printf("%*s%s\n", indent, "", 159 nvpair_name(elem)); 160 dump_nvlist(value, indent + 4); 161 } 162 break; 163 164 case DATA_TYPE_NVLIST_ARRAY: 165 { 166 nvlist_t **value; 167 uint_t c, count; 168 169 VERIFY(nvpair_value_nvlist_array(elem, &value, 170 &count) == 0); 171 172 for (c = 0; c < count; c++) { 173 (void) printf("%*s%s[%u]\n", indent, "", 174 nvpair_name(elem), c); 175 dump_nvlist(value[c], indent + 8); 176 } 177 } 178 break; 179 180 default: 181 182 (void) printf("bad config type %d for %s\n", 183 nvpair_type(elem), nvpair_name(elem)); 184 } 185 } 186 } 187 188 /* ARGSUSED */ 189 static void 190 dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size) 191 { 192 nvlist_t *nv; 193 size_t nvsize = *(uint64_t *)data; 194 char *packed = umem_alloc(nvsize, UMEM_NOFAIL); 195 196 dmu_read(os, object, 0, nvsize, packed); 197 198 VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0); 199 200 umem_free(packed, nvsize); 201 202 dump_nvlist(nv, 8); 203 204 nvlist_free(nv); 205 } 206 207 const char dump_zap_stars[] = "****************************************"; 208 const int dump_zap_width = sizeof (dump_zap_stars) - 1; 209 210 static void 211 dump_zap_histogram(uint64_t histo[ZAP_HISTOGRAM_SIZE]) 212 { 213 int i; 214 int minidx = ZAP_HISTOGRAM_SIZE - 1; 215 int maxidx = 0; 216 uint64_t max = 0; 217 218 for (i = 0; i < ZAP_HISTOGRAM_SIZE; i++) { 219 if (histo[i] > max) 220 max = histo[i]; 221 if (histo[i] > 0 && i > maxidx) 222 maxidx = i; 223 if (histo[i] > 0 && i < minidx) 224 minidx = i; 225 } 226 227 if (max < dump_zap_width) 228 max = dump_zap_width; 229 230 for (i = minidx; i <= maxidx; i++) 231 (void) printf("\t\t\t%u: %6llu %s\n", i, (u_longlong_t)histo[i], 232 &dump_zap_stars[(max - histo[i]) * dump_zap_width / max]); 233 } 234 235 static void 236 dump_zap_stats(objset_t *os, uint64_t object) 237 { 238 int error; 239 zap_stats_t zs; 240 241 error = zap_get_stats(os, object, &zs); 242 if (error) 243 return; 244 245 if (zs.zs_ptrtbl_len == 0) { 246 ASSERT(zs.zs_num_blocks == 1); 247 (void) printf("\tmicrozap: %llu bytes, %llu entries\n", 248 (u_longlong_t)zs.zs_blocksize, 249 (u_longlong_t)zs.zs_num_entries); 250 return; 251 } 252 253 (void) printf("\tFat ZAP stats:\n"); 254 (void) printf("\t\tPointer table: %llu elements\n", 255 (u_longlong_t)zs.zs_ptrtbl_len); 256 (void) printf("\t\tZAP entries: %llu\n", 257 (u_longlong_t)zs.zs_num_entries); 258 (void) printf("\t\tLeaf blocks: %llu\n", 259 (u_longlong_t)zs.zs_num_leafs); 260 (void) printf("\t\tTotal blocks: %llu\n", 261 (u_longlong_t)zs.zs_num_blocks); 262 (void) printf("\t\tOversize blocks: %llu\n", 263 (u_longlong_t)zs.zs_num_blocks_large); 264 265 (void) printf("\t\tLeafs with 2^n pointers:\n"); 266 dump_zap_histogram(zs.zs_leafs_with_2n_pointers); 267 268 (void) printf("\t\tLeafs with n chained:\n"); 269 dump_zap_histogram(zs.zs_leafs_with_n_chained); 270 271 (void) printf("\t\tBlocks with n*5 entries:\n"); 272 dump_zap_histogram(zs.zs_blocks_with_n5_entries); 273 274 (void) printf("\t\tBlocks n/10 full:\n"); 275 dump_zap_histogram(zs.zs_blocks_n_tenths_full); 276 277 (void) printf("\t\tEntries with n chunks:\n"); 278 dump_zap_histogram(zs.zs_entries_using_n_chunks); 279 280 (void) printf("\t\tBuckets with n entries:\n"); 281 dump_zap_histogram(zs.zs_buckets_with_n_entries); 282 } 283 284 /*ARGSUSED*/ 285 static void 286 dump_none(objset_t *os, uint64_t object, void *data, size_t size) 287 { 288 } 289 290 /*ARGSUSED*/ 291 void 292 dump_uint8(objset_t *os, uint64_t object, void *data, size_t size) 293 { 294 } 295 296 /*ARGSUSED*/ 297 static void 298 dump_uint64(objset_t *os, uint64_t object, void *data, size_t size) 299 { 300 } 301 302 /*ARGSUSED*/ 303 static void 304 dump_zap(objset_t *os, uint64_t object, void *data, size_t size) 305 { 306 zap_cursor_t zc; 307 zap_attribute_t attr; 308 void *prop; 309 int i; 310 311 dump_zap_stats(os, object); 312 (void) printf("\n"); 313 314 for (zap_cursor_init(&zc, os, object); 315 zap_cursor_retrieve(&zc, &attr) == 0; 316 zap_cursor_advance(&zc)) { 317 (void) printf("\t\t%s = ", attr.za_name); 318 if (attr.za_num_integers == 0) { 319 (void) printf("\n"); 320 continue; 321 } 322 prop = umem_zalloc(attr.za_num_integers * 323 attr.za_integer_length, UMEM_NOFAIL); 324 (void) zap_lookup(os, object, attr.za_name, 325 attr.za_integer_length, attr.za_num_integers, prop); 326 if (attr.za_integer_length == 1) { 327 (void) printf("%s", (char *)prop); 328 } else { 329 for (i = 0; i < attr.za_num_integers; i++) { 330 switch (attr.za_integer_length) { 331 case 2: 332 (void) printf("%u ", 333 ((uint16_t *)prop)[i]); 334 break; 335 case 4: 336 (void) printf("%u ", 337 ((uint32_t *)prop)[i]); 338 break; 339 case 8: 340 (void) printf("%lld ", 341 (u_longlong_t)((int64_t *)prop)[i]); 342 break; 343 } 344 } 345 } 346 (void) printf("\n"); 347 umem_free(prop, attr.za_num_integers * attr.za_integer_length); 348 } 349 } 350 351 static void 352 dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm) 353 { 354 uint64_t alloc, offset, entry; 355 int mapshift = sm->sm_shift; 356 uint64_t mapstart = sm->sm_start; 357 char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID" }; 358 359 if (smo->smo_object == 0) 360 return; 361 362 /* 363 * Print out the freelist entries in both encoded and decoded form. 364 */ 365 alloc = 0; 366 for (offset = 0; offset < smo->smo_objsize; offset += sizeof (entry)) { 367 dmu_read(os, smo->smo_object, offset, sizeof (entry), &entry); 368 if (SM_DEBUG_DECODE(entry)) { 369 (void) printf("\t\t[%4llu] %s: txg %llu, pass %llu\n", 370 (u_longlong_t)(offset / sizeof (entry)), 371 ddata[SM_DEBUG_ACTION_DECODE(entry)], 372 SM_DEBUG_TXG_DECODE(entry), 373 SM_DEBUG_SYNCPASS_DECODE(entry)); 374 } else { 375 (void) printf("\t\t[%4llu] %c range:" 376 " %08llx-%08llx size: %06llx\n", 377 (u_longlong_t)(offset / sizeof (entry)), 378 SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F', 379 (SM_OFFSET_DECODE(entry) << mapshift) + mapstart, 380 (SM_OFFSET_DECODE(entry) << mapshift) + mapstart + 381 (SM_RUN_DECODE(entry) << mapshift), 382 (SM_RUN_DECODE(entry) << mapshift)); 383 if (SM_TYPE_DECODE(entry) == SM_ALLOC) 384 alloc += SM_RUN_DECODE(entry) << mapshift; 385 else 386 alloc -= SM_RUN_DECODE(entry) << mapshift; 387 } 388 } 389 if (alloc != smo->smo_alloc) { 390 (void) printf("space_map_object alloc (%llu) INCONSISTENT " 391 "with space map summary (%llu)\n", 392 (u_longlong_t)smo->smo_alloc, (u_longlong_t)alloc); 393 } 394 } 395 396 static void 397 dump_metaslab(metaslab_t *msp) 398 { 399 char freebuf[5]; 400 space_map_obj_t *smo = msp->ms_smo; 401 vdev_t *vd = msp->ms_group->mg_vd; 402 spa_t *spa = vd->vdev_spa; 403 404 nicenum(msp->ms_map.sm_size - smo->smo_alloc, freebuf); 405 406 if (dump_opt['d'] <= 5) { 407 (void) printf("\t%10llx %10llu %5s\n", 408 (u_longlong_t)msp->ms_map.sm_start, 409 (u_longlong_t)smo->smo_object, 410 freebuf); 411 return; 412 } 413 414 (void) printf( 415 "\tvdev %llu offset %08llx spacemap %4llu free %5s\n", 416 (u_longlong_t)vd->vdev_id, (u_longlong_t)msp->ms_map.sm_start, 417 (u_longlong_t)smo->smo_object, freebuf); 418 419 ASSERT(msp->ms_map.sm_size == (1ULL << vd->vdev_ms_shift)); 420 421 dump_spacemap(spa->spa_meta_objset, smo, &msp->ms_map); 422 } 423 424 static void 425 dump_metaslabs(spa_t *spa) 426 { 427 vdev_t *rvd = spa->spa_root_vdev; 428 vdev_t *vd; 429 int c, m; 430 431 (void) printf("\nMetaslabs:\n"); 432 433 for (c = 0; c < rvd->vdev_children; c++) { 434 vd = rvd->vdev_child[c]; 435 436 spa_config_enter(spa, RW_READER); 437 (void) printf("\n vdev %llu = %s\n\n", 438 (u_longlong_t)vd->vdev_id, vdev_description(vd)); 439 spa_config_exit(spa); 440 441 if (dump_opt['d'] <= 5) { 442 (void) printf("\t%10s %10s %5s\n", 443 "offset", "spacemap", "free"); 444 (void) printf("\t%10s %10s %5s\n", 445 "------", "--------", "----"); 446 } 447 for (m = 0; m < vd->vdev_ms_count; m++) 448 dump_metaslab(vd->vdev_ms[m]); 449 (void) printf("\n"); 450 } 451 } 452 453 static void 454 dump_dtl(vdev_t *vd, int indent) 455 { 456 avl_tree_t *t = &vd->vdev_dtl_map.sm_root; 457 spa_t *spa = vd->vdev_spa; 458 space_seg_t *ss; 459 vdev_t *pvd; 460 int c; 461 462 if (indent == 0) 463 (void) printf("\nDirty time logs:\n\n"); 464 465 spa_config_enter(spa, RW_READER); 466 (void) printf("\t%*s%s\n", indent, "", vdev_description(vd)); 467 spa_config_exit(spa); 468 469 for (ss = avl_first(t); ss; ss = AVL_NEXT(t, ss)) { 470 /* 471 * Everything in this DTL must appear in all parent DTL unions. 472 */ 473 for (pvd = vd; pvd; pvd = pvd->vdev_parent) 474 ASSERT(vdev_dtl_contains(&pvd->vdev_dtl_map, 475 ss->ss_start, ss->ss_end - ss->ss_start)); 476 (void) printf("\t%*soutage [%llu,%llu] length %llu\n", 477 indent, "", 478 (u_longlong_t)ss->ss_start, 479 (u_longlong_t)ss->ss_end - 1, 480 (u_longlong_t)ss->ss_end - ss->ss_start); 481 } 482 483 (void) printf("\n"); 484 485 if (dump_opt['d'] > 5 && vd->vdev_children == 0) { 486 dump_spacemap(vd->vdev_spa->spa_meta_objset, &vd->vdev_dtl, 487 &vd->vdev_dtl_map); 488 (void) printf("\n"); 489 } 490 491 for (c = 0; c < vd->vdev_children; c++) 492 dump_dtl(vd->vdev_child[c], indent + 4); 493 } 494 495 /*ARGSUSED*/ 496 static void 497 dump_dnode(objset_t *os, uint64_t object, void *data, size_t size) 498 { 499 } 500 501 static uint64_t 502 blkid2offset(dnode_phys_t *dnp, int level, uint64_t blkid) 503 { 504 if (level < 0) 505 return (blkid); 506 507 return ((blkid << (level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) * 508 dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT); 509 } 510 511 /* ARGSUSED */ 512 static int 513 zdb_indirect_cb(traverse_blk_cache_t *bc, spa_t *spa, void *a) 514 { 515 zbookmark_t *zb = &bc->bc_bookmark; 516 blkptr_t *bp = &bc->bc_blkptr; 517 dva_t *dva = &bp->blk_dva[0]; 518 void *data = bc->bc_data; 519 dnode_phys_t *dnp = bc->bc_dnode; 520 char buffer[300]; 521 int l; 522 523 if (bc->bc_errno) { 524 (void) sprintf(buffer, 525 "Error %d reading <%llu, %llu, %d, %llu>: ", 526 bc->bc_errno, 527 (u_longlong_t)zb->zb_objset, 528 (u_longlong_t)zb->zb_object, 529 zb->zb_level, 530 (u_longlong_t)zb->zb_blkid); 531 goto out; 532 } 533 534 if (zb->zb_level == -1) { 535 ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET); 536 ASSERT3U(BP_GET_LEVEL(bp), ==, 0); 537 } else { 538 ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type); 539 ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level); 540 } 541 542 if (zb->zb_level > 0) { 543 uint64_t fill = 0; 544 blkptr_t *bpx, *bpend; 545 546 for (bpx = data, bpend = bpx + BP_GET_LSIZE(bp) / sizeof (*bpx); 547 bpx < bpend; bpx++) { 548 if (bpx->blk_birth != 0) { 549 ASSERT(bpx->blk_fill > 0); 550 fill += bpx->blk_fill; 551 } else { 552 ASSERT(bpx->blk_fill == 0); 553 } 554 } 555 ASSERT3U(fill, ==, bp->blk_fill); 556 } 557 558 if (zb->zb_level == 0 && dnp->dn_type == DMU_OT_DNODE) { 559 uint64_t fill = 0; 560 dnode_phys_t *dnx, *dnend; 561 562 for (dnx = data, dnend = dnx + (BP_GET_LSIZE(bp)>>DNODE_SHIFT); 563 dnx < dnend; dnx++) { 564 if (dnx->dn_type != DMU_OT_NONE) 565 fill++; 566 } 567 ASSERT3U(fill, ==, bp->blk_fill); 568 } 569 570 (void) sprintf(buffer, "%16llx ", 571 (u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid)); 572 573 ASSERT(zb->zb_level >= 0); 574 575 for (l = dnp->dn_nlevels - 1; l >= -1; l--) { 576 if (l == zb->zb_level) { 577 (void) sprintf(buffer + strlen(buffer), "L%x", 578 zb->zb_level); 579 } else { 580 (void) sprintf(buffer + strlen(buffer), " "); 581 } 582 } 583 584 out: 585 if (bp->blk_birth == 0) { 586 (void) sprintf(buffer + strlen(buffer), "<hole>"); 587 (void) printf("%s\n", buffer); 588 } else { 589 // XXBP - Need to print number of active BPs here 590 (void) sprintf(buffer + strlen(buffer), 591 "vdev=%llu off=%llx %llxL/%llxP/%llxA F=%llu B=%llu", 592 (u_longlong_t)DVA_GET_VDEV(dva), 593 (u_longlong_t)DVA_GET_OFFSET(dva), 594 (u_longlong_t)BP_GET_LSIZE(bp), 595 (u_longlong_t)BP_GET_PSIZE(bp), 596 (u_longlong_t)DVA_GET_ASIZE(dva), 597 (u_longlong_t)bp->blk_fill, 598 (u_longlong_t)bp->blk_birth); 599 600 (void) printf("%s\n", buffer); 601 } 602 603 return (bc->bc_errno ? ERESTART : 0); 604 } 605 606 /*ARGSUSED*/ 607 static void 608 dump_indirect(objset_t *os, uint64_t object, void *data, size_t size) 609 { 610 traverse_handle_t *th; 611 uint64_t objset = dmu_objset_id(os); 612 int advance = zdb_advance; 613 614 (void) printf("Indirect blocks:\n"); 615 616 if (object == 0) 617 advance |= ADVANCE_DATA; 618 619 th = traverse_init(dmu_objset_spa(os), zdb_indirect_cb, NULL, advance, 620 ZIO_FLAG_CANFAIL); 621 th->th_noread = zdb_noread; 622 623 traverse_add_dnode(th, 0, -1ULL, objset, object); 624 625 while (traverse_more(th) == EAGAIN) 626 continue; 627 628 (void) printf("\n"); 629 630 traverse_fini(th); 631 } 632 633 /*ARGSUSED*/ 634 static void 635 dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size) 636 { 637 dsl_dir_phys_t *dd = data; 638 time_t crtime; 639 char used[6], compressed[6], uncompressed[6], quota[6], resv[6]; 640 641 if (dd == NULL) 642 return; 643 644 ASSERT(size == sizeof (*dd)); 645 646 crtime = dd->dd_creation_time; 647 nicenum(dd->dd_used_bytes, used); 648 nicenum(dd->dd_compressed_bytes, compressed); 649 nicenum(dd->dd_uncompressed_bytes, uncompressed); 650 nicenum(dd->dd_quota, quota); 651 nicenum(dd->dd_reserved, resv); 652 653 (void) printf("\t\tcreation_time = %s", ctime(&crtime)); 654 (void) printf("\t\thead_dataset_obj = %llu\n", 655 (u_longlong_t)dd->dd_head_dataset_obj); 656 (void) printf("\t\tparent_dir_obj = %llu\n", 657 (u_longlong_t)dd->dd_parent_obj); 658 (void) printf("\t\tclone_parent_obj = %llu\n", 659 (u_longlong_t)dd->dd_clone_parent_obj); 660 (void) printf("\t\tchild_dir_zapobj = %llu\n", 661 (u_longlong_t)dd->dd_child_dir_zapobj); 662 (void) printf("\t\tused_bytes = %s\n", used); 663 (void) printf("\t\tcompressed_bytes = %s\n", compressed); 664 (void) printf("\t\tuncompressed_bytes = %s\n", uncompressed); 665 (void) printf("\t\tquota = %s\n", quota); 666 (void) printf("\t\treserved = %s\n", resv); 667 (void) printf("\t\tprops_zapobj = %llu\n", 668 (u_longlong_t)dd->dd_props_zapobj); 669 } 670 671 /*ARGSUSED*/ 672 static void 673 dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size) 674 { 675 dsl_dataset_phys_t *ds = data; 676 time_t crtime; 677 char used[6], compressed[6], uncompressed[6], unique[6], blkbuf[300]; 678 679 if (ds == NULL) 680 return; 681 682 ASSERT(size == sizeof (*ds)); 683 crtime = ds->ds_creation_time; 684 nicenum(ds->ds_used_bytes, used); 685 nicenum(ds->ds_compressed_bytes, compressed); 686 nicenum(ds->ds_uncompressed_bytes, uncompressed); 687 nicenum(ds->ds_unique_bytes, unique); 688 sprintf_blkptr(blkbuf, &ds->ds_bp); 689 690 (void) printf("\t\tdataset_obj = %llu\n", 691 (u_longlong_t)ds->ds_dir_obj); 692 (void) printf("\t\tprev_snap_obj = %llu\n", 693 (u_longlong_t)ds->ds_prev_snap_obj); 694 (void) printf("\t\tprev_snap_txg = %llu\n", 695 (u_longlong_t)ds->ds_prev_snap_txg); 696 (void) printf("\t\tnext_snap_obj = %llu\n", 697 (u_longlong_t)ds->ds_next_snap_obj); 698 (void) printf("\t\tsnapnames_zapobj = %llu\n", 699 (u_longlong_t)ds->ds_snapnames_zapobj); 700 (void) printf("\t\tnum_children = %llu\n", 701 (u_longlong_t)ds->ds_num_children); 702 (void) printf("\t\tcreation_time = %s", ctime(&crtime)); 703 (void) printf("\t\tcreation_txg = %llu\n", 704 (u_longlong_t)ds->ds_creation_txg); 705 (void) printf("\t\tdeadlist_obj = %llu\n", 706 (u_longlong_t)ds->ds_deadlist_obj); 707 (void) printf("\t\tused_bytes = %s\n", used); 708 (void) printf("\t\tcompressed_bytes = %s\n", compressed); 709 (void) printf("\t\tuncompressed_bytes = %s\n", uncompressed); 710 (void) printf("\t\tunique = %s\n", unique); 711 (void) printf("\t\tfsid_guid = %llu\n", 712 (u_longlong_t)ds->ds_fsid_guid); 713 (void) printf("\t\tguid = %llu\n", 714 (u_longlong_t)ds->ds_guid); 715 (void) printf("\t\trestoring = %llu\n", 716 (u_longlong_t)ds->ds_restoring); 717 (void) printf("\t\tbp = %s\n", blkbuf); 718 } 719 720 static void 721 dump_bplist(objset_t *mos, uint64_t object, char *name) 722 { 723 bplist_t bpl = { 0 }; 724 blkptr_t blk, *bp = &blk; 725 uint64_t itor = 0; 726 char numbuf[6]; 727 728 if (dump_opt['d'] < 3) 729 return; 730 731 bplist_open(&bpl, mos, object); 732 if (bplist_empty(&bpl)) { 733 bplist_close(&bpl); 734 return; 735 } 736 737 nicenum(bpl.bpl_phys->bpl_bytes, numbuf); 738 739 (void) printf("\n %s: %llu entries, %s\n", 740 name, (u_longlong_t)bpl.bpl_phys->bpl_entries, numbuf); 741 742 if (dump_opt['d'] < 5) { 743 bplist_close(&bpl); 744 return; 745 } 746 747 (void) printf("\n"); 748 749 while (bplist_iterate(&bpl, &itor, bp) == 0) { 750 ASSERT(bp->blk_birth != 0); 751 // XXBP - Do we want to see all DVAs, or just one? 752 (void) printf("\tItem %3llu: vdev=%llu off=%llx " 753 "%llxL/%llxP/%llxA F=%llu B=%llu\n", 754 (u_longlong_t)itor - 1, 755 (u_longlong_t)DVA_GET_VDEV(&bp->blk_dva[0]), 756 (u_longlong_t)DVA_GET_OFFSET(&bp->blk_dva[0]), 757 (u_longlong_t)BP_GET_LSIZE(bp), 758 (u_longlong_t)BP_GET_PSIZE(bp), 759 (u_longlong_t)DVA_GET_ASIZE(&bp->blk_dva[0]), 760 (u_longlong_t)bp->blk_fill, 761 (u_longlong_t)bp->blk_birth); 762 } 763 764 bplist_close(&bpl); 765 } 766 767 static char * 768 znode_path(objset_t *os, uint64_t object, char *pathbuf, size_t size) 769 { 770 dmu_buf_t *db; 771 dmu_object_info_t doi; 772 znode_phys_t *zp; 773 uint64_t parent = 0; 774 size_t complen; 775 char component[MAXNAMELEN + 1]; 776 char *path; 777 778 path = pathbuf + size; 779 *--path = '\0'; 780 781 for (;;) { 782 db = dmu_bonus_hold(os, object); 783 if (db == NULL) 784 break; 785 786 dmu_buf_read(db); 787 dmu_object_info_from_db(db, &doi); 788 zp = db->db_data; 789 parent = zp->zp_parent; 790 dmu_buf_rele(db); 791 792 if (doi.doi_bonus_type != DMU_OT_ZNODE) 793 break; 794 795 if (parent == object) { 796 if (path[0] != '/') 797 *--path = '/'; 798 return (path); 799 } 800 801 if (zap_value_search(os, parent, object, component) != 0) 802 break; 803 804 complen = strlen(component); 805 path -= complen; 806 bcopy(component, path, complen); 807 *--path = '/'; 808 809 object = parent; 810 } 811 812 (void) sprintf(component, "???<object#%llu>", (u_longlong_t)object); 813 814 complen = strlen(component); 815 path -= complen; 816 bcopy(component, path, complen); 817 818 return (path); 819 } 820 821 /*ARGSUSED*/ 822 static void 823 dump_znode(objset_t *os, uint64_t object, void *data, size_t size) 824 { 825 znode_phys_t *zp = data; 826 time_t z_crtime, z_atime, z_mtime, z_ctime; 827 char path[MAXPATHLEN * 2]; /* allow for xattr and failure prefix */ 828 829 ASSERT(size >= sizeof (znode_phys_t)); 830 831 if (dump_opt['d'] < 3) { 832 (void) printf("\t%s\n", 833 znode_path(os, object, path, sizeof (path))); 834 return; 835 } 836 837 z_crtime = (time_t)zp->zp_crtime[0]; 838 z_atime = (time_t)zp->zp_atime[0]; 839 z_mtime = (time_t)zp->zp_mtime[0]; 840 z_ctime = (time_t)zp->zp_ctime[0]; 841 842 (void) printf("\tpath %s\n", 843 znode_path(os, object, path, sizeof (path))); 844 (void) printf("\tatime %s", ctime(&z_atime)); 845 (void) printf("\tmtime %s", ctime(&z_mtime)); 846 (void) printf("\tctime %s", ctime(&z_ctime)); 847 (void) printf("\tcrtime %s", ctime(&z_crtime)); 848 (void) printf("\tgen %llu\n", (u_longlong_t)zp->zp_gen); 849 (void) printf("\tmode %llo\n", (u_longlong_t)zp->zp_mode); 850 (void) printf("\tsize %llu\n", (u_longlong_t)zp->zp_size); 851 (void) printf("\tparent %llu\n", (u_longlong_t)zp->zp_parent); 852 (void) printf("\tlinks %llu\n", (u_longlong_t)zp->zp_links); 853 (void) printf("\txattr %llu\n", (u_longlong_t)zp->zp_xattr); 854 (void) printf("\trdev 0x%016llx\n", (u_longlong_t)zp->zp_rdev); 855 } 856 857 /*ARGSUSED*/ 858 static void 859 dump_acl(objset_t *os, uint64_t object, void *data, size_t size) 860 { 861 } 862 863 /*ARGSUSED*/ 864 static void 865 dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size) 866 { 867 } 868 869 static object_viewer_t *object_viewer[DMU_OT_NUMTYPES] = { 870 dump_none, /* unallocated */ 871 dump_zap, /* object directory */ 872 dump_uint64, /* object array */ 873 dump_none, /* packed nvlist */ 874 dump_packed_nvlist, /* packed nvlist size */ 875 dump_none, /* bplist */ 876 dump_none, /* bplist header */ 877 dump_none, /* SPA space map header */ 878 dump_none, /* SPA space map */ 879 dump_none, /* ZIL intent log */ 880 dump_dnode, /* DMU dnode */ 881 dump_dmu_objset, /* DMU objset */ 882 dump_dsl_dir, /* DSL directory */ 883 dump_zap, /* DSL directory child map */ 884 dump_zap, /* DSL dataset snap map */ 885 dump_zap, /* DSL props */ 886 dump_dsl_dataset, /* DSL dataset */ 887 dump_znode, /* ZFS znode */ 888 dump_acl, /* ZFS ACL */ 889 dump_uint8, /* ZFS plain file */ 890 dump_zap, /* ZFS directory */ 891 dump_zap, /* ZFS master node */ 892 dump_zap, /* ZFS delete queue */ 893 dump_uint8, /* zvol object */ 894 dump_zap, /* zvol prop */ 895 dump_uint8, /* other uint8[] */ 896 dump_uint64, /* other uint64[] */ 897 dump_zap, /* other ZAP */ 898 }; 899 900 static void 901 dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header) 902 { 903 dmu_buf_t *db = NULL; 904 dmu_object_info_t doi; 905 dnode_t *dn; 906 void *bonus = NULL; 907 size_t bsize = 0; 908 char iblk[6], dblk[6], lsize[6], psize[6], bonus_size[6], segsize[6]; 909 char aux[50]; 910 int error; 911 912 if (*print_header) { 913 (void) printf("\n Object lvl iblk dblk lsize" 914 " psize type\n"); 915 *print_header = 0; 916 } 917 918 if (object == 0) { 919 dn = os->os->os_meta_dnode; 920 } else { 921 db = dmu_bonus_hold(os, object); 922 if (db == NULL) 923 fatal("dmu_bonus_hold(%llu) failed", object); 924 dmu_buf_read(db); 925 bonus = db->db_data; 926 bsize = db->db_size; 927 dn = ((dmu_buf_impl_t *)db)->db_dnode; 928 } 929 dmu_object_info_from_dnode(dn, &doi); 930 931 nicenum(doi.doi_metadata_block_size, iblk); 932 nicenum(doi.doi_data_block_size, dblk); 933 nicenum(doi.doi_data_block_size * (doi.doi_max_block_offset + 1), 934 lsize); 935 nicenum(doi.doi_physical_blks << 9, psize); 936 nicenum(doi.doi_bonus_size, bonus_size); 937 938 aux[0] = '\0'; 939 940 if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) 941 (void) snprintf(aux + strlen(aux), sizeof (aux), " (K=%s)", 942 zio_checksum_table[doi.doi_checksum].ci_name); 943 944 if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) 945 (void) snprintf(aux + strlen(aux), sizeof (aux), " (Z=%s)", 946 zio_compress_table[doi.doi_compress].ci_name); 947 948 (void) printf("%10lld %3u %5s %5s %5s %5s %s%s\n", 949 (u_longlong_t)object, doi.doi_indirection, iblk, dblk, lsize, 950 psize, dmu_ot[doi.doi_type].ot_name, aux); 951 952 if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) { 953 (void) printf("%10s %3s %5s %5s %5s %5s %s\n", 954 "", "", "", "", bonus_size, "bonus", 955 dmu_ot[doi.doi_bonus_type].ot_name); 956 } 957 958 if (verbosity >= 4) { 959 object_viewer[doi.doi_bonus_type](os, object, bonus, bsize); 960 object_viewer[doi.doi_type](os, object, NULL, 0); 961 *print_header = 1; 962 } 963 964 if (verbosity >= 5) 965 dump_indirect(os, object, NULL, 0); 966 967 if (verbosity >= 5) { 968 /* 969 * Report the list of segments that comprise the object. 970 */ 971 uint64_t start = 0; 972 uint64_t end; 973 uint64_t blkfill = 1; 974 int minlvl = 1; 975 976 if (dn->dn_type == DMU_OT_DNODE) { 977 minlvl = 0; 978 blkfill = DNODES_PER_BLOCK; 979 } 980 981 for (;;) { 982 error = dnode_next_offset(dn, B_FALSE, &start, minlvl, 983 blkfill); 984 if (error) 985 break; 986 end = start; 987 error = dnode_next_offset(dn, B_TRUE, &end, minlvl, 988 blkfill); 989 nicenum(end - start, segsize); 990 (void) printf("\t\tsegment [%016llx, %016llx)" 991 " size %5s\n", (u_longlong_t)start, 992 (u_longlong_t)end, segsize); 993 if (error) 994 break; 995 start = end; 996 } 997 } 998 999 if (db != NULL) 1000 dmu_buf_rele(db); 1001 } 1002 1003 static char *objset_types[DMU_OST_NUMTYPES] = { 1004 "NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" }; 1005 1006 /*ARGSUSED*/ 1007 static void 1008 dump_dir(objset_t *os) 1009 { 1010 dmu_objset_stats_t dds; 1011 uint64_t object, object_count; 1012 char numbuf[8]; 1013 char blkbuf[300]; 1014 char osname[MAXNAMELEN]; 1015 char *type = "UNKNOWN"; 1016 int verbosity = dump_opt['d']; 1017 int print_header = 1; 1018 int i, error; 1019 1020 dmu_objset_stats(os, &dds); 1021 1022 if (dds.dds_type < DMU_OST_NUMTYPES) 1023 type = objset_types[dds.dds_type]; 1024 1025 if (dds.dds_type == DMU_OST_META) { 1026 dds.dds_creation_txg = TXG_INITIAL; 1027 dds.dds_last_txg = os->os->os_rootbp.blk_birth; 1028 dds.dds_objects_used = os->os->os_rootbp.blk_fill; 1029 dds.dds_space_refd = 1030 os->os->os_spa->spa_dsl_pool->dp_mos_dir->dd_used_bytes; 1031 } 1032 1033 ASSERT3U(dds.dds_objects_used, ==, os->os->os_rootbp.blk_fill); 1034 1035 nicenum(dds.dds_space_refd, numbuf); 1036 1037 if (verbosity >= 4) { 1038 (void) strcpy(blkbuf, ", rootbp "); 1039 sprintf_blkptr(blkbuf + strlen(blkbuf), &os->os->os_rootbp); 1040 } else { 1041 blkbuf[0] = '\0'; 1042 } 1043 1044 dmu_objset_name(os, osname); 1045 1046 (void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, last_txg %llu, " 1047 "%s, %llu objects%s\n", 1048 osname, type, (u_longlong_t)dmu_objset_id(os), 1049 (u_longlong_t)dds.dds_creation_txg, 1050 (u_longlong_t)dds.dds_last_txg, 1051 numbuf, 1052 (u_longlong_t)dds.dds_objects_used, 1053 blkbuf); 1054 1055 dump_intent_log(dmu_objset_zil(os)); 1056 1057 if (dmu_objset_ds(os) != NULL) 1058 dump_bplist(dmu_objset_pool(os)->dp_meta_objset, 1059 dmu_objset_ds(os)->ds_phys->ds_deadlist_obj, "Deadlist"); 1060 1061 if (verbosity < 2) 1062 return; 1063 1064 if (zopt_objects != 0) { 1065 for (i = 0; i < zopt_objects; i++) 1066 dump_object(os, zopt_object[i], verbosity, 1067 &print_header); 1068 (void) printf("\n"); 1069 return; 1070 } 1071 1072 dump_object(os, 0, verbosity, &print_header); 1073 object_count = 1; 1074 1075 object = 0; 1076 while ((error = dmu_object_next(os, &object, B_FALSE)) == 0) { 1077 dump_object(os, object, verbosity, &print_header); 1078 object_count++; 1079 } 1080 1081 ASSERT3U(object_count, ==, dds.dds_objects_used); 1082 1083 (void) printf("\n"); 1084 1085 if (error != ESRCH) 1086 fatal("dmu_object_next() = %d", error); 1087 } 1088 1089 static void 1090 dump_uberblock(uberblock_t *ub) 1091 { 1092 time_t timestamp = ub->ub_timestamp; 1093 1094 (void) printf("Uberblock\n\n"); 1095 (void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic); 1096 (void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version); 1097 (void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg); 1098 (void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum); 1099 (void) printf("\ttimestamp = %llu UTC = %s", 1100 (u_longlong_t)ub->ub_timestamp, asctime(localtime(×tamp))); 1101 if (dump_opt['u'] >= 3) { 1102 char blkbuf[300]; 1103 sprintf_blkptr(blkbuf, &ub->ub_rootbp); 1104 (void) printf("\trootbp = %s\n", blkbuf); 1105 } 1106 (void) printf("\n"); 1107 } 1108 1109 static void 1110 dump_config(const char *pool) 1111 { 1112 spa_t *spa = NULL; 1113 1114 mutex_enter(&spa_namespace_lock); 1115 while ((spa = spa_next(spa)) != NULL) { 1116 if (pool == NULL) 1117 (void) printf("%s\n", spa_name(spa)); 1118 if (pool == NULL || strcmp(pool, spa_name(spa)) == 0) 1119 dump_nvlist(spa->spa_config, 4); 1120 } 1121 mutex_exit(&spa_namespace_lock); 1122 } 1123 1124 static void 1125 dump_label(const char *dev) 1126 { 1127 int fd; 1128 vdev_label_t label; 1129 char *buf = label.vl_vdev_phys.vp_nvlist; 1130 size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist); 1131 struct stat64 statbuf; 1132 uint64_t psize; 1133 int l; 1134 1135 if ((fd = open(dev, O_RDONLY)) < 0) { 1136 (void) printf("cannot open '%s': %s\n", dev, strerror(errno)); 1137 exit(1); 1138 } 1139 1140 if (fstat64(fd, &statbuf) != 0) { 1141 (void) printf("failed to stat '%s': %s\n", dev, 1142 strerror(errno)); 1143 exit(1); 1144 } 1145 1146 psize = statbuf.st_size; 1147 psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t)); 1148 1149 for (l = 0; l < VDEV_LABELS; l++) { 1150 1151 nvlist_t *config = NULL; 1152 1153 (void) printf("--------------------------------------------\n"); 1154 (void) printf("LABEL %d\n", l); 1155 (void) printf("--------------------------------------------\n"); 1156 1157 if (pread(fd, &label, sizeof (label), 1158 vdev_label_offset(psize, l, 0)) != sizeof (label)) { 1159 (void) printf("failed to read label %d\n", l); 1160 continue; 1161 } 1162 1163 if (nvlist_unpack(buf, buflen, &config, 0) != 0) { 1164 (void) printf("failed to unpack label %d\n", l); 1165 continue; 1166 } 1167 dump_nvlist(config, 4); 1168 nvlist_free(config); 1169 } 1170 } 1171 1172 /*ARGSUSED*/ 1173 static void 1174 dump_one_dir(char *dsname, void *arg) 1175 { 1176 int error; 1177 objset_t *os; 1178 1179 error = dmu_objset_open(dsname, DMU_OST_ANY, 1180 DS_MODE_STANDARD | DS_MODE_READONLY, &os); 1181 if (error) { 1182 (void) printf("Could not open %s\n", dsname); 1183 return; 1184 } 1185 dump_dir(os); 1186 dmu_objset_close(os); 1187 } 1188 1189 static void 1190 zdb_space_map_load(spa_t *spa) 1191 { 1192 vdev_t *rvd = spa->spa_root_vdev; 1193 vdev_t *vd; 1194 int c, m, error; 1195 1196 for (c = 0; c < rvd->vdev_children; c++) { 1197 vd = rvd->vdev_child[c]; 1198 for (m = 0; m < vd->vdev_ms_count; m++) { 1199 metaslab_t *msp = vd->vdev_ms[m]; 1200 space_map_t *sm = &msp->ms_allocmap[0]; 1201 mutex_enter(&msp->ms_lock); 1202 error = space_map_load(sm, msp->ms_smo, SM_ALLOC, 1203 spa->spa_meta_objset, msp->ms_usable_end, 1204 sm->sm_size - msp->ms_usable_space); 1205 mutex_exit(&msp->ms_lock); 1206 if (error) 1207 fatal("%s bad space map #%d, error %d", 1208 spa->spa_name, c, error); 1209 } 1210 } 1211 } 1212 1213 static int 1214 zdb_space_map_claim(spa_t *spa, blkptr_t *bp) 1215 { 1216 dva_t *dva = &bp->blk_dva[0]; 1217 uint64_t vdev = DVA_GET_VDEV(dva); 1218 uint64_t offset = DVA_GET_OFFSET(dva); 1219 uint64_t size = DVA_GET_ASIZE(dva); 1220 vdev_t *vd; 1221 metaslab_t *msp; 1222 space_map_t *allocmap, *freemap; 1223 int error; 1224 1225 if ((vd = vdev_lookup_top(spa, vdev)) == NULL) 1226 return (ENXIO); 1227 1228 if ((offset >> vd->vdev_ms_shift) >= vd->vdev_ms_count) 1229 return (ENXIO); 1230 1231 if (DVA_GET_GANG(dva)) { 1232 zio_gbh_phys_t gbh; 1233 blkptr_t blk = *bp; 1234 int g; 1235 1236 /* LINTED - compile time assert */ 1237 ASSERT(sizeof (zio_gbh_phys_t) == SPA_GANGBLOCKSIZE); 1238 size = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE); 1239 DVA_SET_GANG(&blk.blk_dva[0], 0); 1240 DVA_SET_ASIZE(&blk.blk_dva[0], size); 1241 BP_SET_CHECKSUM(&blk, ZIO_CHECKSUM_GANG_HEADER); 1242 BP_SET_PSIZE(&blk, SPA_GANGBLOCKSIZE); 1243 BP_SET_LSIZE(&blk, SPA_GANGBLOCKSIZE); 1244 BP_SET_COMPRESS(&blk, ZIO_COMPRESS_OFF); 1245 error = zio_wait(zio_read(NULL, spa, &blk, 1246 &gbh, SPA_GANGBLOCKSIZE, NULL, NULL, 1247 ZIO_PRIORITY_SYNC_READ, 1248 ZIO_FLAG_CANFAIL | ZIO_FLAG_CONFIG_HELD)); 1249 if (error) 1250 return (error); 1251 if (BP_SHOULD_BYTESWAP(&blk)) 1252 byteswap_uint64_array(&gbh, SPA_GANGBLOCKSIZE); 1253 for (g = 0; g < SPA_GBH_NBLKPTRS; g++) { 1254 if (gbh.zg_blkptr[g].blk_birth == 0) 1255 break; 1256 error = zdb_space_map_claim(spa, &gbh.zg_blkptr[g]); 1257 if (error) 1258 return (error); 1259 } 1260 } 1261 1262 msp = vd->vdev_ms[offset >> vd->vdev_ms_shift]; 1263 allocmap = &msp->ms_allocmap[0]; 1264 freemap = &msp->ms_freemap[0]; 1265 1266 mutex_enter(&msp->ms_lock); 1267 if (space_map_contains(freemap, offset, size)) { 1268 mutex_exit(&msp->ms_lock); 1269 return (EAGAIN); /* allocated more than once */ 1270 } 1271 1272 if (!space_map_contains(allocmap, offset, size)) { 1273 mutex_exit(&msp->ms_lock); 1274 return (ESTALE); /* not allocated at all */ 1275 } 1276 1277 space_map_remove(allocmap, offset, size); 1278 space_map_add(freemap, offset, size); 1279 1280 mutex_exit(&msp->ms_lock); 1281 1282 return (0); 1283 } 1284 1285 static void 1286 zdb_leak(space_map_t *sm, uint64_t start, uint64_t size) 1287 { 1288 metaslab_t *msp; 1289 1290 /* LINTED */ 1291 msp = (metaslab_t *)((char *)sm - offsetof(metaslab_t, ms_allocmap[0])); 1292 1293 (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n", 1294 (u_longlong_t)msp->ms_group->mg_vd->vdev_id, 1295 (u_longlong_t)start, 1296 (u_longlong_t)size); 1297 } 1298 1299 static void 1300 zdb_space_map_vacate(spa_t *spa) 1301 { 1302 vdev_t *rvd = spa->spa_root_vdev; 1303 vdev_t *vd; 1304 int c, m; 1305 1306 for (c = 0; c < rvd->vdev_children; c++) { 1307 vd = rvd->vdev_child[c]; 1308 for (m = 0; m < vd->vdev_ms_count; m++) { 1309 metaslab_t *msp = vd->vdev_ms[m]; 1310 mutex_enter(&msp->ms_lock); 1311 space_map_vacate(&msp->ms_allocmap[0], zdb_leak, 1312 &msp->ms_allocmap[0]); 1313 space_map_vacate(&msp->ms_freemap[0], NULL, NULL); 1314 mutex_exit(&msp->ms_lock); 1315 } 1316 } 1317 } 1318 1319 static void 1320 zdb_refresh_ubsync(spa_t *spa) 1321 { 1322 uberblock_t ub = { 0 }; 1323 vdev_t *rvd = spa->spa_root_vdev; 1324 zio_t *zio; 1325 1326 /* 1327 * Reopen all devices to purge zdb's vdev caches. 1328 */ 1329 vdev_reopen(rvd, NULL); 1330 1331 /* 1332 * Reload the uberblock. 1333 */ 1334 zio = zio_root(spa, NULL, NULL, 1335 ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); 1336 vdev_uberblock_load(zio, rvd, &ub); 1337 (void) zio_wait(zio); 1338 1339 if (ub.ub_txg != 0) 1340 spa->spa_ubsync = ub; 1341 } 1342 1343 /* 1344 * Verify that the sum of the sizes of all blocks in the pool adds up 1345 * to the SPA's sa_alloc total. 1346 */ 1347 typedef struct zdb_blkstats { 1348 uint64_t zb_asize; 1349 uint64_t zb_lsize; 1350 uint64_t zb_psize; 1351 uint64_t zb_count; 1352 } zdb_blkstats_t; 1353 1354 #define DMU_OT_DEFERRED DMU_OT_NONE 1355 #define DMU_OT_TOTAL DMU_OT_NUMTYPES 1356 1357 #define ZB_TOTAL ZB_MAXLEVEL 1358 1359 typedef struct zdb_cb { 1360 zdb_blkstats_t zcb_type[ZB_TOTAL + 1][DMU_OT_TOTAL + 1]; 1361 uint64_t zcb_errors[256]; 1362 traverse_blk_cache_t *zcb_cache; 1363 int zcb_readfails; 1364 int zcb_haderrors; 1365 } zdb_cb_t; 1366 1367 static blkptr_cb_t zdb_blkptr_cb; 1368 1369 static void 1370 zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, int type) 1371 { 1372 int i, error; 1373 1374 for (i = 0; i < 4; i++) { 1375 int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL; 1376 int t = (i & 1) ? type : DMU_OT_TOTAL; 1377 zdb_blkstats_t *zb = &zcb->zcb_type[l][t]; 1378 1379 zb->zb_asize += BP_GET_ASIZE(bp); 1380 zb->zb_lsize += BP_GET_LSIZE(bp); 1381 zb->zb_psize += BP_GET_PSIZE(bp); 1382 zb->zb_count++; 1383 } 1384 1385 if (dump_opt['L']) 1386 return; 1387 1388 error = zdb_space_map_claim(spa, bp); 1389 1390 if (error == 0) 1391 return; 1392 1393 if (error == EAGAIN) 1394 (void) fatal("double-allocation, bp=%p", bp); 1395 1396 if (error == ESTALE) 1397 (void) fatal("reference to freed block, bp=%p", bp); 1398 1399 (void) fatal("fatal error %d in bp %p", error, bp); 1400 } 1401 1402 static void 1403 zdb_log_block_cb(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t first_txg) 1404 { 1405 if (bp->blk_birth < first_txg) { 1406 zdb_cb_t *zcb = arg; 1407 traverse_blk_cache_t bc = *zcb->zcb_cache; 1408 zbookmark_t *zb = &bc.bc_bookmark; 1409 1410 zb->zb_objset = bp->blk_cksum.zc_word[2]; 1411 zb->zb_blkid = bp->blk_cksum.zc_word[3]; 1412 bc.bc_blkptr = *bp; 1413 1414 (void) zdb_blkptr_cb(&bc, zilog->zl_spa, arg); 1415 } 1416 } 1417 1418 static int 1419 zdb_blkptr_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg) 1420 { 1421 zbookmark_t *zb = &bc->bc_bookmark; 1422 zdb_cb_t *zcb = arg; 1423 blkptr_t *bp = &bc->bc_blkptr; 1424 dmu_object_type_t type = BP_GET_TYPE(bp); 1425 char blkbuf[300]; 1426 int error = 0; 1427 1428 if (bc->bc_errno) { 1429 if (zcb->zcb_readfails++ < 10 && dump_opt['L']) { 1430 zdb_refresh_ubsync(spa); 1431 error = EAGAIN; 1432 } else { 1433 zcb->zcb_haderrors = 1; 1434 zcb->zcb_errors[bc->bc_errno]++; 1435 error = ERESTART; 1436 } 1437 1438 if (dump_opt['b'] >= 3 || (dump_opt['b'] >= 2 && bc->bc_errno)) 1439 sprintf_blkptr(blkbuf, bp); 1440 else 1441 blkbuf[0] = '\0'; 1442 1443 (void) printf("zdb_blkptr_cb: Got error %d reading " 1444 "<%llu, %llu, %d, %llx> %s -- %s\n", 1445 bc->bc_errno, 1446 (u_longlong_t)zb->zb_objset, 1447 (u_longlong_t)zb->zb_object, 1448 zb->zb_level, 1449 (u_longlong_t)zb->zb_blkid, 1450 blkbuf, 1451 error == EAGAIN ? "retrying" : "skipping"); 1452 1453 return (error); 1454 } 1455 1456 zcb->zcb_readfails = 0; 1457 1458 ASSERT(bp->blk_birth != 0); 1459 1460 zdb_count_block(spa, zcb, bp, type); 1461 1462 if (dump_opt['b'] >= 4) { 1463 sprintf_blkptr(blkbuf, bp); 1464 (void) printf("objset %llu object %llu offset 0x%llx %s\n", 1465 (u_longlong_t)zb->zb_objset, 1466 (u_longlong_t)zb->zb_object, 1467 (u_longlong_t)blkid2offset(bc->bc_dnode, 1468 zb->zb_level, zb->zb_blkid), 1469 blkbuf); 1470 } 1471 1472 if (type == DMU_OT_OBJSET) { 1473 objset_phys_t *osphys = bc->bc_data; 1474 zilog_t zilog = { 0 }; 1475 zilog.zl_header = &osphys->os_zil_header; 1476 zilog.zl_spa = spa; 1477 1478 zcb->zcb_cache = bc; 1479 1480 zil_parse(&zilog, zdb_log_block_cb, NULL, zcb, 1481 spa_first_txg(spa)); 1482 } 1483 1484 return (0); 1485 } 1486 1487 static int 1488 dump_block_stats(spa_t *spa) 1489 { 1490 traverse_handle_t *th; 1491 zdb_cb_t zcb = { 0 }; 1492 zdb_blkstats_t *zb, *tzb; 1493 uint64_t alloc, space; 1494 int leaks = 0; 1495 int advance = zdb_advance; 1496 int flags; 1497 int e; 1498 1499 if (dump_opt['c']) 1500 advance |= ADVANCE_DATA; 1501 1502 advance |= ADVANCE_PRUNE; 1503 1504 (void) printf("\nTraversing all blocks to %sverify" 1505 " nothing leaked ...\n", 1506 dump_opt['c'] ? "verify checksums and " : ""); 1507 1508 /* 1509 * Load all space maps. As we traverse the pool, if we find a block 1510 * that's not in its space map, that indicates a double-allocation, 1511 * reference to a freed block, or an unclaimed block. Otherwise we 1512 * remove the block from the space map. If the space maps are not 1513 * empty when we're done, that indicates leaked blocks. 1514 */ 1515 if (!dump_opt['L']) 1516 zdb_space_map_load(spa); 1517 1518 /* 1519 * If there's a deferred-free bplist, process that first. 1520 */ 1521 if (spa->spa_sync_bplist_obj != 0) { 1522 bplist_t *bpl = &spa->spa_sync_bplist; 1523 blkptr_t blk; 1524 uint64_t itor = 0; 1525 1526 bplist_open(bpl, spa->spa_meta_objset, 1527 spa->spa_sync_bplist_obj); 1528 1529 while (bplist_iterate(bpl, &itor, &blk) == 0) { 1530 zdb_count_block(spa, &zcb, &blk, DMU_OT_DEFERRED); 1531 if (dump_opt['b'] >= 4) { 1532 char blkbuf[300]; 1533 sprintf_blkptr(blkbuf, &blk); 1534 (void) printf("[%s] %s\n", 1535 "deferred free", blkbuf); 1536 } 1537 } 1538 1539 bplist_close(bpl); 1540 } 1541 1542 /* 1543 * Now traverse the pool. If we're read all data to verify checksums, 1544 * do a scrubbing read so that we validate all copies. 1545 */ 1546 flags = ZIO_FLAG_CANFAIL; 1547 if (advance & ADVANCE_DATA) 1548 flags |= ZIO_FLAG_SCRUB; 1549 th = traverse_init(spa, zdb_blkptr_cb, &zcb, advance, flags); 1550 th->th_noread = zdb_noread; 1551 1552 traverse_add_pool(th, 0, -1ULL); 1553 1554 while (traverse_more(th) == EAGAIN) 1555 continue; 1556 1557 traverse_fini(th); 1558 1559 if (zcb.zcb_haderrors) { 1560 (void) printf("\nError counts:\n\n"); 1561 (void) printf("\t%5s %s\n", "errno", "count"); 1562 for (e = 0; e < 256; e++) { 1563 if (zcb.zcb_errors[e] != 0) { 1564 (void) printf("\t%5d %llu\n", 1565 e, (u_longlong_t)zcb.zcb_errors[e]); 1566 } 1567 } 1568 } 1569 1570 /* 1571 * Report any leaked segments. 1572 */ 1573 if (!dump_opt['L']) 1574 zdb_space_map_vacate(spa); 1575 1576 if (dump_opt['L']) 1577 (void) printf("\n\n *** Live pool traversal; " 1578 "block counts are only approximate ***\n\n"); 1579 1580 alloc = spa_get_alloc(spa); 1581 space = spa_get_space(spa); 1582 1583 tzb = &zcb.zcb_type[ZB_TOTAL][DMU_OT_TOTAL]; 1584 1585 if (tzb->zb_asize == alloc) { 1586 (void) printf("\n\tNo leaks (block sum matches space" 1587 " maps exactly)\n"); 1588 } else { 1589 (void) printf("block traversal size %llu != alloc %llu " 1590 "(leaked %lld)\n", 1591 (u_longlong_t)tzb->zb_asize, 1592 (u_longlong_t)alloc, 1593 (u_longlong_t)(alloc - tzb->zb_asize)); 1594 leaks = 1; 1595 } 1596 1597 if (tzb->zb_count == 0) 1598 return (2); 1599 1600 (void) printf("\n"); 1601 (void) printf("\tbp count: %10llu\n", 1602 (u_longlong_t)tzb->zb_count); 1603 (void) printf("\tbp logical: %10llu\t avg: %6llu\n", 1604 (u_longlong_t)tzb->zb_lsize, 1605 (u_longlong_t)(tzb->zb_lsize / tzb->zb_count)); 1606 (void) printf("\tbp physical: %10llu\t avg:" 1607 " %6llu\tcompression: %6.2f\n", 1608 (u_longlong_t)tzb->zb_psize, 1609 (u_longlong_t)(tzb->zb_psize / tzb->zb_count), 1610 (double)tzb->zb_lsize / tzb->zb_psize); 1611 (void) printf("\tbp allocated: %10llu\t avg:" 1612 " %6llu\tcompression: %6.2f\n", 1613 (u_longlong_t)tzb->zb_asize, 1614 (u_longlong_t)(tzb->zb_asize / tzb->zb_count), 1615 (double)tzb->zb_lsize / tzb->zb_asize); 1616 (void) printf("\tSPA allocated: %10llu\tused: %5.2f%%\n", 1617 (u_longlong_t)alloc, 100.0 * alloc / space); 1618 1619 if (dump_opt['b'] >= 2) { 1620 int l, t, level; 1621 (void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE" 1622 "\t avg\t comp\t%%Total\tType\n"); 1623 1624 for (t = 0; t <= DMU_OT_NUMTYPES; t++) { 1625 char csize[6], lsize[6], psize[6], asize[6], avg[6]; 1626 char *typename; 1627 1628 typename = t == DMU_OT_DEFERRED ? "deferred free" : 1629 t == DMU_OT_TOTAL ? "Total" : dmu_ot[t].ot_name; 1630 1631 if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) { 1632 (void) printf("%6s\t%5s\t%5s\t%5s" 1633 "\t%5s\t%5s\t%6s\t%s\n", 1634 "-", 1635 "-", 1636 "-", 1637 "-", 1638 "-", 1639 "-", 1640 "-", 1641 typename); 1642 continue; 1643 } 1644 1645 for (l = ZB_TOTAL - 1; l >= -1; l--) { 1646 level = (l == -1 ? ZB_TOTAL : l); 1647 zb = &zcb.zcb_type[level][t]; 1648 1649 if (zb->zb_asize == 0) 1650 continue; 1651 1652 if (dump_opt['b'] < 3 && level != ZB_TOTAL) 1653 continue; 1654 1655 if (level == 0 && zb->zb_asize == 1656 zcb.zcb_type[ZB_TOTAL][t].zb_asize) 1657 continue; 1658 1659 nicenum(zb->zb_count, csize); 1660 nicenum(zb->zb_lsize, lsize); 1661 nicenum(zb->zb_psize, psize); 1662 nicenum(zb->zb_asize, asize); 1663 nicenum(zb->zb_asize / zb->zb_count, avg); 1664 1665 (void) printf("%6s\t%5s\t%5s\t%5s\t%5s" 1666 "\t%5.2f\t%6.2f\t", 1667 csize, lsize, psize, asize, avg, 1668 (double)zb->zb_lsize / zb->zb_psize, 1669 100.0 * zb->zb_asize / tzb->zb_asize); 1670 1671 if (level == ZB_TOTAL) 1672 (void) printf("%s\n", typename); 1673 else 1674 (void) printf(" L%d %s\n", 1675 level, typename); 1676 } 1677 } 1678 } 1679 1680 (void) printf("\n"); 1681 1682 if (leaks) 1683 return (2); 1684 1685 if (zcb.zcb_haderrors) 1686 return (3); 1687 1688 return (0); 1689 } 1690 1691 static void 1692 dump_zpool(spa_t *spa) 1693 { 1694 dsl_pool_t *dp = spa_get_dsl(spa); 1695 int rc = 0; 1696 1697 if (dump_opt['u']) 1698 dump_uberblock(&spa->spa_uberblock); 1699 1700 if (dump_opt['d'] || dump_opt['i']) { 1701 dump_dir(dp->dp_meta_objset); 1702 if (dump_opt['d'] >= 3) { 1703 dump_bplist(dp->dp_meta_objset, 1704 spa->spa_sync_bplist_obj, "Deferred frees"); 1705 dump_dtl(spa->spa_root_vdev, 0); 1706 dump_metaslabs(spa); 1707 } 1708 dmu_objset_find(spa->spa_name, dump_one_dir, NULL, 1709 DS_FIND_SNAPSHOTS); 1710 } 1711 1712 if (dump_opt['b'] || dump_opt['c']) 1713 rc = dump_block_stats(spa); 1714 1715 if (dump_opt['s']) 1716 show_pool_stats(spa); 1717 1718 if (rc != 0) 1719 exit(rc); 1720 } 1721 1722 int 1723 main(int argc, char **argv) 1724 { 1725 int i, c; 1726 struct rlimit rl = { 1024, 1024 }; 1727 spa_t *spa; 1728 objset_t *os = NULL; 1729 char *endstr; 1730 int dump_all = 1; 1731 int verbose = 0; 1732 int error; 1733 int flag, set; 1734 1735 (void) setrlimit(RLIMIT_NOFILE, &rl); 1736 1737 dprintf_setup(&argc, argv); 1738 1739 while ((c = getopt(argc, argv, "udibcsvCLO:B:Ul")) != -1) { 1740 switch (c) { 1741 case 'u': 1742 case 'd': 1743 case 'i': 1744 case 'b': 1745 case 'c': 1746 case 's': 1747 case 'C': 1748 case 'l': 1749 dump_opt[c]++; 1750 dump_all = 0; 1751 break; 1752 case 'L': 1753 dump_opt[c]++; 1754 break; 1755 case 'O': 1756 endstr = optarg; 1757 if (endstr[0] == '!') { 1758 endstr++; 1759 set = 0; 1760 } else { 1761 set = 1; 1762 } 1763 if (strcmp(endstr, "post") == 0) { 1764 flag = ADVANCE_PRE; 1765 set = !set; 1766 } else if (strcmp(endstr, "pre") == 0) { 1767 flag = ADVANCE_PRE; 1768 } else if (strcmp(endstr, "prune") == 0) { 1769 flag = ADVANCE_PRUNE; 1770 } else if (strcmp(endstr, "data") == 0) { 1771 flag = ADVANCE_DATA; 1772 } else if (strcmp(endstr, "holes") == 0) { 1773 flag = ADVANCE_HOLES; 1774 } else { 1775 usage(); 1776 } 1777 if (set) 1778 zdb_advance |= flag; 1779 else 1780 zdb_advance &= ~flag; 1781 break; 1782 case 'B': 1783 endstr = optarg - 1; 1784 zdb_noread.zb_objset = strtoull(endstr + 1, &endstr, 0); 1785 zdb_noread.zb_object = strtoull(endstr + 1, &endstr, 0); 1786 zdb_noread.zb_level = strtol(endstr + 1, &endstr, 0); 1787 zdb_noread.zb_blkid = strtoull(endstr + 1, &endstr, 16); 1788 (void) printf("simulating bad block " 1789 "<%llu, %llu, %d, %llx>\n", 1790 (u_longlong_t)zdb_noread.zb_objset, 1791 (u_longlong_t)zdb_noread.zb_object, 1792 zdb_noread.zb_level, 1793 (u_longlong_t)zdb_noread.zb_blkid); 1794 break; 1795 case 'v': 1796 verbose++; 1797 break; 1798 case 'U': 1799 spa_config_dir = "/tmp"; 1800 break; 1801 default: 1802 usage(); 1803 break; 1804 } 1805 } 1806 1807 kernel_init(FREAD); 1808 1809 for (c = 0; c < 256; c++) { 1810 if (dump_all && c != 'L' && c != 'l') 1811 dump_opt[c] = 1; 1812 if (dump_opt[c]) 1813 dump_opt[c] += verbose; 1814 } 1815 1816 argc -= optind; 1817 argv += optind; 1818 1819 if (argc < 1) { 1820 if (dump_opt['C']) { 1821 dump_config(NULL); 1822 return (0); 1823 } 1824 usage(); 1825 } 1826 1827 if (dump_opt['l']) { 1828 dump_label(argv[0]); 1829 return (0); 1830 } 1831 1832 if (dump_opt['C']) 1833 dump_config(argv[0]); 1834 1835 if (strchr(argv[0], '/') != NULL) { 1836 error = dmu_objset_open(argv[0], DMU_OST_ANY, 1837 DS_MODE_STANDARD | DS_MODE_READONLY, &os); 1838 } else { 1839 error = spa_open(argv[0], &spa, FTAG); 1840 } 1841 1842 if (error) 1843 fatal("can't open %s: error %d", argv[0], error); 1844 1845 argv++; 1846 if (--argc > 0) { 1847 zopt_objects = argc; 1848 zopt_object = calloc(zopt_objects, sizeof (uint64_t)); 1849 for (i = 0; i < zopt_objects; i++) { 1850 errno = 0; 1851 zopt_object[i] = strtoull(argv[i], NULL, 0); 1852 if (zopt_object[i] == 0 && errno != 0) 1853 fatal("bad object number %s: %s", 1854 argv[i], strerror(errno)); 1855 } 1856 } 1857 1858 if (os != NULL) { 1859 dump_dir(os); 1860 dmu_objset_close(os); 1861 } else { 1862 dump_zpool(spa); 1863 spa_close(spa, FTAG); 1864 } 1865 1866 kernel_fini(); 1867 1868 return (0); 1869 } 1870