1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <stdio.h> 29 #include <stdlib.h> 30 #include <sys/zfs_context.h> 31 #include <sys/spa.h> 32 #include <sys/spa_impl.h> 33 #include <sys/dmu.h> 34 #include <sys/zap.h> 35 #include <sys/fs/zfs.h> 36 #include <sys/zfs_znode.h> 37 #include <sys/vdev.h> 38 #include <sys/vdev_impl.h> 39 #include <sys/metaslab_impl.h> 40 #include <sys/dmu_objset.h> 41 #include <sys/dsl_dir.h> 42 #include <sys/dsl_dataset.h> 43 #include <sys/dsl_pool.h> 44 #include <sys/dbuf.h> 45 #include <sys/zil.h> 46 #include <sys/zil_impl.h> 47 #include <sys/stat.h> 48 #include <sys/resource.h> 49 #include <sys/dmu_traverse.h> 50 #include <sys/zio_checksum.h> 51 #include <sys/zio_compress.h> 52 53 const char cmdname[] = "zdb"; 54 uint8_t dump_opt[256]; 55 56 typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size); 57 58 extern void dump_intent_log(zilog_t *); 59 uint64_t *zopt_object = NULL; 60 int zopt_objects = 0; 61 int zdb_advance = ADVANCE_PRE; 62 zbookmark_t zdb_noread = { 0, 0, ZB_NO_LEVEL, 0 }; 63 64 /* 65 * These libumem hooks provide a reasonable set of defaults for the allocator's 66 * debugging facilities. 67 */ 68 const char * 69 _umem_debug_init() 70 { 71 return ("default,verbose"); /* $UMEM_DEBUG setting */ 72 } 73 74 const char * 75 _umem_logging_init(void) 76 { 77 return ("fail,contents"); /* $UMEM_LOGGING setting */ 78 } 79 80 static void 81 usage(void) 82 { 83 (void) fprintf(stderr, 84 "Usage: %s [-udibcsvLU] [-O order] [-B os:obj:level:blkid] " 85 "dataset [object...]\n" 86 " %s -C [pool]\n" 87 " %s -l dev\n", 88 cmdname, cmdname, cmdname); 89 90 (void) fprintf(stderr, " -u uberblock\n"); 91 (void) fprintf(stderr, " -d datasets\n"); 92 (void) fprintf(stderr, " -C cached pool configuration\n"); 93 (void) fprintf(stderr, " -i intent logs\n"); 94 (void) fprintf(stderr, " -b block statistics\n"); 95 (void) fprintf(stderr, " -c checksum all data blocks\n"); 96 (void) fprintf(stderr, " -s report stats on zdb's I/O\n"); 97 (void) fprintf(stderr, " -v verbose (applies to all others)\n"); 98 (void) fprintf(stderr, " -l dump label contents\n"); 99 (void) fprintf(stderr, " -L live pool (allows some errors)\n"); 100 (void) fprintf(stderr, " -O [!]<pre|post|prune|data|holes> " 101 "visitation order\n"); 102 (void) fprintf(stderr, " -U use zpool.cache in /tmp\n"); 103 (void) fprintf(stderr, " -B objset:object:level:blkid -- " 104 "simulate bad block\n"); 105 (void) fprintf(stderr, "Specify an option more than once (e.g. -bb) " 106 "to make only that option verbose\n"); 107 (void) fprintf(stderr, "Default is to dump everything non-verbosely\n"); 108 exit(1); 109 } 110 111 static void 112 fatal(const char *fmt, ...) 113 { 114 va_list ap; 115 116 va_start(ap, fmt); 117 (void) fprintf(stderr, "%s: ", cmdname); 118 (void) vfprintf(stderr, fmt, ap); 119 va_end(ap); 120 (void) fprintf(stderr, "\n"); 121 122 exit(1); 123 } 124 125 static void 126 dump_nvlist(nvlist_t *list, int indent) 127 { 128 nvpair_t *elem = NULL; 129 130 while ((elem = nvlist_next_nvpair(list, elem)) != NULL) { 131 switch (nvpair_type(elem)) { 132 case DATA_TYPE_STRING: 133 { 134 char *value; 135 136 VERIFY(nvpair_value_string(elem, &value) == 0); 137 (void) printf("%*s%s='%s'\n", indent, "", 138 nvpair_name(elem), value); 139 } 140 break; 141 142 case DATA_TYPE_UINT64: 143 { 144 uint64_t value; 145 146 VERIFY(nvpair_value_uint64(elem, &value) == 0); 147 (void) printf("%*s%s=%llu\n", indent, "", 148 nvpair_name(elem), (u_longlong_t)value); 149 } 150 break; 151 152 case DATA_TYPE_NVLIST: 153 { 154 nvlist_t *value; 155 156 VERIFY(nvpair_value_nvlist(elem, &value) == 0); 157 (void) printf("%*s%s\n", indent, "", 158 nvpair_name(elem)); 159 dump_nvlist(value, indent + 4); 160 } 161 break; 162 163 case DATA_TYPE_NVLIST_ARRAY: 164 { 165 nvlist_t **value; 166 uint_t c, count; 167 168 VERIFY(nvpair_value_nvlist_array(elem, &value, 169 &count) == 0); 170 171 for (c = 0; c < count; c++) { 172 (void) printf("%*s%s[%u]\n", indent, "", 173 nvpair_name(elem), c); 174 dump_nvlist(value[c], indent + 8); 175 } 176 } 177 break; 178 179 default: 180 181 (void) printf("bad config type %d for %s\n", 182 nvpair_type(elem), nvpair_name(elem)); 183 } 184 } 185 } 186 187 /* ARGSUSED */ 188 static void 189 dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size) 190 { 191 nvlist_t *nv; 192 size_t nvsize = *(uint64_t *)data; 193 char *packed = umem_alloc(nvsize, UMEM_NOFAIL); 194 195 VERIFY(0 == dmu_read(os, object, 0, nvsize, packed)); 196 197 VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0); 198 199 umem_free(packed, nvsize); 200 201 dump_nvlist(nv, 8); 202 203 nvlist_free(nv); 204 } 205 206 const char dump_zap_stars[] = "****************************************"; 207 const int dump_zap_width = sizeof (dump_zap_stars) - 1; 208 209 static void 210 dump_zap_histogram(uint64_t histo[ZAP_HISTOGRAM_SIZE]) 211 { 212 int i; 213 int minidx = ZAP_HISTOGRAM_SIZE - 1; 214 int maxidx = 0; 215 uint64_t max = 0; 216 217 for (i = 0; i < ZAP_HISTOGRAM_SIZE; i++) { 218 if (histo[i] > max) 219 max = histo[i]; 220 if (histo[i] > 0 && i > maxidx) 221 maxidx = i; 222 if (histo[i] > 0 && i < minidx) 223 minidx = i; 224 } 225 226 if (max < dump_zap_width) 227 max = dump_zap_width; 228 229 for (i = minidx; i <= maxidx; i++) 230 (void) printf("\t\t\t%u: %6llu %s\n", i, (u_longlong_t)histo[i], 231 &dump_zap_stars[(max - histo[i]) * dump_zap_width / max]); 232 } 233 234 static void 235 dump_zap_stats(objset_t *os, uint64_t object) 236 { 237 int error; 238 zap_stats_t zs; 239 240 error = zap_get_stats(os, object, &zs); 241 if (error) 242 return; 243 244 if (zs.zs_ptrtbl_len == 0) { 245 ASSERT(zs.zs_num_blocks == 1); 246 (void) printf("\tmicrozap: %llu bytes, %llu entries\n", 247 (u_longlong_t)zs.zs_blocksize, 248 (u_longlong_t)zs.zs_num_entries); 249 return; 250 } 251 252 (void) printf("\tFat ZAP stats:\n"); 253 254 (void) printf("\t\tPointer table:\n"); 255 (void) printf("\t\t\t%llu elements\n", 256 (u_longlong_t)zs.zs_ptrtbl_len); 257 (void) printf("\t\t\tzt_blk: %llu\n", 258 (u_longlong_t)zs.zs_ptrtbl_zt_blk); 259 (void) printf("\t\t\tzt_numblks: %llu\n", 260 (u_longlong_t)zs.zs_ptrtbl_zt_numblks); 261 (void) printf("\t\t\tzt_shift: %llu\n", 262 (u_longlong_t)zs.zs_ptrtbl_zt_shift); 263 (void) printf("\t\t\tzt_blks_copied: %llu\n", 264 (u_longlong_t)zs.zs_ptrtbl_blks_copied); 265 (void) printf("\t\t\tzt_nextblk: %llu\n", 266 (u_longlong_t)zs.zs_ptrtbl_nextblk); 267 268 (void) printf("\t\tZAP entries: %llu\n", 269 (u_longlong_t)zs.zs_num_entries); 270 (void) printf("\t\tLeaf blocks: %llu\n", 271 (u_longlong_t)zs.zs_num_leafs); 272 (void) printf("\t\tTotal blocks: %llu\n", 273 (u_longlong_t)zs.zs_num_blocks); 274 (void) printf("\t\tzap_block_type: 0x%llx\n", 275 (u_longlong_t)zs.zs_block_type); 276 (void) printf("\t\tzap_magic: 0x%llx\n", 277 (u_longlong_t)zs.zs_magic); 278 (void) printf("\t\tzap_salt: 0x%llx\n", 279 (u_longlong_t)zs.zs_salt); 280 281 (void) printf("\t\tLeafs with 2^n pointers:\n"); 282 dump_zap_histogram(zs.zs_leafs_with_2n_pointers); 283 284 (void) printf("\t\tBlocks with n*5 entries:\n"); 285 dump_zap_histogram(zs.zs_blocks_with_n5_entries); 286 287 (void) printf("\t\tBlocks n/10 full:\n"); 288 dump_zap_histogram(zs.zs_blocks_n_tenths_full); 289 290 (void) printf("\t\tEntries with n chunks:\n"); 291 dump_zap_histogram(zs.zs_entries_using_n_chunks); 292 293 (void) printf("\t\tBuckets with n entries:\n"); 294 dump_zap_histogram(zs.zs_buckets_with_n_entries); 295 } 296 297 /*ARGSUSED*/ 298 static void 299 dump_none(objset_t *os, uint64_t object, void *data, size_t size) 300 { 301 } 302 303 /*ARGSUSED*/ 304 void 305 dump_uint8(objset_t *os, uint64_t object, void *data, size_t size) 306 { 307 } 308 309 /*ARGSUSED*/ 310 static void 311 dump_uint64(objset_t *os, uint64_t object, void *data, size_t size) 312 { 313 } 314 315 /*ARGSUSED*/ 316 static void 317 dump_zap(objset_t *os, uint64_t object, void *data, size_t size) 318 { 319 zap_cursor_t zc; 320 zap_attribute_t attr; 321 void *prop; 322 int i; 323 324 dump_zap_stats(os, object); 325 (void) printf("\n"); 326 327 for (zap_cursor_init(&zc, os, object); 328 zap_cursor_retrieve(&zc, &attr) == 0; 329 zap_cursor_advance(&zc)) { 330 (void) printf("\t\t%s = ", attr.za_name); 331 if (attr.za_num_integers == 0) { 332 (void) printf("\n"); 333 continue; 334 } 335 prop = umem_zalloc(attr.za_num_integers * 336 attr.za_integer_length, UMEM_NOFAIL); 337 (void) zap_lookup(os, object, attr.za_name, 338 attr.za_integer_length, attr.za_num_integers, prop); 339 if (attr.za_integer_length == 1) { 340 (void) printf("%s", (char *)prop); 341 } else { 342 for (i = 0; i < attr.za_num_integers; i++) { 343 switch (attr.za_integer_length) { 344 case 2: 345 (void) printf("%u ", 346 ((uint16_t *)prop)[i]); 347 break; 348 case 4: 349 (void) printf("%u ", 350 ((uint32_t *)prop)[i]); 351 break; 352 case 8: 353 (void) printf("%lld ", 354 (u_longlong_t)((int64_t *)prop)[i]); 355 break; 356 } 357 } 358 } 359 (void) printf("\n"); 360 umem_free(prop, attr.za_num_integers * attr.za_integer_length); 361 } 362 zap_cursor_fini(&zc); 363 } 364 365 static void 366 dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm) 367 { 368 uint64_t alloc, offset, entry; 369 int mapshift = sm->sm_shift; 370 uint64_t mapstart = sm->sm_start; 371 char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID" }; 372 373 if (smo->smo_object == 0) 374 return; 375 376 /* 377 * Print out the freelist entries in both encoded and decoded form. 378 */ 379 alloc = 0; 380 for (offset = 0; offset < smo->smo_objsize; offset += sizeof (entry)) { 381 VERIFY(0 == dmu_read(os, smo->smo_object, offset, 382 sizeof (entry), &entry)); 383 if (SM_DEBUG_DECODE(entry)) { 384 (void) printf("\t\t[%4llu] %s: txg %llu, pass %llu\n", 385 (u_longlong_t)(offset / sizeof (entry)), 386 ddata[SM_DEBUG_ACTION_DECODE(entry)], 387 SM_DEBUG_TXG_DECODE(entry), 388 SM_DEBUG_SYNCPASS_DECODE(entry)); 389 } else { 390 (void) printf("\t\t[%4llu] %c range:" 391 " %08llx-%08llx size: %06llx\n", 392 (u_longlong_t)(offset / sizeof (entry)), 393 SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F', 394 (SM_OFFSET_DECODE(entry) << mapshift) + mapstart, 395 (SM_OFFSET_DECODE(entry) << mapshift) + mapstart + 396 (SM_RUN_DECODE(entry) << mapshift), 397 (SM_RUN_DECODE(entry) << mapshift)); 398 if (SM_TYPE_DECODE(entry) == SM_ALLOC) 399 alloc += SM_RUN_DECODE(entry) << mapshift; 400 else 401 alloc -= SM_RUN_DECODE(entry) << mapshift; 402 } 403 } 404 if (alloc != smo->smo_alloc) { 405 (void) printf("space_map_object alloc (%llu) INCONSISTENT " 406 "with space map summary (%llu)\n", 407 (u_longlong_t)smo->smo_alloc, (u_longlong_t)alloc); 408 } 409 } 410 411 static void 412 dump_metaslab(metaslab_t *msp) 413 { 414 char freebuf[5]; 415 space_map_obj_t *smo = msp->ms_smo; 416 vdev_t *vd = msp->ms_group->mg_vd; 417 spa_t *spa = vd->vdev_spa; 418 419 nicenum(msp->ms_map.sm_size - smo->smo_alloc, freebuf); 420 421 if (dump_opt['d'] <= 5) { 422 (void) printf("\t%10llx %10llu %5s\n", 423 (u_longlong_t)msp->ms_map.sm_start, 424 (u_longlong_t)smo->smo_object, 425 freebuf); 426 return; 427 } 428 429 (void) printf( 430 "\tvdev %llu offset %08llx spacemap %4llu free %5s\n", 431 (u_longlong_t)vd->vdev_id, (u_longlong_t)msp->ms_map.sm_start, 432 (u_longlong_t)smo->smo_object, freebuf); 433 434 ASSERT(msp->ms_map.sm_size == (1ULL << vd->vdev_ms_shift)); 435 436 dump_spacemap(spa->spa_meta_objset, smo, &msp->ms_map); 437 } 438 439 static void 440 dump_metaslabs(spa_t *spa) 441 { 442 vdev_t *rvd = spa->spa_root_vdev; 443 vdev_t *vd; 444 int c, m; 445 446 (void) printf("\nMetaslabs:\n"); 447 448 for (c = 0; c < rvd->vdev_children; c++) { 449 vd = rvd->vdev_child[c]; 450 451 spa_config_enter(spa, RW_READER, FTAG); 452 (void) printf("\n vdev %llu = %s\n\n", 453 (u_longlong_t)vd->vdev_id, vdev_description(vd)); 454 spa_config_exit(spa, FTAG); 455 456 if (dump_opt['d'] <= 5) { 457 (void) printf("\t%10s %10s %5s\n", 458 "offset", "spacemap", "free"); 459 (void) printf("\t%10s %10s %5s\n", 460 "------", "--------", "----"); 461 } 462 for (m = 0; m < vd->vdev_ms_count; m++) 463 dump_metaslab(vd->vdev_ms[m]); 464 (void) printf("\n"); 465 } 466 } 467 468 static void 469 dump_dtl(vdev_t *vd, int indent) 470 { 471 avl_tree_t *t = &vd->vdev_dtl_map.sm_root; 472 spa_t *spa = vd->vdev_spa; 473 space_seg_t *ss; 474 vdev_t *pvd; 475 int c; 476 477 if (indent == 0) 478 (void) printf("\nDirty time logs:\n\n"); 479 480 spa_config_enter(spa, RW_READER, FTAG); 481 (void) printf("\t%*s%s\n", indent, "", vdev_description(vd)); 482 spa_config_exit(spa, FTAG); 483 484 for (ss = avl_first(t); ss; ss = AVL_NEXT(t, ss)) { 485 /* 486 * Everything in this DTL must appear in all parent DTL unions. 487 */ 488 for (pvd = vd; pvd; pvd = pvd->vdev_parent) 489 ASSERT(vdev_dtl_contains(&pvd->vdev_dtl_map, 490 ss->ss_start, ss->ss_end - ss->ss_start)); 491 (void) printf("\t%*soutage [%llu,%llu] length %llu\n", 492 indent, "", 493 (u_longlong_t)ss->ss_start, 494 (u_longlong_t)ss->ss_end - 1, 495 (u_longlong_t)ss->ss_end - ss->ss_start); 496 } 497 498 (void) printf("\n"); 499 500 if (dump_opt['d'] > 5 && vd->vdev_children == 0) { 501 dump_spacemap(vd->vdev_spa->spa_meta_objset, &vd->vdev_dtl, 502 &vd->vdev_dtl_map); 503 (void) printf("\n"); 504 } 505 506 for (c = 0; c < vd->vdev_children; c++) 507 dump_dtl(vd->vdev_child[c], indent + 4); 508 } 509 510 /*ARGSUSED*/ 511 static void 512 dump_dnode(objset_t *os, uint64_t object, void *data, size_t size) 513 { 514 } 515 516 static uint64_t 517 blkid2offset(dnode_phys_t *dnp, int level, uint64_t blkid) 518 { 519 if (level < 0) 520 return (blkid); 521 522 return ((blkid << (level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) * 523 dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT); 524 } 525 526 /* ARGSUSED */ 527 static int 528 zdb_indirect_cb(traverse_blk_cache_t *bc, spa_t *spa, void *a) 529 { 530 zbookmark_t *zb = &bc->bc_bookmark; 531 blkptr_t *bp = &bc->bc_blkptr; 532 dva_t *dva = &bp->blk_dva[0]; 533 void *data = bc->bc_data; 534 dnode_phys_t *dnp = bc->bc_dnode; 535 char buffer[300]; 536 int l; 537 538 if (bc->bc_errno) { 539 (void) sprintf(buffer, 540 "Error %d reading <%llu, %llu, %lld, %llu>: ", 541 bc->bc_errno, 542 (u_longlong_t)zb->zb_objset, 543 (u_longlong_t)zb->zb_object, 544 (u_longlong_t)zb->zb_level, 545 (u_longlong_t)zb->zb_blkid); 546 goto out; 547 } 548 549 if (zb->zb_level == -1) { 550 ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET); 551 ASSERT3U(BP_GET_LEVEL(bp), ==, 0); 552 } else { 553 ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type); 554 ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level); 555 } 556 557 if (zb->zb_level > 0) { 558 uint64_t fill = 0; 559 blkptr_t *bpx, *bpend; 560 561 for (bpx = data, bpend = bpx + BP_GET_LSIZE(bp) / sizeof (*bpx); 562 bpx < bpend; bpx++) { 563 if (bpx->blk_birth != 0) { 564 fill += bpx->blk_fill; 565 } else { 566 ASSERT(bpx->blk_fill == 0); 567 } 568 } 569 ASSERT3U(fill, ==, bp->blk_fill); 570 } 571 572 if (zb->zb_level == 0 && dnp->dn_type == DMU_OT_DNODE) { 573 uint64_t fill = 0; 574 dnode_phys_t *dnx, *dnend; 575 576 for (dnx = data, dnend = dnx + (BP_GET_LSIZE(bp)>>DNODE_SHIFT); 577 dnx < dnend; dnx++) { 578 if (dnx->dn_type != DMU_OT_NONE) 579 fill++; 580 } 581 ASSERT3U(fill, ==, bp->blk_fill); 582 } 583 584 (void) sprintf(buffer, "%16llx ", 585 (u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid)); 586 587 ASSERT(zb->zb_level >= 0); 588 589 for (l = dnp->dn_nlevels - 1; l >= -1; l--) { 590 if (l == zb->zb_level) { 591 (void) sprintf(buffer + strlen(buffer), "L%llx", 592 (u_longlong_t)zb->zb_level); 593 } else { 594 (void) sprintf(buffer + strlen(buffer), " "); 595 } 596 } 597 598 out: 599 if (bp->blk_birth == 0) { 600 (void) sprintf(buffer + strlen(buffer), "<hole>"); 601 (void) printf("%s\n", buffer); 602 } else { 603 // XXBP - Need to print number of active BPs here 604 (void) sprintf(buffer + strlen(buffer), 605 "vdev=%llu off=%llx %llxL/%llxP/%llxA F=%llu B=%llu", 606 (u_longlong_t)DVA_GET_VDEV(dva), 607 (u_longlong_t)DVA_GET_OFFSET(dva), 608 (u_longlong_t)BP_GET_LSIZE(bp), 609 (u_longlong_t)BP_GET_PSIZE(bp), 610 (u_longlong_t)DVA_GET_ASIZE(dva), 611 (u_longlong_t)bp->blk_fill, 612 (u_longlong_t)bp->blk_birth); 613 614 (void) printf("%s\n", buffer); 615 } 616 617 return (bc->bc_errno ? ERESTART : 0); 618 } 619 620 /*ARGSUSED*/ 621 static void 622 dump_indirect(objset_t *os, uint64_t object, void *data, size_t size) 623 { 624 traverse_handle_t *th; 625 uint64_t objset = dmu_objset_id(os); 626 int advance = zdb_advance; 627 628 (void) printf("Indirect blocks:\n"); 629 630 if (object == 0) 631 advance |= ADVANCE_DATA; 632 633 th = traverse_init(dmu_objset_spa(os), zdb_indirect_cb, NULL, advance, 634 ZIO_FLAG_CANFAIL); 635 th->th_noread = zdb_noread; 636 637 traverse_add_dnode(th, 0, -1ULL, objset, object); 638 639 while (traverse_more(th) == EAGAIN) 640 continue; 641 642 (void) printf("\n"); 643 644 traverse_fini(th); 645 } 646 647 /*ARGSUSED*/ 648 static void 649 dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size) 650 { 651 dsl_dir_phys_t *dd = data; 652 time_t crtime; 653 char used[6], compressed[6], uncompressed[6], quota[6], resv[6]; 654 655 if (dd == NULL) 656 return; 657 658 ASSERT(size == sizeof (*dd)); 659 660 crtime = dd->dd_creation_time; 661 nicenum(dd->dd_used_bytes, used); 662 nicenum(dd->dd_compressed_bytes, compressed); 663 nicenum(dd->dd_uncompressed_bytes, uncompressed); 664 nicenum(dd->dd_quota, quota); 665 nicenum(dd->dd_reserved, resv); 666 667 (void) printf("\t\tcreation_time = %s", ctime(&crtime)); 668 (void) printf("\t\thead_dataset_obj = %llu\n", 669 (u_longlong_t)dd->dd_head_dataset_obj); 670 (void) printf("\t\tparent_dir_obj = %llu\n", 671 (u_longlong_t)dd->dd_parent_obj); 672 (void) printf("\t\tclone_parent_obj = %llu\n", 673 (u_longlong_t)dd->dd_clone_parent_obj); 674 (void) printf("\t\tchild_dir_zapobj = %llu\n", 675 (u_longlong_t)dd->dd_child_dir_zapobj); 676 (void) printf("\t\tused_bytes = %s\n", used); 677 (void) printf("\t\tcompressed_bytes = %s\n", compressed); 678 (void) printf("\t\tuncompressed_bytes = %s\n", uncompressed); 679 (void) printf("\t\tquota = %s\n", quota); 680 (void) printf("\t\treserved = %s\n", resv); 681 (void) printf("\t\tprops_zapobj = %llu\n", 682 (u_longlong_t)dd->dd_props_zapobj); 683 } 684 685 /*ARGSUSED*/ 686 static void 687 dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size) 688 { 689 dsl_dataset_phys_t *ds = data; 690 time_t crtime; 691 char used[6], compressed[6], uncompressed[6], unique[6]; 692 char blkbuf[BP_SPRINTF_LEN]; 693 694 if (ds == NULL) 695 return; 696 697 ASSERT(size == sizeof (*ds)); 698 crtime = ds->ds_creation_time; 699 nicenum(ds->ds_used_bytes, used); 700 nicenum(ds->ds_compressed_bytes, compressed); 701 nicenum(ds->ds_uncompressed_bytes, uncompressed); 702 nicenum(ds->ds_unique_bytes, unique); 703 sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &ds->ds_bp); 704 705 (void) printf("\t\tdataset_obj = %llu\n", 706 (u_longlong_t)ds->ds_dir_obj); 707 (void) printf("\t\tprev_snap_obj = %llu\n", 708 (u_longlong_t)ds->ds_prev_snap_obj); 709 (void) printf("\t\tprev_snap_txg = %llu\n", 710 (u_longlong_t)ds->ds_prev_snap_txg); 711 (void) printf("\t\tnext_snap_obj = %llu\n", 712 (u_longlong_t)ds->ds_next_snap_obj); 713 (void) printf("\t\tsnapnames_zapobj = %llu\n", 714 (u_longlong_t)ds->ds_snapnames_zapobj); 715 (void) printf("\t\tnum_children = %llu\n", 716 (u_longlong_t)ds->ds_num_children); 717 (void) printf("\t\tcreation_time = %s", ctime(&crtime)); 718 (void) printf("\t\tcreation_txg = %llu\n", 719 (u_longlong_t)ds->ds_creation_txg); 720 (void) printf("\t\tdeadlist_obj = %llu\n", 721 (u_longlong_t)ds->ds_deadlist_obj); 722 (void) printf("\t\tused_bytes = %s\n", used); 723 (void) printf("\t\tcompressed_bytes = %s\n", compressed); 724 (void) printf("\t\tuncompressed_bytes = %s\n", uncompressed); 725 (void) printf("\t\tunique = %s\n", unique); 726 (void) printf("\t\tfsid_guid = %llu\n", 727 (u_longlong_t)ds->ds_fsid_guid); 728 (void) printf("\t\tguid = %llu\n", 729 (u_longlong_t)ds->ds_guid); 730 (void) printf("\t\trestoring = %llu\n", 731 (u_longlong_t)ds->ds_restoring); 732 (void) printf("\t\tbp = %s\n", blkbuf); 733 } 734 735 static void 736 dump_bplist(objset_t *mos, uint64_t object, char *name) 737 { 738 bplist_t bpl = { 0 }; 739 blkptr_t blk, *bp = &blk; 740 uint64_t itor = 0; 741 char numbuf[6]; 742 743 if (dump_opt['d'] < 3) 744 return; 745 746 VERIFY(0 == bplist_open(&bpl, mos, object)); 747 if (bplist_empty(&bpl)) { 748 bplist_close(&bpl); 749 return; 750 } 751 752 nicenum(bpl.bpl_phys->bpl_bytes, numbuf); 753 754 (void) printf("\n %s: %llu entries, %s\n", 755 name, (u_longlong_t)bpl.bpl_phys->bpl_entries, numbuf); 756 757 if (dump_opt['d'] < 5) { 758 bplist_close(&bpl); 759 return; 760 } 761 762 (void) printf("\n"); 763 764 while (bplist_iterate(&bpl, &itor, bp) == 0) { 765 ASSERT(bp->blk_birth != 0); 766 // XXBP - Do we want to see all DVAs, or just one? 767 (void) printf("\tItem %3llu: vdev=%llu off=%llx " 768 "%llxL/%llxP/%llxA F=%llu B=%llu\n", 769 (u_longlong_t)itor - 1, 770 (u_longlong_t)DVA_GET_VDEV(&bp->blk_dva[0]), 771 (u_longlong_t)DVA_GET_OFFSET(&bp->blk_dva[0]), 772 (u_longlong_t)BP_GET_LSIZE(bp), 773 (u_longlong_t)BP_GET_PSIZE(bp), 774 (u_longlong_t)DVA_GET_ASIZE(&bp->blk_dva[0]), 775 (u_longlong_t)bp->blk_fill, 776 (u_longlong_t)bp->blk_birth); 777 } 778 779 bplist_close(&bpl); 780 } 781 782 static char * 783 znode_path(objset_t *os, uint64_t object, char *pathbuf, size_t size) 784 { 785 dmu_buf_t *db; 786 dmu_object_info_t doi; 787 znode_phys_t *zp; 788 uint64_t parent = 0; 789 size_t complen; 790 char component[MAXNAMELEN + 1]; 791 char *path; 792 int error; 793 794 path = pathbuf + size; 795 *--path = '\0'; 796 797 for (;;) { 798 error = dmu_bonus_hold(os, object, FTAG, &db); 799 if (error) 800 break; 801 802 dmu_object_info_from_db(db, &doi); 803 zp = db->db_data; 804 parent = zp->zp_parent; 805 dmu_buf_rele(db, FTAG); 806 807 if (doi.doi_bonus_type != DMU_OT_ZNODE) 808 break; 809 810 if (parent == object) { 811 if (path[0] != '/') 812 *--path = '/'; 813 return (path); 814 } 815 816 if (zap_value_search(os, parent, object, component) != 0) 817 break; 818 819 complen = strlen(component); 820 path -= complen; 821 bcopy(component, path, complen); 822 *--path = '/'; 823 824 object = parent; 825 } 826 827 (void) sprintf(component, "???<object#%llu>", (u_longlong_t)object); 828 829 complen = strlen(component); 830 path -= complen; 831 bcopy(component, path, complen); 832 833 return (path); 834 } 835 836 /*ARGSUSED*/ 837 static void 838 dump_znode(objset_t *os, uint64_t object, void *data, size_t size) 839 { 840 znode_phys_t *zp = data; 841 time_t z_crtime, z_atime, z_mtime, z_ctime; 842 char path[MAXPATHLEN * 2]; /* allow for xattr and failure prefix */ 843 844 ASSERT(size >= sizeof (znode_phys_t)); 845 846 if (dump_opt['d'] < 3) { 847 (void) printf("\t%s\n", 848 znode_path(os, object, path, sizeof (path))); 849 return; 850 } 851 852 z_crtime = (time_t)zp->zp_crtime[0]; 853 z_atime = (time_t)zp->zp_atime[0]; 854 z_mtime = (time_t)zp->zp_mtime[0]; 855 z_ctime = (time_t)zp->zp_ctime[0]; 856 857 (void) printf("\tpath %s\n", 858 znode_path(os, object, path, sizeof (path))); 859 (void) printf("\tatime %s", ctime(&z_atime)); 860 (void) printf("\tmtime %s", ctime(&z_mtime)); 861 (void) printf("\tctime %s", ctime(&z_ctime)); 862 (void) printf("\tcrtime %s", ctime(&z_crtime)); 863 (void) printf("\tgen %llu\n", (u_longlong_t)zp->zp_gen); 864 (void) printf("\tmode %llo\n", (u_longlong_t)zp->zp_mode); 865 (void) printf("\tsize %llu\n", (u_longlong_t)zp->zp_size); 866 (void) printf("\tparent %llu\n", (u_longlong_t)zp->zp_parent); 867 (void) printf("\tlinks %llu\n", (u_longlong_t)zp->zp_links); 868 (void) printf("\txattr %llu\n", (u_longlong_t)zp->zp_xattr); 869 (void) printf("\trdev 0x%016llx\n", (u_longlong_t)zp->zp_rdev); 870 } 871 872 /*ARGSUSED*/ 873 static void 874 dump_acl(objset_t *os, uint64_t object, void *data, size_t size) 875 { 876 } 877 878 /*ARGSUSED*/ 879 static void 880 dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size) 881 { 882 } 883 884 static object_viewer_t *object_viewer[DMU_OT_NUMTYPES] = { 885 dump_none, /* unallocated */ 886 dump_zap, /* object directory */ 887 dump_uint64, /* object array */ 888 dump_none, /* packed nvlist */ 889 dump_packed_nvlist, /* packed nvlist size */ 890 dump_none, /* bplist */ 891 dump_none, /* bplist header */ 892 dump_none, /* SPA space map header */ 893 dump_none, /* SPA space map */ 894 dump_none, /* ZIL intent log */ 895 dump_dnode, /* DMU dnode */ 896 dump_dmu_objset, /* DMU objset */ 897 dump_dsl_dir, /* DSL directory */ 898 dump_zap, /* DSL directory child map */ 899 dump_zap, /* DSL dataset snap map */ 900 dump_zap, /* DSL props */ 901 dump_dsl_dataset, /* DSL dataset */ 902 dump_znode, /* ZFS znode */ 903 dump_acl, /* ZFS ACL */ 904 dump_uint8, /* ZFS plain file */ 905 dump_zap, /* ZFS directory */ 906 dump_zap, /* ZFS master node */ 907 dump_zap, /* ZFS delete queue */ 908 dump_uint8, /* zvol object */ 909 dump_zap, /* zvol prop */ 910 dump_uint8, /* other uint8[] */ 911 dump_uint64, /* other uint64[] */ 912 dump_zap, /* other ZAP */ 913 dump_zap, /* persistent error log */ 914 }; 915 916 static void 917 dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header) 918 { 919 dmu_buf_t *db = NULL; 920 dmu_object_info_t doi; 921 dnode_t *dn; 922 void *bonus = NULL; 923 size_t bsize = 0; 924 char iblk[6], dblk[6], lsize[6], psize[6], bonus_size[6], segsize[6]; 925 char aux[50]; 926 int error; 927 928 if (*print_header) { 929 (void) printf("\n Object lvl iblk dblk lsize" 930 " psize type\n"); 931 *print_header = 0; 932 } 933 934 if (object == 0) { 935 dn = os->os->os_meta_dnode; 936 } else { 937 error = dmu_bonus_hold(os, object, FTAG, &db); 938 if (error) 939 fatal("dmu_bonus_hold(%llu) failed, errno %u", 940 object, error); 941 bonus = db->db_data; 942 bsize = db->db_size; 943 dn = ((dmu_buf_impl_t *)db)->db_dnode; 944 } 945 dmu_object_info_from_dnode(dn, &doi); 946 947 nicenum(doi.doi_metadata_block_size, iblk); 948 nicenum(doi.doi_data_block_size, dblk); 949 nicenum(doi.doi_data_block_size * (doi.doi_max_block_offset + 1), 950 lsize); 951 nicenum(doi.doi_physical_blks << 9, psize); 952 nicenum(doi.doi_bonus_size, bonus_size); 953 954 aux[0] = '\0'; 955 956 if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) 957 (void) snprintf(aux + strlen(aux), sizeof (aux), " (K=%s)", 958 zio_checksum_table[doi.doi_checksum].ci_name); 959 960 if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) 961 (void) snprintf(aux + strlen(aux), sizeof (aux), " (Z=%s)", 962 zio_compress_table[doi.doi_compress].ci_name); 963 964 (void) printf("%10lld %3u %5s %5s %5s %5s %s%s\n", 965 (u_longlong_t)object, doi.doi_indirection, iblk, dblk, lsize, 966 psize, dmu_ot[doi.doi_type].ot_name, aux); 967 968 if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) { 969 (void) printf("%10s %3s %5s %5s %5s %5s %s\n", 970 "", "", "", "", bonus_size, "bonus", 971 dmu_ot[doi.doi_bonus_type].ot_name); 972 } 973 974 if (verbosity >= 4) { 975 object_viewer[doi.doi_bonus_type](os, object, bonus, bsize); 976 object_viewer[doi.doi_type](os, object, NULL, 0); 977 *print_header = 1; 978 } 979 980 if (verbosity >= 5) 981 dump_indirect(os, object, NULL, 0); 982 983 if (verbosity >= 5) { 984 /* 985 * Report the list of segments that comprise the object. 986 */ 987 uint64_t start = 0; 988 uint64_t end; 989 uint64_t blkfill = 1; 990 int minlvl = 1; 991 992 if (dn->dn_type == DMU_OT_DNODE) { 993 minlvl = 0; 994 blkfill = DNODES_PER_BLOCK; 995 } 996 997 for (;;) { 998 error = dnode_next_offset(dn, B_FALSE, &start, minlvl, 999 blkfill); 1000 if (error) 1001 break; 1002 end = start; 1003 error = dnode_next_offset(dn, B_TRUE, &end, minlvl, 1004 blkfill); 1005 nicenum(end - start, segsize); 1006 (void) printf("\t\tsegment [%016llx, %016llx)" 1007 " size %5s\n", (u_longlong_t)start, 1008 (u_longlong_t)end, segsize); 1009 if (error) 1010 break; 1011 start = end; 1012 } 1013 } 1014 1015 if (db != NULL) 1016 dmu_buf_rele(db, FTAG); 1017 } 1018 1019 static char *objset_types[DMU_OST_NUMTYPES] = { 1020 "NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" }; 1021 1022 /*ARGSUSED*/ 1023 static void 1024 dump_dir(objset_t *os) 1025 { 1026 dmu_objset_stats_t dds; 1027 uint64_t object, object_count; 1028 char numbuf[8]; 1029 char blkbuf[BP_SPRINTF_LEN]; 1030 char osname[MAXNAMELEN]; 1031 char *type = "UNKNOWN"; 1032 int verbosity = dump_opt['d']; 1033 int print_header = 1; 1034 int i, error; 1035 1036 dmu_objset_stats(os, &dds); 1037 1038 if (dds.dds_type < DMU_OST_NUMTYPES) 1039 type = objset_types[dds.dds_type]; 1040 1041 if (dds.dds_type == DMU_OST_META) { 1042 dds.dds_creation_txg = TXG_INITIAL; 1043 dds.dds_last_txg = os->os->os_rootbp.blk_birth; 1044 dds.dds_objects_used = os->os->os_rootbp.blk_fill; 1045 dds.dds_space_refd = 1046 os->os->os_spa->spa_dsl_pool->dp_mos_dir->dd_used_bytes; 1047 } 1048 1049 ASSERT3U(dds.dds_objects_used, ==, os->os->os_rootbp.blk_fill); 1050 1051 nicenum(dds.dds_space_refd, numbuf); 1052 1053 if (verbosity >= 4) { 1054 (void) strcpy(blkbuf, ", rootbp "); 1055 sprintf_blkptr(blkbuf + strlen(blkbuf), 1056 BP_SPRINTF_LEN - strlen(blkbuf), &os->os->os_rootbp); 1057 } else { 1058 blkbuf[0] = '\0'; 1059 } 1060 1061 dmu_objset_name(os, osname); 1062 1063 (void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, last_txg %llu, " 1064 "%s, %llu objects%s\n", 1065 osname, type, (u_longlong_t)dmu_objset_id(os), 1066 (u_longlong_t)dds.dds_creation_txg, 1067 (u_longlong_t)dds.dds_last_txg, 1068 numbuf, 1069 (u_longlong_t)dds.dds_objects_used, 1070 blkbuf); 1071 1072 dump_intent_log(dmu_objset_zil(os)); 1073 1074 if (dmu_objset_ds(os) != NULL) 1075 dump_bplist(dmu_objset_pool(os)->dp_meta_objset, 1076 dmu_objset_ds(os)->ds_phys->ds_deadlist_obj, "Deadlist"); 1077 1078 if (verbosity < 2) 1079 return; 1080 1081 if (zopt_objects != 0) { 1082 for (i = 0; i < zopt_objects; i++) 1083 dump_object(os, zopt_object[i], verbosity, 1084 &print_header); 1085 (void) printf("\n"); 1086 return; 1087 } 1088 1089 dump_object(os, 0, verbosity, &print_header); 1090 object_count = 1; 1091 1092 object = 0; 1093 while ((error = dmu_object_next(os, &object, B_FALSE)) == 0) { 1094 dump_object(os, object, verbosity, &print_header); 1095 object_count++; 1096 } 1097 1098 ASSERT3U(object_count, ==, dds.dds_objects_used); 1099 1100 (void) printf("\n"); 1101 1102 if (error != ESRCH) 1103 fatal("dmu_object_next() = %d", error); 1104 } 1105 1106 static void 1107 dump_uberblock(uberblock_t *ub) 1108 { 1109 time_t timestamp = ub->ub_timestamp; 1110 1111 (void) printf("Uberblock\n\n"); 1112 (void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic); 1113 (void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version); 1114 (void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg); 1115 (void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum); 1116 (void) printf("\ttimestamp = %llu UTC = %s", 1117 (u_longlong_t)ub->ub_timestamp, asctime(localtime(×tamp))); 1118 if (dump_opt['u'] >= 3) { 1119 char blkbuf[BP_SPRINTF_LEN]; 1120 sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &ub->ub_rootbp); 1121 (void) printf("\trootbp = %s\n", blkbuf); 1122 } 1123 (void) printf("\n"); 1124 } 1125 1126 static void 1127 dump_config(const char *pool) 1128 { 1129 spa_t *spa = NULL; 1130 1131 mutex_enter(&spa_namespace_lock); 1132 while ((spa = spa_next(spa)) != NULL) { 1133 if (pool == NULL) 1134 (void) printf("%s\n", spa_name(spa)); 1135 if (pool == NULL || strcmp(pool, spa_name(spa)) == 0) 1136 dump_nvlist(spa->spa_config, 4); 1137 } 1138 mutex_exit(&spa_namespace_lock); 1139 } 1140 1141 static void 1142 dump_label(const char *dev) 1143 { 1144 int fd; 1145 vdev_label_t label; 1146 char *buf = label.vl_vdev_phys.vp_nvlist; 1147 size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist); 1148 struct stat64 statbuf; 1149 uint64_t psize; 1150 int l; 1151 1152 if ((fd = open64(dev, O_RDONLY)) < 0) { 1153 (void) printf("cannot open '%s': %s\n", dev, strerror(errno)); 1154 exit(1); 1155 } 1156 1157 if (fstat64(fd, &statbuf) != 0) { 1158 (void) printf("failed to stat '%s': %s\n", dev, 1159 strerror(errno)); 1160 exit(1); 1161 } 1162 1163 psize = statbuf.st_size; 1164 psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t)); 1165 1166 for (l = 0; l < VDEV_LABELS; l++) { 1167 1168 nvlist_t *config = NULL; 1169 1170 (void) printf("--------------------------------------------\n"); 1171 (void) printf("LABEL %d\n", l); 1172 (void) printf("--------------------------------------------\n"); 1173 1174 if (pread64(fd, &label, sizeof (label), 1175 vdev_label_offset(psize, l, 0)) != sizeof (label)) { 1176 (void) printf("failed to read label %d\n", l); 1177 continue; 1178 } 1179 1180 if (nvlist_unpack(buf, buflen, &config, 0) != 0) { 1181 (void) printf("failed to unpack label %d\n", l); 1182 continue; 1183 } 1184 dump_nvlist(config, 4); 1185 nvlist_free(config); 1186 } 1187 } 1188 1189 /*ARGSUSED*/ 1190 static void 1191 dump_one_dir(char *dsname, void *arg) 1192 { 1193 int error; 1194 objset_t *os; 1195 1196 error = dmu_objset_open(dsname, DMU_OST_ANY, 1197 DS_MODE_STANDARD | DS_MODE_READONLY, &os); 1198 if (error) { 1199 (void) printf("Could not open %s\n", dsname); 1200 return; 1201 } 1202 dump_dir(os); 1203 dmu_objset_close(os); 1204 } 1205 1206 static void 1207 zdb_space_map_load(spa_t *spa) 1208 { 1209 vdev_t *rvd = spa->spa_root_vdev; 1210 vdev_t *vd; 1211 int c, m, error; 1212 1213 for (c = 0; c < rvd->vdev_children; c++) { 1214 vd = rvd->vdev_child[c]; 1215 for (m = 0; m < vd->vdev_ms_count; m++) { 1216 metaslab_t *msp = vd->vdev_ms[m]; 1217 space_map_t *sm = &msp->ms_allocmap[0]; 1218 mutex_enter(&msp->ms_lock); 1219 error = space_map_load(sm, msp->ms_smo, SM_ALLOC, 1220 spa->spa_meta_objset, msp->ms_usable_end, 1221 sm->sm_size - msp->ms_usable_space); 1222 mutex_exit(&msp->ms_lock); 1223 if (error) 1224 fatal("%s bad space map #%d, error %d", 1225 spa->spa_name, c, error); 1226 } 1227 } 1228 } 1229 1230 static int 1231 zdb_space_map_claim(spa_t *spa, blkptr_t *bp, zbookmark_t *zb) 1232 { 1233 dva_t *dva = &bp->blk_dva[0]; 1234 uint64_t vdev = DVA_GET_VDEV(dva); 1235 uint64_t offset = DVA_GET_OFFSET(dva); 1236 uint64_t size = DVA_GET_ASIZE(dva); 1237 vdev_t *vd; 1238 metaslab_t *msp; 1239 space_map_t *allocmap, *freemap; 1240 int error; 1241 1242 if ((vd = vdev_lookup_top(spa, vdev)) == NULL) 1243 return (ENXIO); 1244 1245 if ((offset >> vd->vdev_ms_shift) >= vd->vdev_ms_count) 1246 return (ENXIO); 1247 1248 if (DVA_GET_GANG(dva)) { 1249 zio_gbh_phys_t gbh; 1250 blkptr_t blk = *bp; 1251 int g; 1252 1253 /* LINTED - compile time assert */ 1254 ASSERT(sizeof (zio_gbh_phys_t) == SPA_GANGBLOCKSIZE); 1255 size = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE); 1256 DVA_SET_GANG(&blk.blk_dva[0], 0); 1257 DVA_SET_ASIZE(&blk.blk_dva[0], size); 1258 BP_SET_CHECKSUM(&blk, ZIO_CHECKSUM_GANG_HEADER); 1259 BP_SET_PSIZE(&blk, SPA_GANGBLOCKSIZE); 1260 BP_SET_LSIZE(&blk, SPA_GANGBLOCKSIZE); 1261 BP_SET_COMPRESS(&blk, ZIO_COMPRESS_OFF); 1262 error = zio_wait(zio_read(NULL, spa, &blk, 1263 &gbh, SPA_GANGBLOCKSIZE, NULL, NULL, 1264 ZIO_PRIORITY_SYNC_READ, 1265 ZIO_FLAG_CANFAIL | ZIO_FLAG_CONFIG_HELD, zb)); 1266 if (error) 1267 return (error); 1268 if (BP_SHOULD_BYTESWAP(&blk)) 1269 byteswap_uint64_array(&gbh, SPA_GANGBLOCKSIZE); 1270 for (g = 0; g < SPA_GBH_NBLKPTRS; g++) { 1271 if (gbh.zg_blkptr[g].blk_birth == 0) 1272 break; 1273 error = zdb_space_map_claim(spa, &gbh.zg_blkptr[g], zb); 1274 if (error) 1275 return (error); 1276 } 1277 } 1278 1279 msp = vd->vdev_ms[offset >> vd->vdev_ms_shift]; 1280 allocmap = &msp->ms_allocmap[0]; 1281 freemap = &msp->ms_freemap[0]; 1282 1283 mutex_enter(&msp->ms_lock); 1284 if (space_map_contains(freemap, offset, size)) { 1285 mutex_exit(&msp->ms_lock); 1286 return (EAGAIN); /* allocated more than once */ 1287 } 1288 1289 if (!space_map_contains(allocmap, offset, size)) { 1290 mutex_exit(&msp->ms_lock); 1291 return (ESTALE); /* not allocated at all */ 1292 } 1293 1294 space_map_remove(allocmap, offset, size); 1295 space_map_add(freemap, offset, size); 1296 1297 mutex_exit(&msp->ms_lock); 1298 1299 return (0); 1300 } 1301 1302 static void 1303 zdb_leak(space_map_t *sm, uint64_t start, uint64_t size) 1304 { 1305 metaslab_t *msp; 1306 1307 /* LINTED */ 1308 msp = (metaslab_t *)((char *)sm - offsetof(metaslab_t, ms_allocmap[0])); 1309 1310 (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n", 1311 (u_longlong_t)msp->ms_group->mg_vd->vdev_id, 1312 (u_longlong_t)start, 1313 (u_longlong_t)size); 1314 } 1315 1316 static void 1317 zdb_space_map_vacate(spa_t *spa) 1318 { 1319 vdev_t *rvd = spa->spa_root_vdev; 1320 vdev_t *vd; 1321 int c, m; 1322 1323 for (c = 0; c < rvd->vdev_children; c++) { 1324 vd = rvd->vdev_child[c]; 1325 for (m = 0; m < vd->vdev_ms_count; m++) { 1326 metaslab_t *msp = vd->vdev_ms[m]; 1327 mutex_enter(&msp->ms_lock); 1328 space_map_vacate(&msp->ms_allocmap[0], zdb_leak, 1329 &msp->ms_allocmap[0]); 1330 space_map_vacate(&msp->ms_freemap[0], NULL, NULL); 1331 mutex_exit(&msp->ms_lock); 1332 } 1333 } 1334 } 1335 1336 static void 1337 zdb_refresh_ubsync(spa_t *spa) 1338 { 1339 uberblock_t ub = { 0 }; 1340 vdev_t *rvd = spa->spa_root_vdev; 1341 zio_t *zio; 1342 1343 /* 1344 * Reload the uberblock. 1345 */ 1346 zio = zio_root(spa, NULL, NULL, 1347 ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); 1348 vdev_uberblock_load(zio, rvd, &ub); 1349 (void) zio_wait(zio); 1350 1351 if (ub.ub_txg != 0) 1352 spa->spa_ubsync = ub; 1353 } 1354 1355 /* 1356 * Verify that the sum of the sizes of all blocks in the pool adds up 1357 * to the SPA's sa_alloc total. 1358 */ 1359 typedef struct zdb_blkstats { 1360 uint64_t zb_asize; 1361 uint64_t zb_lsize; 1362 uint64_t zb_psize; 1363 uint64_t zb_count; 1364 } zdb_blkstats_t; 1365 1366 #define DMU_OT_DEFERRED DMU_OT_NONE 1367 #define DMU_OT_TOTAL DMU_OT_NUMTYPES 1368 1369 #define ZB_TOTAL ZB_MAXLEVEL 1370 1371 typedef struct zdb_cb { 1372 zdb_blkstats_t zcb_type[ZB_TOTAL + 1][DMU_OT_TOTAL + 1]; 1373 uint64_t zcb_errors[256]; 1374 traverse_blk_cache_t *zcb_cache; 1375 int zcb_readfails; 1376 int zcb_haderrors; 1377 } zdb_cb_t; 1378 1379 static void 1380 zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, int type) 1381 { 1382 int i, error; 1383 1384 for (i = 0; i < 4; i++) { 1385 int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL; 1386 int t = (i & 1) ? type : DMU_OT_TOTAL; 1387 zdb_blkstats_t *zb = &zcb->zcb_type[l][t]; 1388 1389 zb->zb_asize += BP_GET_ASIZE(bp); 1390 zb->zb_lsize += BP_GET_LSIZE(bp); 1391 zb->zb_psize += BP_GET_PSIZE(bp); 1392 zb->zb_count++; 1393 } 1394 1395 if (dump_opt['L']) 1396 return; 1397 1398 error = zdb_space_map_claim(spa, bp, &zcb->zcb_cache->bc_bookmark); 1399 1400 if (error == 0) 1401 return; 1402 1403 if (error == EAGAIN) 1404 (void) fatal("double-allocation, bp=%p", bp); 1405 1406 if (error == ESTALE) 1407 (void) fatal("reference to freed block, bp=%p", bp); 1408 1409 (void) fatal("fatal error %d in bp %p", error, bp); 1410 } 1411 1412 static int 1413 zdb_blkptr_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg) 1414 { 1415 zbookmark_t *zb = &bc->bc_bookmark; 1416 zdb_cb_t *zcb = arg; 1417 blkptr_t *bp = &bc->bc_blkptr; 1418 dmu_object_type_t type = BP_GET_TYPE(bp); 1419 char blkbuf[BP_SPRINTF_LEN]; 1420 int error = 0; 1421 1422 if (bc->bc_errno) { 1423 if (zcb->zcb_readfails++ < 10 && dump_opt['L']) { 1424 zdb_refresh_ubsync(spa); 1425 error = EAGAIN; 1426 } else { 1427 zcb->zcb_haderrors = 1; 1428 zcb->zcb_errors[bc->bc_errno]++; 1429 error = ERESTART; 1430 } 1431 1432 if (dump_opt['b'] >= 3 || (dump_opt['b'] >= 2 && bc->bc_errno)) 1433 sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp); 1434 else 1435 blkbuf[0] = '\0'; 1436 1437 (void) printf("zdb_blkptr_cb: Got error %d reading " 1438 "<%llu, %llu, %lld, %llx> %s -- %s\n", 1439 bc->bc_errno, 1440 (u_longlong_t)zb->zb_objset, 1441 (u_longlong_t)zb->zb_object, 1442 (u_longlong_t)zb->zb_level, 1443 (u_longlong_t)zb->zb_blkid, 1444 blkbuf, 1445 error == EAGAIN ? "retrying" : "skipping"); 1446 1447 return (error); 1448 } 1449 1450 zcb->zcb_readfails = 0; 1451 1452 ASSERT(bp->blk_birth != 0); 1453 1454 zdb_count_block(spa, zcb, bp, type); 1455 1456 if (dump_opt['b'] >= 4) { 1457 sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp); 1458 (void) printf("objset %llu object %llu offset 0x%llx %s\n", 1459 (u_longlong_t)zb->zb_objset, 1460 (u_longlong_t)zb->zb_object, 1461 (u_longlong_t)blkid2offset(bc->bc_dnode, 1462 zb->zb_level, zb->zb_blkid), 1463 blkbuf); 1464 } 1465 1466 return (0); 1467 } 1468 1469 static int 1470 dump_block_stats(spa_t *spa) 1471 { 1472 traverse_handle_t *th; 1473 zdb_cb_t zcb = { 0 }; 1474 traverse_blk_cache_t dummy_cache = { 0 }; 1475 zdb_blkstats_t *zb, *tzb; 1476 uint64_t alloc, space; 1477 int leaks = 0; 1478 int advance = zdb_advance; 1479 int flags; 1480 int e; 1481 1482 zcb.zcb_cache = &dummy_cache; 1483 1484 if (dump_opt['c']) 1485 advance |= ADVANCE_DATA; 1486 1487 advance |= ADVANCE_PRUNE | ADVANCE_ZIL; 1488 1489 (void) printf("\nTraversing all blocks to %sverify" 1490 " nothing leaked ...\n", 1491 dump_opt['c'] ? "verify checksums and " : ""); 1492 1493 /* 1494 * Load all space maps. As we traverse the pool, if we find a block 1495 * that's not in its space map, that indicates a double-allocation, 1496 * reference to a freed block, or an unclaimed block. Otherwise we 1497 * remove the block from the space map. If the space maps are not 1498 * empty when we're done, that indicates leaked blocks. 1499 */ 1500 if (!dump_opt['L']) 1501 zdb_space_map_load(spa); 1502 1503 /* 1504 * If there's a deferred-free bplist, process that first. 1505 */ 1506 if (spa->spa_sync_bplist_obj != 0) { 1507 bplist_t *bpl = &spa->spa_sync_bplist; 1508 blkptr_t blk; 1509 uint64_t itor = 0; 1510 1511 VERIFY(0 == bplist_open(bpl, spa->spa_meta_objset, 1512 spa->spa_sync_bplist_obj)); 1513 1514 while (bplist_iterate(bpl, &itor, &blk) == 0) { 1515 zdb_count_block(spa, &zcb, &blk, DMU_OT_DEFERRED); 1516 if (dump_opt['b'] >= 4) { 1517 char blkbuf[BP_SPRINTF_LEN]; 1518 sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &blk); 1519 (void) printf("[%s] %s\n", 1520 "deferred free", blkbuf); 1521 } 1522 } 1523 1524 bplist_close(bpl); 1525 } 1526 1527 /* 1528 * Now traverse the pool. If we're reading all data to verify 1529 * checksums, do a scrubbing read so that we validate all copies. 1530 */ 1531 flags = ZIO_FLAG_CANFAIL; 1532 if (advance & ADVANCE_DATA) 1533 flags |= ZIO_FLAG_SCRUB; 1534 th = traverse_init(spa, zdb_blkptr_cb, &zcb, advance, flags); 1535 th->th_noread = zdb_noread; 1536 1537 traverse_add_pool(th, 0, spa_first_txg(spa)); 1538 1539 while (traverse_more(th) == EAGAIN) 1540 continue; 1541 1542 traverse_fini(th); 1543 1544 if (zcb.zcb_haderrors) { 1545 (void) printf("\nError counts:\n\n"); 1546 (void) printf("\t%5s %s\n", "errno", "count"); 1547 for (e = 0; e < 256; e++) { 1548 if (zcb.zcb_errors[e] != 0) { 1549 (void) printf("\t%5d %llu\n", 1550 e, (u_longlong_t)zcb.zcb_errors[e]); 1551 } 1552 } 1553 } 1554 1555 /* 1556 * Report any leaked segments. 1557 */ 1558 if (!dump_opt['L']) 1559 zdb_space_map_vacate(spa); 1560 1561 if (dump_opt['L']) 1562 (void) printf("\n\n *** Live pool traversal; " 1563 "block counts are only approximate ***\n\n"); 1564 1565 alloc = spa_get_alloc(spa); 1566 space = spa_get_space(spa); 1567 1568 tzb = &zcb.zcb_type[ZB_TOTAL][DMU_OT_TOTAL]; 1569 1570 if (tzb->zb_asize == alloc) { 1571 (void) printf("\n\tNo leaks (block sum matches space" 1572 " maps exactly)\n"); 1573 } else { 1574 (void) printf("block traversal size %llu != alloc %llu " 1575 "(leaked %lld)\n", 1576 (u_longlong_t)tzb->zb_asize, 1577 (u_longlong_t)alloc, 1578 (u_longlong_t)(alloc - tzb->zb_asize)); 1579 leaks = 1; 1580 } 1581 1582 if (tzb->zb_count == 0) 1583 return (2); 1584 1585 (void) printf("\n"); 1586 (void) printf("\tbp count: %10llu\n", 1587 (u_longlong_t)tzb->zb_count); 1588 (void) printf("\tbp logical: %10llu\t avg: %6llu\n", 1589 (u_longlong_t)tzb->zb_lsize, 1590 (u_longlong_t)(tzb->zb_lsize / tzb->zb_count)); 1591 (void) printf("\tbp physical: %10llu\t avg:" 1592 " %6llu\tcompression: %6.2f\n", 1593 (u_longlong_t)tzb->zb_psize, 1594 (u_longlong_t)(tzb->zb_psize / tzb->zb_count), 1595 (double)tzb->zb_lsize / tzb->zb_psize); 1596 (void) printf("\tbp allocated: %10llu\t avg:" 1597 " %6llu\tcompression: %6.2f\n", 1598 (u_longlong_t)tzb->zb_asize, 1599 (u_longlong_t)(tzb->zb_asize / tzb->zb_count), 1600 (double)tzb->zb_lsize / tzb->zb_asize); 1601 (void) printf("\tSPA allocated: %10llu\tused: %5.2f%%\n", 1602 (u_longlong_t)alloc, 100.0 * alloc / space); 1603 1604 if (dump_opt['b'] >= 2) { 1605 int l, t, level; 1606 (void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE" 1607 "\t avg\t comp\t%%Total\tType\n"); 1608 1609 for (t = 0; t <= DMU_OT_NUMTYPES; t++) { 1610 char csize[6], lsize[6], psize[6], asize[6], avg[6]; 1611 char *typename; 1612 1613 typename = t == DMU_OT_DEFERRED ? "deferred free" : 1614 t == DMU_OT_TOTAL ? "Total" : dmu_ot[t].ot_name; 1615 1616 if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) { 1617 (void) printf("%6s\t%5s\t%5s\t%5s" 1618 "\t%5s\t%5s\t%6s\t%s\n", 1619 "-", 1620 "-", 1621 "-", 1622 "-", 1623 "-", 1624 "-", 1625 "-", 1626 typename); 1627 continue; 1628 } 1629 1630 for (l = ZB_TOTAL - 1; l >= -1; l--) { 1631 level = (l == -1 ? ZB_TOTAL : l); 1632 zb = &zcb.zcb_type[level][t]; 1633 1634 if (zb->zb_asize == 0) 1635 continue; 1636 1637 if (dump_opt['b'] < 3 && level != ZB_TOTAL) 1638 continue; 1639 1640 if (level == 0 && zb->zb_asize == 1641 zcb.zcb_type[ZB_TOTAL][t].zb_asize) 1642 continue; 1643 1644 nicenum(zb->zb_count, csize); 1645 nicenum(zb->zb_lsize, lsize); 1646 nicenum(zb->zb_psize, psize); 1647 nicenum(zb->zb_asize, asize); 1648 nicenum(zb->zb_asize / zb->zb_count, avg); 1649 1650 (void) printf("%6s\t%5s\t%5s\t%5s\t%5s" 1651 "\t%5.2f\t%6.2f\t", 1652 csize, lsize, psize, asize, avg, 1653 (double)zb->zb_lsize / zb->zb_psize, 1654 100.0 * zb->zb_asize / tzb->zb_asize); 1655 1656 if (level == ZB_TOTAL) 1657 (void) printf("%s\n", typename); 1658 else 1659 (void) printf(" L%d %s\n", 1660 level, typename); 1661 } 1662 } 1663 } 1664 1665 (void) printf("\n"); 1666 1667 if (leaks) 1668 return (2); 1669 1670 if (zcb.zcb_haderrors) 1671 return (3); 1672 1673 return (0); 1674 } 1675 1676 static void 1677 dump_zpool(spa_t *spa) 1678 { 1679 dsl_pool_t *dp = spa_get_dsl(spa); 1680 int rc = 0; 1681 1682 if (dump_opt['u']) 1683 dump_uberblock(&spa->spa_uberblock); 1684 1685 if (dump_opt['d'] || dump_opt['i']) { 1686 dump_dir(dp->dp_meta_objset); 1687 if (dump_opt['d'] >= 3) { 1688 dump_bplist(dp->dp_meta_objset, 1689 spa->spa_sync_bplist_obj, "Deferred frees"); 1690 dump_dtl(spa->spa_root_vdev, 0); 1691 dump_metaslabs(spa); 1692 } 1693 dmu_objset_find(spa->spa_name, dump_one_dir, NULL, 1694 DS_FIND_SNAPSHOTS); 1695 } 1696 1697 if (dump_opt['b'] || dump_opt['c']) 1698 rc = dump_block_stats(spa); 1699 1700 if (dump_opt['s']) 1701 show_pool_stats(spa); 1702 1703 if (rc != 0) 1704 exit(rc); 1705 } 1706 1707 int 1708 main(int argc, char **argv) 1709 { 1710 int i, c; 1711 struct rlimit rl = { 1024, 1024 }; 1712 spa_t *spa; 1713 objset_t *os = NULL; 1714 char *endstr; 1715 int dump_all = 1; 1716 int verbose = 0; 1717 int error; 1718 int flag, set; 1719 vdev_knob_t *vk; 1720 1721 (void) setrlimit(RLIMIT_NOFILE, &rl); 1722 1723 dprintf_setup(&argc, argv); 1724 1725 while ((c = getopt(argc, argv, "udibcsvCLO:B:Ul")) != -1) { 1726 switch (c) { 1727 case 'u': 1728 case 'd': 1729 case 'i': 1730 case 'b': 1731 case 'c': 1732 case 's': 1733 case 'C': 1734 case 'l': 1735 dump_opt[c]++; 1736 dump_all = 0; 1737 break; 1738 case 'L': 1739 dump_opt[c]++; 1740 break; 1741 case 'O': 1742 endstr = optarg; 1743 if (endstr[0] == '!') { 1744 endstr++; 1745 set = 0; 1746 } else { 1747 set = 1; 1748 } 1749 if (strcmp(endstr, "post") == 0) { 1750 flag = ADVANCE_PRE; 1751 set = !set; 1752 } else if (strcmp(endstr, "pre") == 0) { 1753 flag = ADVANCE_PRE; 1754 } else if (strcmp(endstr, "prune") == 0) { 1755 flag = ADVANCE_PRUNE; 1756 } else if (strcmp(endstr, "data") == 0) { 1757 flag = ADVANCE_DATA; 1758 } else if (strcmp(endstr, "holes") == 0) { 1759 flag = ADVANCE_HOLES; 1760 } else { 1761 usage(); 1762 } 1763 if (set) 1764 zdb_advance |= flag; 1765 else 1766 zdb_advance &= ~flag; 1767 break; 1768 case 'B': 1769 endstr = optarg - 1; 1770 zdb_noread.zb_objset = strtoull(endstr + 1, &endstr, 0); 1771 zdb_noread.zb_object = strtoull(endstr + 1, &endstr, 0); 1772 zdb_noread.zb_level = strtol(endstr + 1, &endstr, 0); 1773 zdb_noread.zb_blkid = strtoull(endstr + 1, &endstr, 16); 1774 (void) printf("simulating bad block " 1775 "<%llu, %llu, %lld, %llx>\n", 1776 (u_longlong_t)zdb_noread.zb_objset, 1777 (u_longlong_t)zdb_noread.zb_object, 1778 (u_longlong_t)zdb_noread.zb_level, 1779 (u_longlong_t)zdb_noread.zb_blkid); 1780 break; 1781 case 'v': 1782 verbose++; 1783 break; 1784 case 'U': 1785 spa_config_dir = "/tmp"; 1786 break; 1787 default: 1788 usage(); 1789 break; 1790 } 1791 } 1792 1793 kernel_init(FREAD); 1794 1795 /* 1796 * Disable vdev caching. If we don't do this, live pool traversal 1797 * won't make progress because it will never see disk updates. 1798 */ 1799 for (vk = vdev_knob_next(NULL); vk != NULL; vk = vdev_knob_next(vk)) { 1800 if (strcmp(vk->vk_name, "cache_size") == 0) 1801 vk->vk_default = 0; 1802 } 1803 1804 for (c = 0; c < 256; c++) { 1805 if (dump_all && c != 'L' && c != 'l') 1806 dump_opt[c] = 1; 1807 if (dump_opt[c]) 1808 dump_opt[c] += verbose; 1809 } 1810 1811 argc -= optind; 1812 argv += optind; 1813 1814 if (argc < 1) { 1815 if (dump_opt['C']) { 1816 dump_config(NULL); 1817 return (0); 1818 } 1819 usage(); 1820 } 1821 1822 if (dump_opt['l']) { 1823 dump_label(argv[0]); 1824 return (0); 1825 } 1826 1827 if (dump_opt['C']) 1828 dump_config(argv[0]); 1829 1830 if (strchr(argv[0], '/') != NULL) { 1831 error = dmu_objset_open(argv[0], DMU_OST_ANY, 1832 DS_MODE_STANDARD | DS_MODE_READONLY, &os); 1833 } else { 1834 error = spa_open(argv[0], &spa, FTAG); 1835 } 1836 1837 if (error) 1838 fatal("can't open %s: error %d", argv[0], error); 1839 1840 argv++; 1841 if (--argc > 0) { 1842 zopt_objects = argc; 1843 zopt_object = calloc(zopt_objects, sizeof (uint64_t)); 1844 for (i = 0; i < zopt_objects; i++) { 1845 errno = 0; 1846 zopt_object[i] = strtoull(argv[i], NULL, 0); 1847 if (zopt_object[i] == 0 && errno != 0) 1848 fatal("bad object number %s: %s", 1849 argv[i], strerror(errno)); 1850 } 1851 } 1852 1853 if (os != NULL) { 1854 dump_dir(os); 1855 dmu_objset_close(os); 1856 } else { 1857 dump_zpool(spa); 1858 spa_close(spa, FTAG); 1859 } 1860 1861 kernel_fini(); 1862 1863 return (0); 1864 } 1865