1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24 * Copyright (c) 2011, 2018 by Delphix. All rights reserved. 25 * Copyright (c) 2019 Joyent, Inc. 26 */ 27 28 /* Portions Copyright 2010 Robert Milkowski */ 29 30 #include <mdb/mdb_ctf.h> 31 #include <sys/zfs_context.h> 32 #include <sys/mdb_modapi.h> 33 #include <sys/dbuf.h> 34 #include <sys/dmu_objset.h> 35 #include <sys/dsl_dir.h> 36 #include <sys/dsl_pool.h> 37 #include <sys/metaslab_impl.h> 38 #include <sys/space_map.h> 39 #include <sys/list.h> 40 #include <sys/vdev_impl.h> 41 #include <sys/zap_leaf.h> 42 #include <sys/zap_impl.h> 43 #include <ctype.h> 44 #include <sys/zfs_acl.h> 45 #include <sys/sa_impl.h> 46 #include <sys/multilist.h> 47 48 #ifdef _KERNEL 49 #define ZFS_OBJ_NAME "zfs" 50 extern int64_t mdb_gethrtime(void); 51 #else 52 #define ZFS_OBJ_NAME "libzpool.so.1" 53 #endif 54 55 #define ZFS_STRUCT "struct " ZFS_OBJ_NAME "`" 56 57 #ifndef _KERNEL 58 int aok; 59 #endif 60 61 enum spa_flags { 62 SPA_FLAG_CONFIG = 1 << 0, 63 SPA_FLAG_VDEVS = 1 << 1, 64 SPA_FLAG_ERRORS = 1 << 2, 65 SPA_FLAG_METASLAB_GROUPS = 1 << 3, 66 SPA_FLAG_METASLABS = 1 << 4, 67 SPA_FLAG_HISTOGRAMS = 1 << 5 68 }; 69 70 /* 71 * If any of these flags are set, call spa_vdevs in spa_print 72 */ 73 #define SPA_FLAG_ALL_VDEV \ 74 (SPA_FLAG_VDEVS | SPA_FLAG_ERRORS | SPA_FLAG_METASLAB_GROUPS | \ 75 SPA_FLAG_METASLABS) 76 77 static int 78 getmember(uintptr_t addr, const char *type, mdb_ctf_id_t *idp, 79 const char *member, int len, void *buf) 80 { 81 mdb_ctf_id_t id; 82 ulong_t off; 83 char name[64]; 84 85 if (idp == NULL) { 86 if (mdb_ctf_lookup_by_name(type, &id) == -1) { 87 mdb_warn("couldn't find type %s", type); 88 return (DCMD_ERR); 89 } 90 idp = &id; 91 } else { 92 type = name; 93 mdb_ctf_type_name(*idp, name, sizeof (name)); 94 } 95 96 if (mdb_ctf_offsetof(*idp, member, &off) == -1) { 97 mdb_warn("couldn't find member %s of type %s\n", member, type); 98 return (DCMD_ERR); 99 } 100 if (off % 8 != 0) { 101 mdb_warn("member %s of type %s is unsupported bitfield", 102 member, type); 103 return (DCMD_ERR); 104 } 105 off /= 8; 106 107 if (mdb_vread(buf, len, addr + off) == -1) { 108 mdb_warn("failed to read %s from %s at %p", 109 member, type, addr + off); 110 return (DCMD_ERR); 111 } 112 /* mdb_warn("read %s from %s at %p+%llx\n", member, type, addr, off); */ 113 114 return (0); 115 } 116 117 #define GETMEMB(addr, structname, member, dest) \ 118 getmember(addr, ZFS_STRUCT structname, NULL, #member, \ 119 sizeof (dest), &(dest)) 120 121 #define GETMEMBID(addr, ctfid, member, dest) \ 122 getmember(addr, NULL, ctfid, #member, sizeof (dest), &(dest)) 123 124 static boolean_t 125 strisprint(const char *cp) 126 { 127 for (; *cp; cp++) { 128 if (!isprint(*cp)) 129 return (B_FALSE); 130 } 131 return (B_TRUE); 132 } 133 134 /* 135 * <addr>::sm_entries <buffer length in bytes> 136 * 137 * Treat the buffer specified by the given address as a buffer that contains 138 * space map entries. Iterate over the specified number of entries and print 139 * them in both encoded and decoded form. 140 */ 141 /* ARGSUSED */ 142 static int 143 sm_entries(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 144 { 145 uint64_t bufsz = 0; 146 boolean_t preview = B_FALSE; 147 148 if (!(flags & DCMD_ADDRSPEC)) 149 return (DCMD_USAGE); 150 151 if (argc < 1) { 152 preview = B_TRUE; 153 bufsz = 2; 154 } else if (argc != 1) { 155 return (DCMD_USAGE); 156 } else { 157 switch (argv[0].a_type) { 158 case MDB_TYPE_STRING: 159 bufsz = mdb_strtoull(argv[0].a_un.a_str); 160 break; 161 case MDB_TYPE_IMMEDIATE: 162 bufsz = argv[0].a_un.a_val; 163 break; 164 default: 165 return (DCMD_USAGE); 166 } 167 } 168 169 char *actions[] = { "ALLOC", "FREE", "INVALID" }; 170 for (uintptr_t bufend = addr + bufsz; addr < bufend; 171 addr += sizeof (uint64_t)) { 172 uint64_t nwords; 173 uint64_t start_addr = addr; 174 175 uint64_t word = 0; 176 if (mdb_vread(&word, sizeof (word), addr) == -1) { 177 mdb_warn("failed to read space map entry %p", addr); 178 return (DCMD_ERR); 179 } 180 181 if (SM_PREFIX_DECODE(word) == SM_DEBUG_PREFIX) { 182 (void) mdb_printf("\t [%6llu] %s: txg %llu, " 183 "pass %llu\n", 184 (u_longlong_t)(addr), 185 actions[SM_DEBUG_ACTION_DECODE(word)], 186 (u_longlong_t)SM_DEBUG_TXG_DECODE(word), 187 (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(word)); 188 continue; 189 } 190 191 char entry_type; 192 uint64_t raw_offset, raw_run, vdev_id = SM_NO_VDEVID; 193 194 if (SM_PREFIX_DECODE(word) != SM2_PREFIX) { 195 entry_type = (SM_TYPE_DECODE(word) == SM_ALLOC) ? 196 'A' : 'F'; 197 raw_offset = SM_OFFSET_DECODE(word); 198 raw_run = SM_RUN_DECODE(word); 199 nwords = 1; 200 } else { 201 ASSERT3U(SM_PREFIX_DECODE(word), ==, SM2_PREFIX); 202 203 raw_run = SM2_RUN_DECODE(word); 204 vdev_id = SM2_VDEV_DECODE(word); 205 206 /* it is a two-word entry so we read another word */ 207 addr += sizeof (uint64_t); 208 if (addr >= bufend) { 209 mdb_warn("buffer ends in the middle of a two " 210 "word entry\n", addr); 211 return (DCMD_ERR); 212 } 213 214 if (mdb_vread(&word, sizeof (word), addr) == -1) { 215 mdb_warn("failed to read space map entry %p", 216 addr); 217 return (DCMD_ERR); 218 } 219 220 entry_type = (SM2_TYPE_DECODE(word) == SM_ALLOC) ? 221 'A' : 'F'; 222 raw_offset = SM2_OFFSET_DECODE(word); 223 nwords = 2; 224 } 225 226 (void) mdb_printf("\t [%6llx] %c range:" 227 " %010llx-%010llx size: %06llx vdev: %06llu words: %llu\n", 228 (u_longlong_t)start_addr, 229 entry_type, (u_longlong_t)raw_offset, 230 (u_longlong_t)(raw_offset + raw_run), 231 (u_longlong_t)raw_run, 232 (u_longlong_t)vdev_id, (u_longlong_t)nwords); 233 234 if (preview) 235 break; 236 } 237 return (DCMD_OK); 238 } 239 240 static int 241 mdb_dsl_dir_name(uintptr_t addr, char *buf) 242 { 243 static int gotid; 244 static mdb_ctf_id_t dd_id; 245 uintptr_t dd_parent; 246 char dd_myname[ZFS_MAX_DATASET_NAME_LEN]; 247 248 if (!gotid) { 249 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "dsl_dir", 250 &dd_id) == -1) { 251 mdb_warn("couldn't find struct dsl_dir"); 252 return (DCMD_ERR); 253 } 254 gotid = TRUE; 255 } 256 if (GETMEMBID(addr, &dd_id, dd_parent, dd_parent) || 257 GETMEMBID(addr, &dd_id, dd_myname, dd_myname)) { 258 return (DCMD_ERR); 259 } 260 261 if (dd_parent) { 262 if (mdb_dsl_dir_name(dd_parent, buf)) 263 return (DCMD_ERR); 264 strcat(buf, "/"); 265 } 266 267 if (dd_myname[0]) 268 strcat(buf, dd_myname); 269 else 270 strcat(buf, "???"); 271 272 return (0); 273 } 274 275 static int 276 objset_name(uintptr_t addr, char *buf) 277 { 278 static int gotid; 279 static mdb_ctf_id_t os_id, ds_id; 280 uintptr_t os_dsl_dataset; 281 char ds_snapname[ZFS_MAX_DATASET_NAME_LEN]; 282 uintptr_t ds_dir; 283 284 buf[0] = '\0'; 285 286 if (!gotid) { 287 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "objset", 288 &os_id) == -1) { 289 mdb_warn("couldn't find struct objset"); 290 return (DCMD_ERR); 291 } 292 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "dsl_dataset", 293 &ds_id) == -1) { 294 mdb_warn("couldn't find struct dsl_dataset"); 295 return (DCMD_ERR); 296 } 297 298 gotid = TRUE; 299 } 300 301 if (GETMEMBID(addr, &os_id, os_dsl_dataset, os_dsl_dataset)) 302 return (DCMD_ERR); 303 304 if (os_dsl_dataset == 0) { 305 strcat(buf, "mos"); 306 return (0); 307 } 308 309 if (GETMEMBID(os_dsl_dataset, &ds_id, ds_snapname, ds_snapname) || 310 GETMEMBID(os_dsl_dataset, &ds_id, ds_dir, ds_dir)) { 311 return (DCMD_ERR); 312 } 313 314 if (ds_dir && mdb_dsl_dir_name(ds_dir, buf)) 315 return (DCMD_ERR); 316 317 if (ds_snapname[0]) { 318 strcat(buf, "@"); 319 strcat(buf, ds_snapname); 320 } 321 return (0); 322 } 323 324 static int 325 enum_lookup(char *type, int val, const char *prefix, size_t size, char *out) 326 { 327 const char *cp; 328 size_t len = strlen(prefix); 329 mdb_ctf_id_t enum_type; 330 331 if (mdb_ctf_lookup_by_name(type, &enum_type) != 0) { 332 mdb_warn("Could not find enum for %s", type); 333 return (-1); 334 } 335 336 if ((cp = mdb_ctf_enum_name(enum_type, val)) != NULL) { 337 if (strncmp(cp, prefix, len) == 0) 338 cp += len; 339 (void) strncpy(out, cp, size); 340 } else { 341 mdb_snprintf(out, size, "? (%d)", val); 342 } 343 return (0); 344 } 345 346 /* ARGSUSED */ 347 static int 348 zfs_params(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 349 { 350 /* 351 * This table can be approximately generated by running: 352 * egrep "^[a-z0-9_]+ [a-z0-9_]+( =.*)?;" *.c | cut -d ' ' -f 2 353 */ 354 static const char *params[] = { 355 "arc_lotsfree_percent", 356 "arc_pages_pp_reserve", 357 "arc_reduce_dnlc_percent", 358 "arc_swapfs_reserve", 359 "arc_zio_arena_free_shift", 360 "dbuf_cache_hiwater_pct", 361 "dbuf_cache_lowater_pct", 362 "dbuf_cache_max_bytes", 363 "dbuf_cache_max_shift", 364 "ddt_zap_indirect_blockshift", 365 "ddt_zap_leaf_blockshift", 366 "ditto_same_vdev_distance_shift", 367 "dmu_find_threads", 368 "dmu_rescan_dnode_threshold", 369 "dsl_scan_delay_completion", 370 "fzap_default_block_shift", 371 "l2arc_feed_again", 372 "l2arc_feed_min_ms", 373 "l2arc_feed_secs", 374 "l2arc_headroom", 375 "l2arc_headroom_boost", 376 "l2arc_noprefetch", 377 "l2arc_norw", 378 "l2arc_write_boost", 379 "l2arc_write_max", 380 "metaslab_aliquot", 381 "metaslab_bias_enabled", 382 "metaslab_debug_load", 383 "metaslab_debug_unload", 384 "metaslab_df_alloc_threshold", 385 "metaslab_df_free_pct", 386 "metaslab_fragmentation_factor_enabled", 387 "metaslab_force_ganging", 388 "metaslab_lba_weighting_enabled", 389 "metaslab_load_pct", 390 "metaslab_min_alloc_size", 391 "metaslab_ndf_clump_shift", 392 "metaslab_preload_enabled", 393 "metaslab_preload_limit", 394 "metaslab_trace_enabled", 395 "metaslab_trace_max_entries", 396 "metaslab_unload_delay", 397 "metaslabs_per_vdev", 398 "reference_history", 399 "reference_tracking_enable", 400 "send_holes_without_birth_time", 401 "spa_asize_inflation", 402 "spa_load_verify_data", 403 "spa_load_verify_maxinflight", 404 "spa_load_verify_metadata", 405 "spa_max_replication_override", 406 "spa_min_slop", 407 "spa_mode_global", 408 "spa_slop_shift", 409 "space_map_blksz", 410 "vdev_mirror_shift", 411 "zfetch_max_distance", 412 "zfs_abd_chunk_size", 413 "zfs_abd_scatter_enabled", 414 "zfs_arc_average_blocksize", 415 "zfs_arc_evict_batch_limit", 416 "zfs_arc_grow_retry", 417 "zfs_arc_max", 418 "zfs_arc_meta_limit", 419 "zfs_arc_meta_min", 420 "zfs_arc_min", 421 "zfs_arc_p_min_shift", 422 "zfs_arc_shrink_shift", 423 "zfs_async_block_max_blocks", 424 "zfs_ccw_retry_interval", 425 "zfs_commit_timeout_pct", 426 "zfs_compressed_arc_enabled", 427 "zfs_condense_indirect_commit_entry_delay_ticks", 428 "zfs_condense_indirect_vdevs_enable", 429 "zfs_condense_max_obsolete_bytes", 430 "zfs_condense_min_mapping_bytes", 431 "zfs_condense_pct", 432 "zfs_dbgmsg_maxsize", 433 "zfs_deadman_checktime_ms", 434 "zfs_deadman_enabled", 435 "zfs_deadman_synctime_ms", 436 "zfs_dedup_prefetch", 437 "zfs_default_bs", 438 "zfs_default_ibs", 439 "zfs_delay_max_ns", 440 "zfs_delay_min_dirty_percent", 441 "zfs_delay_resolution_ns", 442 "zfs_delay_scale", 443 "zfs_dirty_data_max", 444 "zfs_dirty_data_max_max", 445 "zfs_dirty_data_max_percent", 446 "zfs_dirty_data_sync", 447 "zfs_flags", 448 "zfs_free_bpobj_enabled", 449 "zfs_free_leak_on_eio", 450 "zfs_free_min_time_ms", 451 "zfs_fsync_sync_cnt", 452 "zfs_immediate_write_sz", 453 "zfs_indirect_condense_obsolete_pct", 454 "zfs_lua_check_instrlimit_interval", 455 "zfs_lua_max_instrlimit", 456 "zfs_lua_max_memlimit", 457 "zfs_max_recordsize", 458 "zfs_mdcomp_disable", 459 "zfs_metaslab_condense_block_threshold", 460 "zfs_metaslab_fragmentation_threshold", 461 "zfs_metaslab_segment_weight_enabled", 462 "zfs_metaslab_switch_threshold", 463 "zfs_mg_fragmentation_threshold", 464 "zfs_mg_noalloc_threshold", 465 "zfs_multilist_num_sublists", 466 "zfs_no_scrub_io", 467 "zfs_no_scrub_prefetch", 468 "zfs_nocacheflush", 469 "zfs_nopwrite_enabled", 470 "zfs_object_remap_one_indirect_delay_ticks", 471 "zfs_obsolete_min_time_ms", 472 "zfs_pd_bytes_max", 473 "zfs_per_txg_dirty_frees_percent", 474 "zfs_prefetch_disable", 475 "zfs_read_chunk_size", 476 "zfs_recover", 477 "zfs_recv_queue_length", 478 "zfs_redundant_metadata_most_ditto_level", 479 "zfs_remap_blkptr_enable", 480 "zfs_remove_max_copy_bytes", 481 "zfs_remove_max_segment", 482 "zfs_resilver_delay", 483 "zfs_resilver_min_time_ms", 484 "zfs_scan_idle", 485 "zfs_scan_min_time_ms", 486 "zfs_scrub_delay", 487 "zfs_scrub_limit", 488 "zfs_send_corrupt_data", 489 "zfs_send_queue_length", 490 "zfs_send_set_freerecords_bit", 491 "zfs_sync_pass_deferred_free", 492 "zfs_sync_pass_dont_compress", 493 "zfs_sync_pass_rewrite", 494 "zfs_sync_taskq_batch_pct", 495 "zfs_top_maxinflight", 496 "zfs_txg_timeout", 497 "zfs_vdev_aggregation_limit", 498 "zfs_vdev_async_read_max_active", 499 "zfs_vdev_async_read_min_active", 500 "zfs_vdev_async_write_active_max_dirty_percent", 501 "zfs_vdev_async_write_active_min_dirty_percent", 502 "zfs_vdev_async_write_max_active", 503 "zfs_vdev_async_write_min_active", 504 "zfs_vdev_cache_bshift", 505 "zfs_vdev_cache_max", 506 "zfs_vdev_cache_size", 507 "zfs_vdev_max_active", 508 "zfs_vdev_queue_depth_pct", 509 "zfs_vdev_read_gap_limit", 510 "zfs_vdev_removal_max_active", 511 "zfs_vdev_removal_min_active", 512 "zfs_vdev_scrub_max_active", 513 "zfs_vdev_scrub_min_active", 514 "zfs_vdev_sync_read_max_active", 515 "zfs_vdev_sync_read_min_active", 516 "zfs_vdev_sync_write_max_active", 517 "zfs_vdev_sync_write_min_active", 518 "zfs_vdev_write_gap_limit", 519 "zfs_write_implies_delete_child", 520 "zfs_zil_clean_taskq_maxalloc", 521 "zfs_zil_clean_taskq_minalloc", 522 "zfs_zil_clean_taskq_nthr_pct", 523 "zil_replay_disable", 524 "zil_slog_bulk", 525 "zio_buf_debug_limit", 526 "zio_dva_throttle_enabled", 527 "zio_injection_enabled", 528 "zvol_immediate_write_sz", 529 "zvol_maxphys", 530 "zvol_unmap_enabled", 531 "zvol_unmap_sync_enabled", 532 "zfs_max_dataset_nesting", 533 }; 534 535 for (int i = 0; i < sizeof (params) / sizeof (params[0]); i++) { 536 int sz; 537 uint64_t val64; 538 uint32_t *val32p = (uint32_t *)&val64; 539 540 sz = mdb_readvar(&val64, params[i]); 541 if (sz == 4) { 542 mdb_printf("%s = 0x%x\n", params[i], *val32p); 543 } else if (sz == 8) { 544 mdb_printf("%s = 0x%llx\n", params[i], val64); 545 } else { 546 mdb_warn("variable %s not found", params[i]); 547 } 548 } 549 550 return (DCMD_OK); 551 } 552 553 /* ARGSUSED */ 554 static int 555 dva(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 556 { 557 dva_t dva; 558 if (mdb_vread(&dva, sizeof (dva_t), addr) == -1) { 559 mdb_warn("failed to read dva_t"); 560 return (DCMD_ERR); 561 } 562 mdb_printf("<%llu:%llx:%llx>\n", 563 (u_longlong_t)DVA_GET_VDEV(&dva), 564 (u_longlong_t)DVA_GET_OFFSET(&dva), 565 (u_longlong_t)DVA_GET_ASIZE(&dva)); 566 567 return (DCMD_OK); 568 } 569 570 /* ARGSUSED */ 571 static int 572 blkptr(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 573 { 574 char type[80], checksum[80], compress[80]; 575 blkptr_t blk, *bp = &blk; 576 char buf[BP_SPRINTF_LEN]; 577 578 if (mdb_vread(&blk, sizeof (blkptr_t), addr) == -1) { 579 mdb_warn("failed to read blkptr_t"); 580 return (DCMD_ERR); 581 } 582 583 if (enum_lookup("enum dmu_object_type", BP_GET_TYPE(bp), "DMU_OT_", 584 sizeof (type), type) == -1 || 585 enum_lookup("enum zio_checksum", BP_GET_CHECKSUM(bp), 586 "ZIO_CHECKSUM_", sizeof (checksum), checksum) == -1 || 587 enum_lookup("enum zio_compress", BP_GET_COMPRESS(bp), 588 "ZIO_COMPRESS_", sizeof (compress), compress) == -1) { 589 mdb_warn("Could not find blkptr enumerated types"); 590 return (DCMD_ERR); 591 } 592 593 SNPRINTF_BLKPTR(mdb_snprintf, '\n', buf, sizeof (buf), bp, type, 594 checksum, compress); 595 596 mdb_printf("%s\n", buf); 597 598 return (DCMD_OK); 599 } 600 601 typedef struct mdb_dmu_buf_impl { 602 struct { 603 uint64_t db_object; 604 uintptr_t db_data; 605 } db; 606 uintptr_t db_objset; 607 uint64_t db_level; 608 uint64_t db_blkid; 609 struct { 610 uint64_t rc_count; 611 } db_holds; 612 } mdb_dmu_buf_impl_t; 613 614 /* ARGSUSED */ 615 static int 616 dbuf(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 617 { 618 mdb_dmu_buf_impl_t db; 619 char objectname[32]; 620 char blkidname[32]; 621 char path[ZFS_MAX_DATASET_NAME_LEN]; 622 int ptr_width = (int)(sizeof (void *)) * 2; 623 624 if (DCMD_HDRSPEC(flags)) 625 mdb_printf("%*s %8s %3s %9s %5s %s\n", 626 ptr_width, "addr", "object", "lvl", "blkid", "holds", "os"); 627 628 if (mdb_ctf_vread(&db, ZFS_STRUCT "dmu_buf_impl", "mdb_dmu_buf_impl_t", 629 addr, 0) == -1) 630 return (DCMD_ERR); 631 632 if (db.db.db_object == DMU_META_DNODE_OBJECT) 633 (void) strcpy(objectname, "mdn"); 634 else 635 (void) mdb_snprintf(objectname, sizeof (objectname), "%llx", 636 (u_longlong_t)db.db.db_object); 637 638 if (db.db_blkid == DMU_BONUS_BLKID) 639 (void) strcpy(blkidname, "bonus"); 640 else 641 (void) mdb_snprintf(blkidname, sizeof (blkidname), "%llx", 642 (u_longlong_t)db.db_blkid); 643 644 if (objset_name(db.db_objset, path)) { 645 return (DCMD_ERR); 646 } 647 648 mdb_printf("%*p %8s %3u %9s %5llu %s\n", ptr_width, addr, 649 objectname, (int)db.db_level, blkidname, 650 db.db_holds.rc_count, path); 651 652 return (DCMD_OK); 653 } 654 655 /* ARGSUSED */ 656 static int 657 dbuf_stats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 658 { 659 #define HISTOSZ 32 660 uintptr_t dbp; 661 dmu_buf_impl_t db; 662 dbuf_hash_table_t ht; 663 uint64_t bucket, ndbufs; 664 uint64_t histo[HISTOSZ]; 665 uint64_t histo2[HISTOSZ]; 666 int i, maxidx; 667 668 if (mdb_readvar(&ht, "dbuf_hash_table") == -1) { 669 mdb_warn("failed to read 'dbuf_hash_table'"); 670 return (DCMD_ERR); 671 } 672 673 for (i = 0; i < HISTOSZ; i++) { 674 histo[i] = 0; 675 histo2[i] = 0; 676 } 677 678 ndbufs = 0; 679 for (bucket = 0; bucket < ht.hash_table_mask+1; bucket++) { 680 int len; 681 682 if (mdb_vread(&dbp, sizeof (void *), 683 (uintptr_t)(ht.hash_table+bucket)) == -1) { 684 mdb_warn("failed to read hash bucket %u at %p", 685 bucket, ht.hash_table+bucket); 686 return (DCMD_ERR); 687 } 688 689 len = 0; 690 while (dbp != 0) { 691 if (mdb_vread(&db, sizeof (dmu_buf_impl_t), 692 dbp) == -1) { 693 mdb_warn("failed to read dbuf at %p", dbp); 694 return (DCMD_ERR); 695 } 696 dbp = (uintptr_t)db.db_hash_next; 697 for (i = MIN(len, HISTOSZ - 1); i >= 0; i--) 698 histo2[i]++; 699 len++; 700 ndbufs++; 701 } 702 703 if (len >= HISTOSZ) 704 len = HISTOSZ-1; 705 histo[len]++; 706 } 707 708 mdb_printf("hash table has %llu buckets, %llu dbufs " 709 "(avg %llu buckets/dbuf)\n", 710 ht.hash_table_mask+1, ndbufs, 711 (ht.hash_table_mask+1)/ndbufs); 712 713 mdb_printf("\n"); 714 maxidx = 0; 715 for (i = 0; i < HISTOSZ; i++) 716 if (histo[i] > 0) 717 maxidx = i; 718 mdb_printf("hash chain length number of buckets\n"); 719 for (i = 0; i <= maxidx; i++) 720 mdb_printf("%u %llu\n", i, histo[i]); 721 722 mdb_printf("\n"); 723 maxidx = 0; 724 for (i = 0; i < HISTOSZ; i++) 725 if (histo2[i] > 0) 726 maxidx = i; 727 mdb_printf("hash chain depth number of dbufs\n"); 728 for (i = 0; i <= maxidx; i++) 729 mdb_printf("%u or more %llu %llu%%\n", 730 i, histo2[i], histo2[i]*100/ndbufs); 731 732 733 return (DCMD_OK); 734 } 735 736 #define CHAIN_END 0xffff 737 /* 738 * ::zap_leaf [-v] 739 * 740 * Print a zap_leaf_phys_t, assumed to be 16k 741 */ 742 /* ARGSUSED */ 743 static int 744 zap_leaf(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 745 { 746 char buf[16*1024]; 747 int verbose = B_FALSE; 748 int four = B_FALSE; 749 dmu_buf_t l_dbuf; 750 zap_leaf_t l; 751 zap_leaf_phys_t *zlp = (void *)buf; 752 int i; 753 754 if (mdb_getopts(argc, argv, 755 'v', MDB_OPT_SETBITS, TRUE, &verbose, 756 '4', MDB_OPT_SETBITS, TRUE, &four, 757 NULL) != argc) 758 return (DCMD_USAGE); 759 760 l_dbuf.db_data = zlp; 761 l.l_dbuf = &l_dbuf; 762 l.l_bs = 14; /* assume 16k blocks */ 763 if (four) 764 l.l_bs = 12; 765 766 if (!(flags & DCMD_ADDRSPEC)) { 767 return (DCMD_USAGE); 768 } 769 770 if (mdb_vread(buf, sizeof (buf), addr) == -1) { 771 mdb_warn("failed to read zap_leaf_phys_t at %p", addr); 772 return (DCMD_ERR); 773 } 774 775 if (zlp->l_hdr.lh_block_type != ZBT_LEAF || 776 zlp->l_hdr.lh_magic != ZAP_LEAF_MAGIC) { 777 mdb_warn("This does not appear to be a zap_leaf_phys_t"); 778 return (DCMD_ERR); 779 } 780 781 mdb_printf("zap_leaf_phys_t at %p:\n", addr); 782 mdb_printf(" lh_prefix_len = %u\n", zlp->l_hdr.lh_prefix_len); 783 mdb_printf(" lh_prefix = %llx\n", zlp->l_hdr.lh_prefix); 784 mdb_printf(" lh_nentries = %u\n", zlp->l_hdr.lh_nentries); 785 mdb_printf(" lh_nfree = %u\n", zlp->l_hdr.lh_nfree, 786 zlp->l_hdr.lh_nfree * 100 / (ZAP_LEAF_NUMCHUNKS(&l))); 787 mdb_printf(" lh_freelist = %u\n", zlp->l_hdr.lh_freelist); 788 mdb_printf(" lh_flags = %x (%s)\n", zlp->l_hdr.lh_flags, 789 zlp->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED ? 790 "ENTRIES_CDSORTED" : ""); 791 792 if (verbose) { 793 mdb_printf(" hash table:\n"); 794 for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(&l); i++) { 795 if (zlp->l_hash[i] != CHAIN_END) 796 mdb_printf(" %u: %u\n", i, zlp->l_hash[i]); 797 } 798 } 799 800 mdb_printf(" chunks:\n"); 801 for (i = 0; i < ZAP_LEAF_NUMCHUNKS(&l); i++) { 802 /* LINTED: alignment */ 803 zap_leaf_chunk_t *zlc = &ZAP_LEAF_CHUNK(&l, i); 804 switch (zlc->l_entry.le_type) { 805 case ZAP_CHUNK_FREE: 806 if (verbose) { 807 mdb_printf(" %u: free; lf_next = %u\n", 808 i, zlc->l_free.lf_next); 809 } 810 break; 811 case ZAP_CHUNK_ENTRY: 812 mdb_printf(" %u: entry\n", i); 813 if (verbose) { 814 mdb_printf(" le_next = %u\n", 815 zlc->l_entry.le_next); 816 } 817 mdb_printf(" le_name_chunk = %u\n", 818 zlc->l_entry.le_name_chunk); 819 mdb_printf(" le_name_numints = %u\n", 820 zlc->l_entry.le_name_numints); 821 mdb_printf(" le_value_chunk = %u\n", 822 zlc->l_entry.le_value_chunk); 823 mdb_printf(" le_value_intlen = %u\n", 824 zlc->l_entry.le_value_intlen); 825 mdb_printf(" le_value_numints = %u\n", 826 zlc->l_entry.le_value_numints); 827 mdb_printf(" le_cd = %u\n", 828 zlc->l_entry.le_cd); 829 mdb_printf(" le_hash = %llx\n", 830 zlc->l_entry.le_hash); 831 break; 832 case ZAP_CHUNK_ARRAY: 833 mdb_printf(" %u: array", i); 834 if (strisprint((char *)zlc->l_array.la_array)) 835 mdb_printf(" \"%s\"", zlc->l_array.la_array); 836 mdb_printf("\n"); 837 if (verbose) { 838 int j; 839 mdb_printf(" "); 840 for (j = 0; j < ZAP_LEAF_ARRAY_BYTES; j++) { 841 mdb_printf("%02x ", 842 zlc->l_array.la_array[j]); 843 } 844 mdb_printf("\n"); 845 } 846 if (zlc->l_array.la_next != CHAIN_END) { 847 mdb_printf(" lf_next = %u\n", 848 zlc->l_array.la_next); 849 } 850 break; 851 default: 852 mdb_printf(" %u: undefined type %u\n", 853 zlc->l_entry.le_type); 854 } 855 } 856 857 return (DCMD_OK); 858 } 859 860 typedef struct dbufs_data { 861 mdb_ctf_id_t id; 862 uint64_t objset; 863 uint64_t object; 864 uint64_t level; 865 uint64_t blkid; 866 char *osname; 867 } dbufs_data_t; 868 869 #define DBUFS_UNSET (0xbaddcafedeadbeefULL) 870 871 /* ARGSUSED */ 872 static int 873 dbufs_cb(uintptr_t addr, const void *unknown, void *arg) 874 { 875 dbufs_data_t *data = arg; 876 uintptr_t objset; 877 dmu_buf_t db; 878 uint8_t level; 879 uint64_t blkid; 880 char osname[ZFS_MAX_DATASET_NAME_LEN]; 881 882 if (GETMEMBID(addr, &data->id, db_objset, objset) || 883 GETMEMBID(addr, &data->id, db, db) || 884 GETMEMBID(addr, &data->id, db_level, level) || 885 GETMEMBID(addr, &data->id, db_blkid, blkid)) { 886 return (WALK_ERR); 887 } 888 889 if ((data->objset == DBUFS_UNSET || data->objset == objset) && 890 (data->osname == NULL || (objset_name(objset, osname) == 0 && 891 strcmp(data->osname, osname) == 0)) && 892 (data->object == DBUFS_UNSET || data->object == db.db_object) && 893 (data->level == DBUFS_UNSET || data->level == level) && 894 (data->blkid == DBUFS_UNSET || data->blkid == blkid)) { 895 mdb_printf("%#lr\n", addr); 896 } 897 return (WALK_NEXT); 898 } 899 900 /* ARGSUSED */ 901 static int 902 dbufs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 903 { 904 dbufs_data_t data; 905 char *object = NULL; 906 char *blkid = NULL; 907 908 data.objset = data.object = data.level = data.blkid = DBUFS_UNSET; 909 data.osname = NULL; 910 911 if (mdb_getopts(argc, argv, 912 'O', MDB_OPT_UINT64, &data.objset, 913 'n', MDB_OPT_STR, &data.osname, 914 'o', MDB_OPT_STR, &object, 915 'l', MDB_OPT_UINT64, &data.level, 916 'b', MDB_OPT_STR, &blkid) != argc) { 917 return (DCMD_USAGE); 918 } 919 920 if (object) { 921 if (strcmp(object, "mdn") == 0) { 922 data.object = DMU_META_DNODE_OBJECT; 923 } else { 924 data.object = mdb_strtoull(object); 925 } 926 } 927 928 if (blkid) { 929 if (strcmp(blkid, "bonus") == 0) { 930 data.blkid = DMU_BONUS_BLKID; 931 } else { 932 data.blkid = mdb_strtoull(blkid); 933 } 934 } 935 936 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "dmu_buf_impl", &data.id) == -1) { 937 mdb_warn("couldn't find struct dmu_buf_impl_t"); 938 return (DCMD_ERR); 939 } 940 941 if (mdb_walk("dmu_buf_impl_t", dbufs_cb, &data) != 0) { 942 mdb_warn("can't walk dbufs"); 943 return (DCMD_ERR); 944 } 945 946 return (DCMD_OK); 947 } 948 949 typedef struct abuf_find_data { 950 dva_t dva; 951 mdb_ctf_id_t id; 952 } abuf_find_data_t; 953 954 /* ARGSUSED */ 955 static int 956 abuf_find_cb(uintptr_t addr, const void *unknown, void *arg) 957 { 958 abuf_find_data_t *data = arg; 959 dva_t dva; 960 961 if (GETMEMBID(addr, &data->id, b_dva, dva)) { 962 return (WALK_ERR); 963 } 964 965 if (dva.dva_word[0] == data->dva.dva_word[0] && 966 dva.dva_word[1] == data->dva.dva_word[1]) { 967 mdb_printf("%#lr\n", addr); 968 } 969 return (WALK_NEXT); 970 } 971 972 /* ARGSUSED */ 973 static int 974 abuf_find(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 975 { 976 abuf_find_data_t data; 977 GElf_Sym sym; 978 int i; 979 const char *syms[] = { 980 "ARC_mru", 981 "ARC_mru_ghost", 982 "ARC_mfu", 983 "ARC_mfu_ghost", 984 }; 985 986 if (argc != 2) 987 return (DCMD_USAGE); 988 989 for (i = 0; i < 2; i ++) { 990 switch (argv[i].a_type) { 991 case MDB_TYPE_STRING: 992 data.dva.dva_word[i] = mdb_strtoull(argv[i].a_un.a_str); 993 break; 994 case MDB_TYPE_IMMEDIATE: 995 data.dva.dva_word[i] = argv[i].a_un.a_val; 996 break; 997 default: 998 return (DCMD_USAGE); 999 } 1000 } 1001 1002 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "arc_buf_hdr", &data.id) == -1) { 1003 mdb_warn("couldn't find struct arc_buf_hdr"); 1004 return (DCMD_ERR); 1005 } 1006 1007 for (i = 0; i < sizeof (syms) / sizeof (syms[0]); i++) { 1008 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, syms[i], &sym)) { 1009 mdb_warn("can't find symbol %s", syms[i]); 1010 return (DCMD_ERR); 1011 } 1012 1013 if (mdb_pwalk("list", abuf_find_cb, &data, sym.st_value) != 0) { 1014 mdb_warn("can't walk %s", syms[i]); 1015 return (DCMD_ERR); 1016 } 1017 } 1018 1019 return (DCMD_OK); 1020 } 1021 1022 1023 typedef struct dbgmsg_arg { 1024 boolean_t da_verbose; 1025 boolean_t da_address; 1026 } dbgmsg_arg_t; 1027 1028 /* ARGSUSED */ 1029 static int 1030 dbgmsg_cb(uintptr_t addr, const void *unknown, void *arg) 1031 { 1032 static mdb_ctf_id_t id; 1033 static boolean_t gotid; 1034 static ulong_t off; 1035 1036 dbgmsg_arg_t *da = arg; 1037 time_t timestamp; 1038 char buf[1024]; 1039 1040 if (!gotid) { 1041 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "zfs_dbgmsg", &id) == 1042 -1) { 1043 mdb_warn("couldn't find struct zfs_dbgmsg"); 1044 return (WALK_ERR); 1045 } 1046 gotid = TRUE; 1047 if (mdb_ctf_offsetof(id, "zdm_msg", &off) == -1) { 1048 mdb_warn("couldn't find zdm_msg"); 1049 return (WALK_ERR); 1050 } 1051 off /= 8; 1052 } 1053 1054 1055 if (GETMEMBID(addr, &id, zdm_timestamp, timestamp)) { 1056 return (WALK_ERR); 1057 } 1058 1059 if (mdb_readstr(buf, sizeof (buf), addr + off) == -1) { 1060 mdb_warn("failed to read zdm_msg at %p\n", addr + off); 1061 return (DCMD_ERR); 1062 } 1063 1064 if (da->da_address) 1065 mdb_printf("%p ", addr); 1066 if (da->da_verbose) 1067 mdb_printf("%Y ", timestamp); 1068 1069 mdb_printf("%s\n", buf); 1070 1071 if (da->da_verbose) 1072 (void) mdb_call_dcmd("whatis", addr, DCMD_ADDRSPEC, 0, NULL); 1073 1074 return (WALK_NEXT); 1075 } 1076 1077 /* ARGSUSED */ 1078 static int 1079 dbgmsg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1080 { 1081 GElf_Sym sym; 1082 dbgmsg_arg_t da = { 0 }; 1083 1084 if (mdb_getopts(argc, argv, 1085 'v', MDB_OPT_SETBITS, B_TRUE, &da.da_verbose, 1086 'a', MDB_OPT_SETBITS, B_TRUE, &da.da_address, 1087 NULL) != argc) 1088 return (DCMD_USAGE); 1089 1090 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "zfs_dbgmsgs", &sym)) { 1091 mdb_warn("can't find zfs_dbgmsgs"); 1092 return (DCMD_ERR); 1093 } 1094 1095 if (mdb_pwalk("list", dbgmsg_cb, &da, sym.st_value) != 0) { 1096 mdb_warn("can't walk zfs_dbgmsgs"); 1097 return (DCMD_ERR); 1098 } 1099 1100 return (DCMD_OK); 1101 } 1102 1103 /*ARGSUSED*/ 1104 static int 1105 arc_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1106 { 1107 kstat_named_t *stats; 1108 GElf_Sym sym; 1109 int nstats, i; 1110 uint_t opt_a = FALSE; 1111 uint_t opt_b = FALSE; 1112 uint_t shift = 0; 1113 const char *suffix; 1114 1115 static const char *bytestats[] = { 1116 "p", "c", "c_min", "c_max", "size", "duplicate_buffers_size", 1117 "arc_meta_used", "arc_meta_limit", "arc_meta_max", 1118 "arc_meta_min", "hdr_size", "data_size", "metadata_size", 1119 "other_size", "anon_size", "anon_evictable_data", 1120 "anon_evictable_metadata", "mru_size", "mru_evictable_data", 1121 "mru_evictable_metadata", "mru_ghost_size", 1122 "mru_ghost_evictable_data", "mru_ghost_evictable_metadata", 1123 "mfu_size", "mfu_evictable_data", "mfu_evictable_metadata", 1124 "mfu_ghost_size", "mfu_ghost_evictable_data", 1125 "mfu_ghost_evictable_metadata", "evict_l2_cached", 1126 "evict_l2_eligible", "evict_l2_ineligible", "l2_read_bytes", 1127 "l2_write_bytes", "l2_size", "l2_asize", "l2_hdr_size", 1128 "compressed_size", "uncompressed_size", "overhead_size", 1129 NULL 1130 }; 1131 1132 static const char *extras[] = { 1133 "arc_no_grow", "arc_tempreserve", 1134 NULL 1135 }; 1136 1137 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "arc_stats", &sym) == -1) { 1138 mdb_warn("failed to find 'arc_stats'"); 1139 return (DCMD_ERR); 1140 } 1141 1142 stats = mdb_zalloc(sym.st_size, UM_SLEEP | UM_GC); 1143 1144 if (mdb_vread(stats, sym.st_size, sym.st_value) == -1) { 1145 mdb_warn("couldn't read 'arc_stats' at %p", sym.st_value); 1146 return (DCMD_ERR); 1147 } 1148 1149 nstats = sym.st_size / sizeof (kstat_named_t); 1150 1151 /* NB: -a / opt_a are ignored for backwards compatability */ 1152 if (mdb_getopts(argc, argv, 1153 'a', MDB_OPT_SETBITS, TRUE, &opt_a, 1154 'b', MDB_OPT_SETBITS, TRUE, &opt_b, 1155 'k', MDB_OPT_SETBITS, 10, &shift, 1156 'm', MDB_OPT_SETBITS, 20, &shift, 1157 'g', MDB_OPT_SETBITS, 30, &shift, 1158 NULL) != argc) 1159 return (DCMD_USAGE); 1160 1161 if (!opt_b && !shift) 1162 shift = 20; 1163 1164 switch (shift) { 1165 case 0: 1166 suffix = "B"; 1167 break; 1168 case 10: 1169 suffix = "KB"; 1170 break; 1171 case 20: 1172 suffix = "MB"; 1173 break; 1174 case 30: 1175 suffix = "GB"; 1176 break; 1177 default: 1178 suffix = "XX"; 1179 } 1180 1181 for (i = 0; i < nstats; i++) { 1182 int j; 1183 boolean_t bytes = B_FALSE; 1184 1185 for (j = 0; bytestats[j]; j++) { 1186 if (strcmp(stats[i].name, bytestats[j]) == 0) { 1187 bytes = B_TRUE; 1188 break; 1189 } 1190 } 1191 1192 if (bytes) { 1193 mdb_printf("%-25s = %9llu %s\n", stats[i].name, 1194 stats[i].value.ui64 >> shift, suffix); 1195 } else { 1196 mdb_printf("%-25s = %9llu\n", stats[i].name, 1197 stats[i].value.ui64); 1198 } 1199 } 1200 1201 for (i = 0; extras[i]; i++) { 1202 uint64_t buf; 1203 1204 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, extras[i], &sym) == -1) { 1205 mdb_warn("failed to find '%s'", extras[i]); 1206 return (DCMD_ERR); 1207 } 1208 1209 if (sym.st_size != sizeof (uint64_t) && 1210 sym.st_size != sizeof (uint32_t)) { 1211 mdb_warn("expected scalar for variable '%s'\n", 1212 extras[i]); 1213 return (DCMD_ERR); 1214 } 1215 1216 if (mdb_vread(&buf, sym.st_size, sym.st_value) == -1) { 1217 mdb_warn("couldn't read '%s'", extras[i]); 1218 return (DCMD_ERR); 1219 } 1220 1221 mdb_printf("%-25s = ", extras[i]); 1222 1223 /* NB: all the 64-bit extras happen to be byte counts */ 1224 if (sym.st_size == sizeof (uint64_t)) 1225 mdb_printf("%9llu %s\n", buf >> shift, suffix); 1226 1227 if (sym.st_size == sizeof (uint32_t)) 1228 mdb_printf("%9d\n", *((uint32_t *)&buf)); 1229 } 1230 return (DCMD_OK); 1231 } 1232 1233 typedef struct mdb_spa_print { 1234 pool_state_t spa_state; 1235 char spa_name[ZFS_MAX_DATASET_NAME_LEN]; 1236 uintptr_t spa_normal_class; 1237 } mdb_spa_print_t; 1238 1239 1240 const char histo_stars[] = "****************************************"; 1241 const int histo_width = sizeof (histo_stars) - 1; 1242 1243 static void 1244 dump_histogram(const uint64_t *histo, int size, int offset) 1245 { 1246 int i; 1247 int minidx = size - 1; 1248 int maxidx = 0; 1249 uint64_t max = 0; 1250 1251 for (i = 0; i < size; i++) { 1252 if (histo[i] > max) 1253 max = histo[i]; 1254 if (histo[i] > 0 && i > maxidx) 1255 maxidx = i; 1256 if (histo[i] > 0 && i < minidx) 1257 minidx = i; 1258 } 1259 1260 if (max < histo_width) 1261 max = histo_width; 1262 1263 for (i = minidx; i <= maxidx; i++) { 1264 mdb_printf("%3u: %6llu %s\n", 1265 i + offset, (u_longlong_t)histo[i], 1266 &histo_stars[(max - histo[i]) * histo_width / max]); 1267 } 1268 } 1269 1270 typedef struct mdb_metaslab_class { 1271 uint64_t mc_histogram[RANGE_TREE_HISTOGRAM_SIZE]; 1272 } mdb_metaslab_class_t; 1273 1274 /* 1275 * spa_class_histogram(uintptr_t class_addr) 1276 * 1277 * Prints free space histogram for a device class 1278 * 1279 * Returns DCMD_OK, or DCMD_ERR. 1280 */ 1281 static int 1282 spa_class_histogram(uintptr_t class_addr) 1283 { 1284 mdb_metaslab_class_t mc; 1285 if (mdb_ctf_vread(&mc, "metaslab_class_t", 1286 "mdb_metaslab_class_t", class_addr, 0) == -1) 1287 return (DCMD_ERR); 1288 1289 mdb_inc_indent(4); 1290 dump_histogram(mc.mc_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0); 1291 mdb_dec_indent(4); 1292 return (DCMD_OK); 1293 } 1294 1295 /* 1296 * ::spa 1297 * 1298 * -c Print configuration information as well 1299 * -v Print vdev state 1300 * -e Print vdev error stats 1301 * -m Print vdev metaslab info 1302 * -M print vdev metaslab group info 1303 * -h Print histogram info (must be combined with -m or -M) 1304 * 1305 * Print a summarized spa_t. When given no arguments, prints out a table of all 1306 * active pools on the system. 1307 */ 1308 /* ARGSUSED */ 1309 static int 1310 spa_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1311 { 1312 const char *statetab[] = { "ACTIVE", "EXPORTED", "DESTROYED", 1313 "SPARE", "L2CACHE", "UNINIT", "UNAVAIL", "POTENTIAL" }; 1314 const char *state; 1315 int spa_flags = 0; 1316 1317 if (mdb_getopts(argc, argv, 1318 'c', MDB_OPT_SETBITS, SPA_FLAG_CONFIG, &spa_flags, 1319 'v', MDB_OPT_SETBITS, SPA_FLAG_VDEVS, &spa_flags, 1320 'e', MDB_OPT_SETBITS, SPA_FLAG_ERRORS, &spa_flags, 1321 'M', MDB_OPT_SETBITS, SPA_FLAG_METASLAB_GROUPS, &spa_flags, 1322 'm', MDB_OPT_SETBITS, SPA_FLAG_METASLABS, &spa_flags, 1323 'h', MDB_OPT_SETBITS, SPA_FLAG_HISTOGRAMS, &spa_flags, 1324 NULL) != argc) 1325 return (DCMD_USAGE); 1326 1327 if (!(flags & DCMD_ADDRSPEC)) { 1328 if (mdb_walk_dcmd("spa", "spa", argc, argv) == -1) { 1329 mdb_warn("can't walk spa"); 1330 return (DCMD_ERR); 1331 } 1332 1333 return (DCMD_OK); 1334 } 1335 1336 if (flags & DCMD_PIPE_OUT) { 1337 mdb_printf("%#lr\n", addr); 1338 return (DCMD_OK); 1339 } 1340 1341 if (DCMD_HDRSPEC(flags)) 1342 mdb_printf("%<u>%-?s %9s %-*s%</u>\n", "ADDR", "STATE", 1343 sizeof (uintptr_t) == 4 ? 60 : 52, "NAME"); 1344 1345 mdb_spa_print_t spa; 1346 if (mdb_ctf_vread(&spa, "spa_t", "mdb_spa_print_t", addr, 0) == -1) 1347 return (DCMD_ERR); 1348 1349 if (spa.spa_state < 0 || spa.spa_state > POOL_STATE_UNAVAIL) 1350 state = "UNKNOWN"; 1351 else 1352 state = statetab[spa.spa_state]; 1353 1354 mdb_printf("%0?p %9s %s\n", addr, state, spa.spa_name); 1355 if (spa_flags & SPA_FLAG_HISTOGRAMS) 1356 spa_class_histogram(spa.spa_normal_class); 1357 1358 if (spa_flags & SPA_FLAG_CONFIG) { 1359 mdb_printf("\n"); 1360 mdb_inc_indent(4); 1361 if (mdb_call_dcmd("spa_config", addr, flags, 0, 1362 NULL) != DCMD_OK) 1363 return (DCMD_ERR); 1364 mdb_dec_indent(4); 1365 } 1366 1367 if (spa_flags & SPA_FLAG_ALL_VDEV) { 1368 mdb_arg_t v; 1369 char opts[100] = "-"; 1370 int args = 1371 (spa_flags | SPA_FLAG_VDEVS) == SPA_FLAG_VDEVS ? 0 : 1; 1372 1373 if (spa_flags & SPA_FLAG_ERRORS) 1374 strcat(opts, "e"); 1375 if (spa_flags & SPA_FLAG_METASLABS) 1376 strcat(opts, "m"); 1377 if (spa_flags & SPA_FLAG_METASLAB_GROUPS) 1378 strcat(opts, "M"); 1379 if (spa_flags & SPA_FLAG_HISTOGRAMS) 1380 strcat(opts, "h"); 1381 1382 v.a_type = MDB_TYPE_STRING; 1383 v.a_un.a_str = opts; 1384 1385 mdb_printf("\n"); 1386 mdb_inc_indent(4); 1387 if (mdb_call_dcmd("spa_vdevs", addr, flags, args, 1388 &v) != DCMD_OK) 1389 return (DCMD_ERR); 1390 mdb_dec_indent(4); 1391 } 1392 1393 return (DCMD_OK); 1394 } 1395 1396 typedef struct mdb_spa_config_spa { 1397 uintptr_t spa_config; 1398 } mdb_spa_config_spa_t; 1399 1400 /* 1401 * ::spa_config 1402 * 1403 * Given a spa_t, print the configuration information stored in spa_config. 1404 * Since it's just an nvlist, format it as an indented list of name=value pairs. 1405 * We simply read the value of spa_config and pass off to ::nvlist. 1406 */ 1407 /* ARGSUSED */ 1408 static int 1409 spa_print_config(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1410 { 1411 mdb_spa_config_spa_t spa; 1412 1413 if (argc != 0 || !(flags & DCMD_ADDRSPEC)) 1414 return (DCMD_USAGE); 1415 1416 if (mdb_ctf_vread(&spa, ZFS_STRUCT "spa", "mdb_spa_config_spa_t", 1417 addr, 0) == -1) 1418 return (DCMD_ERR); 1419 1420 if (spa.spa_config == 0) { 1421 mdb_printf("(none)\n"); 1422 return (DCMD_OK); 1423 } 1424 1425 return (mdb_call_dcmd("nvlist", spa.spa_config, flags, 1426 0, NULL)); 1427 } 1428 1429 1430 1431 typedef struct mdb_range_tree { 1432 uint64_t rt_space; 1433 } mdb_range_tree_t; 1434 1435 typedef struct mdb_metaslab_group { 1436 uint64_t mg_fragmentation; 1437 uint64_t mg_histogram[RANGE_TREE_HISTOGRAM_SIZE]; 1438 uintptr_t mg_vd; 1439 } mdb_metaslab_group_t; 1440 1441 typedef struct mdb_metaslab { 1442 uint64_t ms_id; 1443 uint64_t ms_start; 1444 uint64_t ms_size; 1445 int64_t ms_deferspace; 1446 uint64_t ms_fragmentation; 1447 uint64_t ms_weight; 1448 uintptr_t ms_allocating[TXG_SIZE]; 1449 uintptr_t ms_checkpointing; 1450 uintptr_t ms_freeing; 1451 uintptr_t ms_freed; 1452 uintptr_t ms_allocatable; 1453 uintptr_t ms_sm; 1454 } mdb_metaslab_t; 1455 1456 typedef struct mdb_space_map_phys_t { 1457 int64_t smp_alloc; 1458 uint64_t smp_histogram[SPACE_MAP_HISTOGRAM_SIZE]; 1459 } mdb_space_map_phys_t; 1460 1461 typedef struct mdb_space_map { 1462 uint64_t sm_size; 1463 uint8_t sm_shift; 1464 uintptr_t sm_phys; 1465 } mdb_space_map_t; 1466 1467 typedef struct mdb_vdev { 1468 uintptr_t vdev_path; 1469 uintptr_t vdev_ms; 1470 uintptr_t vdev_ops; 1471 uint64_t vdev_ms_count; 1472 uint64_t vdev_id; 1473 vdev_stat_t vdev_stat; 1474 } mdb_vdev_t; 1475 1476 typedef struct mdb_vdev_ops { 1477 char vdev_op_type[16]; 1478 } mdb_vdev_ops_t; 1479 1480 static int 1481 metaslab_stats(uintptr_t addr, int spa_flags) 1482 { 1483 mdb_vdev_t vdev; 1484 uintptr_t *vdev_ms; 1485 1486 if (mdb_ctf_vread(&vdev, "vdev_t", "mdb_vdev_t", 1487 (uintptr_t)addr, 0) == -1) { 1488 mdb_warn("failed to read vdev at %p\n", addr); 1489 return (DCMD_ERR); 1490 } 1491 1492 mdb_inc_indent(4); 1493 mdb_printf("%<u>%-?s %6s %20s %10s %9s%</u>\n", "ADDR", "ID", 1494 "OFFSET", "FREE", "FRAGMENTATION"); 1495 1496 vdev_ms = mdb_alloc(vdev.vdev_ms_count * sizeof (void *), 1497 UM_SLEEP | UM_GC); 1498 if (mdb_vread(vdev_ms, vdev.vdev_ms_count * sizeof (void *), 1499 (uintptr_t)vdev.vdev_ms) == -1) { 1500 mdb_warn("failed to read vdev_ms at %p\n", vdev.vdev_ms); 1501 return (DCMD_ERR); 1502 } 1503 1504 for (int m = 0; m < vdev.vdev_ms_count; m++) { 1505 mdb_metaslab_t ms; 1506 mdb_space_map_t sm = { 0 }; 1507 mdb_space_map_phys_t smp; 1508 char free[MDB_NICENUM_BUFLEN]; 1509 1510 if (mdb_ctf_vread(&ms, "metaslab_t", "mdb_metaslab_t", 1511 (uintptr_t)vdev_ms[m], 0) == -1) 1512 return (DCMD_ERR); 1513 1514 if (ms.ms_sm != 0 && 1515 mdb_ctf_vread(&sm, "space_map_t", "mdb_space_map_t", 1516 ms.ms_sm, 0) == -1) 1517 return (DCMD_ERR); 1518 1519 if (sm.sm_phys != 0) { 1520 (void) mdb_ctf_vread(&smp, "space_map_phys_t", 1521 "mdb_space_map_phys_t", sm.sm_phys, 0); 1522 mdb_nicenum(ms.ms_size - smp.smp_alloc, free); 1523 } else { 1524 (void) mdb_snprintf(free, MDB_NICENUM_BUFLEN, "-"); 1525 } 1526 1527 mdb_printf("%0?p %6llu %20llx %10s ", vdev_ms[m], ms.ms_id, 1528 ms.ms_start, free); 1529 if (ms.ms_fragmentation == ZFS_FRAG_INVALID) 1530 mdb_printf("%9s\n", "-"); 1531 else 1532 mdb_printf("%9llu%%\n", ms.ms_fragmentation); 1533 1534 if ((spa_flags & SPA_FLAG_HISTOGRAMS) && ms.ms_sm != 0) { 1535 if (sm.sm_phys == 0) 1536 continue; 1537 1538 dump_histogram(smp.smp_histogram, 1539 SPACE_MAP_HISTOGRAM_SIZE, sm.sm_shift); 1540 } 1541 } 1542 mdb_dec_indent(4); 1543 return (DCMD_OK); 1544 } 1545 1546 static int 1547 metaslab_group_stats(uintptr_t addr, int spa_flags) 1548 { 1549 mdb_metaslab_group_t mg; 1550 if (mdb_ctf_vread(&mg, "metaslab_group_t", "mdb_metaslab_group_t", 1551 (uintptr_t)addr, 0) == -1) { 1552 mdb_warn("failed to read vdev_mg at %p\n", addr); 1553 return (DCMD_ERR); 1554 } 1555 1556 mdb_inc_indent(4); 1557 mdb_printf("%<u>%-?s %15s%</u>\n", "ADDR", "FRAGMENTATION"); 1558 if (mg.mg_fragmentation == ZFS_FRAG_INVALID) 1559 mdb_printf("%0?p %15s\n", addr, "-"); 1560 else 1561 mdb_printf("%0?p %15llu%%\n", addr, mg.mg_fragmentation); 1562 1563 if (spa_flags & SPA_FLAG_HISTOGRAMS) 1564 dump_histogram(mg.mg_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0); 1565 mdb_dec_indent(4); 1566 return (DCMD_OK); 1567 } 1568 1569 /* 1570 * ::vdev 1571 * 1572 * Print out a summarized vdev_t, in the following form: 1573 * 1574 * ADDR STATE AUX DESC 1575 * fffffffbcde23df0 HEALTHY - /dev/dsk/c0t0d0 1576 * 1577 * If '-r' is specified, recursively visit all children. 1578 * 1579 * With '-e', the statistics associated with the vdev are printed as well. 1580 */ 1581 static int 1582 do_print_vdev(uintptr_t addr, int flags, int depth, boolean_t recursive, 1583 int spa_flags) 1584 { 1585 vdev_t vdev; 1586 char desc[MAXNAMELEN]; 1587 int c, children; 1588 uintptr_t *child; 1589 const char *state, *aux; 1590 1591 if (mdb_vread(&vdev, sizeof (vdev), (uintptr_t)addr) == -1) { 1592 mdb_warn("failed to read vdev_t at %p\n", (uintptr_t)addr); 1593 return (DCMD_ERR); 1594 } 1595 1596 if (flags & DCMD_PIPE_OUT) { 1597 mdb_printf("%#lr\n", addr); 1598 } else { 1599 if (vdev.vdev_path != NULL) { 1600 if (mdb_readstr(desc, sizeof (desc), 1601 (uintptr_t)vdev.vdev_path) == -1) { 1602 mdb_warn("failed to read vdev_path at %p\n", 1603 vdev.vdev_path); 1604 return (DCMD_ERR); 1605 } 1606 } else if (vdev.vdev_ops != NULL) { 1607 vdev_ops_t ops; 1608 if (mdb_vread(&ops, sizeof (ops), 1609 (uintptr_t)vdev.vdev_ops) == -1) { 1610 mdb_warn("failed to read vdev_ops at %p\n", 1611 vdev.vdev_ops); 1612 return (DCMD_ERR); 1613 } 1614 (void) strcpy(desc, ops.vdev_op_type); 1615 } else { 1616 (void) strcpy(desc, "<unknown>"); 1617 } 1618 1619 if (depth == 0 && DCMD_HDRSPEC(flags)) 1620 mdb_printf("%<u>%-?s %-9s %-12s %-*s%</u>\n", 1621 "ADDR", "STATE", "AUX", 1622 sizeof (uintptr_t) == 4 ? 43 : 35, 1623 "DESCRIPTION"); 1624 1625 mdb_printf("%0?p ", addr); 1626 1627 switch (vdev.vdev_state) { 1628 case VDEV_STATE_CLOSED: 1629 state = "CLOSED"; 1630 break; 1631 case VDEV_STATE_OFFLINE: 1632 state = "OFFLINE"; 1633 break; 1634 case VDEV_STATE_CANT_OPEN: 1635 state = "CANT_OPEN"; 1636 break; 1637 case VDEV_STATE_DEGRADED: 1638 state = "DEGRADED"; 1639 break; 1640 case VDEV_STATE_HEALTHY: 1641 state = "HEALTHY"; 1642 break; 1643 case VDEV_STATE_REMOVED: 1644 state = "REMOVED"; 1645 break; 1646 case VDEV_STATE_FAULTED: 1647 state = "FAULTED"; 1648 break; 1649 default: 1650 state = "UNKNOWN"; 1651 break; 1652 } 1653 1654 switch (vdev.vdev_stat.vs_aux) { 1655 case VDEV_AUX_NONE: 1656 aux = "-"; 1657 break; 1658 case VDEV_AUX_OPEN_FAILED: 1659 aux = "OPEN_FAILED"; 1660 break; 1661 case VDEV_AUX_CORRUPT_DATA: 1662 aux = "CORRUPT_DATA"; 1663 break; 1664 case VDEV_AUX_NO_REPLICAS: 1665 aux = "NO_REPLICAS"; 1666 break; 1667 case VDEV_AUX_BAD_GUID_SUM: 1668 aux = "BAD_GUID_SUM"; 1669 break; 1670 case VDEV_AUX_TOO_SMALL: 1671 aux = "TOO_SMALL"; 1672 break; 1673 case VDEV_AUX_BAD_LABEL: 1674 aux = "BAD_LABEL"; 1675 break; 1676 case VDEV_AUX_VERSION_NEWER: 1677 aux = "VERS_NEWER"; 1678 break; 1679 case VDEV_AUX_VERSION_OLDER: 1680 aux = "VERS_OLDER"; 1681 break; 1682 case VDEV_AUX_UNSUP_FEAT: 1683 aux = "UNSUP_FEAT"; 1684 break; 1685 case VDEV_AUX_SPARED: 1686 aux = "SPARED"; 1687 break; 1688 case VDEV_AUX_ERR_EXCEEDED: 1689 aux = "ERR_EXCEEDED"; 1690 break; 1691 case VDEV_AUX_IO_FAILURE: 1692 aux = "IO_FAILURE"; 1693 break; 1694 case VDEV_AUX_BAD_LOG: 1695 aux = "BAD_LOG"; 1696 break; 1697 case VDEV_AUX_EXTERNAL: 1698 aux = "EXTERNAL"; 1699 break; 1700 case VDEV_AUX_SPLIT_POOL: 1701 aux = "SPLIT_POOL"; 1702 break; 1703 case VDEV_AUX_CHILDREN_OFFLINE: 1704 aux = "CHILDREN_OFFLINE"; 1705 break; 1706 default: 1707 aux = "UNKNOWN"; 1708 break; 1709 } 1710 1711 mdb_printf("%-9s %-12s %*s%s\n", state, aux, depth, "", desc); 1712 1713 if (spa_flags & SPA_FLAG_ERRORS) { 1714 vdev_stat_t *vs = &vdev.vdev_stat; 1715 int i; 1716 1717 mdb_inc_indent(4); 1718 mdb_printf("\n"); 1719 mdb_printf("%<u> %12s %12s %12s %12s " 1720 "%12s%</u>\n", "READ", "WRITE", "FREE", "CLAIM", 1721 "IOCTL"); 1722 mdb_printf("OPS "); 1723 for (i = 1; i < ZIO_TYPES; i++) 1724 mdb_printf("%11#llx%s", vs->vs_ops[i], 1725 i == ZIO_TYPES - 1 ? "" : " "); 1726 mdb_printf("\n"); 1727 mdb_printf("BYTES "); 1728 for (i = 1; i < ZIO_TYPES; i++) 1729 mdb_printf("%11#llx%s", vs->vs_bytes[i], 1730 i == ZIO_TYPES - 1 ? "" : " "); 1731 1732 1733 mdb_printf("\n"); 1734 mdb_printf("EREAD %10#llx\n", vs->vs_read_errors); 1735 mdb_printf("EWRITE %10#llx\n", vs->vs_write_errors); 1736 mdb_printf("ECKSUM %10#llx\n", 1737 vs->vs_checksum_errors); 1738 mdb_dec_indent(4); 1739 mdb_printf("\n"); 1740 } 1741 1742 if (spa_flags & SPA_FLAG_METASLAB_GROUPS && 1743 vdev.vdev_mg != NULL) { 1744 metaslab_group_stats((uintptr_t)vdev.vdev_mg, 1745 spa_flags); 1746 } 1747 if (spa_flags & SPA_FLAG_METASLABS && vdev.vdev_ms != NULL) { 1748 metaslab_stats((uintptr_t)addr, spa_flags); 1749 } 1750 } 1751 1752 children = vdev.vdev_children; 1753 1754 if (children == 0 || !recursive) 1755 return (DCMD_OK); 1756 1757 child = mdb_alloc(children * sizeof (void *), UM_SLEEP | UM_GC); 1758 if (mdb_vread(child, children * sizeof (void *), 1759 (uintptr_t)vdev.vdev_child) == -1) { 1760 mdb_warn("failed to read vdev children at %p", vdev.vdev_child); 1761 return (DCMD_ERR); 1762 } 1763 1764 for (c = 0; c < children; c++) { 1765 if (do_print_vdev(child[c], flags, depth + 2, recursive, 1766 spa_flags)) { 1767 return (DCMD_ERR); 1768 } 1769 } 1770 1771 return (DCMD_OK); 1772 } 1773 1774 static int 1775 vdev_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1776 { 1777 uint64_t depth = 0; 1778 boolean_t recursive = B_FALSE; 1779 int spa_flags = 0; 1780 1781 if (mdb_getopts(argc, argv, 1782 'e', MDB_OPT_SETBITS, SPA_FLAG_ERRORS, &spa_flags, 1783 'm', MDB_OPT_SETBITS, SPA_FLAG_METASLABS, &spa_flags, 1784 'M', MDB_OPT_SETBITS, SPA_FLAG_METASLAB_GROUPS, &spa_flags, 1785 'h', MDB_OPT_SETBITS, SPA_FLAG_HISTOGRAMS, &spa_flags, 1786 'r', MDB_OPT_SETBITS, TRUE, &recursive, 1787 'd', MDB_OPT_UINT64, &depth, NULL) != argc) 1788 return (DCMD_USAGE); 1789 1790 if (!(flags & DCMD_ADDRSPEC)) { 1791 mdb_warn("no vdev_t address given\n"); 1792 return (DCMD_ERR); 1793 } 1794 1795 return (do_print_vdev(addr, flags, (int)depth, recursive, spa_flags)); 1796 } 1797 1798 typedef struct mdb_metaslab_alloc_trace { 1799 uintptr_t mat_mg; 1800 uintptr_t mat_msp; 1801 uint64_t mat_size; 1802 uint64_t mat_weight; 1803 uint64_t mat_offset; 1804 uint32_t mat_dva_id; 1805 int mat_allocator; 1806 } mdb_metaslab_alloc_trace_t; 1807 1808 static void 1809 metaslab_print_weight(uint64_t weight) 1810 { 1811 char buf[100]; 1812 1813 if (WEIGHT_IS_SPACEBASED(weight)) { 1814 mdb_nicenum( 1815 weight & ~(METASLAB_ACTIVE_MASK | METASLAB_WEIGHT_TYPE), 1816 buf); 1817 } else { 1818 char size[MDB_NICENUM_BUFLEN]; 1819 mdb_nicenum(1ULL << WEIGHT_GET_INDEX(weight), size); 1820 (void) mdb_snprintf(buf, sizeof (buf), "%llu x %s", 1821 WEIGHT_GET_COUNT(weight), size); 1822 } 1823 mdb_printf("%11s ", buf); 1824 } 1825 1826 /* ARGSUSED */ 1827 static int 1828 metaslab_weight(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1829 { 1830 uint64_t weight = 0; 1831 char active; 1832 1833 if (argc == 0 && (flags & DCMD_ADDRSPEC)) { 1834 if (mdb_vread(&weight, sizeof (uint64_t), addr) == -1) { 1835 mdb_warn("failed to read weight at %p\n", addr); 1836 return (DCMD_ERR); 1837 } 1838 } else if (argc == 1 && !(flags & DCMD_ADDRSPEC)) { 1839 weight = (argv[0].a_type == MDB_TYPE_IMMEDIATE) ? 1840 argv[0].a_un.a_val : mdb_strtoull(argv[0].a_un.a_str); 1841 } else { 1842 return (DCMD_USAGE); 1843 } 1844 1845 if (DCMD_HDRSPEC(flags)) { 1846 mdb_printf("%<u>%-6s %9s %9s%</u>\n", 1847 "ACTIVE", "ALGORITHM", "WEIGHT"); 1848 } 1849 1850 if (weight & METASLAB_WEIGHT_PRIMARY) 1851 active = 'P'; 1852 else if (weight & METASLAB_WEIGHT_SECONDARY) 1853 active = 'S'; 1854 else 1855 active = '-'; 1856 mdb_printf("%6c %8s ", active, 1857 WEIGHT_IS_SPACEBASED(weight) ? "SPACE" : "SEGMENT"); 1858 metaslab_print_weight(weight); 1859 mdb_printf("\n"); 1860 1861 return (DCMD_OK); 1862 } 1863 1864 /* ARGSUSED */ 1865 static int 1866 metaslab_trace(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1867 { 1868 mdb_metaslab_alloc_trace_t mat; 1869 mdb_metaslab_group_t mg = { 0 }; 1870 char result_type[100]; 1871 1872 if (mdb_ctf_vread(&mat, "metaslab_alloc_trace_t", 1873 "mdb_metaslab_alloc_trace_t", addr, 0) == -1) { 1874 return (DCMD_ERR); 1875 } 1876 1877 if (!(flags & DCMD_PIPE_OUT) && DCMD_HDRSPEC(flags)) { 1878 mdb_printf("%<u>%6s %6s %8s %11s %11s %18s %18s%</u>\n", 1879 "MSID", "DVA", "ASIZE", "ALLOCATOR", "WEIGHT", "RESULT", 1880 "VDEV"); 1881 } 1882 1883 if (mat.mat_msp != 0) { 1884 mdb_metaslab_t ms; 1885 1886 if (mdb_ctf_vread(&ms, "metaslab_t", "mdb_metaslab_t", 1887 mat.mat_msp, 0) == -1) { 1888 return (DCMD_ERR); 1889 } 1890 mdb_printf("%6llu ", ms.ms_id); 1891 } else { 1892 mdb_printf("%6s ", "-"); 1893 } 1894 1895 mdb_printf("%6d %8llx %11llx ", mat.mat_dva_id, mat.mat_size, 1896 mat.mat_allocator); 1897 1898 metaslab_print_weight(mat.mat_weight); 1899 1900 if ((int64_t)mat.mat_offset < 0) { 1901 if (enum_lookup("enum trace_alloc_type", mat.mat_offset, 1902 "TRACE_", sizeof (result_type), result_type) == -1) { 1903 mdb_warn("Could not find enum for trace_alloc_type"); 1904 return (DCMD_ERR); 1905 } 1906 mdb_printf("%18s ", result_type); 1907 } else { 1908 mdb_printf("%<b>%18llx%</b> ", mat.mat_offset); 1909 } 1910 1911 if (mat.mat_mg != 0 && 1912 mdb_ctf_vread(&mg, "metaslab_group_t", "mdb_metaslab_group_t", 1913 mat.mat_mg, 0) == -1) { 1914 return (DCMD_ERR); 1915 } 1916 1917 if (mg.mg_vd != 0) { 1918 mdb_vdev_t vdev; 1919 char desc[MAXNAMELEN]; 1920 1921 if (mdb_ctf_vread(&vdev, "vdev_t", "mdb_vdev_t", 1922 mg.mg_vd, 0) == -1) { 1923 return (DCMD_ERR); 1924 } 1925 1926 if (vdev.vdev_path != 0) { 1927 char path[MAXNAMELEN]; 1928 1929 if (mdb_readstr(path, sizeof (path), 1930 vdev.vdev_path) == -1) { 1931 mdb_warn("failed to read vdev_path at %p\n", 1932 vdev.vdev_path); 1933 return (DCMD_ERR); 1934 } 1935 char *slash; 1936 if ((slash = strrchr(path, '/')) != NULL) { 1937 strcpy(desc, slash + 1); 1938 } else { 1939 strcpy(desc, path); 1940 } 1941 } else if (vdev.vdev_ops != 0) { 1942 mdb_vdev_ops_t ops; 1943 if (mdb_ctf_vread(&ops, "vdev_ops_t", "mdb_vdev_ops_t", 1944 vdev.vdev_ops, 0) == -1) { 1945 mdb_warn("failed to read vdev_ops at %p\n", 1946 vdev.vdev_ops); 1947 return (DCMD_ERR); 1948 } 1949 (void) mdb_snprintf(desc, sizeof (desc), 1950 "%s-%llu", ops.vdev_op_type, vdev.vdev_id); 1951 } else { 1952 (void) strcpy(desc, "<unknown>"); 1953 } 1954 mdb_printf("%18s\n", desc); 1955 } 1956 1957 return (DCMD_OK); 1958 } 1959 1960 typedef struct metaslab_walk_data { 1961 uint64_t mw_numvdevs; 1962 uintptr_t *mw_vdevs; 1963 int mw_curvdev; 1964 uint64_t mw_nummss; 1965 uintptr_t *mw_mss; 1966 int mw_curms; 1967 } metaslab_walk_data_t; 1968 1969 static int 1970 metaslab_walk_step(mdb_walk_state_t *wsp) 1971 { 1972 metaslab_walk_data_t *mw = wsp->walk_data; 1973 metaslab_t ms; 1974 uintptr_t msp; 1975 1976 if (mw->mw_curvdev >= mw->mw_numvdevs) 1977 return (WALK_DONE); 1978 1979 if (mw->mw_mss == NULL) { 1980 uintptr_t mssp; 1981 uintptr_t vdevp; 1982 1983 ASSERT(mw->mw_curms == 0); 1984 ASSERT(mw->mw_nummss == 0); 1985 1986 vdevp = mw->mw_vdevs[mw->mw_curvdev]; 1987 if (GETMEMB(vdevp, "vdev", vdev_ms, mssp) || 1988 GETMEMB(vdevp, "vdev", vdev_ms_count, mw->mw_nummss)) { 1989 return (WALK_ERR); 1990 } 1991 1992 mw->mw_mss = mdb_alloc(mw->mw_nummss * sizeof (void*), 1993 UM_SLEEP | UM_GC); 1994 if (mdb_vread(mw->mw_mss, mw->mw_nummss * sizeof (void*), 1995 mssp) == -1) { 1996 mdb_warn("failed to read vdev_ms at %p", mssp); 1997 return (WALK_ERR); 1998 } 1999 } 2000 2001 if (mw->mw_curms >= mw->mw_nummss) { 2002 mw->mw_mss = NULL; 2003 mw->mw_curms = 0; 2004 mw->mw_nummss = 0; 2005 mw->mw_curvdev++; 2006 return (WALK_NEXT); 2007 } 2008 2009 msp = mw->mw_mss[mw->mw_curms]; 2010 if (mdb_vread(&ms, sizeof (metaslab_t), msp) == -1) { 2011 mdb_warn("failed to read metaslab_t at %p", msp); 2012 return (WALK_ERR); 2013 } 2014 2015 mw->mw_curms++; 2016 2017 return (wsp->walk_callback(msp, &ms, wsp->walk_cbdata)); 2018 } 2019 2020 static int 2021 metaslab_walk_init(mdb_walk_state_t *wsp) 2022 { 2023 metaslab_walk_data_t *mw; 2024 uintptr_t root_vdevp; 2025 uintptr_t childp; 2026 2027 if (wsp->walk_addr == 0) { 2028 mdb_warn("must supply address of spa_t\n"); 2029 return (WALK_ERR); 2030 } 2031 2032 mw = mdb_zalloc(sizeof (metaslab_walk_data_t), UM_SLEEP | UM_GC); 2033 2034 if (GETMEMB(wsp->walk_addr, "spa", spa_root_vdev, root_vdevp) || 2035 GETMEMB(root_vdevp, "vdev", vdev_children, mw->mw_numvdevs) || 2036 GETMEMB(root_vdevp, "vdev", vdev_child, childp)) { 2037 return (DCMD_ERR); 2038 } 2039 2040 mw->mw_vdevs = mdb_alloc(mw->mw_numvdevs * sizeof (void *), 2041 UM_SLEEP | UM_GC); 2042 if (mdb_vread(mw->mw_vdevs, mw->mw_numvdevs * sizeof (void *), 2043 childp) == -1) { 2044 mdb_warn("failed to read root vdev children at %p", childp); 2045 return (DCMD_ERR); 2046 } 2047 2048 wsp->walk_data = mw; 2049 2050 return (WALK_NEXT); 2051 } 2052 2053 typedef struct mdb_spa { 2054 uintptr_t spa_dsl_pool; 2055 uintptr_t spa_root_vdev; 2056 } mdb_spa_t; 2057 2058 typedef struct mdb_dsl_pool { 2059 uintptr_t dp_root_dir; 2060 } mdb_dsl_pool_t; 2061 2062 typedef struct mdb_dsl_dir { 2063 uintptr_t dd_dbuf; 2064 int64_t dd_space_towrite[TXG_SIZE]; 2065 } mdb_dsl_dir_t; 2066 2067 typedef struct mdb_dsl_dir_phys { 2068 uint64_t dd_used_bytes; 2069 uint64_t dd_compressed_bytes; 2070 uint64_t dd_uncompressed_bytes; 2071 } mdb_dsl_dir_phys_t; 2072 2073 typedef struct space_data { 2074 uint64_t ms_allocating[TXG_SIZE]; 2075 uint64_t ms_checkpointing; 2076 uint64_t ms_freeing; 2077 uint64_t ms_freed; 2078 uint64_t ms_allocatable; 2079 int64_t ms_deferspace; 2080 uint64_t nowavail; 2081 } space_data_t; 2082 2083 /* ARGSUSED */ 2084 static int 2085 space_cb(uintptr_t addr, const void *unknown, void *arg) 2086 { 2087 space_data_t *sd = arg; 2088 mdb_metaslab_t ms; 2089 mdb_range_tree_t rt; 2090 mdb_space_map_t sm = { 0 }; 2091 mdb_space_map_phys_t smp = { 0 }; 2092 int i; 2093 2094 if (mdb_ctf_vread(&ms, "metaslab_t", "mdb_metaslab_t", 2095 addr, 0) == -1) 2096 return (WALK_ERR); 2097 2098 for (i = 0; i < TXG_SIZE; i++) { 2099 if (mdb_ctf_vread(&rt, "range_tree_t", 2100 "mdb_range_tree_t", ms.ms_allocating[i], 0) == -1) 2101 return (WALK_ERR); 2102 2103 sd->ms_allocating[i] += rt.rt_space; 2104 2105 } 2106 2107 if (mdb_ctf_vread(&rt, "range_tree_t", 2108 "mdb_range_tree_t", ms.ms_checkpointing, 0) == -1) 2109 return (WALK_ERR); 2110 sd->ms_checkpointing += rt.rt_space; 2111 2112 if (mdb_ctf_vread(&rt, "range_tree_t", 2113 "mdb_range_tree_t", ms.ms_freeing, 0) == -1) 2114 return (WALK_ERR); 2115 sd->ms_freeing += rt.rt_space; 2116 2117 if (mdb_ctf_vread(&rt, "range_tree_t", 2118 "mdb_range_tree_t", ms.ms_freed, 0) == -1) 2119 return (WALK_ERR); 2120 sd->ms_freed += rt.rt_space; 2121 2122 if (mdb_ctf_vread(&rt, "range_tree_t", 2123 "mdb_range_tree_t", ms.ms_allocatable, 0) == -1) 2124 return (WALK_ERR); 2125 sd->ms_allocatable += rt.rt_space; 2126 2127 if (ms.ms_sm != 0 && 2128 mdb_ctf_vread(&sm, "space_map_t", 2129 "mdb_space_map_t", ms.ms_sm, 0) == -1) 2130 return (WALK_ERR); 2131 2132 if (sm.sm_phys != 0) { 2133 (void) mdb_ctf_vread(&smp, "space_map_phys_t", 2134 "mdb_space_map_phys_t", sm.sm_phys, 0); 2135 } 2136 2137 sd->ms_deferspace += ms.ms_deferspace; 2138 sd->nowavail += sm.sm_size - smp.smp_alloc; 2139 2140 return (WALK_NEXT); 2141 } 2142 2143 /* 2144 * ::spa_space [-b] 2145 * 2146 * Given a spa_t, print out it's on-disk space usage and in-core 2147 * estimates of future usage. If -b is given, print space in bytes. 2148 * Otherwise print in megabytes. 2149 */ 2150 /* ARGSUSED */ 2151 static int 2152 spa_space(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2153 { 2154 mdb_spa_t spa; 2155 mdb_dsl_pool_t dp; 2156 mdb_dsl_dir_t dd; 2157 mdb_dmu_buf_impl_t db; 2158 mdb_dsl_dir_phys_t dsp; 2159 space_data_t sd; 2160 int shift = 20; 2161 char *suffix = "M"; 2162 int bytes = B_FALSE; 2163 2164 if (mdb_getopts(argc, argv, 'b', MDB_OPT_SETBITS, TRUE, &bytes, NULL) != 2165 argc) 2166 return (DCMD_USAGE); 2167 if (!(flags & DCMD_ADDRSPEC)) 2168 return (DCMD_USAGE); 2169 2170 if (bytes) { 2171 shift = 0; 2172 suffix = ""; 2173 } 2174 2175 if (mdb_ctf_vread(&spa, ZFS_STRUCT "spa", "mdb_spa_t", 2176 addr, 0) == -1 || 2177 mdb_ctf_vread(&dp, ZFS_STRUCT "dsl_pool", "mdb_dsl_pool_t", 2178 spa.spa_dsl_pool, 0) == -1 || 2179 mdb_ctf_vread(&dd, ZFS_STRUCT "dsl_dir", "mdb_dsl_dir_t", 2180 dp.dp_root_dir, 0) == -1 || 2181 mdb_ctf_vread(&db, ZFS_STRUCT "dmu_buf_impl", "mdb_dmu_buf_impl_t", 2182 dd.dd_dbuf, 0) == -1 || 2183 mdb_ctf_vread(&dsp, ZFS_STRUCT "dsl_dir_phys", 2184 "mdb_dsl_dir_phys_t", db.db.db_data, 0) == -1) { 2185 return (DCMD_ERR); 2186 } 2187 2188 mdb_printf("dd_space_towrite = %llu%s %llu%s %llu%s %llu%s\n", 2189 dd.dd_space_towrite[0] >> shift, suffix, 2190 dd.dd_space_towrite[1] >> shift, suffix, 2191 dd.dd_space_towrite[2] >> shift, suffix, 2192 dd.dd_space_towrite[3] >> shift, suffix); 2193 2194 mdb_printf("dd_phys.dd_used_bytes = %llu%s\n", 2195 dsp.dd_used_bytes >> shift, suffix); 2196 mdb_printf("dd_phys.dd_compressed_bytes = %llu%s\n", 2197 dsp.dd_compressed_bytes >> shift, suffix); 2198 mdb_printf("dd_phys.dd_uncompressed_bytes = %llu%s\n", 2199 dsp.dd_uncompressed_bytes >> shift, suffix); 2200 2201 bzero(&sd, sizeof (sd)); 2202 if (mdb_pwalk("metaslab", space_cb, &sd, addr) != 0) { 2203 mdb_warn("can't walk metaslabs"); 2204 return (DCMD_ERR); 2205 } 2206 2207 mdb_printf("ms_allocmap = %llu%s %llu%s %llu%s %llu%s\n", 2208 sd.ms_allocating[0] >> shift, suffix, 2209 sd.ms_allocating[1] >> shift, suffix, 2210 sd.ms_allocating[2] >> shift, suffix, 2211 sd.ms_allocating[3] >> shift, suffix); 2212 mdb_printf("ms_checkpointing = %llu%s\n", 2213 sd.ms_checkpointing >> shift, suffix); 2214 mdb_printf("ms_freeing = %llu%s\n", 2215 sd.ms_freeing >> shift, suffix); 2216 mdb_printf("ms_freed = %llu%s\n", 2217 sd.ms_freed >> shift, suffix); 2218 mdb_printf("ms_allocatable = %llu%s\n", 2219 sd.ms_allocatable >> shift, suffix); 2220 mdb_printf("ms_deferspace = %llu%s\n", 2221 sd.ms_deferspace >> shift, suffix); 2222 mdb_printf("current syncing avail = %llu%s\n", 2223 sd.nowavail >> shift, suffix); 2224 2225 return (DCMD_OK); 2226 } 2227 2228 typedef struct mdb_spa_aux_vdev { 2229 int sav_count; 2230 uintptr_t sav_vdevs; 2231 } mdb_spa_aux_vdev_t; 2232 2233 typedef struct mdb_spa_vdevs { 2234 uintptr_t spa_root_vdev; 2235 mdb_spa_aux_vdev_t spa_l2cache; 2236 mdb_spa_aux_vdev_t spa_spares; 2237 } mdb_spa_vdevs_t; 2238 2239 static int 2240 spa_print_aux(mdb_spa_aux_vdev_t *sav, uint_t flags, mdb_arg_t *v, 2241 const char *name) 2242 { 2243 uintptr_t *aux; 2244 size_t len; 2245 int ret, i; 2246 2247 /* 2248 * Iterate over aux vdevs and print those out as well. This is a 2249 * little annoying because we don't have a root vdev to pass to ::vdev. 2250 * Instead, we print a single line and then call it for each child 2251 * vdev. 2252 */ 2253 if (sav->sav_count != 0) { 2254 v[1].a_type = MDB_TYPE_STRING; 2255 v[1].a_un.a_str = "-d"; 2256 v[2].a_type = MDB_TYPE_IMMEDIATE; 2257 v[2].a_un.a_val = 2; 2258 2259 len = sav->sav_count * sizeof (uintptr_t); 2260 aux = mdb_alloc(len, UM_SLEEP); 2261 if (mdb_vread(aux, len, sav->sav_vdevs) == -1) { 2262 mdb_free(aux, len); 2263 mdb_warn("failed to read l2cache vdevs at %p", 2264 sav->sav_vdevs); 2265 return (DCMD_ERR); 2266 } 2267 2268 mdb_printf("%-?s %-9s %-12s %s\n", "-", "-", "-", name); 2269 2270 for (i = 0; i < sav->sav_count; i++) { 2271 ret = mdb_call_dcmd("vdev", aux[i], flags, 3, v); 2272 if (ret != DCMD_OK) { 2273 mdb_free(aux, len); 2274 return (ret); 2275 } 2276 } 2277 2278 mdb_free(aux, len); 2279 } 2280 2281 return (0); 2282 } 2283 2284 /* 2285 * ::spa_vdevs 2286 * 2287 * -e Include error stats 2288 * -m Include metaslab information 2289 * -M Include metaslab group information 2290 * -h Include histogram information (requires -m or -M) 2291 * 2292 * Print out a summarized list of vdevs for the given spa_t. 2293 * This is accomplished by invoking "::vdev -re" on the root vdev, as well as 2294 * iterating over the cache devices. 2295 */ 2296 /* ARGSUSED */ 2297 static int 2298 spa_vdevs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2299 { 2300 mdb_arg_t v[3]; 2301 int ret; 2302 char opts[100] = "-r"; 2303 int spa_flags = 0; 2304 2305 if (mdb_getopts(argc, argv, 2306 'e', MDB_OPT_SETBITS, SPA_FLAG_ERRORS, &spa_flags, 2307 'm', MDB_OPT_SETBITS, SPA_FLAG_METASLABS, &spa_flags, 2308 'M', MDB_OPT_SETBITS, SPA_FLAG_METASLAB_GROUPS, &spa_flags, 2309 'h', MDB_OPT_SETBITS, SPA_FLAG_HISTOGRAMS, &spa_flags, 2310 NULL) != argc) 2311 return (DCMD_USAGE); 2312 2313 if (!(flags & DCMD_ADDRSPEC)) 2314 return (DCMD_USAGE); 2315 2316 mdb_spa_vdevs_t spa; 2317 if (mdb_ctf_vread(&spa, "spa_t", "mdb_spa_vdevs_t", addr, 0) == -1) 2318 return (DCMD_ERR); 2319 2320 /* 2321 * Unitialized spa_t structures can have a NULL root vdev. 2322 */ 2323 if (spa.spa_root_vdev == 0) { 2324 mdb_printf("no associated vdevs\n"); 2325 return (DCMD_OK); 2326 } 2327 2328 if (spa_flags & SPA_FLAG_ERRORS) 2329 strcat(opts, "e"); 2330 if (spa_flags & SPA_FLAG_METASLABS) 2331 strcat(opts, "m"); 2332 if (spa_flags & SPA_FLAG_METASLAB_GROUPS) 2333 strcat(opts, "M"); 2334 if (spa_flags & SPA_FLAG_HISTOGRAMS) 2335 strcat(opts, "h"); 2336 2337 v[0].a_type = MDB_TYPE_STRING; 2338 v[0].a_un.a_str = opts; 2339 2340 ret = mdb_call_dcmd("vdev", (uintptr_t)spa.spa_root_vdev, 2341 flags, 1, v); 2342 if (ret != DCMD_OK) 2343 return (ret); 2344 2345 if (spa_print_aux(&spa.spa_l2cache, flags, v, "cache") != 0 || 2346 spa_print_aux(&spa.spa_spares, flags, v, "spares") != 0) 2347 return (DCMD_ERR); 2348 2349 return (DCMD_OK); 2350 } 2351 2352 /* 2353 * ::zio 2354 * 2355 * Print a summary of zio_t and all its children. This is intended to display a 2356 * zio tree, and hence we only pick the most important pieces of information for 2357 * the main summary. More detailed information can always be found by doing a 2358 * '::print zio' on the underlying zio_t. The columns we display are: 2359 * 2360 * ADDRESS TYPE STAGE WAITER TIME_ELAPSED 2361 * 2362 * The 'address' column is indented by one space for each depth level as we 2363 * descend down the tree. 2364 */ 2365 2366 #define ZIO_MAXINDENT 7 2367 #define ZIO_MAXWIDTH (sizeof (uintptr_t) * 2 + ZIO_MAXINDENT) 2368 #define ZIO_WALK_SELF 0 2369 #define ZIO_WALK_CHILD 1 2370 #define ZIO_WALK_PARENT 2 2371 2372 typedef struct zio_print_args { 2373 int zpa_current_depth; 2374 int zpa_min_depth; 2375 int zpa_max_depth; 2376 int zpa_type; 2377 uint_t zpa_flags; 2378 } zio_print_args_t; 2379 2380 typedef struct mdb_zio { 2381 enum zio_type io_type; 2382 enum zio_stage io_stage; 2383 uintptr_t io_waiter; 2384 uintptr_t io_spa; 2385 struct { 2386 struct { 2387 uintptr_t list_next; 2388 } list_head; 2389 } io_parent_list; 2390 int io_error; 2391 } mdb_zio_t; 2392 2393 typedef struct mdb_zio_timestamp { 2394 hrtime_t io_timestamp; 2395 } mdb_zio_timestamp_t; 2396 2397 static int zio_child_cb(uintptr_t addr, const void *unknown, void *arg); 2398 2399 static int 2400 zio_print_cb(uintptr_t addr, zio_print_args_t *zpa) 2401 { 2402 mdb_ctf_id_t type_enum, stage_enum; 2403 int indent = zpa->zpa_current_depth; 2404 const char *type, *stage; 2405 uintptr_t laddr; 2406 mdb_zio_t zio; 2407 mdb_zio_timestamp_t zio_timestamp = { 0 }; 2408 2409 if (mdb_ctf_vread(&zio, ZFS_STRUCT "zio", "mdb_zio_t", addr, 0) == -1) 2410 return (WALK_ERR); 2411 (void) mdb_ctf_vread(&zio_timestamp, ZFS_STRUCT "zio", 2412 "mdb_zio_timestamp_t", addr, MDB_CTF_VREAD_QUIET); 2413 2414 if (indent > ZIO_MAXINDENT) 2415 indent = ZIO_MAXINDENT; 2416 2417 if (mdb_ctf_lookup_by_name("enum zio_type", &type_enum) == -1 || 2418 mdb_ctf_lookup_by_name("enum zio_stage", &stage_enum) == -1) { 2419 mdb_warn("failed to lookup zio enums"); 2420 return (WALK_ERR); 2421 } 2422 2423 if ((type = mdb_ctf_enum_name(type_enum, zio.io_type)) != NULL) 2424 type += sizeof ("ZIO_TYPE_") - 1; 2425 else 2426 type = "?"; 2427 2428 if (zio.io_error == 0) { 2429 stage = mdb_ctf_enum_name(stage_enum, zio.io_stage); 2430 if (stage != NULL) 2431 stage += sizeof ("ZIO_STAGE_") - 1; 2432 else 2433 stage = "?"; 2434 } else { 2435 stage = "FAILED"; 2436 } 2437 2438 if (zpa->zpa_current_depth >= zpa->zpa_min_depth) { 2439 if (zpa->zpa_flags & DCMD_PIPE_OUT) { 2440 mdb_printf("%?p\n", addr); 2441 } else { 2442 mdb_printf("%*s%-*p %-5s %-16s ", indent, "", 2443 ZIO_MAXWIDTH - indent, addr, type, stage); 2444 if (zio.io_waiter != 0) 2445 mdb_printf("%-16lx ", zio.io_waiter); 2446 else 2447 mdb_printf("%-16s ", "-"); 2448 #ifdef _KERNEL 2449 if (zio_timestamp.io_timestamp != 0) { 2450 mdb_printf("%llums", (mdb_gethrtime() - 2451 zio_timestamp.io_timestamp) / 2452 1000000); 2453 } else { 2454 mdb_printf("%-12s ", "-"); 2455 } 2456 #else 2457 mdb_printf("%-12s ", "-"); 2458 #endif 2459 mdb_printf("\n"); 2460 } 2461 } 2462 2463 if (zpa->zpa_current_depth >= zpa->zpa_max_depth) 2464 return (WALK_NEXT); 2465 2466 if (zpa->zpa_type == ZIO_WALK_PARENT) 2467 laddr = addr + mdb_ctf_offsetof_by_name(ZFS_STRUCT "zio", 2468 "io_parent_list"); 2469 else 2470 laddr = addr + mdb_ctf_offsetof_by_name(ZFS_STRUCT "zio", 2471 "io_child_list"); 2472 2473 zpa->zpa_current_depth++; 2474 if (mdb_pwalk("list", zio_child_cb, zpa, laddr) != 0) { 2475 mdb_warn("failed to walk zio_t children at %p\n", laddr); 2476 return (WALK_ERR); 2477 } 2478 zpa->zpa_current_depth--; 2479 2480 return (WALK_NEXT); 2481 } 2482 2483 /* ARGSUSED */ 2484 static int 2485 zio_child_cb(uintptr_t addr, const void *unknown, void *arg) 2486 { 2487 zio_link_t zl; 2488 uintptr_t ziop; 2489 zio_print_args_t *zpa = arg; 2490 2491 if (mdb_vread(&zl, sizeof (zl), addr) == -1) { 2492 mdb_warn("failed to read zio_link_t at %p", addr); 2493 return (WALK_ERR); 2494 } 2495 2496 if (zpa->zpa_type == ZIO_WALK_PARENT) 2497 ziop = (uintptr_t)zl.zl_parent; 2498 else 2499 ziop = (uintptr_t)zl.zl_child; 2500 2501 return (zio_print_cb(ziop, zpa)); 2502 } 2503 2504 /* ARGSUSED */ 2505 static int 2506 zio_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2507 { 2508 zio_print_args_t zpa = { 0 }; 2509 2510 if (!(flags & DCMD_ADDRSPEC)) 2511 return (DCMD_USAGE); 2512 2513 if (mdb_getopts(argc, argv, 2514 'r', MDB_OPT_SETBITS, INT_MAX, &zpa.zpa_max_depth, 2515 'c', MDB_OPT_SETBITS, ZIO_WALK_CHILD, &zpa.zpa_type, 2516 'p', MDB_OPT_SETBITS, ZIO_WALK_PARENT, &zpa.zpa_type, 2517 NULL) != argc) 2518 return (DCMD_USAGE); 2519 2520 zpa.zpa_flags = flags; 2521 if (zpa.zpa_max_depth != 0) { 2522 if (zpa.zpa_type == ZIO_WALK_SELF) 2523 zpa.zpa_type = ZIO_WALK_CHILD; 2524 } else if (zpa.zpa_type != ZIO_WALK_SELF) { 2525 zpa.zpa_min_depth = 1; 2526 zpa.zpa_max_depth = 1; 2527 } 2528 2529 if (!(flags & DCMD_PIPE_OUT) && DCMD_HDRSPEC(flags)) { 2530 mdb_printf("%<u>%-*s %-5s %-16s %-16s %-12s%</u>\n", 2531 ZIO_MAXWIDTH, "ADDRESS", "TYPE", "STAGE", "WAITER", 2532 "TIME_ELAPSED"); 2533 } 2534 2535 if (zio_print_cb(addr, &zpa) != WALK_NEXT) 2536 return (DCMD_ERR); 2537 2538 return (DCMD_OK); 2539 } 2540 2541 /* 2542 * [addr]::zio_state 2543 * 2544 * Print a summary of all zio_t structures on the system, or for a particular 2545 * pool. This is equivalent to '::walk zio_root | ::zio'. 2546 */ 2547 /*ARGSUSED*/ 2548 static int 2549 zio_state(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2550 { 2551 /* 2552 * MDB will remember the last address of the pipeline, so if we don't 2553 * zero this we'll end up trying to walk zio structures for a 2554 * non-existent spa_t. 2555 */ 2556 if (!(flags & DCMD_ADDRSPEC)) 2557 addr = 0; 2558 2559 return (mdb_pwalk_dcmd("zio_root", "zio", argc, argv, addr)); 2560 } 2561 2562 typedef struct mdb_multilist { 2563 uint64_t ml_num_sublists; 2564 uintptr_t ml_sublists; 2565 } mdb_multilist_t; 2566 2567 typedef struct multilist_walk_data { 2568 uint64_t mwd_idx; 2569 mdb_multilist_t mwd_ml; 2570 } multilist_walk_data_t; 2571 2572 /* ARGSUSED */ 2573 static int 2574 multilist_print_cb(uintptr_t addr, const void *unknown, void *arg) 2575 { 2576 mdb_printf("%#lr\n", addr); 2577 return (WALK_NEXT); 2578 } 2579 2580 static int 2581 multilist_walk_step(mdb_walk_state_t *wsp) 2582 { 2583 multilist_walk_data_t *mwd = wsp->walk_data; 2584 2585 if (mwd->mwd_idx >= mwd->mwd_ml.ml_num_sublists) 2586 return (WALK_DONE); 2587 2588 wsp->walk_addr = mwd->mwd_ml.ml_sublists + 2589 mdb_ctf_sizeof_by_name("multilist_sublist_t") * mwd->mwd_idx + 2590 mdb_ctf_offsetof_by_name("multilist_sublist_t", "mls_list"); 2591 2592 mdb_pwalk("list", multilist_print_cb, (void*)NULL, wsp->walk_addr); 2593 mwd->mwd_idx++; 2594 2595 return (WALK_NEXT); 2596 } 2597 2598 static int 2599 multilist_walk_init(mdb_walk_state_t *wsp) 2600 { 2601 multilist_walk_data_t *mwd; 2602 2603 if (wsp->walk_addr == 0) { 2604 mdb_warn("must supply address of multilist_t\n"); 2605 return (WALK_ERR); 2606 } 2607 2608 mwd = mdb_zalloc(sizeof (multilist_walk_data_t), UM_SLEEP | UM_GC); 2609 if (mdb_ctf_vread(&mwd->mwd_ml, "multilist_t", "mdb_multilist_t", 2610 wsp->walk_addr, 0) == -1) { 2611 return (WALK_ERR); 2612 } 2613 2614 if (mwd->mwd_ml.ml_num_sublists == 0 || 2615 mwd->mwd_ml.ml_sublists == 0) { 2616 mdb_warn("invalid or uninitialized multilist at %#lx\n", 2617 wsp->walk_addr); 2618 return (WALK_ERR); 2619 } 2620 2621 wsp->walk_data = mwd; 2622 return (WALK_NEXT); 2623 } 2624 2625 typedef struct mdb_txg_list { 2626 size_t tl_offset; 2627 uintptr_t tl_head[TXG_SIZE]; 2628 } mdb_txg_list_t; 2629 2630 typedef struct txg_list_walk_data { 2631 uintptr_t lw_head[TXG_SIZE]; 2632 int lw_txgoff; 2633 int lw_maxoff; 2634 size_t lw_offset; 2635 void *lw_obj; 2636 } txg_list_walk_data_t; 2637 2638 static int 2639 txg_list_walk_init_common(mdb_walk_state_t *wsp, int txg, int maxoff) 2640 { 2641 txg_list_walk_data_t *lwd; 2642 mdb_txg_list_t list; 2643 int i; 2644 2645 lwd = mdb_alloc(sizeof (txg_list_walk_data_t), UM_SLEEP | UM_GC); 2646 if (mdb_ctf_vread(&list, "txg_list_t", "mdb_txg_list_t", wsp->walk_addr, 2647 0) == -1) { 2648 mdb_warn("failed to read txg_list_t at %#lx", wsp->walk_addr); 2649 return (WALK_ERR); 2650 } 2651 2652 for (i = 0; i < TXG_SIZE; i++) 2653 lwd->lw_head[i] = list.tl_head[i]; 2654 lwd->lw_offset = list.tl_offset; 2655 lwd->lw_obj = mdb_alloc(lwd->lw_offset + sizeof (txg_node_t), 2656 UM_SLEEP | UM_GC); 2657 lwd->lw_txgoff = txg; 2658 lwd->lw_maxoff = maxoff; 2659 2660 wsp->walk_addr = lwd->lw_head[lwd->lw_txgoff]; 2661 wsp->walk_data = lwd; 2662 2663 return (WALK_NEXT); 2664 } 2665 2666 static int 2667 txg_list_walk_init(mdb_walk_state_t *wsp) 2668 { 2669 return (txg_list_walk_init_common(wsp, 0, TXG_SIZE-1)); 2670 } 2671 2672 static int 2673 txg_list0_walk_init(mdb_walk_state_t *wsp) 2674 { 2675 return (txg_list_walk_init_common(wsp, 0, 0)); 2676 } 2677 2678 static int 2679 txg_list1_walk_init(mdb_walk_state_t *wsp) 2680 { 2681 return (txg_list_walk_init_common(wsp, 1, 1)); 2682 } 2683 2684 static int 2685 txg_list2_walk_init(mdb_walk_state_t *wsp) 2686 { 2687 return (txg_list_walk_init_common(wsp, 2, 2)); 2688 } 2689 2690 static int 2691 txg_list3_walk_init(mdb_walk_state_t *wsp) 2692 { 2693 return (txg_list_walk_init_common(wsp, 3, 3)); 2694 } 2695 2696 static int 2697 txg_list_walk_step(mdb_walk_state_t *wsp) 2698 { 2699 txg_list_walk_data_t *lwd = wsp->walk_data; 2700 uintptr_t addr; 2701 txg_node_t *node; 2702 int status; 2703 2704 while (wsp->walk_addr == 0 && lwd->lw_txgoff < lwd->lw_maxoff) { 2705 lwd->lw_txgoff++; 2706 wsp->walk_addr = lwd->lw_head[lwd->lw_txgoff]; 2707 } 2708 2709 if (wsp->walk_addr == 0) 2710 return (WALK_DONE); 2711 2712 addr = wsp->walk_addr - lwd->lw_offset; 2713 2714 if (mdb_vread(lwd->lw_obj, 2715 lwd->lw_offset + sizeof (txg_node_t), addr) == -1) { 2716 mdb_warn("failed to read list element at %#lx", addr); 2717 return (WALK_ERR); 2718 } 2719 2720 status = wsp->walk_callback(addr, lwd->lw_obj, wsp->walk_cbdata); 2721 node = (txg_node_t *)((uintptr_t)lwd->lw_obj + lwd->lw_offset); 2722 wsp->walk_addr = (uintptr_t)node->tn_next[lwd->lw_txgoff]; 2723 2724 return (status); 2725 } 2726 2727 /* 2728 * ::walk spa 2729 * 2730 * Walk all named spa_t structures in the namespace. This is nothing more than 2731 * a layered avl walk. 2732 */ 2733 static int 2734 spa_walk_init(mdb_walk_state_t *wsp) 2735 { 2736 GElf_Sym sym; 2737 2738 if (wsp->walk_addr != 0) { 2739 mdb_warn("spa walk only supports global walks\n"); 2740 return (WALK_ERR); 2741 } 2742 2743 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "spa_namespace_avl", &sym) == -1) { 2744 mdb_warn("failed to find symbol 'spa_namespace_avl'"); 2745 return (WALK_ERR); 2746 } 2747 2748 wsp->walk_addr = (uintptr_t)sym.st_value; 2749 2750 if (mdb_layered_walk("avl", wsp) == -1) { 2751 mdb_warn("failed to walk 'avl'\n"); 2752 return (WALK_ERR); 2753 } 2754 2755 return (WALK_NEXT); 2756 } 2757 2758 static int 2759 spa_walk_step(mdb_walk_state_t *wsp) 2760 { 2761 return (wsp->walk_callback(wsp->walk_addr, NULL, wsp->walk_cbdata)); 2762 } 2763 2764 /* 2765 * [addr]::walk zio 2766 * 2767 * Walk all active zio_t structures on the system. This is simply a layered 2768 * walk on top of ::walk zio_cache, with the optional ability to limit the 2769 * structures to a particular pool. 2770 */ 2771 static int 2772 zio_walk_init(mdb_walk_state_t *wsp) 2773 { 2774 wsp->walk_data = (void *)wsp->walk_addr; 2775 2776 if (mdb_layered_walk("zio_cache", wsp) == -1) { 2777 mdb_warn("failed to walk 'zio_cache'\n"); 2778 return (WALK_ERR); 2779 } 2780 2781 return (WALK_NEXT); 2782 } 2783 2784 static int 2785 zio_walk_step(mdb_walk_state_t *wsp) 2786 { 2787 mdb_zio_t zio; 2788 uintptr_t spa = (uintptr_t)wsp->walk_data; 2789 2790 if (mdb_ctf_vread(&zio, ZFS_STRUCT "zio", "mdb_zio_t", 2791 wsp->walk_addr, 0) == -1) 2792 return (WALK_ERR); 2793 2794 if (spa != 0 && spa != zio.io_spa) 2795 return (WALK_NEXT); 2796 2797 return (wsp->walk_callback(wsp->walk_addr, &zio, wsp->walk_cbdata)); 2798 } 2799 2800 /* 2801 * [addr]::walk zio_root 2802 * 2803 * Walk only root zio_t structures, optionally for a particular spa_t. 2804 */ 2805 static int 2806 zio_walk_root_step(mdb_walk_state_t *wsp) 2807 { 2808 mdb_zio_t zio; 2809 uintptr_t spa = (uintptr_t)wsp->walk_data; 2810 2811 if (mdb_ctf_vread(&zio, ZFS_STRUCT "zio", "mdb_zio_t", 2812 wsp->walk_addr, 0) == -1) 2813 return (WALK_ERR); 2814 2815 if (spa != 0 && spa != zio.io_spa) 2816 return (WALK_NEXT); 2817 2818 /* If the parent list is not empty, ignore */ 2819 if (zio.io_parent_list.list_head.list_next != 2820 wsp->walk_addr + 2821 mdb_ctf_offsetof_by_name(ZFS_STRUCT "zio", "io_parent_list") + 2822 mdb_ctf_offsetof_by_name("struct list", "list_head")) 2823 return (WALK_NEXT); 2824 2825 return (wsp->walk_callback(wsp->walk_addr, &zio, wsp->walk_cbdata)); 2826 } 2827 2828 /* 2829 * ::zfs_blkstats 2830 * 2831 * -v print verbose per-level information 2832 * 2833 */ 2834 static int 2835 zfs_blkstats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2836 { 2837 boolean_t verbose = B_FALSE; 2838 zfs_all_blkstats_t stats; 2839 dmu_object_type_t t; 2840 zfs_blkstat_t *tzb; 2841 uint64_t ditto; 2842 dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES + 10]; 2843 /* +10 in case it grew */ 2844 2845 if (mdb_readvar(&dmu_ot, "dmu_ot") == -1) { 2846 mdb_warn("failed to read 'dmu_ot'"); 2847 return (DCMD_ERR); 2848 } 2849 2850 if (mdb_getopts(argc, argv, 2851 'v', MDB_OPT_SETBITS, TRUE, &verbose, 2852 NULL) != argc) 2853 return (DCMD_USAGE); 2854 2855 if (!(flags & DCMD_ADDRSPEC)) 2856 return (DCMD_USAGE); 2857 2858 if (GETMEMB(addr, "spa", spa_dsl_pool, addr) || 2859 GETMEMB(addr, "dsl_pool", dp_blkstats, addr) || 2860 mdb_vread(&stats, sizeof (zfs_all_blkstats_t), addr) == -1) { 2861 mdb_warn("failed to read data at %p;", addr); 2862 mdb_printf("maybe no stats? run \"zpool scrub\" first."); 2863 return (DCMD_ERR); 2864 } 2865 2866 tzb = &stats.zab_type[DN_MAX_LEVELS][DMU_OT_TOTAL]; 2867 if (tzb->zb_gangs != 0) { 2868 mdb_printf("Ganged blocks: %llu\n", 2869 (longlong_t)tzb->zb_gangs); 2870 } 2871 2872 ditto = tzb->zb_ditto_2_of_2_samevdev + tzb->zb_ditto_2_of_3_samevdev + 2873 tzb->zb_ditto_3_of_3_samevdev; 2874 if (ditto != 0) { 2875 mdb_printf("Dittoed blocks on same vdev: %llu\n", 2876 (longlong_t)ditto); 2877 } 2878 2879 mdb_printf("\nBlocks\tLSIZE\tPSIZE\tASIZE" 2880 "\t avg\t comp\t%%Total\tType\n"); 2881 2882 for (t = 0; t <= DMU_OT_TOTAL; t++) { 2883 char csize[MDB_NICENUM_BUFLEN], lsize[MDB_NICENUM_BUFLEN]; 2884 char psize[MDB_NICENUM_BUFLEN], asize[MDB_NICENUM_BUFLEN]; 2885 char avg[MDB_NICENUM_BUFLEN]; 2886 char comp[MDB_NICENUM_BUFLEN], pct[MDB_NICENUM_BUFLEN]; 2887 char typename[64]; 2888 int l; 2889 2890 2891 if (t == DMU_OT_DEFERRED) 2892 strcpy(typename, "deferred free"); 2893 else if (t == DMU_OT_OTHER) 2894 strcpy(typename, "other"); 2895 else if (t == DMU_OT_TOTAL) 2896 strcpy(typename, "Total"); 2897 else if (mdb_readstr(typename, sizeof (typename), 2898 (uintptr_t)dmu_ot[t].ot_name) == -1) { 2899 mdb_warn("failed to read type name"); 2900 return (DCMD_ERR); 2901 } 2902 2903 if (stats.zab_type[DN_MAX_LEVELS][t].zb_asize == 0) 2904 continue; 2905 2906 for (l = -1; l < DN_MAX_LEVELS; l++) { 2907 int level = (l == -1 ? DN_MAX_LEVELS : l); 2908 zfs_blkstat_t *zb = &stats.zab_type[level][t]; 2909 2910 if (zb->zb_asize == 0) 2911 continue; 2912 2913 /* 2914 * Don't print each level unless requested. 2915 */ 2916 if (!verbose && level != DN_MAX_LEVELS) 2917 continue; 2918 2919 /* 2920 * If all the space is level 0, don't print the 2921 * level 0 separately. 2922 */ 2923 if (level == 0 && zb->zb_asize == 2924 stats.zab_type[DN_MAX_LEVELS][t].zb_asize) 2925 continue; 2926 2927 mdb_nicenum(zb->zb_count, csize); 2928 mdb_nicenum(zb->zb_lsize, lsize); 2929 mdb_nicenum(zb->zb_psize, psize); 2930 mdb_nicenum(zb->zb_asize, asize); 2931 mdb_nicenum(zb->zb_asize / zb->zb_count, avg); 2932 (void) mdb_snprintfrac(comp, MDB_NICENUM_BUFLEN, 2933 zb->zb_lsize, zb->zb_psize, 2); 2934 (void) mdb_snprintfrac(pct, MDB_NICENUM_BUFLEN, 2935 100 * zb->zb_asize, tzb->zb_asize, 2); 2936 2937 mdb_printf("%6s\t%5s\t%5s\t%5s\t%5s" 2938 "\t%5s\t%6s\t", 2939 csize, lsize, psize, asize, avg, comp, pct); 2940 2941 if (level == DN_MAX_LEVELS) 2942 mdb_printf("%s\n", typename); 2943 else 2944 mdb_printf(" L%d %s\n", 2945 level, typename); 2946 } 2947 } 2948 2949 return (DCMD_OK); 2950 } 2951 2952 typedef struct mdb_reference { 2953 uintptr_t ref_holder; 2954 uintptr_t ref_removed; 2955 uint64_t ref_number; 2956 } mdb_reference_t; 2957 2958 /* ARGSUSED */ 2959 static int 2960 reference_cb(uintptr_t addr, const void *ignored, void *arg) 2961 { 2962 mdb_reference_t ref; 2963 boolean_t holder_is_str = B_FALSE; 2964 char holder_str[128]; 2965 boolean_t removed = (boolean_t)arg; 2966 2967 if (mdb_ctf_vread(&ref, "reference_t", "mdb_reference_t", addr, 2968 0) == -1) 2969 return (DCMD_ERR); 2970 2971 if (mdb_readstr(holder_str, sizeof (holder_str), 2972 ref.ref_holder) != -1) 2973 holder_is_str = strisprint(holder_str); 2974 2975 if (removed) 2976 mdb_printf("removed "); 2977 mdb_printf("reference "); 2978 if (ref.ref_number != 1) 2979 mdb_printf("with count=%llu ", ref.ref_number); 2980 mdb_printf("with tag %lx", ref.ref_holder); 2981 if (holder_is_str) 2982 mdb_printf(" \"%s\"", holder_str); 2983 mdb_printf(", held at:\n"); 2984 2985 (void) mdb_call_dcmd("whatis", addr, DCMD_ADDRSPEC, 0, NULL); 2986 2987 if (removed) { 2988 mdb_printf("removed at:\n"); 2989 (void) mdb_call_dcmd("whatis", ref.ref_removed, 2990 DCMD_ADDRSPEC, 0, NULL); 2991 } 2992 2993 mdb_printf("\n"); 2994 2995 return (WALK_NEXT); 2996 } 2997 2998 typedef struct mdb_refcount { 2999 uint64_t rc_count; 3000 } mdb_refcount_t; 3001 3002 typedef struct mdb_refcount_removed { 3003 uint64_t rc_removed_count; 3004 } mdb_refcount_removed_t; 3005 3006 typedef struct mdb_refcount_tracked { 3007 boolean_t rc_tracked; 3008 } mdb_refcount_tracked_t; 3009 3010 /* ARGSUSED */ 3011 static int 3012 refcount(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3013 { 3014 mdb_refcount_t rc; 3015 mdb_refcount_removed_t rcr; 3016 mdb_refcount_tracked_t rct; 3017 int off; 3018 boolean_t released = B_FALSE; 3019 3020 if (!(flags & DCMD_ADDRSPEC)) 3021 return (DCMD_USAGE); 3022 3023 if (mdb_getopts(argc, argv, 3024 'r', MDB_OPT_SETBITS, B_TRUE, &released, 3025 NULL) != argc) 3026 return (DCMD_USAGE); 3027 3028 if (mdb_ctf_vread(&rc, "refcount_t", "mdb_refcount_t", addr, 3029 0) == -1) 3030 return (DCMD_ERR); 3031 3032 if (mdb_ctf_vread(&rcr, "refcount_t", "mdb_refcount_removed_t", addr, 3033 MDB_CTF_VREAD_QUIET) == -1) { 3034 mdb_printf("refcount_t at %p has %llu holds (untracked)\n", 3035 addr, (longlong_t)rc.rc_count); 3036 return (DCMD_OK); 3037 } 3038 3039 if (mdb_ctf_vread(&rct, "refcount_t", "mdb_refcount_tracked_t", addr, 3040 MDB_CTF_VREAD_QUIET) == -1) { 3041 /* If this is an old target, it might be tracked. */ 3042 rct.rc_tracked = B_TRUE; 3043 } 3044 3045 mdb_printf("refcount_t at %p has %llu current holds, " 3046 "%llu recently released holds\n", 3047 addr, (longlong_t)rc.rc_count, (longlong_t)rcr.rc_removed_count); 3048 3049 if (rct.rc_tracked && rc.rc_count > 0) 3050 mdb_printf("current holds:\n"); 3051 off = mdb_ctf_offsetof_by_name("refcount_t", "rc_list"); 3052 if (off == -1) 3053 return (DCMD_ERR); 3054 mdb_pwalk("list", reference_cb, (void*)B_FALSE, addr + off); 3055 3056 if (released && rcr.rc_removed_count > 0) { 3057 mdb_printf("released holds:\n"); 3058 3059 off = mdb_ctf_offsetof_by_name("refcount_t", "rc_removed"); 3060 if (off == -1) 3061 return (DCMD_ERR); 3062 mdb_pwalk("list", reference_cb, (void*)B_TRUE, addr + off); 3063 } 3064 3065 return (DCMD_OK); 3066 } 3067 3068 /* ARGSUSED */ 3069 static int 3070 sa_attr_table(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3071 { 3072 sa_attr_table_t *table; 3073 sa_os_t sa_os; 3074 char *name; 3075 int i; 3076 3077 if (mdb_vread(&sa_os, sizeof (sa_os_t), addr) == -1) { 3078 mdb_warn("failed to read sa_os at %p", addr); 3079 return (DCMD_ERR); 3080 } 3081 3082 table = mdb_alloc(sizeof (sa_attr_table_t) * sa_os.sa_num_attrs, 3083 UM_SLEEP | UM_GC); 3084 name = mdb_alloc(MAXPATHLEN, UM_SLEEP | UM_GC); 3085 3086 if (mdb_vread(table, sizeof (sa_attr_table_t) * sa_os.sa_num_attrs, 3087 (uintptr_t)sa_os.sa_attr_table) == -1) { 3088 mdb_warn("failed to read sa_os at %p", addr); 3089 return (DCMD_ERR); 3090 } 3091 3092 mdb_printf("%<u>%-10s %-10s %-10s %-10s %s%</u>\n", 3093 "ATTR ID", "REGISTERED", "LENGTH", "BSWAP", "NAME"); 3094 for (i = 0; i != sa_os.sa_num_attrs; i++) { 3095 mdb_readstr(name, MAXPATHLEN, (uintptr_t)table[i].sa_name); 3096 mdb_printf("%5x %8x %8x %8x %-s\n", 3097 (int)table[i].sa_attr, (int)table[i].sa_registered, 3098 (int)table[i].sa_length, table[i].sa_byteswap, name); 3099 } 3100 3101 return (DCMD_OK); 3102 } 3103 3104 static int 3105 sa_get_off_table(uintptr_t addr, uint32_t **off_tab, int attr_count) 3106 { 3107 uintptr_t idx_table; 3108 3109 if (GETMEMB(addr, "sa_idx_tab", sa_idx_tab, idx_table)) { 3110 mdb_printf("can't find offset table in sa_idx_tab\n"); 3111 return (-1); 3112 } 3113 3114 *off_tab = mdb_alloc(attr_count * sizeof (uint32_t), 3115 UM_SLEEP | UM_GC); 3116 3117 if (mdb_vread(*off_tab, 3118 attr_count * sizeof (uint32_t), idx_table) == -1) { 3119 mdb_warn("failed to attribute offset table %p", idx_table); 3120 return (-1); 3121 } 3122 3123 return (DCMD_OK); 3124 } 3125 3126 /*ARGSUSED*/ 3127 static int 3128 sa_attr_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3129 { 3130 uint32_t *offset_tab; 3131 int attr_count; 3132 uint64_t attr_id; 3133 uintptr_t attr_addr; 3134 uintptr_t bonus_tab, spill_tab; 3135 uintptr_t db_bonus, db_spill; 3136 uintptr_t os, os_sa; 3137 uintptr_t db_data; 3138 3139 if (argc != 1) 3140 return (DCMD_USAGE); 3141 3142 if (argv[0].a_type == MDB_TYPE_STRING) 3143 attr_id = mdb_strtoull(argv[0].a_un.a_str); 3144 else 3145 return (DCMD_USAGE); 3146 3147 if (GETMEMB(addr, "sa_handle", sa_bonus_tab, bonus_tab) || 3148 GETMEMB(addr, "sa_handle", sa_spill_tab, spill_tab) || 3149 GETMEMB(addr, "sa_handle", sa_os, os) || 3150 GETMEMB(addr, "sa_handle", sa_bonus, db_bonus) || 3151 GETMEMB(addr, "sa_handle", sa_spill, db_spill)) { 3152 mdb_printf("Can't find necessary information in sa_handle " 3153 "in sa_handle\n"); 3154 return (DCMD_ERR); 3155 } 3156 3157 if (GETMEMB(os, "objset", os_sa, os_sa)) { 3158 mdb_printf("Can't find os_sa in objset\n"); 3159 return (DCMD_ERR); 3160 } 3161 3162 if (GETMEMB(os_sa, "sa_os", sa_num_attrs, attr_count)) { 3163 mdb_printf("Can't find sa_num_attrs\n"); 3164 return (DCMD_ERR); 3165 } 3166 3167 if (attr_id > attr_count) { 3168 mdb_printf("attribute id number is out of range\n"); 3169 return (DCMD_ERR); 3170 } 3171 3172 if (bonus_tab) { 3173 if (sa_get_off_table(bonus_tab, &offset_tab, 3174 attr_count) == -1) { 3175 return (DCMD_ERR); 3176 } 3177 3178 if (GETMEMB(db_bonus, "dmu_buf", db_data, db_data)) { 3179 mdb_printf("can't find db_data in bonus dbuf\n"); 3180 return (DCMD_ERR); 3181 } 3182 } 3183 3184 if (bonus_tab && !TOC_ATTR_PRESENT(offset_tab[attr_id]) && 3185 spill_tab == 0) { 3186 mdb_printf("Attribute does not exist\n"); 3187 return (DCMD_ERR); 3188 } else if (!TOC_ATTR_PRESENT(offset_tab[attr_id]) && spill_tab) { 3189 if (sa_get_off_table(spill_tab, &offset_tab, 3190 attr_count) == -1) { 3191 return (DCMD_ERR); 3192 } 3193 if (GETMEMB(db_spill, "dmu_buf", db_data, db_data)) { 3194 mdb_printf("can't find db_data in spill dbuf\n"); 3195 return (DCMD_ERR); 3196 } 3197 if (!TOC_ATTR_PRESENT(offset_tab[attr_id])) { 3198 mdb_printf("Attribute does not exist\n"); 3199 return (DCMD_ERR); 3200 } 3201 } 3202 attr_addr = db_data + TOC_OFF(offset_tab[attr_id]); 3203 mdb_printf("%p\n", attr_addr); 3204 return (DCMD_OK); 3205 } 3206 3207 /* ARGSUSED */ 3208 static int 3209 zfs_ace_print_common(uintptr_t addr, uint_t flags, 3210 uint64_t id, uint32_t access_mask, uint16_t ace_flags, 3211 uint16_t ace_type, int verbose) 3212 { 3213 if (DCMD_HDRSPEC(flags) && !verbose) 3214 mdb_printf("%<u>%-?s %-8s %-8s %-8s %s%</u>\n", 3215 "ADDR", "FLAGS", "MASK", "TYPE", "ID"); 3216 3217 if (!verbose) { 3218 mdb_printf("%0?p %-8x %-8x %-8x %-llx\n", addr, 3219 ace_flags, access_mask, ace_type, id); 3220 return (DCMD_OK); 3221 } 3222 3223 switch (ace_flags & ACE_TYPE_FLAGS) { 3224 case ACE_OWNER: 3225 mdb_printf("owner@:"); 3226 break; 3227 case (ACE_IDENTIFIER_GROUP | ACE_GROUP): 3228 mdb_printf("group@:"); 3229 break; 3230 case ACE_EVERYONE: 3231 mdb_printf("everyone@:"); 3232 break; 3233 case ACE_IDENTIFIER_GROUP: 3234 mdb_printf("group:%llx:", (u_longlong_t)id); 3235 break; 3236 case 0: /* User entry */ 3237 mdb_printf("user:%llx:", (u_longlong_t)id); 3238 break; 3239 } 3240 3241 /* print out permission mask */ 3242 if (access_mask & ACE_READ_DATA) 3243 mdb_printf("r"); 3244 else 3245 mdb_printf("-"); 3246 if (access_mask & ACE_WRITE_DATA) 3247 mdb_printf("w"); 3248 else 3249 mdb_printf("-"); 3250 if (access_mask & ACE_EXECUTE) 3251 mdb_printf("x"); 3252 else 3253 mdb_printf("-"); 3254 if (access_mask & ACE_APPEND_DATA) 3255 mdb_printf("p"); 3256 else 3257 mdb_printf("-"); 3258 if (access_mask & ACE_DELETE) 3259 mdb_printf("d"); 3260 else 3261 mdb_printf("-"); 3262 if (access_mask & ACE_DELETE_CHILD) 3263 mdb_printf("D"); 3264 else 3265 mdb_printf("-"); 3266 if (access_mask & ACE_READ_ATTRIBUTES) 3267 mdb_printf("a"); 3268 else 3269 mdb_printf("-"); 3270 if (access_mask & ACE_WRITE_ATTRIBUTES) 3271 mdb_printf("A"); 3272 else 3273 mdb_printf("-"); 3274 if (access_mask & ACE_READ_NAMED_ATTRS) 3275 mdb_printf("R"); 3276 else 3277 mdb_printf("-"); 3278 if (access_mask & ACE_WRITE_NAMED_ATTRS) 3279 mdb_printf("W"); 3280 else 3281 mdb_printf("-"); 3282 if (access_mask & ACE_READ_ACL) 3283 mdb_printf("c"); 3284 else 3285 mdb_printf("-"); 3286 if (access_mask & ACE_WRITE_ACL) 3287 mdb_printf("C"); 3288 else 3289 mdb_printf("-"); 3290 if (access_mask & ACE_WRITE_OWNER) 3291 mdb_printf("o"); 3292 else 3293 mdb_printf("-"); 3294 if (access_mask & ACE_SYNCHRONIZE) 3295 mdb_printf("s"); 3296 else 3297 mdb_printf("-"); 3298 3299 mdb_printf(":"); 3300 3301 /* Print out inheritance flags */ 3302 if (ace_flags & ACE_FILE_INHERIT_ACE) 3303 mdb_printf("f"); 3304 else 3305 mdb_printf("-"); 3306 if (ace_flags & ACE_DIRECTORY_INHERIT_ACE) 3307 mdb_printf("d"); 3308 else 3309 mdb_printf("-"); 3310 if (ace_flags & ACE_INHERIT_ONLY_ACE) 3311 mdb_printf("i"); 3312 else 3313 mdb_printf("-"); 3314 if (ace_flags & ACE_NO_PROPAGATE_INHERIT_ACE) 3315 mdb_printf("n"); 3316 else 3317 mdb_printf("-"); 3318 if (ace_flags & ACE_SUCCESSFUL_ACCESS_ACE_FLAG) 3319 mdb_printf("S"); 3320 else 3321 mdb_printf("-"); 3322 if (ace_flags & ACE_FAILED_ACCESS_ACE_FLAG) 3323 mdb_printf("F"); 3324 else 3325 mdb_printf("-"); 3326 if (ace_flags & ACE_INHERITED_ACE) 3327 mdb_printf("I"); 3328 else 3329 mdb_printf("-"); 3330 3331 switch (ace_type) { 3332 case ACE_ACCESS_ALLOWED_ACE_TYPE: 3333 mdb_printf(":allow\n"); 3334 break; 3335 case ACE_ACCESS_DENIED_ACE_TYPE: 3336 mdb_printf(":deny\n"); 3337 break; 3338 case ACE_SYSTEM_AUDIT_ACE_TYPE: 3339 mdb_printf(":audit\n"); 3340 break; 3341 case ACE_SYSTEM_ALARM_ACE_TYPE: 3342 mdb_printf(":alarm\n"); 3343 break; 3344 default: 3345 mdb_printf(":?\n"); 3346 } 3347 return (DCMD_OK); 3348 } 3349 3350 /* ARGSUSED */ 3351 static int 3352 zfs_ace_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3353 { 3354 zfs_ace_t zace; 3355 int verbose = FALSE; 3356 uint64_t id; 3357 3358 if (!(flags & DCMD_ADDRSPEC)) 3359 return (DCMD_USAGE); 3360 3361 if (mdb_getopts(argc, argv, 3362 'v', MDB_OPT_SETBITS, TRUE, &verbose, TRUE, NULL) != argc) 3363 return (DCMD_USAGE); 3364 3365 if (mdb_vread(&zace, sizeof (zfs_ace_t), addr) == -1) { 3366 mdb_warn("failed to read zfs_ace_t"); 3367 return (DCMD_ERR); 3368 } 3369 3370 if ((zace.z_hdr.z_flags & ACE_TYPE_FLAGS) == 0 || 3371 (zace.z_hdr.z_flags & ACE_TYPE_FLAGS) == ACE_IDENTIFIER_GROUP) 3372 id = zace.z_fuid; 3373 else 3374 id = -1; 3375 3376 return (zfs_ace_print_common(addr, flags, id, zace.z_hdr.z_access_mask, 3377 zace.z_hdr.z_flags, zace.z_hdr.z_type, verbose)); 3378 } 3379 3380 /* ARGSUSED */ 3381 static int 3382 zfs_ace0_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3383 { 3384 ace_t ace; 3385 uint64_t id; 3386 int verbose = FALSE; 3387 3388 if (!(flags & DCMD_ADDRSPEC)) 3389 return (DCMD_USAGE); 3390 3391 if (mdb_getopts(argc, argv, 3392 'v', MDB_OPT_SETBITS, TRUE, &verbose, TRUE, NULL) != argc) 3393 return (DCMD_USAGE); 3394 3395 if (mdb_vread(&ace, sizeof (ace_t), addr) == -1) { 3396 mdb_warn("failed to read ace_t"); 3397 return (DCMD_ERR); 3398 } 3399 3400 if ((ace.a_flags & ACE_TYPE_FLAGS) == 0 || 3401 (ace.a_flags & ACE_TYPE_FLAGS) == ACE_IDENTIFIER_GROUP) 3402 id = ace.a_who; 3403 else 3404 id = -1; 3405 3406 return (zfs_ace_print_common(addr, flags, id, ace.a_access_mask, 3407 ace.a_flags, ace.a_type, verbose)); 3408 } 3409 3410 typedef struct acl_dump_args { 3411 int a_argc; 3412 const mdb_arg_t *a_argv; 3413 uint16_t a_version; 3414 int a_flags; 3415 } acl_dump_args_t; 3416 3417 /* ARGSUSED */ 3418 static int 3419 acl_aces_cb(uintptr_t addr, const void *unknown, void *arg) 3420 { 3421 acl_dump_args_t *acl_args = (acl_dump_args_t *)arg; 3422 3423 if (acl_args->a_version == 1) { 3424 if (mdb_call_dcmd("zfs_ace", addr, 3425 DCMD_ADDRSPEC|acl_args->a_flags, acl_args->a_argc, 3426 acl_args->a_argv) != DCMD_OK) { 3427 return (WALK_ERR); 3428 } 3429 } else { 3430 if (mdb_call_dcmd("zfs_ace0", addr, 3431 DCMD_ADDRSPEC|acl_args->a_flags, acl_args->a_argc, 3432 acl_args->a_argv) != DCMD_OK) { 3433 return (WALK_ERR); 3434 } 3435 } 3436 acl_args->a_flags = DCMD_LOOP; 3437 return (WALK_NEXT); 3438 } 3439 3440 /* ARGSUSED */ 3441 static int 3442 acl_cb(uintptr_t addr, const void *unknown, void *arg) 3443 { 3444 acl_dump_args_t *acl_args = (acl_dump_args_t *)arg; 3445 3446 if (acl_args->a_version == 1) { 3447 if (mdb_pwalk("zfs_acl_node_aces", acl_aces_cb, 3448 arg, addr) != 0) { 3449 mdb_warn("can't walk ACEs"); 3450 return (DCMD_ERR); 3451 } 3452 } else { 3453 if (mdb_pwalk("zfs_acl_node_aces0", acl_aces_cb, 3454 arg, addr) != 0) { 3455 mdb_warn("can't walk ACEs"); 3456 return (DCMD_ERR); 3457 } 3458 } 3459 return (WALK_NEXT); 3460 } 3461 3462 /* ARGSUSED */ 3463 static int 3464 zfs_acl_dump(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3465 { 3466 zfs_acl_t zacl; 3467 int verbose = FALSE; 3468 acl_dump_args_t acl_args; 3469 3470 if (!(flags & DCMD_ADDRSPEC)) 3471 return (DCMD_USAGE); 3472 3473 if (mdb_getopts(argc, argv, 3474 'v', MDB_OPT_SETBITS, TRUE, &verbose, TRUE, NULL) != argc) 3475 return (DCMD_USAGE); 3476 3477 if (mdb_vread(&zacl, sizeof (zfs_acl_t), addr) == -1) { 3478 mdb_warn("failed to read zfs_acl_t"); 3479 return (DCMD_ERR); 3480 } 3481 3482 acl_args.a_argc = argc; 3483 acl_args.a_argv = argv; 3484 acl_args.a_version = zacl.z_version; 3485 acl_args.a_flags = DCMD_LOOPFIRST; 3486 3487 if (mdb_pwalk("zfs_acl_node", acl_cb, &acl_args, addr) != 0) { 3488 mdb_warn("can't walk ACL"); 3489 return (DCMD_ERR); 3490 } 3491 3492 return (DCMD_OK); 3493 } 3494 3495 /* ARGSUSED */ 3496 static int 3497 zfs_acl_node_walk_init(mdb_walk_state_t *wsp) 3498 { 3499 if (wsp->walk_addr == 0) { 3500 mdb_warn("must supply address of zfs_acl_node_t\n"); 3501 return (WALK_ERR); 3502 } 3503 3504 wsp->walk_addr += 3505 mdb_ctf_offsetof_by_name(ZFS_STRUCT "zfs_acl", "z_acl"); 3506 3507 if (mdb_layered_walk("list", wsp) == -1) { 3508 mdb_warn("failed to walk 'list'\n"); 3509 return (WALK_ERR); 3510 } 3511 3512 return (WALK_NEXT); 3513 } 3514 3515 static int 3516 zfs_acl_node_walk_step(mdb_walk_state_t *wsp) 3517 { 3518 zfs_acl_node_t aclnode; 3519 3520 if (mdb_vread(&aclnode, sizeof (zfs_acl_node_t), 3521 wsp->walk_addr) == -1) { 3522 mdb_warn("failed to read zfs_acl_node at %p", wsp->walk_addr); 3523 return (WALK_ERR); 3524 } 3525 3526 return (wsp->walk_callback(wsp->walk_addr, &aclnode, wsp->walk_cbdata)); 3527 } 3528 3529 typedef struct ace_walk_data { 3530 int ace_count; 3531 int ace_version; 3532 } ace_walk_data_t; 3533 3534 static int 3535 zfs_aces_walk_init_common(mdb_walk_state_t *wsp, int version, 3536 int ace_count, uintptr_t ace_data) 3537 { 3538 ace_walk_data_t *ace_walk_data; 3539 3540 if (wsp->walk_addr == 0) { 3541 mdb_warn("must supply address of zfs_acl_node_t\n"); 3542 return (WALK_ERR); 3543 } 3544 3545 ace_walk_data = mdb_alloc(sizeof (ace_walk_data_t), UM_SLEEP | UM_GC); 3546 3547 ace_walk_data->ace_count = ace_count; 3548 ace_walk_data->ace_version = version; 3549 3550 wsp->walk_addr = ace_data; 3551 wsp->walk_data = ace_walk_data; 3552 3553 return (WALK_NEXT); 3554 } 3555 3556 static int 3557 zfs_acl_node_aces_walk_init_common(mdb_walk_state_t *wsp, int version) 3558 { 3559 static int gotid; 3560 static mdb_ctf_id_t acl_id; 3561 int z_ace_count; 3562 uintptr_t z_acldata; 3563 3564 if (!gotid) { 3565 if (mdb_ctf_lookup_by_name("struct zfs_acl_node", 3566 &acl_id) == -1) { 3567 mdb_warn("couldn't find struct zfs_acl_node"); 3568 return (DCMD_ERR); 3569 } 3570 gotid = TRUE; 3571 } 3572 3573 if (GETMEMBID(wsp->walk_addr, &acl_id, z_ace_count, z_ace_count)) { 3574 return (DCMD_ERR); 3575 } 3576 if (GETMEMBID(wsp->walk_addr, &acl_id, z_acldata, z_acldata)) { 3577 return (DCMD_ERR); 3578 } 3579 3580 return (zfs_aces_walk_init_common(wsp, version, 3581 z_ace_count, z_acldata)); 3582 } 3583 3584 /* ARGSUSED */ 3585 static int 3586 zfs_acl_node_aces_walk_init(mdb_walk_state_t *wsp) 3587 { 3588 return (zfs_acl_node_aces_walk_init_common(wsp, 1)); 3589 } 3590 3591 /* ARGSUSED */ 3592 static int 3593 zfs_acl_node_aces0_walk_init(mdb_walk_state_t *wsp) 3594 { 3595 return (zfs_acl_node_aces_walk_init_common(wsp, 0)); 3596 } 3597 3598 static int 3599 zfs_aces_walk_step(mdb_walk_state_t *wsp) 3600 { 3601 ace_walk_data_t *ace_data = wsp->walk_data; 3602 zfs_ace_t zace; 3603 ace_t *acep; 3604 int status; 3605 int entry_type; 3606 int allow_type; 3607 uintptr_t ptr; 3608 3609 if (ace_data->ace_count == 0) 3610 return (WALK_DONE); 3611 3612 if (mdb_vread(&zace, sizeof (zfs_ace_t), wsp->walk_addr) == -1) { 3613 mdb_warn("failed to read zfs_ace_t at %#lx", 3614 wsp->walk_addr); 3615 return (WALK_ERR); 3616 } 3617 3618 switch (ace_data->ace_version) { 3619 case 0: 3620 acep = (ace_t *)&zace; 3621 entry_type = acep->a_flags & ACE_TYPE_FLAGS; 3622 allow_type = acep->a_type; 3623 break; 3624 case 1: 3625 entry_type = zace.z_hdr.z_flags & ACE_TYPE_FLAGS; 3626 allow_type = zace.z_hdr.z_type; 3627 break; 3628 default: 3629 return (WALK_ERR); 3630 } 3631 3632 ptr = (uintptr_t)wsp->walk_addr; 3633 switch (entry_type) { 3634 case ACE_OWNER: 3635 case ACE_EVERYONE: 3636 case (ACE_IDENTIFIER_GROUP | ACE_GROUP): 3637 ptr += ace_data->ace_version == 0 ? 3638 sizeof (ace_t) : sizeof (zfs_ace_hdr_t); 3639 break; 3640 case ACE_IDENTIFIER_GROUP: 3641 default: 3642 switch (allow_type) { 3643 case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: 3644 case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: 3645 case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: 3646 case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: 3647 ptr += ace_data->ace_version == 0 ? 3648 sizeof (ace_t) : sizeof (zfs_object_ace_t); 3649 break; 3650 default: 3651 ptr += ace_data->ace_version == 0 ? 3652 sizeof (ace_t) : sizeof (zfs_ace_t); 3653 break; 3654 } 3655 } 3656 3657 ace_data->ace_count--; 3658 status = wsp->walk_callback(wsp->walk_addr, 3659 (void *)(uintptr_t)&zace, wsp->walk_cbdata); 3660 3661 wsp->walk_addr = ptr; 3662 return (status); 3663 } 3664 3665 typedef struct mdb_zfs_rrwlock { 3666 uintptr_t rr_writer; 3667 boolean_t rr_writer_wanted; 3668 } mdb_zfs_rrwlock_t; 3669 3670 static uint_t rrw_key; 3671 3672 /* ARGSUSED */ 3673 static int 3674 rrwlock(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3675 { 3676 mdb_zfs_rrwlock_t rrw; 3677 3678 if (rrw_key == 0) { 3679 if (mdb_ctf_readsym(&rrw_key, "uint_t", "rrw_tsd_key", 0) == -1) 3680 return (DCMD_ERR); 3681 } 3682 3683 if (mdb_ctf_vread(&rrw, "rrwlock_t", "mdb_zfs_rrwlock_t", addr, 3684 0) == -1) 3685 return (DCMD_ERR); 3686 3687 if (rrw.rr_writer != 0) { 3688 mdb_printf("write lock held by thread %lx\n", rrw.rr_writer); 3689 return (DCMD_OK); 3690 } 3691 3692 if (rrw.rr_writer_wanted) { 3693 mdb_printf("writer wanted\n"); 3694 } 3695 3696 mdb_printf("anonymous references:\n"); 3697 (void) mdb_call_dcmd("refcount", addr + 3698 mdb_ctf_offsetof_by_name(ZFS_STRUCT "rrwlock", "rr_anon_rcount"), 3699 DCMD_ADDRSPEC, 0, NULL); 3700 3701 mdb_printf("linked references:\n"); 3702 (void) mdb_call_dcmd("refcount", addr + 3703 mdb_ctf_offsetof_by_name(ZFS_STRUCT "rrwlock", "rr_linked_rcount"), 3704 DCMD_ADDRSPEC, 0, NULL); 3705 3706 /* 3707 * XXX This should find references from 3708 * "::walk thread | ::tsd -v <rrw_key>", but there is no support 3709 * for programmatic consumption of dcmds, so this would be 3710 * difficult, potentially requiring reimplementing ::tsd (both 3711 * user and kernel versions) in this MDB module. 3712 */ 3713 3714 return (DCMD_OK); 3715 } 3716 3717 typedef struct mdb_arc_buf_hdr_t { 3718 uint16_t b_psize; 3719 uint16_t b_lsize; 3720 struct { 3721 uint32_t b_bufcnt; 3722 uintptr_t b_state; 3723 } b_l1hdr; 3724 } mdb_arc_buf_hdr_t; 3725 3726 enum arc_cflags { 3727 ARC_CFLAG_VERBOSE = 1 << 0, 3728 ARC_CFLAG_ANON = 1 << 1, 3729 ARC_CFLAG_MRU = 1 << 2, 3730 ARC_CFLAG_MFU = 1 << 3, 3731 ARC_CFLAG_BUFS = 1 << 4, 3732 }; 3733 3734 typedef struct arc_compression_stats_data { 3735 GElf_Sym anon_sym; /* ARC_anon symbol */ 3736 GElf_Sym mru_sym; /* ARC_mru symbol */ 3737 GElf_Sym mrug_sym; /* ARC_mru_ghost symbol */ 3738 GElf_Sym mfu_sym; /* ARC_mfu symbol */ 3739 GElf_Sym mfug_sym; /* ARC_mfu_ghost symbol */ 3740 GElf_Sym l2c_sym; /* ARC_l2c_only symbol */ 3741 uint64_t *anon_c_hist; /* histogram of compressed sizes in anon */ 3742 uint64_t *anon_u_hist; /* histogram of uncompressed sizes in anon */ 3743 uint64_t *anon_bufs; /* histogram of buffer counts in anon state */ 3744 uint64_t *mru_c_hist; /* histogram of compressed sizes in mru */ 3745 uint64_t *mru_u_hist; /* histogram of uncompressed sizes in mru */ 3746 uint64_t *mru_bufs; /* histogram of buffer counts in mru */ 3747 uint64_t *mfu_c_hist; /* histogram of compressed sizes in mfu */ 3748 uint64_t *mfu_u_hist; /* histogram of uncompressed sizes in mfu */ 3749 uint64_t *mfu_bufs; /* histogram of buffer counts in mfu */ 3750 uint64_t *all_c_hist; /* histogram of compressed anon + mru + mfu */ 3751 uint64_t *all_u_hist; /* histogram of uncompressed anon + mru + mfu */ 3752 uint64_t *all_bufs; /* histogram of buffer counts in all states */ 3753 int arc_cflags; /* arc compression flags, specified by user */ 3754 int hist_nbuckets; /* number of buckets in each histogram */ 3755 } arc_compression_stats_data_t; 3756 3757 int 3758 highbit64(uint64_t i) 3759 { 3760 int h = 1; 3761 3762 if (i == 0) 3763 return (0); 3764 if (i & 0xffffffff00000000ULL) { 3765 h += 32; i >>= 32; 3766 } 3767 if (i & 0xffff0000) { 3768 h += 16; i >>= 16; 3769 } 3770 if (i & 0xff00) { 3771 h += 8; i >>= 8; 3772 } 3773 if (i & 0xf0) { 3774 h += 4; i >>= 4; 3775 } 3776 if (i & 0xc) { 3777 h += 2; i >>= 2; 3778 } 3779 if (i & 0x2) { 3780 h += 1; 3781 } 3782 return (h); 3783 } 3784 3785 /* ARGSUSED */ 3786 static int 3787 arc_compression_stats_cb(uintptr_t addr, const void *unknown, void *arg) 3788 { 3789 arc_compression_stats_data_t *data = arg; 3790 mdb_arc_buf_hdr_t hdr; 3791 int cbucket, ubucket, bufcnt; 3792 3793 if (mdb_ctf_vread(&hdr, "arc_buf_hdr_t", "mdb_arc_buf_hdr_t", 3794 addr, 0) == -1) { 3795 return (WALK_ERR); 3796 } 3797 3798 /* 3799 * Headers in the ghost states, or the l2c_only state don't have 3800 * arc buffers linked off of them. Thus, their compressed size 3801 * is meaningless, so we skip these from the stats. 3802 */ 3803 if (hdr.b_l1hdr.b_state == data->mrug_sym.st_value || 3804 hdr.b_l1hdr.b_state == data->mfug_sym.st_value || 3805 hdr.b_l1hdr.b_state == data->l2c_sym.st_value) { 3806 return (WALK_NEXT); 3807 } 3808 3809 /* 3810 * The physical size (compressed) and logical size 3811 * (uncompressed) are in units of SPA_MINBLOCKSIZE. By default, 3812 * we use the log2 of this value (rounded down to the nearest 3813 * integer) to determine the bucket to assign this header to. 3814 * Thus, the histogram is logarithmic with respect to the size 3815 * of the header. For example, the following is a mapping of the 3816 * bucket numbers and the range of header sizes they correspond to: 3817 * 3818 * 0: 0 byte headers 3819 * 1: 512 byte headers 3820 * 2: [1024 - 2048) byte headers 3821 * 3: [2048 - 4096) byte headers 3822 * 4: [4096 - 8192) byte headers 3823 * 5: [8192 - 16394) byte headers 3824 * 6: [16384 - 32768) byte headers 3825 * 7: [32768 - 65536) byte headers 3826 * 8: [65536 - 131072) byte headers 3827 * 9: 131072 byte headers 3828 * 3829 * If the ARC_CFLAG_VERBOSE flag was specified, we use the 3830 * physical and logical sizes directly. Thus, the histogram will 3831 * no longer be logarithmic; instead it will be linear with 3832 * respect to the size of the header. The following is a mapping 3833 * of the first many bucket numbers and the header size they 3834 * correspond to: 3835 * 3836 * 0: 0 byte headers 3837 * 1: 512 byte headers 3838 * 2: 1024 byte headers 3839 * 3: 1536 byte headers 3840 * 4: 2048 byte headers 3841 * 5: 2560 byte headers 3842 * 6: 3072 byte headers 3843 * 3844 * And so on. Keep in mind that a range of sizes isn't used in 3845 * the case of linear scale because the headers can only 3846 * increment or decrement in sizes of 512 bytes. So, it's not 3847 * possible for a header to be sized in between whats listed 3848 * above. 3849 * 3850 * Also, the above mapping values were calculated assuming a 3851 * SPA_MINBLOCKSHIFT of 512 bytes and a SPA_MAXBLOCKSIZE of 128K. 3852 */ 3853 3854 if (data->arc_cflags & ARC_CFLAG_VERBOSE) { 3855 cbucket = hdr.b_psize; 3856 ubucket = hdr.b_lsize; 3857 } else { 3858 cbucket = highbit64(hdr.b_psize); 3859 ubucket = highbit64(hdr.b_lsize); 3860 } 3861 3862 bufcnt = hdr.b_l1hdr.b_bufcnt; 3863 if (bufcnt >= data->hist_nbuckets) 3864 bufcnt = data->hist_nbuckets - 1; 3865 3866 /* Ensure we stay within the bounds of the histogram array */ 3867 ASSERT3U(cbucket, <, data->hist_nbuckets); 3868 ASSERT3U(ubucket, <, data->hist_nbuckets); 3869 3870 if (hdr.b_l1hdr.b_state == data->anon_sym.st_value) { 3871 data->anon_c_hist[cbucket]++; 3872 data->anon_u_hist[ubucket]++; 3873 data->anon_bufs[bufcnt]++; 3874 } else if (hdr.b_l1hdr.b_state == data->mru_sym.st_value) { 3875 data->mru_c_hist[cbucket]++; 3876 data->mru_u_hist[ubucket]++; 3877 data->mru_bufs[bufcnt]++; 3878 } else if (hdr.b_l1hdr.b_state == data->mfu_sym.st_value) { 3879 data->mfu_c_hist[cbucket]++; 3880 data->mfu_u_hist[ubucket]++; 3881 data->mfu_bufs[bufcnt]++; 3882 } 3883 3884 data->all_c_hist[cbucket]++; 3885 data->all_u_hist[ubucket]++; 3886 data->all_bufs[bufcnt]++; 3887 3888 return (WALK_NEXT); 3889 } 3890 3891 /* ARGSUSED */ 3892 static int 3893 arc_compression_stats(uintptr_t addr, uint_t flags, int argc, 3894 const mdb_arg_t *argv) 3895 { 3896 arc_compression_stats_data_t data = { 0 }; 3897 unsigned int max_shifted = SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; 3898 unsigned int hist_size; 3899 char range[32]; 3900 int rc = DCMD_OK; 3901 3902 if (mdb_getopts(argc, argv, 3903 'v', MDB_OPT_SETBITS, ARC_CFLAG_VERBOSE, &data.arc_cflags, 3904 'a', MDB_OPT_SETBITS, ARC_CFLAG_ANON, &data.arc_cflags, 3905 'b', MDB_OPT_SETBITS, ARC_CFLAG_BUFS, &data.arc_cflags, 3906 'r', MDB_OPT_SETBITS, ARC_CFLAG_MRU, &data.arc_cflags, 3907 'f', MDB_OPT_SETBITS, ARC_CFLAG_MFU, &data.arc_cflags) != argc) 3908 return (DCMD_USAGE); 3909 3910 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_anon", &data.anon_sym) || 3911 mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_mru", &data.mru_sym) || 3912 mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_mru_ghost", &data.mrug_sym) || 3913 mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_mfu", &data.mfu_sym) || 3914 mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_mfu_ghost", &data.mfug_sym) || 3915 mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_l2c_only", &data.l2c_sym)) { 3916 mdb_warn("can't find arc state symbol"); 3917 return (DCMD_ERR); 3918 } 3919 3920 /* 3921 * Determine the maximum expected size for any header, and use 3922 * this to determine the number of buckets needed for each 3923 * histogram. If ARC_CFLAG_VERBOSE is specified, this value is 3924 * used directly; otherwise the log2 of the maximum size is 3925 * used. Thus, if using a log2 scale there's a maximum of 10 3926 * possible buckets, while the linear scale (when using 3927 * ARC_CFLAG_VERBOSE) has a maximum of 257 buckets. 3928 */ 3929 if (data.arc_cflags & ARC_CFLAG_VERBOSE) 3930 data.hist_nbuckets = max_shifted + 1; 3931 else 3932 data.hist_nbuckets = highbit64(max_shifted) + 1; 3933 3934 hist_size = sizeof (uint64_t) * data.hist_nbuckets; 3935 3936 data.anon_c_hist = mdb_zalloc(hist_size, UM_SLEEP); 3937 data.anon_u_hist = mdb_zalloc(hist_size, UM_SLEEP); 3938 data.anon_bufs = mdb_zalloc(hist_size, UM_SLEEP); 3939 3940 data.mru_c_hist = mdb_zalloc(hist_size, UM_SLEEP); 3941 data.mru_u_hist = mdb_zalloc(hist_size, UM_SLEEP); 3942 data.mru_bufs = mdb_zalloc(hist_size, UM_SLEEP); 3943 3944 data.mfu_c_hist = mdb_zalloc(hist_size, UM_SLEEP); 3945 data.mfu_u_hist = mdb_zalloc(hist_size, UM_SLEEP); 3946 data.mfu_bufs = mdb_zalloc(hist_size, UM_SLEEP); 3947 3948 data.all_c_hist = mdb_zalloc(hist_size, UM_SLEEP); 3949 data.all_u_hist = mdb_zalloc(hist_size, UM_SLEEP); 3950 data.all_bufs = mdb_zalloc(hist_size, UM_SLEEP); 3951 3952 if (mdb_walk("arc_buf_hdr_t_full", arc_compression_stats_cb, 3953 &data) != 0) { 3954 mdb_warn("can't walk arc_buf_hdr's"); 3955 rc = DCMD_ERR; 3956 goto out; 3957 } 3958 3959 if (data.arc_cflags & ARC_CFLAG_VERBOSE) { 3960 rc = mdb_snprintf(range, sizeof (range), 3961 "[n*%llu, (n+1)*%llu)", SPA_MINBLOCKSIZE, 3962 SPA_MINBLOCKSIZE); 3963 } else { 3964 rc = mdb_snprintf(range, sizeof (range), 3965 "[2^(n-1)*%llu, 2^n*%llu)", SPA_MINBLOCKSIZE, 3966 SPA_MINBLOCKSIZE); 3967 } 3968 3969 if (rc < 0) { 3970 /* snprintf failed, abort the dcmd */ 3971 rc = DCMD_ERR; 3972 goto out; 3973 } else { 3974 /* snprintf succeeded above, reset return code */ 3975 rc = DCMD_OK; 3976 } 3977 3978 if (data.arc_cflags & ARC_CFLAG_ANON) { 3979 if (data.arc_cflags & ARC_CFLAG_BUFS) { 3980 mdb_printf("Histogram of the number of anon buffers " 3981 "that are associated with an arc hdr.\n"); 3982 dump_histogram(data.anon_bufs, data.hist_nbuckets, 0); 3983 mdb_printf("\n"); 3984 } 3985 mdb_printf("Histogram of compressed anon buffers.\n" 3986 "Each bucket represents buffers of size: %s.\n", range); 3987 dump_histogram(data.anon_c_hist, data.hist_nbuckets, 0); 3988 mdb_printf("\n"); 3989 3990 mdb_printf("Histogram of uncompressed anon buffers.\n" 3991 "Each bucket represents buffers of size: %s.\n", range); 3992 dump_histogram(data.anon_u_hist, data.hist_nbuckets, 0); 3993 mdb_printf("\n"); 3994 } 3995 3996 if (data.arc_cflags & ARC_CFLAG_MRU) { 3997 if (data.arc_cflags & ARC_CFLAG_BUFS) { 3998 mdb_printf("Histogram of the number of mru buffers " 3999 "that are associated with an arc hdr.\n"); 4000 dump_histogram(data.mru_bufs, data.hist_nbuckets, 0); 4001 mdb_printf("\n"); 4002 } 4003 mdb_printf("Histogram of compressed mru buffers.\n" 4004 "Each bucket represents buffers of size: %s.\n", range); 4005 dump_histogram(data.mru_c_hist, data.hist_nbuckets, 0); 4006 mdb_printf("\n"); 4007 4008 mdb_printf("Histogram of uncompressed mru buffers.\n" 4009 "Each bucket represents buffers of size: %s.\n", range); 4010 dump_histogram(data.mru_u_hist, data.hist_nbuckets, 0); 4011 mdb_printf("\n"); 4012 } 4013 4014 if (data.arc_cflags & ARC_CFLAG_MFU) { 4015 if (data.arc_cflags & ARC_CFLAG_BUFS) { 4016 mdb_printf("Histogram of the number of mfu buffers " 4017 "that are associated with an arc hdr.\n"); 4018 dump_histogram(data.mfu_bufs, data.hist_nbuckets, 0); 4019 mdb_printf("\n"); 4020 } 4021 4022 mdb_printf("Histogram of compressed mfu buffers.\n" 4023 "Each bucket represents buffers of size: %s.\n", range); 4024 dump_histogram(data.mfu_c_hist, data.hist_nbuckets, 0); 4025 mdb_printf("\n"); 4026 4027 mdb_printf("Histogram of uncompressed mfu buffers.\n" 4028 "Each bucket represents buffers of size: %s.\n", range); 4029 dump_histogram(data.mfu_u_hist, data.hist_nbuckets, 0); 4030 mdb_printf("\n"); 4031 } 4032 4033 if (data.arc_cflags & ARC_CFLAG_BUFS) { 4034 mdb_printf("Histogram of all buffers that " 4035 "are associated with an arc hdr.\n"); 4036 dump_histogram(data.all_bufs, data.hist_nbuckets, 0); 4037 mdb_printf("\n"); 4038 } 4039 4040 mdb_printf("Histogram of all compressed buffers.\n" 4041 "Each bucket represents buffers of size: %s.\n", range); 4042 dump_histogram(data.all_c_hist, data.hist_nbuckets, 0); 4043 mdb_printf("\n"); 4044 4045 mdb_printf("Histogram of all uncompressed buffers.\n" 4046 "Each bucket represents buffers of size: %s.\n", range); 4047 dump_histogram(data.all_u_hist, data.hist_nbuckets, 0); 4048 4049 out: 4050 mdb_free(data.anon_c_hist, hist_size); 4051 mdb_free(data.anon_u_hist, hist_size); 4052 mdb_free(data.anon_bufs, hist_size); 4053 4054 mdb_free(data.mru_c_hist, hist_size); 4055 mdb_free(data.mru_u_hist, hist_size); 4056 mdb_free(data.mru_bufs, hist_size); 4057 4058 mdb_free(data.mfu_c_hist, hist_size); 4059 mdb_free(data.mfu_u_hist, hist_size); 4060 mdb_free(data.mfu_bufs, hist_size); 4061 4062 mdb_free(data.all_c_hist, hist_size); 4063 mdb_free(data.all_u_hist, hist_size); 4064 mdb_free(data.all_bufs, hist_size); 4065 4066 return (rc); 4067 } 4068 4069 /* 4070 * MDB module linkage information: 4071 * 4072 * We declare a list of structures describing our dcmds, and a function 4073 * named _mdb_init to return a pointer to our module information. 4074 */ 4075 4076 static const mdb_dcmd_t dcmds[] = { 4077 { "arc", "[-bkmg]", "print ARC variables", arc_print }, 4078 { "blkptr", ":", "print blkptr_t", blkptr }, 4079 { "dva", ":", "print dva_t", dva }, 4080 { "dbuf", ":", "print dmu_buf_impl_t", dbuf }, 4081 { "dbuf_stats", ":", "dbuf stats", dbuf_stats }, 4082 { "dbufs", 4083 "\t[-O objset_t*] [-n objset_name | \"mos\"] " 4084 "[-o object | \"mdn\"] \n" 4085 "\t[-l level] [-b blkid | \"bonus\"]", 4086 "find dmu_buf_impl_t's that match specified criteria", dbufs }, 4087 { "abuf_find", "dva_word[0] dva_word[1]", 4088 "find arc_buf_hdr_t of a specified DVA", 4089 abuf_find }, 4090 { "spa", "?[-cevmMh]\n" 4091 "\t-c display spa config\n" 4092 "\t-e display vdev statistics\n" 4093 "\t-v display vdev information\n" 4094 "\t-m display metaslab statistics\n" 4095 "\t-M display metaslab group statistics\n" 4096 "\t-h display histogram (requires -m or -M)\n", 4097 "spa_t summary", spa_print }, 4098 { "spa_config", ":", "print spa_t configuration", spa_print_config }, 4099 { "spa_space", ":[-b]", "print spa_t on-disk space usage", spa_space }, 4100 { "spa_vdevs", ":[-emMh]\n" 4101 "\t-e display vdev statistics\n" 4102 "\t-m dispaly metaslab statistics\n" 4103 "\t-M display metaslab group statistic\n" 4104 "\t-h display histogram (requires -m or -M)\n", 4105 "given a spa_t, print vdev summary", spa_vdevs }, 4106 { "sm_entries", "<buffer length in bytes>", 4107 "print out space map entries from a buffer decoded", 4108 sm_entries}, 4109 { "vdev", ":[-remMh]\n" 4110 "\t-r display recursively\n" 4111 "\t-e display statistics\n" 4112 "\t-m display metaslab statistics (top level vdev only)\n" 4113 "\t-M display metaslab group statistics (top level vdev only)\n" 4114 "\t-h display histogram (requires -m or -M)\n", 4115 "vdev_t summary", vdev_print }, 4116 { "zio", ":[-cpr]\n" 4117 "\t-c display children\n" 4118 "\t-p display parents\n" 4119 "\t-r display recursively", 4120 "zio_t summary", zio_print }, 4121 { "zio_state", "?", "print out all zio_t structures on system or " 4122 "for a particular pool", zio_state }, 4123 { "zfs_blkstats", ":[-v]", 4124 "given a spa_t, print block type stats from last scrub", 4125 zfs_blkstats }, 4126 { "zfs_params", "", "print zfs tunable parameters", zfs_params }, 4127 { "refcount", ":[-r]\n" 4128 "\t-r display recently removed references", 4129 "print refcount_t holders", refcount }, 4130 { "zap_leaf", "", "print zap_leaf_phys_t", zap_leaf }, 4131 { "zfs_aces", ":[-v]", "print all ACEs from a zfs_acl_t", 4132 zfs_acl_dump }, 4133 { "zfs_ace", ":[-v]", "print zfs_ace", zfs_ace_print }, 4134 { "zfs_ace0", ":[-v]", "print zfs_ace0", zfs_ace0_print }, 4135 { "sa_attr_table", ":", "print SA attribute table from sa_os_t", 4136 sa_attr_table}, 4137 { "sa_attr", ": attr_id", 4138 "print SA attribute address when given sa_handle_t", sa_attr_print}, 4139 { "zfs_dbgmsg", ":[-va]", 4140 "print zfs debug log", dbgmsg}, 4141 { "rrwlock", ":", 4142 "print rrwlock_t, including readers", rrwlock}, 4143 { "metaslab_weight", "weight", 4144 "print metaslab weight", metaslab_weight}, 4145 { "metaslab_trace", ":", 4146 "print metaslab allocation trace records", metaslab_trace}, 4147 { "arc_compression_stats", ":[-vabrf]\n" 4148 "\t-v verbose, display a linearly scaled histogram\n" 4149 "\t-a display ARC_anon state statistics individually\n" 4150 "\t-r display ARC_mru state statistics individually\n" 4151 "\t-f display ARC_mfu state statistics individually\n" 4152 "\t-b display histogram of buffer counts\n", 4153 "print a histogram of compressed arc buffer sizes", 4154 arc_compression_stats}, 4155 { NULL } 4156 }; 4157 4158 static const mdb_walker_t walkers[] = { 4159 { "txg_list", "given any txg_list_t *, walk all entries in all txgs", 4160 txg_list_walk_init, txg_list_walk_step, NULL }, 4161 { "txg_list0", "given any txg_list_t *, walk all entries in txg 0", 4162 txg_list0_walk_init, txg_list_walk_step, NULL }, 4163 { "txg_list1", "given any txg_list_t *, walk all entries in txg 1", 4164 txg_list1_walk_init, txg_list_walk_step, NULL }, 4165 { "txg_list2", "given any txg_list_t *, walk all entries in txg 2", 4166 txg_list2_walk_init, txg_list_walk_step, NULL }, 4167 { "txg_list3", "given any txg_list_t *, walk all entries in txg 3", 4168 txg_list3_walk_init, txg_list_walk_step, NULL }, 4169 { "zio", "walk all zio structures, optionally for a particular spa_t", 4170 zio_walk_init, zio_walk_step, NULL }, 4171 { "zio_root", 4172 "walk all root zio_t structures, optionally for a particular spa_t", 4173 zio_walk_init, zio_walk_root_step, NULL }, 4174 { "spa", "walk all spa_t entries in the namespace", 4175 spa_walk_init, spa_walk_step, NULL }, 4176 { "metaslab", "given a spa_t *, walk all metaslab_t structures", 4177 metaslab_walk_init, metaslab_walk_step, NULL }, 4178 { "multilist", "given a multilist_t *, walk all list_t structures", 4179 multilist_walk_init, multilist_walk_step, NULL }, 4180 { "zfs_acl_node", "given a zfs_acl_t, walk all zfs_acl_nodes", 4181 zfs_acl_node_walk_init, zfs_acl_node_walk_step, NULL }, 4182 { "zfs_acl_node_aces", "given a zfs_acl_node_t, walk all ACEs", 4183 zfs_acl_node_aces_walk_init, zfs_aces_walk_step, NULL }, 4184 { "zfs_acl_node_aces0", 4185 "given a zfs_acl_node_t, walk all ACEs as ace_t", 4186 zfs_acl_node_aces0_walk_init, zfs_aces_walk_step, NULL }, 4187 { NULL } 4188 }; 4189 4190 static const mdb_modinfo_t modinfo = { 4191 MDB_API_VERSION, dcmds, walkers 4192 }; 4193 4194 const mdb_modinfo_t * 4195 _mdb_init(void) 4196 { 4197 return (&modinfo); 4198 } 4199