1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24 * Copyright (c) 2011, 2018 by Delphix. All rights reserved. 25 * Copyright 2020 Joyent, Inc. 26 * Copyright 2025 Oxide Computer Company 27 */ 28 29 /* Portions Copyright 2010 Robert Milkowski */ 30 31 /* 32 * ZFS_MDB lets dmu.h know that we don't have dmu_ot, and we will define our 33 * own macros to access the target's dmu_ot. Therefore it must be defined 34 * before including any ZFS headers. Note that we don't define 35 * DMU_OT_IS_ENCRYPTED_IMPL() or DMU_OT_BYTESWAP_IMPL(), therefore using them 36 * will result in a compilation error. If they are needed in the future, we 37 * can implement them similarly to mdb_dmu_ot_is_encrypted_impl(). 38 */ 39 #define ZFS_MDB 40 #define DMU_OT_IS_ENCRYPTED_IMPL(ot) mdb_dmu_ot_is_encrypted_impl(ot) 41 42 #include <mdb/mdb_ctf.h> 43 #include <sys/zfs_context.h> 44 #include <sys/mdb_modapi.h> 45 #include <sys/dbuf.h> 46 #include <sys/dmu_objset.h> 47 #include <sys/dsl_dir.h> 48 #include <sys/dsl_pool.h> 49 #include <sys/metaslab_impl.h> 50 #include <sys/space_map.h> 51 #include <sys/list.h> 52 #include <sys/vdev_impl.h> 53 #include <sys/zap_leaf.h> 54 #include <sys/zap_impl.h> 55 #include <ctype.h> 56 #include <sys/zfs_acl.h> 57 #include <sys/sa_impl.h> 58 #include <sys/multilist.h> 59 #include <sys/btree.h> 60 61 #ifdef _KERNEL 62 #define ZFS_OBJ_NAME "zfs" 63 #else 64 #define ZFS_OBJ_NAME "libzpool.so.1" 65 #endif 66 extern int64_t mdb_gethrtime(void); 67 68 #define ZFS_STRUCT "struct " ZFS_OBJ_NAME "`" 69 70 #ifndef _KERNEL 71 int aok; 72 #endif 73 74 enum spa_flags { 75 SPA_FLAG_CONFIG = 1 << 0, 76 SPA_FLAG_VDEVS = 1 << 1, 77 SPA_FLAG_ERRORS = 1 << 2, 78 SPA_FLAG_METASLAB_GROUPS = 1 << 3, 79 SPA_FLAG_METASLABS = 1 << 4, 80 SPA_FLAG_HISTOGRAMS = 1 << 5 81 }; 82 83 /* 84 * If any of these flags are set, call spa_vdevs in spa_print 85 */ 86 #define SPA_FLAG_ALL_VDEV \ 87 (SPA_FLAG_VDEVS | SPA_FLAG_ERRORS | SPA_FLAG_METASLAB_GROUPS | \ 88 SPA_FLAG_METASLABS) 89 90 static int 91 getmember(uintptr_t addr, const char *type, mdb_ctf_id_t *idp, 92 const char *member, int len, void *buf) 93 { 94 mdb_ctf_id_t id; 95 ulong_t off; 96 char name[64]; 97 98 if (idp == NULL) { 99 if (mdb_ctf_lookup_by_name(type, &id) == -1) { 100 mdb_warn("couldn't find type %s", type); 101 return (DCMD_ERR); 102 } 103 idp = &id; 104 } else { 105 type = name; 106 mdb_ctf_type_name(*idp, name, sizeof (name)); 107 } 108 109 if (mdb_ctf_offsetof(*idp, member, &off) == -1) { 110 mdb_warn("couldn't find member %s of type %s\n", member, type); 111 return (DCMD_ERR); 112 } 113 if (off % 8 != 0) { 114 mdb_warn("member %s of type %s is unsupported bitfield", 115 member, type); 116 return (DCMD_ERR); 117 } 118 off /= 8; 119 120 if (mdb_vread(buf, len, addr + off) == -1) { 121 mdb_warn("failed to read %s from %s at %p", 122 member, type, addr + off); 123 return (DCMD_ERR); 124 } 125 /* mdb_warn("read %s from %s at %p+%llx\n", member, type, addr, off); */ 126 127 return (0); 128 } 129 130 #define GETMEMB(addr, structname, member, dest) \ 131 getmember(addr, ZFS_STRUCT structname, NULL, #member, \ 132 sizeof (dest), &(dest)) 133 134 #define GETMEMBID(addr, ctfid, member, dest) \ 135 getmember(addr, NULL, ctfid, #member, sizeof (dest), &(dest)) 136 137 static boolean_t 138 strisprint(const char *cp) 139 { 140 for (; *cp; cp++) { 141 if (!isprint(*cp)) 142 return (B_FALSE); 143 } 144 return (B_TRUE); 145 } 146 147 /* 148 * <addr>::sm_entries <buffer length in bytes> 149 * 150 * Treat the buffer specified by the given address as a buffer that contains 151 * space map entries. Iterate over the specified number of entries and print 152 * them in both encoded and decoded form. 153 */ 154 /* ARGSUSED */ 155 static int 156 sm_entries(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 157 { 158 uint64_t bufsz = 0; 159 boolean_t preview = B_FALSE; 160 161 if (!(flags & DCMD_ADDRSPEC)) 162 return (DCMD_USAGE); 163 164 if (argc < 1) { 165 preview = B_TRUE; 166 bufsz = 2; 167 } else if (argc != 1) { 168 return (DCMD_USAGE); 169 } else { 170 switch (argv[0].a_type) { 171 case MDB_TYPE_STRING: 172 bufsz = mdb_strtoull(argv[0].a_un.a_str); 173 break; 174 case MDB_TYPE_IMMEDIATE: 175 bufsz = argv[0].a_un.a_val; 176 break; 177 default: 178 return (DCMD_USAGE); 179 } 180 } 181 182 char *actions[] = { "ALLOC", "FREE", "INVALID" }; 183 for (uintptr_t bufend = addr + bufsz; addr < bufend; 184 addr += sizeof (uint64_t)) { 185 uint64_t nwords; 186 uint64_t start_addr = addr; 187 188 uint64_t word = 0; 189 if (mdb_vread(&word, sizeof (word), addr) == -1) { 190 mdb_warn("failed to read space map entry %p", addr); 191 return (DCMD_ERR); 192 } 193 194 if (SM_PREFIX_DECODE(word) == SM_DEBUG_PREFIX) { 195 (void) mdb_printf("\t [%6llu] %s: txg %llu, " 196 "pass %llu\n", 197 (u_longlong_t)(addr), 198 actions[SM_DEBUG_ACTION_DECODE(word)], 199 (u_longlong_t)SM_DEBUG_TXG_DECODE(word), 200 (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(word)); 201 continue; 202 } 203 204 char entry_type; 205 uint64_t raw_offset, raw_run, vdev_id = SM_NO_VDEVID; 206 207 if (SM_PREFIX_DECODE(word) != SM2_PREFIX) { 208 entry_type = (SM_TYPE_DECODE(word) == SM_ALLOC) ? 209 'A' : 'F'; 210 raw_offset = SM_OFFSET_DECODE(word); 211 raw_run = SM_RUN_DECODE(word); 212 nwords = 1; 213 } else { 214 ASSERT3U(SM_PREFIX_DECODE(word), ==, SM2_PREFIX); 215 216 raw_run = SM2_RUN_DECODE(word); 217 vdev_id = SM2_VDEV_DECODE(word); 218 219 /* it is a two-word entry so we read another word */ 220 addr += sizeof (uint64_t); 221 if (addr >= bufend) { 222 mdb_warn("buffer ends in the middle of a two " 223 "word entry\n", addr); 224 return (DCMD_ERR); 225 } 226 227 if (mdb_vread(&word, sizeof (word), addr) == -1) { 228 mdb_warn("failed to read space map entry %p", 229 addr); 230 return (DCMD_ERR); 231 } 232 233 entry_type = (SM2_TYPE_DECODE(word) == SM_ALLOC) ? 234 'A' : 'F'; 235 raw_offset = SM2_OFFSET_DECODE(word); 236 nwords = 2; 237 } 238 239 (void) mdb_printf("\t [%6llx] %c range:" 240 " %010llx-%010llx size: %06llx vdev: %06llu words: %llu\n", 241 (u_longlong_t)start_addr, 242 entry_type, (u_longlong_t)raw_offset, 243 (u_longlong_t)(raw_offset + raw_run), 244 (u_longlong_t)raw_run, 245 (u_longlong_t)vdev_id, (u_longlong_t)nwords); 246 247 if (preview) 248 break; 249 } 250 return (DCMD_OK); 251 } 252 253 static int 254 mdb_dsl_dir_name(uintptr_t addr, char *buf) 255 { 256 static int gotid; 257 static mdb_ctf_id_t dd_id; 258 uintptr_t dd_parent; 259 char dd_myname[ZFS_MAX_DATASET_NAME_LEN]; 260 261 if (!gotid) { 262 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "dsl_dir", 263 &dd_id) == -1) { 264 mdb_warn("couldn't find struct dsl_dir"); 265 return (DCMD_ERR); 266 } 267 gotid = TRUE; 268 } 269 if (GETMEMBID(addr, &dd_id, dd_parent, dd_parent) || 270 GETMEMBID(addr, &dd_id, dd_myname, dd_myname)) { 271 return (DCMD_ERR); 272 } 273 274 if (dd_parent) { 275 if (mdb_dsl_dir_name(dd_parent, buf)) 276 return (DCMD_ERR); 277 strcat(buf, "/"); 278 } 279 280 if (dd_myname[0]) 281 strcat(buf, dd_myname); 282 else 283 strcat(buf, "???"); 284 285 return (0); 286 } 287 288 static int 289 objset_name(uintptr_t addr, char *buf) 290 { 291 static int gotid; 292 static mdb_ctf_id_t os_id, ds_id; 293 uintptr_t os_dsl_dataset; 294 char ds_snapname[ZFS_MAX_DATASET_NAME_LEN]; 295 uintptr_t ds_dir; 296 297 buf[0] = '\0'; 298 299 if (!gotid) { 300 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "objset", 301 &os_id) == -1) { 302 mdb_warn("couldn't find struct objset"); 303 return (DCMD_ERR); 304 } 305 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "dsl_dataset", 306 &ds_id) == -1) { 307 mdb_warn("couldn't find struct dsl_dataset"); 308 return (DCMD_ERR); 309 } 310 311 gotid = TRUE; 312 } 313 314 if (GETMEMBID(addr, &os_id, os_dsl_dataset, os_dsl_dataset)) 315 return (DCMD_ERR); 316 317 if (os_dsl_dataset == 0) { 318 strcat(buf, "mos"); 319 return (0); 320 } 321 322 if (GETMEMBID(os_dsl_dataset, &ds_id, ds_snapname, ds_snapname) || 323 GETMEMBID(os_dsl_dataset, &ds_id, ds_dir, ds_dir)) { 324 return (DCMD_ERR); 325 } 326 327 if (ds_dir && mdb_dsl_dir_name(ds_dir, buf)) 328 return (DCMD_ERR); 329 330 if (ds_snapname[0]) { 331 strcat(buf, "@"); 332 strcat(buf, ds_snapname); 333 } 334 return (0); 335 } 336 337 static int 338 enum_lookup(char *type, int val, const char *prefix, size_t size, char *out) 339 { 340 const char *cp; 341 size_t len = strlen(prefix); 342 mdb_ctf_id_t enum_type; 343 344 if (mdb_ctf_lookup_by_name(type, &enum_type) != 0) { 345 mdb_warn("Could not find enum for %s", type); 346 return (-1); 347 } 348 349 if ((cp = mdb_ctf_enum_name(enum_type, val)) != NULL) { 350 if (strncmp(cp, prefix, len) == 0) 351 cp += len; 352 (void) strncpy(out, cp, size); 353 } else { 354 mdb_snprintf(out, size, "? (%d)", val); 355 } 356 return (0); 357 } 358 359 /* ARGSUSED */ 360 static int 361 zfs_params(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 362 { 363 /* 364 * This table can be approximately generated by running: 365 * egrep "^[a-z0-9_]+ [a-z0-9_]+( =.*)?;" *.c | cut -d ' ' -f 2 366 */ 367 static const char *params[] = { 368 "arc_lotsfree_percent", 369 "arc_pages_pp_reserve", 370 "arc_reduce_dnlc_percent", 371 "arc_swapfs_reserve", 372 "arc_zio_arena_free_shift", 373 "dbuf_cache_hiwater_pct", 374 "dbuf_cache_lowater_pct", 375 "dbuf_cache_max_bytes", 376 "dbuf_cache_max_shift", 377 "ddt_zap_indirect_blockshift", 378 "ddt_zap_leaf_blockshift", 379 "ditto_same_vdev_distance_shift", 380 "dmu_find_threads", 381 "dmu_rescan_dnode_threshold", 382 "dsl_scan_delay_completion", 383 "fzap_default_block_shift", 384 "l2arc_feed_again", 385 "l2arc_feed_min_ms", 386 "l2arc_feed_secs", 387 "l2arc_headroom", 388 "l2arc_headroom_boost", 389 "l2arc_noprefetch", 390 "l2arc_norw", 391 "l2arc_write_boost", 392 "l2arc_write_max", 393 "metaslab_aliquot", 394 "metaslab_bias_enabled", 395 "metaslab_debug_load", 396 "metaslab_debug_unload", 397 "metaslab_df_alloc_threshold", 398 "metaslab_df_free_pct", 399 "metaslab_fragmentation_factor_enabled", 400 "metaslab_force_ganging", 401 "metaslab_lba_weighting_enabled", 402 "metaslab_load_pct", 403 "metaslab_min_alloc_size", 404 "metaslab_ndf_clump_shift", 405 "metaslab_preload_enabled", 406 "metaslab_preload_limit", 407 "metaslab_trace_enabled", 408 "metaslab_trace_max_entries", 409 "metaslab_unload_delay", 410 "metaslabs_per_vdev", 411 "reference_history", 412 "reference_tracking_enable", 413 "send_holes_without_birth_time", 414 "spa_asize_inflation", 415 "spa_load_verify_data", 416 "spa_load_verify_maxinflight", 417 "spa_load_verify_metadata", 418 "spa_max_replication_override", 419 "spa_min_slop", 420 "spa_mode_global", 421 "spa_slop_shift", 422 "space_map_blksz", 423 "vdev_mirror_shift", 424 "zfetch_max_distance", 425 "zfs_abd_chunk_size", 426 "zfs_abd_scatter_enabled", 427 "zfs_arc_average_blocksize", 428 "zfs_arc_evict_batch_limit", 429 "zfs_arc_grow_retry", 430 "zfs_arc_max", 431 "zfs_arc_meta_limit", 432 "zfs_arc_meta_min", 433 "zfs_arc_min", 434 "zfs_arc_p_min_shift", 435 "zfs_arc_shrink_shift", 436 "zfs_async_block_max_blocks", 437 "zfs_ccw_retry_interval", 438 "zfs_commit_timeout_pct", 439 "zfs_compressed_arc_enabled", 440 "zfs_condense_indirect_commit_entry_delay_ticks", 441 "zfs_condense_indirect_vdevs_enable", 442 "zfs_condense_max_obsolete_bytes", 443 "zfs_condense_min_mapping_bytes", 444 "zfs_condense_pct", 445 "zfs_dbgmsg_maxsize", 446 "zfs_deadman_checktime_ms", 447 "zfs_deadman_enabled", 448 "zfs_deadman_synctime_ms", 449 "zfs_dedup_prefetch", 450 "zfs_default_bs", 451 "zfs_default_ibs", 452 "zfs_delay_max_ns", 453 "zfs_delay_min_dirty_percent", 454 "zfs_delay_resolution_ns", 455 "zfs_delay_scale", 456 "zfs_dirty_data_max", 457 "zfs_dirty_data_max_max", 458 "zfs_dirty_data_max_percent", 459 "zfs_dirty_data_sync", 460 "zfs_flags", 461 "zfs_free_bpobj_enabled", 462 "zfs_free_leak_on_eio", 463 "zfs_free_min_time_ms", 464 "zfs_immediate_write_sz", 465 "zfs_indirect_condense_obsolete_pct", 466 "zfs_lua_check_instrlimit_interval", 467 "zfs_lua_max_instrlimit", 468 "zfs_lua_max_memlimit", 469 "zfs_max_recordsize", 470 "zfs_mdcomp_disable", 471 "zfs_metaslab_condense_block_threshold", 472 "zfs_metaslab_fragmentation_threshold", 473 "zfs_metaslab_segment_weight_enabled", 474 "zfs_metaslab_switch_threshold", 475 "zfs_mg_fragmentation_threshold", 476 "zfs_mg_noalloc_threshold", 477 "zfs_multilist_num_sublists", 478 "zfs_no_scrub_io", 479 "zfs_no_scrub_prefetch", 480 "zfs_nocacheflush", 481 "zfs_nopwrite_enabled", 482 "zfs_object_remap_one_indirect_delay_ticks", 483 "zfs_obsolete_min_time_ms", 484 "zfs_pd_bytes_max", 485 "zfs_per_txg_dirty_frees_percent", 486 "zfs_prefetch_disable", 487 "zfs_read_chunk_size", 488 "zfs_recover", 489 "zfs_recv_queue_length", 490 "zfs_redundant_metadata_most_ditto_level", 491 "zfs_remap_blkptr_enable", 492 "zfs_remove_max_copy_bytes", 493 "zfs_remove_max_segment", 494 "zfs_resilver_min_time_ms", 495 "zfs_scan_min_time_ms", 496 "zfs_scrub_limit", 497 "zfs_send_corrupt_data", 498 "zfs_send_queue_length", 499 "zfs_send_set_freerecords_bit", 500 "zfs_sync_pass_deferred_free", 501 "zfs_sync_pass_dont_compress", 502 "zfs_sync_pass_rewrite", 503 "zfs_sync_taskq_batch_pct", 504 "zfs_top_maxinflight", 505 "zfs_txg_timeout", 506 "zfs_vdev_aggregation_limit", 507 "zfs_vdev_async_read_max_active", 508 "zfs_vdev_async_read_min_active", 509 "zfs_vdev_async_write_active_max_dirty_percent", 510 "zfs_vdev_async_write_active_min_dirty_percent", 511 "zfs_vdev_async_write_max_active", 512 "zfs_vdev_async_write_min_active", 513 "zfs_vdev_cache_bshift", 514 "zfs_vdev_cache_max", 515 "zfs_vdev_cache_size", 516 "zfs_vdev_max_active", 517 "zfs_vdev_queue_depth_pct", 518 "zfs_vdev_read_gap_limit", 519 "zfs_vdev_removal_max_active", 520 "zfs_vdev_removal_min_active", 521 "zfs_vdev_scrub_max_active", 522 "zfs_vdev_scrub_min_active", 523 "zfs_vdev_sync_read_max_active", 524 "zfs_vdev_sync_read_min_active", 525 "zfs_vdev_sync_write_max_active", 526 "zfs_vdev_sync_write_min_active", 527 "zfs_vdev_write_gap_limit", 528 "zfs_write_implies_delete_child", 529 "zfs_zil_clean_taskq_maxalloc", 530 "zfs_zil_clean_taskq_minalloc", 531 "zfs_zil_clean_taskq_nthr_pct", 532 "zil_replay_disable", 533 "zil_slog_bulk", 534 "zio_buf_debug_limit", 535 "zio_dva_throttle_enabled", 536 "zio_injection_enabled", 537 "zvol_immediate_write_sz", 538 "zvol_maxphys", 539 "zvol_unmap_enabled", 540 "zvol_unmap_sync_enabled", 541 "zfs_max_dataset_nesting", 542 }; 543 544 for (int i = 0; i < sizeof (params) / sizeof (params[0]); i++) { 545 int sz; 546 uint64_t val64; 547 uint32_t *val32p = (uint32_t *)&val64; 548 549 sz = mdb_readvar(&val64, params[i]); 550 if (sz == 4) { 551 mdb_printf("%s = 0x%x\n", params[i], *val32p); 552 } else if (sz == 8) { 553 mdb_printf("%s = 0x%llx\n", params[i], val64); 554 } else { 555 mdb_warn("variable %s not found", params[i]); 556 } 557 } 558 559 return (DCMD_OK); 560 } 561 562 /* ARGSUSED */ 563 static int 564 dva(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 565 { 566 dva_t dva; 567 if (mdb_vread(&dva, sizeof (dva_t), addr) == -1) { 568 mdb_warn("failed to read dva_t"); 569 return (DCMD_ERR); 570 } 571 mdb_printf("<%llu:%llx:%llx>\n", 572 (u_longlong_t)DVA_GET_VDEV(&dva), 573 (u_longlong_t)DVA_GET_OFFSET(&dva), 574 (u_longlong_t)DVA_GET_ASIZE(&dva)); 575 576 return (DCMD_OK); 577 } 578 579 typedef struct mdb_dmu_object_type_info { 580 boolean_t ot_encrypt; 581 } mdb_dmu_object_type_info_t; 582 583 static boolean_t 584 mdb_dmu_ot_is_encrypted_impl(dmu_object_type_t ot) 585 { 586 mdb_dmu_object_type_info_t mdoti; 587 GElf_Sym sym; 588 size_t sz = mdb_ctf_sizeof_by_name("dmu_object_type_info_t"); 589 590 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "dmu_ot", &sym)) { 591 mdb_warn("failed to find " ZFS_OBJ_NAME "`dmu_ot"); 592 return (B_FALSE); 593 } 594 595 if (mdb_ctf_vread(&mdoti, "dmu_object_type_info_t", 596 "mdb_dmu_object_type_info_t", sym.st_value + sz * ot, 0) != 0) { 597 return (B_FALSE); 598 } 599 600 return (mdoti.ot_encrypt); 601 } 602 603 /* ARGSUSED */ 604 static int 605 blkptr(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 606 { 607 char type[80], checksum[80], compress[80]; 608 blkptr_t blk, *bp = &blk; 609 char buf[BP_SPRINTF_LEN]; 610 611 if (mdb_vread(&blk, sizeof (blkptr_t), addr) == -1) { 612 mdb_warn("failed to read blkptr_t"); 613 return (DCMD_ERR); 614 } 615 616 if (enum_lookup("enum dmu_object_type", BP_GET_TYPE(bp), "DMU_OT_", 617 sizeof (type), type) == -1 || 618 enum_lookup("enum zio_checksum", BP_GET_CHECKSUM(bp), 619 "ZIO_CHECKSUM_", sizeof (checksum), checksum) == -1 || 620 enum_lookup("enum zio_compress", BP_GET_COMPRESS(bp), 621 "ZIO_COMPRESS_", sizeof (compress), compress) == -1) { 622 mdb_warn("Could not find blkptr enumerated types"); 623 return (DCMD_ERR); 624 } 625 626 SNPRINTF_BLKPTR(mdb_snprintf, '\n', buf, sizeof (buf), bp, type, 627 checksum, compress); 628 629 mdb_printf("%s\n", buf); 630 631 return (DCMD_OK); 632 } 633 634 typedef struct mdb_dmu_buf_impl { 635 struct { 636 uint64_t db_object; 637 uintptr_t db_data; 638 } db; 639 uintptr_t db_objset; 640 uint64_t db_level; 641 uint64_t db_blkid; 642 struct { 643 uint64_t rc_count; 644 } db_holds; 645 } mdb_dmu_buf_impl_t; 646 647 /* ARGSUSED */ 648 static int 649 dbuf(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 650 { 651 mdb_dmu_buf_impl_t db; 652 char objectname[32]; 653 char blkidname[32]; 654 char path[ZFS_MAX_DATASET_NAME_LEN]; 655 int ptr_width = (int)(sizeof (void *)) * 2; 656 657 if (DCMD_HDRSPEC(flags)) 658 mdb_printf("%*s %8s %3s %9s %5s %s\n", 659 ptr_width, "addr", "object", "lvl", "blkid", "holds", "os"); 660 661 if (mdb_ctf_vread(&db, ZFS_STRUCT "dmu_buf_impl", "mdb_dmu_buf_impl_t", 662 addr, 0) == -1) 663 return (DCMD_ERR); 664 665 if (db.db.db_object == DMU_META_DNODE_OBJECT) 666 (void) strcpy(objectname, "mdn"); 667 else 668 (void) mdb_snprintf(objectname, sizeof (objectname), "%llx", 669 (u_longlong_t)db.db.db_object); 670 671 if (db.db_blkid == DMU_BONUS_BLKID) 672 (void) strcpy(blkidname, "bonus"); 673 else 674 (void) mdb_snprintf(blkidname, sizeof (blkidname), "%llx", 675 (u_longlong_t)db.db_blkid); 676 677 if (objset_name(db.db_objset, path)) { 678 return (DCMD_ERR); 679 } 680 681 mdb_printf("%*p %8s %3u %9s %5llu %s\n", ptr_width, addr, 682 objectname, (int)db.db_level, blkidname, 683 db.db_holds.rc_count, path); 684 685 return (DCMD_OK); 686 } 687 688 /* ARGSUSED */ 689 static int 690 dbuf_stats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 691 { 692 #define HISTOSZ 32 693 uintptr_t dbp; 694 dmu_buf_impl_t db; 695 dbuf_hash_table_t ht; 696 uint64_t bucket, ndbufs; 697 uint64_t histo[HISTOSZ]; 698 uint64_t histo2[HISTOSZ]; 699 int i, maxidx; 700 701 if (mdb_readvar(&ht, "dbuf_hash_table") == -1) { 702 mdb_warn("failed to read 'dbuf_hash_table'"); 703 return (DCMD_ERR); 704 } 705 706 for (i = 0; i < HISTOSZ; i++) { 707 histo[i] = 0; 708 histo2[i] = 0; 709 } 710 711 ndbufs = 0; 712 for (bucket = 0; bucket < ht.hash_table_mask+1; bucket++) { 713 int len; 714 715 if (mdb_vread(&dbp, sizeof (void *), 716 (uintptr_t)(ht.hash_table+bucket)) == -1) { 717 mdb_warn("failed to read hash bucket %u at %p", 718 bucket, ht.hash_table+bucket); 719 return (DCMD_ERR); 720 } 721 722 len = 0; 723 while (dbp != 0) { 724 if (mdb_vread(&db, sizeof (dmu_buf_impl_t), 725 dbp) == -1) { 726 mdb_warn("failed to read dbuf at %p", dbp); 727 return (DCMD_ERR); 728 } 729 dbp = (uintptr_t)db.db_hash_next; 730 for (i = MIN(len, HISTOSZ - 1); i >= 0; i--) 731 histo2[i]++; 732 len++; 733 ndbufs++; 734 } 735 736 if (len >= HISTOSZ) 737 len = HISTOSZ-1; 738 histo[len]++; 739 } 740 741 mdb_printf("hash table has %llu buckets, %llu dbufs " 742 "(avg %llu buckets/dbuf)\n", 743 ht.hash_table_mask+1, ndbufs, 744 (ht.hash_table_mask+1)/ndbufs); 745 746 mdb_printf("\n"); 747 maxidx = 0; 748 for (i = 0; i < HISTOSZ; i++) 749 if (histo[i] > 0) 750 maxidx = i; 751 mdb_printf("hash chain length number of buckets\n"); 752 for (i = 0; i <= maxidx; i++) 753 mdb_printf("%u %llu\n", i, histo[i]); 754 755 mdb_printf("\n"); 756 maxidx = 0; 757 for (i = 0; i < HISTOSZ; i++) 758 if (histo2[i] > 0) 759 maxidx = i; 760 mdb_printf("hash chain depth number of dbufs\n"); 761 for (i = 0; i <= maxidx; i++) 762 mdb_printf("%u or more %llu %llu%%\n", 763 i, histo2[i], histo2[i]*100/ndbufs); 764 765 766 return (DCMD_OK); 767 } 768 769 #define CHAIN_END 0xffff 770 /* 771 * ::zap_leaf [-v] 772 * 773 * Print a zap_leaf_phys_t, assumed to be 16k 774 */ 775 /* ARGSUSED */ 776 static int 777 zap_leaf(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 778 { 779 char buf[16*1024]; 780 int verbose = B_FALSE; 781 int four = B_FALSE; 782 dmu_buf_t l_dbuf; 783 zap_leaf_t l; 784 zap_leaf_phys_t *zlp = (void *)buf; 785 int i; 786 787 if (mdb_getopts(argc, argv, 788 'v', MDB_OPT_SETBITS, TRUE, &verbose, 789 '4', MDB_OPT_SETBITS, TRUE, &four, 790 NULL) != argc) 791 return (DCMD_USAGE); 792 793 l_dbuf.db_data = zlp; 794 l.l_dbuf = &l_dbuf; 795 l.l_bs = 14; /* assume 16k blocks */ 796 if (four) 797 l.l_bs = 12; 798 799 if (!(flags & DCMD_ADDRSPEC)) { 800 return (DCMD_USAGE); 801 } 802 803 if (mdb_vread(buf, sizeof (buf), addr) == -1) { 804 mdb_warn("failed to read zap_leaf_phys_t at %p", addr); 805 return (DCMD_ERR); 806 } 807 808 if (zlp->l_hdr.lh_block_type != ZBT_LEAF || 809 zlp->l_hdr.lh_magic != ZAP_LEAF_MAGIC) { 810 mdb_warn("This does not appear to be a zap_leaf_phys_t"); 811 return (DCMD_ERR); 812 } 813 814 mdb_printf("zap_leaf_phys_t at %p:\n", addr); 815 mdb_printf(" lh_prefix_len = %u\n", zlp->l_hdr.lh_prefix_len); 816 mdb_printf(" lh_prefix = %llx\n", zlp->l_hdr.lh_prefix); 817 mdb_printf(" lh_nentries = %u\n", zlp->l_hdr.lh_nentries); 818 mdb_printf(" lh_nfree = %u\n", zlp->l_hdr.lh_nfree, 819 zlp->l_hdr.lh_nfree * 100 / (ZAP_LEAF_NUMCHUNKS(&l))); 820 mdb_printf(" lh_freelist = %u\n", zlp->l_hdr.lh_freelist); 821 mdb_printf(" lh_flags = %x (%s)\n", zlp->l_hdr.lh_flags, 822 zlp->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED ? 823 "ENTRIES_CDSORTED" : ""); 824 825 if (verbose) { 826 mdb_printf(" hash table:\n"); 827 for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(&l); i++) { 828 if (zlp->l_hash[i] != CHAIN_END) 829 mdb_printf(" %u: %u\n", i, zlp->l_hash[i]); 830 } 831 } 832 833 mdb_printf(" chunks:\n"); 834 for (i = 0; i < ZAP_LEAF_NUMCHUNKS(&l); i++) { 835 /* LINTED: alignment */ 836 zap_leaf_chunk_t *zlc = &ZAP_LEAF_CHUNK(&l, i); 837 switch (zlc->l_entry.le_type) { 838 case ZAP_CHUNK_FREE: 839 if (verbose) { 840 mdb_printf(" %u: free; lf_next = %u\n", 841 i, zlc->l_free.lf_next); 842 } 843 break; 844 case ZAP_CHUNK_ENTRY: 845 mdb_printf(" %u: entry\n", i); 846 if (verbose) { 847 mdb_printf(" le_next = %u\n", 848 zlc->l_entry.le_next); 849 } 850 mdb_printf(" le_name_chunk = %u\n", 851 zlc->l_entry.le_name_chunk); 852 mdb_printf(" le_name_numints = %u\n", 853 zlc->l_entry.le_name_numints); 854 mdb_printf(" le_value_chunk = %u\n", 855 zlc->l_entry.le_value_chunk); 856 mdb_printf(" le_value_intlen = %u\n", 857 zlc->l_entry.le_value_intlen); 858 mdb_printf(" le_value_numints = %u\n", 859 zlc->l_entry.le_value_numints); 860 mdb_printf(" le_cd = %u\n", 861 zlc->l_entry.le_cd); 862 mdb_printf(" le_hash = %llx\n", 863 zlc->l_entry.le_hash); 864 break; 865 case ZAP_CHUNK_ARRAY: 866 mdb_printf(" %u: array", i); 867 if (strisprint((char *)zlc->l_array.la_array)) 868 mdb_printf(" \"%s\"", zlc->l_array.la_array); 869 mdb_printf("\n"); 870 if (verbose) { 871 int j; 872 mdb_printf(" "); 873 for (j = 0; j < ZAP_LEAF_ARRAY_BYTES; j++) { 874 mdb_printf("%02x ", 875 zlc->l_array.la_array[j]); 876 } 877 mdb_printf("\n"); 878 } 879 if (zlc->l_array.la_next != CHAIN_END) { 880 mdb_printf(" lf_next = %u\n", 881 zlc->l_array.la_next); 882 } 883 break; 884 default: 885 mdb_printf(" %u: undefined type %u\n", 886 zlc->l_entry.le_type); 887 } 888 } 889 890 return (DCMD_OK); 891 } 892 893 typedef struct dbufs_data { 894 mdb_ctf_id_t id; 895 uint64_t objset; 896 uint64_t object; 897 uint64_t level; 898 uint64_t blkid; 899 char *osname; 900 } dbufs_data_t; 901 902 #define DBUFS_UNSET (0xbaddcafedeadbeefULL) 903 904 /* ARGSUSED */ 905 static int 906 dbufs_cb(uintptr_t addr, const void *unknown, void *arg) 907 { 908 dbufs_data_t *data = arg; 909 uintptr_t objset; 910 dmu_buf_t db; 911 uint8_t level; 912 uint64_t blkid; 913 char osname[ZFS_MAX_DATASET_NAME_LEN]; 914 915 if (GETMEMBID(addr, &data->id, db_objset, objset) || 916 GETMEMBID(addr, &data->id, db, db) || 917 GETMEMBID(addr, &data->id, db_level, level) || 918 GETMEMBID(addr, &data->id, db_blkid, blkid)) { 919 return (WALK_ERR); 920 } 921 922 if ((data->objset == DBUFS_UNSET || data->objset == objset) && 923 (data->osname == NULL || (objset_name(objset, osname) == 0 && 924 strcmp(data->osname, osname) == 0)) && 925 (data->object == DBUFS_UNSET || data->object == db.db_object) && 926 (data->level == DBUFS_UNSET || data->level == level) && 927 (data->blkid == DBUFS_UNSET || data->blkid == blkid)) { 928 mdb_printf("%#lr\n", addr); 929 } 930 return (WALK_NEXT); 931 } 932 933 /* ARGSUSED */ 934 static int 935 dbufs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 936 { 937 dbufs_data_t data; 938 char *object = NULL; 939 char *blkid = NULL; 940 941 data.objset = data.object = data.level = data.blkid = DBUFS_UNSET; 942 data.osname = NULL; 943 944 if (mdb_getopts(argc, argv, 945 'O', MDB_OPT_UINT64, &data.objset, 946 'n', MDB_OPT_STR, &data.osname, 947 'o', MDB_OPT_STR, &object, 948 'l', MDB_OPT_UINT64, &data.level, 949 'b', MDB_OPT_STR, &blkid, 950 NULL) != argc) { 951 return (DCMD_USAGE); 952 } 953 954 if (object) { 955 if (strcmp(object, "mdn") == 0) { 956 data.object = DMU_META_DNODE_OBJECT; 957 } else { 958 data.object = mdb_strtoull(object); 959 } 960 } 961 962 if (blkid) { 963 if (strcmp(blkid, "bonus") == 0) { 964 data.blkid = DMU_BONUS_BLKID; 965 } else { 966 data.blkid = mdb_strtoull(blkid); 967 } 968 } 969 970 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "dmu_buf_impl", &data.id) == -1) { 971 mdb_warn("couldn't find struct dmu_buf_impl_t"); 972 return (DCMD_ERR); 973 } 974 975 if (mdb_walk("dmu_buf_impl_t", dbufs_cb, &data) != 0) { 976 mdb_warn("can't walk dbufs"); 977 return (DCMD_ERR); 978 } 979 980 return (DCMD_OK); 981 } 982 983 typedef struct abuf_find_data { 984 dva_t dva; 985 mdb_ctf_id_t id; 986 } abuf_find_data_t; 987 988 /* ARGSUSED */ 989 static int 990 abuf_find_cb(uintptr_t addr, const void *unknown, void *arg) 991 { 992 abuf_find_data_t *data = arg; 993 dva_t dva; 994 995 if (GETMEMBID(addr, &data->id, b_dva, dva)) { 996 return (WALK_ERR); 997 } 998 999 if (dva.dva_word[0] == data->dva.dva_word[0] && 1000 dva.dva_word[1] == data->dva.dva_word[1]) { 1001 mdb_printf("%#lr\n", addr); 1002 } 1003 return (WALK_NEXT); 1004 } 1005 1006 typedef struct mdb_arc_state { 1007 uintptr_t arcs_list[ARC_BUFC_NUMTYPES]; 1008 } mdb_arc_state_t; 1009 1010 /* ARGSUSED */ 1011 static int 1012 abuf_find(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1013 { 1014 abuf_find_data_t data; 1015 GElf_Sym sym; 1016 int i, j; 1017 const char *syms[] = { 1018 "ARC_mru", 1019 "ARC_mru_ghost", 1020 "ARC_mfu", 1021 "ARC_mfu_ghost", 1022 }; 1023 1024 if (argc != 2) 1025 return (DCMD_USAGE); 1026 1027 for (i = 0; i < 2; i ++) { 1028 switch (argv[i].a_type) { 1029 case MDB_TYPE_STRING: 1030 data.dva.dva_word[i] = mdb_strtoull(argv[i].a_un.a_str); 1031 break; 1032 case MDB_TYPE_IMMEDIATE: 1033 data.dva.dva_word[i] = argv[i].a_un.a_val; 1034 break; 1035 default: 1036 return (DCMD_USAGE); 1037 } 1038 } 1039 1040 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "arc_buf_hdr", &data.id) == -1) { 1041 mdb_warn("couldn't find struct arc_buf_hdr"); 1042 return (DCMD_ERR); 1043 } 1044 1045 for (i = 0; i < sizeof (syms) / sizeof (syms[0]); i++) { 1046 mdb_arc_state_t mas; 1047 1048 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, syms[i], &sym)) { 1049 mdb_warn("can't find symbol %s", syms[i]); 1050 return (DCMD_ERR); 1051 } 1052 1053 if (mdb_ctf_vread(&mas, "arc_state_t", "mdb_arc_state_t", 1054 sym.st_value, 0) != 0) { 1055 mdb_warn("can't read arcs_list of %s", syms[i]); 1056 return (DCMD_ERR); 1057 } 1058 1059 for (j = 0; j < ARC_BUFC_NUMTYPES; j++) { 1060 uintptr_t addr = mas.arcs_list[j]; 1061 1062 if (addr == 0) 1063 continue; 1064 1065 if (mdb_pwalk("multilist", abuf_find_cb, &data, 1066 addr) != 0) { 1067 mdb_warn("can't walk %s", syms[i]); 1068 return (DCMD_ERR); 1069 } 1070 } 1071 } 1072 1073 return (DCMD_OK); 1074 } 1075 1076 typedef struct dbgmsg_arg { 1077 boolean_t da_address; 1078 boolean_t da_hrtime; 1079 boolean_t da_timedelta; 1080 boolean_t da_time; 1081 boolean_t da_whatis; 1082 1083 hrtime_t da_curtime; 1084 } dbgmsg_arg_t; 1085 1086 static int 1087 dbgmsg_cb(uintptr_t addr, const void *unknown __unused, void *arg) 1088 { 1089 static mdb_ctf_id_t id; 1090 static boolean_t gotid; 1091 static ulong_t off; 1092 1093 dbgmsg_arg_t *da = arg; 1094 time_t timestamp; 1095 hrtime_t hrtime; 1096 char buf[1024]; 1097 1098 if (!gotid) { 1099 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "zfs_dbgmsg", &id) == 1100 -1) { 1101 mdb_warn("couldn't find struct zfs_dbgmsg"); 1102 return (WALK_ERR); 1103 } 1104 gotid = TRUE; 1105 if (mdb_ctf_offsetof(id, "zdm_msg", &off) == -1) { 1106 mdb_warn("couldn't find zdm_msg"); 1107 return (WALK_ERR); 1108 } 1109 off /= 8; 1110 } 1111 1112 if (GETMEMBID(addr, &id, zdm_timestamp, timestamp)) { 1113 return (WALK_ERR); 1114 } 1115 1116 if (da->da_hrtime || da->da_timedelta) { 1117 if (GETMEMBID(addr, &id, zdm_hrtime, hrtime)) { 1118 return (WALK_ERR); 1119 } 1120 } 1121 1122 if (mdb_readstr(buf, sizeof (buf), addr + off) == -1) { 1123 mdb_warn("failed to read zdm_msg at %p\n", addr + off); 1124 return (DCMD_ERR); 1125 } 1126 1127 if (da->da_address) 1128 mdb_printf("%p ", addr); 1129 1130 if (da->da_timedelta) { 1131 int64_t diff; 1132 char dbuf[32] = { 0 }; 1133 1134 if (da->da_curtime == 0) 1135 da->da_curtime = mdb_gethrtime(); 1136 1137 diff = (int64_t)hrtime - da->da_curtime; 1138 mdb_nicetime(diff, dbuf, sizeof (dbuf)); 1139 mdb_printf("%-20s ", dbuf); 1140 } else if (da->da_hrtime) { 1141 mdb_printf("%016x ", hrtime); 1142 } else if (da->da_time) { 1143 mdb_printf("%Y ", timestamp); 1144 } 1145 1146 mdb_printf("%s\n", buf); 1147 1148 if (da->da_whatis) 1149 (void) mdb_call_dcmd("whatis", addr, DCMD_ADDRSPEC, 0, NULL); 1150 1151 return (WALK_NEXT); 1152 } 1153 1154 static int 1155 dbgmsg(uintptr_t addr, uint_t flags __unused, int argc, const mdb_arg_t *argv) 1156 { 1157 GElf_Sym sym; 1158 dbgmsg_arg_t da = { 0 }; 1159 boolean_t verbose = B_FALSE; 1160 1161 if (mdb_getopts(argc, argv, 1162 'a', MDB_OPT_SETBITS, B_TRUE, &da.da_address, 1163 'r', MDB_OPT_SETBITS, B_TRUE, &da.da_hrtime, 1164 't', MDB_OPT_SETBITS, B_TRUE, &da.da_timedelta, 1165 'T', MDB_OPT_SETBITS, B_TRUE, &da.da_time, 1166 'v', MDB_OPT_SETBITS, B_TRUE, &verbose, 1167 'w', MDB_OPT_SETBITS, B_TRUE, &da.da_whatis, 1168 NULL) != argc) { 1169 return (DCMD_USAGE); 1170 } 1171 1172 if (verbose) 1173 da.da_address = da.da_time = B_TRUE; 1174 1175 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "zfs_dbgmsgs", &sym)) { 1176 mdb_warn("can't find zfs_dbgmsgs"); 1177 return (DCMD_ERR); 1178 } 1179 1180 if (mdb_pwalk("list", dbgmsg_cb, &da, sym.st_value) != 0) { 1181 mdb_warn("can't walk zfs_dbgmsgs"); 1182 return (DCMD_ERR); 1183 } 1184 1185 return (DCMD_OK); 1186 } 1187 1188 1189 static void 1190 dbgmsg_help(void) 1191 { 1192 mdb_printf("Print entries from the ZFS debug log.\n\n" 1193 "%<b>OPTIONS%</b>\n" 1194 "\t-a\tInclude the address of each zfs_dbgmsg_t.\n" 1195 "\t-r\tDisplay high-resolution timestamps.\n" 1196 "\t-t\tInclude the age of the message.\n" 1197 "\t-T\tInclude the date/time of the message.\n" 1198 "\t-v\tEquivalent to -aT.\n" 1199 "\t-w\tRun ::whatis on each zfs_dbgmsg_t. Useful in DEBUG kernels\n" 1200 "\t\tto show the origin of the message.\n"); 1201 } 1202 1203 /*ARGSUSED*/ 1204 static int 1205 arc_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1206 { 1207 kstat_named_t *stats; 1208 GElf_Sym sym; 1209 int nstats, i; 1210 uint_t opt_a = FALSE; 1211 uint_t opt_b = FALSE; 1212 uint_t shift = 0; 1213 const char *suffix; 1214 1215 static const char *bytestats[] = { 1216 "p", "c", "c_min", "c_max", "size", "duplicate_buffers_size", 1217 "arc_meta_used", "arc_meta_limit", "arc_meta_max", 1218 "arc_meta_min", "hdr_size", "data_size", "metadata_size", 1219 "other_size", "anon_size", "anon_evictable_data", 1220 "anon_evictable_metadata", "mru_size", "mru_evictable_data", 1221 "mru_evictable_metadata", "mru_ghost_size", 1222 "mru_ghost_evictable_data", "mru_ghost_evictable_metadata", 1223 "mfu_size", "mfu_evictable_data", "mfu_evictable_metadata", 1224 "mfu_ghost_size", "mfu_ghost_evictable_data", 1225 "mfu_ghost_evictable_metadata", "evict_l2_cached", 1226 "evict_l2_eligible", "evict_l2_ineligible", "l2_read_bytes", 1227 "l2_write_bytes", "l2_size", "l2_asize", "l2_hdr_size", 1228 "compressed_size", "uncompressed_size", "overhead_size", 1229 NULL 1230 }; 1231 1232 static const char *extras[] = { 1233 "arc_no_grow", "arc_tempreserve", 1234 NULL 1235 }; 1236 1237 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "arc_stats", &sym) == -1) { 1238 mdb_warn("failed to find 'arc_stats'"); 1239 return (DCMD_ERR); 1240 } 1241 1242 stats = mdb_zalloc(sym.st_size, UM_SLEEP | UM_GC); 1243 1244 if (mdb_vread(stats, sym.st_size, sym.st_value) == -1) { 1245 mdb_warn("couldn't read 'arc_stats' at %p", sym.st_value); 1246 return (DCMD_ERR); 1247 } 1248 1249 nstats = sym.st_size / sizeof (kstat_named_t); 1250 1251 /* NB: -a / opt_a are ignored for backwards compatability */ 1252 if (mdb_getopts(argc, argv, 1253 'a', MDB_OPT_SETBITS, TRUE, &opt_a, 1254 'b', MDB_OPT_SETBITS, TRUE, &opt_b, 1255 'k', MDB_OPT_SETBITS, 10, &shift, 1256 'm', MDB_OPT_SETBITS, 20, &shift, 1257 'g', MDB_OPT_SETBITS, 30, &shift, 1258 NULL) != argc) 1259 return (DCMD_USAGE); 1260 1261 if (!opt_b && !shift) 1262 shift = 20; 1263 1264 switch (shift) { 1265 case 0: 1266 suffix = "B"; 1267 break; 1268 case 10: 1269 suffix = "KB"; 1270 break; 1271 case 20: 1272 suffix = "MB"; 1273 break; 1274 case 30: 1275 suffix = "GB"; 1276 break; 1277 default: 1278 suffix = "XX"; 1279 } 1280 1281 for (i = 0; i < nstats; i++) { 1282 int j; 1283 boolean_t bytes = B_FALSE; 1284 1285 for (j = 0; bytestats[j]; j++) { 1286 if (strcmp(stats[i].name, bytestats[j]) == 0) { 1287 bytes = B_TRUE; 1288 break; 1289 } 1290 } 1291 1292 if (bytes) { 1293 mdb_printf("%-25s = %9llu %s\n", stats[i].name, 1294 stats[i].value.ui64 >> shift, suffix); 1295 } else { 1296 mdb_printf("%-25s = %9llu\n", stats[i].name, 1297 stats[i].value.ui64); 1298 } 1299 } 1300 1301 for (i = 0; extras[i]; i++) { 1302 uint64_t buf; 1303 1304 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, extras[i], &sym) == -1) { 1305 mdb_warn("failed to find '%s'", extras[i]); 1306 return (DCMD_ERR); 1307 } 1308 1309 if (sym.st_size != sizeof (uint64_t) && 1310 sym.st_size != sizeof (uint32_t)) { 1311 mdb_warn("expected scalar for variable '%s'\n", 1312 extras[i]); 1313 return (DCMD_ERR); 1314 } 1315 1316 if (mdb_vread(&buf, sym.st_size, sym.st_value) == -1) { 1317 mdb_warn("couldn't read '%s'", extras[i]); 1318 return (DCMD_ERR); 1319 } 1320 1321 mdb_printf("%-25s = ", extras[i]); 1322 1323 /* NB: all the 64-bit extras happen to be byte counts */ 1324 if (sym.st_size == sizeof (uint64_t)) 1325 mdb_printf("%9llu %s\n", buf >> shift, suffix); 1326 1327 if (sym.st_size == sizeof (uint32_t)) 1328 mdb_printf("%9d\n", *((uint32_t *)&buf)); 1329 } 1330 return (DCMD_OK); 1331 } 1332 1333 typedef struct mdb_spa_print { 1334 pool_state_t spa_state; 1335 char spa_name[ZFS_MAX_DATASET_NAME_LEN]; 1336 uintptr_t spa_normal_class; 1337 } mdb_spa_print_t; 1338 1339 1340 const char histo_stars[] = "****************************************"; 1341 const int histo_width = sizeof (histo_stars) - 1; 1342 1343 static void 1344 dump_histogram(const uint64_t *histo, int size, int offset) 1345 { 1346 int i; 1347 int minidx = size - 1; 1348 int maxidx = 0; 1349 uint64_t max = 0; 1350 1351 for (i = 0; i < size; i++) { 1352 if (histo[i] > max) 1353 max = histo[i]; 1354 if (histo[i] > 0 && i > maxidx) 1355 maxidx = i; 1356 if (histo[i] > 0 && i < minidx) 1357 minidx = i; 1358 } 1359 1360 if (max < histo_width) 1361 max = histo_width; 1362 1363 for (i = minidx; i <= maxidx; i++) { 1364 mdb_printf("%3u: %6llu %s\n", 1365 i + offset, (u_longlong_t)histo[i], 1366 &histo_stars[(max - histo[i]) * histo_width / max]); 1367 } 1368 } 1369 1370 typedef struct mdb_metaslab_class { 1371 uint64_t mc_histogram[RANGE_TREE_HISTOGRAM_SIZE]; 1372 } mdb_metaslab_class_t; 1373 1374 /* 1375 * spa_class_histogram(uintptr_t class_addr) 1376 * 1377 * Prints free space histogram for a device class 1378 * 1379 * Returns DCMD_OK, or DCMD_ERR. 1380 */ 1381 static int 1382 spa_class_histogram(uintptr_t class_addr) 1383 { 1384 mdb_metaslab_class_t mc; 1385 if (mdb_ctf_vread(&mc, "metaslab_class_t", 1386 "mdb_metaslab_class_t", class_addr, 0) == -1) 1387 return (DCMD_ERR); 1388 1389 mdb_inc_indent(4); 1390 dump_histogram(mc.mc_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0); 1391 mdb_dec_indent(4); 1392 return (DCMD_OK); 1393 } 1394 1395 /* 1396 * ::spa 1397 * 1398 * -c Print configuration information as well 1399 * -v Print vdev state 1400 * -e Print vdev error stats 1401 * -m Print vdev metaslab info 1402 * -M print vdev metaslab group info 1403 * -h Print histogram info (must be combined with -m or -M) 1404 * 1405 * Print a summarized spa_t. When given no arguments, prints out a table of all 1406 * active pools on the system. 1407 */ 1408 /* ARGSUSED */ 1409 static int 1410 spa_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1411 { 1412 const char *statetab[] = { "ACTIVE", "EXPORTED", "DESTROYED", 1413 "SPARE", "L2CACHE", "UNINIT", "UNAVAIL", "POTENTIAL" }; 1414 const char *state; 1415 int spa_flags = 0; 1416 1417 if (mdb_getopts(argc, argv, 1418 'c', MDB_OPT_SETBITS, SPA_FLAG_CONFIG, &spa_flags, 1419 'v', MDB_OPT_SETBITS, SPA_FLAG_VDEVS, &spa_flags, 1420 'e', MDB_OPT_SETBITS, SPA_FLAG_ERRORS, &spa_flags, 1421 'M', MDB_OPT_SETBITS, SPA_FLAG_METASLAB_GROUPS, &spa_flags, 1422 'm', MDB_OPT_SETBITS, SPA_FLAG_METASLABS, &spa_flags, 1423 'h', MDB_OPT_SETBITS, SPA_FLAG_HISTOGRAMS, &spa_flags, 1424 NULL) != argc) 1425 return (DCMD_USAGE); 1426 1427 if (!(flags & DCMD_ADDRSPEC)) { 1428 if (mdb_walk_dcmd("spa", "spa", argc, argv) == -1) { 1429 mdb_warn("can't walk spa"); 1430 return (DCMD_ERR); 1431 } 1432 1433 return (DCMD_OK); 1434 } 1435 1436 if (flags & DCMD_PIPE_OUT) { 1437 mdb_printf("%#lr\n", addr); 1438 return (DCMD_OK); 1439 } 1440 1441 if (DCMD_HDRSPEC(flags)) 1442 mdb_printf("%<u>%-?s %9s %-*s%</u>\n", "ADDR", "STATE", 1443 sizeof (uintptr_t) == 4 ? 60 : 52, "NAME"); 1444 1445 mdb_spa_print_t spa; 1446 if (mdb_ctf_vread(&spa, "spa_t", "mdb_spa_print_t", addr, 0) == -1) 1447 return (DCMD_ERR); 1448 1449 if (spa.spa_state < 0 || spa.spa_state > POOL_STATE_UNAVAIL) 1450 state = "UNKNOWN"; 1451 else 1452 state = statetab[spa.spa_state]; 1453 1454 mdb_printf("%0?p %9s %s\n", addr, state, spa.spa_name); 1455 if (spa_flags & SPA_FLAG_HISTOGRAMS) 1456 spa_class_histogram(spa.spa_normal_class); 1457 1458 if (spa_flags & SPA_FLAG_CONFIG) { 1459 mdb_printf("\n"); 1460 mdb_inc_indent(4); 1461 if (mdb_call_dcmd("spa_config", addr, flags, 0, 1462 NULL) != DCMD_OK) 1463 return (DCMD_ERR); 1464 mdb_dec_indent(4); 1465 } 1466 1467 if (spa_flags & SPA_FLAG_ALL_VDEV) { 1468 mdb_arg_t v; 1469 char opts[100] = "-"; 1470 int args = 1471 (spa_flags | SPA_FLAG_VDEVS) == SPA_FLAG_VDEVS ? 0 : 1; 1472 1473 if (spa_flags & SPA_FLAG_ERRORS) 1474 strcat(opts, "e"); 1475 if (spa_flags & SPA_FLAG_METASLABS) 1476 strcat(opts, "m"); 1477 if (spa_flags & SPA_FLAG_METASLAB_GROUPS) 1478 strcat(opts, "M"); 1479 if (spa_flags & SPA_FLAG_HISTOGRAMS) 1480 strcat(opts, "h"); 1481 1482 v.a_type = MDB_TYPE_STRING; 1483 v.a_un.a_str = opts; 1484 1485 mdb_printf("\n"); 1486 mdb_inc_indent(4); 1487 if (mdb_call_dcmd("spa_vdevs", addr, flags, args, 1488 &v) != DCMD_OK) 1489 return (DCMD_ERR); 1490 mdb_dec_indent(4); 1491 } 1492 1493 return (DCMD_OK); 1494 } 1495 1496 typedef struct mdb_spa_config_spa { 1497 uintptr_t spa_config; 1498 } mdb_spa_config_spa_t; 1499 1500 /* 1501 * ::spa_config 1502 * 1503 * Given a spa_t, print the configuration information stored in spa_config. 1504 * Since it's just an nvlist, format it as an indented list of name=value pairs. 1505 * We simply read the value of spa_config and pass off to ::nvlist. 1506 */ 1507 /* ARGSUSED */ 1508 static int 1509 spa_print_config(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1510 { 1511 mdb_spa_config_spa_t spa; 1512 1513 if (argc != 0 || !(flags & DCMD_ADDRSPEC)) 1514 return (DCMD_USAGE); 1515 1516 if (mdb_ctf_vread(&spa, ZFS_STRUCT "spa", "mdb_spa_config_spa_t", 1517 addr, 0) == -1) 1518 return (DCMD_ERR); 1519 1520 if (spa.spa_config == 0) { 1521 mdb_printf("(none)\n"); 1522 return (DCMD_OK); 1523 } 1524 1525 return (mdb_call_dcmd("nvlist", spa.spa_config, flags, 1526 0, NULL)); 1527 } 1528 1529 typedef struct mdb_range_tree { 1530 struct { 1531 uint64_t bt_num_elems; 1532 uint64_t bt_num_nodes; 1533 } rt_root; 1534 uint64_t rt_space; 1535 range_seg_type_t rt_type; 1536 uint8_t rt_shift; 1537 uint64_t rt_start; 1538 } mdb_range_tree_t; 1539 1540 typedef struct mdb_metaslab_group { 1541 uint64_t mg_fragmentation; 1542 uint64_t mg_histogram[RANGE_TREE_HISTOGRAM_SIZE]; 1543 uintptr_t mg_vd; 1544 } mdb_metaslab_group_t; 1545 1546 typedef struct mdb_metaslab { 1547 uint64_t ms_id; 1548 uint64_t ms_start; 1549 uint64_t ms_size; 1550 int64_t ms_deferspace; 1551 uint64_t ms_fragmentation; 1552 uint64_t ms_weight; 1553 uintptr_t ms_allocating[TXG_SIZE]; 1554 uintptr_t ms_checkpointing; 1555 uintptr_t ms_freeing; 1556 uintptr_t ms_freed; 1557 uintptr_t ms_allocatable; 1558 uintptr_t ms_unflushed_frees; 1559 uintptr_t ms_unflushed_allocs; 1560 uintptr_t ms_sm; 1561 } mdb_metaslab_t; 1562 1563 typedef struct mdb_space_map_phys_t { 1564 int64_t smp_alloc; 1565 uint64_t smp_histogram[SPACE_MAP_HISTOGRAM_SIZE]; 1566 } mdb_space_map_phys_t; 1567 1568 typedef struct mdb_space_map { 1569 uint64_t sm_size; 1570 uint8_t sm_shift; 1571 uintptr_t sm_phys; 1572 } mdb_space_map_t; 1573 1574 typedef struct mdb_vdev { 1575 uint64_t vdev_id; 1576 uint64_t vdev_state; 1577 uintptr_t vdev_ops; 1578 struct { 1579 uint64_t vs_aux; 1580 uint64_t vs_ops[VS_ZIO_TYPES]; 1581 uint64_t vs_bytes[VS_ZIO_TYPES]; 1582 uint64_t vs_read_errors; 1583 uint64_t vs_write_errors; 1584 uint64_t vs_checksum_errors; 1585 } vdev_stat; 1586 uintptr_t vdev_child; 1587 uint64_t vdev_children; 1588 uint64_t vdev_ms_count; 1589 uintptr_t vdev_mg; 1590 uintptr_t vdev_ms; 1591 uintptr_t vdev_path; 1592 } mdb_vdev_t; 1593 1594 typedef struct mdb_vdev_ops { 1595 char vdev_op_type[16]; 1596 } mdb_vdev_ops_t; 1597 1598 static int 1599 metaslab_stats(mdb_vdev_t *vd, int spa_flags) 1600 { 1601 mdb_inc_indent(4); 1602 mdb_printf("%<u>%-?s %6s %20s %10s %10s %10s%</u>\n", "ADDR", "ID", 1603 "OFFSET", "FREE", "FRAG", "UCMU"); 1604 1605 uintptr_t *vdev_ms = mdb_alloc(vd->vdev_ms_count * sizeof (vdev_ms), 1606 UM_SLEEP | UM_GC); 1607 if (mdb_vread(vdev_ms, vd->vdev_ms_count * sizeof (uintptr_t), 1608 vd->vdev_ms) == -1) { 1609 mdb_warn("failed to read vdev_ms at %p\n", vd->vdev_ms); 1610 return (DCMD_ERR); 1611 } 1612 1613 for (int m = 0; m < vd->vdev_ms_count; m++) { 1614 mdb_metaslab_t ms; 1615 mdb_space_map_t sm = { 0 }; 1616 mdb_space_map_phys_t smp = { 0 }; 1617 mdb_range_tree_t rt; 1618 uint64_t uallocs, ufrees, raw_free, raw_uchanges_mem; 1619 char free[MDB_NICENUM_BUFLEN]; 1620 char uchanges_mem[MDB_NICENUM_BUFLEN]; 1621 1622 if (mdb_ctf_vread(&ms, "metaslab_t", "mdb_metaslab_t", 1623 vdev_ms[m], 0) == -1) 1624 return (DCMD_ERR); 1625 1626 if (ms.ms_sm != 0 && 1627 mdb_ctf_vread(&sm, "space_map_t", "mdb_space_map_t", 1628 ms.ms_sm, 0) == -1) 1629 return (DCMD_ERR); 1630 1631 if (mdb_ctf_vread(&rt, "range_tree_t", "mdb_range_tree_t", 1632 ms.ms_unflushed_frees, 0) == -1) 1633 return (DCMD_ERR); 1634 ufrees = rt.rt_space; 1635 raw_uchanges_mem = rt.rt_root.bt_num_nodes * BTREE_LEAF_SIZE; 1636 1637 if (mdb_ctf_vread(&rt, "range_tree_t", "mdb_range_tree_t", 1638 ms.ms_unflushed_allocs, 0) == -1) 1639 return (DCMD_ERR); 1640 uallocs = rt.rt_space; 1641 raw_uchanges_mem += rt.rt_root.bt_num_nodes * BTREE_LEAF_SIZE; 1642 mdb_nicenum(raw_uchanges_mem, uchanges_mem); 1643 1644 raw_free = ms.ms_size; 1645 if (ms.ms_sm != 0 && sm.sm_phys != 0) { 1646 (void) mdb_ctf_vread(&smp, "space_map_phys_t", 1647 "mdb_space_map_phys_t", sm.sm_phys, 0); 1648 raw_free -= smp.smp_alloc; 1649 } 1650 raw_free += ufrees - uallocs; 1651 mdb_nicenum(raw_free, free); 1652 1653 mdb_printf("%0?p %6llu %20llx %10s ", vdev_ms[m], ms.ms_id, 1654 ms.ms_start, free); 1655 if (ms.ms_fragmentation == ZFS_FRAG_INVALID) 1656 mdb_printf("%9s ", "-"); 1657 else 1658 mdb_printf("%9llu%% ", ms.ms_fragmentation); 1659 mdb_printf("%10s\n", uchanges_mem); 1660 1661 if ((spa_flags & SPA_FLAG_HISTOGRAMS) && ms.ms_sm != 0 && 1662 sm.sm_phys != 0) { 1663 dump_histogram(smp.smp_histogram, 1664 SPACE_MAP_HISTOGRAM_SIZE, sm.sm_shift); 1665 } 1666 } 1667 mdb_dec_indent(4); 1668 return (DCMD_OK); 1669 } 1670 1671 static int 1672 metaslab_group_stats(mdb_vdev_t *vd, int spa_flags) 1673 { 1674 mdb_metaslab_group_t mg; 1675 if (mdb_ctf_vread(&mg, "metaslab_group_t", "mdb_metaslab_group_t", 1676 vd->vdev_mg, 0) == -1) { 1677 mdb_warn("failed to read vdev_mg at %p\n", vd->vdev_mg); 1678 return (DCMD_ERR); 1679 } 1680 1681 mdb_inc_indent(4); 1682 mdb_printf("%<u>%-?s %7s %9s%</u>\n", "ADDR", "FRAG", "UCMU"); 1683 1684 if (mg.mg_fragmentation == ZFS_FRAG_INVALID) 1685 mdb_printf("%0?p %6s\n", vd->vdev_mg, "-"); 1686 else 1687 mdb_printf("%0?p %6llu%%", vd->vdev_mg, mg.mg_fragmentation); 1688 1689 1690 uintptr_t *vdev_ms = mdb_alloc(vd->vdev_ms_count * sizeof (vdev_ms), 1691 UM_SLEEP | UM_GC); 1692 if (mdb_vread(vdev_ms, vd->vdev_ms_count * sizeof (uintptr_t), 1693 vd->vdev_ms) == -1) { 1694 mdb_warn("failed to read vdev_ms at %p\n", vd->vdev_ms); 1695 return (DCMD_ERR); 1696 } 1697 1698 uint64_t raw_uchanges_mem = 0; 1699 char uchanges_mem[MDB_NICENUM_BUFLEN]; 1700 for (int m = 0; m < vd->vdev_ms_count; m++) { 1701 mdb_metaslab_t ms; 1702 mdb_range_tree_t rt; 1703 1704 if (mdb_ctf_vread(&ms, "metaslab_t", "mdb_metaslab_t", 1705 vdev_ms[m], 0) == -1) 1706 return (DCMD_ERR); 1707 1708 if (mdb_ctf_vread(&rt, "range_tree_t", "mdb_range_tree_t", 1709 ms.ms_unflushed_frees, 0) == -1) 1710 return (DCMD_ERR); 1711 raw_uchanges_mem += rt.rt_root.bt_num_nodes * BTREE_LEAF_SIZE; 1712 1713 if (mdb_ctf_vread(&rt, "range_tree_t", "mdb_range_tree_t", 1714 ms.ms_unflushed_allocs, 0) == -1) 1715 return (DCMD_ERR); 1716 raw_uchanges_mem += rt.rt_root.bt_num_nodes * BTREE_LEAF_SIZE; 1717 } 1718 mdb_nicenum(raw_uchanges_mem, uchanges_mem); 1719 mdb_printf("%10s\n", uchanges_mem); 1720 1721 if (spa_flags & SPA_FLAG_HISTOGRAMS) 1722 dump_histogram(mg.mg_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0); 1723 mdb_dec_indent(4); 1724 return (DCMD_OK); 1725 } 1726 1727 /* 1728 * ::vdev 1729 * 1730 * Print out a summarized vdev_t, in the following form: 1731 * 1732 * ADDR STATE AUX DESC 1733 * fffffffbcde23df0 HEALTHY - /dev/dsk/c0t0d0 1734 * 1735 * If '-r' is specified, recursively visit all children. 1736 * 1737 * With '-e', the statistics associated with the vdev are printed as well. 1738 */ 1739 static int 1740 do_print_vdev(uintptr_t addr, int flags, int depth, boolean_t recursive, 1741 int spa_flags) 1742 { 1743 mdb_vdev_t vd; 1744 if (mdb_ctf_vread(&vd, "vdev_t", "mdb_vdev_t", 1745 (uintptr_t)addr, 0) == -1) 1746 return (DCMD_ERR); 1747 1748 if (flags & DCMD_PIPE_OUT) { 1749 mdb_printf("%#lr\n", addr); 1750 } else { 1751 char desc[MAXNAMELEN]; 1752 if (vd.vdev_path != 0) { 1753 if (mdb_readstr(desc, sizeof (desc), 1754 (uintptr_t)vd.vdev_path) == -1) { 1755 mdb_warn("failed to read vdev_path at %p\n", 1756 vd.vdev_path); 1757 return (DCMD_ERR); 1758 } 1759 } else if (vd.vdev_ops != 0) { 1760 vdev_ops_t ops; 1761 if (mdb_vread(&ops, sizeof (ops), 1762 (uintptr_t)vd.vdev_ops) == -1) { 1763 mdb_warn("failed to read vdev_ops at %p\n", 1764 vd.vdev_ops); 1765 return (DCMD_ERR); 1766 } 1767 (void) strcpy(desc, ops.vdev_op_type); 1768 } else { 1769 (void) strcpy(desc, "<unknown>"); 1770 } 1771 1772 if (depth == 0 && DCMD_HDRSPEC(flags)) 1773 mdb_printf("%<u>%-?s %-9s %-12s %-*s%</u>\n", 1774 "ADDR", "STATE", "AUX", 1775 sizeof (uintptr_t) == 4 ? 43 : 35, 1776 "DESCRIPTION"); 1777 1778 mdb_printf("%0?p ", addr); 1779 1780 const char *state, *aux; 1781 switch (vd.vdev_state) { 1782 case VDEV_STATE_CLOSED: 1783 state = "CLOSED"; 1784 break; 1785 case VDEV_STATE_OFFLINE: 1786 state = "OFFLINE"; 1787 break; 1788 case VDEV_STATE_CANT_OPEN: 1789 state = "CANT_OPEN"; 1790 break; 1791 case VDEV_STATE_DEGRADED: 1792 state = "DEGRADED"; 1793 break; 1794 case VDEV_STATE_HEALTHY: 1795 state = "HEALTHY"; 1796 break; 1797 case VDEV_STATE_REMOVED: 1798 state = "REMOVED"; 1799 break; 1800 case VDEV_STATE_FAULTED: 1801 state = "FAULTED"; 1802 break; 1803 default: 1804 state = "UNKNOWN"; 1805 break; 1806 } 1807 1808 switch (vd.vdev_stat.vs_aux) { 1809 case VDEV_AUX_NONE: 1810 aux = "-"; 1811 break; 1812 case VDEV_AUX_OPEN_FAILED: 1813 aux = "OPEN_FAILED"; 1814 break; 1815 case VDEV_AUX_CORRUPT_DATA: 1816 aux = "CORRUPT_DATA"; 1817 break; 1818 case VDEV_AUX_NO_REPLICAS: 1819 aux = "NO_REPLICAS"; 1820 break; 1821 case VDEV_AUX_BAD_GUID_SUM: 1822 aux = "BAD_GUID_SUM"; 1823 break; 1824 case VDEV_AUX_TOO_SMALL: 1825 aux = "TOO_SMALL"; 1826 break; 1827 case VDEV_AUX_BAD_LABEL: 1828 aux = "BAD_LABEL"; 1829 break; 1830 case VDEV_AUX_VERSION_NEWER: 1831 aux = "VERS_NEWER"; 1832 break; 1833 case VDEV_AUX_VERSION_OLDER: 1834 aux = "VERS_OLDER"; 1835 break; 1836 case VDEV_AUX_UNSUP_FEAT: 1837 aux = "UNSUP_FEAT"; 1838 break; 1839 case VDEV_AUX_SPARED: 1840 aux = "SPARED"; 1841 break; 1842 case VDEV_AUX_ERR_EXCEEDED: 1843 aux = "ERR_EXCEEDED"; 1844 break; 1845 case VDEV_AUX_IO_FAILURE: 1846 aux = "IO_FAILURE"; 1847 break; 1848 case VDEV_AUX_BAD_LOG: 1849 aux = "BAD_LOG"; 1850 break; 1851 case VDEV_AUX_EXTERNAL: 1852 aux = "EXTERNAL"; 1853 break; 1854 case VDEV_AUX_SPLIT_POOL: 1855 aux = "SPLIT_POOL"; 1856 break; 1857 case VDEV_AUX_CHILDREN_OFFLINE: 1858 aux = "CHILDREN_OFFLINE"; 1859 break; 1860 default: 1861 aux = "UNKNOWN"; 1862 break; 1863 } 1864 1865 mdb_printf("%-9s %-12s %*s%s\n", state, aux, depth, "", desc); 1866 1867 if (spa_flags & SPA_FLAG_ERRORS) { 1868 int i; 1869 1870 mdb_inc_indent(4); 1871 mdb_printf("\n"); 1872 mdb_printf("%<u> %12s %12s %12s %12s " 1873 "%12s%</u>\n", "READ", "WRITE", "FREE", "CLAIM", 1874 "IOCTL"); 1875 mdb_printf("OPS "); 1876 for (i = 1; i < VS_ZIO_TYPES; i++) 1877 mdb_printf("%11#llx%s", 1878 vd.vdev_stat.vs_ops[i], 1879 i == VS_ZIO_TYPES - 1 ? "" : " "); 1880 mdb_printf("\n"); 1881 mdb_printf("BYTES "); 1882 for (i = 1; i < VS_ZIO_TYPES; i++) 1883 mdb_printf("%11#llx%s", 1884 vd.vdev_stat.vs_bytes[i], 1885 i == VS_ZIO_TYPES - 1 ? "" : " "); 1886 1887 1888 mdb_printf("\n"); 1889 mdb_printf("EREAD %10#llx\n", 1890 vd.vdev_stat.vs_read_errors); 1891 mdb_printf("EWRITE %10#llx\n", 1892 vd.vdev_stat.vs_write_errors); 1893 mdb_printf("ECKSUM %10#llx\n", 1894 vd.vdev_stat.vs_checksum_errors); 1895 mdb_dec_indent(4); 1896 mdb_printf("\n"); 1897 } 1898 1899 if ((spa_flags & SPA_FLAG_METASLAB_GROUPS) && 1900 vd.vdev_mg != 0) { 1901 metaslab_group_stats(&vd, spa_flags); 1902 } 1903 if ((spa_flags & SPA_FLAG_METASLABS) && vd.vdev_ms != 0) { 1904 metaslab_stats(&vd, spa_flags); 1905 } 1906 } 1907 1908 uint64_t children = vd.vdev_children; 1909 if (children == 0 || !recursive) 1910 return (DCMD_OK); 1911 1912 uintptr_t *child = mdb_alloc(children * sizeof (child), 1913 UM_SLEEP | UM_GC); 1914 if (mdb_vread(child, children * sizeof (void *), vd.vdev_child) == -1) { 1915 mdb_warn("failed to read vdev children at %p", vd.vdev_child); 1916 return (DCMD_ERR); 1917 } 1918 1919 for (uint64_t c = 0; c < children; c++) { 1920 if (do_print_vdev(child[c], flags, depth + 2, recursive, 1921 spa_flags)) { 1922 return (DCMD_ERR); 1923 } 1924 } 1925 1926 return (DCMD_OK); 1927 } 1928 1929 static int 1930 vdev_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1931 { 1932 uint64_t depth = 0; 1933 boolean_t recursive = B_FALSE; 1934 int spa_flags = 0; 1935 1936 if (mdb_getopts(argc, argv, 1937 'e', MDB_OPT_SETBITS, SPA_FLAG_ERRORS, &spa_flags, 1938 'm', MDB_OPT_SETBITS, SPA_FLAG_METASLABS, &spa_flags, 1939 'M', MDB_OPT_SETBITS, SPA_FLAG_METASLAB_GROUPS, &spa_flags, 1940 'h', MDB_OPT_SETBITS, SPA_FLAG_HISTOGRAMS, &spa_flags, 1941 'r', MDB_OPT_SETBITS, TRUE, &recursive, 1942 'd', MDB_OPT_UINT64, &depth, NULL) != argc) 1943 return (DCMD_USAGE); 1944 1945 if (!(flags & DCMD_ADDRSPEC)) { 1946 mdb_warn("no vdev_t address given\n"); 1947 return (DCMD_ERR); 1948 } 1949 1950 return (do_print_vdev(addr, flags, (int)depth, recursive, spa_flags)); 1951 } 1952 1953 typedef struct mdb_metaslab_alloc_trace { 1954 uintptr_t mat_mg; 1955 uintptr_t mat_msp; 1956 uint64_t mat_size; 1957 uint64_t mat_weight; 1958 uint64_t mat_offset; 1959 uint32_t mat_dva_id; 1960 int mat_allocator; 1961 } mdb_metaslab_alloc_trace_t; 1962 1963 static void 1964 metaslab_print_weight(uint64_t weight) 1965 { 1966 char buf[100]; 1967 1968 if (WEIGHT_IS_SPACEBASED(weight)) { 1969 mdb_nicenum( 1970 weight & ~(METASLAB_ACTIVE_MASK | METASLAB_WEIGHT_TYPE), 1971 buf); 1972 } else { 1973 char size[MDB_NICENUM_BUFLEN]; 1974 mdb_nicenum(1ULL << WEIGHT_GET_INDEX(weight), size); 1975 (void) mdb_snprintf(buf, sizeof (buf), "%llu x %s", 1976 WEIGHT_GET_COUNT(weight), size); 1977 } 1978 mdb_printf("%11s ", buf); 1979 } 1980 1981 /* ARGSUSED */ 1982 static int 1983 metaslab_weight(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1984 { 1985 uint64_t weight = 0; 1986 char active; 1987 1988 if (argc == 0 && (flags & DCMD_ADDRSPEC)) { 1989 if (mdb_vread(&weight, sizeof (uint64_t), addr) == -1) { 1990 mdb_warn("failed to read weight at %p\n", addr); 1991 return (DCMD_ERR); 1992 } 1993 } else if (argc == 1 && !(flags & DCMD_ADDRSPEC)) { 1994 weight = (uint64_t)mdb_argtoull(&argv[0]); 1995 } else { 1996 return (DCMD_USAGE); 1997 } 1998 1999 if (DCMD_HDRSPEC(flags)) { 2000 mdb_printf("%<u>%-6s %9s %9s%</u>\n", 2001 "ACTIVE", "ALGORITHM", "WEIGHT"); 2002 } 2003 2004 if (weight & METASLAB_WEIGHT_PRIMARY) 2005 active = 'P'; 2006 else if (weight & METASLAB_WEIGHT_SECONDARY) 2007 active = 'S'; 2008 else 2009 active = '-'; 2010 mdb_printf("%6c %8s ", active, 2011 WEIGHT_IS_SPACEBASED(weight) ? "SPACE" : "SEGMENT"); 2012 metaslab_print_weight(weight); 2013 mdb_printf("\n"); 2014 2015 return (DCMD_OK); 2016 } 2017 2018 /* ARGSUSED */ 2019 static int 2020 metaslab_trace(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2021 { 2022 mdb_metaslab_alloc_trace_t mat; 2023 mdb_metaslab_group_t mg = { 0 }; 2024 char result_type[100]; 2025 2026 if (mdb_ctf_vread(&mat, "metaslab_alloc_trace_t", 2027 "mdb_metaslab_alloc_trace_t", addr, 0) == -1) { 2028 return (DCMD_ERR); 2029 } 2030 2031 if (!(flags & DCMD_PIPE_OUT) && DCMD_HDRSPEC(flags)) { 2032 mdb_printf("%<u>%6s %6s %8s %11s %11s %18s %18s%</u>\n", 2033 "MSID", "DVA", "ASIZE", "ALLOCATOR", "WEIGHT", "RESULT", 2034 "VDEV"); 2035 } 2036 2037 if (mat.mat_msp != 0) { 2038 mdb_metaslab_t ms; 2039 2040 if (mdb_ctf_vread(&ms, "metaslab_t", "mdb_metaslab_t", 2041 mat.mat_msp, 0) == -1) { 2042 return (DCMD_ERR); 2043 } 2044 mdb_printf("%6llu ", ms.ms_id); 2045 } else { 2046 mdb_printf("%6s ", "-"); 2047 } 2048 2049 mdb_printf("%6d %8llx %11llx ", mat.mat_dva_id, mat.mat_size, 2050 mat.mat_allocator); 2051 2052 metaslab_print_weight(mat.mat_weight); 2053 2054 if ((int64_t)mat.mat_offset < 0) { 2055 if (enum_lookup("enum trace_alloc_type", mat.mat_offset, 2056 "TRACE_", sizeof (result_type), result_type) == -1) { 2057 mdb_warn("Could not find enum for trace_alloc_type"); 2058 return (DCMD_ERR); 2059 } 2060 mdb_printf("%18s ", result_type); 2061 } else { 2062 mdb_printf("%<b>%18llx%</b> ", mat.mat_offset); 2063 } 2064 2065 if (mat.mat_mg != 0 && 2066 mdb_ctf_vread(&mg, "metaslab_group_t", "mdb_metaslab_group_t", 2067 mat.mat_mg, 0) == -1) { 2068 return (DCMD_ERR); 2069 } 2070 2071 if (mg.mg_vd != 0) { 2072 mdb_vdev_t vdev; 2073 char desc[MAXNAMELEN]; 2074 2075 if (mdb_ctf_vread(&vdev, "vdev_t", "mdb_vdev_t", 2076 mg.mg_vd, 0) == -1) { 2077 return (DCMD_ERR); 2078 } 2079 2080 if (vdev.vdev_path != 0) { 2081 char path[MAXNAMELEN]; 2082 2083 if (mdb_readstr(path, sizeof (path), 2084 vdev.vdev_path) == -1) { 2085 mdb_warn("failed to read vdev_path at %p\n", 2086 vdev.vdev_path); 2087 return (DCMD_ERR); 2088 } 2089 char *slash; 2090 if ((slash = strrchr(path, '/')) != NULL) { 2091 strcpy(desc, slash + 1); 2092 } else { 2093 strcpy(desc, path); 2094 } 2095 } else if (vdev.vdev_ops != 0) { 2096 mdb_vdev_ops_t ops; 2097 if (mdb_ctf_vread(&ops, "vdev_ops_t", "mdb_vdev_ops_t", 2098 vdev.vdev_ops, 0) == -1) { 2099 mdb_warn("failed to read vdev_ops at %p\n", 2100 vdev.vdev_ops); 2101 return (DCMD_ERR); 2102 } 2103 (void) mdb_snprintf(desc, sizeof (desc), 2104 "%s-%llu", ops.vdev_op_type, vdev.vdev_id); 2105 } else { 2106 (void) strcpy(desc, "<unknown>"); 2107 } 2108 mdb_printf("%18s\n", desc); 2109 } 2110 2111 return (DCMD_OK); 2112 } 2113 2114 typedef struct metaslab_walk_data { 2115 uint64_t mw_numvdevs; 2116 uintptr_t *mw_vdevs; 2117 int mw_curvdev; 2118 uint64_t mw_nummss; 2119 uintptr_t *mw_mss; 2120 int mw_curms; 2121 } metaslab_walk_data_t; 2122 2123 static int 2124 metaslab_walk_step(mdb_walk_state_t *wsp) 2125 { 2126 metaslab_walk_data_t *mw = wsp->walk_data; 2127 metaslab_t ms; 2128 uintptr_t msp; 2129 2130 if (mw->mw_curvdev >= mw->mw_numvdevs) 2131 return (WALK_DONE); 2132 2133 if (mw->mw_mss == NULL) { 2134 uintptr_t mssp; 2135 uintptr_t vdevp; 2136 2137 ASSERT(mw->mw_curms == 0); 2138 ASSERT(mw->mw_nummss == 0); 2139 2140 vdevp = mw->mw_vdevs[mw->mw_curvdev]; 2141 if (GETMEMB(vdevp, "vdev", vdev_ms, mssp) || 2142 GETMEMB(vdevp, "vdev", vdev_ms_count, mw->mw_nummss)) { 2143 return (WALK_ERR); 2144 } 2145 2146 mw->mw_mss = mdb_alloc(mw->mw_nummss * sizeof (void*), 2147 UM_SLEEP | UM_GC); 2148 if (mdb_vread(mw->mw_mss, mw->mw_nummss * sizeof (void*), 2149 mssp) == -1) { 2150 mdb_warn("failed to read vdev_ms at %p", mssp); 2151 return (WALK_ERR); 2152 } 2153 } 2154 2155 if (mw->mw_curms >= mw->mw_nummss) { 2156 mw->mw_mss = NULL; 2157 mw->mw_curms = 0; 2158 mw->mw_nummss = 0; 2159 mw->mw_curvdev++; 2160 return (WALK_NEXT); 2161 } 2162 2163 msp = mw->mw_mss[mw->mw_curms]; 2164 if (mdb_vread(&ms, sizeof (metaslab_t), msp) == -1) { 2165 mdb_warn("failed to read metaslab_t at %p", msp); 2166 return (WALK_ERR); 2167 } 2168 2169 mw->mw_curms++; 2170 2171 return (wsp->walk_callback(msp, &ms, wsp->walk_cbdata)); 2172 } 2173 2174 static int 2175 metaslab_walk_init(mdb_walk_state_t *wsp) 2176 { 2177 metaslab_walk_data_t *mw; 2178 uintptr_t root_vdevp; 2179 uintptr_t childp; 2180 2181 if (wsp->walk_addr == 0) { 2182 mdb_warn("must supply address of spa_t\n"); 2183 return (WALK_ERR); 2184 } 2185 2186 mw = mdb_zalloc(sizeof (metaslab_walk_data_t), UM_SLEEP | UM_GC); 2187 2188 if (GETMEMB(wsp->walk_addr, "spa", spa_root_vdev, root_vdevp) || 2189 GETMEMB(root_vdevp, "vdev", vdev_children, mw->mw_numvdevs) || 2190 GETMEMB(root_vdevp, "vdev", vdev_child, childp)) { 2191 return (DCMD_ERR); 2192 } 2193 2194 mw->mw_vdevs = mdb_alloc(mw->mw_numvdevs * sizeof (void *), 2195 UM_SLEEP | UM_GC); 2196 if (mdb_vread(mw->mw_vdevs, mw->mw_numvdevs * sizeof (void *), 2197 childp) == -1) { 2198 mdb_warn("failed to read root vdev children at %p", childp); 2199 return (DCMD_ERR); 2200 } 2201 2202 wsp->walk_data = mw; 2203 2204 return (WALK_NEXT); 2205 } 2206 2207 typedef struct mdb_spa { 2208 uintptr_t spa_dsl_pool; 2209 uintptr_t spa_root_vdev; 2210 } mdb_spa_t; 2211 2212 typedef struct mdb_dsl_pool { 2213 uintptr_t dp_root_dir; 2214 } mdb_dsl_pool_t; 2215 2216 typedef struct mdb_dsl_dir { 2217 uintptr_t dd_dbuf; 2218 int64_t dd_space_towrite[TXG_SIZE]; 2219 } mdb_dsl_dir_t; 2220 2221 typedef struct mdb_dsl_dir_phys { 2222 uint64_t dd_used_bytes; 2223 uint64_t dd_compressed_bytes; 2224 uint64_t dd_uncompressed_bytes; 2225 } mdb_dsl_dir_phys_t; 2226 2227 typedef struct space_data { 2228 uint64_t ms_allocating[TXG_SIZE]; 2229 uint64_t ms_checkpointing; 2230 uint64_t ms_freeing; 2231 uint64_t ms_freed; 2232 uint64_t ms_unflushed_frees; 2233 uint64_t ms_unflushed_allocs; 2234 uint64_t ms_allocatable; 2235 int64_t ms_deferspace; 2236 uint64_t avail; 2237 } space_data_t; 2238 2239 /* ARGSUSED */ 2240 static int 2241 space_cb(uintptr_t addr, const void *unknown, void *arg) 2242 { 2243 space_data_t *sd = arg; 2244 mdb_metaslab_t ms; 2245 mdb_range_tree_t rt; 2246 mdb_space_map_t sm = { 0 }; 2247 mdb_space_map_phys_t smp = { 0 }; 2248 uint64_t uallocs, ufrees; 2249 int i; 2250 2251 if (mdb_ctf_vread(&ms, "metaslab_t", "mdb_metaslab_t", 2252 addr, 0) == -1) 2253 return (WALK_ERR); 2254 2255 for (i = 0; i < TXG_SIZE; i++) { 2256 if (mdb_ctf_vread(&rt, "range_tree_t", 2257 "mdb_range_tree_t", ms.ms_allocating[i], 0) == -1) 2258 return (WALK_ERR); 2259 sd->ms_allocating[i] += rt.rt_space; 2260 } 2261 2262 if (mdb_ctf_vread(&rt, "range_tree_t", 2263 "mdb_range_tree_t", ms.ms_checkpointing, 0) == -1) 2264 return (WALK_ERR); 2265 sd->ms_checkpointing += rt.rt_space; 2266 2267 if (mdb_ctf_vread(&rt, "range_tree_t", 2268 "mdb_range_tree_t", ms.ms_freeing, 0) == -1) 2269 return (WALK_ERR); 2270 sd->ms_freeing += rt.rt_space; 2271 2272 if (mdb_ctf_vread(&rt, "range_tree_t", 2273 "mdb_range_tree_t", ms.ms_freed, 0) == -1) 2274 return (WALK_ERR); 2275 sd->ms_freed += rt.rt_space; 2276 2277 if (mdb_ctf_vread(&rt, "range_tree_t", 2278 "mdb_range_tree_t", ms.ms_allocatable, 0) == -1) 2279 return (WALK_ERR); 2280 sd->ms_allocatable += rt.rt_space; 2281 2282 if (mdb_ctf_vread(&rt, "range_tree_t", 2283 "mdb_range_tree_t", ms.ms_unflushed_frees, 0) == -1) 2284 return (WALK_ERR); 2285 sd->ms_unflushed_frees += rt.rt_space; 2286 ufrees = rt.rt_space; 2287 2288 if (mdb_ctf_vread(&rt, "range_tree_t", 2289 "mdb_range_tree_t", ms.ms_unflushed_allocs, 0) == -1) 2290 return (WALK_ERR); 2291 sd->ms_unflushed_allocs += rt.rt_space; 2292 uallocs = rt.rt_space; 2293 2294 if (ms.ms_sm != 0 && 2295 mdb_ctf_vread(&sm, "space_map_t", 2296 "mdb_space_map_t", ms.ms_sm, 0) == -1) 2297 return (WALK_ERR); 2298 2299 if (sm.sm_phys != 0) { 2300 (void) mdb_ctf_vread(&smp, "space_map_phys_t", 2301 "mdb_space_map_phys_t", sm.sm_phys, 0); 2302 } 2303 2304 sd->ms_deferspace += ms.ms_deferspace; 2305 sd->avail += sm.sm_size - smp.smp_alloc + ufrees - uallocs; 2306 2307 return (WALK_NEXT); 2308 } 2309 2310 /* 2311 * ::spa_space [-b] 2312 * 2313 * Given a spa_t, print out it's on-disk space usage and in-core 2314 * estimates of future usage. If -b is given, print space in bytes. 2315 * Otherwise print in megabytes. 2316 */ 2317 /* ARGSUSED */ 2318 static int 2319 spa_space(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2320 { 2321 mdb_spa_t spa; 2322 mdb_dsl_pool_t dp; 2323 mdb_dsl_dir_t dd; 2324 mdb_dmu_buf_impl_t db; 2325 mdb_dsl_dir_phys_t dsp; 2326 space_data_t sd; 2327 int shift = 20; 2328 char *suffix = "M"; 2329 int bytes = B_FALSE; 2330 2331 if (mdb_getopts(argc, argv, 'b', MDB_OPT_SETBITS, TRUE, &bytes, NULL) != 2332 argc) 2333 return (DCMD_USAGE); 2334 if (!(flags & DCMD_ADDRSPEC)) 2335 return (DCMD_USAGE); 2336 2337 if (bytes) { 2338 shift = 0; 2339 suffix = ""; 2340 } 2341 2342 if (mdb_ctf_vread(&spa, ZFS_STRUCT "spa", "mdb_spa_t", 2343 addr, 0) == -1 || 2344 mdb_ctf_vread(&dp, ZFS_STRUCT "dsl_pool", "mdb_dsl_pool_t", 2345 spa.spa_dsl_pool, 0) == -1 || 2346 mdb_ctf_vread(&dd, ZFS_STRUCT "dsl_dir", "mdb_dsl_dir_t", 2347 dp.dp_root_dir, 0) == -1 || 2348 mdb_ctf_vread(&db, ZFS_STRUCT "dmu_buf_impl", "mdb_dmu_buf_impl_t", 2349 dd.dd_dbuf, 0) == -1 || 2350 mdb_ctf_vread(&dsp, ZFS_STRUCT "dsl_dir_phys", 2351 "mdb_dsl_dir_phys_t", db.db.db_data, 0) == -1) { 2352 return (DCMD_ERR); 2353 } 2354 2355 mdb_printf("dd_space_towrite = %llu%s %llu%s %llu%s %llu%s\n", 2356 dd.dd_space_towrite[0] >> shift, suffix, 2357 dd.dd_space_towrite[1] >> shift, suffix, 2358 dd.dd_space_towrite[2] >> shift, suffix, 2359 dd.dd_space_towrite[3] >> shift, suffix); 2360 2361 mdb_printf("dd_phys.dd_used_bytes = %llu%s\n", 2362 dsp.dd_used_bytes >> shift, suffix); 2363 mdb_printf("dd_phys.dd_compressed_bytes = %llu%s\n", 2364 dsp.dd_compressed_bytes >> shift, suffix); 2365 mdb_printf("dd_phys.dd_uncompressed_bytes = %llu%s\n", 2366 dsp.dd_uncompressed_bytes >> shift, suffix); 2367 2368 bzero(&sd, sizeof (sd)); 2369 if (mdb_pwalk("metaslab", space_cb, &sd, addr) != 0) { 2370 mdb_warn("can't walk metaslabs"); 2371 return (DCMD_ERR); 2372 } 2373 2374 mdb_printf("ms_allocmap = %llu%s %llu%s %llu%s %llu%s\n", 2375 sd.ms_allocating[0] >> shift, suffix, 2376 sd.ms_allocating[1] >> shift, suffix, 2377 sd.ms_allocating[2] >> shift, suffix, 2378 sd.ms_allocating[3] >> shift, suffix); 2379 mdb_printf("ms_checkpointing = %llu%s\n", 2380 sd.ms_checkpointing >> shift, suffix); 2381 mdb_printf("ms_freeing = %llu%s\n", 2382 sd.ms_freeing >> shift, suffix); 2383 mdb_printf("ms_freed = %llu%s\n", 2384 sd.ms_freed >> shift, suffix); 2385 mdb_printf("ms_unflushed_frees = %llu%s\n", 2386 sd.ms_unflushed_frees >> shift, suffix); 2387 mdb_printf("ms_unflushed_allocs = %llu%s\n", 2388 sd.ms_unflushed_allocs >> shift, suffix); 2389 mdb_printf("ms_allocatable = %llu%s\n", 2390 sd.ms_allocatable >> shift, suffix); 2391 mdb_printf("ms_deferspace = %llu%s\n", 2392 sd.ms_deferspace >> shift, suffix); 2393 mdb_printf("current avail = %llu%s\n", 2394 sd.avail >> shift, suffix); 2395 2396 return (DCMD_OK); 2397 } 2398 2399 typedef struct mdb_spa_aux_vdev { 2400 int sav_count; 2401 uintptr_t sav_vdevs; 2402 } mdb_spa_aux_vdev_t; 2403 2404 typedef struct mdb_spa_vdevs { 2405 uintptr_t spa_root_vdev; 2406 mdb_spa_aux_vdev_t spa_l2cache; 2407 mdb_spa_aux_vdev_t spa_spares; 2408 } mdb_spa_vdevs_t; 2409 2410 static int 2411 spa_print_aux(mdb_spa_aux_vdev_t *sav, uint_t flags, mdb_arg_t *v, 2412 const char *name) 2413 { 2414 uintptr_t *aux; 2415 size_t len; 2416 int ret, i; 2417 2418 /* 2419 * Iterate over aux vdevs and print those out as well. This is a 2420 * little annoying because we don't have a root vdev to pass to ::vdev. 2421 * Instead, we print a single line and then call it for each child 2422 * vdev. 2423 */ 2424 if (sav->sav_count != 0) { 2425 v[1].a_type = MDB_TYPE_STRING; 2426 v[1].a_un.a_str = "-d"; 2427 v[2].a_type = MDB_TYPE_IMMEDIATE; 2428 v[2].a_un.a_val = 2; 2429 2430 len = sav->sav_count * sizeof (uintptr_t); 2431 aux = mdb_alloc(len, UM_SLEEP); 2432 if (mdb_vread(aux, len, sav->sav_vdevs) == -1) { 2433 mdb_free(aux, len); 2434 mdb_warn("failed to read l2cache vdevs at %p", 2435 sav->sav_vdevs); 2436 return (DCMD_ERR); 2437 } 2438 2439 mdb_printf("%-?s %-9s %-12s %s\n", "-", "-", "-", name); 2440 2441 for (i = 0; i < sav->sav_count; i++) { 2442 ret = mdb_call_dcmd("vdev", aux[i], flags, 3, v); 2443 if (ret != DCMD_OK) { 2444 mdb_free(aux, len); 2445 return (ret); 2446 } 2447 } 2448 2449 mdb_free(aux, len); 2450 } 2451 2452 return (0); 2453 } 2454 2455 /* 2456 * ::spa_vdevs 2457 * 2458 * -e Include error stats 2459 * -m Include metaslab information 2460 * -M Include metaslab group information 2461 * -h Include histogram information (requires -m or -M) 2462 * 2463 * Print out a summarized list of vdevs for the given spa_t. 2464 * This is accomplished by invoking "::vdev -re" on the root vdev, as well as 2465 * iterating over the cache devices. 2466 */ 2467 /* ARGSUSED */ 2468 static int 2469 spa_vdevs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2470 { 2471 mdb_arg_t v[3]; 2472 int ret; 2473 char opts[100] = "-r"; 2474 int spa_flags = 0; 2475 2476 if (mdb_getopts(argc, argv, 2477 'e', MDB_OPT_SETBITS, SPA_FLAG_ERRORS, &spa_flags, 2478 'm', MDB_OPT_SETBITS, SPA_FLAG_METASLABS, &spa_flags, 2479 'M', MDB_OPT_SETBITS, SPA_FLAG_METASLAB_GROUPS, &spa_flags, 2480 'h', MDB_OPT_SETBITS, SPA_FLAG_HISTOGRAMS, &spa_flags, 2481 NULL) != argc) 2482 return (DCMD_USAGE); 2483 2484 if (!(flags & DCMD_ADDRSPEC)) 2485 return (DCMD_USAGE); 2486 2487 mdb_spa_vdevs_t spa; 2488 if (mdb_ctf_vread(&spa, "spa_t", "mdb_spa_vdevs_t", addr, 0) == -1) 2489 return (DCMD_ERR); 2490 2491 /* 2492 * Unitialized spa_t structures can have a NULL root vdev. 2493 */ 2494 if (spa.spa_root_vdev == 0) { 2495 mdb_printf("no associated vdevs\n"); 2496 return (DCMD_OK); 2497 } 2498 2499 if (spa_flags & SPA_FLAG_ERRORS) 2500 strcat(opts, "e"); 2501 if (spa_flags & SPA_FLAG_METASLABS) 2502 strcat(opts, "m"); 2503 if (spa_flags & SPA_FLAG_METASLAB_GROUPS) 2504 strcat(opts, "M"); 2505 if (spa_flags & SPA_FLAG_HISTOGRAMS) 2506 strcat(opts, "h"); 2507 2508 v[0].a_type = MDB_TYPE_STRING; 2509 v[0].a_un.a_str = opts; 2510 2511 ret = mdb_call_dcmd("vdev", (uintptr_t)spa.spa_root_vdev, 2512 flags, 1, v); 2513 if (ret != DCMD_OK) 2514 return (ret); 2515 2516 if (spa_print_aux(&spa.spa_l2cache, flags, v, "cache") != 0 || 2517 spa_print_aux(&spa.spa_spares, flags, v, "spares") != 0) 2518 return (DCMD_ERR); 2519 2520 return (DCMD_OK); 2521 } 2522 2523 /* 2524 * ::zio 2525 * 2526 * Print a summary of zio_t and all its children. This is intended to display a 2527 * zio tree, and hence we only pick the most important pieces of information for 2528 * the main summary. More detailed information can always be found by doing a 2529 * '::print zio' on the underlying zio_t. The columns we display are: 2530 * 2531 * ADDRESS TYPE STAGE WAITER TIME_ELAPSED 2532 * 2533 * The 'address' column is indented by one space for each depth level as we 2534 * descend down the tree. 2535 */ 2536 2537 #define ZIO_MAXINDENT 7 2538 #define ZIO_MAXWIDTH (sizeof (uintptr_t) * 2 + ZIO_MAXINDENT) 2539 #define ZIO_WALK_SELF 0 2540 #define ZIO_WALK_CHILD 1 2541 #define ZIO_WALK_PARENT 2 2542 2543 typedef struct zio_print_args { 2544 int zpa_current_depth; 2545 int zpa_min_depth; 2546 int zpa_max_depth; 2547 int zpa_type; 2548 uint_t zpa_flags; 2549 } zio_print_args_t; 2550 2551 typedef struct mdb_zio { 2552 enum zio_type io_type; 2553 enum zio_stage io_stage; 2554 uintptr_t io_waiter; 2555 uintptr_t io_spa; 2556 struct { 2557 struct { 2558 uintptr_t list_next; 2559 } list_head; 2560 } io_parent_list; 2561 int io_error; 2562 } mdb_zio_t; 2563 2564 typedef struct mdb_zio_timestamp { 2565 hrtime_t io_timestamp; 2566 } mdb_zio_timestamp_t; 2567 2568 static int zio_child_cb(uintptr_t addr, const void *unknown, void *arg); 2569 2570 static int 2571 zio_print_cb(uintptr_t addr, zio_print_args_t *zpa) 2572 { 2573 mdb_ctf_id_t type_enum, stage_enum; 2574 int indent = zpa->zpa_current_depth; 2575 const char *type, *stage; 2576 uintptr_t laddr; 2577 mdb_zio_t zio; 2578 mdb_zio_timestamp_t zio_timestamp = { 0 }; 2579 2580 if (mdb_ctf_vread(&zio, ZFS_STRUCT "zio", "mdb_zio_t", addr, 0) == -1) 2581 return (WALK_ERR); 2582 (void) mdb_ctf_vread(&zio_timestamp, ZFS_STRUCT "zio", 2583 "mdb_zio_timestamp_t", addr, MDB_CTF_VREAD_QUIET); 2584 2585 if (indent > ZIO_MAXINDENT) 2586 indent = ZIO_MAXINDENT; 2587 2588 if (mdb_ctf_lookup_by_name("enum zio_type", &type_enum) == -1 || 2589 mdb_ctf_lookup_by_name("enum zio_stage", &stage_enum) == -1) { 2590 mdb_warn("failed to lookup zio enums"); 2591 return (WALK_ERR); 2592 } 2593 2594 if ((type = mdb_ctf_enum_name(type_enum, zio.io_type)) != NULL) 2595 type += sizeof ("ZIO_TYPE_") - 1; 2596 else 2597 type = "?"; 2598 2599 if (zio.io_error == 0) { 2600 stage = mdb_ctf_enum_name(stage_enum, zio.io_stage); 2601 if (stage != NULL) 2602 stage += sizeof ("ZIO_STAGE_") - 1; 2603 else 2604 stage = "?"; 2605 } else { 2606 stage = "FAILED"; 2607 } 2608 2609 if (zpa->zpa_current_depth >= zpa->zpa_min_depth) { 2610 if (zpa->zpa_flags & DCMD_PIPE_OUT) { 2611 mdb_printf("%?p\n", addr); 2612 } else { 2613 mdb_printf("%*s%-*p %-5s %-16s ", indent, "", 2614 ZIO_MAXWIDTH - indent, addr, type, stage); 2615 if (zio.io_waiter != 0) 2616 mdb_printf("%-16lx ", zio.io_waiter); 2617 else 2618 mdb_printf("%-16s ", "-"); 2619 #ifdef _KERNEL 2620 if (zio_timestamp.io_timestamp != 0) { 2621 mdb_printf("%llums", (mdb_gethrtime() - 2622 zio_timestamp.io_timestamp) / 2623 1000000); 2624 } else { 2625 mdb_printf("%-12s ", "-"); 2626 } 2627 #else 2628 mdb_printf("%-12s ", "-"); 2629 #endif 2630 mdb_printf("\n"); 2631 } 2632 } 2633 2634 if (zpa->zpa_current_depth >= zpa->zpa_max_depth) 2635 return (WALK_NEXT); 2636 2637 if (zpa->zpa_type == ZIO_WALK_PARENT) 2638 laddr = addr + mdb_ctf_offsetof_by_name(ZFS_STRUCT "zio", 2639 "io_parent_list"); 2640 else 2641 laddr = addr + mdb_ctf_offsetof_by_name(ZFS_STRUCT "zio", 2642 "io_child_list"); 2643 2644 zpa->zpa_current_depth++; 2645 if (mdb_pwalk("list", zio_child_cb, zpa, laddr) != 0) { 2646 mdb_warn("failed to walk zio_t children at %p\n", laddr); 2647 return (WALK_ERR); 2648 } 2649 zpa->zpa_current_depth--; 2650 2651 return (WALK_NEXT); 2652 } 2653 2654 /* ARGSUSED */ 2655 static int 2656 zio_child_cb(uintptr_t addr, const void *unknown, void *arg) 2657 { 2658 zio_link_t zl; 2659 uintptr_t ziop; 2660 zio_print_args_t *zpa = arg; 2661 2662 if (mdb_vread(&zl, sizeof (zl), addr) == -1) { 2663 mdb_warn("failed to read zio_link_t at %p", addr); 2664 return (WALK_ERR); 2665 } 2666 2667 if (zpa->zpa_type == ZIO_WALK_PARENT) 2668 ziop = (uintptr_t)zl.zl_parent; 2669 else 2670 ziop = (uintptr_t)zl.zl_child; 2671 2672 return (zio_print_cb(ziop, zpa)); 2673 } 2674 2675 /* ARGSUSED */ 2676 static int 2677 zio_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2678 { 2679 zio_print_args_t zpa = { 0 }; 2680 2681 if (!(flags & DCMD_ADDRSPEC)) 2682 return (DCMD_USAGE); 2683 2684 if (mdb_getopts(argc, argv, 2685 'r', MDB_OPT_SETBITS, INT_MAX, &zpa.zpa_max_depth, 2686 'c', MDB_OPT_SETBITS, ZIO_WALK_CHILD, &zpa.zpa_type, 2687 'p', MDB_OPT_SETBITS, ZIO_WALK_PARENT, &zpa.zpa_type, 2688 NULL) != argc) 2689 return (DCMD_USAGE); 2690 2691 zpa.zpa_flags = flags; 2692 if (zpa.zpa_max_depth != 0) { 2693 if (zpa.zpa_type == ZIO_WALK_SELF) 2694 zpa.zpa_type = ZIO_WALK_CHILD; 2695 } else if (zpa.zpa_type != ZIO_WALK_SELF) { 2696 zpa.zpa_min_depth = 1; 2697 zpa.zpa_max_depth = 1; 2698 } 2699 2700 if (!(flags & DCMD_PIPE_OUT) && DCMD_HDRSPEC(flags)) { 2701 mdb_printf("%<u>%-*s %-5s %-16s %-16s %-12s%</u>\n", 2702 ZIO_MAXWIDTH, "ADDRESS", "TYPE", "STAGE", "WAITER", 2703 "TIME_ELAPSED"); 2704 } 2705 2706 if (zio_print_cb(addr, &zpa) != WALK_NEXT) 2707 return (DCMD_ERR); 2708 2709 return (DCMD_OK); 2710 } 2711 2712 /* 2713 * [addr]::zio_state 2714 * 2715 * Print a summary of all zio_t structures on the system, or for a particular 2716 * pool. This is equivalent to '::walk zio_root | ::zio'. 2717 */ 2718 /*ARGSUSED*/ 2719 static int 2720 zio_state(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2721 { 2722 /* 2723 * MDB will remember the last address of the pipeline, so if we don't 2724 * zero this we'll end up trying to walk zio structures for a 2725 * non-existent spa_t. 2726 */ 2727 if (!(flags & DCMD_ADDRSPEC)) 2728 addr = 0; 2729 2730 return (mdb_pwalk_dcmd("zio_root", "zio", argc, argv, addr)); 2731 } 2732 2733 2734 typedef struct mdb_zfs_btree_hdr { 2735 uintptr_t bth_parent; 2736 boolean_t bth_core; 2737 /* 2738 * For both leaf and core nodes, represents the number of elements in 2739 * the node. For core nodes, they will have bth_count + 1 children. 2740 */ 2741 uint32_t bth_count; 2742 } mdb_zfs_btree_hdr_t; 2743 2744 typedef struct mdb_zfs_btree_core { 2745 mdb_zfs_btree_hdr_t btc_hdr; 2746 uintptr_t btc_children[BTREE_CORE_ELEMS + 1]; 2747 uint8_t btc_elems[]; 2748 } mdb_zfs_btree_core_t; 2749 2750 typedef struct mdb_zfs_btree_leaf { 2751 mdb_zfs_btree_hdr_t btl_hdr; 2752 uint8_t btl_elems[]; 2753 } mdb_zfs_btree_leaf_t; 2754 2755 typedef struct mdb_zfs_btree { 2756 uintptr_t bt_root; 2757 size_t bt_elem_size; 2758 } mdb_zfs_btree_t; 2759 2760 typedef struct btree_walk_data { 2761 mdb_zfs_btree_t bwd_btree; 2762 mdb_zfs_btree_hdr_t *bwd_node; 2763 uint64_t bwd_offset; // In units of bt_node_size 2764 } btree_walk_data_t; 2765 2766 static uintptr_t 2767 btree_leftmost_child(uintptr_t addr, mdb_zfs_btree_hdr_t *buf) 2768 { 2769 size_t size = offsetof(zfs_btree_core_t, btc_children) + 2770 sizeof (uintptr_t); 2771 for (;;) { 2772 if (mdb_vread(buf, size, addr) == -1) { 2773 mdb_warn("failed to read at %p\n", addr); 2774 return ((uintptr_t)0ULL); 2775 } 2776 if (!buf->bth_core) 2777 return (addr); 2778 mdb_zfs_btree_core_t *node = (mdb_zfs_btree_core_t *)buf; 2779 addr = node->btc_children[0]; 2780 } 2781 } 2782 2783 static int 2784 btree_walk_step(mdb_walk_state_t *wsp) 2785 { 2786 btree_walk_data_t *bwd = wsp->walk_data; 2787 size_t elem_size = bwd->bwd_btree.bt_elem_size; 2788 if (wsp->walk_addr == 0ULL) 2789 return (WALK_DONE); 2790 2791 if (!bwd->bwd_node->bth_core) { 2792 /* 2793 * For the first element in a leaf node, read in the full 2794 * leaf, since we only had part of it read in before. 2795 */ 2796 if (bwd->bwd_offset == 0) { 2797 if (mdb_vread(bwd->bwd_node, BTREE_LEAF_SIZE, 2798 wsp->walk_addr) == -1) { 2799 mdb_warn("failed to read at %p\n", 2800 wsp->walk_addr); 2801 return (WALK_ERR); 2802 } 2803 } 2804 2805 int status = wsp->walk_callback((uintptr_t)(wsp->walk_addr + 2806 offsetof(mdb_zfs_btree_leaf_t, btl_elems) + 2807 bwd->bwd_offset * elem_size), bwd->bwd_node, 2808 wsp->walk_cbdata); 2809 if (status != WALK_NEXT) 2810 return (status); 2811 bwd->bwd_offset++; 2812 2813 /* Find the next element, if we're at the end of the leaf. */ 2814 while (bwd->bwd_offset == bwd->bwd_node->bth_count) { 2815 uintptr_t par = bwd->bwd_node->bth_parent; 2816 uintptr_t cur = wsp->walk_addr; 2817 wsp->walk_addr = par; 2818 if (par == 0ULL) 2819 return (WALK_NEXT); 2820 2821 size_t size = sizeof (zfs_btree_core_t) + 2822 BTREE_CORE_ELEMS * elem_size; 2823 if (mdb_vread(bwd->bwd_node, size, wsp->walk_addr) == 2824 -1) { 2825 mdb_warn("failed to read at %p\n", 2826 wsp->walk_addr); 2827 return (WALK_ERR); 2828 } 2829 mdb_zfs_btree_core_t *node = 2830 (mdb_zfs_btree_core_t *)bwd->bwd_node; 2831 int i; 2832 for (i = 0; i <= bwd->bwd_node->bth_count; i++) { 2833 if (node->btc_children[i] == cur) 2834 break; 2835 } 2836 if (i > bwd->bwd_node->bth_count) { 2837 mdb_warn("btree parent/child mismatch at " 2838 "%#lx\n", cur); 2839 return (WALK_ERR); 2840 } 2841 bwd->bwd_offset = i; 2842 } 2843 return (WALK_NEXT); 2844 } 2845 2846 if (!bwd->bwd_node->bth_core) { 2847 mdb_warn("Invalid btree node at %#lx\n", wsp->walk_addr); 2848 return (WALK_ERR); 2849 } 2850 mdb_zfs_btree_core_t *node = (mdb_zfs_btree_core_t *)bwd->bwd_node; 2851 int status = wsp->walk_callback((uintptr_t)(wsp->walk_addr + 2852 offsetof(mdb_zfs_btree_core_t, btc_elems) + bwd->bwd_offset * 2853 elem_size), bwd->bwd_node, wsp->walk_cbdata); 2854 if (status != WALK_NEXT) 2855 return (status); 2856 2857 uintptr_t new_child = node->btc_children[bwd->bwd_offset + 1]; 2858 wsp->walk_addr = btree_leftmost_child(new_child, bwd->bwd_node); 2859 if (wsp->walk_addr == 0ULL) 2860 return (WALK_ERR); 2861 2862 bwd->bwd_offset = 0; 2863 return (WALK_NEXT); 2864 } 2865 2866 static int 2867 btree_walk_init(mdb_walk_state_t *wsp) 2868 { 2869 btree_walk_data_t *bwd; 2870 2871 if (wsp->walk_addr == 0ULL) { 2872 mdb_warn("must supply address of zfs_btree_t\n"); 2873 return (WALK_ERR); 2874 } 2875 2876 bwd = mdb_zalloc(sizeof (btree_walk_data_t), UM_SLEEP); 2877 if (mdb_ctf_vread(&bwd->bwd_btree, "zfs_btree_t", "mdb_zfs_btree_t", 2878 wsp->walk_addr, 0) == -1) { 2879 mdb_free(bwd, sizeof (*bwd)); 2880 return (WALK_ERR); 2881 } 2882 2883 if (bwd->bwd_btree.bt_elem_size == 0) { 2884 mdb_warn("invalid or uninitialized btree at %#lx\n", 2885 wsp->walk_addr); 2886 mdb_free(bwd, sizeof (*bwd)); 2887 return (WALK_ERR); 2888 } 2889 2890 size_t size = MAX(BTREE_LEAF_SIZE, sizeof (zfs_btree_core_t) + 2891 BTREE_CORE_ELEMS * bwd->bwd_btree.bt_elem_size); 2892 bwd->bwd_node = mdb_zalloc(size, UM_SLEEP); 2893 2894 uintptr_t node = (uintptr_t)bwd->bwd_btree.bt_root; 2895 if (node == 0ULL) { 2896 wsp->walk_addr = 0ULL; 2897 wsp->walk_data = bwd; 2898 return (WALK_NEXT); 2899 } 2900 node = btree_leftmost_child(node, bwd->bwd_node); 2901 if (node == 0ULL) { 2902 mdb_free(bwd->bwd_node, size); 2903 mdb_free(bwd, sizeof (*bwd)); 2904 return (WALK_ERR); 2905 } 2906 bwd->bwd_offset = 0; 2907 2908 wsp->walk_addr = node; 2909 wsp->walk_data = bwd; 2910 return (WALK_NEXT); 2911 } 2912 2913 static void 2914 btree_walk_fini(mdb_walk_state_t *wsp) 2915 { 2916 btree_walk_data_t *bwd = (btree_walk_data_t *)wsp->walk_data; 2917 2918 if (bwd == NULL) 2919 return; 2920 2921 size_t size = MAX(BTREE_LEAF_SIZE, sizeof (zfs_btree_core_t) + 2922 BTREE_CORE_ELEMS * bwd->bwd_btree.bt_elem_size); 2923 if (bwd->bwd_node != NULL) 2924 mdb_free(bwd->bwd_node, size); 2925 2926 mdb_free(bwd, sizeof (*bwd)); 2927 } 2928 2929 typedef struct mdb_multilist { 2930 uint64_t ml_num_sublists; 2931 uintptr_t ml_sublists; 2932 } mdb_multilist_t; 2933 2934 static int 2935 multilist_walk_step(mdb_walk_state_t *wsp) 2936 { 2937 return (wsp->walk_callback(wsp->walk_addr, wsp->walk_layer, 2938 wsp->walk_cbdata)); 2939 } 2940 2941 static int 2942 multilist_walk_init(mdb_walk_state_t *wsp) 2943 { 2944 mdb_multilist_t ml; 2945 ssize_t sublist_sz; 2946 int list_offset; 2947 size_t i; 2948 2949 if (wsp->walk_addr == 0) { 2950 mdb_warn("must supply address of multilist_t\n"); 2951 return (WALK_ERR); 2952 } 2953 2954 if (mdb_ctf_vread(&ml, "multilist_t", "mdb_multilist_t", 2955 wsp->walk_addr, 0) == -1) { 2956 return (WALK_ERR); 2957 } 2958 2959 if (ml.ml_num_sublists == 0 || ml.ml_sublists == 0) { 2960 mdb_warn("invalid or uninitialized multilist at %#lx\n", 2961 wsp->walk_addr); 2962 return (WALK_ERR); 2963 } 2964 2965 /* mdb_ctf_sizeof_by_name() will print an error for us */ 2966 sublist_sz = mdb_ctf_sizeof_by_name("multilist_sublist_t"); 2967 if (sublist_sz == -1) 2968 return (WALK_ERR); 2969 2970 /* mdb_ctf_offsetof_by_name will print an error for us */ 2971 list_offset = mdb_ctf_offsetof_by_name("multilist_sublist_t", 2972 "mls_list"); 2973 if (list_offset == -1) 2974 return (WALK_ERR); 2975 2976 for (i = 0; i < ml.ml_num_sublists; i++) { 2977 wsp->walk_addr = ml.ml_sublists + i * sublist_sz + list_offset; 2978 2979 if (mdb_layered_walk("list", wsp) == -1) { 2980 mdb_warn("can't walk multilist sublist"); 2981 return (WALK_ERR); 2982 } 2983 } 2984 2985 return (WALK_NEXT); 2986 } 2987 2988 typedef struct mdb_txg_list { 2989 size_t tl_offset; 2990 uintptr_t tl_head[TXG_SIZE]; 2991 } mdb_txg_list_t; 2992 2993 typedef struct txg_list_walk_data { 2994 uintptr_t lw_head[TXG_SIZE]; 2995 int lw_txgoff; 2996 int lw_maxoff; 2997 size_t lw_offset; 2998 void *lw_obj; 2999 } txg_list_walk_data_t; 3000 3001 static int 3002 txg_list_walk_init_common(mdb_walk_state_t *wsp, int txg, int maxoff) 3003 { 3004 txg_list_walk_data_t *lwd; 3005 mdb_txg_list_t list; 3006 int i; 3007 3008 lwd = mdb_alloc(sizeof (txg_list_walk_data_t), UM_SLEEP | UM_GC); 3009 if (mdb_ctf_vread(&list, "txg_list_t", "mdb_txg_list_t", wsp->walk_addr, 3010 0) == -1) { 3011 mdb_warn("failed to read txg_list_t at %#lx", wsp->walk_addr); 3012 return (WALK_ERR); 3013 } 3014 3015 for (i = 0; i < TXG_SIZE; i++) 3016 lwd->lw_head[i] = list.tl_head[i]; 3017 lwd->lw_offset = list.tl_offset; 3018 lwd->lw_obj = mdb_alloc(lwd->lw_offset + sizeof (txg_node_t), 3019 UM_SLEEP | UM_GC); 3020 lwd->lw_txgoff = txg; 3021 lwd->lw_maxoff = maxoff; 3022 3023 wsp->walk_addr = lwd->lw_head[lwd->lw_txgoff]; 3024 wsp->walk_data = lwd; 3025 3026 return (WALK_NEXT); 3027 } 3028 3029 static int 3030 txg_list_walk_init(mdb_walk_state_t *wsp) 3031 { 3032 return (txg_list_walk_init_common(wsp, 0, TXG_SIZE-1)); 3033 } 3034 3035 static int 3036 txg_list0_walk_init(mdb_walk_state_t *wsp) 3037 { 3038 return (txg_list_walk_init_common(wsp, 0, 0)); 3039 } 3040 3041 static int 3042 txg_list1_walk_init(mdb_walk_state_t *wsp) 3043 { 3044 return (txg_list_walk_init_common(wsp, 1, 1)); 3045 } 3046 3047 static int 3048 txg_list2_walk_init(mdb_walk_state_t *wsp) 3049 { 3050 return (txg_list_walk_init_common(wsp, 2, 2)); 3051 } 3052 3053 static int 3054 txg_list3_walk_init(mdb_walk_state_t *wsp) 3055 { 3056 return (txg_list_walk_init_common(wsp, 3, 3)); 3057 } 3058 3059 static int 3060 txg_list_walk_step(mdb_walk_state_t *wsp) 3061 { 3062 txg_list_walk_data_t *lwd = wsp->walk_data; 3063 uintptr_t addr; 3064 txg_node_t *node; 3065 int status; 3066 3067 while (wsp->walk_addr == 0 && lwd->lw_txgoff < lwd->lw_maxoff) { 3068 lwd->lw_txgoff++; 3069 wsp->walk_addr = lwd->lw_head[lwd->lw_txgoff]; 3070 } 3071 3072 if (wsp->walk_addr == 0) 3073 return (WALK_DONE); 3074 3075 addr = wsp->walk_addr - lwd->lw_offset; 3076 3077 if (mdb_vread(lwd->lw_obj, 3078 lwd->lw_offset + sizeof (txg_node_t), addr) == -1) { 3079 mdb_warn("failed to read list element at %#lx", addr); 3080 return (WALK_ERR); 3081 } 3082 3083 status = wsp->walk_callback(addr, lwd->lw_obj, wsp->walk_cbdata); 3084 node = (txg_node_t *)((uintptr_t)lwd->lw_obj + lwd->lw_offset); 3085 wsp->walk_addr = (uintptr_t)node->tn_next[lwd->lw_txgoff]; 3086 3087 return (status); 3088 } 3089 3090 /* 3091 * ::walk spa 3092 * 3093 * Walk all named spa_t structures in the namespace. This is nothing more than 3094 * a layered avl walk. 3095 */ 3096 static int 3097 spa_walk_init(mdb_walk_state_t *wsp) 3098 { 3099 GElf_Sym sym; 3100 3101 if (wsp->walk_addr != 0) { 3102 mdb_warn("spa walk only supports global walks\n"); 3103 return (WALK_ERR); 3104 } 3105 3106 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "spa_namespace_avl", &sym) == -1) { 3107 mdb_warn("failed to find symbol 'spa_namespace_avl'"); 3108 return (WALK_ERR); 3109 } 3110 3111 wsp->walk_addr = (uintptr_t)sym.st_value; 3112 3113 if (mdb_layered_walk("avl", wsp) == -1) { 3114 mdb_warn("failed to walk 'avl'\n"); 3115 return (WALK_ERR); 3116 } 3117 3118 return (WALK_NEXT); 3119 } 3120 3121 static int 3122 spa_walk_step(mdb_walk_state_t *wsp) 3123 { 3124 return (wsp->walk_callback(wsp->walk_addr, NULL, wsp->walk_cbdata)); 3125 } 3126 3127 /* 3128 * [addr]::walk zio 3129 * 3130 * Walk all active zio_t structures on the system. This is simply a layered 3131 * walk on top of ::walk zio_cache, with the optional ability to limit the 3132 * structures to a particular pool. 3133 */ 3134 static int 3135 zio_walk_init(mdb_walk_state_t *wsp) 3136 { 3137 wsp->walk_data = (void *)wsp->walk_addr; 3138 3139 if (mdb_layered_walk("zio_cache", wsp) == -1) { 3140 mdb_warn("failed to walk 'zio_cache'\n"); 3141 return (WALK_ERR); 3142 } 3143 3144 return (WALK_NEXT); 3145 } 3146 3147 static int 3148 zio_walk_step(mdb_walk_state_t *wsp) 3149 { 3150 mdb_zio_t zio; 3151 uintptr_t spa = (uintptr_t)wsp->walk_data; 3152 3153 if (mdb_ctf_vread(&zio, ZFS_STRUCT "zio", "mdb_zio_t", 3154 wsp->walk_addr, 0) == -1) 3155 return (WALK_ERR); 3156 3157 if (spa != 0 && spa != zio.io_spa) 3158 return (WALK_NEXT); 3159 3160 return (wsp->walk_callback(wsp->walk_addr, &zio, wsp->walk_cbdata)); 3161 } 3162 3163 /* 3164 * [addr]::walk zio_root 3165 * 3166 * Walk only root zio_t structures, optionally for a particular spa_t. 3167 */ 3168 static int 3169 zio_walk_root_step(mdb_walk_state_t *wsp) 3170 { 3171 mdb_zio_t zio; 3172 uintptr_t spa = (uintptr_t)wsp->walk_data; 3173 3174 if (mdb_ctf_vread(&zio, ZFS_STRUCT "zio", "mdb_zio_t", 3175 wsp->walk_addr, 0) == -1) 3176 return (WALK_ERR); 3177 3178 if (spa != 0 && spa != zio.io_spa) 3179 return (WALK_NEXT); 3180 3181 /* If the parent list is not empty, ignore */ 3182 if (zio.io_parent_list.list_head.list_next != 3183 wsp->walk_addr + 3184 mdb_ctf_offsetof_by_name(ZFS_STRUCT "zio", "io_parent_list") + 3185 mdb_ctf_offsetof_by_name("struct list", "list_head")) 3186 return (WALK_NEXT); 3187 3188 return (wsp->walk_callback(wsp->walk_addr, &zio, wsp->walk_cbdata)); 3189 } 3190 3191 /* 3192 * ::zfs_blkstats 3193 * 3194 * -v print verbose per-level information 3195 * 3196 */ 3197 static int 3198 zfs_blkstats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3199 { 3200 boolean_t verbose = B_FALSE; 3201 zfs_all_blkstats_t stats; 3202 dmu_object_type_t t; 3203 zfs_blkstat_t *tzb; 3204 uint64_t ditto; 3205 3206 if (mdb_getopts(argc, argv, 3207 'v', MDB_OPT_SETBITS, TRUE, &verbose, 3208 NULL) != argc) 3209 return (DCMD_USAGE); 3210 3211 if (!(flags & DCMD_ADDRSPEC)) 3212 return (DCMD_USAGE); 3213 3214 if (GETMEMB(addr, "spa", spa_dsl_pool, addr) || 3215 GETMEMB(addr, "dsl_pool", dp_blkstats, addr) || 3216 mdb_vread(&stats, sizeof (zfs_all_blkstats_t), addr) == -1) { 3217 mdb_warn("failed to read data at %p;", addr); 3218 mdb_printf("maybe no stats? run \"zpool scrub\" first."); 3219 return (DCMD_ERR); 3220 } 3221 3222 tzb = &stats.zab_type[DN_MAX_LEVELS][DMU_OT_TOTAL]; 3223 if (tzb->zb_gangs != 0) { 3224 mdb_printf("Ganged blocks: %llu\n", 3225 (longlong_t)tzb->zb_gangs); 3226 } 3227 3228 ditto = tzb->zb_ditto_2_of_2_samevdev + tzb->zb_ditto_2_of_3_samevdev + 3229 tzb->zb_ditto_3_of_3_samevdev; 3230 if (ditto != 0) { 3231 mdb_printf("Dittoed blocks on same vdev: %llu\n", 3232 (longlong_t)ditto); 3233 } 3234 3235 mdb_printf("\nBlocks\tLSIZE\tPSIZE\tASIZE" 3236 "\t avg\t comp\t%%Total\tType\n"); 3237 3238 for (t = 0; t <= DMU_OT_TOTAL; t++) { 3239 char csize[MDB_NICENUM_BUFLEN], lsize[MDB_NICENUM_BUFLEN]; 3240 char psize[MDB_NICENUM_BUFLEN], asize[MDB_NICENUM_BUFLEN]; 3241 char avg[MDB_NICENUM_BUFLEN]; 3242 char comp[MDB_NICENUM_BUFLEN], pct[MDB_NICENUM_BUFLEN]; 3243 char typename[64]; 3244 int l; 3245 3246 3247 if (t == DMU_OT_DEFERRED) 3248 strcpy(typename, "deferred free"); 3249 else if (t == DMU_OT_OTHER) 3250 strcpy(typename, "other"); 3251 else if (t == DMU_OT_TOTAL) 3252 strcpy(typename, "Total"); 3253 else if (enum_lookup("enum dmu_object_type", 3254 t, "DMU_OT_", sizeof (typename), typename) == -1) { 3255 mdb_warn("failed to read type name"); 3256 return (DCMD_ERR); 3257 } 3258 3259 if (stats.zab_type[DN_MAX_LEVELS][t].zb_asize == 0) 3260 continue; 3261 3262 for (l = -1; l < DN_MAX_LEVELS; l++) { 3263 int level = (l == -1 ? DN_MAX_LEVELS : l); 3264 zfs_blkstat_t *zb = &stats.zab_type[level][t]; 3265 3266 if (zb->zb_asize == 0) 3267 continue; 3268 3269 /* 3270 * Don't print each level unless requested. 3271 */ 3272 if (!verbose && level != DN_MAX_LEVELS) 3273 continue; 3274 3275 /* 3276 * If all the space is level 0, don't print the 3277 * level 0 separately. 3278 */ 3279 if (level == 0 && zb->zb_asize == 3280 stats.zab_type[DN_MAX_LEVELS][t].zb_asize) 3281 continue; 3282 3283 mdb_nicenum(zb->zb_count, csize); 3284 mdb_nicenum(zb->zb_lsize, lsize); 3285 mdb_nicenum(zb->zb_psize, psize); 3286 mdb_nicenum(zb->zb_asize, asize); 3287 mdb_nicenum(zb->zb_asize / zb->zb_count, avg); 3288 (void) mdb_snprintfrac(comp, MDB_NICENUM_BUFLEN, 3289 zb->zb_lsize, zb->zb_psize, 2); 3290 (void) mdb_snprintfrac(pct, MDB_NICENUM_BUFLEN, 3291 100 * zb->zb_asize, tzb->zb_asize, 2); 3292 3293 mdb_printf("%6s\t%5s\t%5s\t%5s\t%5s" 3294 "\t%5s\t%6s\t", 3295 csize, lsize, psize, asize, avg, comp, pct); 3296 3297 if (level == DN_MAX_LEVELS) 3298 mdb_printf("%s\n", typename); 3299 else 3300 mdb_printf(" L%d %s\n", 3301 level, typename); 3302 } 3303 } 3304 3305 return (DCMD_OK); 3306 } 3307 3308 typedef struct mdb_reference { 3309 uintptr_t ref_holder; 3310 uintptr_t ref_removed; 3311 uint64_t ref_number; 3312 } mdb_reference_t; 3313 3314 /* ARGSUSED */ 3315 static int 3316 reference_cb(uintptr_t addr, const void *ignored, void *arg) 3317 { 3318 mdb_reference_t ref; 3319 boolean_t holder_is_str = B_FALSE; 3320 char holder_str[128]; 3321 boolean_t removed = (boolean_t)arg; 3322 3323 if (mdb_ctf_vread(&ref, "reference_t", "mdb_reference_t", addr, 3324 0) == -1) 3325 return (DCMD_ERR); 3326 3327 if (mdb_readstr(holder_str, sizeof (holder_str), 3328 ref.ref_holder) != -1) 3329 holder_is_str = strisprint(holder_str); 3330 3331 if (removed) 3332 mdb_printf("removed "); 3333 mdb_printf("reference "); 3334 if (ref.ref_number != 1) 3335 mdb_printf("with count=%llu ", ref.ref_number); 3336 mdb_printf("with tag %lx", ref.ref_holder); 3337 if (holder_is_str) 3338 mdb_printf(" \"%s\"", holder_str); 3339 mdb_printf(", held at:\n"); 3340 3341 (void) mdb_call_dcmd("whatis", addr, DCMD_ADDRSPEC, 0, NULL); 3342 3343 if (removed) { 3344 mdb_printf("removed at:\n"); 3345 (void) mdb_call_dcmd("whatis", ref.ref_removed, 3346 DCMD_ADDRSPEC, 0, NULL); 3347 } 3348 3349 mdb_printf("\n"); 3350 3351 return (WALK_NEXT); 3352 } 3353 3354 typedef struct mdb_zfs_refcount { 3355 uint64_t rc_count; 3356 } mdb_zfs_refcount_t; 3357 3358 typedef struct mdb_zfs_refcount_removed { 3359 uint_t rc_removed_count; 3360 } mdb_zfs_refcount_removed_t; 3361 3362 typedef struct mdb_zfs_refcount_tracked { 3363 boolean_t rc_tracked; 3364 } mdb_zfs_refcount_tracked_t; 3365 3366 /* ARGSUSED */ 3367 static int 3368 zfs_refcount(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3369 { 3370 mdb_zfs_refcount_t rc; 3371 mdb_zfs_refcount_removed_t rcr; 3372 mdb_zfs_refcount_tracked_t rct; 3373 int off; 3374 boolean_t released = B_FALSE; 3375 3376 if (!(flags & DCMD_ADDRSPEC)) 3377 return (DCMD_USAGE); 3378 3379 if (mdb_getopts(argc, argv, 3380 'r', MDB_OPT_SETBITS, B_TRUE, &released, 3381 NULL) != argc) 3382 return (DCMD_USAGE); 3383 3384 if (mdb_ctf_vread(&rc, "zfs_refcount_t", "mdb_zfs_refcount_t", addr, 3385 0) == -1) 3386 return (DCMD_ERR); 3387 3388 if (mdb_ctf_vread(&rcr, "zfs_refcount_t", "mdb_zfs_refcount_removed_t", 3389 addr, MDB_CTF_VREAD_QUIET) == -1) { 3390 mdb_printf("zfs_refcount_t at %p has %llu holds (untracked)\n", 3391 addr, (longlong_t)rc.rc_count); 3392 return (DCMD_OK); 3393 } 3394 3395 if (mdb_ctf_vread(&rct, "zfs_refcount_t", "mdb_zfs_refcount_tracked_t", 3396 addr, MDB_CTF_VREAD_QUIET) == -1) { 3397 /* If this is an old target, it might be tracked. */ 3398 rct.rc_tracked = B_TRUE; 3399 } 3400 3401 mdb_printf("zfs_refcount_t at %p has %llu current holds, " 3402 "%llu recently released holds\n", 3403 addr, (longlong_t)rc.rc_count, (longlong_t)rcr.rc_removed_count); 3404 3405 if (rct.rc_tracked && rc.rc_count > 0) 3406 mdb_printf("current holds:\n"); 3407 off = mdb_ctf_offsetof_by_name("zfs_refcount_t", "rc_tree"); 3408 if (off == -1) 3409 return (DCMD_ERR); 3410 mdb_pwalk("avl", reference_cb, (void *)B_FALSE, addr + off); 3411 3412 if (released && rcr.rc_removed_count > 0) { 3413 mdb_printf("released holds:\n"); 3414 3415 off = mdb_ctf_offsetof_by_name("zfs_refcount_t", "rc_removed"); 3416 if (off == -1) 3417 return (DCMD_ERR); 3418 mdb_pwalk("list", reference_cb, (void *)B_TRUE, addr + off); 3419 } 3420 3421 return (DCMD_OK); 3422 } 3423 3424 /* ARGSUSED */ 3425 static int 3426 sa_attr_table(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3427 { 3428 sa_attr_table_t *table; 3429 sa_os_t sa_os; 3430 char *name; 3431 int i; 3432 3433 if (mdb_vread(&sa_os, sizeof (sa_os_t), addr) == -1) { 3434 mdb_warn("failed to read sa_os at %p", addr); 3435 return (DCMD_ERR); 3436 } 3437 3438 table = mdb_alloc(sizeof (sa_attr_table_t) * sa_os.sa_num_attrs, 3439 UM_SLEEP | UM_GC); 3440 name = mdb_alloc(MAXPATHLEN, UM_SLEEP | UM_GC); 3441 3442 if (mdb_vread(table, sizeof (sa_attr_table_t) * sa_os.sa_num_attrs, 3443 (uintptr_t)sa_os.sa_attr_table) == -1) { 3444 mdb_warn("failed to read sa_os at %p", addr); 3445 return (DCMD_ERR); 3446 } 3447 3448 mdb_printf("%<u>%-10s %-10s %-10s %-10s %s%</u>\n", 3449 "ATTR ID", "REGISTERED", "LENGTH", "BSWAP", "NAME"); 3450 for (i = 0; i != sa_os.sa_num_attrs; i++) { 3451 mdb_readstr(name, MAXPATHLEN, (uintptr_t)table[i].sa_name); 3452 mdb_printf("%5x %8x %8x %8x %-s\n", 3453 (int)table[i].sa_attr, (int)table[i].sa_registered, 3454 (int)table[i].sa_length, table[i].sa_byteswap, name); 3455 } 3456 3457 return (DCMD_OK); 3458 } 3459 3460 static int 3461 sa_get_off_table(uintptr_t addr, uint32_t **off_tab, int attr_count) 3462 { 3463 uintptr_t idx_table; 3464 3465 if (GETMEMB(addr, "sa_idx_tab", sa_idx_tab, idx_table)) { 3466 mdb_printf("can't find offset table in sa_idx_tab\n"); 3467 return (-1); 3468 } 3469 3470 *off_tab = mdb_alloc(attr_count * sizeof (uint32_t), 3471 UM_SLEEP | UM_GC); 3472 3473 if (mdb_vread(*off_tab, 3474 attr_count * sizeof (uint32_t), idx_table) == -1) { 3475 mdb_warn("failed to attribute offset table %p", idx_table); 3476 return (-1); 3477 } 3478 3479 return (DCMD_OK); 3480 } 3481 3482 /*ARGSUSED*/ 3483 static int 3484 sa_attr_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3485 { 3486 uint32_t *offset_tab; 3487 int attr_count; 3488 uint64_t attr_id; 3489 uintptr_t attr_addr; 3490 uintptr_t bonus_tab, spill_tab; 3491 uintptr_t db_bonus, db_spill; 3492 uintptr_t os, os_sa; 3493 uintptr_t db_data; 3494 3495 if (argc != 1) 3496 return (DCMD_USAGE); 3497 3498 if (argv[0].a_type == MDB_TYPE_STRING) 3499 attr_id = mdb_strtoull(argv[0].a_un.a_str); 3500 else 3501 return (DCMD_USAGE); 3502 3503 if (GETMEMB(addr, "sa_handle", sa_bonus_tab, bonus_tab) || 3504 GETMEMB(addr, "sa_handle", sa_spill_tab, spill_tab) || 3505 GETMEMB(addr, "sa_handle", sa_os, os) || 3506 GETMEMB(addr, "sa_handle", sa_bonus, db_bonus) || 3507 GETMEMB(addr, "sa_handle", sa_spill, db_spill)) { 3508 mdb_printf("Can't find necessary information in sa_handle " 3509 "in sa_handle\n"); 3510 return (DCMD_ERR); 3511 } 3512 3513 if (GETMEMB(os, "objset", os_sa, os_sa)) { 3514 mdb_printf("Can't find os_sa in objset\n"); 3515 return (DCMD_ERR); 3516 } 3517 3518 if (GETMEMB(os_sa, "sa_os", sa_num_attrs, attr_count)) { 3519 mdb_printf("Can't find sa_num_attrs\n"); 3520 return (DCMD_ERR); 3521 } 3522 3523 if (attr_id > attr_count) { 3524 mdb_printf("attribute id number is out of range\n"); 3525 return (DCMD_ERR); 3526 } 3527 3528 if (bonus_tab) { 3529 if (sa_get_off_table(bonus_tab, &offset_tab, 3530 attr_count) == -1) { 3531 return (DCMD_ERR); 3532 } 3533 3534 if (GETMEMB(db_bonus, "dmu_buf", db_data, db_data)) { 3535 mdb_printf("can't find db_data in bonus dbuf\n"); 3536 return (DCMD_ERR); 3537 } 3538 } 3539 3540 if (bonus_tab && !TOC_ATTR_PRESENT(offset_tab[attr_id]) && 3541 spill_tab == 0) { 3542 mdb_printf("Attribute does not exist\n"); 3543 return (DCMD_ERR); 3544 } else if (!TOC_ATTR_PRESENT(offset_tab[attr_id]) && spill_tab) { 3545 if (sa_get_off_table(spill_tab, &offset_tab, 3546 attr_count) == -1) { 3547 return (DCMD_ERR); 3548 } 3549 if (GETMEMB(db_spill, "dmu_buf", db_data, db_data)) { 3550 mdb_printf("can't find db_data in spill dbuf\n"); 3551 return (DCMD_ERR); 3552 } 3553 if (!TOC_ATTR_PRESENT(offset_tab[attr_id])) { 3554 mdb_printf("Attribute does not exist\n"); 3555 return (DCMD_ERR); 3556 } 3557 } 3558 attr_addr = db_data + TOC_OFF(offset_tab[attr_id]); 3559 mdb_printf("%p\n", attr_addr); 3560 return (DCMD_OK); 3561 } 3562 3563 /* ARGSUSED */ 3564 static int 3565 zfs_ace_print_common(uintptr_t addr, uint_t flags, 3566 uint64_t id, uint32_t access_mask, uint16_t ace_flags, 3567 uint16_t ace_type, int verbose) 3568 { 3569 if (DCMD_HDRSPEC(flags) && !verbose) 3570 mdb_printf("%<u>%-?s %-8s %-8s %-8s %s%</u>\n", 3571 "ADDR", "FLAGS", "MASK", "TYPE", "ID"); 3572 3573 if (!verbose) { 3574 mdb_printf("%0?p %-8x %-8x %-8x %-llx\n", addr, 3575 ace_flags, access_mask, ace_type, id); 3576 return (DCMD_OK); 3577 } 3578 3579 switch (ace_flags & ACE_TYPE_FLAGS) { 3580 case ACE_OWNER: 3581 mdb_printf("owner@:"); 3582 break; 3583 case (ACE_IDENTIFIER_GROUP | ACE_GROUP): 3584 mdb_printf("group@:"); 3585 break; 3586 case ACE_EVERYONE: 3587 mdb_printf("everyone@:"); 3588 break; 3589 case ACE_IDENTIFIER_GROUP: 3590 mdb_printf("group:%llx:", (u_longlong_t)id); 3591 break; 3592 case 0: /* User entry */ 3593 mdb_printf("user:%llx:", (u_longlong_t)id); 3594 break; 3595 } 3596 3597 /* print out permission mask */ 3598 if (access_mask & ACE_READ_DATA) 3599 mdb_printf("r"); 3600 else 3601 mdb_printf("-"); 3602 if (access_mask & ACE_WRITE_DATA) 3603 mdb_printf("w"); 3604 else 3605 mdb_printf("-"); 3606 if (access_mask & ACE_EXECUTE) 3607 mdb_printf("x"); 3608 else 3609 mdb_printf("-"); 3610 if (access_mask & ACE_APPEND_DATA) 3611 mdb_printf("p"); 3612 else 3613 mdb_printf("-"); 3614 if (access_mask & ACE_DELETE) 3615 mdb_printf("d"); 3616 else 3617 mdb_printf("-"); 3618 if (access_mask & ACE_DELETE_CHILD) 3619 mdb_printf("D"); 3620 else 3621 mdb_printf("-"); 3622 if (access_mask & ACE_READ_ATTRIBUTES) 3623 mdb_printf("a"); 3624 else 3625 mdb_printf("-"); 3626 if (access_mask & ACE_WRITE_ATTRIBUTES) 3627 mdb_printf("A"); 3628 else 3629 mdb_printf("-"); 3630 if (access_mask & ACE_READ_NAMED_ATTRS) 3631 mdb_printf("R"); 3632 else 3633 mdb_printf("-"); 3634 if (access_mask & ACE_WRITE_NAMED_ATTRS) 3635 mdb_printf("W"); 3636 else 3637 mdb_printf("-"); 3638 if (access_mask & ACE_READ_ACL) 3639 mdb_printf("c"); 3640 else 3641 mdb_printf("-"); 3642 if (access_mask & ACE_WRITE_ACL) 3643 mdb_printf("C"); 3644 else 3645 mdb_printf("-"); 3646 if (access_mask & ACE_WRITE_OWNER) 3647 mdb_printf("o"); 3648 else 3649 mdb_printf("-"); 3650 if (access_mask & ACE_SYNCHRONIZE) 3651 mdb_printf("s"); 3652 else 3653 mdb_printf("-"); 3654 3655 mdb_printf(":"); 3656 3657 /* Print out inheritance flags */ 3658 if (ace_flags & ACE_FILE_INHERIT_ACE) 3659 mdb_printf("f"); 3660 else 3661 mdb_printf("-"); 3662 if (ace_flags & ACE_DIRECTORY_INHERIT_ACE) 3663 mdb_printf("d"); 3664 else 3665 mdb_printf("-"); 3666 if (ace_flags & ACE_INHERIT_ONLY_ACE) 3667 mdb_printf("i"); 3668 else 3669 mdb_printf("-"); 3670 if (ace_flags & ACE_NO_PROPAGATE_INHERIT_ACE) 3671 mdb_printf("n"); 3672 else 3673 mdb_printf("-"); 3674 if (ace_flags & ACE_SUCCESSFUL_ACCESS_ACE_FLAG) 3675 mdb_printf("S"); 3676 else 3677 mdb_printf("-"); 3678 if (ace_flags & ACE_FAILED_ACCESS_ACE_FLAG) 3679 mdb_printf("F"); 3680 else 3681 mdb_printf("-"); 3682 if (ace_flags & ACE_INHERITED_ACE) 3683 mdb_printf("I"); 3684 else 3685 mdb_printf("-"); 3686 3687 switch (ace_type) { 3688 case ACE_ACCESS_ALLOWED_ACE_TYPE: 3689 mdb_printf(":allow\n"); 3690 break; 3691 case ACE_ACCESS_DENIED_ACE_TYPE: 3692 mdb_printf(":deny\n"); 3693 break; 3694 case ACE_SYSTEM_AUDIT_ACE_TYPE: 3695 mdb_printf(":audit\n"); 3696 break; 3697 case ACE_SYSTEM_ALARM_ACE_TYPE: 3698 mdb_printf(":alarm\n"); 3699 break; 3700 default: 3701 mdb_printf(":?\n"); 3702 } 3703 return (DCMD_OK); 3704 } 3705 3706 /* ARGSUSED */ 3707 static int 3708 zfs_ace_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3709 { 3710 zfs_ace_t zace; 3711 int verbose = FALSE; 3712 uint64_t id; 3713 3714 if (!(flags & DCMD_ADDRSPEC)) 3715 return (DCMD_USAGE); 3716 3717 if (mdb_getopts(argc, argv, 3718 'v', MDB_OPT_SETBITS, TRUE, &verbose, TRUE, NULL) != argc) 3719 return (DCMD_USAGE); 3720 3721 if (mdb_vread(&zace, sizeof (zfs_ace_t), addr) == -1) { 3722 mdb_warn("failed to read zfs_ace_t"); 3723 return (DCMD_ERR); 3724 } 3725 3726 if ((zace.z_hdr.z_flags & ACE_TYPE_FLAGS) == 0 || 3727 (zace.z_hdr.z_flags & ACE_TYPE_FLAGS) == ACE_IDENTIFIER_GROUP) 3728 id = zace.z_fuid; 3729 else 3730 id = -1; 3731 3732 return (zfs_ace_print_common(addr, flags, id, zace.z_hdr.z_access_mask, 3733 zace.z_hdr.z_flags, zace.z_hdr.z_type, verbose)); 3734 } 3735 3736 /* ARGSUSED */ 3737 static int 3738 zfs_ace0_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3739 { 3740 ace_t ace; 3741 uint64_t id; 3742 int verbose = FALSE; 3743 3744 if (!(flags & DCMD_ADDRSPEC)) 3745 return (DCMD_USAGE); 3746 3747 if (mdb_getopts(argc, argv, 3748 'v', MDB_OPT_SETBITS, TRUE, &verbose, TRUE, NULL) != argc) 3749 return (DCMD_USAGE); 3750 3751 if (mdb_vread(&ace, sizeof (ace_t), addr) == -1) { 3752 mdb_warn("failed to read ace_t"); 3753 return (DCMD_ERR); 3754 } 3755 3756 if ((ace.a_flags & ACE_TYPE_FLAGS) == 0 || 3757 (ace.a_flags & ACE_TYPE_FLAGS) == ACE_IDENTIFIER_GROUP) 3758 id = ace.a_who; 3759 else 3760 id = -1; 3761 3762 return (zfs_ace_print_common(addr, flags, id, ace.a_access_mask, 3763 ace.a_flags, ace.a_type, verbose)); 3764 } 3765 3766 typedef struct acl_dump_args { 3767 int a_argc; 3768 const mdb_arg_t *a_argv; 3769 uint16_t a_version; 3770 int a_flags; 3771 } acl_dump_args_t; 3772 3773 /* ARGSUSED */ 3774 static int 3775 acl_aces_cb(uintptr_t addr, const void *unknown, void *arg) 3776 { 3777 acl_dump_args_t *acl_args = (acl_dump_args_t *)arg; 3778 3779 if (acl_args->a_version == 1) { 3780 if (mdb_call_dcmd("zfs_ace", addr, 3781 DCMD_ADDRSPEC|acl_args->a_flags, acl_args->a_argc, 3782 acl_args->a_argv) != DCMD_OK) { 3783 return (WALK_ERR); 3784 } 3785 } else { 3786 if (mdb_call_dcmd("zfs_ace0", addr, 3787 DCMD_ADDRSPEC|acl_args->a_flags, acl_args->a_argc, 3788 acl_args->a_argv) != DCMD_OK) { 3789 return (WALK_ERR); 3790 } 3791 } 3792 acl_args->a_flags = DCMD_LOOP; 3793 return (WALK_NEXT); 3794 } 3795 3796 /* ARGSUSED */ 3797 static int 3798 acl_cb(uintptr_t addr, const void *unknown, void *arg) 3799 { 3800 acl_dump_args_t *acl_args = (acl_dump_args_t *)arg; 3801 3802 if (acl_args->a_version == 1) { 3803 if (mdb_pwalk("zfs_acl_node_aces", acl_aces_cb, 3804 arg, addr) != 0) { 3805 mdb_warn("can't walk ACEs"); 3806 return (DCMD_ERR); 3807 } 3808 } else { 3809 if (mdb_pwalk("zfs_acl_node_aces0", acl_aces_cb, 3810 arg, addr) != 0) { 3811 mdb_warn("can't walk ACEs"); 3812 return (DCMD_ERR); 3813 } 3814 } 3815 return (WALK_NEXT); 3816 } 3817 3818 /* ARGSUSED */ 3819 static int 3820 zfs_acl_dump(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3821 { 3822 zfs_acl_t zacl; 3823 int verbose = FALSE; 3824 acl_dump_args_t acl_args; 3825 3826 if (!(flags & DCMD_ADDRSPEC)) 3827 return (DCMD_USAGE); 3828 3829 if (mdb_getopts(argc, argv, 3830 'v', MDB_OPT_SETBITS, TRUE, &verbose, NULL) != argc) 3831 return (DCMD_USAGE); 3832 3833 if (mdb_vread(&zacl, sizeof (zfs_acl_t), addr) == -1) { 3834 mdb_warn("failed to read zfs_acl_t"); 3835 return (DCMD_ERR); 3836 } 3837 3838 acl_args.a_argc = argc; 3839 acl_args.a_argv = argv; 3840 acl_args.a_version = zacl.z_version; 3841 acl_args.a_flags = DCMD_LOOPFIRST; 3842 3843 if (mdb_pwalk("zfs_acl_node", acl_cb, &acl_args, addr) != 0) { 3844 mdb_warn("can't walk ACL"); 3845 return (DCMD_ERR); 3846 } 3847 3848 return (DCMD_OK); 3849 } 3850 3851 /* ARGSUSED */ 3852 static int 3853 zfs_acl_node_walk_init(mdb_walk_state_t *wsp) 3854 { 3855 if (wsp->walk_addr == 0) { 3856 mdb_warn("must supply address of zfs_acl_node_t\n"); 3857 return (WALK_ERR); 3858 } 3859 3860 wsp->walk_addr += 3861 mdb_ctf_offsetof_by_name(ZFS_STRUCT "zfs_acl", "z_acl"); 3862 3863 if (mdb_layered_walk("list", wsp) == -1) { 3864 mdb_warn("failed to walk 'list'\n"); 3865 return (WALK_ERR); 3866 } 3867 3868 return (WALK_NEXT); 3869 } 3870 3871 static int 3872 zfs_acl_node_walk_step(mdb_walk_state_t *wsp) 3873 { 3874 zfs_acl_node_t aclnode; 3875 3876 if (mdb_vread(&aclnode, sizeof (zfs_acl_node_t), 3877 wsp->walk_addr) == -1) { 3878 mdb_warn("failed to read zfs_acl_node at %p", wsp->walk_addr); 3879 return (WALK_ERR); 3880 } 3881 3882 return (wsp->walk_callback(wsp->walk_addr, &aclnode, wsp->walk_cbdata)); 3883 } 3884 3885 typedef struct ace_walk_data { 3886 int ace_count; 3887 int ace_version; 3888 } ace_walk_data_t; 3889 3890 static int 3891 zfs_aces_walk_init_common(mdb_walk_state_t *wsp, int version, 3892 int ace_count, uintptr_t ace_data) 3893 { 3894 ace_walk_data_t *ace_walk_data; 3895 3896 if (wsp->walk_addr == 0) { 3897 mdb_warn("must supply address of zfs_acl_node_t\n"); 3898 return (WALK_ERR); 3899 } 3900 3901 ace_walk_data = mdb_alloc(sizeof (ace_walk_data_t), UM_SLEEP | UM_GC); 3902 3903 ace_walk_data->ace_count = ace_count; 3904 ace_walk_data->ace_version = version; 3905 3906 wsp->walk_addr = ace_data; 3907 wsp->walk_data = ace_walk_data; 3908 3909 return (WALK_NEXT); 3910 } 3911 3912 static int 3913 zfs_acl_node_aces_walk_init_common(mdb_walk_state_t *wsp, int version) 3914 { 3915 static int gotid; 3916 static mdb_ctf_id_t acl_id; 3917 int z_ace_count; 3918 uintptr_t z_acldata; 3919 3920 if (!gotid) { 3921 if (mdb_ctf_lookup_by_name("struct zfs_acl_node", 3922 &acl_id) == -1) { 3923 mdb_warn("couldn't find struct zfs_acl_node"); 3924 return (DCMD_ERR); 3925 } 3926 gotid = TRUE; 3927 } 3928 3929 if (GETMEMBID(wsp->walk_addr, &acl_id, z_ace_count, z_ace_count)) { 3930 return (DCMD_ERR); 3931 } 3932 if (GETMEMBID(wsp->walk_addr, &acl_id, z_acldata, z_acldata)) { 3933 return (DCMD_ERR); 3934 } 3935 3936 return (zfs_aces_walk_init_common(wsp, version, 3937 z_ace_count, z_acldata)); 3938 } 3939 3940 /* ARGSUSED */ 3941 static int 3942 zfs_acl_node_aces_walk_init(mdb_walk_state_t *wsp) 3943 { 3944 return (zfs_acl_node_aces_walk_init_common(wsp, 1)); 3945 } 3946 3947 /* ARGSUSED */ 3948 static int 3949 zfs_acl_node_aces0_walk_init(mdb_walk_state_t *wsp) 3950 { 3951 return (zfs_acl_node_aces_walk_init_common(wsp, 0)); 3952 } 3953 3954 static int 3955 zfs_aces_walk_step(mdb_walk_state_t *wsp) 3956 { 3957 ace_walk_data_t *ace_data = wsp->walk_data; 3958 zfs_ace_t zace; 3959 ace_t *acep; 3960 int status; 3961 int entry_type; 3962 int allow_type; 3963 uintptr_t ptr; 3964 3965 if (ace_data->ace_count == 0) 3966 return (WALK_DONE); 3967 3968 if (mdb_vread(&zace, sizeof (zfs_ace_t), wsp->walk_addr) == -1) { 3969 mdb_warn("failed to read zfs_ace_t at %#lx", 3970 wsp->walk_addr); 3971 return (WALK_ERR); 3972 } 3973 3974 switch (ace_data->ace_version) { 3975 case 0: 3976 acep = (ace_t *)&zace; 3977 entry_type = acep->a_flags & ACE_TYPE_FLAGS; 3978 allow_type = acep->a_type; 3979 break; 3980 case 1: 3981 entry_type = zace.z_hdr.z_flags & ACE_TYPE_FLAGS; 3982 allow_type = zace.z_hdr.z_type; 3983 break; 3984 default: 3985 return (WALK_ERR); 3986 } 3987 3988 ptr = (uintptr_t)wsp->walk_addr; 3989 switch (entry_type) { 3990 case ACE_OWNER: 3991 case ACE_EVERYONE: 3992 case (ACE_IDENTIFIER_GROUP | ACE_GROUP): 3993 ptr += ace_data->ace_version == 0 ? 3994 sizeof (ace_t) : sizeof (zfs_ace_hdr_t); 3995 break; 3996 case ACE_IDENTIFIER_GROUP: 3997 default: 3998 switch (allow_type) { 3999 case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: 4000 case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: 4001 case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: 4002 case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: 4003 ptr += ace_data->ace_version == 0 ? 4004 sizeof (ace_t) : sizeof (zfs_object_ace_t); 4005 break; 4006 default: 4007 ptr += ace_data->ace_version == 0 ? 4008 sizeof (ace_t) : sizeof (zfs_ace_t); 4009 break; 4010 } 4011 } 4012 4013 ace_data->ace_count--; 4014 status = wsp->walk_callback(wsp->walk_addr, 4015 (void *)(uintptr_t)&zace, wsp->walk_cbdata); 4016 4017 wsp->walk_addr = ptr; 4018 return (status); 4019 } 4020 4021 typedef struct mdb_zfs_rrwlock { 4022 uintptr_t rr_writer; 4023 boolean_t rr_writer_wanted; 4024 } mdb_zfs_rrwlock_t; 4025 4026 static uint_t rrw_key; 4027 4028 /* ARGSUSED */ 4029 static int 4030 rrwlock(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 4031 { 4032 mdb_zfs_rrwlock_t rrw; 4033 4034 if (rrw_key == 0) { 4035 if (mdb_ctf_readsym(&rrw_key, "uint_t", "rrw_tsd_key", 0) == -1) 4036 return (DCMD_ERR); 4037 } 4038 4039 if (mdb_ctf_vread(&rrw, "rrwlock_t", "mdb_zfs_rrwlock_t", addr, 4040 0) == -1) 4041 return (DCMD_ERR); 4042 4043 if (rrw.rr_writer != 0) { 4044 mdb_printf("write lock held by thread %lx\n", rrw.rr_writer); 4045 return (DCMD_OK); 4046 } 4047 4048 if (rrw.rr_writer_wanted) { 4049 mdb_printf("writer wanted\n"); 4050 } 4051 4052 mdb_printf("anonymous references:\n"); 4053 (void) mdb_call_dcmd("zfs_refcount", addr + 4054 mdb_ctf_offsetof_by_name(ZFS_STRUCT "rrwlock", "rr_anon_rcount"), 4055 DCMD_ADDRSPEC, 0, NULL); 4056 4057 mdb_printf("linked references:\n"); 4058 (void) mdb_call_dcmd("zfs_refcount", addr + 4059 mdb_ctf_offsetof_by_name(ZFS_STRUCT "rrwlock", "rr_linked_rcount"), 4060 DCMD_ADDRSPEC, 0, NULL); 4061 4062 /* 4063 * XXX This should find references from 4064 * "::walk thread | ::tsd -v <rrw_key>", but there is no support 4065 * for programmatic consumption of dcmds, so this would be 4066 * difficult, potentially requiring reimplementing ::tsd (both 4067 * user and kernel versions) in this MDB module. 4068 */ 4069 4070 return (DCMD_OK); 4071 } 4072 4073 typedef struct mdb_arc_buf_hdr_t { 4074 uint16_t b_psize; 4075 uint16_t b_lsize; 4076 struct { 4077 uint32_t b_bufcnt; 4078 uintptr_t b_state; 4079 } b_l1hdr; 4080 } mdb_arc_buf_hdr_t; 4081 4082 enum arc_cflags { 4083 ARC_CFLAG_VERBOSE = 1 << 0, 4084 ARC_CFLAG_ANON = 1 << 1, 4085 ARC_CFLAG_MRU = 1 << 2, 4086 ARC_CFLAG_MFU = 1 << 3, 4087 ARC_CFLAG_BUFS = 1 << 4, 4088 }; 4089 4090 typedef struct arc_compression_stats_data { 4091 GElf_Sym anon_sym; /* ARC_anon symbol */ 4092 GElf_Sym mru_sym; /* ARC_mru symbol */ 4093 GElf_Sym mrug_sym; /* ARC_mru_ghost symbol */ 4094 GElf_Sym mfu_sym; /* ARC_mfu symbol */ 4095 GElf_Sym mfug_sym; /* ARC_mfu_ghost symbol */ 4096 GElf_Sym l2c_sym; /* ARC_l2c_only symbol */ 4097 uint64_t *anon_c_hist; /* histogram of compressed sizes in anon */ 4098 uint64_t *anon_u_hist; /* histogram of uncompressed sizes in anon */ 4099 uint64_t *anon_bufs; /* histogram of buffer counts in anon state */ 4100 uint64_t *mru_c_hist; /* histogram of compressed sizes in mru */ 4101 uint64_t *mru_u_hist; /* histogram of uncompressed sizes in mru */ 4102 uint64_t *mru_bufs; /* histogram of buffer counts in mru */ 4103 uint64_t *mfu_c_hist; /* histogram of compressed sizes in mfu */ 4104 uint64_t *mfu_u_hist; /* histogram of uncompressed sizes in mfu */ 4105 uint64_t *mfu_bufs; /* histogram of buffer counts in mfu */ 4106 uint64_t *all_c_hist; /* histogram of compressed anon + mru + mfu */ 4107 uint64_t *all_u_hist; /* histogram of uncompressed anon + mru + mfu */ 4108 uint64_t *all_bufs; /* histogram of buffer counts in all states */ 4109 int arc_cflags; /* arc compression flags, specified by user */ 4110 int hist_nbuckets; /* number of buckets in each histogram */ 4111 4112 ulong_t l1hdr_off; /* offset of b_l1hdr in arc_buf_hdr_t */ 4113 } arc_compression_stats_data_t; 4114 4115 int 4116 highbit64(uint64_t i) 4117 { 4118 int h = 1; 4119 4120 if (i == 0) 4121 return (0); 4122 if (i & 0xffffffff00000000ULL) { 4123 h += 32; i >>= 32; 4124 } 4125 if (i & 0xffff0000) { 4126 h += 16; i >>= 16; 4127 } 4128 if (i & 0xff00) { 4129 h += 8; i >>= 8; 4130 } 4131 if (i & 0xf0) { 4132 h += 4; i >>= 4; 4133 } 4134 if (i & 0xc) { 4135 h += 2; i >>= 2; 4136 } 4137 if (i & 0x2) { 4138 h += 1; 4139 } 4140 return (h); 4141 } 4142 4143 /* ARGSUSED */ 4144 static int 4145 arc_compression_stats_cb(uintptr_t addr, const void *unknown, void *arg) 4146 { 4147 arc_compression_stats_data_t *data = arg; 4148 arc_flags_t flags; 4149 mdb_arc_buf_hdr_t hdr; 4150 int cbucket, ubucket, bufcnt; 4151 4152 /* 4153 * mdb_ctf_vread() uses the sizeof the target type (e.g. 4154 * sizeof (arc_buf_hdr_t) in the target) to read in the entire contents 4155 * of the target type into a buffer and then copy the values of the 4156 * desired members from the mdb typename (e.g. mdb_arc_buf_hdr_t) from 4157 * this buffer. Unfortunately, the way arc_buf_hdr_t is used by zfs, 4158 * the actual size allocated by the kernel for arc_buf_hdr_t is often 4159 * smaller than `sizeof (arc_buf_hdr_t)` (see the definitions of 4160 * l1arc_buf_hdr_t and arc_buf_hdr_t in 4161 * usr/src/uts/common/fs/zfs/arc.c). Attempting to read the entire 4162 * contents of arc_buf_hdr_t from the target (as mdb_ctf_vread() does) 4163 * can cause an error if the allocated size is indeed smaller--it's 4164 * possible that the 'missing' trailing members of arc_buf_hdr_t 4165 * (l1arc_buf_hdr_t and/or arc_buf_hdr_crypt_t) may fall into unmapped 4166 * memory. 4167 * 4168 * We use the GETMEMB macro instead which performs an mdb_vread() 4169 * but only reads enough of the target to retrieve the desired struct 4170 * member instead of the entire struct. 4171 */ 4172 if (GETMEMB(addr, "arc_buf_hdr", b_flags, flags) == -1) 4173 return (WALK_ERR); 4174 4175 /* 4176 * We only count headers that have data loaded in the kernel. 4177 * This means an L1 header must be present as well as the data 4178 * that corresponds to the L1 header. If there's no L1 header, 4179 * we can skip the arc_buf_hdr_t completely. If it's present, we 4180 * must look at the ARC state (b_l1hdr.b_state) to determine if 4181 * the data is present. 4182 */ 4183 if ((flags & ARC_FLAG_HAS_L1HDR) == 0) 4184 return (WALK_NEXT); 4185 4186 if (GETMEMB(addr, "arc_buf_hdr", b_psize, hdr.b_psize) == -1 || 4187 GETMEMB(addr, "arc_buf_hdr", b_lsize, hdr.b_lsize) == -1 || 4188 GETMEMB(addr + data->l1hdr_off, "l1arc_buf_hdr", b_bufcnt, 4189 hdr.b_l1hdr.b_bufcnt) == -1 || 4190 GETMEMB(addr + data->l1hdr_off, "l1arc_buf_hdr", b_state, 4191 hdr.b_l1hdr.b_state) == -1) 4192 return (WALK_ERR); 4193 4194 /* 4195 * Headers in the ghost states, or the l2c_only state don't have 4196 * arc buffers linked off of them. Thus, their compressed size 4197 * is meaningless, so we skip these from the stats. 4198 */ 4199 if (hdr.b_l1hdr.b_state == data->mrug_sym.st_value || 4200 hdr.b_l1hdr.b_state == data->mfug_sym.st_value || 4201 hdr.b_l1hdr.b_state == data->l2c_sym.st_value) { 4202 return (WALK_NEXT); 4203 } 4204 4205 /* 4206 * The physical size (compressed) and logical size 4207 * (uncompressed) are in units of SPA_MINBLOCKSIZE. By default, 4208 * we use the log2 of this value (rounded down to the nearest 4209 * integer) to determine the bucket to assign this header to. 4210 * Thus, the histogram is logarithmic with respect to the size 4211 * of the header. For example, the following is a mapping of the 4212 * bucket numbers and the range of header sizes they correspond to: 4213 * 4214 * 0: 0 byte headers 4215 * 1: 512 byte headers 4216 * 2: [1024 - 2048) byte headers 4217 * 3: [2048 - 4096) byte headers 4218 * 4: [4096 - 8192) byte headers 4219 * 5: [8192 - 16394) byte headers 4220 * 6: [16384 - 32768) byte headers 4221 * 7: [32768 - 65536) byte headers 4222 * 8: [65536 - 131072) byte headers 4223 * 9: 131072 byte headers 4224 * 4225 * If the ARC_CFLAG_VERBOSE flag was specified, we use the 4226 * physical and logical sizes directly. Thus, the histogram will 4227 * no longer be logarithmic; instead it will be linear with 4228 * respect to the size of the header. The following is a mapping 4229 * of the first many bucket numbers and the header size they 4230 * correspond to: 4231 * 4232 * 0: 0 byte headers 4233 * 1: 512 byte headers 4234 * 2: 1024 byte headers 4235 * 3: 1536 byte headers 4236 * 4: 2048 byte headers 4237 * 5: 2560 byte headers 4238 * 6: 3072 byte headers 4239 * 4240 * And so on. Keep in mind that a range of sizes isn't used in 4241 * the case of linear scale because the headers can only 4242 * increment or decrement in sizes of 512 bytes. So, it's not 4243 * possible for a header to be sized in between whats listed 4244 * above. 4245 * 4246 * Also, the above mapping values were calculated assuming a 4247 * SPA_MINBLOCKSHIFT of 512 bytes and a SPA_MAXBLOCKSIZE of 128K. 4248 */ 4249 4250 if (data->arc_cflags & ARC_CFLAG_VERBOSE) { 4251 cbucket = hdr.b_psize; 4252 ubucket = hdr.b_lsize; 4253 } else { 4254 cbucket = highbit64(hdr.b_psize); 4255 ubucket = highbit64(hdr.b_lsize); 4256 } 4257 4258 bufcnt = hdr.b_l1hdr.b_bufcnt; 4259 if (bufcnt >= data->hist_nbuckets) 4260 bufcnt = data->hist_nbuckets - 1; 4261 4262 /* Ensure we stay within the bounds of the histogram array */ 4263 ASSERT3U(cbucket, <, data->hist_nbuckets); 4264 ASSERT3U(ubucket, <, data->hist_nbuckets); 4265 4266 if (hdr.b_l1hdr.b_state == data->anon_sym.st_value) { 4267 data->anon_c_hist[cbucket]++; 4268 data->anon_u_hist[ubucket]++; 4269 data->anon_bufs[bufcnt]++; 4270 } else if (hdr.b_l1hdr.b_state == data->mru_sym.st_value) { 4271 data->mru_c_hist[cbucket]++; 4272 data->mru_u_hist[ubucket]++; 4273 data->mru_bufs[bufcnt]++; 4274 } else if (hdr.b_l1hdr.b_state == data->mfu_sym.st_value) { 4275 data->mfu_c_hist[cbucket]++; 4276 data->mfu_u_hist[ubucket]++; 4277 data->mfu_bufs[bufcnt]++; 4278 } 4279 4280 data->all_c_hist[cbucket]++; 4281 data->all_u_hist[ubucket]++; 4282 data->all_bufs[bufcnt]++; 4283 4284 return (WALK_NEXT); 4285 } 4286 4287 /* ARGSUSED */ 4288 static int 4289 arc_compression_stats(uintptr_t addr, uint_t flags, int argc, 4290 const mdb_arg_t *argv) 4291 { 4292 arc_compression_stats_data_t data = { 0 }; 4293 unsigned int max_shifted = SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; 4294 unsigned int hist_size; 4295 char range[32]; 4296 int rc = DCMD_OK; 4297 int off; 4298 4299 if (mdb_getopts(argc, argv, 4300 'v', MDB_OPT_SETBITS, ARC_CFLAG_VERBOSE, &data.arc_cflags, 4301 'a', MDB_OPT_SETBITS, ARC_CFLAG_ANON, &data.arc_cflags, 4302 'b', MDB_OPT_SETBITS, ARC_CFLAG_BUFS, &data.arc_cflags, 4303 'r', MDB_OPT_SETBITS, ARC_CFLAG_MRU, &data.arc_cflags, 4304 'f', MDB_OPT_SETBITS, ARC_CFLAG_MFU, &data.arc_cflags, 4305 NULL) != argc) 4306 return (DCMD_USAGE); 4307 4308 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_anon", &data.anon_sym) || 4309 mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_mru", &data.mru_sym) || 4310 mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_mru_ghost", &data.mrug_sym) || 4311 mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_mfu", &data.mfu_sym) || 4312 mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_mfu_ghost", &data.mfug_sym) || 4313 mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_l2c_only", &data.l2c_sym)) { 4314 mdb_warn("can't find arc state symbol"); 4315 return (DCMD_ERR); 4316 } 4317 4318 /* 4319 * Determine the maximum expected size for any header, and use 4320 * this to determine the number of buckets needed for each 4321 * histogram. If ARC_CFLAG_VERBOSE is specified, this value is 4322 * used directly; otherwise the log2 of the maximum size is 4323 * used. Thus, if using a log2 scale there's a maximum of 10 4324 * possible buckets, while the linear scale (when using 4325 * ARC_CFLAG_VERBOSE) has a maximum of 257 buckets. 4326 */ 4327 if (data.arc_cflags & ARC_CFLAG_VERBOSE) 4328 data.hist_nbuckets = max_shifted + 1; 4329 else 4330 data.hist_nbuckets = highbit64(max_shifted) + 1; 4331 4332 hist_size = sizeof (uint64_t) * data.hist_nbuckets; 4333 4334 data.anon_c_hist = mdb_zalloc(hist_size, UM_SLEEP); 4335 data.anon_u_hist = mdb_zalloc(hist_size, UM_SLEEP); 4336 data.anon_bufs = mdb_zalloc(hist_size, UM_SLEEP); 4337 4338 data.mru_c_hist = mdb_zalloc(hist_size, UM_SLEEP); 4339 data.mru_u_hist = mdb_zalloc(hist_size, UM_SLEEP); 4340 data.mru_bufs = mdb_zalloc(hist_size, UM_SLEEP); 4341 4342 data.mfu_c_hist = mdb_zalloc(hist_size, UM_SLEEP); 4343 data.mfu_u_hist = mdb_zalloc(hist_size, UM_SLEEP); 4344 data.mfu_bufs = mdb_zalloc(hist_size, UM_SLEEP); 4345 4346 data.all_c_hist = mdb_zalloc(hist_size, UM_SLEEP); 4347 data.all_u_hist = mdb_zalloc(hist_size, UM_SLEEP); 4348 data.all_bufs = mdb_zalloc(hist_size, UM_SLEEP); 4349 4350 if ((off = mdb_ctf_offsetof_by_name(ZFS_STRUCT "arc_buf_hdr", 4351 "b_l1hdr")) == -1) { 4352 mdb_warn("could not get offset of b_l1hdr from arc_buf_hdr_t"); 4353 rc = DCMD_ERR; 4354 goto out; 4355 } 4356 data.l1hdr_off = off; 4357 4358 if (mdb_walk("arc_buf_hdr_t_full", arc_compression_stats_cb, 4359 &data) != 0) { 4360 mdb_warn("can't walk arc_buf_hdr's"); 4361 rc = DCMD_ERR; 4362 goto out; 4363 } 4364 4365 if (data.arc_cflags & ARC_CFLAG_VERBOSE) { 4366 rc = mdb_snprintf(range, sizeof (range), 4367 "[n*%llu, (n+1)*%llu)", SPA_MINBLOCKSIZE, 4368 SPA_MINBLOCKSIZE); 4369 } else { 4370 rc = mdb_snprintf(range, sizeof (range), 4371 "[2^(n-1)*%llu, 2^n*%llu)", SPA_MINBLOCKSIZE, 4372 SPA_MINBLOCKSIZE); 4373 } 4374 4375 if (rc < 0) { 4376 /* snprintf failed, abort the dcmd */ 4377 rc = DCMD_ERR; 4378 goto out; 4379 } else { 4380 /* snprintf succeeded above, reset return code */ 4381 rc = DCMD_OK; 4382 } 4383 4384 if (data.arc_cflags & ARC_CFLAG_ANON) { 4385 if (data.arc_cflags & ARC_CFLAG_BUFS) { 4386 mdb_printf("Histogram of the number of anon buffers " 4387 "that are associated with an arc hdr.\n"); 4388 dump_histogram(data.anon_bufs, data.hist_nbuckets, 0); 4389 mdb_printf("\n"); 4390 } 4391 mdb_printf("Histogram of compressed anon buffers.\n" 4392 "Each bucket represents buffers of size: %s.\n", range); 4393 dump_histogram(data.anon_c_hist, data.hist_nbuckets, 0); 4394 mdb_printf("\n"); 4395 4396 mdb_printf("Histogram of uncompressed anon buffers.\n" 4397 "Each bucket represents buffers of size: %s.\n", range); 4398 dump_histogram(data.anon_u_hist, data.hist_nbuckets, 0); 4399 mdb_printf("\n"); 4400 } 4401 4402 if (data.arc_cflags & ARC_CFLAG_MRU) { 4403 if (data.arc_cflags & ARC_CFLAG_BUFS) { 4404 mdb_printf("Histogram of the number of mru buffers " 4405 "that are associated with an arc hdr.\n"); 4406 dump_histogram(data.mru_bufs, data.hist_nbuckets, 0); 4407 mdb_printf("\n"); 4408 } 4409 mdb_printf("Histogram of compressed mru buffers.\n" 4410 "Each bucket represents buffers of size: %s.\n", range); 4411 dump_histogram(data.mru_c_hist, data.hist_nbuckets, 0); 4412 mdb_printf("\n"); 4413 4414 mdb_printf("Histogram of uncompressed mru buffers.\n" 4415 "Each bucket represents buffers of size: %s.\n", range); 4416 dump_histogram(data.mru_u_hist, data.hist_nbuckets, 0); 4417 mdb_printf("\n"); 4418 } 4419 4420 if (data.arc_cflags & ARC_CFLAG_MFU) { 4421 if (data.arc_cflags & ARC_CFLAG_BUFS) { 4422 mdb_printf("Histogram of the number of mfu buffers " 4423 "that are associated with an arc hdr.\n"); 4424 dump_histogram(data.mfu_bufs, data.hist_nbuckets, 0); 4425 mdb_printf("\n"); 4426 } 4427 4428 mdb_printf("Histogram of compressed mfu buffers.\n" 4429 "Each bucket represents buffers of size: %s.\n", range); 4430 dump_histogram(data.mfu_c_hist, data.hist_nbuckets, 0); 4431 mdb_printf("\n"); 4432 4433 mdb_printf("Histogram of uncompressed mfu buffers.\n" 4434 "Each bucket represents buffers of size: %s.\n", range); 4435 dump_histogram(data.mfu_u_hist, data.hist_nbuckets, 0); 4436 mdb_printf("\n"); 4437 } 4438 4439 if (data.arc_cflags & ARC_CFLAG_BUFS) { 4440 mdb_printf("Histogram of all buffers that " 4441 "are associated with an arc hdr.\n"); 4442 dump_histogram(data.all_bufs, data.hist_nbuckets, 0); 4443 mdb_printf("\n"); 4444 } 4445 4446 mdb_printf("Histogram of all compressed buffers.\n" 4447 "Each bucket represents buffers of size: %s.\n", range); 4448 dump_histogram(data.all_c_hist, data.hist_nbuckets, 0); 4449 mdb_printf("\n"); 4450 4451 mdb_printf("Histogram of all uncompressed buffers.\n" 4452 "Each bucket represents buffers of size: %s.\n", range); 4453 dump_histogram(data.all_u_hist, data.hist_nbuckets, 0); 4454 4455 out: 4456 mdb_free(data.anon_c_hist, hist_size); 4457 mdb_free(data.anon_u_hist, hist_size); 4458 mdb_free(data.anon_bufs, hist_size); 4459 4460 mdb_free(data.mru_c_hist, hist_size); 4461 mdb_free(data.mru_u_hist, hist_size); 4462 mdb_free(data.mru_bufs, hist_size); 4463 4464 mdb_free(data.mfu_c_hist, hist_size); 4465 mdb_free(data.mfu_u_hist, hist_size); 4466 mdb_free(data.mfu_bufs, hist_size); 4467 4468 mdb_free(data.all_c_hist, hist_size); 4469 mdb_free(data.all_u_hist, hist_size); 4470 mdb_free(data.all_bufs, hist_size); 4471 4472 return (rc); 4473 } 4474 4475 typedef struct mdb_range_seg64 { 4476 uint64_t rs_start; 4477 uint64_t rs_end; 4478 } mdb_range_seg64_t; 4479 4480 typedef struct mdb_range_seg32 { 4481 uint32_t rs_start; 4482 uint32_t rs_end; 4483 } mdb_range_seg32_t; 4484 4485 /* ARGSUSED */ 4486 static int 4487 range_tree_cb(uintptr_t addr, const void *unknown, void *arg) 4488 { 4489 mdb_range_tree_t *rt = (mdb_range_tree_t *)arg; 4490 uint64_t start, end; 4491 4492 if (rt->rt_type == RANGE_SEG64) { 4493 mdb_range_seg64_t rs; 4494 4495 if (mdb_ctf_vread(&rs, ZFS_STRUCT "range_seg64", 4496 "mdb_range_seg64_t", addr, 0) == -1) 4497 return (DCMD_ERR); 4498 start = rs.rs_start; 4499 end = rs.rs_end; 4500 } else { 4501 ASSERT3U(rt->rt_type, ==, RANGE_SEG32); 4502 mdb_range_seg32_t rs; 4503 4504 if (mdb_ctf_vread(&rs, ZFS_STRUCT "range_seg32", 4505 "mdb_range_seg32_t", addr, 0) == -1) 4506 return (DCMD_ERR); 4507 start = ((uint64_t)rs.rs_start << rt->rt_shift) + rt->rt_start; 4508 end = ((uint64_t)rs.rs_end << rt->rt_shift) + rt->rt_start; 4509 } 4510 4511 mdb_printf("\t[%llx %llx) (length %llx)\n", start, end, end - start); 4512 4513 return (0); 4514 } 4515 4516 /* ARGSUSED */ 4517 static int 4518 range_tree(uintptr_t addr, uint_t flags, int argc, 4519 const mdb_arg_t *argv) 4520 { 4521 mdb_range_tree_t rt; 4522 uintptr_t btree_addr; 4523 4524 if (!(flags & DCMD_ADDRSPEC)) 4525 return (DCMD_USAGE); 4526 4527 if (mdb_ctf_vread(&rt, ZFS_STRUCT "range_tree", "mdb_range_tree_t", 4528 addr, 0) == -1) 4529 return (DCMD_ERR); 4530 4531 mdb_printf("%p: range tree of %llu entries, %llu bytes\n", 4532 addr, rt.rt_root.bt_num_elems, rt.rt_space); 4533 4534 btree_addr = addr + 4535 mdb_ctf_offsetof_by_name(ZFS_STRUCT "range_tree", "rt_root"); 4536 4537 if (mdb_pwalk("zfs_btree", range_tree_cb, &rt, btree_addr) != 0) { 4538 mdb_warn("can't walk range_tree segments"); 4539 return (DCMD_ERR); 4540 } 4541 return (DCMD_OK); 4542 } 4543 4544 typedef struct mdb_spa_log_sm { 4545 uint64_t sls_sm_obj; 4546 uint64_t sls_txg; 4547 uint64_t sls_nblocks; 4548 uint64_t sls_mscount; 4549 } mdb_spa_log_sm_t; 4550 4551 /* ARGSUSED */ 4552 static int 4553 logsm_stats_cb(uintptr_t addr, const void *unknown, void *arg) 4554 { 4555 mdb_spa_log_sm_t sls; 4556 if (mdb_ctf_vread(&sls, ZFS_STRUCT "spa_log_sm", "mdb_spa_log_sm_t", 4557 addr, 0) == -1) 4558 return (WALK_ERR); 4559 4560 mdb_printf("%7lld %7lld %7lld %7lld\n", 4561 sls.sls_txg, sls.sls_nblocks, sls.sls_mscount, sls.sls_sm_obj); 4562 4563 return (WALK_NEXT); 4564 } 4565 typedef struct mdb_log_summary_entry { 4566 uint64_t lse_start; 4567 uint64_t lse_blkcount; 4568 uint64_t lse_mscount; 4569 } mdb_log_summary_entry_t; 4570 4571 /* ARGSUSED */ 4572 static int 4573 logsm_summary_cb(uintptr_t addr, const void *unknown, void *arg) 4574 { 4575 mdb_log_summary_entry_t lse; 4576 if (mdb_ctf_vread(&lse, ZFS_STRUCT "log_summary_entry", 4577 "mdb_log_summary_entry_t", addr, 0) == -1) 4578 return (WALK_ERR); 4579 4580 mdb_printf("%7lld %7lld %7lld\n", 4581 lse.lse_start, lse.lse_blkcount, lse.lse_mscount); 4582 return (WALK_NEXT); 4583 } 4584 4585 /* ARGSUSED */ 4586 static int 4587 logsm_stats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 4588 { 4589 if (!(flags & DCMD_ADDRSPEC)) 4590 return (DCMD_USAGE); 4591 4592 uintptr_t sls_avl_addr = addr + 4593 mdb_ctf_offsetof_by_name(ZFS_STRUCT "spa", "spa_sm_logs_by_txg"); 4594 uintptr_t summary_addr = addr + 4595 mdb_ctf_offsetof_by_name(ZFS_STRUCT "spa", "spa_log_summary"); 4596 4597 mdb_printf("Log Entries:\n"); 4598 mdb_printf("%7s %7s %7s %7s\n", "txg", "blk", "ms", "obj"); 4599 if (mdb_pwalk("avl", logsm_stats_cb, NULL, sls_avl_addr) != 0) 4600 return (DCMD_ERR); 4601 4602 mdb_printf("\nSummary Entries:\n"); 4603 mdb_printf("%7s %7s %7s\n", "txg", "blk", "ms"); 4604 if (mdb_pwalk("list", logsm_summary_cb, NULL, summary_addr) != 0) 4605 return (DCMD_ERR); 4606 4607 return (DCMD_OK); 4608 } 4609 4610 /* 4611 * MDB module linkage information: 4612 * 4613 * We declare a list of structures describing our dcmds, and a function 4614 * named _mdb_init to return a pointer to our module information. 4615 */ 4616 4617 static const mdb_dcmd_t dcmds[] = { 4618 { "arc", "[-bkmg]", "print ARC variables", arc_print }, 4619 { "blkptr", ":", "print blkptr_t", blkptr }, 4620 { "dva", ":", "print dva_t", dva }, 4621 { "dbuf", ":", "print dmu_buf_impl_t", dbuf }, 4622 { "dbuf_stats", ":", "dbuf stats", dbuf_stats }, 4623 { "dbufs", 4624 "\t[-O objset_t*] [-n objset_name | \"mos\"] " 4625 "[-o object | \"mdn\"] \n" 4626 "\t[-l level] [-b blkid | \"bonus\"]", 4627 "find dmu_buf_impl_t's that match specified criteria", dbufs }, 4628 { "abuf_find", "dva_word[0] dva_word[1]", 4629 "find arc_buf_hdr_t of a specified DVA", 4630 abuf_find }, 4631 { "logsm_stats", ":", "print log space map statistics of a spa_t", 4632 logsm_stats}, 4633 { "spa", "?[-cevmMh]\n" 4634 "\t-c display spa config\n" 4635 "\t-e display vdev statistics\n" 4636 "\t-v display vdev information\n" 4637 "\t-m display metaslab statistics\n" 4638 "\t-M display metaslab group statistics\n" 4639 "\t-h display histogram (requires -m or -M)\n", 4640 "spa_t summary", spa_print }, 4641 { "spa_config", ":", "print spa_t configuration", spa_print_config }, 4642 { "spa_space", ":[-b]", "print spa_t on-disk space usage", spa_space }, 4643 { "spa_vdevs", ":[-emMh]\n" 4644 "\t-e display vdev statistics\n" 4645 "\t-m dispaly metaslab statistics\n" 4646 "\t-M display metaslab group statistic\n" 4647 "\t-h display histogram (requires -m or -M)\n", 4648 "given a spa_t, print vdev summary", spa_vdevs }, 4649 { "sm_entries", "<buffer length in bytes>", 4650 "print out space map entries from a buffer decoded", 4651 sm_entries}, 4652 { "vdev", ":[-remMh]\n" 4653 "\t-r display recursively\n" 4654 "\t-e display statistics\n" 4655 "\t-m display metaslab statistics (top level vdev only)\n" 4656 "\t-M display metaslab group statistics (top level vdev only)\n" 4657 "\t-h display histogram (requires -m or -M)\n", 4658 "vdev_t summary", vdev_print }, 4659 { "zio", ":[-cpr]\n" 4660 "\t-c display children\n" 4661 "\t-p display parents\n" 4662 "\t-r display recursively", 4663 "zio_t summary", zio_print }, 4664 { "zio_state", "?", "print out all zio_t structures on system or " 4665 "for a particular pool", zio_state }, 4666 { "zfs_blkstats", ":[-v]", 4667 "given a spa_t, print block type stats from last scrub", 4668 zfs_blkstats }, 4669 { "zfs_params", "", "print zfs tunable parameters", zfs_params }, 4670 { "zfs_refcount", ":[-r]\n" 4671 "\t-r display recently removed references", 4672 "print zfs_refcount_t holders", zfs_refcount }, 4673 { "zap_leaf", "", "print zap_leaf_phys_t", zap_leaf }, 4674 { "zfs_aces", ":[-v]", "print all ACEs from a zfs_acl_t", 4675 zfs_acl_dump }, 4676 { "zfs_ace", ":[-v]", "print zfs_ace", zfs_ace_print }, 4677 { "zfs_ace0", ":[-v]", "print zfs_ace0", zfs_ace0_print }, 4678 { "sa_attr_table", ":", "print SA attribute table from sa_os_t", 4679 sa_attr_table}, 4680 { "sa_attr", ": attr_id", 4681 "print SA attribute address when given sa_handle_t", sa_attr_print}, 4682 { "zfs_dbgmsg", ":[-artTvw]", 4683 "print zfs debug log", dbgmsg, dbgmsg_help}, 4684 { "rrwlock", ":", 4685 "print rrwlock_t, including readers", rrwlock}, 4686 { "metaslab_weight", "weight", 4687 "print metaslab weight", metaslab_weight}, 4688 { "metaslab_trace", ":", 4689 "print metaslab allocation trace records", metaslab_trace}, 4690 { "arc_compression_stats", ":[-vabrf]\n" 4691 "\t-v verbose, display a linearly scaled histogram\n" 4692 "\t-a display ARC_anon state statistics individually\n" 4693 "\t-r display ARC_mru state statistics individually\n" 4694 "\t-f display ARC_mfu state statistics individually\n" 4695 "\t-b display histogram of buffer counts\n", 4696 "print a histogram of compressed arc buffer sizes", 4697 arc_compression_stats}, 4698 { "range_tree", ":", 4699 "print entries in range_tree_t", range_tree}, 4700 { NULL } 4701 }; 4702 4703 static const mdb_walker_t walkers[] = { 4704 { "txg_list", "given any txg_list_t *, walk all entries in all txgs", 4705 txg_list_walk_init, txg_list_walk_step, NULL }, 4706 { "txg_list0", "given any txg_list_t *, walk all entries in txg 0", 4707 txg_list0_walk_init, txg_list_walk_step, NULL }, 4708 { "txg_list1", "given any txg_list_t *, walk all entries in txg 1", 4709 txg_list1_walk_init, txg_list_walk_step, NULL }, 4710 { "txg_list2", "given any txg_list_t *, walk all entries in txg 2", 4711 txg_list2_walk_init, txg_list_walk_step, NULL }, 4712 { "txg_list3", "given any txg_list_t *, walk all entries in txg 3", 4713 txg_list3_walk_init, txg_list_walk_step, NULL }, 4714 { "zio", "walk all zio structures, optionally for a particular spa_t", 4715 zio_walk_init, zio_walk_step, NULL }, 4716 { "zio_root", 4717 "walk all root zio_t structures, optionally for a particular spa_t", 4718 zio_walk_init, zio_walk_root_step, NULL }, 4719 { "spa", "walk all spa_t entries in the namespace", 4720 spa_walk_init, spa_walk_step, NULL }, 4721 { "metaslab", "given a spa_t *, walk all metaslab_t structures", 4722 metaslab_walk_init, metaslab_walk_step, NULL }, 4723 { "multilist", "given a multilist_t *, walk all list_t structures", 4724 multilist_walk_init, multilist_walk_step, NULL }, 4725 { "zfs_acl_node", "given a zfs_acl_t, walk all zfs_acl_nodes", 4726 zfs_acl_node_walk_init, zfs_acl_node_walk_step, NULL }, 4727 { "zfs_acl_node_aces", "given a zfs_acl_node_t, walk all ACEs", 4728 zfs_acl_node_aces_walk_init, zfs_aces_walk_step, NULL }, 4729 { "zfs_acl_node_aces0", 4730 "given a zfs_acl_node_t, walk all ACEs as ace_t", 4731 zfs_acl_node_aces0_walk_init, zfs_aces_walk_step, NULL }, 4732 { "zfs_btree", "given a zfs_btree_t *, walk all entries", 4733 btree_walk_init, btree_walk_step, btree_walk_fini }, 4734 { NULL } 4735 }; 4736 4737 static const mdb_modinfo_t modinfo = { 4738 MDB_API_VERSION, dcmds, walkers 4739 }; 4740 4741 const mdb_modinfo_t * 4742 _mdb_init(void) 4743 { 4744 return (&modinfo); 4745 } 4746