1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24 * Copyright (c) 2011, 2018 by Delphix. All rights reserved. 25 * Copyright 2020 Joyent, Inc. 26 * Copyright 2025 Oxide Computer Company 27 */ 28 29 /* Portions Copyright 2010 Robert Milkowski */ 30 31 /* 32 * ZFS_MDB lets dmu.h know that we don't have dmu_ot, and we will define our 33 * own macros to access the target's dmu_ot. Therefore it must be defined 34 * before including any ZFS headers. Note that we don't define 35 * DMU_OT_IS_ENCRYPTED_IMPL() or DMU_OT_BYTESWAP_IMPL(), therefore using them 36 * will result in a compilation error. If they are needed in the future, we 37 * can implement them similarly to mdb_dmu_ot_is_encrypted_impl(). 38 */ 39 #define ZFS_MDB 40 #define DMU_OT_IS_ENCRYPTED_IMPL(ot) mdb_dmu_ot_is_encrypted_impl(ot) 41 42 #include <mdb/mdb_ctf.h> 43 #include <sys/zfs_context.h> 44 #include <sys/mdb_modapi.h> 45 #include <sys/dbuf.h> 46 #include <sys/dmu_objset.h> 47 #include <sys/dsl_dir.h> 48 #include <sys/dsl_pool.h> 49 #include <sys/metaslab_impl.h> 50 #include <sys/space_map.h> 51 #include <sys/list.h> 52 #include <sys/vdev_impl.h> 53 #include <sys/zap_leaf.h> 54 #include <sys/zap_impl.h> 55 #include <ctype.h> 56 #include <sys/zfs_acl.h> 57 #include <sys/sa_impl.h> 58 #include <sys/multilist.h> 59 #include <sys/btree.h> 60 61 #ifdef _KERNEL 62 #define ZFS_OBJ_NAME "zfs" 63 #else 64 #define ZFS_OBJ_NAME "libzpool.so.1" 65 #endif 66 extern int64_t mdb_gethrtime(void); 67 68 #define ZFS_STRUCT "struct " ZFS_OBJ_NAME "`" 69 70 #ifndef _KERNEL 71 int aok; 72 #endif 73 74 enum spa_flags { 75 SPA_FLAG_CONFIG = 1 << 0, 76 SPA_FLAG_VDEVS = 1 << 1, 77 SPA_FLAG_ERRORS = 1 << 2, 78 SPA_FLAG_METASLAB_GROUPS = 1 << 3, 79 SPA_FLAG_METASLABS = 1 << 4, 80 SPA_FLAG_HISTOGRAMS = 1 << 5 81 }; 82 83 /* 84 * If any of these flags are set, call spa_vdevs in spa_print 85 */ 86 #define SPA_FLAG_ALL_VDEV \ 87 (SPA_FLAG_VDEVS | SPA_FLAG_ERRORS | SPA_FLAG_METASLAB_GROUPS | \ 88 SPA_FLAG_METASLABS) 89 90 static int 91 getmember(uintptr_t addr, const char *type, mdb_ctf_id_t *idp, 92 const char *member, int len, void *buf) 93 { 94 mdb_ctf_id_t id; 95 ulong_t off; 96 char name[64]; 97 98 if (idp == NULL) { 99 if (mdb_ctf_lookup_by_name(type, &id) == -1) { 100 mdb_warn("couldn't find type %s", type); 101 return (DCMD_ERR); 102 } 103 idp = &id; 104 } else { 105 type = name; 106 mdb_ctf_type_name(*idp, name, sizeof (name)); 107 } 108 109 if (mdb_ctf_offsetof(*idp, member, &off) == -1) { 110 mdb_warn("couldn't find member %s of type %s\n", member, type); 111 return (DCMD_ERR); 112 } 113 if (off % 8 != 0) { 114 mdb_warn("member %s of type %s is unsupported bitfield", 115 member, type); 116 return (DCMD_ERR); 117 } 118 off /= 8; 119 120 if (mdb_vread(buf, len, addr + off) == -1) { 121 mdb_warn("failed to read %s from %s at %p", 122 member, type, addr + off); 123 return (DCMD_ERR); 124 } 125 /* mdb_warn("read %s from %s at %p+%llx\n", member, type, addr, off); */ 126 127 return (0); 128 } 129 130 #define GETMEMB(addr, structname, member, dest) \ 131 getmember(addr, ZFS_STRUCT structname, NULL, #member, \ 132 sizeof (dest), &(dest)) 133 134 #define GETMEMBID(addr, ctfid, member, dest) \ 135 getmember(addr, NULL, ctfid, #member, sizeof (dest), &(dest)) 136 137 static boolean_t 138 strisprint(const char *cp) 139 { 140 for (; *cp; cp++) { 141 if (!isprint(*cp)) 142 return (B_FALSE); 143 } 144 return (B_TRUE); 145 } 146 147 /* 148 * <addr>::sm_entries <buffer length in bytes> 149 * 150 * Treat the buffer specified by the given address as a buffer that contains 151 * space map entries. Iterate over the specified number of entries and print 152 * them in both encoded and decoded form. 153 */ 154 /* ARGSUSED */ 155 static int 156 sm_entries(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 157 { 158 uint64_t bufsz = 0; 159 boolean_t preview = B_FALSE; 160 161 if (!(flags & DCMD_ADDRSPEC)) 162 return (DCMD_USAGE); 163 164 if (argc < 1) { 165 preview = B_TRUE; 166 bufsz = 2; 167 } else if (argc != 1) { 168 return (DCMD_USAGE); 169 } else { 170 switch (argv[0].a_type) { 171 case MDB_TYPE_STRING: 172 bufsz = mdb_strtoull(argv[0].a_un.a_str); 173 break; 174 case MDB_TYPE_IMMEDIATE: 175 bufsz = argv[0].a_un.a_val; 176 break; 177 default: 178 return (DCMD_USAGE); 179 } 180 } 181 182 char *actions[] = { "ALLOC", "FREE", "INVALID" }; 183 for (uintptr_t bufend = addr + bufsz; addr < bufend; 184 addr += sizeof (uint64_t)) { 185 uint64_t nwords; 186 uint64_t start_addr = addr; 187 188 uint64_t word = 0; 189 if (mdb_vread(&word, sizeof (word), addr) == -1) { 190 mdb_warn("failed to read space map entry %p", addr); 191 return (DCMD_ERR); 192 } 193 194 if (SM_PREFIX_DECODE(word) == SM_DEBUG_PREFIX) { 195 (void) mdb_printf("\t [%6llu] %s: txg %llu, " 196 "pass %llu\n", 197 (u_longlong_t)(addr), 198 actions[SM_DEBUG_ACTION_DECODE(word)], 199 (u_longlong_t)SM_DEBUG_TXG_DECODE(word), 200 (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(word)); 201 continue; 202 } 203 204 char entry_type; 205 uint64_t raw_offset, raw_run, vdev_id = SM_NO_VDEVID; 206 207 if (SM_PREFIX_DECODE(word) != SM2_PREFIX) { 208 entry_type = (SM_TYPE_DECODE(word) == SM_ALLOC) ? 209 'A' : 'F'; 210 raw_offset = SM_OFFSET_DECODE(word); 211 raw_run = SM_RUN_DECODE(word); 212 nwords = 1; 213 } else { 214 ASSERT3U(SM_PREFIX_DECODE(word), ==, SM2_PREFIX); 215 216 raw_run = SM2_RUN_DECODE(word); 217 vdev_id = SM2_VDEV_DECODE(word); 218 219 /* it is a two-word entry so we read another word */ 220 addr += sizeof (uint64_t); 221 if (addr >= bufend) { 222 mdb_warn("buffer ends in the middle of a two " 223 "word entry\n", addr); 224 return (DCMD_ERR); 225 } 226 227 if (mdb_vread(&word, sizeof (word), addr) == -1) { 228 mdb_warn("failed to read space map entry %p", 229 addr); 230 return (DCMD_ERR); 231 } 232 233 entry_type = (SM2_TYPE_DECODE(word) == SM_ALLOC) ? 234 'A' : 'F'; 235 raw_offset = SM2_OFFSET_DECODE(word); 236 nwords = 2; 237 } 238 239 (void) mdb_printf("\t [%6llx] %c range:" 240 " %010llx-%010llx size: %06llx vdev: %06llu words: %llu\n", 241 (u_longlong_t)start_addr, 242 entry_type, (u_longlong_t)raw_offset, 243 (u_longlong_t)(raw_offset + raw_run), 244 (u_longlong_t)raw_run, 245 (u_longlong_t)vdev_id, (u_longlong_t)nwords); 246 247 if (preview) 248 break; 249 } 250 return (DCMD_OK); 251 } 252 253 static int 254 mdb_dsl_dir_name(uintptr_t addr, char *buf) 255 { 256 static int gotid; 257 static mdb_ctf_id_t dd_id; 258 uintptr_t dd_parent; 259 char dd_myname[ZFS_MAX_DATASET_NAME_LEN]; 260 261 if (!gotid) { 262 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "dsl_dir", 263 &dd_id) == -1) { 264 mdb_warn("couldn't find struct dsl_dir"); 265 return (DCMD_ERR); 266 } 267 gotid = TRUE; 268 } 269 if (GETMEMBID(addr, &dd_id, dd_parent, dd_parent) || 270 GETMEMBID(addr, &dd_id, dd_myname, dd_myname)) { 271 return (DCMD_ERR); 272 } 273 274 if (dd_parent) { 275 if (mdb_dsl_dir_name(dd_parent, buf)) 276 return (DCMD_ERR); 277 strcat(buf, "/"); 278 } 279 280 if (dd_myname[0]) 281 strcat(buf, dd_myname); 282 else 283 strcat(buf, "???"); 284 285 return (0); 286 } 287 288 static int 289 objset_name(uintptr_t addr, char *buf) 290 { 291 static int gotid; 292 static mdb_ctf_id_t os_id, ds_id; 293 uintptr_t os_dsl_dataset; 294 char ds_snapname[ZFS_MAX_DATASET_NAME_LEN]; 295 uintptr_t ds_dir; 296 297 buf[0] = '\0'; 298 299 if (!gotid) { 300 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "objset", 301 &os_id) == -1) { 302 mdb_warn("couldn't find struct objset"); 303 return (DCMD_ERR); 304 } 305 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "dsl_dataset", 306 &ds_id) == -1) { 307 mdb_warn("couldn't find struct dsl_dataset"); 308 return (DCMD_ERR); 309 } 310 311 gotid = TRUE; 312 } 313 314 if (GETMEMBID(addr, &os_id, os_dsl_dataset, os_dsl_dataset)) 315 return (DCMD_ERR); 316 317 if (os_dsl_dataset == 0) { 318 strcat(buf, "mos"); 319 return (0); 320 } 321 322 if (GETMEMBID(os_dsl_dataset, &ds_id, ds_snapname, ds_snapname) || 323 GETMEMBID(os_dsl_dataset, &ds_id, ds_dir, ds_dir)) { 324 return (DCMD_ERR); 325 } 326 327 if (ds_dir && mdb_dsl_dir_name(ds_dir, buf)) 328 return (DCMD_ERR); 329 330 if (ds_snapname[0]) { 331 strcat(buf, "@"); 332 strcat(buf, ds_snapname); 333 } 334 return (0); 335 } 336 337 static int 338 enum_lookup(char *type, int val, const char *prefix, size_t size, char *out) 339 { 340 const char *cp; 341 size_t len = strlen(prefix); 342 mdb_ctf_id_t enum_type; 343 344 if (mdb_ctf_lookup_by_name(type, &enum_type) != 0) { 345 mdb_warn("Could not find enum for %s", type); 346 return (-1); 347 } 348 349 if ((cp = mdb_ctf_enum_name(enum_type, val)) != NULL) { 350 if (strncmp(cp, prefix, len) == 0) 351 cp += len; 352 (void) strncpy(out, cp, size); 353 } else { 354 mdb_snprintf(out, size, "? (%d)", val); 355 } 356 return (0); 357 } 358 359 /* ARGSUSED */ 360 static int 361 zfs_params(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 362 { 363 /* 364 * This table can be approximately generated by running: 365 * egrep "^[a-z0-9_]+ [a-z0-9_]+( =.*)?;" *.c | cut -d ' ' -f 2 366 */ 367 static const char *params[] = { 368 "arc_lotsfree_percent", 369 "arc_pages_pp_reserve", 370 "arc_reduce_dnlc_percent", 371 "arc_swapfs_reserve", 372 "arc_zio_arena_free_shift", 373 "dbuf_cache_hiwater_pct", 374 "dbuf_cache_lowater_pct", 375 "dbuf_cache_max_bytes", 376 "dbuf_cache_max_shift", 377 "ddt_zap_indirect_blockshift", 378 "ddt_zap_leaf_blockshift", 379 "ditto_same_vdev_distance_shift", 380 "dmu_find_threads", 381 "dmu_rescan_dnode_threshold", 382 "dsl_scan_delay_completion", 383 "fzap_default_block_shift", 384 "l2arc_feed_again", 385 "l2arc_feed_min_ms", 386 "l2arc_feed_secs", 387 "l2arc_headroom", 388 "l2arc_headroom_boost", 389 "l2arc_noprefetch", 390 "l2arc_norw", 391 "l2arc_write_boost", 392 "l2arc_write_max", 393 "metaslab_aliquot", 394 "metaslab_bias_enabled", 395 "metaslab_debug_load", 396 "metaslab_debug_unload", 397 "metaslab_df_alloc_threshold", 398 "metaslab_df_free_pct", 399 "metaslab_fragmentation_factor_enabled", 400 "metaslab_force_ganging", 401 "metaslab_lba_weighting_enabled", 402 "metaslab_load_pct", 403 "metaslab_min_alloc_size", 404 "metaslab_ndf_clump_shift", 405 "metaslab_preload_enabled", 406 "metaslab_preload_limit", 407 "metaslab_trace_enabled", 408 "metaslab_trace_max_entries", 409 "metaslab_unload_delay", 410 "metaslabs_per_vdev", 411 "reference_history", 412 "reference_tracking_enable", 413 "send_holes_without_birth_time", 414 "spa_asize_inflation", 415 "spa_load_verify_data", 416 "spa_load_verify_maxinflight", 417 "spa_load_verify_metadata", 418 "spa_max_replication_override", 419 "spa_min_slop", 420 "spa_mode_global", 421 "spa_slop_shift", 422 "space_map_blksz", 423 "vdev_mirror_shift", 424 "zfetch_max_distance", 425 "zfs_abd_chunk_size", 426 "zfs_abd_scatter_enabled", 427 "zfs_arc_average_blocksize", 428 "zfs_arc_evict_batch_limit", 429 "zfs_arc_grow_retry", 430 "zfs_arc_max", 431 "zfs_arc_meta_limit", 432 "zfs_arc_meta_min", 433 "zfs_arc_min", 434 "zfs_arc_p_min_shift", 435 "zfs_arc_shrink_shift", 436 "zfs_async_block_max_blocks", 437 "zfs_ccw_retry_interval", 438 "zfs_commit_timeout_pct", 439 "zfs_compressed_arc_enabled", 440 "zfs_condense_indirect_commit_entry_delay_ticks", 441 "zfs_condense_indirect_vdevs_enable", 442 "zfs_condense_max_obsolete_bytes", 443 "zfs_condense_min_mapping_bytes", 444 "zfs_condense_pct", 445 "zfs_dbgmsg_maxsize", 446 "zfs_deadman_checktime_ms", 447 "zfs_deadman_enabled", 448 "zfs_deadman_synctime_ms", 449 "zfs_dedup_prefetch", 450 "zfs_default_bs", 451 "zfs_default_ibs", 452 "zfs_delay_max_ns", 453 "zfs_delay_min_dirty_percent", 454 "zfs_delay_resolution_ns", 455 "zfs_delay_scale", 456 "zfs_dirty_data_max", 457 "zfs_dirty_data_max_max", 458 "zfs_dirty_data_max_percent", 459 "zfs_dirty_data_sync", 460 "zfs_flags", 461 "zfs_free_bpobj_enabled", 462 "zfs_free_leak_on_eio", 463 "zfs_free_min_time_ms", 464 "zfs_fsync_sync_cnt", 465 "zfs_immediate_write_sz", 466 "zfs_indirect_condense_obsolete_pct", 467 "zfs_lua_check_instrlimit_interval", 468 "zfs_lua_max_instrlimit", 469 "zfs_lua_max_memlimit", 470 "zfs_max_recordsize", 471 "zfs_mdcomp_disable", 472 "zfs_metaslab_condense_block_threshold", 473 "zfs_metaslab_fragmentation_threshold", 474 "zfs_metaslab_segment_weight_enabled", 475 "zfs_metaslab_switch_threshold", 476 "zfs_mg_fragmentation_threshold", 477 "zfs_mg_noalloc_threshold", 478 "zfs_multilist_num_sublists", 479 "zfs_no_scrub_io", 480 "zfs_no_scrub_prefetch", 481 "zfs_nocacheflush", 482 "zfs_nopwrite_enabled", 483 "zfs_object_remap_one_indirect_delay_ticks", 484 "zfs_obsolete_min_time_ms", 485 "zfs_pd_bytes_max", 486 "zfs_per_txg_dirty_frees_percent", 487 "zfs_prefetch_disable", 488 "zfs_read_chunk_size", 489 "zfs_recover", 490 "zfs_recv_queue_length", 491 "zfs_redundant_metadata_most_ditto_level", 492 "zfs_remap_blkptr_enable", 493 "zfs_remove_max_copy_bytes", 494 "zfs_remove_max_segment", 495 "zfs_resilver_min_time_ms", 496 "zfs_scan_min_time_ms", 497 "zfs_scrub_limit", 498 "zfs_send_corrupt_data", 499 "zfs_send_queue_length", 500 "zfs_send_set_freerecords_bit", 501 "zfs_sync_pass_deferred_free", 502 "zfs_sync_pass_dont_compress", 503 "zfs_sync_pass_rewrite", 504 "zfs_sync_taskq_batch_pct", 505 "zfs_top_maxinflight", 506 "zfs_txg_timeout", 507 "zfs_vdev_aggregation_limit", 508 "zfs_vdev_async_read_max_active", 509 "zfs_vdev_async_read_min_active", 510 "zfs_vdev_async_write_active_max_dirty_percent", 511 "zfs_vdev_async_write_active_min_dirty_percent", 512 "zfs_vdev_async_write_max_active", 513 "zfs_vdev_async_write_min_active", 514 "zfs_vdev_cache_bshift", 515 "zfs_vdev_cache_max", 516 "zfs_vdev_cache_size", 517 "zfs_vdev_max_active", 518 "zfs_vdev_queue_depth_pct", 519 "zfs_vdev_read_gap_limit", 520 "zfs_vdev_removal_max_active", 521 "zfs_vdev_removal_min_active", 522 "zfs_vdev_scrub_max_active", 523 "zfs_vdev_scrub_min_active", 524 "zfs_vdev_sync_read_max_active", 525 "zfs_vdev_sync_read_min_active", 526 "zfs_vdev_sync_write_max_active", 527 "zfs_vdev_sync_write_min_active", 528 "zfs_vdev_write_gap_limit", 529 "zfs_write_implies_delete_child", 530 "zfs_zil_clean_taskq_maxalloc", 531 "zfs_zil_clean_taskq_minalloc", 532 "zfs_zil_clean_taskq_nthr_pct", 533 "zil_replay_disable", 534 "zil_slog_bulk", 535 "zio_buf_debug_limit", 536 "zio_dva_throttle_enabled", 537 "zio_injection_enabled", 538 "zvol_immediate_write_sz", 539 "zvol_maxphys", 540 "zvol_unmap_enabled", 541 "zvol_unmap_sync_enabled", 542 "zfs_max_dataset_nesting", 543 }; 544 545 for (int i = 0; i < sizeof (params) / sizeof (params[0]); i++) { 546 int sz; 547 uint64_t val64; 548 uint32_t *val32p = (uint32_t *)&val64; 549 550 sz = mdb_readvar(&val64, params[i]); 551 if (sz == 4) { 552 mdb_printf("%s = 0x%x\n", params[i], *val32p); 553 } else if (sz == 8) { 554 mdb_printf("%s = 0x%llx\n", params[i], val64); 555 } else { 556 mdb_warn("variable %s not found", params[i]); 557 } 558 } 559 560 return (DCMD_OK); 561 } 562 563 /* ARGSUSED */ 564 static int 565 dva(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 566 { 567 dva_t dva; 568 if (mdb_vread(&dva, sizeof (dva_t), addr) == -1) { 569 mdb_warn("failed to read dva_t"); 570 return (DCMD_ERR); 571 } 572 mdb_printf("<%llu:%llx:%llx>\n", 573 (u_longlong_t)DVA_GET_VDEV(&dva), 574 (u_longlong_t)DVA_GET_OFFSET(&dva), 575 (u_longlong_t)DVA_GET_ASIZE(&dva)); 576 577 return (DCMD_OK); 578 } 579 580 typedef struct mdb_dmu_object_type_info { 581 boolean_t ot_encrypt; 582 } mdb_dmu_object_type_info_t; 583 584 static boolean_t 585 mdb_dmu_ot_is_encrypted_impl(dmu_object_type_t ot) 586 { 587 mdb_dmu_object_type_info_t mdoti; 588 GElf_Sym sym; 589 size_t sz = mdb_ctf_sizeof_by_name("dmu_object_type_info_t"); 590 591 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "dmu_ot", &sym)) { 592 mdb_warn("failed to find " ZFS_OBJ_NAME "`dmu_ot"); 593 return (B_FALSE); 594 } 595 596 if (mdb_ctf_vread(&mdoti, "dmu_object_type_info_t", 597 "mdb_dmu_object_type_info_t", sym.st_value + sz * ot, 0) != 0) { 598 return (B_FALSE); 599 } 600 601 return (mdoti.ot_encrypt); 602 } 603 604 /* ARGSUSED */ 605 static int 606 blkptr(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 607 { 608 char type[80], checksum[80], compress[80]; 609 blkptr_t blk, *bp = &blk; 610 char buf[BP_SPRINTF_LEN]; 611 612 if (mdb_vread(&blk, sizeof (blkptr_t), addr) == -1) { 613 mdb_warn("failed to read blkptr_t"); 614 return (DCMD_ERR); 615 } 616 617 if (enum_lookup("enum dmu_object_type", BP_GET_TYPE(bp), "DMU_OT_", 618 sizeof (type), type) == -1 || 619 enum_lookup("enum zio_checksum", BP_GET_CHECKSUM(bp), 620 "ZIO_CHECKSUM_", sizeof (checksum), checksum) == -1 || 621 enum_lookup("enum zio_compress", BP_GET_COMPRESS(bp), 622 "ZIO_COMPRESS_", sizeof (compress), compress) == -1) { 623 mdb_warn("Could not find blkptr enumerated types"); 624 return (DCMD_ERR); 625 } 626 627 SNPRINTF_BLKPTR(mdb_snprintf, '\n', buf, sizeof (buf), bp, type, 628 checksum, compress); 629 630 mdb_printf("%s\n", buf); 631 632 return (DCMD_OK); 633 } 634 635 typedef struct mdb_dmu_buf_impl { 636 struct { 637 uint64_t db_object; 638 uintptr_t db_data; 639 } db; 640 uintptr_t db_objset; 641 uint64_t db_level; 642 uint64_t db_blkid; 643 struct { 644 uint64_t rc_count; 645 } db_holds; 646 } mdb_dmu_buf_impl_t; 647 648 /* ARGSUSED */ 649 static int 650 dbuf(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 651 { 652 mdb_dmu_buf_impl_t db; 653 char objectname[32]; 654 char blkidname[32]; 655 char path[ZFS_MAX_DATASET_NAME_LEN]; 656 int ptr_width = (int)(sizeof (void *)) * 2; 657 658 if (DCMD_HDRSPEC(flags)) 659 mdb_printf("%*s %8s %3s %9s %5s %s\n", 660 ptr_width, "addr", "object", "lvl", "blkid", "holds", "os"); 661 662 if (mdb_ctf_vread(&db, ZFS_STRUCT "dmu_buf_impl", "mdb_dmu_buf_impl_t", 663 addr, 0) == -1) 664 return (DCMD_ERR); 665 666 if (db.db.db_object == DMU_META_DNODE_OBJECT) 667 (void) strcpy(objectname, "mdn"); 668 else 669 (void) mdb_snprintf(objectname, sizeof (objectname), "%llx", 670 (u_longlong_t)db.db.db_object); 671 672 if (db.db_blkid == DMU_BONUS_BLKID) 673 (void) strcpy(blkidname, "bonus"); 674 else 675 (void) mdb_snprintf(blkidname, sizeof (blkidname), "%llx", 676 (u_longlong_t)db.db_blkid); 677 678 if (objset_name(db.db_objset, path)) { 679 return (DCMD_ERR); 680 } 681 682 mdb_printf("%*p %8s %3u %9s %5llu %s\n", ptr_width, addr, 683 objectname, (int)db.db_level, blkidname, 684 db.db_holds.rc_count, path); 685 686 return (DCMD_OK); 687 } 688 689 /* ARGSUSED */ 690 static int 691 dbuf_stats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 692 { 693 #define HISTOSZ 32 694 uintptr_t dbp; 695 dmu_buf_impl_t db; 696 dbuf_hash_table_t ht; 697 uint64_t bucket, ndbufs; 698 uint64_t histo[HISTOSZ]; 699 uint64_t histo2[HISTOSZ]; 700 int i, maxidx; 701 702 if (mdb_readvar(&ht, "dbuf_hash_table") == -1) { 703 mdb_warn("failed to read 'dbuf_hash_table'"); 704 return (DCMD_ERR); 705 } 706 707 for (i = 0; i < HISTOSZ; i++) { 708 histo[i] = 0; 709 histo2[i] = 0; 710 } 711 712 ndbufs = 0; 713 for (bucket = 0; bucket < ht.hash_table_mask+1; bucket++) { 714 int len; 715 716 if (mdb_vread(&dbp, sizeof (void *), 717 (uintptr_t)(ht.hash_table+bucket)) == -1) { 718 mdb_warn("failed to read hash bucket %u at %p", 719 bucket, ht.hash_table+bucket); 720 return (DCMD_ERR); 721 } 722 723 len = 0; 724 while (dbp != 0) { 725 if (mdb_vread(&db, sizeof (dmu_buf_impl_t), 726 dbp) == -1) { 727 mdb_warn("failed to read dbuf at %p", dbp); 728 return (DCMD_ERR); 729 } 730 dbp = (uintptr_t)db.db_hash_next; 731 for (i = MIN(len, HISTOSZ - 1); i >= 0; i--) 732 histo2[i]++; 733 len++; 734 ndbufs++; 735 } 736 737 if (len >= HISTOSZ) 738 len = HISTOSZ-1; 739 histo[len]++; 740 } 741 742 mdb_printf("hash table has %llu buckets, %llu dbufs " 743 "(avg %llu buckets/dbuf)\n", 744 ht.hash_table_mask+1, ndbufs, 745 (ht.hash_table_mask+1)/ndbufs); 746 747 mdb_printf("\n"); 748 maxidx = 0; 749 for (i = 0; i < HISTOSZ; i++) 750 if (histo[i] > 0) 751 maxidx = i; 752 mdb_printf("hash chain length number of buckets\n"); 753 for (i = 0; i <= maxidx; i++) 754 mdb_printf("%u %llu\n", i, histo[i]); 755 756 mdb_printf("\n"); 757 maxidx = 0; 758 for (i = 0; i < HISTOSZ; i++) 759 if (histo2[i] > 0) 760 maxidx = i; 761 mdb_printf("hash chain depth number of dbufs\n"); 762 for (i = 0; i <= maxidx; i++) 763 mdb_printf("%u or more %llu %llu%%\n", 764 i, histo2[i], histo2[i]*100/ndbufs); 765 766 767 return (DCMD_OK); 768 } 769 770 #define CHAIN_END 0xffff 771 /* 772 * ::zap_leaf [-v] 773 * 774 * Print a zap_leaf_phys_t, assumed to be 16k 775 */ 776 /* ARGSUSED */ 777 static int 778 zap_leaf(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 779 { 780 char buf[16*1024]; 781 int verbose = B_FALSE; 782 int four = B_FALSE; 783 dmu_buf_t l_dbuf; 784 zap_leaf_t l; 785 zap_leaf_phys_t *zlp = (void *)buf; 786 int i; 787 788 if (mdb_getopts(argc, argv, 789 'v', MDB_OPT_SETBITS, TRUE, &verbose, 790 '4', MDB_OPT_SETBITS, TRUE, &four, 791 NULL) != argc) 792 return (DCMD_USAGE); 793 794 l_dbuf.db_data = zlp; 795 l.l_dbuf = &l_dbuf; 796 l.l_bs = 14; /* assume 16k blocks */ 797 if (four) 798 l.l_bs = 12; 799 800 if (!(flags & DCMD_ADDRSPEC)) { 801 return (DCMD_USAGE); 802 } 803 804 if (mdb_vread(buf, sizeof (buf), addr) == -1) { 805 mdb_warn("failed to read zap_leaf_phys_t at %p", addr); 806 return (DCMD_ERR); 807 } 808 809 if (zlp->l_hdr.lh_block_type != ZBT_LEAF || 810 zlp->l_hdr.lh_magic != ZAP_LEAF_MAGIC) { 811 mdb_warn("This does not appear to be a zap_leaf_phys_t"); 812 return (DCMD_ERR); 813 } 814 815 mdb_printf("zap_leaf_phys_t at %p:\n", addr); 816 mdb_printf(" lh_prefix_len = %u\n", zlp->l_hdr.lh_prefix_len); 817 mdb_printf(" lh_prefix = %llx\n", zlp->l_hdr.lh_prefix); 818 mdb_printf(" lh_nentries = %u\n", zlp->l_hdr.lh_nentries); 819 mdb_printf(" lh_nfree = %u\n", zlp->l_hdr.lh_nfree, 820 zlp->l_hdr.lh_nfree * 100 / (ZAP_LEAF_NUMCHUNKS(&l))); 821 mdb_printf(" lh_freelist = %u\n", zlp->l_hdr.lh_freelist); 822 mdb_printf(" lh_flags = %x (%s)\n", zlp->l_hdr.lh_flags, 823 zlp->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED ? 824 "ENTRIES_CDSORTED" : ""); 825 826 if (verbose) { 827 mdb_printf(" hash table:\n"); 828 for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(&l); i++) { 829 if (zlp->l_hash[i] != CHAIN_END) 830 mdb_printf(" %u: %u\n", i, zlp->l_hash[i]); 831 } 832 } 833 834 mdb_printf(" chunks:\n"); 835 for (i = 0; i < ZAP_LEAF_NUMCHUNKS(&l); i++) { 836 /* LINTED: alignment */ 837 zap_leaf_chunk_t *zlc = &ZAP_LEAF_CHUNK(&l, i); 838 switch (zlc->l_entry.le_type) { 839 case ZAP_CHUNK_FREE: 840 if (verbose) { 841 mdb_printf(" %u: free; lf_next = %u\n", 842 i, zlc->l_free.lf_next); 843 } 844 break; 845 case ZAP_CHUNK_ENTRY: 846 mdb_printf(" %u: entry\n", i); 847 if (verbose) { 848 mdb_printf(" le_next = %u\n", 849 zlc->l_entry.le_next); 850 } 851 mdb_printf(" le_name_chunk = %u\n", 852 zlc->l_entry.le_name_chunk); 853 mdb_printf(" le_name_numints = %u\n", 854 zlc->l_entry.le_name_numints); 855 mdb_printf(" le_value_chunk = %u\n", 856 zlc->l_entry.le_value_chunk); 857 mdb_printf(" le_value_intlen = %u\n", 858 zlc->l_entry.le_value_intlen); 859 mdb_printf(" le_value_numints = %u\n", 860 zlc->l_entry.le_value_numints); 861 mdb_printf(" le_cd = %u\n", 862 zlc->l_entry.le_cd); 863 mdb_printf(" le_hash = %llx\n", 864 zlc->l_entry.le_hash); 865 break; 866 case ZAP_CHUNK_ARRAY: 867 mdb_printf(" %u: array", i); 868 if (strisprint((char *)zlc->l_array.la_array)) 869 mdb_printf(" \"%s\"", zlc->l_array.la_array); 870 mdb_printf("\n"); 871 if (verbose) { 872 int j; 873 mdb_printf(" "); 874 for (j = 0; j < ZAP_LEAF_ARRAY_BYTES; j++) { 875 mdb_printf("%02x ", 876 zlc->l_array.la_array[j]); 877 } 878 mdb_printf("\n"); 879 } 880 if (zlc->l_array.la_next != CHAIN_END) { 881 mdb_printf(" lf_next = %u\n", 882 zlc->l_array.la_next); 883 } 884 break; 885 default: 886 mdb_printf(" %u: undefined type %u\n", 887 zlc->l_entry.le_type); 888 } 889 } 890 891 return (DCMD_OK); 892 } 893 894 typedef struct dbufs_data { 895 mdb_ctf_id_t id; 896 uint64_t objset; 897 uint64_t object; 898 uint64_t level; 899 uint64_t blkid; 900 char *osname; 901 } dbufs_data_t; 902 903 #define DBUFS_UNSET (0xbaddcafedeadbeefULL) 904 905 /* ARGSUSED */ 906 static int 907 dbufs_cb(uintptr_t addr, const void *unknown, void *arg) 908 { 909 dbufs_data_t *data = arg; 910 uintptr_t objset; 911 dmu_buf_t db; 912 uint8_t level; 913 uint64_t blkid; 914 char osname[ZFS_MAX_DATASET_NAME_LEN]; 915 916 if (GETMEMBID(addr, &data->id, db_objset, objset) || 917 GETMEMBID(addr, &data->id, db, db) || 918 GETMEMBID(addr, &data->id, db_level, level) || 919 GETMEMBID(addr, &data->id, db_blkid, blkid)) { 920 return (WALK_ERR); 921 } 922 923 if ((data->objset == DBUFS_UNSET || data->objset == objset) && 924 (data->osname == NULL || (objset_name(objset, osname) == 0 && 925 strcmp(data->osname, osname) == 0)) && 926 (data->object == DBUFS_UNSET || data->object == db.db_object) && 927 (data->level == DBUFS_UNSET || data->level == level) && 928 (data->blkid == DBUFS_UNSET || data->blkid == blkid)) { 929 mdb_printf("%#lr\n", addr); 930 } 931 return (WALK_NEXT); 932 } 933 934 /* ARGSUSED */ 935 static int 936 dbufs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 937 { 938 dbufs_data_t data; 939 char *object = NULL; 940 char *blkid = NULL; 941 942 data.objset = data.object = data.level = data.blkid = DBUFS_UNSET; 943 data.osname = NULL; 944 945 if (mdb_getopts(argc, argv, 946 'O', MDB_OPT_UINT64, &data.objset, 947 'n', MDB_OPT_STR, &data.osname, 948 'o', MDB_OPT_STR, &object, 949 'l', MDB_OPT_UINT64, &data.level, 950 'b', MDB_OPT_STR, &blkid, 951 NULL) != argc) { 952 return (DCMD_USAGE); 953 } 954 955 if (object) { 956 if (strcmp(object, "mdn") == 0) { 957 data.object = DMU_META_DNODE_OBJECT; 958 } else { 959 data.object = mdb_strtoull(object); 960 } 961 } 962 963 if (blkid) { 964 if (strcmp(blkid, "bonus") == 0) { 965 data.blkid = DMU_BONUS_BLKID; 966 } else { 967 data.blkid = mdb_strtoull(blkid); 968 } 969 } 970 971 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "dmu_buf_impl", &data.id) == -1) { 972 mdb_warn("couldn't find struct dmu_buf_impl_t"); 973 return (DCMD_ERR); 974 } 975 976 if (mdb_walk("dmu_buf_impl_t", dbufs_cb, &data) != 0) { 977 mdb_warn("can't walk dbufs"); 978 return (DCMD_ERR); 979 } 980 981 return (DCMD_OK); 982 } 983 984 typedef struct abuf_find_data { 985 dva_t dva; 986 mdb_ctf_id_t id; 987 } abuf_find_data_t; 988 989 /* ARGSUSED */ 990 static int 991 abuf_find_cb(uintptr_t addr, const void *unknown, void *arg) 992 { 993 abuf_find_data_t *data = arg; 994 dva_t dva; 995 996 if (GETMEMBID(addr, &data->id, b_dva, dva)) { 997 return (WALK_ERR); 998 } 999 1000 if (dva.dva_word[0] == data->dva.dva_word[0] && 1001 dva.dva_word[1] == data->dva.dva_word[1]) { 1002 mdb_printf("%#lr\n", addr); 1003 } 1004 return (WALK_NEXT); 1005 } 1006 1007 typedef struct mdb_arc_state { 1008 uintptr_t arcs_list[ARC_BUFC_NUMTYPES]; 1009 } mdb_arc_state_t; 1010 1011 /* ARGSUSED */ 1012 static int 1013 abuf_find(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1014 { 1015 abuf_find_data_t data; 1016 GElf_Sym sym; 1017 int i, j; 1018 const char *syms[] = { 1019 "ARC_mru", 1020 "ARC_mru_ghost", 1021 "ARC_mfu", 1022 "ARC_mfu_ghost", 1023 }; 1024 1025 if (argc != 2) 1026 return (DCMD_USAGE); 1027 1028 for (i = 0; i < 2; i ++) { 1029 switch (argv[i].a_type) { 1030 case MDB_TYPE_STRING: 1031 data.dva.dva_word[i] = mdb_strtoull(argv[i].a_un.a_str); 1032 break; 1033 case MDB_TYPE_IMMEDIATE: 1034 data.dva.dva_word[i] = argv[i].a_un.a_val; 1035 break; 1036 default: 1037 return (DCMD_USAGE); 1038 } 1039 } 1040 1041 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "arc_buf_hdr", &data.id) == -1) { 1042 mdb_warn("couldn't find struct arc_buf_hdr"); 1043 return (DCMD_ERR); 1044 } 1045 1046 for (i = 0; i < sizeof (syms) / sizeof (syms[0]); i++) { 1047 mdb_arc_state_t mas; 1048 1049 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, syms[i], &sym)) { 1050 mdb_warn("can't find symbol %s", syms[i]); 1051 return (DCMD_ERR); 1052 } 1053 1054 if (mdb_ctf_vread(&mas, "arc_state_t", "mdb_arc_state_t", 1055 sym.st_value, 0) != 0) { 1056 mdb_warn("can't read arcs_list of %s", syms[i]); 1057 return (DCMD_ERR); 1058 } 1059 1060 for (j = 0; j < ARC_BUFC_NUMTYPES; j++) { 1061 uintptr_t addr = mas.arcs_list[j]; 1062 1063 if (addr == 0) 1064 continue; 1065 1066 if (mdb_pwalk("multilist", abuf_find_cb, &data, 1067 addr) != 0) { 1068 mdb_warn("can't walk %s", syms[i]); 1069 return (DCMD_ERR); 1070 } 1071 } 1072 } 1073 1074 return (DCMD_OK); 1075 } 1076 1077 typedef struct dbgmsg_arg { 1078 boolean_t da_address; 1079 boolean_t da_hrtime; 1080 boolean_t da_timedelta; 1081 boolean_t da_time; 1082 boolean_t da_whatis; 1083 1084 hrtime_t da_curtime; 1085 } dbgmsg_arg_t; 1086 1087 static int 1088 dbgmsg_cb(uintptr_t addr, const void *unknown __unused, void *arg) 1089 { 1090 static mdb_ctf_id_t id; 1091 static boolean_t gotid; 1092 static ulong_t off; 1093 1094 dbgmsg_arg_t *da = arg; 1095 time_t timestamp; 1096 hrtime_t hrtime; 1097 char buf[1024]; 1098 1099 if (!gotid) { 1100 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "zfs_dbgmsg", &id) == 1101 -1) { 1102 mdb_warn("couldn't find struct zfs_dbgmsg"); 1103 return (WALK_ERR); 1104 } 1105 gotid = TRUE; 1106 if (mdb_ctf_offsetof(id, "zdm_msg", &off) == -1) { 1107 mdb_warn("couldn't find zdm_msg"); 1108 return (WALK_ERR); 1109 } 1110 off /= 8; 1111 } 1112 1113 if (GETMEMBID(addr, &id, zdm_timestamp, timestamp)) { 1114 return (WALK_ERR); 1115 } 1116 1117 if (da->da_hrtime || da->da_timedelta) { 1118 if (GETMEMBID(addr, &id, zdm_hrtime, hrtime)) { 1119 return (WALK_ERR); 1120 } 1121 } 1122 1123 if (mdb_readstr(buf, sizeof (buf), addr + off) == -1) { 1124 mdb_warn("failed to read zdm_msg at %p\n", addr + off); 1125 return (DCMD_ERR); 1126 } 1127 1128 if (da->da_address) 1129 mdb_printf("%p ", addr); 1130 1131 if (da->da_timedelta) { 1132 int64_t diff; 1133 char dbuf[32] = { 0 }; 1134 1135 if (da->da_curtime == 0) 1136 da->da_curtime = mdb_gethrtime(); 1137 1138 diff = (int64_t)hrtime - da->da_curtime; 1139 mdb_nicetime(diff, dbuf, sizeof (dbuf)); 1140 mdb_printf("%-20s ", dbuf); 1141 } else if (da->da_hrtime) { 1142 mdb_printf("%016x ", hrtime); 1143 } else if (da->da_time) { 1144 mdb_printf("%Y ", timestamp); 1145 } 1146 1147 mdb_printf("%s\n", buf); 1148 1149 if (da->da_whatis) 1150 (void) mdb_call_dcmd("whatis", addr, DCMD_ADDRSPEC, 0, NULL); 1151 1152 return (WALK_NEXT); 1153 } 1154 1155 static int 1156 dbgmsg(uintptr_t addr, uint_t flags __unused, int argc, const mdb_arg_t *argv) 1157 { 1158 GElf_Sym sym; 1159 dbgmsg_arg_t da = { 0 }; 1160 boolean_t verbose = B_FALSE; 1161 1162 if (mdb_getopts(argc, argv, 1163 'a', MDB_OPT_SETBITS, B_TRUE, &da.da_address, 1164 'r', MDB_OPT_SETBITS, B_TRUE, &da.da_hrtime, 1165 't', MDB_OPT_SETBITS, B_TRUE, &da.da_timedelta, 1166 'T', MDB_OPT_SETBITS, B_TRUE, &da.da_time, 1167 'v', MDB_OPT_SETBITS, B_TRUE, &verbose, 1168 'w', MDB_OPT_SETBITS, B_TRUE, &da.da_whatis, 1169 NULL) != argc) { 1170 return (DCMD_USAGE); 1171 } 1172 1173 if (verbose) 1174 da.da_address = da.da_time = B_TRUE; 1175 1176 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "zfs_dbgmsgs", &sym)) { 1177 mdb_warn("can't find zfs_dbgmsgs"); 1178 return (DCMD_ERR); 1179 } 1180 1181 if (mdb_pwalk("list", dbgmsg_cb, &da, sym.st_value) != 0) { 1182 mdb_warn("can't walk zfs_dbgmsgs"); 1183 return (DCMD_ERR); 1184 } 1185 1186 return (DCMD_OK); 1187 } 1188 1189 1190 static void 1191 dbgmsg_help(void) 1192 { 1193 mdb_printf("Print entries from the ZFS debug log.\n\n" 1194 "%<b>OPTIONS%</b>\n" 1195 "\t-a\tInclude the address of each zfs_dbgmsg_t.\n" 1196 "\t-r\tDisplay high-resolution timestamps.\n" 1197 "\t-t\tInclude the age of the message.\n" 1198 "\t-T\tInclude the date/time of the message.\n" 1199 "\t-v\tEquivalent to -aT.\n" 1200 "\t-w\tRun ::whatis on each zfs_dbgmsg_t. Useful in DEBUG kernels\n" 1201 "\t\tto show the origin of the message.\n"); 1202 } 1203 1204 /*ARGSUSED*/ 1205 static int 1206 arc_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1207 { 1208 kstat_named_t *stats; 1209 GElf_Sym sym; 1210 int nstats, i; 1211 uint_t opt_a = FALSE; 1212 uint_t opt_b = FALSE; 1213 uint_t shift = 0; 1214 const char *suffix; 1215 1216 static const char *bytestats[] = { 1217 "p", "c", "c_min", "c_max", "size", "duplicate_buffers_size", 1218 "arc_meta_used", "arc_meta_limit", "arc_meta_max", 1219 "arc_meta_min", "hdr_size", "data_size", "metadata_size", 1220 "other_size", "anon_size", "anon_evictable_data", 1221 "anon_evictable_metadata", "mru_size", "mru_evictable_data", 1222 "mru_evictable_metadata", "mru_ghost_size", 1223 "mru_ghost_evictable_data", "mru_ghost_evictable_metadata", 1224 "mfu_size", "mfu_evictable_data", "mfu_evictable_metadata", 1225 "mfu_ghost_size", "mfu_ghost_evictable_data", 1226 "mfu_ghost_evictable_metadata", "evict_l2_cached", 1227 "evict_l2_eligible", "evict_l2_ineligible", "l2_read_bytes", 1228 "l2_write_bytes", "l2_size", "l2_asize", "l2_hdr_size", 1229 "compressed_size", "uncompressed_size", "overhead_size", 1230 NULL 1231 }; 1232 1233 static const char *extras[] = { 1234 "arc_no_grow", "arc_tempreserve", 1235 NULL 1236 }; 1237 1238 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "arc_stats", &sym) == -1) { 1239 mdb_warn("failed to find 'arc_stats'"); 1240 return (DCMD_ERR); 1241 } 1242 1243 stats = mdb_zalloc(sym.st_size, UM_SLEEP | UM_GC); 1244 1245 if (mdb_vread(stats, sym.st_size, sym.st_value) == -1) { 1246 mdb_warn("couldn't read 'arc_stats' at %p", sym.st_value); 1247 return (DCMD_ERR); 1248 } 1249 1250 nstats = sym.st_size / sizeof (kstat_named_t); 1251 1252 /* NB: -a / opt_a are ignored for backwards compatability */ 1253 if (mdb_getopts(argc, argv, 1254 'a', MDB_OPT_SETBITS, TRUE, &opt_a, 1255 'b', MDB_OPT_SETBITS, TRUE, &opt_b, 1256 'k', MDB_OPT_SETBITS, 10, &shift, 1257 'm', MDB_OPT_SETBITS, 20, &shift, 1258 'g', MDB_OPT_SETBITS, 30, &shift, 1259 NULL) != argc) 1260 return (DCMD_USAGE); 1261 1262 if (!opt_b && !shift) 1263 shift = 20; 1264 1265 switch (shift) { 1266 case 0: 1267 suffix = "B"; 1268 break; 1269 case 10: 1270 suffix = "KB"; 1271 break; 1272 case 20: 1273 suffix = "MB"; 1274 break; 1275 case 30: 1276 suffix = "GB"; 1277 break; 1278 default: 1279 suffix = "XX"; 1280 } 1281 1282 for (i = 0; i < nstats; i++) { 1283 int j; 1284 boolean_t bytes = B_FALSE; 1285 1286 for (j = 0; bytestats[j]; j++) { 1287 if (strcmp(stats[i].name, bytestats[j]) == 0) { 1288 bytes = B_TRUE; 1289 break; 1290 } 1291 } 1292 1293 if (bytes) { 1294 mdb_printf("%-25s = %9llu %s\n", stats[i].name, 1295 stats[i].value.ui64 >> shift, suffix); 1296 } else { 1297 mdb_printf("%-25s = %9llu\n", stats[i].name, 1298 stats[i].value.ui64); 1299 } 1300 } 1301 1302 for (i = 0; extras[i]; i++) { 1303 uint64_t buf; 1304 1305 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, extras[i], &sym) == -1) { 1306 mdb_warn("failed to find '%s'", extras[i]); 1307 return (DCMD_ERR); 1308 } 1309 1310 if (sym.st_size != sizeof (uint64_t) && 1311 sym.st_size != sizeof (uint32_t)) { 1312 mdb_warn("expected scalar for variable '%s'\n", 1313 extras[i]); 1314 return (DCMD_ERR); 1315 } 1316 1317 if (mdb_vread(&buf, sym.st_size, sym.st_value) == -1) { 1318 mdb_warn("couldn't read '%s'", extras[i]); 1319 return (DCMD_ERR); 1320 } 1321 1322 mdb_printf("%-25s = ", extras[i]); 1323 1324 /* NB: all the 64-bit extras happen to be byte counts */ 1325 if (sym.st_size == sizeof (uint64_t)) 1326 mdb_printf("%9llu %s\n", buf >> shift, suffix); 1327 1328 if (sym.st_size == sizeof (uint32_t)) 1329 mdb_printf("%9d\n", *((uint32_t *)&buf)); 1330 } 1331 return (DCMD_OK); 1332 } 1333 1334 typedef struct mdb_spa_print { 1335 pool_state_t spa_state; 1336 char spa_name[ZFS_MAX_DATASET_NAME_LEN]; 1337 uintptr_t spa_normal_class; 1338 } mdb_spa_print_t; 1339 1340 1341 const char histo_stars[] = "****************************************"; 1342 const int histo_width = sizeof (histo_stars) - 1; 1343 1344 static void 1345 dump_histogram(const uint64_t *histo, int size, int offset) 1346 { 1347 int i; 1348 int minidx = size - 1; 1349 int maxidx = 0; 1350 uint64_t max = 0; 1351 1352 for (i = 0; i < size; i++) { 1353 if (histo[i] > max) 1354 max = histo[i]; 1355 if (histo[i] > 0 && i > maxidx) 1356 maxidx = i; 1357 if (histo[i] > 0 && i < minidx) 1358 minidx = i; 1359 } 1360 1361 if (max < histo_width) 1362 max = histo_width; 1363 1364 for (i = minidx; i <= maxidx; i++) { 1365 mdb_printf("%3u: %6llu %s\n", 1366 i + offset, (u_longlong_t)histo[i], 1367 &histo_stars[(max - histo[i]) * histo_width / max]); 1368 } 1369 } 1370 1371 typedef struct mdb_metaslab_class { 1372 uint64_t mc_histogram[RANGE_TREE_HISTOGRAM_SIZE]; 1373 } mdb_metaslab_class_t; 1374 1375 /* 1376 * spa_class_histogram(uintptr_t class_addr) 1377 * 1378 * Prints free space histogram for a device class 1379 * 1380 * Returns DCMD_OK, or DCMD_ERR. 1381 */ 1382 static int 1383 spa_class_histogram(uintptr_t class_addr) 1384 { 1385 mdb_metaslab_class_t mc; 1386 if (mdb_ctf_vread(&mc, "metaslab_class_t", 1387 "mdb_metaslab_class_t", class_addr, 0) == -1) 1388 return (DCMD_ERR); 1389 1390 mdb_inc_indent(4); 1391 dump_histogram(mc.mc_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0); 1392 mdb_dec_indent(4); 1393 return (DCMD_OK); 1394 } 1395 1396 /* 1397 * ::spa 1398 * 1399 * -c Print configuration information as well 1400 * -v Print vdev state 1401 * -e Print vdev error stats 1402 * -m Print vdev metaslab info 1403 * -M print vdev metaslab group info 1404 * -h Print histogram info (must be combined with -m or -M) 1405 * 1406 * Print a summarized spa_t. When given no arguments, prints out a table of all 1407 * active pools on the system. 1408 */ 1409 /* ARGSUSED */ 1410 static int 1411 spa_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1412 { 1413 const char *statetab[] = { "ACTIVE", "EXPORTED", "DESTROYED", 1414 "SPARE", "L2CACHE", "UNINIT", "UNAVAIL", "POTENTIAL" }; 1415 const char *state; 1416 int spa_flags = 0; 1417 1418 if (mdb_getopts(argc, argv, 1419 'c', MDB_OPT_SETBITS, SPA_FLAG_CONFIG, &spa_flags, 1420 'v', MDB_OPT_SETBITS, SPA_FLAG_VDEVS, &spa_flags, 1421 'e', MDB_OPT_SETBITS, SPA_FLAG_ERRORS, &spa_flags, 1422 'M', MDB_OPT_SETBITS, SPA_FLAG_METASLAB_GROUPS, &spa_flags, 1423 'm', MDB_OPT_SETBITS, SPA_FLAG_METASLABS, &spa_flags, 1424 'h', MDB_OPT_SETBITS, SPA_FLAG_HISTOGRAMS, &spa_flags, 1425 NULL) != argc) 1426 return (DCMD_USAGE); 1427 1428 if (!(flags & DCMD_ADDRSPEC)) { 1429 if (mdb_walk_dcmd("spa", "spa", argc, argv) == -1) { 1430 mdb_warn("can't walk spa"); 1431 return (DCMD_ERR); 1432 } 1433 1434 return (DCMD_OK); 1435 } 1436 1437 if (flags & DCMD_PIPE_OUT) { 1438 mdb_printf("%#lr\n", addr); 1439 return (DCMD_OK); 1440 } 1441 1442 if (DCMD_HDRSPEC(flags)) 1443 mdb_printf("%<u>%-?s %9s %-*s%</u>\n", "ADDR", "STATE", 1444 sizeof (uintptr_t) == 4 ? 60 : 52, "NAME"); 1445 1446 mdb_spa_print_t spa; 1447 if (mdb_ctf_vread(&spa, "spa_t", "mdb_spa_print_t", addr, 0) == -1) 1448 return (DCMD_ERR); 1449 1450 if (spa.spa_state < 0 || spa.spa_state > POOL_STATE_UNAVAIL) 1451 state = "UNKNOWN"; 1452 else 1453 state = statetab[spa.spa_state]; 1454 1455 mdb_printf("%0?p %9s %s\n", addr, state, spa.spa_name); 1456 if (spa_flags & SPA_FLAG_HISTOGRAMS) 1457 spa_class_histogram(spa.spa_normal_class); 1458 1459 if (spa_flags & SPA_FLAG_CONFIG) { 1460 mdb_printf("\n"); 1461 mdb_inc_indent(4); 1462 if (mdb_call_dcmd("spa_config", addr, flags, 0, 1463 NULL) != DCMD_OK) 1464 return (DCMD_ERR); 1465 mdb_dec_indent(4); 1466 } 1467 1468 if (spa_flags & SPA_FLAG_ALL_VDEV) { 1469 mdb_arg_t v; 1470 char opts[100] = "-"; 1471 int args = 1472 (spa_flags | SPA_FLAG_VDEVS) == SPA_FLAG_VDEVS ? 0 : 1; 1473 1474 if (spa_flags & SPA_FLAG_ERRORS) 1475 strcat(opts, "e"); 1476 if (spa_flags & SPA_FLAG_METASLABS) 1477 strcat(opts, "m"); 1478 if (spa_flags & SPA_FLAG_METASLAB_GROUPS) 1479 strcat(opts, "M"); 1480 if (spa_flags & SPA_FLAG_HISTOGRAMS) 1481 strcat(opts, "h"); 1482 1483 v.a_type = MDB_TYPE_STRING; 1484 v.a_un.a_str = opts; 1485 1486 mdb_printf("\n"); 1487 mdb_inc_indent(4); 1488 if (mdb_call_dcmd("spa_vdevs", addr, flags, args, 1489 &v) != DCMD_OK) 1490 return (DCMD_ERR); 1491 mdb_dec_indent(4); 1492 } 1493 1494 return (DCMD_OK); 1495 } 1496 1497 typedef struct mdb_spa_config_spa { 1498 uintptr_t spa_config; 1499 } mdb_spa_config_spa_t; 1500 1501 /* 1502 * ::spa_config 1503 * 1504 * Given a spa_t, print the configuration information stored in spa_config. 1505 * Since it's just an nvlist, format it as an indented list of name=value pairs. 1506 * We simply read the value of spa_config and pass off to ::nvlist. 1507 */ 1508 /* ARGSUSED */ 1509 static int 1510 spa_print_config(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1511 { 1512 mdb_spa_config_spa_t spa; 1513 1514 if (argc != 0 || !(flags & DCMD_ADDRSPEC)) 1515 return (DCMD_USAGE); 1516 1517 if (mdb_ctf_vread(&spa, ZFS_STRUCT "spa", "mdb_spa_config_spa_t", 1518 addr, 0) == -1) 1519 return (DCMD_ERR); 1520 1521 if (spa.spa_config == 0) { 1522 mdb_printf("(none)\n"); 1523 return (DCMD_OK); 1524 } 1525 1526 return (mdb_call_dcmd("nvlist", spa.spa_config, flags, 1527 0, NULL)); 1528 } 1529 1530 typedef struct mdb_range_tree { 1531 struct { 1532 uint64_t bt_num_elems; 1533 uint64_t bt_num_nodes; 1534 } rt_root; 1535 uint64_t rt_space; 1536 range_seg_type_t rt_type; 1537 uint8_t rt_shift; 1538 uint64_t rt_start; 1539 } mdb_range_tree_t; 1540 1541 typedef struct mdb_metaslab_group { 1542 uint64_t mg_fragmentation; 1543 uint64_t mg_histogram[RANGE_TREE_HISTOGRAM_SIZE]; 1544 uintptr_t mg_vd; 1545 } mdb_metaslab_group_t; 1546 1547 typedef struct mdb_metaslab { 1548 uint64_t ms_id; 1549 uint64_t ms_start; 1550 uint64_t ms_size; 1551 int64_t ms_deferspace; 1552 uint64_t ms_fragmentation; 1553 uint64_t ms_weight; 1554 uintptr_t ms_allocating[TXG_SIZE]; 1555 uintptr_t ms_checkpointing; 1556 uintptr_t ms_freeing; 1557 uintptr_t ms_freed; 1558 uintptr_t ms_allocatable; 1559 uintptr_t ms_unflushed_frees; 1560 uintptr_t ms_unflushed_allocs; 1561 uintptr_t ms_sm; 1562 } mdb_metaslab_t; 1563 1564 typedef struct mdb_space_map_phys_t { 1565 int64_t smp_alloc; 1566 uint64_t smp_histogram[SPACE_MAP_HISTOGRAM_SIZE]; 1567 } mdb_space_map_phys_t; 1568 1569 typedef struct mdb_space_map { 1570 uint64_t sm_size; 1571 uint8_t sm_shift; 1572 uintptr_t sm_phys; 1573 } mdb_space_map_t; 1574 1575 typedef struct mdb_vdev { 1576 uint64_t vdev_id; 1577 uint64_t vdev_state; 1578 uintptr_t vdev_ops; 1579 struct { 1580 uint64_t vs_aux; 1581 uint64_t vs_ops[VS_ZIO_TYPES]; 1582 uint64_t vs_bytes[VS_ZIO_TYPES]; 1583 uint64_t vs_read_errors; 1584 uint64_t vs_write_errors; 1585 uint64_t vs_checksum_errors; 1586 } vdev_stat; 1587 uintptr_t vdev_child; 1588 uint64_t vdev_children; 1589 uint64_t vdev_ms_count; 1590 uintptr_t vdev_mg; 1591 uintptr_t vdev_ms; 1592 uintptr_t vdev_path; 1593 } mdb_vdev_t; 1594 1595 typedef struct mdb_vdev_ops { 1596 char vdev_op_type[16]; 1597 } mdb_vdev_ops_t; 1598 1599 static int 1600 metaslab_stats(mdb_vdev_t *vd, int spa_flags) 1601 { 1602 mdb_inc_indent(4); 1603 mdb_printf("%<u>%-?s %6s %20s %10s %10s %10s%</u>\n", "ADDR", "ID", 1604 "OFFSET", "FREE", "FRAG", "UCMU"); 1605 1606 uintptr_t *vdev_ms = mdb_alloc(vd->vdev_ms_count * sizeof (vdev_ms), 1607 UM_SLEEP | UM_GC); 1608 if (mdb_vread(vdev_ms, vd->vdev_ms_count * sizeof (uintptr_t), 1609 vd->vdev_ms) == -1) { 1610 mdb_warn("failed to read vdev_ms at %p\n", vd->vdev_ms); 1611 return (DCMD_ERR); 1612 } 1613 1614 for (int m = 0; m < vd->vdev_ms_count; m++) { 1615 mdb_metaslab_t ms; 1616 mdb_space_map_t sm = { 0 }; 1617 mdb_space_map_phys_t smp = { 0 }; 1618 mdb_range_tree_t rt; 1619 uint64_t uallocs, ufrees, raw_free, raw_uchanges_mem; 1620 char free[MDB_NICENUM_BUFLEN]; 1621 char uchanges_mem[MDB_NICENUM_BUFLEN]; 1622 1623 if (mdb_ctf_vread(&ms, "metaslab_t", "mdb_metaslab_t", 1624 vdev_ms[m], 0) == -1) 1625 return (DCMD_ERR); 1626 1627 if (ms.ms_sm != 0 && 1628 mdb_ctf_vread(&sm, "space_map_t", "mdb_space_map_t", 1629 ms.ms_sm, 0) == -1) 1630 return (DCMD_ERR); 1631 1632 if (mdb_ctf_vread(&rt, "range_tree_t", "mdb_range_tree_t", 1633 ms.ms_unflushed_frees, 0) == -1) 1634 return (DCMD_ERR); 1635 ufrees = rt.rt_space; 1636 raw_uchanges_mem = rt.rt_root.bt_num_nodes * BTREE_LEAF_SIZE; 1637 1638 if (mdb_ctf_vread(&rt, "range_tree_t", "mdb_range_tree_t", 1639 ms.ms_unflushed_allocs, 0) == -1) 1640 return (DCMD_ERR); 1641 uallocs = rt.rt_space; 1642 raw_uchanges_mem += rt.rt_root.bt_num_nodes * BTREE_LEAF_SIZE; 1643 mdb_nicenum(raw_uchanges_mem, uchanges_mem); 1644 1645 raw_free = ms.ms_size; 1646 if (ms.ms_sm != 0 && sm.sm_phys != 0) { 1647 (void) mdb_ctf_vread(&smp, "space_map_phys_t", 1648 "mdb_space_map_phys_t", sm.sm_phys, 0); 1649 raw_free -= smp.smp_alloc; 1650 } 1651 raw_free += ufrees - uallocs; 1652 mdb_nicenum(raw_free, free); 1653 1654 mdb_printf("%0?p %6llu %20llx %10s ", vdev_ms[m], ms.ms_id, 1655 ms.ms_start, free); 1656 if (ms.ms_fragmentation == ZFS_FRAG_INVALID) 1657 mdb_printf("%9s ", "-"); 1658 else 1659 mdb_printf("%9llu%% ", ms.ms_fragmentation); 1660 mdb_printf("%10s\n", uchanges_mem); 1661 1662 if ((spa_flags & SPA_FLAG_HISTOGRAMS) && ms.ms_sm != 0 && 1663 sm.sm_phys != 0) { 1664 dump_histogram(smp.smp_histogram, 1665 SPACE_MAP_HISTOGRAM_SIZE, sm.sm_shift); 1666 } 1667 } 1668 mdb_dec_indent(4); 1669 return (DCMD_OK); 1670 } 1671 1672 static int 1673 metaslab_group_stats(mdb_vdev_t *vd, int spa_flags) 1674 { 1675 mdb_metaslab_group_t mg; 1676 if (mdb_ctf_vread(&mg, "metaslab_group_t", "mdb_metaslab_group_t", 1677 vd->vdev_mg, 0) == -1) { 1678 mdb_warn("failed to read vdev_mg at %p\n", vd->vdev_mg); 1679 return (DCMD_ERR); 1680 } 1681 1682 mdb_inc_indent(4); 1683 mdb_printf("%<u>%-?s %7s %9s%</u>\n", "ADDR", "FRAG", "UCMU"); 1684 1685 if (mg.mg_fragmentation == ZFS_FRAG_INVALID) 1686 mdb_printf("%0?p %6s\n", vd->vdev_mg, "-"); 1687 else 1688 mdb_printf("%0?p %6llu%%", vd->vdev_mg, mg.mg_fragmentation); 1689 1690 1691 uintptr_t *vdev_ms = mdb_alloc(vd->vdev_ms_count * sizeof (vdev_ms), 1692 UM_SLEEP | UM_GC); 1693 if (mdb_vread(vdev_ms, vd->vdev_ms_count * sizeof (uintptr_t), 1694 vd->vdev_ms) == -1) { 1695 mdb_warn("failed to read vdev_ms at %p\n", vd->vdev_ms); 1696 return (DCMD_ERR); 1697 } 1698 1699 uint64_t raw_uchanges_mem = 0; 1700 char uchanges_mem[MDB_NICENUM_BUFLEN]; 1701 for (int m = 0; m < vd->vdev_ms_count; m++) { 1702 mdb_metaslab_t ms; 1703 mdb_range_tree_t rt; 1704 1705 if (mdb_ctf_vread(&ms, "metaslab_t", "mdb_metaslab_t", 1706 vdev_ms[m], 0) == -1) 1707 return (DCMD_ERR); 1708 1709 if (mdb_ctf_vread(&rt, "range_tree_t", "mdb_range_tree_t", 1710 ms.ms_unflushed_frees, 0) == -1) 1711 return (DCMD_ERR); 1712 raw_uchanges_mem += rt.rt_root.bt_num_nodes * BTREE_LEAF_SIZE; 1713 1714 if (mdb_ctf_vread(&rt, "range_tree_t", "mdb_range_tree_t", 1715 ms.ms_unflushed_allocs, 0) == -1) 1716 return (DCMD_ERR); 1717 raw_uchanges_mem += rt.rt_root.bt_num_nodes * BTREE_LEAF_SIZE; 1718 } 1719 mdb_nicenum(raw_uchanges_mem, uchanges_mem); 1720 mdb_printf("%10s\n", uchanges_mem); 1721 1722 if (spa_flags & SPA_FLAG_HISTOGRAMS) 1723 dump_histogram(mg.mg_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0); 1724 mdb_dec_indent(4); 1725 return (DCMD_OK); 1726 } 1727 1728 /* 1729 * ::vdev 1730 * 1731 * Print out a summarized vdev_t, in the following form: 1732 * 1733 * ADDR STATE AUX DESC 1734 * fffffffbcde23df0 HEALTHY - /dev/dsk/c0t0d0 1735 * 1736 * If '-r' is specified, recursively visit all children. 1737 * 1738 * With '-e', the statistics associated with the vdev are printed as well. 1739 */ 1740 static int 1741 do_print_vdev(uintptr_t addr, int flags, int depth, boolean_t recursive, 1742 int spa_flags) 1743 { 1744 mdb_vdev_t vd; 1745 if (mdb_ctf_vread(&vd, "vdev_t", "mdb_vdev_t", 1746 (uintptr_t)addr, 0) == -1) 1747 return (DCMD_ERR); 1748 1749 if (flags & DCMD_PIPE_OUT) { 1750 mdb_printf("%#lr\n", addr); 1751 } else { 1752 char desc[MAXNAMELEN]; 1753 if (vd.vdev_path != 0) { 1754 if (mdb_readstr(desc, sizeof (desc), 1755 (uintptr_t)vd.vdev_path) == -1) { 1756 mdb_warn("failed to read vdev_path at %p\n", 1757 vd.vdev_path); 1758 return (DCMD_ERR); 1759 } 1760 } else if (vd.vdev_ops != 0) { 1761 vdev_ops_t ops; 1762 if (mdb_vread(&ops, sizeof (ops), 1763 (uintptr_t)vd.vdev_ops) == -1) { 1764 mdb_warn("failed to read vdev_ops at %p\n", 1765 vd.vdev_ops); 1766 return (DCMD_ERR); 1767 } 1768 (void) strcpy(desc, ops.vdev_op_type); 1769 } else { 1770 (void) strcpy(desc, "<unknown>"); 1771 } 1772 1773 if (depth == 0 && DCMD_HDRSPEC(flags)) 1774 mdb_printf("%<u>%-?s %-9s %-12s %-*s%</u>\n", 1775 "ADDR", "STATE", "AUX", 1776 sizeof (uintptr_t) == 4 ? 43 : 35, 1777 "DESCRIPTION"); 1778 1779 mdb_printf("%0?p ", addr); 1780 1781 const char *state, *aux; 1782 switch (vd.vdev_state) { 1783 case VDEV_STATE_CLOSED: 1784 state = "CLOSED"; 1785 break; 1786 case VDEV_STATE_OFFLINE: 1787 state = "OFFLINE"; 1788 break; 1789 case VDEV_STATE_CANT_OPEN: 1790 state = "CANT_OPEN"; 1791 break; 1792 case VDEV_STATE_DEGRADED: 1793 state = "DEGRADED"; 1794 break; 1795 case VDEV_STATE_HEALTHY: 1796 state = "HEALTHY"; 1797 break; 1798 case VDEV_STATE_REMOVED: 1799 state = "REMOVED"; 1800 break; 1801 case VDEV_STATE_FAULTED: 1802 state = "FAULTED"; 1803 break; 1804 default: 1805 state = "UNKNOWN"; 1806 break; 1807 } 1808 1809 switch (vd.vdev_stat.vs_aux) { 1810 case VDEV_AUX_NONE: 1811 aux = "-"; 1812 break; 1813 case VDEV_AUX_OPEN_FAILED: 1814 aux = "OPEN_FAILED"; 1815 break; 1816 case VDEV_AUX_CORRUPT_DATA: 1817 aux = "CORRUPT_DATA"; 1818 break; 1819 case VDEV_AUX_NO_REPLICAS: 1820 aux = "NO_REPLICAS"; 1821 break; 1822 case VDEV_AUX_BAD_GUID_SUM: 1823 aux = "BAD_GUID_SUM"; 1824 break; 1825 case VDEV_AUX_TOO_SMALL: 1826 aux = "TOO_SMALL"; 1827 break; 1828 case VDEV_AUX_BAD_LABEL: 1829 aux = "BAD_LABEL"; 1830 break; 1831 case VDEV_AUX_VERSION_NEWER: 1832 aux = "VERS_NEWER"; 1833 break; 1834 case VDEV_AUX_VERSION_OLDER: 1835 aux = "VERS_OLDER"; 1836 break; 1837 case VDEV_AUX_UNSUP_FEAT: 1838 aux = "UNSUP_FEAT"; 1839 break; 1840 case VDEV_AUX_SPARED: 1841 aux = "SPARED"; 1842 break; 1843 case VDEV_AUX_ERR_EXCEEDED: 1844 aux = "ERR_EXCEEDED"; 1845 break; 1846 case VDEV_AUX_IO_FAILURE: 1847 aux = "IO_FAILURE"; 1848 break; 1849 case VDEV_AUX_BAD_LOG: 1850 aux = "BAD_LOG"; 1851 break; 1852 case VDEV_AUX_EXTERNAL: 1853 aux = "EXTERNAL"; 1854 break; 1855 case VDEV_AUX_SPLIT_POOL: 1856 aux = "SPLIT_POOL"; 1857 break; 1858 case VDEV_AUX_CHILDREN_OFFLINE: 1859 aux = "CHILDREN_OFFLINE"; 1860 break; 1861 default: 1862 aux = "UNKNOWN"; 1863 break; 1864 } 1865 1866 mdb_printf("%-9s %-12s %*s%s\n", state, aux, depth, "", desc); 1867 1868 if (spa_flags & SPA_FLAG_ERRORS) { 1869 int i; 1870 1871 mdb_inc_indent(4); 1872 mdb_printf("\n"); 1873 mdb_printf("%<u> %12s %12s %12s %12s " 1874 "%12s%</u>\n", "READ", "WRITE", "FREE", "CLAIM", 1875 "IOCTL"); 1876 mdb_printf("OPS "); 1877 for (i = 1; i < VS_ZIO_TYPES; i++) 1878 mdb_printf("%11#llx%s", 1879 vd.vdev_stat.vs_ops[i], 1880 i == VS_ZIO_TYPES - 1 ? "" : " "); 1881 mdb_printf("\n"); 1882 mdb_printf("BYTES "); 1883 for (i = 1; i < VS_ZIO_TYPES; i++) 1884 mdb_printf("%11#llx%s", 1885 vd.vdev_stat.vs_bytes[i], 1886 i == VS_ZIO_TYPES - 1 ? "" : " "); 1887 1888 1889 mdb_printf("\n"); 1890 mdb_printf("EREAD %10#llx\n", 1891 vd.vdev_stat.vs_read_errors); 1892 mdb_printf("EWRITE %10#llx\n", 1893 vd.vdev_stat.vs_write_errors); 1894 mdb_printf("ECKSUM %10#llx\n", 1895 vd.vdev_stat.vs_checksum_errors); 1896 mdb_dec_indent(4); 1897 mdb_printf("\n"); 1898 } 1899 1900 if ((spa_flags & SPA_FLAG_METASLAB_GROUPS) && 1901 vd.vdev_mg != 0) { 1902 metaslab_group_stats(&vd, spa_flags); 1903 } 1904 if ((spa_flags & SPA_FLAG_METASLABS) && vd.vdev_ms != 0) { 1905 metaslab_stats(&vd, spa_flags); 1906 } 1907 } 1908 1909 uint64_t children = vd.vdev_children; 1910 if (children == 0 || !recursive) 1911 return (DCMD_OK); 1912 1913 uintptr_t *child = mdb_alloc(children * sizeof (child), 1914 UM_SLEEP | UM_GC); 1915 if (mdb_vread(child, children * sizeof (void *), vd.vdev_child) == -1) { 1916 mdb_warn("failed to read vdev children at %p", vd.vdev_child); 1917 return (DCMD_ERR); 1918 } 1919 1920 for (uint64_t c = 0; c < children; c++) { 1921 if (do_print_vdev(child[c], flags, depth + 2, recursive, 1922 spa_flags)) { 1923 return (DCMD_ERR); 1924 } 1925 } 1926 1927 return (DCMD_OK); 1928 } 1929 1930 static int 1931 vdev_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1932 { 1933 uint64_t depth = 0; 1934 boolean_t recursive = B_FALSE; 1935 int spa_flags = 0; 1936 1937 if (mdb_getopts(argc, argv, 1938 'e', MDB_OPT_SETBITS, SPA_FLAG_ERRORS, &spa_flags, 1939 'm', MDB_OPT_SETBITS, SPA_FLAG_METASLABS, &spa_flags, 1940 'M', MDB_OPT_SETBITS, SPA_FLAG_METASLAB_GROUPS, &spa_flags, 1941 'h', MDB_OPT_SETBITS, SPA_FLAG_HISTOGRAMS, &spa_flags, 1942 'r', MDB_OPT_SETBITS, TRUE, &recursive, 1943 'd', MDB_OPT_UINT64, &depth, NULL) != argc) 1944 return (DCMD_USAGE); 1945 1946 if (!(flags & DCMD_ADDRSPEC)) { 1947 mdb_warn("no vdev_t address given\n"); 1948 return (DCMD_ERR); 1949 } 1950 1951 return (do_print_vdev(addr, flags, (int)depth, recursive, spa_flags)); 1952 } 1953 1954 typedef struct mdb_metaslab_alloc_trace { 1955 uintptr_t mat_mg; 1956 uintptr_t mat_msp; 1957 uint64_t mat_size; 1958 uint64_t mat_weight; 1959 uint64_t mat_offset; 1960 uint32_t mat_dva_id; 1961 int mat_allocator; 1962 } mdb_metaslab_alloc_trace_t; 1963 1964 static void 1965 metaslab_print_weight(uint64_t weight) 1966 { 1967 char buf[100]; 1968 1969 if (WEIGHT_IS_SPACEBASED(weight)) { 1970 mdb_nicenum( 1971 weight & ~(METASLAB_ACTIVE_MASK | METASLAB_WEIGHT_TYPE), 1972 buf); 1973 } else { 1974 char size[MDB_NICENUM_BUFLEN]; 1975 mdb_nicenum(1ULL << WEIGHT_GET_INDEX(weight), size); 1976 (void) mdb_snprintf(buf, sizeof (buf), "%llu x %s", 1977 WEIGHT_GET_COUNT(weight), size); 1978 } 1979 mdb_printf("%11s ", buf); 1980 } 1981 1982 /* ARGSUSED */ 1983 static int 1984 metaslab_weight(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1985 { 1986 uint64_t weight = 0; 1987 char active; 1988 1989 if (argc == 0 && (flags & DCMD_ADDRSPEC)) { 1990 if (mdb_vread(&weight, sizeof (uint64_t), addr) == -1) { 1991 mdb_warn("failed to read weight at %p\n", addr); 1992 return (DCMD_ERR); 1993 } 1994 } else if (argc == 1 && !(flags & DCMD_ADDRSPEC)) { 1995 weight = (uint64_t)mdb_argtoull(&argv[0]); 1996 } else { 1997 return (DCMD_USAGE); 1998 } 1999 2000 if (DCMD_HDRSPEC(flags)) { 2001 mdb_printf("%<u>%-6s %9s %9s%</u>\n", 2002 "ACTIVE", "ALGORITHM", "WEIGHT"); 2003 } 2004 2005 if (weight & METASLAB_WEIGHT_PRIMARY) 2006 active = 'P'; 2007 else if (weight & METASLAB_WEIGHT_SECONDARY) 2008 active = 'S'; 2009 else 2010 active = '-'; 2011 mdb_printf("%6c %8s ", active, 2012 WEIGHT_IS_SPACEBASED(weight) ? "SPACE" : "SEGMENT"); 2013 metaslab_print_weight(weight); 2014 mdb_printf("\n"); 2015 2016 return (DCMD_OK); 2017 } 2018 2019 /* ARGSUSED */ 2020 static int 2021 metaslab_trace(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2022 { 2023 mdb_metaslab_alloc_trace_t mat; 2024 mdb_metaslab_group_t mg = { 0 }; 2025 char result_type[100]; 2026 2027 if (mdb_ctf_vread(&mat, "metaslab_alloc_trace_t", 2028 "mdb_metaslab_alloc_trace_t", addr, 0) == -1) { 2029 return (DCMD_ERR); 2030 } 2031 2032 if (!(flags & DCMD_PIPE_OUT) && DCMD_HDRSPEC(flags)) { 2033 mdb_printf("%<u>%6s %6s %8s %11s %11s %18s %18s%</u>\n", 2034 "MSID", "DVA", "ASIZE", "ALLOCATOR", "WEIGHT", "RESULT", 2035 "VDEV"); 2036 } 2037 2038 if (mat.mat_msp != 0) { 2039 mdb_metaslab_t ms; 2040 2041 if (mdb_ctf_vread(&ms, "metaslab_t", "mdb_metaslab_t", 2042 mat.mat_msp, 0) == -1) { 2043 return (DCMD_ERR); 2044 } 2045 mdb_printf("%6llu ", ms.ms_id); 2046 } else { 2047 mdb_printf("%6s ", "-"); 2048 } 2049 2050 mdb_printf("%6d %8llx %11llx ", mat.mat_dva_id, mat.mat_size, 2051 mat.mat_allocator); 2052 2053 metaslab_print_weight(mat.mat_weight); 2054 2055 if ((int64_t)mat.mat_offset < 0) { 2056 if (enum_lookup("enum trace_alloc_type", mat.mat_offset, 2057 "TRACE_", sizeof (result_type), result_type) == -1) { 2058 mdb_warn("Could not find enum for trace_alloc_type"); 2059 return (DCMD_ERR); 2060 } 2061 mdb_printf("%18s ", result_type); 2062 } else { 2063 mdb_printf("%<b>%18llx%</b> ", mat.mat_offset); 2064 } 2065 2066 if (mat.mat_mg != 0 && 2067 mdb_ctf_vread(&mg, "metaslab_group_t", "mdb_metaslab_group_t", 2068 mat.mat_mg, 0) == -1) { 2069 return (DCMD_ERR); 2070 } 2071 2072 if (mg.mg_vd != 0) { 2073 mdb_vdev_t vdev; 2074 char desc[MAXNAMELEN]; 2075 2076 if (mdb_ctf_vread(&vdev, "vdev_t", "mdb_vdev_t", 2077 mg.mg_vd, 0) == -1) { 2078 return (DCMD_ERR); 2079 } 2080 2081 if (vdev.vdev_path != 0) { 2082 char path[MAXNAMELEN]; 2083 2084 if (mdb_readstr(path, sizeof (path), 2085 vdev.vdev_path) == -1) { 2086 mdb_warn("failed to read vdev_path at %p\n", 2087 vdev.vdev_path); 2088 return (DCMD_ERR); 2089 } 2090 char *slash; 2091 if ((slash = strrchr(path, '/')) != NULL) { 2092 strcpy(desc, slash + 1); 2093 } else { 2094 strcpy(desc, path); 2095 } 2096 } else if (vdev.vdev_ops != 0) { 2097 mdb_vdev_ops_t ops; 2098 if (mdb_ctf_vread(&ops, "vdev_ops_t", "mdb_vdev_ops_t", 2099 vdev.vdev_ops, 0) == -1) { 2100 mdb_warn("failed to read vdev_ops at %p\n", 2101 vdev.vdev_ops); 2102 return (DCMD_ERR); 2103 } 2104 (void) mdb_snprintf(desc, sizeof (desc), 2105 "%s-%llu", ops.vdev_op_type, vdev.vdev_id); 2106 } else { 2107 (void) strcpy(desc, "<unknown>"); 2108 } 2109 mdb_printf("%18s\n", desc); 2110 } 2111 2112 return (DCMD_OK); 2113 } 2114 2115 typedef struct metaslab_walk_data { 2116 uint64_t mw_numvdevs; 2117 uintptr_t *mw_vdevs; 2118 int mw_curvdev; 2119 uint64_t mw_nummss; 2120 uintptr_t *mw_mss; 2121 int mw_curms; 2122 } metaslab_walk_data_t; 2123 2124 static int 2125 metaslab_walk_step(mdb_walk_state_t *wsp) 2126 { 2127 metaslab_walk_data_t *mw = wsp->walk_data; 2128 metaslab_t ms; 2129 uintptr_t msp; 2130 2131 if (mw->mw_curvdev >= mw->mw_numvdevs) 2132 return (WALK_DONE); 2133 2134 if (mw->mw_mss == NULL) { 2135 uintptr_t mssp; 2136 uintptr_t vdevp; 2137 2138 ASSERT(mw->mw_curms == 0); 2139 ASSERT(mw->mw_nummss == 0); 2140 2141 vdevp = mw->mw_vdevs[mw->mw_curvdev]; 2142 if (GETMEMB(vdevp, "vdev", vdev_ms, mssp) || 2143 GETMEMB(vdevp, "vdev", vdev_ms_count, mw->mw_nummss)) { 2144 return (WALK_ERR); 2145 } 2146 2147 mw->mw_mss = mdb_alloc(mw->mw_nummss * sizeof (void*), 2148 UM_SLEEP | UM_GC); 2149 if (mdb_vread(mw->mw_mss, mw->mw_nummss * sizeof (void*), 2150 mssp) == -1) { 2151 mdb_warn("failed to read vdev_ms at %p", mssp); 2152 return (WALK_ERR); 2153 } 2154 } 2155 2156 if (mw->mw_curms >= mw->mw_nummss) { 2157 mw->mw_mss = NULL; 2158 mw->mw_curms = 0; 2159 mw->mw_nummss = 0; 2160 mw->mw_curvdev++; 2161 return (WALK_NEXT); 2162 } 2163 2164 msp = mw->mw_mss[mw->mw_curms]; 2165 if (mdb_vread(&ms, sizeof (metaslab_t), msp) == -1) { 2166 mdb_warn("failed to read metaslab_t at %p", msp); 2167 return (WALK_ERR); 2168 } 2169 2170 mw->mw_curms++; 2171 2172 return (wsp->walk_callback(msp, &ms, wsp->walk_cbdata)); 2173 } 2174 2175 static int 2176 metaslab_walk_init(mdb_walk_state_t *wsp) 2177 { 2178 metaslab_walk_data_t *mw; 2179 uintptr_t root_vdevp; 2180 uintptr_t childp; 2181 2182 if (wsp->walk_addr == 0) { 2183 mdb_warn("must supply address of spa_t\n"); 2184 return (WALK_ERR); 2185 } 2186 2187 mw = mdb_zalloc(sizeof (metaslab_walk_data_t), UM_SLEEP | UM_GC); 2188 2189 if (GETMEMB(wsp->walk_addr, "spa", spa_root_vdev, root_vdevp) || 2190 GETMEMB(root_vdevp, "vdev", vdev_children, mw->mw_numvdevs) || 2191 GETMEMB(root_vdevp, "vdev", vdev_child, childp)) { 2192 return (DCMD_ERR); 2193 } 2194 2195 mw->mw_vdevs = mdb_alloc(mw->mw_numvdevs * sizeof (void *), 2196 UM_SLEEP | UM_GC); 2197 if (mdb_vread(mw->mw_vdevs, mw->mw_numvdevs * sizeof (void *), 2198 childp) == -1) { 2199 mdb_warn("failed to read root vdev children at %p", childp); 2200 return (DCMD_ERR); 2201 } 2202 2203 wsp->walk_data = mw; 2204 2205 return (WALK_NEXT); 2206 } 2207 2208 typedef struct mdb_spa { 2209 uintptr_t spa_dsl_pool; 2210 uintptr_t spa_root_vdev; 2211 } mdb_spa_t; 2212 2213 typedef struct mdb_dsl_pool { 2214 uintptr_t dp_root_dir; 2215 } mdb_dsl_pool_t; 2216 2217 typedef struct mdb_dsl_dir { 2218 uintptr_t dd_dbuf; 2219 int64_t dd_space_towrite[TXG_SIZE]; 2220 } mdb_dsl_dir_t; 2221 2222 typedef struct mdb_dsl_dir_phys { 2223 uint64_t dd_used_bytes; 2224 uint64_t dd_compressed_bytes; 2225 uint64_t dd_uncompressed_bytes; 2226 } mdb_dsl_dir_phys_t; 2227 2228 typedef struct space_data { 2229 uint64_t ms_allocating[TXG_SIZE]; 2230 uint64_t ms_checkpointing; 2231 uint64_t ms_freeing; 2232 uint64_t ms_freed; 2233 uint64_t ms_unflushed_frees; 2234 uint64_t ms_unflushed_allocs; 2235 uint64_t ms_allocatable; 2236 int64_t ms_deferspace; 2237 uint64_t avail; 2238 } space_data_t; 2239 2240 /* ARGSUSED */ 2241 static int 2242 space_cb(uintptr_t addr, const void *unknown, void *arg) 2243 { 2244 space_data_t *sd = arg; 2245 mdb_metaslab_t ms; 2246 mdb_range_tree_t rt; 2247 mdb_space_map_t sm = { 0 }; 2248 mdb_space_map_phys_t smp = { 0 }; 2249 uint64_t uallocs, ufrees; 2250 int i; 2251 2252 if (mdb_ctf_vread(&ms, "metaslab_t", "mdb_metaslab_t", 2253 addr, 0) == -1) 2254 return (WALK_ERR); 2255 2256 for (i = 0; i < TXG_SIZE; i++) { 2257 if (mdb_ctf_vread(&rt, "range_tree_t", 2258 "mdb_range_tree_t", ms.ms_allocating[i], 0) == -1) 2259 return (WALK_ERR); 2260 sd->ms_allocating[i] += rt.rt_space; 2261 } 2262 2263 if (mdb_ctf_vread(&rt, "range_tree_t", 2264 "mdb_range_tree_t", ms.ms_checkpointing, 0) == -1) 2265 return (WALK_ERR); 2266 sd->ms_checkpointing += rt.rt_space; 2267 2268 if (mdb_ctf_vread(&rt, "range_tree_t", 2269 "mdb_range_tree_t", ms.ms_freeing, 0) == -1) 2270 return (WALK_ERR); 2271 sd->ms_freeing += rt.rt_space; 2272 2273 if (mdb_ctf_vread(&rt, "range_tree_t", 2274 "mdb_range_tree_t", ms.ms_freed, 0) == -1) 2275 return (WALK_ERR); 2276 sd->ms_freed += rt.rt_space; 2277 2278 if (mdb_ctf_vread(&rt, "range_tree_t", 2279 "mdb_range_tree_t", ms.ms_allocatable, 0) == -1) 2280 return (WALK_ERR); 2281 sd->ms_allocatable += rt.rt_space; 2282 2283 if (mdb_ctf_vread(&rt, "range_tree_t", 2284 "mdb_range_tree_t", ms.ms_unflushed_frees, 0) == -1) 2285 return (WALK_ERR); 2286 sd->ms_unflushed_frees += rt.rt_space; 2287 ufrees = rt.rt_space; 2288 2289 if (mdb_ctf_vread(&rt, "range_tree_t", 2290 "mdb_range_tree_t", ms.ms_unflushed_allocs, 0) == -1) 2291 return (WALK_ERR); 2292 sd->ms_unflushed_allocs += rt.rt_space; 2293 uallocs = rt.rt_space; 2294 2295 if (ms.ms_sm != 0 && 2296 mdb_ctf_vread(&sm, "space_map_t", 2297 "mdb_space_map_t", ms.ms_sm, 0) == -1) 2298 return (WALK_ERR); 2299 2300 if (sm.sm_phys != 0) { 2301 (void) mdb_ctf_vread(&smp, "space_map_phys_t", 2302 "mdb_space_map_phys_t", sm.sm_phys, 0); 2303 } 2304 2305 sd->ms_deferspace += ms.ms_deferspace; 2306 sd->avail += sm.sm_size - smp.smp_alloc + ufrees - uallocs; 2307 2308 return (WALK_NEXT); 2309 } 2310 2311 /* 2312 * ::spa_space [-b] 2313 * 2314 * Given a spa_t, print out it's on-disk space usage and in-core 2315 * estimates of future usage. If -b is given, print space in bytes. 2316 * Otherwise print in megabytes. 2317 */ 2318 /* ARGSUSED */ 2319 static int 2320 spa_space(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2321 { 2322 mdb_spa_t spa; 2323 mdb_dsl_pool_t dp; 2324 mdb_dsl_dir_t dd; 2325 mdb_dmu_buf_impl_t db; 2326 mdb_dsl_dir_phys_t dsp; 2327 space_data_t sd; 2328 int shift = 20; 2329 char *suffix = "M"; 2330 int bytes = B_FALSE; 2331 2332 if (mdb_getopts(argc, argv, 'b', MDB_OPT_SETBITS, TRUE, &bytes, NULL) != 2333 argc) 2334 return (DCMD_USAGE); 2335 if (!(flags & DCMD_ADDRSPEC)) 2336 return (DCMD_USAGE); 2337 2338 if (bytes) { 2339 shift = 0; 2340 suffix = ""; 2341 } 2342 2343 if (mdb_ctf_vread(&spa, ZFS_STRUCT "spa", "mdb_spa_t", 2344 addr, 0) == -1 || 2345 mdb_ctf_vread(&dp, ZFS_STRUCT "dsl_pool", "mdb_dsl_pool_t", 2346 spa.spa_dsl_pool, 0) == -1 || 2347 mdb_ctf_vread(&dd, ZFS_STRUCT "dsl_dir", "mdb_dsl_dir_t", 2348 dp.dp_root_dir, 0) == -1 || 2349 mdb_ctf_vread(&db, ZFS_STRUCT "dmu_buf_impl", "mdb_dmu_buf_impl_t", 2350 dd.dd_dbuf, 0) == -1 || 2351 mdb_ctf_vread(&dsp, ZFS_STRUCT "dsl_dir_phys", 2352 "mdb_dsl_dir_phys_t", db.db.db_data, 0) == -1) { 2353 return (DCMD_ERR); 2354 } 2355 2356 mdb_printf("dd_space_towrite = %llu%s %llu%s %llu%s %llu%s\n", 2357 dd.dd_space_towrite[0] >> shift, suffix, 2358 dd.dd_space_towrite[1] >> shift, suffix, 2359 dd.dd_space_towrite[2] >> shift, suffix, 2360 dd.dd_space_towrite[3] >> shift, suffix); 2361 2362 mdb_printf("dd_phys.dd_used_bytes = %llu%s\n", 2363 dsp.dd_used_bytes >> shift, suffix); 2364 mdb_printf("dd_phys.dd_compressed_bytes = %llu%s\n", 2365 dsp.dd_compressed_bytes >> shift, suffix); 2366 mdb_printf("dd_phys.dd_uncompressed_bytes = %llu%s\n", 2367 dsp.dd_uncompressed_bytes >> shift, suffix); 2368 2369 bzero(&sd, sizeof (sd)); 2370 if (mdb_pwalk("metaslab", space_cb, &sd, addr) != 0) { 2371 mdb_warn("can't walk metaslabs"); 2372 return (DCMD_ERR); 2373 } 2374 2375 mdb_printf("ms_allocmap = %llu%s %llu%s %llu%s %llu%s\n", 2376 sd.ms_allocating[0] >> shift, suffix, 2377 sd.ms_allocating[1] >> shift, suffix, 2378 sd.ms_allocating[2] >> shift, suffix, 2379 sd.ms_allocating[3] >> shift, suffix); 2380 mdb_printf("ms_checkpointing = %llu%s\n", 2381 sd.ms_checkpointing >> shift, suffix); 2382 mdb_printf("ms_freeing = %llu%s\n", 2383 sd.ms_freeing >> shift, suffix); 2384 mdb_printf("ms_freed = %llu%s\n", 2385 sd.ms_freed >> shift, suffix); 2386 mdb_printf("ms_unflushed_frees = %llu%s\n", 2387 sd.ms_unflushed_frees >> shift, suffix); 2388 mdb_printf("ms_unflushed_allocs = %llu%s\n", 2389 sd.ms_unflushed_allocs >> shift, suffix); 2390 mdb_printf("ms_allocatable = %llu%s\n", 2391 sd.ms_allocatable >> shift, suffix); 2392 mdb_printf("ms_deferspace = %llu%s\n", 2393 sd.ms_deferspace >> shift, suffix); 2394 mdb_printf("current avail = %llu%s\n", 2395 sd.avail >> shift, suffix); 2396 2397 return (DCMD_OK); 2398 } 2399 2400 typedef struct mdb_spa_aux_vdev { 2401 int sav_count; 2402 uintptr_t sav_vdevs; 2403 } mdb_spa_aux_vdev_t; 2404 2405 typedef struct mdb_spa_vdevs { 2406 uintptr_t spa_root_vdev; 2407 mdb_spa_aux_vdev_t spa_l2cache; 2408 mdb_spa_aux_vdev_t spa_spares; 2409 } mdb_spa_vdevs_t; 2410 2411 static int 2412 spa_print_aux(mdb_spa_aux_vdev_t *sav, uint_t flags, mdb_arg_t *v, 2413 const char *name) 2414 { 2415 uintptr_t *aux; 2416 size_t len; 2417 int ret, i; 2418 2419 /* 2420 * Iterate over aux vdevs and print those out as well. This is a 2421 * little annoying because we don't have a root vdev to pass to ::vdev. 2422 * Instead, we print a single line and then call it for each child 2423 * vdev. 2424 */ 2425 if (sav->sav_count != 0) { 2426 v[1].a_type = MDB_TYPE_STRING; 2427 v[1].a_un.a_str = "-d"; 2428 v[2].a_type = MDB_TYPE_IMMEDIATE; 2429 v[2].a_un.a_val = 2; 2430 2431 len = sav->sav_count * sizeof (uintptr_t); 2432 aux = mdb_alloc(len, UM_SLEEP); 2433 if (mdb_vread(aux, len, sav->sav_vdevs) == -1) { 2434 mdb_free(aux, len); 2435 mdb_warn("failed to read l2cache vdevs at %p", 2436 sav->sav_vdevs); 2437 return (DCMD_ERR); 2438 } 2439 2440 mdb_printf("%-?s %-9s %-12s %s\n", "-", "-", "-", name); 2441 2442 for (i = 0; i < sav->sav_count; i++) { 2443 ret = mdb_call_dcmd("vdev", aux[i], flags, 3, v); 2444 if (ret != DCMD_OK) { 2445 mdb_free(aux, len); 2446 return (ret); 2447 } 2448 } 2449 2450 mdb_free(aux, len); 2451 } 2452 2453 return (0); 2454 } 2455 2456 /* 2457 * ::spa_vdevs 2458 * 2459 * -e Include error stats 2460 * -m Include metaslab information 2461 * -M Include metaslab group information 2462 * -h Include histogram information (requires -m or -M) 2463 * 2464 * Print out a summarized list of vdevs for the given spa_t. 2465 * This is accomplished by invoking "::vdev -re" on the root vdev, as well as 2466 * iterating over the cache devices. 2467 */ 2468 /* ARGSUSED */ 2469 static int 2470 spa_vdevs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2471 { 2472 mdb_arg_t v[3]; 2473 int ret; 2474 char opts[100] = "-r"; 2475 int spa_flags = 0; 2476 2477 if (mdb_getopts(argc, argv, 2478 'e', MDB_OPT_SETBITS, SPA_FLAG_ERRORS, &spa_flags, 2479 'm', MDB_OPT_SETBITS, SPA_FLAG_METASLABS, &spa_flags, 2480 'M', MDB_OPT_SETBITS, SPA_FLAG_METASLAB_GROUPS, &spa_flags, 2481 'h', MDB_OPT_SETBITS, SPA_FLAG_HISTOGRAMS, &spa_flags, 2482 NULL) != argc) 2483 return (DCMD_USAGE); 2484 2485 if (!(flags & DCMD_ADDRSPEC)) 2486 return (DCMD_USAGE); 2487 2488 mdb_spa_vdevs_t spa; 2489 if (mdb_ctf_vread(&spa, "spa_t", "mdb_spa_vdevs_t", addr, 0) == -1) 2490 return (DCMD_ERR); 2491 2492 /* 2493 * Unitialized spa_t structures can have a NULL root vdev. 2494 */ 2495 if (spa.spa_root_vdev == 0) { 2496 mdb_printf("no associated vdevs\n"); 2497 return (DCMD_OK); 2498 } 2499 2500 if (spa_flags & SPA_FLAG_ERRORS) 2501 strcat(opts, "e"); 2502 if (spa_flags & SPA_FLAG_METASLABS) 2503 strcat(opts, "m"); 2504 if (spa_flags & SPA_FLAG_METASLAB_GROUPS) 2505 strcat(opts, "M"); 2506 if (spa_flags & SPA_FLAG_HISTOGRAMS) 2507 strcat(opts, "h"); 2508 2509 v[0].a_type = MDB_TYPE_STRING; 2510 v[0].a_un.a_str = opts; 2511 2512 ret = mdb_call_dcmd("vdev", (uintptr_t)spa.spa_root_vdev, 2513 flags, 1, v); 2514 if (ret != DCMD_OK) 2515 return (ret); 2516 2517 if (spa_print_aux(&spa.spa_l2cache, flags, v, "cache") != 0 || 2518 spa_print_aux(&spa.spa_spares, flags, v, "spares") != 0) 2519 return (DCMD_ERR); 2520 2521 return (DCMD_OK); 2522 } 2523 2524 /* 2525 * ::zio 2526 * 2527 * Print a summary of zio_t and all its children. This is intended to display a 2528 * zio tree, and hence we only pick the most important pieces of information for 2529 * the main summary. More detailed information can always be found by doing a 2530 * '::print zio' on the underlying zio_t. The columns we display are: 2531 * 2532 * ADDRESS TYPE STAGE WAITER TIME_ELAPSED 2533 * 2534 * The 'address' column is indented by one space for each depth level as we 2535 * descend down the tree. 2536 */ 2537 2538 #define ZIO_MAXINDENT 7 2539 #define ZIO_MAXWIDTH (sizeof (uintptr_t) * 2 + ZIO_MAXINDENT) 2540 #define ZIO_WALK_SELF 0 2541 #define ZIO_WALK_CHILD 1 2542 #define ZIO_WALK_PARENT 2 2543 2544 typedef struct zio_print_args { 2545 int zpa_current_depth; 2546 int zpa_min_depth; 2547 int zpa_max_depth; 2548 int zpa_type; 2549 uint_t zpa_flags; 2550 } zio_print_args_t; 2551 2552 typedef struct mdb_zio { 2553 enum zio_type io_type; 2554 enum zio_stage io_stage; 2555 uintptr_t io_waiter; 2556 uintptr_t io_spa; 2557 struct { 2558 struct { 2559 uintptr_t list_next; 2560 } list_head; 2561 } io_parent_list; 2562 int io_error; 2563 } mdb_zio_t; 2564 2565 typedef struct mdb_zio_timestamp { 2566 hrtime_t io_timestamp; 2567 } mdb_zio_timestamp_t; 2568 2569 static int zio_child_cb(uintptr_t addr, const void *unknown, void *arg); 2570 2571 static int 2572 zio_print_cb(uintptr_t addr, zio_print_args_t *zpa) 2573 { 2574 mdb_ctf_id_t type_enum, stage_enum; 2575 int indent = zpa->zpa_current_depth; 2576 const char *type, *stage; 2577 uintptr_t laddr; 2578 mdb_zio_t zio; 2579 mdb_zio_timestamp_t zio_timestamp = { 0 }; 2580 2581 if (mdb_ctf_vread(&zio, ZFS_STRUCT "zio", "mdb_zio_t", addr, 0) == -1) 2582 return (WALK_ERR); 2583 (void) mdb_ctf_vread(&zio_timestamp, ZFS_STRUCT "zio", 2584 "mdb_zio_timestamp_t", addr, MDB_CTF_VREAD_QUIET); 2585 2586 if (indent > ZIO_MAXINDENT) 2587 indent = ZIO_MAXINDENT; 2588 2589 if (mdb_ctf_lookup_by_name("enum zio_type", &type_enum) == -1 || 2590 mdb_ctf_lookup_by_name("enum zio_stage", &stage_enum) == -1) { 2591 mdb_warn("failed to lookup zio enums"); 2592 return (WALK_ERR); 2593 } 2594 2595 if ((type = mdb_ctf_enum_name(type_enum, zio.io_type)) != NULL) 2596 type += sizeof ("ZIO_TYPE_") - 1; 2597 else 2598 type = "?"; 2599 2600 if (zio.io_error == 0) { 2601 stage = mdb_ctf_enum_name(stage_enum, zio.io_stage); 2602 if (stage != NULL) 2603 stage += sizeof ("ZIO_STAGE_") - 1; 2604 else 2605 stage = "?"; 2606 } else { 2607 stage = "FAILED"; 2608 } 2609 2610 if (zpa->zpa_current_depth >= zpa->zpa_min_depth) { 2611 if (zpa->zpa_flags & DCMD_PIPE_OUT) { 2612 mdb_printf("%?p\n", addr); 2613 } else { 2614 mdb_printf("%*s%-*p %-5s %-16s ", indent, "", 2615 ZIO_MAXWIDTH - indent, addr, type, stage); 2616 if (zio.io_waiter != 0) 2617 mdb_printf("%-16lx ", zio.io_waiter); 2618 else 2619 mdb_printf("%-16s ", "-"); 2620 #ifdef _KERNEL 2621 if (zio_timestamp.io_timestamp != 0) { 2622 mdb_printf("%llums", (mdb_gethrtime() - 2623 zio_timestamp.io_timestamp) / 2624 1000000); 2625 } else { 2626 mdb_printf("%-12s ", "-"); 2627 } 2628 #else 2629 mdb_printf("%-12s ", "-"); 2630 #endif 2631 mdb_printf("\n"); 2632 } 2633 } 2634 2635 if (zpa->zpa_current_depth >= zpa->zpa_max_depth) 2636 return (WALK_NEXT); 2637 2638 if (zpa->zpa_type == ZIO_WALK_PARENT) 2639 laddr = addr + mdb_ctf_offsetof_by_name(ZFS_STRUCT "zio", 2640 "io_parent_list"); 2641 else 2642 laddr = addr + mdb_ctf_offsetof_by_name(ZFS_STRUCT "zio", 2643 "io_child_list"); 2644 2645 zpa->zpa_current_depth++; 2646 if (mdb_pwalk("list", zio_child_cb, zpa, laddr) != 0) { 2647 mdb_warn("failed to walk zio_t children at %p\n", laddr); 2648 return (WALK_ERR); 2649 } 2650 zpa->zpa_current_depth--; 2651 2652 return (WALK_NEXT); 2653 } 2654 2655 /* ARGSUSED */ 2656 static int 2657 zio_child_cb(uintptr_t addr, const void *unknown, void *arg) 2658 { 2659 zio_link_t zl; 2660 uintptr_t ziop; 2661 zio_print_args_t *zpa = arg; 2662 2663 if (mdb_vread(&zl, sizeof (zl), addr) == -1) { 2664 mdb_warn("failed to read zio_link_t at %p", addr); 2665 return (WALK_ERR); 2666 } 2667 2668 if (zpa->zpa_type == ZIO_WALK_PARENT) 2669 ziop = (uintptr_t)zl.zl_parent; 2670 else 2671 ziop = (uintptr_t)zl.zl_child; 2672 2673 return (zio_print_cb(ziop, zpa)); 2674 } 2675 2676 /* ARGSUSED */ 2677 static int 2678 zio_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2679 { 2680 zio_print_args_t zpa = { 0 }; 2681 2682 if (!(flags & DCMD_ADDRSPEC)) 2683 return (DCMD_USAGE); 2684 2685 if (mdb_getopts(argc, argv, 2686 'r', MDB_OPT_SETBITS, INT_MAX, &zpa.zpa_max_depth, 2687 'c', MDB_OPT_SETBITS, ZIO_WALK_CHILD, &zpa.zpa_type, 2688 'p', MDB_OPT_SETBITS, ZIO_WALK_PARENT, &zpa.zpa_type, 2689 NULL) != argc) 2690 return (DCMD_USAGE); 2691 2692 zpa.zpa_flags = flags; 2693 if (zpa.zpa_max_depth != 0) { 2694 if (zpa.zpa_type == ZIO_WALK_SELF) 2695 zpa.zpa_type = ZIO_WALK_CHILD; 2696 } else if (zpa.zpa_type != ZIO_WALK_SELF) { 2697 zpa.zpa_min_depth = 1; 2698 zpa.zpa_max_depth = 1; 2699 } 2700 2701 if (!(flags & DCMD_PIPE_OUT) && DCMD_HDRSPEC(flags)) { 2702 mdb_printf("%<u>%-*s %-5s %-16s %-16s %-12s%</u>\n", 2703 ZIO_MAXWIDTH, "ADDRESS", "TYPE", "STAGE", "WAITER", 2704 "TIME_ELAPSED"); 2705 } 2706 2707 if (zio_print_cb(addr, &zpa) != WALK_NEXT) 2708 return (DCMD_ERR); 2709 2710 return (DCMD_OK); 2711 } 2712 2713 /* 2714 * [addr]::zio_state 2715 * 2716 * Print a summary of all zio_t structures on the system, or for a particular 2717 * pool. This is equivalent to '::walk zio_root | ::zio'. 2718 */ 2719 /*ARGSUSED*/ 2720 static int 2721 zio_state(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2722 { 2723 /* 2724 * MDB will remember the last address of the pipeline, so if we don't 2725 * zero this we'll end up trying to walk zio structures for a 2726 * non-existent spa_t. 2727 */ 2728 if (!(flags & DCMD_ADDRSPEC)) 2729 addr = 0; 2730 2731 return (mdb_pwalk_dcmd("zio_root", "zio", argc, argv, addr)); 2732 } 2733 2734 2735 typedef struct mdb_zfs_btree_hdr { 2736 uintptr_t bth_parent; 2737 boolean_t bth_core; 2738 /* 2739 * For both leaf and core nodes, represents the number of elements in 2740 * the node. For core nodes, they will have bth_count + 1 children. 2741 */ 2742 uint32_t bth_count; 2743 } mdb_zfs_btree_hdr_t; 2744 2745 typedef struct mdb_zfs_btree_core { 2746 mdb_zfs_btree_hdr_t btc_hdr; 2747 uintptr_t btc_children[BTREE_CORE_ELEMS + 1]; 2748 uint8_t btc_elems[]; 2749 } mdb_zfs_btree_core_t; 2750 2751 typedef struct mdb_zfs_btree_leaf { 2752 mdb_zfs_btree_hdr_t btl_hdr; 2753 uint8_t btl_elems[]; 2754 } mdb_zfs_btree_leaf_t; 2755 2756 typedef struct mdb_zfs_btree { 2757 uintptr_t bt_root; 2758 size_t bt_elem_size; 2759 } mdb_zfs_btree_t; 2760 2761 typedef struct btree_walk_data { 2762 mdb_zfs_btree_t bwd_btree; 2763 mdb_zfs_btree_hdr_t *bwd_node; 2764 uint64_t bwd_offset; // In units of bt_node_size 2765 } btree_walk_data_t; 2766 2767 static uintptr_t 2768 btree_leftmost_child(uintptr_t addr, mdb_zfs_btree_hdr_t *buf) 2769 { 2770 size_t size = offsetof(zfs_btree_core_t, btc_children) + 2771 sizeof (uintptr_t); 2772 for (;;) { 2773 if (mdb_vread(buf, size, addr) == -1) { 2774 mdb_warn("failed to read at %p\n", addr); 2775 return ((uintptr_t)0ULL); 2776 } 2777 if (!buf->bth_core) 2778 return (addr); 2779 mdb_zfs_btree_core_t *node = (mdb_zfs_btree_core_t *)buf; 2780 addr = node->btc_children[0]; 2781 } 2782 } 2783 2784 static int 2785 btree_walk_step(mdb_walk_state_t *wsp) 2786 { 2787 btree_walk_data_t *bwd = wsp->walk_data; 2788 size_t elem_size = bwd->bwd_btree.bt_elem_size; 2789 if (wsp->walk_addr == 0ULL) 2790 return (WALK_DONE); 2791 2792 if (!bwd->bwd_node->bth_core) { 2793 /* 2794 * For the first element in a leaf node, read in the full 2795 * leaf, since we only had part of it read in before. 2796 */ 2797 if (bwd->bwd_offset == 0) { 2798 if (mdb_vread(bwd->bwd_node, BTREE_LEAF_SIZE, 2799 wsp->walk_addr) == -1) { 2800 mdb_warn("failed to read at %p\n", 2801 wsp->walk_addr); 2802 return (WALK_ERR); 2803 } 2804 } 2805 2806 int status = wsp->walk_callback((uintptr_t)(wsp->walk_addr + 2807 offsetof(mdb_zfs_btree_leaf_t, btl_elems) + 2808 bwd->bwd_offset * elem_size), bwd->bwd_node, 2809 wsp->walk_cbdata); 2810 if (status != WALK_NEXT) 2811 return (status); 2812 bwd->bwd_offset++; 2813 2814 /* Find the next element, if we're at the end of the leaf. */ 2815 while (bwd->bwd_offset == bwd->bwd_node->bth_count) { 2816 uintptr_t par = bwd->bwd_node->bth_parent; 2817 uintptr_t cur = wsp->walk_addr; 2818 wsp->walk_addr = par; 2819 if (par == 0ULL) 2820 return (WALK_NEXT); 2821 2822 size_t size = sizeof (zfs_btree_core_t) + 2823 BTREE_CORE_ELEMS * elem_size; 2824 if (mdb_vread(bwd->bwd_node, size, wsp->walk_addr) == 2825 -1) { 2826 mdb_warn("failed to read at %p\n", 2827 wsp->walk_addr); 2828 return (WALK_ERR); 2829 } 2830 mdb_zfs_btree_core_t *node = 2831 (mdb_zfs_btree_core_t *)bwd->bwd_node; 2832 int i; 2833 for (i = 0; i <= bwd->bwd_node->bth_count; i++) { 2834 if (node->btc_children[i] == cur) 2835 break; 2836 } 2837 if (i > bwd->bwd_node->bth_count) { 2838 mdb_warn("btree parent/child mismatch at " 2839 "%#lx\n", cur); 2840 return (WALK_ERR); 2841 } 2842 bwd->bwd_offset = i; 2843 } 2844 return (WALK_NEXT); 2845 } 2846 2847 if (!bwd->bwd_node->bth_core) { 2848 mdb_warn("Invalid btree node at %#lx\n", wsp->walk_addr); 2849 return (WALK_ERR); 2850 } 2851 mdb_zfs_btree_core_t *node = (mdb_zfs_btree_core_t *)bwd->bwd_node; 2852 int status = wsp->walk_callback((uintptr_t)(wsp->walk_addr + 2853 offsetof(mdb_zfs_btree_core_t, btc_elems) + bwd->bwd_offset * 2854 elem_size), bwd->bwd_node, wsp->walk_cbdata); 2855 if (status != WALK_NEXT) 2856 return (status); 2857 2858 uintptr_t new_child = node->btc_children[bwd->bwd_offset + 1]; 2859 wsp->walk_addr = btree_leftmost_child(new_child, bwd->bwd_node); 2860 if (wsp->walk_addr == 0ULL) 2861 return (WALK_ERR); 2862 2863 bwd->bwd_offset = 0; 2864 return (WALK_NEXT); 2865 } 2866 2867 static int 2868 btree_walk_init(mdb_walk_state_t *wsp) 2869 { 2870 btree_walk_data_t *bwd; 2871 2872 if (wsp->walk_addr == 0ULL) { 2873 mdb_warn("must supply address of zfs_btree_t\n"); 2874 return (WALK_ERR); 2875 } 2876 2877 bwd = mdb_zalloc(sizeof (btree_walk_data_t), UM_SLEEP); 2878 if (mdb_ctf_vread(&bwd->bwd_btree, "zfs_btree_t", "mdb_zfs_btree_t", 2879 wsp->walk_addr, 0) == -1) { 2880 mdb_free(bwd, sizeof (*bwd)); 2881 return (WALK_ERR); 2882 } 2883 2884 if (bwd->bwd_btree.bt_elem_size == 0) { 2885 mdb_warn("invalid or uninitialized btree at %#lx\n", 2886 wsp->walk_addr); 2887 mdb_free(bwd, sizeof (*bwd)); 2888 return (WALK_ERR); 2889 } 2890 2891 size_t size = MAX(BTREE_LEAF_SIZE, sizeof (zfs_btree_core_t) + 2892 BTREE_CORE_ELEMS * bwd->bwd_btree.bt_elem_size); 2893 bwd->bwd_node = mdb_zalloc(size, UM_SLEEP); 2894 2895 uintptr_t node = (uintptr_t)bwd->bwd_btree.bt_root; 2896 if (node == 0ULL) { 2897 wsp->walk_addr = 0ULL; 2898 wsp->walk_data = bwd; 2899 return (WALK_NEXT); 2900 } 2901 node = btree_leftmost_child(node, bwd->bwd_node); 2902 if (node == 0ULL) { 2903 mdb_free(bwd->bwd_node, size); 2904 mdb_free(bwd, sizeof (*bwd)); 2905 return (WALK_ERR); 2906 } 2907 bwd->bwd_offset = 0; 2908 2909 wsp->walk_addr = node; 2910 wsp->walk_data = bwd; 2911 return (WALK_NEXT); 2912 } 2913 2914 static void 2915 btree_walk_fini(mdb_walk_state_t *wsp) 2916 { 2917 btree_walk_data_t *bwd = (btree_walk_data_t *)wsp->walk_data; 2918 2919 if (bwd == NULL) 2920 return; 2921 2922 size_t size = MAX(BTREE_LEAF_SIZE, sizeof (zfs_btree_core_t) + 2923 BTREE_CORE_ELEMS * bwd->bwd_btree.bt_elem_size); 2924 if (bwd->bwd_node != NULL) 2925 mdb_free(bwd->bwd_node, size); 2926 2927 mdb_free(bwd, sizeof (*bwd)); 2928 } 2929 2930 typedef struct mdb_multilist { 2931 uint64_t ml_num_sublists; 2932 uintptr_t ml_sublists; 2933 } mdb_multilist_t; 2934 2935 static int 2936 multilist_walk_step(mdb_walk_state_t *wsp) 2937 { 2938 return (wsp->walk_callback(wsp->walk_addr, wsp->walk_layer, 2939 wsp->walk_cbdata)); 2940 } 2941 2942 static int 2943 multilist_walk_init(mdb_walk_state_t *wsp) 2944 { 2945 mdb_multilist_t ml; 2946 ssize_t sublist_sz; 2947 int list_offset; 2948 size_t i; 2949 2950 if (wsp->walk_addr == 0) { 2951 mdb_warn("must supply address of multilist_t\n"); 2952 return (WALK_ERR); 2953 } 2954 2955 if (mdb_ctf_vread(&ml, "multilist_t", "mdb_multilist_t", 2956 wsp->walk_addr, 0) == -1) { 2957 return (WALK_ERR); 2958 } 2959 2960 if (ml.ml_num_sublists == 0 || ml.ml_sublists == 0) { 2961 mdb_warn("invalid or uninitialized multilist at %#lx\n", 2962 wsp->walk_addr); 2963 return (WALK_ERR); 2964 } 2965 2966 /* mdb_ctf_sizeof_by_name() will print an error for us */ 2967 sublist_sz = mdb_ctf_sizeof_by_name("multilist_sublist_t"); 2968 if (sublist_sz == -1) 2969 return (WALK_ERR); 2970 2971 /* mdb_ctf_offsetof_by_name will print an error for us */ 2972 list_offset = mdb_ctf_offsetof_by_name("multilist_sublist_t", 2973 "mls_list"); 2974 if (list_offset == -1) 2975 return (WALK_ERR); 2976 2977 for (i = 0; i < ml.ml_num_sublists; i++) { 2978 wsp->walk_addr = ml.ml_sublists + i * sublist_sz + list_offset; 2979 2980 if (mdb_layered_walk("list", wsp) == -1) { 2981 mdb_warn("can't walk multilist sublist"); 2982 return (WALK_ERR); 2983 } 2984 } 2985 2986 return (WALK_NEXT); 2987 } 2988 2989 typedef struct mdb_txg_list { 2990 size_t tl_offset; 2991 uintptr_t tl_head[TXG_SIZE]; 2992 } mdb_txg_list_t; 2993 2994 typedef struct txg_list_walk_data { 2995 uintptr_t lw_head[TXG_SIZE]; 2996 int lw_txgoff; 2997 int lw_maxoff; 2998 size_t lw_offset; 2999 void *lw_obj; 3000 } txg_list_walk_data_t; 3001 3002 static int 3003 txg_list_walk_init_common(mdb_walk_state_t *wsp, int txg, int maxoff) 3004 { 3005 txg_list_walk_data_t *lwd; 3006 mdb_txg_list_t list; 3007 int i; 3008 3009 lwd = mdb_alloc(sizeof (txg_list_walk_data_t), UM_SLEEP | UM_GC); 3010 if (mdb_ctf_vread(&list, "txg_list_t", "mdb_txg_list_t", wsp->walk_addr, 3011 0) == -1) { 3012 mdb_warn("failed to read txg_list_t at %#lx", wsp->walk_addr); 3013 return (WALK_ERR); 3014 } 3015 3016 for (i = 0; i < TXG_SIZE; i++) 3017 lwd->lw_head[i] = list.tl_head[i]; 3018 lwd->lw_offset = list.tl_offset; 3019 lwd->lw_obj = mdb_alloc(lwd->lw_offset + sizeof (txg_node_t), 3020 UM_SLEEP | UM_GC); 3021 lwd->lw_txgoff = txg; 3022 lwd->lw_maxoff = maxoff; 3023 3024 wsp->walk_addr = lwd->lw_head[lwd->lw_txgoff]; 3025 wsp->walk_data = lwd; 3026 3027 return (WALK_NEXT); 3028 } 3029 3030 static int 3031 txg_list_walk_init(mdb_walk_state_t *wsp) 3032 { 3033 return (txg_list_walk_init_common(wsp, 0, TXG_SIZE-1)); 3034 } 3035 3036 static int 3037 txg_list0_walk_init(mdb_walk_state_t *wsp) 3038 { 3039 return (txg_list_walk_init_common(wsp, 0, 0)); 3040 } 3041 3042 static int 3043 txg_list1_walk_init(mdb_walk_state_t *wsp) 3044 { 3045 return (txg_list_walk_init_common(wsp, 1, 1)); 3046 } 3047 3048 static int 3049 txg_list2_walk_init(mdb_walk_state_t *wsp) 3050 { 3051 return (txg_list_walk_init_common(wsp, 2, 2)); 3052 } 3053 3054 static int 3055 txg_list3_walk_init(mdb_walk_state_t *wsp) 3056 { 3057 return (txg_list_walk_init_common(wsp, 3, 3)); 3058 } 3059 3060 static int 3061 txg_list_walk_step(mdb_walk_state_t *wsp) 3062 { 3063 txg_list_walk_data_t *lwd = wsp->walk_data; 3064 uintptr_t addr; 3065 txg_node_t *node; 3066 int status; 3067 3068 while (wsp->walk_addr == 0 && lwd->lw_txgoff < lwd->lw_maxoff) { 3069 lwd->lw_txgoff++; 3070 wsp->walk_addr = lwd->lw_head[lwd->lw_txgoff]; 3071 } 3072 3073 if (wsp->walk_addr == 0) 3074 return (WALK_DONE); 3075 3076 addr = wsp->walk_addr - lwd->lw_offset; 3077 3078 if (mdb_vread(lwd->lw_obj, 3079 lwd->lw_offset + sizeof (txg_node_t), addr) == -1) { 3080 mdb_warn("failed to read list element at %#lx", addr); 3081 return (WALK_ERR); 3082 } 3083 3084 status = wsp->walk_callback(addr, lwd->lw_obj, wsp->walk_cbdata); 3085 node = (txg_node_t *)((uintptr_t)lwd->lw_obj + lwd->lw_offset); 3086 wsp->walk_addr = (uintptr_t)node->tn_next[lwd->lw_txgoff]; 3087 3088 return (status); 3089 } 3090 3091 /* 3092 * ::walk spa 3093 * 3094 * Walk all named spa_t structures in the namespace. This is nothing more than 3095 * a layered avl walk. 3096 */ 3097 static int 3098 spa_walk_init(mdb_walk_state_t *wsp) 3099 { 3100 GElf_Sym sym; 3101 3102 if (wsp->walk_addr != 0) { 3103 mdb_warn("spa walk only supports global walks\n"); 3104 return (WALK_ERR); 3105 } 3106 3107 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "spa_namespace_avl", &sym) == -1) { 3108 mdb_warn("failed to find symbol 'spa_namespace_avl'"); 3109 return (WALK_ERR); 3110 } 3111 3112 wsp->walk_addr = (uintptr_t)sym.st_value; 3113 3114 if (mdb_layered_walk("avl", wsp) == -1) { 3115 mdb_warn("failed to walk 'avl'\n"); 3116 return (WALK_ERR); 3117 } 3118 3119 return (WALK_NEXT); 3120 } 3121 3122 static int 3123 spa_walk_step(mdb_walk_state_t *wsp) 3124 { 3125 return (wsp->walk_callback(wsp->walk_addr, NULL, wsp->walk_cbdata)); 3126 } 3127 3128 /* 3129 * [addr]::walk zio 3130 * 3131 * Walk all active zio_t structures on the system. This is simply a layered 3132 * walk on top of ::walk zio_cache, with the optional ability to limit the 3133 * structures to a particular pool. 3134 */ 3135 static int 3136 zio_walk_init(mdb_walk_state_t *wsp) 3137 { 3138 wsp->walk_data = (void *)wsp->walk_addr; 3139 3140 if (mdb_layered_walk("zio_cache", wsp) == -1) { 3141 mdb_warn("failed to walk 'zio_cache'\n"); 3142 return (WALK_ERR); 3143 } 3144 3145 return (WALK_NEXT); 3146 } 3147 3148 static int 3149 zio_walk_step(mdb_walk_state_t *wsp) 3150 { 3151 mdb_zio_t zio; 3152 uintptr_t spa = (uintptr_t)wsp->walk_data; 3153 3154 if (mdb_ctf_vread(&zio, ZFS_STRUCT "zio", "mdb_zio_t", 3155 wsp->walk_addr, 0) == -1) 3156 return (WALK_ERR); 3157 3158 if (spa != 0 && spa != zio.io_spa) 3159 return (WALK_NEXT); 3160 3161 return (wsp->walk_callback(wsp->walk_addr, &zio, wsp->walk_cbdata)); 3162 } 3163 3164 /* 3165 * [addr]::walk zio_root 3166 * 3167 * Walk only root zio_t structures, optionally for a particular spa_t. 3168 */ 3169 static int 3170 zio_walk_root_step(mdb_walk_state_t *wsp) 3171 { 3172 mdb_zio_t zio; 3173 uintptr_t spa = (uintptr_t)wsp->walk_data; 3174 3175 if (mdb_ctf_vread(&zio, ZFS_STRUCT "zio", "mdb_zio_t", 3176 wsp->walk_addr, 0) == -1) 3177 return (WALK_ERR); 3178 3179 if (spa != 0 && spa != zio.io_spa) 3180 return (WALK_NEXT); 3181 3182 /* If the parent list is not empty, ignore */ 3183 if (zio.io_parent_list.list_head.list_next != 3184 wsp->walk_addr + 3185 mdb_ctf_offsetof_by_name(ZFS_STRUCT "zio", "io_parent_list") + 3186 mdb_ctf_offsetof_by_name("struct list", "list_head")) 3187 return (WALK_NEXT); 3188 3189 return (wsp->walk_callback(wsp->walk_addr, &zio, wsp->walk_cbdata)); 3190 } 3191 3192 /* 3193 * ::zfs_blkstats 3194 * 3195 * -v print verbose per-level information 3196 * 3197 */ 3198 static int 3199 zfs_blkstats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3200 { 3201 boolean_t verbose = B_FALSE; 3202 zfs_all_blkstats_t stats; 3203 dmu_object_type_t t; 3204 zfs_blkstat_t *tzb; 3205 uint64_t ditto; 3206 3207 if (mdb_getopts(argc, argv, 3208 'v', MDB_OPT_SETBITS, TRUE, &verbose, 3209 NULL) != argc) 3210 return (DCMD_USAGE); 3211 3212 if (!(flags & DCMD_ADDRSPEC)) 3213 return (DCMD_USAGE); 3214 3215 if (GETMEMB(addr, "spa", spa_dsl_pool, addr) || 3216 GETMEMB(addr, "dsl_pool", dp_blkstats, addr) || 3217 mdb_vread(&stats, sizeof (zfs_all_blkstats_t), addr) == -1) { 3218 mdb_warn("failed to read data at %p;", addr); 3219 mdb_printf("maybe no stats? run \"zpool scrub\" first."); 3220 return (DCMD_ERR); 3221 } 3222 3223 tzb = &stats.zab_type[DN_MAX_LEVELS][DMU_OT_TOTAL]; 3224 if (tzb->zb_gangs != 0) { 3225 mdb_printf("Ganged blocks: %llu\n", 3226 (longlong_t)tzb->zb_gangs); 3227 } 3228 3229 ditto = tzb->zb_ditto_2_of_2_samevdev + tzb->zb_ditto_2_of_3_samevdev + 3230 tzb->zb_ditto_3_of_3_samevdev; 3231 if (ditto != 0) { 3232 mdb_printf("Dittoed blocks on same vdev: %llu\n", 3233 (longlong_t)ditto); 3234 } 3235 3236 mdb_printf("\nBlocks\tLSIZE\tPSIZE\tASIZE" 3237 "\t avg\t comp\t%%Total\tType\n"); 3238 3239 for (t = 0; t <= DMU_OT_TOTAL; t++) { 3240 char csize[MDB_NICENUM_BUFLEN], lsize[MDB_NICENUM_BUFLEN]; 3241 char psize[MDB_NICENUM_BUFLEN], asize[MDB_NICENUM_BUFLEN]; 3242 char avg[MDB_NICENUM_BUFLEN]; 3243 char comp[MDB_NICENUM_BUFLEN], pct[MDB_NICENUM_BUFLEN]; 3244 char typename[64]; 3245 int l; 3246 3247 3248 if (t == DMU_OT_DEFERRED) 3249 strcpy(typename, "deferred free"); 3250 else if (t == DMU_OT_OTHER) 3251 strcpy(typename, "other"); 3252 else if (t == DMU_OT_TOTAL) 3253 strcpy(typename, "Total"); 3254 else if (enum_lookup("enum dmu_object_type", 3255 t, "DMU_OT_", sizeof (typename), typename) == -1) { 3256 mdb_warn("failed to read type name"); 3257 return (DCMD_ERR); 3258 } 3259 3260 if (stats.zab_type[DN_MAX_LEVELS][t].zb_asize == 0) 3261 continue; 3262 3263 for (l = -1; l < DN_MAX_LEVELS; l++) { 3264 int level = (l == -1 ? DN_MAX_LEVELS : l); 3265 zfs_blkstat_t *zb = &stats.zab_type[level][t]; 3266 3267 if (zb->zb_asize == 0) 3268 continue; 3269 3270 /* 3271 * Don't print each level unless requested. 3272 */ 3273 if (!verbose && level != DN_MAX_LEVELS) 3274 continue; 3275 3276 /* 3277 * If all the space is level 0, don't print the 3278 * level 0 separately. 3279 */ 3280 if (level == 0 && zb->zb_asize == 3281 stats.zab_type[DN_MAX_LEVELS][t].zb_asize) 3282 continue; 3283 3284 mdb_nicenum(zb->zb_count, csize); 3285 mdb_nicenum(zb->zb_lsize, lsize); 3286 mdb_nicenum(zb->zb_psize, psize); 3287 mdb_nicenum(zb->zb_asize, asize); 3288 mdb_nicenum(zb->zb_asize / zb->zb_count, avg); 3289 (void) mdb_snprintfrac(comp, MDB_NICENUM_BUFLEN, 3290 zb->zb_lsize, zb->zb_psize, 2); 3291 (void) mdb_snprintfrac(pct, MDB_NICENUM_BUFLEN, 3292 100 * zb->zb_asize, tzb->zb_asize, 2); 3293 3294 mdb_printf("%6s\t%5s\t%5s\t%5s\t%5s" 3295 "\t%5s\t%6s\t", 3296 csize, lsize, psize, asize, avg, comp, pct); 3297 3298 if (level == DN_MAX_LEVELS) 3299 mdb_printf("%s\n", typename); 3300 else 3301 mdb_printf(" L%d %s\n", 3302 level, typename); 3303 } 3304 } 3305 3306 return (DCMD_OK); 3307 } 3308 3309 typedef struct mdb_reference { 3310 uintptr_t ref_holder; 3311 uintptr_t ref_removed; 3312 uint64_t ref_number; 3313 } mdb_reference_t; 3314 3315 /* ARGSUSED */ 3316 static int 3317 reference_cb(uintptr_t addr, const void *ignored, void *arg) 3318 { 3319 mdb_reference_t ref; 3320 boolean_t holder_is_str = B_FALSE; 3321 char holder_str[128]; 3322 boolean_t removed = (boolean_t)arg; 3323 3324 if (mdb_ctf_vread(&ref, "reference_t", "mdb_reference_t", addr, 3325 0) == -1) 3326 return (DCMD_ERR); 3327 3328 if (mdb_readstr(holder_str, sizeof (holder_str), 3329 ref.ref_holder) != -1) 3330 holder_is_str = strisprint(holder_str); 3331 3332 if (removed) 3333 mdb_printf("removed "); 3334 mdb_printf("reference "); 3335 if (ref.ref_number != 1) 3336 mdb_printf("with count=%llu ", ref.ref_number); 3337 mdb_printf("with tag %lx", ref.ref_holder); 3338 if (holder_is_str) 3339 mdb_printf(" \"%s\"", holder_str); 3340 mdb_printf(", held at:\n"); 3341 3342 (void) mdb_call_dcmd("whatis", addr, DCMD_ADDRSPEC, 0, NULL); 3343 3344 if (removed) { 3345 mdb_printf("removed at:\n"); 3346 (void) mdb_call_dcmd("whatis", ref.ref_removed, 3347 DCMD_ADDRSPEC, 0, NULL); 3348 } 3349 3350 mdb_printf("\n"); 3351 3352 return (WALK_NEXT); 3353 } 3354 3355 typedef struct mdb_zfs_refcount { 3356 uint64_t rc_count; 3357 } mdb_zfs_refcount_t; 3358 3359 typedef struct mdb_zfs_refcount_removed { 3360 uint_t rc_removed_count; 3361 } mdb_zfs_refcount_removed_t; 3362 3363 typedef struct mdb_zfs_refcount_tracked { 3364 boolean_t rc_tracked; 3365 } mdb_zfs_refcount_tracked_t; 3366 3367 /* ARGSUSED */ 3368 static int 3369 zfs_refcount(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3370 { 3371 mdb_zfs_refcount_t rc; 3372 mdb_zfs_refcount_removed_t rcr; 3373 mdb_zfs_refcount_tracked_t rct; 3374 int off; 3375 boolean_t released = B_FALSE; 3376 3377 if (!(flags & DCMD_ADDRSPEC)) 3378 return (DCMD_USAGE); 3379 3380 if (mdb_getopts(argc, argv, 3381 'r', MDB_OPT_SETBITS, B_TRUE, &released, 3382 NULL) != argc) 3383 return (DCMD_USAGE); 3384 3385 if (mdb_ctf_vread(&rc, "zfs_refcount_t", "mdb_zfs_refcount_t", addr, 3386 0) == -1) 3387 return (DCMD_ERR); 3388 3389 if (mdb_ctf_vread(&rcr, "zfs_refcount_t", "mdb_zfs_refcount_removed_t", 3390 addr, MDB_CTF_VREAD_QUIET) == -1) { 3391 mdb_printf("zfs_refcount_t at %p has %llu holds (untracked)\n", 3392 addr, (longlong_t)rc.rc_count); 3393 return (DCMD_OK); 3394 } 3395 3396 if (mdb_ctf_vread(&rct, "zfs_refcount_t", "mdb_zfs_refcount_tracked_t", 3397 addr, MDB_CTF_VREAD_QUIET) == -1) { 3398 /* If this is an old target, it might be tracked. */ 3399 rct.rc_tracked = B_TRUE; 3400 } 3401 3402 mdb_printf("zfs_refcount_t at %p has %llu current holds, " 3403 "%llu recently released holds\n", 3404 addr, (longlong_t)rc.rc_count, (longlong_t)rcr.rc_removed_count); 3405 3406 if (rct.rc_tracked && rc.rc_count > 0) 3407 mdb_printf("current holds:\n"); 3408 off = mdb_ctf_offsetof_by_name("zfs_refcount_t", "rc_tree"); 3409 if (off == -1) 3410 return (DCMD_ERR); 3411 mdb_pwalk("avl", reference_cb, (void *)B_FALSE, addr + off); 3412 3413 if (released && rcr.rc_removed_count > 0) { 3414 mdb_printf("released holds:\n"); 3415 3416 off = mdb_ctf_offsetof_by_name("zfs_refcount_t", "rc_removed"); 3417 if (off == -1) 3418 return (DCMD_ERR); 3419 mdb_pwalk("list", reference_cb, (void *)B_TRUE, addr + off); 3420 } 3421 3422 return (DCMD_OK); 3423 } 3424 3425 /* ARGSUSED */ 3426 static int 3427 sa_attr_table(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3428 { 3429 sa_attr_table_t *table; 3430 sa_os_t sa_os; 3431 char *name; 3432 int i; 3433 3434 if (mdb_vread(&sa_os, sizeof (sa_os_t), addr) == -1) { 3435 mdb_warn("failed to read sa_os at %p", addr); 3436 return (DCMD_ERR); 3437 } 3438 3439 table = mdb_alloc(sizeof (sa_attr_table_t) * sa_os.sa_num_attrs, 3440 UM_SLEEP | UM_GC); 3441 name = mdb_alloc(MAXPATHLEN, UM_SLEEP | UM_GC); 3442 3443 if (mdb_vread(table, sizeof (sa_attr_table_t) * sa_os.sa_num_attrs, 3444 (uintptr_t)sa_os.sa_attr_table) == -1) { 3445 mdb_warn("failed to read sa_os at %p", addr); 3446 return (DCMD_ERR); 3447 } 3448 3449 mdb_printf("%<u>%-10s %-10s %-10s %-10s %s%</u>\n", 3450 "ATTR ID", "REGISTERED", "LENGTH", "BSWAP", "NAME"); 3451 for (i = 0; i != sa_os.sa_num_attrs; i++) { 3452 mdb_readstr(name, MAXPATHLEN, (uintptr_t)table[i].sa_name); 3453 mdb_printf("%5x %8x %8x %8x %-s\n", 3454 (int)table[i].sa_attr, (int)table[i].sa_registered, 3455 (int)table[i].sa_length, table[i].sa_byteswap, name); 3456 } 3457 3458 return (DCMD_OK); 3459 } 3460 3461 static int 3462 sa_get_off_table(uintptr_t addr, uint32_t **off_tab, int attr_count) 3463 { 3464 uintptr_t idx_table; 3465 3466 if (GETMEMB(addr, "sa_idx_tab", sa_idx_tab, idx_table)) { 3467 mdb_printf("can't find offset table in sa_idx_tab\n"); 3468 return (-1); 3469 } 3470 3471 *off_tab = mdb_alloc(attr_count * sizeof (uint32_t), 3472 UM_SLEEP | UM_GC); 3473 3474 if (mdb_vread(*off_tab, 3475 attr_count * sizeof (uint32_t), idx_table) == -1) { 3476 mdb_warn("failed to attribute offset table %p", idx_table); 3477 return (-1); 3478 } 3479 3480 return (DCMD_OK); 3481 } 3482 3483 /*ARGSUSED*/ 3484 static int 3485 sa_attr_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3486 { 3487 uint32_t *offset_tab; 3488 int attr_count; 3489 uint64_t attr_id; 3490 uintptr_t attr_addr; 3491 uintptr_t bonus_tab, spill_tab; 3492 uintptr_t db_bonus, db_spill; 3493 uintptr_t os, os_sa; 3494 uintptr_t db_data; 3495 3496 if (argc != 1) 3497 return (DCMD_USAGE); 3498 3499 if (argv[0].a_type == MDB_TYPE_STRING) 3500 attr_id = mdb_strtoull(argv[0].a_un.a_str); 3501 else 3502 return (DCMD_USAGE); 3503 3504 if (GETMEMB(addr, "sa_handle", sa_bonus_tab, bonus_tab) || 3505 GETMEMB(addr, "sa_handle", sa_spill_tab, spill_tab) || 3506 GETMEMB(addr, "sa_handle", sa_os, os) || 3507 GETMEMB(addr, "sa_handle", sa_bonus, db_bonus) || 3508 GETMEMB(addr, "sa_handle", sa_spill, db_spill)) { 3509 mdb_printf("Can't find necessary information in sa_handle " 3510 "in sa_handle\n"); 3511 return (DCMD_ERR); 3512 } 3513 3514 if (GETMEMB(os, "objset", os_sa, os_sa)) { 3515 mdb_printf("Can't find os_sa in objset\n"); 3516 return (DCMD_ERR); 3517 } 3518 3519 if (GETMEMB(os_sa, "sa_os", sa_num_attrs, attr_count)) { 3520 mdb_printf("Can't find sa_num_attrs\n"); 3521 return (DCMD_ERR); 3522 } 3523 3524 if (attr_id > attr_count) { 3525 mdb_printf("attribute id number is out of range\n"); 3526 return (DCMD_ERR); 3527 } 3528 3529 if (bonus_tab) { 3530 if (sa_get_off_table(bonus_tab, &offset_tab, 3531 attr_count) == -1) { 3532 return (DCMD_ERR); 3533 } 3534 3535 if (GETMEMB(db_bonus, "dmu_buf", db_data, db_data)) { 3536 mdb_printf("can't find db_data in bonus dbuf\n"); 3537 return (DCMD_ERR); 3538 } 3539 } 3540 3541 if (bonus_tab && !TOC_ATTR_PRESENT(offset_tab[attr_id]) && 3542 spill_tab == 0) { 3543 mdb_printf("Attribute does not exist\n"); 3544 return (DCMD_ERR); 3545 } else if (!TOC_ATTR_PRESENT(offset_tab[attr_id]) && spill_tab) { 3546 if (sa_get_off_table(spill_tab, &offset_tab, 3547 attr_count) == -1) { 3548 return (DCMD_ERR); 3549 } 3550 if (GETMEMB(db_spill, "dmu_buf", db_data, db_data)) { 3551 mdb_printf("can't find db_data in spill dbuf\n"); 3552 return (DCMD_ERR); 3553 } 3554 if (!TOC_ATTR_PRESENT(offset_tab[attr_id])) { 3555 mdb_printf("Attribute does not exist\n"); 3556 return (DCMD_ERR); 3557 } 3558 } 3559 attr_addr = db_data + TOC_OFF(offset_tab[attr_id]); 3560 mdb_printf("%p\n", attr_addr); 3561 return (DCMD_OK); 3562 } 3563 3564 /* ARGSUSED */ 3565 static int 3566 zfs_ace_print_common(uintptr_t addr, uint_t flags, 3567 uint64_t id, uint32_t access_mask, uint16_t ace_flags, 3568 uint16_t ace_type, int verbose) 3569 { 3570 if (DCMD_HDRSPEC(flags) && !verbose) 3571 mdb_printf("%<u>%-?s %-8s %-8s %-8s %s%</u>\n", 3572 "ADDR", "FLAGS", "MASK", "TYPE", "ID"); 3573 3574 if (!verbose) { 3575 mdb_printf("%0?p %-8x %-8x %-8x %-llx\n", addr, 3576 ace_flags, access_mask, ace_type, id); 3577 return (DCMD_OK); 3578 } 3579 3580 switch (ace_flags & ACE_TYPE_FLAGS) { 3581 case ACE_OWNER: 3582 mdb_printf("owner@:"); 3583 break; 3584 case (ACE_IDENTIFIER_GROUP | ACE_GROUP): 3585 mdb_printf("group@:"); 3586 break; 3587 case ACE_EVERYONE: 3588 mdb_printf("everyone@:"); 3589 break; 3590 case ACE_IDENTIFIER_GROUP: 3591 mdb_printf("group:%llx:", (u_longlong_t)id); 3592 break; 3593 case 0: /* User entry */ 3594 mdb_printf("user:%llx:", (u_longlong_t)id); 3595 break; 3596 } 3597 3598 /* print out permission mask */ 3599 if (access_mask & ACE_READ_DATA) 3600 mdb_printf("r"); 3601 else 3602 mdb_printf("-"); 3603 if (access_mask & ACE_WRITE_DATA) 3604 mdb_printf("w"); 3605 else 3606 mdb_printf("-"); 3607 if (access_mask & ACE_EXECUTE) 3608 mdb_printf("x"); 3609 else 3610 mdb_printf("-"); 3611 if (access_mask & ACE_APPEND_DATA) 3612 mdb_printf("p"); 3613 else 3614 mdb_printf("-"); 3615 if (access_mask & ACE_DELETE) 3616 mdb_printf("d"); 3617 else 3618 mdb_printf("-"); 3619 if (access_mask & ACE_DELETE_CHILD) 3620 mdb_printf("D"); 3621 else 3622 mdb_printf("-"); 3623 if (access_mask & ACE_READ_ATTRIBUTES) 3624 mdb_printf("a"); 3625 else 3626 mdb_printf("-"); 3627 if (access_mask & ACE_WRITE_ATTRIBUTES) 3628 mdb_printf("A"); 3629 else 3630 mdb_printf("-"); 3631 if (access_mask & ACE_READ_NAMED_ATTRS) 3632 mdb_printf("R"); 3633 else 3634 mdb_printf("-"); 3635 if (access_mask & ACE_WRITE_NAMED_ATTRS) 3636 mdb_printf("W"); 3637 else 3638 mdb_printf("-"); 3639 if (access_mask & ACE_READ_ACL) 3640 mdb_printf("c"); 3641 else 3642 mdb_printf("-"); 3643 if (access_mask & ACE_WRITE_ACL) 3644 mdb_printf("C"); 3645 else 3646 mdb_printf("-"); 3647 if (access_mask & ACE_WRITE_OWNER) 3648 mdb_printf("o"); 3649 else 3650 mdb_printf("-"); 3651 if (access_mask & ACE_SYNCHRONIZE) 3652 mdb_printf("s"); 3653 else 3654 mdb_printf("-"); 3655 3656 mdb_printf(":"); 3657 3658 /* Print out inheritance flags */ 3659 if (ace_flags & ACE_FILE_INHERIT_ACE) 3660 mdb_printf("f"); 3661 else 3662 mdb_printf("-"); 3663 if (ace_flags & ACE_DIRECTORY_INHERIT_ACE) 3664 mdb_printf("d"); 3665 else 3666 mdb_printf("-"); 3667 if (ace_flags & ACE_INHERIT_ONLY_ACE) 3668 mdb_printf("i"); 3669 else 3670 mdb_printf("-"); 3671 if (ace_flags & ACE_NO_PROPAGATE_INHERIT_ACE) 3672 mdb_printf("n"); 3673 else 3674 mdb_printf("-"); 3675 if (ace_flags & ACE_SUCCESSFUL_ACCESS_ACE_FLAG) 3676 mdb_printf("S"); 3677 else 3678 mdb_printf("-"); 3679 if (ace_flags & ACE_FAILED_ACCESS_ACE_FLAG) 3680 mdb_printf("F"); 3681 else 3682 mdb_printf("-"); 3683 if (ace_flags & ACE_INHERITED_ACE) 3684 mdb_printf("I"); 3685 else 3686 mdb_printf("-"); 3687 3688 switch (ace_type) { 3689 case ACE_ACCESS_ALLOWED_ACE_TYPE: 3690 mdb_printf(":allow\n"); 3691 break; 3692 case ACE_ACCESS_DENIED_ACE_TYPE: 3693 mdb_printf(":deny\n"); 3694 break; 3695 case ACE_SYSTEM_AUDIT_ACE_TYPE: 3696 mdb_printf(":audit\n"); 3697 break; 3698 case ACE_SYSTEM_ALARM_ACE_TYPE: 3699 mdb_printf(":alarm\n"); 3700 break; 3701 default: 3702 mdb_printf(":?\n"); 3703 } 3704 return (DCMD_OK); 3705 } 3706 3707 /* ARGSUSED */ 3708 static int 3709 zfs_ace_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3710 { 3711 zfs_ace_t zace; 3712 int verbose = FALSE; 3713 uint64_t id; 3714 3715 if (!(flags & DCMD_ADDRSPEC)) 3716 return (DCMD_USAGE); 3717 3718 if (mdb_getopts(argc, argv, 3719 'v', MDB_OPT_SETBITS, TRUE, &verbose, TRUE, NULL) != argc) 3720 return (DCMD_USAGE); 3721 3722 if (mdb_vread(&zace, sizeof (zfs_ace_t), addr) == -1) { 3723 mdb_warn("failed to read zfs_ace_t"); 3724 return (DCMD_ERR); 3725 } 3726 3727 if ((zace.z_hdr.z_flags & ACE_TYPE_FLAGS) == 0 || 3728 (zace.z_hdr.z_flags & ACE_TYPE_FLAGS) == ACE_IDENTIFIER_GROUP) 3729 id = zace.z_fuid; 3730 else 3731 id = -1; 3732 3733 return (zfs_ace_print_common(addr, flags, id, zace.z_hdr.z_access_mask, 3734 zace.z_hdr.z_flags, zace.z_hdr.z_type, verbose)); 3735 } 3736 3737 /* ARGSUSED */ 3738 static int 3739 zfs_ace0_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3740 { 3741 ace_t ace; 3742 uint64_t id; 3743 int verbose = FALSE; 3744 3745 if (!(flags & DCMD_ADDRSPEC)) 3746 return (DCMD_USAGE); 3747 3748 if (mdb_getopts(argc, argv, 3749 'v', MDB_OPT_SETBITS, TRUE, &verbose, TRUE, NULL) != argc) 3750 return (DCMD_USAGE); 3751 3752 if (mdb_vread(&ace, sizeof (ace_t), addr) == -1) { 3753 mdb_warn("failed to read ace_t"); 3754 return (DCMD_ERR); 3755 } 3756 3757 if ((ace.a_flags & ACE_TYPE_FLAGS) == 0 || 3758 (ace.a_flags & ACE_TYPE_FLAGS) == ACE_IDENTIFIER_GROUP) 3759 id = ace.a_who; 3760 else 3761 id = -1; 3762 3763 return (zfs_ace_print_common(addr, flags, id, ace.a_access_mask, 3764 ace.a_flags, ace.a_type, verbose)); 3765 } 3766 3767 typedef struct acl_dump_args { 3768 int a_argc; 3769 const mdb_arg_t *a_argv; 3770 uint16_t a_version; 3771 int a_flags; 3772 } acl_dump_args_t; 3773 3774 /* ARGSUSED */ 3775 static int 3776 acl_aces_cb(uintptr_t addr, const void *unknown, void *arg) 3777 { 3778 acl_dump_args_t *acl_args = (acl_dump_args_t *)arg; 3779 3780 if (acl_args->a_version == 1) { 3781 if (mdb_call_dcmd("zfs_ace", addr, 3782 DCMD_ADDRSPEC|acl_args->a_flags, acl_args->a_argc, 3783 acl_args->a_argv) != DCMD_OK) { 3784 return (WALK_ERR); 3785 } 3786 } else { 3787 if (mdb_call_dcmd("zfs_ace0", addr, 3788 DCMD_ADDRSPEC|acl_args->a_flags, acl_args->a_argc, 3789 acl_args->a_argv) != DCMD_OK) { 3790 return (WALK_ERR); 3791 } 3792 } 3793 acl_args->a_flags = DCMD_LOOP; 3794 return (WALK_NEXT); 3795 } 3796 3797 /* ARGSUSED */ 3798 static int 3799 acl_cb(uintptr_t addr, const void *unknown, void *arg) 3800 { 3801 acl_dump_args_t *acl_args = (acl_dump_args_t *)arg; 3802 3803 if (acl_args->a_version == 1) { 3804 if (mdb_pwalk("zfs_acl_node_aces", acl_aces_cb, 3805 arg, addr) != 0) { 3806 mdb_warn("can't walk ACEs"); 3807 return (DCMD_ERR); 3808 } 3809 } else { 3810 if (mdb_pwalk("zfs_acl_node_aces0", acl_aces_cb, 3811 arg, addr) != 0) { 3812 mdb_warn("can't walk ACEs"); 3813 return (DCMD_ERR); 3814 } 3815 } 3816 return (WALK_NEXT); 3817 } 3818 3819 /* ARGSUSED */ 3820 static int 3821 zfs_acl_dump(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3822 { 3823 zfs_acl_t zacl; 3824 int verbose = FALSE; 3825 acl_dump_args_t acl_args; 3826 3827 if (!(flags & DCMD_ADDRSPEC)) 3828 return (DCMD_USAGE); 3829 3830 if (mdb_getopts(argc, argv, 3831 'v', MDB_OPT_SETBITS, TRUE, &verbose, NULL) != argc) 3832 return (DCMD_USAGE); 3833 3834 if (mdb_vread(&zacl, sizeof (zfs_acl_t), addr) == -1) { 3835 mdb_warn("failed to read zfs_acl_t"); 3836 return (DCMD_ERR); 3837 } 3838 3839 acl_args.a_argc = argc; 3840 acl_args.a_argv = argv; 3841 acl_args.a_version = zacl.z_version; 3842 acl_args.a_flags = DCMD_LOOPFIRST; 3843 3844 if (mdb_pwalk("zfs_acl_node", acl_cb, &acl_args, addr) != 0) { 3845 mdb_warn("can't walk ACL"); 3846 return (DCMD_ERR); 3847 } 3848 3849 return (DCMD_OK); 3850 } 3851 3852 /* ARGSUSED */ 3853 static int 3854 zfs_acl_node_walk_init(mdb_walk_state_t *wsp) 3855 { 3856 if (wsp->walk_addr == 0) { 3857 mdb_warn("must supply address of zfs_acl_node_t\n"); 3858 return (WALK_ERR); 3859 } 3860 3861 wsp->walk_addr += 3862 mdb_ctf_offsetof_by_name(ZFS_STRUCT "zfs_acl", "z_acl"); 3863 3864 if (mdb_layered_walk("list", wsp) == -1) { 3865 mdb_warn("failed to walk 'list'\n"); 3866 return (WALK_ERR); 3867 } 3868 3869 return (WALK_NEXT); 3870 } 3871 3872 static int 3873 zfs_acl_node_walk_step(mdb_walk_state_t *wsp) 3874 { 3875 zfs_acl_node_t aclnode; 3876 3877 if (mdb_vread(&aclnode, sizeof (zfs_acl_node_t), 3878 wsp->walk_addr) == -1) { 3879 mdb_warn("failed to read zfs_acl_node at %p", wsp->walk_addr); 3880 return (WALK_ERR); 3881 } 3882 3883 return (wsp->walk_callback(wsp->walk_addr, &aclnode, wsp->walk_cbdata)); 3884 } 3885 3886 typedef struct ace_walk_data { 3887 int ace_count; 3888 int ace_version; 3889 } ace_walk_data_t; 3890 3891 static int 3892 zfs_aces_walk_init_common(mdb_walk_state_t *wsp, int version, 3893 int ace_count, uintptr_t ace_data) 3894 { 3895 ace_walk_data_t *ace_walk_data; 3896 3897 if (wsp->walk_addr == 0) { 3898 mdb_warn("must supply address of zfs_acl_node_t\n"); 3899 return (WALK_ERR); 3900 } 3901 3902 ace_walk_data = mdb_alloc(sizeof (ace_walk_data_t), UM_SLEEP | UM_GC); 3903 3904 ace_walk_data->ace_count = ace_count; 3905 ace_walk_data->ace_version = version; 3906 3907 wsp->walk_addr = ace_data; 3908 wsp->walk_data = ace_walk_data; 3909 3910 return (WALK_NEXT); 3911 } 3912 3913 static int 3914 zfs_acl_node_aces_walk_init_common(mdb_walk_state_t *wsp, int version) 3915 { 3916 static int gotid; 3917 static mdb_ctf_id_t acl_id; 3918 int z_ace_count; 3919 uintptr_t z_acldata; 3920 3921 if (!gotid) { 3922 if (mdb_ctf_lookup_by_name("struct zfs_acl_node", 3923 &acl_id) == -1) { 3924 mdb_warn("couldn't find struct zfs_acl_node"); 3925 return (DCMD_ERR); 3926 } 3927 gotid = TRUE; 3928 } 3929 3930 if (GETMEMBID(wsp->walk_addr, &acl_id, z_ace_count, z_ace_count)) { 3931 return (DCMD_ERR); 3932 } 3933 if (GETMEMBID(wsp->walk_addr, &acl_id, z_acldata, z_acldata)) { 3934 return (DCMD_ERR); 3935 } 3936 3937 return (zfs_aces_walk_init_common(wsp, version, 3938 z_ace_count, z_acldata)); 3939 } 3940 3941 /* ARGSUSED */ 3942 static int 3943 zfs_acl_node_aces_walk_init(mdb_walk_state_t *wsp) 3944 { 3945 return (zfs_acl_node_aces_walk_init_common(wsp, 1)); 3946 } 3947 3948 /* ARGSUSED */ 3949 static int 3950 zfs_acl_node_aces0_walk_init(mdb_walk_state_t *wsp) 3951 { 3952 return (zfs_acl_node_aces_walk_init_common(wsp, 0)); 3953 } 3954 3955 static int 3956 zfs_aces_walk_step(mdb_walk_state_t *wsp) 3957 { 3958 ace_walk_data_t *ace_data = wsp->walk_data; 3959 zfs_ace_t zace; 3960 ace_t *acep; 3961 int status; 3962 int entry_type; 3963 int allow_type; 3964 uintptr_t ptr; 3965 3966 if (ace_data->ace_count == 0) 3967 return (WALK_DONE); 3968 3969 if (mdb_vread(&zace, sizeof (zfs_ace_t), wsp->walk_addr) == -1) { 3970 mdb_warn("failed to read zfs_ace_t at %#lx", 3971 wsp->walk_addr); 3972 return (WALK_ERR); 3973 } 3974 3975 switch (ace_data->ace_version) { 3976 case 0: 3977 acep = (ace_t *)&zace; 3978 entry_type = acep->a_flags & ACE_TYPE_FLAGS; 3979 allow_type = acep->a_type; 3980 break; 3981 case 1: 3982 entry_type = zace.z_hdr.z_flags & ACE_TYPE_FLAGS; 3983 allow_type = zace.z_hdr.z_type; 3984 break; 3985 default: 3986 return (WALK_ERR); 3987 } 3988 3989 ptr = (uintptr_t)wsp->walk_addr; 3990 switch (entry_type) { 3991 case ACE_OWNER: 3992 case ACE_EVERYONE: 3993 case (ACE_IDENTIFIER_GROUP | ACE_GROUP): 3994 ptr += ace_data->ace_version == 0 ? 3995 sizeof (ace_t) : sizeof (zfs_ace_hdr_t); 3996 break; 3997 case ACE_IDENTIFIER_GROUP: 3998 default: 3999 switch (allow_type) { 4000 case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: 4001 case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: 4002 case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: 4003 case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: 4004 ptr += ace_data->ace_version == 0 ? 4005 sizeof (ace_t) : sizeof (zfs_object_ace_t); 4006 break; 4007 default: 4008 ptr += ace_data->ace_version == 0 ? 4009 sizeof (ace_t) : sizeof (zfs_ace_t); 4010 break; 4011 } 4012 } 4013 4014 ace_data->ace_count--; 4015 status = wsp->walk_callback(wsp->walk_addr, 4016 (void *)(uintptr_t)&zace, wsp->walk_cbdata); 4017 4018 wsp->walk_addr = ptr; 4019 return (status); 4020 } 4021 4022 typedef struct mdb_zfs_rrwlock { 4023 uintptr_t rr_writer; 4024 boolean_t rr_writer_wanted; 4025 } mdb_zfs_rrwlock_t; 4026 4027 static uint_t rrw_key; 4028 4029 /* ARGSUSED */ 4030 static int 4031 rrwlock(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 4032 { 4033 mdb_zfs_rrwlock_t rrw; 4034 4035 if (rrw_key == 0) { 4036 if (mdb_ctf_readsym(&rrw_key, "uint_t", "rrw_tsd_key", 0) == -1) 4037 return (DCMD_ERR); 4038 } 4039 4040 if (mdb_ctf_vread(&rrw, "rrwlock_t", "mdb_zfs_rrwlock_t", addr, 4041 0) == -1) 4042 return (DCMD_ERR); 4043 4044 if (rrw.rr_writer != 0) { 4045 mdb_printf("write lock held by thread %lx\n", rrw.rr_writer); 4046 return (DCMD_OK); 4047 } 4048 4049 if (rrw.rr_writer_wanted) { 4050 mdb_printf("writer wanted\n"); 4051 } 4052 4053 mdb_printf("anonymous references:\n"); 4054 (void) mdb_call_dcmd("zfs_refcount", addr + 4055 mdb_ctf_offsetof_by_name(ZFS_STRUCT "rrwlock", "rr_anon_rcount"), 4056 DCMD_ADDRSPEC, 0, NULL); 4057 4058 mdb_printf("linked references:\n"); 4059 (void) mdb_call_dcmd("zfs_refcount", addr + 4060 mdb_ctf_offsetof_by_name(ZFS_STRUCT "rrwlock", "rr_linked_rcount"), 4061 DCMD_ADDRSPEC, 0, NULL); 4062 4063 /* 4064 * XXX This should find references from 4065 * "::walk thread | ::tsd -v <rrw_key>", but there is no support 4066 * for programmatic consumption of dcmds, so this would be 4067 * difficult, potentially requiring reimplementing ::tsd (both 4068 * user and kernel versions) in this MDB module. 4069 */ 4070 4071 return (DCMD_OK); 4072 } 4073 4074 typedef struct mdb_arc_buf_hdr_t { 4075 uint16_t b_psize; 4076 uint16_t b_lsize; 4077 struct { 4078 uint32_t b_bufcnt; 4079 uintptr_t b_state; 4080 } b_l1hdr; 4081 } mdb_arc_buf_hdr_t; 4082 4083 enum arc_cflags { 4084 ARC_CFLAG_VERBOSE = 1 << 0, 4085 ARC_CFLAG_ANON = 1 << 1, 4086 ARC_CFLAG_MRU = 1 << 2, 4087 ARC_CFLAG_MFU = 1 << 3, 4088 ARC_CFLAG_BUFS = 1 << 4, 4089 }; 4090 4091 typedef struct arc_compression_stats_data { 4092 GElf_Sym anon_sym; /* ARC_anon symbol */ 4093 GElf_Sym mru_sym; /* ARC_mru symbol */ 4094 GElf_Sym mrug_sym; /* ARC_mru_ghost symbol */ 4095 GElf_Sym mfu_sym; /* ARC_mfu symbol */ 4096 GElf_Sym mfug_sym; /* ARC_mfu_ghost symbol */ 4097 GElf_Sym l2c_sym; /* ARC_l2c_only symbol */ 4098 uint64_t *anon_c_hist; /* histogram of compressed sizes in anon */ 4099 uint64_t *anon_u_hist; /* histogram of uncompressed sizes in anon */ 4100 uint64_t *anon_bufs; /* histogram of buffer counts in anon state */ 4101 uint64_t *mru_c_hist; /* histogram of compressed sizes in mru */ 4102 uint64_t *mru_u_hist; /* histogram of uncompressed sizes in mru */ 4103 uint64_t *mru_bufs; /* histogram of buffer counts in mru */ 4104 uint64_t *mfu_c_hist; /* histogram of compressed sizes in mfu */ 4105 uint64_t *mfu_u_hist; /* histogram of uncompressed sizes in mfu */ 4106 uint64_t *mfu_bufs; /* histogram of buffer counts in mfu */ 4107 uint64_t *all_c_hist; /* histogram of compressed anon + mru + mfu */ 4108 uint64_t *all_u_hist; /* histogram of uncompressed anon + mru + mfu */ 4109 uint64_t *all_bufs; /* histogram of buffer counts in all states */ 4110 int arc_cflags; /* arc compression flags, specified by user */ 4111 int hist_nbuckets; /* number of buckets in each histogram */ 4112 4113 ulong_t l1hdr_off; /* offset of b_l1hdr in arc_buf_hdr_t */ 4114 } arc_compression_stats_data_t; 4115 4116 int 4117 highbit64(uint64_t i) 4118 { 4119 int h = 1; 4120 4121 if (i == 0) 4122 return (0); 4123 if (i & 0xffffffff00000000ULL) { 4124 h += 32; i >>= 32; 4125 } 4126 if (i & 0xffff0000) { 4127 h += 16; i >>= 16; 4128 } 4129 if (i & 0xff00) { 4130 h += 8; i >>= 8; 4131 } 4132 if (i & 0xf0) { 4133 h += 4; i >>= 4; 4134 } 4135 if (i & 0xc) { 4136 h += 2; i >>= 2; 4137 } 4138 if (i & 0x2) { 4139 h += 1; 4140 } 4141 return (h); 4142 } 4143 4144 /* ARGSUSED */ 4145 static int 4146 arc_compression_stats_cb(uintptr_t addr, const void *unknown, void *arg) 4147 { 4148 arc_compression_stats_data_t *data = arg; 4149 arc_flags_t flags; 4150 mdb_arc_buf_hdr_t hdr; 4151 int cbucket, ubucket, bufcnt; 4152 4153 /* 4154 * mdb_ctf_vread() uses the sizeof the target type (e.g. 4155 * sizeof (arc_buf_hdr_t) in the target) to read in the entire contents 4156 * of the target type into a buffer and then copy the values of the 4157 * desired members from the mdb typename (e.g. mdb_arc_buf_hdr_t) from 4158 * this buffer. Unfortunately, the way arc_buf_hdr_t is used by zfs, 4159 * the actual size allocated by the kernel for arc_buf_hdr_t is often 4160 * smaller than `sizeof (arc_buf_hdr_t)` (see the definitions of 4161 * l1arc_buf_hdr_t and arc_buf_hdr_t in 4162 * usr/src/uts/common/fs/zfs/arc.c). Attempting to read the entire 4163 * contents of arc_buf_hdr_t from the target (as mdb_ctf_vread() does) 4164 * can cause an error if the allocated size is indeed smaller--it's 4165 * possible that the 'missing' trailing members of arc_buf_hdr_t 4166 * (l1arc_buf_hdr_t and/or arc_buf_hdr_crypt_t) may fall into unmapped 4167 * memory. 4168 * 4169 * We use the GETMEMB macro instead which performs an mdb_vread() 4170 * but only reads enough of the target to retrieve the desired struct 4171 * member instead of the entire struct. 4172 */ 4173 if (GETMEMB(addr, "arc_buf_hdr", b_flags, flags) == -1) 4174 return (WALK_ERR); 4175 4176 /* 4177 * We only count headers that have data loaded in the kernel. 4178 * This means an L1 header must be present as well as the data 4179 * that corresponds to the L1 header. If there's no L1 header, 4180 * we can skip the arc_buf_hdr_t completely. If it's present, we 4181 * must look at the ARC state (b_l1hdr.b_state) to determine if 4182 * the data is present. 4183 */ 4184 if ((flags & ARC_FLAG_HAS_L1HDR) == 0) 4185 return (WALK_NEXT); 4186 4187 if (GETMEMB(addr, "arc_buf_hdr", b_psize, hdr.b_psize) == -1 || 4188 GETMEMB(addr, "arc_buf_hdr", b_lsize, hdr.b_lsize) == -1 || 4189 GETMEMB(addr + data->l1hdr_off, "l1arc_buf_hdr", b_bufcnt, 4190 hdr.b_l1hdr.b_bufcnt) == -1 || 4191 GETMEMB(addr + data->l1hdr_off, "l1arc_buf_hdr", b_state, 4192 hdr.b_l1hdr.b_state) == -1) 4193 return (WALK_ERR); 4194 4195 /* 4196 * Headers in the ghost states, or the l2c_only state don't have 4197 * arc buffers linked off of them. Thus, their compressed size 4198 * is meaningless, so we skip these from the stats. 4199 */ 4200 if (hdr.b_l1hdr.b_state == data->mrug_sym.st_value || 4201 hdr.b_l1hdr.b_state == data->mfug_sym.st_value || 4202 hdr.b_l1hdr.b_state == data->l2c_sym.st_value) { 4203 return (WALK_NEXT); 4204 } 4205 4206 /* 4207 * The physical size (compressed) and logical size 4208 * (uncompressed) are in units of SPA_MINBLOCKSIZE. By default, 4209 * we use the log2 of this value (rounded down to the nearest 4210 * integer) to determine the bucket to assign this header to. 4211 * Thus, the histogram is logarithmic with respect to the size 4212 * of the header. For example, the following is a mapping of the 4213 * bucket numbers and the range of header sizes they correspond to: 4214 * 4215 * 0: 0 byte headers 4216 * 1: 512 byte headers 4217 * 2: [1024 - 2048) byte headers 4218 * 3: [2048 - 4096) byte headers 4219 * 4: [4096 - 8192) byte headers 4220 * 5: [8192 - 16394) byte headers 4221 * 6: [16384 - 32768) byte headers 4222 * 7: [32768 - 65536) byte headers 4223 * 8: [65536 - 131072) byte headers 4224 * 9: 131072 byte headers 4225 * 4226 * If the ARC_CFLAG_VERBOSE flag was specified, we use the 4227 * physical and logical sizes directly. Thus, the histogram will 4228 * no longer be logarithmic; instead it will be linear with 4229 * respect to the size of the header. The following is a mapping 4230 * of the first many bucket numbers and the header size they 4231 * correspond to: 4232 * 4233 * 0: 0 byte headers 4234 * 1: 512 byte headers 4235 * 2: 1024 byte headers 4236 * 3: 1536 byte headers 4237 * 4: 2048 byte headers 4238 * 5: 2560 byte headers 4239 * 6: 3072 byte headers 4240 * 4241 * And so on. Keep in mind that a range of sizes isn't used in 4242 * the case of linear scale because the headers can only 4243 * increment or decrement in sizes of 512 bytes. So, it's not 4244 * possible for a header to be sized in between whats listed 4245 * above. 4246 * 4247 * Also, the above mapping values were calculated assuming a 4248 * SPA_MINBLOCKSHIFT of 512 bytes and a SPA_MAXBLOCKSIZE of 128K. 4249 */ 4250 4251 if (data->arc_cflags & ARC_CFLAG_VERBOSE) { 4252 cbucket = hdr.b_psize; 4253 ubucket = hdr.b_lsize; 4254 } else { 4255 cbucket = highbit64(hdr.b_psize); 4256 ubucket = highbit64(hdr.b_lsize); 4257 } 4258 4259 bufcnt = hdr.b_l1hdr.b_bufcnt; 4260 if (bufcnt >= data->hist_nbuckets) 4261 bufcnt = data->hist_nbuckets - 1; 4262 4263 /* Ensure we stay within the bounds of the histogram array */ 4264 ASSERT3U(cbucket, <, data->hist_nbuckets); 4265 ASSERT3U(ubucket, <, data->hist_nbuckets); 4266 4267 if (hdr.b_l1hdr.b_state == data->anon_sym.st_value) { 4268 data->anon_c_hist[cbucket]++; 4269 data->anon_u_hist[ubucket]++; 4270 data->anon_bufs[bufcnt]++; 4271 } else if (hdr.b_l1hdr.b_state == data->mru_sym.st_value) { 4272 data->mru_c_hist[cbucket]++; 4273 data->mru_u_hist[ubucket]++; 4274 data->mru_bufs[bufcnt]++; 4275 } else if (hdr.b_l1hdr.b_state == data->mfu_sym.st_value) { 4276 data->mfu_c_hist[cbucket]++; 4277 data->mfu_u_hist[ubucket]++; 4278 data->mfu_bufs[bufcnt]++; 4279 } 4280 4281 data->all_c_hist[cbucket]++; 4282 data->all_u_hist[ubucket]++; 4283 data->all_bufs[bufcnt]++; 4284 4285 return (WALK_NEXT); 4286 } 4287 4288 /* ARGSUSED */ 4289 static int 4290 arc_compression_stats(uintptr_t addr, uint_t flags, int argc, 4291 const mdb_arg_t *argv) 4292 { 4293 arc_compression_stats_data_t data = { 0 }; 4294 unsigned int max_shifted = SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; 4295 unsigned int hist_size; 4296 char range[32]; 4297 int rc = DCMD_OK; 4298 int off; 4299 4300 if (mdb_getopts(argc, argv, 4301 'v', MDB_OPT_SETBITS, ARC_CFLAG_VERBOSE, &data.arc_cflags, 4302 'a', MDB_OPT_SETBITS, ARC_CFLAG_ANON, &data.arc_cflags, 4303 'b', MDB_OPT_SETBITS, ARC_CFLAG_BUFS, &data.arc_cflags, 4304 'r', MDB_OPT_SETBITS, ARC_CFLAG_MRU, &data.arc_cflags, 4305 'f', MDB_OPT_SETBITS, ARC_CFLAG_MFU, &data.arc_cflags, 4306 NULL) != argc) 4307 return (DCMD_USAGE); 4308 4309 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_anon", &data.anon_sym) || 4310 mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_mru", &data.mru_sym) || 4311 mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_mru_ghost", &data.mrug_sym) || 4312 mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_mfu", &data.mfu_sym) || 4313 mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_mfu_ghost", &data.mfug_sym) || 4314 mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_l2c_only", &data.l2c_sym)) { 4315 mdb_warn("can't find arc state symbol"); 4316 return (DCMD_ERR); 4317 } 4318 4319 /* 4320 * Determine the maximum expected size for any header, and use 4321 * this to determine the number of buckets needed for each 4322 * histogram. If ARC_CFLAG_VERBOSE is specified, this value is 4323 * used directly; otherwise the log2 of the maximum size is 4324 * used. Thus, if using a log2 scale there's a maximum of 10 4325 * possible buckets, while the linear scale (when using 4326 * ARC_CFLAG_VERBOSE) has a maximum of 257 buckets. 4327 */ 4328 if (data.arc_cflags & ARC_CFLAG_VERBOSE) 4329 data.hist_nbuckets = max_shifted + 1; 4330 else 4331 data.hist_nbuckets = highbit64(max_shifted) + 1; 4332 4333 hist_size = sizeof (uint64_t) * data.hist_nbuckets; 4334 4335 data.anon_c_hist = mdb_zalloc(hist_size, UM_SLEEP); 4336 data.anon_u_hist = mdb_zalloc(hist_size, UM_SLEEP); 4337 data.anon_bufs = mdb_zalloc(hist_size, UM_SLEEP); 4338 4339 data.mru_c_hist = mdb_zalloc(hist_size, UM_SLEEP); 4340 data.mru_u_hist = mdb_zalloc(hist_size, UM_SLEEP); 4341 data.mru_bufs = mdb_zalloc(hist_size, UM_SLEEP); 4342 4343 data.mfu_c_hist = mdb_zalloc(hist_size, UM_SLEEP); 4344 data.mfu_u_hist = mdb_zalloc(hist_size, UM_SLEEP); 4345 data.mfu_bufs = mdb_zalloc(hist_size, UM_SLEEP); 4346 4347 data.all_c_hist = mdb_zalloc(hist_size, UM_SLEEP); 4348 data.all_u_hist = mdb_zalloc(hist_size, UM_SLEEP); 4349 data.all_bufs = mdb_zalloc(hist_size, UM_SLEEP); 4350 4351 if ((off = mdb_ctf_offsetof_by_name(ZFS_STRUCT "arc_buf_hdr", 4352 "b_l1hdr")) == -1) { 4353 mdb_warn("could not get offset of b_l1hdr from arc_buf_hdr_t"); 4354 rc = DCMD_ERR; 4355 goto out; 4356 } 4357 data.l1hdr_off = off; 4358 4359 if (mdb_walk("arc_buf_hdr_t_full", arc_compression_stats_cb, 4360 &data) != 0) { 4361 mdb_warn("can't walk arc_buf_hdr's"); 4362 rc = DCMD_ERR; 4363 goto out; 4364 } 4365 4366 if (data.arc_cflags & ARC_CFLAG_VERBOSE) { 4367 rc = mdb_snprintf(range, sizeof (range), 4368 "[n*%llu, (n+1)*%llu)", SPA_MINBLOCKSIZE, 4369 SPA_MINBLOCKSIZE); 4370 } else { 4371 rc = mdb_snprintf(range, sizeof (range), 4372 "[2^(n-1)*%llu, 2^n*%llu)", SPA_MINBLOCKSIZE, 4373 SPA_MINBLOCKSIZE); 4374 } 4375 4376 if (rc < 0) { 4377 /* snprintf failed, abort the dcmd */ 4378 rc = DCMD_ERR; 4379 goto out; 4380 } else { 4381 /* snprintf succeeded above, reset return code */ 4382 rc = DCMD_OK; 4383 } 4384 4385 if (data.arc_cflags & ARC_CFLAG_ANON) { 4386 if (data.arc_cflags & ARC_CFLAG_BUFS) { 4387 mdb_printf("Histogram of the number of anon buffers " 4388 "that are associated with an arc hdr.\n"); 4389 dump_histogram(data.anon_bufs, data.hist_nbuckets, 0); 4390 mdb_printf("\n"); 4391 } 4392 mdb_printf("Histogram of compressed anon buffers.\n" 4393 "Each bucket represents buffers of size: %s.\n", range); 4394 dump_histogram(data.anon_c_hist, data.hist_nbuckets, 0); 4395 mdb_printf("\n"); 4396 4397 mdb_printf("Histogram of uncompressed anon buffers.\n" 4398 "Each bucket represents buffers of size: %s.\n", range); 4399 dump_histogram(data.anon_u_hist, data.hist_nbuckets, 0); 4400 mdb_printf("\n"); 4401 } 4402 4403 if (data.arc_cflags & ARC_CFLAG_MRU) { 4404 if (data.arc_cflags & ARC_CFLAG_BUFS) { 4405 mdb_printf("Histogram of the number of mru buffers " 4406 "that are associated with an arc hdr.\n"); 4407 dump_histogram(data.mru_bufs, data.hist_nbuckets, 0); 4408 mdb_printf("\n"); 4409 } 4410 mdb_printf("Histogram of compressed mru buffers.\n" 4411 "Each bucket represents buffers of size: %s.\n", range); 4412 dump_histogram(data.mru_c_hist, data.hist_nbuckets, 0); 4413 mdb_printf("\n"); 4414 4415 mdb_printf("Histogram of uncompressed mru buffers.\n" 4416 "Each bucket represents buffers of size: %s.\n", range); 4417 dump_histogram(data.mru_u_hist, data.hist_nbuckets, 0); 4418 mdb_printf("\n"); 4419 } 4420 4421 if (data.arc_cflags & ARC_CFLAG_MFU) { 4422 if (data.arc_cflags & ARC_CFLAG_BUFS) { 4423 mdb_printf("Histogram of the number of mfu buffers " 4424 "that are associated with an arc hdr.\n"); 4425 dump_histogram(data.mfu_bufs, data.hist_nbuckets, 0); 4426 mdb_printf("\n"); 4427 } 4428 4429 mdb_printf("Histogram of compressed mfu buffers.\n" 4430 "Each bucket represents buffers of size: %s.\n", range); 4431 dump_histogram(data.mfu_c_hist, data.hist_nbuckets, 0); 4432 mdb_printf("\n"); 4433 4434 mdb_printf("Histogram of uncompressed mfu buffers.\n" 4435 "Each bucket represents buffers of size: %s.\n", range); 4436 dump_histogram(data.mfu_u_hist, data.hist_nbuckets, 0); 4437 mdb_printf("\n"); 4438 } 4439 4440 if (data.arc_cflags & ARC_CFLAG_BUFS) { 4441 mdb_printf("Histogram of all buffers that " 4442 "are associated with an arc hdr.\n"); 4443 dump_histogram(data.all_bufs, data.hist_nbuckets, 0); 4444 mdb_printf("\n"); 4445 } 4446 4447 mdb_printf("Histogram of all compressed buffers.\n" 4448 "Each bucket represents buffers of size: %s.\n", range); 4449 dump_histogram(data.all_c_hist, data.hist_nbuckets, 0); 4450 mdb_printf("\n"); 4451 4452 mdb_printf("Histogram of all uncompressed buffers.\n" 4453 "Each bucket represents buffers of size: %s.\n", range); 4454 dump_histogram(data.all_u_hist, data.hist_nbuckets, 0); 4455 4456 out: 4457 mdb_free(data.anon_c_hist, hist_size); 4458 mdb_free(data.anon_u_hist, hist_size); 4459 mdb_free(data.anon_bufs, hist_size); 4460 4461 mdb_free(data.mru_c_hist, hist_size); 4462 mdb_free(data.mru_u_hist, hist_size); 4463 mdb_free(data.mru_bufs, hist_size); 4464 4465 mdb_free(data.mfu_c_hist, hist_size); 4466 mdb_free(data.mfu_u_hist, hist_size); 4467 mdb_free(data.mfu_bufs, hist_size); 4468 4469 mdb_free(data.all_c_hist, hist_size); 4470 mdb_free(data.all_u_hist, hist_size); 4471 mdb_free(data.all_bufs, hist_size); 4472 4473 return (rc); 4474 } 4475 4476 typedef struct mdb_range_seg64 { 4477 uint64_t rs_start; 4478 uint64_t rs_end; 4479 } mdb_range_seg64_t; 4480 4481 typedef struct mdb_range_seg32 { 4482 uint32_t rs_start; 4483 uint32_t rs_end; 4484 } mdb_range_seg32_t; 4485 4486 /* ARGSUSED */ 4487 static int 4488 range_tree_cb(uintptr_t addr, const void *unknown, void *arg) 4489 { 4490 mdb_range_tree_t *rt = (mdb_range_tree_t *)arg; 4491 uint64_t start, end; 4492 4493 if (rt->rt_type == RANGE_SEG64) { 4494 mdb_range_seg64_t rs; 4495 4496 if (mdb_ctf_vread(&rs, ZFS_STRUCT "range_seg64", 4497 "mdb_range_seg64_t", addr, 0) == -1) 4498 return (DCMD_ERR); 4499 start = rs.rs_start; 4500 end = rs.rs_end; 4501 } else { 4502 ASSERT3U(rt->rt_type, ==, RANGE_SEG32); 4503 mdb_range_seg32_t rs; 4504 4505 if (mdb_ctf_vread(&rs, ZFS_STRUCT "range_seg32", 4506 "mdb_range_seg32_t", addr, 0) == -1) 4507 return (DCMD_ERR); 4508 start = ((uint64_t)rs.rs_start << rt->rt_shift) + rt->rt_start; 4509 end = ((uint64_t)rs.rs_end << rt->rt_shift) + rt->rt_start; 4510 } 4511 4512 mdb_printf("\t[%llx %llx) (length %llx)\n", start, end, end - start); 4513 4514 return (0); 4515 } 4516 4517 /* ARGSUSED */ 4518 static int 4519 range_tree(uintptr_t addr, uint_t flags, int argc, 4520 const mdb_arg_t *argv) 4521 { 4522 mdb_range_tree_t rt; 4523 uintptr_t btree_addr; 4524 4525 if (!(flags & DCMD_ADDRSPEC)) 4526 return (DCMD_USAGE); 4527 4528 if (mdb_ctf_vread(&rt, ZFS_STRUCT "range_tree", "mdb_range_tree_t", 4529 addr, 0) == -1) 4530 return (DCMD_ERR); 4531 4532 mdb_printf("%p: range tree of %llu entries, %llu bytes\n", 4533 addr, rt.rt_root.bt_num_elems, rt.rt_space); 4534 4535 btree_addr = addr + 4536 mdb_ctf_offsetof_by_name(ZFS_STRUCT "range_tree", "rt_root"); 4537 4538 if (mdb_pwalk("zfs_btree", range_tree_cb, &rt, btree_addr) != 0) { 4539 mdb_warn("can't walk range_tree segments"); 4540 return (DCMD_ERR); 4541 } 4542 return (DCMD_OK); 4543 } 4544 4545 typedef struct mdb_spa_log_sm { 4546 uint64_t sls_sm_obj; 4547 uint64_t sls_txg; 4548 uint64_t sls_nblocks; 4549 uint64_t sls_mscount; 4550 } mdb_spa_log_sm_t; 4551 4552 /* ARGSUSED */ 4553 static int 4554 logsm_stats_cb(uintptr_t addr, const void *unknown, void *arg) 4555 { 4556 mdb_spa_log_sm_t sls; 4557 if (mdb_ctf_vread(&sls, ZFS_STRUCT "spa_log_sm", "mdb_spa_log_sm_t", 4558 addr, 0) == -1) 4559 return (WALK_ERR); 4560 4561 mdb_printf("%7lld %7lld %7lld %7lld\n", 4562 sls.sls_txg, sls.sls_nblocks, sls.sls_mscount, sls.sls_sm_obj); 4563 4564 return (WALK_NEXT); 4565 } 4566 typedef struct mdb_log_summary_entry { 4567 uint64_t lse_start; 4568 uint64_t lse_blkcount; 4569 uint64_t lse_mscount; 4570 } mdb_log_summary_entry_t; 4571 4572 /* ARGSUSED */ 4573 static int 4574 logsm_summary_cb(uintptr_t addr, const void *unknown, void *arg) 4575 { 4576 mdb_log_summary_entry_t lse; 4577 if (mdb_ctf_vread(&lse, ZFS_STRUCT "log_summary_entry", 4578 "mdb_log_summary_entry_t", addr, 0) == -1) 4579 return (WALK_ERR); 4580 4581 mdb_printf("%7lld %7lld %7lld\n", 4582 lse.lse_start, lse.lse_blkcount, lse.lse_mscount); 4583 return (WALK_NEXT); 4584 } 4585 4586 /* ARGSUSED */ 4587 static int 4588 logsm_stats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 4589 { 4590 if (!(flags & DCMD_ADDRSPEC)) 4591 return (DCMD_USAGE); 4592 4593 uintptr_t sls_avl_addr = addr + 4594 mdb_ctf_offsetof_by_name(ZFS_STRUCT "spa", "spa_sm_logs_by_txg"); 4595 uintptr_t summary_addr = addr + 4596 mdb_ctf_offsetof_by_name(ZFS_STRUCT "spa", "spa_log_summary"); 4597 4598 mdb_printf("Log Entries:\n"); 4599 mdb_printf("%7s %7s %7s %7s\n", "txg", "blk", "ms", "obj"); 4600 if (mdb_pwalk("avl", logsm_stats_cb, NULL, sls_avl_addr) != 0) 4601 return (DCMD_ERR); 4602 4603 mdb_printf("\nSummary Entries:\n"); 4604 mdb_printf("%7s %7s %7s\n", "txg", "blk", "ms"); 4605 if (mdb_pwalk("list", logsm_summary_cb, NULL, summary_addr) != 0) 4606 return (DCMD_ERR); 4607 4608 return (DCMD_OK); 4609 } 4610 4611 /* 4612 * MDB module linkage information: 4613 * 4614 * We declare a list of structures describing our dcmds, and a function 4615 * named _mdb_init to return a pointer to our module information. 4616 */ 4617 4618 static const mdb_dcmd_t dcmds[] = { 4619 { "arc", "[-bkmg]", "print ARC variables", arc_print }, 4620 { "blkptr", ":", "print blkptr_t", blkptr }, 4621 { "dva", ":", "print dva_t", dva }, 4622 { "dbuf", ":", "print dmu_buf_impl_t", dbuf }, 4623 { "dbuf_stats", ":", "dbuf stats", dbuf_stats }, 4624 { "dbufs", 4625 "\t[-O objset_t*] [-n objset_name | \"mos\"] " 4626 "[-o object | \"mdn\"] \n" 4627 "\t[-l level] [-b blkid | \"bonus\"]", 4628 "find dmu_buf_impl_t's that match specified criteria", dbufs }, 4629 { "abuf_find", "dva_word[0] dva_word[1]", 4630 "find arc_buf_hdr_t of a specified DVA", 4631 abuf_find }, 4632 { "logsm_stats", ":", "print log space map statistics of a spa_t", 4633 logsm_stats}, 4634 { "spa", "?[-cevmMh]\n" 4635 "\t-c display spa config\n" 4636 "\t-e display vdev statistics\n" 4637 "\t-v display vdev information\n" 4638 "\t-m display metaslab statistics\n" 4639 "\t-M display metaslab group statistics\n" 4640 "\t-h display histogram (requires -m or -M)\n", 4641 "spa_t summary", spa_print }, 4642 { "spa_config", ":", "print spa_t configuration", spa_print_config }, 4643 { "spa_space", ":[-b]", "print spa_t on-disk space usage", spa_space }, 4644 { "spa_vdevs", ":[-emMh]\n" 4645 "\t-e display vdev statistics\n" 4646 "\t-m dispaly metaslab statistics\n" 4647 "\t-M display metaslab group statistic\n" 4648 "\t-h display histogram (requires -m or -M)\n", 4649 "given a spa_t, print vdev summary", spa_vdevs }, 4650 { "sm_entries", "<buffer length in bytes>", 4651 "print out space map entries from a buffer decoded", 4652 sm_entries}, 4653 { "vdev", ":[-remMh]\n" 4654 "\t-r display recursively\n" 4655 "\t-e display statistics\n" 4656 "\t-m display metaslab statistics (top level vdev only)\n" 4657 "\t-M display metaslab group statistics (top level vdev only)\n" 4658 "\t-h display histogram (requires -m or -M)\n", 4659 "vdev_t summary", vdev_print }, 4660 { "zio", ":[-cpr]\n" 4661 "\t-c display children\n" 4662 "\t-p display parents\n" 4663 "\t-r display recursively", 4664 "zio_t summary", zio_print }, 4665 { "zio_state", "?", "print out all zio_t structures on system or " 4666 "for a particular pool", zio_state }, 4667 { "zfs_blkstats", ":[-v]", 4668 "given a spa_t, print block type stats from last scrub", 4669 zfs_blkstats }, 4670 { "zfs_params", "", "print zfs tunable parameters", zfs_params }, 4671 { "zfs_refcount", ":[-r]\n" 4672 "\t-r display recently removed references", 4673 "print zfs_refcount_t holders", zfs_refcount }, 4674 { "zap_leaf", "", "print zap_leaf_phys_t", zap_leaf }, 4675 { "zfs_aces", ":[-v]", "print all ACEs from a zfs_acl_t", 4676 zfs_acl_dump }, 4677 { "zfs_ace", ":[-v]", "print zfs_ace", zfs_ace_print }, 4678 { "zfs_ace0", ":[-v]", "print zfs_ace0", zfs_ace0_print }, 4679 { "sa_attr_table", ":", "print SA attribute table from sa_os_t", 4680 sa_attr_table}, 4681 { "sa_attr", ": attr_id", 4682 "print SA attribute address when given sa_handle_t", sa_attr_print}, 4683 { "zfs_dbgmsg", ":[-artTvw]", 4684 "print zfs debug log", dbgmsg, dbgmsg_help}, 4685 { "rrwlock", ":", 4686 "print rrwlock_t, including readers", rrwlock}, 4687 { "metaslab_weight", "weight", 4688 "print metaslab weight", metaslab_weight}, 4689 { "metaslab_trace", ":", 4690 "print metaslab allocation trace records", metaslab_trace}, 4691 { "arc_compression_stats", ":[-vabrf]\n" 4692 "\t-v verbose, display a linearly scaled histogram\n" 4693 "\t-a display ARC_anon state statistics individually\n" 4694 "\t-r display ARC_mru state statistics individually\n" 4695 "\t-f display ARC_mfu state statistics individually\n" 4696 "\t-b display histogram of buffer counts\n", 4697 "print a histogram of compressed arc buffer sizes", 4698 arc_compression_stats}, 4699 { "range_tree", ":", 4700 "print entries in range_tree_t", range_tree}, 4701 { NULL } 4702 }; 4703 4704 static const mdb_walker_t walkers[] = { 4705 { "txg_list", "given any txg_list_t *, walk all entries in all txgs", 4706 txg_list_walk_init, txg_list_walk_step, NULL }, 4707 { "txg_list0", "given any txg_list_t *, walk all entries in txg 0", 4708 txg_list0_walk_init, txg_list_walk_step, NULL }, 4709 { "txg_list1", "given any txg_list_t *, walk all entries in txg 1", 4710 txg_list1_walk_init, txg_list_walk_step, NULL }, 4711 { "txg_list2", "given any txg_list_t *, walk all entries in txg 2", 4712 txg_list2_walk_init, txg_list_walk_step, NULL }, 4713 { "txg_list3", "given any txg_list_t *, walk all entries in txg 3", 4714 txg_list3_walk_init, txg_list_walk_step, NULL }, 4715 { "zio", "walk all zio structures, optionally for a particular spa_t", 4716 zio_walk_init, zio_walk_step, NULL }, 4717 { "zio_root", 4718 "walk all root zio_t structures, optionally for a particular spa_t", 4719 zio_walk_init, zio_walk_root_step, NULL }, 4720 { "spa", "walk all spa_t entries in the namespace", 4721 spa_walk_init, spa_walk_step, NULL }, 4722 { "metaslab", "given a spa_t *, walk all metaslab_t structures", 4723 metaslab_walk_init, metaslab_walk_step, NULL }, 4724 { "multilist", "given a multilist_t *, walk all list_t structures", 4725 multilist_walk_init, multilist_walk_step, NULL }, 4726 { "zfs_acl_node", "given a zfs_acl_t, walk all zfs_acl_nodes", 4727 zfs_acl_node_walk_init, zfs_acl_node_walk_step, NULL }, 4728 { "zfs_acl_node_aces", "given a zfs_acl_node_t, walk all ACEs", 4729 zfs_acl_node_aces_walk_init, zfs_aces_walk_step, NULL }, 4730 { "zfs_acl_node_aces0", 4731 "given a zfs_acl_node_t, walk all ACEs as ace_t", 4732 zfs_acl_node_aces0_walk_init, zfs_aces_walk_step, NULL }, 4733 { "zfs_btree", "given a zfs_btree_t *, walk all entries", 4734 btree_walk_init, btree_walk_step, btree_walk_fini }, 4735 { NULL } 4736 }; 4737 4738 static const mdb_modinfo_t modinfo = { 4739 MDB_API_VERSION, dcmds, walkers 4740 }; 4741 4742 const mdb_modinfo_t * 4743 _mdb_init(void) 4744 { 4745 return (&modinfo); 4746 } 4747