1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24 * Copyright (c) 2011, 2018 by Delphix. All rights reserved. 25 * Copyright (c) 2019 Joyent, Inc. 26 */ 27 28 /* Portions Copyright 2010 Robert Milkowski */ 29 30 /* 31 * ZFS_MDB lets dmu.h know that we don't have dmu_ot, and we will define our 32 * own macros to access the target's dmu_ot. Therefore it must be defined 33 * before including any ZFS headers. Note that we don't define 34 * DMU_OT_IS_ENCRYPTED_IMPL() or DMU_OT_BYTESWAP_IMPL(), therefore using them 35 * will result in a compilation error. If they are needed in the future, we 36 * can implement them similarly to mdb_dmu_ot_is_encrypted_impl(). 37 */ 38 #define ZFS_MDB 39 #define DMU_OT_IS_ENCRYPTED_IMPL(ot) mdb_dmu_ot_is_encrypted_impl(ot) 40 41 #include <mdb/mdb_ctf.h> 42 #include <sys/zfs_context.h> 43 #include <sys/mdb_modapi.h> 44 #include <sys/dbuf.h> 45 #include <sys/dmu_objset.h> 46 #include <sys/dsl_dir.h> 47 #include <sys/dsl_pool.h> 48 #include <sys/metaslab_impl.h> 49 #include <sys/space_map.h> 50 #include <sys/list.h> 51 #include <sys/vdev_impl.h> 52 #include <sys/zap_leaf.h> 53 #include <sys/zap_impl.h> 54 #include <ctype.h> 55 #include <sys/zfs_acl.h> 56 #include <sys/sa_impl.h> 57 #include <sys/multilist.h> 58 59 #ifdef _KERNEL 60 #define ZFS_OBJ_NAME "zfs" 61 extern int64_t mdb_gethrtime(void); 62 #else 63 #define ZFS_OBJ_NAME "libzpool.so.1" 64 #endif 65 66 #define ZFS_STRUCT "struct " ZFS_OBJ_NAME "`" 67 68 #ifndef _KERNEL 69 int aok; 70 #endif 71 72 enum spa_flags { 73 SPA_FLAG_CONFIG = 1 << 0, 74 SPA_FLAG_VDEVS = 1 << 1, 75 SPA_FLAG_ERRORS = 1 << 2, 76 SPA_FLAG_METASLAB_GROUPS = 1 << 3, 77 SPA_FLAG_METASLABS = 1 << 4, 78 SPA_FLAG_HISTOGRAMS = 1 << 5 79 }; 80 81 /* 82 * If any of these flags are set, call spa_vdevs in spa_print 83 */ 84 #define SPA_FLAG_ALL_VDEV \ 85 (SPA_FLAG_VDEVS | SPA_FLAG_ERRORS | SPA_FLAG_METASLAB_GROUPS | \ 86 SPA_FLAG_METASLABS) 87 88 static int 89 getmember(uintptr_t addr, const char *type, mdb_ctf_id_t *idp, 90 const char *member, int len, void *buf) 91 { 92 mdb_ctf_id_t id; 93 ulong_t off; 94 char name[64]; 95 96 if (idp == NULL) { 97 if (mdb_ctf_lookup_by_name(type, &id) == -1) { 98 mdb_warn("couldn't find type %s", type); 99 return (DCMD_ERR); 100 } 101 idp = &id; 102 } else { 103 type = name; 104 mdb_ctf_type_name(*idp, name, sizeof (name)); 105 } 106 107 if (mdb_ctf_offsetof(*idp, member, &off) == -1) { 108 mdb_warn("couldn't find member %s of type %s\n", member, type); 109 return (DCMD_ERR); 110 } 111 if (off % 8 != 0) { 112 mdb_warn("member %s of type %s is unsupported bitfield", 113 member, type); 114 return (DCMD_ERR); 115 } 116 off /= 8; 117 118 if (mdb_vread(buf, len, addr + off) == -1) { 119 mdb_warn("failed to read %s from %s at %p", 120 member, type, addr + off); 121 return (DCMD_ERR); 122 } 123 /* mdb_warn("read %s from %s at %p+%llx\n", member, type, addr, off); */ 124 125 return (0); 126 } 127 128 #define GETMEMB(addr, structname, member, dest) \ 129 getmember(addr, ZFS_STRUCT structname, NULL, #member, \ 130 sizeof (dest), &(dest)) 131 132 #define GETMEMBID(addr, ctfid, member, dest) \ 133 getmember(addr, NULL, ctfid, #member, sizeof (dest), &(dest)) 134 135 static boolean_t 136 strisprint(const char *cp) 137 { 138 for (; *cp; cp++) { 139 if (!isprint(*cp)) 140 return (B_FALSE); 141 } 142 return (B_TRUE); 143 } 144 145 /* 146 * <addr>::sm_entries <buffer length in bytes> 147 * 148 * Treat the buffer specified by the given address as a buffer that contains 149 * space map entries. Iterate over the specified number of entries and print 150 * them in both encoded and decoded form. 151 */ 152 /* ARGSUSED */ 153 static int 154 sm_entries(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 155 { 156 uint64_t bufsz = 0; 157 boolean_t preview = B_FALSE; 158 159 if (!(flags & DCMD_ADDRSPEC)) 160 return (DCMD_USAGE); 161 162 if (argc < 1) { 163 preview = B_TRUE; 164 bufsz = 2; 165 } else if (argc != 1) { 166 return (DCMD_USAGE); 167 } else { 168 switch (argv[0].a_type) { 169 case MDB_TYPE_STRING: 170 bufsz = mdb_strtoull(argv[0].a_un.a_str); 171 break; 172 case MDB_TYPE_IMMEDIATE: 173 bufsz = argv[0].a_un.a_val; 174 break; 175 default: 176 return (DCMD_USAGE); 177 } 178 } 179 180 char *actions[] = { "ALLOC", "FREE", "INVALID" }; 181 for (uintptr_t bufend = addr + bufsz; addr < bufend; 182 addr += sizeof (uint64_t)) { 183 uint64_t nwords; 184 uint64_t start_addr = addr; 185 186 uint64_t word = 0; 187 if (mdb_vread(&word, sizeof (word), addr) == -1) { 188 mdb_warn("failed to read space map entry %p", addr); 189 return (DCMD_ERR); 190 } 191 192 if (SM_PREFIX_DECODE(word) == SM_DEBUG_PREFIX) { 193 (void) mdb_printf("\t [%6llu] %s: txg %llu, " 194 "pass %llu\n", 195 (u_longlong_t)(addr), 196 actions[SM_DEBUG_ACTION_DECODE(word)], 197 (u_longlong_t)SM_DEBUG_TXG_DECODE(word), 198 (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(word)); 199 continue; 200 } 201 202 char entry_type; 203 uint64_t raw_offset, raw_run, vdev_id = SM_NO_VDEVID; 204 205 if (SM_PREFIX_DECODE(word) != SM2_PREFIX) { 206 entry_type = (SM_TYPE_DECODE(word) == SM_ALLOC) ? 207 'A' : 'F'; 208 raw_offset = SM_OFFSET_DECODE(word); 209 raw_run = SM_RUN_DECODE(word); 210 nwords = 1; 211 } else { 212 ASSERT3U(SM_PREFIX_DECODE(word), ==, SM2_PREFIX); 213 214 raw_run = SM2_RUN_DECODE(word); 215 vdev_id = SM2_VDEV_DECODE(word); 216 217 /* it is a two-word entry so we read another word */ 218 addr += sizeof (uint64_t); 219 if (addr >= bufend) { 220 mdb_warn("buffer ends in the middle of a two " 221 "word entry\n", addr); 222 return (DCMD_ERR); 223 } 224 225 if (mdb_vread(&word, sizeof (word), addr) == -1) { 226 mdb_warn("failed to read space map entry %p", 227 addr); 228 return (DCMD_ERR); 229 } 230 231 entry_type = (SM2_TYPE_DECODE(word) == SM_ALLOC) ? 232 'A' : 'F'; 233 raw_offset = SM2_OFFSET_DECODE(word); 234 nwords = 2; 235 } 236 237 (void) mdb_printf("\t [%6llx] %c range:" 238 " %010llx-%010llx size: %06llx vdev: %06llu words: %llu\n", 239 (u_longlong_t)start_addr, 240 entry_type, (u_longlong_t)raw_offset, 241 (u_longlong_t)(raw_offset + raw_run), 242 (u_longlong_t)raw_run, 243 (u_longlong_t)vdev_id, (u_longlong_t)nwords); 244 245 if (preview) 246 break; 247 } 248 return (DCMD_OK); 249 } 250 251 static int 252 mdb_dsl_dir_name(uintptr_t addr, char *buf) 253 { 254 static int gotid; 255 static mdb_ctf_id_t dd_id; 256 uintptr_t dd_parent; 257 char dd_myname[ZFS_MAX_DATASET_NAME_LEN]; 258 259 if (!gotid) { 260 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "dsl_dir", 261 &dd_id) == -1) { 262 mdb_warn("couldn't find struct dsl_dir"); 263 return (DCMD_ERR); 264 } 265 gotid = TRUE; 266 } 267 if (GETMEMBID(addr, &dd_id, dd_parent, dd_parent) || 268 GETMEMBID(addr, &dd_id, dd_myname, dd_myname)) { 269 return (DCMD_ERR); 270 } 271 272 if (dd_parent) { 273 if (mdb_dsl_dir_name(dd_parent, buf)) 274 return (DCMD_ERR); 275 strcat(buf, "/"); 276 } 277 278 if (dd_myname[0]) 279 strcat(buf, dd_myname); 280 else 281 strcat(buf, "???"); 282 283 return (0); 284 } 285 286 static int 287 objset_name(uintptr_t addr, char *buf) 288 { 289 static int gotid; 290 static mdb_ctf_id_t os_id, ds_id; 291 uintptr_t os_dsl_dataset; 292 char ds_snapname[ZFS_MAX_DATASET_NAME_LEN]; 293 uintptr_t ds_dir; 294 295 buf[0] = '\0'; 296 297 if (!gotid) { 298 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "objset", 299 &os_id) == -1) { 300 mdb_warn("couldn't find struct objset"); 301 return (DCMD_ERR); 302 } 303 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "dsl_dataset", 304 &ds_id) == -1) { 305 mdb_warn("couldn't find struct dsl_dataset"); 306 return (DCMD_ERR); 307 } 308 309 gotid = TRUE; 310 } 311 312 if (GETMEMBID(addr, &os_id, os_dsl_dataset, os_dsl_dataset)) 313 return (DCMD_ERR); 314 315 if (os_dsl_dataset == 0) { 316 strcat(buf, "mos"); 317 return (0); 318 } 319 320 if (GETMEMBID(os_dsl_dataset, &ds_id, ds_snapname, ds_snapname) || 321 GETMEMBID(os_dsl_dataset, &ds_id, ds_dir, ds_dir)) { 322 return (DCMD_ERR); 323 } 324 325 if (ds_dir && mdb_dsl_dir_name(ds_dir, buf)) 326 return (DCMD_ERR); 327 328 if (ds_snapname[0]) { 329 strcat(buf, "@"); 330 strcat(buf, ds_snapname); 331 } 332 return (0); 333 } 334 335 static int 336 enum_lookup(char *type, int val, const char *prefix, size_t size, char *out) 337 { 338 const char *cp; 339 size_t len = strlen(prefix); 340 mdb_ctf_id_t enum_type; 341 342 if (mdb_ctf_lookup_by_name(type, &enum_type) != 0) { 343 mdb_warn("Could not find enum for %s", type); 344 return (-1); 345 } 346 347 if ((cp = mdb_ctf_enum_name(enum_type, val)) != NULL) { 348 if (strncmp(cp, prefix, len) == 0) 349 cp += len; 350 (void) strncpy(out, cp, size); 351 } else { 352 mdb_snprintf(out, size, "? (%d)", val); 353 } 354 return (0); 355 } 356 357 /* ARGSUSED */ 358 static int 359 zfs_params(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 360 { 361 /* 362 * This table can be approximately generated by running: 363 * egrep "^[a-z0-9_]+ [a-z0-9_]+( =.*)?;" *.c | cut -d ' ' -f 2 364 */ 365 static const char *params[] = { 366 "arc_lotsfree_percent", 367 "arc_pages_pp_reserve", 368 "arc_reduce_dnlc_percent", 369 "arc_swapfs_reserve", 370 "arc_zio_arena_free_shift", 371 "dbuf_cache_hiwater_pct", 372 "dbuf_cache_lowater_pct", 373 "dbuf_cache_max_bytes", 374 "dbuf_cache_max_shift", 375 "ddt_zap_indirect_blockshift", 376 "ddt_zap_leaf_blockshift", 377 "ditto_same_vdev_distance_shift", 378 "dmu_find_threads", 379 "dmu_rescan_dnode_threshold", 380 "dsl_scan_delay_completion", 381 "fzap_default_block_shift", 382 "l2arc_feed_again", 383 "l2arc_feed_min_ms", 384 "l2arc_feed_secs", 385 "l2arc_headroom", 386 "l2arc_headroom_boost", 387 "l2arc_noprefetch", 388 "l2arc_norw", 389 "l2arc_write_boost", 390 "l2arc_write_max", 391 "metaslab_aliquot", 392 "metaslab_bias_enabled", 393 "metaslab_debug_load", 394 "metaslab_debug_unload", 395 "metaslab_df_alloc_threshold", 396 "metaslab_df_free_pct", 397 "metaslab_fragmentation_factor_enabled", 398 "metaslab_force_ganging", 399 "metaslab_lba_weighting_enabled", 400 "metaslab_load_pct", 401 "metaslab_min_alloc_size", 402 "metaslab_ndf_clump_shift", 403 "metaslab_preload_enabled", 404 "metaslab_preload_limit", 405 "metaslab_trace_enabled", 406 "metaslab_trace_max_entries", 407 "metaslab_unload_delay", 408 "metaslabs_per_vdev", 409 "reference_history", 410 "reference_tracking_enable", 411 "send_holes_without_birth_time", 412 "spa_asize_inflation", 413 "spa_load_verify_data", 414 "spa_load_verify_maxinflight", 415 "spa_load_verify_metadata", 416 "spa_max_replication_override", 417 "spa_min_slop", 418 "spa_mode_global", 419 "spa_slop_shift", 420 "space_map_blksz", 421 "vdev_mirror_shift", 422 "zfetch_max_distance", 423 "zfs_abd_chunk_size", 424 "zfs_abd_scatter_enabled", 425 "zfs_arc_average_blocksize", 426 "zfs_arc_evict_batch_limit", 427 "zfs_arc_grow_retry", 428 "zfs_arc_max", 429 "zfs_arc_meta_limit", 430 "zfs_arc_meta_min", 431 "zfs_arc_min", 432 "zfs_arc_p_min_shift", 433 "zfs_arc_shrink_shift", 434 "zfs_async_block_max_blocks", 435 "zfs_ccw_retry_interval", 436 "zfs_commit_timeout_pct", 437 "zfs_compressed_arc_enabled", 438 "zfs_condense_indirect_commit_entry_delay_ticks", 439 "zfs_condense_indirect_vdevs_enable", 440 "zfs_condense_max_obsolete_bytes", 441 "zfs_condense_min_mapping_bytes", 442 "zfs_condense_pct", 443 "zfs_dbgmsg_maxsize", 444 "zfs_deadman_checktime_ms", 445 "zfs_deadman_enabled", 446 "zfs_deadman_synctime_ms", 447 "zfs_dedup_prefetch", 448 "zfs_default_bs", 449 "zfs_default_ibs", 450 "zfs_delay_max_ns", 451 "zfs_delay_min_dirty_percent", 452 "zfs_delay_resolution_ns", 453 "zfs_delay_scale", 454 "zfs_dirty_data_max", 455 "zfs_dirty_data_max_max", 456 "zfs_dirty_data_max_percent", 457 "zfs_dirty_data_sync", 458 "zfs_flags", 459 "zfs_free_bpobj_enabled", 460 "zfs_free_leak_on_eio", 461 "zfs_free_min_time_ms", 462 "zfs_fsync_sync_cnt", 463 "zfs_immediate_write_sz", 464 "zfs_indirect_condense_obsolete_pct", 465 "zfs_lua_check_instrlimit_interval", 466 "zfs_lua_max_instrlimit", 467 "zfs_lua_max_memlimit", 468 "zfs_max_recordsize", 469 "zfs_mdcomp_disable", 470 "zfs_metaslab_condense_block_threshold", 471 "zfs_metaslab_fragmentation_threshold", 472 "zfs_metaslab_segment_weight_enabled", 473 "zfs_metaslab_switch_threshold", 474 "zfs_mg_fragmentation_threshold", 475 "zfs_mg_noalloc_threshold", 476 "zfs_multilist_num_sublists", 477 "zfs_no_scrub_io", 478 "zfs_no_scrub_prefetch", 479 "zfs_nocacheflush", 480 "zfs_nopwrite_enabled", 481 "zfs_object_remap_one_indirect_delay_ticks", 482 "zfs_obsolete_min_time_ms", 483 "zfs_pd_bytes_max", 484 "zfs_per_txg_dirty_frees_percent", 485 "zfs_prefetch_disable", 486 "zfs_read_chunk_size", 487 "zfs_recover", 488 "zfs_recv_queue_length", 489 "zfs_redundant_metadata_most_ditto_level", 490 "zfs_remap_blkptr_enable", 491 "zfs_remove_max_copy_bytes", 492 "zfs_remove_max_segment", 493 "zfs_resilver_delay", 494 "zfs_resilver_min_time_ms", 495 "zfs_scan_idle", 496 "zfs_scan_min_time_ms", 497 "zfs_scrub_delay", 498 "zfs_scrub_limit", 499 "zfs_send_corrupt_data", 500 "zfs_send_queue_length", 501 "zfs_send_set_freerecords_bit", 502 "zfs_sync_pass_deferred_free", 503 "zfs_sync_pass_dont_compress", 504 "zfs_sync_pass_rewrite", 505 "zfs_sync_taskq_batch_pct", 506 "zfs_top_maxinflight", 507 "zfs_txg_timeout", 508 "zfs_vdev_aggregation_limit", 509 "zfs_vdev_async_read_max_active", 510 "zfs_vdev_async_read_min_active", 511 "zfs_vdev_async_write_active_max_dirty_percent", 512 "zfs_vdev_async_write_active_min_dirty_percent", 513 "zfs_vdev_async_write_max_active", 514 "zfs_vdev_async_write_min_active", 515 "zfs_vdev_cache_bshift", 516 "zfs_vdev_cache_max", 517 "zfs_vdev_cache_size", 518 "zfs_vdev_max_active", 519 "zfs_vdev_queue_depth_pct", 520 "zfs_vdev_read_gap_limit", 521 "zfs_vdev_removal_max_active", 522 "zfs_vdev_removal_min_active", 523 "zfs_vdev_scrub_max_active", 524 "zfs_vdev_scrub_min_active", 525 "zfs_vdev_sync_read_max_active", 526 "zfs_vdev_sync_read_min_active", 527 "zfs_vdev_sync_write_max_active", 528 "zfs_vdev_sync_write_min_active", 529 "zfs_vdev_write_gap_limit", 530 "zfs_write_implies_delete_child", 531 "zfs_zil_clean_taskq_maxalloc", 532 "zfs_zil_clean_taskq_minalloc", 533 "zfs_zil_clean_taskq_nthr_pct", 534 "zil_replay_disable", 535 "zil_slog_bulk", 536 "zio_buf_debug_limit", 537 "zio_dva_throttle_enabled", 538 "zio_injection_enabled", 539 "zvol_immediate_write_sz", 540 "zvol_maxphys", 541 "zvol_unmap_enabled", 542 "zvol_unmap_sync_enabled", 543 "zfs_max_dataset_nesting", 544 }; 545 546 for (int i = 0; i < sizeof (params) / sizeof (params[0]); i++) { 547 int sz; 548 uint64_t val64; 549 uint32_t *val32p = (uint32_t *)&val64; 550 551 sz = mdb_readvar(&val64, params[i]); 552 if (sz == 4) { 553 mdb_printf("%s = 0x%x\n", params[i], *val32p); 554 } else if (sz == 8) { 555 mdb_printf("%s = 0x%llx\n", params[i], val64); 556 } else { 557 mdb_warn("variable %s not found", params[i]); 558 } 559 } 560 561 return (DCMD_OK); 562 } 563 564 /* ARGSUSED */ 565 static int 566 dva(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 567 { 568 dva_t dva; 569 if (mdb_vread(&dva, sizeof (dva_t), addr) == -1) { 570 mdb_warn("failed to read dva_t"); 571 return (DCMD_ERR); 572 } 573 mdb_printf("<%llu:%llx:%llx>\n", 574 (u_longlong_t)DVA_GET_VDEV(&dva), 575 (u_longlong_t)DVA_GET_OFFSET(&dva), 576 (u_longlong_t)DVA_GET_ASIZE(&dva)); 577 578 return (DCMD_OK); 579 } 580 581 typedef struct mdb_dmu_object_type_info { 582 boolean_t ot_encrypt; 583 } mdb_dmu_object_type_info_t; 584 585 static boolean_t 586 mdb_dmu_ot_is_encrypted_impl(dmu_object_type_t ot) 587 { 588 mdb_dmu_object_type_info_t mdoti; 589 GElf_Sym sym; 590 size_t sz = mdb_ctf_sizeof_by_name("dmu_object_type_info_t"); 591 592 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "dmu_ot", &sym)) { 593 mdb_warn("failed to find " ZFS_OBJ_NAME "`dmu_ot"); 594 return (B_FALSE); 595 } 596 597 if (mdb_ctf_vread(&mdoti, "dmu_object_type_info_t", 598 "mdb_dmu_object_type_info_t", sym.st_value + sz * ot, 0) != 0) { 599 return (B_FALSE); 600 } 601 602 return (mdoti.ot_encrypt); 603 } 604 605 /* ARGSUSED */ 606 static int 607 blkptr(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 608 { 609 char type[80], checksum[80], compress[80]; 610 blkptr_t blk, *bp = &blk; 611 char buf[BP_SPRINTF_LEN]; 612 613 if (mdb_vread(&blk, sizeof (blkptr_t), addr) == -1) { 614 mdb_warn("failed to read blkptr_t"); 615 return (DCMD_ERR); 616 } 617 618 if (enum_lookup("enum dmu_object_type", BP_GET_TYPE(bp), "DMU_OT_", 619 sizeof (type), type) == -1 || 620 enum_lookup("enum zio_checksum", BP_GET_CHECKSUM(bp), 621 "ZIO_CHECKSUM_", sizeof (checksum), checksum) == -1 || 622 enum_lookup("enum zio_compress", BP_GET_COMPRESS(bp), 623 "ZIO_COMPRESS_", sizeof (compress), compress) == -1) { 624 mdb_warn("Could not find blkptr enumerated types"); 625 return (DCMD_ERR); 626 } 627 628 SNPRINTF_BLKPTR(mdb_snprintf, '\n', buf, sizeof (buf), bp, type, 629 checksum, compress); 630 631 mdb_printf("%s\n", buf); 632 633 return (DCMD_OK); 634 } 635 636 typedef struct mdb_dmu_buf_impl { 637 struct { 638 uint64_t db_object; 639 uintptr_t db_data; 640 } db; 641 uintptr_t db_objset; 642 uint64_t db_level; 643 uint64_t db_blkid; 644 struct { 645 uint64_t rc_count; 646 } db_holds; 647 } mdb_dmu_buf_impl_t; 648 649 /* ARGSUSED */ 650 static int 651 dbuf(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 652 { 653 mdb_dmu_buf_impl_t db; 654 char objectname[32]; 655 char blkidname[32]; 656 char path[ZFS_MAX_DATASET_NAME_LEN]; 657 int ptr_width = (int)(sizeof (void *)) * 2; 658 659 if (DCMD_HDRSPEC(flags)) 660 mdb_printf("%*s %8s %3s %9s %5s %s\n", 661 ptr_width, "addr", "object", "lvl", "blkid", "holds", "os"); 662 663 if (mdb_ctf_vread(&db, ZFS_STRUCT "dmu_buf_impl", "mdb_dmu_buf_impl_t", 664 addr, 0) == -1) 665 return (DCMD_ERR); 666 667 if (db.db.db_object == DMU_META_DNODE_OBJECT) 668 (void) strcpy(objectname, "mdn"); 669 else 670 (void) mdb_snprintf(objectname, sizeof (objectname), "%llx", 671 (u_longlong_t)db.db.db_object); 672 673 if (db.db_blkid == DMU_BONUS_BLKID) 674 (void) strcpy(blkidname, "bonus"); 675 else 676 (void) mdb_snprintf(blkidname, sizeof (blkidname), "%llx", 677 (u_longlong_t)db.db_blkid); 678 679 if (objset_name(db.db_objset, path)) { 680 return (DCMD_ERR); 681 } 682 683 mdb_printf("%*p %8s %3u %9s %5llu %s\n", ptr_width, addr, 684 objectname, (int)db.db_level, blkidname, 685 db.db_holds.rc_count, path); 686 687 return (DCMD_OK); 688 } 689 690 /* ARGSUSED */ 691 static int 692 dbuf_stats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 693 { 694 #define HISTOSZ 32 695 uintptr_t dbp; 696 dmu_buf_impl_t db; 697 dbuf_hash_table_t ht; 698 uint64_t bucket, ndbufs; 699 uint64_t histo[HISTOSZ]; 700 uint64_t histo2[HISTOSZ]; 701 int i, maxidx; 702 703 if (mdb_readvar(&ht, "dbuf_hash_table") == -1) { 704 mdb_warn("failed to read 'dbuf_hash_table'"); 705 return (DCMD_ERR); 706 } 707 708 for (i = 0; i < HISTOSZ; i++) { 709 histo[i] = 0; 710 histo2[i] = 0; 711 } 712 713 ndbufs = 0; 714 for (bucket = 0; bucket < ht.hash_table_mask+1; bucket++) { 715 int len; 716 717 if (mdb_vread(&dbp, sizeof (void *), 718 (uintptr_t)(ht.hash_table+bucket)) == -1) { 719 mdb_warn("failed to read hash bucket %u at %p", 720 bucket, ht.hash_table+bucket); 721 return (DCMD_ERR); 722 } 723 724 len = 0; 725 while (dbp != 0) { 726 if (mdb_vread(&db, sizeof (dmu_buf_impl_t), 727 dbp) == -1) { 728 mdb_warn("failed to read dbuf at %p", dbp); 729 return (DCMD_ERR); 730 } 731 dbp = (uintptr_t)db.db_hash_next; 732 for (i = MIN(len, HISTOSZ - 1); i >= 0; i--) 733 histo2[i]++; 734 len++; 735 ndbufs++; 736 } 737 738 if (len >= HISTOSZ) 739 len = HISTOSZ-1; 740 histo[len]++; 741 } 742 743 mdb_printf("hash table has %llu buckets, %llu dbufs " 744 "(avg %llu buckets/dbuf)\n", 745 ht.hash_table_mask+1, ndbufs, 746 (ht.hash_table_mask+1)/ndbufs); 747 748 mdb_printf("\n"); 749 maxidx = 0; 750 for (i = 0; i < HISTOSZ; i++) 751 if (histo[i] > 0) 752 maxidx = i; 753 mdb_printf("hash chain length number of buckets\n"); 754 for (i = 0; i <= maxidx; i++) 755 mdb_printf("%u %llu\n", i, histo[i]); 756 757 mdb_printf("\n"); 758 maxidx = 0; 759 for (i = 0; i < HISTOSZ; i++) 760 if (histo2[i] > 0) 761 maxidx = i; 762 mdb_printf("hash chain depth number of dbufs\n"); 763 for (i = 0; i <= maxidx; i++) 764 mdb_printf("%u or more %llu %llu%%\n", 765 i, histo2[i], histo2[i]*100/ndbufs); 766 767 768 return (DCMD_OK); 769 } 770 771 #define CHAIN_END 0xffff 772 /* 773 * ::zap_leaf [-v] 774 * 775 * Print a zap_leaf_phys_t, assumed to be 16k 776 */ 777 /* ARGSUSED */ 778 static int 779 zap_leaf(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 780 { 781 char buf[16*1024]; 782 int verbose = B_FALSE; 783 int four = B_FALSE; 784 dmu_buf_t l_dbuf; 785 zap_leaf_t l; 786 zap_leaf_phys_t *zlp = (void *)buf; 787 int i; 788 789 if (mdb_getopts(argc, argv, 790 'v', MDB_OPT_SETBITS, TRUE, &verbose, 791 '4', MDB_OPT_SETBITS, TRUE, &four, 792 NULL) != argc) 793 return (DCMD_USAGE); 794 795 l_dbuf.db_data = zlp; 796 l.l_dbuf = &l_dbuf; 797 l.l_bs = 14; /* assume 16k blocks */ 798 if (four) 799 l.l_bs = 12; 800 801 if (!(flags & DCMD_ADDRSPEC)) { 802 return (DCMD_USAGE); 803 } 804 805 if (mdb_vread(buf, sizeof (buf), addr) == -1) { 806 mdb_warn("failed to read zap_leaf_phys_t at %p", addr); 807 return (DCMD_ERR); 808 } 809 810 if (zlp->l_hdr.lh_block_type != ZBT_LEAF || 811 zlp->l_hdr.lh_magic != ZAP_LEAF_MAGIC) { 812 mdb_warn("This does not appear to be a zap_leaf_phys_t"); 813 return (DCMD_ERR); 814 } 815 816 mdb_printf("zap_leaf_phys_t at %p:\n", addr); 817 mdb_printf(" lh_prefix_len = %u\n", zlp->l_hdr.lh_prefix_len); 818 mdb_printf(" lh_prefix = %llx\n", zlp->l_hdr.lh_prefix); 819 mdb_printf(" lh_nentries = %u\n", zlp->l_hdr.lh_nentries); 820 mdb_printf(" lh_nfree = %u\n", zlp->l_hdr.lh_nfree, 821 zlp->l_hdr.lh_nfree * 100 / (ZAP_LEAF_NUMCHUNKS(&l))); 822 mdb_printf(" lh_freelist = %u\n", zlp->l_hdr.lh_freelist); 823 mdb_printf(" lh_flags = %x (%s)\n", zlp->l_hdr.lh_flags, 824 zlp->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED ? 825 "ENTRIES_CDSORTED" : ""); 826 827 if (verbose) { 828 mdb_printf(" hash table:\n"); 829 for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(&l); i++) { 830 if (zlp->l_hash[i] != CHAIN_END) 831 mdb_printf(" %u: %u\n", i, zlp->l_hash[i]); 832 } 833 } 834 835 mdb_printf(" chunks:\n"); 836 for (i = 0; i < ZAP_LEAF_NUMCHUNKS(&l); i++) { 837 /* LINTED: alignment */ 838 zap_leaf_chunk_t *zlc = &ZAP_LEAF_CHUNK(&l, i); 839 switch (zlc->l_entry.le_type) { 840 case ZAP_CHUNK_FREE: 841 if (verbose) { 842 mdb_printf(" %u: free; lf_next = %u\n", 843 i, zlc->l_free.lf_next); 844 } 845 break; 846 case ZAP_CHUNK_ENTRY: 847 mdb_printf(" %u: entry\n", i); 848 if (verbose) { 849 mdb_printf(" le_next = %u\n", 850 zlc->l_entry.le_next); 851 } 852 mdb_printf(" le_name_chunk = %u\n", 853 zlc->l_entry.le_name_chunk); 854 mdb_printf(" le_name_numints = %u\n", 855 zlc->l_entry.le_name_numints); 856 mdb_printf(" le_value_chunk = %u\n", 857 zlc->l_entry.le_value_chunk); 858 mdb_printf(" le_value_intlen = %u\n", 859 zlc->l_entry.le_value_intlen); 860 mdb_printf(" le_value_numints = %u\n", 861 zlc->l_entry.le_value_numints); 862 mdb_printf(" le_cd = %u\n", 863 zlc->l_entry.le_cd); 864 mdb_printf(" le_hash = %llx\n", 865 zlc->l_entry.le_hash); 866 break; 867 case ZAP_CHUNK_ARRAY: 868 mdb_printf(" %u: array", i); 869 if (strisprint((char *)zlc->l_array.la_array)) 870 mdb_printf(" \"%s\"", zlc->l_array.la_array); 871 mdb_printf("\n"); 872 if (verbose) { 873 int j; 874 mdb_printf(" "); 875 for (j = 0; j < ZAP_LEAF_ARRAY_BYTES; j++) { 876 mdb_printf("%02x ", 877 zlc->l_array.la_array[j]); 878 } 879 mdb_printf("\n"); 880 } 881 if (zlc->l_array.la_next != CHAIN_END) { 882 mdb_printf(" lf_next = %u\n", 883 zlc->l_array.la_next); 884 } 885 break; 886 default: 887 mdb_printf(" %u: undefined type %u\n", 888 zlc->l_entry.le_type); 889 } 890 } 891 892 return (DCMD_OK); 893 } 894 895 typedef struct dbufs_data { 896 mdb_ctf_id_t id; 897 uint64_t objset; 898 uint64_t object; 899 uint64_t level; 900 uint64_t blkid; 901 char *osname; 902 } dbufs_data_t; 903 904 #define DBUFS_UNSET (0xbaddcafedeadbeefULL) 905 906 /* ARGSUSED */ 907 static int 908 dbufs_cb(uintptr_t addr, const void *unknown, void *arg) 909 { 910 dbufs_data_t *data = arg; 911 uintptr_t objset; 912 dmu_buf_t db; 913 uint8_t level; 914 uint64_t blkid; 915 char osname[ZFS_MAX_DATASET_NAME_LEN]; 916 917 if (GETMEMBID(addr, &data->id, db_objset, objset) || 918 GETMEMBID(addr, &data->id, db, db) || 919 GETMEMBID(addr, &data->id, db_level, level) || 920 GETMEMBID(addr, &data->id, db_blkid, blkid)) { 921 return (WALK_ERR); 922 } 923 924 if ((data->objset == DBUFS_UNSET || data->objset == objset) && 925 (data->osname == NULL || (objset_name(objset, osname) == 0 && 926 strcmp(data->osname, osname) == 0)) && 927 (data->object == DBUFS_UNSET || data->object == db.db_object) && 928 (data->level == DBUFS_UNSET || data->level == level) && 929 (data->blkid == DBUFS_UNSET || data->blkid == blkid)) { 930 mdb_printf("%#lr\n", addr); 931 } 932 return (WALK_NEXT); 933 } 934 935 /* ARGSUSED */ 936 static int 937 dbufs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 938 { 939 dbufs_data_t data; 940 char *object = NULL; 941 char *blkid = NULL; 942 943 data.objset = data.object = data.level = data.blkid = DBUFS_UNSET; 944 data.osname = NULL; 945 946 if (mdb_getopts(argc, argv, 947 'O', MDB_OPT_UINT64, &data.objset, 948 'n', MDB_OPT_STR, &data.osname, 949 'o', MDB_OPT_STR, &object, 950 'l', MDB_OPT_UINT64, &data.level, 951 'b', MDB_OPT_STR, &blkid) != argc) { 952 return (DCMD_USAGE); 953 } 954 955 if (object) { 956 if (strcmp(object, "mdn") == 0) { 957 data.object = DMU_META_DNODE_OBJECT; 958 } else { 959 data.object = mdb_strtoull(object); 960 } 961 } 962 963 if (blkid) { 964 if (strcmp(blkid, "bonus") == 0) { 965 data.blkid = DMU_BONUS_BLKID; 966 } else { 967 data.blkid = mdb_strtoull(blkid); 968 } 969 } 970 971 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "dmu_buf_impl", &data.id) == -1) { 972 mdb_warn("couldn't find struct dmu_buf_impl_t"); 973 return (DCMD_ERR); 974 } 975 976 if (mdb_walk("dmu_buf_impl_t", dbufs_cb, &data) != 0) { 977 mdb_warn("can't walk dbufs"); 978 return (DCMD_ERR); 979 } 980 981 return (DCMD_OK); 982 } 983 984 typedef struct abuf_find_data { 985 dva_t dva; 986 mdb_ctf_id_t id; 987 } abuf_find_data_t; 988 989 /* ARGSUSED */ 990 static int 991 abuf_find_cb(uintptr_t addr, const void *unknown, void *arg) 992 { 993 abuf_find_data_t *data = arg; 994 dva_t dva; 995 996 if (GETMEMBID(addr, &data->id, b_dva, dva)) { 997 return (WALK_ERR); 998 } 999 1000 if (dva.dva_word[0] == data->dva.dva_word[0] && 1001 dva.dva_word[1] == data->dva.dva_word[1]) { 1002 mdb_printf("%#lr\n", addr); 1003 } 1004 return (WALK_NEXT); 1005 } 1006 1007 /* ARGSUSED */ 1008 static int 1009 abuf_find(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1010 { 1011 abuf_find_data_t data; 1012 GElf_Sym sym; 1013 int i; 1014 const char *syms[] = { 1015 "ARC_mru", 1016 "ARC_mru_ghost", 1017 "ARC_mfu", 1018 "ARC_mfu_ghost", 1019 }; 1020 1021 if (argc != 2) 1022 return (DCMD_USAGE); 1023 1024 for (i = 0; i < 2; i ++) { 1025 switch (argv[i].a_type) { 1026 case MDB_TYPE_STRING: 1027 data.dva.dva_word[i] = mdb_strtoull(argv[i].a_un.a_str); 1028 break; 1029 case MDB_TYPE_IMMEDIATE: 1030 data.dva.dva_word[i] = argv[i].a_un.a_val; 1031 break; 1032 default: 1033 return (DCMD_USAGE); 1034 } 1035 } 1036 1037 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "arc_buf_hdr", &data.id) == -1) { 1038 mdb_warn("couldn't find struct arc_buf_hdr"); 1039 return (DCMD_ERR); 1040 } 1041 1042 for (i = 0; i < sizeof (syms) / sizeof (syms[0]); i++) { 1043 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, syms[i], &sym)) { 1044 mdb_warn("can't find symbol %s", syms[i]); 1045 return (DCMD_ERR); 1046 } 1047 1048 if (mdb_pwalk("list", abuf_find_cb, &data, sym.st_value) != 0) { 1049 mdb_warn("can't walk %s", syms[i]); 1050 return (DCMD_ERR); 1051 } 1052 } 1053 1054 return (DCMD_OK); 1055 } 1056 1057 1058 typedef struct dbgmsg_arg { 1059 boolean_t da_verbose; 1060 boolean_t da_address; 1061 } dbgmsg_arg_t; 1062 1063 /* ARGSUSED */ 1064 static int 1065 dbgmsg_cb(uintptr_t addr, const void *unknown, void *arg) 1066 { 1067 static mdb_ctf_id_t id; 1068 static boolean_t gotid; 1069 static ulong_t off; 1070 1071 dbgmsg_arg_t *da = arg; 1072 time_t timestamp; 1073 char buf[1024]; 1074 1075 if (!gotid) { 1076 if (mdb_ctf_lookup_by_name(ZFS_STRUCT "zfs_dbgmsg", &id) == 1077 -1) { 1078 mdb_warn("couldn't find struct zfs_dbgmsg"); 1079 return (WALK_ERR); 1080 } 1081 gotid = TRUE; 1082 if (mdb_ctf_offsetof(id, "zdm_msg", &off) == -1) { 1083 mdb_warn("couldn't find zdm_msg"); 1084 return (WALK_ERR); 1085 } 1086 off /= 8; 1087 } 1088 1089 1090 if (GETMEMBID(addr, &id, zdm_timestamp, timestamp)) { 1091 return (WALK_ERR); 1092 } 1093 1094 if (mdb_readstr(buf, sizeof (buf), addr + off) == -1) { 1095 mdb_warn("failed to read zdm_msg at %p\n", addr + off); 1096 return (DCMD_ERR); 1097 } 1098 1099 if (da->da_address) 1100 mdb_printf("%p ", addr); 1101 if (da->da_verbose) 1102 mdb_printf("%Y ", timestamp); 1103 1104 mdb_printf("%s\n", buf); 1105 1106 if (da->da_verbose) 1107 (void) mdb_call_dcmd("whatis", addr, DCMD_ADDRSPEC, 0, NULL); 1108 1109 return (WALK_NEXT); 1110 } 1111 1112 /* ARGSUSED */ 1113 static int 1114 dbgmsg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1115 { 1116 GElf_Sym sym; 1117 dbgmsg_arg_t da = { 0 }; 1118 1119 if (mdb_getopts(argc, argv, 1120 'v', MDB_OPT_SETBITS, B_TRUE, &da.da_verbose, 1121 'a', MDB_OPT_SETBITS, B_TRUE, &da.da_address, 1122 NULL) != argc) 1123 return (DCMD_USAGE); 1124 1125 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "zfs_dbgmsgs", &sym)) { 1126 mdb_warn("can't find zfs_dbgmsgs"); 1127 return (DCMD_ERR); 1128 } 1129 1130 if (mdb_pwalk("list", dbgmsg_cb, &da, sym.st_value) != 0) { 1131 mdb_warn("can't walk zfs_dbgmsgs"); 1132 return (DCMD_ERR); 1133 } 1134 1135 return (DCMD_OK); 1136 } 1137 1138 /*ARGSUSED*/ 1139 static int 1140 arc_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1141 { 1142 kstat_named_t *stats; 1143 GElf_Sym sym; 1144 int nstats, i; 1145 uint_t opt_a = FALSE; 1146 uint_t opt_b = FALSE; 1147 uint_t shift = 0; 1148 const char *suffix; 1149 1150 static const char *bytestats[] = { 1151 "p", "c", "c_min", "c_max", "size", "duplicate_buffers_size", 1152 "arc_meta_used", "arc_meta_limit", "arc_meta_max", 1153 "arc_meta_min", "hdr_size", "data_size", "metadata_size", 1154 "other_size", "anon_size", "anon_evictable_data", 1155 "anon_evictable_metadata", "mru_size", "mru_evictable_data", 1156 "mru_evictable_metadata", "mru_ghost_size", 1157 "mru_ghost_evictable_data", "mru_ghost_evictable_metadata", 1158 "mfu_size", "mfu_evictable_data", "mfu_evictable_metadata", 1159 "mfu_ghost_size", "mfu_ghost_evictable_data", 1160 "mfu_ghost_evictable_metadata", "evict_l2_cached", 1161 "evict_l2_eligible", "evict_l2_ineligible", "l2_read_bytes", 1162 "l2_write_bytes", "l2_size", "l2_asize", "l2_hdr_size", 1163 "compressed_size", "uncompressed_size", "overhead_size", 1164 NULL 1165 }; 1166 1167 static const char *extras[] = { 1168 "arc_no_grow", "arc_tempreserve", 1169 NULL 1170 }; 1171 1172 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "arc_stats", &sym) == -1) { 1173 mdb_warn("failed to find 'arc_stats'"); 1174 return (DCMD_ERR); 1175 } 1176 1177 stats = mdb_zalloc(sym.st_size, UM_SLEEP | UM_GC); 1178 1179 if (mdb_vread(stats, sym.st_size, sym.st_value) == -1) { 1180 mdb_warn("couldn't read 'arc_stats' at %p", sym.st_value); 1181 return (DCMD_ERR); 1182 } 1183 1184 nstats = sym.st_size / sizeof (kstat_named_t); 1185 1186 /* NB: -a / opt_a are ignored for backwards compatability */ 1187 if (mdb_getopts(argc, argv, 1188 'a', MDB_OPT_SETBITS, TRUE, &opt_a, 1189 'b', MDB_OPT_SETBITS, TRUE, &opt_b, 1190 'k', MDB_OPT_SETBITS, 10, &shift, 1191 'm', MDB_OPT_SETBITS, 20, &shift, 1192 'g', MDB_OPT_SETBITS, 30, &shift, 1193 NULL) != argc) 1194 return (DCMD_USAGE); 1195 1196 if (!opt_b && !shift) 1197 shift = 20; 1198 1199 switch (shift) { 1200 case 0: 1201 suffix = "B"; 1202 break; 1203 case 10: 1204 suffix = "KB"; 1205 break; 1206 case 20: 1207 suffix = "MB"; 1208 break; 1209 case 30: 1210 suffix = "GB"; 1211 break; 1212 default: 1213 suffix = "XX"; 1214 } 1215 1216 for (i = 0; i < nstats; i++) { 1217 int j; 1218 boolean_t bytes = B_FALSE; 1219 1220 for (j = 0; bytestats[j]; j++) { 1221 if (strcmp(stats[i].name, bytestats[j]) == 0) { 1222 bytes = B_TRUE; 1223 break; 1224 } 1225 } 1226 1227 if (bytes) { 1228 mdb_printf("%-25s = %9llu %s\n", stats[i].name, 1229 stats[i].value.ui64 >> shift, suffix); 1230 } else { 1231 mdb_printf("%-25s = %9llu\n", stats[i].name, 1232 stats[i].value.ui64); 1233 } 1234 } 1235 1236 for (i = 0; extras[i]; i++) { 1237 uint64_t buf; 1238 1239 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, extras[i], &sym) == -1) { 1240 mdb_warn("failed to find '%s'", extras[i]); 1241 return (DCMD_ERR); 1242 } 1243 1244 if (sym.st_size != sizeof (uint64_t) && 1245 sym.st_size != sizeof (uint32_t)) { 1246 mdb_warn("expected scalar for variable '%s'\n", 1247 extras[i]); 1248 return (DCMD_ERR); 1249 } 1250 1251 if (mdb_vread(&buf, sym.st_size, sym.st_value) == -1) { 1252 mdb_warn("couldn't read '%s'", extras[i]); 1253 return (DCMD_ERR); 1254 } 1255 1256 mdb_printf("%-25s = ", extras[i]); 1257 1258 /* NB: all the 64-bit extras happen to be byte counts */ 1259 if (sym.st_size == sizeof (uint64_t)) 1260 mdb_printf("%9llu %s\n", buf >> shift, suffix); 1261 1262 if (sym.st_size == sizeof (uint32_t)) 1263 mdb_printf("%9d\n", *((uint32_t *)&buf)); 1264 } 1265 return (DCMD_OK); 1266 } 1267 1268 typedef struct mdb_spa_print { 1269 pool_state_t spa_state; 1270 char spa_name[ZFS_MAX_DATASET_NAME_LEN]; 1271 uintptr_t spa_normal_class; 1272 } mdb_spa_print_t; 1273 1274 1275 const char histo_stars[] = "****************************************"; 1276 const int histo_width = sizeof (histo_stars) - 1; 1277 1278 static void 1279 dump_histogram(const uint64_t *histo, int size, int offset) 1280 { 1281 int i; 1282 int minidx = size - 1; 1283 int maxidx = 0; 1284 uint64_t max = 0; 1285 1286 for (i = 0; i < size; i++) { 1287 if (histo[i] > max) 1288 max = histo[i]; 1289 if (histo[i] > 0 && i > maxidx) 1290 maxidx = i; 1291 if (histo[i] > 0 && i < minidx) 1292 minidx = i; 1293 } 1294 1295 if (max < histo_width) 1296 max = histo_width; 1297 1298 for (i = minidx; i <= maxidx; i++) { 1299 mdb_printf("%3u: %6llu %s\n", 1300 i + offset, (u_longlong_t)histo[i], 1301 &histo_stars[(max - histo[i]) * histo_width / max]); 1302 } 1303 } 1304 1305 typedef struct mdb_metaslab_class { 1306 uint64_t mc_histogram[RANGE_TREE_HISTOGRAM_SIZE]; 1307 } mdb_metaslab_class_t; 1308 1309 /* 1310 * spa_class_histogram(uintptr_t class_addr) 1311 * 1312 * Prints free space histogram for a device class 1313 * 1314 * Returns DCMD_OK, or DCMD_ERR. 1315 */ 1316 static int 1317 spa_class_histogram(uintptr_t class_addr) 1318 { 1319 mdb_metaslab_class_t mc; 1320 if (mdb_ctf_vread(&mc, "metaslab_class_t", 1321 "mdb_metaslab_class_t", class_addr, 0) == -1) 1322 return (DCMD_ERR); 1323 1324 mdb_inc_indent(4); 1325 dump_histogram(mc.mc_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0); 1326 mdb_dec_indent(4); 1327 return (DCMD_OK); 1328 } 1329 1330 /* 1331 * ::spa 1332 * 1333 * -c Print configuration information as well 1334 * -v Print vdev state 1335 * -e Print vdev error stats 1336 * -m Print vdev metaslab info 1337 * -M print vdev metaslab group info 1338 * -h Print histogram info (must be combined with -m or -M) 1339 * 1340 * Print a summarized spa_t. When given no arguments, prints out a table of all 1341 * active pools on the system. 1342 */ 1343 /* ARGSUSED */ 1344 static int 1345 spa_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1346 { 1347 const char *statetab[] = { "ACTIVE", "EXPORTED", "DESTROYED", 1348 "SPARE", "L2CACHE", "UNINIT", "UNAVAIL", "POTENTIAL" }; 1349 const char *state; 1350 int spa_flags = 0; 1351 1352 if (mdb_getopts(argc, argv, 1353 'c', MDB_OPT_SETBITS, SPA_FLAG_CONFIG, &spa_flags, 1354 'v', MDB_OPT_SETBITS, SPA_FLAG_VDEVS, &spa_flags, 1355 'e', MDB_OPT_SETBITS, SPA_FLAG_ERRORS, &spa_flags, 1356 'M', MDB_OPT_SETBITS, SPA_FLAG_METASLAB_GROUPS, &spa_flags, 1357 'm', MDB_OPT_SETBITS, SPA_FLAG_METASLABS, &spa_flags, 1358 'h', MDB_OPT_SETBITS, SPA_FLAG_HISTOGRAMS, &spa_flags, 1359 NULL) != argc) 1360 return (DCMD_USAGE); 1361 1362 if (!(flags & DCMD_ADDRSPEC)) { 1363 if (mdb_walk_dcmd("spa", "spa", argc, argv) == -1) { 1364 mdb_warn("can't walk spa"); 1365 return (DCMD_ERR); 1366 } 1367 1368 return (DCMD_OK); 1369 } 1370 1371 if (flags & DCMD_PIPE_OUT) { 1372 mdb_printf("%#lr\n", addr); 1373 return (DCMD_OK); 1374 } 1375 1376 if (DCMD_HDRSPEC(flags)) 1377 mdb_printf("%<u>%-?s %9s %-*s%</u>\n", "ADDR", "STATE", 1378 sizeof (uintptr_t) == 4 ? 60 : 52, "NAME"); 1379 1380 mdb_spa_print_t spa; 1381 if (mdb_ctf_vread(&spa, "spa_t", "mdb_spa_print_t", addr, 0) == -1) 1382 return (DCMD_ERR); 1383 1384 if (spa.spa_state < 0 || spa.spa_state > POOL_STATE_UNAVAIL) 1385 state = "UNKNOWN"; 1386 else 1387 state = statetab[spa.spa_state]; 1388 1389 mdb_printf("%0?p %9s %s\n", addr, state, spa.spa_name); 1390 if (spa_flags & SPA_FLAG_HISTOGRAMS) 1391 spa_class_histogram(spa.spa_normal_class); 1392 1393 if (spa_flags & SPA_FLAG_CONFIG) { 1394 mdb_printf("\n"); 1395 mdb_inc_indent(4); 1396 if (mdb_call_dcmd("spa_config", addr, flags, 0, 1397 NULL) != DCMD_OK) 1398 return (DCMD_ERR); 1399 mdb_dec_indent(4); 1400 } 1401 1402 if (spa_flags & SPA_FLAG_ALL_VDEV) { 1403 mdb_arg_t v; 1404 char opts[100] = "-"; 1405 int args = 1406 (spa_flags | SPA_FLAG_VDEVS) == SPA_FLAG_VDEVS ? 0 : 1; 1407 1408 if (spa_flags & SPA_FLAG_ERRORS) 1409 strcat(opts, "e"); 1410 if (spa_flags & SPA_FLAG_METASLABS) 1411 strcat(opts, "m"); 1412 if (spa_flags & SPA_FLAG_METASLAB_GROUPS) 1413 strcat(opts, "M"); 1414 if (spa_flags & SPA_FLAG_HISTOGRAMS) 1415 strcat(opts, "h"); 1416 1417 v.a_type = MDB_TYPE_STRING; 1418 v.a_un.a_str = opts; 1419 1420 mdb_printf("\n"); 1421 mdb_inc_indent(4); 1422 if (mdb_call_dcmd("spa_vdevs", addr, flags, args, 1423 &v) != DCMD_OK) 1424 return (DCMD_ERR); 1425 mdb_dec_indent(4); 1426 } 1427 1428 return (DCMD_OK); 1429 } 1430 1431 typedef struct mdb_spa_config_spa { 1432 uintptr_t spa_config; 1433 } mdb_spa_config_spa_t; 1434 1435 /* 1436 * ::spa_config 1437 * 1438 * Given a spa_t, print the configuration information stored in spa_config. 1439 * Since it's just an nvlist, format it as an indented list of name=value pairs. 1440 * We simply read the value of spa_config and pass off to ::nvlist. 1441 */ 1442 /* ARGSUSED */ 1443 static int 1444 spa_print_config(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1445 { 1446 mdb_spa_config_spa_t spa; 1447 1448 if (argc != 0 || !(flags & DCMD_ADDRSPEC)) 1449 return (DCMD_USAGE); 1450 1451 if (mdb_ctf_vread(&spa, ZFS_STRUCT "spa", "mdb_spa_config_spa_t", 1452 addr, 0) == -1) 1453 return (DCMD_ERR); 1454 1455 if (spa.spa_config == 0) { 1456 mdb_printf("(none)\n"); 1457 return (DCMD_OK); 1458 } 1459 1460 return (mdb_call_dcmd("nvlist", spa.spa_config, flags, 1461 0, NULL)); 1462 } 1463 1464 1465 1466 typedef struct mdb_range_tree { 1467 uint64_t rt_space; 1468 } mdb_range_tree_t; 1469 1470 typedef struct mdb_metaslab_group { 1471 uint64_t mg_fragmentation; 1472 uint64_t mg_histogram[RANGE_TREE_HISTOGRAM_SIZE]; 1473 uintptr_t mg_vd; 1474 } mdb_metaslab_group_t; 1475 1476 typedef struct mdb_metaslab { 1477 uint64_t ms_id; 1478 uint64_t ms_start; 1479 uint64_t ms_size; 1480 int64_t ms_deferspace; 1481 uint64_t ms_fragmentation; 1482 uint64_t ms_weight; 1483 uintptr_t ms_allocating[TXG_SIZE]; 1484 uintptr_t ms_checkpointing; 1485 uintptr_t ms_freeing; 1486 uintptr_t ms_freed; 1487 uintptr_t ms_allocatable; 1488 uintptr_t ms_sm; 1489 } mdb_metaslab_t; 1490 1491 typedef struct mdb_space_map_phys_t { 1492 int64_t smp_alloc; 1493 uint64_t smp_histogram[SPACE_MAP_HISTOGRAM_SIZE]; 1494 } mdb_space_map_phys_t; 1495 1496 typedef struct mdb_space_map { 1497 uint64_t sm_size; 1498 uint8_t sm_shift; 1499 uintptr_t sm_phys; 1500 } mdb_space_map_t; 1501 1502 typedef struct mdb_vdev { 1503 uintptr_t vdev_path; 1504 uintptr_t vdev_ms; 1505 uintptr_t vdev_ops; 1506 uint64_t vdev_ms_count; 1507 uint64_t vdev_id; 1508 vdev_stat_t vdev_stat; 1509 } mdb_vdev_t; 1510 1511 typedef struct mdb_vdev_ops { 1512 char vdev_op_type[16]; 1513 } mdb_vdev_ops_t; 1514 1515 static int 1516 metaslab_stats(uintptr_t addr, int spa_flags) 1517 { 1518 mdb_vdev_t vdev; 1519 uintptr_t *vdev_ms; 1520 1521 if (mdb_ctf_vread(&vdev, "vdev_t", "mdb_vdev_t", 1522 (uintptr_t)addr, 0) == -1) { 1523 mdb_warn("failed to read vdev at %p\n", addr); 1524 return (DCMD_ERR); 1525 } 1526 1527 mdb_inc_indent(4); 1528 mdb_printf("%<u>%-?s %6s %20s %10s %9s%</u>\n", "ADDR", "ID", 1529 "OFFSET", "FREE", "FRAGMENTATION"); 1530 1531 vdev_ms = mdb_alloc(vdev.vdev_ms_count * sizeof (void *), 1532 UM_SLEEP | UM_GC); 1533 if (mdb_vread(vdev_ms, vdev.vdev_ms_count * sizeof (void *), 1534 (uintptr_t)vdev.vdev_ms) == -1) { 1535 mdb_warn("failed to read vdev_ms at %p\n", vdev.vdev_ms); 1536 return (DCMD_ERR); 1537 } 1538 1539 for (int m = 0; m < vdev.vdev_ms_count; m++) { 1540 mdb_metaslab_t ms; 1541 mdb_space_map_t sm = { 0 }; 1542 mdb_space_map_phys_t smp; 1543 char free[MDB_NICENUM_BUFLEN]; 1544 1545 if (mdb_ctf_vread(&ms, "metaslab_t", "mdb_metaslab_t", 1546 (uintptr_t)vdev_ms[m], 0) == -1) 1547 return (DCMD_ERR); 1548 1549 if (ms.ms_sm != 0 && 1550 mdb_ctf_vread(&sm, "space_map_t", "mdb_space_map_t", 1551 ms.ms_sm, 0) == -1) 1552 return (DCMD_ERR); 1553 1554 if (sm.sm_phys != 0) { 1555 (void) mdb_ctf_vread(&smp, "space_map_phys_t", 1556 "mdb_space_map_phys_t", sm.sm_phys, 0); 1557 mdb_nicenum(ms.ms_size - smp.smp_alloc, free); 1558 } else { 1559 (void) mdb_snprintf(free, MDB_NICENUM_BUFLEN, "-"); 1560 } 1561 1562 mdb_printf("%0?p %6llu %20llx %10s ", vdev_ms[m], ms.ms_id, 1563 ms.ms_start, free); 1564 if (ms.ms_fragmentation == ZFS_FRAG_INVALID) 1565 mdb_printf("%9s\n", "-"); 1566 else 1567 mdb_printf("%9llu%%\n", ms.ms_fragmentation); 1568 1569 if ((spa_flags & SPA_FLAG_HISTOGRAMS) && ms.ms_sm != 0) { 1570 if (sm.sm_phys == 0) 1571 continue; 1572 1573 dump_histogram(smp.smp_histogram, 1574 SPACE_MAP_HISTOGRAM_SIZE, sm.sm_shift); 1575 } 1576 } 1577 mdb_dec_indent(4); 1578 return (DCMD_OK); 1579 } 1580 1581 static int 1582 metaslab_group_stats(uintptr_t addr, int spa_flags) 1583 { 1584 mdb_metaslab_group_t mg; 1585 if (mdb_ctf_vread(&mg, "metaslab_group_t", "mdb_metaslab_group_t", 1586 (uintptr_t)addr, 0) == -1) { 1587 mdb_warn("failed to read vdev_mg at %p\n", addr); 1588 return (DCMD_ERR); 1589 } 1590 1591 mdb_inc_indent(4); 1592 mdb_printf("%<u>%-?s %15s%</u>\n", "ADDR", "FRAGMENTATION"); 1593 if (mg.mg_fragmentation == ZFS_FRAG_INVALID) 1594 mdb_printf("%0?p %15s\n", addr, "-"); 1595 else 1596 mdb_printf("%0?p %15llu%%\n", addr, mg.mg_fragmentation); 1597 1598 if (spa_flags & SPA_FLAG_HISTOGRAMS) 1599 dump_histogram(mg.mg_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0); 1600 mdb_dec_indent(4); 1601 return (DCMD_OK); 1602 } 1603 1604 /* 1605 * ::vdev 1606 * 1607 * Print out a summarized vdev_t, in the following form: 1608 * 1609 * ADDR STATE AUX DESC 1610 * fffffffbcde23df0 HEALTHY - /dev/dsk/c0t0d0 1611 * 1612 * If '-r' is specified, recursively visit all children. 1613 * 1614 * With '-e', the statistics associated with the vdev are printed as well. 1615 */ 1616 static int 1617 do_print_vdev(uintptr_t addr, int flags, int depth, boolean_t recursive, 1618 int spa_flags) 1619 { 1620 vdev_t vdev; 1621 char desc[MAXNAMELEN]; 1622 int c, children; 1623 uintptr_t *child; 1624 const char *state, *aux; 1625 1626 if (mdb_vread(&vdev, sizeof (vdev), (uintptr_t)addr) == -1) { 1627 mdb_warn("failed to read vdev_t at %p\n", (uintptr_t)addr); 1628 return (DCMD_ERR); 1629 } 1630 1631 if (flags & DCMD_PIPE_OUT) { 1632 mdb_printf("%#lr\n", addr); 1633 } else { 1634 if (vdev.vdev_path != NULL) { 1635 if (mdb_readstr(desc, sizeof (desc), 1636 (uintptr_t)vdev.vdev_path) == -1) { 1637 mdb_warn("failed to read vdev_path at %p\n", 1638 vdev.vdev_path); 1639 return (DCMD_ERR); 1640 } 1641 } else if (vdev.vdev_ops != NULL) { 1642 vdev_ops_t ops; 1643 if (mdb_vread(&ops, sizeof (ops), 1644 (uintptr_t)vdev.vdev_ops) == -1) { 1645 mdb_warn("failed to read vdev_ops at %p\n", 1646 vdev.vdev_ops); 1647 return (DCMD_ERR); 1648 } 1649 (void) strcpy(desc, ops.vdev_op_type); 1650 } else { 1651 (void) strcpy(desc, "<unknown>"); 1652 } 1653 1654 if (depth == 0 && DCMD_HDRSPEC(flags)) 1655 mdb_printf("%<u>%-?s %-9s %-12s %-*s%</u>\n", 1656 "ADDR", "STATE", "AUX", 1657 sizeof (uintptr_t) == 4 ? 43 : 35, 1658 "DESCRIPTION"); 1659 1660 mdb_printf("%0?p ", addr); 1661 1662 switch (vdev.vdev_state) { 1663 case VDEV_STATE_CLOSED: 1664 state = "CLOSED"; 1665 break; 1666 case VDEV_STATE_OFFLINE: 1667 state = "OFFLINE"; 1668 break; 1669 case VDEV_STATE_CANT_OPEN: 1670 state = "CANT_OPEN"; 1671 break; 1672 case VDEV_STATE_DEGRADED: 1673 state = "DEGRADED"; 1674 break; 1675 case VDEV_STATE_HEALTHY: 1676 state = "HEALTHY"; 1677 break; 1678 case VDEV_STATE_REMOVED: 1679 state = "REMOVED"; 1680 break; 1681 case VDEV_STATE_FAULTED: 1682 state = "FAULTED"; 1683 break; 1684 default: 1685 state = "UNKNOWN"; 1686 break; 1687 } 1688 1689 switch (vdev.vdev_stat.vs_aux) { 1690 case VDEV_AUX_NONE: 1691 aux = "-"; 1692 break; 1693 case VDEV_AUX_OPEN_FAILED: 1694 aux = "OPEN_FAILED"; 1695 break; 1696 case VDEV_AUX_CORRUPT_DATA: 1697 aux = "CORRUPT_DATA"; 1698 break; 1699 case VDEV_AUX_NO_REPLICAS: 1700 aux = "NO_REPLICAS"; 1701 break; 1702 case VDEV_AUX_BAD_GUID_SUM: 1703 aux = "BAD_GUID_SUM"; 1704 break; 1705 case VDEV_AUX_TOO_SMALL: 1706 aux = "TOO_SMALL"; 1707 break; 1708 case VDEV_AUX_BAD_LABEL: 1709 aux = "BAD_LABEL"; 1710 break; 1711 case VDEV_AUX_VERSION_NEWER: 1712 aux = "VERS_NEWER"; 1713 break; 1714 case VDEV_AUX_VERSION_OLDER: 1715 aux = "VERS_OLDER"; 1716 break; 1717 case VDEV_AUX_UNSUP_FEAT: 1718 aux = "UNSUP_FEAT"; 1719 break; 1720 case VDEV_AUX_SPARED: 1721 aux = "SPARED"; 1722 break; 1723 case VDEV_AUX_ERR_EXCEEDED: 1724 aux = "ERR_EXCEEDED"; 1725 break; 1726 case VDEV_AUX_IO_FAILURE: 1727 aux = "IO_FAILURE"; 1728 break; 1729 case VDEV_AUX_BAD_LOG: 1730 aux = "BAD_LOG"; 1731 break; 1732 case VDEV_AUX_EXTERNAL: 1733 aux = "EXTERNAL"; 1734 break; 1735 case VDEV_AUX_SPLIT_POOL: 1736 aux = "SPLIT_POOL"; 1737 break; 1738 case VDEV_AUX_CHILDREN_OFFLINE: 1739 aux = "CHILDREN_OFFLINE"; 1740 break; 1741 default: 1742 aux = "UNKNOWN"; 1743 break; 1744 } 1745 1746 mdb_printf("%-9s %-12s %*s%s\n", state, aux, depth, "", desc); 1747 1748 if (spa_flags & SPA_FLAG_ERRORS) { 1749 vdev_stat_t *vs = &vdev.vdev_stat; 1750 int i; 1751 1752 mdb_inc_indent(4); 1753 mdb_printf("\n"); 1754 mdb_printf("%<u> %12s %12s %12s %12s " 1755 "%12s%</u>\n", "READ", "WRITE", "FREE", "CLAIM", 1756 "IOCTL"); 1757 mdb_printf("OPS "); 1758 for (i = 1; i < ZIO_TYPES; i++) 1759 mdb_printf("%11#llx%s", vs->vs_ops[i], 1760 i == ZIO_TYPES - 1 ? "" : " "); 1761 mdb_printf("\n"); 1762 mdb_printf("BYTES "); 1763 for (i = 1; i < ZIO_TYPES; i++) 1764 mdb_printf("%11#llx%s", vs->vs_bytes[i], 1765 i == ZIO_TYPES - 1 ? "" : " "); 1766 1767 1768 mdb_printf("\n"); 1769 mdb_printf("EREAD %10#llx\n", vs->vs_read_errors); 1770 mdb_printf("EWRITE %10#llx\n", vs->vs_write_errors); 1771 mdb_printf("ECKSUM %10#llx\n", 1772 vs->vs_checksum_errors); 1773 mdb_dec_indent(4); 1774 mdb_printf("\n"); 1775 } 1776 1777 if (spa_flags & SPA_FLAG_METASLAB_GROUPS && 1778 vdev.vdev_mg != NULL) { 1779 metaslab_group_stats((uintptr_t)vdev.vdev_mg, 1780 spa_flags); 1781 } 1782 if (spa_flags & SPA_FLAG_METASLABS && vdev.vdev_ms != NULL) { 1783 metaslab_stats((uintptr_t)addr, spa_flags); 1784 } 1785 } 1786 1787 children = vdev.vdev_children; 1788 1789 if (children == 0 || !recursive) 1790 return (DCMD_OK); 1791 1792 child = mdb_alloc(children * sizeof (void *), UM_SLEEP | UM_GC); 1793 if (mdb_vread(child, children * sizeof (void *), 1794 (uintptr_t)vdev.vdev_child) == -1) { 1795 mdb_warn("failed to read vdev children at %p", vdev.vdev_child); 1796 return (DCMD_ERR); 1797 } 1798 1799 for (c = 0; c < children; c++) { 1800 if (do_print_vdev(child[c], flags, depth + 2, recursive, 1801 spa_flags)) { 1802 return (DCMD_ERR); 1803 } 1804 } 1805 1806 return (DCMD_OK); 1807 } 1808 1809 static int 1810 vdev_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1811 { 1812 uint64_t depth = 0; 1813 boolean_t recursive = B_FALSE; 1814 int spa_flags = 0; 1815 1816 if (mdb_getopts(argc, argv, 1817 'e', MDB_OPT_SETBITS, SPA_FLAG_ERRORS, &spa_flags, 1818 'm', MDB_OPT_SETBITS, SPA_FLAG_METASLABS, &spa_flags, 1819 'M', MDB_OPT_SETBITS, SPA_FLAG_METASLAB_GROUPS, &spa_flags, 1820 'h', MDB_OPT_SETBITS, SPA_FLAG_HISTOGRAMS, &spa_flags, 1821 'r', MDB_OPT_SETBITS, TRUE, &recursive, 1822 'd', MDB_OPT_UINT64, &depth, NULL) != argc) 1823 return (DCMD_USAGE); 1824 1825 if (!(flags & DCMD_ADDRSPEC)) { 1826 mdb_warn("no vdev_t address given\n"); 1827 return (DCMD_ERR); 1828 } 1829 1830 return (do_print_vdev(addr, flags, (int)depth, recursive, spa_flags)); 1831 } 1832 1833 typedef struct mdb_metaslab_alloc_trace { 1834 uintptr_t mat_mg; 1835 uintptr_t mat_msp; 1836 uint64_t mat_size; 1837 uint64_t mat_weight; 1838 uint64_t mat_offset; 1839 uint32_t mat_dva_id; 1840 int mat_allocator; 1841 } mdb_metaslab_alloc_trace_t; 1842 1843 static void 1844 metaslab_print_weight(uint64_t weight) 1845 { 1846 char buf[100]; 1847 1848 if (WEIGHT_IS_SPACEBASED(weight)) { 1849 mdb_nicenum( 1850 weight & ~(METASLAB_ACTIVE_MASK | METASLAB_WEIGHT_TYPE), 1851 buf); 1852 } else { 1853 char size[MDB_NICENUM_BUFLEN]; 1854 mdb_nicenum(1ULL << WEIGHT_GET_INDEX(weight), size); 1855 (void) mdb_snprintf(buf, sizeof (buf), "%llu x %s", 1856 WEIGHT_GET_COUNT(weight), size); 1857 } 1858 mdb_printf("%11s ", buf); 1859 } 1860 1861 /* ARGSUSED */ 1862 static int 1863 metaslab_weight(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1864 { 1865 uint64_t weight = 0; 1866 char active; 1867 1868 if (argc == 0 && (flags & DCMD_ADDRSPEC)) { 1869 if (mdb_vread(&weight, sizeof (uint64_t), addr) == -1) { 1870 mdb_warn("failed to read weight at %p\n", addr); 1871 return (DCMD_ERR); 1872 } 1873 } else if (argc == 1 && !(flags & DCMD_ADDRSPEC)) { 1874 weight = (argv[0].a_type == MDB_TYPE_IMMEDIATE) ? 1875 argv[0].a_un.a_val : mdb_strtoull(argv[0].a_un.a_str); 1876 } else { 1877 return (DCMD_USAGE); 1878 } 1879 1880 if (DCMD_HDRSPEC(flags)) { 1881 mdb_printf("%<u>%-6s %9s %9s%</u>\n", 1882 "ACTIVE", "ALGORITHM", "WEIGHT"); 1883 } 1884 1885 if (weight & METASLAB_WEIGHT_PRIMARY) 1886 active = 'P'; 1887 else if (weight & METASLAB_WEIGHT_SECONDARY) 1888 active = 'S'; 1889 else 1890 active = '-'; 1891 mdb_printf("%6c %8s ", active, 1892 WEIGHT_IS_SPACEBASED(weight) ? "SPACE" : "SEGMENT"); 1893 metaslab_print_weight(weight); 1894 mdb_printf("\n"); 1895 1896 return (DCMD_OK); 1897 } 1898 1899 /* ARGSUSED */ 1900 static int 1901 metaslab_trace(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1902 { 1903 mdb_metaslab_alloc_trace_t mat; 1904 mdb_metaslab_group_t mg = { 0 }; 1905 char result_type[100]; 1906 1907 if (mdb_ctf_vread(&mat, "metaslab_alloc_trace_t", 1908 "mdb_metaslab_alloc_trace_t", addr, 0) == -1) { 1909 return (DCMD_ERR); 1910 } 1911 1912 if (!(flags & DCMD_PIPE_OUT) && DCMD_HDRSPEC(flags)) { 1913 mdb_printf("%<u>%6s %6s %8s %11s %11s %18s %18s%</u>\n", 1914 "MSID", "DVA", "ASIZE", "ALLOCATOR", "WEIGHT", "RESULT", 1915 "VDEV"); 1916 } 1917 1918 if (mat.mat_msp != 0) { 1919 mdb_metaslab_t ms; 1920 1921 if (mdb_ctf_vread(&ms, "metaslab_t", "mdb_metaslab_t", 1922 mat.mat_msp, 0) == -1) { 1923 return (DCMD_ERR); 1924 } 1925 mdb_printf("%6llu ", ms.ms_id); 1926 } else { 1927 mdb_printf("%6s ", "-"); 1928 } 1929 1930 mdb_printf("%6d %8llx %11llx ", mat.mat_dva_id, mat.mat_size, 1931 mat.mat_allocator); 1932 1933 metaslab_print_weight(mat.mat_weight); 1934 1935 if ((int64_t)mat.mat_offset < 0) { 1936 if (enum_lookup("enum trace_alloc_type", mat.mat_offset, 1937 "TRACE_", sizeof (result_type), result_type) == -1) { 1938 mdb_warn("Could not find enum for trace_alloc_type"); 1939 return (DCMD_ERR); 1940 } 1941 mdb_printf("%18s ", result_type); 1942 } else { 1943 mdb_printf("%<b>%18llx%</b> ", mat.mat_offset); 1944 } 1945 1946 if (mat.mat_mg != 0 && 1947 mdb_ctf_vread(&mg, "metaslab_group_t", "mdb_metaslab_group_t", 1948 mat.mat_mg, 0) == -1) { 1949 return (DCMD_ERR); 1950 } 1951 1952 if (mg.mg_vd != 0) { 1953 mdb_vdev_t vdev; 1954 char desc[MAXNAMELEN]; 1955 1956 if (mdb_ctf_vread(&vdev, "vdev_t", "mdb_vdev_t", 1957 mg.mg_vd, 0) == -1) { 1958 return (DCMD_ERR); 1959 } 1960 1961 if (vdev.vdev_path != 0) { 1962 char path[MAXNAMELEN]; 1963 1964 if (mdb_readstr(path, sizeof (path), 1965 vdev.vdev_path) == -1) { 1966 mdb_warn("failed to read vdev_path at %p\n", 1967 vdev.vdev_path); 1968 return (DCMD_ERR); 1969 } 1970 char *slash; 1971 if ((slash = strrchr(path, '/')) != NULL) { 1972 strcpy(desc, slash + 1); 1973 } else { 1974 strcpy(desc, path); 1975 } 1976 } else if (vdev.vdev_ops != 0) { 1977 mdb_vdev_ops_t ops; 1978 if (mdb_ctf_vread(&ops, "vdev_ops_t", "mdb_vdev_ops_t", 1979 vdev.vdev_ops, 0) == -1) { 1980 mdb_warn("failed to read vdev_ops at %p\n", 1981 vdev.vdev_ops); 1982 return (DCMD_ERR); 1983 } 1984 (void) mdb_snprintf(desc, sizeof (desc), 1985 "%s-%llu", ops.vdev_op_type, vdev.vdev_id); 1986 } else { 1987 (void) strcpy(desc, "<unknown>"); 1988 } 1989 mdb_printf("%18s\n", desc); 1990 } 1991 1992 return (DCMD_OK); 1993 } 1994 1995 typedef struct metaslab_walk_data { 1996 uint64_t mw_numvdevs; 1997 uintptr_t *mw_vdevs; 1998 int mw_curvdev; 1999 uint64_t mw_nummss; 2000 uintptr_t *mw_mss; 2001 int mw_curms; 2002 } metaslab_walk_data_t; 2003 2004 static int 2005 metaslab_walk_step(mdb_walk_state_t *wsp) 2006 { 2007 metaslab_walk_data_t *mw = wsp->walk_data; 2008 metaslab_t ms; 2009 uintptr_t msp; 2010 2011 if (mw->mw_curvdev >= mw->mw_numvdevs) 2012 return (WALK_DONE); 2013 2014 if (mw->mw_mss == NULL) { 2015 uintptr_t mssp; 2016 uintptr_t vdevp; 2017 2018 ASSERT(mw->mw_curms == 0); 2019 ASSERT(mw->mw_nummss == 0); 2020 2021 vdevp = mw->mw_vdevs[mw->mw_curvdev]; 2022 if (GETMEMB(vdevp, "vdev", vdev_ms, mssp) || 2023 GETMEMB(vdevp, "vdev", vdev_ms_count, mw->mw_nummss)) { 2024 return (WALK_ERR); 2025 } 2026 2027 mw->mw_mss = mdb_alloc(mw->mw_nummss * sizeof (void*), 2028 UM_SLEEP | UM_GC); 2029 if (mdb_vread(mw->mw_mss, mw->mw_nummss * sizeof (void*), 2030 mssp) == -1) { 2031 mdb_warn("failed to read vdev_ms at %p", mssp); 2032 return (WALK_ERR); 2033 } 2034 } 2035 2036 if (mw->mw_curms >= mw->mw_nummss) { 2037 mw->mw_mss = NULL; 2038 mw->mw_curms = 0; 2039 mw->mw_nummss = 0; 2040 mw->mw_curvdev++; 2041 return (WALK_NEXT); 2042 } 2043 2044 msp = mw->mw_mss[mw->mw_curms]; 2045 if (mdb_vread(&ms, sizeof (metaslab_t), msp) == -1) { 2046 mdb_warn("failed to read metaslab_t at %p", msp); 2047 return (WALK_ERR); 2048 } 2049 2050 mw->mw_curms++; 2051 2052 return (wsp->walk_callback(msp, &ms, wsp->walk_cbdata)); 2053 } 2054 2055 static int 2056 metaslab_walk_init(mdb_walk_state_t *wsp) 2057 { 2058 metaslab_walk_data_t *mw; 2059 uintptr_t root_vdevp; 2060 uintptr_t childp; 2061 2062 if (wsp->walk_addr == 0) { 2063 mdb_warn("must supply address of spa_t\n"); 2064 return (WALK_ERR); 2065 } 2066 2067 mw = mdb_zalloc(sizeof (metaslab_walk_data_t), UM_SLEEP | UM_GC); 2068 2069 if (GETMEMB(wsp->walk_addr, "spa", spa_root_vdev, root_vdevp) || 2070 GETMEMB(root_vdevp, "vdev", vdev_children, mw->mw_numvdevs) || 2071 GETMEMB(root_vdevp, "vdev", vdev_child, childp)) { 2072 return (DCMD_ERR); 2073 } 2074 2075 mw->mw_vdevs = mdb_alloc(mw->mw_numvdevs * sizeof (void *), 2076 UM_SLEEP | UM_GC); 2077 if (mdb_vread(mw->mw_vdevs, mw->mw_numvdevs * sizeof (void *), 2078 childp) == -1) { 2079 mdb_warn("failed to read root vdev children at %p", childp); 2080 return (DCMD_ERR); 2081 } 2082 2083 wsp->walk_data = mw; 2084 2085 return (WALK_NEXT); 2086 } 2087 2088 typedef struct mdb_spa { 2089 uintptr_t spa_dsl_pool; 2090 uintptr_t spa_root_vdev; 2091 } mdb_spa_t; 2092 2093 typedef struct mdb_dsl_pool { 2094 uintptr_t dp_root_dir; 2095 } mdb_dsl_pool_t; 2096 2097 typedef struct mdb_dsl_dir { 2098 uintptr_t dd_dbuf; 2099 int64_t dd_space_towrite[TXG_SIZE]; 2100 } mdb_dsl_dir_t; 2101 2102 typedef struct mdb_dsl_dir_phys { 2103 uint64_t dd_used_bytes; 2104 uint64_t dd_compressed_bytes; 2105 uint64_t dd_uncompressed_bytes; 2106 } mdb_dsl_dir_phys_t; 2107 2108 typedef struct space_data { 2109 uint64_t ms_allocating[TXG_SIZE]; 2110 uint64_t ms_checkpointing; 2111 uint64_t ms_freeing; 2112 uint64_t ms_freed; 2113 uint64_t ms_allocatable; 2114 int64_t ms_deferspace; 2115 uint64_t nowavail; 2116 } space_data_t; 2117 2118 /* ARGSUSED */ 2119 static int 2120 space_cb(uintptr_t addr, const void *unknown, void *arg) 2121 { 2122 space_data_t *sd = arg; 2123 mdb_metaslab_t ms; 2124 mdb_range_tree_t rt; 2125 mdb_space_map_t sm = { 0 }; 2126 mdb_space_map_phys_t smp = { 0 }; 2127 int i; 2128 2129 if (mdb_ctf_vread(&ms, "metaslab_t", "mdb_metaslab_t", 2130 addr, 0) == -1) 2131 return (WALK_ERR); 2132 2133 for (i = 0; i < TXG_SIZE; i++) { 2134 if (mdb_ctf_vread(&rt, "range_tree_t", 2135 "mdb_range_tree_t", ms.ms_allocating[i], 0) == -1) 2136 return (WALK_ERR); 2137 2138 sd->ms_allocating[i] += rt.rt_space; 2139 2140 } 2141 2142 if (mdb_ctf_vread(&rt, "range_tree_t", 2143 "mdb_range_tree_t", ms.ms_checkpointing, 0) == -1) 2144 return (WALK_ERR); 2145 sd->ms_checkpointing += rt.rt_space; 2146 2147 if (mdb_ctf_vread(&rt, "range_tree_t", 2148 "mdb_range_tree_t", ms.ms_freeing, 0) == -1) 2149 return (WALK_ERR); 2150 sd->ms_freeing += rt.rt_space; 2151 2152 if (mdb_ctf_vread(&rt, "range_tree_t", 2153 "mdb_range_tree_t", ms.ms_freed, 0) == -1) 2154 return (WALK_ERR); 2155 sd->ms_freed += rt.rt_space; 2156 2157 if (mdb_ctf_vread(&rt, "range_tree_t", 2158 "mdb_range_tree_t", ms.ms_allocatable, 0) == -1) 2159 return (WALK_ERR); 2160 sd->ms_allocatable += rt.rt_space; 2161 2162 if (ms.ms_sm != 0 && 2163 mdb_ctf_vread(&sm, "space_map_t", 2164 "mdb_space_map_t", ms.ms_sm, 0) == -1) 2165 return (WALK_ERR); 2166 2167 if (sm.sm_phys != 0) { 2168 (void) mdb_ctf_vread(&smp, "space_map_phys_t", 2169 "mdb_space_map_phys_t", sm.sm_phys, 0); 2170 } 2171 2172 sd->ms_deferspace += ms.ms_deferspace; 2173 sd->nowavail += sm.sm_size - smp.smp_alloc; 2174 2175 return (WALK_NEXT); 2176 } 2177 2178 /* 2179 * ::spa_space [-b] 2180 * 2181 * Given a spa_t, print out it's on-disk space usage and in-core 2182 * estimates of future usage. If -b is given, print space in bytes. 2183 * Otherwise print in megabytes. 2184 */ 2185 /* ARGSUSED */ 2186 static int 2187 spa_space(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2188 { 2189 mdb_spa_t spa; 2190 mdb_dsl_pool_t dp; 2191 mdb_dsl_dir_t dd; 2192 mdb_dmu_buf_impl_t db; 2193 mdb_dsl_dir_phys_t dsp; 2194 space_data_t sd; 2195 int shift = 20; 2196 char *suffix = "M"; 2197 int bytes = B_FALSE; 2198 2199 if (mdb_getopts(argc, argv, 'b', MDB_OPT_SETBITS, TRUE, &bytes, NULL) != 2200 argc) 2201 return (DCMD_USAGE); 2202 if (!(flags & DCMD_ADDRSPEC)) 2203 return (DCMD_USAGE); 2204 2205 if (bytes) { 2206 shift = 0; 2207 suffix = ""; 2208 } 2209 2210 if (mdb_ctf_vread(&spa, ZFS_STRUCT "spa", "mdb_spa_t", 2211 addr, 0) == -1 || 2212 mdb_ctf_vread(&dp, ZFS_STRUCT "dsl_pool", "mdb_dsl_pool_t", 2213 spa.spa_dsl_pool, 0) == -1 || 2214 mdb_ctf_vread(&dd, ZFS_STRUCT "dsl_dir", "mdb_dsl_dir_t", 2215 dp.dp_root_dir, 0) == -1 || 2216 mdb_ctf_vread(&db, ZFS_STRUCT "dmu_buf_impl", "mdb_dmu_buf_impl_t", 2217 dd.dd_dbuf, 0) == -1 || 2218 mdb_ctf_vread(&dsp, ZFS_STRUCT "dsl_dir_phys", 2219 "mdb_dsl_dir_phys_t", db.db.db_data, 0) == -1) { 2220 return (DCMD_ERR); 2221 } 2222 2223 mdb_printf("dd_space_towrite = %llu%s %llu%s %llu%s %llu%s\n", 2224 dd.dd_space_towrite[0] >> shift, suffix, 2225 dd.dd_space_towrite[1] >> shift, suffix, 2226 dd.dd_space_towrite[2] >> shift, suffix, 2227 dd.dd_space_towrite[3] >> shift, suffix); 2228 2229 mdb_printf("dd_phys.dd_used_bytes = %llu%s\n", 2230 dsp.dd_used_bytes >> shift, suffix); 2231 mdb_printf("dd_phys.dd_compressed_bytes = %llu%s\n", 2232 dsp.dd_compressed_bytes >> shift, suffix); 2233 mdb_printf("dd_phys.dd_uncompressed_bytes = %llu%s\n", 2234 dsp.dd_uncompressed_bytes >> shift, suffix); 2235 2236 bzero(&sd, sizeof (sd)); 2237 if (mdb_pwalk("metaslab", space_cb, &sd, addr) != 0) { 2238 mdb_warn("can't walk metaslabs"); 2239 return (DCMD_ERR); 2240 } 2241 2242 mdb_printf("ms_allocmap = %llu%s %llu%s %llu%s %llu%s\n", 2243 sd.ms_allocating[0] >> shift, suffix, 2244 sd.ms_allocating[1] >> shift, suffix, 2245 sd.ms_allocating[2] >> shift, suffix, 2246 sd.ms_allocating[3] >> shift, suffix); 2247 mdb_printf("ms_checkpointing = %llu%s\n", 2248 sd.ms_checkpointing >> shift, suffix); 2249 mdb_printf("ms_freeing = %llu%s\n", 2250 sd.ms_freeing >> shift, suffix); 2251 mdb_printf("ms_freed = %llu%s\n", 2252 sd.ms_freed >> shift, suffix); 2253 mdb_printf("ms_allocatable = %llu%s\n", 2254 sd.ms_allocatable >> shift, suffix); 2255 mdb_printf("ms_deferspace = %llu%s\n", 2256 sd.ms_deferspace >> shift, suffix); 2257 mdb_printf("current syncing avail = %llu%s\n", 2258 sd.nowavail >> shift, suffix); 2259 2260 return (DCMD_OK); 2261 } 2262 2263 typedef struct mdb_spa_aux_vdev { 2264 int sav_count; 2265 uintptr_t sav_vdevs; 2266 } mdb_spa_aux_vdev_t; 2267 2268 typedef struct mdb_spa_vdevs { 2269 uintptr_t spa_root_vdev; 2270 mdb_spa_aux_vdev_t spa_l2cache; 2271 mdb_spa_aux_vdev_t spa_spares; 2272 } mdb_spa_vdevs_t; 2273 2274 static int 2275 spa_print_aux(mdb_spa_aux_vdev_t *sav, uint_t flags, mdb_arg_t *v, 2276 const char *name) 2277 { 2278 uintptr_t *aux; 2279 size_t len; 2280 int ret, i; 2281 2282 /* 2283 * Iterate over aux vdevs and print those out as well. This is a 2284 * little annoying because we don't have a root vdev to pass to ::vdev. 2285 * Instead, we print a single line and then call it for each child 2286 * vdev. 2287 */ 2288 if (sav->sav_count != 0) { 2289 v[1].a_type = MDB_TYPE_STRING; 2290 v[1].a_un.a_str = "-d"; 2291 v[2].a_type = MDB_TYPE_IMMEDIATE; 2292 v[2].a_un.a_val = 2; 2293 2294 len = sav->sav_count * sizeof (uintptr_t); 2295 aux = mdb_alloc(len, UM_SLEEP); 2296 if (mdb_vread(aux, len, sav->sav_vdevs) == -1) { 2297 mdb_free(aux, len); 2298 mdb_warn("failed to read l2cache vdevs at %p", 2299 sav->sav_vdevs); 2300 return (DCMD_ERR); 2301 } 2302 2303 mdb_printf("%-?s %-9s %-12s %s\n", "-", "-", "-", name); 2304 2305 for (i = 0; i < sav->sav_count; i++) { 2306 ret = mdb_call_dcmd("vdev", aux[i], flags, 3, v); 2307 if (ret != DCMD_OK) { 2308 mdb_free(aux, len); 2309 return (ret); 2310 } 2311 } 2312 2313 mdb_free(aux, len); 2314 } 2315 2316 return (0); 2317 } 2318 2319 /* 2320 * ::spa_vdevs 2321 * 2322 * -e Include error stats 2323 * -m Include metaslab information 2324 * -M Include metaslab group information 2325 * -h Include histogram information (requires -m or -M) 2326 * 2327 * Print out a summarized list of vdevs for the given spa_t. 2328 * This is accomplished by invoking "::vdev -re" on the root vdev, as well as 2329 * iterating over the cache devices. 2330 */ 2331 /* ARGSUSED */ 2332 static int 2333 spa_vdevs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2334 { 2335 mdb_arg_t v[3]; 2336 int ret; 2337 char opts[100] = "-r"; 2338 int spa_flags = 0; 2339 2340 if (mdb_getopts(argc, argv, 2341 'e', MDB_OPT_SETBITS, SPA_FLAG_ERRORS, &spa_flags, 2342 'm', MDB_OPT_SETBITS, SPA_FLAG_METASLABS, &spa_flags, 2343 'M', MDB_OPT_SETBITS, SPA_FLAG_METASLAB_GROUPS, &spa_flags, 2344 'h', MDB_OPT_SETBITS, SPA_FLAG_HISTOGRAMS, &spa_flags, 2345 NULL) != argc) 2346 return (DCMD_USAGE); 2347 2348 if (!(flags & DCMD_ADDRSPEC)) 2349 return (DCMD_USAGE); 2350 2351 mdb_spa_vdevs_t spa; 2352 if (mdb_ctf_vread(&spa, "spa_t", "mdb_spa_vdevs_t", addr, 0) == -1) 2353 return (DCMD_ERR); 2354 2355 /* 2356 * Unitialized spa_t structures can have a NULL root vdev. 2357 */ 2358 if (spa.spa_root_vdev == 0) { 2359 mdb_printf("no associated vdevs\n"); 2360 return (DCMD_OK); 2361 } 2362 2363 if (spa_flags & SPA_FLAG_ERRORS) 2364 strcat(opts, "e"); 2365 if (spa_flags & SPA_FLAG_METASLABS) 2366 strcat(opts, "m"); 2367 if (spa_flags & SPA_FLAG_METASLAB_GROUPS) 2368 strcat(opts, "M"); 2369 if (spa_flags & SPA_FLAG_HISTOGRAMS) 2370 strcat(opts, "h"); 2371 2372 v[0].a_type = MDB_TYPE_STRING; 2373 v[0].a_un.a_str = opts; 2374 2375 ret = mdb_call_dcmd("vdev", (uintptr_t)spa.spa_root_vdev, 2376 flags, 1, v); 2377 if (ret != DCMD_OK) 2378 return (ret); 2379 2380 if (spa_print_aux(&spa.spa_l2cache, flags, v, "cache") != 0 || 2381 spa_print_aux(&spa.spa_spares, flags, v, "spares") != 0) 2382 return (DCMD_ERR); 2383 2384 return (DCMD_OK); 2385 } 2386 2387 /* 2388 * ::zio 2389 * 2390 * Print a summary of zio_t and all its children. This is intended to display a 2391 * zio tree, and hence we only pick the most important pieces of information for 2392 * the main summary. More detailed information can always be found by doing a 2393 * '::print zio' on the underlying zio_t. The columns we display are: 2394 * 2395 * ADDRESS TYPE STAGE WAITER TIME_ELAPSED 2396 * 2397 * The 'address' column is indented by one space for each depth level as we 2398 * descend down the tree. 2399 */ 2400 2401 #define ZIO_MAXINDENT 7 2402 #define ZIO_MAXWIDTH (sizeof (uintptr_t) * 2 + ZIO_MAXINDENT) 2403 #define ZIO_WALK_SELF 0 2404 #define ZIO_WALK_CHILD 1 2405 #define ZIO_WALK_PARENT 2 2406 2407 typedef struct zio_print_args { 2408 int zpa_current_depth; 2409 int zpa_min_depth; 2410 int zpa_max_depth; 2411 int zpa_type; 2412 uint_t zpa_flags; 2413 } zio_print_args_t; 2414 2415 typedef struct mdb_zio { 2416 enum zio_type io_type; 2417 enum zio_stage io_stage; 2418 uintptr_t io_waiter; 2419 uintptr_t io_spa; 2420 struct { 2421 struct { 2422 uintptr_t list_next; 2423 } list_head; 2424 } io_parent_list; 2425 int io_error; 2426 } mdb_zio_t; 2427 2428 typedef struct mdb_zio_timestamp { 2429 hrtime_t io_timestamp; 2430 } mdb_zio_timestamp_t; 2431 2432 static int zio_child_cb(uintptr_t addr, const void *unknown, void *arg); 2433 2434 static int 2435 zio_print_cb(uintptr_t addr, zio_print_args_t *zpa) 2436 { 2437 mdb_ctf_id_t type_enum, stage_enum; 2438 int indent = zpa->zpa_current_depth; 2439 const char *type, *stage; 2440 uintptr_t laddr; 2441 mdb_zio_t zio; 2442 mdb_zio_timestamp_t zio_timestamp = { 0 }; 2443 2444 if (mdb_ctf_vread(&zio, ZFS_STRUCT "zio", "mdb_zio_t", addr, 0) == -1) 2445 return (WALK_ERR); 2446 (void) mdb_ctf_vread(&zio_timestamp, ZFS_STRUCT "zio", 2447 "mdb_zio_timestamp_t", addr, MDB_CTF_VREAD_QUIET); 2448 2449 if (indent > ZIO_MAXINDENT) 2450 indent = ZIO_MAXINDENT; 2451 2452 if (mdb_ctf_lookup_by_name("enum zio_type", &type_enum) == -1 || 2453 mdb_ctf_lookup_by_name("enum zio_stage", &stage_enum) == -1) { 2454 mdb_warn("failed to lookup zio enums"); 2455 return (WALK_ERR); 2456 } 2457 2458 if ((type = mdb_ctf_enum_name(type_enum, zio.io_type)) != NULL) 2459 type += sizeof ("ZIO_TYPE_") - 1; 2460 else 2461 type = "?"; 2462 2463 if (zio.io_error == 0) { 2464 stage = mdb_ctf_enum_name(stage_enum, zio.io_stage); 2465 if (stage != NULL) 2466 stage += sizeof ("ZIO_STAGE_") - 1; 2467 else 2468 stage = "?"; 2469 } else { 2470 stage = "FAILED"; 2471 } 2472 2473 if (zpa->zpa_current_depth >= zpa->zpa_min_depth) { 2474 if (zpa->zpa_flags & DCMD_PIPE_OUT) { 2475 mdb_printf("%?p\n", addr); 2476 } else { 2477 mdb_printf("%*s%-*p %-5s %-16s ", indent, "", 2478 ZIO_MAXWIDTH - indent, addr, type, stage); 2479 if (zio.io_waiter != 0) 2480 mdb_printf("%-16lx ", zio.io_waiter); 2481 else 2482 mdb_printf("%-16s ", "-"); 2483 #ifdef _KERNEL 2484 if (zio_timestamp.io_timestamp != 0) { 2485 mdb_printf("%llums", (mdb_gethrtime() - 2486 zio_timestamp.io_timestamp) / 2487 1000000); 2488 } else { 2489 mdb_printf("%-12s ", "-"); 2490 } 2491 #else 2492 mdb_printf("%-12s ", "-"); 2493 #endif 2494 mdb_printf("\n"); 2495 } 2496 } 2497 2498 if (zpa->zpa_current_depth >= zpa->zpa_max_depth) 2499 return (WALK_NEXT); 2500 2501 if (zpa->zpa_type == ZIO_WALK_PARENT) 2502 laddr = addr + mdb_ctf_offsetof_by_name(ZFS_STRUCT "zio", 2503 "io_parent_list"); 2504 else 2505 laddr = addr + mdb_ctf_offsetof_by_name(ZFS_STRUCT "zio", 2506 "io_child_list"); 2507 2508 zpa->zpa_current_depth++; 2509 if (mdb_pwalk("list", zio_child_cb, zpa, laddr) != 0) { 2510 mdb_warn("failed to walk zio_t children at %p\n", laddr); 2511 return (WALK_ERR); 2512 } 2513 zpa->zpa_current_depth--; 2514 2515 return (WALK_NEXT); 2516 } 2517 2518 /* ARGSUSED */ 2519 static int 2520 zio_child_cb(uintptr_t addr, const void *unknown, void *arg) 2521 { 2522 zio_link_t zl; 2523 uintptr_t ziop; 2524 zio_print_args_t *zpa = arg; 2525 2526 if (mdb_vread(&zl, sizeof (zl), addr) == -1) { 2527 mdb_warn("failed to read zio_link_t at %p", addr); 2528 return (WALK_ERR); 2529 } 2530 2531 if (zpa->zpa_type == ZIO_WALK_PARENT) 2532 ziop = (uintptr_t)zl.zl_parent; 2533 else 2534 ziop = (uintptr_t)zl.zl_child; 2535 2536 return (zio_print_cb(ziop, zpa)); 2537 } 2538 2539 /* ARGSUSED */ 2540 static int 2541 zio_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2542 { 2543 zio_print_args_t zpa = { 0 }; 2544 2545 if (!(flags & DCMD_ADDRSPEC)) 2546 return (DCMD_USAGE); 2547 2548 if (mdb_getopts(argc, argv, 2549 'r', MDB_OPT_SETBITS, INT_MAX, &zpa.zpa_max_depth, 2550 'c', MDB_OPT_SETBITS, ZIO_WALK_CHILD, &zpa.zpa_type, 2551 'p', MDB_OPT_SETBITS, ZIO_WALK_PARENT, &zpa.zpa_type, 2552 NULL) != argc) 2553 return (DCMD_USAGE); 2554 2555 zpa.zpa_flags = flags; 2556 if (zpa.zpa_max_depth != 0) { 2557 if (zpa.zpa_type == ZIO_WALK_SELF) 2558 zpa.zpa_type = ZIO_WALK_CHILD; 2559 } else if (zpa.zpa_type != ZIO_WALK_SELF) { 2560 zpa.zpa_min_depth = 1; 2561 zpa.zpa_max_depth = 1; 2562 } 2563 2564 if (!(flags & DCMD_PIPE_OUT) && DCMD_HDRSPEC(flags)) { 2565 mdb_printf("%<u>%-*s %-5s %-16s %-16s %-12s%</u>\n", 2566 ZIO_MAXWIDTH, "ADDRESS", "TYPE", "STAGE", "WAITER", 2567 "TIME_ELAPSED"); 2568 } 2569 2570 if (zio_print_cb(addr, &zpa) != WALK_NEXT) 2571 return (DCMD_ERR); 2572 2573 return (DCMD_OK); 2574 } 2575 2576 /* 2577 * [addr]::zio_state 2578 * 2579 * Print a summary of all zio_t structures on the system, or for a particular 2580 * pool. This is equivalent to '::walk zio_root | ::zio'. 2581 */ 2582 /*ARGSUSED*/ 2583 static int 2584 zio_state(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2585 { 2586 /* 2587 * MDB will remember the last address of the pipeline, so if we don't 2588 * zero this we'll end up trying to walk zio structures for a 2589 * non-existent spa_t. 2590 */ 2591 if (!(flags & DCMD_ADDRSPEC)) 2592 addr = 0; 2593 2594 return (mdb_pwalk_dcmd("zio_root", "zio", argc, argv, addr)); 2595 } 2596 2597 typedef struct mdb_multilist { 2598 uint64_t ml_num_sublists; 2599 uintptr_t ml_sublists; 2600 } mdb_multilist_t; 2601 2602 typedef struct multilist_walk_data { 2603 uint64_t mwd_idx; 2604 mdb_multilist_t mwd_ml; 2605 } multilist_walk_data_t; 2606 2607 /* ARGSUSED */ 2608 static int 2609 multilist_print_cb(uintptr_t addr, const void *unknown, void *arg) 2610 { 2611 mdb_printf("%#lr\n", addr); 2612 return (WALK_NEXT); 2613 } 2614 2615 static int 2616 multilist_walk_step(mdb_walk_state_t *wsp) 2617 { 2618 multilist_walk_data_t *mwd = wsp->walk_data; 2619 2620 if (mwd->mwd_idx >= mwd->mwd_ml.ml_num_sublists) 2621 return (WALK_DONE); 2622 2623 wsp->walk_addr = mwd->mwd_ml.ml_sublists + 2624 mdb_ctf_sizeof_by_name("multilist_sublist_t") * mwd->mwd_idx + 2625 mdb_ctf_offsetof_by_name("multilist_sublist_t", "mls_list"); 2626 2627 mdb_pwalk("list", multilist_print_cb, (void*)NULL, wsp->walk_addr); 2628 mwd->mwd_idx++; 2629 2630 return (WALK_NEXT); 2631 } 2632 2633 static int 2634 multilist_walk_init(mdb_walk_state_t *wsp) 2635 { 2636 multilist_walk_data_t *mwd; 2637 2638 if (wsp->walk_addr == 0) { 2639 mdb_warn("must supply address of multilist_t\n"); 2640 return (WALK_ERR); 2641 } 2642 2643 mwd = mdb_zalloc(sizeof (multilist_walk_data_t), UM_SLEEP | UM_GC); 2644 if (mdb_ctf_vread(&mwd->mwd_ml, "multilist_t", "mdb_multilist_t", 2645 wsp->walk_addr, 0) == -1) { 2646 return (WALK_ERR); 2647 } 2648 2649 if (mwd->mwd_ml.ml_num_sublists == 0 || 2650 mwd->mwd_ml.ml_sublists == 0) { 2651 mdb_warn("invalid or uninitialized multilist at %#lx\n", 2652 wsp->walk_addr); 2653 return (WALK_ERR); 2654 } 2655 2656 wsp->walk_data = mwd; 2657 return (WALK_NEXT); 2658 } 2659 2660 typedef struct mdb_txg_list { 2661 size_t tl_offset; 2662 uintptr_t tl_head[TXG_SIZE]; 2663 } mdb_txg_list_t; 2664 2665 typedef struct txg_list_walk_data { 2666 uintptr_t lw_head[TXG_SIZE]; 2667 int lw_txgoff; 2668 int lw_maxoff; 2669 size_t lw_offset; 2670 void *lw_obj; 2671 } txg_list_walk_data_t; 2672 2673 static int 2674 txg_list_walk_init_common(mdb_walk_state_t *wsp, int txg, int maxoff) 2675 { 2676 txg_list_walk_data_t *lwd; 2677 mdb_txg_list_t list; 2678 int i; 2679 2680 lwd = mdb_alloc(sizeof (txg_list_walk_data_t), UM_SLEEP | UM_GC); 2681 if (mdb_ctf_vread(&list, "txg_list_t", "mdb_txg_list_t", wsp->walk_addr, 2682 0) == -1) { 2683 mdb_warn("failed to read txg_list_t at %#lx", wsp->walk_addr); 2684 return (WALK_ERR); 2685 } 2686 2687 for (i = 0; i < TXG_SIZE; i++) 2688 lwd->lw_head[i] = list.tl_head[i]; 2689 lwd->lw_offset = list.tl_offset; 2690 lwd->lw_obj = mdb_alloc(lwd->lw_offset + sizeof (txg_node_t), 2691 UM_SLEEP | UM_GC); 2692 lwd->lw_txgoff = txg; 2693 lwd->lw_maxoff = maxoff; 2694 2695 wsp->walk_addr = lwd->lw_head[lwd->lw_txgoff]; 2696 wsp->walk_data = lwd; 2697 2698 return (WALK_NEXT); 2699 } 2700 2701 static int 2702 txg_list_walk_init(mdb_walk_state_t *wsp) 2703 { 2704 return (txg_list_walk_init_common(wsp, 0, TXG_SIZE-1)); 2705 } 2706 2707 static int 2708 txg_list0_walk_init(mdb_walk_state_t *wsp) 2709 { 2710 return (txg_list_walk_init_common(wsp, 0, 0)); 2711 } 2712 2713 static int 2714 txg_list1_walk_init(mdb_walk_state_t *wsp) 2715 { 2716 return (txg_list_walk_init_common(wsp, 1, 1)); 2717 } 2718 2719 static int 2720 txg_list2_walk_init(mdb_walk_state_t *wsp) 2721 { 2722 return (txg_list_walk_init_common(wsp, 2, 2)); 2723 } 2724 2725 static int 2726 txg_list3_walk_init(mdb_walk_state_t *wsp) 2727 { 2728 return (txg_list_walk_init_common(wsp, 3, 3)); 2729 } 2730 2731 static int 2732 txg_list_walk_step(mdb_walk_state_t *wsp) 2733 { 2734 txg_list_walk_data_t *lwd = wsp->walk_data; 2735 uintptr_t addr; 2736 txg_node_t *node; 2737 int status; 2738 2739 while (wsp->walk_addr == 0 && lwd->lw_txgoff < lwd->lw_maxoff) { 2740 lwd->lw_txgoff++; 2741 wsp->walk_addr = lwd->lw_head[lwd->lw_txgoff]; 2742 } 2743 2744 if (wsp->walk_addr == 0) 2745 return (WALK_DONE); 2746 2747 addr = wsp->walk_addr - lwd->lw_offset; 2748 2749 if (mdb_vread(lwd->lw_obj, 2750 lwd->lw_offset + sizeof (txg_node_t), addr) == -1) { 2751 mdb_warn("failed to read list element at %#lx", addr); 2752 return (WALK_ERR); 2753 } 2754 2755 status = wsp->walk_callback(addr, lwd->lw_obj, wsp->walk_cbdata); 2756 node = (txg_node_t *)((uintptr_t)lwd->lw_obj + lwd->lw_offset); 2757 wsp->walk_addr = (uintptr_t)node->tn_next[lwd->lw_txgoff]; 2758 2759 return (status); 2760 } 2761 2762 /* 2763 * ::walk spa 2764 * 2765 * Walk all named spa_t structures in the namespace. This is nothing more than 2766 * a layered avl walk. 2767 */ 2768 static int 2769 spa_walk_init(mdb_walk_state_t *wsp) 2770 { 2771 GElf_Sym sym; 2772 2773 if (wsp->walk_addr != 0) { 2774 mdb_warn("spa walk only supports global walks\n"); 2775 return (WALK_ERR); 2776 } 2777 2778 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "spa_namespace_avl", &sym) == -1) { 2779 mdb_warn("failed to find symbol 'spa_namespace_avl'"); 2780 return (WALK_ERR); 2781 } 2782 2783 wsp->walk_addr = (uintptr_t)sym.st_value; 2784 2785 if (mdb_layered_walk("avl", wsp) == -1) { 2786 mdb_warn("failed to walk 'avl'\n"); 2787 return (WALK_ERR); 2788 } 2789 2790 return (WALK_NEXT); 2791 } 2792 2793 static int 2794 spa_walk_step(mdb_walk_state_t *wsp) 2795 { 2796 return (wsp->walk_callback(wsp->walk_addr, NULL, wsp->walk_cbdata)); 2797 } 2798 2799 /* 2800 * [addr]::walk zio 2801 * 2802 * Walk all active zio_t structures on the system. This is simply a layered 2803 * walk on top of ::walk zio_cache, with the optional ability to limit the 2804 * structures to a particular pool. 2805 */ 2806 static int 2807 zio_walk_init(mdb_walk_state_t *wsp) 2808 { 2809 wsp->walk_data = (void *)wsp->walk_addr; 2810 2811 if (mdb_layered_walk("zio_cache", wsp) == -1) { 2812 mdb_warn("failed to walk 'zio_cache'\n"); 2813 return (WALK_ERR); 2814 } 2815 2816 return (WALK_NEXT); 2817 } 2818 2819 static int 2820 zio_walk_step(mdb_walk_state_t *wsp) 2821 { 2822 mdb_zio_t zio; 2823 uintptr_t spa = (uintptr_t)wsp->walk_data; 2824 2825 if (mdb_ctf_vread(&zio, ZFS_STRUCT "zio", "mdb_zio_t", 2826 wsp->walk_addr, 0) == -1) 2827 return (WALK_ERR); 2828 2829 if (spa != 0 && spa != zio.io_spa) 2830 return (WALK_NEXT); 2831 2832 return (wsp->walk_callback(wsp->walk_addr, &zio, wsp->walk_cbdata)); 2833 } 2834 2835 /* 2836 * [addr]::walk zio_root 2837 * 2838 * Walk only root zio_t structures, optionally for a particular spa_t. 2839 */ 2840 static int 2841 zio_walk_root_step(mdb_walk_state_t *wsp) 2842 { 2843 mdb_zio_t zio; 2844 uintptr_t spa = (uintptr_t)wsp->walk_data; 2845 2846 if (mdb_ctf_vread(&zio, ZFS_STRUCT "zio", "mdb_zio_t", 2847 wsp->walk_addr, 0) == -1) 2848 return (WALK_ERR); 2849 2850 if (spa != 0 && spa != zio.io_spa) 2851 return (WALK_NEXT); 2852 2853 /* If the parent list is not empty, ignore */ 2854 if (zio.io_parent_list.list_head.list_next != 2855 wsp->walk_addr + 2856 mdb_ctf_offsetof_by_name(ZFS_STRUCT "zio", "io_parent_list") + 2857 mdb_ctf_offsetof_by_name("struct list", "list_head")) 2858 return (WALK_NEXT); 2859 2860 return (wsp->walk_callback(wsp->walk_addr, &zio, wsp->walk_cbdata)); 2861 } 2862 2863 /* 2864 * ::zfs_blkstats 2865 * 2866 * -v print verbose per-level information 2867 * 2868 */ 2869 static int 2870 zfs_blkstats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2871 { 2872 boolean_t verbose = B_FALSE; 2873 zfs_all_blkstats_t stats; 2874 dmu_object_type_t t; 2875 zfs_blkstat_t *tzb; 2876 uint64_t ditto; 2877 2878 if (mdb_getopts(argc, argv, 2879 'v', MDB_OPT_SETBITS, TRUE, &verbose, 2880 NULL) != argc) 2881 return (DCMD_USAGE); 2882 2883 if (!(flags & DCMD_ADDRSPEC)) 2884 return (DCMD_USAGE); 2885 2886 if (GETMEMB(addr, "spa", spa_dsl_pool, addr) || 2887 GETMEMB(addr, "dsl_pool", dp_blkstats, addr) || 2888 mdb_vread(&stats, sizeof (zfs_all_blkstats_t), addr) == -1) { 2889 mdb_warn("failed to read data at %p;", addr); 2890 mdb_printf("maybe no stats? run \"zpool scrub\" first."); 2891 return (DCMD_ERR); 2892 } 2893 2894 tzb = &stats.zab_type[DN_MAX_LEVELS][DMU_OT_TOTAL]; 2895 if (tzb->zb_gangs != 0) { 2896 mdb_printf("Ganged blocks: %llu\n", 2897 (longlong_t)tzb->zb_gangs); 2898 } 2899 2900 ditto = tzb->zb_ditto_2_of_2_samevdev + tzb->zb_ditto_2_of_3_samevdev + 2901 tzb->zb_ditto_3_of_3_samevdev; 2902 if (ditto != 0) { 2903 mdb_printf("Dittoed blocks on same vdev: %llu\n", 2904 (longlong_t)ditto); 2905 } 2906 2907 mdb_printf("\nBlocks\tLSIZE\tPSIZE\tASIZE" 2908 "\t avg\t comp\t%%Total\tType\n"); 2909 2910 for (t = 0; t <= DMU_OT_TOTAL; t++) { 2911 char csize[MDB_NICENUM_BUFLEN], lsize[MDB_NICENUM_BUFLEN]; 2912 char psize[MDB_NICENUM_BUFLEN], asize[MDB_NICENUM_BUFLEN]; 2913 char avg[MDB_NICENUM_BUFLEN]; 2914 char comp[MDB_NICENUM_BUFLEN], pct[MDB_NICENUM_BUFLEN]; 2915 char typename[64]; 2916 int l; 2917 2918 2919 if (t == DMU_OT_DEFERRED) 2920 strcpy(typename, "deferred free"); 2921 else if (t == DMU_OT_OTHER) 2922 strcpy(typename, "other"); 2923 else if (t == DMU_OT_TOTAL) 2924 strcpy(typename, "Total"); 2925 else if (enum_lookup("enum dmu_object_type", 2926 t, "DMU_OT_", sizeof (typename), typename) == -1) { 2927 mdb_warn("failed to read type name"); 2928 return (DCMD_ERR); 2929 } 2930 2931 if (stats.zab_type[DN_MAX_LEVELS][t].zb_asize == 0) 2932 continue; 2933 2934 for (l = -1; l < DN_MAX_LEVELS; l++) { 2935 int level = (l == -1 ? DN_MAX_LEVELS : l); 2936 zfs_blkstat_t *zb = &stats.zab_type[level][t]; 2937 2938 if (zb->zb_asize == 0) 2939 continue; 2940 2941 /* 2942 * Don't print each level unless requested. 2943 */ 2944 if (!verbose && level != DN_MAX_LEVELS) 2945 continue; 2946 2947 /* 2948 * If all the space is level 0, don't print the 2949 * level 0 separately. 2950 */ 2951 if (level == 0 && zb->zb_asize == 2952 stats.zab_type[DN_MAX_LEVELS][t].zb_asize) 2953 continue; 2954 2955 mdb_nicenum(zb->zb_count, csize); 2956 mdb_nicenum(zb->zb_lsize, lsize); 2957 mdb_nicenum(zb->zb_psize, psize); 2958 mdb_nicenum(zb->zb_asize, asize); 2959 mdb_nicenum(zb->zb_asize / zb->zb_count, avg); 2960 (void) mdb_snprintfrac(comp, MDB_NICENUM_BUFLEN, 2961 zb->zb_lsize, zb->zb_psize, 2); 2962 (void) mdb_snprintfrac(pct, MDB_NICENUM_BUFLEN, 2963 100 * zb->zb_asize, tzb->zb_asize, 2); 2964 2965 mdb_printf("%6s\t%5s\t%5s\t%5s\t%5s" 2966 "\t%5s\t%6s\t", 2967 csize, lsize, psize, asize, avg, comp, pct); 2968 2969 if (level == DN_MAX_LEVELS) 2970 mdb_printf("%s\n", typename); 2971 else 2972 mdb_printf(" L%d %s\n", 2973 level, typename); 2974 } 2975 } 2976 2977 return (DCMD_OK); 2978 } 2979 2980 typedef struct mdb_reference { 2981 uintptr_t ref_holder; 2982 uintptr_t ref_removed; 2983 uint64_t ref_number; 2984 } mdb_reference_t; 2985 2986 /* ARGSUSED */ 2987 static int 2988 reference_cb(uintptr_t addr, const void *ignored, void *arg) 2989 { 2990 mdb_reference_t ref; 2991 boolean_t holder_is_str = B_FALSE; 2992 char holder_str[128]; 2993 boolean_t removed = (boolean_t)arg; 2994 2995 if (mdb_ctf_vread(&ref, "reference_t", "mdb_reference_t", addr, 2996 0) == -1) 2997 return (DCMD_ERR); 2998 2999 if (mdb_readstr(holder_str, sizeof (holder_str), 3000 ref.ref_holder) != -1) 3001 holder_is_str = strisprint(holder_str); 3002 3003 if (removed) 3004 mdb_printf("removed "); 3005 mdb_printf("reference "); 3006 if (ref.ref_number != 1) 3007 mdb_printf("with count=%llu ", ref.ref_number); 3008 mdb_printf("with tag %lx", ref.ref_holder); 3009 if (holder_is_str) 3010 mdb_printf(" \"%s\"", holder_str); 3011 mdb_printf(", held at:\n"); 3012 3013 (void) mdb_call_dcmd("whatis", addr, DCMD_ADDRSPEC, 0, NULL); 3014 3015 if (removed) { 3016 mdb_printf("removed at:\n"); 3017 (void) mdb_call_dcmd("whatis", ref.ref_removed, 3018 DCMD_ADDRSPEC, 0, NULL); 3019 } 3020 3021 mdb_printf("\n"); 3022 3023 return (WALK_NEXT); 3024 } 3025 3026 typedef struct mdb_refcount { 3027 uint64_t rc_count; 3028 } mdb_refcount_t; 3029 3030 typedef struct mdb_refcount_removed { 3031 uint64_t rc_removed_count; 3032 } mdb_refcount_removed_t; 3033 3034 typedef struct mdb_refcount_tracked { 3035 boolean_t rc_tracked; 3036 } mdb_refcount_tracked_t; 3037 3038 /* ARGSUSED */ 3039 static int 3040 refcount(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3041 { 3042 mdb_refcount_t rc; 3043 mdb_refcount_removed_t rcr; 3044 mdb_refcount_tracked_t rct; 3045 int off; 3046 boolean_t released = B_FALSE; 3047 3048 if (!(flags & DCMD_ADDRSPEC)) 3049 return (DCMD_USAGE); 3050 3051 if (mdb_getopts(argc, argv, 3052 'r', MDB_OPT_SETBITS, B_TRUE, &released, 3053 NULL) != argc) 3054 return (DCMD_USAGE); 3055 3056 if (mdb_ctf_vread(&rc, "refcount_t", "mdb_refcount_t", addr, 3057 0) == -1) 3058 return (DCMD_ERR); 3059 3060 if (mdb_ctf_vread(&rcr, "refcount_t", "mdb_refcount_removed_t", addr, 3061 MDB_CTF_VREAD_QUIET) == -1) { 3062 mdb_printf("refcount_t at %p has %llu holds (untracked)\n", 3063 addr, (longlong_t)rc.rc_count); 3064 return (DCMD_OK); 3065 } 3066 3067 if (mdb_ctf_vread(&rct, "refcount_t", "mdb_refcount_tracked_t", addr, 3068 MDB_CTF_VREAD_QUIET) == -1) { 3069 /* If this is an old target, it might be tracked. */ 3070 rct.rc_tracked = B_TRUE; 3071 } 3072 3073 mdb_printf("refcount_t at %p has %llu current holds, " 3074 "%llu recently released holds\n", 3075 addr, (longlong_t)rc.rc_count, (longlong_t)rcr.rc_removed_count); 3076 3077 if (rct.rc_tracked && rc.rc_count > 0) 3078 mdb_printf("current holds:\n"); 3079 off = mdb_ctf_offsetof_by_name("refcount_t", "rc_list"); 3080 if (off == -1) 3081 return (DCMD_ERR); 3082 mdb_pwalk("list", reference_cb, (void*)B_FALSE, addr + off); 3083 3084 if (released && rcr.rc_removed_count > 0) { 3085 mdb_printf("released holds:\n"); 3086 3087 off = mdb_ctf_offsetof_by_name("refcount_t", "rc_removed"); 3088 if (off == -1) 3089 return (DCMD_ERR); 3090 mdb_pwalk("list", reference_cb, (void*)B_TRUE, addr + off); 3091 } 3092 3093 return (DCMD_OK); 3094 } 3095 3096 /* ARGSUSED */ 3097 static int 3098 sa_attr_table(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3099 { 3100 sa_attr_table_t *table; 3101 sa_os_t sa_os; 3102 char *name; 3103 int i; 3104 3105 if (mdb_vread(&sa_os, sizeof (sa_os_t), addr) == -1) { 3106 mdb_warn("failed to read sa_os at %p", addr); 3107 return (DCMD_ERR); 3108 } 3109 3110 table = mdb_alloc(sizeof (sa_attr_table_t) * sa_os.sa_num_attrs, 3111 UM_SLEEP | UM_GC); 3112 name = mdb_alloc(MAXPATHLEN, UM_SLEEP | UM_GC); 3113 3114 if (mdb_vread(table, sizeof (sa_attr_table_t) * sa_os.sa_num_attrs, 3115 (uintptr_t)sa_os.sa_attr_table) == -1) { 3116 mdb_warn("failed to read sa_os at %p", addr); 3117 return (DCMD_ERR); 3118 } 3119 3120 mdb_printf("%<u>%-10s %-10s %-10s %-10s %s%</u>\n", 3121 "ATTR ID", "REGISTERED", "LENGTH", "BSWAP", "NAME"); 3122 for (i = 0; i != sa_os.sa_num_attrs; i++) { 3123 mdb_readstr(name, MAXPATHLEN, (uintptr_t)table[i].sa_name); 3124 mdb_printf("%5x %8x %8x %8x %-s\n", 3125 (int)table[i].sa_attr, (int)table[i].sa_registered, 3126 (int)table[i].sa_length, table[i].sa_byteswap, name); 3127 } 3128 3129 return (DCMD_OK); 3130 } 3131 3132 static int 3133 sa_get_off_table(uintptr_t addr, uint32_t **off_tab, int attr_count) 3134 { 3135 uintptr_t idx_table; 3136 3137 if (GETMEMB(addr, "sa_idx_tab", sa_idx_tab, idx_table)) { 3138 mdb_printf("can't find offset table in sa_idx_tab\n"); 3139 return (-1); 3140 } 3141 3142 *off_tab = mdb_alloc(attr_count * sizeof (uint32_t), 3143 UM_SLEEP | UM_GC); 3144 3145 if (mdb_vread(*off_tab, 3146 attr_count * sizeof (uint32_t), idx_table) == -1) { 3147 mdb_warn("failed to attribute offset table %p", idx_table); 3148 return (-1); 3149 } 3150 3151 return (DCMD_OK); 3152 } 3153 3154 /*ARGSUSED*/ 3155 static int 3156 sa_attr_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3157 { 3158 uint32_t *offset_tab; 3159 int attr_count; 3160 uint64_t attr_id; 3161 uintptr_t attr_addr; 3162 uintptr_t bonus_tab, spill_tab; 3163 uintptr_t db_bonus, db_spill; 3164 uintptr_t os, os_sa; 3165 uintptr_t db_data; 3166 3167 if (argc != 1) 3168 return (DCMD_USAGE); 3169 3170 if (argv[0].a_type == MDB_TYPE_STRING) 3171 attr_id = mdb_strtoull(argv[0].a_un.a_str); 3172 else 3173 return (DCMD_USAGE); 3174 3175 if (GETMEMB(addr, "sa_handle", sa_bonus_tab, bonus_tab) || 3176 GETMEMB(addr, "sa_handle", sa_spill_tab, spill_tab) || 3177 GETMEMB(addr, "sa_handle", sa_os, os) || 3178 GETMEMB(addr, "sa_handle", sa_bonus, db_bonus) || 3179 GETMEMB(addr, "sa_handle", sa_spill, db_spill)) { 3180 mdb_printf("Can't find necessary information in sa_handle " 3181 "in sa_handle\n"); 3182 return (DCMD_ERR); 3183 } 3184 3185 if (GETMEMB(os, "objset", os_sa, os_sa)) { 3186 mdb_printf("Can't find os_sa in objset\n"); 3187 return (DCMD_ERR); 3188 } 3189 3190 if (GETMEMB(os_sa, "sa_os", sa_num_attrs, attr_count)) { 3191 mdb_printf("Can't find sa_num_attrs\n"); 3192 return (DCMD_ERR); 3193 } 3194 3195 if (attr_id > attr_count) { 3196 mdb_printf("attribute id number is out of range\n"); 3197 return (DCMD_ERR); 3198 } 3199 3200 if (bonus_tab) { 3201 if (sa_get_off_table(bonus_tab, &offset_tab, 3202 attr_count) == -1) { 3203 return (DCMD_ERR); 3204 } 3205 3206 if (GETMEMB(db_bonus, "dmu_buf", db_data, db_data)) { 3207 mdb_printf("can't find db_data in bonus dbuf\n"); 3208 return (DCMD_ERR); 3209 } 3210 } 3211 3212 if (bonus_tab && !TOC_ATTR_PRESENT(offset_tab[attr_id]) && 3213 spill_tab == 0) { 3214 mdb_printf("Attribute does not exist\n"); 3215 return (DCMD_ERR); 3216 } else if (!TOC_ATTR_PRESENT(offset_tab[attr_id]) && spill_tab) { 3217 if (sa_get_off_table(spill_tab, &offset_tab, 3218 attr_count) == -1) { 3219 return (DCMD_ERR); 3220 } 3221 if (GETMEMB(db_spill, "dmu_buf", db_data, db_data)) { 3222 mdb_printf("can't find db_data in spill dbuf\n"); 3223 return (DCMD_ERR); 3224 } 3225 if (!TOC_ATTR_PRESENT(offset_tab[attr_id])) { 3226 mdb_printf("Attribute does not exist\n"); 3227 return (DCMD_ERR); 3228 } 3229 } 3230 attr_addr = db_data + TOC_OFF(offset_tab[attr_id]); 3231 mdb_printf("%p\n", attr_addr); 3232 return (DCMD_OK); 3233 } 3234 3235 /* ARGSUSED */ 3236 static int 3237 zfs_ace_print_common(uintptr_t addr, uint_t flags, 3238 uint64_t id, uint32_t access_mask, uint16_t ace_flags, 3239 uint16_t ace_type, int verbose) 3240 { 3241 if (DCMD_HDRSPEC(flags) && !verbose) 3242 mdb_printf("%<u>%-?s %-8s %-8s %-8s %s%</u>\n", 3243 "ADDR", "FLAGS", "MASK", "TYPE", "ID"); 3244 3245 if (!verbose) { 3246 mdb_printf("%0?p %-8x %-8x %-8x %-llx\n", addr, 3247 ace_flags, access_mask, ace_type, id); 3248 return (DCMD_OK); 3249 } 3250 3251 switch (ace_flags & ACE_TYPE_FLAGS) { 3252 case ACE_OWNER: 3253 mdb_printf("owner@:"); 3254 break; 3255 case (ACE_IDENTIFIER_GROUP | ACE_GROUP): 3256 mdb_printf("group@:"); 3257 break; 3258 case ACE_EVERYONE: 3259 mdb_printf("everyone@:"); 3260 break; 3261 case ACE_IDENTIFIER_GROUP: 3262 mdb_printf("group:%llx:", (u_longlong_t)id); 3263 break; 3264 case 0: /* User entry */ 3265 mdb_printf("user:%llx:", (u_longlong_t)id); 3266 break; 3267 } 3268 3269 /* print out permission mask */ 3270 if (access_mask & ACE_READ_DATA) 3271 mdb_printf("r"); 3272 else 3273 mdb_printf("-"); 3274 if (access_mask & ACE_WRITE_DATA) 3275 mdb_printf("w"); 3276 else 3277 mdb_printf("-"); 3278 if (access_mask & ACE_EXECUTE) 3279 mdb_printf("x"); 3280 else 3281 mdb_printf("-"); 3282 if (access_mask & ACE_APPEND_DATA) 3283 mdb_printf("p"); 3284 else 3285 mdb_printf("-"); 3286 if (access_mask & ACE_DELETE) 3287 mdb_printf("d"); 3288 else 3289 mdb_printf("-"); 3290 if (access_mask & ACE_DELETE_CHILD) 3291 mdb_printf("D"); 3292 else 3293 mdb_printf("-"); 3294 if (access_mask & ACE_READ_ATTRIBUTES) 3295 mdb_printf("a"); 3296 else 3297 mdb_printf("-"); 3298 if (access_mask & ACE_WRITE_ATTRIBUTES) 3299 mdb_printf("A"); 3300 else 3301 mdb_printf("-"); 3302 if (access_mask & ACE_READ_NAMED_ATTRS) 3303 mdb_printf("R"); 3304 else 3305 mdb_printf("-"); 3306 if (access_mask & ACE_WRITE_NAMED_ATTRS) 3307 mdb_printf("W"); 3308 else 3309 mdb_printf("-"); 3310 if (access_mask & ACE_READ_ACL) 3311 mdb_printf("c"); 3312 else 3313 mdb_printf("-"); 3314 if (access_mask & ACE_WRITE_ACL) 3315 mdb_printf("C"); 3316 else 3317 mdb_printf("-"); 3318 if (access_mask & ACE_WRITE_OWNER) 3319 mdb_printf("o"); 3320 else 3321 mdb_printf("-"); 3322 if (access_mask & ACE_SYNCHRONIZE) 3323 mdb_printf("s"); 3324 else 3325 mdb_printf("-"); 3326 3327 mdb_printf(":"); 3328 3329 /* Print out inheritance flags */ 3330 if (ace_flags & ACE_FILE_INHERIT_ACE) 3331 mdb_printf("f"); 3332 else 3333 mdb_printf("-"); 3334 if (ace_flags & ACE_DIRECTORY_INHERIT_ACE) 3335 mdb_printf("d"); 3336 else 3337 mdb_printf("-"); 3338 if (ace_flags & ACE_INHERIT_ONLY_ACE) 3339 mdb_printf("i"); 3340 else 3341 mdb_printf("-"); 3342 if (ace_flags & ACE_NO_PROPAGATE_INHERIT_ACE) 3343 mdb_printf("n"); 3344 else 3345 mdb_printf("-"); 3346 if (ace_flags & ACE_SUCCESSFUL_ACCESS_ACE_FLAG) 3347 mdb_printf("S"); 3348 else 3349 mdb_printf("-"); 3350 if (ace_flags & ACE_FAILED_ACCESS_ACE_FLAG) 3351 mdb_printf("F"); 3352 else 3353 mdb_printf("-"); 3354 if (ace_flags & ACE_INHERITED_ACE) 3355 mdb_printf("I"); 3356 else 3357 mdb_printf("-"); 3358 3359 switch (ace_type) { 3360 case ACE_ACCESS_ALLOWED_ACE_TYPE: 3361 mdb_printf(":allow\n"); 3362 break; 3363 case ACE_ACCESS_DENIED_ACE_TYPE: 3364 mdb_printf(":deny\n"); 3365 break; 3366 case ACE_SYSTEM_AUDIT_ACE_TYPE: 3367 mdb_printf(":audit\n"); 3368 break; 3369 case ACE_SYSTEM_ALARM_ACE_TYPE: 3370 mdb_printf(":alarm\n"); 3371 break; 3372 default: 3373 mdb_printf(":?\n"); 3374 } 3375 return (DCMD_OK); 3376 } 3377 3378 /* ARGSUSED */ 3379 static int 3380 zfs_ace_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3381 { 3382 zfs_ace_t zace; 3383 int verbose = FALSE; 3384 uint64_t id; 3385 3386 if (!(flags & DCMD_ADDRSPEC)) 3387 return (DCMD_USAGE); 3388 3389 if (mdb_getopts(argc, argv, 3390 'v', MDB_OPT_SETBITS, TRUE, &verbose, TRUE, NULL) != argc) 3391 return (DCMD_USAGE); 3392 3393 if (mdb_vread(&zace, sizeof (zfs_ace_t), addr) == -1) { 3394 mdb_warn("failed to read zfs_ace_t"); 3395 return (DCMD_ERR); 3396 } 3397 3398 if ((zace.z_hdr.z_flags & ACE_TYPE_FLAGS) == 0 || 3399 (zace.z_hdr.z_flags & ACE_TYPE_FLAGS) == ACE_IDENTIFIER_GROUP) 3400 id = zace.z_fuid; 3401 else 3402 id = -1; 3403 3404 return (zfs_ace_print_common(addr, flags, id, zace.z_hdr.z_access_mask, 3405 zace.z_hdr.z_flags, zace.z_hdr.z_type, verbose)); 3406 } 3407 3408 /* ARGSUSED */ 3409 static int 3410 zfs_ace0_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3411 { 3412 ace_t ace; 3413 uint64_t id; 3414 int verbose = FALSE; 3415 3416 if (!(flags & DCMD_ADDRSPEC)) 3417 return (DCMD_USAGE); 3418 3419 if (mdb_getopts(argc, argv, 3420 'v', MDB_OPT_SETBITS, TRUE, &verbose, TRUE, NULL) != argc) 3421 return (DCMD_USAGE); 3422 3423 if (mdb_vread(&ace, sizeof (ace_t), addr) == -1) { 3424 mdb_warn("failed to read ace_t"); 3425 return (DCMD_ERR); 3426 } 3427 3428 if ((ace.a_flags & ACE_TYPE_FLAGS) == 0 || 3429 (ace.a_flags & ACE_TYPE_FLAGS) == ACE_IDENTIFIER_GROUP) 3430 id = ace.a_who; 3431 else 3432 id = -1; 3433 3434 return (zfs_ace_print_common(addr, flags, id, ace.a_access_mask, 3435 ace.a_flags, ace.a_type, verbose)); 3436 } 3437 3438 typedef struct acl_dump_args { 3439 int a_argc; 3440 const mdb_arg_t *a_argv; 3441 uint16_t a_version; 3442 int a_flags; 3443 } acl_dump_args_t; 3444 3445 /* ARGSUSED */ 3446 static int 3447 acl_aces_cb(uintptr_t addr, const void *unknown, void *arg) 3448 { 3449 acl_dump_args_t *acl_args = (acl_dump_args_t *)arg; 3450 3451 if (acl_args->a_version == 1) { 3452 if (mdb_call_dcmd("zfs_ace", addr, 3453 DCMD_ADDRSPEC|acl_args->a_flags, acl_args->a_argc, 3454 acl_args->a_argv) != DCMD_OK) { 3455 return (WALK_ERR); 3456 } 3457 } else { 3458 if (mdb_call_dcmd("zfs_ace0", addr, 3459 DCMD_ADDRSPEC|acl_args->a_flags, acl_args->a_argc, 3460 acl_args->a_argv) != DCMD_OK) { 3461 return (WALK_ERR); 3462 } 3463 } 3464 acl_args->a_flags = DCMD_LOOP; 3465 return (WALK_NEXT); 3466 } 3467 3468 /* ARGSUSED */ 3469 static int 3470 acl_cb(uintptr_t addr, const void *unknown, void *arg) 3471 { 3472 acl_dump_args_t *acl_args = (acl_dump_args_t *)arg; 3473 3474 if (acl_args->a_version == 1) { 3475 if (mdb_pwalk("zfs_acl_node_aces", acl_aces_cb, 3476 arg, addr) != 0) { 3477 mdb_warn("can't walk ACEs"); 3478 return (DCMD_ERR); 3479 } 3480 } else { 3481 if (mdb_pwalk("zfs_acl_node_aces0", acl_aces_cb, 3482 arg, addr) != 0) { 3483 mdb_warn("can't walk ACEs"); 3484 return (DCMD_ERR); 3485 } 3486 } 3487 return (WALK_NEXT); 3488 } 3489 3490 /* ARGSUSED */ 3491 static int 3492 zfs_acl_dump(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3493 { 3494 zfs_acl_t zacl; 3495 int verbose = FALSE; 3496 acl_dump_args_t acl_args; 3497 3498 if (!(flags & DCMD_ADDRSPEC)) 3499 return (DCMD_USAGE); 3500 3501 if (mdb_getopts(argc, argv, 3502 'v', MDB_OPT_SETBITS, TRUE, &verbose, TRUE, NULL) != argc) 3503 return (DCMD_USAGE); 3504 3505 if (mdb_vread(&zacl, sizeof (zfs_acl_t), addr) == -1) { 3506 mdb_warn("failed to read zfs_acl_t"); 3507 return (DCMD_ERR); 3508 } 3509 3510 acl_args.a_argc = argc; 3511 acl_args.a_argv = argv; 3512 acl_args.a_version = zacl.z_version; 3513 acl_args.a_flags = DCMD_LOOPFIRST; 3514 3515 if (mdb_pwalk("zfs_acl_node", acl_cb, &acl_args, addr) != 0) { 3516 mdb_warn("can't walk ACL"); 3517 return (DCMD_ERR); 3518 } 3519 3520 return (DCMD_OK); 3521 } 3522 3523 /* ARGSUSED */ 3524 static int 3525 zfs_acl_node_walk_init(mdb_walk_state_t *wsp) 3526 { 3527 if (wsp->walk_addr == 0) { 3528 mdb_warn("must supply address of zfs_acl_node_t\n"); 3529 return (WALK_ERR); 3530 } 3531 3532 wsp->walk_addr += 3533 mdb_ctf_offsetof_by_name(ZFS_STRUCT "zfs_acl", "z_acl"); 3534 3535 if (mdb_layered_walk("list", wsp) == -1) { 3536 mdb_warn("failed to walk 'list'\n"); 3537 return (WALK_ERR); 3538 } 3539 3540 return (WALK_NEXT); 3541 } 3542 3543 static int 3544 zfs_acl_node_walk_step(mdb_walk_state_t *wsp) 3545 { 3546 zfs_acl_node_t aclnode; 3547 3548 if (mdb_vread(&aclnode, sizeof (zfs_acl_node_t), 3549 wsp->walk_addr) == -1) { 3550 mdb_warn("failed to read zfs_acl_node at %p", wsp->walk_addr); 3551 return (WALK_ERR); 3552 } 3553 3554 return (wsp->walk_callback(wsp->walk_addr, &aclnode, wsp->walk_cbdata)); 3555 } 3556 3557 typedef struct ace_walk_data { 3558 int ace_count; 3559 int ace_version; 3560 } ace_walk_data_t; 3561 3562 static int 3563 zfs_aces_walk_init_common(mdb_walk_state_t *wsp, int version, 3564 int ace_count, uintptr_t ace_data) 3565 { 3566 ace_walk_data_t *ace_walk_data; 3567 3568 if (wsp->walk_addr == 0) { 3569 mdb_warn("must supply address of zfs_acl_node_t\n"); 3570 return (WALK_ERR); 3571 } 3572 3573 ace_walk_data = mdb_alloc(sizeof (ace_walk_data_t), UM_SLEEP | UM_GC); 3574 3575 ace_walk_data->ace_count = ace_count; 3576 ace_walk_data->ace_version = version; 3577 3578 wsp->walk_addr = ace_data; 3579 wsp->walk_data = ace_walk_data; 3580 3581 return (WALK_NEXT); 3582 } 3583 3584 static int 3585 zfs_acl_node_aces_walk_init_common(mdb_walk_state_t *wsp, int version) 3586 { 3587 static int gotid; 3588 static mdb_ctf_id_t acl_id; 3589 int z_ace_count; 3590 uintptr_t z_acldata; 3591 3592 if (!gotid) { 3593 if (mdb_ctf_lookup_by_name("struct zfs_acl_node", 3594 &acl_id) == -1) { 3595 mdb_warn("couldn't find struct zfs_acl_node"); 3596 return (DCMD_ERR); 3597 } 3598 gotid = TRUE; 3599 } 3600 3601 if (GETMEMBID(wsp->walk_addr, &acl_id, z_ace_count, z_ace_count)) { 3602 return (DCMD_ERR); 3603 } 3604 if (GETMEMBID(wsp->walk_addr, &acl_id, z_acldata, z_acldata)) { 3605 return (DCMD_ERR); 3606 } 3607 3608 return (zfs_aces_walk_init_common(wsp, version, 3609 z_ace_count, z_acldata)); 3610 } 3611 3612 /* ARGSUSED */ 3613 static int 3614 zfs_acl_node_aces_walk_init(mdb_walk_state_t *wsp) 3615 { 3616 return (zfs_acl_node_aces_walk_init_common(wsp, 1)); 3617 } 3618 3619 /* ARGSUSED */ 3620 static int 3621 zfs_acl_node_aces0_walk_init(mdb_walk_state_t *wsp) 3622 { 3623 return (zfs_acl_node_aces_walk_init_common(wsp, 0)); 3624 } 3625 3626 static int 3627 zfs_aces_walk_step(mdb_walk_state_t *wsp) 3628 { 3629 ace_walk_data_t *ace_data = wsp->walk_data; 3630 zfs_ace_t zace; 3631 ace_t *acep; 3632 int status; 3633 int entry_type; 3634 int allow_type; 3635 uintptr_t ptr; 3636 3637 if (ace_data->ace_count == 0) 3638 return (WALK_DONE); 3639 3640 if (mdb_vread(&zace, sizeof (zfs_ace_t), wsp->walk_addr) == -1) { 3641 mdb_warn("failed to read zfs_ace_t at %#lx", 3642 wsp->walk_addr); 3643 return (WALK_ERR); 3644 } 3645 3646 switch (ace_data->ace_version) { 3647 case 0: 3648 acep = (ace_t *)&zace; 3649 entry_type = acep->a_flags & ACE_TYPE_FLAGS; 3650 allow_type = acep->a_type; 3651 break; 3652 case 1: 3653 entry_type = zace.z_hdr.z_flags & ACE_TYPE_FLAGS; 3654 allow_type = zace.z_hdr.z_type; 3655 break; 3656 default: 3657 return (WALK_ERR); 3658 } 3659 3660 ptr = (uintptr_t)wsp->walk_addr; 3661 switch (entry_type) { 3662 case ACE_OWNER: 3663 case ACE_EVERYONE: 3664 case (ACE_IDENTIFIER_GROUP | ACE_GROUP): 3665 ptr += ace_data->ace_version == 0 ? 3666 sizeof (ace_t) : sizeof (zfs_ace_hdr_t); 3667 break; 3668 case ACE_IDENTIFIER_GROUP: 3669 default: 3670 switch (allow_type) { 3671 case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: 3672 case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: 3673 case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: 3674 case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: 3675 ptr += ace_data->ace_version == 0 ? 3676 sizeof (ace_t) : sizeof (zfs_object_ace_t); 3677 break; 3678 default: 3679 ptr += ace_data->ace_version == 0 ? 3680 sizeof (ace_t) : sizeof (zfs_ace_t); 3681 break; 3682 } 3683 } 3684 3685 ace_data->ace_count--; 3686 status = wsp->walk_callback(wsp->walk_addr, 3687 (void *)(uintptr_t)&zace, wsp->walk_cbdata); 3688 3689 wsp->walk_addr = ptr; 3690 return (status); 3691 } 3692 3693 typedef struct mdb_zfs_rrwlock { 3694 uintptr_t rr_writer; 3695 boolean_t rr_writer_wanted; 3696 } mdb_zfs_rrwlock_t; 3697 3698 static uint_t rrw_key; 3699 3700 /* ARGSUSED */ 3701 static int 3702 rrwlock(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3703 { 3704 mdb_zfs_rrwlock_t rrw; 3705 3706 if (rrw_key == 0) { 3707 if (mdb_ctf_readsym(&rrw_key, "uint_t", "rrw_tsd_key", 0) == -1) 3708 return (DCMD_ERR); 3709 } 3710 3711 if (mdb_ctf_vread(&rrw, "rrwlock_t", "mdb_zfs_rrwlock_t", addr, 3712 0) == -1) 3713 return (DCMD_ERR); 3714 3715 if (rrw.rr_writer != 0) { 3716 mdb_printf("write lock held by thread %lx\n", rrw.rr_writer); 3717 return (DCMD_OK); 3718 } 3719 3720 if (rrw.rr_writer_wanted) { 3721 mdb_printf("writer wanted\n"); 3722 } 3723 3724 mdb_printf("anonymous references:\n"); 3725 (void) mdb_call_dcmd("refcount", addr + 3726 mdb_ctf_offsetof_by_name(ZFS_STRUCT "rrwlock", "rr_anon_rcount"), 3727 DCMD_ADDRSPEC, 0, NULL); 3728 3729 mdb_printf("linked references:\n"); 3730 (void) mdb_call_dcmd("refcount", addr + 3731 mdb_ctf_offsetof_by_name(ZFS_STRUCT "rrwlock", "rr_linked_rcount"), 3732 DCMD_ADDRSPEC, 0, NULL); 3733 3734 /* 3735 * XXX This should find references from 3736 * "::walk thread | ::tsd -v <rrw_key>", but there is no support 3737 * for programmatic consumption of dcmds, so this would be 3738 * difficult, potentially requiring reimplementing ::tsd (both 3739 * user and kernel versions) in this MDB module. 3740 */ 3741 3742 return (DCMD_OK); 3743 } 3744 3745 typedef struct mdb_arc_buf_hdr_t { 3746 uint16_t b_psize; 3747 uint16_t b_lsize; 3748 struct { 3749 uint32_t b_bufcnt; 3750 uintptr_t b_state; 3751 } b_l1hdr; 3752 } mdb_arc_buf_hdr_t; 3753 3754 enum arc_cflags { 3755 ARC_CFLAG_VERBOSE = 1 << 0, 3756 ARC_CFLAG_ANON = 1 << 1, 3757 ARC_CFLAG_MRU = 1 << 2, 3758 ARC_CFLAG_MFU = 1 << 3, 3759 ARC_CFLAG_BUFS = 1 << 4, 3760 }; 3761 3762 typedef struct arc_compression_stats_data { 3763 GElf_Sym anon_sym; /* ARC_anon symbol */ 3764 GElf_Sym mru_sym; /* ARC_mru symbol */ 3765 GElf_Sym mrug_sym; /* ARC_mru_ghost symbol */ 3766 GElf_Sym mfu_sym; /* ARC_mfu symbol */ 3767 GElf_Sym mfug_sym; /* ARC_mfu_ghost symbol */ 3768 GElf_Sym l2c_sym; /* ARC_l2c_only symbol */ 3769 uint64_t *anon_c_hist; /* histogram of compressed sizes in anon */ 3770 uint64_t *anon_u_hist; /* histogram of uncompressed sizes in anon */ 3771 uint64_t *anon_bufs; /* histogram of buffer counts in anon state */ 3772 uint64_t *mru_c_hist; /* histogram of compressed sizes in mru */ 3773 uint64_t *mru_u_hist; /* histogram of uncompressed sizes in mru */ 3774 uint64_t *mru_bufs; /* histogram of buffer counts in mru */ 3775 uint64_t *mfu_c_hist; /* histogram of compressed sizes in mfu */ 3776 uint64_t *mfu_u_hist; /* histogram of uncompressed sizes in mfu */ 3777 uint64_t *mfu_bufs; /* histogram of buffer counts in mfu */ 3778 uint64_t *all_c_hist; /* histogram of compressed anon + mru + mfu */ 3779 uint64_t *all_u_hist; /* histogram of uncompressed anon + mru + mfu */ 3780 uint64_t *all_bufs; /* histogram of buffer counts in all states */ 3781 int arc_cflags; /* arc compression flags, specified by user */ 3782 int hist_nbuckets; /* number of buckets in each histogram */ 3783 } arc_compression_stats_data_t; 3784 3785 int 3786 highbit64(uint64_t i) 3787 { 3788 int h = 1; 3789 3790 if (i == 0) 3791 return (0); 3792 if (i & 0xffffffff00000000ULL) { 3793 h += 32; i >>= 32; 3794 } 3795 if (i & 0xffff0000) { 3796 h += 16; i >>= 16; 3797 } 3798 if (i & 0xff00) { 3799 h += 8; i >>= 8; 3800 } 3801 if (i & 0xf0) { 3802 h += 4; i >>= 4; 3803 } 3804 if (i & 0xc) { 3805 h += 2; i >>= 2; 3806 } 3807 if (i & 0x2) { 3808 h += 1; 3809 } 3810 return (h); 3811 } 3812 3813 /* ARGSUSED */ 3814 static int 3815 arc_compression_stats_cb(uintptr_t addr, const void *unknown, void *arg) 3816 { 3817 arc_compression_stats_data_t *data = arg; 3818 mdb_arc_buf_hdr_t hdr; 3819 int cbucket, ubucket, bufcnt; 3820 3821 if (mdb_ctf_vread(&hdr, "arc_buf_hdr_t", "mdb_arc_buf_hdr_t", 3822 addr, 0) == -1) { 3823 return (WALK_ERR); 3824 } 3825 3826 /* 3827 * Headers in the ghost states, or the l2c_only state don't have 3828 * arc buffers linked off of them. Thus, their compressed size 3829 * is meaningless, so we skip these from the stats. 3830 */ 3831 if (hdr.b_l1hdr.b_state == data->mrug_sym.st_value || 3832 hdr.b_l1hdr.b_state == data->mfug_sym.st_value || 3833 hdr.b_l1hdr.b_state == data->l2c_sym.st_value) { 3834 return (WALK_NEXT); 3835 } 3836 3837 /* 3838 * The physical size (compressed) and logical size 3839 * (uncompressed) are in units of SPA_MINBLOCKSIZE. By default, 3840 * we use the log2 of this value (rounded down to the nearest 3841 * integer) to determine the bucket to assign this header to. 3842 * Thus, the histogram is logarithmic with respect to the size 3843 * of the header. For example, the following is a mapping of the 3844 * bucket numbers and the range of header sizes they correspond to: 3845 * 3846 * 0: 0 byte headers 3847 * 1: 512 byte headers 3848 * 2: [1024 - 2048) byte headers 3849 * 3: [2048 - 4096) byte headers 3850 * 4: [4096 - 8192) byte headers 3851 * 5: [8192 - 16394) byte headers 3852 * 6: [16384 - 32768) byte headers 3853 * 7: [32768 - 65536) byte headers 3854 * 8: [65536 - 131072) byte headers 3855 * 9: 131072 byte headers 3856 * 3857 * If the ARC_CFLAG_VERBOSE flag was specified, we use the 3858 * physical and logical sizes directly. Thus, the histogram will 3859 * no longer be logarithmic; instead it will be linear with 3860 * respect to the size of the header. The following is a mapping 3861 * of the first many bucket numbers and the header size they 3862 * correspond to: 3863 * 3864 * 0: 0 byte headers 3865 * 1: 512 byte headers 3866 * 2: 1024 byte headers 3867 * 3: 1536 byte headers 3868 * 4: 2048 byte headers 3869 * 5: 2560 byte headers 3870 * 6: 3072 byte headers 3871 * 3872 * And so on. Keep in mind that a range of sizes isn't used in 3873 * the case of linear scale because the headers can only 3874 * increment or decrement in sizes of 512 bytes. So, it's not 3875 * possible for a header to be sized in between whats listed 3876 * above. 3877 * 3878 * Also, the above mapping values were calculated assuming a 3879 * SPA_MINBLOCKSHIFT of 512 bytes and a SPA_MAXBLOCKSIZE of 128K. 3880 */ 3881 3882 if (data->arc_cflags & ARC_CFLAG_VERBOSE) { 3883 cbucket = hdr.b_psize; 3884 ubucket = hdr.b_lsize; 3885 } else { 3886 cbucket = highbit64(hdr.b_psize); 3887 ubucket = highbit64(hdr.b_lsize); 3888 } 3889 3890 bufcnt = hdr.b_l1hdr.b_bufcnt; 3891 if (bufcnt >= data->hist_nbuckets) 3892 bufcnt = data->hist_nbuckets - 1; 3893 3894 /* Ensure we stay within the bounds of the histogram array */ 3895 ASSERT3U(cbucket, <, data->hist_nbuckets); 3896 ASSERT3U(ubucket, <, data->hist_nbuckets); 3897 3898 if (hdr.b_l1hdr.b_state == data->anon_sym.st_value) { 3899 data->anon_c_hist[cbucket]++; 3900 data->anon_u_hist[ubucket]++; 3901 data->anon_bufs[bufcnt]++; 3902 } else if (hdr.b_l1hdr.b_state == data->mru_sym.st_value) { 3903 data->mru_c_hist[cbucket]++; 3904 data->mru_u_hist[ubucket]++; 3905 data->mru_bufs[bufcnt]++; 3906 } else if (hdr.b_l1hdr.b_state == data->mfu_sym.st_value) { 3907 data->mfu_c_hist[cbucket]++; 3908 data->mfu_u_hist[ubucket]++; 3909 data->mfu_bufs[bufcnt]++; 3910 } 3911 3912 data->all_c_hist[cbucket]++; 3913 data->all_u_hist[ubucket]++; 3914 data->all_bufs[bufcnt]++; 3915 3916 return (WALK_NEXT); 3917 } 3918 3919 /* ARGSUSED */ 3920 static int 3921 arc_compression_stats(uintptr_t addr, uint_t flags, int argc, 3922 const mdb_arg_t *argv) 3923 { 3924 arc_compression_stats_data_t data = { 0 }; 3925 unsigned int max_shifted = SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; 3926 unsigned int hist_size; 3927 char range[32]; 3928 int rc = DCMD_OK; 3929 3930 if (mdb_getopts(argc, argv, 3931 'v', MDB_OPT_SETBITS, ARC_CFLAG_VERBOSE, &data.arc_cflags, 3932 'a', MDB_OPT_SETBITS, ARC_CFLAG_ANON, &data.arc_cflags, 3933 'b', MDB_OPT_SETBITS, ARC_CFLAG_BUFS, &data.arc_cflags, 3934 'r', MDB_OPT_SETBITS, ARC_CFLAG_MRU, &data.arc_cflags, 3935 'f', MDB_OPT_SETBITS, ARC_CFLAG_MFU, &data.arc_cflags) != argc) 3936 return (DCMD_USAGE); 3937 3938 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_anon", &data.anon_sym) || 3939 mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_mru", &data.mru_sym) || 3940 mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_mru_ghost", &data.mrug_sym) || 3941 mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_mfu", &data.mfu_sym) || 3942 mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_mfu_ghost", &data.mfug_sym) || 3943 mdb_lookup_by_obj(ZFS_OBJ_NAME, "ARC_l2c_only", &data.l2c_sym)) { 3944 mdb_warn("can't find arc state symbol"); 3945 return (DCMD_ERR); 3946 } 3947 3948 /* 3949 * Determine the maximum expected size for any header, and use 3950 * this to determine the number of buckets needed for each 3951 * histogram. If ARC_CFLAG_VERBOSE is specified, this value is 3952 * used directly; otherwise the log2 of the maximum size is 3953 * used. Thus, if using a log2 scale there's a maximum of 10 3954 * possible buckets, while the linear scale (when using 3955 * ARC_CFLAG_VERBOSE) has a maximum of 257 buckets. 3956 */ 3957 if (data.arc_cflags & ARC_CFLAG_VERBOSE) 3958 data.hist_nbuckets = max_shifted + 1; 3959 else 3960 data.hist_nbuckets = highbit64(max_shifted) + 1; 3961 3962 hist_size = sizeof (uint64_t) * data.hist_nbuckets; 3963 3964 data.anon_c_hist = mdb_zalloc(hist_size, UM_SLEEP); 3965 data.anon_u_hist = mdb_zalloc(hist_size, UM_SLEEP); 3966 data.anon_bufs = mdb_zalloc(hist_size, UM_SLEEP); 3967 3968 data.mru_c_hist = mdb_zalloc(hist_size, UM_SLEEP); 3969 data.mru_u_hist = mdb_zalloc(hist_size, UM_SLEEP); 3970 data.mru_bufs = mdb_zalloc(hist_size, UM_SLEEP); 3971 3972 data.mfu_c_hist = mdb_zalloc(hist_size, UM_SLEEP); 3973 data.mfu_u_hist = mdb_zalloc(hist_size, UM_SLEEP); 3974 data.mfu_bufs = mdb_zalloc(hist_size, UM_SLEEP); 3975 3976 data.all_c_hist = mdb_zalloc(hist_size, UM_SLEEP); 3977 data.all_u_hist = mdb_zalloc(hist_size, UM_SLEEP); 3978 data.all_bufs = mdb_zalloc(hist_size, UM_SLEEP); 3979 3980 if (mdb_walk("arc_buf_hdr_t_full", arc_compression_stats_cb, 3981 &data) != 0) { 3982 mdb_warn("can't walk arc_buf_hdr's"); 3983 rc = DCMD_ERR; 3984 goto out; 3985 } 3986 3987 if (data.arc_cflags & ARC_CFLAG_VERBOSE) { 3988 rc = mdb_snprintf(range, sizeof (range), 3989 "[n*%llu, (n+1)*%llu)", SPA_MINBLOCKSIZE, 3990 SPA_MINBLOCKSIZE); 3991 } else { 3992 rc = mdb_snprintf(range, sizeof (range), 3993 "[2^(n-1)*%llu, 2^n*%llu)", SPA_MINBLOCKSIZE, 3994 SPA_MINBLOCKSIZE); 3995 } 3996 3997 if (rc < 0) { 3998 /* snprintf failed, abort the dcmd */ 3999 rc = DCMD_ERR; 4000 goto out; 4001 } else { 4002 /* snprintf succeeded above, reset return code */ 4003 rc = DCMD_OK; 4004 } 4005 4006 if (data.arc_cflags & ARC_CFLAG_ANON) { 4007 if (data.arc_cflags & ARC_CFLAG_BUFS) { 4008 mdb_printf("Histogram of the number of anon buffers " 4009 "that are associated with an arc hdr.\n"); 4010 dump_histogram(data.anon_bufs, data.hist_nbuckets, 0); 4011 mdb_printf("\n"); 4012 } 4013 mdb_printf("Histogram of compressed anon buffers.\n" 4014 "Each bucket represents buffers of size: %s.\n", range); 4015 dump_histogram(data.anon_c_hist, data.hist_nbuckets, 0); 4016 mdb_printf("\n"); 4017 4018 mdb_printf("Histogram of uncompressed anon buffers.\n" 4019 "Each bucket represents buffers of size: %s.\n", range); 4020 dump_histogram(data.anon_u_hist, data.hist_nbuckets, 0); 4021 mdb_printf("\n"); 4022 } 4023 4024 if (data.arc_cflags & ARC_CFLAG_MRU) { 4025 if (data.arc_cflags & ARC_CFLAG_BUFS) { 4026 mdb_printf("Histogram of the number of mru buffers " 4027 "that are associated with an arc hdr.\n"); 4028 dump_histogram(data.mru_bufs, data.hist_nbuckets, 0); 4029 mdb_printf("\n"); 4030 } 4031 mdb_printf("Histogram of compressed mru buffers.\n" 4032 "Each bucket represents buffers of size: %s.\n", range); 4033 dump_histogram(data.mru_c_hist, data.hist_nbuckets, 0); 4034 mdb_printf("\n"); 4035 4036 mdb_printf("Histogram of uncompressed mru buffers.\n" 4037 "Each bucket represents buffers of size: %s.\n", range); 4038 dump_histogram(data.mru_u_hist, data.hist_nbuckets, 0); 4039 mdb_printf("\n"); 4040 } 4041 4042 if (data.arc_cflags & ARC_CFLAG_MFU) { 4043 if (data.arc_cflags & ARC_CFLAG_BUFS) { 4044 mdb_printf("Histogram of the number of mfu buffers " 4045 "that are associated with an arc hdr.\n"); 4046 dump_histogram(data.mfu_bufs, data.hist_nbuckets, 0); 4047 mdb_printf("\n"); 4048 } 4049 4050 mdb_printf("Histogram of compressed mfu buffers.\n" 4051 "Each bucket represents buffers of size: %s.\n", range); 4052 dump_histogram(data.mfu_c_hist, data.hist_nbuckets, 0); 4053 mdb_printf("\n"); 4054 4055 mdb_printf("Histogram of uncompressed mfu buffers.\n" 4056 "Each bucket represents buffers of size: %s.\n", range); 4057 dump_histogram(data.mfu_u_hist, data.hist_nbuckets, 0); 4058 mdb_printf("\n"); 4059 } 4060 4061 if (data.arc_cflags & ARC_CFLAG_BUFS) { 4062 mdb_printf("Histogram of all buffers that " 4063 "are associated with an arc hdr.\n"); 4064 dump_histogram(data.all_bufs, data.hist_nbuckets, 0); 4065 mdb_printf("\n"); 4066 } 4067 4068 mdb_printf("Histogram of all compressed buffers.\n" 4069 "Each bucket represents buffers of size: %s.\n", range); 4070 dump_histogram(data.all_c_hist, data.hist_nbuckets, 0); 4071 mdb_printf("\n"); 4072 4073 mdb_printf("Histogram of all uncompressed buffers.\n" 4074 "Each bucket represents buffers of size: %s.\n", range); 4075 dump_histogram(data.all_u_hist, data.hist_nbuckets, 0); 4076 4077 out: 4078 mdb_free(data.anon_c_hist, hist_size); 4079 mdb_free(data.anon_u_hist, hist_size); 4080 mdb_free(data.anon_bufs, hist_size); 4081 4082 mdb_free(data.mru_c_hist, hist_size); 4083 mdb_free(data.mru_u_hist, hist_size); 4084 mdb_free(data.mru_bufs, hist_size); 4085 4086 mdb_free(data.mfu_c_hist, hist_size); 4087 mdb_free(data.mfu_u_hist, hist_size); 4088 mdb_free(data.mfu_bufs, hist_size); 4089 4090 mdb_free(data.all_c_hist, hist_size); 4091 mdb_free(data.all_u_hist, hist_size); 4092 mdb_free(data.all_bufs, hist_size); 4093 4094 return (rc); 4095 } 4096 4097 /* 4098 * MDB module linkage information: 4099 * 4100 * We declare a list of structures describing our dcmds, and a function 4101 * named _mdb_init to return a pointer to our module information. 4102 */ 4103 4104 static const mdb_dcmd_t dcmds[] = { 4105 { "arc", "[-bkmg]", "print ARC variables", arc_print }, 4106 { "blkptr", ":", "print blkptr_t", blkptr }, 4107 { "dva", ":", "print dva_t", dva }, 4108 { "dbuf", ":", "print dmu_buf_impl_t", dbuf }, 4109 { "dbuf_stats", ":", "dbuf stats", dbuf_stats }, 4110 { "dbufs", 4111 "\t[-O objset_t*] [-n objset_name | \"mos\"] " 4112 "[-o object | \"mdn\"] \n" 4113 "\t[-l level] [-b blkid | \"bonus\"]", 4114 "find dmu_buf_impl_t's that match specified criteria", dbufs }, 4115 { "abuf_find", "dva_word[0] dva_word[1]", 4116 "find arc_buf_hdr_t of a specified DVA", 4117 abuf_find }, 4118 { "spa", "?[-cevmMh]\n" 4119 "\t-c display spa config\n" 4120 "\t-e display vdev statistics\n" 4121 "\t-v display vdev information\n" 4122 "\t-m display metaslab statistics\n" 4123 "\t-M display metaslab group statistics\n" 4124 "\t-h display histogram (requires -m or -M)\n", 4125 "spa_t summary", spa_print }, 4126 { "spa_config", ":", "print spa_t configuration", spa_print_config }, 4127 { "spa_space", ":[-b]", "print spa_t on-disk space usage", spa_space }, 4128 { "spa_vdevs", ":[-emMh]\n" 4129 "\t-e display vdev statistics\n" 4130 "\t-m dispaly metaslab statistics\n" 4131 "\t-M display metaslab group statistic\n" 4132 "\t-h display histogram (requires -m or -M)\n", 4133 "given a spa_t, print vdev summary", spa_vdevs }, 4134 { "sm_entries", "<buffer length in bytes>", 4135 "print out space map entries from a buffer decoded", 4136 sm_entries}, 4137 { "vdev", ":[-remMh]\n" 4138 "\t-r display recursively\n" 4139 "\t-e display statistics\n" 4140 "\t-m display metaslab statistics (top level vdev only)\n" 4141 "\t-M display metaslab group statistics (top level vdev only)\n" 4142 "\t-h display histogram (requires -m or -M)\n", 4143 "vdev_t summary", vdev_print }, 4144 { "zio", ":[-cpr]\n" 4145 "\t-c display children\n" 4146 "\t-p display parents\n" 4147 "\t-r display recursively", 4148 "zio_t summary", zio_print }, 4149 { "zio_state", "?", "print out all zio_t structures on system or " 4150 "for a particular pool", zio_state }, 4151 { "zfs_blkstats", ":[-v]", 4152 "given a spa_t, print block type stats from last scrub", 4153 zfs_blkstats }, 4154 { "zfs_params", "", "print zfs tunable parameters", zfs_params }, 4155 { "refcount", ":[-r]\n" 4156 "\t-r display recently removed references", 4157 "print refcount_t holders", refcount }, 4158 { "zap_leaf", "", "print zap_leaf_phys_t", zap_leaf }, 4159 { "zfs_aces", ":[-v]", "print all ACEs from a zfs_acl_t", 4160 zfs_acl_dump }, 4161 { "zfs_ace", ":[-v]", "print zfs_ace", zfs_ace_print }, 4162 { "zfs_ace0", ":[-v]", "print zfs_ace0", zfs_ace0_print }, 4163 { "sa_attr_table", ":", "print SA attribute table from sa_os_t", 4164 sa_attr_table}, 4165 { "sa_attr", ": attr_id", 4166 "print SA attribute address when given sa_handle_t", sa_attr_print}, 4167 { "zfs_dbgmsg", ":[-va]", 4168 "print zfs debug log", dbgmsg}, 4169 { "rrwlock", ":", 4170 "print rrwlock_t, including readers", rrwlock}, 4171 { "metaslab_weight", "weight", 4172 "print metaslab weight", metaslab_weight}, 4173 { "metaslab_trace", ":", 4174 "print metaslab allocation trace records", metaslab_trace}, 4175 { "arc_compression_stats", ":[-vabrf]\n" 4176 "\t-v verbose, display a linearly scaled histogram\n" 4177 "\t-a display ARC_anon state statistics individually\n" 4178 "\t-r display ARC_mru state statistics individually\n" 4179 "\t-f display ARC_mfu state statistics individually\n" 4180 "\t-b display histogram of buffer counts\n", 4181 "print a histogram of compressed arc buffer sizes", 4182 arc_compression_stats}, 4183 { NULL } 4184 }; 4185 4186 static const mdb_walker_t walkers[] = { 4187 { "txg_list", "given any txg_list_t *, walk all entries in all txgs", 4188 txg_list_walk_init, txg_list_walk_step, NULL }, 4189 { "txg_list0", "given any txg_list_t *, walk all entries in txg 0", 4190 txg_list0_walk_init, txg_list_walk_step, NULL }, 4191 { "txg_list1", "given any txg_list_t *, walk all entries in txg 1", 4192 txg_list1_walk_init, txg_list_walk_step, NULL }, 4193 { "txg_list2", "given any txg_list_t *, walk all entries in txg 2", 4194 txg_list2_walk_init, txg_list_walk_step, NULL }, 4195 { "txg_list3", "given any txg_list_t *, walk all entries in txg 3", 4196 txg_list3_walk_init, txg_list_walk_step, NULL }, 4197 { "zio", "walk all zio structures, optionally for a particular spa_t", 4198 zio_walk_init, zio_walk_step, NULL }, 4199 { "zio_root", 4200 "walk all root zio_t structures, optionally for a particular spa_t", 4201 zio_walk_init, zio_walk_root_step, NULL }, 4202 { "spa", "walk all spa_t entries in the namespace", 4203 spa_walk_init, spa_walk_step, NULL }, 4204 { "metaslab", "given a spa_t *, walk all metaslab_t structures", 4205 metaslab_walk_init, metaslab_walk_step, NULL }, 4206 { "multilist", "given a multilist_t *, walk all list_t structures", 4207 multilist_walk_init, multilist_walk_step, NULL }, 4208 { "zfs_acl_node", "given a zfs_acl_t, walk all zfs_acl_nodes", 4209 zfs_acl_node_walk_init, zfs_acl_node_walk_step, NULL }, 4210 { "zfs_acl_node_aces", "given a zfs_acl_node_t, walk all ACEs", 4211 zfs_acl_node_aces_walk_init, zfs_aces_walk_step, NULL }, 4212 { "zfs_acl_node_aces0", 4213 "given a zfs_acl_node_t, walk all ACEs as ace_t", 4214 zfs_acl_node_aces0_walk_init, zfs_aces_walk_step, NULL }, 4215 { NULL } 4216 }; 4217 4218 static const mdb_modinfo_t modinfo = { 4219 MDB_API_VERSION, dcmds, walkers 4220 }; 4221 4222 const mdb_modinfo_t * 4223 _mdb_init(void) 4224 { 4225 return (&modinfo); 4226 } 4227