xref: /titanic_50/usr/src/cmd/mdb/common/modules/zfs/zfs.c (revision d89fccd8788afe1e920f842edd883fe192a1b8fe)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <mdb/mdb_ctf.h>
29 #include <sys/zfs_context.h>
30 #include <sys/mdb_modapi.h>
31 #include <sys/dbuf.h>
32 #include <sys/dmu_objset.h>
33 #include <sys/dsl_dir.h>
34 #include <sys/dsl_pool.h>
35 #include <sys/metaslab_impl.h>
36 #include <sys/space_map.h>
37 #include <sys/list.h>
38 #include <sys/spa_impl.h>
39 #include <sys/vdev_impl.h>
40 #include <sys/zio_compress.h>
41 
42 #ifndef _KERNEL
43 #include "../genunix/list.h"
44 #endif
45 
/*
 * Object name for ZFS: "zfs" when this module is compiled with _KERNEL
 * defined, "libzpool.so.1" otherwise.
 * NOTE(review): presumably the object whose symbols this dmod resolves;
 * no use of the macro is visible in this part of the file -- confirm.
 */
#ifdef _KERNEL
#define	ZFS_OBJ_NAME	"zfs"
#else
#define	ZFS_OBJ_NAME	"libzpool.so.1"
#endif
51 
52 static char *
53 local_strdup(const char *s)
54 {
55 	char *s1 = mdb_alloc(strlen(s) + 1, UM_SLEEP);
56 
57 	(void) strcpy(s1, s);
58 	return (s1);
59 }
60 
61 static int
62 getmember(uintptr_t addr, const char *type, mdb_ctf_id_t *idp,
63     const char *member, int len, void *buf)
64 {
65 	mdb_ctf_id_t id;
66 	ulong_t off;
67 	char name[64];
68 
69 	if (idp == NULL) {
70 		if (mdb_ctf_lookup_by_name(type, &id) == -1) {
71 			mdb_warn("couldn't find type %s", type);
72 			return (DCMD_ERR);
73 		}
74 		idp = &id;
75 	} else {
76 		type = name;
77 		mdb_ctf_type_name(*idp, name, sizeof (name));
78 	}
79 
80 	if (mdb_ctf_offsetof(*idp, member, &off) == -1) {
81 		mdb_warn("couldn't find member %s of type %s\n", member, type);
82 		return (DCMD_ERR);
83 	}
84 	if (off % 8 != 0) {
85 		mdb_warn("member %s of type %s is unsupported bitfield",
86 		    member, type);
87 		return (DCMD_ERR);
88 	}
89 	off /= 8;
90 
91 	if (mdb_vread(buf, len, addr + off) == -1) {
92 		mdb_warn("failed to read %s from %s at %p",
93 		    member, type, addr + off);
94 		return (DCMD_ERR);
95 	}
96 	/* mdb_warn("read %s from %s at %p+%llx\n", member, type, addr, off); */
97 
98 	return (0);
99 }
100 
/*
 * Convenience wrappers around getmember().  Both stringify 'member' and
 * read sizeof (dest) bytes into 'dest'.
 *
 * GETMEMB names the containing type by its (stringified) type name;
 * GETMEMBID passes an already looked-up mdb_ctf_id_t pointer instead.
 */
#define	GETMEMB(addr, type, member, dest) \
	getmember(addr, #type, NULL, #member, sizeof (dest), &(dest))

#define	GETMEMBID(addr, ctfid, member, dest) \
	getmember(addr, NULL, ctfid, #member, sizeof (dest), &(dest))
106 
107 static int
108 getrefcount(uintptr_t addr, mdb_ctf_id_t *id,
109     const char *member, uint64_t *rc)
110 {
111 	static int gotid;
112 	static mdb_ctf_id_t rc_id;
113 	ulong_t off;
114 
115 	if (!gotid) {
116 		if (mdb_ctf_lookup_by_name("struct refcount", &rc_id) == -1) {
117 			mdb_warn("couldn't find struct refcount");
118 			return (DCMD_ERR);
119 		}
120 		gotid = TRUE;
121 	}
122 
123 	if (mdb_ctf_offsetof(*id, member, &off) == -1) {
124 		char name[64];
125 		mdb_ctf_type_name(*id, name, sizeof (name));
126 		mdb_warn("couldn't find member %s of type %s\n", member, name);
127 		return (DCMD_ERR);
128 	}
129 	off /= 8;
130 
131 	return (GETMEMBID(addr + off, &rc_id, rc_count, *rc));
132 }
133 
134 static int
135 read_symbol(char *sym_name, void **bufp)
136 {
137 	GElf_Sym sym;
138 
139 	if (mdb_lookup_by_obj(MDB_TGT_OBJ_EVERY, sym_name, &sym)) {
140 		mdb_warn("can't find symbol %s", sym_name);
141 		return (DCMD_ERR);
142 	}
143 
144 	*bufp = mdb_alloc(sym.st_size, UM_SLEEP);
145 
146 	if (mdb_vread(*bufp, sym.st_size, sym.st_value) == -1) {
147 		mdb_warn("can't read data for symbol %s", sym_name);
148 		mdb_free(*bufp, sym.st_size);
149 		return (DCMD_ERR);
150 	}
151 
152 	return (DCMD_OK);
153 }
154 
/*
 * When nonzero, freelist_walk_step() also prints the raw 64-bit value of
 * each non-debug entry.  Nothing in the visible part of this file sets it.
 */
static int verbose;
156 
157 static int
158 freelist_walk_init(mdb_walk_state_t *wsp)
159 {
160 	if (wsp->walk_addr == NULL) {
161 		mdb_warn("must supply starting address\n");
162 		return (WALK_ERR);
163 	}
164 
165 	wsp->walk_data = 0;  /* Index into the freelist */
166 	return (WALK_NEXT);
167 }
168 
169 static int
170 freelist_walk_step(mdb_walk_state_t *wsp)
171 {
172 	uint64_t entry;
173 	uintptr_t number = (uintptr_t)wsp->walk_data;
174 	char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID" };
175 	int mapshift = SPA_MINBLOCKSHIFT;
176 
177 	if (mdb_vread(&entry, sizeof (entry), wsp->walk_addr) == -1) {
178 		mdb_warn("failed to read freelist entry %p", wsp->walk_addr);
179 		return (WALK_DONE);
180 	}
181 	wsp->walk_addr += sizeof (entry);
182 	wsp->walk_data = (void *)(number + 1);
183 
184 	if (SM_DEBUG_DECODE(entry)) {
185 		mdb_printf("DEBUG: %3u  %10s: txg=%llu  pass=%llu\n",
186 		    number,
187 		    ddata[SM_DEBUG_ACTION_DECODE(entry)],
188 		    SM_DEBUG_TXG_DECODE(entry),
189 		    SM_DEBUG_SYNCPASS_DECODE(entry));
190 	} else {
191 		mdb_printf("Entry: %3u  offsets=%08llx-%08llx  type=%c  "
192 		    "size=%06llx", number,
193 		    SM_OFFSET_DECODE(entry) << mapshift,
194 		    (SM_OFFSET_DECODE(entry) + SM_RUN_DECODE(entry)) <<
195 		    mapshift,
196 		    SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
197 		    SM_RUN_DECODE(entry) << mapshift);
198 		if (verbose)
199 			mdb_printf("      (raw=%012llx)\n", entry);
200 		mdb_printf("\n");
201 	}
202 	return (WALK_NEXT);
203 }
204 
/*
 * Walker teardown for the freelist walk.  The body is empty: the walk
 * keeps its only state (an index) directly in walk_data, so there is
 * nothing to free.
 */
/* ARGSUSED */
static void
freelist_walk_fini(mdb_walk_state_t *wsp)
{
}
210 
/*
 * Per-walk state for the dbuf hash table walker (dbuf_walk_init/step/fini).
 */
typedef struct dbuf_walk_data {
	dbuf_hash_table_t ht;	/* local copy of the target's dbuf_hash_table */
	int64_t bucket;		/* current bucket index; -1 before the first */
	uintptr_t dbp;		/* target address of next dbuf, 0 if none */
	dmu_buf_impl_t db;	/* local copy of the dbuf most recently read */
} dbuf_walk_data_t;
217 
218 static int
219 dbuf_walk_init(mdb_walk_state_t *wsp)
220 {
221 	dbuf_walk_data_t *dwd;
222 
223 	if (wsp->walk_addr != NULL) {
224 		mdb_warn("must supply starting address\n");
225 		return (WALK_ERR);
226 	}
227 
228 	dwd = mdb_alloc(sizeof (dbuf_walk_data_t), UM_SLEEP);
229 
230 	if (mdb_readvar(&dwd->ht, "dbuf_hash_table") == -1) {
231 		mdb_warn("failed to read 'dbuf_hash_table'");
232 		mdb_free(dwd, sizeof (dbuf_walk_data_t));
233 		return (WALK_ERR);
234 	}
235 	dwd->bucket = -1;
236 	dwd->dbp = 0;
237 	wsp->walk_data = dwd;
238 	return (WALK_NEXT);
239 }
240 
241 static int
242 dbuf_walk_step(mdb_walk_state_t *wsp)
243 {
244 	int status;
245 	dbuf_walk_data_t *dwd = wsp->walk_data;
246 
247 	while (dwd->dbp == 0) {
248 		dwd->bucket++;
249 		if (dwd->bucket == dwd->ht.hash_table_mask+1)
250 			return (WALK_DONE);
251 
252 		if (mdb_vread(&dwd->dbp, sizeof (void *),
253 		    (uintptr_t)(dwd->ht.hash_table+dwd->bucket)) == -1) {
254 			mdb_warn("failed to read hash bucket %u at %p",
255 			    dwd->bucket, dwd->ht.hash_table+dwd->bucket);
256 			return (WALK_DONE);
257 		}
258 	}
259 
260 	wsp->walk_addr = dwd->dbp;
261 	if (mdb_vread(&dwd->db, sizeof (dmu_buf_impl_t),
262 	    wsp->walk_addr) == -1) {
263 		mdb_warn("failed to read dbuf at %p", wsp->walk_addr);
264 		return (WALK_DONE);
265 	}
266 	status = wsp->walk_callback(wsp->walk_addr, &dwd->db, wsp->walk_cbdata);
267 
268 	dwd->dbp = (uintptr_t)dwd->db.db_hash_next;
269 	return (status);
270 }
271 
272 static void
273 dbuf_walk_fini(mdb_walk_state_t *wsp)
274 {
275 	dbuf_walk_data_t *dwd = wsp->walk_data;
276 	mdb_free(dwd, sizeof (dbuf_walk_data_t));
277 }
278 
279 static int
280 dataset_name(uintptr_t addr, char *buf)
281 {
282 	static int gotid;
283 	static mdb_ctf_id_t dd_id;
284 	uintptr_t dd_parent;
285 	char dd_myname[MAXNAMELEN];
286 
287 	if (!gotid) {
288 		if (mdb_ctf_lookup_by_name("struct dsl_dir",
289 		    &dd_id) == -1) {
290 			mdb_warn("couldn't find struct dsl_dir");
291 			return (DCMD_ERR);
292 		}
293 		gotid = TRUE;
294 	}
295 	if (GETMEMBID(addr, &dd_id, dd_parent, dd_parent) ||
296 	    GETMEMBID(addr, &dd_id, dd_myname, dd_myname)) {
297 		return (DCMD_ERR);
298 	}
299 
300 	if (dd_parent) {
301 		if (dataset_name(dd_parent, buf))
302 			return (DCMD_ERR);
303 		strcat(buf, "/");
304 	}
305 
306 	if (dd_myname[0])
307 		strcat(buf, dd_myname);
308 	else
309 		strcat(buf, "???");
310 
311 	return (0);
312 }
313 
314 static int
315 objset_name(uintptr_t addr, char *buf)
316 {
317 	static int gotid;
318 	static mdb_ctf_id_t osi_id, ds_id;
319 	uintptr_t os_dsl_dataset;
320 	char ds_snapname[MAXNAMELEN];
321 	uintptr_t ds_dir;
322 
323 	buf[0] = '\0';
324 
325 	if (!gotid) {
326 		if (mdb_ctf_lookup_by_name("struct objset_impl",
327 		    &osi_id) == -1) {
328 			mdb_warn("couldn't find struct objset_impl");
329 			return (DCMD_ERR);
330 		}
331 		if (mdb_ctf_lookup_by_name("struct dsl_dataset",
332 		    &ds_id) == -1) {
333 			mdb_warn("couldn't find struct dsl_dataset");
334 			return (DCMD_ERR);
335 		}
336 
337 		gotid = TRUE;
338 	}
339 
340 	if (GETMEMBID(addr, &osi_id, os_dsl_dataset, os_dsl_dataset))
341 		return (DCMD_ERR);
342 
343 	if (os_dsl_dataset == 0) {
344 		strcat(buf, "mos");
345 		return (0);
346 	}
347 
348 	if (GETMEMBID(os_dsl_dataset, &ds_id, ds_snapname, ds_snapname) ||
349 	    GETMEMBID(os_dsl_dataset, &ds_id, ds_dir, ds_dir)) {
350 		return (DCMD_ERR);
351 	}
352 
353 	if (ds_dir && dataset_name(ds_dir, buf))
354 		return (DCMD_ERR);
355 
356 	if (ds_snapname[0]) {
357 		strcat(buf, "@");
358 		strcat(buf, ds_snapname);
359 	}
360 	return (0);
361 }
362 
363 static void
364 enum_lookup(char *out, size_t size, mdb_ctf_id_t id, int val,
365     const char *prefix)
366 {
367 	const char *cp;
368 	size_t len = strlen(prefix);
369 
370 	if ((cp = mdb_ctf_enum_name(id, val)) != NULL) {
371 		if (strncmp(cp, prefix, len) == 0)
372 			cp += len;
373 		(void) strncpy(out, cp, size);
374 	} else {
375 		mdb_snprintf(out, size, "? (%d)", val);
376 	}
377 }
378 
379 /* ARGSUSED */
380 static int
381 zio_pipeline(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
382 {
383 	mdb_ctf_id_t pipe_enum;
384 	int i;
385 	char stage[1024];
386 
387 	if (mdb_ctf_lookup_by_name("enum zio_stage", &pipe_enum) == -1) {
388 		mdb_warn("Could not find enum zio_stage");
389 		return (DCMD_ERR);
390 	}
391 
392 	for (i = 0; i < 32; i++) {
393 		if (addr & (1U << i)) {
394 			enum_lookup(stage, sizeof (stage), pipe_enum, i,
395 			    "ZIO_STAGE_");
396 			mdb_printf("    %s\n", stage);
397 		}
398 	}
399 
400 	return (DCMD_OK);
401 }
402 
403 /* ARGSUSED */
404 static int
405 blkptr(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
406 {
407 	blkptr_t bp;
408 	dmu_object_type_info_t *doti;
409 	zio_compress_info_t *zct;
410 	zio_checksum_info_t *zci;
411 	int i;
412 	char buf[MAXPATHLEN];
413 
414 	if (mdb_vread(&bp, sizeof (blkptr_t), addr) == -1) {
415 		mdb_warn("failed to read blkptr_t");
416 		return (DCMD_ERR);
417 	}
418 
419 	if (read_symbol("dmu_ot", (void **)&doti) != DCMD_OK)
420 		return (DCMD_ERR);
421 	for (i = 0; i < DMU_OT_NUMTYPES; i++) {
422 		mdb_readstr(buf, sizeof (buf), (uintptr_t)doti[i].ot_name);
423 		doti[i].ot_name = local_strdup(buf);
424 	}
425 
426 	if (read_symbol("zio_checksum_table", (void **)&zci) != DCMD_OK)
427 		return (DCMD_ERR);
428 	for (i = 0; i < ZIO_CHECKSUM_FUNCTIONS; i++) {
429 		mdb_readstr(buf, sizeof (buf), (uintptr_t)zci[i].ci_name);
430 		zci[i].ci_name = local_strdup(buf);
431 	}
432 
433 	if (read_symbol("zio_compress_table", (void **)&zct) != DCMD_OK)
434 		return (DCMD_ERR);
435 	for (i = 0; i < ZIO_COMPRESS_FUNCTIONS; i++) {
436 		mdb_readstr(buf, sizeof (buf), (uintptr_t)zct[i].ci_name);
437 		zct[i].ci_name = local_strdup(buf);
438 	}
439 
440 	/*
441 	 * Super-ick warning:  This code is also duplicated in
442 	 * cmd/zdb.c .   Yeah, I hate code replication, too.
443 	 */
444 	for (i = 0; i < BP_GET_NDVAS(&bp); i++) {
445 		dva_t *dva = &bp.blk_dva[i];
446 
447 		mdb_printf("DVA[%d]: vdev_id %lld / %llx\n", i,
448 		    DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva));
449 		mdb_printf("DVA[%d]:       GANG: %-5s  GRID:  %04x\t"
450 		    "ASIZE: %llx\n", i, DVA_GET_GANG(dva) ? "TRUE" : "FALSE",
451 		    DVA_GET_GRID(dva), DVA_GET_ASIZE(dva));
452 		mdb_printf("DVA[%d]: :%llu:%llx:%llx:%s%s%s%s\n", i,
453 		    DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva), BP_GET_PSIZE(&bp),
454 		    BP_SHOULD_BYTESWAP(&bp) ? "e" : "",
455 		    !DVA_GET_GANG(dva) && BP_GET_LEVEL(&bp) != 0 ? "i" : "",
456 		    DVA_GET_GANG(dva) ? "g" : "",
457 		    BP_GET_COMPRESS(&bp) != 0 ? "d" : "");
458 	}
459 	mdb_printf("LSIZE:  %-16llx\t\tPSIZE: %llx\n",
460 	    BP_GET_LSIZE(&bp), BP_GET_PSIZE(&bp));
461 	mdb_printf("ENDIAN: %6s\t\t\t\t\tTYPE:  %s\n",
462 	    BP_GET_BYTEORDER(&bp) ? "LITTLE" : "BIG",
463 	    doti[BP_GET_TYPE(&bp)].ot_name);
464 	mdb_printf("BIRTH:  %-16llx   LEVEL: %-2d\tFILL:  %llx\n",
465 	    bp.blk_birth, BP_GET_LEVEL(&bp), bp.blk_fill);
466 	mdb_printf("CKFUNC: %-16s\t\tCOMP:  %s\n",
467 	    zci[BP_GET_CHECKSUM(&bp)].ci_name,
468 	    zct[BP_GET_COMPRESS(&bp)].ci_name);
469 	mdb_printf("CKSUM:  %llx:%llx:%llx:%llx\n",
470 	    bp.blk_cksum.zc_word[0],
471 	    bp.blk_cksum.zc_word[1],
472 	    bp.blk_cksum.zc_word[2],
473 	    bp.blk_cksum.zc_word[3]);
474 
475 	return (DCMD_OK);
476 }
477 
478 /* ARGSUSED */
479 static int
480 dbuf(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
481 {
482 	mdb_ctf_id_t id;
483 	dmu_buf_t db;
484 	uintptr_t objset;
485 	uint8_t level;
486 	uint64_t blkid;
487 	uint64_t holds;
488 	char objectname[32];
489 	char blkidname[32];
490 	char path[MAXNAMELEN];
491 
492 	if (DCMD_HDRSPEC(flags)) {
493 		mdb_printf("        addr object lvl blkid holds os\n");
494 	}
495 
496 	if (mdb_ctf_lookup_by_name("struct dmu_buf_impl", &id) == -1) {
497 		mdb_warn("couldn't find struct dmu_buf_impl_t");
498 		return (DCMD_ERR);
499 	}
500 
501 	if (GETMEMBID(addr, &id, db_objset, objset) ||
502 	    GETMEMBID(addr, &id, db, db) ||
503 	    GETMEMBID(addr, &id, db_level, level) ||
504 	    GETMEMBID(addr, &id, db_blkid, blkid)) {
505 		return (WALK_ERR);
506 	}
507 
508 	if (getrefcount(addr, &id, "db_holds", &holds)) {
509 		return (WALK_ERR);
510 	}
511 
512 	if (db.db_object == DMU_META_DNODE_OBJECT)
513 		(void) strcpy(objectname, "mdn");
514 	else
515 		(void) mdb_snprintf(objectname, sizeof (objectname), "%llx",
516 		    (u_longlong_t)db.db_object);
517 
518 	if (blkid == DB_BONUS_BLKID)
519 		(void) strcpy(blkidname, "bonus");
520 	else
521 		(void) mdb_snprintf(blkidname, sizeof (blkidname), "%llx",
522 		    (u_longlong_t)blkid);
523 
524 	if (objset_name(objset, path)) {
525 		return (WALK_ERR);
526 	}
527 
528 	mdb_printf("%p %8s %1u %9s %2llu %s\n",
529 	    addr, objectname, level, blkidname, holds, path);
530 
531 	return (DCMD_OK);
532 }
533 
534 /* ARGSUSED */
535 static int
536 dbuf_stats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
537 {
538 #define	HISTOSZ 32
539 	uintptr_t dbp;
540 	dmu_buf_impl_t db;
541 	dbuf_hash_table_t ht;
542 	uint64_t bucket, ndbufs;
543 	uint64_t histo[HISTOSZ];
544 	uint64_t histo2[HISTOSZ];
545 	int i, maxidx;
546 
547 	if (mdb_readvar(&ht, "dbuf_hash_table") == -1) {
548 		mdb_warn("failed to read 'dbuf_hash_table'");
549 		return (DCMD_ERR);
550 	}
551 
552 	for (i = 0; i < HISTOSZ; i++) {
553 		histo[i] = 0;
554 		histo2[i] = 0;
555 	}
556 
557 	ndbufs = 0;
558 	for (bucket = 0; bucket < ht.hash_table_mask+1; bucket++) {
559 		int len;
560 
561 		if (mdb_vread(&dbp, sizeof (void *),
562 		    (uintptr_t)(ht.hash_table+bucket)) == -1) {
563 			mdb_warn("failed to read hash bucket %u at %p",
564 			    bucket, ht.hash_table+bucket);
565 			return (DCMD_ERR);
566 		}
567 
568 		len = 0;
569 		while (dbp != 0) {
570 			if (mdb_vread(&db, sizeof (dmu_buf_impl_t),
571 			    dbp) == -1) {
572 				mdb_warn("failed to read dbuf at %p", dbp);
573 				return (DCMD_ERR);
574 			}
575 			dbp = (uintptr_t)db.db_hash_next;
576 			for (i = MIN(len, HISTOSZ - 1); i >= 0; i--)
577 				histo2[i]++;
578 			len++;
579 			ndbufs++;
580 		}
581 
582 		if (len >= HISTOSZ)
583 			len = HISTOSZ-1;
584 		histo[len]++;
585 	}
586 
587 	mdb_printf("hash table has %llu buckets, %llu dbufs "
588 	    "(avg %llu buckets/dbuf)\n",
589 	    ht.hash_table_mask+1, ndbufs,
590 	    (ht.hash_table_mask+1)/ndbufs);
591 
592 	mdb_printf("\n");
593 	maxidx = 0;
594 	for (i = 0; i < HISTOSZ; i++)
595 		if (histo[i] > 0)
596 			maxidx = i;
597 	mdb_printf("hash chain length	number of buckets\n");
598 	for (i = 0; i <= maxidx; i++)
599 		mdb_printf("%u			%llu\n", i, histo[i]);
600 
601 	mdb_printf("\n");
602 	maxidx = 0;
603 	for (i = 0; i < HISTOSZ; i++)
604 		if (histo2[i] > 0)
605 			maxidx = i;
606 	mdb_printf("hash chain depth	number of dbufs\n");
607 	for (i = 0; i <= maxidx; i++)
608 		mdb_printf("%u or more		%llu	%llu%%\n",
609 		    i, histo2[i], histo2[i]*100/ndbufs);
610 
611 
612 	return (DCMD_OK);
613 }
614 
/*
 * Filter passed from the dbufs dcmd to dbufs_cb().  Each numeric field is
 * either DBUFS_UNSET ("match anything") or the value a dbuf must have;
 * osname is NULL when no objset name was requested.
 */
typedef struct dbufs_data {
	mdb_ctf_id_t id;	/* CTF id of struct dmu_buf_impl */
	uint64_t objset;	/* required db_objset address */
	uint64_t object;	/* required db.db_object */
	uint64_t level;		/* required db_level */
	uint64_t blkid;		/* required db_blkid */
	char *osname;		/* required objset name, or NULL */
} dbufs_data_t;

/* Sentinel stored in the numeric dbufs_data_t fields that were not given. */
#define	DBUFS_UNSET	(0xbaddcafedeadbeefULL)
625 
626 /* ARGSUSED */
627 static int
628 dbufs_cb(uintptr_t addr, const void *unknown, void *arg)
629 {
630 	dbufs_data_t *data = arg;
631 	uintptr_t objset;
632 	dmu_buf_t db;
633 	uint8_t level;
634 	uint64_t blkid;
635 	char osname[MAXNAMELEN];
636 
637 	if (GETMEMBID(addr, &data->id, db_objset, objset) ||
638 	    GETMEMBID(addr, &data->id, db, db) ||
639 	    GETMEMBID(addr, &data->id, db_level, level) ||
640 	    GETMEMBID(addr, &data->id, db_blkid, blkid)) {
641 		return (WALK_ERR);
642 	}
643 
644 	if ((data->objset == DBUFS_UNSET || data->objset == objset) &&
645 	    (data->osname == NULL || (objset_name(objset, osname) == 0 &&
646 		strcmp(data->osname, osname) == 0)) &&
647 	    (data->object == DBUFS_UNSET || data->object == db.db_object) &&
648 	    (data->level == DBUFS_UNSET || data->level == level) &&
649 	    (data->blkid == DBUFS_UNSET || data->blkid == blkid)) {
650 		mdb_printf("%#lr\n", addr);
651 	}
652 	return (WALK_NEXT);
653 }
654 
655 /* ARGSUSED */
656 static int
657 dbufs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
658 {
659 	dbufs_data_t data;
660 	char *object = NULL;
661 	char *blkid = NULL;
662 
663 	data.objset = data.object = data.level = data.blkid = DBUFS_UNSET;
664 	data.osname = NULL;
665 
666 	if (mdb_getopts(argc, argv,
667 	    'O', MDB_OPT_UINT64, &data.objset,
668 	    'n', MDB_OPT_STR, &data.osname,
669 	    'o', MDB_OPT_STR, &object,
670 	    'l', MDB_OPT_UINT64, &data.level,
671 	    'b', MDB_OPT_STR, &blkid) != argc) {
672 		return (DCMD_USAGE);
673 	}
674 
675 	if (object) {
676 		if (strcmp(object, "mdn") == 0) {
677 			data.object = DMU_META_DNODE_OBJECT;
678 		} else {
679 			data.object = mdb_strtoull(object);
680 		}
681 	}
682 
683 	if (blkid) {
684 		if (strcmp(blkid, "bonus") == 0) {
685 			data.blkid = DB_BONUS_BLKID;
686 		} else {
687 			data.blkid = mdb_strtoull(blkid);
688 		}
689 	}
690 
691 	if (mdb_ctf_lookup_by_name("struct dmu_buf_impl", &data.id) == -1) {
692 		mdb_warn("couldn't find struct dmu_buf_impl_t");
693 		return (DCMD_ERR);
694 	}
695 
696 	if (mdb_pwalk("dbufs", dbufs_cb, &data, 0) != 0) {
697 		mdb_warn("can't walk dbufs");
698 		return (DCMD_ERR);
699 	}
700 
701 	return (DCMD_OK);
702 }
703 
/*
 * Search key handed from the abuf_find dcmd to abuf_find_cb().
 */
typedef struct abuf_find_data {
	dva_t dva;		/* DVA to look for (both words must match) */
	mdb_ctf_id_t id;	/* CTF id of struct arc_buf_hdr */
} abuf_find_data_t;
708 
709 /* ARGSUSED */
710 static int
711 abuf_find_cb(uintptr_t addr, const void *unknown, void *arg)
712 {
713 	abuf_find_data_t *data = arg;
714 	dva_t dva;
715 
716 	if (GETMEMBID(addr, &data->id, b_dva, dva)) {
717 		return (WALK_ERR);
718 	}
719 
720 	if (dva.dva_word[0] == data->dva.dva_word[0] &&
721 	    dva.dva_word[1] == data->dva.dva_word[1]) {
722 		mdb_printf("%#lr\n", addr);
723 	}
724 	return (WALK_NEXT);
725 }
726 
727 /* ARGSUSED */
728 static int
729 abuf_find(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
730 {
731 	abuf_find_data_t data;
732 	GElf_Sym sym;
733 	int i;
734 	const char *syms[] = {
735 		"ARC_mru_top",
736 		"ARC_mru_bot",
737 		"ARC_mfu_top",
738 		"ARC_mfu_bot",
739 	};
740 
741 	if (argc != 2)
742 		return (DCMD_USAGE);
743 
744 	for (i = 0; i < 2; i ++) {
745 		switch (argv[i].a_type) {
746 		case MDB_TYPE_STRING:
747 			data.dva.dva_word[i] = mdb_strtoull(argv[i].a_un.a_str);
748 			break;
749 		case MDB_TYPE_IMMEDIATE:
750 			data.dva.dva_word[i] = argv[i].a_un.a_val;
751 			break;
752 		default:
753 			return (DCMD_USAGE);
754 		}
755 	}
756 
757 	if (mdb_ctf_lookup_by_name("struct arc_buf_hdr", &data.id) == -1) {
758 		mdb_warn("couldn't find struct arc_buf_hdr");
759 		return (DCMD_ERR);
760 	}
761 
762 	for (i = 0; i < sizeof (syms) / sizeof (syms[0]); i++) {
763 		if (mdb_lookup_by_name(syms[i], &sym)) {
764 			mdb_warn("can't find symbol %s", syms[i]);
765 			return (DCMD_ERR);
766 		}
767 
768 		if (mdb_pwalk("list", abuf_find_cb, &data, sym.st_value) != 0) {
769 			mdb_warn("can't walk %s", syms[i]);
770 			return (DCMD_ERR);
771 		}
772 	}
773 
774 	return (DCMD_OK);
775 }
776 
/*
 * Print the one-line usage summary for ::abuf_find.
 */
void
abuf_help(void)
{
	mdb_printf("%s\n", "::abuf_find dva_word[0] dva_word[1]");
}
782 
783 /*
784  * ::spa
785  *
786  * 	-c	Print configuration information as well
787  * 	-v	Print vdev state
788  * 	-e	Print vdev error stats
789  *
790  * Print a summarized spa_t.  When given no arguments, prints out a table of all
791  * active pools on the system.
792  */
793 /* ARGSUSED */
794 static int
795 spa_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
796 {
797 	spa_t spa;
798 	char poolname[MAXNAMELEN];
799 	const char *statetab[] = { "ACTIVE", "EXPORTED", "DESTROYED",
800 		"UNINIT", "UNAVAIL" };
801 	const char *state;
802 	int config = FALSE;
803 	int vdevs = FALSE;
804 	int errors = FALSE;
805 
806 	if (mdb_getopts(argc, argv,
807 	    'c', MDB_OPT_SETBITS, TRUE, &config,
808 	    'v', MDB_OPT_SETBITS, TRUE, &vdevs,
809 	    'e', MDB_OPT_SETBITS, TRUE, &errors,
810 	    NULL) != argc)
811 		return (DCMD_USAGE);
812 
813 	if (!(flags & DCMD_ADDRSPEC)) {
814 		if (mdb_walk_dcmd("spa", "spa", argc, argv) == -1) {
815 			mdb_warn("can't walk spa");
816 			return (DCMD_ERR);
817 		}
818 
819 		return (DCMD_OK);
820 	}
821 
822 	if (flags & DCMD_PIPE_OUT) {
823 		mdb_printf("%#lr\n", addr);
824 		return (DCMD_OK);
825 	}
826 
827 	if (DCMD_HDRSPEC(flags))
828 		mdb_printf("%<u>%-?s %9s %-*s%</u>\n", "ADDR", "STATE",
829 		    sizeof (uintptr_t) == 4 ? 60 : 52, "NAME");
830 
831 	if (mdb_vread(&spa, sizeof (spa), addr) == -1) {
832 		mdb_warn("failed to read spa_t at %p", addr);
833 		return (DCMD_ERR);
834 	}
835 
836 	if (mdb_readstr(poolname, sizeof (poolname), (uintptr_t)spa.spa_name)
837 	    == -1) {
838 		mdb_warn("failed to read pool name at %p", spa.spa_name);
839 		return (DCMD_ERR);
840 	}
841 
842 	if (spa.spa_state < 0 || spa.spa_state > POOL_STATE_UNAVAIL)
843 		state = "UNKNOWN";
844 	else
845 		state = statetab[spa.spa_state];
846 
847 	mdb_printf("%0?p %9s %s\n", addr, state, poolname);
848 
849 	if (config) {
850 		mdb_printf("\n");
851 		mdb_inc_indent(4);
852 		if (mdb_call_dcmd("spa_config", addr, flags, 0,
853 		    NULL) != DCMD_OK)
854 			return (DCMD_ERR);
855 		mdb_dec_indent(4);
856 	}
857 
858 	if (vdevs || errors) {
859 		mdb_arg_t v;
860 
861 		v.a_type = MDB_TYPE_STRING;
862 		v.a_un.a_str = "-e";
863 
864 		mdb_printf("\n");
865 		mdb_inc_indent(4);
866 		if (mdb_call_dcmd("spa_vdevs", addr, flags, errors ? 1 : 0,
867 		    &v) != DCMD_OK)
868 			return (DCMD_ERR);
869 		mdb_dec_indent(4);
870 	}
871 
872 	return (DCMD_OK);
873 }
874 
875 /*
876  * ::spa_config
877  *
878  * Given a spa_t, print the configuration information stored in spa_config.
879  * Since it's just an nvlist, format it as an indented list of name=value pairs.
880  * We simply read the value of spa_config and pass off to ::nvlist.
881  */
882 /* ARGSUSED */
883 static int
884 spa_print_config(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
885 {
886 	spa_t spa;
887 
888 	if (argc != 0 || !(flags & DCMD_ADDRSPEC))
889 		return (DCMD_USAGE);
890 
891 	if (mdb_vread(&spa, sizeof (spa), addr) == -1) {
892 		mdb_warn("failed to read spa_t at %p", addr);
893 		return (DCMD_ERR);
894 	}
895 
896 	if (spa.spa_config == NULL) {
897 		mdb_printf("(none)\n");
898 		return (DCMD_OK);
899 	}
900 
901 	return (mdb_call_dcmd("nvlist", (uintptr_t)spa.spa_config, flags,
902 	    0, NULL));
903 }
904 
/*
 * Print the usage summary for ::vdev.  Lists every option accepted by
 * vdev_print(): -q, -r and -e.
 */
void
vdev_help(void)
{
	mdb_printf("[vdev_t*]::vdev [-qre]\n"
		"\t-> -q display vdev_queue parameters\n"
		"\t-> -r recursive (visit all children)\n"
		"\t-> -e display vdev statistics\n");
}
912 
913 /*
914  * ::vdev
915  *
916  * Print out a summarized vdev_t, in the following form:
917  *
918  * ADDR             STATE	AUX            DESC
919  * fffffffbcde23df0 HEALTHY	-              /dev/dsk/c0t0d0
920  *
921  * or with "-q" to print out a vdev_t's vdev_queue parameters:
922  *
923  *  vdev_t: c26ae4c0
924  *     c26ae73c min pending         0x2
925  *     c26ae744 max pending         0x23
926  *     c26ae74c agg limit           0x20000
927  *     c26ae754 time shift          0x4
928  *     c26ae75c ramp rate           0x2
929  *
930  * If '-r' is specified, recursively visit all children.
931  *
932  * With '-e', the statistics associated with the vdev are printed as well.
933  */
934 static int
935 do_print_vdev(uintptr_t addr, int flags, int depth, int queue, int stats,
936     int recursive)
937 {
938 	vdev_t vdev;
939 	char desc[MAXNAMELEN];
940 	int c, children;
941 	uintptr_t *child;
942 	const char *state, *aux;
943 
944 	if (mdb_vread(&vdev, sizeof (vdev), (uintptr_t)addr) == -1) {
945 		mdb_warn("failed to read vdev_t at %p\n", (uintptr_t)addr);
946 		return (DCMD_ERR);
947 	}
948 
949 	if (flags & DCMD_PIPE_OUT) {
950 		mdb_printf("%#lr", addr);
951 	} else {
952 		if (vdev.vdev_path != NULL) {
953 			if (mdb_readstr(desc, sizeof (desc),
954 			    (uintptr_t)vdev.vdev_path) == -1) {
955 				mdb_warn("failed to read vdev_path at %p\n",
956 				    vdev.vdev_path);
957 				return (DCMD_ERR);
958 			}
959 		} else if (vdev.vdev_ops != NULL) {
960 			vdev_ops_t ops;
961 			if (mdb_vread(&ops, sizeof (ops),
962 			    (uintptr_t)vdev.vdev_ops) == -1) {
963 				mdb_warn("failed to read vdev_ops at %p\n",
964 				    vdev.vdev_ops);
965 				return (DCMD_ERR);
966 			}
967 			(void) strcpy(desc, ops.vdev_op_type);
968 		} else {
969 			(void) strcpy(desc, "<unknown>");
970 		}
971 
972 		if (depth == 0 && DCMD_HDRSPEC(flags))
973 			mdb_printf("%<u>%-?s %-9s %-12s %-*s%</u>\n",
974 			    "ADDR", "STATE", "AUX",
975 			    sizeof (uintptr_t) == 4 ? 43 : 35,
976 			    "DESCRIPTION");
977 
978 		mdb_printf("%0?p ", addr);
979 
980 		switch (vdev.vdev_state) {
981 		case VDEV_STATE_CLOSED:
982 		    state = "CLOSED";
983 		    break;
984 		case VDEV_STATE_OFFLINE:
985 		    state = "OFFLINE";
986 		    break;
987 		case VDEV_STATE_CANT_OPEN:
988 		    state = "CANT_OPEN";
989 		    break;
990 		case VDEV_STATE_DEGRADED:
991 		    state = "DEGRADED";
992 		    break;
993 		case VDEV_STATE_HEALTHY:
994 		    state = "HEALTHY";
995 		    break;
996 		default:
997 		    state = "UNKNOWN";
998 		    break;
999 		}
1000 
1001 		switch (vdev.vdev_stat.vs_aux) {
1002 		case VDEV_AUX_NONE:
1003 			aux = "-";
1004 			break;
1005 		case VDEV_AUX_OPEN_FAILED:
1006 			aux = "OPEN_FAILED";
1007 			break;
1008 		case VDEV_AUX_CORRUPT_DATA:
1009 			aux = "CORRUPT_DATA";
1010 			break;
1011 		case VDEV_AUX_NO_REPLICAS:
1012 			aux = "NO_REPLICAS";
1013 			break;
1014 		case VDEV_AUX_BAD_GUID_SUM:
1015 			aux = "BAD_GUID_SUM";
1016 			break;
1017 		case VDEV_AUX_TOO_SMALL:
1018 			aux = "TOO_SMALL";
1019 			break;
1020 		case VDEV_AUX_BAD_LABEL:
1021 			aux = "BAD_LABEL";
1022 			break;
1023 		default:
1024 			aux = "UNKNOWN";
1025 			break;
1026 		}
1027 
1028 		mdb_printf("%-9s %-12s %*s%s\n", state, aux, depth, "", desc);
1029 
1030 		if (queue) {
1031 			mdb_inc_indent(4);
1032 			mdb_printf("\n");
1033 			mdb_printf("%p min pending		0x%llx\n",
1034 			    (uintptr_t)(addr + offsetof(vdev_t,
1035 			    vdev_queue.vq_min_pending)),
1036 			    vdev.vdev_queue.vq_min_pending);
1037 			mdb_printf("%p max pending		0x%llx\n",
1038 			    (uintptr_t)(addr + offsetof(vdev_t,
1039 			    vdev_queue.vq_max_pending)),
1040 			    vdev.vdev_queue.vq_max_pending);
1041 			mdb_printf("%p agg limit		0x%llx\n",
1042 			    (uintptr_t)(addr + offsetof(vdev_t,
1043 			    vdev_queue.vq_agg_limit)),
1044 			    vdev.vdev_queue.vq_agg_limit);
1045 			mdb_printf("%p time shift		0x%llx\n",
1046 			    (uintptr_t)(addr + offsetof(vdev_t,
1047 			    vdev_queue.vq_time_shift)),
1048 			    vdev.vdev_queue.vq_time_shift);
1049 			mdb_printf("%p ramp rate 		0x%llx\n",
1050 			    (uintptr_t)(addr + offsetof(vdev_t,
1051 			    vdev_queue.vq_ramp_rate)),
1052 			    vdev.vdev_queue.vq_ramp_rate);
1053 			mdb_dec_indent(4);
1054 		}
1055 
1056 		if (stats) {
1057 			vdev_stat_t *vs = &vdev.vdev_stat;
1058 			int i;
1059 
1060 			mdb_inc_indent(4);
1061 			mdb_printf("\n");
1062 			mdb_printf("%<u>       %12s %12s %12s %12s "
1063 			    "%12s%</u>\n", "READ", "WRITE", "FREE", "CLAIM",
1064 			    "IOCTL");
1065 			mdb_printf("OPS     ");
1066 			for (i = 1; i < ZIO_TYPES; i++)
1067 				mdb_printf("%11#llx%s", vs->vs_ops[i],
1068 				    i == ZIO_TYPES - 1 ? "" : "  ");
1069 			mdb_printf("\n");
1070 			mdb_printf("BYTES   ");
1071 			for (i = 1; i < ZIO_TYPES; i++)
1072 				mdb_printf("%11#llx%s", vs->vs_bytes[i],
1073 				    i == ZIO_TYPES - 1 ? "" : "  ");
1074 
1075 
1076 			mdb_printf("\n");
1077 			mdb_printf("EREAD    %10#llx\n", vs->vs_read_errors);
1078 			mdb_printf("EWRITE   %10#llx\n", vs->vs_write_errors);
1079 			mdb_printf("ECKSUM   %10#llx\n",
1080 			    vs->vs_checksum_errors);
1081 			mdb_dec_indent(4);
1082 		}
1083 
1084 		if (queue || stats)
1085 			mdb_printf("\n");
1086 	}
1087 
1088 	children = vdev.vdev_children;
1089 
1090 	if (children == 0 || !recursive)
1091 		return (DCMD_OK);
1092 
1093 	child = mdb_alloc(children * sizeof (void *), UM_SLEEP | UM_GC);
1094 	if (mdb_vread(child, children * sizeof (void *),
1095 	    (uintptr_t)vdev.vdev_child) == -1) {
1096 		mdb_warn("failed to read vdev children at %p", vdev.vdev_child);
1097 		return (DCMD_ERR);
1098 	}
1099 
1100 	for (c = 0; c < children; c++) {
1101 		if (do_print_vdev(child[c], flags, depth + 2, queue, stats,
1102 		    recursive))
1103 			return (DCMD_ERR);
1104 	}
1105 
1106 	return (DCMD_OK);
1107 }
1108 
1109 static int
1110 vdev_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1111 {
1112 	int print_queue = FALSE;
1113 	int recursive = FALSE;
1114 	int stats = FALSE;
1115 
1116 	if (mdb_getopts(argc, argv,
1117 	    'q', MDB_OPT_SETBITS, TRUE, &print_queue,
1118 	    'r', MDB_OPT_SETBITS, TRUE, &recursive,
1119 	    'e', MDB_OPT_SETBITS, TRUE, &stats,
1120 	    NULL) != argc)
1121 		return (DCMD_USAGE);
1122 
1123 	if (!(flags & DCMD_ADDRSPEC)) {
1124 		mdb_warn("no vdev_t address given\n");
1125 		return (DCMD_ERR);
1126 	}
1127 
1128 	return (do_print_vdev(addr, flags, 0, print_queue, stats, recursive));
1129 }
1130 
/*
 * Debugger-side holder for the members of the target's "struct spa" that
 * spa_space() reads individually with GETMEMB().
 */
typedef struct mdb_spa {
	uintptr_t spa_dsl_pool;		/* target address of the dsl_pool */
	uintptr_t spa_root_vdev;	/* target address of the root vdev */
} mdb_spa_t;
1135 
/*
 * Debugger-side holder for members of the target's "struct dsl_dir".
 * spa_space() fills dd_phys and dd_used_bytes with GETMEMB().
 * NOTE(review): dd_space_towrite is presumably filled the same way; that
 * code lies outside the visible part of the file -- confirm.
 */
typedef struct mdb_dsl_dir {
	uintptr_t dd_phys;
	uint64_t dd_used_bytes;
	int64_t dd_space_towrite[TXG_SIZE];
} mdb_dsl_dir_t;
1141 
/*
 * Debugger-side holder for three byte counters.
 * NOTE(review): presumably mirrors members of the target's on-disk dsl_dir
 * structure reached through dd_phys; the code that fills it is outside
 * the visible part of the file -- confirm.
 */
typedef struct mdb_dsl_dir_phys {
	uint64_t dd_used_bytes;
	uint64_t dd_compressed_bytes;
	uint64_t dd_uncompressed_bytes;
} mdb_dsl_dir_phys_t;
1147 
1148 typedef struct mdb_vdev {
1149 	uintptr_t vdev_parent;
1150 	uintptr_t vdev_ms;
1151 	uint64_t vdev_ms_count;
1152 	vdev_stat_t vdev_stat;
1153 } mdb_vdev_t;
1154 
1155 typedef struct mdb_metaslab {
1156 	space_map_t ms_allocmap[TXG_SIZE];
1157 	space_map_t ms_freemap[TXG_SIZE];
1158 	space_map_t ms_map;
1159 	space_map_obj_t ms_smo;
1160 } mdb_metaslab_t;
1161 
1162 /*
1163  * ::spa_space [-b]
1164  *
1165  * Given a spa_t, print out it's on-disk space usage and in-core
1166  * estimates of future usage.  If -b is given, print space in bytes.
1167  * Otherwise print in megabytes.
1168  */
1169 /* ARGSUSED */
1170 static int
1171 spa_space(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1172 {
1173 	mdb_spa_t spa;
1174 	uintptr_t dp_root_dir;
1175 	mdb_dsl_dir_t dd;
1176 	mdb_dsl_dir_phys_t dsp;
1177 	uint64_t children;
1178 	uintptr_t childaddr;
1179 	uintptr_t *child;
1180 	uint64_t ms_allocmap[TXG_SIZE] = {0, 0, 0, 0};
1181 	uint64_t ms_freemap[TXG_SIZE] = {0, 0, 0, 0};
1182 	uint64_t ms_map = 0;
1183 	uint64_t avail = 0;
1184 	int i, j;
1185 	int havecompressed = TRUE;
1186 	int shift = 20;
1187 	char *suffix = "M";
1188 	int bits = FALSE;
1189 
1190 	if (mdb_getopts(argc, argv, 'b', MDB_OPT_SETBITS, TRUE, &bits, NULL) !=
1191 	    argc)
1192 		return (DCMD_USAGE);
1193 	if (!(flags & DCMD_ADDRSPEC))
1194 		return (DCMD_USAGE);
1195 
1196 	if (bits) {
1197 		shift = 0;
1198 		suffix = "";
1199 	}
1200 
1201 	if (GETMEMB(addr, struct spa, spa_dsl_pool, spa.spa_dsl_pool) ||
1202 	    GETMEMB(addr, struct spa, spa_root_vdev, spa.spa_root_vdev) ||
1203 	    GETMEMB(spa.spa_root_vdev, struct vdev, vdev_children, children) ||
1204 	    GETMEMB(spa.spa_root_vdev, struct vdev, vdev_child, childaddr) ||
1205 	    GETMEMB(spa.spa_dsl_pool, struct dsl_pool,
1206 	    dp_root_dir, dp_root_dir) ||
1207 	    GETMEMB(dp_root_dir, struct dsl_dir, dd_phys, dd.dd_phys) ||
1208 	    GETMEMB(dp_root_dir, struct dsl_dir,
1209 	    dd_used_bytes, dd.dd_used_bytes) ||
1210 	    GETMEMB(dp_root_dir, struct dsl_dir,
1211 	    dd_space_towrite, dd.dd_space_towrite) ||
1212 	    GETMEMB(dd.dd_phys, struct dsl_dir_phys,
1213 	    dd_used_bytes, dsp.dd_used_bytes)) {
1214 		return (DCMD_ERR);
1215 	}
1216 
1217 	if (GETMEMB(dd.dd_phys, struct dsl_dir_phys,
1218 	    dd_compressed_bytes, dsp.dd_compressed_bytes) ||
1219 	    GETMEMB(dd.dd_phys, struct dsl_dir_phys,
1220 	    dd_uncompressed_bytes, dsp.dd_uncompressed_bytes)) {
1221 		havecompressed = FALSE;
1222 	}
1223 
1224 	child = mdb_alloc(children * sizeof (void *), UM_SLEEP | UM_GC);
1225 	if (mdb_vread(child, children * sizeof (void *), childaddr) == -1) {
1226 		mdb_warn("failed to read root vdev children at %p", childaddr);
1227 		return (DCMD_ERR);
1228 	}
1229 
1230 	mdb_printf("dd_space_towrite = %llu%s %llu%s %llu%s %llu%s\n",
1231 	    dd.dd_space_towrite[0] >> shift, suffix,
1232 	    dd.dd_space_towrite[1] >> shift, suffix,
1233 	    dd.dd_space_towrite[2] >> shift, suffix,
1234 	    dd.dd_space_towrite[3] >> shift, suffix);
1235 	mdb_printf("dd_used_bytes = %llu%s\n",
1236 	    dd.dd_used_bytes >> shift, suffix);
1237 
1238 	mdb_printf("dd_phys.dd_used_bytes = %llu%s\n",
1239 	    dsp.dd_used_bytes >> shift, suffix);
1240 	if (havecompressed) {
1241 		mdb_printf("dd_phys.dd_compressed_bytes = %llu%s\n",
1242 		    dsp.dd_compressed_bytes >> shift, suffix);
1243 		mdb_printf("dd_phys.dd_uncompressed_bytes = %llu%s\n",
1244 		    dsp.dd_uncompressed_bytes >> shift, suffix);
1245 	}
1246 
1247 	for (i = 0; i < children; i++) {
1248 		mdb_vdev_t vd;
1249 		uintptr_t *vdev_ms;
1250 
1251 		if (GETMEMB(child[i], struct vdev,
1252 		    vdev_parent, vd.vdev_parent) ||
1253 		    GETMEMB(child[i], struct vdev,
1254 		    vdev_stat, vd.vdev_stat) ||
1255 		    GETMEMB(child[i], struct vdev, vdev_ms, vd.vdev_ms) ||
1256 		    GETMEMB(child[i], struct vdev,
1257 		    vdev_ms_count, vd.vdev_ms_count)) {
1258 			return (DCMD_ERR);
1259 		}
1260 
1261 		/*
1262 		 * If this is the root vdev, its stats are the pool-wide stats.
1263 		 */
1264 		if (vd.vdev_parent == NULL) {
1265 			mdb_printf("pool_alloc = %llu%s\n",
1266 			    vd.vdev_stat.vs_alloc >> shift, suffix);
1267 			mdb_printf("pool_space = %llu%s\n",
1268 			    vd.vdev_stat.vs_space >> shift, suffix);
1269 		}
1270 
1271 		/*
1272 		 * If this is not a top-level vdev, it doesn't have space.
1273 		 */
1274 		if (vd.vdev_parent != spa.spa_root_vdev)
1275 			continue;
1276 
1277 		vdev_ms = mdb_alloc(vd.vdev_ms_count * sizeof (void*),
1278 		    UM_SLEEP | UM_GC);
1279 		if (mdb_vread(vdev_ms, vd.vdev_ms_count * sizeof (void*),
1280 		    (uintptr_t)vd.vdev_ms) == -1) {
1281 			mdb_warn("failed to read vdev_ms at %p", vd.vdev_ms);
1282 			return (DCMD_ERR);
1283 		}
1284 
1285 		for (j = 0; j < vd.vdev_ms_count; j++) {
1286 			mdb_metaslab_t ms;
1287 
1288 			if (GETMEMB(vdev_ms[j], struct metaslab,
1289 			    ms_allocmap, ms.ms_allocmap) ||
1290 			    GETMEMB(vdev_ms[j], struct metaslab,
1291 			    ms_freemap, ms.ms_freemap) ||
1292 			    GETMEMB(vdev_ms[j], struct metaslab,
1293 			    ms_map, ms.ms_map) ||
1294 			    GETMEMB(vdev_ms[j], struct metaslab,
1295 			    ms_smo, ms.ms_smo)) {
1296 				return (DCMD_ERR);
1297 			}
1298 
1299 			ms_allocmap[0] += ms.ms_allocmap[0].sm_space;
1300 			ms_allocmap[1] += ms.ms_allocmap[1].sm_space;
1301 			ms_allocmap[2] += ms.ms_allocmap[2].sm_space;
1302 			ms_allocmap[3] += ms.ms_allocmap[3].sm_space;
1303 			ms_freemap[0] += ms.ms_freemap[0].sm_space;
1304 			ms_freemap[1] += ms.ms_freemap[1].sm_space;
1305 			ms_freemap[2] += ms.ms_freemap[2].sm_space;
1306 			ms_freemap[3] += ms.ms_freemap[3].sm_space;
1307 			ms_map += ms.ms_map.sm_space;
1308 			avail += ms.ms_map.sm_size - ms.ms_smo.smo_alloc;
1309 		}
1310 	}
1311 
1312 	mdb_printf("ms_allocmap = %llu%s %llu%s %llu%s %llu%s\n",
1313 	    ms_allocmap[0] >> shift, suffix,
1314 	    ms_allocmap[1] >> shift, suffix,
1315 	    ms_allocmap[2] >> shift, suffix,
1316 	    ms_allocmap[3] >> shift, suffix);
1317 	mdb_printf("ms_freemap = %llu%s %llu%s %llu%s %llu%s\n",
1318 	    ms_freemap[0] >> shift, suffix,
1319 	    ms_freemap[1] >> shift, suffix,
1320 	    ms_freemap[2] >> shift, suffix,
1321 	    ms_freemap[3] >> shift, suffix);
1322 	mdb_printf("ms_map = %llu%s\n", ms_map >> shift, suffix);
1323 	mdb_printf("avail = %llu%s\n", avail >> shift, suffix);
1324 
1325 	return (DCMD_OK);
1326 }
1327 
1328 /*
1329  * ::spa_verify
1330  *
1331  * Given a spa_t, verify that that the pool is self-consistent.
1332  * Currently, it only checks to make sure that the vdev tree exists.
1333  */
1334 /* ARGSUSED */
1335 static int
1336 spa_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1337 {
1338 	spa_t spa;
1339 
1340 	if (argc != 0 || !(flags & DCMD_ADDRSPEC))
1341 		return (DCMD_USAGE);
1342 
1343 	if (mdb_vread(&spa, sizeof (spa), addr) == -1) {
1344 		mdb_warn("failed to read spa_t at %p", addr);
1345 		return (DCMD_ERR);
1346 	}
1347 
1348 	if (spa.spa_root_vdev == NULL) {
1349 		mdb_printf("no vdev tree present\n");
1350 		return (DCMD_OK);
1351 	}
1352 
1353 	return (DCMD_OK);
1354 }
1355 
1356 /*
1357  * ::spa_vdevs
1358  *
1359  * 	-e	Include error stats
1360  *
1361  * Print out a summarized list of vdevs for the given spa_t.
1362  * This is accomplished by invoking "::vdev -re" on the root vdev.
1363  */
1364 /* ARGSUSED */
1365 static int
1366 spa_vdevs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1367 {
1368 	spa_t spa;
1369 	mdb_arg_t v;
1370 	int errors = FALSE;
1371 
1372 	if (mdb_getopts(argc, argv,
1373 	    'e', MDB_OPT_SETBITS, TRUE, &errors,
1374 	    NULL) != argc)
1375 		return (DCMD_USAGE);
1376 
1377 	if (!(flags & DCMD_ADDRSPEC))
1378 		return (DCMD_USAGE);
1379 
1380 	if (mdb_vread(&spa, sizeof (spa), addr) == -1) {
1381 		mdb_warn("failed to read spa_t at %p", addr);
1382 		return (DCMD_ERR);
1383 	}
1384 
1385 	/*
1386 	 * Unitialized spa_t structures can have a NULL root vdev.
1387 	 */
1388 	if (spa.spa_root_vdev == NULL) {
1389 		mdb_printf("no associated vdevs\n");
1390 		return (DCMD_OK);
1391 	}
1392 
1393 	v.a_type = MDB_TYPE_STRING;
1394 	v.a_un.a_str = errors ? "-re" : "-r";
1395 
1396 	return (mdb_call_dcmd("vdev", (uintptr_t)spa.spa_root_vdev,
1397 	    flags, 1, &v));
1398 }
1399 
1400 typedef struct txg_list_walk_data {
1401 	uintptr_t lw_head[TXG_SIZE];
1402 	int	lw_txgoff;
1403 	int	lw_maxoff;
1404 	size_t	lw_offset;
1405 	void	*lw_obj;
1406 } txg_list_walk_data_t;
1407 
1408 static int
1409 txg_list_walk_init_common(mdb_walk_state_t *wsp, int txg, int maxoff)
1410 {
1411 	txg_list_walk_data_t *lwd;
1412 	txg_list_t list;
1413 	int i;
1414 
1415 	lwd = mdb_alloc(sizeof (txg_list_walk_data_t), UM_SLEEP | UM_GC);
1416 	if (mdb_vread(&list, sizeof (txg_list_t), wsp->walk_addr) == -1) {
1417 		mdb_warn("failed to read txg_list_t at %#lx", wsp->walk_addr);
1418 		return (WALK_ERR);
1419 	}
1420 
1421 	for (i = 0; i < TXG_SIZE; i++)
1422 		lwd->lw_head[i] = (uintptr_t)list.tl_head[i];
1423 	lwd->lw_offset = list.tl_offset;
1424 	lwd->lw_obj = mdb_alloc(lwd->lw_offset + sizeof (txg_node_t),
1425 	    UM_SLEEP | UM_GC);
1426 	lwd->lw_txgoff = txg;
1427 	lwd->lw_maxoff = maxoff;
1428 
1429 	wsp->walk_addr = lwd->lw_head[lwd->lw_txgoff];
1430 	wsp->walk_data = lwd;
1431 
1432 	return (WALK_NEXT);
1433 }
1434 
1435 static int
1436 txg_list_walk_init(mdb_walk_state_t *wsp)
1437 {
1438 	return (txg_list_walk_init_common(wsp, 0, TXG_SIZE-1));
1439 }
1440 
1441 static int
1442 txg_list0_walk_init(mdb_walk_state_t *wsp)
1443 {
1444 	return (txg_list_walk_init_common(wsp, 0, 0));
1445 }
1446 
1447 static int
1448 txg_list1_walk_init(mdb_walk_state_t *wsp)
1449 {
1450 	return (txg_list_walk_init_common(wsp, 1, 1));
1451 }
1452 
1453 static int
1454 txg_list2_walk_init(mdb_walk_state_t *wsp)
1455 {
1456 	return (txg_list_walk_init_common(wsp, 2, 2));
1457 }
1458 
1459 static int
1460 txg_list3_walk_init(mdb_walk_state_t *wsp)
1461 {
1462 	return (txg_list_walk_init_common(wsp, 3, 3));
1463 }
1464 
1465 static int
1466 txg_list_walk_step(mdb_walk_state_t *wsp)
1467 {
1468 	txg_list_walk_data_t *lwd = wsp->walk_data;
1469 	uintptr_t addr;
1470 	txg_node_t *node;
1471 	int status;
1472 
1473 	while (wsp->walk_addr == NULL && lwd->lw_txgoff < lwd->lw_maxoff) {
1474 		lwd->lw_txgoff++;
1475 		wsp->walk_addr = lwd->lw_head[lwd->lw_txgoff];
1476 	}
1477 
1478 	if (wsp->walk_addr == NULL)
1479 		return (WALK_DONE);
1480 
1481 	addr = wsp->walk_addr - lwd->lw_offset;
1482 
1483 	if (mdb_vread(lwd->lw_obj,
1484 	    lwd->lw_offset + sizeof (txg_node_t), addr) == -1) {
1485 		mdb_warn("failed to read list element at %#lx", addr);
1486 		return (WALK_ERR);
1487 	}
1488 
1489 	status = wsp->walk_callback(addr, lwd->lw_obj, wsp->walk_cbdata);
1490 	node = (txg_node_t *)((uintptr_t)lwd->lw_obj + lwd->lw_offset);
1491 	wsp->walk_addr = (uintptr_t)node->tn_next[lwd->lw_txgoff];
1492 
1493 	return (status);
1494 }
1495 
1496 /* ARGSUSED */
1497 static void
1498 txg_list_walk_fini(mdb_walk_state_t *wsp)
1499 {
1500 }
1501 
1502 /*
1503  * ::walk spa
1504  *
1505  * Walk all named spa_t structures in the namespace.  This is nothing more than
1506  * a layered avl walk.
1507  */
1508 static int
1509 spa_walk_init(mdb_walk_state_t *wsp)
1510 {
1511 	GElf_Sym sym;
1512 
1513 	if (wsp->walk_addr != NULL) {
1514 		mdb_warn("spa walk only supports global walks\n");
1515 		return (WALK_ERR);
1516 	}
1517 
1518 	if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "spa_namespace_avl", &sym) == -1) {
1519 		mdb_warn("failed to find symbol 'spa_namespace_avl'");
1520 		return (WALK_ERR);
1521 	}
1522 
1523 	wsp->walk_addr = (uintptr_t)sym.st_value;
1524 
1525 	if (mdb_layered_walk("avl", wsp) == -1) {
1526 		mdb_warn("failed to walk 'avl'\n");
1527 		return (WALK_ERR);
1528 	}
1529 
1530 	return (WALK_NEXT);
1531 }
1532 
1533 static int
1534 spa_walk_step(mdb_walk_state_t *wsp)
1535 {
1536 	spa_t	spa;
1537 
1538 	if (mdb_vread(&spa, sizeof (spa), wsp->walk_addr) == -1) {
1539 		mdb_warn("failed to read spa_t at %p", wsp->walk_addr);
1540 		return (WALK_ERR);
1541 	}
1542 
1543 	return (wsp->walk_callback(wsp->walk_addr, &spa, wsp->walk_cbdata));
1544 }
1545 
1546 /*
1547  * MDB module linkage information:
1548  *
1549  * We declare a list of structures describing our dcmds, and a function
1550  * named _mdb_init to return a pointer to our module information.
1551  */
1552 
1553 static const mdb_dcmd_t dcmds[] = {
1554 	{ "blkptr", ":", "print blkptr_t", blkptr },
1555 	{ "dbuf", ":", "print dmu_buf_impl_t", dbuf },
1556 	{ "dbuf_stats", ":", "dbuf stats", dbuf_stats },
1557 	{ "dbufs",
1558 	"\t[-O objset_t*] [-n objset_name | \"mos\"] [-o object | \"mdn\"] \n"
1559 	"\t[-l level] [-b blkid | \"bonus\"]",
1560 	"find dmu_buf_impl_t's that meet criterion", dbufs },
1561 	{ "abuf_find", "dva_word[0] dva_word[1]",
1562 	"find arc_buf_hdr_t of a specified DVA",
1563 	abuf_find },
1564 	{ "spa", "?[-cv]", "spa_t summary", spa_print },
1565 	{ "spa_config", ":", "print spa_t configuration", spa_print_config },
1566 	{ "spa_verify", ":", "verify spa_t consistency", spa_verify },
1567 	{ "spa_space", ":[-b]", "print spa_t on-disk space usage", spa_space },
1568 	{ "spa_vdevs", ":", "given a spa_t, print vdev summary", spa_vdevs },
1569 	{ "vdev", ":[-qre]", "vdev_t summary", vdev_print },
1570 	{ "zio_pipeline", ":", "decode a zio pipeline", zio_pipeline },
1571 	{ NULL }
1572 };
1573 
1574 static const mdb_walker_t walkers[] = {
1575 	/*
1576 	 * In userland, there is no generic provider of list_t walkers, so we
1577 	 * need to add it.
1578 	 */
1579 #ifndef _KERNEL
1580 	{ LIST_WALK_NAME, LIST_WALK_DESC,
1581 		list_walk_init, list_walk_step, list_walk_fini },
1582 #endif
1583 	{ "dbufs", "walk cached ZFS dbufs",
1584 		dbuf_walk_init, dbuf_walk_step, dbuf_walk_fini },
1585 	{ "zms_freelist", "walk ZFS metaslab freelist",
1586 		freelist_walk_init, freelist_walk_step, freelist_walk_fini },
1587 	{ "txg_list", "given any txg_list_t *, walk all entries in all txgs",
1588 		txg_list_walk_init, txg_list_walk_step, txg_list_walk_fini },
1589 	{ "txg_list0", "given any txg_list_t *, walk all entries in txg 0",
1590 		txg_list0_walk_init, txg_list_walk_step, txg_list_walk_fini },
1591 	{ "txg_list1", "given any txg_list_t *, walk all entries in txg 1",
1592 		txg_list1_walk_init, txg_list_walk_step, txg_list_walk_fini },
1593 	{ "txg_list2", "given any txg_list_t *, walk all entries in txg 2",
1594 		txg_list2_walk_init, txg_list_walk_step, txg_list_walk_fini },
1595 	{ "txg_list3", "given any txg_list_t *, walk all entries in txg 3",
1596 		txg_list3_walk_init, txg_list_walk_step, txg_list_walk_fini },
1597 	{ "spa", "walk all spa_t entries in the namespace",
1598 		spa_walk_init, spa_walk_step, NULL },
1599 	{ NULL }
1600 };
1601 
1602 static const mdb_modinfo_t modinfo = {
1603 	MDB_API_VERSION, dcmds, walkers
1604 };
1605 
1606 const mdb_modinfo_t *
1607 _mdb_init(void)
1608 {
1609 	return (&modinfo);
1610 }
1611