xref: /titanic_50/usr/src/cmd/mdb/common/modules/zfs/zfs.c (revision 088e9d477eee66081e407fbc5a33c4da25f66f6a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <mdb/mdb_ctf.h>
30 #include <sys/zfs_context.h>
31 #include <sys/mdb_modapi.h>
32 #include <sys/dbuf.h>
33 #include <sys/dmu_objset.h>
34 #include <sys/dsl_dir.h>
35 #include <sys/dsl_pool.h>
36 #include <sys/metaslab_impl.h>
37 #include <sys/space_map.h>
38 #include <sys/list.h>
39 #include <sys/spa_impl.h>
40 #include <sys/vdev_impl.h>
41 #include <sys/zio_compress.h>
42 
43 #ifndef _KERNEL
44 #include "../genunix/list.h"
45 #endif
46 
47 #ifdef _KERNEL
48 #define	ZFS_OBJ_NAME	"zfs"
49 #else
50 #define	ZFS_OBJ_NAME	"libzpool.so.1"
51 #endif
52 
53 static char *
54 local_strdup(const char *s)
55 {
56 	char *s1 = mdb_alloc(strlen(s) + 1, UM_SLEEP);
57 
58 	(void) strcpy(s1, s);
59 	return (s1);
60 }
61 
62 static int
63 getmember(uintptr_t addr, const char *type, mdb_ctf_id_t *idp,
64     const char *member, int len, void *buf)
65 {
66 	mdb_ctf_id_t id;
67 	ulong_t off;
68 	char name[64];
69 
70 	if (idp == NULL) {
71 		if (mdb_ctf_lookup_by_name(type, &id) == -1) {
72 			mdb_warn("couldn't find type %s", type);
73 			return (DCMD_ERR);
74 		}
75 		idp = &id;
76 	} else {
77 		type = name;
78 		mdb_ctf_type_name(*idp, name, sizeof (name));
79 	}
80 
81 	if (mdb_ctf_offsetof(*idp, member, &off) == -1) {
82 		mdb_warn("couldn't find member %s of type %s\n", member, type);
83 		return (DCMD_ERR);
84 	}
85 	if (off % 8 != 0) {
86 		mdb_warn("member %s of type %s is unsupported bitfield",
87 		    member, type);
88 		return (DCMD_ERR);
89 	}
90 	off /= 8;
91 
92 	if (mdb_vread(buf, len, addr + off) == -1) {
93 		mdb_warn("failed to read %s from %s at %p",
94 		    member, type, addr + off);
95 		return (DCMD_ERR);
96 	}
97 	/* mdb_warn("read %s from %s at %p+%llx\n", member, type, addr, off); */
98 
99 	return (0);
100 }
101 
/*
 * Convenience wrappers around getmember().  Both stringify the member name
 * and take the transfer length from sizeof (dest), so dest must be an
 * object whose size matches that of the member being read.
 *
 * GETMEMB identifies the containing structure by its (stringified) type
 * name; GETMEMBID takes a pointer to an already looked-up mdb_ctf_id_t.
 */
#define	GETMEMB(addr, type, member, dest) \
	getmember(addr, #type, NULL, #member, sizeof (dest), &(dest))

#define	GETMEMBID(addr, ctfid, member, dest) \
	getmember(addr, NULL, ctfid, #member, sizeof (dest), &(dest))
107 
108 static int
109 getrefcount(uintptr_t addr, mdb_ctf_id_t *id,
110     const char *member, uint64_t *rc)
111 {
112 	static int gotid;
113 	static mdb_ctf_id_t rc_id;
114 	ulong_t off;
115 
116 	if (!gotid) {
117 		if (mdb_ctf_lookup_by_name("struct refcount", &rc_id) == -1) {
118 			mdb_warn("couldn't find struct refcount");
119 			return (DCMD_ERR);
120 		}
121 		gotid = TRUE;
122 	}
123 
124 	if (mdb_ctf_offsetof(*id, member, &off) == -1) {
125 		char name[64];
126 		mdb_ctf_type_name(*id, name, sizeof (name));
127 		mdb_warn("couldn't find member %s of type %s\n", member, name);
128 		return (DCMD_ERR);
129 	}
130 	off /= 8;
131 
132 	return (GETMEMBID(addr + off, &rc_id, rc_count, *rc));
133 }
134 
135 static int
136 read_symbol(char *sym_name, void **bufp)
137 {
138 	GElf_Sym sym;
139 
140 	if (mdb_lookup_by_obj(MDB_TGT_OBJ_EVERY, sym_name, &sym)) {
141 		mdb_warn("can't find symbol %s", sym_name);
142 		return (DCMD_ERR);
143 	}
144 
145 	*bufp = mdb_alloc(sym.st_size, UM_SLEEP);
146 
147 	if (mdb_vread(*bufp, sym.st_size, sym.st_value) == -1) {
148 		mdb_warn("can't read data for symbol %s", sym_name);
149 		mdb_free(*bufp, sym.st_size);
150 		return (DCMD_ERR);
151 	}
152 
153 	return (DCMD_OK);
154 }
155 
156 static int verbose;
157 
158 static int
159 freelist_walk_init(mdb_walk_state_t *wsp)
160 {
161 	if (wsp->walk_addr == NULL) {
162 		mdb_warn("must supply starting address\n");
163 		return (WALK_ERR);
164 	}
165 
166 	wsp->walk_data = 0;  /* Index into the freelist */
167 	return (WALK_NEXT);
168 }
169 
170 static int
171 freelist_walk_step(mdb_walk_state_t *wsp)
172 {
173 	uint64_t entry;
174 	uintptr_t number = (uintptr_t)wsp->walk_data;
175 	char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID" };
176 	int mapshift = SPA_MINBLOCKSHIFT;
177 
178 	if (mdb_vread(&entry, sizeof (entry), wsp->walk_addr) == -1) {
179 		mdb_warn("failed to read freelist entry %p", wsp->walk_addr);
180 		return (WALK_DONE);
181 	}
182 	wsp->walk_addr += sizeof (entry);
183 	wsp->walk_data = (void *)(number + 1);
184 
185 	if (SM_DEBUG_DECODE(entry)) {
186 		mdb_printf("DEBUG: %3u  %10s: txg=%llu  pass=%llu\n",
187 		    number,
188 		    ddata[SM_DEBUG_ACTION_DECODE(entry)],
189 		    SM_DEBUG_TXG_DECODE(entry),
190 		    SM_DEBUG_SYNCPASS_DECODE(entry));
191 	} else {
192 		mdb_printf("Entry: %3u  offsets=%08llx-%08llx  type=%c  "
193 		    "size=%06llx", number,
194 		    SM_OFFSET_DECODE(entry) << mapshift,
195 		    (SM_OFFSET_DECODE(entry) + SM_RUN_DECODE(entry)) <<
196 		    mapshift,
197 		    SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
198 		    SM_RUN_DECODE(entry) << mapshift);
199 		if (verbose)
200 			mdb_printf("      (raw=%012llx)\n", entry);
201 		mdb_printf("\n");
202 	}
203 	return (WALK_NEXT);
204 }
205 
206 /* ARGSUSED */
207 static void
208 freelist_walk_fini(mdb_walk_state_t *wsp)
209 {
210 }
211 
212 typedef struct dbuf_walk_data {
213 	dbuf_hash_table_t ht;
214 	int64_t bucket;
215 	uintptr_t dbp;
216 	dmu_buf_impl_t db;
217 } dbuf_walk_data_t;
218 
219 static int
220 dbuf_walk_init(mdb_walk_state_t *wsp)
221 {
222 	dbuf_walk_data_t *dwd;
223 
224 	if (wsp->walk_addr != NULL) {
225 		mdb_warn("must supply starting address\n");
226 		return (WALK_ERR);
227 	}
228 
229 	dwd = mdb_alloc(sizeof (dbuf_walk_data_t), UM_SLEEP);
230 
231 	if (mdb_readvar(&dwd->ht, "dbuf_hash_table") == -1) {
232 		mdb_warn("failed to read 'dbuf_hash_table'");
233 		mdb_free(dwd, sizeof (dbuf_walk_data_t));
234 		return (WALK_ERR);
235 	}
236 	dwd->bucket = -1;
237 	dwd->dbp = 0;
238 	wsp->walk_data = dwd;
239 	return (WALK_NEXT);
240 }
241 
242 static int
243 dbuf_walk_step(mdb_walk_state_t *wsp)
244 {
245 	int status;
246 	dbuf_walk_data_t *dwd = wsp->walk_data;
247 
248 	while (dwd->dbp == 0) {
249 		dwd->bucket++;
250 		if (dwd->bucket == dwd->ht.hash_table_mask+1)
251 			return (WALK_DONE);
252 
253 		if (mdb_vread(&dwd->dbp, sizeof (void *),
254 		    (uintptr_t)(dwd->ht.hash_table+dwd->bucket)) == -1) {
255 			mdb_warn("failed to read hash bucket %u at %p",
256 			    dwd->bucket, dwd->ht.hash_table+dwd->bucket);
257 			return (WALK_DONE);
258 		}
259 	}
260 
261 	wsp->walk_addr = dwd->dbp;
262 	if (mdb_vread(&dwd->db, sizeof (dmu_buf_impl_t),
263 	    wsp->walk_addr) == -1) {
264 		mdb_warn("failed to read dbuf at %p", wsp->walk_addr);
265 		return (WALK_DONE);
266 	}
267 	status = wsp->walk_callback(wsp->walk_addr, &dwd->db, wsp->walk_cbdata);
268 
269 	dwd->dbp = (uintptr_t)dwd->db.db_hash_next;
270 	return (status);
271 }
272 
273 static void
274 dbuf_walk_fini(mdb_walk_state_t *wsp)
275 {
276 	dbuf_walk_data_t *dwd = wsp->walk_data;
277 	mdb_free(dwd, sizeof (dbuf_walk_data_t));
278 }
279 
280 static int
281 dataset_name(uintptr_t addr, char *buf)
282 {
283 	static int gotid;
284 	static mdb_ctf_id_t dd_id;
285 	uintptr_t dd_parent;
286 	char dd_myname[MAXNAMELEN];
287 
288 	if (!gotid) {
289 		if (mdb_ctf_lookup_by_name("struct dsl_dir",
290 		    &dd_id) == -1) {
291 			mdb_warn("couldn't find struct dsl_dir");
292 			return (DCMD_ERR);
293 		}
294 		gotid = TRUE;
295 	}
296 	if (GETMEMBID(addr, &dd_id, dd_parent, dd_parent) ||
297 	    GETMEMBID(addr, &dd_id, dd_myname, dd_myname)) {
298 		return (DCMD_ERR);
299 	}
300 
301 	if (dd_parent) {
302 		if (dataset_name(dd_parent, buf))
303 			return (DCMD_ERR);
304 		strcat(buf, "/");
305 	}
306 
307 	if (dd_myname[0])
308 		strcat(buf, dd_myname);
309 	else
310 		strcat(buf, "???");
311 
312 	return (0);
313 }
314 
315 static int
316 objset_name(uintptr_t addr, char *buf)
317 {
318 	static int gotid;
319 	static mdb_ctf_id_t osi_id, ds_id;
320 	uintptr_t os_dsl_dataset;
321 	char ds_snapname[MAXNAMELEN];
322 	uintptr_t ds_dir;
323 
324 	buf[0] = '\0';
325 
326 	if (!gotid) {
327 		if (mdb_ctf_lookup_by_name("struct objset_impl",
328 		    &osi_id) == -1) {
329 			mdb_warn("couldn't find struct objset_impl");
330 			return (DCMD_ERR);
331 		}
332 		if (mdb_ctf_lookup_by_name("struct dsl_dataset",
333 		    &ds_id) == -1) {
334 			mdb_warn("couldn't find struct dsl_dataset");
335 			return (DCMD_ERR);
336 		}
337 
338 		gotid = TRUE;
339 	}
340 
341 	if (GETMEMBID(addr, &osi_id, os_dsl_dataset, os_dsl_dataset))
342 		return (DCMD_ERR);
343 
344 	if (os_dsl_dataset == 0) {
345 		strcat(buf, "mos");
346 		return (0);
347 	}
348 
349 	if (GETMEMBID(os_dsl_dataset, &ds_id, ds_snapname, ds_snapname) ||
350 	    GETMEMBID(os_dsl_dataset, &ds_id, ds_dir, ds_dir)) {
351 		return (DCMD_ERR);
352 	}
353 
354 	if (ds_dir && dataset_name(ds_dir, buf))
355 		return (DCMD_ERR);
356 
357 	if (ds_snapname[0]) {
358 		strcat(buf, "@");
359 		strcat(buf, ds_snapname);
360 	}
361 	return (0);
362 }
363 
364 static void
365 enum_lookup(char *out, size_t size, mdb_ctf_id_t id, int val,
366     const char *prefix)
367 {
368 	const char *cp;
369 	size_t len = strlen(prefix);
370 
371 	if ((cp = mdb_ctf_enum_name(id, val)) != NULL) {
372 		if (strncmp(cp, prefix, len) == 0)
373 			cp += len;
374 		(void) strncpy(out, cp, size);
375 	} else {
376 		mdb_snprintf(out, size, "? (%d)", val);
377 	}
378 }
379 
380 /* ARGSUSED */
381 static int
382 zio_pipeline(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
383 {
384 	mdb_ctf_id_t pipe_enum;
385 	int i;
386 	char stage[1024];
387 
388 	if (mdb_ctf_lookup_by_name("enum zio_stage", &pipe_enum) == -1) {
389 		mdb_warn("Could not find enum zio_stage");
390 		return (DCMD_ERR);
391 	}
392 
393 	for (i = 0; i < 32; i++) {
394 		if (addr & (1U << i)) {
395 			enum_lookup(stage, sizeof (stage), pipe_enum, i,
396 			    "ZIO_STAGE_");
397 			mdb_printf("    %s\n", stage);
398 		}
399 	}
400 
401 	return (DCMD_OK);
402 }
403 
404 /* ARGSUSED */
405 static int
406 blkptr(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
407 {
408 	blkptr_t bp;
409 	dva_t *dva;
410 	dmu_object_type_info_t *doti;
411 	zio_compress_info_t *zct;
412 	zio_checksum_info_t *zci;
413 	int i;
414 	char buf[MAXPATHLEN];
415 
416 	if (mdb_vread(&bp, sizeof (blkptr_t), addr) == -1) {
417 		mdb_warn("failed to read blkptr_t");
418 		return (DCMD_ERR);
419 	}
420 
421 	if (read_symbol("dmu_ot", (void **)&doti) != DCMD_OK)
422 		return (DCMD_ERR);
423 	for (i = 0; i < DMU_OT_NUMTYPES; i++) {
424 		mdb_readstr(buf, sizeof (buf), (uintptr_t)doti[i].ot_name);
425 		doti[i].ot_name = local_strdup(buf);
426 	}
427 
428 	if (read_symbol("zio_checksum_table", (void **)&zci) != DCMD_OK)
429 		return (DCMD_ERR);
430 	for (i = 0; i < ZIO_CHECKSUM_FUNCTIONS; i++) {
431 		mdb_readstr(buf, sizeof (buf), (uintptr_t)zci[i].ci_name);
432 		zci[i].ci_name = local_strdup(buf);
433 	}
434 
435 	if (read_symbol("zio_compress_table", (void **)&zct) != DCMD_OK)
436 		return (DCMD_ERR);
437 	for (i = 0; i < ZIO_COMPRESS_FUNCTIONS; i++) {
438 		mdb_readstr(buf, sizeof (buf), (uintptr_t)zct[i].ci_name);
439 		zct[i].ci_name = local_strdup(buf);
440 	}
441 
442 	for (i = 0; i < SPA_DVAS_PER_BP; i++) {
443 		dva = &bp.blk_dva[i];
444 		mdb_printf("DVA[%d]: vdev_id %lld / %llx\n", i,
445 		    DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva));
446 		mdb_printf("DVA[%d]:                    GRID:  %04x\t"
447 		    "ASIZE: %llx\n", i, DVA_GET_GRID(dva), DVA_GET_ASIZE(dva));
448 	}
449 	mdb_printf("LSIZE:  %-16llx\t\tPSIZE: %llx\n",
450 	    BP_GET_LSIZE(&bp), BP_GET_PSIZE(&bp));
451 	mdb_printf("ENDIAN: %6s             GANG:  %-5s\tTYPE:  %s\n",
452 	    BP_GET_BYTEORDER(&bp) ? "LITTLE" : "BIG",
453 	    DVA_GET_GANG(dva) ? "TRUE" : "FALSE",
454 	    doti[BP_GET_TYPE(&bp)].ot_name);
455 	mdb_printf("BIRTH:  %-16llx   LEVEL: %-2d\tFILL:  %llx\n",
456 	    bp.blk_birth, BP_GET_LEVEL(&bp), bp.blk_fill);
457 	mdb_printf("CKFUNC: %-16s\t\tCOMP:  %s\n",
458 	    zci[BP_GET_CHECKSUM(&bp)].ci_name,
459 	    zct[BP_GET_COMPRESS(&bp)].ci_name);
460 	mdb_printf("CKSUM:  %llx:%llx:%llx:%llx\n",
461 	    bp.blk_cksum.zc_word[0],
462 	    bp.blk_cksum.zc_word[1],
463 	    bp.blk_cksum.zc_word[2],
464 	    bp.blk_cksum.zc_word[3]);
465 
466 	return (DCMD_OK);
467 }
468 
469 /* ARGSUSED */
470 static int
471 dbuf(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
472 {
473 	mdb_ctf_id_t id;
474 	dmu_buf_t db;
475 	uintptr_t objset;
476 	uint8_t level;
477 	uint64_t blkid;
478 	uint64_t holds;
479 	char objectname[32];
480 	char blkidname[32];
481 	char path[MAXNAMELEN];
482 
483 	if (DCMD_HDRSPEC(flags)) {
484 		mdb_printf("        addr object lvl blkid holds os\n");
485 	}
486 
487 	if (mdb_ctf_lookup_by_name("struct dmu_buf_impl", &id) == -1) {
488 		mdb_warn("couldn't find struct dmu_buf_impl_t");
489 		return (DCMD_ERR);
490 	}
491 
492 	if (GETMEMBID(addr, &id, db_objset, objset) ||
493 	    GETMEMBID(addr, &id, db, db) ||
494 	    GETMEMBID(addr, &id, db_level, level) ||
495 	    GETMEMBID(addr, &id, db_blkid, blkid)) {
496 		return (WALK_ERR);
497 	}
498 
499 	if (getrefcount(addr, &id, "db_holds", &holds)) {
500 		return (WALK_ERR);
501 	}
502 
503 	if (db.db_object == DMU_META_DNODE_OBJECT)
504 		(void) strcpy(objectname, "mdn");
505 	else
506 		(void) mdb_snprintf(objectname, sizeof (objectname), "%llx",
507 		    (u_longlong_t)db.db_object);
508 
509 	if (blkid == DB_BONUS_BLKID)
510 		(void) strcpy(blkidname, "bonus");
511 	else
512 		(void) mdb_snprintf(blkidname, sizeof (blkidname), "%llx",
513 		    (u_longlong_t)blkid);
514 
515 	if (objset_name(objset, path)) {
516 		return (WALK_ERR);
517 	}
518 
519 	mdb_printf("%p %8s %1u %9s %2llu %s\n",
520 	    addr, objectname, level, blkidname, holds, path);
521 
522 	return (DCMD_OK);
523 }
524 
525 /* ARGSUSED */
526 static int
527 dbuf_stats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
528 {
529 #define	HISTOSZ 32
530 	uintptr_t dbp;
531 	dmu_buf_impl_t db;
532 	dbuf_hash_table_t ht;
533 	uint64_t bucket, ndbufs;
534 	uint64_t histo[HISTOSZ];
535 	uint64_t histo2[HISTOSZ];
536 	int i, maxidx;
537 
538 	if (mdb_readvar(&ht, "dbuf_hash_table") == -1) {
539 		mdb_warn("failed to read 'dbuf_hash_table'");
540 		return (DCMD_ERR);
541 	}
542 
543 	for (i = 0; i < HISTOSZ; i++) {
544 		histo[i] = 0;
545 		histo2[i] = 0;
546 	}
547 
548 	ndbufs = 0;
549 	for (bucket = 0; bucket < ht.hash_table_mask+1; bucket++) {
550 		int len;
551 
552 		if (mdb_vread(&dbp, sizeof (void *),
553 		    (uintptr_t)(ht.hash_table+bucket)) == -1) {
554 			mdb_warn("failed to read hash bucket %u at %p",
555 			    bucket, ht.hash_table+bucket);
556 			return (DCMD_ERR);
557 		}
558 
559 		len = 0;
560 		while (dbp != 0) {
561 			if (mdb_vread(&db, sizeof (dmu_buf_impl_t),
562 			    dbp) == -1) {
563 				mdb_warn("failed to read dbuf at %p", dbp);
564 				return (DCMD_ERR);
565 			}
566 			dbp = (uintptr_t)db.db_hash_next;
567 			for (i = MIN(len, HISTOSZ - 1); i >= 0; i--)
568 				histo2[i]++;
569 			len++;
570 			ndbufs++;
571 		}
572 
573 		if (len >= HISTOSZ)
574 			len = HISTOSZ-1;
575 		histo[len]++;
576 	}
577 
578 	mdb_printf("hash table has %llu buckets, %llu dbufs "
579 	    "(avg %llu buckets/dbuf)\n",
580 	    ht.hash_table_mask+1, ndbufs,
581 	    (ht.hash_table_mask+1)/ndbufs);
582 
583 	mdb_printf("\n");
584 	maxidx = 0;
585 	for (i = 0; i < HISTOSZ; i++)
586 		if (histo[i] > 0)
587 			maxidx = i;
588 	mdb_printf("hash chain length	number of buckets\n");
589 	for (i = 0; i <= maxidx; i++)
590 		mdb_printf("%u			%llu\n", i, histo[i]);
591 
592 	mdb_printf("\n");
593 	maxidx = 0;
594 	for (i = 0; i < HISTOSZ; i++)
595 		if (histo2[i] > 0)
596 			maxidx = i;
597 	mdb_printf("hash chain depth	number of dbufs\n");
598 	for (i = 0; i <= maxidx; i++)
599 		mdb_printf("%u or more		%llu	%llu%%\n",
600 		    i, histo2[i], histo2[i]*100/ndbufs);
601 
602 
603 	return (DCMD_OK);
604 }
605 
606 typedef struct dbufs_data {
607 	mdb_ctf_id_t id;
608 	uint64_t objset;
609 	uint64_t object;
610 	uint64_t level;
611 	uint64_t blkid;
612 	char *osname;
613 } dbufs_data_t;
614 
615 #define	DBUFS_UNSET	(0xbaddcafedeadbeefULL)
616 
617 /* ARGSUSED */
618 static int
619 dbufs_cb(uintptr_t addr, const void *unknown, void *arg)
620 {
621 	dbufs_data_t *data = arg;
622 	uintptr_t objset;
623 	dmu_buf_t db;
624 	uint8_t level;
625 	uint64_t blkid;
626 	char osname[MAXNAMELEN];
627 
628 	if (GETMEMBID(addr, &data->id, db_objset, objset) ||
629 	    GETMEMBID(addr, &data->id, db, db) ||
630 	    GETMEMBID(addr, &data->id, db_level, level) ||
631 	    GETMEMBID(addr, &data->id, db_blkid, blkid)) {
632 		return (WALK_ERR);
633 	}
634 
635 	if ((data->objset == DBUFS_UNSET || data->objset == objset) &&
636 	    (data->osname == NULL || (objset_name(objset, osname) == 0 &&
637 		strcmp(data->osname, osname) == 0)) &&
638 	    (data->object == DBUFS_UNSET || data->object == db.db_object) &&
639 	    (data->level == DBUFS_UNSET || data->level == level) &&
640 	    (data->blkid == DBUFS_UNSET || data->blkid == blkid)) {
641 		mdb_printf("%#lr\n", addr);
642 	}
643 	return (WALK_NEXT);
644 }
645 
646 /* ARGSUSED */
647 static int
648 dbufs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
649 {
650 	dbufs_data_t data;
651 	char *object = NULL;
652 	char *blkid = NULL;
653 
654 	data.objset = data.object = data.level = data.blkid = DBUFS_UNSET;
655 	data.osname = NULL;
656 
657 	if (mdb_getopts(argc, argv,
658 	    'O', MDB_OPT_UINT64, &data.objset,
659 	    'n', MDB_OPT_STR, &data.osname,
660 	    'o', MDB_OPT_STR, &object,
661 	    'l', MDB_OPT_UINT64, &data.level,
662 	    'b', MDB_OPT_STR, &blkid) != argc) {
663 		return (DCMD_USAGE);
664 	}
665 
666 	if (object) {
667 		if (strcmp(object, "mdn") == 0) {
668 			data.object = DMU_META_DNODE_OBJECT;
669 		} else {
670 			data.object = mdb_strtoull(object);
671 		}
672 	}
673 
674 	if (blkid) {
675 		if (strcmp(blkid, "bonus") == 0) {
676 			data.blkid = DB_BONUS_BLKID;
677 		} else {
678 			data.blkid = mdb_strtoull(blkid);
679 		}
680 	}
681 
682 	if (mdb_ctf_lookup_by_name("struct dmu_buf_impl", &data.id) == -1) {
683 		mdb_warn("couldn't find struct dmu_buf_impl_t");
684 		return (DCMD_ERR);
685 	}
686 
687 	if (mdb_pwalk("dbufs", dbufs_cb, &data, 0) != 0) {
688 		mdb_warn("can't walk dbufs");
689 		return (DCMD_ERR);
690 	}
691 
692 	return (DCMD_OK);
693 }
694 
695 typedef struct abuf_find_data {
696 	dva_t dva;
697 	mdb_ctf_id_t id;
698 } abuf_find_data_t;
699 
700 /* ARGSUSED */
701 static int
702 abuf_find_cb(uintptr_t addr, const void *unknown, void *arg)
703 {
704 	abuf_find_data_t *data = arg;
705 	dva_t dva;
706 
707 	if (GETMEMBID(addr, &data->id, b_dva, dva)) {
708 		return (WALK_ERR);
709 	}
710 
711 	if (dva.dva_word[0] == data->dva.dva_word[0] &&
712 	    dva.dva_word[1] == data->dva.dva_word[1]) {
713 		mdb_printf("%#lr\n", addr);
714 	}
715 	return (WALK_NEXT);
716 }
717 
718 /* ARGSUSED */
719 static int
720 abuf_find(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
721 {
722 	abuf_find_data_t data;
723 	GElf_Sym sym;
724 	int i;
725 	const char *syms[] = {
726 		"ARC_mru_top",
727 		"ARC_mru_bot",
728 		"ARC_mfu_top",
729 		"ARC_mfu_bot",
730 	};
731 
732 	if (argc != 2)
733 		return (DCMD_USAGE);
734 
735 	for (i = 0; i < 2; i ++) {
736 		switch (argv[i].a_type) {
737 		case MDB_TYPE_STRING:
738 			data.dva.dva_word[i] = mdb_strtoull(argv[i].a_un.a_str);
739 			break;
740 		case MDB_TYPE_IMMEDIATE:
741 			data.dva.dva_word[i] = argv[i].a_un.a_val;
742 			break;
743 		default:
744 			return (DCMD_USAGE);
745 		}
746 	}
747 
748 	if (mdb_ctf_lookup_by_name("struct arc_buf_hdr", &data.id) == -1) {
749 		mdb_warn("couldn't find struct arc_buf_hdr");
750 		return (DCMD_ERR);
751 	}
752 
753 	for (i = 0; i < sizeof (syms) / sizeof (syms[0]); i++) {
754 		if (mdb_lookup_by_name(syms[i], &sym)) {
755 			mdb_warn("can't find symbol %s", syms[i]);
756 			return (DCMD_ERR);
757 		}
758 
759 		if (mdb_pwalk("list", abuf_find_cb, &data, sym.st_value) != 0) {
760 			mdb_warn("can't walk %s", syms[i]);
761 			return (DCMD_ERR);
762 		}
763 	}
764 
765 	return (DCMD_OK);
766 }
767 
/*
 * Print the usage summary for ::abuf_find.
 */
void
abuf_help(void)
{
	mdb_printf("::abuf_find dva_word[0] dva_word[1]\n");
}
773 
774 /*
775  * ::spa
776  *
777  * 	-c	Print configuration information as well
778  * 	-v	Print vdev state
779  * 	-e	Print vdev error stats
780  *
781  * Print a summarized spa_t.  When given no arguments, prints out a table of all
782  * active pools on the system.
783  */
784 /* ARGSUSED */
785 static int
786 spa_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
787 {
788 	spa_t spa;
789 	char poolname[MAXNAMELEN];
790 	const char *statetab[] = { "ACTIVE", "EXPORTED", "DESTROYED",
791 		"UNINIT", "UNAVAIL" };
792 	const char *state;
793 	int config = FALSE;
794 	int vdevs = FALSE;
795 	int errors = FALSE;
796 
797 	if (mdb_getopts(argc, argv,
798 	    'c', MDB_OPT_SETBITS, TRUE, &config,
799 	    'v', MDB_OPT_SETBITS, TRUE, &vdevs,
800 	    'e', MDB_OPT_SETBITS, TRUE, &errors,
801 	    NULL) != argc)
802 		return (DCMD_USAGE);
803 
804 	if (!(flags & DCMD_ADDRSPEC)) {
805 		if (mdb_walk_dcmd("spa", "spa", argc, argv) == -1) {
806 			mdb_warn("can't walk spa");
807 			return (DCMD_ERR);
808 		}
809 
810 		return (DCMD_OK);
811 	}
812 
813 	if (flags & DCMD_PIPE_OUT) {
814 		mdb_printf("%#lr\n", addr);
815 		return (DCMD_OK);
816 	}
817 
818 	if (DCMD_HDRSPEC(flags))
819 		mdb_printf("%<u>%-?s %9s %-*s%</u>\n", "ADDR", "STATE",
820 		    sizeof (uintptr_t) == 4 ? 60 : 52, "NAME");
821 
822 	if (mdb_vread(&spa, sizeof (spa), addr) == -1) {
823 		mdb_warn("failed to read spa_t at %p", addr);
824 		return (DCMD_ERR);
825 	}
826 
827 	if (mdb_readstr(poolname, sizeof (poolname), (uintptr_t)spa.spa_name)
828 	    == -1) {
829 		mdb_warn("failed to read pool name at %p", spa.spa_name);
830 		return (DCMD_ERR);
831 	}
832 
833 	if (spa.spa_state < 0 || spa.spa_state > POOL_STATE_UNAVAIL)
834 		state = "UKNNOWN";
835 	else
836 		state = statetab[spa.spa_state];
837 
838 	mdb_printf("%0?p %9s %s\n", addr, state, poolname);
839 
840 	if (config) {
841 		mdb_printf("\n");
842 		mdb_inc_indent(4);
843 		if (mdb_call_dcmd("spa_config", addr, flags, 0,
844 		    NULL) != DCMD_OK)
845 			return (DCMD_ERR);
846 		mdb_dec_indent(4);
847 	}
848 
849 	if (vdevs || errors) {
850 		mdb_arg_t v;
851 
852 		v.a_type = MDB_TYPE_STRING;
853 		v.a_un.a_str = "-e";
854 
855 		mdb_printf("\n");
856 		mdb_inc_indent(4);
857 		if (mdb_call_dcmd("spa_vdevs", addr, flags, errors ? 1 : 0,
858 		    &v) != DCMD_OK)
859 			return (DCMD_ERR);
860 		mdb_dec_indent(4);
861 	}
862 
863 	return (DCMD_OK);
864 }
865 
866 /*
867  * ::spa_config
868  *
869  * Given a spa_t, print the configuration information stored in spa_config.
870  * Since it's just an nvlist, format it as an indented list of name=value pairs.
871  * We simply read the value of spa_config and pass off to ::nvlist.
872  */
873 /* ARGSUSED */
874 static int
875 spa_print_config(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
876 {
877 	spa_t spa;
878 
879 	if (argc != 0 || !(flags & DCMD_ADDRSPEC))
880 		return (DCMD_USAGE);
881 
882 	if (mdb_vread(&spa, sizeof (spa), addr) == -1) {
883 		mdb_warn("failed to read spa_t at %p", addr);
884 		return (DCMD_ERR);
885 	}
886 
887 	if (spa.spa_config == NULL) {
888 		mdb_printf("(none)\n");
889 		return (DCMD_OK);
890 	}
891 
892 	return (mdb_call_dcmd("nvlist", (uintptr_t)spa.spa_config, flags,
893 	    0, NULL));
894 }
895 
/*
 * Print the usage summary for ::vdev, covering every option that
 * vdev_print() accepts.
 */
void
vdev_help(void)
{
	mdb_printf("[vdev_t*]::vdev [-qre]\n"
		"\t-> -q display vdev_queue parameters\n"
		"\t-> -r recursive (visit all children)\n"
		"\t-> -e display vdev statistics\n");
}
903 
904 /*
905  * ::vdev
906  *
907  * Print out a summarized vdev_t, in the following form:
908  *
909  * ADDR             STATE	AUX            DESC
910  * fffffffbcde23df0 HEALTHY	-              /dev/dsk/c0t0d0
911  *
912  * or with "-q" to print out a vdev_t's vdev_queue parameters:
913  *
914  *  vdev_t: c26ae4c0
915  *     c26ae73c min pending         0x2
916  *     c26ae744 max pending         0x23
917  *     c26ae74c agg limit           0x20000
918  *     c26ae754 time shift          0x4
919  *     c26ae75c ramp rate           0x2
920  *
921  * If '-r' is specified, recursively visit all children.
922  *
923  * With '-e', the statistics associated with the vdev are printed as well.
924  */
925 static int
926 do_print_vdev(uintptr_t addr, int flags, int depth, int queue, int stats,
927     int recursive)
928 {
929 	vdev_t vdev;
930 	char desc[MAXNAMELEN];
931 	int c, children;
932 	uintptr_t *child;
933 	const char *state, *aux;
934 
935 	if (mdb_vread(&vdev, sizeof (vdev), (uintptr_t)addr) == -1) {
936 		mdb_warn("failed to read vdev_t at %p\n", (uintptr_t)addr);
937 		return (DCMD_ERR);
938 	}
939 
940 	if (flags & DCMD_PIPE_OUT) {
941 		mdb_printf("%#lr", addr);
942 	} else {
943 		if (vdev.vdev_path != NULL) {
944 			if (mdb_readstr(desc, sizeof (desc),
945 			    (uintptr_t)vdev.vdev_path) == -1) {
946 				mdb_warn("failed to read vdev_path at %p\n",
947 				    vdev.vdev_path);
948 				return (DCMD_ERR);
949 			}
950 		} else if (vdev.vdev_ops != NULL) {
951 			vdev_ops_t ops;
952 			if (mdb_vread(&ops, sizeof (ops),
953 			    (uintptr_t)vdev.vdev_ops) == -1) {
954 				mdb_warn("failed to read vdev_ops at %p\n",
955 				    vdev.vdev_ops);
956 				return (DCMD_ERR);
957 			}
958 			(void) strcpy(desc, ops.vdev_op_type);
959 		} else {
960 			(void) strcpy(desc, "<unknown>");
961 		}
962 
963 		if (depth == 0 && DCMD_HDRSPEC(flags))
964 			mdb_printf("%<u>%-?s %-9s %-12s %-*s%</u>\n",
965 			    "ADDR", "STATE", "AUX",
966 			    sizeof (uintptr_t) == 4 ? 43 : 35,
967 			    "DESCRIPTION");
968 
969 		mdb_printf("%0?p ", addr);
970 
971 		switch (vdev.vdev_state) {
972 		case VDEV_STATE_CLOSED:
973 		    state = "CLOSED";
974 		    break;
975 		case VDEV_STATE_OFFLINE:
976 		    state = "OFFLINE";
977 		    break;
978 		case VDEV_STATE_CANT_OPEN:
979 		    state = "CANT_OPEN";
980 		    break;
981 		case VDEV_STATE_DEGRADED:
982 		    state = "DEGRADED";
983 		    break;
984 		case VDEV_STATE_HEALTHY:
985 		    state = "HEALTHY";
986 		    break;
987 		default:
988 		    state = "UNKNOWN";
989 		    break;
990 		}
991 
992 		switch (vdev.vdev_stat.vs_aux) {
993 		case VDEV_AUX_NONE:
994 			aux = "-";
995 			break;
996 		case VDEV_AUX_OPEN_FAILED:
997 			aux = "OPEN_FAILED";
998 			break;
999 		case VDEV_AUX_CORRUPT_DATA:
1000 			aux = "CORRUPT_DATA";
1001 			break;
1002 		case VDEV_AUX_NO_REPLICAS:
1003 			aux = "NO_REPLICAS";
1004 			break;
1005 		case VDEV_AUX_BAD_GUID_SUM:
1006 			aux = "BAD_GUID_SUM";
1007 			break;
1008 		case VDEV_AUX_TOO_SMALL:
1009 			aux = "TOO_SMALL";
1010 			break;
1011 		case VDEV_AUX_BAD_LABEL:
1012 			aux = "BAD_LABEL";
1013 			break;
1014 		default:
1015 			aux = "UNKNOWN";
1016 			break;
1017 		}
1018 
1019 		mdb_printf("%-9s %-12s %*s%s\n", state, aux, depth, "", desc);
1020 
1021 		if (queue) {
1022 			mdb_inc_indent(4);
1023 			mdb_printf("\n");
1024 			mdb_printf("%p min pending		0x%llx\n",
1025 			    (uintptr_t)(addr + offsetof(vdev_t,
1026 			    vdev_queue.vq_min_pending)),
1027 			    vdev.vdev_queue.vq_min_pending);
1028 			mdb_printf("%p max pending		0x%llx\n",
1029 			    (uintptr_t)(addr + offsetof(vdev_t,
1030 			    vdev_queue.vq_max_pending)),
1031 			    vdev.vdev_queue.vq_max_pending);
1032 			mdb_printf("%p agg limit		0x%llx\n",
1033 			    (uintptr_t)(addr + offsetof(vdev_t,
1034 			    vdev_queue.vq_agg_limit)),
1035 			    vdev.vdev_queue.vq_agg_limit);
1036 			mdb_printf("%p time shift		0x%llx\n",
1037 			    (uintptr_t)(addr + offsetof(vdev_t,
1038 			    vdev_queue.vq_time_shift)),
1039 			    vdev.vdev_queue.vq_time_shift);
1040 			mdb_printf("%p ramp rate 		0x%llx\n",
1041 			    (uintptr_t)(addr + offsetof(vdev_t,
1042 			    vdev_queue.vq_ramp_rate)),
1043 			    vdev.vdev_queue.vq_ramp_rate);
1044 			mdb_dec_indent(4);
1045 		}
1046 
1047 		if (stats) {
1048 			vdev_stat_t *vs = &vdev.vdev_stat;
1049 			int i;
1050 
1051 			mdb_inc_indent(4);
1052 			mdb_printf("\n");
1053 			mdb_printf("%<u>       %12s %12s %12s %12s "
1054 			    "%12s%</u>\n", "READ", "WRITE", "FREE", "CLAIM",
1055 			    "IOCTL");
1056 			mdb_printf("OPS     ");
1057 			for (i = 1; i < ZIO_TYPES; i++)
1058 				mdb_printf("%11#llx%s", vs->vs_ops[i],
1059 				    i == ZIO_TYPES - 1 ? "" : "  ");
1060 			mdb_printf("\n");
1061 			mdb_printf("BYTES   ");
1062 			for (i = 1; i < ZIO_TYPES; i++)
1063 				mdb_printf("%11#llx%s", vs->vs_bytes[i],
1064 				    i == ZIO_TYPES - 1 ? "" : "  ");
1065 
1066 
1067 			mdb_printf("\n");
1068 			mdb_printf("EREAD    %10#llx\n", vs->vs_read_errors);
1069 			mdb_printf("EWRITE   %10#llx\n", vs->vs_write_errors);
1070 			mdb_printf("ECKSUM   %10#llx\n",
1071 			    vs->vs_checksum_errors);
1072 			mdb_dec_indent(4);
1073 		}
1074 
1075 		if (queue || stats)
1076 			mdb_printf("\n");
1077 	}
1078 
1079 	children = vdev.vdev_children;
1080 
1081 	if (children == 0 || !recursive)
1082 		return (DCMD_OK);
1083 
1084 	child = mdb_alloc(children * sizeof (void *), UM_SLEEP | UM_GC);
1085 	if (mdb_vread(child, children * sizeof (void *),
1086 	    (uintptr_t)vdev.vdev_child) == -1) {
1087 		mdb_warn("failed to read vdev children at %p", vdev.vdev_child);
1088 		return (DCMD_ERR);
1089 	}
1090 
1091 	for (c = 0; c < children; c++) {
1092 		if (do_print_vdev(child[c], flags, depth + 2, queue, stats,
1093 		    recursive))
1094 			return (DCMD_ERR);
1095 	}
1096 
1097 	return (DCMD_OK);
1098 }
1099 
1100 static int
1101 vdev_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1102 {
1103 	int print_queue = FALSE;
1104 	int recursive = FALSE;
1105 	int stats = FALSE;
1106 
1107 	if (mdb_getopts(argc, argv,
1108 	    'q', MDB_OPT_SETBITS, TRUE, &print_queue,
1109 	    'r', MDB_OPT_SETBITS, TRUE, &recursive,
1110 	    'e', MDB_OPT_SETBITS, TRUE, &stats,
1111 	    NULL) != argc)
1112 		return (DCMD_USAGE);
1113 
1114 	if (!(flags & DCMD_ADDRSPEC)) {
1115 		mdb_warn("no vdev_t address given\n");
1116 		return (DCMD_ERR);
1117 	}
1118 
1119 	return (do_print_vdev(addr, flags, 0, print_queue, stats, recursive));
1120 }
1121 
1122 typedef struct mdb_spa {
1123 	uintptr_t spa_dsl_pool;
1124 	uintptr_t spa_root_vdev;
1125 } mdb_spa_t;
1126 
1127 typedef struct mdb_dsl_dir {
1128 	uintptr_t dd_phys;
1129 	uint64_t dd_used_bytes;
1130 	int64_t dd_space_towrite[TXG_SIZE];
1131 } mdb_dsl_dir_t;
1132 
1133 typedef struct mdb_dsl_dir_phys {
1134 	uint64_t dd_used_bytes;
1135 	uint64_t dd_compressed_bytes;
1136 	uint64_t dd_uncompressed_bytes;
1137 } mdb_dsl_dir_phys_t;
1138 
1139 typedef struct mdb_vdev {
1140 	uintptr_t vdev_parent;
1141 	uintptr_t vdev_ms;
1142 	uint64_t vdev_ms_count;
1143 	vdev_stat_t vdev_stat;
1144 } mdb_vdev_t;
1145 
1146 typedef struct mdb_metaslab {
1147 	space_map_t ms_allocmap[TXG_SIZE];
1148 	space_map_t ms_freemap[TXG_SIZE];
1149 	space_map_t ms_map;
1150 	uint64_t ms_usable_space;
1151 } mdb_metaslab_t;
1152 
1153 /*
1154  * ::spa_space [-b]
1155  *
 * Given a spa_t, print out its on-disk space usage and in-core
1157  * estimates of future usage.  If -b is given, print space in bytes.
1158  * Otherwise print in megabytes.
1159  */
1160 /* ARGSUSED */
1161 static int
1162 spa_space(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1163 {
1164 	mdb_spa_t spa;
1165 	uintptr_t dp_root_dir;
1166 	mdb_dsl_dir_t dd;
1167 	mdb_dsl_dir_phys_t dsp;
1168 	uint64_t children;
1169 	uintptr_t childaddr;
1170 	uintptr_t *child;
1171 	uint64_t ms_allocmap[TXG_SIZE] = {0, 0, 0, 0};
1172 	uint64_t ms_freemap[TXG_SIZE] = {0, 0, 0, 0};
1173 	uint64_t ms_map = 0;
1174 	uint64_t ms_usable_space = 0;
1175 	int i, j;
1176 	int havecompressed = TRUE;
1177 	int shift = 20;
1178 	char *suffix = "M";
1179 	int bits = FALSE;
1180 
1181 	if (mdb_getopts(argc, argv, 'b', MDB_OPT_SETBITS, TRUE, &bits, NULL) !=
1182 	    argc)
1183 		return (DCMD_USAGE);
1184 	if (!(flags & DCMD_ADDRSPEC))
1185 		return (DCMD_USAGE);
1186 
1187 	if (bits) {
1188 		shift = 0;
1189 		suffix = "";
1190 	}
1191 
1192 	if (GETMEMB(addr, struct spa, spa_dsl_pool, spa.spa_dsl_pool) ||
1193 	    GETMEMB(addr, struct spa, spa_root_vdev, spa.spa_root_vdev) ||
1194 	    GETMEMB(spa.spa_root_vdev, struct vdev, vdev_children, children) ||
1195 	    GETMEMB(spa.spa_root_vdev, struct vdev, vdev_child, childaddr) ||
1196 	    GETMEMB(spa.spa_dsl_pool, struct dsl_pool,
1197 	    dp_root_dir, dp_root_dir) ||
1198 	    GETMEMB(dp_root_dir, struct dsl_dir, dd_phys, dd.dd_phys) ||
1199 	    GETMEMB(dp_root_dir, struct dsl_dir,
1200 	    dd_used_bytes, dd.dd_used_bytes) ||
1201 	    GETMEMB(dp_root_dir, struct dsl_dir,
1202 	    dd_space_towrite, dd.dd_space_towrite) ||
1203 	    GETMEMB(dd.dd_phys, struct dsl_dir_phys,
1204 	    dd_used_bytes, dsp.dd_used_bytes)) {
1205 		return (DCMD_ERR);
1206 	}
1207 
1208 	if (GETMEMB(dd.dd_phys, struct dsl_dir_phys,
1209 	    dd_compressed_bytes, dsp.dd_compressed_bytes) ||
1210 	    GETMEMB(dd.dd_phys, struct dsl_dir_phys,
1211 	    dd_uncompressed_bytes, dsp.dd_uncompressed_bytes)) {
1212 		havecompressed = FALSE;
1213 	}
1214 
1215 	child = mdb_alloc(children * sizeof (void *), UM_SLEEP | UM_GC);
1216 	if (mdb_vread(child, children * sizeof (void *), childaddr) == -1) {
1217 		mdb_warn("failed to read root vdev children at %p", childaddr);
1218 		return (DCMD_ERR);
1219 	}
1220 
1221 	mdb_printf("dd_space_towrite = %llu%s %llu%s %llu%s %llu%s\n",
1222 	    dd.dd_space_towrite[0] >> shift, suffix,
1223 	    dd.dd_space_towrite[1] >> shift, suffix,
1224 	    dd.dd_space_towrite[2] >> shift, suffix,
1225 	    dd.dd_space_towrite[3] >> shift, suffix);
1226 	mdb_printf("dd_used_bytes = %llu%s\n",
1227 	    dd.dd_used_bytes >> shift, suffix);
1228 
1229 	mdb_printf("dd_phys.dd_used_bytes = %llu%s\n",
1230 	    dsp.dd_used_bytes >> shift, suffix);
1231 	if (havecompressed) {
1232 		mdb_printf("dd_phys.dd_compressed_bytes = %llu%s\n",
1233 		    dsp.dd_compressed_bytes >> shift, suffix);
1234 		mdb_printf("dd_phys.dd_uncompressed_bytes = %llu%s\n",
1235 		    dsp.dd_uncompressed_bytes >> shift, suffix);
1236 	}
1237 
1238 	for (i = 0; i < children; i++) {
1239 		mdb_vdev_t vd;
1240 		uintptr_t *vdev_ms;
1241 
1242 		if (GETMEMB(child[i], struct vdev,
1243 		    vdev_parent, vd.vdev_parent) ||
1244 		    GETMEMB(child[i], struct vdev,
1245 		    vdev_stat, vd.vdev_stat) ||
1246 		    GETMEMB(child[i], struct vdev, vdev_ms, vd.vdev_ms) ||
1247 		    GETMEMB(child[i], struct vdev,
1248 		    vdev_ms_count, vd.vdev_ms_count)) {
1249 			return (DCMD_ERR);
1250 		}
1251 
1252 		/*
1253 		 * If this is the root vdev, its stats are the pool-wide stats.
1254 		 */
1255 		if (vd.vdev_parent == NULL) {
1256 			mdb_printf("pool_alloc = %llu%s\n",
1257 			    vd.vdev_stat.vs_alloc >> shift, suffix);
1258 			mdb_printf("pool_space = %llu%s\n",
1259 			    vd.vdev_stat.vs_space >> shift, suffix);
1260 		}
1261 
1262 		/*
1263 		 * If this is not a top-level vdev, it doesn't have space.
1264 		 */
1265 		if (vd.vdev_parent != spa.spa_root_vdev)
1266 			continue;
1267 
1268 		vdev_ms = mdb_alloc(vd.vdev_ms_count * sizeof (void*),
1269 		    UM_SLEEP | UM_GC);
1270 		if (mdb_vread(vdev_ms, vd.vdev_ms_count * sizeof (void*),
1271 		    (uintptr_t)vd.vdev_ms) == -1) {
1272 			mdb_warn("failed to read vdev_ms at %p", vd.vdev_ms);
1273 			return (DCMD_ERR);
1274 		}
1275 
1276 		for (j = 0; j < vd.vdev_ms_count; j++) {
1277 			mdb_metaslab_t ms;
1278 
1279 			if (GETMEMB(vdev_ms[j], struct metaslab,
1280 			    ms_allocmap, ms.ms_allocmap) ||
1281 			    GETMEMB(vdev_ms[j], struct metaslab,
1282 			    ms_freemap, ms.ms_freemap) ||
1283 			    GETMEMB(vdev_ms[j], struct metaslab,
1284 			    ms_map, ms.ms_map) ||
1285 			    GETMEMB(vdev_ms[j], struct metaslab,
1286 			    ms_usable_space, ms.ms_usable_space)) {
1287 				return (DCMD_ERR);
1288 			}
1289 
1290 			ms_allocmap[0] += ms.ms_allocmap[0].sm_space;
1291 			ms_allocmap[1] += ms.ms_allocmap[1].sm_space;
1292 			ms_allocmap[2] += ms.ms_allocmap[2].sm_space;
1293 			ms_allocmap[3] += ms.ms_allocmap[3].sm_space;
1294 			ms_freemap[0] += ms.ms_freemap[0].sm_space;
1295 			ms_freemap[1] += ms.ms_freemap[1].sm_space;
1296 			ms_freemap[2] += ms.ms_freemap[2].sm_space;
1297 			ms_freemap[3] += ms.ms_freemap[3].sm_space;
1298 			ms_map += ms.ms_map.sm_space;
1299 			ms_usable_space += ms.ms_usable_space;
1300 		}
1301 	}
1302 
1303 	mdb_printf("ms_allocmap = %llu%s %llu%s %llu%s %llu%s\n",
1304 	    ms_allocmap[0] >> shift, suffix,
1305 	    ms_allocmap[1] >> shift, suffix,
1306 	    ms_allocmap[2] >> shift, suffix,
1307 	    ms_allocmap[3] >> shift, suffix);
1308 	mdb_printf("ms_freemap = %llu%s %llu%s %llu%s %llu%s\n",
1309 	    ms_freemap[0] >> shift, suffix,
1310 	    ms_freemap[1] >> shift, suffix,
1311 	    ms_freemap[2] >> shift, suffix,
1312 	    ms_freemap[3] >> shift, suffix);
1313 	mdb_printf("ms_map = %llu%s\n", ms_map >> shift, suffix);
1314 	mdb_printf("ms_usable_space = %llu%s\n",
1315 	    ms_usable_space >> shift, suffix);
1316 
1317 	return (DCMD_OK);
1318 }
1319 
/*
 * ::spa_verify
 *
 * Given a spa_t, verify that the pool is self-consistent.
 * Currently, it only checks to make sure that the vdev tree exists.
 */
1326 /* ARGSUSED */
1327 static int
1328 spa_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1329 {
1330 	spa_t spa;
1331 
1332 	if (argc != 0 || !(flags & DCMD_ADDRSPEC))
1333 		return (DCMD_USAGE);
1334 
1335 	if (mdb_vread(&spa, sizeof (spa), addr) == -1) {
1336 		mdb_warn("failed to read spa_t at %p", addr);
1337 		return (DCMD_ERR);
1338 	}
1339 
1340 	if (spa.spa_root_vdev == NULL) {
1341 		mdb_printf("no vdev tree present\n");
1342 		return (DCMD_OK);
1343 	}
1344 
1345 	return (DCMD_OK);
1346 }
1347 
/*
 * ::spa_vdevs [-e]
 *
 * 	-e	Include error stats
 *
 * Print out a summarized list of vdevs for the given spa_t.
 * This is accomplished by invoking "::vdev -r" on the root vdev, or
 * "::vdev -re" when -e is given.
 */
1356 /* ARGSUSED */
1357 static int
1358 spa_vdevs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1359 {
1360 	spa_t spa;
1361 	mdb_arg_t v;
1362 	int errors = FALSE;
1363 
1364 	if (mdb_getopts(argc, argv,
1365 	    'e', MDB_OPT_SETBITS, TRUE, &errors,
1366 	    NULL) != argc)
1367 		return (DCMD_USAGE);
1368 
1369 	if (!(flags & DCMD_ADDRSPEC))
1370 		return (DCMD_USAGE);
1371 
1372 	if (mdb_vread(&spa, sizeof (spa), addr) == -1) {
1373 		mdb_warn("failed to read spa_t at %p", addr);
1374 		return (DCMD_ERR);
1375 	}
1376 
1377 	/*
1378 	 * Unitialized spa_t structures can have a NULL root vdev.
1379 	 */
1380 	if (spa.spa_root_vdev == NULL) {
1381 		mdb_printf("no associated vdevs\n");
1382 		return (DCMD_OK);
1383 	}
1384 
1385 	v.a_type = MDB_TYPE_STRING;
1386 	v.a_un.a_str = errors ? "-re" : "-r";
1387 
1388 	return (mdb_call_dcmd("vdev", (uintptr_t)spa.spa_root_vdev,
1389 	    flags, 1, &v));
1390 }
1391 
/*
 * Private state for the txg_list walkers, set up by
 * txg_list_walk_init_common() and consumed by txg_list_walk_step().
 */
typedef struct txg_list_walk_data {
	uintptr_t lw_head[TXG_SIZE];	/* copy of the list's tl_head[] */
	int	lw_txgoff;	/* index of the txg list being walked */
	int	lw_maxoff;	/* last txg index the walk may advance to */
	size_t	lw_offset;	/* copy of tl_offset: node offset in object */
	void	*lw_obj;	/* buffer of lw_offset + sizeof (txg_node_t) */
} txg_list_walk_data_t;
1399 
1400 static int
1401 txg_list_walk_init_common(mdb_walk_state_t *wsp, int txg, int maxoff)
1402 {
1403 	txg_list_walk_data_t *lwd;
1404 	txg_list_t list;
1405 	int i;
1406 
1407 	lwd = mdb_alloc(sizeof (txg_list_walk_data_t), UM_SLEEP | UM_GC);
1408 	if (mdb_vread(&list, sizeof (txg_list_t), wsp->walk_addr) == -1) {
1409 		mdb_warn("failed to read txg_list_t at %#lx", wsp->walk_addr);
1410 		return (WALK_ERR);
1411 	}
1412 
1413 	for (i = 0; i < TXG_SIZE; i++)
1414 		lwd->lw_head[i] = (uintptr_t)list.tl_head[i];
1415 	lwd->lw_offset = list.tl_offset;
1416 	lwd->lw_obj = mdb_alloc(lwd->lw_offset + sizeof (txg_node_t),
1417 	    UM_SLEEP | UM_GC);
1418 	lwd->lw_txgoff = txg;
1419 	lwd->lw_maxoff = maxoff;
1420 
1421 	wsp->walk_addr = lwd->lw_head[lwd->lw_txgoff];
1422 	wsp->walk_data = lwd;
1423 
1424 	return (WALK_NEXT);
1425 }
1426 
1427 static int
1428 txg_list_walk_init(mdb_walk_state_t *wsp)
1429 {
1430 	return (txg_list_walk_init_common(wsp, 0, TXG_SIZE-1));
1431 }
1432 
1433 static int
1434 txg_list0_walk_init(mdb_walk_state_t *wsp)
1435 {
1436 	return (txg_list_walk_init_common(wsp, 0, 0));
1437 }
1438 
1439 static int
1440 txg_list1_walk_init(mdb_walk_state_t *wsp)
1441 {
1442 	return (txg_list_walk_init_common(wsp, 1, 1));
1443 }
1444 
1445 static int
1446 txg_list2_walk_init(mdb_walk_state_t *wsp)
1447 {
1448 	return (txg_list_walk_init_common(wsp, 2, 2));
1449 }
1450 
1451 static int
1452 txg_list3_walk_init(mdb_walk_state_t *wsp)
1453 {
1454 	return (txg_list_walk_init_common(wsp, 3, 3));
1455 }
1456 
1457 static int
1458 txg_list_walk_step(mdb_walk_state_t *wsp)
1459 {
1460 	txg_list_walk_data_t *lwd = wsp->walk_data;
1461 	uintptr_t addr;
1462 	txg_node_t *node;
1463 	int status;
1464 
1465 	while (wsp->walk_addr == NULL && lwd->lw_txgoff < lwd->lw_maxoff) {
1466 		lwd->lw_txgoff++;
1467 		wsp->walk_addr = lwd->lw_head[lwd->lw_txgoff];
1468 	}
1469 
1470 	if (wsp->walk_addr == NULL)
1471 		return (WALK_DONE);
1472 
1473 	addr = wsp->walk_addr - lwd->lw_offset;
1474 
1475 	if (mdb_vread(lwd->lw_obj,
1476 	    lwd->lw_offset + sizeof (txg_node_t), addr) == -1) {
1477 		mdb_warn("failed to read list element at %#lx", addr);
1478 		return (WALK_ERR);
1479 	}
1480 
1481 	status = wsp->walk_callback(addr, lwd->lw_obj, wsp->walk_cbdata);
1482 	node = (txg_node_t *)((uintptr_t)lwd->lw_obj + lwd->lw_offset);
1483 	wsp->walk_addr = (uintptr_t)node->tn_next[lwd->lw_txgoff];
1484 
1485 	return (status);
1486 }
1487 
1488 /* ARGSUSED */
static void
txg_list_walk_fini(mdb_walk_state_t *wsp)
{
	/*
	 * Intentionally empty: the walk state and object buffer are
	 * allocated with UM_GC in txg_list_walk_init_common(), so this
	 * function has nothing to free explicitly.
	 */
}
1493 
1494 /*
1495  * ::walk spa
1496  *
1497  * Walk all named spa_t structures in the namespace.  This is nothing more than
1498  * a layered avl walk.
1499  */
1500 static int
1501 spa_walk_init(mdb_walk_state_t *wsp)
1502 {
1503 	GElf_Sym sym;
1504 
1505 	if (wsp->walk_addr != NULL) {
1506 		mdb_warn("spa walk only supports global walks\n");
1507 		return (WALK_ERR);
1508 	}
1509 
1510 	if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "spa_namespace_avl", &sym) == -1) {
1511 		mdb_warn("failed to find symbol 'spa_namespace_avl'");
1512 		return (WALK_ERR);
1513 	}
1514 
1515 	wsp->walk_addr = (uintptr_t)sym.st_value;
1516 
1517 	if (mdb_layered_walk("avl", wsp) == -1) {
1518 		mdb_warn("failed to walk 'avl'\n");
1519 		return (WALK_ERR);
1520 	}
1521 
1522 	return (WALK_NEXT);
1523 }
1524 
1525 static int
1526 spa_walk_step(mdb_walk_state_t *wsp)
1527 {
1528 	spa_t	spa;
1529 
1530 	if (mdb_vread(&spa, sizeof (spa), wsp->walk_addr) == -1) {
1531 		mdb_warn("failed to read spa_t at %p", wsp->walk_addr);
1532 		return (WALK_ERR);
1533 	}
1534 
1535 	return (wsp->walk_callback(wsp->walk_addr, &spa, wsp->walk_cbdata));
1536 }
1537 
1538 /*
1539  * MDB module linkage information:
1540  *
1541  * We declare a list of structures describing our dcmds, and a function
1542  * named _mdb_init to return a pointer to our module information.
1543  */
1544 
1545 static const mdb_dcmd_t dcmds[] = {
1546 	{ "blkptr", ":", "print blkptr_t", blkptr },
1547 	{ "dbuf", ":", "print dmu_buf_impl_t", dbuf },
1548 	{ "dbuf_stats", ":", "dbuf stats", dbuf_stats },
1549 	{ "dbufs",
1550 	"\t[-O objset_t*] [-n objset_name | \"mos\"] [-o object | \"mdn\"] \n"
1551 	"\t[-l level] [-b blkid | \"bonus\"]",
1552 	"find dmu_buf_impl_t's that meet criterion", dbufs },
1553 	{ "abuf_find", "dva_word[0] dva_word[1]",
1554 	"find arc_buf_hdr_t of a specified DVA",
1555 	abuf_find },
1556 	{ "spa", "?[-cv]", "spa_t summary", spa_print },
1557 	{ "spa_config", ":", "print spa_t configuration", spa_print_config },
1558 	{ "spa_verify", ":", "verify spa_t consistency", spa_verify },
1559 	{ "spa_space", ":[-b]", "print spa_t on-disk space usage", spa_space },
1560 	{ "spa_vdevs", ":", "given a spa_t, print vdev summary", spa_vdevs },
1561 	{ "vdev", ":[-qre]", "vdev_t summary", vdev_print },
1562 	{ "zio_pipeline", ":", "decode a zio pipeline", zio_pipeline },
1563 	{ NULL }
1564 };
1565 
/*
 * Walker table: each entry gives the walker name, a description, and the
 * init, step and fini functions.  The table ends with a NULL-named entry.
 */
static const mdb_walker_t walkers[] = {
	/*
	 * In userland, there is no generic provider of list_t walkers, so we
	 * need to add it.
	 */
#ifndef _KERNEL
	{ LIST_WALK_NAME, LIST_WALK_DESC,
		list_walk_init, list_walk_step, list_walk_fini },
#endif
	{ "dbufs", "walk cached ZFS dbufs",
		dbuf_walk_init, dbuf_walk_step, dbuf_walk_fini },
	{ "zms_freelist", "walk ZFS metaslab freelist",
		freelist_walk_init, freelist_walk_step, freelist_walk_fini },
	/*
	 * The txg_list walkers differ only in their init function; they
	 * share the same step and fini functions.
	 */
	{ "txg_list", "given any txg_list_t *, walk all entries in all txgs",
		txg_list_walk_init, txg_list_walk_step, txg_list_walk_fini },
	{ "txg_list0", "given any txg_list_t *, walk all entries in txg 0",
		txg_list0_walk_init, txg_list_walk_step, txg_list_walk_fini },
	{ "txg_list1", "given any txg_list_t *, walk all entries in txg 1",
		txg_list1_walk_init, txg_list_walk_step, txg_list_walk_fini },
	{ "txg_list2", "given any txg_list_t *, walk all entries in txg 2",
		txg_list2_walk_init, txg_list_walk_step, txg_list_walk_fini },
	{ "txg_list3", "given any txg_list_t *, walk all entries in txg 3",
		txg_list3_walk_init, txg_list_walk_step, txg_list_walk_fini },
	/* The spa walker supplies no fini function. */
	{ "spa", "walk all spa_t entries in the namespace",
		spa_walk_init, spa_walk_step, NULL },
	{ NULL }
};
1593 
/*
 * Module description returned by _mdb_init(): the API version this module
 * was built against, plus its dcmd and walker tables.
 */
static const mdb_modinfo_t modinfo = {
	MDB_API_VERSION, dcmds, walkers
};
1597 
/*
 * Module entry point: return a pointer to this module's static linkage
 * information (dcmds and walkers).
 */
const mdb_modinfo_t *
_mdb_init(void)
{
	return (&modinfo);
}
1603