xref: /illumos-gate/usr/src/cmd/mdb/common/modules/dtrace/dtrace.c (revision c0586b874d9179e81ca8a124fa6caf98fddb7696)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2013 by Delphix. All rights reserved.
25  * Copyright 2019 Joyent, Inc.
26  * Copyright 2022 Racktop Systems, Inc.
27  */
28 
29 /*
30  * explicitly define DTRACE_ERRDEBUG to pull in definition of dtrace_errhash_t
31  * explicitly define _STDARG_H to avoid stdarg.h/varargs.h u/k defn conflict
32  */
33 #define	DTRACE_ERRDEBUG
34 #define	_STDARG_H
35 
36 #include <mdb/mdb_param.h>
37 #include <mdb/mdb_modapi.h>
38 #include <mdb/mdb_ks.h>
39 #include <sys/dtrace_impl.h>
40 #include <sys/vmem_impl.h>
41 #include <sys/ddi_impldefs.h>
42 #include <sys/sysmacros.h>
43 #include <sys/kobj.h>
44 #include <dtrace.h>
45 #include <alloca.h>
46 #include <ctype.h>
47 #include <errno.h>
48 #include <math.h>
49 #include <stdio.h>
50 #include <unistd.h>
51 
52 /*ARGSUSED*/
53 int
54 id2probe(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
55 {
56 	uintptr_t probe = 0;
57 	uintptr_t probes;
58 
59 	if (!(flags & DCMD_ADDRSPEC))
60 		return (DCMD_USAGE);
61 
62 	if (addr == DTRACE_IDNONE || addr > UINT32_MAX)
63 		goto out;
64 
65 	if (mdb_readvar(&probes, "dtrace_probes") == -1) {
66 		mdb_warn("failed to read 'dtrace_probes'");
67 		return (DCMD_ERR);
68 	}
69 
70 	probes += (addr - 1) * sizeof (dtrace_probe_t *);
71 
72 	if (mdb_vread(&probe, sizeof (uintptr_t), probes) == -1) {
73 		mdb_warn("failed to read dtrace_probes[%d]", addr - 1);
74 		return (DCMD_ERR);
75 	}
76 
77 out:
78 	mdb_printf("%p\n", probe);
79 	return (DCMD_OK);
80 }
81 
/*
 * Print the help text for the ::dtrace dcmd.  Each group of adjacent
 * literals below corresponds to one line of output.
 */
void
dtrace_help(void)
{
	mdb_printf(
	    "Given a dtrace_state_t structure that represents a DTrace "
	    "consumer, prints\n"
	    "dtrace(8)-like output for in-kernel DTrace data.  "
	    "(The dtrace_state_t\n"
	    "structures for all DTrace consumers may be obtained by "
	    "running the \n"
	    "::dtrace_state dcmd.)   When data is present on multiple "
	    "CPUs, data are\n"
	    "presented in CPU order, with records within each CPU "
	    "ordered oldest to \n"
	    "youngest.  Options:\n\n"
	    "-c cpu     Only provide output for specified CPU.\n");
}
99 
100 static int
101 dtracemdb_eprobe(dtrace_state_t *state, dtrace_eprobedesc_t *epd)
102 {
103 	dtrace_epid_t epid = epd->dtepd_epid;
104 	dtrace_probe_t probe;
105 	dtrace_ecb_t ecb;
106 	uintptr_t addr, paddr, ap;
107 	dtrace_action_t act;
108 	int nactions, nrecs;
109 
110 	addr = (uintptr_t)state->dts_ecbs +
111 	    (epid - 1) * sizeof (dtrace_ecb_t *);
112 
113 	if (mdb_vread(&addr, sizeof (addr), addr) == -1) {
114 		mdb_warn("failed to read ecb for epid %d", epid);
115 		return (-1);
116 	}
117 
118 	if (addr == 0) {
119 		mdb_warn("epid %d doesn't match an ecb\n", epid);
120 		return (-1);
121 	}
122 
123 	if (mdb_vread(&ecb, sizeof (ecb), addr) == -1) {
124 		mdb_warn("failed to read ecb at %p", addr);
125 		return (-1);
126 	}
127 
128 	paddr = (uintptr_t)ecb.dte_probe;
129 
130 	if (mdb_vread(&probe, sizeof (probe), paddr) == -1) {
131 		mdb_warn("failed to read probe for ecb %p", addr);
132 		return (-1);
133 	}
134 
135 	/*
136 	 * This is a little painful:  in order to find the number of actions,
137 	 * we need to first walk through them.
138 	 */
139 	for (ap = (uintptr_t)ecb.dte_action, nactions = 0; ap != 0; ) {
140 		if (mdb_vread(&act, sizeof (act), ap) == -1) {
141 			mdb_warn("failed to read action %p on ecb %p",
142 			    ap, addr);
143 			return (-1);
144 		}
145 
146 		if (!DTRACEACT_ISAGG(act.dta_kind) && !act.dta_intuple)
147 			nactions++;
148 
149 		ap = (uintptr_t)act.dta_next;
150 	}
151 
152 	nrecs = epd->dtepd_nrecs;
153 	epd->dtepd_nrecs = nactions;
154 	epd->dtepd_probeid = probe.dtpr_id;
155 	epd->dtepd_uarg = ecb.dte_uarg;
156 	epd->dtepd_size = ecb.dte_size;
157 
158 	for (ap = (uintptr_t)ecb.dte_action, nactions = 0; ap != 0; ) {
159 		if (mdb_vread(&act, sizeof (act), ap) == -1) {
160 			mdb_warn("failed to read action %p on ecb %p",
161 			    ap, addr);
162 			return (-1);
163 		}
164 
165 		if (!DTRACEACT_ISAGG(act.dta_kind) && !act.dta_intuple) {
166 			if (nrecs-- == 0)
167 				break;
168 
169 			epd->dtepd_rec[nactions++] = act.dta_rec;
170 		}
171 
172 		ap = (uintptr_t)act.dta_next;
173 	}
174 
175 	return (0);
176 }
177 
178 /*ARGSUSED*/
179 static int
180 dtracemdb_probe(dtrace_state_t *state, dtrace_probedesc_t *pd)
181 {
182 	uintptr_t base, addr, paddr, praddr;
183 	int nprobes, i;
184 	dtrace_probe_t probe;
185 	dtrace_provider_t prov;
186 
187 	if (pd->dtpd_id == DTRACE_IDNONE)
188 		pd->dtpd_id++;
189 
190 	if (mdb_readvar(&base, "dtrace_probes") == -1) {
191 		mdb_warn("failed to read 'dtrace_probes'");
192 		return (-1);
193 	}
194 
195 	if (mdb_readvar(&nprobes, "dtrace_nprobes") == -1) {
196 		mdb_warn("failed to read 'dtrace_nprobes'");
197 		return (-1);
198 	}
199 
200 	for (i = pd->dtpd_id; i <= nprobes; i++) {
201 		addr = base + (i - 1) * sizeof (dtrace_probe_t *);
202 
203 		if (mdb_vread(&paddr, sizeof (paddr), addr) == -1) {
204 			mdb_warn("couldn't read probe pointer at %p", addr);
205 			return (-1);
206 		}
207 
208 		if (paddr != 0)
209 			break;
210 	}
211 
212 	if (paddr == 0) {
213 		errno = ESRCH;
214 		return (-1);
215 	}
216 
217 	if (mdb_vread(&probe, sizeof (probe), paddr) == -1) {
218 		mdb_warn("couldn't read probe at %p", paddr);
219 		return (-1);
220 	}
221 
222 	pd->dtpd_id = probe.dtpr_id;
223 
224 	if (mdb_vread(pd->dtpd_name, DTRACE_NAMELEN,
225 	    (uintptr_t)probe.dtpr_name) == -1) {
226 		mdb_warn("failed to read probe name for probe %p", paddr);
227 		return (-1);
228 	}
229 
230 	if (mdb_vread(pd->dtpd_func, DTRACE_FUNCNAMELEN,
231 	    (uintptr_t)probe.dtpr_func) == -1) {
232 		mdb_warn("failed to read function name for probe %p", paddr);
233 		return (-1);
234 	}
235 
236 	if (mdb_vread(pd->dtpd_mod, DTRACE_MODNAMELEN,
237 	    (uintptr_t)probe.dtpr_mod) == -1) {
238 		mdb_warn("failed to read module name for probe %p", paddr);
239 		return (-1);
240 	}
241 
242 	praddr = (uintptr_t)probe.dtpr_provider;
243 
244 	if (mdb_vread(&prov, sizeof (prov), praddr) == -1) {
245 		mdb_warn("failed to read provider for probe %p", paddr);
246 		return (-1);
247 	}
248 
249 	if (mdb_vread(pd->dtpd_provider, DTRACE_PROVNAMELEN,
250 	    (uintptr_t)prov.dtpv_name) == -1) {
251 		mdb_warn("failed to read provider name for probe %p", paddr);
252 		return (-1);
253 	}
254 
255 	return (0);
256 }
257 
258 /*ARGSUSED*/
259 static int
260 dtracemdb_aggdesc(dtrace_state_t *state, dtrace_aggdesc_t *agd)
261 {
262 	dtrace_aggid_t aggid = agd->dtagd_id;
263 	dtrace_aggregation_t agg;
264 	dtrace_ecb_t ecb;
265 	uintptr_t addr, eaddr, ap, last;
266 	dtrace_action_t act;
267 	dtrace_recdesc_t *lrec;
268 	int nactions, nrecs;
269 
270 	addr = (uintptr_t)state->dts_aggregations +
271 	    (aggid - 1) * sizeof (dtrace_aggregation_t *);
272 
273 	if (mdb_vread(&addr, sizeof (addr), addr) == -1) {
274 		mdb_warn("failed to read aggregation for aggid %d", aggid);
275 		return (-1);
276 	}
277 
278 	if (addr == 0) {
279 		mdb_warn("aggid %d doesn't match an aggregation\n", aggid);
280 		return (-1);
281 	}
282 
283 	if (mdb_vread(&agg, sizeof (agg), addr) == -1) {
284 		mdb_warn("failed to read aggregation at %p", addr);
285 		return (-1);
286 	}
287 
288 	eaddr = (uintptr_t)agg.dtag_ecb;
289 
290 	if (mdb_vread(&ecb, sizeof (ecb), eaddr) == -1) {
291 		mdb_warn("failed to read ecb for aggregation %p", addr);
292 		return (-1);
293 	}
294 
295 	last = (uintptr_t)addr + offsetof(dtrace_aggregation_t, dtag_action);
296 
297 	/*
298 	 * This is a little painful:  in order to find the number of actions,
299 	 * we need to first walk through them.
300 	 */
301 	ap = (uintptr_t)agg.dtag_first;
302 	nactions = 0;
303 
304 	for (;;) {
305 		if (mdb_vread(&act, sizeof (act), ap) == -1) {
306 			mdb_warn("failed to read action %p on aggregation %p",
307 			    ap, addr);
308 			return (-1);
309 		}
310 
311 		nactions++;
312 
313 		if (ap == last)
314 			break;
315 
316 		ap = (uintptr_t)act.dta_next;
317 	}
318 
319 	lrec = &act.dta_rec;
320 	agd->dtagd_size = lrec->dtrd_offset + lrec->dtrd_size - agg.dtag_base;
321 
322 	nrecs = agd->dtagd_nrecs;
323 	agd->dtagd_nrecs = nactions;
324 	agd->dtagd_epid = ecb.dte_epid;
325 
326 	ap = (uintptr_t)agg.dtag_first;
327 	nactions = 0;
328 
329 	for (;;) {
330 		dtrace_recdesc_t rec;
331 
332 		if (mdb_vread(&act, sizeof (act), ap) == -1) {
333 			mdb_warn("failed to read action %p on aggregation %p",
334 			    ap, addr);
335 			return (-1);
336 		}
337 
338 		if (nrecs-- == 0)
339 			break;
340 
341 		rec = act.dta_rec;
342 		rec.dtrd_offset -= agg.dtag_base;
343 		rec.dtrd_uarg = 0;
344 		agd->dtagd_rec[nactions++] = rec;
345 
346 		if (ap == last)
347 			break;
348 
349 		ap = (uintptr_t)act.dta_next;
350 	}
351 
352 	return (0);
353 }
354 
355 static int
356 dtracemdb_bufsnap(dtrace_buffer_t *which, dtrace_bufdesc_t *desc)
357 {
358 	static hrtime_t hr_offset = 0;
359 	static boolean_t offset_set = B_FALSE;
360 	uintptr_t addr;
361 	size_t bufsize;
362 	dtrace_buffer_t buf;
363 	caddr_t data = desc->dtbd_data;
364 	processorid_t max_cpuid, cpu = desc->dtbd_cpu;
365 
366 	if (mdb_readvar(&max_cpuid, "max_cpuid") == -1) {
367 		mdb_warn("failed to read 'max_cpuid'");
368 		errno = EIO;
369 		return (-1);
370 	}
371 
372 	if (cpu < 0 || cpu > max_cpuid) {
373 		errno = EINVAL;
374 		return (-1);
375 	}
376 
377 	addr = (uintptr_t)which + cpu * sizeof (dtrace_buffer_t);
378 
379 	if (mdb_vread(&buf, sizeof (buf), addr) == -1) {
380 		mdb_warn("failed to read buffer description at %p", addr);
381 		errno = EIO;
382 		return (-1);
383 	}
384 
385 	if (buf.dtb_tomax == NULL) {
386 		errno = ENOENT;
387 		return (-1);
388 	}
389 
390 	if (buf.dtb_flags & DTRACEBUF_WRAPPED) {
391 		bufsize = buf.dtb_size;
392 	} else {
393 		bufsize = buf.dtb_offset;
394 	}
395 
396 	if (mdb_vread(data, bufsize, (uintptr_t)buf.dtb_tomax) == -1) {
397 		mdb_warn("couldn't read buffer for CPU %d", cpu);
398 		errno = EIO;
399 		return (-1);
400 	}
401 
402 	if (buf.dtb_offset > buf.dtb_size) {
403 		mdb_warn("buffer for CPU %d has corrupt offset\n", cpu);
404 		errno = EIO;
405 		return (-1);
406 	}
407 
408 	if (buf.dtb_flags & DTRACEBUF_WRAPPED) {
409 		if (buf.dtb_xamot_offset > buf.dtb_size) {
410 			mdb_warn("ringbuffer for CPU %d has corrupt "
411 			    "wrapped offset\n", cpu);
412 			errno = EIO;
413 			return (-1);
414 		}
415 
416 		/*
417 		 * If the ring buffer has wrapped, it needs to be polished.
418 		 * See the comment in dtrace_buffer_polish() for details.
419 		 */
420 		if (buf.dtb_offset < buf.dtb_xamot_offset) {
421 			bzero(data + buf.dtb_offset,
422 			    buf.dtb_xamot_offset - buf.dtb_offset);
423 		}
424 
425 		if (buf.dtb_offset > buf.dtb_xamot_offset) {
426 			bzero(data + buf.dtb_offset,
427 			    buf.dtb_size - buf.dtb_offset);
428 			bzero(data, buf.dtb_xamot_offset);
429 		}
430 
431 		desc->dtbd_oldest = buf.dtb_xamot_offset;
432 	} else {
433 		desc->dtbd_oldest = 0;
434 	}
435 
436 	/*
437 	 * On a live system, dtbd_timestamp is set to gethrtime() when the
438 	 * DTRACEIOC_BUFSNAP ioctl is called. The effect of this is that the
439 	 * timestamps of all the enabled probe records in the buf will always
440 	 * be less than dtbd_timestamp. dtrace_consume() relies on this
441 	 * invariant to determine when it needs to retrieve more dtrace bufs
442 	 * from the kernel.
443 	 *
444 	 * However when mdb is reading a crash dump, the value of
445 	 * gethrtime() on the system running mdb may smaller than the
446 	 * enabled probe records in the crash dump, violating the invariant
447 	 * dtrace_consume() is relying on. This can cause dtrace_consume()
448 	 * to prematurely stop processing records.
449 	 *
450 	 * To preserve the invariant dtrace_consume() requires, we simply
451 	 * add the value of panic_hrtime to gethrtime() when setting
452 	 * dtdb_timestamp. On a live system, panic_hrtime will be 0, and
453 	 * the invariant will be preserved by virtue of being running on
454 	 * a live system. On a crash dump, no valid probe record can have a
455 	 * timestamp greater than panic_hrtime, so adding this to the value
456 	 * of gethrtime() will guarantee the invariant expected by
457 	 * dtrace_consume() is preserved.
458 	 */
459 	if (!offset_set) {
460 		hrtime_t panic_hrtime;
461 
462 		/*
463 		 * We could be slightly more clever and only set hr_offset
464 		 * if gethrtime() in mdb is < panic_hrtime, but it doesn't
465 		 * seem necessary. If for some reason, we cannot read
466 		 * panic_hrtime, we'll try to continue -- ::dtrace may
467 		 * still succeed, so we just warn and continue.
468 		 */
469 		if (mdb_readvar(&panic_hrtime, "panic_hrtime") == -1) {
470 			mdb_warn("failed to read 'panic_hrtime' -- "
471 			    "some dtrace data may not be displayed");
472 		} else {
473 			hr_offset = panic_hrtime;
474 		}
475 		offset_set = B_TRUE;
476 	}
477 
478 	desc->dtbd_size = bufsize;
479 	desc->dtbd_drops = buf.dtb_drops;
480 	desc->dtbd_errors = buf.dtb_errors;
481 	desc->dtbd_timestamp = gethrtime() + hr_offset;
482 
483 	return (0);
484 }
485 
486 /*
487  * This is essentially identical to its cousin in the kernel -- with the
488  * notable exception that we automatically set DTRACEOPT_GRABANON if this
489  * state is an anonymous enabling.
490  */
491 static dof_hdr_t *
492 dtracemdb_dof_create(dtrace_state_t *state, int isanon)
493 {
494 	dof_hdr_t *dof;
495 	dof_sec_t *sec;
496 	dof_optdesc_t *opt;
497 	int i, len = sizeof (dof_hdr_t) +
498 	    roundup(sizeof (dof_sec_t), sizeof (uint64_t)) +
499 	    sizeof (dof_optdesc_t) * DTRACEOPT_MAX;
500 
501 	dof = mdb_zalloc(len, UM_SLEEP);
502 	dof->dofh_ident[DOF_ID_MAG0] = DOF_MAG_MAG0;
503 	dof->dofh_ident[DOF_ID_MAG1] = DOF_MAG_MAG1;
504 	dof->dofh_ident[DOF_ID_MAG2] = DOF_MAG_MAG2;
505 	dof->dofh_ident[DOF_ID_MAG3] = DOF_MAG_MAG3;
506 
507 	dof->dofh_ident[DOF_ID_MODEL] = DOF_MODEL_NATIVE;
508 	dof->dofh_ident[DOF_ID_ENCODING] = DOF_ENCODE_NATIVE;
509 	dof->dofh_ident[DOF_ID_VERSION] = DOF_VERSION;
510 	dof->dofh_ident[DOF_ID_DIFVERS] = DIF_VERSION;
511 	dof->dofh_ident[DOF_ID_DIFIREG] = DIF_DIR_NREGS;
512 	dof->dofh_ident[DOF_ID_DIFTREG] = DIF_DTR_NREGS;
513 
514 	dof->dofh_flags = 0;
515 	dof->dofh_hdrsize = sizeof (dof_hdr_t);
516 	dof->dofh_secsize = sizeof (dof_sec_t);
517 	dof->dofh_secnum = 1;	/* only DOF_SECT_OPTDESC */
518 	dof->dofh_secoff = sizeof (dof_hdr_t);
519 	dof->dofh_loadsz = len;
520 	dof->dofh_filesz = len;
521 	dof->dofh_pad = 0;
522 
523 	/*
524 	 * Fill in the option section header...
525 	 */
526 	sec = (dof_sec_t *)((uintptr_t)dof + sizeof (dof_hdr_t));
527 	sec->dofs_type = DOF_SECT_OPTDESC;
528 	sec->dofs_align = sizeof (uint64_t);
529 	sec->dofs_flags = DOF_SECF_LOAD;
530 	sec->dofs_entsize = sizeof (dof_optdesc_t);
531 
532 	opt = (dof_optdesc_t *)((uintptr_t)sec +
533 	    roundup(sizeof (dof_sec_t), sizeof (uint64_t)));
534 
535 	sec->dofs_offset = (uintptr_t)opt - (uintptr_t)dof;
536 	sec->dofs_size = sizeof (dof_optdesc_t) * DTRACEOPT_MAX;
537 
538 	for (i = 0; i < DTRACEOPT_MAX; i++) {
539 		opt[i].dofo_option = i;
540 		opt[i].dofo_strtab = DOF_SECIDX_NONE;
541 		opt[i].dofo_value = state->dts_options[i];
542 	}
543 
544 	if (isanon)
545 		opt[DTRACEOPT_GRABANON].dofo_value = 1;
546 
547 	return (dof);
548 }
549 
550 static int
551 dtracemdb_format(dtrace_state_t *state, dtrace_fmtdesc_t *desc)
552 {
553 	uintptr_t addr, faddr;
554 	char c;
555 	int len = 0;
556 
557 	if (desc->dtfd_format == 0 || desc->dtfd_format > state->dts_nformats) {
558 		errno = EINVAL;
559 		return (-1);
560 	}
561 
562 	faddr = (uintptr_t)state->dts_formats +
563 	    (desc->dtfd_format - 1) * sizeof (char *);
564 
565 	if (mdb_vread(&addr, sizeof (addr), faddr) == -1) {
566 		mdb_warn("failed to read format string pointer at %p", faddr);
567 		return (-1);
568 	}
569 
570 	do {
571 		if (mdb_vread(&c, sizeof (c), addr + len++) == -1) {
572 			mdb_warn("failed to read format string at %p", addr);
573 			return (-1);
574 		}
575 	} while (c != '\0');
576 
577 	if (len > desc->dtfd_length) {
578 		desc->dtfd_length = len;
579 		return (0);
580 	}
581 
582 	if (mdb_vread(desc->dtfd_string, len, addr) == -1) {
583 		mdb_warn("failed to reread format string at %p", addr);
584 		return (-1);
585 	}
586 
587 	return (0);
588 }
589 
590 static int
591 dtracemdb_status(dtrace_state_t *state, dtrace_status_t *status)
592 {
593 	dtrace_dstate_t *dstate;
594 	int i, j;
595 	uint64_t nerrs;
596 	uintptr_t addr;
597 	int ncpu;
598 
599 	if (mdb_readvar(&ncpu, "_ncpu") == -1) {
600 		mdb_warn("failed to read '_ncpu'");
601 		return (DCMD_ERR);
602 	}
603 
604 	bzero(status, sizeof (dtrace_status_t));
605 
606 	if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) {
607 		errno = ENOENT;
608 		return (-1);
609 	}
610 
611 	/*
612 	 * For the MDB backend, we never set dtst_exiting or dtst_filled.  This
613 	 * is by design:  we don't want the library to try to stop tracing,
614 	 * because it doesn't particularly mean anything.
615 	 */
616 	nerrs = state->dts_errors;
617 	dstate = &state->dts_vstate.dtvs_dynvars;
618 
619 	for (i = 0; i < ncpu; i++) {
620 		dtrace_dstate_percpu_t dcpu;
621 		dtrace_buffer_t buf;
622 
623 		addr = (uintptr_t)&dstate->dtds_percpu[i];
624 
625 		if (mdb_vread(&dcpu, sizeof (dcpu), addr) == -1) {
626 			mdb_warn("failed to read per-CPU dstate at %p", addr);
627 			return (-1);
628 		}
629 
630 		status->dtst_dyndrops += dcpu.dtdsc_drops;
631 		status->dtst_dyndrops_dirty += dcpu.dtdsc_dirty_drops;
632 		status->dtst_dyndrops_rinsing += dcpu.dtdsc_rinsing_drops;
633 
634 		addr = (uintptr_t)&state->dts_buffer[i];
635 
636 		if (mdb_vread(&buf, sizeof (buf), addr) == -1) {
637 			mdb_warn("failed to read per-CPU buffer at %p", addr);
638 			return (-1);
639 		}
640 
641 		nerrs += buf.dtb_errors;
642 
643 		for (j = 0; j < state->dts_nspeculations; j++) {
644 			dtrace_speculation_t spec;
645 
646 			addr = (uintptr_t)&state->dts_speculations[j];
647 
648 			if (mdb_vread(&spec, sizeof (spec), addr) == -1) {
649 				mdb_warn("failed to read "
650 				    "speculation at %p", addr);
651 				return (-1);
652 			}
653 
654 			addr = (uintptr_t)&spec.dtsp_buffer[i];
655 
656 			if (mdb_vread(&buf, sizeof (buf), addr) == -1) {
657 				mdb_warn("failed to read "
658 				    "speculative buffer at %p", addr);
659 				return (-1);
660 			}
661 
662 			status->dtst_specdrops += buf.dtb_xamot_drops;
663 		}
664 	}
665 
666 	status->dtst_specdrops_busy = state->dts_speculations_busy;
667 	status->dtst_specdrops_unavail = state->dts_speculations_unavail;
668 	status->dtst_errors = nerrs;
669 
670 	return (0);
671 }
672 
673 typedef struct dtracemdb_data {
674 	dtrace_state_t *dtmd_state;
675 	char *dtmd_symstr;
676 	char *dtmd_modstr;
677 	uintptr_t dtmd_addr;
678 	int dtmd_isanon;
679 } dtracemdb_data_t;
680 
681 static int
682 dtracemdb_ioctl(void *varg, int cmd, void *arg)
683 {
684 	dtracemdb_data_t *data = varg;
685 	dtrace_state_t *state = data->dtmd_state;
686 
687 	switch (cmd) {
688 	case DTRACEIOC_CONF: {
689 		dtrace_conf_t *conf = arg;
690 
691 		bzero(conf, sizeof (conf));
692 		conf->dtc_difversion = DIF_VERSION;
693 		conf->dtc_difintregs = DIF_DIR_NREGS;
694 		conf->dtc_diftupregs = DIF_DTR_NREGS;
695 		conf->dtc_ctfmodel = CTF_MODEL_NATIVE;
696 
697 		return (0);
698 	}
699 
700 	case DTRACEIOC_DOFGET: {
701 		dof_hdr_t *hdr = arg, *dof;
702 
703 		dof = dtracemdb_dof_create(state, data->dtmd_isanon);
704 		bcopy(dof, hdr, MIN(hdr->dofh_loadsz, dof->dofh_loadsz));
705 		mdb_free(dof, dof->dofh_loadsz);
706 
707 		return (0);
708 	}
709 
710 	case DTRACEIOC_BUFSNAP:
711 		return (dtracemdb_bufsnap(state->dts_buffer, arg));
712 
713 	case DTRACEIOC_AGGSNAP:
714 		return (dtracemdb_bufsnap(state->dts_aggbuffer, arg));
715 
716 	case DTRACEIOC_AGGDESC:
717 		return (dtracemdb_aggdesc(state, arg));
718 
719 	case DTRACEIOC_EPROBE:
720 		return (dtracemdb_eprobe(state, arg));
721 
722 	case DTRACEIOC_PROBES:
723 		return (dtracemdb_probe(state, arg));
724 
725 	case DTRACEIOC_FORMAT:
726 		return (dtracemdb_format(state, arg));
727 
728 	case DTRACEIOC_STATUS:
729 		return (dtracemdb_status(state, arg));
730 
731 	case DTRACEIOC_GO:
732 		*(processorid_t *)arg = -1;
733 		return (0);
734 
735 	case DTRACEIOC_ENABLE:
736 		errno = ENOTTY; /* see dt_open.c:dtrace_go() */
737 		return (-1);
738 
739 	case DTRACEIOC_PROVIDER:
740 	case DTRACEIOC_PROBEMATCH:
741 		errno = ESRCH;
742 		return (-1);
743 
744 	default:
745 		mdb_warn("unexpected ioctl 0x%x (%s)\n", cmd,
746 		    cmd == DTRACEIOC_PROVIDER	? "DTRACEIOC_PROVIDER" :
747 		    cmd == DTRACEIOC_PROBES	? "DTRACEIOC_PROBES" :
748 		    cmd == DTRACEIOC_BUFSNAP	? "DTRACEIOC_BUFSNAP" :
749 		    cmd == DTRACEIOC_PROBEMATCH	? "DTRACEIOC_PROBEMATCH" :
750 		    cmd == DTRACEIOC_ENABLE	? "DTRACEIOC_ENABLE" :
751 		    cmd == DTRACEIOC_AGGSNAP	? "DTRACEIOC_AGGSNAP" :
752 		    cmd == DTRACEIOC_EPROBE	? "DTRACEIOC_EPROBE" :
753 		    cmd == DTRACEIOC_PROBEARG	? "DTRACEIOC_PROBEARG" :
754 		    cmd == DTRACEIOC_CONF	? "DTRACEIOC_CONF" :
755 		    cmd == DTRACEIOC_STATUS	? "DTRACEIOC_STATUS" :
756 		    cmd == DTRACEIOC_GO		? "DTRACEIOC_GO" :
757 		    cmd == DTRACEIOC_STOP	? "DTRACEIOC_STOP" :
758 		    cmd == DTRACEIOC_AGGDESC	? "DTRACEIOC_AGGDESC" :
759 		    cmd == DTRACEIOC_FORMAT	? "DTRACEIOC_FORMAT" :
760 		    cmd == DTRACEIOC_DOFGET	? "DTRACEIOC_DOFGET" :
761 		    cmd == DTRACEIOC_REPLICATE	? "DTRACEIOC_REPLICATE" :
762 		    "???");
763 		errno = ENXIO;
764 		return (-1);
765 	}
766 }
767 
768 static int
769 dtracemdb_modctl(uintptr_t addr, const struct modctl *m, dtracemdb_data_t *data)
770 {
771 	struct module mod;
772 
773 	if (m->mod_mp == NULL)
774 		return (WALK_NEXT);
775 
776 	if (mdb_vread(&mod, sizeof (mod), (uintptr_t)m->mod_mp) == -1) {
777 		mdb_warn("couldn't read modctl %p's module", addr);
778 		return (WALK_NEXT);
779 	}
780 
781 	if ((uintptr_t)mod.text > data->dtmd_addr)
782 		return (WALK_NEXT);
783 
784 	if ((uintptr_t)mod.text + mod.text_size <= data->dtmd_addr)
785 		return (WALK_NEXT);
786 
787 	if (mdb_readstr(data->dtmd_modstr, MDB_SYM_NAMLEN,
788 	    (uintptr_t)m->mod_modname) == -1)
789 		return (WALK_ERR);
790 
791 	return (WALK_DONE);
792 }
793 
794 static int
795 dtracemdb_lookup_by_addr(void *varg, GElf_Addr addr, GElf_Sym *symp,
796     dtrace_syminfo_t *sip)
797 {
798 	dtracemdb_data_t *data = varg;
799 
800 	if (data->dtmd_symstr == NULL) {
801 		data->dtmd_symstr = mdb_zalloc(MDB_SYM_NAMLEN,
802 		    UM_SLEEP | UM_GC);
803 	}
804 
805 	if (data->dtmd_modstr == NULL) {
806 		data->dtmd_modstr = mdb_zalloc(MDB_SYM_NAMLEN,
807 		    UM_SLEEP | UM_GC);
808 	}
809 
810 	if (symp != NULL) {
811 		if (mdb_lookup_by_addr(addr, MDB_SYM_FUZZY, data->dtmd_symstr,
812 		    MDB_SYM_NAMLEN, symp) == -1)
813 			return (-1);
814 	}
815 
816 	if (sip != NULL) {
817 		data->dtmd_addr = addr;
818 
819 		(void) strcpy(data->dtmd_modstr, "???");
820 
821 		if (mdb_walk("modctl",
822 		    (mdb_walk_cb_t)dtracemdb_modctl, varg) == -1) {
823 			mdb_warn("couldn't walk 'modctl'");
824 			return (-1);
825 		}
826 
827 		sip->dts_object = data->dtmd_modstr;
828 		sip->dts_id = 0;
829 		sip->dts_name = symp != NULL ? data->dtmd_symstr : NULL;
830 	}
831 
832 	return (0);
833 }
834 
835 /*ARGSUSED*/
836 static int
837 dtracemdb_stat(void *varg, processorid_t cpu)
838 {
839 	GElf_Sym sym;
840 	cpu_t c;
841 	uintptr_t caddr, addr;
842 
843 	if (mdb_lookup_by_name("cpu", &sym) == -1) {
844 		mdb_warn("failed to find symbol for 'cpu'");
845 		return (-1);
846 	}
847 
848 	if (cpu * sizeof (uintptr_t) > sym.st_size)
849 		return (-1);
850 
851 	addr = (uintptr_t)sym.st_value + cpu * sizeof (uintptr_t);
852 
853 	if (mdb_vread(&caddr, sizeof (caddr), addr) == -1) {
854 		mdb_warn("failed to read cpu[%d]", cpu);
855 		return (-1);
856 	}
857 
858 	if (caddr == 0)
859 		return (-1);
860 
861 	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
862 		mdb_warn("failed to read cpu at %p", caddr);
863 		return (-1);
864 	}
865 
866 	if (c.cpu_flags & CPU_POWEROFF) {
867 		return (P_POWEROFF);
868 	} else if (c.cpu_flags & CPU_SPARE) {
869 		return (P_SPARE);
870 	} else if (c.cpu_flags & CPU_FAULTED) {
871 		return (P_FAULTED);
872 	} else if (c.cpu_flags & CPU_DISABLED) {
873 		return (P_DISABLED);
874 	} else if ((c.cpu_flags & (CPU_READY | CPU_OFFLINE)) != CPU_READY) {
875 		return (P_OFFLINE);
876 	} else if (c.cpu_flags & CPU_ENABLE) {
877 		return (P_ONLINE);
878 	} else {
879 		return (P_NOINTR);
880 	}
881 }
882 
883 /*ARGSUSED*/
884 static long
885 dtracemdb_sysconf(void *varg, int name)
886 {
887 	int max_ncpus;
888 	processorid_t max_cpuid;
889 
890 	switch (name) {
891 	case _SC_CPUID_MAX:
892 		if (mdb_readvar(&max_cpuid, "max_cpuid") == -1) {
893 			mdb_warn("failed to read 'max_cpuid'");
894 			return (-1);
895 		}
896 
897 		return (max_cpuid);
898 
899 	case _SC_NPROCESSORS_MAX:
900 		if (mdb_readvar(&max_ncpus, "max_ncpus") == -1) {
901 			mdb_warn("failed to read 'max_ncpus'");
902 			return (-1);
903 		}
904 
905 		return (max_ncpus);
906 
907 	default:
908 		mdb_warn("unexpected sysconf code %d\n", name);
909 		return (-1);
910 	}
911 }
912 
913 const dtrace_vector_t dtrace_mdbops = {
914 	dtracemdb_ioctl,
915 	dtracemdb_lookup_by_addr,
916 	dtracemdb_stat,
917 	dtracemdb_sysconf
918 };
919 
920 typedef struct dtrace_dcmddata {
921 	dtrace_hdl_t *dtdd_dtp;
922 	int dtdd_cpu;
923 	int dtdd_quiet;
924 	int dtdd_flowindent;
925 	int dtdd_heading;
926 	FILE *dtdd_output;
927 } dtrace_dcmddata_t;
928 
929 /*
930  * Helper to grab all the content from a file, spit it into a string, and erase
931  * and reset the file.
932  */
933 static void
934 print_and_truncate_file(FILE *fp)
935 {
936 	long len;
937 	char *out;
938 
939 	/* flush, find length of file, seek to beginning, initialize buffer */
940 	if (fflush(fp) || (len = ftell(fp)) < 0 ||
941 	    fseek(fp, 0, SEEK_SET) < 0) {
942 		mdb_warn("couldn't prepare DTrace output file: %d\n", errno);
943 		return;
944 	}
945 
946 	out = mdb_alloc(len + 1, UM_SLEEP);
947 	out[len] = '\0';
948 
949 	/* read file into buffer, truncate file, and seek to beginning */
950 	if ((fread(out, len + 1, sizeof (char), fp) == 0 && ferror(fp)) ||
951 	    ftruncate(fileno(fp), 0) < 0 || fseek(fp, 0, SEEK_SET) < 0) {
952 		mdb_warn("couldn't read DTrace output file: %d\n", errno);
953 		mdb_free(out, len + 1);
954 		return;
955 	}
956 
957 	mdb_printf("%s", out);
958 	mdb_free(out, len + 1);
959 }
960 
961 /*ARGSUSED*/
962 static int
963 dtrace_dcmdrec(const dtrace_probedata_t *data,
964     const dtrace_recdesc_t *rec, void *arg)
965 {
966 	dtrace_dcmddata_t *dd = arg;
967 
968 	print_and_truncate_file(dd->dtdd_output);
969 
970 	if (rec == NULL) {
971 		/*
972 		 * We have processed the final record; output the newline if
973 		 * we're not in quiet mode.
974 		 */
975 		if (!dd->dtdd_quiet)
976 			mdb_printf("\n");
977 
978 		return (DTRACE_CONSUME_NEXT);
979 	}
980 
981 	return (DTRACE_CONSUME_THIS);
982 }
983 
984 /*ARGSUSED*/
985 static int
986 dtrace_dcmdprobe(const dtrace_probedata_t *data, void *arg)
987 {
988 	dtrace_probedesc_t *pd = data->dtpda_pdesc;
989 	processorid_t cpu = data->dtpda_cpu;
990 	dtrace_dcmddata_t *dd = arg;
991 	char name[DTRACE_FUNCNAMELEN + DTRACE_NAMELEN + 2];
992 
993 	if (dd->dtdd_cpu != -1UL && dd->dtdd_cpu != cpu)
994 		return (DTRACE_CONSUME_NEXT);
995 
996 	if (dd->dtdd_heading == 0) {
997 		if (!dd->dtdd_flowindent) {
998 			if (!dd->dtdd_quiet) {
999 				mdb_printf("%3s %6s %32s\n",
1000 				    "CPU", "ID", "FUNCTION:NAME");
1001 			}
1002 		} else {
1003 			mdb_printf("%3s %-41s\n", "CPU", "FUNCTION");
1004 		}
1005 		dd->dtdd_heading = 1;
1006 	}
1007 
1008 	if (!dd->dtdd_flowindent) {
1009 		if (!dd->dtdd_quiet) {
1010 			(void) mdb_snprintf(name, sizeof (name), "%s:%s",
1011 			    pd->dtpd_func, pd->dtpd_name);
1012 
1013 			mdb_printf("%3d %6d %32s ", cpu, pd->dtpd_id, name);
1014 		}
1015 	} else {
1016 		int indent = data->dtpda_indent;
1017 
1018 		if (data->dtpda_flow == DTRACEFLOW_NONE) {
1019 			(void) mdb_snprintf(name, sizeof (name), "%*s%s%s:%s",
1020 			    indent, "", data->dtpda_prefix, pd->dtpd_func,
1021 			    pd->dtpd_name);
1022 		} else {
1023 			(void) mdb_snprintf(name, sizeof (name), "%*s%s%s",
1024 			    indent, "", data->dtpda_prefix, pd->dtpd_func);
1025 		}
1026 
1027 		mdb_printf("%3d %-41s ", cpu, name);
1028 	}
1029 
1030 	return (DTRACE_CONSUME_THIS);
1031 }
1032 
1033 /*ARGSUSED*/
1034 static int
1035 dtrace_dcmderr(const dtrace_errdata_t *data, void *arg)
1036 {
1037 	mdb_warn(data->dteda_msg);
1038 	return (DTRACE_HANDLE_OK);
1039 }
1040 
1041 /*ARGSUSED*/
1042 static int
1043 dtrace_dcmddrop(const dtrace_dropdata_t *data, void *arg)
1044 {
1045 	mdb_warn(data->dtdda_msg);
1046 	return (DTRACE_HANDLE_OK);
1047 }
1048 
1049 /*ARGSUSED*/
1050 static int
1051 dtrace_dcmdbuffered(const dtrace_bufdata_t *bufdata, void *arg)
1052 {
1053 	mdb_printf("%s", bufdata->dtbda_buffered);
1054 	return (DTRACE_HANDLE_OK);
1055 }
1056 
1057 /*ARGSUSED*/
1058 int
1059 dtrace(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1060 {
1061 	dtrace_state_t state;
1062 	dtrace_hdl_t *dtp;
1063 	int ncpu, err;
1064 	uintptr_t c = -1UL;
1065 	dtrace_dcmddata_t dd;
1066 	dtrace_optval_t val;
1067 	dtracemdb_data_t md;
1068 	int rval = DCMD_ERR;
1069 	dtrace_anon_t anon;
1070 
1071 	if (!(flags & DCMD_ADDRSPEC))
1072 		return (DCMD_USAGE);
1073 
1074 	if (mdb_getopts(argc, argv, 'c', MDB_OPT_UINTPTR, &c, NULL) != argc)
1075 		return (DCMD_USAGE);
1076 
1077 	if (mdb_readvar(&ncpu, "_ncpu") == -1) {
1078 		mdb_warn("failed to read '_ncpu'");
1079 		return (DCMD_ERR);
1080 	}
1081 
1082 	if (mdb_vread(&state, sizeof (state), addr) == -1) {
1083 		mdb_warn("couldn't read dtrace_state_t at %p", addr);
1084 		return (DCMD_ERR);
1085 	}
1086 
1087 	if (state.dts_anon != NULL) {
1088 		addr = (uintptr_t)state.dts_anon;
1089 
1090 		if (mdb_vread(&state, sizeof (state), addr) == -1) {
1091 			mdb_warn("couldn't read anonymous state at %p", addr);
1092 			return (DCMD_ERR);
1093 		}
1094 	}
1095 
1096 	bzero(&md, sizeof (md));
1097 	md.dtmd_state = &state;
1098 
1099 	if ((dtp = dtrace_vopen(DTRACE_VERSION, DTRACE_O_NOSYS, &err,
1100 	    &dtrace_mdbops, &md)) == NULL) {
1101 		mdb_warn("failed to initialize dtrace: %s\n",
1102 		    dtrace_errmsg(NULL, err));
1103 		return (DCMD_ERR);
1104 	}
1105 
1106 	/*
1107 	 * If this is the anonymous enabling, we need to set a bit indicating
1108 	 * that DTRACEOPT_GRABANON should be set.
1109 	 */
1110 	if (mdb_readvar(&anon, "dtrace_anon") == -1) {
1111 		mdb_warn("failed to read 'dtrace_anon'");
1112 		return (DCMD_ERR);
1113 	}
1114 
1115 	md.dtmd_isanon = ((uintptr_t)anon.dta_state == addr);
1116 
1117 	if (dtrace_go(dtp) != 0) {
1118 		mdb_warn("failed to initialize dtrace: %s\n",
1119 		    dtrace_errmsg(dtp, dtrace_errno(dtp)));
1120 		goto err;
1121 	}
1122 
1123 	bzero(&dd, sizeof (dd));
1124 	dd.dtdd_dtp = dtp;
1125 	dd.dtdd_cpu = c;
1126 
1127 	if (dtrace_getopt(dtp, "flowindent", &val) == -1) {
1128 		mdb_warn("couldn't get 'flowindent' option: %s\n",
1129 		    dtrace_errmsg(dtp, dtrace_errno(dtp)));
1130 		goto err;
1131 	}
1132 
1133 	dd.dtdd_flowindent = (val != DTRACEOPT_UNSET);
1134 
1135 	if (dtrace_getopt(dtp, "quiet", &val) == -1) {
1136 		mdb_warn("couldn't get 'quiet' option: %s\n",
1137 		    dtrace_errmsg(dtp, dtrace_errno(dtp)));
1138 		goto err;
1139 	}
1140 
1141 	dd.dtdd_quiet = (val != DTRACEOPT_UNSET);
1142 
1143 	if (dtrace_handle_err(dtp, dtrace_dcmderr, NULL) == -1) {
1144 		mdb_warn("couldn't add err handler: %s\n",
1145 		    dtrace_errmsg(dtp, dtrace_errno(dtp)));
1146 		goto err;
1147 	}
1148 
1149 	if (dtrace_handle_drop(dtp, dtrace_dcmddrop, NULL) == -1) {
1150 		mdb_warn("couldn't add drop handler: %s\n",
1151 		    dtrace_errmsg(dtp, dtrace_errno(dtp)));
1152 		goto err;
1153 	}
1154 
1155 	if (dtrace_handle_buffered(dtp, dtrace_dcmdbuffered, NULL) == -1) {
1156 		mdb_warn("couldn't add buffered handler: %s\n",
1157 		    dtrace_errmsg(dtp, dtrace_errno(dtp)));
1158 		goto err;
1159 	}
1160 
1161 	if (dtrace_status(dtp) == -1) {
1162 		mdb_warn("couldn't get status: %s\n",
1163 		    dtrace_errmsg(dtp, dtrace_errno(dtp)));
1164 		goto err;
1165 	}
1166 
1167 	if (dtrace_aggregate_snap(dtp) == -1) {
1168 		mdb_warn("couldn't snapshot aggregation: %s\n",
1169 		    dtrace_errmsg(dtp, dtrace_errno(dtp)));
1170 		goto err;
1171 	}
1172 
1173 	if ((dd.dtdd_output = tmpfile()) == NULL) {
1174 		mdb_warn("couldn't open DTrace output file: %d\n", errno);
1175 		goto err;
1176 	}
1177 
1178 	if (dtrace_consume(dtp, dd.dtdd_output,
1179 	    dtrace_dcmdprobe, dtrace_dcmdrec, &dd) == -1) {
1180 		mdb_warn("couldn't consume DTrace buffers: %s\n",
1181 		    dtrace_errmsg(dtp, dtrace_errno(dtp)));
1182 	}
1183 
1184 	if (dtrace_aggregate_print(dtp, NULL, NULL) == -1) {
1185 		mdb_warn("couldn't print aggregation: %s\n",
1186 		    dtrace_errmsg(dtp, dtrace_errno(dtp)));
1187 		goto err;
1188 	}
1189 
1190 	rval = DCMD_OK;
1191 err:
1192 	dtrace_close(dtp);
1193 	fclose(dd.dtdd_output);
1194 	return (rval);
1195 }
1196 
1197 static int
1198 dtrace_errhash_cmp(const void *l, const void *r)
1199 {
1200 	uintptr_t lhs = *((uintptr_t *)l);
1201 	uintptr_t rhs = *((uintptr_t *)r);
1202 	dtrace_errhash_t lerr, rerr;
1203 	char lmsg[256], rmsg[256];
1204 
1205 	(void) mdb_vread(&lerr, sizeof (lerr), lhs);
1206 	(void) mdb_vread(&rerr, sizeof (rerr), rhs);
1207 
1208 	if (lerr.dter_msg == NULL)
1209 		return (-1);
1210 
1211 	if (rerr.dter_msg == NULL)
1212 		return (1);
1213 
1214 	(void) mdb_readstr(lmsg, sizeof (lmsg), (uintptr_t)lerr.dter_msg);
1215 	(void) mdb_readstr(rmsg, sizeof (rmsg), (uintptr_t)rerr.dter_msg);
1216 
1217 	return (strcmp(lmsg, rmsg));
1218 }
1219 
1220 int
1221 dtrace_errhash_init(mdb_walk_state_t *wsp)
1222 {
1223 	GElf_Sym sym;
1224 	uintptr_t *hash, addr;
1225 	int i;
1226 
1227 	if (wsp->walk_addr != 0) {
1228 		mdb_warn("dtrace_errhash walk only supports global walks\n");
1229 		return (WALK_ERR);
1230 	}
1231 
1232 	if (mdb_lookup_by_name("dtrace_errhash", &sym) == -1) {
1233 		mdb_warn("couldn't find 'dtrace_errhash' (non-DEBUG kernel?)");
1234 		return (WALK_ERR);
1235 	}
1236 
1237 	addr = (uintptr_t)sym.st_value;
1238 	hash = mdb_alloc(DTRACE_ERRHASHSZ * sizeof (uintptr_t),
1239 	    UM_SLEEP | UM_GC);
1240 
1241 	for (i = 0; i < DTRACE_ERRHASHSZ; i++)
1242 		hash[i] = addr + i * sizeof (dtrace_errhash_t);
1243 
1244 	qsort(hash, DTRACE_ERRHASHSZ, sizeof (uintptr_t), dtrace_errhash_cmp);
1245 
1246 	wsp->walk_addr = 0;
1247 	wsp->walk_data = hash;
1248 
1249 	return (WALK_NEXT);
1250 }
1251 
1252 int
1253 dtrace_errhash_step(mdb_walk_state_t *wsp)
1254 {
1255 	int ndx = (int)wsp->walk_addr;
1256 	uintptr_t *hash = wsp->walk_data;
1257 	dtrace_errhash_t err;
1258 	uintptr_t addr;
1259 
1260 	if (ndx >= DTRACE_ERRHASHSZ)
1261 		return (WALK_DONE);
1262 
1263 	wsp->walk_addr = ndx + 1;
1264 	addr = hash[ndx];
1265 
1266 	if (mdb_vread(&err, sizeof (err), addr) == -1) {
1267 		mdb_warn("failed to read dtrace_errhash_t at %p", addr);
1268 		return (WALK_DONE);
1269 	}
1270 
1271 	if (err.dter_msg == NULL)
1272 		return (WALK_NEXT);
1273 
1274 	return (wsp->walk_callback(addr, &err, wsp->walk_cbdata));
1275 }
1276 
1277 /*ARGSUSED*/
1278 int
1279 dtrace_errhash(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1280 {
1281 	dtrace_errhash_t err;
1282 	char msg[256];
1283 
1284 	if (!(flags & DCMD_ADDRSPEC)) {
1285 		if (mdb_walk_dcmd("dtrace_errhash", "dtrace_errhash",
1286 		    argc, argv) == -1) {
1287 			mdb_warn("can't walk 'dtrace_errhash'");
1288 			return (DCMD_ERR);
1289 		}
1290 
1291 		return (DCMD_OK);
1292 	}
1293 
1294 	if (DCMD_HDRSPEC(flags))
1295 		mdb_printf("%8s %s\n", "COUNT", "ERROR");
1296 
1297 	if (mdb_vread(&err, sizeof (err), addr) == -1) {
1298 		mdb_warn("failed to read dtrace_errhash_t at %p", addr);
1299 		return (DCMD_ERR);
1300 	}
1301 
1302 	addr = (uintptr_t)err.dter_msg;
1303 
1304 	if (mdb_readstr(msg, sizeof (msg), addr) == -1) {
1305 		mdb_warn("failed to read error msg at %p", addr);
1306 		return (DCMD_ERR);
1307 	}
1308 
1309 	mdb_printf("%8d %s", err.dter_count, msg);
1310 
1311 	/*
1312 	 * Some error messages include a newline -- only print the newline
1313 	 * if the message doesn't have one.
1314 	 */
1315 	if (msg[strlen(msg) - 1] != '\n')
1316 		mdb_printf("\n");
1317 
1318 	return (DCMD_OK);
1319 }
1320 
1321 int
1322 dtrace_helptrace_init(mdb_walk_state_t *wsp)
1323 {
1324 	uint32_t next;
1325 	uintptr_t buffer;
1326 
1327 	if (wsp->walk_addr != 0) {
1328 		mdb_warn("dtrace_helptrace only supports global walks\n");
1329 		return (WALK_ERR);
1330 	}
1331 
1332 	if (mdb_readvar(&buffer, "dtrace_helptrace_buffer") == -1) {
1333 		mdb_warn("couldn't read 'dtrace_helptrace_buffer'");
1334 		return (WALK_ERR);
1335 	}
1336 
1337 	if (buffer == 0) {
1338 		mdb_warn("helper tracing is not enabled\n");
1339 		return (WALK_ERR);
1340 	}
1341 
1342 	if (mdb_readvar(&next, "dtrace_helptrace_next") == -1) {
1343 		mdb_warn("couldn't read 'dtrace_helptrace_next'");
1344 		return (WALK_ERR);
1345 	}
1346 
1347 	wsp->walk_addr = next;
1348 
1349 	return (WALK_NEXT);
1350 }
1351 
1352 int
1353 dtrace_helptrace_step(mdb_walk_state_t *wsp)
1354 {
1355 	uint32_t next, size, nlocals, bufsize;
1356 	uintptr_t buffer, addr;
1357 	dtrace_helptrace_t *ht;
1358 	int rval;
1359 
1360 	if (mdb_readvar(&next, "dtrace_helptrace_next") == -1) {
1361 		mdb_warn("couldn't read 'dtrace_helptrace_next'");
1362 		return (WALK_ERR);
1363 	}
1364 
1365 	if (mdb_readvar(&bufsize, "dtrace_helptrace_bufsize") == -1) {
1366 		mdb_warn("couldn't read 'dtrace_helptrace_bufsize'");
1367 		return (WALK_ERR);
1368 	}
1369 
1370 	if (mdb_readvar(&buffer, "dtrace_helptrace_buffer") == -1) {
1371 		mdb_warn("couldn't read 'dtrace_helptrace_buffer'");
1372 		return (WALK_ERR);
1373 	}
1374 
1375 	if (mdb_readvar(&nlocals, "dtrace_helptrace_nlocals") == -1) {
1376 		mdb_warn("couldn't read 'dtrace_helptrace_nlocals'");
1377 		return (WALK_ERR);
1378 	}
1379 
1380 	size = sizeof (dtrace_helptrace_t) +
1381 	    nlocals * sizeof (uint64_t) - sizeof (uint64_t);
1382 
1383 	if (wsp->walk_addr + size > bufsize) {
1384 		if (next == 0)
1385 			return (WALK_DONE);
1386 
1387 		wsp->walk_addr = 0;
1388 	}
1389 
1390 	addr = buffer + wsp->walk_addr;
1391 	ht = alloca(size);
1392 
1393 	if (mdb_vread(ht, size, addr) == -1) {
1394 		mdb_warn("couldn't read entry at %p", addr);
1395 		return (WALK_ERR);
1396 	}
1397 
1398 	if (ht->dtht_helper != NULL) {
1399 		rval = wsp->walk_callback(addr, ht, wsp->walk_cbdata);
1400 
1401 		if (rval != WALK_NEXT)
1402 			return (rval);
1403 	}
1404 
1405 	if (wsp->walk_addr < next && wsp->walk_addr + size >= next)
1406 		return (WALK_DONE);
1407 
1408 	wsp->walk_addr += size;
1409 	return (WALK_NEXT);
1410 }
1411 
1412 int
1413 dtrace_helptrace(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1414 {
1415 	dtrace_helptrace_t help;
1416 	dtrace_helper_action_t helper;
1417 	char where[30];
1418 	uint_t opt_v = FALSE;
1419 	uintptr_t haddr;
1420 
1421 	if (!(flags & DCMD_ADDRSPEC)) {
1422 		if (mdb_walk_dcmd("dtrace_helptrace", "dtrace_helptrace",
1423 		    argc, argv) == -1) {
1424 			mdb_warn("can't walk 'dtrace_helptrace'");
1425 			return (DCMD_ERR);
1426 		}
1427 
1428 		return (DCMD_OK);
1429 	}
1430 
1431 	if (mdb_getopts(argc, argv, 'v',
1432 	    MDB_OPT_SETBITS, TRUE, &opt_v, NULL) != argc)
1433 		return (DCMD_USAGE);
1434 
1435 	if (DCMD_HDRSPEC(flags)) {
1436 		mdb_printf(" %?s %?s %12s %s\n",
1437 		    "ADDR", "HELPER", "WHERE", "DIFO");
1438 	}
1439 
1440 	if (mdb_vread(&help, sizeof (help), addr) == -1) {
1441 		mdb_warn("failed to read dtrace_helptrace_t at %p", addr);
1442 		return (DCMD_ERR);
1443 	}
1444 
1445 	switch (help.dtht_where) {
1446 	case 0:
1447 		(void) mdb_snprintf(where, sizeof (where), "predicate");
1448 		break;
1449 
1450 	case DTRACE_HELPTRACE_NEXT:
1451 		(void) mdb_snprintf(where, sizeof (where), "next");
1452 		break;
1453 
1454 	case DTRACE_HELPTRACE_DONE:
1455 		(void) mdb_snprintf(where, sizeof (where), "done");
1456 		break;
1457 
1458 	case DTRACE_HELPTRACE_ERR:
1459 		(void) mdb_snprintf(where, sizeof (where), "err");
1460 		break;
1461 
1462 	default:
1463 		(void) mdb_snprintf(where, sizeof (where),
1464 		    "action #%d", help.dtht_where);
1465 		break;
1466 	}
1467 
1468 	mdb_printf(" %?p %?p %12s ", addr, help.dtht_helper, where);
1469 
1470 	haddr = (uintptr_t)help.dtht_helper;
1471 
1472 	if (mdb_vread(&helper, sizeof (helper), haddr) == -1) {
1473 		/*
1474 		 * We're not going to warn in this case -- we're just not going
1475 		 * to print anything exciting.
1476 		 */
1477 		mdb_printf("???\n");
1478 	} else {
1479 		switch (help.dtht_where) {
1480 		case 0:
1481 			mdb_printf("%p\n", helper.dtha_predicate);
1482 			break;
1483 
1484 		case DTRACE_HELPTRACE_NEXT:
1485 		case DTRACE_HELPTRACE_DONE:
1486 		case DTRACE_HELPTRACE_ERR:
1487 			mdb_printf("-\n");
1488 			break;
1489 
1490 		default:
1491 			haddr = (uintptr_t)helper.dtha_actions +
1492 			    (help.dtht_where - 1) * sizeof (uintptr_t);
1493 
1494 			if (mdb_vread(&haddr, sizeof (haddr), haddr) == -1) {
1495 				mdb_printf("???\n");
1496 			} else {
1497 				mdb_printf("%p\n", haddr);
1498 			}
1499 		}
1500 	}
1501 
1502 	if (opt_v) {
1503 		int i;
1504 
1505 		if (help.dtht_where == DTRACE_HELPTRACE_ERR) {
1506 			int f = help.dtht_fault;
1507 
1508 			mdb_printf("%?s| %?s %10s |\n", "", "", "");
1509 			mdb_printf("%?s| %?s %10s +->  fault: %s\n", "", "", "",
1510 			    f == DTRACEFLT_BADADDR ? "BADADDR" :
1511 			    f == DTRACEFLT_BADALIGN ? "BADALIGN" :
1512 			    f == DTRACEFLT_ILLOP ? "ILLOP" :
1513 			    f == DTRACEFLT_DIVZERO ? "DIVZERO" :
1514 			    f == DTRACEFLT_NOSCRATCH ? "NOSCRATCH" :
1515 			    f == DTRACEFLT_KPRIV ? "KPRIV" :
1516 			    f == DTRACEFLT_UPRIV ? "UPRIV" :
1517 			    f == DTRACEFLT_TUPOFLOW ? "TUPOFLOW" :
1518 			    f == DTRACEFLT_BADSTACK ? "BADSTACK" :
1519 			    "DTRACEFLT_UNKNOWN");
1520 			mdb_printf("%?s| %?s %12s     addr: 0x%x\n", "", "", "",
1521 			    help.dtht_illval);
1522 			mdb_printf("%?s| %?s %12s   offset: %d\n", "", "", "",
1523 			    help.dtht_fltoffs);
1524 		}
1525 
1526 		mdb_printf("%?s|\n%?s+--> %?s %4s %s\n", "", "",
1527 		    "ADDR", "NDX", "VALUE");
1528 		addr += sizeof (help) - sizeof (uint64_t);
1529 
1530 		for (i = 0; i < help.dtht_nlocals; i++) {
1531 			uint64_t val;
1532 
1533 			if (mdb_vread(&val, sizeof (val), addr) == -1) {
1534 				mdb_warn("couldn't read local at %p", addr);
1535 				continue;
1536 			}
1537 
1538 			mdb_printf("%?s     %?p %4d %p\n", "", addr, i, val);
1539 			addr += sizeof (uint64_t);
1540 		}
1541 
1542 		mdb_printf("\n");
1543 	}
1544 
1545 	return (DCMD_OK);
1546 }
1547 
1548 /*ARGSUSED*/
1549 static int
1550 dtrace_state_walk(uintptr_t addr, const vmem_seg_t *seg, minor_t *highest)
1551 {
1552 	if (seg->vs_end > *highest)
1553 		*highest = seg->vs_end;
1554 
1555 	return (WALK_NEXT);
1556 }
1557 
/*
 * Private state for the dtrace_state walker.
 */
typedef struct dtrace_state_walk {
	uintptr_t dtsw_softstate;	/* value of 'dtrace_softstate' */
	minor_t dtsw_max;		/* highest segment end in minor arena */
	minor_t dtsw_current;		/* next minor number to look up */
} dtrace_state_walk_t;
1563 
1564 int
1565 dtrace_state_init(mdb_walk_state_t *wsp)
1566 {
1567 	uintptr_t dtrace_minor;
1568 	minor_t max = 0;
1569 	dtrace_state_walk_t *dw;
1570 
1571 	if (wsp->walk_addr != 0) {
1572 		mdb_warn("dtrace_state only supports global walks\n");
1573 		return (WALK_ERR);
1574 	}
1575 
1576 	/*
1577 	 * Find the dtrace_minor vmem arena and walk it to get the maximum
1578 	 * minor number.
1579 	 */
1580 	if (mdb_readvar(&dtrace_minor, "dtrace_minor") == -1) {
1581 		mdb_warn("failed to read 'dtrace_minor'");
1582 		return (WALK_ERR);
1583 	}
1584 
1585 	if (mdb_pwalk("vmem_alloc", (mdb_walk_cb_t)dtrace_state_walk,
1586 	    &max, dtrace_minor) == -1) {
1587 		mdb_warn("couldn't walk 'vmem_alloc'");
1588 		return (WALK_ERR);
1589 	}
1590 
1591 	dw = mdb_zalloc(sizeof (dtrace_state_walk_t), UM_SLEEP | UM_GC);
1592 	dw->dtsw_current = 0;
1593 	dw->dtsw_max = max;
1594 
1595 	if (mdb_readvar(&dw->dtsw_softstate, "dtrace_softstate") == -1) {
1596 		mdb_warn("failed to read 'dtrace_softstate'");
1597 		return (DCMD_ERR);
1598 	}
1599 
1600 	wsp->walk_data = dw;
1601 
1602 	return (WALK_NEXT);
1603 }
1604 
1605 int
1606 dtrace_state_step(mdb_walk_state_t *wsp)
1607 {
1608 	dtrace_state_walk_t *dw = wsp->walk_data;
1609 	uintptr_t statep;
1610 	dtrace_state_t state;
1611 	int rval;
1612 
1613 	while (mdb_get_soft_state_byaddr(dw->dtsw_softstate, dw->dtsw_current,
1614 	    &statep, NULL, 0) == -1) {
1615 		if (dw->dtsw_current >= dw->dtsw_max)
1616 			return (WALK_DONE);
1617 
1618 		dw->dtsw_current++;
1619 	}
1620 
1621 	if (mdb_vread(&state, sizeof (state), statep) == -1) {
1622 		mdb_warn("couldn't read dtrace_state_t at %p", statep);
1623 		return (WALK_NEXT);
1624 	}
1625 
1626 	rval = wsp->walk_callback(statep, &state, wsp->walk_cbdata);
1627 	dw->dtsw_current++;
1628 
1629 	return (rval);
1630 }
1631 
/*
 * Search context shared by ::dtrace_state and its proc/file walk callbacks.
 */
typedef struct dtrace_state_data {
	int dtsd_major;			/* devi_major of 'dtrace_devi' */
	uintptr_t dtsd_proc;		/* proc currently being examined */
	uintptr_t dtsd_softstate;	/* value of 'dtrace_softstate' */
	uintptr_t dtsd_state;		/* state address being looked for */
} dtrace_state_data_t;
1638 
1639 static int
1640 dtrace_state_file(uintptr_t addr, struct file *f, dtrace_state_data_t *data)
1641 {
1642 	vnode_t vnode;
1643 	proc_t proc;
1644 	minor_t minor;
1645 	uintptr_t statep;
1646 
1647 	if (mdb_vread(&vnode, sizeof (vnode), (uintptr_t)f->f_vnode) == -1) {
1648 		mdb_warn("couldn't read vnode at %p", (uintptr_t)f->f_vnode);
1649 		return (WALK_NEXT);
1650 	}
1651 
1652 	if (getmajor(vnode.v_rdev) != data->dtsd_major)
1653 		return (WALK_NEXT);
1654 
1655 	minor = getminor(vnode.v_rdev);
1656 
1657 	if (mdb_vread(&proc, sizeof (proc), data->dtsd_proc) == -1) {
1658 		mdb_warn("failed to read proc at %p", data->dtsd_proc);
1659 		return (WALK_NEXT);
1660 	}
1661 
1662 	if (mdb_get_soft_state_byaddr(data->dtsd_softstate, minor,
1663 	    &statep, NULL, 0) == -1) {
1664 		mdb_warn("failed to read softstate for minor %d", minor);
1665 		return (WALK_NEXT);
1666 	}
1667 
1668 	if (statep != data->dtsd_state)
1669 		return (WALK_NEXT);
1670 
1671 	mdb_printf("%?p %5d %?p %-*s %?p\n", statep, minor,
1672 	    data->dtsd_proc, MAXCOMLEN, proc.p_user.u_comm, addr);
1673 
1674 	return (WALK_NEXT);
1675 }
1676 
1677 /*ARGSUSED*/
1678 static int
1679 dtrace_state_proc(uintptr_t addr, void *ignored, dtrace_state_data_t *data)
1680 {
1681 	data->dtsd_proc = addr;
1682 
1683 	if (mdb_pwalk("file",
1684 	    (mdb_walk_cb_t)dtrace_state_file, data, addr) == -1) {
1685 		mdb_warn("couldn't walk 'file' for proc %p", addr);
1686 		return (WALK_ERR);
1687 	}
1688 
1689 	return (WALK_NEXT);
1690 }
1691 
/*
 * Help text for the ::dtrace_state dcmd.  The literal below is laid out
 * one output line per source string.
 */
void
dtrace_state_help(void)
{
	mdb_printf(
	    "Given a dtrace_state_t structure, displays all consumers, "
	    /*CSTYLED*/
	    "or \"<anonymous>\"\n"
	    "if the consumer is anonymous.  If no state structure is "
	    "provided, iterates\n"
	    "over all state structures.\n"
	    "\n"
	    "Addresses in ADDR column may be provided to ::dtrace to obtain\n"
	    "dtrace(8)-like output for in-kernel DTrace data.\n");
}
1703 
1704 int
1705 dtrace_state(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1706 {
1707 	uintptr_t devi;
1708 	struct dev_info info;
1709 	dtrace_state_data_t data;
1710 	dtrace_anon_t anon;
1711 	dtrace_state_t state;
1712 
1713 	if (!(flags & DCMD_ADDRSPEC)) {
1714 		if (mdb_walk_dcmd("dtrace_state",
1715 		    "dtrace_state", argc, argv) == -1) {
1716 			mdb_warn("can't walk dtrace_state");
1717 			return (DCMD_ERR);
1718 		}
1719 		return (DCMD_OK);
1720 	}
1721 
1722 	if (DCMD_HDRSPEC(flags)) {
1723 		mdb_printf("%?s %5s %?s %-*s %?s\n", "ADDR", "MINOR", "PROC",
1724 		    MAXCOMLEN, "NAME", "FILE");
1725 	}
1726 
1727 	/*
1728 	 * First determine if this is anonymous state.
1729 	 */
1730 	if (mdb_readvar(&anon, "dtrace_anon") == -1) {
1731 		mdb_warn("failed to read 'dtrace_anon'");
1732 		return (DCMD_ERR);
1733 	}
1734 
1735 	if ((uintptr_t)anon.dta_state == addr) {
1736 		if (mdb_vread(&state, sizeof (state), addr) == -1) {
1737 			mdb_warn("failed to read anon at %p", addr);
1738 			return (DCMD_ERR);
1739 		}
1740 
1741 		mdb_printf("%?p %5d %?s %-*s %?s\n", addr,
1742 		    getminor(state.dts_dev), "-", MAXCOMLEN,
1743 		    "<anonymous>", "-");
1744 
1745 		return (DCMD_OK);
1746 	}
1747 
1748 	if (mdb_readvar(&devi, "dtrace_devi") == -1) {
1749 		mdb_warn("failed to read 'dtrace_devi'");
1750 		return (DCMD_ERR);
1751 	}
1752 
1753 	if (mdb_vread(&info, sizeof (struct dev_info), devi) == -1) {
1754 		mdb_warn("failed to read 'dev_info'");
1755 		return (DCMD_ERR);
1756 	}
1757 
1758 	data.dtsd_major = info.devi_major;
1759 
1760 	if (mdb_readvar(&data.dtsd_softstate, "dtrace_softstate") == -1) {
1761 		mdb_warn("failed to read 'dtrace_softstate'");
1762 		return (DCMD_ERR);
1763 	}
1764 
1765 	data.dtsd_state = addr;
1766 
1767 	/*
1768 	 * Walk through all processes and all open files looking for this
1769 	 * state.  It must be open somewhere...
1770 	 */
1771 	if (mdb_walk("proc", (mdb_walk_cb_t)dtrace_state_proc, &data) == -1) {
1772 		mdb_warn("couldn't walk 'proc'");
1773 		return (DCMD_ERR);
1774 	}
1775 
1776 	return (DCMD_OK);
1777 }
1778 
/*
 * Private state for the dtrace_aggkey walker.
 */
typedef struct dtrace_aggkey_data {
	uintptr_t *dtakd_hash;		/* local copy of the bucket array */
	uintptr_t dtakd_hashsize;	/* number of buckets */
	uintptr_t dtakd_next;		/* next key to visit; 0 at chain end */
	uintptr_t dtakd_ndx;		/* index of next bucket to start */
} dtrace_aggkey_data_t;
1785 
1786 int
1787 dtrace_aggkey_init(mdb_walk_state_t *wsp)
1788 {
1789 	dtrace_buffer_t buf;
1790 	uintptr_t addr;
1791 	dtrace_aggbuffer_t agb;
1792 	dtrace_aggkey_data_t *data;
1793 	size_t hsize;
1794 
1795 	if ((addr = wsp->walk_addr) == 0) {
1796 		mdb_warn("dtrace_aggkey walk needs aggregation buffer\n");
1797 		return (WALK_ERR);
1798 	}
1799 
1800 	if (mdb_vread(&buf, sizeof (buf), addr) == -1) {
1801 		mdb_warn("failed to read aggregation buffer at %p", addr);
1802 		return (WALK_ERR);
1803 	}
1804 
1805 	addr = (uintptr_t)buf.dtb_tomax +
1806 	    buf.dtb_size - sizeof (dtrace_aggbuffer_t);
1807 
1808 	if (mdb_vread(&agb, sizeof (agb), addr) == -1) {
1809 		mdb_warn("failed to read dtrace_aggbuffer_t at %p", addr);
1810 		return (WALK_ERR);
1811 	}
1812 
1813 	data = mdb_zalloc(sizeof (dtrace_aggkey_data_t), UM_SLEEP);
1814 
1815 	data->dtakd_hashsize = agb.dtagb_hashsize;
1816 	hsize = agb.dtagb_hashsize * sizeof (dtrace_aggkey_t *);
1817 	data->dtakd_hash = mdb_alloc(hsize, UM_SLEEP);
1818 
1819 	if (mdb_vread(data->dtakd_hash, hsize,
1820 	    (uintptr_t)agb.dtagb_hash) == -1) {
1821 		mdb_warn("failed to read hash at %p",
1822 		    (uintptr_t)agb.dtagb_hash);
1823 		mdb_free(data->dtakd_hash, hsize);
1824 		mdb_free(data, sizeof (dtrace_aggkey_data_t));
1825 		return (WALK_ERR);
1826 	}
1827 
1828 	wsp->walk_data = data;
1829 	return (WALK_NEXT);
1830 }
1831 
1832 int
1833 dtrace_aggkey_step(mdb_walk_state_t *wsp)
1834 {
1835 	dtrace_aggkey_data_t *data = wsp->walk_data;
1836 	dtrace_aggkey_t key;
1837 	uintptr_t addr;
1838 
1839 	while ((addr = data->dtakd_next) == 0) {
1840 		if (data->dtakd_ndx == data->dtakd_hashsize)
1841 			return (WALK_DONE);
1842 
1843 		data->dtakd_next = data->dtakd_hash[data->dtakd_ndx++];
1844 	}
1845 
1846 	if (mdb_vread(&key, sizeof (key), addr) == -1) {
1847 		mdb_warn("failed to read dtrace_aggkey_t at %p", addr);
1848 		return (WALK_ERR);
1849 	}
1850 
1851 	data->dtakd_next = (uintptr_t)key.dtak_next;
1852 
1853 	return (wsp->walk_callback(addr, &key, wsp->walk_cbdata));
1854 }
1855 
1856 void
1857 dtrace_aggkey_fini(mdb_walk_state_t *wsp)
1858 {
1859 	dtrace_aggkey_data_t *data = wsp->walk_data;
1860 	size_t hsize;
1861 
1862 	hsize = data->dtakd_hashsize * sizeof (dtrace_aggkey_t *);
1863 	mdb_free(data->dtakd_hash, hsize);
1864 	mdb_free(data, sizeof (dtrace_aggkey_data_t));
1865 }
1866 
/*
 * Private state for the dtrace_dynvar walker.
 */
typedef struct dtrace_dynvar_data {
	dtrace_dynhash_t *dtdvd_hash;	/* local copy of the bucket array */
	uintptr_t dtdvd_hashsize;	/* number of buckets */
	uintptr_t dtdvd_next;		/* next dynamic variable to visit */
	uintptr_t dtdvd_ndx;		/* index of next bucket to start */
	uintptr_t dtdvd_sink;		/* address of 'dtrace_dynhash_sink' */
} dtrace_dynvar_data_t;
1874 
1875 int
1876 dtrace_dynvar_init(mdb_walk_state_t *wsp)
1877 {
1878 	uintptr_t addr;
1879 	dtrace_dstate_t dstate;
1880 	dtrace_dynvar_data_t *data;
1881 	size_t hsize;
1882 	GElf_Sym sym;
1883 
1884 	if ((addr = wsp->walk_addr) == 0) {
1885 		mdb_warn("dtrace_dynvar walk needs dtrace_dstate_t\n");
1886 		return (WALK_ERR);
1887 	}
1888 
1889 	if (mdb_vread(&dstate, sizeof (dstate), addr) == -1) {
1890 		mdb_warn("failed to read dynamic state at %p", addr);
1891 		return (WALK_ERR);
1892 	}
1893 
1894 	if (mdb_lookup_by_name("dtrace_dynhash_sink", &sym) == -1) {
1895 		mdb_warn("couldn't find 'dtrace_dynhash_sink'");
1896 		return (WALK_ERR);
1897 	}
1898 
1899 	data = mdb_zalloc(sizeof (dtrace_dynvar_data_t), UM_SLEEP);
1900 
1901 	data->dtdvd_hashsize = dstate.dtds_hashsize;
1902 	hsize = dstate.dtds_hashsize * sizeof (dtrace_dynhash_t);
1903 	data->dtdvd_hash = mdb_alloc(hsize, UM_SLEEP);
1904 	data->dtdvd_sink = (uintptr_t)sym.st_value;
1905 
1906 	if (mdb_vread(data->dtdvd_hash, hsize,
1907 	    (uintptr_t)dstate.dtds_hash) == -1) {
1908 		mdb_warn("failed to read hash at %p",
1909 		    (uintptr_t)dstate.dtds_hash);
1910 		mdb_free(data->dtdvd_hash, hsize);
1911 		mdb_free(data, sizeof (dtrace_dynvar_data_t));
1912 		return (WALK_ERR);
1913 	}
1914 
1915 	data->dtdvd_next = (uintptr_t)data->dtdvd_hash[0].dtdh_chain;
1916 
1917 	wsp->walk_data = data;
1918 	return (WALK_NEXT);
1919 }
1920 
1921 int
1922 dtrace_dynvar_step(mdb_walk_state_t *wsp)
1923 {
1924 	dtrace_dynvar_data_t *data = wsp->walk_data;
1925 	dtrace_dynvar_t dynvar, *dvar;
1926 	size_t dvarsize;
1927 	uintptr_t addr;
1928 	int nkeys;
1929 
1930 	while ((addr = data->dtdvd_next) == data->dtdvd_sink) {
1931 		if (data->dtdvd_ndx == data->dtdvd_hashsize)
1932 			return (WALK_DONE);
1933 
1934 		data->dtdvd_next =
1935 		    (uintptr_t)data->dtdvd_hash[data->dtdvd_ndx++].dtdh_chain;
1936 	}
1937 
1938 	if (mdb_vread(&dynvar, sizeof (dynvar), addr) == -1) {
1939 		mdb_warn("failed to read dtrace_dynvar_t at %p", addr);
1940 		return (WALK_ERR);
1941 	}
1942 
1943 	/*
1944 	 * Now we need to allocate the correct size.
1945 	 */
1946 	nkeys = dynvar.dtdv_tuple.dtt_nkeys;
1947 	dvarsize = (uintptr_t)&dynvar.dtdv_tuple.dtt_key[nkeys] -
1948 	    (uintptr_t)&dynvar;
1949 
1950 	dvar = alloca(dvarsize);
1951 
1952 	if (mdb_vread(dvar, dvarsize, addr) == -1) {
1953 		mdb_warn("failed to read dtrace_dynvar_t at %p", addr);
1954 		return (WALK_ERR);
1955 	}
1956 
1957 	data->dtdvd_next = (uintptr_t)dynvar.dtdv_next;
1958 
1959 	return (wsp->walk_callback(addr, dvar, wsp->walk_cbdata));
1960 }
1961 
1962 void
1963 dtrace_dynvar_fini(mdb_walk_state_t *wsp)
1964 {
1965 	dtrace_dynvar_data_t *data = wsp->walk_data;
1966 	size_t hsize;
1967 
1968 	hsize = data->dtdvd_hashsize * sizeof (dtrace_dynvar_t *);
1969 	mdb_free(data->dtdvd_hash, hsize);
1970 	mdb_free(data, sizeof (dtrace_dynvar_data_t));
1971 }
1972 
/*
 * Working state for the hash statistics dcmds: a table of per-bucket
 * counters plus the key currently being hashed.
 */
typedef struct dtrace_hashstat_data {
	size_t *dthsd_counts;		/* number of keys in each bucket */
	size_t dthsd_hashsize;		/* number of buckets in use */
	char *dthsd_data;		/* bytes of the key being hashed */
	size_t dthsd_size;		/* length of dthsd_data in bytes */
	int dthsd_header;		/* nonzero once header is printed */
} dtrace_hashstat_data_t;

/*
 * A candidate hash function: hashes dthsd_data and increments the counter
 * of the bucket it selects.
 */
typedef void (*dtrace_hashstat_func_t)(dtrace_hashstat_data_t *);
1982 
1983 static void
1984 dtrace_hashstat_additive(dtrace_hashstat_data_t *data)
1985 {
1986 	int i;
1987 	int hval = 0;
1988 
1989 	for (i = 0; i < data->dthsd_size; i++)
1990 		hval += data->dthsd_data[i];
1991 
1992 	data->dthsd_counts[hval % data->dthsd_hashsize]++;
1993 }
1994 
1995 static void
1996 dtrace_hashstat_shifty(dtrace_hashstat_data_t *data)
1997 {
1998 	uint64_t hval = 0;
1999 	int i;
2000 
2001 	if (data->dthsd_size < sizeof (uint64_t)) {
2002 		dtrace_hashstat_additive(data);
2003 		return;
2004 	}
2005 
2006 	for (i = 0; i < data->dthsd_size; i += sizeof (uint64_t)) {
2007 		/* LINTED - alignment */
2008 		uint64_t val = *((uint64_t *)&data->dthsd_data[i]);
2009 
2010 		hval += (val & ((1 << NBBY) - 1)) +
2011 		    ((val >> NBBY) & ((1 << NBBY) - 1)) +
2012 		    ((val >> (NBBY << 1)) & ((1 << NBBY) - 1)) +
2013 		    ((val >> (NBBY << 2)) & ((1 << NBBY) - 1)) +
2014 		    (val & USHRT_MAX) + (val >> (NBBY << 1) & USHRT_MAX);
2015 	}
2016 
2017 	data->dthsd_counts[hval % data->dthsd_hashsize]++;
2018 }
2019 
2020 static void
2021 dtrace_hashstat_knuth(dtrace_hashstat_data_t *data)
2022 {
2023 	int i;
2024 	int hval = data->dthsd_size;
2025 
2026 	for (i = 0; i < data->dthsd_size; i++)
2027 		hval = (hval << 4) ^ (hval >> 28) ^ data->dthsd_data[i];
2028 
2029 	data->dthsd_counts[hval % data->dthsd_hashsize]++;
2030 }
2031 
2032 static void
2033 dtrace_hashstat_oneatatime(dtrace_hashstat_data_t *data)
2034 {
2035 	int i;
2036 	uint32_t hval = 0;
2037 
2038 	for (i = 0; i < data->dthsd_size; i++) {
2039 		hval += data->dthsd_data[i];
2040 		hval += (hval << 10);
2041 		hval ^= (hval >> 6);
2042 	}
2043 
2044 	hval += (hval << 3);
2045 	hval ^= (hval >> 11);
2046 	hval += (hval << 15);
2047 
2048 	data->dthsd_counts[hval % data->dthsd_hashsize]++;
2049 }
2050 
2051 static void
2052 dtrace_hashstat_fnv(dtrace_hashstat_data_t *data)
2053 {
2054 	static const uint32_t prime = 0x01000193;
2055 	uint32_t hval = 0;
2056 	int i;
2057 
2058 	for (i = 0; i < data->dthsd_size; i++) {
2059 		hval *= prime;
2060 		hval ^= data->dthsd_data[i];
2061 	}
2062 
2063 	data->dthsd_counts[hval % data->dthsd_hashsize]++;
2064 }
2065 
2066 static void
2067 dtrace_hashstat_stats(char *name, dtrace_hashstat_data_t *data)
2068 {
2069 	size_t nz = 0, i;
2070 	int longest = 0;
2071 	size_t ttl = 0;
2072 	double sum = 0.0;
2073 	double avg;
2074 	uint_t util, stddev;
2075 
2076 	if (!data->dthsd_header) {
2077 		mdb_printf("%15s %11s %11s %11s %11s %11s\n", "NAME",
2078 		    "HASHSIZE", "%UTIL", "LONGEST", "AVERAGE", "STDDEV");
2079 		data->dthsd_header = 1;
2080 	}
2081 
2082 	for (i = 0; i < data->dthsd_hashsize; i++) {
2083 		if (data->dthsd_counts[i] != 0) {
2084 			nz++;
2085 
2086 			if (data->dthsd_counts[i] > longest)
2087 				longest = data->dthsd_counts[i];
2088 
2089 			ttl += data->dthsd_counts[i];
2090 		}
2091 	}
2092 
2093 	if (nz == 0) {
2094 		mdb_printf("%15s %11d %11s %11s %11s %11s\n", name,
2095 		    data->dthsd_hashsize, "-", "-", "-", "-");
2096 		return;
2097 	}
2098 
2099 	avg = (double)ttl / (double)nz;
2100 
2101 	for (i = 0; i < data->dthsd_hashsize; i++) {
2102 		double delta = (double)data->dthsd_counts[i] - avg;
2103 
2104 		if (data->dthsd_counts[i] == 0)
2105 			continue;
2106 
2107 		sum += delta * delta;
2108 	}
2109 
2110 	util = (nz * 1000) / data->dthsd_hashsize;
2111 	stddev = (uint_t)sqrt(sum / (double)nz) * 10;
2112 
2113 	mdb_printf("%15s %11d %9u.%1u %11d %11d %9u.%1u\n", name,
2114 	    data->dthsd_hashsize, util / 10, util % 10, longest, ttl / nz,
2115 	    stddev / 10, stddev % 10);
2116 }
2117 
/*
 * Table of hash functions to evaluate, terminated by an entry with a NULL
 * name.  The first entry has no function: it stands for the hash value
 * already stored with each element in the kernel.
 */
static struct dtrace_hashstat {
	char *dths_name;			/* label printed in NAME column */
	dtrace_hashstat_func_t dths_func;	/* hash to apply, or NULL */
} _dtrace_hashstat[] = {
	{ "<actual>", NULL },
	{ "additive", dtrace_hashstat_additive },
	{ "shifty", dtrace_hashstat_shifty },
	{ "knuth", dtrace_hashstat_knuth },
	{ "one-at-a-time", dtrace_hashstat_oneatatime },
	{ "fnv", dtrace_hashstat_fnv },
	{ NULL, 0 }
};
2130 
/*
 * Callback argument for the hash statistics walks: the counters being
 * filled in, and the hash function under evaluation (NULL to use the hash
 * value stored with each element).
 */
typedef struct dtrace_aggstat_data {
	dtrace_hashstat_data_t dtagsd_hash;	/* counters and current key */
	dtrace_hashstat_func_t dtagsd_func;	/* hash to apply, or NULL */
} dtrace_aggstat_data_t;
2135 
2136 static int
2137 dtrace_aggstat_walk(uintptr_t addr, dtrace_aggkey_t *key,
2138     dtrace_aggstat_data_t *data)
2139 {
2140 	dtrace_hashstat_data_t *hdata = &data->dtagsd_hash;
2141 	size_t size;
2142 
2143 	if (data->dtagsd_func == NULL) {
2144 		size_t bucket = key->dtak_hashval % hdata->dthsd_hashsize;
2145 
2146 		hdata->dthsd_counts[bucket]++;
2147 		return (WALK_NEXT);
2148 	}
2149 
2150 	/*
2151 	 * We need to read the data.
2152 	 */
2153 	size = key->dtak_size - sizeof (dtrace_aggid_t);
2154 	addr = (uintptr_t)key->dtak_data + sizeof (dtrace_aggid_t);
2155 	hdata->dthsd_data = alloca(size);
2156 	hdata->dthsd_size = size;
2157 
2158 	if (mdb_vread(hdata->dthsd_data, size, addr) == -1) {
2159 		mdb_warn("couldn't read data at %p", addr);
2160 		return (WALK_ERR);
2161 	}
2162 
2163 	data->dtagsd_func(hdata);
2164 
2165 	return (WALK_NEXT);
2166 }
2167 
2168 /*ARGSUSED*/
2169 int
2170 dtrace_aggstat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2171 {
2172 	dtrace_buffer_t buf;
2173 	uintptr_t aaddr;
2174 	dtrace_aggbuffer_t agb;
2175 	size_t hsize, i, actual, prime, evenpow;
2176 	dtrace_aggstat_data_t data;
2177 	dtrace_hashstat_data_t *hdata = &data.dtagsd_hash;
2178 
2179 	bzero(&data, sizeof (data));
2180 
2181 	if (!(flags & DCMD_ADDRSPEC))
2182 		return (DCMD_USAGE);
2183 
2184 	if (mdb_vread(&buf, sizeof (buf), addr) == -1) {
2185 		mdb_warn("failed to read aggregation buffer at %p", addr);
2186 		return (DCMD_ERR);
2187 	}
2188 
2189 	aaddr = (uintptr_t)buf.dtb_tomax +
2190 	    buf.dtb_size - sizeof (dtrace_aggbuffer_t);
2191 
2192 	if (mdb_vread(&agb, sizeof (agb), aaddr) == -1) {
2193 		mdb_warn("failed to read dtrace_aggbuffer_t at %p", aaddr);
2194 		return (DCMD_ERR);
2195 	}
2196 
2197 	hsize = (actual = agb.dtagb_hashsize) * sizeof (size_t);
2198 	hdata->dthsd_counts = mdb_alloc(hsize, UM_SLEEP | UM_GC);
2199 
2200 	/*
2201 	 * Now pick the largest prime smaller than the hash size.  (If the
2202 	 * existing size is prime, we'll pick a smaller prime just for the
2203 	 * hell of it.)
2204 	 */
2205 	for (prime = agb.dtagb_hashsize - 1; prime > 7; prime--) {
2206 		size_t limit = prime / 7;
2207 
2208 		for (i = 2; i < limit; i++) {
2209 			if ((prime % i) == 0)
2210 				break;
2211 		}
2212 
2213 		if (i == limit)
2214 			break;
2215 	}
2216 
2217 	/*
2218 	 * And now we want to pick the largest power of two smaller than the
2219 	 * hashsize.
2220 	 */
2221 	for (i = 0; (1 << i) < agb.dtagb_hashsize; i++)
2222 		continue;
2223 
2224 	evenpow = (1 << (i - 1));
2225 
2226 	for (i = 0; _dtrace_hashstat[i].dths_name != NULL; i++) {
2227 		data.dtagsd_func = _dtrace_hashstat[i].dths_func;
2228 
2229 		hdata->dthsd_hashsize = actual;
2230 		hsize = hdata->dthsd_hashsize * sizeof (size_t);
2231 		bzero(hdata->dthsd_counts, hsize);
2232 
2233 		if (mdb_pwalk("dtrace_aggkey",
2234 		    (mdb_walk_cb_t)dtrace_aggstat_walk, &data, addr) == -1) {
2235 			mdb_warn("failed to walk dtrace_aggkey at %p", addr);
2236 			return (DCMD_ERR);
2237 		}
2238 
2239 		dtrace_hashstat_stats(_dtrace_hashstat[i].dths_name, hdata);
2240 
2241 		/*
2242 		 * If we were just printing the actual value, we won't try
2243 		 * any of the sizing experiments.
2244 		 */
2245 		if (data.dtagsd_func == NULL)
2246 			continue;
2247 
2248 		hdata->dthsd_hashsize = prime;
2249 		hsize = hdata->dthsd_hashsize * sizeof (size_t);
2250 		bzero(hdata->dthsd_counts, hsize);
2251 
2252 		if (mdb_pwalk("dtrace_aggkey",
2253 		    (mdb_walk_cb_t)dtrace_aggstat_walk, &data, addr) == -1) {
2254 			mdb_warn("failed to walk dtrace_aggkey at %p", addr);
2255 			return (DCMD_ERR);
2256 		}
2257 
2258 		dtrace_hashstat_stats(_dtrace_hashstat[i].dths_name, hdata);
2259 
2260 		hdata->dthsd_hashsize = evenpow;
2261 		hsize = hdata->dthsd_hashsize * sizeof (size_t);
2262 		bzero(hdata->dthsd_counts, hsize);
2263 
2264 		if (mdb_pwalk("dtrace_aggkey",
2265 		    (mdb_walk_cb_t)dtrace_aggstat_walk, &data, addr) == -1) {
2266 			mdb_warn("failed to walk dtrace_aggkey at %p", addr);
2267 			return (DCMD_ERR);
2268 		}
2269 
2270 		dtrace_hashstat_stats(_dtrace_hashstat[i].dths_name, hdata);
2271 	}
2272 
2273 	return (DCMD_OK);
2274 }
2275 
2276 /*ARGSUSED*/
2277 static int
2278 dtrace_dynstat_walk(uintptr_t addr, dtrace_dynvar_t *dynvar,
2279     dtrace_aggstat_data_t *data)
2280 {
2281 	dtrace_hashstat_data_t *hdata = &data->dtagsd_hash;
2282 	dtrace_tuple_t *tuple = &dynvar->dtdv_tuple;
2283 	dtrace_key_t *key = tuple->dtt_key;
2284 	size_t size = 0, offs = 0;
2285 	int i, nkeys = tuple->dtt_nkeys;
2286 	char *buf;
2287 
2288 	if (data->dtagsd_func == NULL) {
2289 		size_t bucket = dynvar->dtdv_hashval % hdata->dthsd_hashsize;
2290 
2291 		hdata->dthsd_counts[bucket]++;
2292 		return (WALK_NEXT);
2293 	}
2294 
2295 	/*
2296 	 * We want to hand the hashing algorithm a contiguous buffer.  First
2297 	 * run through the tuple and determine the size.
2298 	 */
2299 	for (i = 0; i < nkeys; i++) {
2300 		if (key[i].dttk_size == 0) {
2301 			size += sizeof (uint64_t);
2302 		} else {
2303 			size += key[i].dttk_size;
2304 		}
2305 	}
2306 
2307 	buf = alloca(size);
2308 
2309 	/*
2310 	 * Now go back through the tuple and copy the data into the buffer.
2311 	 */
2312 	for (i = 0; i < nkeys; i++) {
2313 		if (key[i].dttk_size == 0) {
2314 			bcopy(&key[i].dttk_value, &buf[offs],
2315 			    sizeof (uint64_t));
2316 			offs += sizeof (uint64_t);
2317 		} else {
2318 			if (mdb_vread(&buf[offs], key[i].dttk_size,
2319 			    key[i].dttk_value) == -1) {
2320 				mdb_warn("couldn't read tuple data at %p",
2321 				    key[i].dttk_value);
2322 				return (WALK_ERR);
2323 			}
2324 
2325 			offs += key[i].dttk_size;
2326 		}
2327 	}
2328 
2329 	hdata->dthsd_data = buf;
2330 	hdata->dthsd_size = size;
2331 
2332 	data->dtagsd_func(hdata);
2333 
2334 	return (WALK_NEXT);
2335 }
2336 
2337 /*ARGSUSED*/
2338 int
2339 dtrace_dynstat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2340 {
2341 	dtrace_dstate_t dstate;
2342 	size_t hsize, i, actual, prime;
2343 	dtrace_aggstat_data_t data;
2344 	dtrace_hashstat_data_t *hdata = &data.dtagsd_hash;
2345 
2346 	bzero(&data, sizeof (data));
2347 
2348 	if (!(flags & DCMD_ADDRSPEC))
2349 		return (DCMD_USAGE);
2350 
2351 	if (mdb_vread(&dstate, sizeof (dstate), addr) == -1) {
2352 		mdb_warn("failed to read dynamic variable state at %p", addr);
2353 		return (DCMD_ERR);
2354 	}
2355 
2356 	hsize = (actual = dstate.dtds_hashsize) * sizeof (size_t);
2357 	hdata->dthsd_counts = mdb_alloc(hsize, UM_SLEEP | UM_GC);
2358 
2359 	/*
2360 	 * Now pick the largest prime smaller than the hash size.  (If the
2361 	 * existing size is prime, we'll pick a smaller prime just for the
2362 	 * hell of it.)
2363 	 */
2364 	for (prime = dstate.dtds_hashsize - 1; prime > 7; prime--) {
2365 		size_t limit = prime / 7;
2366 
2367 		for (i = 2; i < limit; i++) {
2368 			if ((prime % i) == 0)
2369 				break;
2370 		}
2371 
2372 		if (i == limit)
2373 			break;
2374 	}
2375 
2376 	for (i = 0; _dtrace_hashstat[i].dths_name != NULL; i++) {
2377 		data.dtagsd_func = _dtrace_hashstat[i].dths_func;
2378 
2379 		hdata->dthsd_hashsize = actual;
2380 		hsize = hdata->dthsd_hashsize * sizeof (size_t);
2381 		bzero(hdata->dthsd_counts, hsize);
2382 
2383 		if (mdb_pwalk("dtrace_dynvar",
2384 		    (mdb_walk_cb_t)dtrace_dynstat_walk, &data, addr) == -1) {
2385 			mdb_warn("failed to walk dtrace_dynvar at %p", addr);
2386 			return (DCMD_ERR);
2387 		}
2388 
2389 		dtrace_hashstat_stats(_dtrace_hashstat[i].dths_name, hdata);
2390 
2391 		/*
2392 		 * If we were just printing the actual value, we won't try
2393 		 * any of the sizing experiments.
2394 		 */
2395 		if (data.dtagsd_func == NULL)
2396 			continue;
2397 
2398 		hdata->dthsd_hashsize = prime;
2399 		hsize = hdata->dthsd_hashsize * sizeof (size_t);
2400 		bzero(hdata->dthsd_counts, hsize);
2401 
2402 		if (mdb_pwalk("dtrace_dynvar",
2403 		    (mdb_walk_cb_t)dtrace_dynstat_walk, &data, addr) == -1) {
2404 			mdb_warn("failed to walk dtrace_aggkey at %p", addr);
2405 			return (DCMD_ERR);
2406 		}
2407 
2408 		dtrace_hashstat_stats(_dtrace_hashstat[i].dths_name, hdata);
2409 	}
2410 
2411 	return (DCMD_OK);
2412 }
2413 
/*
 * Private state for the "dtrace_ecb" walker.  It is filled in by
 * dtrace_ecb_init() from the consumer's dtrace_state_t and advanced by
 * dtrace_ecb_step().
 */
typedef struct dtrace_ecb_walk {
	dtrace_ecb_t **dtew_ecbs;	/* copy of dts_ecbs (target address) */
	int dtew_necbs;			/* copy of dts_necbs */
	int dtew_curecb;		/* index of the next slot to read */
} dtrace_ecb_walk_t;
2419 
2420 static int
2421 dtrace_ecb_init(mdb_walk_state_t *wsp)
2422 {
2423 	uintptr_t addr;
2424 	dtrace_state_t state;
2425 	dtrace_ecb_walk_t *ecbwp;
2426 
2427 	if ((addr = wsp->walk_addr) == 0) {
2428 		mdb_warn("dtrace_ecb walk needs dtrace_state_t\n");
2429 		return (WALK_ERR);
2430 	}
2431 
2432 	if (mdb_vread(&state, sizeof (state), addr) == -1) {
2433 		mdb_warn("failed to read dtrace state pointer at %p", addr);
2434 		return (WALK_ERR);
2435 	}
2436 
2437 	ecbwp = mdb_zalloc(sizeof (dtrace_ecb_walk_t), UM_SLEEP | UM_GC);
2438 
2439 	ecbwp->dtew_ecbs = state.dts_ecbs;
2440 	ecbwp->dtew_necbs = state.dts_necbs;
2441 	ecbwp->dtew_curecb = 0;
2442 
2443 	wsp->walk_data = ecbwp;
2444 
2445 	return (WALK_NEXT);
2446 }
2447 
2448 static int
2449 dtrace_ecb_step(mdb_walk_state_t *wsp)
2450 {
2451 	uintptr_t ecbp, addr;
2452 	dtrace_ecb_walk_t *ecbwp = wsp->walk_data;
2453 
2454 	addr = (uintptr_t)ecbwp->dtew_ecbs +
2455 	    ecbwp->dtew_curecb * sizeof (dtrace_ecb_t *);
2456 
2457 	if (ecbwp->dtew_curecb++ == ecbwp->dtew_necbs)
2458 		return (WALK_DONE);
2459 
2460 	if (mdb_vread(&ecbp, sizeof (addr), addr) == -1) {
2461 		mdb_warn("failed to read ecb at entry %d\n",
2462 		    ecbwp->dtew_curecb);
2463 		return (WALK_ERR);
2464 	}
2465 
2466 	if (ecbp == 0)
2467 		return (WALK_NEXT);
2468 
2469 	return (wsp->walk_callback(ecbp, NULL, wsp->walk_cbdata));
2470 }
2471 
2472 static void
2473 dtrace_options_numtostr(uint64_t num, char *buf, size_t len)
2474 {
2475 	uint64_t n = num;
2476 	int index = 0;
2477 	char u;
2478 
2479 	while (n >= 1024) {
2480 		n = (n + (1024 / 2)) / 1024; /* Round up or down */
2481 		index++;
2482 	}
2483 
2484 	u = " KMGTPE"[index];
2485 
2486 	if (index == 0) {
2487 		(void) mdb_snprintf(buf, len, "%llu", (u_longlong_t)n);
2488 	} else if (n < 10 && (num & (num - 1)) != 0) {
2489 		(void) mdb_snprintf(buf, len, "%.2f%c",
2490 		    (double)num / (1ULL << 10 * index), u);
2491 	} else if (n < 100 && (num & (num - 1)) != 0) {
2492 		(void) mdb_snprintf(buf, len, "%.1f%c",
2493 		    (double)num / (1ULL << 10 * index), u);
2494 	} else {
2495 		(void) mdb_snprintf(buf, len, "%llu%c", (u_longlong_t)n, u);
2496 	}
2497 }
2498 
2499 static void
2500 dtrace_options_numtohz(uint64_t num, char *buf, size_t len)
2501 {
2502 	(void) mdb_snprintf(buf, len, "%dhz", NANOSEC/num);
2503 }
2504 
2505 static void
2506 dtrace_options_numtobufpolicy(uint64_t num, char *buf, size_t len)
2507 {
2508 	char *policy = "unknown";
2509 
2510 	switch (num) {
2511 		case DTRACEOPT_BUFPOLICY_RING:
2512 			policy = "ring";
2513 			break;
2514 
2515 		case DTRACEOPT_BUFPOLICY_FILL:
2516 			policy = "fill";
2517 			break;
2518 
2519 		case DTRACEOPT_BUFPOLICY_SWITCH:
2520 			policy = "switch";
2521 			break;
2522 	}
2523 
2524 	(void) mdb_snprintf(buf, len, "%s", policy);
2525 }
2526 
2527 static void
2528 dtrace_options_numtocpu(uint64_t cpu, char *buf, size_t len)
2529 {
2530 	if (cpu == DTRACE_CPUALL)
2531 		(void) mdb_snprintf(buf, len, "%7s", "unbound");
2532 	else
2533 		(void) mdb_snprintf(buf, len, "%d", cpu);
2534 }
2535 
/*
 * Formatter for a single option: renders the option value given as the
 * first argument into the caller's buffer (second argument), whose size in
 * bytes is the third argument.
 */
typedef void (*dtrace_options_func_t)(uint64_t, char *, size_t);
2537 
/*
 * Table of option names and the routine used to format each option's value.
 * ::dtrace_options indexes this table with the same index it uses for the
 * consumer's dts_options[] array, so the order of the entries here is
 * significant.
 */
static struct dtrace_options {
	char *dtop_optstr;		/* option name as displayed */
	dtrace_options_func_t dtop_func; /* value formatter */
} _dtrace_options[] = {
	{ "bufsize", dtrace_options_numtostr },
	{ "bufpolicy", dtrace_options_numtobufpolicy },
	{ "dynvarsize", dtrace_options_numtostr },
	{ "aggsize", dtrace_options_numtostr },
	{ "specsize", dtrace_options_numtostr },
	{ "nspec", dtrace_options_numtostr },
	{ "strsize", dtrace_options_numtostr },
	{ "cleanrate", dtrace_options_numtohz },
	{ "cpu", dtrace_options_numtocpu },
	{ "bufresize", dtrace_options_numtostr },
	{ "grabanon", dtrace_options_numtostr },
	{ "flowindent", dtrace_options_numtostr },
	{ "quiet", dtrace_options_numtostr },
	{ "stackframes", dtrace_options_numtostr },
	{ "ustackframes", dtrace_options_numtostr },
	{ "aggrate", dtrace_options_numtohz },
	{ "switchrate", dtrace_options_numtohz },
	{ "statusrate", dtrace_options_numtohz },
	{ "destructive", dtrace_options_numtostr },
	{ "stackindent", dtrace_options_numtostr },
	{ "rawbytes", dtrace_options_numtostr },
	{ "jstackframes", dtrace_options_numtostr },
	{ "jstackstrsize", dtrace_options_numtostr },
	{ "aggsortkey", dtrace_options_numtostr },
	{ "aggsortrev", dtrace_options_numtostr },
	{ "aggsortpos", dtrace_options_numtostr },
	{ "aggsortkeypos", dtrace_options_numtostr },
	{ "temporal", dtrace_options_numtostr },
	{ "agghist", dtrace_options_numtostr },
	{ "aggpack", dtrace_options_numtostr },
	{ "aggzoom", dtrace_options_numtostr },
	{ "zone", dtrace_options_numtostr }
};
2575 
/*
 * Break the build if the number of entries in _dtrace_options[] ever
 * differs from DTRACEOPT_MAX.
 */
CTASSERT(ARRAY_SIZE(_dtrace_options) == DTRACEOPT_MAX);
2577 
/*
 * Print the help text for the ::dtrace_options dcmd.
 */
static void
dtrace_options_help(void)
{
	mdb_printf("Given a dtrace_state_t structure, "
	    "displays the current tunable option\n"
	    "settings.\n");
}
2584 
2585 /*ARGSUSED*/
2586 static int
2587 dtrace_options(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2588 {
2589 	dtrace_state_t state;
2590 	int i = 0;
2591 	dtrace_optval_t *options;
2592 	char val[32];
2593 
2594 	if (!(flags & DCMD_ADDRSPEC))
2595 		return (DCMD_USAGE);
2596 
2597 	if (mdb_vread(&state, sizeof (dtrace_state_t), (uintptr_t)addr) == -1) {
2598 		mdb_warn("failed to read state pointer at %p\n", addr);
2599 		return (DCMD_ERR);
2600 	}
2601 
2602 	options = &state.dts_options[0];
2603 
2604 	mdb_printf("%<u>%-25s %s%</u>\n", "OPTION", "VALUE");
2605 	for (i = 0; i < DTRACEOPT_MAX; i++) {
2606 		if (options[i] == DTRACEOPT_UNSET) {
2607 			mdb_printf("%-25s %s\n",
2608 			    _dtrace_options[i].dtop_optstr, "UNSET");
2609 		} else {
2610 			(void) _dtrace_options[i].dtop_func(options[i],
2611 			    val, 32);
2612 			mdb_printf("%-25s %s\n",
2613 			    _dtrace_options[i].dtop_optstr, val);
2614 		}
2615 	}
2616 
2617 	return (DCMD_OK);
2618 }
2619 
2620 static int
2621 pid2state_init(mdb_walk_state_t *wsp)
2622 {
2623 	dtrace_state_data_t *data;
2624 	uintptr_t devi;
2625 	uintptr_t proc;
2626 	struct dev_info info;
2627 	pid_t pid = (pid_t)wsp->walk_addr;
2628 
2629 	if (wsp->walk_addr == 0) {
2630 		mdb_warn("pid2state walk requires PID\n");
2631 		return (WALK_ERR);
2632 	}
2633 
2634 	data = mdb_zalloc(sizeof (dtrace_state_data_t), UM_SLEEP | UM_GC);
2635 
2636 	if (mdb_readvar(&data->dtsd_softstate, "dtrace_softstate") == -1) {
2637 		mdb_warn("failed to read 'dtrace_softstate'");
2638 		return (DCMD_ERR);
2639 	}
2640 
2641 	if ((proc = mdb_pid2proc(pid, NULL)) == 0) {
2642 		mdb_warn("PID 0t%d not found\n", pid);
2643 		return (DCMD_ERR);
2644 	}
2645 
2646 	if (mdb_readvar(&devi, "dtrace_devi") == -1) {
2647 		mdb_warn("failed to read 'dtrace_devi'");
2648 		return (DCMD_ERR);
2649 	}
2650 
2651 	if (mdb_vread(&info, sizeof (struct dev_info), devi) == -1) {
2652 		mdb_warn("failed to read 'dev_info'");
2653 		return (DCMD_ERR);
2654 	}
2655 
2656 	data->dtsd_major = info.devi_major;
2657 	data->dtsd_proc = proc;
2658 
2659 	wsp->walk_data = data;
2660 
2661 	return (WALK_NEXT);
2662 }
2663 
2664 /*ARGSUSED*/
2665 static int
2666 pid2state_file(uintptr_t addr, struct file *f, dtrace_state_data_t *data)
2667 {
2668 	vnode_t vnode;
2669 	minor_t minor;
2670 	uintptr_t statep;
2671 
2672 	/* Get the vnode for this file */
2673 	if (mdb_vread(&vnode, sizeof (vnode), (uintptr_t)f->f_vnode) == -1) {
2674 		mdb_warn("couldn't read vnode at %p", (uintptr_t)f->f_vnode);
2675 		return (WALK_NEXT);
2676 	}
2677 
2678 
2679 	/* Is this the dtrace device? */
2680 	if (getmajor(vnode.v_rdev) != data->dtsd_major)
2681 		return (WALK_NEXT);
2682 
2683 	/* Get the minor number for this device entry */
2684 	minor = getminor(vnode.v_rdev);
2685 
2686 	if (mdb_get_soft_state_byaddr(data->dtsd_softstate, minor,
2687 	    &statep, NULL, 0) == -1) {
2688 		mdb_warn("failed to read softstate for minor %d", minor);
2689 		return (WALK_NEXT);
2690 	}
2691 
2692 	mdb_printf("%p\n", statep);
2693 
2694 	return (WALK_NEXT);
2695 }
2696 
2697 static int
2698 pid2state_step(mdb_walk_state_t *wsp)
2699 {
2700 	dtrace_state_data_t *ds = wsp->walk_data;
2701 
2702 	if (mdb_pwalk("file",
2703 	    (mdb_walk_cb_t)pid2state_file, ds, ds->dtsd_proc) == -1) {
2704 		mdb_warn("couldn't walk 'file' for proc %p", ds->dtsd_proc);
2705 		return (WALK_ERR);
2706 	}
2707 
2708 	return (WALK_DONE);
2709 }
2710 
2711 /*ARGSUSED*/
2712 static int
2713 dtrace_probes_walk(uintptr_t addr, void *ignored, uintptr_t *target)
2714 {
2715 	dtrace_ecb_t ecb;
2716 	dtrace_probe_t probe;
2717 	dtrace_probedesc_t pd;
2718 
2719 	if (addr == 0)
2720 		return (WALK_ERR);
2721 
2722 	if (mdb_vread(&ecb, sizeof (dtrace_ecb_t), addr) == -1) {
2723 		mdb_warn("failed to read ecb %p\n", addr);
2724 		return (WALK_ERR);
2725 	}
2726 
2727 	if (ecb.dte_probe == NULL)
2728 		return (WALK_ERR);
2729 
2730 	if (mdb_vread(&probe, sizeof (dtrace_probe_t),
2731 	    (uintptr_t)ecb.dte_probe) == -1) {
2732 		mdb_warn("failed to read probe %p\n", ecb.dte_probe);
2733 		return (WALK_ERR);
2734 	}
2735 
2736 	pd.dtpd_id = probe.dtpr_id;
2737 	dtracemdb_probe(NULL, &pd);
2738 
2739 	mdb_printf("%5d %10s %17s %33s %s\n", pd.dtpd_id, pd.dtpd_provider,
2740 	    pd.dtpd_mod, pd.dtpd_func, pd.dtpd_name);
2741 
2742 	return (WALK_NEXT);
2743 }
2744 
/*
 * Print the help text for the ::dtrace_probes dcmd.
 */
static void
dtrace_probes_help(void)
{
	mdb_printf("Given a dtrace_state_t structure, displays all its "
	    "active enablings.  If no\n"
	    "state structure is provided, all available probes are "
	    "listed.\n");
}
2752 
2753 /*ARGSUSED*/
2754 static int
2755 dtrace_probes(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2756 {
2757 	dtrace_probedesc_t pd;
2758 	uintptr_t caddr, base, paddr;
2759 	int nprobes, i;
2760 
2761 	mdb_printf("%5s %10s %17s %33s %s\n",
2762 	    "ID", "PROVIDER", "MODULE", "FUNCTION", "NAME");
2763 
2764 	if (!(flags & DCMD_ADDRSPEC)) {
2765 		/*
2766 		 * If no argument is provided just display all available
2767 		 * probes.
2768 		 */
2769 		if (mdb_readvar(&base, "dtrace_probes") == -1) {
2770 			mdb_warn("failed to read 'dtrace_probes'");
2771 			return (-1);
2772 		}
2773 
2774 		if (mdb_readvar(&nprobes, "dtrace_nprobes") == -1) {
2775 			mdb_warn("failed to read 'dtrace_nprobes'");
2776 			return (-1);
2777 		}
2778 
2779 		for (i = 0; i < nprobes; i++) {
2780 			caddr = base + i  * sizeof (dtrace_probe_t *);
2781 
2782 			if (mdb_vread(&paddr, sizeof (paddr), caddr) == -1) {
2783 				mdb_warn("couldn't read probe pointer at %p",
2784 				    caddr);
2785 				continue;
2786 			}
2787 
2788 			if (paddr == 0)
2789 				continue;
2790 
2791 			pd.dtpd_id = i + 1;
2792 			if (dtracemdb_probe(NULL, &pd) == 0) {
2793 				mdb_printf("%5d %10s %17s %33s %s\n",
2794 				    pd.dtpd_id, pd.dtpd_provider,
2795 				    pd.dtpd_mod, pd.dtpd_func, pd.dtpd_name);
2796 			}
2797 		}
2798 	} else {
2799 		if (mdb_pwalk("dtrace_ecb", (mdb_walk_cb_t)dtrace_probes_walk,
2800 		    NULL, addr) == -1) {
2801 			mdb_warn("couldn't walk 'dtrace_ecb'");
2802 			return (DCMD_ERR);
2803 		}
2804 	}
2805 
2806 	return (DCMD_OK);
2807 }
2808 
/*
 * Table of dcmds for the kernel target.  Each entry gives the dcmd name,
 * its usage string, a one-line description, the function implementing it
 * and, for some entries, a help function.  The list ends with a NULL entry.
 */
const mdb_dcmd_t kernel_dcmds[] = {
	{ "id2probe", ":", "translate a dtrace_id_t to a dtrace_probe_t",
	    id2probe },
	{ "dtrace", ":[-c cpu]", "print dtrace(8)-like output",
	    dtrace, dtrace_help },
	{ "dtrace_errhash", ":", "print DTrace error hash", dtrace_errhash },
	{ "dtrace_helptrace", ":", "print DTrace helper trace",
	    dtrace_helptrace },
	{ "dtrace_state", ":", "print active DTrace consumers", dtrace_state,
	    dtrace_state_help },
	{ "dtrace_aggstat", ":",
	    "print DTrace aggregation hash statistics", dtrace_aggstat },
	{ "dtrace_dynstat", ":",
	    "print DTrace dynamic variable hash statistics", dtrace_dynstat },
	{ "dtrace_options", ":",
	    "print a DTrace consumer's current tuneable options",
	    dtrace_options, dtrace_options_help },
	{ "dtrace_probes", "?", "print a DTrace consumer's enabled probes",
	    dtrace_probes, dtrace_probes_help },
	{ NULL }
};
2830 
2831 const mdb_walker_t kernel_walkers[] = {
2832 	{ "dtrace_errhash", "walk hash of DTrace error messasges",
2833 		dtrace_errhash_init, dtrace_errhash_step },
2834 	{ "dtrace_helptrace", "walk DTrace helper trace entries",
2835 		dtrace_helptrace_init, dtrace_helptrace_step },
2836 	{ "dtrace_state", "walk DTrace per-consumer softstate",
2837 		dtrace_state_init, dtrace_state_step },
2838 	{ "dtrace_aggkey", "walk DTrace aggregation keys",
2839 		dtrace_aggkey_init, dtrace_aggkey_step, dtrace_aggkey_fini },
2840 	{ "dtrace_dynvar", "walk DTrace dynamic variables",
2841 		dtrace_dynvar_init, dtrace_dynvar_step, dtrace_dynvar_fini },
2842 	{ "dtrace_ecb", "walk a DTrace consumer's enabling control blocks",
2843 		dtrace_ecb_init, dtrace_ecb_step },
2844 	{ "pid2state", "walk a processes dtrace_state structures",
2845 	    pid2state_init, pid2state_step },
2846 	{ NULL }
2847 };
2848