xref: /illumos-gate/usr/src/cmd/mdb/common/modules/dtrace/dtrace.c (revision 6fc89bfc8e69fd45d8778b2f0ad45efc0ded99ed)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2013 by Delphix. All rights reserved.
25  * Copyright 2019 Joyent, Inc.
26  * Copyright 2022 Racktop Systems, Inc.
27  * Copyright 2025 Oxide Computer Company
28  */
29 
30 /*
31  * explicitly define DTRACE_ERRDEBUG to pull in definition of dtrace_errhash_t
32  * explicitly define _STDARG_H to avoid stdarg.h/varargs.h u/k defn conflict
33  */
34 #define	DTRACE_ERRDEBUG
35 #define	_STDARG_H
36 
#include <mdb/mdb_param.h>
#include <mdb/mdb_modapi.h>
#include <mdb/mdb_ctf.h>
#include <mdb/mdb_ks.h>
#include <sys/dtrace_impl.h>
#include <sys/vmem_impl.h>
#include <sys/ddi_impldefs.h>
#include <sys/sysmacros.h>
#include <sys/kobj.h>
#include <dtrace.h>
#include <alloca.h>
#include <ctype.h>
#include <errno.h>
#include <math.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
53 
54 /*ARGSUSED*/
55 int
56 id2probe(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
57 {
58 	uintptr_t probe = 0;
59 	uintptr_t probes;
60 
61 	if (!(flags & DCMD_ADDRSPEC))
62 		return (DCMD_USAGE);
63 
64 	if (addr == DTRACE_IDNONE || addr > UINT32_MAX)
65 		goto out;
66 
67 	if (mdb_readvar(&probes, "dtrace_probes") == -1) {
68 		mdb_warn("failed to read 'dtrace_probes'");
69 		return (DCMD_ERR);
70 	}
71 
72 	probes += (addr - 1) * sizeof (dtrace_probe_t *);
73 
74 	if (mdb_vread(&probe, sizeof (uintptr_t), probes) == -1) {
75 		mdb_warn("failed to read dtrace_probes[%d]", addr - 1);
76 		return (DCMD_ERR);
77 	}
78 
79 out:
80 	mdb_printf("%p\n", probe);
81 	return (DCMD_OK);
82 }
83 
/*
 * Print the help text for the ::dtrace dcmd.  Each call below emits exactly
 * one line of the help message.
 */
void
dtrace_help(void)
{
	mdb_printf("Given a dtrace_state_t structure that represents a DTrace "
	    "consumer, prints\n");
	mdb_printf("dtrace(8)-like output for in-kernel DTrace data.  (The "
	    "dtrace_state_t\n");
	mdb_printf("structures for all DTrace consumers may be obtained by "
	    "running the \n");
	mdb_printf("::dtrace_state dcmd.)   When data is present on multiple "
	    "CPUs, data are\n");
	mdb_printf("presented in CPU order, with records within each CPU "
	    "ordered oldest to \n");
	mdb_printf("youngest.  Options:\n\n");
	mdb_printf("-c cpu     Only provide output for specified CPU.\n");
}
101 
102 static int
103 dtracemdb_eprobe(dtrace_state_t *state, dtrace_eprobedesc_t *epd)
104 {
105 	dtrace_epid_t epid = epd->dtepd_epid;
106 	dtrace_probe_t probe;
107 	dtrace_ecb_t ecb;
108 	uintptr_t addr, paddr, ap;
109 	dtrace_action_t act;
110 	int nactions, nrecs;
111 
112 	addr = (uintptr_t)state->dts_ecbs +
113 	    (epid - 1) * sizeof (dtrace_ecb_t *);
114 
115 	if (mdb_vread(&addr, sizeof (addr), addr) == -1) {
116 		mdb_warn("failed to read ecb for epid %d", epid);
117 		return (-1);
118 	}
119 
120 	if (addr == 0) {
121 		mdb_warn("epid %d doesn't match an ecb\n", epid);
122 		return (-1);
123 	}
124 
125 	if (mdb_vread(&ecb, sizeof (ecb), addr) == -1) {
126 		mdb_warn("failed to read ecb at %p", addr);
127 		return (-1);
128 	}
129 
130 	paddr = (uintptr_t)ecb.dte_probe;
131 
132 	if (mdb_vread(&probe, sizeof (probe), paddr) == -1) {
133 		mdb_warn("failed to read probe for ecb %p", addr);
134 		return (-1);
135 	}
136 
137 	/*
138 	 * This is a little painful:  in order to find the number of actions,
139 	 * we need to first walk through them.
140 	 */
141 	for (ap = (uintptr_t)ecb.dte_action, nactions = 0; ap != 0; ) {
142 		if (mdb_vread(&act, sizeof (act), ap) == -1) {
143 			mdb_warn("failed to read action %p on ecb %p",
144 			    ap, addr);
145 			return (-1);
146 		}
147 
148 		if (!DTRACEACT_ISAGG(act.dta_kind) && !act.dta_intuple)
149 			nactions++;
150 
151 		ap = (uintptr_t)act.dta_next;
152 	}
153 
154 	nrecs = epd->dtepd_nrecs;
155 	epd->dtepd_nrecs = nactions;
156 	epd->dtepd_probeid = probe.dtpr_id;
157 	epd->dtepd_uarg = ecb.dte_uarg;
158 	epd->dtepd_size = ecb.dte_size;
159 
160 	for (ap = (uintptr_t)ecb.dte_action, nactions = 0; ap != 0; ) {
161 		if (mdb_vread(&act, sizeof (act), ap) == -1) {
162 			mdb_warn("failed to read action %p on ecb %p",
163 			    ap, addr);
164 			return (-1);
165 		}
166 
167 		if (!DTRACEACT_ISAGG(act.dta_kind) && !act.dta_intuple) {
168 			if (nrecs-- == 0)
169 				break;
170 
171 			epd->dtepd_rec[nactions++] = act.dta_rec;
172 		}
173 
174 		ap = (uintptr_t)act.dta_next;
175 	}
176 
177 	return (0);
178 }
179 
180 /*ARGSUSED*/
181 static int
182 dtracemdb_probe(dtrace_state_t *state, dtrace_probedesc_t *pd)
183 {
184 	uintptr_t base, addr, paddr, praddr;
185 	int nprobes, i;
186 	dtrace_probe_t probe;
187 	dtrace_provider_t prov;
188 
189 	if (pd->dtpd_id == DTRACE_IDNONE)
190 		pd->dtpd_id++;
191 
192 	if (mdb_readvar(&base, "dtrace_probes") == -1) {
193 		mdb_warn("failed to read 'dtrace_probes'");
194 		return (-1);
195 	}
196 
197 	if (mdb_readvar(&nprobes, "dtrace_nprobes") == -1) {
198 		mdb_warn("failed to read 'dtrace_nprobes'");
199 		return (-1);
200 	}
201 
202 	for (i = pd->dtpd_id; i <= nprobes; i++) {
203 		addr = base + (i - 1) * sizeof (dtrace_probe_t *);
204 
205 		if (mdb_vread(&paddr, sizeof (paddr), addr) == -1) {
206 			mdb_warn("couldn't read probe pointer at %p", addr);
207 			return (-1);
208 		}
209 
210 		if (paddr != 0)
211 			break;
212 	}
213 
214 	if (paddr == 0) {
215 		errno = ESRCH;
216 		return (-1);
217 	}
218 
219 	if (mdb_vread(&probe, sizeof (probe), paddr) == -1) {
220 		mdb_warn("couldn't read probe at %p", paddr);
221 		return (-1);
222 	}
223 
224 	pd->dtpd_id = probe.dtpr_id;
225 
226 	if (mdb_vread(pd->dtpd_name, DTRACE_NAMELEN,
227 	    (uintptr_t)probe.dtpr_name) == -1) {
228 		mdb_warn("failed to read probe name for probe %p", paddr);
229 		return (-1);
230 	}
231 
232 	if (mdb_vread(pd->dtpd_func, DTRACE_FUNCNAMELEN,
233 	    (uintptr_t)probe.dtpr_func) == -1) {
234 		mdb_warn("failed to read function name for probe %p", paddr);
235 		return (-1);
236 	}
237 
238 	if (mdb_vread(pd->dtpd_mod, DTRACE_MODNAMELEN,
239 	    (uintptr_t)probe.dtpr_mod) == -1) {
240 		mdb_warn("failed to read module name for probe %p", paddr);
241 		return (-1);
242 	}
243 
244 	praddr = (uintptr_t)probe.dtpr_provider;
245 
246 	if (mdb_vread(&prov, sizeof (prov), praddr) == -1) {
247 		mdb_warn("failed to read provider for probe %p", paddr);
248 		return (-1);
249 	}
250 
251 	if (mdb_vread(pd->dtpd_provider, DTRACE_PROVNAMELEN,
252 	    (uintptr_t)prov.dtpv_name) == -1) {
253 		mdb_warn("failed to read provider name for probe %p", paddr);
254 		return (-1);
255 	}
256 
257 	return (0);
258 }
259 
260 /*ARGSUSED*/
261 static int
262 dtracemdb_aggdesc(dtrace_state_t *state, dtrace_aggdesc_t *agd)
263 {
264 	dtrace_aggid_t aggid = agd->dtagd_id;
265 	dtrace_aggregation_t agg;
266 	dtrace_ecb_t ecb;
267 	uintptr_t addr, eaddr, ap, last;
268 	dtrace_action_t act;
269 	dtrace_recdesc_t *lrec;
270 	int nactions, nrecs;
271 
272 	addr = (uintptr_t)state->dts_aggregations +
273 	    (aggid - 1) * sizeof (dtrace_aggregation_t *);
274 
275 	if (mdb_vread(&addr, sizeof (addr), addr) == -1) {
276 		mdb_warn("failed to read aggregation for aggid %d", aggid);
277 		return (-1);
278 	}
279 
280 	if (addr == 0) {
281 		mdb_warn("aggid %d doesn't match an aggregation\n", aggid);
282 		return (-1);
283 	}
284 
285 	if (mdb_vread(&agg, sizeof (agg), addr) == -1) {
286 		mdb_warn("failed to read aggregation at %p", addr);
287 		return (-1);
288 	}
289 
290 	eaddr = (uintptr_t)agg.dtag_ecb;
291 
292 	if (mdb_vread(&ecb, sizeof (ecb), eaddr) == -1) {
293 		mdb_warn("failed to read ecb for aggregation %p", addr);
294 		return (-1);
295 	}
296 
297 	last = (uintptr_t)addr + offsetof(dtrace_aggregation_t, dtag_action);
298 
299 	/*
300 	 * This is a little painful:  in order to find the number of actions,
301 	 * we need to first walk through them.
302 	 */
303 	ap = (uintptr_t)agg.dtag_first;
304 	nactions = 0;
305 
306 	for (;;) {
307 		if (mdb_vread(&act, sizeof (act), ap) == -1) {
308 			mdb_warn("failed to read action %p on aggregation %p",
309 			    ap, addr);
310 			return (-1);
311 		}
312 
313 		nactions++;
314 
315 		if (ap == last)
316 			break;
317 
318 		ap = (uintptr_t)act.dta_next;
319 	}
320 
321 	lrec = &act.dta_rec;
322 	agd->dtagd_size = lrec->dtrd_offset + lrec->dtrd_size - agg.dtag_base;
323 
324 	nrecs = agd->dtagd_nrecs;
325 	agd->dtagd_nrecs = nactions;
326 	agd->dtagd_epid = ecb.dte_epid;
327 
328 	ap = (uintptr_t)agg.dtag_first;
329 	nactions = 0;
330 
331 	for (;;) {
332 		dtrace_recdesc_t rec;
333 
334 		if (mdb_vread(&act, sizeof (act), ap) == -1) {
335 			mdb_warn("failed to read action %p on aggregation %p",
336 			    ap, addr);
337 			return (-1);
338 		}
339 
340 		if (nrecs-- == 0)
341 			break;
342 
343 		rec = act.dta_rec;
344 		rec.dtrd_offset -= agg.dtag_base;
345 		rec.dtrd_uarg = 0;
346 		agd->dtagd_rec[nactions++] = rec;
347 
348 		if (ap == last)
349 			break;
350 
351 		ap = (uintptr_t)act.dta_next;
352 	}
353 
354 	return (0);
355 }
356 
357 static int
358 dtracemdb_bufsnap(dtrace_buffer_t *which, dtrace_bufdesc_t *desc)
359 {
360 	static hrtime_t hr_offset = 0;
361 	static boolean_t offset_set = B_FALSE;
362 	uintptr_t addr;
363 	size_t bufsize;
364 	dtrace_buffer_t buf;
365 	caddr_t data = desc->dtbd_data;
366 	processorid_t max_cpuid, cpu = desc->dtbd_cpu;
367 
368 	if (mdb_readvar(&max_cpuid, "max_cpuid") == -1) {
369 		mdb_warn("failed to read 'max_cpuid'");
370 		errno = EIO;
371 		return (-1);
372 	}
373 
374 	if (cpu < 0 || cpu > max_cpuid) {
375 		errno = EINVAL;
376 		return (-1);
377 	}
378 
379 	addr = (uintptr_t)which + cpu * sizeof (dtrace_buffer_t);
380 
381 	if (mdb_vread(&buf, sizeof (buf), addr) == -1) {
382 		mdb_warn("failed to read buffer description at %p", addr);
383 		errno = EIO;
384 		return (-1);
385 	}
386 
387 	if (buf.dtb_tomax == NULL) {
388 		errno = ENOENT;
389 		return (-1);
390 	}
391 
392 	if (buf.dtb_flags & DTRACEBUF_WRAPPED) {
393 		bufsize = buf.dtb_size;
394 	} else {
395 		bufsize = buf.dtb_offset;
396 	}
397 
398 	if (mdb_vread(data, bufsize, (uintptr_t)buf.dtb_tomax) == -1) {
399 		mdb_warn("couldn't read buffer for CPU %d", cpu);
400 		errno = EIO;
401 		return (-1);
402 	}
403 
404 	if (buf.dtb_offset > buf.dtb_size) {
405 		mdb_warn("buffer for CPU %d has corrupt offset\n", cpu);
406 		errno = EIO;
407 		return (-1);
408 	}
409 
410 	if (buf.dtb_flags & DTRACEBUF_WRAPPED) {
411 		if (buf.dtb_xamot_offset > buf.dtb_size) {
412 			mdb_warn("ringbuffer for CPU %d has corrupt "
413 			    "wrapped offset\n", cpu);
414 			errno = EIO;
415 			return (-1);
416 		}
417 
418 		/*
419 		 * If the ring buffer has wrapped, it needs to be polished.
420 		 * See the comment in dtrace_buffer_polish() for details.
421 		 */
422 		if (buf.dtb_offset < buf.dtb_xamot_offset) {
423 			bzero(data + buf.dtb_offset,
424 			    buf.dtb_xamot_offset - buf.dtb_offset);
425 		}
426 
427 		if (buf.dtb_offset > buf.dtb_xamot_offset) {
428 			bzero(data + buf.dtb_offset,
429 			    buf.dtb_size - buf.dtb_offset);
430 			bzero(data, buf.dtb_xamot_offset);
431 		}
432 
433 		desc->dtbd_oldest = buf.dtb_xamot_offset;
434 	} else {
435 		desc->dtbd_oldest = 0;
436 	}
437 
438 	/*
439 	 * On a live system, dtbd_timestamp is set to gethrtime() when the
440 	 * DTRACEIOC_BUFSNAP ioctl is called. The effect of this is that the
441 	 * timestamps of all the enabled probe records in the buf will always
442 	 * be less than dtbd_timestamp. dtrace_consume() relies on this
443 	 * invariant to determine when it needs to retrieve more dtrace bufs
444 	 * from the kernel.
445 	 *
446 	 * However when mdb is reading a crash dump, the value of
447 	 * gethrtime() on the system running mdb may smaller than the
448 	 * enabled probe records in the crash dump, violating the invariant
449 	 * dtrace_consume() is relying on. This can cause dtrace_consume()
450 	 * to prematurely stop processing records.
451 	 *
452 	 * To preserve the invariant dtrace_consume() requires, we simply
453 	 * add the value of panic_hrtime to gethrtime() when setting
454 	 * dtdb_timestamp. On a live system, panic_hrtime will be 0, and
455 	 * the invariant will be preserved by virtue of being running on
456 	 * a live system. On a crash dump, no valid probe record can have a
457 	 * timestamp greater than panic_hrtime, so adding this to the value
458 	 * of gethrtime() will guarantee the invariant expected by
459 	 * dtrace_consume() is preserved.
460 	 */
461 	if (!offset_set) {
462 		hrtime_t panic_hrtime;
463 
464 		/*
465 		 * We could be slightly more clever and only set hr_offset
466 		 * if gethrtime() in mdb is < panic_hrtime, but it doesn't
467 		 * seem necessary. If for some reason, we cannot read
468 		 * panic_hrtime, we'll try to continue -- ::dtrace may
469 		 * still succeed, so we just warn and continue.
470 		 */
471 		if (mdb_readvar(&panic_hrtime, "panic_hrtime") == -1) {
472 			mdb_warn("failed to read 'panic_hrtime' -- "
473 			    "some dtrace data may not be displayed");
474 		} else {
475 			hr_offset = panic_hrtime;
476 		}
477 		offset_set = B_TRUE;
478 	}
479 
480 	desc->dtbd_size = bufsize;
481 	desc->dtbd_drops = buf.dtb_drops;
482 	desc->dtbd_errors = buf.dtb_errors;
483 	desc->dtbd_timestamp = gethrtime() + hr_offset;
484 
485 	return (0);
486 }
487 
488 /*
489  * This is essentially identical to its cousin in the kernel -- with the
490  * notable exception that we automatically set DTRACEOPT_GRABANON if this
491  * state is an anonymous enabling.
492  */
493 static dof_hdr_t *
494 dtracemdb_dof_create(dtrace_state_t *state, int isanon)
495 {
496 	dof_hdr_t *dof;
497 	dof_sec_t *sec;
498 	dof_optdesc_t *opt;
499 	int i, len = sizeof (dof_hdr_t) +
500 	    roundup(sizeof (dof_sec_t), sizeof (uint64_t)) +
501 	    sizeof (dof_optdesc_t) * DTRACEOPT_MAX;
502 
503 	dof = mdb_zalloc(len, UM_SLEEP);
504 	dof->dofh_ident[DOF_ID_MAG0] = DOF_MAG_MAG0;
505 	dof->dofh_ident[DOF_ID_MAG1] = DOF_MAG_MAG1;
506 	dof->dofh_ident[DOF_ID_MAG2] = DOF_MAG_MAG2;
507 	dof->dofh_ident[DOF_ID_MAG3] = DOF_MAG_MAG3;
508 
509 	dof->dofh_ident[DOF_ID_MODEL] = DOF_MODEL_NATIVE;
510 	dof->dofh_ident[DOF_ID_ENCODING] = DOF_ENCODE_NATIVE;
511 	dof->dofh_ident[DOF_ID_VERSION] = DOF_VERSION;
512 	dof->dofh_ident[DOF_ID_DIFVERS] = DIF_VERSION;
513 	dof->dofh_ident[DOF_ID_DIFIREG] = DIF_DIR_NREGS;
514 	dof->dofh_ident[DOF_ID_DIFTREG] = DIF_DTR_NREGS;
515 
516 	dof->dofh_flags = 0;
517 	dof->dofh_hdrsize = sizeof (dof_hdr_t);
518 	dof->dofh_secsize = sizeof (dof_sec_t);
519 	dof->dofh_secnum = 1;	/* only DOF_SECT_OPTDESC */
520 	dof->dofh_secoff = sizeof (dof_hdr_t);
521 	dof->dofh_loadsz = len;
522 	dof->dofh_filesz = len;
523 	dof->dofh_pad = 0;
524 
525 	/*
526 	 * Fill in the option section header...
527 	 */
528 	sec = (dof_sec_t *)((uintptr_t)dof + sizeof (dof_hdr_t));
529 	sec->dofs_type = DOF_SECT_OPTDESC;
530 	sec->dofs_align = sizeof (uint64_t);
531 	sec->dofs_flags = DOF_SECF_LOAD;
532 	sec->dofs_entsize = sizeof (dof_optdesc_t);
533 
534 	opt = (dof_optdesc_t *)((uintptr_t)sec +
535 	    roundup(sizeof (dof_sec_t), sizeof (uint64_t)));
536 
537 	sec->dofs_offset = (uintptr_t)opt - (uintptr_t)dof;
538 	sec->dofs_size = sizeof (dof_optdesc_t) * DTRACEOPT_MAX;
539 
540 	for (i = 0; i < DTRACEOPT_MAX; i++) {
541 		opt[i].dofo_option = i;
542 		opt[i].dofo_strtab = DOF_SECIDX_NONE;
543 		opt[i].dofo_value = state->dts_options[i];
544 	}
545 
546 	if (isanon)
547 		opt[DTRACEOPT_GRABANON].dofo_value = 1;
548 
549 	return (dof);
550 }
551 
552 static int
553 dtracemdb_format(dtrace_state_t *state, dtrace_fmtdesc_t *desc)
554 {
555 	uintptr_t addr, faddr;
556 	char c;
557 	int len = 0;
558 
559 	if (desc->dtfd_format == 0 || desc->dtfd_format > state->dts_nformats) {
560 		errno = EINVAL;
561 		return (-1);
562 	}
563 
564 	faddr = (uintptr_t)state->dts_formats +
565 	    (desc->dtfd_format - 1) * sizeof (char *);
566 
567 	if (mdb_vread(&addr, sizeof (addr), faddr) == -1) {
568 		mdb_warn("failed to read format string pointer at %p", faddr);
569 		return (-1);
570 	}
571 
572 	do {
573 		if (mdb_vread(&c, sizeof (c), addr + len++) == -1) {
574 			mdb_warn("failed to read format string at %p", addr);
575 			return (-1);
576 		}
577 	} while (c != '\0');
578 
579 	if (len > desc->dtfd_length) {
580 		desc->dtfd_length = len;
581 		return (0);
582 	}
583 
584 	if (mdb_vread(desc->dtfd_string, len, addr) == -1) {
585 		mdb_warn("failed to reread format string at %p", addr);
586 		return (-1);
587 	}
588 
589 	return (0);
590 }
591 
592 static int
593 dtracemdb_status(dtrace_state_t *state, dtrace_status_t *status)
594 {
595 	dtrace_dstate_t *dstate;
596 	int i, j;
597 	uint64_t nerrs;
598 	uintptr_t addr;
599 	int ncpu;
600 
601 	if (mdb_readvar(&ncpu, "_ncpu") == -1) {
602 		mdb_warn("failed to read '_ncpu'");
603 		return (DCMD_ERR);
604 	}
605 
606 	bzero(status, sizeof (dtrace_status_t));
607 
608 	if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) {
609 		errno = ENOENT;
610 		return (-1);
611 	}
612 
613 	/*
614 	 * For the MDB backend, we never set dtst_exiting or dtst_filled.  This
615 	 * is by design:  we don't want the library to try to stop tracing,
616 	 * because it doesn't particularly mean anything.
617 	 */
618 	nerrs = state->dts_errors;
619 	dstate = &state->dts_vstate.dtvs_dynvars;
620 
621 	for (i = 0; i < ncpu; i++) {
622 		dtrace_dstate_percpu_t dcpu;
623 		dtrace_buffer_t buf;
624 
625 		addr = (uintptr_t)&dstate->dtds_percpu[i];
626 
627 		if (mdb_vread(&dcpu, sizeof (dcpu), addr) == -1) {
628 			mdb_warn("failed to read per-CPU dstate at %p", addr);
629 			return (-1);
630 		}
631 
632 		status->dtst_dyndrops += dcpu.dtdsc_drops;
633 		status->dtst_dyndrops_dirty += dcpu.dtdsc_dirty_drops;
634 		status->dtst_dyndrops_rinsing += dcpu.dtdsc_rinsing_drops;
635 
636 		addr = (uintptr_t)&state->dts_buffer[i];
637 
638 		if (mdb_vread(&buf, sizeof (buf), addr) == -1) {
639 			mdb_warn("failed to read per-CPU buffer at %p", addr);
640 			return (-1);
641 		}
642 
643 		nerrs += buf.dtb_errors;
644 
645 		for (j = 0; j < state->dts_nspeculations; j++) {
646 			dtrace_speculation_t spec;
647 
648 			addr = (uintptr_t)&state->dts_speculations[j];
649 
650 			if (mdb_vread(&spec, sizeof (spec), addr) == -1) {
651 				mdb_warn("failed to read "
652 				    "speculation at %p", addr);
653 				return (-1);
654 			}
655 
656 			addr = (uintptr_t)&spec.dtsp_buffer[i];
657 
658 			if (mdb_vread(&buf, sizeof (buf), addr) == -1) {
659 				mdb_warn("failed to read "
660 				    "speculative buffer at %p", addr);
661 				return (-1);
662 			}
663 
664 			status->dtst_specdrops += buf.dtb_xamot_drops;
665 		}
666 	}
667 
668 	status->dtst_specdrops_busy = state->dts_speculations_busy;
669 	status->dtst_specdrops_unavail = state->dts_speculations_unavail;
670 	status->dtst_errors = nerrs;
671 
672 	return (0);
673 }
674 
/*
 * Private data for the dtrace_mdbops callbacks; a pointer to this structure
 * is what dtracemdb_ioctl() and dtracemdb_lookup_by_addr() receive as their
 * 'varg' argument.
 */
typedef struct dtracemdb_data {
	/* state that dtracemdb_ioctl() answers requests from */
	dtrace_state_t *dtmd_state;
	/* symbol name buffer, allocated on first address lookup */
	char *dtmd_symstr;
	/* module name buffer, allocated on first address lookup */
	char *dtmd_modstr;
	/* address being resolved to a module by the modctl walk */
	uintptr_t dtmd_addr;
	/* non-zero to force DTRACEOPT_GRABANON in the generated DOF */
	int dtmd_isanon;
} dtracemdb_data_t;
682 
683 static int
684 dtracemdb_ioctl(void *varg, int cmd, void *arg)
685 {
686 	dtracemdb_data_t *data = varg;
687 	dtrace_state_t *state = data->dtmd_state;
688 
689 	switch (cmd) {
690 	case DTRACEIOC_CONF: {
691 		dtrace_conf_t *conf = arg;
692 
693 		bzero(conf, sizeof (conf));
694 		conf->dtc_difversion = DIF_VERSION;
695 		conf->dtc_difintregs = DIF_DIR_NREGS;
696 		conf->dtc_diftupregs = DIF_DTR_NREGS;
697 		conf->dtc_ctfmodel = CTF_MODEL_NATIVE;
698 
699 		return (0);
700 	}
701 
702 	case DTRACEIOC_DOFGET: {
703 		dof_hdr_t *hdr = arg, *dof;
704 
705 		dof = dtracemdb_dof_create(state, data->dtmd_isanon);
706 		bcopy(dof, hdr, MIN(hdr->dofh_loadsz, dof->dofh_loadsz));
707 		mdb_free(dof, dof->dofh_loadsz);
708 
709 		return (0);
710 	}
711 
712 	case DTRACEIOC_BUFSNAP:
713 		return (dtracemdb_bufsnap(state->dts_buffer, arg));
714 
715 	case DTRACEIOC_AGGSNAP:
716 		return (dtracemdb_bufsnap(state->dts_aggbuffer, arg));
717 
718 	case DTRACEIOC_AGGDESC:
719 		return (dtracemdb_aggdesc(state, arg));
720 
721 	case DTRACEIOC_EPROBE:
722 		return (dtracemdb_eprobe(state, arg));
723 
724 	case DTRACEIOC_PROBES:
725 		return (dtracemdb_probe(state, arg));
726 
727 	case DTRACEIOC_FORMAT:
728 		return (dtracemdb_format(state, arg));
729 
730 	case DTRACEIOC_STATUS:
731 		return (dtracemdb_status(state, arg));
732 
733 	case DTRACEIOC_GO:
734 		*(processorid_t *)arg = -1;
735 		return (0);
736 
737 	case DTRACEIOC_ENABLE:
738 		errno = ENOTTY; /* see dt_open.c:dtrace_go() */
739 		return (-1);
740 
741 	case DTRACEIOC_PROVIDER:
742 	case DTRACEIOC_PROBEMATCH:
743 		errno = ESRCH;
744 		return (-1);
745 
746 	default:
747 		mdb_warn("unexpected ioctl 0x%x (%s)\n", cmd,
748 		    cmd == DTRACEIOC_PROVIDER	? "DTRACEIOC_PROVIDER" :
749 		    cmd == DTRACEIOC_PROBES	? "DTRACEIOC_PROBES" :
750 		    cmd == DTRACEIOC_BUFSNAP	? "DTRACEIOC_BUFSNAP" :
751 		    cmd == DTRACEIOC_PROBEMATCH	? "DTRACEIOC_PROBEMATCH" :
752 		    cmd == DTRACEIOC_ENABLE	? "DTRACEIOC_ENABLE" :
753 		    cmd == DTRACEIOC_AGGSNAP	? "DTRACEIOC_AGGSNAP" :
754 		    cmd == DTRACEIOC_EPROBE	? "DTRACEIOC_EPROBE" :
755 		    cmd == DTRACEIOC_PROBEARG	? "DTRACEIOC_PROBEARG" :
756 		    cmd == DTRACEIOC_CONF	? "DTRACEIOC_CONF" :
757 		    cmd == DTRACEIOC_STATUS	? "DTRACEIOC_STATUS" :
758 		    cmd == DTRACEIOC_GO		? "DTRACEIOC_GO" :
759 		    cmd == DTRACEIOC_STOP	? "DTRACEIOC_STOP" :
760 		    cmd == DTRACEIOC_AGGDESC	? "DTRACEIOC_AGGDESC" :
761 		    cmd == DTRACEIOC_FORMAT	? "DTRACEIOC_FORMAT" :
762 		    cmd == DTRACEIOC_DOFGET	? "DTRACEIOC_DOFGET" :
763 		    cmd == DTRACEIOC_REPLICATE	? "DTRACEIOC_REPLICATE" :
764 		    "???");
765 		errno = ENXIO;
766 		return (-1);
767 	}
768 }
769 
/*
 * The members of the target's "struct module" that dtracemdb_modctl() needs.
 * The structure is filled in by mdb_ctf_vread(), which is given both type
 * names.
 */
struct dtrace_ctf_module {
	char *text;		/* start of the module's text */
	size_t text_size;	/* size of the module's text, in bytes */
};
774 
775 static int
776 dtracemdb_modctl(uintptr_t addr, const struct modctl *m, dtracemdb_data_t *data)
777 {
778 	struct dtrace_ctf_module mod;
779 
780 	if (m->mod_mp == NULL)
781 		return (WALK_NEXT);
782 
783 	if (mdb_ctf_vread(&mod, "struct module", "struct dtrace_ctf_module",
784 	    (uintptr_t)m->mod_mp, 0) == -1) {
785 		mdb_warn("couldn't read modctl %p's module", addr);
786 		return (WALK_NEXT);
787 	}
788 
789 	if ((uintptr_t)mod.text > data->dtmd_addr)
790 		return (WALK_NEXT);
791 
792 	if ((uintptr_t)mod.text + mod.text_size <= data->dtmd_addr)
793 		return (WALK_NEXT);
794 
795 	if (mdb_readstr(data->dtmd_modstr, MDB_SYM_NAMLEN,
796 	    (uintptr_t)m->mod_modname) == -1)
797 		return (WALK_ERR);
798 
799 	return (WALK_DONE);
800 }
801 
802 static int
803 dtracemdb_lookup_by_addr(void *varg, GElf_Addr addr, GElf_Sym *symp,
804     dtrace_syminfo_t *sip)
805 {
806 	dtracemdb_data_t *data = varg;
807 
808 	if (data->dtmd_symstr == NULL) {
809 		data->dtmd_symstr = mdb_zalloc(MDB_SYM_NAMLEN,
810 		    UM_SLEEP | UM_GC);
811 	}
812 
813 	if (data->dtmd_modstr == NULL) {
814 		data->dtmd_modstr = mdb_zalloc(MDB_SYM_NAMLEN,
815 		    UM_SLEEP | UM_GC);
816 	}
817 
818 	if (symp != NULL) {
819 		if (mdb_lookup_by_addr(addr, MDB_SYM_FUZZY, data->dtmd_symstr,
820 		    MDB_SYM_NAMLEN, symp) == -1)
821 			return (-1);
822 	}
823 
824 	if (sip != NULL) {
825 		data->dtmd_addr = addr;
826 
827 		(void) strcpy(data->dtmd_modstr, "???");
828 
829 		if (mdb_walk("modctl",
830 		    (mdb_walk_cb_t)dtracemdb_modctl, varg) == -1) {
831 			mdb_warn("couldn't walk 'modctl'");
832 			return (-1);
833 		}
834 
835 		sip->dts_object = data->dtmd_modstr;
836 		sip->dts_id = 0;
837 		sip->dts_name = symp != NULL ? data->dtmd_symstr : NULL;
838 	}
839 
840 	return (0);
841 }
842 
843 /*ARGSUSED*/
844 static int
845 dtracemdb_stat(void *varg, processorid_t cpu)
846 {
847 	GElf_Sym sym;
848 	cpu_t c;
849 	uintptr_t caddr, addr;
850 
851 	if (mdb_lookup_by_name("cpu", &sym) == -1) {
852 		mdb_warn("failed to find symbol for 'cpu'");
853 		return (-1);
854 	}
855 
856 	if (cpu * sizeof (uintptr_t) > sym.st_size)
857 		return (-1);
858 
859 	addr = (uintptr_t)sym.st_value + cpu * sizeof (uintptr_t);
860 
861 	if (mdb_vread(&caddr, sizeof (caddr), addr) == -1) {
862 		mdb_warn("failed to read cpu[%d]", cpu);
863 		return (-1);
864 	}
865 
866 	if (caddr == 0)
867 		return (-1);
868 
869 	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
870 		mdb_warn("failed to read cpu at %p", caddr);
871 		return (-1);
872 	}
873 
874 	if (c.cpu_flags & CPU_POWEROFF) {
875 		return (P_POWEROFF);
876 	} else if (c.cpu_flags & CPU_SPARE) {
877 		return (P_SPARE);
878 	} else if (c.cpu_flags & CPU_FAULTED) {
879 		return (P_FAULTED);
880 	} else if (c.cpu_flags & CPU_DISABLED) {
881 		return (P_DISABLED);
882 	} else if ((c.cpu_flags & (CPU_READY | CPU_OFFLINE)) != CPU_READY) {
883 		return (P_OFFLINE);
884 	} else if (c.cpu_flags & CPU_ENABLE) {
885 		return (P_ONLINE);
886 	} else {
887 		return (P_NOINTR);
888 	}
889 }
890 
891 /*ARGSUSED*/
892 static long
893 dtracemdb_sysconf(void *varg, int name)
894 {
895 	int max_ncpus;
896 	processorid_t max_cpuid;
897 
898 	switch (name) {
899 	case _SC_CPUID_MAX:
900 		if (mdb_readvar(&max_cpuid, "max_cpuid") == -1) {
901 			mdb_warn("failed to read 'max_cpuid'");
902 			return (-1);
903 		}
904 
905 		return (max_cpuid);
906 
907 	case _SC_NPROCESSORS_MAX:
908 		if (mdb_readvar(&max_ncpus, "max_ncpus") == -1) {
909 			mdb_warn("failed to read 'max_ncpus'");
910 			return (-1);
911 		}
912 
913 		return (max_ncpus);
914 
915 	default:
916 		mdb_warn("unexpected sysconf code %d\n", name);
917 		return (-1);
918 	}
919 }
920 
/*
 * The vector of mdb-backed operations handed to dtrace_vopen() by the
 * ::dtrace dcmd.  The initializers are positional, so their order must
 * match the member order of dtrace_vector_t.
 */
const dtrace_vector_t dtrace_mdbops = {
	dtracemdb_ioctl,
	dtracemdb_lookup_by_addr,
	dtracemdb_stat,
	dtracemdb_sysconf
};
927 
/*
 * State shared between the ::dtrace dcmd and its consume callbacks.
 */
typedef struct dtrace_dcmddata {
	dtrace_hdl_t *dtdd_dtp;	/* libdtrace handle for this invocation */
	int dtdd_cpu;		/* CPU to restrict output to, or -1 for all */
	int dtdd_quiet;		/* non-zero if the 'quiet' option is set */
	int dtdd_flowindent;	/* non-zero if 'flowindent' is set */
	int dtdd_heading;	/* non-zero once the heading was handled */
	FILE *dtdd_output;	/* temporary file that receives the output */
} dtrace_dcmddata_t;
936 
937 /*
938  * Helper to grab all the content from a file, spit it into a string, and erase
939  * and reset the file.
940  */
941 static void
942 print_and_truncate_file(FILE *fp)
943 {
944 	long len;
945 	char *out;
946 
947 	/* flush, find length of file, seek to beginning, initialize buffer */
948 	if (fflush(fp) || (len = ftell(fp)) < 0 ||
949 	    fseek(fp, 0, SEEK_SET) < 0) {
950 		mdb_warn("couldn't prepare DTrace output file: %d\n", errno);
951 		return;
952 	}
953 
954 	out = mdb_alloc(len + 1, UM_SLEEP);
955 	out[len] = '\0';
956 
957 	/* read file into buffer, truncate file, and seek to beginning */
958 	if ((fread(out, len + 1, sizeof (char), fp) == 0 && ferror(fp)) ||
959 	    ftruncate(fileno(fp), 0) < 0 || fseek(fp, 0, SEEK_SET) < 0) {
960 		mdb_warn("couldn't read DTrace output file: %d\n", errno);
961 		mdb_free(out, len + 1);
962 		return;
963 	}
964 
965 	mdb_printf("%s", out);
966 	mdb_free(out, len + 1);
967 }
968 
969 /*ARGSUSED*/
970 static int
971 dtrace_dcmdrec(const dtrace_probedata_t *data,
972     const dtrace_recdesc_t *rec, void *arg)
973 {
974 	dtrace_dcmddata_t *dd = arg;
975 
976 	print_and_truncate_file(dd->dtdd_output);
977 
978 	if (rec == NULL) {
979 		/*
980 		 * We have processed the final record; output the newline if
981 		 * we're not in quiet mode.
982 		 */
983 		if (!dd->dtdd_quiet)
984 			mdb_printf("\n");
985 
986 		return (DTRACE_CONSUME_NEXT);
987 	}
988 
989 	return (DTRACE_CONSUME_THIS);
990 }
991 
992 /*ARGSUSED*/
993 static int
994 dtrace_dcmdprobe(const dtrace_probedata_t *data, void *arg)
995 {
996 	dtrace_probedesc_t *pd = data->dtpda_pdesc;
997 	processorid_t cpu = data->dtpda_cpu;
998 	dtrace_dcmddata_t *dd = arg;
999 	char name[DTRACE_FUNCNAMELEN + DTRACE_NAMELEN + 2];
1000 
1001 	if (dd->dtdd_cpu != -1UL && dd->dtdd_cpu != cpu)
1002 		return (DTRACE_CONSUME_NEXT);
1003 
1004 	if (dd->dtdd_heading == 0) {
1005 		if (!dd->dtdd_flowindent) {
1006 			if (!dd->dtdd_quiet) {
1007 				mdb_printf("%3s %6s %32s\n",
1008 				    "CPU", "ID", "FUNCTION:NAME");
1009 			}
1010 		} else {
1011 			mdb_printf("%3s %-41s\n", "CPU", "FUNCTION");
1012 		}
1013 		dd->dtdd_heading = 1;
1014 	}
1015 
1016 	if (!dd->dtdd_flowindent) {
1017 		if (!dd->dtdd_quiet) {
1018 			(void) mdb_snprintf(name, sizeof (name), "%s:%s",
1019 			    pd->dtpd_func, pd->dtpd_name);
1020 
1021 			mdb_printf("%3d %6d %32s ", cpu, pd->dtpd_id, name);
1022 		}
1023 	} else {
1024 		int indent = data->dtpda_indent;
1025 
1026 		if (data->dtpda_flow == DTRACEFLOW_NONE) {
1027 			(void) mdb_snprintf(name, sizeof (name), "%*s%s%s:%s",
1028 			    indent, "", data->dtpda_prefix, pd->dtpd_func,
1029 			    pd->dtpd_name);
1030 		} else {
1031 			(void) mdb_snprintf(name, sizeof (name), "%*s%s%s",
1032 			    indent, "", data->dtpda_prefix, pd->dtpd_func);
1033 		}
1034 
1035 		mdb_printf("%3d %-41s ", cpu, name);
1036 	}
1037 
1038 	return (DTRACE_CONSUME_THIS);
1039 }
1040 
1041 /*ARGSUSED*/
1042 static int
1043 dtrace_dcmderr(const dtrace_errdata_t *data, void *arg)
1044 {
1045 	mdb_warn(data->dteda_msg);
1046 	return (DTRACE_HANDLE_OK);
1047 }
1048 
1049 /*ARGSUSED*/
1050 static int
1051 dtrace_dcmddrop(const dtrace_dropdata_t *data, void *arg)
1052 {
1053 	mdb_warn(data->dtdda_msg);
1054 	return (DTRACE_HANDLE_OK);
1055 }
1056 
1057 /*ARGSUSED*/
1058 static int
1059 dtrace_dcmdbuffered(const dtrace_bufdata_t *bufdata, void *arg)
1060 {
1061 	mdb_printf("%s", bufdata->dtbda_buffered);
1062 	return (DTRACE_HANDLE_OK);
1063 }
1064 
1065 /*ARGSUSED*/
1066 int
1067 dtrace(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1068 {
1069 	dtrace_state_t state;
1070 	dtrace_hdl_t *dtp;
1071 	int ncpu, err;
1072 	uintptr_t c = -1UL;
1073 	dtrace_dcmddata_t dd;
1074 	dtrace_optval_t val;
1075 	dtracemdb_data_t md;
1076 	int rval = DCMD_ERR;
1077 	dtrace_anon_t anon;
1078 
1079 	if (!(flags & DCMD_ADDRSPEC))
1080 		return (DCMD_USAGE);
1081 
1082 	if (mdb_getopts(argc, argv, 'c', MDB_OPT_UINTPTR, &c, NULL) != argc)
1083 		return (DCMD_USAGE);
1084 
1085 	if (mdb_readvar(&ncpu, "_ncpu") == -1) {
1086 		mdb_warn("failed to read '_ncpu'");
1087 		return (DCMD_ERR);
1088 	}
1089 
1090 	if (mdb_vread(&state, sizeof (state), addr) == -1) {
1091 		mdb_warn("couldn't read dtrace_state_t at %p", addr);
1092 		return (DCMD_ERR);
1093 	}
1094 
1095 	if (state.dts_anon != NULL) {
1096 		addr = (uintptr_t)state.dts_anon;
1097 
1098 		if (mdb_vread(&state, sizeof (state), addr) == -1) {
1099 			mdb_warn("couldn't read anonymous state at %p", addr);
1100 			return (DCMD_ERR);
1101 		}
1102 	}
1103 
1104 	bzero(&md, sizeof (md));
1105 	md.dtmd_state = &state;
1106 
1107 	if ((dtp = dtrace_vopen(DTRACE_VERSION, DTRACE_O_NOSYS, &err,
1108 	    &dtrace_mdbops, &md)) == NULL) {
1109 		mdb_warn("failed to initialize dtrace: %s\n",
1110 		    dtrace_errmsg(NULL, err));
1111 		return (DCMD_ERR);
1112 	}
1113 
1114 	/*
1115 	 * If this is the anonymous enabling, we need to set a bit indicating
1116 	 * that DTRACEOPT_GRABANON should be set.
1117 	 */
1118 	if (mdb_readvar(&anon, "dtrace_anon") == -1) {
1119 		mdb_warn("failed to read 'dtrace_anon'");
1120 		return (DCMD_ERR);
1121 	}
1122 
1123 	md.dtmd_isanon = ((uintptr_t)anon.dta_state == addr);
1124 
1125 	if (dtrace_go(dtp) != 0) {
1126 		mdb_warn("failed to initialize dtrace: %s\n",
1127 		    dtrace_errmsg(dtp, dtrace_errno(dtp)));
1128 		goto err;
1129 	}
1130 
1131 	bzero(&dd, sizeof (dd));
1132 	dd.dtdd_dtp = dtp;
1133 	dd.dtdd_cpu = c;
1134 
1135 	if (dtrace_getopt(dtp, "flowindent", &val) == -1) {
1136 		mdb_warn("couldn't get 'flowindent' option: %s\n",
1137 		    dtrace_errmsg(dtp, dtrace_errno(dtp)));
1138 		goto err;
1139 	}
1140 
1141 	dd.dtdd_flowindent = (val != DTRACEOPT_UNSET);
1142 
1143 	if (dtrace_getopt(dtp, "quiet", &val) == -1) {
1144 		mdb_warn("couldn't get 'quiet' option: %s\n",
1145 		    dtrace_errmsg(dtp, dtrace_errno(dtp)));
1146 		goto err;
1147 	}
1148 
1149 	dd.dtdd_quiet = (val != DTRACEOPT_UNSET);
1150 
1151 	if (dtrace_handle_err(dtp, dtrace_dcmderr, NULL) == -1) {
1152 		mdb_warn("couldn't add err handler: %s\n",
1153 		    dtrace_errmsg(dtp, dtrace_errno(dtp)));
1154 		goto err;
1155 	}
1156 
1157 	if (dtrace_handle_drop(dtp, dtrace_dcmddrop, NULL) == -1) {
1158 		mdb_warn("couldn't add drop handler: %s\n",
1159 		    dtrace_errmsg(dtp, dtrace_errno(dtp)));
1160 		goto err;
1161 	}
1162 
1163 	if (dtrace_handle_buffered(dtp, dtrace_dcmdbuffered, NULL) == -1) {
1164 		mdb_warn("couldn't add buffered handler: %s\n",
1165 		    dtrace_errmsg(dtp, dtrace_errno(dtp)));
1166 		goto err;
1167 	}
1168 
1169 	if (dtrace_status(dtp) == -1) {
1170 		mdb_warn("couldn't get status: %s\n",
1171 		    dtrace_errmsg(dtp, dtrace_errno(dtp)));
1172 		goto err;
1173 	}
1174 
1175 	if (dtrace_aggregate_snap(dtp) == -1) {
1176 		mdb_warn("couldn't snapshot aggregation: %s\n",
1177 		    dtrace_errmsg(dtp, dtrace_errno(dtp)));
1178 		goto err;
1179 	}
1180 
1181 	if ((dd.dtdd_output = tmpfile()) == NULL) {
1182 		mdb_warn("couldn't open DTrace output file: %d\n", errno);
1183 		goto err;
1184 	}
1185 
1186 	if (dtrace_consume(dtp, dd.dtdd_output,
1187 	    dtrace_dcmdprobe, dtrace_dcmdrec, &dd) == -1) {
1188 		mdb_warn("couldn't consume DTrace buffers: %s\n",
1189 		    dtrace_errmsg(dtp, dtrace_errno(dtp)));
1190 	}
1191 
1192 	if (dtrace_aggregate_print(dtp, NULL, NULL) == -1) {
1193 		mdb_warn("couldn't print aggregation: %s\n",
1194 		    dtrace_errmsg(dtp, dtrace_errno(dtp)));
1195 		goto err;
1196 	}
1197 
1198 	rval = DCMD_OK;
1199 err:
1200 	dtrace_close(dtp);
1201 	fclose(dd.dtdd_output);
1202 	return (rval);
1203 }
1204 
1205 static int
1206 dtrace_errhash_cmp(const void *l, const void *r)
1207 {
1208 	uintptr_t lhs = *((uintptr_t *)l);
1209 	uintptr_t rhs = *((uintptr_t *)r);
1210 	dtrace_errhash_t lerr, rerr;
1211 	char lmsg[256], rmsg[256];
1212 
1213 	(void) mdb_vread(&lerr, sizeof (lerr), lhs);
1214 	(void) mdb_vread(&rerr, sizeof (rerr), rhs);
1215 
1216 	if (lerr.dter_msg == NULL)
1217 		return (-1);
1218 
1219 	if (rerr.dter_msg == NULL)
1220 		return (1);
1221 
1222 	(void) mdb_readstr(lmsg, sizeof (lmsg), (uintptr_t)lerr.dter_msg);
1223 	(void) mdb_readstr(rmsg, sizeof (rmsg), (uintptr_t)rerr.dter_msg);
1224 
1225 	return (strcmp(lmsg, rmsg));
1226 }
1227 
1228 int
1229 dtrace_errhash_init(mdb_walk_state_t *wsp)
1230 {
1231 	GElf_Sym sym;
1232 	uintptr_t *hash, addr;
1233 	int i;
1234 
1235 	if (wsp->walk_addr != 0) {
1236 		mdb_warn("dtrace_errhash walk only supports global walks\n");
1237 		return (WALK_ERR);
1238 	}
1239 
1240 	if (mdb_lookup_by_name("dtrace_errhash", &sym) == -1) {
1241 		mdb_warn("couldn't find 'dtrace_errhash' (non-DEBUG kernel?)");
1242 		return (WALK_ERR);
1243 	}
1244 
1245 	addr = (uintptr_t)sym.st_value;
1246 	hash = mdb_alloc(DTRACE_ERRHASHSZ * sizeof (uintptr_t),
1247 	    UM_SLEEP | UM_GC);
1248 
1249 	for (i = 0; i < DTRACE_ERRHASHSZ; i++)
1250 		hash[i] = addr + i * sizeof (dtrace_errhash_t);
1251 
1252 	qsort(hash, DTRACE_ERRHASHSZ, sizeof (uintptr_t), dtrace_errhash_cmp);
1253 
1254 	wsp->walk_addr = 0;
1255 	wsp->walk_data = hash;
1256 
1257 	return (WALK_NEXT);
1258 }
1259 
1260 int
1261 dtrace_errhash_step(mdb_walk_state_t *wsp)
1262 {
1263 	int ndx = (int)wsp->walk_addr;
1264 	uintptr_t *hash = wsp->walk_data;
1265 	dtrace_errhash_t err;
1266 	uintptr_t addr;
1267 
1268 	if (ndx >= DTRACE_ERRHASHSZ)
1269 		return (WALK_DONE);
1270 
1271 	wsp->walk_addr = ndx + 1;
1272 	addr = hash[ndx];
1273 
1274 	if (mdb_vread(&err, sizeof (err), addr) == -1) {
1275 		mdb_warn("failed to read dtrace_errhash_t at %p", addr);
1276 		return (WALK_DONE);
1277 	}
1278 
1279 	if (err.dter_msg == NULL)
1280 		return (WALK_NEXT);
1281 
1282 	return (wsp->walk_callback(addr, &err, wsp->walk_cbdata));
1283 }
1284 
1285 /*ARGSUSED*/
1286 int
1287 dtrace_errhash(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1288 {
1289 	dtrace_errhash_t err;
1290 	char msg[256];
1291 
1292 	if (!(flags & DCMD_ADDRSPEC)) {
1293 		if (mdb_walk_dcmd("dtrace_errhash", "dtrace_errhash",
1294 		    argc, argv) == -1) {
1295 			mdb_warn("can't walk 'dtrace_errhash'");
1296 			return (DCMD_ERR);
1297 		}
1298 
1299 		return (DCMD_OK);
1300 	}
1301 
1302 	if (DCMD_HDRSPEC(flags))
1303 		mdb_printf("%8s %s\n", "COUNT", "ERROR");
1304 
1305 	if (mdb_vread(&err, sizeof (err), addr) == -1) {
1306 		mdb_warn("failed to read dtrace_errhash_t at %p", addr);
1307 		return (DCMD_ERR);
1308 	}
1309 
1310 	addr = (uintptr_t)err.dter_msg;
1311 
1312 	if (mdb_readstr(msg, sizeof (msg), addr) == -1) {
1313 		mdb_warn("failed to read error msg at %p", addr);
1314 		return (DCMD_ERR);
1315 	}
1316 
1317 	mdb_printf("%8d %s", err.dter_count, msg);
1318 
1319 	/*
1320 	 * Some error messages include a newline -- only print the newline
1321 	 * if the message doesn't have one.
1322 	 */
1323 	if (msg[strlen(msg) - 1] != '\n')
1324 		mdb_printf("\n");
1325 
1326 	return (DCMD_OK);
1327 }
1328 
1329 int
1330 dtrace_helptrace_init(mdb_walk_state_t *wsp)
1331 {
1332 	uint32_t next;
1333 	uintptr_t buffer;
1334 
1335 	if (wsp->walk_addr != 0) {
1336 		mdb_warn("dtrace_helptrace only supports global walks\n");
1337 		return (WALK_ERR);
1338 	}
1339 
1340 	if (mdb_readvar(&buffer, "dtrace_helptrace_buffer") == -1) {
1341 		mdb_warn("couldn't read 'dtrace_helptrace_buffer'");
1342 		return (WALK_ERR);
1343 	}
1344 
1345 	if (buffer == 0) {
1346 		mdb_warn("helper tracing is not enabled\n");
1347 		return (WALK_ERR);
1348 	}
1349 
1350 	if (mdb_readvar(&next, "dtrace_helptrace_next") == -1) {
1351 		mdb_warn("couldn't read 'dtrace_helptrace_next'");
1352 		return (WALK_ERR);
1353 	}
1354 
1355 	wsp->walk_addr = next;
1356 
1357 	return (WALK_NEXT);
1358 }
1359 
/*
 * Walker step for "dtrace_helptrace".  Here wsp->walk_addr is a byte offset
 * into the buffer whose address is held in dtrace_helptrace_buffer, not a
 * target address; the address handed to the callback is buffer + offset.
 *
 * Returns WALK_ERR if any of the helper-trace variables or the record itself
 * cannot be read, the callback's value if it is not WALK_NEXT, WALK_DONE once
 * the walk has come back around to dtrace_helptrace_next, and WALK_NEXT
 * otherwise.
 */
int
dtrace_helptrace_step(mdb_walk_state_t *wsp)
{
	uint32_t next, size, nlocals, bufsize;
	uintptr_t buffer, addr;
	dtrace_helptrace_t *ht;
	int rval;

	/* All four variables are re-read from the target on every step. */
	if (mdb_readvar(&next, "dtrace_helptrace_next") == -1) {
		mdb_warn("couldn't read 'dtrace_helptrace_next'");
		return (WALK_ERR);
	}

	if (mdb_readvar(&bufsize, "dtrace_helptrace_bufsize") == -1) {
		mdb_warn("couldn't read 'dtrace_helptrace_bufsize'");
		return (WALK_ERR);
	}

	if (mdb_readvar(&buffer, "dtrace_helptrace_buffer") == -1) {
		mdb_warn("couldn't read 'dtrace_helptrace_buffer'");
		return (WALK_ERR);
	}

	if (mdb_readvar(&nlocals, "dtrace_helptrace_nlocals") == -1) {
		mdb_warn("couldn't read 'dtrace_helptrace_nlocals'");
		return (WALK_ERR);
	}

	/*
	 * Every record has the same size:  a dtrace_helptrace_t plus nlocals
	 * 64-bit values, less one 64-bit value (presumably because the
	 * structure already declares the first local slot -- confirm against
	 * the structure definition).
	 */
	size = sizeof (dtrace_helptrace_t) +
	    nlocals * sizeof (uint64_t) - sizeof (uint64_t);

	/*
	 * If a record at the current offset would run past the end of the
	 * buffer, wrap to offset zero -- unless the next-record offset is
	 * itself zero, in which case the walk is complete.
	 */
	if (wsp->walk_addr + size > bufsize) {
		if (next == 0)
			return (WALK_DONE);

		wsp->walk_addr = 0;
	}

	addr = buffer + wsp->walk_addr;
	ht = alloca(size);

	if (mdb_vread(ht, size, addr) == -1) {
		mdb_warn("couldn't read entry at %p", addr);
		return (WALK_ERR);
	}

	/* Records with no helper pointer are not reported to the callback. */
	if (ht->dtht_helper != NULL) {
		rval = wsp->walk_callback(addr, ht, wsp->walk_cbdata);

		if (rval != WALK_NEXT)
			return (rval);
	}

	/*
	 * Stop when the record just visited starts below the next-record
	 * offset and ends at or beyond it; otherwise advance by one record.
	 */
	if (wsp->walk_addr < next && wsp->walk_addr + size >= next)
		return (WALK_DONE);

	wsp->walk_addr += size;
	return (WALK_NEXT);
}
1419 
1420 int
1421 dtrace_helptrace(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1422 {
1423 	dtrace_helptrace_t help;
1424 	dtrace_helper_action_t helper;
1425 	char where[30];
1426 	uint_t opt_v = FALSE;
1427 	uintptr_t haddr;
1428 
1429 	if (!(flags & DCMD_ADDRSPEC)) {
1430 		if (mdb_walk_dcmd("dtrace_helptrace", "dtrace_helptrace",
1431 		    argc, argv) == -1) {
1432 			mdb_warn("can't walk 'dtrace_helptrace'");
1433 			return (DCMD_ERR);
1434 		}
1435 
1436 		return (DCMD_OK);
1437 	}
1438 
1439 	if (mdb_getopts(argc, argv, 'v',
1440 	    MDB_OPT_SETBITS, TRUE, &opt_v, NULL) != argc)
1441 		return (DCMD_USAGE);
1442 
1443 	if (DCMD_HDRSPEC(flags)) {
1444 		mdb_printf(" %?s %?s %12s %s\n",
1445 		    "ADDR", "HELPER", "WHERE", "DIFO");
1446 	}
1447 
1448 	if (mdb_vread(&help, sizeof (help), addr) == -1) {
1449 		mdb_warn("failed to read dtrace_helptrace_t at %p", addr);
1450 		return (DCMD_ERR);
1451 	}
1452 
1453 	switch (help.dtht_where) {
1454 	case 0:
1455 		(void) mdb_snprintf(where, sizeof (where), "predicate");
1456 		break;
1457 
1458 	case DTRACE_HELPTRACE_NEXT:
1459 		(void) mdb_snprintf(where, sizeof (where), "next");
1460 		break;
1461 
1462 	case DTRACE_HELPTRACE_DONE:
1463 		(void) mdb_snprintf(where, sizeof (where), "done");
1464 		break;
1465 
1466 	case DTRACE_HELPTRACE_ERR:
1467 		(void) mdb_snprintf(where, sizeof (where), "err");
1468 		break;
1469 
1470 	default:
1471 		(void) mdb_snprintf(where, sizeof (where),
1472 		    "action #%d", help.dtht_where);
1473 		break;
1474 	}
1475 
1476 	mdb_printf(" %?p %?p %12s ", addr, help.dtht_helper, where);
1477 
1478 	haddr = (uintptr_t)help.dtht_helper;
1479 
1480 	if (mdb_vread(&helper, sizeof (helper), haddr) == -1) {
1481 		/*
1482 		 * We're not going to warn in this case -- we're just not going
1483 		 * to print anything exciting.
1484 		 */
1485 		mdb_printf("???\n");
1486 	} else {
1487 		switch (help.dtht_where) {
1488 		case 0:
1489 			mdb_printf("%p\n", helper.dtha_predicate);
1490 			break;
1491 
1492 		case DTRACE_HELPTRACE_NEXT:
1493 		case DTRACE_HELPTRACE_DONE:
1494 		case DTRACE_HELPTRACE_ERR:
1495 			mdb_printf("-\n");
1496 			break;
1497 
1498 		default:
1499 			haddr = (uintptr_t)helper.dtha_actions +
1500 			    (help.dtht_where - 1) * sizeof (uintptr_t);
1501 
1502 			if (mdb_vread(&haddr, sizeof (haddr), haddr) == -1) {
1503 				mdb_printf("???\n");
1504 			} else {
1505 				mdb_printf("%p\n", haddr);
1506 			}
1507 		}
1508 	}
1509 
1510 	if (opt_v) {
1511 		int i;
1512 
1513 		if (help.dtht_where == DTRACE_HELPTRACE_ERR) {
1514 			int f = help.dtht_fault;
1515 
1516 			mdb_printf("%?s| %?s %10s |\n", "", "", "");
1517 			mdb_printf("%?s| %?s %10s +->  fault: %s\n", "", "", "",
1518 			    f == DTRACEFLT_BADADDR ? "BADADDR" :
1519 			    f == DTRACEFLT_BADALIGN ? "BADALIGN" :
1520 			    f == DTRACEFLT_ILLOP ? "ILLOP" :
1521 			    f == DTRACEFLT_DIVZERO ? "DIVZERO" :
1522 			    f == DTRACEFLT_NOSCRATCH ? "NOSCRATCH" :
1523 			    f == DTRACEFLT_KPRIV ? "KPRIV" :
1524 			    f == DTRACEFLT_UPRIV ? "UPRIV" :
1525 			    f == DTRACEFLT_TUPOFLOW ? "TUPOFLOW" :
1526 			    f == DTRACEFLT_BADSTACK ? "BADSTACK" :
1527 			    "DTRACEFLT_UNKNOWN");
1528 			mdb_printf("%?s| %?s %12s     addr: 0x%x\n", "", "", "",
1529 			    help.dtht_illval);
1530 			mdb_printf("%?s| %?s %12s   offset: %d\n", "", "", "",
1531 			    help.dtht_fltoffs);
1532 		}
1533 
1534 		mdb_printf("%?s|\n%?s+--> %?s %4s %s\n", "", "",
1535 		    "ADDR", "NDX", "VALUE");
1536 		addr += sizeof (help) - sizeof (uint64_t);
1537 
1538 		for (i = 0; i < help.dtht_nlocals; i++) {
1539 			uint64_t val;
1540 
1541 			if (mdb_vread(&val, sizeof (val), addr) == -1) {
1542 				mdb_warn("couldn't read local at %p", addr);
1543 				continue;
1544 			}
1545 
1546 			mdb_printf("%?s     %?p %4d %p\n", "", addr, i, val);
1547 			addr += sizeof (uint64_t);
1548 		}
1549 
1550 		mdb_printf("\n");
1551 	}
1552 
1553 	return (DCMD_OK);
1554 }
1555 
1556 /*ARGSUSED*/
1557 static int
1558 dtrace_state_walk(uintptr_t addr, const vmem_seg_t *seg, minor_t *highest)
1559 {
1560 	if (seg->vs_end > *highest)
1561 		*highest = seg->vs_end;
1562 
1563 	return (WALK_NEXT);
1564 }
1565 
/*
 * Private state of the "dtrace_state" walker.
 */
typedef struct dtrace_state_walk {
	uintptr_t dtsw_softstate;	/* value of 'dtrace_softstate' */
	minor_t dtsw_max;		/* largest segment end in dtrace_minor */
	minor_t dtsw_current;		/* next soft state index to look up */
} dtrace_state_walk_t;
1571 
1572 int
1573 dtrace_state_init(mdb_walk_state_t *wsp)
1574 {
1575 	uintptr_t dtrace_minor;
1576 	minor_t max = 0;
1577 	dtrace_state_walk_t *dw;
1578 
1579 	if (wsp->walk_addr != 0) {
1580 		mdb_warn("dtrace_state only supports global walks\n");
1581 		return (WALK_ERR);
1582 	}
1583 
1584 	/*
1585 	 * Find the dtrace_minor vmem arena and walk it to get the maximum
1586 	 * minor number.
1587 	 */
1588 	if (mdb_readvar(&dtrace_minor, "dtrace_minor") == -1) {
1589 		mdb_warn("failed to read 'dtrace_minor'");
1590 		return (WALK_ERR);
1591 	}
1592 
1593 	if (mdb_pwalk("vmem_alloc", (mdb_walk_cb_t)dtrace_state_walk,
1594 	    &max, dtrace_minor) == -1) {
1595 		mdb_warn("couldn't walk 'vmem_alloc'");
1596 		return (WALK_ERR);
1597 	}
1598 
1599 	dw = mdb_zalloc(sizeof (dtrace_state_walk_t), UM_SLEEP | UM_GC);
1600 	dw->dtsw_current = 0;
1601 	dw->dtsw_max = max;
1602 
1603 	if (mdb_readvar(&dw->dtsw_softstate, "dtrace_softstate") == -1) {
1604 		mdb_warn("failed to read 'dtrace_softstate'");
1605 		return (DCMD_ERR);
1606 	}
1607 
1608 	wsp->walk_data = dw;
1609 
1610 	return (WALK_NEXT);
1611 }
1612 
1613 int
1614 dtrace_state_step(mdb_walk_state_t *wsp)
1615 {
1616 	dtrace_state_walk_t *dw = wsp->walk_data;
1617 	uintptr_t statep;
1618 	dtrace_state_t state;
1619 	int rval;
1620 
1621 	while (mdb_get_soft_state_byaddr(dw->dtsw_softstate, dw->dtsw_current,
1622 	    &statep, NULL, 0) == -1) {
1623 		if (dw->dtsw_current >= dw->dtsw_max)
1624 			return (WALK_DONE);
1625 
1626 		dw->dtsw_current++;
1627 	}
1628 
1629 	if (mdb_vread(&state, sizeof (state), statep) == -1) {
1630 		mdb_warn("couldn't read dtrace_state_t at %p", statep);
1631 		return (WALK_NEXT);
1632 	}
1633 
1634 	rval = wsp->walk_callback(statep, &state, wsp->walk_cbdata);
1635 	dw->dtsw_current++;
1636 
1637 	return (rval);
1638 }
1639 
/*
 * Search state shared by ::dtrace_state and its proc and file callbacks.
 */
typedef struct dtrace_state_data {
	int dtsd_major;			/* major number to match open files on */
	uintptr_t dtsd_proc;		/* address of proc currently walked */
	uintptr_t dtsd_softstate;	/* value of 'dtrace_softstate' */
	uintptr_t dtsd_state;		/* address of the state being sought */
} dtrace_state_data_t;
1646 
1647 static int
1648 dtrace_state_file(uintptr_t addr, struct file *f, dtrace_state_data_t *data)
1649 {
1650 	vnode_t vnode;
1651 	proc_t proc;
1652 	minor_t minor;
1653 	uintptr_t statep;
1654 
1655 	if (mdb_vread(&vnode, sizeof (vnode), (uintptr_t)f->f_vnode) == -1) {
1656 		mdb_warn("couldn't read vnode at %p", (uintptr_t)f->f_vnode);
1657 		return (WALK_NEXT);
1658 	}
1659 
1660 	if (getmajor(vnode.v_rdev) != data->dtsd_major)
1661 		return (WALK_NEXT);
1662 
1663 	minor = getminor(vnode.v_rdev);
1664 
1665 	if (mdb_vread(&proc, sizeof (proc), data->dtsd_proc) == -1) {
1666 		mdb_warn("failed to read proc at %p", data->dtsd_proc);
1667 		return (WALK_NEXT);
1668 	}
1669 
1670 	if (mdb_get_soft_state_byaddr(data->dtsd_softstate, minor,
1671 	    &statep, NULL, 0) == -1) {
1672 		mdb_warn("failed to read softstate for minor %d", minor);
1673 		return (WALK_NEXT);
1674 	}
1675 
1676 	if (statep != data->dtsd_state)
1677 		return (WALK_NEXT);
1678 
1679 	mdb_printf("%?p %5d %?p %-*s %?p\n", statep, minor,
1680 	    data->dtsd_proc, MAXCOMLEN, proc.p_user.u_comm, addr);
1681 
1682 	return (WALK_NEXT);
1683 }
1684 
1685 /*ARGSUSED*/
1686 static int
1687 dtrace_state_proc(uintptr_t addr, void *ignored, dtrace_state_data_t *data)
1688 {
1689 	data->dtsd_proc = addr;
1690 
1691 	if (mdb_pwalk("file",
1692 	    (mdb_walk_cb_t)dtrace_state_file, data, addr) == -1) {
1693 		mdb_warn("couldn't walk 'file' for proc %p", addr);
1694 		return (WALK_ERR);
1695 	}
1696 
1697 	return (WALK_NEXT);
1698 }
1699 
/*
 * Help text for ::dtrace_state.  The text is printed one paragraph per call.
 */
void
dtrace_state_help(void)
{
	mdb_printf("Given a dtrace_state_t structure, displays all consumers, "
	    "or \"<anonymous>\"\n"
	    "if the consumer is anonymous.  If no state structure is "
	    "provided, iterates\n"
	    "over all state structures.\n\n");

	mdb_printf("Addresses in ADDR column may be provided to ::dtrace to "
	    "obtain\n"
	    "dtrace(8)-like output for in-kernel DTrace data.\n");
}
1711 
1712 int
1713 dtrace_state(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1714 {
1715 	uintptr_t devi;
1716 	struct dev_info info;
1717 	dtrace_state_data_t data;
1718 	dtrace_anon_t anon;
1719 	dtrace_state_t state;
1720 
1721 	if (!(flags & DCMD_ADDRSPEC)) {
1722 		if (mdb_walk_dcmd("dtrace_state",
1723 		    "dtrace_state", argc, argv) == -1) {
1724 			mdb_warn("can't walk dtrace_state");
1725 			return (DCMD_ERR);
1726 		}
1727 		return (DCMD_OK);
1728 	}
1729 
1730 	if (DCMD_HDRSPEC(flags)) {
1731 		mdb_printf("%?s %5s %?s %-*s %?s\n", "ADDR", "MINOR", "PROC",
1732 		    MAXCOMLEN, "NAME", "FILE");
1733 	}
1734 
1735 	/*
1736 	 * First determine if this is anonymous state.
1737 	 */
1738 	if (mdb_readvar(&anon, "dtrace_anon") == -1) {
1739 		mdb_warn("failed to read 'dtrace_anon'");
1740 		return (DCMD_ERR);
1741 	}
1742 
1743 	if ((uintptr_t)anon.dta_state == addr) {
1744 		if (mdb_vread(&state, sizeof (state), addr) == -1) {
1745 			mdb_warn("failed to read anon at %p", addr);
1746 			return (DCMD_ERR);
1747 		}
1748 
1749 		mdb_printf("%?p %5d %?s %-*s %?s\n", addr,
1750 		    getminor(state.dts_dev), "-", MAXCOMLEN,
1751 		    "<anonymous>", "-");
1752 
1753 		return (DCMD_OK);
1754 	}
1755 
1756 	if (mdb_readvar(&devi, "dtrace_devi") == -1) {
1757 		mdb_warn("failed to read 'dtrace_devi'");
1758 		return (DCMD_ERR);
1759 	}
1760 
1761 	if (mdb_vread(&info, sizeof (struct dev_info), devi) == -1) {
1762 		mdb_warn("failed to read 'dev_info'");
1763 		return (DCMD_ERR);
1764 	}
1765 
1766 	data.dtsd_major = info.devi_major;
1767 
1768 	if (mdb_readvar(&data.dtsd_softstate, "dtrace_softstate") == -1) {
1769 		mdb_warn("failed to read 'dtrace_softstate'");
1770 		return (DCMD_ERR);
1771 	}
1772 
1773 	data.dtsd_state = addr;
1774 
1775 	/*
1776 	 * Walk through all processes and all open files looking for this
1777 	 * state.  It must be open somewhere...
1778 	 */
1779 	if (mdb_walk("proc", (mdb_walk_cb_t)dtrace_state_proc, &data) == -1) {
1780 		mdb_warn("couldn't walk 'proc'");
1781 		return (DCMD_ERR);
1782 	}
1783 
1784 	return (DCMD_OK);
1785 }
1786 
/*
 * Private state of the "dtrace_aggkey" walker.
 */
typedef struct dtrace_aggkey_data {
	uintptr_t *dtakd_hash;		/* local copy of the hash bucket array */
	uintptr_t dtakd_hashsize;	/* number of buckets in dtakd_hash */
	uintptr_t dtakd_next;		/* next key to visit, or 0 if none */
	uintptr_t dtakd_ndx;		/* index of next bucket to load */
} dtrace_aggkey_data_t;
1793 
1794 int
1795 dtrace_aggkey_init(mdb_walk_state_t *wsp)
1796 {
1797 	dtrace_buffer_t buf;
1798 	uintptr_t addr;
1799 	dtrace_aggbuffer_t agb;
1800 	dtrace_aggkey_data_t *data;
1801 	size_t hsize;
1802 
1803 	if ((addr = wsp->walk_addr) == 0) {
1804 		mdb_warn("dtrace_aggkey walk needs aggregation buffer\n");
1805 		return (WALK_ERR);
1806 	}
1807 
1808 	if (mdb_vread(&buf, sizeof (buf), addr) == -1) {
1809 		mdb_warn("failed to read aggregation buffer at %p", addr);
1810 		return (WALK_ERR);
1811 	}
1812 
1813 	addr = (uintptr_t)buf.dtb_tomax +
1814 	    buf.dtb_size - sizeof (dtrace_aggbuffer_t);
1815 
1816 	if (mdb_vread(&agb, sizeof (agb), addr) == -1) {
1817 		mdb_warn("failed to read dtrace_aggbuffer_t at %p", addr);
1818 		return (WALK_ERR);
1819 	}
1820 
1821 	data = mdb_zalloc(sizeof (dtrace_aggkey_data_t), UM_SLEEP);
1822 
1823 	data->dtakd_hashsize = agb.dtagb_hashsize;
1824 	hsize = agb.dtagb_hashsize * sizeof (dtrace_aggkey_t *);
1825 	data->dtakd_hash = mdb_alloc(hsize, UM_SLEEP);
1826 
1827 	if (mdb_vread(data->dtakd_hash, hsize,
1828 	    (uintptr_t)agb.dtagb_hash) == -1) {
1829 		mdb_warn("failed to read hash at %p",
1830 		    (uintptr_t)agb.dtagb_hash);
1831 		mdb_free(data->dtakd_hash, hsize);
1832 		mdb_free(data, sizeof (dtrace_aggkey_data_t));
1833 		return (WALK_ERR);
1834 	}
1835 
1836 	wsp->walk_data = data;
1837 	return (WALK_NEXT);
1838 }
1839 
1840 int
1841 dtrace_aggkey_step(mdb_walk_state_t *wsp)
1842 {
1843 	dtrace_aggkey_data_t *data = wsp->walk_data;
1844 	dtrace_aggkey_t key;
1845 	uintptr_t addr;
1846 
1847 	while ((addr = data->dtakd_next) == 0) {
1848 		if (data->dtakd_ndx == data->dtakd_hashsize)
1849 			return (WALK_DONE);
1850 
1851 		data->dtakd_next = data->dtakd_hash[data->dtakd_ndx++];
1852 	}
1853 
1854 	if (mdb_vread(&key, sizeof (key), addr) == -1) {
1855 		mdb_warn("failed to read dtrace_aggkey_t at %p", addr);
1856 		return (WALK_ERR);
1857 	}
1858 
1859 	data->dtakd_next = (uintptr_t)key.dtak_next;
1860 
1861 	return (wsp->walk_callback(addr, &key, wsp->walk_cbdata));
1862 }
1863 
1864 void
1865 dtrace_aggkey_fini(mdb_walk_state_t *wsp)
1866 {
1867 	dtrace_aggkey_data_t *data = wsp->walk_data;
1868 	size_t hsize;
1869 
1870 	hsize = data->dtakd_hashsize * sizeof (dtrace_aggkey_t *);
1871 	mdb_free(data->dtakd_hash, hsize);
1872 	mdb_free(data, sizeof (dtrace_aggkey_data_t));
1873 }
1874 
/*
 * Private state of the "dtrace_dynvar" walker.
 */
typedef struct dtrace_dynvar_data {
	dtrace_dynhash_t *dtdvd_hash;	/* local copy of the hash bucket array */
	uintptr_t dtdvd_hashsize;	/* number of buckets in dtdvd_hash */
	uintptr_t dtdvd_next;		/* next dynamic variable to visit */
	uintptr_t dtdvd_ndx;		/* index of next bucket to load */
	uintptr_t dtdvd_sink;		/* address of 'dtrace_dynhash_sink' */
} dtrace_dynvar_data_t;
1882 
1883 int
1884 dtrace_dynvar_init(mdb_walk_state_t *wsp)
1885 {
1886 	uintptr_t addr;
1887 	dtrace_dstate_t dstate;
1888 	dtrace_dynvar_data_t *data;
1889 	size_t hsize;
1890 	GElf_Sym sym;
1891 
1892 	if ((addr = wsp->walk_addr) == 0) {
1893 		mdb_warn("dtrace_dynvar walk needs dtrace_dstate_t\n");
1894 		return (WALK_ERR);
1895 	}
1896 
1897 	if (mdb_vread(&dstate, sizeof (dstate), addr) == -1) {
1898 		mdb_warn("failed to read dynamic state at %p", addr);
1899 		return (WALK_ERR);
1900 	}
1901 
1902 	if (mdb_lookup_by_name("dtrace_dynhash_sink", &sym) == -1) {
1903 		mdb_warn("couldn't find 'dtrace_dynhash_sink'");
1904 		return (WALK_ERR);
1905 	}
1906 
1907 	data = mdb_zalloc(sizeof (dtrace_dynvar_data_t), UM_SLEEP);
1908 
1909 	data->dtdvd_hashsize = dstate.dtds_hashsize;
1910 	hsize = dstate.dtds_hashsize * sizeof (dtrace_dynhash_t);
1911 	data->dtdvd_hash = mdb_alloc(hsize, UM_SLEEP);
1912 	data->dtdvd_sink = (uintptr_t)sym.st_value;
1913 
1914 	if (mdb_vread(data->dtdvd_hash, hsize,
1915 	    (uintptr_t)dstate.dtds_hash) == -1) {
1916 		mdb_warn("failed to read hash at %p",
1917 		    (uintptr_t)dstate.dtds_hash);
1918 		mdb_free(data->dtdvd_hash, hsize);
1919 		mdb_free(data, sizeof (dtrace_dynvar_data_t));
1920 		return (WALK_ERR);
1921 	}
1922 
1923 	data->dtdvd_next = (uintptr_t)data->dtdvd_hash[0].dtdh_chain;
1924 
1925 	wsp->walk_data = data;
1926 	return (WALK_NEXT);
1927 }
1928 
1929 int
1930 dtrace_dynvar_step(mdb_walk_state_t *wsp)
1931 {
1932 	dtrace_dynvar_data_t *data = wsp->walk_data;
1933 	dtrace_dynvar_t dynvar, *dvar;
1934 	size_t dvarsize;
1935 	uintptr_t addr;
1936 	int nkeys;
1937 
1938 	while ((addr = data->dtdvd_next) == data->dtdvd_sink) {
1939 		if (data->dtdvd_ndx == data->dtdvd_hashsize)
1940 			return (WALK_DONE);
1941 
1942 		data->dtdvd_next =
1943 		    (uintptr_t)data->dtdvd_hash[data->dtdvd_ndx++].dtdh_chain;
1944 	}
1945 
1946 	if (mdb_vread(&dynvar, sizeof (dynvar), addr) == -1) {
1947 		mdb_warn("failed to read dtrace_dynvar_t at %p", addr);
1948 		return (WALK_ERR);
1949 	}
1950 
1951 	/*
1952 	 * Now we need to allocate the correct size.
1953 	 */
1954 	nkeys = dynvar.dtdv_tuple.dtt_nkeys;
1955 	dvarsize = (uintptr_t)&dynvar.dtdv_tuple.dtt_key[nkeys] -
1956 	    (uintptr_t)&dynvar;
1957 
1958 	dvar = alloca(dvarsize);
1959 
1960 	if (mdb_vread(dvar, dvarsize, addr) == -1) {
1961 		mdb_warn("failed to read dtrace_dynvar_t at %p", addr);
1962 		return (WALK_ERR);
1963 	}
1964 
1965 	data->dtdvd_next = (uintptr_t)dynvar.dtdv_next;
1966 
1967 	return (wsp->walk_callback(addr, dvar, wsp->walk_cbdata));
1968 }
1969 
1970 void
1971 dtrace_dynvar_fini(mdb_walk_state_t *wsp)
1972 {
1973 	dtrace_dynvar_data_t *data = wsp->walk_data;
1974 	size_t hsize;
1975 
1976 	hsize = data->dtdvd_hashsize * sizeof (dtrace_dynvar_t *);
1977 	mdb_free(data->dtdvd_hash, hsize);
1978 	mdb_free(data, sizeof (dtrace_dynvar_data_t));
1979 }
1980 
/*
 * Working state for the hash statistics dcmds:  the per-bucket counters that
 * a hash function increments, plus the bytes that it is asked to hash.
 */
typedef struct dtrace_hashstat_data {
	size_t *dthsd_counts;		/* per-bucket hit counters */
	size_t dthsd_hashsize;		/* number of buckets being simulated */
	char *dthsd_data;		/* bytes to hash */
	size_t dthsd_size;		/* length of dthsd_data in bytes */
	int dthsd_header;		/* non-zero once header was printed */
} dtrace_hashstat_data_t;

/*
 * A candidate hash function:  hashes dthsd_data and increments the counter of
 * the bucket that the hash value selects.
 */
typedef void (*dtrace_hashstat_func_t)(dtrace_hashstat_data_t *);
1990 
1991 static void
1992 dtrace_hashstat_additive(dtrace_hashstat_data_t *data)
1993 {
1994 	int i;
1995 	int hval = 0;
1996 
1997 	for (i = 0; i < data->dthsd_size; i++)
1998 		hval += data->dthsd_data[i];
1999 
2000 	data->dthsd_counts[hval % data->dthsd_hashsize]++;
2001 }
2002 
2003 static void
2004 dtrace_hashstat_shifty(dtrace_hashstat_data_t *data)
2005 {
2006 	uint64_t hval = 0;
2007 	int i;
2008 
2009 	if (data->dthsd_size < sizeof (uint64_t)) {
2010 		dtrace_hashstat_additive(data);
2011 		return;
2012 	}
2013 
2014 	for (i = 0; i < data->dthsd_size; i += sizeof (uint64_t)) {
2015 		/* LINTED - alignment */
2016 		uint64_t val = *((uint64_t *)&data->dthsd_data[i]);
2017 
2018 		hval += (val & ((1 << NBBY) - 1)) +
2019 		    ((val >> NBBY) & ((1 << NBBY) - 1)) +
2020 		    ((val >> (NBBY << 1)) & ((1 << NBBY) - 1)) +
2021 		    ((val >> (NBBY << 2)) & ((1 << NBBY) - 1)) +
2022 		    (val & USHRT_MAX) + (val >> (NBBY << 1) & USHRT_MAX);
2023 	}
2024 
2025 	data->dthsd_counts[hval % data->dthsd_hashsize]++;
2026 }
2027 
2028 static void
2029 dtrace_hashstat_knuth(dtrace_hashstat_data_t *data)
2030 {
2031 	int i;
2032 	int hval = data->dthsd_size;
2033 
2034 	for (i = 0; i < data->dthsd_size; i++)
2035 		hval = (hval << 4) ^ (hval >> 28) ^ data->dthsd_data[i];
2036 
2037 	data->dthsd_counts[hval % data->dthsd_hashsize]++;
2038 }
2039 
2040 static void
2041 dtrace_hashstat_oneatatime(dtrace_hashstat_data_t *data)
2042 {
2043 	int i;
2044 	uint32_t hval = 0;
2045 
2046 	for (i = 0; i < data->dthsd_size; i++) {
2047 		hval += data->dthsd_data[i];
2048 		hval += (hval << 10);
2049 		hval ^= (hval >> 6);
2050 	}
2051 
2052 	hval += (hval << 3);
2053 	hval ^= (hval >> 11);
2054 	hval += (hval << 15);
2055 
2056 	data->dthsd_counts[hval % data->dthsd_hashsize]++;
2057 }
2058 
2059 static void
2060 dtrace_hashstat_fnv(dtrace_hashstat_data_t *data)
2061 {
2062 	static const uint32_t prime = 0x01000193;
2063 	uint32_t hval = 0;
2064 	int i;
2065 
2066 	for (i = 0; i < data->dthsd_size; i++) {
2067 		hval *= prime;
2068 		hval ^= data->dthsd_data[i];
2069 	}
2070 
2071 	data->dthsd_counts[hval % data->dthsd_hashsize]++;
2072 }
2073 
2074 static void
2075 dtrace_hashstat_stats(char *name, dtrace_hashstat_data_t *data)
2076 {
2077 	size_t nz = 0, i;
2078 	int longest = 0;
2079 	size_t ttl = 0;
2080 	double sum = 0.0;
2081 	double avg;
2082 	uint_t util, stddev;
2083 
2084 	if (!data->dthsd_header) {
2085 		mdb_printf("%15s %11s %11s %11s %11s %11s\n", "NAME",
2086 		    "HASHSIZE", "%UTIL", "LONGEST", "AVERAGE", "STDDEV");
2087 		data->dthsd_header = 1;
2088 	}
2089 
2090 	for (i = 0; i < data->dthsd_hashsize; i++) {
2091 		if (data->dthsd_counts[i] != 0) {
2092 			nz++;
2093 
2094 			if (data->dthsd_counts[i] > longest)
2095 				longest = data->dthsd_counts[i];
2096 
2097 			ttl += data->dthsd_counts[i];
2098 		}
2099 	}
2100 
2101 	if (nz == 0) {
2102 		mdb_printf("%15s %11d %11s %11s %11s %11s\n", name,
2103 		    data->dthsd_hashsize, "-", "-", "-", "-");
2104 		return;
2105 	}
2106 
2107 	avg = (double)ttl / (double)nz;
2108 
2109 	for (i = 0; i < data->dthsd_hashsize; i++) {
2110 		double delta = (double)data->dthsd_counts[i] - avg;
2111 
2112 		if (data->dthsd_counts[i] == 0)
2113 			continue;
2114 
2115 		sum += delta * delta;
2116 	}
2117 
2118 	util = (nz * 1000) / data->dthsd_hashsize;
2119 	stddev = (uint_t)sqrt(sum / (double)nz) * 10;
2120 
2121 	mdb_printf("%15s %11d %9u.%1u %11d %11d %9u.%1u\n", name,
2122 	    data->dthsd_hashsize, util / 10, util % 10, longest, ttl / nz,
2123 	    stddev / 10, stddev % 10);
2124 }
2125 
/*
 * Table of the hash functions to report on, terminated by an entry with a
 * NULL name.  The first entry has no function:  it stands for the hash value
 * that is already stored in each element being examined.
 */
static struct dtrace_hashstat {
	char *dths_name;			/* name printed in the NAME column */
	dtrace_hashstat_func_t dths_func;	/* hash function, or NULL */
} _dtrace_hashstat[] = {
	{ "<actual>", NULL },
	{ "additive", dtrace_hashstat_additive },
	{ "shifty", dtrace_hashstat_shifty },
	{ "knuth", dtrace_hashstat_knuth },
	{ "one-at-a-time", dtrace_hashstat_oneatatime },
	{ "fnv", dtrace_hashstat_fnv },
	{ NULL, 0 }
};
2138 
/*
 * Callback argument for the hash statistics walks:  the counters and data
 * buffer, together with the hash function currently being evaluated.
 */
typedef struct dtrace_aggstat_data {
	dtrace_hashstat_data_t dtagsd_hash;	/* counters and data to hash */
	dtrace_hashstat_func_t dtagsd_func;	/* hash to apply, or NULL */
} dtrace_aggstat_data_t;
2143 
2144 static int
2145 dtrace_aggstat_walk(uintptr_t addr, dtrace_aggkey_t *key,
2146     dtrace_aggstat_data_t *data)
2147 {
2148 	dtrace_hashstat_data_t *hdata = &data->dtagsd_hash;
2149 	size_t size;
2150 
2151 	if (data->dtagsd_func == NULL) {
2152 		size_t bucket = key->dtak_hashval % hdata->dthsd_hashsize;
2153 
2154 		hdata->dthsd_counts[bucket]++;
2155 		return (WALK_NEXT);
2156 	}
2157 
2158 	/*
2159 	 * We need to read the data.
2160 	 */
2161 	size = key->dtak_size - sizeof (dtrace_aggid_t);
2162 	addr = (uintptr_t)key->dtak_data + sizeof (dtrace_aggid_t);
2163 	hdata->dthsd_data = alloca(size);
2164 	hdata->dthsd_size = size;
2165 
2166 	if (mdb_vread(hdata->dthsd_data, size, addr) == -1) {
2167 		mdb_warn("couldn't read data at %p", addr);
2168 		return (WALK_ERR);
2169 	}
2170 
2171 	data->dtagsd_func(hdata);
2172 
2173 	return (WALK_NEXT);
2174 }
2175 
2176 /*ARGSUSED*/
2177 int
2178 dtrace_aggstat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2179 {
2180 	dtrace_buffer_t buf;
2181 	uintptr_t aaddr;
2182 	dtrace_aggbuffer_t agb;
2183 	size_t hsize, i, actual, prime, evenpow;
2184 	dtrace_aggstat_data_t data;
2185 	dtrace_hashstat_data_t *hdata = &data.dtagsd_hash;
2186 
2187 	bzero(&data, sizeof (data));
2188 
2189 	if (!(flags & DCMD_ADDRSPEC))
2190 		return (DCMD_USAGE);
2191 
2192 	if (mdb_vread(&buf, sizeof (buf), addr) == -1) {
2193 		mdb_warn("failed to read aggregation buffer at %p", addr);
2194 		return (DCMD_ERR);
2195 	}
2196 
2197 	aaddr = (uintptr_t)buf.dtb_tomax +
2198 	    buf.dtb_size - sizeof (dtrace_aggbuffer_t);
2199 
2200 	if (mdb_vread(&agb, sizeof (agb), aaddr) == -1) {
2201 		mdb_warn("failed to read dtrace_aggbuffer_t at %p", aaddr);
2202 		return (DCMD_ERR);
2203 	}
2204 
2205 	hsize = (actual = agb.dtagb_hashsize) * sizeof (size_t);
2206 	hdata->dthsd_counts = mdb_alloc(hsize, UM_SLEEP | UM_GC);
2207 
2208 	/*
2209 	 * Now pick the largest prime smaller than the hash size.  (If the
2210 	 * existing size is prime, we'll pick a smaller prime just for the
2211 	 * hell of it.)
2212 	 */
2213 	for (prime = agb.dtagb_hashsize - 1; prime > 7; prime--) {
2214 		size_t limit = prime / 7;
2215 
2216 		for (i = 2; i < limit; i++) {
2217 			if ((prime % i) == 0)
2218 				break;
2219 		}
2220 
2221 		if (i == limit)
2222 			break;
2223 	}
2224 
2225 	/*
2226 	 * And now we want to pick the largest power of two smaller than the
2227 	 * hashsize.
2228 	 */
2229 	for (i = 0; (1 << i) < agb.dtagb_hashsize; i++)
2230 		continue;
2231 
2232 	evenpow = (1 << (i - 1));
2233 
2234 	for (i = 0; _dtrace_hashstat[i].dths_name != NULL; i++) {
2235 		data.dtagsd_func = _dtrace_hashstat[i].dths_func;
2236 
2237 		hdata->dthsd_hashsize = actual;
2238 		hsize = hdata->dthsd_hashsize * sizeof (size_t);
2239 		bzero(hdata->dthsd_counts, hsize);
2240 
2241 		if (mdb_pwalk("dtrace_aggkey",
2242 		    (mdb_walk_cb_t)dtrace_aggstat_walk, &data, addr) == -1) {
2243 			mdb_warn("failed to walk dtrace_aggkey at %p", addr);
2244 			return (DCMD_ERR);
2245 		}
2246 
2247 		dtrace_hashstat_stats(_dtrace_hashstat[i].dths_name, hdata);
2248 
2249 		/*
2250 		 * If we were just printing the actual value, we won't try
2251 		 * any of the sizing experiments.
2252 		 */
2253 		if (data.dtagsd_func == NULL)
2254 			continue;
2255 
2256 		hdata->dthsd_hashsize = prime;
2257 		hsize = hdata->dthsd_hashsize * sizeof (size_t);
2258 		bzero(hdata->dthsd_counts, hsize);
2259 
2260 		if (mdb_pwalk("dtrace_aggkey",
2261 		    (mdb_walk_cb_t)dtrace_aggstat_walk, &data, addr) == -1) {
2262 			mdb_warn("failed to walk dtrace_aggkey at %p", addr);
2263 			return (DCMD_ERR);
2264 		}
2265 
2266 		dtrace_hashstat_stats(_dtrace_hashstat[i].dths_name, hdata);
2267 
2268 		hdata->dthsd_hashsize = evenpow;
2269 		hsize = hdata->dthsd_hashsize * sizeof (size_t);
2270 		bzero(hdata->dthsd_counts, hsize);
2271 
2272 		if (mdb_pwalk("dtrace_aggkey",
2273 		    (mdb_walk_cb_t)dtrace_aggstat_walk, &data, addr) == -1) {
2274 			mdb_warn("failed to walk dtrace_aggkey at %p", addr);
2275 			return (DCMD_ERR);
2276 		}
2277 
2278 		dtrace_hashstat_stats(_dtrace_hashstat[i].dths_name, hdata);
2279 	}
2280 
2281 	return (DCMD_OK);
2282 }
2283 
2284 /*ARGSUSED*/
2285 static int
2286 dtrace_dynstat_walk(uintptr_t addr, dtrace_dynvar_t *dynvar,
2287     dtrace_aggstat_data_t *data)
2288 {
2289 	dtrace_hashstat_data_t *hdata = &data->dtagsd_hash;
2290 	dtrace_tuple_t *tuple = &dynvar->dtdv_tuple;
2291 	dtrace_key_t *key = tuple->dtt_key;
2292 	size_t size = 0, offs = 0;
2293 	int i, nkeys = tuple->dtt_nkeys;
2294 	char *buf;
2295 
2296 	if (data->dtagsd_func == NULL) {
2297 		size_t bucket = dynvar->dtdv_hashval % hdata->dthsd_hashsize;
2298 
2299 		hdata->dthsd_counts[bucket]++;
2300 		return (WALK_NEXT);
2301 	}
2302 
2303 	/*
2304 	 * We want to hand the hashing algorithm a contiguous buffer.  First
2305 	 * run through the tuple and determine the size.
2306 	 */
2307 	for (i = 0; i < nkeys; i++) {
2308 		if (key[i].dttk_size == 0) {
2309 			size += sizeof (uint64_t);
2310 		} else {
2311 			size += key[i].dttk_size;
2312 		}
2313 	}
2314 
2315 	buf = alloca(size);
2316 
2317 	/*
2318 	 * Now go back through the tuple and copy the data into the buffer.
2319 	 */
2320 	for (i = 0; i < nkeys; i++) {
2321 		if (key[i].dttk_size == 0) {
2322 			bcopy(&key[i].dttk_value, &buf[offs],
2323 			    sizeof (uint64_t));
2324 			offs += sizeof (uint64_t);
2325 		} else {
2326 			if (mdb_vread(&buf[offs], key[i].dttk_size,
2327 			    key[i].dttk_value) == -1) {
2328 				mdb_warn("couldn't read tuple data at %p",
2329 				    key[i].dttk_value);
2330 				return (WALK_ERR);
2331 			}
2332 
2333 			offs += key[i].dttk_size;
2334 		}
2335 	}
2336 
2337 	hdata->dthsd_data = buf;
2338 	hdata->dthsd_size = size;
2339 
2340 	data->dtagsd_func(hdata);
2341 
2342 	return (WALK_NEXT);
2343 }
2344 
2345 /*ARGSUSED*/
2346 int
2347 dtrace_dynstat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2348 {
2349 	dtrace_dstate_t dstate;
2350 	size_t hsize, i, actual, prime;
2351 	dtrace_aggstat_data_t data;
2352 	dtrace_hashstat_data_t *hdata = &data.dtagsd_hash;
2353 
2354 	bzero(&data, sizeof (data));
2355 
2356 	if (!(flags & DCMD_ADDRSPEC))
2357 		return (DCMD_USAGE);
2358 
2359 	if (mdb_vread(&dstate, sizeof (dstate), addr) == -1) {
2360 		mdb_warn("failed to read dynamic variable state at %p", addr);
2361 		return (DCMD_ERR);
2362 	}
2363 
2364 	hsize = (actual = dstate.dtds_hashsize) * sizeof (size_t);
2365 	hdata->dthsd_counts = mdb_alloc(hsize, UM_SLEEP | UM_GC);
2366 
2367 	/*
2368 	 * Now pick the largest prime smaller than the hash size.  (If the
2369 	 * existing size is prime, we'll pick a smaller prime just for the
2370 	 * hell of it.)
2371 	 */
2372 	for (prime = dstate.dtds_hashsize - 1; prime > 7; prime--) {
2373 		size_t limit = prime / 7;
2374 
2375 		for (i = 2; i < limit; i++) {
2376 			if ((prime % i) == 0)
2377 				break;
2378 		}
2379 
2380 		if (i == limit)
2381 			break;
2382 	}
2383 
2384 	for (i = 0; _dtrace_hashstat[i].dths_name != NULL; i++) {
2385 		data.dtagsd_func = _dtrace_hashstat[i].dths_func;
2386 
2387 		hdata->dthsd_hashsize = actual;
2388 		hsize = hdata->dthsd_hashsize * sizeof (size_t);
2389 		bzero(hdata->dthsd_counts, hsize);
2390 
2391 		if (mdb_pwalk("dtrace_dynvar",
2392 		    (mdb_walk_cb_t)dtrace_dynstat_walk, &data, addr) == -1) {
2393 			mdb_warn("failed to walk dtrace_dynvar at %p", addr);
2394 			return (DCMD_ERR);
2395 		}
2396 
2397 		dtrace_hashstat_stats(_dtrace_hashstat[i].dths_name, hdata);
2398 
2399 		/*
2400 		 * If we were just printing the actual value, we won't try
2401 		 * any of the sizing experiments.
2402 		 */
2403 		if (data.dtagsd_func == NULL)
2404 			continue;
2405 
2406 		hdata->dthsd_hashsize = prime;
2407 		hsize = hdata->dthsd_hashsize * sizeof (size_t);
2408 		bzero(hdata->dthsd_counts, hsize);
2409 
2410 		if (mdb_pwalk("dtrace_dynvar",
2411 		    (mdb_walk_cb_t)dtrace_dynstat_walk, &data, addr) == -1) {
2412 			mdb_warn("failed to walk dtrace_aggkey at %p", addr);
2413 			return (DCMD_ERR);
2414 		}
2415 
2416 		dtrace_hashstat_stats(_dtrace_hashstat[i].dths_name, hdata);
2417 	}
2418 
2419 	return (DCMD_OK);
2420 }
2421 
2422 typedef struct dtrace_ecb_walk {
2423 	dtrace_ecb_t **dtew_ecbs;
2424 	int dtew_necbs;
2425 	int dtew_curecb;
2426 } dtrace_ecb_walk_t;
2427 
2428 static int
2429 dtrace_ecb_init(mdb_walk_state_t *wsp)
2430 {
2431 	uintptr_t addr;
2432 	dtrace_state_t state;
2433 	dtrace_ecb_walk_t *ecbwp;
2434 
2435 	if ((addr = wsp->walk_addr) == 0) {
2436 		mdb_warn("dtrace_ecb walk needs dtrace_state_t\n");
2437 		return (WALK_ERR);
2438 	}
2439 
2440 	if (mdb_vread(&state, sizeof (state), addr) == -1) {
2441 		mdb_warn("failed to read dtrace state pointer at %p", addr);
2442 		return (WALK_ERR);
2443 	}
2444 
2445 	ecbwp = mdb_zalloc(sizeof (dtrace_ecb_walk_t), UM_SLEEP | UM_GC);
2446 
2447 	ecbwp->dtew_ecbs = state.dts_ecbs;
2448 	ecbwp->dtew_necbs = state.dts_necbs;
2449 	ecbwp->dtew_curecb = 0;
2450 
2451 	wsp->walk_data = ecbwp;
2452 
2453 	return (WALK_NEXT);
2454 }
2455 
2456 static int
2457 dtrace_ecb_step(mdb_walk_state_t *wsp)
2458 {
2459 	uintptr_t ecbp, addr;
2460 	dtrace_ecb_walk_t *ecbwp = wsp->walk_data;
2461 
2462 	addr = (uintptr_t)ecbwp->dtew_ecbs +
2463 	    ecbwp->dtew_curecb * sizeof (dtrace_ecb_t *);
2464 
2465 	if (ecbwp->dtew_curecb++ == ecbwp->dtew_necbs)
2466 		return (WALK_DONE);
2467 
2468 	if (mdb_vread(&ecbp, sizeof (addr), addr) == -1) {
2469 		mdb_warn("failed to read ecb at entry %d\n",
2470 		    ecbwp->dtew_curecb);
2471 		return (WALK_ERR);
2472 	}
2473 
2474 	if (ecbp == 0)
2475 		return (WALK_NEXT);
2476 
2477 	return (wsp->walk_callback(ecbp, NULL, wsp->walk_cbdata));
2478 }
2479 
2480 static void
2481 dtrace_options_numtostr(uint64_t num, char *buf, size_t len)
2482 {
2483 	uint64_t n = num;
2484 	int index = 0;
2485 	char u;
2486 
2487 	while (n >= 1024) {
2488 		n = (n + (1024 / 2)) / 1024; /* Round up or down */
2489 		index++;
2490 	}
2491 
2492 	u = " KMGTPE"[index];
2493 
2494 	if (index == 0) {
2495 		(void) mdb_snprintf(buf, len, "%llu", (u_longlong_t)n);
2496 	} else if (n < 10 && (num & (num - 1)) != 0) {
2497 		(void) mdb_snprintf(buf, len, "%.2f%c",
2498 		    (double)num / (1ULL << 10 * index), u);
2499 	} else if (n < 100 && (num & (num - 1)) != 0) {
2500 		(void) mdb_snprintf(buf, len, "%.1f%c",
2501 		    (double)num / (1ULL << 10 * index), u);
2502 	} else {
2503 		(void) mdb_snprintf(buf, len, "%llu%c", (u_longlong_t)n, u);
2504 	}
2505 }
2506 
2507 static void
2508 dtrace_options_numtohz(uint64_t num, char *buf, size_t len)
2509 {
2510 	(void) mdb_snprintf(buf, len, "%dhz", NANOSEC/num);
2511 }
2512 
2513 static void
2514 dtrace_options_numtobufpolicy(uint64_t num, char *buf, size_t len)
2515 {
2516 	char *policy = "unknown";
2517 
2518 	switch (num) {
2519 		case DTRACEOPT_BUFPOLICY_RING:
2520 			policy = "ring";
2521 			break;
2522 
2523 		case DTRACEOPT_BUFPOLICY_FILL:
2524 			policy = "fill";
2525 			break;
2526 
2527 		case DTRACEOPT_BUFPOLICY_SWITCH:
2528 			policy = "switch";
2529 			break;
2530 	}
2531 
2532 	(void) mdb_snprintf(buf, len, "%s", policy);
2533 }
2534 
2535 static void
2536 dtrace_options_numtocpu(uint64_t cpu, char *buf, size_t len)
2537 {
2538 	if (cpu == DTRACE_CPUALL)
2539 		(void) mdb_snprintf(buf, len, "%7s", "unbound");
2540 	else
2541 		(void) mdb_snprintf(buf, len, "%d", cpu);
2542 }
2543 
2544 typedef void (*dtrace_options_func_t)(uint64_t, char *, size_t);
2545 
2546 static struct dtrace_options {
2547 	char *dtop_optstr;
2548 	dtrace_options_func_t dtop_func;
2549 } _dtrace_options[] = {
2550 	{ "bufsize", dtrace_options_numtostr },
2551 	{ "bufpolicy", dtrace_options_numtobufpolicy },
2552 	{ "dynvarsize", dtrace_options_numtostr },
2553 	{ "aggsize", dtrace_options_numtostr },
2554 	{ "specsize", dtrace_options_numtostr },
2555 	{ "nspec", dtrace_options_numtostr },
2556 	{ "strsize", dtrace_options_numtostr },
2557 	{ "cleanrate", dtrace_options_numtohz },
2558 	{ "cpu", dtrace_options_numtocpu },
2559 	{ "bufresize", dtrace_options_numtostr },
2560 	{ "grabanon", dtrace_options_numtostr },
2561 	{ "flowindent", dtrace_options_numtostr },
2562 	{ "quiet", dtrace_options_numtostr },
2563 	{ "stackframes", dtrace_options_numtostr },
2564 	{ "ustackframes", dtrace_options_numtostr },
2565 	{ "aggrate", dtrace_options_numtohz },
2566 	{ "switchrate", dtrace_options_numtohz },
2567 	{ "statusrate", dtrace_options_numtohz },
2568 	{ "destructive", dtrace_options_numtostr },
2569 	{ "stackindent", dtrace_options_numtostr },
2570 	{ "rawbytes", dtrace_options_numtostr },
2571 	{ "jstackframes", dtrace_options_numtostr },
2572 	{ "jstackstrsize", dtrace_options_numtostr },
2573 	{ "aggsortkey", dtrace_options_numtostr },
2574 	{ "aggsortrev", dtrace_options_numtostr },
2575 	{ "aggsortpos", dtrace_options_numtostr },
2576 	{ "aggsortkeypos", dtrace_options_numtostr },
2577 	{ "temporal", dtrace_options_numtostr },
2578 	{ "agghist", dtrace_options_numtostr },
2579 	{ "aggpack", dtrace_options_numtostr },
2580 	{ "aggzoom", dtrace_options_numtostr },
2581 	{ "zone", dtrace_options_numtostr }
2582 };
2583 
2584 CTASSERT(ARRAY_SIZE(_dtrace_options) == DTRACEOPT_MAX);
2585 
/*
 * Help text for the ::dtrace_options dcmd.
 */
static void
dtrace_options_help(void)
{
	mdb_printf("Given a dtrace_state_t structure, displays the current "
	    "tunable option\nsettings.\n");
}
2592 
2593 /*ARGSUSED*/
2594 static int
2595 dtrace_options(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2596 {
2597 	dtrace_state_t state;
2598 	int i = 0;
2599 	dtrace_optval_t *options;
2600 	char val[32];
2601 
2602 	if (!(flags & DCMD_ADDRSPEC))
2603 		return (DCMD_USAGE);
2604 
2605 	if (mdb_vread(&state, sizeof (dtrace_state_t), (uintptr_t)addr) == -1) {
2606 		mdb_warn("failed to read state pointer at %p\n", addr);
2607 		return (DCMD_ERR);
2608 	}
2609 
2610 	options = &state.dts_options[0];
2611 
2612 	mdb_printf("%<u>%-25s %s%</u>\n", "OPTION", "VALUE");
2613 	for (i = 0; i < DTRACEOPT_MAX; i++) {
2614 		if (options[i] == DTRACEOPT_UNSET) {
2615 			mdb_printf("%-25s %s\n",
2616 			    _dtrace_options[i].dtop_optstr, "UNSET");
2617 		} else {
2618 			(void) _dtrace_options[i].dtop_func(options[i],
2619 			    val, 32);
2620 			mdb_printf("%-25s %s\n",
2621 			    _dtrace_options[i].dtop_optstr, val);
2622 		}
2623 	}
2624 
2625 	return (DCMD_OK);
2626 }
2627 
2628 static int
2629 pid2state_init(mdb_walk_state_t *wsp)
2630 {
2631 	dtrace_state_data_t *data;
2632 	uintptr_t devi;
2633 	uintptr_t proc;
2634 	struct dev_info info;
2635 	pid_t pid = (pid_t)wsp->walk_addr;
2636 
2637 	if (wsp->walk_addr == 0) {
2638 		mdb_warn("pid2state walk requires PID\n");
2639 		return (WALK_ERR);
2640 	}
2641 
2642 	data = mdb_zalloc(sizeof (dtrace_state_data_t), UM_SLEEP | UM_GC);
2643 
2644 	if (mdb_readvar(&data->dtsd_softstate, "dtrace_softstate") == -1) {
2645 		mdb_warn("failed to read 'dtrace_softstate'");
2646 		return (DCMD_ERR);
2647 	}
2648 
2649 	if ((proc = mdb_pid2proc(pid, NULL)) == 0) {
2650 		mdb_warn("PID 0t%d not found\n", pid);
2651 		return (DCMD_ERR);
2652 	}
2653 
2654 	if (mdb_readvar(&devi, "dtrace_devi") == -1) {
2655 		mdb_warn("failed to read 'dtrace_devi'");
2656 		return (DCMD_ERR);
2657 	}
2658 
2659 	if (mdb_vread(&info, sizeof (struct dev_info), devi) == -1) {
2660 		mdb_warn("failed to read 'dev_info'");
2661 		return (DCMD_ERR);
2662 	}
2663 
2664 	data->dtsd_major = info.devi_major;
2665 	data->dtsd_proc = proc;
2666 
2667 	wsp->walk_data = data;
2668 
2669 	return (WALK_NEXT);
2670 }
2671 
2672 /*ARGSUSED*/
2673 static int
2674 pid2state_file(uintptr_t addr, struct file *f, dtrace_state_data_t *data)
2675 {
2676 	vnode_t vnode;
2677 	minor_t minor;
2678 	uintptr_t statep;
2679 
2680 	/* Get the vnode for this file */
2681 	if (mdb_vread(&vnode, sizeof (vnode), (uintptr_t)f->f_vnode) == -1) {
2682 		mdb_warn("couldn't read vnode at %p", (uintptr_t)f->f_vnode);
2683 		return (WALK_NEXT);
2684 	}
2685 
2686 
2687 	/* Is this the dtrace device? */
2688 	if (getmajor(vnode.v_rdev) != data->dtsd_major)
2689 		return (WALK_NEXT);
2690 
2691 	/* Get the minor number for this device entry */
2692 	minor = getminor(vnode.v_rdev);
2693 
2694 	if (mdb_get_soft_state_byaddr(data->dtsd_softstate, minor,
2695 	    &statep, NULL, 0) == -1) {
2696 		mdb_warn("failed to read softstate for minor %d", minor);
2697 		return (WALK_NEXT);
2698 	}
2699 
2700 	mdb_printf("%p\n", statep);
2701 
2702 	return (WALK_NEXT);
2703 }
2704 
2705 static int
2706 pid2state_step(mdb_walk_state_t *wsp)
2707 {
2708 	dtrace_state_data_t *ds = wsp->walk_data;
2709 
2710 	if (mdb_pwalk("file",
2711 	    (mdb_walk_cb_t)pid2state_file, ds, ds->dtsd_proc) == -1) {
2712 		mdb_warn("couldn't walk 'file' for proc %p", ds->dtsd_proc);
2713 		return (WALK_ERR);
2714 	}
2715 
2716 	return (WALK_DONE);
2717 }
2718 
2719 /*ARGSUSED*/
2720 static int
2721 dtrace_probes_walk(uintptr_t addr, void *ignored, uintptr_t *target)
2722 {
2723 	dtrace_ecb_t ecb;
2724 	dtrace_probe_t probe;
2725 	dtrace_probedesc_t pd;
2726 
2727 	if (addr == 0)
2728 		return (WALK_ERR);
2729 
2730 	if (mdb_vread(&ecb, sizeof (dtrace_ecb_t), addr) == -1) {
2731 		mdb_warn("failed to read ecb %p\n", addr);
2732 		return (WALK_ERR);
2733 	}
2734 
2735 	if (ecb.dte_probe == NULL)
2736 		return (WALK_ERR);
2737 
2738 	if (mdb_vread(&probe, sizeof (dtrace_probe_t),
2739 	    (uintptr_t)ecb.dte_probe) == -1) {
2740 		mdb_warn("failed to read probe %p\n", ecb.dte_probe);
2741 		return (WALK_ERR);
2742 	}
2743 
2744 	pd.dtpd_id = probe.dtpr_id;
2745 	dtracemdb_probe(NULL, &pd);
2746 
2747 	mdb_printf("%5d %10s %17s %33s %s\n", pd.dtpd_id, pd.dtpd_provider,
2748 	    pd.dtpd_mod, pd.dtpd_func, pd.dtpd_name);
2749 
2750 	return (WALK_NEXT);
2751 }
2752 
/*
 * Help text for the ::dtrace_probes dcmd.
 */
static void
dtrace_probes_help(void)
{
	mdb_printf("Given a dtrace_state_t structure, displays all its active "
	    "enablings.  If no\nstate structure is provided, all available "
	    "probes are listed.\n");
}
2760 
2761 /*ARGSUSED*/
2762 static int
2763 dtrace_probes(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2764 {
2765 	dtrace_probedesc_t pd;
2766 	uintptr_t caddr, base, paddr;
2767 	int nprobes, i;
2768 
2769 	mdb_printf("%5s %10s %17s %33s %s\n",
2770 	    "ID", "PROVIDER", "MODULE", "FUNCTION", "NAME");
2771 
2772 	if (!(flags & DCMD_ADDRSPEC)) {
2773 		/*
2774 		 * If no argument is provided just display all available
2775 		 * probes.
2776 		 */
2777 		if (mdb_readvar(&base, "dtrace_probes") == -1) {
2778 			mdb_warn("failed to read 'dtrace_probes'");
2779 			return (-1);
2780 		}
2781 
2782 		if (mdb_readvar(&nprobes, "dtrace_nprobes") == -1) {
2783 			mdb_warn("failed to read 'dtrace_nprobes'");
2784 			return (-1);
2785 		}
2786 
2787 		for (i = 0; i < nprobes; i++) {
2788 			caddr = base + i  * sizeof (dtrace_probe_t *);
2789 
2790 			if (mdb_vread(&paddr, sizeof (paddr), caddr) == -1) {
2791 				mdb_warn("couldn't read probe pointer at %p",
2792 				    caddr);
2793 				continue;
2794 			}
2795 
2796 			if (paddr == 0)
2797 				continue;
2798 
2799 			pd.dtpd_id = i + 1;
2800 			if (dtracemdb_probe(NULL, &pd) == 0) {
2801 				mdb_printf("%5d %10s %17s %33s %s\n",
2802 				    pd.dtpd_id, pd.dtpd_provider,
2803 				    pd.dtpd_mod, pd.dtpd_func, pd.dtpd_name);
2804 			}
2805 		}
2806 	} else {
2807 		if (mdb_pwalk("dtrace_ecb", (mdb_walk_cb_t)dtrace_probes_walk,
2808 		    NULL, addr) == -1) {
2809 			mdb_warn("couldn't walk 'dtrace_ecb'");
2810 			return (DCMD_ERR);
2811 		}
2812 	}
2813 
2814 	return (DCMD_OK);
2815 }
2816 
2817 const mdb_dcmd_t kernel_dcmds[] = {
2818 	{ "id2probe", ":", "translate a dtrace_id_t to a dtrace_probe_t",
2819 	    id2probe },
2820 	{ "dtrace", ":[-c cpu]", "print dtrace(8)-like output",
2821 	    dtrace, dtrace_help },
2822 	{ "dtrace_errhash", ":", "print DTrace error hash", dtrace_errhash },
2823 	{ "dtrace_helptrace", ":", "print DTrace helper trace",
2824 	    dtrace_helptrace },
2825 	{ "dtrace_state", ":", "print active DTrace consumers", dtrace_state,
2826 	    dtrace_state_help },
2827 	{ "dtrace_aggstat", ":",
2828 	    "print DTrace aggregation hash statistics", dtrace_aggstat },
2829 	{ "dtrace_dynstat", ":",
2830 	    "print DTrace dynamic variable hash statistics", dtrace_dynstat },
2831 	{ "dtrace_options", ":",
2832 	    "print a DTrace consumer's current tuneable options",
2833 	    dtrace_options, dtrace_options_help },
2834 	{ "dtrace_probes", "?", "print a DTrace consumer's enabled probes",
2835 	    dtrace_probes, dtrace_probes_help },
2836 	{ NULL }
2837 };
2838 
2839 const mdb_walker_t kernel_walkers[] = {
2840 	{ "dtrace_errhash", "walk hash of DTrace error messasges",
2841 		dtrace_errhash_init, dtrace_errhash_step },
2842 	{ "dtrace_helptrace", "walk DTrace helper trace entries",
2843 		dtrace_helptrace_init, dtrace_helptrace_step },
2844 	{ "dtrace_state", "walk DTrace per-consumer softstate",
2845 		dtrace_state_init, dtrace_state_step },
2846 	{ "dtrace_aggkey", "walk DTrace aggregation keys",
2847 		dtrace_aggkey_init, dtrace_aggkey_step, dtrace_aggkey_fini },
2848 	{ "dtrace_dynvar", "walk DTrace dynamic variables",
2849 		dtrace_dynvar_init, dtrace_dynvar_step, dtrace_dynvar_fini },
2850 	{ "dtrace_ecb", "walk a DTrace consumer's enabling control blocks",
2851 		dtrace_ecb_init, dtrace_ecb_step },
2852 	{ "pid2state", "walk a processes dtrace_state structures",
2853 	    pid2state_init, pid2state_step },
2854 	{ NULL }
2855 };
2856