xref: /titanic_44/usr/src/cmd/mdb/common/modules/dtrace/dtrace.c (revision 22eb7cb54d8a6bcf6fe2674cb4b1f0cf2d85cfb6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * explicitly define DTRACE_ERRDEBUG to pull in definition of dtrace_errhash_t
31  * explicitly define _STDARG_H to avoid stdarg.h/varargs.h u/k defn conflict
32  */
33 #define	DTRACE_ERRDEBUG
34 #define	_STDARG_H
35 
36 #include <mdb/mdb_param.h>
37 #include <mdb/mdb_modapi.h>
38 #include <mdb/mdb_ks.h>
39 #include <sys/dtrace_impl.h>
40 #include <sys/vmem_impl.h>
41 #include <sys/ddi_impldefs.h>
42 #include <sys/sysmacros.h>
43 #include <sys/kobj.h>
44 #include <dtrace.h>
45 #include <alloca.h>
46 #include <ctype.h>
47 #include <errno.h>
48 #include <math.h>
49 
50 /*ARGSUSED*/
51 int
52 id2probe(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
53 {
54 	uintptr_t probe = NULL;
55 	uintptr_t probes;
56 
57 	if (!(flags & DCMD_ADDRSPEC))
58 		return (DCMD_USAGE);
59 
60 	if (addr == DTRACE_IDNONE || addr > UINT32_MAX)
61 		goto out;
62 
63 	if (mdb_readvar(&probes, "dtrace_probes") == -1) {
64 		mdb_warn("failed to read 'dtrace_probes'");
65 		return (DCMD_ERR);
66 	}
67 
68 	probes += (addr - 1) * sizeof (dtrace_probe_t *);
69 
70 	if (mdb_vread(&probe, sizeof (uintptr_t), probes) == -1) {
71 		mdb_warn("failed to read dtrace_probes[%d]", addr - 1);
72 		return (DCMD_ERR);
73 	}
74 
75 out:
76 	mdb_printf("%p\n", probe);
77 	return (DCMD_OK);
78 }
79 
/*
 * Print the help text for the ::dtrace dcmd.  The text is emitted one
 * sentence group at a time; the concatenated output is a single block of
 * prose followed by the option summary.
 */
void
dtrace_help(void)
{
	mdb_printf("Given a dtrace_state_t structure that represents a "
	    "DTrace consumer, prints\n"
	    "dtrace(1M)-like output for in-kernel DTrace data.  ");

	mdb_printf("(The dtrace_state_t\n"
	    "structures for all DTrace consumers may be obtained by running "
	    "the \n"
	    "::dtrace_state dcmd.)   ");

	mdb_printf("When data is present on multiple CPUs, data are\n"
	    "presented in CPU order, with records within each CPU ordered "
	    "oldest to \n"
	    "youngest.  ");

	mdb_printf("Options:\n\n");
	mdb_printf("-c cpu     Only provide output for specified CPU.\n");
}
97 
98 static int
99 dtracemdb_eprobe(dtrace_state_t *state, dtrace_eprobedesc_t *epd)
100 {
101 	dtrace_epid_t epid = epd->dtepd_epid;
102 	dtrace_probe_t probe;
103 	dtrace_ecb_t ecb;
104 	uintptr_t addr, paddr, ap;
105 	dtrace_action_t act;
106 	int nactions, nrecs;
107 
108 	addr = (uintptr_t)state->dts_ecbs +
109 	    (epid - 1) * sizeof (dtrace_ecb_t *);
110 
111 	if (mdb_vread(&addr, sizeof (addr), addr) == -1) {
112 		mdb_warn("failed to read ecb for epid %d", epid);
113 		return (-1);
114 	}
115 
116 	if (addr == NULL) {
117 		mdb_warn("epid %d doesn't match an ecb\n", epid);
118 		return (-1);
119 	}
120 
121 	if (mdb_vread(&ecb, sizeof (ecb), addr) == -1) {
122 		mdb_warn("failed to read ecb at %p", addr);
123 		return (-1);
124 	}
125 
126 	paddr = (uintptr_t)ecb.dte_probe;
127 
128 	if (mdb_vread(&probe, sizeof (probe), paddr) == -1) {
129 		mdb_warn("failed to read probe for ecb %p", addr);
130 		return (-1);
131 	}
132 
133 	/*
134 	 * This is a little painful:  in order to find the number of actions,
135 	 * we need to first walk through them.
136 	 */
137 	for (ap = (uintptr_t)ecb.dte_action, nactions = 0; ap != NULL; ) {
138 		if (mdb_vread(&act, sizeof (act), ap) == -1) {
139 			mdb_warn("failed to read action %p on ecb %p",
140 			    ap, addr);
141 			return (-1);
142 		}
143 
144 		if (!DTRACEACT_ISAGG(act.dta_kind) && !act.dta_intuple)
145 			nactions++;
146 
147 		ap = (uintptr_t)act.dta_next;
148 	}
149 
150 	nrecs = epd->dtepd_nrecs;
151 	epd->dtepd_nrecs = nactions;
152 	epd->dtepd_probeid = probe.dtpr_id;
153 	epd->dtepd_uarg = ecb.dte_uarg;
154 	epd->dtepd_size = ecb.dte_size;
155 
156 	for (ap = (uintptr_t)ecb.dte_action, nactions = 0; ap != NULL; ) {
157 		if (mdb_vread(&act, sizeof (act), ap) == -1) {
158 			mdb_warn("failed to read action %p on ecb %p",
159 			    ap, addr);
160 			return (-1);
161 		}
162 
163 		if (!DTRACEACT_ISAGG(act.dta_kind) && !act.dta_intuple) {
164 			if (nrecs-- == 0)
165 				break;
166 
167 			epd->dtepd_rec[nactions++] = act.dta_rec;
168 		}
169 
170 		ap = (uintptr_t)act.dta_next;
171 	}
172 
173 	return (0);
174 }
175 
176 /*ARGSUSED*/
177 static int
178 dtracemdb_probe(dtrace_state_t *state, dtrace_probedesc_t *pd)
179 {
180 	uintptr_t base, addr, paddr, praddr;
181 	int nprobes, i;
182 	dtrace_probe_t probe;
183 	dtrace_provider_t prov;
184 
185 	if (pd->dtpd_id == DTRACE_IDNONE)
186 		pd->dtpd_id++;
187 
188 	if (mdb_readvar(&base, "dtrace_probes") == -1) {
189 		mdb_warn("failed to read 'dtrace_probes'");
190 		return (-1);
191 	}
192 
193 	if (mdb_readvar(&nprobes, "dtrace_nprobes") == -1) {
194 		mdb_warn("failed to read 'dtrace_nprobes'");
195 		return (-1);
196 	}
197 
198 	for (i = pd->dtpd_id; i <= nprobes; i++) {
199 		addr = base + (i - 1) * sizeof (dtrace_probe_t *);
200 
201 		if (mdb_vread(&paddr, sizeof (paddr), addr) == -1) {
202 			mdb_warn("couldn't read probe pointer at %p", addr);
203 			return (-1);
204 		}
205 
206 		if (paddr != NULL)
207 			break;
208 	}
209 
210 	if (paddr == NULL) {
211 		errno = ESRCH;
212 		return (-1);
213 	}
214 
215 	if (mdb_vread(&probe, sizeof (probe), paddr) == -1) {
216 		mdb_warn("couldn't read probe at %p", paddr);
217 		return (-1);
218 	}
219 
220 	pd->dtpd_id = probe.dtpr_id;
221 
222 	if (mdb_vread(pd->dtpd_name, DTRACE_NAMELEN,
223 	    (uintptr_t)probe.dtpr_name) == -1) {
224 		mdb_warn("failed to read probe name for probe %p", paddr);
225 		return (-1);
226 	}
227 
228 	if (mdb_vread(pd->dtpd_func, DTRACE_FUNCNAMELEN,
229 	    (uintptr_t)probe.dtpr_func) == -1) {
230 		mdb_warn("failed to read function name for probe %p", paddr);
231 		return (-1);
232 	}
233 
234 	if (mdb_vread(pd->dtpd_mod, DTRACE_MODNAMELEN,
235 	    (uintptr_t)probe.dtpr_mod) == -1) {
236 		mdb_warn("failed to read module name for probe %p", paddr);
237 		return (-1);
238 	}
239 
240 	praddr = (uintptr_t)probe.dtpr_provider;
241 
242 	if (mdb_vread(&prov, sizeof (prov), praddr) == -1) {
243 		mdb_warn("failed to read provider for probe %p", paddr);
244 		return (-1);
245 	}
246 
247 	if (mdb_vread(pd->dtpd_provider, DTRACE_PROVNAMELEN,
248 	    (uintptr_t)prov.dtpv_name) == -1) {
249 		mdb_warn("failed to read provider name for probe %p", paddr);
250 		return (-1);
251 	}
252 
253 	return (0);
254 }
255 
256 /*ARGSUSED*/
257 static int
258 dtracemdb_aggdesc(dtrace_state_t *state, dtrace_aggdesc_t *agd)
259 {
260 	dtrace_aggid_t aggid = agd->dtagd_id;
261 	dtrace_aggregation_t agg;
262 	dtrace_ecb_t ecb;
263 	uintptr_t addr, eaddr, ap, last;
264 	dtrace_action_t act;
265 	dtrace_recdesc_t *lrec;
266 	int nactions, nrecs;
267 
268 	addr = (uintptr_t)state->dts_aggregations +
269 	    (aggid - 1) * sizeof (dtrace_aggregation_t *);
270 
271 	if (mdb_vread(&addr, sizeof (addr), addr) == -1) {
272 		mdb_warn("failed to read aggregation for aggid %d", aggid);
273 		return (-1);
274 	}
275 
276 	if (addr == NULL) {
277 		mdb_warn("aggid %d doesn't match an aggregation\n", aggid);
278 		return (-1);
279 	}
280 
281 	if (mdb_vread(&agg, sizeof (agg), addr) == -1) {
282 		mdb_warn("failed to read aggregation at %p", addr);
283 		return (-1);
284 	}
285 
286 	eaddr = (uintptr_t)agg.dtag_ecb;
287 
288 	if (mdb_vread(&ecb, sizeof (ecb), eaddr) == -1) {
289 		mdb_warn("failed to read ecb for aggregation %p", addr);
290 		return (-1);
291 	}
292 
293 	last = (uintptr_t)addr + offsetof(dtrace_aggregation_t, dtag_action);
294 
295 	/*
296 	 * This is a little painful:  in order to find the number of actions,
297 	 * we need to first walk through them.
298 	 */
299 	ap = (uintptr_t)agg.dtag_first;
300 	nactions = 0;
301 
302 	for (;;) {
303 		if (mdb_vread(&act, sizeof (act), ap) == -1) {
304 			mdb_warn("failed to read action %p on aggregation %p",
305 			    ap, addr);
306 			return (-1);
307 		}
308 
309 		nactions++;
310 
311 		if (ap == last)
312 			break;
313 
314 		ap = (uintptr_t)act.dta_next;
315 	}
316 
317 	lrec = &act.dta_rec;
318 	agd->dtagd_size = lrec->dtrd_offset + lrec->dtrd_size - agg.dtag_base;
319 
320 	nrecs = agd->dtagd_nrecs;
321 	agd->dtagd_nrecs = nactions;
322 	agd->dtagd_epid = ecb.dte_epid;
323 
324 	ap = (uintptr_t)agg.dtag_first;
325 	nactions = 0;
326 
327 	for (;;) {
328 		dtrace_recdesc_t rec;
329 
330 		if (mdb_vread(&act, sizeof (act), ap) == -1) {
331 			mdb_warn("failed to read action %p on aggregation %p",
332 			    ap, addr);
333 			return (-1);
334 		}
335 
336 		if (nrecs-- == 0)
337 			break;
338 
339 		rec = act.dta_rec;
340 		rec.dtrd_offset -= agg.dtag_base;
341 		rec.dtrd_uarg = 0;
342 		agd->dtagd_rec[nactions++] = rec;
343 
344 		if (ap == last)
345 			break;
346 
347 		ap = (uintptr_t)act.dta_next;
348 	}
349 
350 	return (0);
351 }
352 
353 static int
354 dtracemdb_bufsnap(dtrace_buffer_t *which, dtrace_bufdesc_t *desc)
355 {
356 	uintptr_t addr;
357 	size_t bufsize;
358 	dtrace_buffer_t buf;
359 	caddr_t data = desc->dtbd_data;
360 	processorid_t max_cpuid, cpu = desc->dtbd_cpu;
361 
362 	if (mdb_readvar(&max_cpuid, "max_cpuid") == -1) {
363 		mdb_warn("failed to read 'max_cpuid'");
364 		errno = EIO;
365 		return (-1);
366 	}
367 
368 	if (cpu < 0 || cpu > max_cpuid) {
369 		errno = EINVAL;
370 		return (-1);
371 	}
372 
373 	addr = (uintptr_t)which + cpu * sizeof (dtrace_buffer_t);
374 
375 	if (mdb_vread(&buf, sizeof (buf), addr) == -1) {
376 		mdb_warn("failed to read buffer description at %p", addr);
377 		errno = EIO;
378 		return (-1);
379 	}
380 
381 	if (buf.dtb_tomax == NULL) {
382 		errno = ENOENT;
383 		return (-1);
384 	}
385 
386 	if (buf.dtb_flags & DTRACEBUF_WRAPPED) {
387 		bufsize = buf.dtb_size;
388 	} else {
389 		bufsize = buf.dtb_offset;
390 	}
391 
392 	if (mdb_vread(data, bufsize, (uintptr_t)buf.dtb_tomax) == -1) {
393 		mdb_warn("couldn't read buffer for CPU %d", cpu);
394 		errno = EIO;
395 		return (-1);
396 	}
397 
398 	if (buf.dtb_offset > buf.dtb_size) {
399 		mdb_warn("buffer for CPU %d has corrupt offset\n", cpu);
400 		errno = EIO;
401 		return (-1);
402 	}
403 
404 	if (buf.dtb_flags & DTRACEBUF_WRAPPED) {
405 		if (buf.dtb_xamot_offset > buf.dtb_size) {
406 			mdb_warn("ringbuffer for CPU %d has corrupt "
407 			    "wrapped offset\n", cpu);
408 			errno = EIO;
409 			return (-1);
410 		}
411 
412 		/*
413 		 * If the ring buffer has wrapped, it needs to be polished.
414 		 * See the comment in dtrace_buffer_polish() for details.
415 		 */
416 		if (buf.dtb_offset < buf.dtb_xamot_offset) {
417 			bzero(data + buf.dtb_offset,
418 			    buf.dtb_xamot_offset - buf.dtb_offset);
419 		}
420 
421 		if (buf.dtb_offset > buf.dtb_xamot_offset) {
422 			bzero(data + buf.dtb_offset,
423 			    buf.dtb_size - buf.dtb_offset);
424 			bzero(data, buf.dtb_xamot_offset);
425 		}
426 
427 		desc->dtbd_oldest = buf.dtb_xamot_offset;
428 	} else {
429 		desc->dtbd_oldest = 0;
430 	}
431 
432 	desc->dtbd_size = bufsize;
433 	desc->dtbd_drops = buf.dtb_drops;
434 	desc->dtbd_errors = buf.dtb_errors;
435 
436 	return (0);
437 }
438 
439 /*
440  * This is essentially identical to its cousin in the kernel.
441  */
442 static dof_hdr_t *
443 dtracemdb_dof_create(dtrace_state_t *state)
444 {
445 	dof_hdr_t *dof;
446 	dof_sec_t *sec;
447 	dof_optdesc_t *opt;
448 	int i, len = sizeof (dof_hdr_t) +
449 	    roundup(sizeof (dof_sec_t), sizeof (uint64_t)) +
450 	    sizeof (dof_optdesc_t) * DTRACEOPT_MAX;
451 
452 	dof = mdb_zalloc(len, UM_SLEEP);
453 	dof->dofh_ident[DOF_ID_MAG0] = DOF_MAG_MAG0;
454 	dof->dofh_ident[DOF_ID_MAG1] = DOF_MAG_MAG1;
455 	dof->dofh_ident[DOF_ID_MAG2] = DOF_MAG_MAG2;
456 	dof->dofh_ident[DOF_ID_MAG3] = DOF_MAG_MAG3;
457 
458 	dof->dofh_ident[DOF_ID_MODEL] = DOF_MODEL_NATIVE;
459 	dof->dofh_ident[DOF_ID_ENCODING] = DOF_ENCODE_NATIVE;
460 	dof->dofh_ident[DOF_ID_VERSION] = DOF_VERSION;
461 	dof->dofh_ident[DOF_ID_DIFVERS] = DIF_VERSION;
462 	dof->dofh_ident[DOF_ID_DIFIREG] = DIF_DIR_NREGS;
463 	dof->dofh_ident[DOF_ID_DIFTREG] = DIF_DTR_NREGS;
464 
465 	dof->dofh_flags = 0;
466 	dof->dofh_hdrsize = sizeof (dof_hdr_t);
467 	dof->dofh_secsize = sizeof (dof_sec_t);
468 	dof->dofh_secnum = 1;	/* only DOF_SECT_OPTDESC */
469 	dof->dofh_secoff = sizeof (dof_hdr_t);
470 	dof->dofh_loadsz = len;
471 	dof->dofh_filesz = len;
472 	dof->dofh_pad = 0;
473 
474 	/*
475 	 * Fill in the option section header...
476 	 */
477 	sec = (dof_sec_t *)((uintptr_t)dof + sizeof (dof_hdr_t));
478 	sec->dofs_type = DOF_SECT_OPTDESC;
479 	sec->dofs_align = sizeof (uint64_t);
480 	sec->dofs_flags = DOF_SECF_LOAD;
481 	sec->dofs_entsize = sizeof (dof_optdesc_t);
482 
483 	opt = (dof_optdesc_t *)((uintptr_t)sec +
484 	    roundup(sizeof (dof_sec_t), sizeof (uint64_t)));
485 
486 	sec->dofs_offset = (uintptr_t)opt - (uintptr_t)dof;
487 	sec->dofs_size = sizeof (dof_optdesc_t) * DTRACEOPT_MAX;
488 
489 	for (i = 0; i < DTRACEOPT_MAX; i++) {
490 		opt[i].dofo_option = i;
491 		opt[i].dofo_strtab = DOF_SECIDX_NONE;
492 		opt[i].dofo_value = state->dts_options[i];
493 	}
494 
495 	return (dof);
496 }
497 
498 static int
499 dtracemdb_format(dtrace_state_t *state, dtrace_fmtdesc_t *desc)
500 {
501 	uintptr_t addr, faddr;
502 	char c;
503 	int len = 0;
504 
505 	if (desc->dtfd_format == 0 || desc->dtfd_format > state->dts_nformats) {
506 		errno = EINVAL;
507 		return (-1);
508 	}
509 
510 	faddr = (uintptr_t)state->dts_formats +
511 	    (desc->dtfd_format - 1) * sizeof (char *);
512 
513 	if (mdb_vread(&addr, sizeof (addr), faddr) == -1) {
514 		mdb_warn("failed to read format string pointer at %p", faddr);
515 		return (-1);
516 	}
517 
518 	do {
519 		if (mdb_vread(&c, sizeof (c), addr + len++) == -1) {
520 			mdb_warn("failed to read format string at %p", addr);
521 			return (-1);
522 		}
523 	} while (c != '\0');
524 
525 	if (len > desc->dtfd_length) {
526 		desc->dtfd_length = len;
527 		return (0);
528 	}
529 
530 	if (mdb_vread(desc->dtfd_string, len, addr) == -1) {
531 		mdb_warn("failed to reread format string at %p", addr);
532 		return (-1);
533 	}
534 
535 	return (0);
536 }
537 
538 static int
539 dtracemdb_status(dtrace_state_t *state, dtrace_status_t *status)
540 {
541 	dtrace_dstate_t *dstate;
542 	int i, j;
543 	uint64_t nerrs;
544 	uintptr_t addr;
545 	int ncpu;
546 
547 	if (mdb_readvar(&ncpu, "_ncpu") == -1) {
548 		mdb_warn("failed to read '_ncpu'");
549 		return (DCMD_ERR);
550 	}
551 
552 	bzero(status, sizeof (dtrace_status_t));
553 
554 	if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) {
555 		errno = ENOENT;
556 		return (-1);
557 	}
558 
559 	/*
560 	 * For the MDB backend, we never set dtst_exiting or dtst_filled.  This
561 	 * is by design:  we don't want the library to try to stop tracing,
562 	 * because it doesn't particularly mean anything.
563 	 */
564 	nerrs = state->dts_errors;
565 	dstate = &state->dts_vstate.dtvs_dynvars;
566 
567 	for (i = 0; i < ncpu; i++) {
568 		dtrace_dstate_percpu_t dcpu;
569 		dtrace_buffer_t buf;
570 
571 		addr = (uintptr_t)&dstate->dtds_percpu[i];
572 
573 		if (mdb_vread(&dcpu, sizeof (dcpu), addr) == -1) {
574 			mdb_warn("failed to read per-CPU dstate at %p", addr);
575 			return (-1);
576 		}
577 
578 		status->dtst_dyndrops += dcpu.dtdsc_drops;
579 		status->dtst_dyndrops_dirty += dcpu.dtdsc_dirty_drops;
580 		status->dtst_dyndrops_rinsing += dcpu.dtdsc_rinsing_drops;
581 
582 		addr = (uintptr_t)&state->dts_buffer[i];
583 
584 		if (mdb_vread(&buf, sizeof (buf), addr) == -1) {
585 			mdb_warn("failed to read per-CPU buffer at %p", addr);
586 			return (-1);
587 		}
588 
589 		nerrs += buf.dtb_errors;
590 
591 		for (j = 0; j < state->dts_nspeculations; j++) {
592 			dtrace_speculation_t spec;
593 
594 			addr = (uintptr_t)&state->dts_speculations[j];
595 
596 			if (mdb_vread(&spec, sizeof (spec), addr) == -1) {
597 				mdb_warn("failed to read "
598 				    "speculation at %p", addr);
599 				return (-1);
600 			}
601 
602 			addr = (uintptr_t)&spec.dtsp_buffer[i];
603 
604 			if (mdb_vread(&buf, sizeof (buf), addr) == -1) {
605 				mdb_warn("failed to read "
606 				    "speculative buffer at %p", addr);
607 				return (-1);
608 			}
609 
610 			status->dtst_specdrops += buf.dtb_xamot_drops;
611 		}
612 	}
613 
614 	status->dtst_specdrops_busy = state->dts_speculations_busy;
615 	status->dtst_specdrops_unavail = state->dts_speculations_unavail;
616 	status->dtst_errors = nerrs;
617 
618 	return (0);
619 }
620 
/*
 * Private data handed to the dtracemdb_* callbacks through their 'varg'
 * argument.
 */
typedef struct dtracemdb_data {
	dtrace_state_t *dtmd_state;	/* consumer state the callbacks use */
	char *dtmd_symstr;		/* symbol name buffer; lazily alloc'd */
	char *dtmd_modstr;		/* module name buffer; lazily alloc'd */
	uintptr_t dtmd_addr;		/* address sought by the modctl walk */
} dtracemdb_data_t;
627 
628 static int
629 dtracemdb_ioctl(void *varg, int cmd, void *arg)
630 {
631 	dtracemdb_data_t *data = varg;
632 	dtrace_state_t *state = data->dtmd_state;
633 
634 	switch (cmd) {
635 	case DTRACEIOC_CONF: {
636 		dtrace_conf_t *conf = arg;
637 
638 		bzero(conf, sizeof (conf));
639 		conf->dtc_difversion = DIF_VERSION;
640 		conf->dtc_difintregs = DIF_DIR_NREGS;
641 		conf->dtc_diftupregs = DIF_DTR_NREGS;
642 		conf->dtc_ctfmodel = CTF_MODEL_NATIVE;
643 
644 		return (0);
645 	}
646 
647 	case DTRACEIOC_DOFGET: {
648 		dof_hdr_t *hdr = arg, *dof;
649 
650 		dof = dtracemdb_dof_create(state);
651 		bcopy(dof, hdr, MIN(hdr->dofh_loadsz, dof->dofh_loadsz));
652 		mdb_free(dof, dof->dofh_loadsz);
653 
654 		return (0);
655 	}
656 
657 	case DTRACEIOC_BUFSNAP:
658 		return (dtracemdb_bufsnap(state->dts_buffer, arg));
659 
660 	case DTRACEIOC_AGGSNAP:
661 		return (dtracemdb_bufsnap(state->dts_aggbuffer, arg));
662 
663 	case DTRACEIOC_AGGDESC:
664 		return (dtracemdb_aggdesc(state, arg));
665 
666 	case DTRACEIOC_EPROBE:
667 		return (dtracemdb_eprobe(state, arg));
668 
669 	case DTRACEIOC_PROBES:
670 		return (dtracemdb_probe(state, arg));
671 
672 	case DTRACEIOC_FORMAT:
673 		return (dtracemdb_format(state, arg));
674 
675 	case DTRACEIOC_STATUS:
676 		return (dtracemdb_status(state, arg));
677 
678 	case DTRACEIOC_GO:
679 		*(processorid_t *)arg = -1;
680 		return (0);
681 
682 	case DTRACEIOC_ENABLE:
683 		errno = ENOTTY; /* see dt_open.c:dtrace_go() */
684 		return (-1);
685 
686 	case DTRACEIOC_PROVIDER:
687 	case DTRACEIOC_PROBEMATCH:
688 		errno = ESRCH;
689 		return (-1);
690 
691 	default:
692 		mdb_warn("unexpected ioctl 0x%x (%s)\n", cmd,
693 		    cmd == DTRACEIOC_PROVIDER	? "DTRACEIOC_PROVIDER" :
694 		    cmd == DTRACEIOC_PROBES	? "DTRACEIOC_PROBES" :
695 		    cmd == DTRACEIOC_BUFSNAP	? "DTRACEIOC_BUFSNAP" :
696 		    cmd == DTRACEIOC_PROBEMATCH	? "DTRACEIOC_PROBEMATCH" :
697 		    cmd == DTRACEIOC_ENABLE	? "DTRACEIOC_ENABLE" :
698 		    cmd == DTRACEIOC_AGGSNAP	? "DTRACEIOC_AGGSNAP" :
699 		    cmd == DTRACEIOC_EPROBE	? "DTRACEIOC_EPROBE" :
700 		    cmd == DTRACEIOC_PROBEARG	? "DTRACEIOC_PROBEARG" :
701 		    cmd == DTRACEIOC_CONF	? "DTRACEIOC_CONF" :
702 		    cmd == DTRACEIOC_STATUS	? "DTRACEIOC_STATUS" :
703 		    cmd == DTRACEIOC_GO		? "DTRACEIOC_GO" :
704 		    cmd == DTRACEIOC_STOP	? "DTRACEIOC_STOP" :
705 		    cmd == DTRACEIOC_AGGDESC	? "DTRACEIOC_AGGDESC" :
706 		    cmd == DTRACEIOC_FORMAT	? "DTRACEIOC_FORMAT" :
707 		    cmd == DTRACEIOC_DOFGET	? "DTRACEIOC_DOFGET" :
708 		    cmd == DTRACEIOC_REPLICATE	? "DTRACEIOC_REPLICATE" :
709 		    "???");
710 		errno = ENXIO;
711 		return (-1);
712 	}
713 }
714 
715 static int
716 dtracemdb_modctl(uintptr_t addr, const struct modctl *m, dtracemdb_data_t *data)
717 {
718 	struct module mod;
719 
720 	if (m->mod_mp == NULL)
721 		return (WALK_NEXT);
722 
723 	if (mdb_vread(&mod, sizeof (mod), (uintptr_t)m->mod_mp) == -1) {
724 		mdb_warn("couldn't read modctl %p's module", addr);
725 		return (WALK_NEXT);
726 	}
727 
728 	if ((uintptr_t)mod.text > data->dtmd_addr)
729 		return (WALK_NEXT);
730 
731 	if ((uintptr_t)mod.text + mod.text_size <= data->dtmd_addr)
732 		return (WALK_NEXT);
733 
734 	if (mdb_readstr(data->dtmd_modstr, MDB_SYM_NAMLEN,
735 	    (uintptr_t)m->mod_modname) == -1)
736 		return (WALK_ERR);
737 
738 	return (WALK_DONE);
739 }
740 
741 static int
742 dtracemdb_lookup_by_addr(void *varg, GElf_Addr addr, GElf_Sym *symp,
743     dtrace_syminfo_t *sip)
744 {
745 	dtracemdb_data_t *data = varg;
746 
747 	if (data->dtmd_symstr == NULL) {
748 		data->dtmd_symstr = mdb_zalloc(MDB_SYM_NAMLEN,
749 		    UM_SLEEP | UM_GC);
750 	}
751 
752 	if (data->dtmd_modstr == NULL) {
753 		data->dtmd_modstr = mdb_zalloc(MDB_SYM_NAMLEN,
754 		    UM_SLEEP | UM_GC);
755 	}
756 
757 	if (symp != NULL) {
758 		if (mdb_lookup_by_addr(addr, MDB_SYM_FUZZY, data->dtmd_symstr,
759 		    MDB_SYM_NAMLEN, symp) == -1)
760 			return (-1);
761 	}
762 
763 	if (sip != NULL) {
764 		data->dtmd_addr = addr;
765 
766 		(void) strcpy(data->dtmd_modstr, "???");
767 
768 		if (mdb_walk("modctl",
769 		    (mdb_walk_cb_t)dtracemdb_modctl, varg) == -1) {
770 			mdb_warn("couldn't walk 'modctl'");
771 			return (-1);
772 		}
773 
774 		sip->dts_object = data->dtmd_modstr;
775 		sip->dts_id = 0;
776 		sip->dts_name = symp != NULL ? data->dtmd_symstr : NULL;
777 	}
778 
779 	return (0);
780 }
781 
782 /*ARGSUSED*/
783 static int
784 dtracemdb_stat(void *varg, processorid_t cpu)
785 {
786 	GElf_Sym sym;
787 	cpu_t c;
788 	uintptr_t caddr, addr;
789 
790 	if (mdb_lookup_by_name("cpu", &sym) == -1) {
791 		mdb_warn("failed to find symbol for 'cpu'");
792 		return (-1);
793 	}
794 
795 	if (cpu * sizeof (uintptr_t) > sym.st_size)
796 		return (-1);
797 
798 	addr = (uintptr_t)sym.st_value + cpu * sizeof (uintptr_t);
799 
800 	if (mdb_vread(&caddr, sizeof (caddr), addr) == -1) {
801 		mdb_warn("failed to read cpu[%d]", cpu);
802 		return (-1);
803 	}
804 
805 	if (caddr == NULL)
806 		return (-1);
807 
808 	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
809 		mdb_warn("failed to read cpu at %p", caddr);
810 		return (-1);
811 	}
812 
813 	if (c.cpu_flags & CPU_POWEROFF) {
814 		return (P_POWEROFF);
815 	} else if (c.cpu_flags & CPU_SPARE) {
816 		return (P_SPARE);
817 	} else if (c.cpu_flags & CPU_FAULTED) {
818 		return (P_FAULTED);
819 	} else if ((c.cpu_flags & (CPU_READY | CPU_OFFLINE)) != CPU_READY) {
820 		return (P_OFFLINE);
821 	} else if (c.cpu_flags & CPU_ENABLE) {
822 		return (P_ONLINE);
823 	} else {
824 		return (P_NOINTR);
825 	}
826 }
827 
828 /*ARGSUSED*/
829 static long
830 dtracemdb_sysconf(void *varg, int name)
831 {
832 	int max_ncpus;
833 	processorid_t max_cpuid;
834 
835 	switch (name) {
836 	case _SC_CPUID_MAX:
837 		if (mdb_readvar(&max_cpuid, "max_cpuid") == -1) {
838 			mdb_warn("failed to read 'max_cpuid'");
839 			return (-1);
840 		}
841 
842 		return (max_cpuid);
843 
844 	case _SC_NPROCESSORS_MAX:
845 		if (mdb_readvar(&max_ncpus, "max_ncpus") == -1) {
846 			mdb_warn("failed to read 'max_ncpus'");
847 			return (-1);
848 		}
849 
850 		return (max_ncpus);
851 
852 	default:
853 		mdb_warn("unexpected sysconf code %d\n", name);
854 		return (-1);
855 	}
856 }
857 
/*
 * Backend vector passed to dtrace_vopen() by the ::dtrace dcmd.  The
 * initializers are positional, so their order must match the member order
 * of dtrace_vector_t.
 */
const dtrace_vector_t dtrace_mdbops = {
	dtracemdb_ioctl,
	dtracemdb_lookup_by_addr,
	dtracemdb_stat,
	dtracemdb_sysconf
};
864 
/*
 * State shared between the ::dtrace dcmd and its consume callbacks.
 */
typedef struct dtrace_dcmddata {
	dtrace_hdl_t *dtdd_dtp;		/* libdtrace handle in use */
	int dtdd_cpu;			/* CPU to show; -1 shows every CPU */
	int dtdd_quiet;			/* non-zero if 'quiet' option is set */
	int dtdd_flowindent;		/* non-zero if 'flowindent' is set */
	int dtdd_heading;		/* non-zero once heading is printed */
} dtrace_dcmddata_t;
872 
873 /*ARGSUSED*/
874 static int
875 dtrace_dcmdrec(const dtrace_probedata_t *data,
876     const dtrace_recdesc_t *rec, void *arg)
877 {
878 	dtrace_dcmddata_t *dd = arg;
879 
880 	if (rec == NULL) {
881 		/*
882 		 * We have processed the final record; output the newline if
883 		 * we're not in quiet mode.
884 		 */
885 		if (!dd->dtdd_quiet)
886 			mdb_printf("\n");
887 
888 		return (DTRACE_CONSUME_NEXT);
889 	}
890 
891 	return (DTRACE_CONSUME_THIS);
892 }
893 
894 /*ARGSUSED*/
895 static int
896 dtrace_dcmdprobe(const dtrace_probedata_t *data, void *arg)
897 {
898 	dtrace_probedesc_t *pd = data->dtpda_pdesc;
899 	processorid_t cpu = data->dtpda_cpu;
900 	dtrace_dcmddata_t *dd = arg;
901 	char name[DTRACE_FUNCNAMELEN + DTRACE_NAMELEN + 2];
902 
903 	if (dd->dtdd_cpu != -1UL && dd->dtdd_cpu != cpu)
904 		return (DTRACE_CONSUME_NEXT);
905 
906 	if (dd->dtdd_heading == 0) {
907 		if (!dd->dtdd_flowindent) {
908 			if (!dd->dtdd_quiet) {
909 				mdb_printf("%3s %6s %32s\n",
910 				    "CPU", "ID", "FUNCTION:NAME");
911 			}
912 		} else {
913 			mdb_printf("%3s %-41s\n", "CPU", "FUNCTION");
914 		}
915 		dd->dtdd_heading = 1;
916 	}
917 
918 	if (!dd->dtdd_flowindent) {
919 		if (!dd->dtdd_quiet) {
920 			(void) mdb_snprintf(name, sizeof (name), "%s:%s",
921 			    pd->dtpd_func, pd->dtpd_name);
922 
923 			mdb_printf("%3d %6d %32s ", cpu, pd->dtpd_id, name);
924 		}
925 	} else {
926 		int indent = data->dtpda_indent;
927 
928 		if (data->dtpda_flow == DTRACEFLOW_NONE) {
929 			(void) mdb_snprintf(name, sizeof (name), "%*s%s%s:%s",
930 			    indent, "", data->dtpda_prefix, pd->dtpd_func,
931 			    pd->dtpd_name);
932 		} else {
933 			(void) mdb_snprintf(name, sizeof (name), "%*s%s%s",
934 			    indent, "", data->dtpda_prefix, pd->dtpd_func);
935 		}
936 
937 		mdb_printf("%3d %-41s ", cpu, name);
938 	}
939 
940 	return (DTRACE_CONSUME_THIS);
941 }
942 
943 /*ARGSUSED*/
944 static int
945 dtrace_dcmderr(const dtrace_errdata_t *data, void *arg)
946 {
947 	mdb_warn(data->dteda_msg);
948 	return (DTRACE_HANDLE_OK);
949 }
950 
951 /*ARGSUSED*/
952 static int
953 dtrace_dcmddrop(const dtrace_dropdata_t *data, void *arg)
954 {
955 	mdb_warn(data->dtdda_msg);
956 	return (DTRACE_HANDLE_OK);
957 }
958 
959 /*ARGSUSED*/
960 static int
961 dtrace_dcmdbuffered(const dtrace_bufdata_t *bufdata, void *arg)
962 {
963 	mdb_printf("%s", bufdata->dtbda_buffered);
964 	return (DTRACE_HANDLE_OK);
965 }
966 
/*
 * ::dtrace dcmd:  given the address of a dtrace_state_t, open a libdtrace
 * handle backed by dtrace_mdbops and use it to print the consumer's
 * buffered trace data and aggregations.
 *
 * The optional '-c cpu' argument restricts trace output to one CPU.
 *
 * Returns DCMD_USAGE for a missing address or bad options, DCMD_OK on
 * success, and DCMD_ERR on any failure.  Once the handle is open, every
 * exit goes through the 'err' label so that the handle is always closed.
 */
/*ARGSUSED*/
int
dtrace(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	dtrace_state_t state;
	dtrace_hdl_t *dtp;
	int ncpu, err;
	uintptr_t c = -1UL;		/* -1 means "all CPUs" */
	dtrace_dcmddata_t dd;
	dtrace_optval_t val;
	dtracemdb_data_t md;
	int rval = DCMD_ERR;

	if (!(flags & DCMD_ADDRSPEC))
		return (DCMD_USAGE);

	if (mdb_getopts(argc, argv, 'c', MDB_OPT_UINTPTR, &c, NULL) != argc)
		return (DCMD_USAGE);

	/*
	 * NOTE(review): ncpu is not used after this read in the code visible
	 * here; the read only fails the dcmd when '_ncpu' is unreadable.
	 */
	if (mdb_readvar(&ncpu, "_ncpu") == -1) {
		mdb_warn("failed to read '_ncpu'");
		return (DCMD_ERR);
	}

	if (mdb_vread(&state, sizeof (state), addr) == -1) {
		mdb_warn("couldn't read dtrace_state_t at %p", addr);
		return (DCMD_ERR);
	}

	/* The backend callbacks work from this local copy of the state. */
	bzero(&md, sizeof (md));
	md.dtmd_state = &state;

	if ((dtp = dtrace_vopen(DTRACE_VERSION, DTRACE_O_NOSYS, &err,
	    &dtrace_mdbops, &md)) == NULL) {
		mdb_warn("failed to initialize dtrace: %s\n",
		    dtrace_errmsg(NULL, err));
		return (DCMD_ERR);
	}

	if (dtrace_go(dtp) != 0) {
		mdb_warn("failed to initialize dtrace: %s\n",
		    dtrace_errmsg(dtp, dtrace_errno(dtp)));
		goto err;
	}

	bzero(&dd, sizeof (dd));
	dd.dtdd_dtp = dtp;
	dd.dtdd_cpu = c;

	if (dtrace_getopt(dtp, "flowindent", &val) == -1) {
		mdb_warn("couldn't get 'flowindent' option: %s\n",
		    dtrace_errmsg(dtp, dtrace_errno(dtp)));
		goto err;
	}

	dd.dtdd_flowindent = (val != DTRACEOPT_UNSET);

	if (dtrace_getopt(dtp, "quiet", &val) == -1) {
		mdb_warn("couldn't get 'quiet' option: %s\n",
		    dtrace_errmsg(dtp, dtrace_errno(dtp)));
		goto err;
	}

	dd.dtdd_quiet = (val != DTRACEOPT_UNSET);

	/* Route library errors, drops and buffered output through MDB. */
	if (dtrace_handle_err(dtp, dtrace_dcmderr, NULL) == -1) {
		mdb_warn("couldn't add err handler: %s\n",
		    dtrace_errmsg(dtp, dtrace_errno(dtp)));
		goto err;
	}

	if (dtrace_handle_drop(dtp, dtrace_dcmddrop, NULL) == -1) {
		mdb_warn("couldn't add drop handler: %s\n",
		    dtrace_errmsg(dtp, dtrace_errno(dtp)));
		goto err;
	}

	if (dtrace_handle_buffered(dtp, dtrace_dcmdbuffered, NULL) == -1) {
		mdb_warn("couldn't add buffered handler: %s\n",
		    dtrace_errmsg(dtp, dtrace_errno(dtp)));
		goto err;
	}

	if (dtrace_status(dtp) == -1) {
		mdb_warn("couldn't get status: %s\n",
		    dtrace_errmsg(dtp, dtrace_errno(dtp)));
		goto err;
	}

	if (dtrace_aggregate_snap(dtp) == -1) {
		mdb_warn("couldn't snapshot aggregation: %s\n",
		    dtrace_errmsg(dtp, dtrace_errno(dtp)));
		goto err;
	}

	/*
	 * A failure to consume the trace buffers is reported but does not
	 * abort the dcmd:  control still falls through to the aggregation
	 * print below.
	 */
	if (dtrace_consume(dtp, NULL,
	    dtrace_dcmdprobe, dtrace_dcmdrec, &dd) == -1) {
		mdb_warn("couldn't consume DTrace buffers: %s\n",
		    dtrace_errmsg(dtp, dtrace_errno(dtp)));
	}

	if (dtrace_aggregate_print(dtp, NULL, NULL) == -1) {
		mdb_warn("couldn't print aggregation: %s\n",
		    dtrace_errmsg(dtp, dtrace_errno(dtp)));
		goto err;
	}

	rval = DCMD_OK;
err:
	/* Reached on success as well; rval tells the two cases apart. */
	dtrace_close(dtp);
	return (rval);
}
1079 
1080 static int
1081 dtrace_errhash_cmp(const void *l, const void *r)
1082 {
1083 	uintptr_t lhs = *((uintptr_t *)l);
1084 	uintptr_t rhs = *((uintptr_t *)r);
1085 	dtrace_errhash_t lerr, rerr;
1086 	char lmsg[256], rmsg[256];
1087 
1088 	(void) mdb_vread(&lerr, sizeof (lerr), lhs);
1089 	(void) mdb_vread(&rerr, sizeof (rerr), rhs);
1090 
1091 	if (lerr.dter_msg == NULL)
1092 		return (-1);
1093 
1094 	if (rerr.dter_msg == NULL)
1095 		return (1);
1096 
1097 	(void) mdb_readstr(lmsg, sizeof (lmsg), (uintptr_t)lerr.dter_msg);
1098 	(void) mdb_readstr(rmsg, sizeof (rmsg), (uintptr_t)rerr.dter_msg);
1099 
1100 	return (strcmp(lmsg, rmsg));
1101 }
1102 
1103 int
1104 dtrace_errhash_init(mdb_walk_state_t *wsp)
1105 {
1106 	GElf_Sym sym;
1107 	uintptr_t *hash, addr;
1108 	int i;
1109 
1110 	if (wsp->walk_addr != NULL) {
1111 		mdb_warn("dtrace_errhash walk only supports global walks\n");
1112 		return (WALK_ERR);
1113 	}
1114 
1115 	if (mdb_lookup_by_name("dtrace_errhash", &sym) == -1) {
1116 		mdb_warn("couldn't find 'dtrace_errhash' (non-DEBUG kernel?)");
1117 		return (WALK_ERR);
1118 	}
1119 
1120 	addr = (uintptr_t)sym.st_value;
1121 	hash = mdb_alloc(DTRACE_ERRHASHSZ * sizeof (uintptr_t),
1122 	    UM_SLEEP | UM_GC);
1123 
1124 	for (i = 0; i < DTRACE_ERRHASHSZ; i++)
1125 		hash[i] = addr + i * sizeof (dtrace_errhash_t);
1126 
1127 	qsort(hash, DTRACE_ERRHASHSZ, sizeof (uintptr_t), dtrace_errhash_cmp);
1128 
1129 	wsp->walk_addr = 0;
1130 	wsp->walk_data = hash;
1131 
1132 	return (WALK_NEXT);
1133 }
1134 
1135 int
1136 dtrace_errhash_step(mdb_walk_state_t *wsp)
1137 {
1138 	int ndx = (int)wsp->walk_addr;
1139 	uintptr_t *hash = wsp->walk_data;
1140 	dtrace_errhash_t err;
1141 	uintptr_t addr;
1142 
1143 	if (ndx >= DTRACE_ERRHASHSZ)
1144 		return (WALK_DONE);
1145 
1146 	wsp->walk_addr = ndx + 1;
1147 	addr = hash[ndx];
1148 
1149 	if (mdb_vread(&err, sizeof (err), addr) == -1) {
1150 		mdb_warn("failed to read dtrace_errhash_t at %p", addr);
1151 		return (WALK_DONE);
1152 	}
1153 
1154 	if (err.dter_msg == NULL)
1155 		return (WALK_NEXT);
1156 
1157 	return (wsp->walk_callback(addr, &err, wsp->walk_cbdata));
1158 }
1159 
1160 /*ARGSUSED*/
1161 int
1162 dtrace_errhash(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1163 {
1164 	dtrace_errhash_t err;
1165 	char msg[256];
1166 
1167 	if (!(flags & DCMD_ADDRSPEC)) {
1168 		if (mdb_walk_dcmd("dtrace_errhash", "dtrace_errhash",
1169 		    argc, argv) == -1) {
1170 			mdb_warn("can't walk 'dtrace_errhash'");
1171 			return (DCMD_ERR);
1172 		}
1173 
1174 		return (DCMD_OK);
1175 	}
1176 
1177 	if (DCMD_HDRSPEC(flags))
1178 		mdb_printf("%8s %s\n", "COUNT", "ERROR");
1179 
1180 	if (mdb_vread(&err, sizeof (err), addr) == -1) {
1181 		mdb_warn("failed to read dtrace_errhash_t at %p", addr);
1182 		return (DCMD_ERR);
1183 	}
1184 
1185 	addr = (uintptr_t)err.dter_msg;
1186 
1187 	if (mdb_readstr(msg, sizeof (msg), addr) == -1) {
1188 		mdb_warn("failed to read error msg at %p", addr);
1189 		return (DCMD_ERR);
1190 	}
1191 
1192 	mdb_printf("%8d %s", err.dter_count, msg);
1193 
1194 	/*
1195 	 * Some error messages include a newline -- only print the newline
1196 	 * if the message doesn't have one.
1197 	 */
1198 	if (msg[strlen(msg) - 1] != '\n')
1199 		mdb_printf("\n");
1200 
1201 	return (DCMD_OK);
1202 }
1203 
1204 int
1205 dtrace_helptrace_init(mdb_walk_state_t *wsp)
1206 {
1207 	uint32_t next;
1208 	int enabled;
1209 
1210 	if (wsp->walk_addr != NULL) {
1211 		mdb_warn("dtrace_helptrace only supports global walks\n");
1212 		return (WALK_ERR);
1213 	}
1214 
1215 	if (mdb_readvar(&enabled, "dtrace_helptrace_enabled") == -1) {
1216 		mdb_warn("couldn't read 'dtrace_helptrace_enabled'");
1217 		return (WALK_ERR);
1218 	}
1219 
1220 	if (!enabled) {
1221 		mdb_warn("helper tracing is not enabled\n");
1222 		return (WALK_ERR);
1223 	}
1224 
1225 	if (mdb_readvar(&next, "dtrace_helptrace_next") == -1) {
1226 		mdb_warn("couldn't read 'dtrace_helptrace_next'");
1227 		return (WALK_ERR);
1228 	}
1229 
1230 	wsp->walk_addr = next;
1231 
1232 	return (WALK_NEXT);
1233 }
1234 
1235 int
1236 dtrace_helptrace_step(mdb_walk_state_t *wsp)
1237 {
1238 	uint32_t next, size, nlocals, bufsize;
1239 	uintptr_t buffer, addr;
1240 	dtrace_helptrace_t *ht;
1241 	int rval;
1242 
1243 	if (mdb_readvar(&next, "dtrace_helptrace_next") == -1) {
1244 		mdb_warn("couldn't read 'dtrace_helptrace_next'");
1245 		return (WALK_ERR);
1246 	}
1247 
1248 	if (mdb_readvar(&bufsize, "dtrace_helptrace_bufsize") == -1) {
1249 		mdb_warn("couldn't read 'dtrace_helptrace_bufsize'");
1250 		return (WALK_ERR);
1251 	}
1252 
1253 	if (mdb_readvar(&buffer, "dtrace_helptrace_buffer") == -1) {
1254 		mdb_warn("couldn't read 'dtrace_helptrace_buffer'");
1255 		return (WALK_ERR);
1256 	}
1257 
1258 	if (mdb_readvar(&nlocals, "dtrace_helptrace_nlocals") == -1) {
1259 		mdb_warn("couldn't read 'dtrace_helptrace_nlocals'");
1260 		return (WALK_ERR);
1261 	}
1262 
1263 	size = sizeof (dtrace_helptrace_t) +
1264 	    nlocals * sizeof (uint64_t) - sizeof (uint64_t);
1265 
1266 	if (wsp->walk_addr + size > bufsize) {
1267 		if (next == 0)
1268 			return (WALK_DONE);
1269 
1270 		wsp->walk_addr = 0;
1271 	}
1272 
1273 	addr = buffer + wsp->walk_addr;
1274 	ht = alloca(size);
1275 
1276 	if (mdb_vread(ht, size, addr) == -1) {
1277 		mdb_warn("couldn't read entry at %p", addr);
1278 		return (WALK_ERR);
1279 	}
1280 
1281 	if (ht->dtht_helper != NULL) {
1282 		rval = wsp->walk_callback(addr, ht, wsp->walk_cbdata);
1283 
1284 		if (rval != WALK_NEXT)
1285 			return (rval);
1286 	}
1287 
1288 	if (wsp->walk_addr < next && wsp->walk_addr + size >= next)
1289 		return (WALK_DONE);
1290 
1291 	wsp->walk_addr += size;
1292 	return (WALK_NEXT);
1293 }
1294 
1295 int
1296 dtrace_helptrace(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1297 {
1298 	dtrace_helptrace_t help;
1299 	dtrace_helper_action_t helper;
1300 	char where[30];
1301 	uint_t opt_v = FALSE;
1302 	uintptr_t haddr;
1303 
1304 	if (!(flags & DCMD_ADDRSPEC)) {
1305 		if (mdb_walk_dcmd("dtrace_helptrace", "dtrace_helptrace",
1306 		    argc, argv) == -1) {
1307 			mdb_warn("can't walk 'dtrace_helptrace'");
1308 			return (DCMD_ERR);
1309 		}
1310 
1311 		return (DCMD_OK);
1312 	}
1313 
1314 	if (mdb_getopts(argc, argv, 'v',
1315 	    MDB_OPT_SETBITS, TRUE, &opt_v, NULL) != argc)
1316 		return (DCMD_USAGE);
1317 
1318 	if (DCMD_HDRSPEC(flags)) {
1319 		mdb_printf(" %?s %?s %12s %s\n",
1320 		    "ADDR", "HELPER", "WHERE", "DIFO");
1321 	}
1322 
1323 	if (mdb_vread(&help, sizeof (help), addr) == -1) {
1324 		mdb_warn("failed to read dtrace_helptrace_t at %p", addr);
1325 		return (DCMD_ERR);
1326 	}
1327 
1328 	switch (help.dtht_where) {
1329 	case 0:
1330 		(void) mdb_snprintf(where, sizeof (where), "predicate");
1331 		break;
1332 
1333 	case DTRACE_HELPTRACE_NEXT:
1334 		(void) mdb_snprintf(where, sizeof (where), "next");
1335 		break;
1336 
1337 	case DTRACE_HELPTRACE_DONE:
1338 		(void) mdb_snprintf(where, sizeof (where), "done");
1339 		break;
1340 
1341 	case DTRACE_HELPTRACE_ERR:
1342 		(void) mdb_snprintf(where, sizeof (where), "err");
1343 		break;
1344 
1345 	default:
1346 		(void) mdb_snprintf(where, sizeof (where),
1347 		    "action #%d", help.dtht_where);
1348 		break;
1349 	}
1350 
1351 	mdb_printf(" %?p %?p %12s ", addr, help.dtht_helper, where);
1352 
1353 	haddr = (uintptr_t)help.dtht_helper;
1354 
1355 	if (mdb_vread(&helper, sizeof (helper), haddr) == -1) {
1356 		/*
1357 		 * We're not going to warn in this case -- we're just not going
1358 		 * to print anything exciting.
1359 		 */
1360 		mdb_printf("???\n");
1361 	} else {
1362 		switch (help.dtht_where) {
1363 		case 0:
1364 			mdb_printf("%p\n", helper.dtha_predicate);
1365 			break;
1366 
1367 		case DTRACE_HELPTRACE_NEXT:
1368 		case DTRACE_HELPTRACE_DONE:
1369 		case DTRACE_HELPTRACE_ERR:
1370 			mdb_printf("-\n");
1371 			break;
1372 
1373 		default:
1374 			haddr = (uintptr_t)helper.dtha_actions +
1375 			    (help.dtht_where - 1) * sizeof (uintptr_t);
1376 
1377 			if (mdb_vread(&haddr, sizeof (haddr), haddr) == -1) {
1378 				mdb_printf("???\n");
1379 			} else {
1380 				mdb_printf("%p\n", haddr);
1381 			}
1382 		}
1383 	}
1384 
1385 	if (opt_v) {
1386 		int i;
1387 
1388 		if (help.dtht_where == DTRACE_HELPTRACE_ERR) {
1389 			int f = help.dtht_fault;
1390 
1391 			mdb_printf("%?s| %?s %10s |\n", "", "", "");
1392 			mdb_printf("%?s| %?s %10s +->  fault: %s\n", "", "", "",
1393 			    f == DTRACEFLT_BADADDR ? "BADADDR" :
1394 			    f == DTRACEFLT_BADALIGN ? "BADALIGN" :
1395 			    f == DTRACEFLT_ILLOP ? "ILLOP" :
1396 			    f == DTRACEFLT_DIVZERO ? "DIVZERO" :
1397 			    f == DTRACEFLT_NOSCRATCH ? "NOSCRATCH" :
1398 			    f == DTRACEFLT_KPRIV ? "KPRIV" :
1399 			    f == DTRACEFLT_UPRIV ? "UPRIV" :
1400 			    f == DTRACEFLT_TUPOFLOW ? "TUPOFLOW" :
1401 			    f == DTRACEFLT_BADSTACK ? "BADSTACK" :
1402 			    "DTRACEFLT_UNKNOWN");
1403 			mdb_printf("%?s| %?s %12s     addr: 0x%x\n", "", "", "",
1404 			    help.dtht_illval);
1405 			mdb_printf("%?s| %?s %12s   offset: %d\n", "", "", "",
1406 			    help.dtht_fltoffs);
1407 		}
1408 
1409 		mdb_printf("%?s|\n%?s+--> %?s %4s %s\n", "", "",
1410 		    "ADDR", "NDX", "VALUE");
1411 		addr += sizeof (help) - sizeof (uint64_t);
1412 
1413 		for (i = 0; i < help.dtht_nlocals; i++) {
1414 			uint64_t val;
1415 
1416 			if (mdb_vread(&val, sizeof (val), addr) == -1) {
1417 				mdb_warn("couldn't read local at %p", addr);
1418 				continue;
1419 			}
1420 
1421 			mdb_printf("%?s     %?p %4d %p\n", "", addr, i, val);
1422 			addr += sizeof (uint64_t);
1423 		}
1424 
1425 		mdb_printf("\n");
1426 	}
1427 
1428 	return (DCMD_OK);
1429 }
1430 
1431 /*ARGSUSED*/
1432 static int
1433 dtrace_state_walk(uintptr_t addr, const vmem_seg_t *seg, minor_t *highest)
1434 {
1435 	if (seg->vs_end > *highest)
1436 		*highest = seg->vs_end;
1437 
1438 	return (WALK_NEXT);
1439 }
1440 
1441 typedef struct dtrace_state_walk {
1442 	uintptr_t dtsw_softstate;
1443 	minor_t dtsw_max;
1444 	minor_t dtsw_current;
1445 } dtrace_state_walk_t;
1446 
1447 int
1448 dtrace_state_init(mdb_walk_state_t *wsp)
1449 {
1450 	uintptr_t dtrace_minor;
1451 	minor_t max = 0;
1452 	dtrace_state_walk_t *dw;
1453 
1454 	if (wsp->walk_addr != NULL) {
1455 		mdb_warn("dtrace_state only supports global walks\n");
1456 		return (WALK_ERR);
1457 	}
1458 
1459 	/*
1460 	 * Find the dtrace_minor vmem arena and walk it to get the maximum
1461 	 * minor number.
1462 	 */
1463 	if (mdb_readvar(&dtrace_minor, "dtrace_minor") == -1) {
1464 		mdb_warn("failed to read 'dtrace_minor'");
1465 		return (WALK_ERR);
1466 	}
1467 
1468 	if (mdb_pwalk("vmem_alloc", (mdb_walk_cb_t)dtrace_state_walk,
1469 	    &max, dtrace_minor) == -1) {
1470 		mdb_warn("couldn't walk 'vmem_alloc'");
1471 		return (WALK_ERR);
1472 	}
1473 
1474 	dw = mdb_zalloc(sizeof (dtrace_state_walk_t), UM_SLEEP | UM_GC);
1475 	dw->dtsw_current = 0;
1476 	dw->dtsw_max = max;
1477 
1478 	if (mdb_readvar(&dw->dtsw_softstate, "dtrace_softstate") == -1) {
1479 		mdb_warn("failed to read 'dtrace_softstate'");
1480 		return (DCMD_ERR);
1481 	}
1482 
1483 	wsp->walk_data = dw;
1484 
1485 	return (WALK_NEXT);
1486 }
1487 
1488 int
1489 dtrace_state_step(mdb_walk_state_t *wsp)
1490 {
1491 	dtrace_state_walk_t *dw = wsp->walk_data;
1492 	uintptr_t statep;
1493 	dtrace_state_t state;
1494 	int rval;
1495 
1496 	while (mdb_get_soft_state_byaddr(dw->dtsw_softstate, dw->dtsw_current,
1497 	    &statep, NULL, 0) == -1) {
1498 		if (dw->dtsw_current >= dw->dtsw_max)
1499 			return (WALK_DONE);
1500 
1501 		dw->dtsw_current++;
1502 	}
1503 
1504 	if (mdb_vread(&state, sizeof (state), statep) == -1) {
1505 		mdb_warn("couldn't read dtrace_state_t at %p", statep);
1506 		return (WALK_NEXT);
1507 	}
1508 
1509 	rval = wsp->walk_callback(statep, &state, wsp->walk_cbdata);
1510 	dw->dtsw_current++;
1511 
1512 	return (rval);
1513 }
1514 
/*
 * Search state shared by the ::dtrace_state dcmd and its proc and file
 * walk callbacks.
 */
typedef struct dtrace_state_data {
	int dtsd_major;			/* devi_major of 'dtrace_devi' */
	uintptr_t dtsd_proc;		/* process currently being examined */
	uintptr_t dtsd_softstate;	/* value read from 'dtrace_softstate' */
	uintptr_t dtsd_state;		/* dtrace_state_t being searched for */
} dtrace_state_data_t;
1521 
1522 static int
1523 dtrace_state_file(uintptr_t addr, struct file *f, dtrace_state_data_t *data)
1524 {
1525 	vnode_t vnode;
1526 	proc_t proc;
1527 	minor_t minor;
1528 	uintptr_t statep;
1529 
1530 	if (mdb_vread(&vnode, sizeof (vnode), (uintptr_t)f->f_vnode) == -1) {
1531 		mdb_warn("couldn't read vnode at %p", (uintptr_t)f->f_vnode);
1532 		return (WALK_NEXT);
1533 	}
1534 
1535 	if (getmajor(vnode.v_rdev) != data->dtsd_major)
1536 		return (WALK_NEXT);
1537 
1538 	minor = getminor(vnode.v_rdev);
1539 
1540 	if (mdb_vread(&proc, sizeof (proc), data->dtsd_proc) == -1) {
1541 		mdb_warn("failed to read proc at %p", data->dtsd_proc);
1542 		return (WALK_NEXT);
1543 	}
1544 
1545 	if (mdb_get_soft_state_byaddr(data->dtsd_softstate, minor,
1546 	    &statep, NULL, 0) == -1) {
1547 		mdb_warn("failed to read softstate for minor %d", minor);
1548 		return (WALK_NEXT);
1549 	}
1550 
1551 	if (statep != data->dtsd_state)
1552 		return (WALK_NEXT);
1553 
1554 	mdb_printf("%?p %5d %?p %-*s %?p\n", statep, minor,
1555 	    data->dtsd_proc, MAXCOMLEN, proc.p_user.u_comm, addr);
1556 
1557 	return (WALK_NEXT);
1558 }
1559 
1560 /*ARGSUSED*/
1561 static int
1562 dtrace_state_proc(uintptr_t addr, void *ignored, dtrace_state_data_t *data)
1563 {
1564 	data->dtsd_proc = addr;
1565 
1566 	if (mdb_pwalk("file",
1567 	    (mdb_walk_cb_t)dtrace_state_file, data, addr) == -1) {
1568 		mdb_warn("couldn't walk 'file' for proc %p", addr);
1569 		return (WALK_ERR);
1570 	}
1571 
1572 	return (WALK_NEXT);
1573 }
1574 
/*
 * Print the help text for the ::dtrace_state dcmd.
 */
void
dtrace_state_help(void)
{
	mdb_printf(
	    "Given a dtrace_state_t structure, displays all consumers, "
	    "or \"<anonymous>\"\n"
	    "if the consumer is anonymous.  If no state structure is "
	    "provided, iterates\n"
	    "over all state structures.\n\n"
	    "Addresses in ADDR column may be provided to ::dtrace "
	    "to obtain\n"
	    "dtrace(1M)-like output for in-kernel DTrace data.\n");
}
1586 
1587 int
1588 dtrace_state(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1589 {
1590 	uintptr_t devi;
1591 	struct dev_info info;
1592 	dtrace_state_data_t data;
1593 	dtrace_anon_t anon;
1594 	dtrace_state_t state;
1595 
1596 	if (!(flags & DCMD_ADDRSPEC)) {
1597 		if (mdb_walk_dcmd("dtrace_state",
1598 		    "dtrace_state", argc, argv) == -1) {
1599 			mdb_warn("can't walk dtrace_state");
1600 			return (DCMD_ERR);
1601 		}
1602 		return (DCMD_OK);
1603 	}
1604 
1605 	if (DCMD_HDRSPEC(flags)) {
1606 		mdb_printf("%?s %5s %?s %-*s %?s\n", "ADDR", "MINOR", "PROC",
1607 		    MAXCOMLEN, "NAME", "FILE");
1608 	}
1609 
1610 	/*
1611 	 * First determine if this is anonymous state.
1612 	 */
1613 	if (mdb_readvar(&anon, "dtrace_anon") == -1) {
1614 		mdb_warn("failed to read 'dtrace_anon'");
1615 		return (DCMD_ERR);
1616 	}
1617 
1618 	if ((uintptr_t)anon.dta_state == addr) {
1619 		if (mdb_vread(&state, sizeof (state), addr) == -1) {
1620 			mdb_warn("failed to read anon at %p", addr);
1621 			return (DCMD_ERR);
1622 		}
1623 
1624 		mdb_printf("%?p %5d %?s %-*s %?s\n", addr,
1625 		    getminor(state.dts_dev), "-", MAXCOMLEN,
1626 		    "<anonymous>", "-");
1627 
1628 		return (DCMD_OK);
1629 	}
1630 
1631 	if (mdb_readvar(&devi, "dtrace_devi") == -1) {
1632 		mdb_warn("failed to read 'dtrace_devi'");
1633 		return (DCMD_ERR);
1634 	}
1635 
1636 	if (mdb_vread(&info, sizeof (struct dev_info), devi) == -1) {
1637 		mdb_warn("failed to read 'dev_info'");
1638 		return (DCMD_ERR);
1639 	}
1640 
1641 	data.dtsd_major = info.devi_major;
1642 
1643 	if (mdb_readvar(&data.dtsd_softstate, "dtrace_softstate") == -1) {
1644 		mdb_warn("failed to read 'dtrace_softstate'");
1645 		return (DCMD_ERR);
1646 	}
1647 
1648 	data.dtsd_state = addr;
1649 
1650 	/*
1651 	 * Walk through all processes and all open files looking for this
1652 	 * state.  It must be open somewhere...
1653 	 */
1654 	if (mdb_walk("proc", (mdb_walk_cb_t)dtrace_state_proc, &data) == -1) {
1655 		mdb_warn("couldn't walk 'proc'");
1656 		return (DCMD_ERR);
1657 	}
1658 
1659 	return (DCMD_OK);
1660 }
1661 
/*
 * Private state of the dtrace_aggkey walker.
 */
typedef struct dtrace_aggkey_data {
	uintptr_t *dtakd_hash;		/* local copy of the hash buckets */
	uintptr_t dtakd_hashsize;	/* number of buckets in dtakd_hash */
	uintptr_t dtakd_next;		/* next key on current chain, or 0 */
	uintptr_t dtakd_ndx;		/* index of next bucket to start */
} dtrace_aggkey_data_t;
1668 
1669 int
1670 dtrace_aggkey_init(mdb_walk_state_t *wsp)
1671 {
1672 	dtrace_buffer_t buf;
1673 	uintptr_t addr;
1674 	dtrace_aggbuffer_t agb;
1675 	dtrace_aggkey_data_t *data;
1676 	size_t hsize;
1677 
1678 	if ((addr = wsp->walk_addr) == NULL) {
1679 		mdb_warn("dtrace_aggkey walk needs aggregation buffer\n");
1680 		return (WALK_ERR);
1681 	}
1682 
1683 	if (mdb_vread(&buf, sizeof (buf), addr) == -1) {
1684 		mdb_warn("failed to read aggregation buffer at %p", addr);
1685 		return (WALK_ERR);
1686 	}
1687 
1688 	addr = (uintptr_t)buf.dtb_tomax +
1689 	    buf.dtb_size - sizeof (dtrace_aggbuffer_t);
1690 
1691 	if (mdb_vread(&agb, sizeof (agb), addr) == -1) {
1692 		mdb_warn("failed to read dtrace_aggbuffer_t at %p", addr);
1693 		return (WALK_ERR);
1694 	}
1695 
1696 	data = mdb_zalloc(sizeof (dtrace_aggkey_data_t), UM_SLEEP);
1697 
1698 	data->dtakd_hashsize = agb.dtagb_hashsize;
1699 	hsize = agb.dtagb_hashsize * sizeof (dtrace_aggkey_t *);
1700 	data->dtakd_hash = mdb_alloc(hsize, UM_SLEEP);
1701 
1702 	if (mdb_vread(data->dtakd_hash, hsize,
1703 	    (uintptr_t)agb.dtagb_hash) == -1) {
1704 		mdb_warn("failed to read hash at %p",
1705 		    (uintptr_t)agb.dtagb_hash);
1706 		mdb_free(data->dtakd_hash, hsize);
1707 		mdb_free(data, sizeof (dtrace_aggkey_data_t));
1708 		return (WALK_ERR);
1709 	}
1710 
1711 	wsp->walk_data = data;
1712 	return (WALK_NEXT);
1713 }
1714 
1715 int
1716 dtrace_aggkey_step(mdb_walk_state_t *wsp)
1717 {
1718 	dtrace_aggkey_data_t *data = wsp->walk_data;
1719 	dtrace_aggkey_t key;
1720 	uintptr_t addr;
1721 
1722 	while ((addr = data->dtakd_next) == NULL) {
1723 		if (data->dtakd_ndx == data->dtakd_hashsize)
1724 			return (WALK_DONE);
1725 
1726 		data->dtakd_next = data->dtakd_hash[data->dtakd_ndx++];
1727 	}
1728 
1729 	if (mdb_vread(&key, sizeof (key), addr) == -1) {
1730 		mdb_warn("failed to read dtrace_aggkey_t at %p", addr);
1731 		return (WALK_ERR);
1732 	}
1733 
1734 	data->dtakd_next = (uintptr_t)key.dtak_next;
1735 
1736 	return (wsp->walk_callback(addr, &key, wsp->walk_cbdata));
1737 }
1738 
1739 void
1740 dtrace_aggkey_fini(mdb_walk_state_t *wsp)
1741 {
1742 	dtrace_aggkey_data_t *data = wsp->walk_data;
1743 	size_t hsize;
1744 
1745 	hsize = data->dtakd_hashsize * sizeof (dtrace_aggkey_t *);
1746 	mdb_free(data->dtakd_hash, hsize);
1747 	mdb_free(data, sizeof (dtrace_aggkey_data_t));
1748 }
1749 
1750 typedef struct dtrace_dynvar_data {
1751 	dtrace_dynhash_t *dtdvd_hash;
1752 	uintptr_t dtdvd_hashsize;
1753 	uintptr_t dtdvd_next;
1754 	uintptr_t dtdvd_ndx;
1755 } dtrace_dynvar_data_t;
1756 
1757 int
1758 dtrace_dynvar_init(mdb_walk_state_t *wsp)
1759 {
1760 	uintptr_t addr;
1761 	dtrace_dstate_t dstate;
1762 	dtrace_dynvar_data_t *data;
1763 	size_t hsize;
1764 
1765 	if ((addr = wsp->walk_addr) == NULL) {
1766 		mdb_warn("dtrace_dynvar walk needs dtrace_dstate_t\n");
1767 		return (WALK_ERR);
1768 	}
1769 
1770 	if (mdb_vread(&dstate, sizeof (dstate), addr) == -1) {
1771 		mdb_warn("failed to read dynamic state at %p", addr);
1772 		return (WALK_ERR);
1773 	}
1774 
1775 	data = mdb_zalloc(sizeof (dtrace_dynvar_data_t), UM_SLEEP);
1776 
1777 	data->dtdvd_hashsize = dstate.dtds_hashsize;
1778 	hsize = dstate.dtds_hashsize * sizeof (dtrace_dynhash_t);
1779 	data->dtdvd_hash = mdb_alloc(hsize, UM_SLEEP);
1780 
1781 	if (mdb_vread(data->dtdvd_hash, hsize,
1782 	    (uintptr_t)dstate.dtds_hash) == -1) {
1783 		mdb_warn("failed to read hash at %p",
1784 		    (uintptr_t)dstate.dtds_hash);
1785 		mdb_free(data->dtdvd_hash, hsize);
1786 		mdb_free(data, sizeof (dtrace_dynvar_data_t));
1787 		return (WALK_ERR);
1788 	}
1789 
1790 	wsp->walk_data = data;
1791 	return (WALK_NEXT);
1792 }
1793 
1794 int
1795 dtrace_dynvar_step(mdb_walk_state_t *wsp)
1796 {
1797 	dtrace_dynvar_data_t *data = wsp->walk_data;
1798 	dtrace_dynvar_t dynvar, *dvar;
1799 	size_t dvarsize;
1800 	uintptr_t addr;
1801 	int nkeys;
1802 
1803 	while ((addr = data->dtdvd_next) == NULL) {
1804 		if (data->dtdvd_ndx == data->dtdvd_hashsize)
1805 			return (WALK_DONE);
1806 
1807 		data->dtdvd_next =
1808 		    (uintptr_t)data->dtdvd_hash[data->dtdvd_ndx++].dtdh_chain;
1809 	}
1810 
1811 	if (mdb_vread(&dynvar, sizeof (dynvar), addr) == -1) {
1812 		mdb_warn("failed to read dtrace_dynvar_t at %p", addr);
1813 		return (WALK_ERR);
1814 	}
1815 
1816 	/*
1817 	 * Now we need to allocate the correct size.
1818 	 */
1819 	nkeys = dynvar.dtdv_tuple.dtt_nkeys;
1820 	dvarsize = (uintptr_t)&dynvar.dtdv_tuple.dtt_key[nkeys] -
1821 	    (uintptr_t)&dynvar;
1822 
1823 	dvar = alloca(dvarsize);
1824 
1825 	if (mdb_vread(dvar, dvarsize, addr) == -1) {
1826 		mdb_warn("failed to read dtrace_dynvar_t at %p", addr);
1827 		return (WALK_ERR);
1828 	}
1829 
1830 	data->dtdvd_next = (uintptr_t)dynvar.dtdv_next;
1831 
1832 	return (wsp->walk_callback(addr, dvar, wsp->walk_cbdata));
1833 }
1834 
1835 void
1836 dtrace_dynvar_fini(mdb_walk_state_t *wsp)
1837 {
1838 	dtrace_dynvar_data_t *data = wsp->walk_data;
1839 	size_t hsize;
1840 
1841 	hsize = data->dtdvd_hashsize * sizeof (dtrace_dynvar_t *);
1842 	mdb_free(data->dtdvd_hash, hsize);
1843 	mdb_free(data, sizeof (dtrace_dynvar_data_t));
1844 }
1845 
/*
 * State shared by the hash statistics code.  A hash function is handed the
 * key bytes in dthsd_data/dthsd_size and bumps the dthsd_counts entry of
 * the bucket it selects.
 */
typedef struct dtrace_hashstat_data {
	size_t *dthsd_counts;		/* per-bucket element counts */
	size_t dthsd_hashsize;		/* number of buckets in use */
	char *dthsd_data;		/* bytes of the key being hashed */
	size_t dthsd_size;		/* number of bytes in dthsd_data */
	int dthsd_header;		/* non-zero once header is printed */
} dtrace_hashstat_data_t;

/*
 * Signature of a candidate hash function.
 */
typedef void (*dtrace_hashstat_func_t)(dtrace_hashstat_data_t *);
1855 
1856 static void
1857 dtrace_hashstat_additive(dtrace_hashstat_data_t *data)
1858 {
1859 	int i;
1860 	int hval = 0;
1861 
1862 	for (i = 0; i < data->dthsd_size; i++)
1863 		hval += data->dthsd_data[i];
1864 
1865 	data->dthsd_counts[hval % data->dthsd_hashsize]++;
1866 }
1867 
1868 static void
1869 dtrace_hashstat_shifty(dtrace_hashstat_data_t *data)
1870 {
1871 	uint64_t hval = 0;
1872 	int i;
1873 
1874 	if (data->dthsd_size < sizeof (uint64_t)) {
1875 		dtrace_hashstat_additive(data);
1876 		return;
1877 	}
1878 
1879 	for (i = 0; i < data->dthsd_size; i += sizeof (uint64_t)) {
1880 		/* LINTED - alignment */
1881 		uint64_t val = *((uint64_t *)&data->dthsd_data[i]);
1882 
1883 		hval += (val & ((1 << NBBY) - 1)) +
1884 		    ((val >> NBBY) & ((1 << NBBY) - 1)) +
1885 		    ((val >> (NBBY << 1)) & ((1 << NBBY) - 1)) +
1886 		    ((val >> (NBBY << 2)) & ((1 << NBBY) - 1)) +
1887 		    (val & USHRT_MAX) + (val >> (NBBY << 1) & USHRT_MAX);
1888 	}
1889 
1890 	data->dthsd_counts[hval % data->dthsd_hashsize]++;
1891 }
1892 
1893 static void
1894 dtrace_hashstat_knuth(dtrace_hashstat_data_t *data)
1895 {
1896 	int i;
1897 	int hval = data->dthsd_size;
1898 
1899 	for (i = 0; i < data->dthsd_size; i++)
1900 		hval = (hval << 4) ^ (hval >> 28) ^ data->dthsd_data[i];
1901 
1902 	data->dthsd_counts[hval % data->dthsd_hashsize]++;
1903 }
1904 
1905 static void
1906 dtrace_hashstat_oneatatime(dtrace_hashstat_data_t *data)
1907 {
1908 	int i;
1909 	uint32_t hval = 0;
1910 
1911 	for (i = 0; i < data->dthsd_size; i++) {
1912 		hval += data->dthsd_data[i];
1913 		hval += (hval << 10);
1914 		hval ^= (hval >> 6);
1915 	}
1916 
1917 	hval += (hval << 3);
1918 	hval ^= (hval >> 11);
1919 	hval += (hval << 15);
1920 
1921 	data->dthsd_counts[hval % data->dthsd_hashsize]++;
1922 }
1923 
1924 static void
1925 dtrace_hashstat_fnv(dtrace_hashstat_data_t *data)
1926 {
1927 	static const uint32_t prime = 0x01000193;
1928 	uint32_t hval = 0;
1929 	int i;
1930 
1931 	for (i = 0; i < data->dthsd_size; i++) {
1932 		hval *= prime;
1933 		hval ^= data->dthsd_data[i];
1934 	}
1935 
1936 	data->dthsd_counts[hval % data->dthsd_hashsize]++;
1937 }
1938 
1939 static void
1940 dtrace_hashstat_stats(char *name, dtrace_hashstat_data_t *data)
1941 {
1942 	size_t nz = 0, i;
1943 	int longest = 0;
1944 	size_t ttl = 0;
1945 	double sum = 0.0;
1946 	double avg;
1947 	uint_t util, stddev;
1948 
1949 	if (!data->dthsd_header) {
1950 		mdb_printf("%15s %11s %11s %11s %11s %11s\n", "NAME",
1951 		    "HASHSIZE", "%UTIL", "LONGEST", "AVERAGE", "STDDEV");
1952 		data->dthsd_header = 1;
1953 	}
1954 
1955 	for (i = 0; i < data->dthsd_hashsize; i++) {
1956 		if (data->dthsd_counts[i] != 0) {
1957 			nz++;
1958 
1959 			if (data->dthsd_counts[i] > longest)
1960 				longest = data->dthsd_counts[i];
1961 
1962 			ttl += data->dthsd_counts[i];
1963 		}
1964 	}
1965 
1966 	if (nz == 0) {
1967 		mdb_printf("%15s %11d %11s %11s %11s %11s\n", name,
1968 		    data->dthsd_hashsize, "-", "-", "-", "-");
1969 		return;
1970 	}
1971 
1972 	avg = (double)ttl / (double)nz;
1973 
1974 	for (i = 0; i < data->dthsd_hashsize; i++) {
1975 		double delta = (double)data->dthsd_counts[i] - avg;
1976 
1977 		if (data->dthsd_counts[i] == 0)
1978 			continue;
1979 
1980 		sum += delta * delta;
1981 	}
1982 
1983 	util = (nz * 1000) / data->dthsd_hashsize;
1984 	stddev = (uint_t)sqrt(sum / (double)nz) * 10;
1985 
1986 	mdb_printf("%15s %11d %9u.%1u %11d %11d %9u.%1u\n", name,
1987 	    data->dthsd_hashsize, util / 10, util % 10, longest, ttl / nz,
1988 	    stddev / 10, stddev % 10);
1989 }
1990 
1991 static struct dtrace_hashstat {
1992 	char *dths_name;
1993 	dtrace_hashstat_func_t dths_func;
1994 } _dtrace_hashstat[] = {
1995 	{ "<actual>", NULL },
1996 	{ "additive", dtrace_hashstat_additive },
1997 	{ "shifty", dtrace_hashstat_shifty },
1998 	{ "knuth", dtrace_hashstat_knuth },
1999 	{ "one-at-a-time", dtrace_hashstat_oneatatime },
2000 	{ "fnv", dtrace_hashstat_fnv },
2001 	{ NULL, 0 }
2002 };
2003 
2004 typedef struct dtrace_aggstat_data {
2005 	dtrace_hashstat_data_t dtagsd_hash;
2006 	dtrace_hashstat_func_t dtagsd_func;
2007 } dtrace_aggstat_data_t;
2008 
2009 static int
2010 dtrace_aggstat_walk(uintptr_t addr, dtrace_aggkey_t *key,
2011     dtrace_aggstat_data_t *data)
2012 {
2013 	dtrace_hashstat_data_t *hdata = &data->dtagsd_hash;
2014 	size_t size;
2015 
2016 	if (data->dtagsd_func == NULL) {
2017 		size_t bucket = key->dtak_hashval % hdata->dthsd_hashsize;
2018 
2019 		hdata->dthsd_counts[bucket]++;
2020 		return (WALK_NEXT);
2021 	}
2022 
2023 	/*
2024 	 * We need to read the data.
2025 	 */
2026 	size = key->dtak_size - sizeof (dtrace_aggid_t);
2027 	addr = (uintptr_t)key->dtak_data + sizeof (dtrace_aggid_t);
2028 	hdata->dthsd_data = alloca(size);
2029 	hdata->dthsd_size = size;
2030 
2031 	if (mdb_vread(hdata->dthsd_data, size, addr) == -1) {
2032 		mdb_warn("couldn't read data at %p", addr);
2033 		return (WALK_ERR);
2034 	}
2035 
2036 	data->dtagsd_func(hdata);
2037 
2038 	return (WALK_NEXT);
2039 }
2040 
2041 /*ARGSUSED*/
2042 int
2043 dtrace_aggstat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2044 {
2045 	dtrace_buffer_t buf;
2046 	uintptr_t aaddr;
2047 	dtrace_aggbuffer_t agb;
2048 	size_t hsize, i, actual, prime, evenpow;
2049 	dtrace_aggstat_data_t data;
2050 	dtrace_hashstat_data_t *hdata = &data.dtagsd_hash;
2051 
2052 	bzero(&data, sizeof (data));
2053 
2054 	if (!(flags & DCMD_ADDRSPEC))
2055 		return (DCMD_USAGE);
2056 
2057 	if (mdb_vread(&buf, sizeof (buf), addr) == -1) {
2058 		mdb_warn("failed to read aggregation buffer at %p", addr);
2059 		return (DCMD_ERR);
2060 	}
2061 
2062 	aaddr = (uintptr_t)buf.dtb_tomax +
2063 	    buf.dtb_size - sizeof (dtrace_aggbuffer_t);
2064 
2065 	if (mdb_vread(&agb, sizeof (agb), aaddr) == -1) {
2066 		mdb_warn("failed to read dtrace_aggbuffer_t at %p", aaddr);
2067 		return (DCMD_ERR);
2068 	}
2069 
2070 	hsize = (actual = agb.dtagb_hashsize) * sizeof (size_t);
2071 	hdata->dthsd_counts = mdb_alloc(hsize, UM_SLEEP | UM_GC);
2072 
2073 	/*
2074 	 * Now pick the largest prime smaller than the hash size.  (If the
2075 	 * existing size is prime, we'll pick a smaller prime just for the
2076 	 * hell of it.)
2077 	 */
2078 	for (prime = agb.dtagb_hashsize - 1; prime > 7; prime--) {
2079 		size_t limit = prime / 7;
2080 
2081 		for (i = 2; i < limit; i++) {
2082 			if ((prime % i) == 0)
2083 				break;
2084 		}
2085 
2086 		if (i == limit)
2087 			break;
2088 	}
2089 
2090 	/*
2091 	 * And now we want to pick the largest power of two smaller than the
2092 	 * hashsize.
2093 	 */
2094 	for (i = 0; (1 << i) < agb.dtagb_hashsize; i++)
2095 		continue;
2096 
2097 	evenpow = (1 << (i - 1));
2098 
2099 	for (i = 0; _dtrace_hashstat[i].dths_name != NULL; i++) {
2100 		data.dtagsd_func = _dtrace_hashstat[i].dths_func;
2101 
2102 		hdata->dthsd_hashsize = actual;
2103 		hsize = hdata->dthsd_hashsize * sizeof (size_t);
2104 		bzero(hdata->dthsd_counts, hsize);
2105 
2106 		if (mdb_pwalk("dtrace_aggkey",
2107 		    (mdb_walk_cb_t)dtrace_aggstat_walk, &data, addr) == -1) {
2108 			mdb_warn("failed to walk dtrace_aggkey at %p", addr);
2109 			return (DCMD_ERR);
2110 		}
2111 
2112 		dtrace_hashstat_stats(_dtrace_hashstat[i].dths_name, hdata);
2113 
2114 		/*
2115 		 * If we were just printing the actual value, we won't try
2116 		 * any of the sizing experiments.
2117 		 */
2118 		if (data.dtagsd_func == NULL)
2119 			continue;
2120 
2121 		hdata->dthsd_hashsize = prime;
2122 		hsize = hdata->dthsd_hashsize * sizeof (size_t);
2123 		bzero(hdata->dthsd_counts, hsize);
2124 
2125 		if (mdb_pwalk("dtrace_aggkey",
2126 		    (mdb_walk_cb_t)dtrace_aggstat_walk, &data, addr) == -1) {
2127 			mdb_warn("failed to walk dtrace_aggkey at %p", addr);
2128 			return (DCMD_ERR);
2129 		}
2130 
2131 		dtrace_hashstat_stats(_dtrace_hashstat[i].dths_name, hdata);
2132 
2133 		hdata->dthsd_hashsize = evenpow;
2134 		hsize = hdata->dthsd_hashsize * sizeof (size_t);
2135 		bzero(hdata->dthsd_counts, hsize);
2136 
2137 		if (mdb_pwalk("dtrace_aggkey",
2138 		    (mdb_walk_cb_t)dtrace_aggstat_walk, &data, addr) == -1) {
2139 			mdb_warn("failed to walk dtrace_aggkey at %p", addr);
2140 			return (DCMD_ERR);
2141 		}
2142 
2143 		dtrace_hashstat_stats(_dtrace_hashstat[i].dths_name, hdata);
2144 	}
2145 
2146 	return (DCMD_OK);
2147 }
2148 
2149 /*ARGSUSED*/
2150 static int
2151 dtrace_dynstat_walk(uintptr_t addr, dtrace_dynvar_t *dynvar,
2152     dtrace_aggstat_data_t *data)
2153 {
2154 	dtrace_hashstat_data_t *hdata = &data->dtagsd_hash;
2155 	dtrace_tuple_t *tuple = &dynvar->dtdv_tuple;
2156 	dtrace_key_t *key = tuple->dtt_key;
2157 	size_t size = 0, offs = 0;
2158 	int i, nkeys = tuple->dtt_nkeys;
2159 	char *buf;
2160 
2161 	if (data->dtagsd_func == NULL) {
2162 		size_t bucket = dynvar->dtdv_hashval % hdata->dthsd_hashsize;
2163 
2164 		hdata->dthsd_counts[bucket]++;
2165 		return (WALK_NEXT);
2166 	}
2167 
2168 	/*
2169 	 * We want to hand the hashing algorithm a contiguous buffer.  First
2170 	 * run through the tuple and determine the size.
2171 	 */
2172 	for (i = 0; i < nkeys; i++) {
2173 		if (key[i].dttk_size == 0) {
2174 			size += sizeof (uint64_t);
2175 		} else {
2176 			size += key[i].dttk_size;
2177 		}
2178 	}
2179 
2180 	buf = alloca(size);
2181 
2182 	/*
2183 	 * Now go back through the tuple and copy the data into the buffer.
2184 	 */
2185 	for (i = 0; i < nkeys; i++) {
2186 		if (key[i].dttk_size == 0) {
2187 			bcopy(&key[i].dttk_value, &buf[offs],
2188 			    sizeof (uint64_t));
2189 			offs += sizeof (uint64_t);
2190 		} else {
2191 			if (mdb_vread(&buf[offs], key[i].dttk_size,
2192 			    key[i].dttk_value) == -1) {
2193 				mdb_warn("couldn't read tuple data at %p",
2194 				    key[i].dttk_value);
2195 				return (WALK_ERR);
2196 			}
2197 
2198 			offs += key[i].dttk_size;
2199 		}
2200 	}
2201 
2202 	hdata->dthsd_data = buf;
2203 	hdata->dthsd_size = size;
2204 
2205 	data->dtagsd_func(hdata);
2206 
2207 	return (WALK_NEXT);
2208 }
2209 
2210 /*ARGSUSED*/
2211 int
2212 dtrace_dynstat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2213 {
2214 	dtrace_dstate_t dstate;
2215 	size_t hsize, i, actual, prime;
2216 	dtrace_aggstat_data_t data;
2217 	dtrace_hashstat_data_t *hdata = &data.dtagsd_hash;
2218 
2219 	bzero(&data, sizeof (data));
2220 
2221 	if (!(flags & DCMD_ADDRSPEC))
2222 		return (DCMD_USAGE);
2223 
2224 	if (mdb_vread(&dstate, sizeof (dstate), addr) == -1) {
2225 		mdb_warn("failed to read dynamic variable state at %p", addr);
2226 		return (DCMD_ERR);
2227 	}
2228 
2229 	hsize = (actual = dstate.dtds_hashsize) * sizeof (size_t);
2230 	hdata->dthsd_counts = mdb_alloc(hsize, UM_SLEEP | UM_GC);
2231 
2232 	/*
2233 	 * Now pick the largest prime smaller than the hash size.  (If the
2234 	 * existing size is prime, we'll pick a smaller prime just for the
2235 	 * hell of it.)
2236 	 */
2237 	for (prime = dstate.dtds_hashsize - 1; prime > 7; prime--) {
2238 		size_t limit = prime / 7;
2239 
2240 		for (i = 2; i < limit; i++) {
2241 			if ((prime % i) == 0)
2242 				break;
2243 		}
2244 
2245 		if (i == limit)
2246 			break;
2247 	}
2248 
2249 	for (i = 0; _dtrace_hashstat[i].dths_name != NULL; i++) {
2250 		data.dtagsd_func = _dtrace_hashstat[i].dths_func;
2251 
2252 		hdata->dthsd_hashsize = actual;
2253 		hsize = hdata->dthsd_hashsize * sizeof (size_t);
2254 		bzero(hdata->dthsd_counts, hsize);
2255 
2256 		if (mdb_pwalk("dtrace_dynvar",
2257 		    (mdb_walk_cb_t)dtrace_dynstat_walk, &data, addr) == -1) {
2258 			mdb_warn("failed to walk dtrace_dynvar at %p", addr);
2259 			return (DCMD_ERR);
2260 		}
2261 
2262 		dtrace_hashstat_stats(_dtrace_hashstat[i].dths_name, hdata);
2263 
2264 		/*
2265 		 * If we were just printing the actual value, we won't try
2266 		 * any of the sizing experiments.
2267 		 */
2268 		if (data.dtagsd_func == NULL)
2269 			continue;
2270 
2271 		hdata->dthsd_hashsize = prime;
2272 		hsize = hdata->dthsd_hashsize * sizeof (size_t);
2273 		bzero(hdata->dthsd_counts, hsize);
2274 
2275 		if (mdb_pwalk("dtrace_dynvar",
2276 		    (mdb_walk_cb_t)dtrace_dynstat_walk, &data, addr) == -1) {
2277 			mdb_warn("failed to walk dtrace_aggkey at %p", addr);
2278 			return (DCMD_ERR);
2279 		}
2280 
2281 		dtrace_hashstat_stats(_dtrace_hashstat[i].dths_name, hdata);
2282 	}
2283 
2284 	return (DCMD_OK);
2285 }
2286 
2287 typedef struct dtrace_ecb_walk {
2288 	dtrace_ecb_t **dtew_ecbs;
2289 	int dtew_necbs;
2290 	int dtew_curecb;
2291 } dtrace_ecb_walk_t;
2292 
2293 static int
2294 dtrace_ecb_init(mdb_walk_state_t *wsp)
2295 {
2296 	uintptr_t addr;
2297 	dtrace_state_t state;
2298 	dtrace_ecb_walk_t *ecbwp;
2299 
2300 	if ((addr = wsp->walk_addr) == NULL) {
2301 		mdb_warn("dtrace_ecb walk needs dtrace_state_t\n");
2302 		return (WALK_ERR);
2303 	}
2304 
2305 	if (mdb_vread(&state, sizeof (state), addr) == -1) {
2306 		mdb_warn("failed to read dtrace state pointer at %p", addr);
2307 		return (WALK_ERR);
2308 	}
2309 
2310 	ecbwp = mdb_zalloc(sizeof (dtrace_ecb_walk_t), UM_SLEEP | UM_GC);
2311 
2312 	ecbwp->dtew_ecbs = state.dts_ecbs;
2313 	ecbwp->dtew_necbs = state.dts_necbs;
2314 	ecbwp->dtew_curecb = 0;
2315 
2316 	wsp->walk_data = ecbwp;
2317 
2318 	return (WALK_NEXT);
2319 }
2320 
2321 static int
2322 dtrace_ecb_step(mdb_walk_state_t *wsp)
2323 {
2324 	uintptr_t ecbp, addr;
2325 	dtrace_ecb_walk_t *ecbwp = wsp->walk_data;
2326 
2327 	addr = (uintptr_t)ecbwp->dtew_ecbs +
2328 	    ecbwp->dtew_curecb * sizeof (dtrace_ecb_t *);
2329 
2330 	if (ecbwp->dtew_curecb++ == ecbwp->dtew_necbs)
2331 		return (WALK_DONE);
2332 
2333 	if (mdb_vread(&ecbp, sizeof (addr), addr) == -1) {
2334 		mdb_warn("failed to read ecb at entry %d\n",
2335 		    ecbwp->dtew_curecb);
2336 		return (WALK_ERR);
2337 	}
2338 
2339 	if (ecbp == NULL)
2340 		return (WALK_NEXT);
2341 
2342 	return (wsp->walk_callback(ecbp, NULL, wsp->walk_cbdata));
2343 }
2344 
2345 const mdb_dcmd_t kernel_dcmds[] = {
2346 	{ "id2probe", ":", "translate a dtrace_id_t to a dtrace_probe_t",
2347 	    id2probe },
2348 	{ "dtrace", ":[-c cpu]", "print dtrace(1M)-like output",
2349 	    dtrace, dtrace_help },
2350 	{ "dtrace_errhash", ":", "print DTrace error hash", dtrace_errhash },
2351 	{ "dtrace_helptrace", ":", "print DTrace helper trace",
2352 	    dtrace_helptrace },
2353 	{ "dtrace_state", ":", "print active DTrace consumers", dtrace_state,
2354 	    dtrace_state_help },
2355 	{ "dtrace_aggstat", ":",
2356 	    "print DTrace aggregation hash statistics", dtrace_aggstat },
2357 	{ "dtrace_dynstat", ":",
2358 	    "print DTrace dynamic variable hash statistics", dtrace_dynstat },
2359 	{ NULL }
2360 };
2361 
2362 const mdb_walker_t kernel_walkers[] = {
2363 	{ "dtrace_errhash", "walk hash of DTrace error messasges",
2364 		dtrace_errhash_init, dtrace_errhash_step },
2365 	{ "dtrace_helptrace", "walk DTrace helper trace entries",
2366 		dtrace_helptrace_init, dtrace_helptrace_step },
2367 	{ "dtrace_state", "walk DTrace per-consumer softstate",
2368 		dtrace_state_init, dtrace_state_step },
2369 	{ "dtrace_aggkey", "walk DTrace aggregation keys",
2370 		dtrace_aggkey_init, dtrace_aggkey_step, dtrace_aggkey_fini },
2371 	{ "dtrace_dynvar", "walk DTrace dynamic variables",
2372 		dtrace_dynvar_init, dtrace_dynvar_step, dtrace_dynvar_fini },
2373 	{ "dtrace_ecb", "walk a DTrace consumer's enabling control blocks",
2374 		dtrace_ecb_init, dtrace_ecb_step },
2375 	{ NULL }
2376 };
2377