/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <stdlib.h>
#include <strings.h>
#include <errno.h>
#include <unistd.h>
#include <limits.h>
#include <assert.h>
#include <ctype.h>
#include <alloca.h>

#include <dt_impl.h>

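/*
 * Compute flow-indentation state for a record when the flowindent option is
 * set: choose an entry/return prefix (" -> "/" <- ", or " => "/" <= " for
 * syscall probes), suppress the indentation change when the adjacent record
 * is a different enabling of the same probe, and reduce the indentation on
 * return.  The caller increases the indentation on entry.
 */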
static int
dt_flowindent(dtrace_hdl_t *dtp, dtrace_probedata_t *data, dtrace_epid_t last,
    dtrace_bufdesc_t *buf, size_t offs)
{
	dtrace_probedesc_t *pd = data->dtpda_pdesc, *npd;
	dtrace_eprobedesc_t *epd = data->dtpda_edesc, *nepd;
	char *p = pd->dtpd_provider, *n = pd->dtpd_name;
	dtrace_flowkind_t flow = DTRACEFLOW_NONE;
	const char *str = NULL;
	static const char *e_str[2] = { " -> ", " => " };
	static const char *r_str[2] = { " <- ", " <= " };
	dtrace_epid_t next, id = epd->dtepd_epid;
	int rval;

	if (strcmp(n, "entry") == 0) {
		flow = DTRACEFLOW_ENTRY;
		str = e_str[strcmp(p, "syscall") == 0];
	} else if (strcmp(n, "return") == 0 ||
	    strcmp(n, "exit") == 0) {
		flow = DTRACEFLOW_RETURN;
		str = r_str[strcmp(p, "syscall") == 0];
	}

	/*
	 * If we're going to indent this, we need to check the ID of our last
	 * call.  If we're looking at the same probe ID but a different EPID,
	 * we _don't_ want to indent.  (Yes, there are some minor holes in
	 * this scheme -- it's a heuristic.)
	 */
	if (flow == DTRACEFLOW_ENTRY) {
		if ((last != DTRACE_EPIDNONE && id != last &&
		    pd->dtpd_id == dtp->dt_pdesc[last]->dtpd_id))
			flow = DTRACEFLOW_NONE;
	}

	/*
	 * If we're going to unindent this, it's more difficult to see if
	 * we don't actually want to unindent it -- we need to look at the
	 * _next_ EPID.
	 */
	if (flow == DTRACEFLOW_RETURN) {
		offs += epd->dtepd_size;

		do {
			if (offs >= buf->dtbd_size) {
				/*
				 * We're at the end -- maybe.  If the oldest
				 * record is non-zero, we need to wrap.
				 */
				if (buf->dtbd_oldest != 0) {
					offs = 0;
				} else {
					goto out;
				}
			}

			next = *(uint32_t *)((uintptr_t)buf->dtbd_data + offs);

			if (next == DTRACE_EPIDNONE)
				offs += sizeof (id);
		} while (next == DTRACE_EPIDNONE);

		if ((rval = dt_epid_lookup(dtp, next, &nepd, &npd)) != 0)
			return (rval);

		if (next != id && npd->dtpd_id == pd->dtpd_id)
			flow = DTRACEFLOW_NONE;
	}

out:
	if (flow == DTRACEFLOW_ENTRY || flow == DTRACEFLOW_RETURN) {
		data->dtpda_prefix = str;
	} else {
		data->dtpda_prefix = "| ";
	}

	if (flow == DTRACEFLOW_RETURN && data->dtpda_indent > 0)
		data->dtpda_indent -= 2;

	data->dtpda_flow = flow;

	return (0);
}

static int
dt_nullprobe()
{
	return (DTRACE_CONSUME_THIS);
}

static int
dt_nullrec()
{
	return (DTRACE_CONSUME_NEXT);
}

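/*
 * Render a quantize() aggregation as an ASCII histogram: one row per
 * power-of-two bucket between (roughly) the first and last non-empty bins,
 * with a bar of at-signs scaled to 40 columns and the normalized count
 * alongside.
 */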
int
dt_print_quantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
    size_t size, uint64_t normal)
{
	const uint64_t *data = addr;
	int i, first_bin = 0, last_bin = DTRACE_QUANTIZE_NBUCKETS - 1;
	uint64_t total_bin_count = 0;

	if (size != DTRACE_QUANTIZE_NBUCKETS * sizeof (uint64_t))
		return (dt_set_errno(dtp, EDT_DMISMATCH));

	while (first_bin < DTRACE_QUANTIZE_NBUCKETS - 1 && data[first_bin] == 0)
		first_bin++;

	if (first_bin > 0)
		first_bin--;

	while (last_bin > 0 && data[last_bin] == 0)
		last_bin--;

	if (last_bin < DTRACE_QUANTIZE_NBUCKETS - 1)
		last_bin++;

	for (i = first_bin; i <= last_bin; i++)
		total_bin_count += data[i];

	if (dt_printf(dtp, fp, "\n%16s %41s %-9s\n", "value",
	    "------------- Distribution -------------", "count") < 0)
		return (-1);

	for (i = first_bin; i <= last_bin; i++) {
		float f = ((float)data[i] * 40.0) / (float)total_bin_count;
		uint_t depth = (uint_t)(f + 0.5);

		if (dt_printf(dtp, fp, "%16lld |%s%s %-9llu\n",
		    (long long)DTRACE_QUANTIZE_BUCKETVAL(i),
		    "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" + 40 - depth,
		    "                                        " + depth,
		    (u_longlong_t)data[i] / normal) < 0)
			return (-1);
	}

	return (0);
}

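/*
 * Render an lquantize() aggregation.  The first 64-bit datum encodes the
 * base, step and level count of the linear quantization; the remaining
 * levels + 2 data are the underflow bucket, the linear buckets, and the
 * overflow bucket, printed in the same histogram format as quantize().
 */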
int
dt_print_lquantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
    size_t size, uint64_t normal)
{
	const uint64_t *data = addr;
	int i, first_bin, last_bin, base;
	uint64_t arg, total_bin_count = 0;
	uint16_t step, levels;

	if (size < sizeof (uint64_t))
		return (dt_set_errno(dtp, EDT_DMISMATCH));

	arg = *data++;
	size -= sizeof (uint64_t);

	base = DTRACE_LQUANTIZE_BASE(arg);
	step = DTRACE_LQUANTIZE_STEP(arg);
	levels = DTRACE_LQUANTIZE_LEVELS(arg);

	first_bin = 0;
	last_bin = levels + 1;

	if (size != sizeof (uint64_t) * (levels + 2))
		return (dt_set_errno(dtp, EDT_DMISMATCH));

	while (first_bin < levels + 1 && data[first_bin] == 0)
		first_bin++;

	if (first_bin > 0)
		first_bin--;

	while (last_bin > 0 && data[last_bin] == 0)
		last_bin--;

	if (last_bin < levels + 1)
		last_bin++;

	for (i = first_bin; i <= last_bin; i++)
		total_bin_count += data[i];

	if (dt_printf(dtp, fp, "\n%16s %41s %-9s\n", "value",
	    "------------- Distribution -------------", "count") < 0)
		return (-1);

	for (i = first_bin; i <= last_bin; i++) {
		float f = ((float)data[i] * 40.0) / (float)total_bin_count;
		uint_t depth = (uint_t)(f + 0.5);
		char c[32];
		int err;

		if (i == 0) {
			(void) snprintf(c, sizeof (c), "< %d",
			    base / (uint32_t)normal);
			err = dt_printf(dtp, fp, "%16s ", c);
		} else if (i == levels + 1) {
			(void) snprintf(c, sizeof (c), ">= %d",
			    base + (levels * step));
			err = dt_printf(dtp, fp, "%16s ", c);
		} else {
			err = dt_printf(dtp, fp, "%16d ",
			    base + (i - 1) * step);
		}

		if (err < 0 || dt_printf(dtp, fp, "|%s%s %-9llu\n",
		    "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" + 40 - depth,
		    "                                        " + depth,
		    (u_longlong_t)data[i] / normal) < 0)
			return (-1);
	}

	return (0);
}

/*ARGSUSED*/
static int
dt_print_average(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,
    size_t size, uint64_t normal)
{
	/* LINTED - alignment */
	uint64_t *data = (uint64_t *)addr;

	return (dt_printf(dtp, fp, " %16lld", data[0] ?
	    (long long)(data[1] / normal / data[0]) : 0));
}

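/*
 * Print an arbitrary byte range.  Printable, nul-terminated data is printed
 * as a string; anything else (or anything at all, if the rawbytes option is
 * set) is dumped as a hex/ASCII table.
 */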
/*ARGSUSED*/
int
dt_print_bytes(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,
    size_t nbytes, int width, int quiet)
{
	/*
	 * If the byte stream is a series of printable characters, followed by
	 * a terminating byte, we print it out as a string.  Otherwise, we
	 * assume that it's something else and just print the bytes.
	 */
	int i, j, margin = 5;
	char *c = (char *)addr;

	if (nbytes == 0)
		return (0);

	if (dtp->dt_options[DTRACEOPT_RAWBYTES] != DTRACEOPT_UNSET)
		goto raw;

	for (i = 0; i < nbytes; i++) {
		/*
		 * We define a "printable character" to be one for which
		 * isprint(3C) returns non-zero, isspace(3C) returns non-zero,
		 * or a character which is either backspace or the bell.
		 * Backspace and the bell are regrettably special because
		 * they fail the first two tests -- and yet they are entirely
		 * printable.  These are the only two control characters that
		 * have meaning for the terminal and for which isprint(3C) and
		 * isspace(3C) return 0.
		 */
		if (isprint(c[i]) || isspace(c[i]) ||
		    c[i] == '\b' || c[i] == '\a')
			continue;

		if (c[i] == '\0' && i > 0) {
			/*
			 * This looks like it might be a string.  Before we
			 * assume that it is indeed a string, check the
			 * remainder of the byte range; if it contains
			 * additional non-nul characters, we'll assume that
			 * it's a binary stream that just happens to look like
			 * a string, and we'll print out the individual bytes.
			 */
			for (j = i + 1; j < nbytes; j++) {
				if (c[j] != '\0')
					break;
			}

			if (j != nbytes)
				break;

			if (quiet)
				return (dt_printf(dtp, fp, "%s", c));
			else
				return (dt_printf(dtp, fp, "  %-*s", width, c));
		}

		break;
	}

	if (i == nbytes) {
		/*
		 * The byte range is all printable characters, but there is
		 * no trailing nul byte.  We'll assume that it's a string and
		 * print it as such.
		 */
		char *s = alloca(nbytes + 1);
		bcopy(c, s, nbytes);
		s[nbytes] = '\0';
		return (dt_printf(dtp, fp, "  %-*s", width, s));
	}

raw:
	if (dt_printf(dtp, fp, "\n%*s      ", margin, "") < 0)
		return (-1);

	for (i = 0; i < 16; i++)
		if (dt_printf(dtp, fp, "  %c", "0123456789abcdef"[i]) < 0)
			return (-1);

	if (dt_printf(dtp, fp, "  0123456789abcdef\n") < 0)
		return (-1);

	for (i = 0; i < nbytes; i += 16) {
		if (dt_printf(dtp, fp, "%*s%5x:", margin, "", i) < 0)
			return (-1);

		for (j = i; j < i + 16 && j < nbytes; j++) {
			if (dt_printf(dtp, fp, " %02x", (uchar_t)c[j]) < 0)
				return (-1);
		}

		while (j++ % 16) {
			if (dt_printf(dtp, fp, "   ") < 0)
				return (-1);
		}

		if (dt_printf(dtp, fp, "  ") < 0)
			return (-1);

		for (j = i; j < i + 16 && j < nbytes; j++) {
			if (dt_printf(dtp, fp, "%c",
			    c[j] < ' ' || c[j] > '~' ? '.' : c[j]) < 0)
				return (-1);
		}

		if (dt_printf(dtp, fp, "\n") < 0)
			return (-1);
	}

	return (0);
}

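/*
 * Print a kernel stack() record: resolve each program counter to
 * module`symbol+offset where possible, falling back to module`address or a
 * raw hexadecimal address.
 */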
int
dt_print_stack(dtrace_hdl_t *dtp, FILE *fp, const char *format,
    caddr_t addr, int depth)
{
	pc_t *pc = (pc_t *)(uintptr_t)addr;
	dtrace_syminfo_t dts;
	GElf_Sym sym;
	int i, indent;
	char c[PATH_MAX * 2];

	if (dt_printf(dtp, fp, "\n") < 0)
		return (-1);

	if (format == NULL)
		format = "%s";

	if (dtp->dt_options[DTRACEOPT_STACKINDENT] != DTRACEOPT_UNSET)
		indent = (int)dtp->dt_options[DTRACEOPT_STACKINDENT];
	else
		indent = _dtrace_stkindent;

	for (i = 0; i < depth && pc[i] != NULL; i++) {
		if (dt_printf(dtp, fp, "%*s", indent, "") < 0)
			return (-1);

		if (dtrace_lookup_by_addr(dtp, pc[i], &sym, &dts) == 0) {
			if (pc[i] > sym.st_value) {
				(void) snprintf(c, sizeof (c), "%s`%s+0x%llx",
				    dts.dts_object, dts.dts_name,
				    (u_longlong_t)pc[i] - sym.st_value);
			} else {
				(void) snprintf(c, sizeof (c), "%s`%s",
				    dts.dts_object, dts.dts_name);
			}
		} else {
			/*
			 * We'll repeat the lookup, but this time we'll specify
			 * a NULL GElf_Sym -- indicating that we're only
			 * interested in the containing module.
			 */
			if (dtrace_lookup_by_addr(dtp, pc[i],
			    NULL, &dts) == 0) {
				(void) snprintf(c, sizeof (c), "%s`0x%llx",
				    dts.dts_object, (u_longlong_t)pc[i]);
			} else {
				(void) snprintf(c, sizeof (c), "0x%llx",
				    (u_longlong_t)pc[i]);
			}
		}

		if (dt_printf(dtp, fp, format, c) < 0)
			return (-1);

		if (dt_printf(dtp, fp, "\n") < 0)
			return (-1);
	}

	return (0);
}

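/*
 * Print a ustack()/jstack() record.  The record begins with the pid,
 * followed by the frame addresses and (for jstack()) a block of translated
 * frame strings; where possible we grab the target process so that frames
 * can be resolved to object`symbol+offset via libproc.
 */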
int
dt_print_ustack(dtrace_hdl_t *dtp, FILE *fp, const char *format,
    caddr_t addr, uint64_t arg)
{
	uint64_t *pc = (uint64_t *)(uintptr_t)addr;
	uint32_t depth = DTRACE_USTACK_NFRAMES(arg);
	uint32_t strsize = DTRACE_USTACK_STRSIZE(arg);
	const char *strbase = addr + (depth + 1) * sizeof (uint64_t);
	const char *str = strsize ? strbase : NULL;
	int err = 0;

	char name[PATH_MAX], objname[PATH_MAX], c[PATH_MAX * 2];
	struct ps_prochandle *P;
	GElf_Sym sym;
	int i, indent;
	pid_t pid;

	if (depth == 0)
		return (0);

	pid = (pid_t)*pc++;

	if (dt_printf(dtp, fp, "\n") < 0)
		return (-1);

	if (format == NULL)
		format = "%s";

	if (dtp->dt_options[DTRACEOPT_STACKINDENT] != DTRACEOPT_UNSET)
		indent = (int)dtp->dt_options[DTRACEOPT_STACKINDENT];
	else
		indent = _dtrace_stkindent;

	/*
	 * Ultimately, we need to add an entry point in the library vector for
	 * determining <symbol, offset> from <pid, address>.  For now, if
	 * this is a vector open, we just print the raw address or string.
	 */
	if (dtp->dt_vector == NULL)
		P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0);
	else
		P = NULL;

	if (P != NULL)
		dt_proc_lock(dtp, P); /* lock handle while we perform lookups */

	for (i = 0; i < depth && pc[i] != NULL; i++) {
		if ((err = dt_printf(dtp, fp, "%*s", indent, "")) < 0)
			break;

		if (P != NULL && Plookup_by_addr(P, pc[i],
		    name, sizeof (name), &sym) == 0) {
			(void) Pobjname(P, pc[i], objname, sizeof (objname));

			if (pc[i] > sym.st_value) {
				(void) snprintf(c, sizeof (c),
				    "%s`%s+0x%llx", dt_basename(objname), name,
				    (u_longlong_t)(pc[i] - sym.st_value));
			} else {
				(void) snprintf(c, sizeof (c),
				    "%s`%s", dt_basename(objname), name);
			}
		} else if (str != NULL && str[0] != '\0') {
			(void) snprintf(c, sizeof (c), "%s", str);
		} else {
			if (P != NULL && Pobjname(P, pc[i], objname,
			    sizeof (objname)) != NULL) {
				(void) snprintf(c, sizeof (c), "%s`0x%llx",
				    dt_basename(objname), (u_longlong_t)pc[i]);
			} else {
				(void) snprintf(c, sizeof (c), "0x%llx",
				    (u_longlong_t)pc[i]);
			}
		}

		if ((err = dt_printf(dtp, fp, format, c)) < 0)
			break;

		if ((err = dt_printf(dtp, fp, "\n")) < 0)
			break;

		if (str != NULL) {
			str += strlen(str) + 1;
			if (str - strbase >= strsize)
				str = NULL;
		}
	}

	if (P != NULL) {
		dt_proc_unlock(dtp, P);
		dt_proc_release(dtp, P);
	}

	return (err);
}

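/*
 * Walker callbacks for the normalize(), denormalize() and clear() library
 * actions: each matches the aggregation whose variable ID was recorded in
 * the principal buffer and asks the aggregation walk to normalize,
 * denormalize or clear it.
 */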
typedef struct dt_normal {
	dtrace_aggvarid_t dtnd_id;
	uint64_t dtnd_normal;
} dt_normal_t;

static int
dt_normalize_agg(dtrace_aggdata_t *aggdata, void *arg)
{
	dt_normal_t *normal = arg;
	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
	dtrace_aggvarid_t id = normal->dtnd_id;
	uintptr_t data = (uintptr_t)aggdata->dtada_data;

	if (agg->dtagd_nrecs == 0)
		return (DTRACE_AGGWALK_NEXT);

	if (id != *(dtrace_aggvarid_t *)(data + agg->dtagd_rec[0].dtrd_offset))
		return (DTRACE_AGGWALK_NEXT);

	aggdata->dtada_normal = normal->dtnd_normal;
	return (DTRACE_AGGWALK_NORMALIZE);
}

static int
dt_normalize(dtrace_hdl_t *dtp, caddr_t base, dtrace_recdesc_t *rec)
{
	dt_normal_t normal;
	caddr_t addr;

	/*
	 * We (should) have two records:  the aggregation ID followed by the
	 * normalization value.
	 */
	addr = base + rec->dtrd_offset;

	if (rec->dtrd_size != sizeof (dtrace_aggvarid_t))
		return (dt_set_errno(dtp, EDT_BADNORMAL));

	/* LINTED - alignment */
	normal.dtnd_id = *((dtrace_aggvarid_t *)addr);
	rec++;

	if (rec->dtrd_action != DTRACEACT_LIBACT)
		return (dt_set_errno(dtp, EDT_BADNORMAL));

	if (rec->dtrd_arg != DT_ACT_NORMALIZE)
		return (dt_set_errno(dtp, EDT_BADNORMAL));

	addr = base + rec->dtrd_offset;

	switch (rec->dtrd_size) {
	case sizeof (uint64_t):
		/* LINTED - alignment */
		normal.dtnd_normal = *((uint64_t *)addr);
		break;
	case sizeof (uint32_t):
		/* LINTED - alignment */
		normal.dtnd_normal = *((uint32_t *)addr);
		break;
	case sizeof (uint16_t):
		/* LINTED - alignment */
		normal.dtnd_normal = *((uint16_t *)addr);
		break;
	case sizeof (uint8_t):
		normal.dtnd_normal = *((uint8_t *)addr);
		break;
	default:
		return (dt_set_errno(dtp, EDT_BADNORMAL));
	}

	(void) dtrace_aggregate_walk(dtp, dt_normalize_agg, &normal);

	return (0);
}

static int
dt_denormalize_agg(dtrace_aggdata_t *aggdata, void *arg)
{
	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
	dtrace_aggvarid_t id = *((dtrace_aggvarid_t *)arg);
	uintptr_t data = (uintptr_t)aggdata->dtada_data;

	if (agg->dtagd_nrecs == 0)
		return (DTRACE_AGGWALK_NEXT);

	if (id != *(dtrace_aggvarid_t *)(data + agg->dtagd_rec[0].dtrd_offset))
		return (DTRACE_AGGWALK_NEXT);

	return (DTRACE_AGGWALK_DENORMALIZE);
}

static int
dt_clear_agg(dtrace_aggdata_t *aggdata, void *arg)
{
	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
	dtrace_aggvarid_t id = *((dtrace_aggvarid_t *)arg);
	uintptr_t data = (uintptr_t)aggdata->dtada_data;

	if (agg->dtagd_nrecs == 0)
		return (DTRACE_AGGWALK_NEXT);

	if (id != *(dtrace_aggvarid_t *)(data + agg->dtagd_rec[0].dtrd_offset))
		return (DTRACE_AGGWALK_NEXT);

	return (DTRACE_AGGWALK_CLEAR);
}

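/*
 * Support for the trunc() library action: keep only the requested number of
 * aggregation entries (the highest-valued ones, or the lowest-valued ones
 * when the count is negative) and remove the rest during the aggregation
 * walk.
 */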
typedef struct dt_trunc {
	dtrace_aggvarid_t dttd_id;
	uint64_t dttd_remaining;
} dt_trunc_t;

static int
dt_trunc_agg(dtrace_aggdata_t *aggdata, void *arg)
{
	dt_trunc_t *trunc = arg;
	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
	dtrace_aggvarid_t id = trunc->dttd_id;
	uintptr_t data = (uintptr_t)aggdata->dtada_data;

	if (agg->dtagd_nrecs == 0)
		return (DTRACE_AGGWALK_NEXT);

	if (id != *(dtrace_aggvarid_t *)(data + agg->dtagd_rec[0].dtrd_offset))
		return (DTRACE_AGGWALK_NEXT);

	if (trunc->dttd_remaining == 0)
		return (DTRACE_AGGWALK_REMOVE);

	trunc->dttd_remaining--;
	return (DTRACE_AGGWALK_NEXT);
}

static int
dt_trunc(dtrace_hdl_t *dtp, caddr_t base, dtrace_recdesc_t *rec)
{
	dt_trunc_t trunc;
	caddr_t addr;
	int64_t remaining;
	int (*func)(dtrace_hdl_t *, dtrace_aggregate_f *, void *);

	/*
	 * We (should) have two records:  the aggregation ID followed by the
	 * number of aggregation entries after which the aggregation is to be
	 * truncated.
	 */
	addr = base + rec->dtrd_offset;

	if (rec->dtrd_size != sizeof (dtrace_aggvarid_t))
		return (dt_set_errno(dtp, EDT_BADTRUNC));

	/* LINTED - alignment */
	trunc.dttd_id = *((dtrace_aggvarid_t *)addr);
	rec++;

	if (rec->dtrd_action != DTRACEACT_LIBACT)
		return (dt_set_errno(dtp, EDT_BADTRUNC));

	if (rec->dtrd_arg != DT_ACT_TRUNC)
		return (dt_set_errno(dtp, EDT_BADTRUNC));

	addr = base + rec->dtrd_offset;

	switch (rec->dtrd_size) {
	case sizeof (uint64_t):
		/* LINTED - alignment */
		remaining = *((int64_t *)addr);
		break;
	case sizeof (uint32_t):
		/* LINTED - alignment */
		remaining = *((int32_t *)addr);
		break;
	case sizeof (uint16_t):
		/* LINTED - alignment */
		remaining = *((int16_t *)addr);
		break;
	case sizeof (uint8_t):
		remaining = *((int8_t *)addr);
		break;
	default:
		return (dt_set_errno(dtp, EDT_BADNORMAL));
	}

	if (remaining < 0) {
		func = dtrace_aggregate_walk_valsorted;
		remaining = -remaining;
	} else {
		func = dtrace_aggregate_walk_valrevsorted;
	}

	assert(remaining >= 0);
	trunc.dttd_remaining = remaining;

	(void) func(dtp, dt_trunc_agg, &trunc);

	return (0);
}

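/*
 * Aggregation walker callback that prints a single aggregation entry.  When
 * dtpa_allunprint is set, every aggregation not already marked
 * DTRACE_AGD_PRINTED is printed; otherwise only the aggregation matching
 * dtpa_id is printed and then marked as printed.  Each record after the
 * leading aggregation variable ID is dispatched to the stack, ustack,
 * quantize, lquantize, avg or scalar printers as appropriate.
 */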
int
dt_print_agg(dtrace_aggdata_t *aggdata, void *arg)
{
	int i, err = 0;
	dt_print_aggdata_t *pd = arg;
	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
	FILE *fp = pd->dtpa_fp;
	dtrace_hdl_t *dtp = pd->dtpa_dtp;
	dtrace_aggvarid_t aggvarid = pd->dtpa_id;
	uintptr_t data = (uintptr_t)aggdata->dtada_data;

	if (pd->dtpa_allunprint) {
		if (agg->dtagd_flags & DTRACE_AGD_PRINTED)
			return (0);
	} else {
		/*
		 * If we're not printing all unprinted aggregations, then the
		 * aggregation variable ID denotes a specific aggregation
		 * variable that we should print -- skip any other aggregations
		 * that we encounter.
		 */
		if (agg->dtagd_nrecs == 0)
			return (0);

		if (aggvarid != *(dtrace_aggvarid_t *)(data +
		    agg->dtagd_rec[0].dtrd_offset))
			return (0);
	}

	/*
	 * Iterate over each record description, printing the traced data,
	 * skipping the first datum (the tuple member created by the compiler).
	 */
	for (i = 1; err >= 0 && i < agg->dtagd_nrecs; i++) {
		dtrace_recdesc_t *rec = &agg->dtagd_rec[i];
		dtrace_actkind_t act = rec->dtrd_action;
		caddr_t addr = aggdata->dtada_data + rec->dtrd_offset;
		size_t size = rec->dtrd_size;
		uint64_t normal;

		normal = DTRACEACT_ISAGG(act) ? aggdata->dtada_normal : 1;

		if (act == DTRACEACT_STACK) {
			int depth = rec->dtrd_size / sizeof (pc_t);
			err = dt_print_stack(dtp, fp, NULL, addr, depth);
			goto nextrec;
		}

		if (act == DTRACEACT_USTACK || act == DTRACEACT_JSTACK) {
			err = dt_print_ustack(dtp, fp, NULL, addr,
			    rec->dtrd_arg);
			goto nextrec;
		}

		if (act == DTRACEAGG_QUANTIZE) {
			err = dt_print_quantize(dtp, fp, addr, size, normal);
			goto nextrec;
		}

		if (act == DTRACEAGG_LQUANTIZE) {
			err = dt_print_lquantize(dtp, fp, addr, size, normal);
			goto nextrec;
		}

		if (act == DTRACEAGG_AVG) {
			err = dt_print_average(dtp, fp, addr, size, normal);
			goto nextrec;
		}

		switch (size) {
		case sizeof (uint64_t):
			err = dt_printf(dtp, fp, " %16lld",
			    /* LINTED - alignment */
			    (long long)*((uint64_t *)addr) / normal);
			break;
		case sizeof (uint32_t):
			/* LINTED - alignment */
			err = dt_printf(dtp, fp, " %8d", *((uint32_t *)addr) /
			    (uint32_t)normal);
			break;
		case sizeof (uint16_t):
			/* LINTED - alignment */
			err = dt_printf(dtp, fp, " %5d", *((uint16_t *)addr) /
			    (uint32_t)normal);
			break;
		case sizeof (uint8_t):
			err = dt_printf(dtp, fp, " %3d", *((uint8_t *)addr) /
			    (uint32_t)normal);
			break;
		default:
			err = dt_print_bytes(dtp, fp, addr, size, 50, 0);
			break;
		}

nextrec:
		if (dt_buffered_flush(dtp, NULL, rec, aggdata) < 0)
			return (-1);
	}

	if (err >= 0)
		err = dt_printf(dtp, fp, "\n");

	if (dt_buffered_flush(dtp, NULL, NULL, aggdata) < 0)
		return (-1);

	if (!pd->dtpa_allunprint)
		agg->dtagd_flags |= DTRACE_AGD_PRINTED;

	return (err < 0 ? -1 : 0);
}

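/*
 * Consume one CPU's principal buffer snapshot: walk the records from the
 * oldest to the newest (wrapping if the buffer has wrapped), invoke the
 * probe and record callbacks, handle the library actions (clear,
 * denormalize, normalize, trunc, ftruncate), perform the default record
 * formatting, and finally report any drops.
 */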
static int
dt_consume_cpu(dtrace_hdl_t *dtp, FILE *fp, int cpu, dtrace_bufdesc_t *buf,
    dtrace_consume_probe_f *efunc, dtrace_consume_rec_f *rfunc, void *arg)
{
	dtrace_epid_t id;
	size_t offs, start = buf->dtbd_oldest, end = buf->dtbd_size;
	int flow = (dtp->dt_options[DTRACEOPT_FLOWINDENT] != DTRACEOPT_UNSET);
	int quiet = (dtp->dt_options[DTRACEOPT_QUIET] != DTRACEOPT_UNSET);
	int rval, i, n;
	dtrace_epid_t last = DTRACE_EPIDNONE;
	dtrace_probedata_t data;
	uint64_t drops;
	caddr_t addr;

	bzero(&data, sizeof (data));
	data.dtpda_handle = dtp;
	data.dtpda_cpu = cpu;

again:
	for (offs = start; offs < end; ) {
		dtrace_eprobedesc_t *epd;

		/*
		 * We're guaranteed to have an ID.
		 */
		id = *(uint32_t *)((uintptr_t)buf->dtbd_data + offs);

		if (id == DTRACE_EPIDNONE) {
			/*
			 * This is filler to assure proper alignment of the
			 * next record; we simply ignore it.
			 */
			offs += sizeof (id);
			continue;
		}

		if ((rval = dt_epid_lookup(dtp, id, &data.dtpda_edesc,
		    &data.dtpda_pdesc)) != 0)
			return (rval);

		epd = data.dtpda_edesc;
		data.dtpda_data = buf->dtbd_data + offs;

		if (data.dtpda_edesc->dtepd_uarg != DT_ECB_DEFAULT) {
			rval = dt_handle(dtp, &data);

			if (rval == DTRACE_CONSUME_NEXT)
				goto nextepid;

			if (rval == DTRACE_CONSUME_ERROR)
				return (-1);
		}

		if (flow)
			(void) dt_flowindent(dtp, &data, last, buf, offs);

		rval = (*efunc)(&data, arg);

		if (flow) {
			if (data.dtpda_flow == DTRACEFLOW_ENTRY)
				data.dtpda_indent += 2;
		}

		if (rval == DTRACE_CONSUME_NEXT)
			goto nextepid;

		if (rval == DTRACE_CONSUME_ABORT)
			return (dt_set_errno(dtp, EDT_DIRABORT));

		if (rval != DTRACE_CONSUME_THIS)
			return (dt_set_errno(dtp, EDT_BADRVAL));

		for (i = 0; i < epd->dtepd_nrecs; i++) {
			dtrace_recdesc_t *rec = &epd->dtepd_rec[i];
			dtrace_actkind_t act = rec->dtrd_action;

			data.dtpda_data = buf->dtbd_data + offs +
			    rec->dtrd_offset;
			addr = data.dtpda_data;

			if (act == DTRACEACT_LIBACT) {
				if (rec->dtrd_arg == DT_ACT_CLEAR) {
					dtrace_aggvarid_t id;

					/* LINTED - alignment */
					id = *((dtrace_aggvarid_t *)addr);
					(void) dtrace_aggregate_walk(dtp,
					    dt_clear_agg, &id);
					continue;
				}

				if (rec->dtrd_arg == DT_ACT_DENORMALIZE) {
					dtrace_aggvarid_t id;

					/* LINTED - alignment */
					id = *((dtrace_aggvarid_t *)addr);
					(void) dtrace_aggregate_walk(dtp,
					    dt_denormalize_agg, &id);
					continue;
				}

				if (rec->dtrd_arg == DT_ACT_NORMALIZE) {
					if (i == epd->dtepd_nrecs - 1)
						return (dt_set_errno(dtp,
						    EDT_BADNORMAL));

					if (dt_normalize(dtp,
					    buf->dtbd_data + offs, rec) != 0)
						return (-1);

					i++;
					continue;
				}

				if (rec->dtrd_arg == DT_ACT_TRUNC) {
					if (i == epd->dtepd_nrecs - 1)
						return (dt_set_errno(dtp,
						    EDT_BADTRUNC));

					if (dt_trunc(dtp,
					    buf->dtbd_data + offs, rec) != 0)
						return (-1);

					i++;
					continue;
				}

				if (rec->dtrd_arg == DT_ACT_FTRUNCATE) {
					if (fp == NULL)
						continue;

					(void) fflush(fp);
					(void) ftruncate(fileno(fp), 0);
					(void) fseeko(fp, 0, SEEK_SET);
					continue;
				}
			}

			rval = (*rfunc)(&data, rec, arg);

			if (rval == DTRACE_CONSUME_NEXT)
				continue;

			if (rval == DTRACE_CONSUME_ABORT)
				return (dt_set_errno(dtp, EDT_DIRABORT));

			if (rval != DTRACE_CONSUME_THIS)
				return (dt_set_errno(dtp, EDT_BADRVAL));

			if (act == DTRACEACT_STACK) {
				int depth = rec->dtrd_size / sizeof (pc_t);
				if (dt_print_stack(dtp, fp, NULL,
				    addr, depth) < 0)
					return (-1);
				goto nextrec;
			}

			if (act == DTRACEACT_USTACK ||
			    act == DTRACEACT_JSTACK) {
				if (dt_print_ustack(dtp, fp, NULL,
				    addr, rec->dtrd_arg) < 0)
					return (-1);
				goto nextrec;
			}

			if (DTRACEACT_ISPRINTFLIKE(act)) {
				void *fmtdata;
				int (*func)(dtrace_hdl_t *, FILE *, void *,
				    const dtrace_probedata_t *,
				    const dtrace_recdesc_t *, uint_t,
				    const void *buf, size_t);

				if ((fmtdata = dt_format_lookup(dtp,
				    rec->dtrd_format)) == NULL)
					goto nofmt;

				switch (act) {
				case DTRACEACT_PRINTF:
					func = dtrace_fprintf;
					break;
				case DTRACEACT_PRINTA:
					func = dtrace_fprinta;
					break;
				case DTRACEACT_SYSTEM:
					func = dtrace_system;
					break;
				case DTRACEACT_FREOPEN:
					func = dtrace_freopen;
					break;
				}

				n = (*func)(dtp, fp, fmtdata, &data,
				    rec, epd->dtepd_nrecs - i,
				    (uchar_t *)buf->dtbd_data + offs,
				    buf->dtbd_size - offs);

				if (n < 0)
					return (-1); /* errno is set for us */

				if (n > 0)
					i += n - 1;
				goto nextrec;
			}

nofmt:
			if (act == DTRACEACT_PRINTA) {
				dt_print_aggdata_t pd;

				bzero(&pd, sizeof (pd));
				pd.dtpa_dtp = dtp;
				pd.dtpa_fp = fp;
				/* LINTED - alignment */
				pd.dtpa_id = *((dtrace_aggvarid_t *)addr);

				if (dt_printf(dtp, fp, "\n") < 0 ||
				    dtrace_aggregate_walk_valsorted(dtp,
				    dt_print_agg, &pd) < 0)
					return (-1);

				goto nextrec;
			}

			switch (rec->dtrd_size) {
			case sizeof (uint64_t):
				n = dt_printf(dtp, fp,
				    quiet ? "%lld" : " %16lld",
				    /* LINTED - alignment */
				    *((unsigned long long *)addr));
				break;
			case sizeof (uint32_t):
				n = dt_printf(dtp, fp, quiet ? "%d" : " %8d",
				    /* LINTED - alignment */
				    *((uint32_t *)addr));
				break;
			case sizeof (uint16_t):
				n = dt_printf(dtp, fp, quiet ? "%d" : " %5d",
				    /* LINTED - alignment */
				    *((uint16_t *)addr));
				break;
			case sizeof (uint8_t):
				n = dt_printf(dtp, fp, quiet ? "%d" : " %3d",
				    *((uint8_t *)addr));
				break;
			default:
				n = dt_print_bytes(dtp, fp, addr,
				    rec->dtrd_size, 33, quiet);
				break;
			}

			if (n < 0)
				return (-1); /* errno is set for us */

nextrec:
			if (dt_buffered_flush(dtp, &data, rec, NULL) < 0)
				return (-1); /* errno is set for us */
		}

		/*
		 * Call the record callback with a NULL record to indicate
		 * that we're done processing this EPID.
		 */
		rval = (*rfunc)(&data, NULL, arg);
nextepid:
		offs += epd->dtepd_size;
		last = id;
	}

	if (buf->dtbd_oldest != 0 && start == buf->dtbd_oldest) {
		end = buf->dtbd_oldest;
		start = 0;
		goto again;
	}

	if ((drops = buf->dtbd_drops) == 0)
		return (0);

	/*
	 * Explicitly zero the drops to prevent us from processing them again.
	 */
	buf->dtbd_drops = 0;

	return (dt_handle_cpudrop(dtp, cpu, DTRACEDROP_PRINCIPAL, drops));
}

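/*
 * The structure and interposition callbacks below let dt_consume_begin()
 * consume the BEGIN CPU's buffer in two passes: first only the BEGIN probe
 * (and any ERRORs it induced), then everything but BEGIN.  See the comment
 * in dt_consume_begin() for the full story.
 */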
typedef struct dt_begin {
	dtrace_consume_probe_f *dtbgn_probefunc;
	dtrace_consume_rec_f *dtbgn_recfunc;
	void *dtbgn_arg;
	dtrace_handle_err_f *dtbgn_errhdlr;
	void *dtbgn_errarg;
	int dtbgn_beginonly;
} dt_begin_t;

static int
dt_consume_begin_probe(const dtrace_probedata_t *data, void *arg)
{
	dt_begin_t *begin = (dt_begin_t *)arg;
	dtrace_probedesc_t *pd = data->dtpda_pdesc;

	int r1 = (strcmp(pd->dtpd_provider, "dtrace") == 0);
	int r2 = (strcmp(pd->dtpd_name, "BEGIN") == 0);

	if (begin->dtbgn_beginonly) {
		if (!(r1 && r2))
			return (DTRACE_CONSUME_NEXT);
	} else {
		if (r1 && r2)
			return (DTRACE_CONSUME_NEXT);
	}

	/*
	 * We have a record that we're interested in.  Now call the underlying
	 * probe function...
	 */
	return (begin->dtbgn_probefunc(data, begin->dtbgn_arg));
}

static int
dt_consume_begin_record(const dtrace_probedata_t *data,
    const dtrace_recdesc_t *rec, void *arg)
{
	dt_begin_t *begin = (dt_begin_t *)arg;

	return (begin->dtbgn_recfunc(data, rec, begin->dtbgn_arg));
}

static int
dt_consume_begin_error(dtrace_errdata_t *data, void *arg)
{
	dt_begin_t *begin = (dt_begin_t *)arg;
	dtrace_probedesc_t *pd = data->dteda_pdesc;

	int r1 = (strcmp(pd->dtpd_provider, "dtrace") == 0);
	int r2 = (strcmp(pd->dtpd_name, "BEGIN") == 0);

	if (begin->dtbgn_beginonly) {
		if (!(r1 && r2))
			return (DTRACE_HANDLE_OK);
	} else {
		if (r1 && r2)
			return (DTRACE_HANDLE_OK);
	}

	return (begin->dtbgn_errhdlr(data, begin->dtbgn_errarg));
}

static int
dt_consume_begin(dtrace_hdl_t *dtp, FILE *fp, dtrace_bufdesc_t *buf,
    dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg)
{
	/*
	 * There's this idea that the BEGIN probe should be processed before
	 * everything else, and that the END probe should be processed after
	 * anything else.  In the common case, this is pretty easy to deal
	 * with.  However, a situation may arise where the BEGIN enabling and
	 * END enabling are on the same CPU, and some enabling in the middle
	 * occurred on a different CPU.  To deal with this (blech!) we need to
	 * consume the BEGIN buffer up until the end of the BEGIN probe, and
	 * then set it aside.  We will then process every other CPU, and then
	 * we'll return to the BEGIN CPU and process the rest of the data
	 * (which will inevitably include the END probe, if any).  Making this
	 * even more complicated (!) is the library's ERROR enabling.  Because
	 * this enabling is processed before we even get into the consume call
	 * back, any ERROR firing would result in the library's ERROR enabling
	 * being processed twice -- once in our first pass (for BEGIN probes),
	 * and again in our second pass (for everything but BEGIN probes).  To
	 * deal with this, we interpose on the ERROR handler to assure that we
	 * only process ERROR enablings induced by BEGIN enablings in the
	 * first pass, and that we only process ERROR enablings _not_ induced
	 * by BEGIN enablings in the second pass.
	 */
	dt_begin_t begin;
	processorid_t cpu = dtp->dt_beganon;
	dtrace_bufdesc_t nbuf;
	int rval, i;
	static int max_ncpus;
	dtrace_optval_t size;

	dtp->dt_beganon = -1;

	if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) {
		/*
		 * We really don't expect this to fail, but it is at least
		 * technically possible for this to fail with ENOENT.  In this
		 * case, we just drive on...
		 */
		if (errno == ENOENT)
			return (0);

		return (dt_set_errno(dtp, errno));
	}

	if (!dtp->dt_stopped || buf->dtbd_cpu != dtp->dt_endedon) {
		/*
		 * This is the simple case.  We're either not stopped, or if
		 * we are, we actually processed any END probes on another
		 * CPU.  We can simply consume this buffer and return.
		 */
		return (dt_consume_cpu(dtp, fp, cpu, buf, pf, rf, arg));
	}

	begin.dtbgn_probefunc = pf;
	begin.dtbgn_recfunc = rf;
	begin.dtbgn_arg = arg;
	begin.dtbgn_beginonly = 1;

	/*
	 * We need to interpose on the ERROR handler to be sure that we
	 * only process ERRORs induced by BEGIN.
	 */
	begin.dtbgn_errhdlr = dtp->dt_errhdlr;
	begin.dtbgn_errarg = dtp->dt_errarg;
	dtp->dt_errhdlr = dt_consume_begin_error;
	dtp->dt_errarg = &begin;

	rval = dt_consume_cpu(dtp, fp, cpu, buf, dt_consume_begin_probe,
	    dt_consume_begin_record, &begin);

	dtp->dt_errhdlr = begin.dtbgn_errhdlr;
	dtp->dt_errarg = begin.dtbgn_errarg;

	if (rval != 0)
		return (rval);

	/*
	 * Now allocate a new buffer.  We'll use this to deal with every other
	 * CPU.
	 */
	bzero(&nbuf, sizeof (dtrace_bufdesc_t));
	(void) dtrace_getopt(dtp, "bufsize", &size);
	if ((nbuf.dtbd_data = malloc(size)) == NULL)
		return (dt_set_errno(dtp, EDT_NOMEM));

	if (max_ncpus == 0)
		max_ncpus = dt_sysconf(dtp, _SC_CPUID_MAX) + 1;

	for (i = 0; i < max_ncpus; i++) {
		nbuf.dtbd_cpu = i;

		if (i == cpu)
			continue;

		if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &nbuf) == -1) {
			/*
			 * If we failed with ENOENT, it may be because the
			 * CPU was unconfigured -- this is okay.  Any other
			 * error, however, is unexpected.
			 */
			if (errno == ENOENT)
				continue;

			free(nbuf.dtbd_data);

			return (dt_set_errno(dtp, errno));
		}

		if ((rval = dt_consume_cpu(dtp, fp,
		    i, &nbuf, pf, rf, arg)) != 0) {
			free(nbuf.dtbd_data);
			return (rval);
		}
	}

	free(nbuf.dtbd_data);

	/*
	 * Okay -- we're done with the other buffers.  Now we want to
	 * reconsume the first buffer -- but this time we're looking for
	 * everything _but_ BEGIN.  And of course, in order to only consume
	 * those ERRORs _not_ associated with BEGIN, we need to reinstall our
	 * ERROR interposition function...
	 */
	begin.dtbgn_beginonly = 0;

	assert(begin.dtbgn_errhdlr == dtp->dt_errhdlr);
	assert(begin.dtbgn_errarg == dtp->dt_errarg);
	dtp->dt_errhdlr = dt_consume_begin_error;
	dtp->dt_errarg = &begin;

	rval = dt_consume_cpu(dtp, fp, cpu, buf, dt_consume_begin_probe,
	    dt_consume_begin_record, &begin);

	dtp->dt_errhdlr = begin.dtbgn_errhdlr;
	dtp->dt_errarg = begin.dtbgn_errarg;

	return (rval);
}

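/*
 * Consume data from all CPUs, rate-limited by the switchrate option: the
 * CPU that ran the BEGIN probe is consumed first (via dt_consume_begin()),
 * the CPU that ran END is consumed last once tracing has stopped, and every
 * other CPU is consumed in between.
 */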
int
dtrace_consume(dtrace_hdl_t *dtp, FILE *fp,
    dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg)
{
	dtrace_bufdesc_t *buf = &dtp->dt_buf;
	dtrace_optval_t size;
	static int max_ncpus;
	int i, rval;
	dtrace_optval_t interval = dtp->dt_options[DTRACEOPT_SWITCHRATE];
	hrtime_t now = gethrtime();

	if (dtp->dt_lastswitch != 0) {
		if (now - dtp->dt_lastswitch < interval)
			return (0);

		dtp->dt_lastswitch += interval;
	} else {
		dtp->dt_lastswitch = now;
	}

	if (!dtp->dt_active)
		return (dt_set_errno(dtp, EINVAL));

	if (max_ncpus == 0)
		max_ncpus = dt_sysconf(dtp, _SC_CPUID_MAX) + 1;

	if (pf == NULL)
		pf = (dtrace_consume_probe_f *)dt_nullprobe;

	if (rf == NULL)
		rf = (dtrace_consume_rec_f *)dt_nullrec;

	if (buf->dtbd_data == NULL) {
		(void) dtrace_getopt(dtp, "bufsize", &size);
		if ((buf->dtbd_data = malloc(size)) == NULL)
			return (dt_set_errno(dtp, EDT_NOMEM));

		buf->dtbd_size = size;
	}

	/*
	 * If we have just begun, we want to first process the CPU that
	 * executed the BEGIN probe (if any).
	 */
	if (dtp->dt_active && dtp->dt_beganon != -1) {
		buf->dtbd_cpu = dtp->dt_beganon;
		if ((rval = dt_consume_begin(dtp, fp, buf, pf, rf, arg)) != 0)
			return (rval);
	}

	for (i = 0; i < max_ncpus; i++) {
		buf->dtbd_cpu = i;

		/*
		 * If we have stopped, we want to process the CPU on which the
		 * END probe was processed only _after_ we have processed
		 * everything else.
		 */
		if (dtp->dt_stopped && (i == dtp->dt_endedon))
			continue;

		if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) {
			/*
			 * If we failed with ENOENT, it may be because the
			 * CPU was unconfigured -- this is okay.  Any other
			 * error, however, is unexpected.
			 */
			if (errno == ENOENT)
				continue;

			return (dt_set_errno(dtp, errno));
		}

		if ((rval = dt_consume_cpu(dtp, fp, i, buf, pf, rf, arg)) != 0)
			return (rval);
	}

	if (!dtp->dt_stopped)
		return (0);

	buf->dtbd_cpu = dtp->dt_endedon;

	if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) {
		/*
		 * This _really_ shouldn't fail, but it is strictly speaking
		 * possible for this to return ENOENT if the CPU that called
		 * the END enabling somehow managed to become unconfigured.
		 * It's unclear how the user can possibly expect anything
		 * rational to happen in this case -- the state has been thrown
		 * out along with the unconfigured CPU -- so we'll just drive
		 * on...
		 */
		if (errno == ENOENT)
			return (0);

		return (dt_set_errno(dtp, errno));
	}

	return (dt_consume_cpu(dtp, fp, dtp->dt_endedon, buf, pf, rf, arg));
}