/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" #include #include #include #include #include #include #include #include #include static int dt_flowindent(dtrace_hdl_t *dtp, dtrace_probedata_t *data, dtrace_epid_t last, dtrace_bufdesc_t *buf, size_t offs) { dtrace_probedesc_t *pd = data->dtpda_pdesc, *npd; dtrace_eprobedesc_t *epd = data->dtpda_edesc, *nepd; char *p = pd->dtpd_provider, *n = pd->dtpd_name; dtrace_flowkind_t flow = DTRACEFLOW_NONE; const char *str = NULL; static const char *e_str[2] = { " -> ", " => " }; static const char *r_str[2] = { " <- ", " <= " }; dtrace_epid_t next, id = epd->dtepd_epid; int rval; if (strcmp(n, "entry") == 0) { flow = DTRACEFLOW_ENTRY; str = e_str[strcmp(p, "syscall") == 0]; } else if (strcmp(n, "return") == 0 || strcmp(n, "exit") == 0) { flow = DTRACEFLOW_RETURN; str = r_str[strcmp(p, "syscall") == 0]; } /* * If we're going to indent this, we need to check the ID of our last * call. If we're looking at the same probe ID but a different EPID, * we _don't_ want to indent. (Yes, there are some minor holes in * this scheme -- it's a heuristic.) */ if (flow == DTRACEFLOW_ENTRY) { if ((last != DTRACE_EPIDNONE && id != last && pd->dtpd_id == dtp->dt_pdesc[last]->dtpd_id)) flow = DTRACEFLOW_NONE; } /* * If we're going to unindent this, it's more difficult to see if * we don't actually want to unindent it -- we need to look at the * _next_ EPID. */ if (flow == DTRACEFLOW_RETURN) { offs += epd->dtepd_size; do { if (offs >= buf->dtbd_size) { /* * We're at the end -- maybe. If the oldest * record is non-zero, we need to wrap. */ if (buf->dtbd_oldest != 0) { offs = 0; } else { goto out; } } next = *(uint32_t *)((uintptr_t)buf->dtbd_data + offs); if (next == DTRACE_EPIDNONE) offs += sizeof (id); } while (next == DTRACE_EPIDNONE); if ((rval = dt_epid_lookup(dtp, next, &nepd, &npd)) != 0) return (rval); if (next != id && npd->dtpd_id == pd->dtpd_id) flow = DTRACEFLOW_NONE; } out: if (flow == DTRACEFLOW_ENTRY || flow == DTRACEFLOW_RETURN) { data->dtpda_prefix = str; } else { data->dtpda_prefix = "| "; } if (flow == DTRACEFLOW_RETURN && data->dtpda_indent > 0) data->dtpda_indent -= 2; data->dtpda_flow = flow; return (0); } static int dt_nullprobe() { return (DTRACE_CONSUME_THIS); } static int dt_nullrec() { return (DTRACE_CONSUME_NEXT); } int dt_print_quantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr, size_t size, uint64_t normal) { const uint64_t *data = addr; int i, first_bin = 0, last_bin = DTRACE_QUANTIZE_NBUCKETS - 1; uint64_t total_bin_count = 0; if (size != DTRACE_QUANTIZE_NBUCKETS * sizeof (uint64_t)) return (dt_set_errno(dtp, EDT_DMISMATCH)); while (first_bin < DTRACE_QUANTIZE_NBUCKETS - 1 && data[first_bin] == 0) first_bin++; if (first_bin > 0) first_bin--; while (last_bin > 0 && data[last_bin] == 0) last_bin--; if (last_bin < DTRACE_QUANTIZE_NBUCKETS - 1) last_bin++; for (i = first_bin; i <= last_bin; i++) total_bin_count += data[i]; if (dt_printf(dtp, fp, "\n%16s %41s %-9s\n", "value", "------------- Distribution -------------", "count") < 0) return (-1); for (i = first_bin; i <= last_bin; i++) { float f = ((float)data[i] * 40.0) / (float)total_bin_count; uint_t depth = (uint_t)(f + 0.5); if (dt_printf(dtp, fp, "%16lld |%s%s %-9llu\n", (long long)DTRACE_QUANTIZE_BUCKETVAL(i), "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" + 40 - depth, " " + depth, (u_longlong_t)data[i] / normal) < 0) return (-1); } return (0); } int dt_print_lquantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr, size_t size, uint64_t normal) { const uint64_t *data = addr; int i, first_bin, last_bin, base; uint64_t arg, total_bin_count = 0; uint16_t step, levels; if (size < sizeof (uint64_t)) return (dt_set_errno(dtp, EDT_DMISMATCH)); arg = *data++; size -= sizeof (uint64_t); base = DTRACE_LQUANTIZE_BASE(arg); step = DTRACE_LQUANTIZE_STEP(arg); levels = DTRACE_LQUANTIZE_LEVELS(arg); first_bin = 0; last_bin = levels + 1; if (size != sizeof (uint64_t) * (levels + 2)) return (dt_set_errno(dtp, EDT_DMISMATCH)); while (first_bin < levels + 1 && data[first_bin] == 0) first_bin++; if (first_bin > 0) first_bin--; while (last_bin > 0 && data[last_bin] == 0) last_bin--; if (last_bin < levels + 1) last_bin++; for (i = first_bin; i <= last_bin; i++) total_bin_count += data[i]; if (dt_printf(dtp, fp, "\n%16s %41s %-9s\n", "value", "------------- Distribution -------------", "count") < 0) return (-1); for (i = first_bin; i <= last_bin; i++) { float f = ((float)data[i] * 40.0) / (float)total_bin_count; uint_t depth = (uint_t)(f + 0.5); char c[32]; int err; if (i == 0) { (void) snprintf(c, sizeof (c), "< %d", base / (uint32_t)normal); err = dt_printf(dtp, fp, "%16s ", c); } else if (i == levels + 1) { (void) snprintf(c, sizeof (c), ">= %d", base + (levels * step)); err = dt_printf(dtp, fp, "%16s ", c); } else { err = dt_printf(dtp, fp, "%16d ", base + (i - 1) * step); } if (err < 0 || dt_printf(dtp, fp, "|%s%s %-9llu\n", "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" + 40 - depth, " " + depth, (u_longlong_t)data[i] / normal) < 0) return (-1); } return (0); } /*ARGSUSED*/ static int dt_print_average(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr, size_t size, uint64_t normal) { /* LINTED - alignment */ uint64_t *data = (uint64_t *)addr; return (dt_printf(dtp, fp, " %16lld", data[0] ? (long long)(data[1] / normal / data[0]) : 0)); } /*ARGSUSED*/ int dt_print_bytes(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr, size_t nbytes, int width, int quiet) { /* * If the byte stream is a series of printable characters, followed by * a terminating byte, we print it out as a string. Otherwise, we * assume that it's something else and just print the bytes. */ int i, j, margin = 5; char *c = (char *)addr; if (nbytes == 0) return (0); if (dtp->dt_options[DTRACEOPT_RAWBYTES] != DTRACEOPT_UNSET) goto raw; for (i = 0; i < nbytes; i++) { /* * We define a "printable character" to be one for which * isprint(3C) returns non-zero, isspace(3C) returns non-zero, * or a character which is either backspace or the bell. * Backspace and the bell are regrettably special because * they fail the first two tests -- and yet they are entirely * printable. These are the only two control characters that * have meaning for the terminal and for which isprint(3C) and * isspace(3C) return 0. */ if (isprint(c[i]) || isspace(c[i]) || c[i] == '\b' || c[i] == '\a') continue; if (c[i] == '\0' && i > 0) { /* * This looks like it might be a string. Before we * assume that it is indeed a string, check the * remainder of the byte range; if it contains * additional non-nul characters, we'll assume that * it's a binary stream that just happens to look like * a string, and we'll print out the individual bytes. */ for (j = i + 1; j < nbytes; j++) { if (c[j] != '\0') break; } if (j != nbytes) break; if (quiet) return (dt_printf(dtp, fp, "%s", c)); else return (dt_printf(dtp, fp, " %-*s", width, c)); } break; } if (i == nbytes) { /* * The byte range is all printable characters, but there is * no trailing nul byte. We'll assume that it's a string and * print it as such. */ char *s = alloca(nbytes + 1); bcopy(c, s, nbytes); s[nbytes] = '\0'; return (dt_printf(dtp, fp, " %-*s", width, s)); } raw: if (dt_printf(dtp, fp, "\n%*s ", margin, "") < 0) return (-1); for (i = 0; i < 16; i++) if (dt_printf(dtp, fp, " %c", "0123456789abcdef"[i]) < 0) return (-1); if (dt_printf(dtp, fp, " 0123456789abcdef\n") < 0) return (-1); for (i = 0; i < nbytes; i += 16) { if (dt_printf(dtp, fp, "%*s%5x:", margin, "", i) < 0) return (-1); for (j = i; j < i + 16 && j < nbytes; j++) { if (dt_printf(dtp, fp, " %02x", (uchar_t)c[j]) < 0) return (-1); } while (j++ % 16) { if (dt_printf(dtp, fp, " ") < 0) return (-1); } if (dt_printf(dtp, fp, " ") < 0) return (-1); for (j = i; j < i + 16 && j < nbytes; j++) { if (dt_printf(dtp, fp, "%c", c[j] < ' ' || c[j] > '~' ? '.' : c[j]) < 0) return (-1); } if (dt_printf(dtp, fp, "\n") < 0) return (-1); } return (0); } int dt_print_stack(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr, int depth) { pc_t *pc = (pc_t *)(uintptr_t)addr; dtrace_syminfo_t dts; GElf_Sym sym; int i, indent; char c[PATH_MAX * 2]; if (dt_printf(dtp, fp, "\n") < 0) return (-1); if (format == NULL) format = "%s"; if (dtp->dt_options[DTRACEOPT_STACKINDENT] != DTRACEOPT_UNSET) indent = (int)dtp->dt_options[DTRACEOPT_STACKINDENT]; else indent = _dtrace_stkindent; for (i = 0; i < depth && pc[i] != NULL; i++) { if (dt_printf(dtp, fp, "%*s", indent, "") < 0) return (-1); if (dtrace_lookup_by_addr(dtp, pc[i], &sym, &dts) == 0) { if (pc[i] > sym.st_value) { (void) snprintf(c, sizeof (c), "%s`%s+0x%llx", dts.dts_object, dts.dts_name, (u_longlong_t)pc[i] - sym.st_value); } else { (void) snprintf(c, sizeof (c), "%s`%s", dts.dts_object, dts.dts_name); } } else { /* * We'll repeat the lookup, but this time we'll specify * a NULL GElf_Sym -- indicating that we're only * interested in the containing module. */ if (dtrace_lookup_by_addr(dtp, pc[i], NULL, &dts) == 0) { (void) snprintf(c, sizeof (c), "%s`0x%llx", dts.dts_object, (u_longlong_t)pc[i]); } else { (void) snprintf(c, sizeof (c), "0x%llx", (u_longlong_t)pc[i]); } } if (dt_printf(dtp, fp, format, c) < 0) return (-1); if (dt_printf(dtp, fp, "\n") < 0) return (-1); } return (0); } int dt_print_ustack(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr, uint64_t arg) { uint64_t *pc = (uint64_t *)(uintptr_t)addr; uint32_t depth = DTRACE_USTACK_NFRAMES(arg); uint32_t strsize = DTRACE_USTACK_STRSIZE(arg); const char *strbase = addr + (depth + 1) * sizeof (uint64_t); const char *str = strsize ? strbase : NULL; int err = 0; char name[PATH_MAX], objname[PATH_MAX], c[PATH_MAX * 2]; struct ps_prochandle *P; GElf_Sym sym; int i, indent; pid_t pid; if (depth == 0) return (0); pid = (pid_t)*pc++; if (dt_printf(dtp, fp, "\n") < 0) return (-1); if (format == NULL) format = "%s"; if (dtp->dt_options[DTRACEOPT_STACKINDENT] != DTRACEOPT_UNSET) indent = (int)dtp->dt_options[DTRACEOPT_STACKINDENT]; else indent = _dtrace_stkindent; /* * Ultimately, we need to add an entry point in the library vector for * determining from . For now, if * this is a vector open, we just print the raw address or string. */ if (dtp->dt_vector == NULL) P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0); else P = NULL; if (P != NULL) dt_proc_lock(dtp, P); /* lock handle while we perform lookups */ for (i = 0; i < depth && pc[i] != NULL; i++) { if ((err = dt_printf(dtp, fp, "%*s", indent, "")) < 0) break; if (P != NULL && Plookup_by_addr(P, pc[i], name, sizeof (name), &sym) == 0) { (void) Pobjname(P, pc[i], objname, sizeof (objname)); if (pc[i] > sym.st_value) { (void) snprintf(c, sizeof (c), "%s`%s+0x%llx", dt_basename(objname), name, (u_longlong_t)(pc[i] - sym.st_value)); } else { (void) snprintf(c, sizeof (c), "%s`%s", dt_basename(objname), name); } } else if (str != NULL && str[0] != '\0') { (void) snprintf(c, sizeof (c), "%s", str); } else { if (P != NULL && Pobjname(P, pc[i], objname, sizeof (objname)) != NULL) { (void) snprintf(c, sizeof (c), "%s`0x%llx", dt_basename(objname), (u_longlong_t)pc[i]); } else { (void) snprintf(c, sizeof (c), "0x%llx", (u_longlong_t)pc[i]); } } if ((err = dt_printf(dtp, fp, format, c)) < 0) break; if ((err = dt_printf(dtp, fp, "\n")) < 0) break; if (str != NULL) { str += strlen(str) + 1; if (str - strbase >= strsize) str = NULL; } } if (P != NULL) { dt_proc_unlock(dtp, P); dt_proc_release(dtp, P); } return (err); } typedef struct dt_normal { dtrace_aggvarid_t dtnd_id; uint64_t dtnd_normal; } dt_normal_t; static int dt_normalize_agg(dtrace_aggdata_t *aggdata, void *arg) { dt_normal_t *normal = arg; dtrace_aggdesc_t *agg = aggdata->dtada_desc; dtrace_aggvarid_t id = normal->dtnd_id; uintptr_t data = (uintptr_t)aggdata->dtada_data; if (agg->dtagd_nrecs == 0) return (DTRACE_AGGWALK_NEXT); if (id != *(dtrace_aggvarid_t *)(data + agg->dtagd_rec[0].dtrd_offset)) return (DTRACE_AGGWALK_NEXT); aggdata->dtada_normal = normal->dtnd_normal; return (DTRACE_AGGWALK_NORMALIZE); } static int dt_normalize(dtrace_hdl_t *dtp, caddr_t base, dtrace_recdesc_t *rec) { dt_normal_t normal; caddr_t addr; /* * We (should) have two records: the aggregation ID followed by the * normalization value. */ addr = base + rec->dtrd_offset; if (rec->dtrd_size != sizeof (dtrace_aggvarid_t)) return (dt_set_errno(dtp, EDT_BADNORMAL)); /* LINTED - alignment */ normal.dtnd_id = *((dtrace_aggvarid_t *)addr); rec++; if (rec->dtrd_action != DTRACEACT_LIBACT) return (dt_set_errno(dtp, EDT_BADNORMAL)); if (rec->dtrd_arg != DT_ACT_NORMALIZE) return (dt_set_errno(dtp, EDT_BADNORMAL)); addr = base + rec->dtrd_offset; switch (rec->dtrd_size) { case sizeof (uint64_t): /* LINTED - alignment */ normal.dtnd_normal = *((uint64_t *)addr); break; case sizeof (uint32_t): /* LINTED - alignment */ normal.dtnd_normal = *((uint32_t *)addr); break; case sizeof (uint16_t): /* LINTED - alignment */ normal.dtnd_normal = *((uint16_t *)addr); break; case sizeof (uint8_t): normal.dtnd_normal = *((uint8_t *)addr); break; default: return (dt_set_errno(dtp, EDT_BADNORMAL)); } (void) dtrace_aggregate_walk(dtp, dt_normalize_agg, &normal); return (0); } static int dt_denormalize_agg(dtrace_aggdata_t *aggdata, void *arg) { dtrace_aggdesc_t *agg = aggdata->dtada_desc; dtrace_aggvarid_t id = *((dtrace_aggvarid_t *)arg); uintptr_t data = (uintptr_t)aggdata->dtada_data; if (agg->dtagd_nrecs == 0) return (DTRACE_AGGWALK_NEXT); if (id != *(dtrace_aggvarid_t *)(data + agg->dtagd_rec[0].dtrd_offset)) return (DTRACE_AGGWALK_NEXT); return (DTRACE_AGGWALK_DENORMALIZE); } static int dt_clear_agg(dtrace_aggdata_t *aggdata, void *arg) { dtrace_aggdesc_t *agg = aggdata->dtada_desc; dtrace_aggvarid_t id = *((dtrace_aggvarid_t *)arg); uintptr_t data = (uintptr_t)aggdata->dtada_data; if (agg->dtagd_nrecs == 0) return (DTRACE_AGGWALK_NEXT); if (id != *(dtrace_aggvarid_t *)(data + agg->dtagd_rec[0].dtrd_offset)) return (DTRACE_AGGWALK_NEXT); return (DTRACE_AGGWALK_CLEAR); } typedef struct dt_trunc { dtrace_aggvarid_t dttd_id; uint64_t dttd_remaining; } dt_trunc_t; static int dt_trunc_agg(dtrace_aggdata_t *aggdata, void *arg) { dt_trunc_t *trunc = arg; dtrace_aggdesc_t *agg = aggdata->dtada_desc; dtrace_aggvarid_t id = trunc->dttd_id; uintptr_t data = (uintptr_t)aggdata->dtada_data; if (agg->dtagd_nrecs == 0) return (DTRACE_AGGWALK_NEXT); if (id != *(dtrace_aggvarid_t *)(data + agg->dtagd_rec[0].dtrd_offset)) return (DTRACE_AGGWALK_NEXT); if (trunc->dttd_remaining == 0) return (DTRACE_AGGWALK_REMOVE); trunc->dttd_remaining--; return (DTRACE_AGGWALK_NEXT); } static int dt_trunc(dtrace_hdl_t *dtp, caddr_t base, dtrace_recdesc_t *rec) { dt_trunc_t trunc; caddr_t addr; int64_t remaining; int (*func)(dtrace_hdl_t *, dtrace_aggregate_f *, void *); /* * We (should) have two records: the aggregation ID followed by the * number of aggregation entries after which the aggregation is to be * truncated. */ addr = base + rec->dtrd_offset; if (rec->dtrd_size != sizeof (dtrace_aggvarid_t)) return (dt_set_errno(dtp, EDT_BADTRUNC)); /* LINTED - alignment */ trunc.dttd_id = *((dtrace_aggvarid_t *)addr); rec++; if (rec->dtrd_action != DTRACEACT_LIBACT) return (dt_set_errno(dtp, EDT_BADTRUNC)); if (rec->dtrd_arg != DT_ACT_TRUNC) return (dt_set_errno(dtp, EDT_BADTRUNC)); addr = base + rec->dtrd_offset; switch (rec->dtrd_size) { case sizeof (uint64_t): /* LINTED - alignment */ remaining = *((int64_t *)addr); break; case sizeof (uint32_t): /* LINTED - alignment */ remaining = *((int32_t *)addr); break; case sizeof (uint16_t): /* LINTED - alignment */ remaining = *((int16_t *)addr); break; case sizeof (uint8_t): remaining = *((int8_t *)addr); break; default: return (dt_set_errno(dtp, EDT_BADNORMAL)); } if (remaining < 0) { func = dtrace_aggregate_walk_valsorted; remaining = -remaining; } else { func = dtrace_aggregate_walk_valrevsorted; } assert(remaining >= 0); trunc.dttd_remaining = remaining; (void) func(dtp, dt_trunc_agg, &trunc); return (0); } int dt_print_agg(dtrace_aggdata_t *aggdata, void *arg) { int i, err = 0; dt_print_aggdata_t *pd = arg; dtrace_aggdesc_t *agg = aggdata->dtada_desc; FILE *fp = pd->dtpa_fp; dtrace_hdl_t *dtp = pd->dtpa_dtp; dtrace_aggvarid_t aggvarid = pd->dtpa_id; uintptr_t data = (uintptr_t)aggdata->dtada_data; if (pd->dtpa_allunprint) { if (agg->dtagd_flags & DTRACE_AGD_PRINTED) return (0); } else { /* * If we're not printing all unprinted aggregations, then the * aggregation variable ID denotes a specific aggregation * variable that we should print -- skip any other aggregations * that we encounter. */ if (agg->dtagd_nrecs == 0) return (0); if (aggvarid != *(dtrace_aggvarid_t *)(data + agg->dtagd_rec[0].dtrd_offset)) return (0); } /* * Iterate over each record description, printing the traced data, * skipping the first datum (the tuple member created by the compiler). */ for (i = 1; err >= 0 && i < agg->dtagd_nrecs; i++) { dtrace_recdesc_t *rec = &agg->dtagd_rec[i]; dtrace_actkind_t act = rec->dtrd_action; caddr_t addr = aggdata->dtada_data + rec->dtrd_offset; size_t size = rec->dtrd_size; uint64_t normal; normal = DTRACEACT_ISAGG(act) ? aggdata->dtada_normal : 1; if (act == DTRACEACT_STACK) { int depth = rec->dtrd_size / sizeof (pc_t); err = dt_print_stack(dtp, fp, NULL, addr, depth); goto nextrec; } if (act == DTRACEACT_USTACK || act == DTRACEACT_JSTACK) { err = dt_print_ustack(dtp, fp, NULL, addr, rec->dtrd_arg); goto nextrec; } if (act == DTRACEAGG_QUANTIZE) { err = dt_print_quantize(dtp, fp, addr, size, normal); goto nextrec; } if (act == DTRACEAGG_LQUANTIZE) { err = dt_print_lquantize(dtp, fp, addr, size, normal); goto nextrec; } if (act == DTRACEAGG_AVG) { err = dt_print_average(dtp, fp, addr, size, normal); goto nextrec; } switch (size) { case sizeof (uint64_t): err = dt_printf(dtp, fp, " %16lld", /* LINTED - alignment */ (long long)*((uint64_t *)addr) / normal); break; case sizeof (uint32_t): /* LINTED - alignment */ err = dt_printf(dtp, fp, " %8d", *((uint32_t *)addr) / (uint32_t)normal); break; case sizeof (uint16_t): /* LINTED - alignment */ err = dt_printf(dtp, fp, " %5d", *((uint16_t *)addr) / (uint32_t)normal); break; case sizeof (uint8_t): err = dt_printf(dtp, fp, " %3d", *((uint8_t *)addr) / (uint32_t)normal); break; default: err = dt_print_bytes(dtp, fp, addr, size, 50, 0); break; } nextrec: if (dt_buffered_flush(dtp, NULL, rec, aggdata) < 0) return (-1); } if (err >= 0) err = dt_printf(dtp, fp, "\n"); if (dt_buffered_flush(dtp, NULL, NULL, aggdata) < 0) return (-1); if (!pd->dtpa_allunprint) agg->dtagd_flags |= DTRACE_AGD_PRINTED; return (err < 0 ? -1 : 0); } static int dt_consume_cpu(dtrace_hdl_t *dtp, FILE *fp, int cpu, dtrace_bufdesc_t *buf, dtrace_consume_probe_f *efunc, dtrace_consume_rec_f *rfunc, void *arg) { dtrace_epid_t id; size_t offs, start = buf->dtbd_oldest, end = buf->dtbd_size; int flow = (dtp->dt_options[DTRACEOPT_FLOWINDENT] != DTRACEOPT_UNSET); int quiet = (dtp->dt_options[DTRACEOPT_QUIET] != DTRACEOPT_UNSET); int rval, i, n; dtrace_epid_t last = DTRACE_EPIDNONE; dtrace_probedata_t data; uint64_t drops; caddr_t addr; bzero(&data, sizeof (data)); data.dtpda_handle = dtp; data.dtpda_cpu = cpu; again: for (offs = start; offs < end; ) { dtrace_eprobedesc_t *epd; /* * We're guaranteed to have an ID. */ id = *(uint32_t *)((uintptr_t)buf->dtbd_data + offs); if (id == DTRACE_EPIDNONE) { /* * This is filler to assure proper alignment of the * next record; we simply ignore it. */ offs += sizeof (id); continue; } if ((rval = dt_epid_lookup(dtp, id, &data.dtpda_edesc, &data.dtpda_pdesc)) != 0) return (rval); epd = data.dtpda_edesc; data.dtpda_data = buf->dtbd_data + offs; if (data.dtpda_edesc->dtepd_uarg != DT_ECB_DEFAULT) { rval = dt_handle(dtp, &data); if (rval == DTRACE_CONSUME_NEXT) goto nextepid; if (rval == DTRACE_CONSUME_ERROR) return (-1); } if (flow) (void) dt_flowindent(dtp, &data, last, buf, offs); rval = (*efunc)(&data, arg); if (flow) { if (data.dtpda_flow == DTRACEFLOW_ENTRY) data.dtpda_indent += 2; } if (rval == DTRACE_CONSUME_NEXT) goto nextepid; if (rval == DTRACE_CONSUME_ABORT) return (dt_set_errno(dtp, EDT_DIRABORT)); if (rval != DTRACE_CONSUME_THIS) return (dt_set_errno(dtp, EDT_BADRVAL)); for (i = 0; i < epd->dtepd_nrecs; i++) { dtrace_recdesc_t *rec = &epd->dtepd_rec[i]; dtrace_actkind_t act = rec->dtrd_action; data.dtpda_data = buf->dtbd_data + offs + rec->dtrd_offset; addr = data.dtpda_data; if (act == DTRACEACT_LIBACT) { if (rec->dtrd_arg == DT_ACT_CLEAR) { dtrace_aggvarid_t id; /* LINTED - alignment */ id = *((dtrace_aggvarid_t *)addr); (void) dtrace_aggregate_walk(dtp, dt_clear_agg, &id); continue; } if (rec->dtrd_arg == DT_ACT_DENORMALIZE) { dtrace_aggvarid_t id; /* LINTED - alignment */ id = *((dtrace_aggvarid_t *)addr); (void) dtrace_aggregate_walk(dtp, dt_denormalize_agg, &id); continue; } if (rec->dtrd_arg == DT_ACT_NORMALIZE) { if (i == epd->dtepd_nrecs - 1) return (dt_set_errno(dtp, EDT_BADNORMAL)); if (dt_normalize(dtp, buf->dtbd_data + offs, rec) != 0) return (-1); i++; continue; } if (rec->dtrd_arg == DT_ACT_TRUNC) { if (i == epd->dtepd_nrecs - 1) return (dt_set_errno(dtp, EDT_BADTRUNC)); if (dt_trunc(dtp, buf->dtbd_data + offs, rec) != 0) return (-1); i++; continue; } if (rec->dtrd_arg == DT_ACT_FTRUNCATE) { if (fp == NULL) continue; (void) fflush(fp); (void) ftruncate(fileno(fp), 0); (void) fseeko(fp, 0, SEEK_SET); continue; } } rval = (*rfunc)(&data, rec, arg); if (rval == DTRACE_CONSUME_NEXT) continue; if (rval == DTRACE_CONSUME_ABORT) return (dt_set_errno(dtp, EDT_DIRABORT)); if (rval != DTRACE_CONSUME_THIS) return (dt_set_errno(dtp, EDT_BADRVAL)); if (act == DTRACEACT_STACK) { int depth = rec->dtrd_size / sizeof (pc_t); if (dt_print_stack(dtp, fp, NULL, addr, depth) < 0) return (-1); goto nextrec; } if (act == DTRACEACT_USTACK || act == DTRACEACT_JSTACK) { if (dt_print_ustack(dtp, fp, NULL, addr, rec->dtrd_arg) < 0) return (-1); goto nextrec; } if (DTRACEACT_ISPRINTFLIKE(act)) { void *fmtdata; int (*func)(dtrace_hdl_t *, FILE *, void *, const dtrace_probedata_t *, const dtrace_recdesc_t *, uint_t, const void *buf, size_t); if ((fmtdata = dt_format_lookup(dtp, rec->dtrd_format)) == NULL) goto nofmt; switch (act) { case DTRACEACT_PRINTF: func = dtrace_fprintf; break; case DTRACEACT_PRINTA: func = dtrace_fprinta; break; case DTRACEACT_SYSTEM: func = dtrace_system; break; case DTRACEACT_FREOPEN: func = dtrace_freopen; break; } n = (*func)(dtp, fp, fmtdata, &data, rec, epd->dtepd_nrecs - i, (uchar_t *)buf->dtbd_data + offs, buf->dtbd_size - offs); if (n < 0) return (-1); /* errno is set for us */ if (n > 0) i += n - 1; goto nextrec; } nofmt: if (act == DTRACEACT_PRINTA) { dt_print_aggdata_t pd; bzero(&pd, sizeof (pd)); pd.dtpa_dtp = dtp; pd.dtpa_fp = fp; /* LINTED - alignment */ pd.dtpa_id = *((dtrace_aggvarid_t *)addr); if (dt_printf(dtp, fp, "\n") < 0 || dtrace_aggregate_walk_valsorted(dtp, dt_print_agg, &pd) < 0) return (-1); goto nextrec; } switch (rec->dtrd_size) { case sizeof (uint64_t): n = dt_printf(dtp, fp, quiet ? "%lld" : " %16lld", /* LINTED - alignment */ *((unsigned long long *)addr)); break; case sizeof (uint32_t): n = dt_printf(dtp, fp, quiet ? "%d" : " %8d", /* LINTED - alignment */ *((uint32_t *)addr)); break; case sizeof (uint16_t): n = dt_printf(dtp, fp, quiet ? "%d" : " %5d", /* LINTED - alignment */ *((uint16_t *)addr)); break; case sizeof (uint8_t): n = dt_printf(dtp, fp, quiet ? "%d" : " %3d", *((uint8_t *)addr)); break; default: n = dt_print_bytes(dtp, fp, addr, rec->dtrd_size, 33, quiet); break; } if (n < 0) return (-1); /* errno is set for us */ nextrec: if (dt_buffered_flush(dtp, &data, rec, NULL) < 0) return (-1); /* errno is set for us */ } /* * Call the record callback with a NULL record to indicate * that we're done processing this EPID. */ rval = (*rfunc)(&data, NULL, arg); nextepid: offs += epd->dtepd_size; last = id; } if (buf->dtbd_oldest != 0 && start == buf->dtbd_oldest) { end = buf->dtbd_oldest; start = 0; goto again; } if ((drops = buf->dtbd_drops) == 0) return (0); /* * Explicitly zero the drops to prevent us from processing them again. */ buf->dtbd_drops = 0; return (dt_handle_cpudrop(dtp, cpu, DTRACEDROP_PRINCIPAL, drops)); } typedef struct dt_begin { dtrace_consume_probe_f *dtbgn_probefunc; dtrace_consume_rec_f *dtbgn_recfunc; void *dtbgn_arg; dtrace_handle_err_f *dtbgn_errhdlr; void *dtbgn_errarg; int dtbgn_beginonly; } dt_begin_t; static int dt_consume_begin_probe(const dtrace_probedata_t *data, void *arg) { dt_begin_t *begin = (dt_begin_t *)arg; dtrace_probedesc_t *pd = data->dtpda_pdesc; int r1 = (strcmp(pd->dtpd_provider, "dtrace") == 0); int r2 = (strcmp(pd->dtpd_name, "BEGIN") == 0); if (begin->dtbgn_beginonly) { if (!(r1 && r2)) return (DTRACE_CONSUME_NEXT); } else { if (r1 && r2) return (DTRACE_CONSUME_NEXT); } /* * We have a record that we're interested in. Now call the underlying * probe function... */ return (begin->dtbgn_probefunc(data, begin->dtbgn_arg)); } static int dt_consume_begin_record(const dtrace_probedata_t *data, const dtrace_recdesc_t *rec, void *arg) { dt_begin_t *begin = (dt_begin_t *)arg; return (begin->dtbgn_recfunc(data, rec, begin->dtbgn_arg)); } static int dt_consume_begin_error(dtrace_errdata_t *data, void *arg) { dt_begin_t *begin = (dt_begin_t *)arg; dtrace_probedesc_t *pd = data->dteda_pdesc; int r1 = (strcmp(pd->dtpd_provider, "dtrace") == 0); int r2 = (strcmp(pd->dtpd_name, "BEGIN") == 0); if (begin->dtbgn_beginonly) { if (!(r1 && r2)) return (DTRACE_HANDLE_OK); } else { if (r1 && r2) return (DTRACE_HANDLE_OK); } return (begin->dtbgn_errhdlr(data, begin->dtbgn_errarg)); } static int dt_consume_begin(dtrace_hdl_t *dtp, FILE *fp, dtrace_bufdesc_t *buf, dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg) { /* * There's this idea that the BEGIN probe should be processed before * everything else, and that the END probe should be processed after * anything else. In the common case, this is pretty easy to deal * with. However, a situation may arise where the BEGIN enabling and * END enabling are on the same CPU, and some enabling in the middle * occurred on a different CPU. To deal with this (blech!) we need to * consume the BEGIN buffer up until the end of the BEGIN probe, and * then set it aside. We will then process every other CPU, and then * we'll return to the BEGIN CPU and process the rest of the data * (which will inevitably include the END probe, if any). Making this * even more complicated (!) is the library's ERROR enabling. Because * this enabling is processed before we even get into the consume call * back, any ERROR firing would result in the library's ERROR enabling * being processed twice -- once in our first pass (for BEGIN probes), * and again in our second pass (for everything but BEGIN probes). To * deal with this, we interpose on the ERROR handler to assure that we * only process ERROR enablings induced by BEGIN enablings in the * first pass, and that we only process ERROR enablings _not_ induced * by BEGIN enablings in the second pass. */ dt_begin_t begin; processorid_t cpu = dtp->dt_beganon; dtrace_bufdesc_t nbuf; int rval, i; static int max_ncpus; dtrace_optval_t size; dtp->dt_beganon = -1; if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) { /* * We really don't expect this to fail, but it is at least * technically possible for this to fail with ENOENT. In this * case, we just drive on... */ if (errno == ENOENT) return (0); return (dt_set_errno(dtp, errno)); } if (!dtp->dt_stopped || buf->dtbd_cpu != dtp->dt_endedon) { /* * This is the simple case. We're either not stopped, or if * we are, we actually processed any END probes on another * CPU. We can simply consume this buffer and return. */ return (dt_consume_cpu(dtp, fp, cpu, buf, pf, rf, arg)); } begin.dtbgn_probefunc = pf; begin.dtbgn_recfunc = rf; begin.dtbgn_arg = arg; begin.dtbgn_beginonly = 1; /* * We need to interpose on the ERROR handler to be sure that we * only process ERRORs induced by BEGIN. */ begin.dtbgn_errhdlr = dtp->dt_errhdlr; begin.dtbgn_errarg = dtp->dt_errarg; dtp->dt_errhdlr = dt_consume_begin_error; dtp->dt_errarg = &begin; rval = dt_consume_cpu(dtp, fp, cpu, buf, dt_consume_begin_probe, dt_consume_begin_record, &begin); dtp->dt_errhdlr = begin.dtbgn_errhdlr; dtp->dt_errarg = begin.dtbgn_errarg; if (rval != 0) return (rval); /* * Now allocate a new buffer. We'll use this to deal with every other * CPU. */ bzero(&nbuf, sizeof (dtrace_bufdesc_t)); (void) dtrace_getopt(dtp, "bufsize", &size); if ((nbuf.dtbd_data = malloc(size)) == NULL) return (dt_set_errno(dtp, EDT_NOMEM)); if (max_ncpus == 0) max_ncpus = dt_sysconf(dtp, _SC_CPUID_MAX) + 1; for (i = 0; i < max_ncpus; i++) { nbuf.dtbd_cpu = i; if (i == cpu) continue; if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &nbuf) == -1) { /* * If we failed with ENOENT, it may be because the * CPU was unconfigured -- this is okay. Any other * error, however, is unexpected. */ if (errno == ENOENT) continue; free(nbuf.dtbd_data); return (dt_set_errno(dtp, errno)); } if ((rval = dt_consume_cpu(dtp, fp, i, &nbuf, pf, rf, arg)) != 0) { free(nbuf.dtbd_data); return (rval); } } free(nbuf.dtbd_data); /* * Okay -- we're done with the other buffers. Now we want to * reconsume the first buffer -- but this time we're looking for * everything _but_ BEGIN. And of course, in order to only consume * those ERRORs _not_ associated with BEGIN, we need to reinstall our * ERROR interposition function... */ begin.dtbgn_beginonly = 0; assert(begin.dtbgn_errhdlr == dtp->dt_errhdlr); assert(begin.dtbgn_errarg == dtp->dt_errarg); dtp->dt_errhdlr = dt_consume_begin_error; dtp->dt_errarg = &begin; rval = dt_consume_cpu(dtp, fp, cpu, buf, dt_consume_begin_probe, dt_consume_begin_record, &begin); dtp->dt_errhdlr = begin.dtbgn_errhdlr; dtp->dt_errarg = begin.dtbgn_errarg; return (rval); } int dtrace_consume(dtrace_hdl_t *dtp, FILE *fp, dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg) { dtrace_bufdesc_t *buf = &dtp->dt_buf; dtrace_optval_t size; static int max_ncpus; int i, rval; dtrace_optval_t interval = dtp->dt_options[DTRACEOPT_SWITCHRATE]; hrtime_t now = gethrtime(); if (dtp->dt_lastswitch != 0) { if (now - dtp->dt_lastswitch < interval) return (0); dtp->dt_lastswitch += interval; } else { dtp->dt_lastswitch = now; } if (!dtp->dt_active) return (dt_set_errno(dtp, EINVAL)); if (max_ncpus == 0) max_ncpus = dt_sysconf(dtp, _SC_CPUID_MAX) + 1; if (pf == NULL) pf = (dtrace_consume_probe_f *)dt_nullprobe; if (rf == NULL) rf = (dtrace_consume_rec_f *)dt_nullrec; if (buf->dtbd_data == NULL) { (void) dtrace_getopt(dtp, "bufsize", &size); if ((buf->dtbd_data = malloc(size)) == NULL) return (dt_set_errno(dtp, EDT_NOMEM)); buf->dtbd_size = size; } /* * If we have just begun, we want to first process the CPU that * executed the BEGIN probe (if any). */ if (dtp->dt_active && dtp->dt_beganon != -1) { buf->dtbd_cpu = dtp->dt_beganon; if ((rval = dt_consume_begin(dtp, fp, buf, pf, rf, arg)) != 0) return (rval); } for (i = 0; i < max_ncpus; i++) { buf->dtbd_cpu = i; /* * If we have stopped, we want to process the CPU on which the * END probe was processed only _after_ we have processed * everything else. */ if (dtp->dt_stopped && (i == dtp->dt_endedon)) continue; if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) { /* * If we failed with ENOENT, it may be because the * CPU was unconfigured -- this is okay. Any other * error, however, is unexpected. */ if (errno == ENOENT) continue; return (dt_set_errno(dtp, errno)); } if ((rval = dt_consume_cpu(dtp, fp, i, buf, pf, rf, arg)) != 0) return (rval); } if (!dtp->dt_stopped) return (0); buf->dtbd_cpu = dtp->dt_endedon; if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) { /* * This _really_ shouldn't fail, but it is strictly speaking * possible for this to return ENOENT if the CPU that called * the END enabling somehow managed to become unconfigured. * It's unclear how the user can possibly expect anything * rational to happen in this case -- the state has been thrown * out along with the unconfigured CPU -- so we'll just drive * on... */ if (errno == ENOENT) return (0); return (dt_set_errno(dtp, errno)); } return (dt_consume_cpu(dtp, fp, dtp->dt_endedon, buf, pf, rf, arg)); }