xref: /linux/tools/perf/util/data-convert-json.c (revision 8a5f956a9fb7d74fff681145082acfad5afa6bb8)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * JSON export.
4  *
5  * Copyright (C) 2021, CodeWeavers Inc. <nfraser@codeweavers.com>
6  */
7 
8 #include "data-convert.h"
9 
10 #include <fcntl.h>
11 #include <inttypes.h>
12 #include <sys/stat.h>
13 #include <unistd.h>
14 
15 #include "linux/compiler.h"
16 #include "linux/err.h"
17 #include "util/auxtrace.h"
18 #include "util/debug.h"
19 #include "util/dso.h"
20 #include "util/event.h"
21 #include "util/evsel.h"
22 #include "util/evlist.h"
23 #include "util/header.h"
24 #include "util/map.h"
25 #include "util/session.h"
26 #include "util/symbol.h"
27 #include "util/thread.h"
28 #include "util/tool.h"
29 
30 #ifdef HAVE_LIBTRACEEVENT
31 #include <event-parse.h>
32 #endif
33 
// State shared by the perf_tool callbacks while exporting to JSON.
struct convert_json {
	// Embedded tool; callbacks recover this struct from it via container_of().
	struct perf_tool tool;
	// Stream the JSON document is written to.
	FILE *out;
	// True until the first sample has been written; every later sample is
	// preceded by a comma.
	bool first;
	// Number of samples processed so far, reported once conversion is done.
	u64 events_count;
};
40 
// Outputs a JSON-encoded string surrounded by quotes with characters escaped.
//
// Each byte is classified as an unsigned char. Plain char may be signed, in
// which case the bytes of multi-byte UTF-8 sequences (>= 0x80) are negative
// and would otherwise be mistaken for control characters and printed as a
// sign-extended "\uffffffxx" escape, which is not valid JSON. Such bytes are
// written through unchanged so UTF-8 input stays intact.
//
// @out: stream to write to
// @s:   NUL-terminated string to encode; must not be NULL
static void output_json_string(FILE *out, const char *s)
{
	fputc('"', out);
	for (; *s; ++s) {
		unsigned char c = (unsigned char)*s;

		switch (c) {

		// required escapes with special forms as per RFC 8259
		case '"':  fputs("\\\"", out); break;
		case '\\': fputs("\\\\", out); break;
		case '\b': fputs("\\b", out);  break;
		case '\f': fputs("\\f", out);  break;
		case '\n': fputs("\\n", out);  break;
		case '\r': fputs("\\r", out);  break;
		case '\t': fputs("\\t", out);  break;

		default:
			// all other control characters must be escaped by hex code
			if (c <= 0x1f)
				fprintf(out, "\\u%04x", c);
			else
				fputc(c, out);
			break;
		}
	}
	fputc('"', out);
}
70 
// Starts a new value inside a JSON object or array: writes a comma when the
// value has a predecessor, then a newline followed by one tab per nesting
// level.
static void output_json_delimiters(FILE *out, bool comma, int depth)
{
	int remaining = depth;

	if (comma)
		fputc(',', out);

	fputc('\n', out);
	while (remaining-- > 0)
		fputc('\t', out);
}
83 
84 // Outputs a printf format string (with delimiter) as a JSON value.
85 __printf(4, 5)
86 static void output_json_format(FILE *out, bool comma, int depth, const char *format, ...)
87 {
88 	va_list args;
89 
90 	output_json_delimiters(out, comma, depth);
91 	va_start(args, format);
92 	vfprintf(out,  format, args);
93 	va_end(args);
94 }
95 
// Writes a `"key": "value"` member of a JSON object, where both the key and
// the string value are quoted and escaped.
static void output_json_key_string(FILE *out, bool comma, int depth,
		const char *key, const char *value)
{
	static const char separator[] = ": ";

	output_json_delimiters(out, comma, depth);

	output_json_string(out, key);
	fputs(separator, out);
	output_json_string(out, value);
}
105 
106 // Outputs a JSON key-value pair where the value is a printf format string.
107 __printf(5, 6)
108 static void output_json_key_format(FILE *out, bool comma, int depth,
109 		const char *key, const char *format, ...)
110 {
111 	va_list args;
112 
113 	output_json_delimiters(out, comma, depth);
114 	output_json_string(out, key);
115 	fputs(": ", out);
116 	va_start(args, format);
117 	vfprintf(out,  format, args);
118 	va_end(args);
119 }
120 
121 static void output_sample_callchain_entry(const struct perf_tool *tool,
122 		u64 ip, struct addr_location *al)
123 {
124 	struct convert_json *c = container_of(tool, struct convert_json, tool);
125 	FILE *out = c->out;
126 
127 	output_json_format(out, false, 4, "{");
128 	output_json_key_format(out, false, 5, "ip", "\"0x%" PRIx64 "\"", ip);
129 
130 	if (al && al->sym && al->sym->namelen) {
131 		struct dso *dso = al->map ? map__dso(al->map) : NULL;
132 
133 		fputc(',', out);
134 		output_json_key_string(out, false, 5, "symbol", al->sym->name);
135 
136 		if (dso) {
137 			const char *dso_name = dso__short_name(dso);
138 
139 			if (dso_name && strlen(dso_name) > 0) {
140 				fputc(',', out);
141 				output_json_key_string(out, false, 5, "dso", dso_name);
142 			}
143 		}
144 	}
145 
146 	output_json_format(out, false, 4, "}");
147 }
148 
149 static int process_sample_event(const struct perf_tool *tool,
150 				union perf_event *event __maybe_unused,
151 				struct perf_sample *sample,
152 				struct evsel *evsel __maybe_unused,
153 				struct machine *machine)
154 {
155 	struct convert_json *c = container_of(tool, struct convert_json, tool);
156 	FILE *out = c->out;
157 	struct addr_location al;
158 	u64 sample_type = __evlist__combined_sample_type(evsel->evlist);
159 	u8 cpumode = PERF_RECORD_MISC_USER;
160 
161 	addr_location__init(&al);
162 	if (machine__resolve(machine, &al, sample) < 0) {
163 		pr_err("Sample resolution failed!\n");
164 		addr_location__exit(&al);
165 		return -1;
166 	}
167 
168 	++c->events_count;
169 
170 	if (c->first)
171 		c->first = false;
172 	else
173 		fputc(',', out);
174 	output_json_format(out, false, 2, "{");
175 
176 	output_json_key_format(out, false, 3, "timestamp", "%" PRIi64, sample->time);
177 	output_json_key_format(out, true, 3, "pid", "%i", thread__pid(al.thread));
178 	output_json_key_format(out, true, 3, "tid", "%i", thread__tid(al.thread));
179 
180 	if ((sample_type & PERF_SAMPLE_CPU))
181 		output_json_key_format(out, true, 3, "cpu", "%i", sample->cpu);
182 	else if (thread__cpu(al.thread) >= 0)
183 		output_json_key_format(out, true, 3, "cpu", "%i", thread__cpu(al.thread));
184 
185 	output_json_key_string(out, true, 3, "comm", thread__comm_str(al.thread));
186 
187 	output_json_key_format(out, true, 3, "callchain", "[");
188 	if (sample->callchain) {
189 		unsigned int i;
190 		bool ok;
191 		bool first_callchain = true;
192 
193 		for (i = 0; i < sample->callchain->nr; ++i) {
194 			u64 ip = sample->callchain->ips[i];
195 			struct addr_location tal;
196 
197 			if (ip >= PERF_CONTEXT_MAX) {
198 				switch (ip) {
199 				case PERF_CONTEXT_HV:
200 					cpumode = PERF_RECORD_MISC_HYPERVISOR;
201 					break;
202 				case PERF_CONTEXT_KERNEL:
203 					cpumode = PERF_RECORD_MISC_KERNEL;
204 					break;
205 				case PERF_CONTEXT_USER:
206 					cpumode = PERF_RECORD_MISC_USER;
207 					break;
208 				default:
209 					pr_debug("invalid callchain context: %"
210 							PRId64 "\n", (s64) ip);
211 					break;
212 				}
213 				continue;
214 			}
215 
216 			if (first_callchain)
217 				first_callchain = false;
218 			else
219 				fputc(',', out);
220 
221 			addr_location__init(&tal);
222 			ok = thread__find_symbol(al.thread, cpumode, ip, &tal);
223 			output_sample_callchain_entry(tool, ip, ok ? &tal : NULL);
224 			addr_location__exit(&tal);
225 		}
226 	} else {
227 		output_sample_callchain_entry(tool, sample->ip, &al);
228 	}
229 	output_json_format(out, false, 3, "]");
230 
231 #ifdef HAVE_LIBTRACEEVENT
232 	if (sample->raw_data) {
233 		struct tep_event *tp_format = evsel__tp_format(evsel);
234 		struct tep_format_field **fields = tp_format ? tep_event_fields(tp_format) : NULL;
235 
236 		if (fields) {
237 			int i = 0;
238 
239 			while (fields[i]) {
240 				struct trace_seq s;
241 
242 				trace_seq_init(&s);
243 				tep_print_field(&s, sample->raw_data, fields[i]);
244 				output_json_key_string(out, true, 3, fields[i]->name, s.buffer);
245 
246 				i++;
247 			}
248 			free(fields);
249 		}
250 	}
251 #endif
252 	output_json_format(out, false, 2, "}");
253 	addr_location__exit(&al);
254 	return 0;
255 }
256 
257 static void output_headers(struct perf_session *session, struct convert_json *c)
258 {
259 	struct stat st;
260 	const struct perf_header *header = &session->header;
261 	const struct perf_env *env = perf_session__env(session);
262 	int ret;
263 	int fd = perf_data__fd(session->data);
264 	int i;
265 	FILE *out = c->out;
266 
267 	output_json_key_format(out, false, 2, "header-version", "%u", header->version);
268 
269 	ret = fstat(fd, &st);
270 	if (ret >= 0) {
271 		time_t stctime = st.st_mtime;
272 		char buf[256];
273 
274 		strftime(buf, sizeof(buf), "%FT%TZ", gmtime(&stctime));
275 		output_json_key_string(out, true, 2, "captured-on", buf);
276 	} else {
277 		pr_debug("Failed to get mtime of source file, not writing captured-on");
278 	}
279 
280 	output_json_key_format(out, true, 2, "data-offset", "%" PRIu64, header->data_offset);
281 	output_json_key_format(out, true, 2, "data-size", "%" PRIu64, header->data_size);
282 	output_json_key_format(out, true, 2, "feat-offset", "%" PRIu64, header->feat_offset);
283 
284 	output_json_key_string(out, true, 2, "hostname", env->hostname);
285 	output_json_key_string(out, true, 2, "os-release", env->os_release);
286 	output_json_key_string(out, true, 2, "arch", env->arch);
287 
288 	if (env->cpu_desc)
289 		output_json_key_string(out, true, 2, "cpu-desc", env->cpu_desc);
290 
291 	output_json_key_string(out, true, 2, "cpuid", env->cpuid);
292 	output_json_key_format(out, true, 2, "nrcpus-online", "%u", env->nr_cpus_online);
293 	output_json_key_format(out, true, 2, "nrcpus-avail", "%u", env->nr_cpus_avail);
294 
295 	if (env->clock.enabled) {
296 		output_json_key_format(out, true, 2, "clockid",
297 				"%u", env->clock.clockid);
298 		output_json_key_format(out, true, 2, "clock-time",
299 				"%" PRIu64, env->clock.clockid_ns);
300 		output_json_key_format(out, true, 2, "real-time",
301 				"%" PRIu64, env->clock.tod_ns);
302 	}
303 
304 	output_json_key_string(out, true, 2, "perf-version", env->version);
305 
306 	output_json_key_format(out, true, 2, "cmdline", "[");
307 	for (i = 0; i < env->nr_cmdline; i++) {
308 		output_json_delimiters(out, i != 0, 3);
309 		output_json_string(c->out, env->cmdline_argv[i]);
310 	}
311 	output_json_format(out, false, 2, "]");
312 }
313 
314 int bt_convert__perf2json(const char *input_name, const char *output_name,
315 		struct perf_data_convert_opts *opts __maybe_unused)
316 {
317 	struct perf_session *session;
318 	int fd;
319 	int ret = -1;
320 	struct convert_json c = {
321 		.first = true,
322 		.events_count = 0,
323 	};
324 	struct perf_data data = {
325 		.mode = PERF_DATA_MODE_READ,
326 		.path = input_name,
327 		.force = opts->force,
328 	};
329 
330 	perf_tool__init(&c.tool, /*ordered_events=*/true);
331 	c.tool.sample         = process_sample_event;
332 	c.tool.mmap           = perf_event__process_mmap;
333 	c.tool.mmap2          = perf_event__process_mmap2;
334 	c.tool.comm           = perf_event__process_comm;
335 	c.tool.namespaces     = perf_event__process_namespaces;
336 	c.tool.cgroup         = perf_event__process_cgroup;
337 	c.tool.exit           = perf_event__process_exit;
338 	c.tool.fork           = perf_event__process_fork;
339 	c.tool.lost           = perf_event__process_lost;
340 #ifdef HAVE_LIBTRACEEVENT
341 	c.tool.tracing_data   = perf_event__process_tracing_data;
342 #endif
343 	c.tool.build_id       = perf_event__process_build_id;
344 	c.tool.id_index       = perf_event__process_id_index;
345 	c.tool.auxtrace_info  = perf_event__process_auxtrace_info;
346 	c.tool.auxtrace       = perf_event__process_auxtrace;
347 	c.tool.event_update   = perf_event__process_event_update;
348 	c.tool.ordering_requires_timestamps = true;
349 
350 	if (opts->all) {
351 		pr_err("--all is currently unsupported for JSON output.\n");
352 		goto err;
353 	}
354 	if (opts->tod) {
355 		pr_err("--tod is currently unsupported for JSON output.\n");
356 		goto err;
357 	}
358 
359 	fd = open(output_name, O_CREAT | O_WRONLY | (opts->force ? O_TRUNC : O_EXCL), 0666);
360 	if (fd == -1) {
361 		if (errno == EEXIST)
362 			pr_err("Output file exists. Use --force to overwrite it.\n");
363 		else
364 			pr_err("Error opening output file!\n");
365 		goto err;
366 	}
367 
368 	c.out = fdopen(fd, "w");
369 	if (!c.out) {
370 		fprintf(stderr, "Error opening output file!\n");
371 		close(fd);
372 		goto err;
373 	}
374 
375 	session = perf_session__new(&data, &c.tool);
376 	if (IS_ERR(session)) {
377 		fprintf(stderr, "Error creating perf session!\n");
378 		goto err_fclose;
379 	}
380 	if (symbol__init(perf_session__env(session)) < 0) {
381 		fprintf(stderr, "Symbol init error!\n");
382 		goto err_session_delete;
383 	}
384 
385 	// The opening brace is printed manually because it isn't delimited from a
386 	// previous value (i.e. we don't want a leading newline)
387 	fputc('{', c.out);
388 
389 	// Version number for future-proofing. Most additions should be able to be
390 	// done in a backwards-compatible way so this should only need to be bumped
391 	// if some major breaking change must be made.
392 	output_json_format(c.out, false, 1, "\"linux-perf-json-version\": 1");
393 
394 	// Output headers
395 	output_json_format(c.out, true, 1, "\"headers\": {");
396 	output_headers(session, &c);
397 	output_json_format(c.out, false, 1, "}");
398 
399 	// Output samples
400 	output_json_format(c.out, true, 1, "\"samples\": [");
401 	perf_session__process_events(session);
402 	output_json_format(c.out, false, 1, "]");
403 	output_json_format(c.out, false, 0, "}");
404 	fputc('\n', c.out);
405 
406 	fprintf(stderr,
407 			"[ perf data convert: Converted '%s' into JSON data '%s' ]\n",
408 			data.path, output_name);
409 
410 	fprintf(stderr,
411 			"[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples) ]\n",
412 			(ftell(c.out)) / 1024.0 / 1024.0, c.events_count);
413 
414 	ret = 0;
415 err_session_delete:
416 	perf_session__delete(session);
417 err_fclose:
418 	fclose(c.out);
419 err:
420 	return ret;
421 }
422