xref: /linux/tools/perf/util/bpf-trace-summary.c (revision 68a052239fc4b351e961f698b824f7654a346091)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #include <inttypes.h>
3 #include <math.h>
4 #include <stdio.h>
5 #include <stdlib.h>
6 
7 #include "dwarf-regs.h" /* for EM_HOST */
8 #include "syscalltbl.h"
9 #include "util/cgroup.h"
10 #include "util/hashmap.h"
11 #include "util/trace.h"
12 #include "util/util.h"
13 #include <bpf/bpf.h>
14 #include <linux/rbtree.h>
15 #include <linux/time64.h>
16 #include <tools/libc_compat.h> /* reallocarray */
17 
18 #include "bpf_skel/syscall_summary.h"
19 #include "bpf_skel/syscall_summary.skel.h"
20 
21 
/* BPF skeleton handle: set by trace_prepare_bpf_summary(), destroyed by trace_cleanup_bpf_summary(). */
static struct syscall_summary_bpf *skel;
/* Filled by read_all_cgroups() in cgroup mode; looked up when printing to turn a cgroup id into a name. */
static struct rb_root cgroups = RB_ROOT;
24 
25 int trace_prepare_bpf_summary(enum trace_summary_mode mode)
26 {
27 	skel = syscall_summary_bpf__open();
28 	if (skel == NULL) {
29 		fprintf(stderr, "failed to open syscall summary bpf skeleton\n");
30 		return -1;
31 	}
32 
33 	if (mode == SUMMARY__BY_THREAD)
34 		skel->rodata->aggr_mode = SYSCALL_AGGR_THREAD;
35 	else if (mode == SUMMARY__BY_CGROUP)
36 		skel->rodata->aggr_mode = SYSCALL_AGGR_CGROUP;
37 	else
38 		skel->rodata->aggr_mode = SYSCALL_AGGR_CPU;
39 
40 	if (cgroup_is_v2("perf_event") > 0)
41 		skel->rodata->use_cgroup_v2 = 1;
42 
43 	if (syscall_summary_bpf__load(skel) < 0) {
44 		fprintf(stderr, "failed to load syscall summary bpf skeleton\n");
45 		return -1;
46 	}
47 
48 	if (syscall_summary_bpf__attach(skel) < 0) {
49 		fprintf(stderr, "failed to attach syscall summary bpf skeleton\n");
50 		return -1;
51 	}
52 
53 	if (mode == SUMMARY__BY_CGROUP)
54 		read_all_cgroups(&cgroups);
55 
56 	return 0;
57 }
58 
59 void trace_start_bpf_summary(void)
60 {
61 	skel->bss->enabled = 1;
62 }
63 
64 void trace_end_bpf_summary(void)
65 {
66 	skel->bss->enabled = 0;
67 }
68 
/* One row of the summary: a syscall number and the statistics gathered for it. */
struct syscall_node {
	int syscall_nr;			/* syscall number; resolved to a name when printing */
	struct syscall_stats stats;	/* stats copied or accumulated from the BPF map */
};
73 
74 static double rel_stddev(struct syscall_stats *stat)
75 {
76 	double variance, average;
77 
78 	if (stat->count < 2)
79 		return 0;
80 
81 	average = (double)stat->total_time / stat->count;
82 
83 	variance = stat->squared_sum;
84 	variance -= (stat->total_time * stat->total_time) / stat->count;
85 	variance /= stat->count - 1;
86 
87 	return 100 * sqrt(variance / stat->count) / average;
88 }
89 
90 /*
91  * The syscall_data is to maintain syscall stats ordered by total time.
92  * It supports different summary modes like per-thread or global.
93  *
 * For per-thread stats, it uses a two-level data structure -
 * syscall_data is keyed by TID and has an array of nodes, each of
 * which represents one syscall for the thread.
97  *
98  * For global stats, it's still two-level technically but we don't need
99  * per-cpu analysis so it's keyed by the syscall number to combine stats
100  * from different CPUs.  And syscall_data always has a syscall_node so
101  * it can effectively work as flat hierarchy.
102  *
 * For per-cgroup stats, it uses a two-level data structure like the
 * per-thread case - syscall_data is keyed by CGROUP and has an array of
 * nodes, each of which represents one syscall for the cgroup.
106  */
struct syscall_data {
	u64 key; /* tid if AGGR_THREAD, syscall-nr if AGGR_CPU, cgroup if AGGR_CGROUP */
	int nr_events;	/* sum of the 'count' of every stat merged into this entry */
	int nr_nodes;	/* number of elements in @nodes */
	u64 total_time;	/* sum of 'total_time' of every merged stat; sort key for datacmp() */
	struct syscall_node *nodes;	/* heap-allocated array of per-syscall rows */
};
114 
115 static int datacmp(const void *a, const void *b)
116 {
117 	const struct syscall_data * const *sa = a;
118 	const struct syscall_data * const *sb = b;
119 
120 	return (*sa)->total_time > (*sb)->total_time ? -1 : 1;
121 }
122 
123 static int nodecmp(const void *a, const void *b)
124 {
125 	const struct syscall_node *na = a;
126 	const struct syscall_node *nb = b;
127 
128 	return na->stats.total_time > nb->stats.total_time ? -1 : 1;
129 }
130 
131 static size_t sc_node_hash(long key, void *ctx __maybe_unused)
132 {
133 	return key;
134 }
135 
136 static bool sc_node_equal(long key1, long key2, void *ctx __maybe_unused)
137 {
138 	return key1 == key2;
139 }
140 
141 static int print_common_stats(struct syscall_data *data, int max_summary, FILE *fp)
142 {
143 	int printed = 0;
144 
145 	if (max_summary == 0 || max_summary > data->nr_nodes)
146 		max_summary = data->nr_nodes;
147 
148 	for (int i = 0; i < max_summary; i++) {
149 		struct syscall_node *node = &data->nodes[i];
150 		struct syscall_stats *stat = &node->stats;
151 		double total = (double)(stat->total_time) / NSEC_PER_MSEC;
152 		double min = (double)(stat->min_time) / NSEC_PER_MSEC;
153 		double max = (double)(stat->max_time) / NSEC_PER_MSEC;
154 		double avg = total / stat->count;
155 		const char *name;
156 
157 		/* TODO: support other ABIs */
158 		name = syscalltbl__name(EM_HOST, node->syscall_nr);
159 		if (name)
160 			printed += fprintf(fp, "   %-15s", name);
161 		else
162 			printed += fprintf(fp, "   syscall:%-7d", node->syscall_nr);
163 
164 		printed += fprintf(fp, " %8u %6u %9.3f %9.3f %9.3f %9.3f %9.2f%%\n",
165 				   stat->count, stat->error, total, min, avg, max,
166 				   rel_stddev(stat));
167 	}
168 	return printed;
169 }
170 
171 static int update_thread_stats(struct hashmap *hash, struct syscall_key *map_key,
172 			       struct syscall_stats *map_data)
173 {
174 	struct syscall_data *data;
175 	struct syscall_node *nodes;
176 
177 	if (!hashmap__find(hash, map_key->cpu_or_tid, &data)) {
178 		data = zalloc(sizeof(*data));
179 		if (data == NULL)
180 			return -ENOMEM;
181 
182 		data->key = map_key->cpu_or_tid;
183 		if (hashmap__add(hash, data->key, data) < 0) {
184 			free(data);
185 			return -ENOMEM;
186 		}
187 	}
188 
189 	/* update thread total stats */
190 	data->nr_events += map_data->count;
191 	data->total_time += map_data->total_time;
192 
193 	nodes = reallocarray(data->nodes, data->nr_nodes + 1, sizeof(*nodes));
194 	if (nodes == NULL)
195 		return -ENOMEM;
196 
197 	data->nodes = nodes;
198 	nodes = &data->nodes[data->nr_nodes++];
199 	nodes->syscall_nr = map_key->nr;
200 
201 	/* each thread has an entry for each syscall, just use the stat */
202 	memcpy(&nodes->stats, map_data, sizeof(*map_data));
203 	return 0;
204 }
205 
206 static int print_thread_stat(struct syscall_data *data, int max_summary, FILE *fp)
207 {
208 	int printed = 0;
209 
210 	qsort(data->nodes, data->nr_nodes, sizeof(*data->nodes), nodecmp);
211 
212 	printed += fprintf(fp, " thread (%d), ", (int)data->key);
213 	printed += fprintf(fp, "%d events\n\n", data->nr_events);
214 
215 	printed += fprintf(fp, "   syscall            calls  errors  total       min       avg       max       stddev\n");
216 	printed += fprintf(fp, "                                     (msec)    (msec)    (msec)    (msec)        (%%)\n");
217 	printed += fprintf(fp, "   --------------- --------  ------ -------- --------- --------- ---------     ------\n");
218 
219 	printed += print_common_stats(data, max_summary, fp);
220 	printed += fprintf(fp, "\n\n");
221 
222 	return printed;
223 }
224 
/*
 * Print the per-thread tables for all @nr_data entries of @data, in array
 * order.  Returns the sum of the values returned by print_thread_stat().
 */
static int print_thread_stats(struct syscall_data **data, int nr_data, int max_summary, FILE *fp)
{
	int len = 0;
	int idx = 0;

	while (idx < nr_data) {
		len += print_thread_stat(data[idx], max_summary, fp);
		idx++;
	}

	return len;
}
234 
235 static int update_total_stats(struct hashmap *hash, struct syscall_key *map_key,
236 			      struct syscall_stats *map_data)
237 {
238 	struct syscall_data *data;
239 	struct syscall_stats *stat;
240 
241 	if (!hashmap__find(hash, map_key->nr, &data)) {
242 		data = zalloc(sizeof(*data));
243 		if (data == NULL)
244 			return -ENOMEM;
245 
246 		data->nodes = zalloc(sizeof(*data->nodes));
247 		if (data->nodes == NULL) {
248 			free(data);
249 			return -ENOMEM;
250 		}
251 
252 		data->nr_nodes = 1;
253 		data->key = map_key->nr;
254 		data->nodes->syscall_nr = data->key;
255 
256 		if (hashmap__add(hash, data->key, data) < 0) {
257 			free(data->nodes);
258 			free(data);
259 			return -ENOMEM;
260 		}
261 	}
262 
263 	/* update total stats for this syscall */
264 	data->nr_events += map_data->count;
265 	data->total_time += map_data->total_time;
266 
267 	/* This is sum of the same syscall from different CPUs */
268 	stat = &data->nodes->stats;
269 
270 	stat->total_time += map_data->total_time;
271 	stat->squared_sum += map_data->squared_sum;
272 	stat->count += map_data->count;
273 	stat->error += map_data->error;
274 
275 	if (stat->max_time < map_data->max_time)
276 		stat->max_time = map_data->max_time;
277 	if (stat->min_time > map_data->min_time || stat->min_time == 0)
278 		stat->min_time = map_data->min_time;
279 
280 	return 0;
281 }
282 
283 static int print_total_stats(struct syscall_data **data, int nr_data, int max_summary, FILE *fp)
284 {
285 	int printed = 0;
286 	int nr_events = 0;
287 
288 	for (int i = 0; i < nr_data; i++)
289 		nr_events += data[i]->nr_events;
290 
291 	printed += fprintf(fp, " total, %d events\n\n", nr_events);
292 
293 	printed += fprintf(fp, "   syscall            calls  errors  total       min       avg       max       stddev\n");
294 	printed += fprintf(fp, "                                     (msec)    (msec)    (msec)    (msec)        (%%)\n");
295 	printed += fprintf(fp, "   --------------- --------  ------ -------- --------- --------- ---------     ------\n");
296 
297 	if (max_summary == 0 || max_summary > nr_data)
298 		max_summary = nr_data;
299 
300 	for (int i = 0; i < max_summary; i++)
301 		printed += print_common_stats(data[i], max_summary, fp);
302 
303 	printed += fprintf(fp, "\n\n");
304 	return printed;
305 }
306 
307 static int update_cgroup_stats(struct hashmap *hash, struct syscall_key *map_key,
308 			       struct syscall_stats *map_data)
309 {
310 	struct syscall_data *data;
311 	struct syscall_node *nodes;
312 
313 	if (!hashmap__find(hash, map_key->cgroup, &data)) {
314 		data = zalloc(sizeof(*data));
315 		if (data == NULL)
316 			return -ENOMEM;
317 
318 		data->key = map_key->cgroup;
319 		if (hashmap__add(hash, data->key, data) < 0) {
320 			free(data);
321 			return -ENOMEM;
322 		}
323 	}
324 
325 	/* update thread total stats */
326 	data->nr_events += map_data->count;
327 	data->total_time += map_data->total_time;
328 
329 	nodes = reallocarray(data->nodes, data->nr_nodes + 1, sizeof(*nodes));
330 	if (nodes == NULL)
331 		return -ENOMEM;
332 
333 	data->nodes = nodes;
334 	nodes = &data->nodes[data->nr_nodes++];
335 	nodes->syscall_nr = map_key->nr;
336 
337 	/* each thread has an entry for each syscall, just use the stat */
338 	memcpy(&nodes->stats, map_data, sizeof(*map_data));
339 	return 0;
340 }
341 
342 static int print_cgroup_stat(struct syscall_data *data, int max_summary, FILE *fp)
343 {
344 	int printed = 0;
345 	struct cgroup *cgrp = __cgroup__find(&cgroups, data->key);
346 
347 	qsort(data->nodes, data->nr_nodes, sizeof(*data->nodes), nodecmp);
348 
349 	if (cgrp)
350 		printed += fprintf(fp, " cgroup %s,", cgrp->name);
351 	else
352 		printed += fprintf(fp, " cgroup id:%lu,", (unsigned long)data->key);
353 
354 	printed += fprintf(fp, " %d events\n\n", data->nr_events);
355 
356 	printed += fprintf(fp, "   syscall            calls  errors  total       min       avg       max       stddev\n");
357 	printed += fprintf(fp, "                                     (msec)    (msec)    (msec)    (msec)        (%%)\n");
358 	printed += fprintf(fp, "   --------------- --------  ------ -------- --------- --------- ---------     ------\n");
359 
360 	printed += print_common_stats(data, max_summary, fp);
361 	printed += fprintf(fp, "\n\n");
362 
363 	return printed;
364 }
365 
/*
 * Print the per-cgroup tables for all @nr_data entries of @data, in array
 * order.  Returns the sum of the values returned by print_cgroup_stat().
 */
static int print_cgroup_stats(struct syscall_data **data, int nr_data, int max_summary, FILE *fp)
{
	int len = 0;
	int idx = 0;

	while (idx < nr_data) {
		len += print_cgroup_stat(data[idx], max_summary, fp);
		idx++;
	}

	return len;
}
375 
376 int trace_print_bpf_summary(FILE *fp, int max_summary)
377 {
378 	struct bpf_map *map = skel->maps.syscall_stats_map;
379 	struct syscall_key *prev_key, key;
380 	struct syscall_data **data = NULL;
381 	struct hashmap schash;
382 	struct hashmap_entry *entry;
383 	int nr_data = 0;
384 	int printed = 0;
385 	int i;
386 	size_t bkt;
387 
388 	hashmap__init(&schash, sc_node_hash, sc_node_equal, /*ctx=*/NULL);
389 
390 	printed = fprintf(fp, "\n Summary of events:\n\n");
391 
392 	/* get stats from the bpf map */
393 	prev_key = NULL;
394 	while (!bpf_map__get_next_key(map, prev_key, &key, sizeof(key))) {
395 		struct syscall_stats stat;
396 
397 		if (!bpf_map__lookup_elem(map, &key, sizeof(key), &stat, sizeof(stat), 0)) {
398 			switch (skel->rodata->aggr_mode) {
399 			case SYSCALL_AGGR_THREAD:
400 				update_thread_stats(&schash, &key, &stat);
401 				break;
402 			case SYSCALL_AGGR_CPU:
403 				update_total_stats(&schash, &key, &stat);
404 				break;
405 			case SYSCALL_AGGR_CGROUP:
406 				update_cgroup_stats(&schash, &key, &stat);
407 				break;
408 			default:
409 				break;
410 			}
411 		}
412 
413 		prev_key = &key;
414 	}
415 
416 	nr_data = hashmap__size(&schash);
417 	data = calloc(nr_data, sizeof(*data));
418 	if (data == NULL)
419 		goto out;
420 
421 	i = 0;
422 	hashmap__for_each_entry(&schash, entry, bkt)
423 		data[i++] = entry->pvalue;
424 
425 	qsort(data, nr_data, sizeof(*data), datacmp);
426 
427 	switch (skel->rodata->aggr_mode) {
428 	case SYSCALL_AGGR_THREAD:
429 		printed += print_thread_stats(data, nr_data, max_summary, fp);
430 		break;
431 	case SYSCALL_AGGR_CPU:
432 		printed += print_total_stats(data, nr_data, max_summary, fp);
433 		break;
434 	case SYSCALL_AGGR_CGROUP:
435 		printed += print_cgroup_stats(data, nr_data, max_summary, fp);
436 		break;
437 	default:
438 		break;
439 	}
440 
441 	for (i = 0; i < nr_data && data; i++) {
442 		free(data[i]->nodes);
443 		free(data[i]);
444 	}
445 	free(data);
446 
447 out:
448 	hashmap__clear(&schash);
449 	return printed;
450 }
451 
452 void trace_cleanup_bpf_summary(void)
453 {
454 	if (!RB_EMPTY_ROOT(&cgroups)) {
455 		struct cgroup *cgrp, *tmp;
456 
457 		rbtree_postorder_for_each_entry_safe(cgrp, tmp, &cgroups, node)
458 			cgroup__put(cgrp);
459 
460 		cgroups = RB_ROOT;
461 	}
462 
463 	syscall_summary_bpf__destroy(skel);
464 }
465