xref: /linux/tools/perf/util/bpf-trace-summary.c (revision 9e906a9dead17d81d6c2687f65e159231d0e3286)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #include <errno.h>
3 #include <inttypes.h>
4 #include <math.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 
8 #include "dwarf-regs.h" /* for EM_HOST */
9 #include "syscalltbl.h"
10 #include "util/cgroup.h"
11 #include "util/hashmap.h"
12 #include "util/trace.h"
13 #include "util/util.h"
14 #include <bpf/bpf.h>
15 #include <linux/rbtree.h>
16 #include <linux/time64.h>
17 #include <tools/libc_compat.h> /* reallocarray */
18 
19 #include "bpf_skel/syscall_summary.h"
20 #include "bpf_skel/syscall_summary.skel.h"
21 
22 
23 static struct syscall_summary_bpf *skel;
24 static struct rb_root cgroups = RB_ROOT;
25 
trace_prepare_bpf_summary(enum trace_summary_mode mode)26 int trace_prepare_bpf_summary(enum trace_summary_mode mode)
27 {
28 	skel = syscall_summary_bpf__open();
29 	if (skel == NULL) {
30 		fprintf(stderr, "failed to open syscall summary bpf skeleton\n");
31 		return -1;
32 	}
33 
34 	if (mode == SUMMARY__BY_THREAD)
35 		skel->rodata->aggr_mode = SYSCALL_AGGR_THREAD;
36 	else if (mode == SUMMARY__BY_CGROUP)
37 		skel->rodata->aggr_mode = SYSCALL_AGGR_CGROUP;
38 	else
39 		skel->rodata->aggr_mode = SYSCALL_AGGR_CPU;
40 
41 	if (cgroup_is_v2("perf_event") > 0)
42 		skel->rodata->use_cgroup_v2 = 1;
43 
44 	if (syscall_summary_bpf__load(skel) < 0) {
45 		fprintf(stderr, "failed to load syscall summary bpf skeleton\n");
46 		return -1;
47 	}
48 
49 	if (syscall_summary_bpf__attach(skel) < 0) {
50 		fprintf(stderr, "failed to attach syscall summary bpf skeleton\n");
51 		return -1;
52 	}
53 
54 	if (mode == SUMMARY__BY_CGROUP)
55 		read_all_cgroups(&cgroups);
56 
57 	return 0;
58 }
59 
trace_start_bpf_summary(void)60 void trace_start_bpf_summary(void)
61 {
62 	skel->bss->enabled = 1;
63 }
64 
trace_end_bpf_summary(void)65 void trace_end_bpf_summary(void)
66 {
67 	skel->bss->enabled = 0;
68 }
69 
70 struct syscall_node {
71 	int syscall_nr;
72 	struct syscall_stats stats;
73 };
74 
rel_stddev(struct syscall_stats * stat)75 static double rel_stddev(struct syscall_stats *stat)
76 {
77 	double variance, average;
78 
79 	if (stat->count < 2)
80 		return 0;
81 
82 	average = (double)stat->total_time / stat->count;
83 
84 	variance = stat->squared_sum;
85 	variance -= (stat->total_time * stat->total_time) / stat->count;
86 	variance /= stat->count - 1;
87 
88 	return 100 * sqrt(variance / stat->count) / average;
89 }
90 
91 /*
92  * The syscall_data is to maintain syscall stats ordered by total time.
93  * It supports different summary modes like per-thread or global.
94  *
95  * For per-thread stats, it uses two-level data strurcture -
96  * syscall_data is keyed by TID and has an array of nodes which
97  * represents each syscall for the thread.
98  *
99  * For global stats, it's still two-level technically but we don't need
100  * per-cpu analysis so it's keyed by the syscall number to combine stats
101  * from different CPUs.  And syscall_data always has a syscall_node so
102  * it can effectively work as flat hierarchy.
103  *
104  * For per-cgroup stats, it uses two-level data structure like thread
105  * syscall_data is keyed by CGROUP and has an array of node which
106  * represents each syscall for the cgroup.
107  */
108 struct syscall_data {
109 	u64 key; /* tid if AGGR_THREAD, syscall-nr if AGGR_CPU, cgroup if AGGR_CGROUP */
110 	int nr_events;
111 	int nr_nodes;
112 	u64 total_time;
113 	struct syscall_node *nodes;
114 };
115 
datacmp(const void * a,const void * b)116 static int datacmp(const void *a, const void *b)
117 {
118 	const struct syscall_data * const *sa = a;
119 	const struct syscall_data * const *sb = b;
120 
121 	return (*sa)->total_time > (*sb)->total_time ? -1 : 1;
122 }
123 
nodecmp(const void * a,const void * b)124 static int nodecmp(const void *a, const void *b)
125 {
126 	const struct syscall_node *na = a;
127 	const struct syscall_node *nb = b;
128 
129 	return na->stats.total_time > nb->stats.total_time ? -1 : 1;
130 }
131 
/* Hash callback for the summary hashmap: the key itself is the hash value. */
static size_t sc_node_hash(long key, void *ctx __maybe_unused)
{
	size_t hash = key;

	return hash;
}
136 
/* Equality callback for the summary hashmap: keys match when they are the same value. */
static bool sc_node_equal(long key1, long key2, void *ctx __maybe_unused)
{
	if (key1 != key2)
		return false;

	return true;
}
141 
print_common_stats(struct syscall_data * data,int max_summary,FILE * fp)142 static int print_common_stats(struct syscall_data *data, int max_summary, FILE *fp)
143 {
144 	int printed = 0;
145 
146 	if (max_summary == 0 || max_summary > data->nr_nodes)
147 		max_summary = data->nr_nodes;
148 
149 	for (int i = 0; i < max_summary; i++) {
150 		struct syscall_node *node = &data->nodes[i];
151 		struct syscall_stats *stat = &node->stats;
152 		double total = (double)(stat->total_time) / NSEC_PER_MSEC;
153 		double min = (double)(stat->min_time) / NSEC_PER_MSEC;
154 		double max = (double)(stat->max_time) / NSEC_PER_MSEC;
155 		double avg = total / stat->count;
156 		const char *name;
157 
158 		/* TODO: support other ABIs */
159 		name = syscalltbl__name(EM_HOST, node->syscall_nr);
160 		if (name)
161 			printed += fprintf(fp, "   %-15s", name);
162 		else
163 			printed += fprintf(fp, "   syscall:%-7d", node->syscall_nr);
164 
165 		printed += fprintf(fp, " %8u %6u %9.3f %9.3f %9.3f %9.3f %9.2f%%\n",
166 				   stat->count, stat->error, total, min, avg, max,
167 				   rel_stddev(stat));
168 	}
169 	return printed;
170 }
171 
update_thread_stats(struct hashmap * hash,struct syscall_key * map_key,struct syscall_stats * map_data)172 static int update_thread_stats(struct hashmap *hash, struct syscall_key *map_key,
173 			       struct syscall_stats *map_data)
174 {
175 	struct syscall_data *data;
176 	struct syscall_node *nodes;
177 
178 	if (!hashmap__find(hash, map_key->cpu_or_tid, &data)) {
179 		data = zalloc(sizeof(*data));
180 		if (data == NULL)
181 			return -ENOMEM;
182 
183 		data->key = map_key->cpu_or_tid;
184 		if (hashmap__add(hash, data->key, data) < 0) {
185 			free(data);
186 			return -ENOMEM;
187 		}
188 	}
189 
190 	/* update thread total stats */
191 	data->nr_events += map_data->count;
192 	data->total_time += map_data->total_time;
193 
194 	nodes = reallocarray(data->nodes, data->nr_nodes + 1, sizeof(*nodes));
195 	if (nodes == NULL)
196 		return -ENOMEM;
197 
198 	data->nodes = nodes;
199 	nodes = &data->nodes[data->nr_nodes++];
200 	nodes->syscall_nr = map_key->nr;
201 
202 	/* each thread has an entry for each syscall, just use the stat */
203 	memcpy(&nodes->stats, map_data, sizeof(*map_data));
204 	return 0;
205 }
206 
print_thread_stat(struct syscall_data * data,int max_summary,FILE * fp)207 static int print_thread_stat(struct syscall_data *data, int max_summary, FILE *fp)
208 {
209 	int printed = 0;
210 
211 	qsort(data->nodes, data->nr_nodes, sizeof(*data->nodes), nodecmp);
212 
213 	printed += fprintf(fp, " thread (%d), ", (int)data->key);
214 	printed += fprintf(fp, "%d events\n\n", data->nr_events);
215 
216 	printed += fprintf(fp, "   syscall            calls  errors  total       min       avg       max       stddev\n");
217 	printed += fprintf(fp, "                                     (msec)    (msec)    (msec)    (msec)        (%%)\n");
218 	printed += fprintf(fp, "   --------------- --------  ------ -------- --------- --------- ---------     ------\n");
219 
220 	printed += print_common_stats(data, max_summary, fp);
221 	printed += fprintf(fp, "\n\n");
222 
223 	return printed;
224 }
225 
/* Print the summary of each of the @nr_data threads in @data, in array order. */
static int print_thread_stats(struct syscall_data **data, int nr_data, int max_summary, FILE *fp)
{
	int len = 0;
	int idx = 0;

	while (idx < nr_data) {
		len += print_thread_stat(data[idx], max_summary, fp);
		idx++;
	}

	return len;
}
235 
update_total_stats(struct hashmap * hash,struct syscall_key * map_key,struct syscall_stats * map_data)236 static int update_total_stats(struct hashmap *hash, struct syscall_key *map_key,
237 			      struct syscall_stats *map_data)
238 {
239 	struct syscall_data *data;
240 	struct syscall_stats *stat;
241 
242 	if (!hashmap__find(hash, map_key->nr, &data)) {
243 		data = zalloc(sizeof(*data));
244 		if (data == NULL)
245 			return -ENOMEM;
246 
247 		data->nodes = zalloc(sizeof(*data->nodes));
248 		if (data->nodes == NULL) {
249 			free(data);
250 			return -ENOMEM;
251 		}
252 
253 		data->nr_nodes = 1;
254 		data->key = map_key->nr;
255 		data->nodes->syscall_nr = data->key;
256 
257 		if (hashmap__add(hash, data->key, data) < 0) {
258 			free(data->nodes);
259 			free(data);
260 			return -ENOMEM;
261 		}
262 	}
263 
264 	/* update total stats for this syscall */
265 	data->nr_events += map_data->count;
266 	data->total_time += map_data->total_time;
267 
268 	/* This is sum of the same syscall from different CPUs */
269 	stat = &data->nodes->stats;
270 
271 	stat->total_time += map_data->total_time;
272 	stat->squared_sum += map_data->squared_sum;
273 	stat->count += map_data->count;
274 	stat->error += map_data->error;
275 
276 	if (stat->max_time < map_data->max_time)
277 		stat->max_time = map_data->max_time;
278 	if (stat->min_time > map_data->min_time || stat->min_time == 0)
279 		stat->min_time = map_data->min_time;
280 
281 	return 0;
282 }
283 
print_total_stats(struct syscall_data ** data,int nr_data,int max_summary,FILE * fp)284 static int print_total_stats(struct syscall_data **data, int nr_data, int max_summary, FILE *fp)
285 {
286 	int printed = 0;
287 	int nr_events = 0;
288 
289 	for (int i = 0; i < nr_data; i++)
290 		nr_events += data[i]->nr_events;
291 
292 	printed += fprintf(fp, " total, %d events\n\n", nr_events);
293 
294 	printed += fprintf(fp, "   syscall            calls  errors  total       min       avg       max       stddev\n");
295 	printed += fprintf(fp, "                                     (msec)    (msec)    (msec)    (msec)        (%%)\n");
296 	printed += fprintf(fp, "   --------------- --------  ------ -------- --------- --------- ---------     ------\n");
297 
298 	if (max_summary == 0 || max_summary > nr_data)
299 		max_summary = nr_data;
300 
301 	for (int i = 0; i < max_summary; i++)
302 		printed += print_common_stats(data[i], max_summary, fp);
303 
304 	printed += fprintf(fp, "\n\n");
305 	return printed;
306 }
307 
update_cgroup_stats(struct hashmap * hash,struct syscall_key * map_key,struct syscall_stats * map_data)308 static int update_cgroup_stats(struct hashmap *hash, struct syscall_key *map_key,
309 			       struct syscall_stats *map_data)
310 {
311 	struct syscall_data *data;
312 	struct syscall_node *nodes;
313 
314 	if (!hashmap__find(hash, map_key->cgroup, &data)) {
315 		data = zalloc(sizeof(*data));
316 		if (data == NULL)
317 			return -ENOMEM;
318 
319 		data->key = map_key->cgroup;
320 		if (hashmap__add(hash, data->key, data) < 0) {
321 			free(data);
322 			return -ENOMEM;
323 		}
324 	}
325 
326 	/* update thread total stats */
327 	data->nr_events += map_data->count;
328 	data->total_time += map_data->total_time;
329 
330 	nodes = reallocarray(data->nodes, data->nr_nodes + 1, sizeof(*nodes));
331 	if (nodes == NULL)
332 		return -ENOMEM;
333 
334 	data->nodes = nodes;
335 	nodes = &data->nodes[data->nr_nodes++];
336 	nodes->syscall_nr = map_key->nr;
337 
338 	/* each thread has an entry for each syscall, just use the stat */
339 	memcpy(&nodes->stats, map_data, sizeof(*map_data));
340 	return 0;
341 }
342 
print_cgroup_stat(struct syscall_data * data,int max_summary,FILE * fp)343 static int print_cgroup_stat(struct syscall_data *data, int max_summary, FILE *fp)
344 {
345 	int printed = 0;
346 	struct cgroup *cgrp = __cgroup__find(&cgroups, data->key);
347 
348 	qsort(data->nodes, data->nr_nodes, sizeof(*data->nodes), nodecmp);
349 
350 	if (cgrp)
351 		printed += fprintf(fp, " cgroup %s,", cgrp->name);
352 	else
353 		printed += fprintf(fp, " cgroup id:%lu,", (unsigned long)data->key);
354 
355 	printed += fprintf(fp, " %d events\n\n", data->nr_events);
356 
357 	printed += fprintf(fp, "   syscall            calls  errors  total       min       avg       max       stddev\n");
358 	printed += fprintf(fp, "                                     (msec)    (msec)    (msec)    (msec)        (%%)\n");
359 	printed += fprintf(fp, "   --------------- --------  ------ -------- --------- --------- ---------     ------\n");
360 
361 	printed += print_common_stats(data, max_summary, fp);
362 	printed += fprintf(fp, "\n\n");
363 
364 	return printed;
365 }
366 
/* Print the summary of each of the @nr_data cgroups in @data, in array order. */
static int print_cgroup_stats(struct syscall_data **data, int nr_data, int max_summary, FILE *fp)
{
	int len = 0;
	int idx = 0;

	while (idx < nr_data) {
		len += print_cgroup_stat(data[idx], max_summary, fp);
		idx++;
	}

	return len;
}
376 
trace_print_bpf_summary(FILE * fp,int max_summary)377 int trace_print_bpf_summary(FILE *fp, int max_summary)
378 {
379 	struct bpf_map *map = skel->maps.syscall_stats_map;
380 	struct syscall_key *prev_key, key;
381 	struct syscall_data **data = NULL;
382 	struct hashmap schash;
383 	struct hashmap_entry *entry;
384 	int nr_data = 0;
385 	int printed = 0;
386 	int i;
387 	size_t bkt;
388 
389 	hashmap__init(&schash, sc_node_hash, sc_node_equal, /*ctx=*/NULL);
390 
391 	printed = fprintf(fp, "\n Summary of events:\n\n");
392 
393 	/* get stats from the bpf map */
394 	prev_key = NULL;
395 	while (!bpf_map__get_next_key(map, prev_key, &key, sizeof(key))) {
396 		struct syscall_stats stat;
397 
398 		if (!bpf_map__lookup_elem(map, &key, sizeof(key), &stat, sizeof(stat), 0)) {
399 			switch (skel->rodata->aggr_mode) {
400 			case SYSCALL_AGGR_THREAD:
401 				update_thread_stats(&schash, &key, &stat);
402 				break;
403 			case SYSCALL_AGGR_CPU:
404 				update_total_stats(&schash, &key, &stat);
405 				break;
406 			case SYSCALL_AGGR_CGROUP:
407 				update_cgroup_stats(&schash, &key, &stat);
408 				break;
409 			default:
410 				break;
411 			}
412 		}
413 
414 		prev_key = &key;
415 	}
416 
417 	nr_data = hashmap__size(&schash);
418 	data = calloc(nr_data, sizeof(*data));
419 	if (data == NULL)
420 		goto out;
421 
422 	i = 0;
423 	hashmap__for_each_entry(&schash, entry, bkt)
424 		data[i++] = entry->pvalue;
425 
426 	qsort(data, nr_data, sizeof(*data), datacmp);
427 
428 	switch (skel->rodata->aggr_mode) {
429 	case SYSCALL_AGGR_THREAD:
430 		printed += print_thread_stats(data, nr_data, max_summary, fp);
431 		break;
432 	case SYSCALL_AGGR_CPU:
433 		printed += print_total_stats(data, nr_data, max_summary, fp);
434 		break;
435 	case SYSCALL_AGGR_CGROUP:
436 		printed += print_cgroup_stats(data, nr_data, max_summary, fp);
437 		break;
438 	default:
439 		break;
440 	}
441 
442 	for (i = 0; i < nr_data && data; i++) {
443 		free(data[i]->nodes);
444 		free(data[i]);
445 	}
446 	free(data);
447 
448 out:
449 	hashmap__clear(&schash);
450 	return printed;
451 }
452 
trace_cleanup_bpf_summary(void)453 void trace_cleanup_bpf_summary(void)
454 {
455 	if (!RB_EMPTY_ROOT(&cgroups)) {
456 		struct cgroup *cgrp, *tmp;
457 
458 		rbtree_postorder_for_each_entry_safe(cgrp, tmp, &cgroups, node)
459 			cgroup__put(cgrp);
460 
461 		cgroups = RB_ROOT;
462 	}
463 
464 	syscall_summary_bpf__destroy(skel);
465 }
466