1 /* SPDX-License-Identifier: GPL-2.0 */
2 #include <inttypes.h>
3 #include <math.h>
4 #include <stdio.h>
5 #include <stdlib.h>
6
7 #include "dwarf-regs.h" /* for EM_HOST */
8 #include "syscalltbl.h"
9 #include "util/cgroup.h"
10 #include "util/hashmap.h"
11 #include "util/trace.h"
12 #include "util/util.h"
13 #include <bpf/bpf.h>
14 #include <linux/rbtree.h>
15 #include <linux/time64.h>
16 #include <tools/libc_compat.h> /* reallocarray */
17
18 #include "bpf_skel/syscall_summary.h"
19 #include "bpf_skel/syscall_summary.skel.h"
20
21
/*
 * BPF skeleton handle shared by all functions in this file.  It is assigned
 * in trace_prepare_bpf_summary() and destroyed in trace_cleanup_bpf_summary().
 */
static struct syscall_summary_bpf *skel;
/*
 * Tree of known cgroups.  It is filled by read_all_cgroups() only when the
 * summary mode is SUMMARY__BY_CGROUP, and is used to look up a cgroup by id
 * when printing per-cgroup stats.
 */
static struct rb_root cgroups = RB_ROOT;
24
trace_prepare_bpf_summary(enum trace_summary_mode mode)25 int trace_prepare_bpf_summary(enum trace_summary_mode mode)
26 {
27 skel = syscall_summary_bpf__open();
28 if (skel == NULL) {
29 fprintf(stderr, "failed to open syscall summary bpf skeleton\n");
30 return -1;
31 }
32
33 if (mode == SUMMARY__BY_THREAD)
34 skel->rodata->aggr_mode = SYSCALL_AGGR_THREAD;
35 else if (mode == SUMMARY__BY_CGROUP)
36 skel->rodata->aggr_mode = SYSCALL_AGGR_CGROUP;
37 else
38 skel->rodata->aggr_mode = SYSCALL_AGGR_CPU;
39
40 if (cgroup_is_v2("perf_event") > 0)
41 skel->rodata->use_cgroup_v2 = 1;
42
43 if (syscall_summary_bpf__load(skel) < 0) {
44 fprintf(stderr, "failed to load syscall summary bpf skeleton\n");
45 return -1;
46 }
47
48 if (syscall_summary_bpf__attach(skel) < 0) {
49 fprintf(stderr, "failed to attach syscall summary bpf skeleton\n");
50 return -1;
51 }
52
53 if (mode == SUMMARY__BY_CGROUP)
54 read_all_cgroups(&cgroups);
55
56 return 0;
57 }
58
/*
 * Turn collection on by setting the 'enabled' flag in the skeleton's bss
 * data to 1.  'skel' is dereferenced unconditionally, so this must only be
 * called after trace_prepare_bpf_summary() has set it up.
 * NOTE(review): presumably the BPF program checks this flag before
 * recording anything - confirm against the BPF source.
 */
void trace_start_bpf_summary(void)
{
	skel->bss->enabled = 1;
}
63
/*
 * Turn collection off by clearing the 'enabled' flag in the skeleton's bss
 * data.  'skel' is dereferenced unconditionally, so this must only be
 * called after trace_prepare_bpf_summary() has set it up.
 */
void trace_end_bpf_summary(void)
{
	skel->bss->enabled = 0;
}
68
/* Stats of a single syscall, stored as one element of syscall_data::nodes. */
struct syscall_node {
	int syscall_nr;			/* syscall number, used to look up the name */
	struct syscall_stats stats;	/* counters copied or summed from the BPF map */
};
73
rel_stddev(struct syscall_stats * stat)74 static double rel_stddev(struct syscall_stats *stat)
75 {
76 double variance, average;
77
78 if (stat->count < 2)
79 return 0;
80
81 average = (double)stat->total_time / stat->count;
82
83 variance = stat->squared_sum;
84 variance -= (stat->total_time * stat->total_time) / stat->count;
85 variance /= stat->count - 1;
86
87 return 100 * sqrt(variance / stat->count) / average;
88 }
89
/*
 * The syscall_data is to maintain syscall stats ordered by total time.
 * It supports different summary modes like per-thread or global.
 *
 * For per-thread stats, it uses a two-level data structure -
 * syscall_data is keyed by TID and has an array of nodes which
 * represent each syscall for the thread.
 *
 * For global stats, it's still two-level technically but we don't need
 * per-cpu analysis so it's keyed by the syscall number to combine stats
 * from different CPUs.  And syscall_data always has a single syscall_node
 * so it can effectively work as a flat hierarchy.
 *
 * For per-cgroup stats, it uses a two-level data structure like the
 * per-thread case - syscall_data is keyed by CGROUP and has an array of
 * nodes which represent each syscall for the cgroup.
 */
struct syscall_data {
	u64 key; /* tid if AGGR_THREAD, syscall-nr if AGGR_CPU, cgroup if AGGR_CGROUP */
	int nr_events;			/* sum of the 'count' of every stat added */
	int nr_nodes;			/* number of elements in 'nodes' */
	u64 total_time;			/* sum of total_time of every stat added; sort key */
	struct syscall_node *nodes;	/* heap-allocated array, freed after printing */
};
114
datacmp(const void * a,const void * b)115 static int datacmp(const void *a, const void *b)
116 {
117 const struct syscall_data * const *sa = a;
118 const struct syscall_data * const *sb = b;
119
120 return (*sa)->total_time > (*sb)->total_time ? -1 : 1;
121 }
122
nodecmp(const void * a,const void * b)123 static int nodecmp(const void *a, const void *b)
124 {
125 const struct syscall_node *na = a;
126 const struct syscall_node *nb = b;
127
128 return na->stats.total_time > nb->stats.total_time ? -1 : 1;
129 }
130
/* Hash callback for the summary hashmap: the key is used as its own hash. */
static size_t sc_node_hash(long key, void *ctx __maybe_unused)
{
	size_t hash = key;

	return hash;
}
135
/* Equality callback for the summary hashmap: keys match when numerically equal. */
static bool sc_node_equal(long key1, long key2, void *ctx __maybe_unused)
{
	return key1 == key2;
}
140
print_common_stats(struct syscall_data * data,int max_summary,FILE * fp)141 static int print_common_stats(struct syscall_data *data, int max_summary, FILE *fp)
142 {
143 int printed = 0;
144
145 if (max_summary == 0 || max_summary > data->nr_nodes)
146 max_summary = data->nr_nodes;
147
148 for (int i = 0; i < max_summary; i++) {
149 struct syscall_node *node = &data->nodes[i];
150 struct syscall_stats *stat = &node->stats;
151 double total = (double)(stat->total_time) / NSEC_PER_MSEC;
152 double min = (double)(stat->min_time) / NSEC_PER_MSEC;
153 double max = (double)(stat->max_time) / NSEC_PER_MSEC;
154 double avg = total / stat->count;
155 const char *name;
156
157 /* TODO: support other ABIs */
158 name = syscalltbl__name(EM_HOST, node->syscall_nr);
159 if (name)
160 printed += fprintf(fp, " %-15s", name);
161 else
162 printed += fprintf(fp, " syscall:%-7d", node->syscall_nr);
163
164 printed += fprintf(fp, " %8u %6u %9.3f %9.3f %9.3f %9.3f %9.2f%%\n",
165 stat->count, stat->error, total, min, avg, max,
166 rel_stddev(stat));
167 }
168 return printed;
169 }
170
update_thread_stats(struct hashmap * hash,struct syscall_key * map_key,struct syscall_stats * map_data)171 static int update_thread_stats(struct hashmap *hash, struct syscall_key *map_key,
172 struct syscall_stats *map_data)
173 {
174 struct syscall_data *data;
175 struct syscall_node *nodes;
176
177 if (!hashmap__find(hash, map_key->cpu_or_tid, &data)) {
178 data = zalloc(sizeof(*data));
179 if (data == NULL)
180 return -ENOMEM;
181
182 data->key = map_key->cpu_or_tid;
183 if (hashmap__add(hash, data->key, data) < 0) {
184 free(data);
185 return -ENOMEM;
186 }
187 }
188
189 /* update thread total stats */
190 data->nr_events += map_data->count;
191 data->total_time += map_data->total_time;
192
193 nodes = reallocarray(data->nodes, data->nr_nodes + 1, sizeof(*nodes));
194 if (nodes == NULL)
195 return -ENOMEM;
196
197 data->nodes = nodes;
198 nodes = &data->nodes[data->nr_nodes++];
199 nodes->syscall_nr = map_key->nr;
200
201 /* each thread has an entry for each syscall, just use the stat */
202 memcpy(&nodes->stats, map_data, sizeof(*map_data));
203 return 0;
204 }
205
print_thread_stat(struct syscall_data * data,int max_summary,FILE * fp)206 static int print_thread_stat(struct syscall_data *data, int max_summary, FILE *fp)
207 {
208 int printed = 0;
209
210 qsort(data->nodes, data->nr_nodes, sizeof(*data->nodes), nodecmp);
211
212 printed += fprintf(fp, " thread (%d), ", (int)data->key);
213 printed += fprintf(fp, "%d events\n\n", data->nr_events);
214
215 printed += fprintf(fp, " syscall calls errors total min avg max stddev\n");
216 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
217 printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n");
218
219 printed += print_common_stats(data, max_summary, fp);
220 printed += fprintf(fp, "\n\n");
221
222 return printed;
223 }
224
/*
 * Print the per-thread summary of each of the @nr_data entries in @data,
 * in array order.  Returns the accumulated print_thread_stat() results.
 */
static int print_thread_stats(struct syscall_data **data, int nr_data, int max_summary, FILE *fp)
{
	int total = 0;
	int idx = 0;

	while (idx < nr_data) {
		total += print_thread_stat(data[idx], max_summary, fp);
		idx++;
	}

	return total;
}
234
update_total_stats(struct hashmap * hash,struct syscall_key * map_key,struct syscall_stats * map_data)235 static int update_total_stats(struct hashmap *hash, struct syscall_key *map_key,
236 struct syscall_stats *map_data)
237 {
238 struct syscall_data *data;
239 struct syscall_stats *stat;
240
241 if (!hashmap__find(hash, map_key->nr, &data)) {
242 data = zalloc(sizeof(*data));
243 if (data == NULL)
244 return -ENOMEM;
245
246 data->nodes = zalloc(sizeof(*data->nodes));
247 if (data->nodes == NULL) {
248 free(data);
249 return -ENOMEM;
250 }
251
252 data->nr_nodes = 1;
253 data->key = map_key->nr;
254 data->nodes->syscall_nr = data->key;
255
256 if (hashmap__add(hash, data->key, data) < 0) {
257 free(data->nodes);
258 free(data);
259 return -ENOMEM;
260 }
261 }
262
263 /* update total stats for this syscall */
264 data->nr_events += map_data->count;
265 data->total_time += map_data->total_time;
266
267 /* This is sum of the same syscall from different CPUs */
268 stat = &data->nodes->stats;
269
270 stat->total_time += map_data->total_time;
271 stat->squared_sum += map_data->squared_sum;
272 stat->count += map_data->count;
273 stat->error += map_data->error;
274
275 if (stat->max_time < map_data->max_time)
276 stat->max_time = map_data->max_time;
277 if (stat->min_time > map_data->min_time || stat->min_time == 0)
278 stat->min_time = map_data->min_time;
279
280 return 0;
281 }
282
print_total_stats(struct syscall_data ** data,int nr_data,int max_summary,FILE * fp)283 static int print_total_stats(struct syscall_data **data, int nr_data, int max_summary, FILE *fp)
284 {
285 int printed = 0;
286 int nr_events = 0;
287
288 for (int i = 0; i < nr_data; i++)
289 nr_events += data[i]->nr_events;
290
291 printed += fprintf(fp, " total, %d events\n\n", nr_events);
292
293 printed += fprintf(fp, " syscall calls errors total min avg max stddev\n");
294 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
295 printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n");
296
297 if (max_summary == 0 || max_summary > nr_data)
298 max_summary = nr_data;
299
300 for (int i = 0; i < max_summary; i++)
301 printed += print_common_stats(data[i], max_summary, fp);
302
303 printed += fprintf(fp, "\n\n");
304 return printed;
305 }
306
update_cgroup_stats(struct hashmap * hash,struct syscall_key * map_key,struct syscall_stats * map_data)307 static int update_cgroup_stats(struct hashmap *hash, struct syscall_key *map_key,
308 struct syscall_stats *map_data)
309 {
310 struct syscall_data *data;
311 struct syscall_node *nodes;
312
313 if (!hashmap__find(hash, map_key->cgroup, &data)) {
314 data = zalloc(sizeof(*data));
315 if (data == NULL)
316 return -ENOMEM;
317
318 data->key = map_key->cgroup;
319 if (hashmap__add(hash, data->key, data) < 0) {
320 free(data);
321 return -ENOMEM;
322 }
323 }
324
325 /* update thread total stats */
326 data->nr_events += map_data->count;
327 data->total_time += map_data->total_time;
328
329 nodes = reallocarray(data->nodes, data->nr_nodes + 1, sizeof(*nodes));
330 if (nodes == NULL)
331 return -ENOMEM;
332
333 data->nodes = nodes;
334 nodes = &data->nodes[data->nr_nodes++];
335 nodes->syscall_nr = map_key->nr;
336
337 /* each thread has an entry for each syscall, just use the stat */
338 memcpy(&nodes->stats, map_data, sizeof(*map_data));
339 return 0;
340 }
341
print_cgroup_stat(struct syscall_data * data,int max_summary,FILE * fp)342 static int print_cgroup_stat(struct syscall_data *data, int max_summary, FILE *fp)
343 {
344 int printed = 0;
345 struct cgroup *cgrp = __cgroup__find(&cgroups, data->key);
346
347 qsort(data->nodes, data->nr_nodes, sizeof(*data->nodes), nodecmp);
348
349 if (cgrp)
350 printed += fprintf(fp, " cgroup %s,", cgrp->name);
351 else
352 printed += fprintf(fp, " cgroup id:%lu,", (unsigned long)data->key);
353
354 printed += fprintf(fp, " %d events\n\n", data->nr_events);
355
356 printed += fprintf(fp, " syscall calls errors total min avg max stddev\n");
357 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
358 printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n");
359
360 printed += print_common_stats(data, max_summary, fp);
361 printed += fprintf(fp, "\n\n");
362
363 return printed;
364 }
365
/*
 * Print the per-cgroup summary of each of the @nr_data entries in @data,
 * in array order.  Returns the accumulated print_cgroup_stat() results.
 */
static int print_cgroup_stats(struct syscall_data **data, int nr_data, int max_summary, FILE *fp)
{
	int total = 0;
	int idx = 0;

	while (idx < nr_data) {
		total += print_cgroup_stat(data[idx], max_summary, fp);
		idx++;
	}

	return total;
}
375
trace_print_bpf_summary(FILE * fp,int max_summary)376 int trace_print_bpf_summary(FILE *fp, int max_summary)
377 {
378 struct bpf_map *map = skel->maps.syscall_stats_map;
379 struct syscall_key *prev_key, key;
380 struct syscall_data **data = NULL;
381 struct hashmap schash;
382 struct hashmap_entry *entry;
383 int nr_data = 0;
384 int printed = 0;
385 int i;
386 size_t bkt;
387
388 hashmap__init(&schash, sc_node_hash, sc_node_equal, /*ctx=*/NULL);
389
390 printed = fprintf(fp, "\n Summary of events:\n\n");
391
392 /* get stats from the bpf map */
393 prev_key = NULL;
394 while (!bpf_map__get_next_key(map, prev_key, &key, sizeof(key))) {
395 struct syscall_stats stat;
396
397 if (!bpf_map__lookup_elem(map, &key, sizeof(key), &stat, sizeof(stat), 0)) {
398 switch (skel->rodata->aggr_mode) {
399 case SYSCALL_AGGR_THREAD:
400 update_thread_stats(&schash, &key, &stat);
401 break;
402 case SYSCALL_AGGR_CPU:
403 update_total_stats(&schash, &key, &stat);
404 break;
405 case SYSCALL_AGGR_CGROUP:
406 update_cgroup_stats(&schash, &key, &stat);
407 break;
408 default:
409 break;
410 }
411 }
412
413 prev_key = &key;
414 }
415
416 nr_data = hashmap__size(&schash);
417 data = calloc(nr_data, sizeof(*data));
418 if (data == NULL)
419 goto out;
420
421 i = 0;
422 hashmap__for_each_entry(&schash, entry, bkt)
423 data[i++] = entry->pvalue;
424
425 qsort(data, nr_data, sizeof(*data), datacmp);
426
427 switch (skel->rodata->aggr_mode) {
428 case SYSCALL_AGGR_THREAD:
429 printed += print_thread_stats(data, nr_data, max_summary, fp);
430 break;
431 case SYSCALL_AGGR_CPU:
432 printed += print_total_stats(data, nr_data, max_summary, fp);
433 break;
434 case SYSCALL_AGGR_CGROUP:
435 printed += print_cgroup_stats(data, nr_data, max_summary, fp);
436 break;
437 default:
438 break;
439 }
440
441 for (i = 0; i < nr_data && data; i++) {
442 free(data[i]->nodes);
443 free(data[i]);
444 }
445 free(data);
446
447 out:
448 hashmap__clear(&schash);
449 return printed;
450 }
451
trace_cleanup_bpf_summary(void)452 void trace_cleanup_bpf_summary(void)
453 {
454 if (!RB_EMPTY_ROOT(&cgroups)) {
455 struct cgroup *cgrp, *tmp;
456
457 rbtree_postorder_for_each_entry_safe(cgrp, tmp, &cgroups, node)
458 cgroup__put(cgrp);
459
460 cgroups = RB_ROOT;
461 }
462
463 syscall_summary_bpf__destroy(skel);
464 }
465