1 /* SPDX-License-Identifier: GPL-2.0 */
2 #include <errno.h>
3 #include <inttypes.h>
4 #include <math.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7
8 #include "dwarf-regs.h" /* for EM_HOST */
9 #include "syscalltbl.h"
10 #include "util/cgroup.h"
11 #include "util/hashmap.h"
12 #include "util/trace.h"
13 #include "util/util.h"
14 #include <bpf/bpf.h>
15 #include <linux/rbtree.h>
16 #include <linux/time64.h>
17 #include <tools/libc_compat.h> /* reallocarray */
18
19 #include "bpf_skel/syscall_summary.h"
20 #include "bpf_skel/syscall_summary.skel.h"
21
22
23 static struct syscall_summary_bpf *skel;
24 static struct rb_root cgroups = RB_ROOT;
25
trace_prepare_bpf_summary(enum trace_summary_mode mode)26 int trace_prepare_bpf_summary(enum trace_summary_mode mode)
27 {
28 skel = syscall_summary_bpf__open();
29 if (skel == NULL) {
30 fprintf(stderr, "failed to open syscall summary bpf skeleton\n");
31 return -1;
32 }
33
34 if (mode == SUMMARY__BY_THREAD)
35 skel->rodata->aggr_mode = SYSCALL_AGGR_THREAD;
36 else if (mode == SUMMARY__BY_CGROUP)
37 skel->rodata->aggr_mode = SYSCALL_AGGR_CGROUP;
38 else
39 skel->rodata->aggr_mode = SYSCALL_AGGR_CPU;
40
41 if (cgroup_is_v2("perf_event") > 0)
42 skel->rodata->use_cgroup_v2 = 1;
43
44 if (syscall_summary_bpf__load(skel) < 0) {
45 fprintf(stderr, "failed to load syscall summary bpf skeleton\n");
46 return -1;
47 }
48
49 if (syscall_summary_bpf__attach(skel) < 0) {
50 fprintf(stderr, "failed to attach syscall summary bpf skeleton\n");
51 return -1;
52 }
53
54 if (mode == SUMMARY__BY_CGROUP)
55 read_all_cgroups(&cgroups);
56
57 return 0;
58 }
59
trace_start_bpf_summary(void)60 void trace_start_bpf_summary(void)
61 {
62 skel->bss->enabled = 1;
63 }
64
trace_end_bpf_summary(void)65 void trace_end_bpf_summary(void)
66 {
67 skel->bss->enabled = 0;
68 }
69
70 struct syscall_node {
71 int syscall_nr;
72 struct syscall_stats stats;
73 };
74
rel_stddev(struct syscall_stats * stat)75 static double rel_stddev(struct syscall_stats *stat)
76 {
77 double variance, average;
78
79 if (stat->count < 2)
80 return 0;
81
82 average = (double)stat->total_time / stat->count;
83
84 variance = stat->squared_sum;
85 variance -= (stat->total_time * stat->total_time) / stat->count;
86 variance /= stat->count - 1;
87
88 return 100 * sqrt(variance / stat->count) / average;
89 }
90
91 /*
92 * The syscall_data is to maintain syscall stats ordered by total time.
93 * It supports different summary modes like per-thread or global.
94 *
95 * For per-thread stats, it uses two-level data strurcture -
96 * syscall_data is keyed by TID and has an array of nodes which
97 * represents each syscall for the thread.
98 *
99 * For global stats, it's still two-level technically but we don't need
100 * per-cpu analysis so it's keyed by the syscall number to combine stats
101 * from different CPUs. And syscall_data always has a syscall_node so
102 * it can effectively work as flat hierarchy.
103 *
104 * For per-cgroup stats, it uses two-level data structure like thread
105 * syscall_data is keyed by CGROUP and has an array of node which
106 * represents each syscall for the cgroup.
107 */
108 struct syscall_data {
109 u64 key; /* tid if AGGR_THREAD, syscall-nr if AGGR_CPU, cgroup if AGGR_CGROUP */
110 int nr_events;
111 int nr_nodes;
112 u64 total_time;
113 struct syscall_node *nodes;
114 };
115
datacmp(const void * a,const void * b)116 static int datacmp(const void *a, const void *b)
117 {
118 const struct syscall_data * const *sa = a;
119 const struct syscall_data * const *sb = b;
120
121 return (*sa)->total_time > (*sb)->total_time ? -1 : 1;
122 }
123
nodecmp(const void * a,const void * b)124 static int nodecmp(const void *a, const void *b)
125 {
126 const struct syscall_node *na = a;
127 const struct syscall_node *nb = b;
128
129 return na->stats.total_time > nb->stats.total_time ? -1 : 1;
130 }
131
/* hashmap hash callback: the key itself is used as the hash value. */
static size_t sc_node_hash(long key, void *ctx __maybe_unused)
{
	return (size_t)key;
}
136
/* hashmap equality callback: two keys match when they are numerically equal. */
static bool sc_node_equal(long key1, long key2, void *ctx __maybe_unused)
{
	return key1 == key2;
}
141
print_common_stats(struct syscall_data * data,int max_summary,FILE * fp)142 static int print_common_stats(struct syscall_data *data, int max_summary, FILE *fp)
143 {
144 int printed = 0;
145
146 if (max_summary == 0 || max_summary > data->nr_nodes)
147 max_summary = data->nr_nodes;
148
149 for (int i = 0; i < max_summary; i++) {
150 struct syscall_node *node = &data->nodes[i];
151 struct syscall_stats *stat = &node->stats;
152 double total = (double)(stat->total_time) / NSEC_PER_MSEC;
153 double min = (double)(stat->min_time) / NSEC_PER_MSEC;
154 double max = (double)(stat->max_time) / NSEC_PER_MSEC;
155 double avg = total / stat->count;
156 const char *name;
157
158 /* TODO: support other ABIs */
159 name = syscalltbl__name(EM_HOST, node->syscall_nr);
160 if (name)
161 printed += fprintf(fp, " %-15s", name);
162 else
163 printed += fprintf(fp, " syscall:%-7d", node->syscall_nr);
164
165 printed += fprintf(fp, " %8u %6u %9.3f %9.3f %9.3f %9.3f %9.2f%%\n",
166 stat->count, stat->error, total, min, avg, max,
167 rel_stddev(stat));
168 }
169 return printed;
170 }
171
update_thread_stats(struct hashmap * hash,struct syscall_key * map_key,struct syscall_stats * map_data)172 static int update_thread_stats(struct hashmap *hash, struct syscall_key *map_key,
173 struct syscall_stats *map_data)
174 {
175 struct syscall_data *data;
176 struct syscall_node *nodes;
177
178 if (!hashmap__find(hash, map_key->cpu_or_tid, &data)) {
179 data = zalloc(sizeof(*data));
180 if (data == NULL)
181 return -ENOMEM;
182
183 data->key = map_key->cpu_or_tid;
184 if (hashmap__add(hash, data->key, data) < 0) {
185 free(data);
186 return -ENOMEM;
187 }
188 }
189
190 /* update thread total stats */
191 data->nr_events += map_data->count;
192 data->total_time += map_data->total_time;
193
194 nodes = reallocarray(data->nodes, data->nr_nodes + 1, sizeof(*nodes));
195 if (nodes == NULL)
196 return -ENOMEM;
197
198 data->nodes = nodes;
199 nodes = &data->nodes[data->nr_nodes++];
200 nodes->syscall_nr = map_key->nr;
201
202 /* each thread has an entry for each syscall, just use the stat */
203 memcpy(&nodes->stats, map_data, sizeof(*map_data));
204 return 0;
205 }
206
print_thread_stat(struct syscall_data * data,int max_summary,FILE * fp)207 static int print_thread_stat(struct syscall_data *data, int max_summary, FILE *fp)
208 {
209 int printed = 0;
210
211 qsort(data->nodes, data->nr_nodes, sizeof(*data->nodes), nodecmp);
212
213 printed += fprintf(fp, " thread (%d), ", (int)data->key);
214 printed += fprintf(fp, "%d events\n\n", data->nr_events);
215
216 printed += fprintf(fp, " syscall calls errors total min avg max stddev\n");
217 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
218 printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n");
219
220 printed += print_common_stats(data, max_summary, fp);
221 printed += fprintf(fp, "\n\n");
222
223 return printed;
224 }
225
/*
 * Print the per-thread tables for all @nr_data entries of @data, in array
 * order.  Returns the accumulated print_thread_stat() results.
 */
static int print_thread_stats(struct syscall_data **data, int nr_data, int max_summary, FILE *fp)
{
	int printed = 0;
	int idx = 0;

	while (idx < nr_data) {
		printed += print_thread_stat(data[idx], max_summary, fp);
		idx++;
	}

	return printed;
}
235
update_total_stats(struct hashmap * hash,struct syscall_key * map_key,struct syscall_stats * map_data)236 static int update_total_stats(struct hashmap *hash, struct syscall_key *map_key,
237 struct syscall_stats *map_data)
238 {
239 struct syscall_data *data;
240 struct syscall_stats *stat;
241
242 if (!hashmap__find(hash, map_key->nr, &data)) {
243 data = zalloc(sizeof(*data));
244 if (data == NULL)
245 return -ENOMEM;
246
247 data->nodes = zalloc(sizeof(*data->nodes));
248 if (data->nodes == NULL) {
249 free(data);
250 return -ENOMEM;
251 }
252
253 data->nr_nodes = 1;
254 data->key = map_key->nr;
255 data->nodes->syscall_nr = data->key;
256
257 if (hashmap__add(hash, data->key, data) < 0) {
258 free(data->nodes);
259 free(data);
260 return -ENOMEM;
261 }
262 }
263
264 /* update total stats for this syscall */
265 data->nr_events += map_data->count;
266 data->total_time += map_data->total_time;
267
268 /* This is sum of the same syscall from different CPUs */
269 stat = &data->nodes->stats;
270
271 stat->total_time += map_data->total_time;
272 stat->squared_sum += map_data->squared_sum;
273 stat->count += map_data->count;
274 stat->error += map_data->error;
275
276 if (stat->max_time < map_data->max_time)
277 stat->max_time = map_data->max_time;
278 if (stat->min_time > map_data->min_time || stat->min_time == 0)
279 stat->min_time = map_data->min_time;
280
281 return 0;
282 }
283
print_total_stats(struct syscall_data ** data,int nr_data,int max_summary,FILE * fp)284 static int print_total_stats(struct syscall_data **data, int nr_data, int max_summary, FILE *fp)
285 {
286 int printed = 0;
287 int nr_events = 0;
288
289 for (int i = 0; i < nr_data; i++)
290 nr_events += data[i]->nr_events;
291
292 printed += fprintf(fp, " total, %d events\n\n", nr_events);
293
294 printed += fprintf(fp, " syscall calls errors total min avg max stddev\n");
295 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
296 printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n");
297
298 if (max_summary == 0 || max_summary > nr_data)
299 max_summary = nr_data;
300
301 for (int i = 0; i < max_summary; i++)
302 printed += print_common_stats(data[i], max_summary, fp);
303
304 printed += fprintf(fp, "\n\n");
305 return printed;
306 }
307
update_cgroup_stats(struct hashmap * hash,struct syscall_key * map_key,struct syscall_stats * map_data)308 static int update_cgroup_stats(struct hashmap *hash, struct syscall_key *map_key,
309 struct syscall_stats *map_data)
310 {
311 struct syscall_data *data;
312 struct syscall_node *nodes;
313
314 if (!hashmap__find(hash, map_key->cgroup, &data)) {
315 data = zalloc(sizeof(*data));
316 if (data == NULL)
317 return -ENOMEM;
318
319 data->key = map_key->cgroup;
320 if (hashmap__add(hash, data->key, data) < 0) {
321 free(data);
322 return -ENOMEM;
323 }
324 }
325
326 /* update thread total stats */
327 data->nr_events += map_data->count;
328 data->total_time += map_data->total_time;
329
330 nodes = reallocarray(data->nodes, data->nr_nodes + 1, sizeof(*nodes));
331 if (nodes == NULL)
332 return -ENOMEM;
333
334 data->nodes = nodes;
335 nodes = &data->nodes[data->nr_nodes++];
336 nodes->syscall_nr = map_key->nr;
337
338 /* each thread has an entry for each syscall, just use the stat */
339 memcpy(&nodes->stats, map_data, sizeof(*map_data));
340 return 0;
341 }
342
print_cgroup_stat(struct syscall_data * data,int max_summary,FILE * fp)343 static int print_cgroup_stat(struct syscall_data *data, int max_summary, FILE *fp)
344 {
345 int printed = 0;
346 struct cgroup *cgrp = __cgroup__find(&cgroups, data->key);
347
348 qsort(data->nodes, data->nr_nodes, sizeof(*data->nodes), nodecmp);
349
350 if (cgrp)
351 printed += fprintf(fp, " cgroup %s,", cgrp->name);
352 else
353 printed += fprintf(fp, " cgroup id:%lu,", (unsigned long)data->key);
354
355 printed += fprintf(fp, " %d events\n\n", data->nr_events);
356
357 printed += fprintf(fp, " syscall calls errors total min avg max stddev\n");
358 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
359 printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n");
360
361 printed += print_common_stats(data, max_summary, fp);
362 printed += fprintf(fp, "\n\n");
363
364 return printed;
365 }
366
/*
 * Print the per-cgroup tables for all @nr_data entries of @data, in array
 * order.  Returns the accumulated print_cgroup_stat() results.
 */
static int print_cgroup_stats(struct syscall_data **data, int nr_data, int max_summary, FILE *fp)
{
	int printed = 0;
	int idx = 0;

	while (idx < nr_data) {
		printed += print_cgroup_stat(data[idx], max_summary, fp);
		idx++;
	}

	return printed;
}
376
trace_print_bpf_summary(FILE * fp,int max_summary)377 int trace_print_bpf_summary(FILE *fp, int max_summary)
378 {
379 struct bpf_map *map = skel->maps.syscall_stats_map;
380 struct syscall_key *prev_key, key;
381 struct syscall_data **data = NULL;
382 struct hashmap schash;
383 struct hashmap_entry *entry;
384 int nr_data = 0;
385 int printed = 0;
386 int i;
387 size_t bkt;
388
389 hashmap__init(&schash, sc_node_hash, sc_node_equal, /*ctx=*/NULL);
390
391 printed = fprintf(fp, "\n Summary of events:\n\n");
392
393 /* get stats from the bpf map */
394 prev_key = NULL;
395 while (!bpf_map__get_next_key(map, prev_key, &key, sizeof(key))) {
396 struct syscall_stats stat;
397
398 if (!bpf_map__lookup_elem(map, &key, sizeof(key), &stat, sizeof(stat), 0)) {
399 switch (skel->rodata->aggr_mode) {
400 case SYSCALL_AGGR_THREAD:
401 update_thread_stats(&schash, &key, &stat);
402 break;
403 case SYSCALL_AGGR_CPU:
404 update_total_stats(&schash, &key, &stat);
405 break;
406 case SYSCALL_AGGR_CGROUP:
407 update_cgroup_stats(&schash, &key, &stat);
408 break;
409 default:
410 break;
411 }
412 }
413
414 prev_key = &key;
415 }
416
417 nr_data = hashmap__size(&schash);
418 data = calloc(nr_data, sizeof(*data));
419 if (data == NULL)
420 goto out;
421
422 i = 0;
423 hashmap__for_each_entry(&schash, entry, bkt)
424 data[i++] = entry->pvalue;
425
426 qsort(data, nr_data, sizeof(*data), datacmp);
427
428 switch (skel->rodata->aggr_mode) {
429 case SYSCALL_AGGR_THREAD:
430 printed += print_thread_stats(data, nr_data, max_summary, fp);
431 break;
432 case SYSCALL_AGGR_CPU:
433 printed += print_total_stats(data, nr_data, max_summary, fp);
434 break;
435 case SYSCALL_AGGR_CGROUP:
436 printed += print_cgroup_stats(data, nr_data, max_summary, fp);
437 break;
438 default:
439 break;
440 }
441
442 for (i = 0; i < nr_data && data; i++) {
443 free(data[i]->nodes);
444 free(data[i]);
445 }
446 free(data);
447
448 out:
449 hashmap__clear(&schash);
450 return printed;
451 }
452
trace_cleanup_bpf_summary(void)453 void trace_cleanup_bpf_summary(void)
454 {
455 if (!RB_EMPTY_ROOT(&cgroups)) {
456 struct cgroup *cgrp, *tmp;
457
458 rbtree_postorder_for_each_entry_safe(cgrp, tmp, &cgroups, node)
459 cgroup__put(cgrp);
460
461 cgroups = RB_ROOT;
462 }
463
464 syscall_summary_bpf__destroy(skel);
465 }
466