1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #include <inttypes.h> 3 #include <math.h> 4 #include <stdio.h> 5 #include <stdlib.h> 6 7 #include "dwarf-regs.h" /* for EM_HOST */ 8 #include "syscalltbl.h" 9 #include "util/hashmap.h" 10 #include "util/trace.h" 11 #include "util/util.h" 12 #include <bpf/bpf.h> 13 #include <linux/time64.h> 14 #include <tools/libc_compat.h> /* reallocarray */ 15 16 #include "bpf_skel/syscall_summary.h" 17 #include "bpf_skel/syscall_summary.skel.h" 18 19 20 static struct syscall_summary_bpf *skel; 21 22 int trace_prepare_bpf_summary(enum trace_summary_mode mode) 23 { 24 skel = syscall_summary_bpf__open(); 25 if (skel == NULL) { 26 fprintf(stderr, "failed to open syscall summary bpf skeleton\n"); 27 return -1; 28 } 29 30 if (mode == SUMMARY__BY_THREAD) 31 skel->rodata->aggr_mode = SYSCALL_AGGR_THREAD; 32 else 33 skel->rodata->aggr_mode = SYSCALL_AGGR_CPU; 34 35 if (syscall_summary_bpf__load(skel) < 0) { 36 fprintf(stderr, "failed to load syscall summary bpf skeleton\n"); 37 return -1; 38 } 39 40 if (syscall_summary_bpf__attach(skel) < 0) { 41 fprintf(stderr, "failed to attach syscall summary bpf skeleton\n"); 42 return -1; 43 } 44 45 return 0; 46 } 47 48 void trace_start_bpf_summary(void) 49 { 50 skel->bss->enabled = 1; 51 } 52 53 void trace_end_bpf_summary(void) 54 { 55 skel->bss->enabled = 0; 56 } 57 58 struct syscall_node { 59 int syscall_nr; 60 struct syscall_stats stats; 61 }; 62 63 static double rel_stddev(struct syscall_stats *stat) 64 { 65 double variance, average; 66 67 if (stat->count < 2) 68 return 0; 69 70 average = (double)stat->total_time / stat->count; 71 72 variance = stat->squared_sum; 73 variance -= (stat->total_time * stat->total_time) / stat->count; 74 variance /= stat->count - 1; 75 76 return 100 * sqrt(variance / stat->count) / average; 77 } 78 79 /* 80 * The syscall_data is to maintain syscall stats ordered by total time. 81 * It supports different summary modes like per-thread or global. 82 * 83 * For per-thread stats, it uses two-level data strurcture - 84 * syscall_data is keyed by TID and has an array of nodes which 85 * represents each syscall for the thread. 86 * 87 * For global stats, it's still two-level technically but we don't need 88 * per-cpu analysis so it's keyed by the syscall number to combine stats 89 * from different CPUs. And syscall_data always has a syscall_node so 90 * it can effectively work as flat hierarchy. 91 */ 92 struct syscall_data { 93 int key; /* tid if AGGR_THREAD, syscall-nr if AGGR_CPU */ 94 int nr_events; 95 int nr_nodes; 96 u64 total_time; 97 struct syscall_node *nodes; 98 }; 99 100 static int datacmp(const void *a, const void *b) 101 { 102 const struct syscall_data * const *sa = a; 103 const struct syscall_data * const *sb = b; 104 105 return (*sa)->total_time > (*sb)->total_time ? -1 : 1; 106 } 107 108 static int nodecmp(const void *a, const void *b) 109 { 110 const struct syscall_node *na = a; 111 const struct syscall_node *nb = b; 112 113 return na->stats.total_time > nb->stats.total_time ? -1 : 1; 114 } 115 116 static size_t sc_node_hash(long key, void *ctx __maybe_unused) 117 { 118 return key; 119 } 120 121 static bool sc_node_equal(long key1, long key2, void *ctx __maybe_unused) 122 { 123 return key1 == key2; 124 } 125 126 static int print_common_stats(struct syscall_data *data, FILE *fp) 127 { 128 int printed = 0; 129 130 for (int i = 0; i < data->nr_nodes; i++) { 131 struct syscall_node *node = &data->nodes[i]; 132 struct syscall_stats *stat = &node->stats; 133 double total = (double)(stat->total_time) / NSEC_PER_MSEC; 134 double min = (double)(stat->min_time) / NSEC_PER_MSEC; 135 double max = (double)(stat->max_time) / NSEC_PER_MSEC; 136 double avg = total / stat->count; 137 const char *name; 138 139 /* TODO: support other ABIs */ 140 name = syscalltbl__name(EM_HOST, node->syscall_nr); 141 if (name) 142 printed += fprintf(fp, " %-15s", name); 143 else 144 printed += fprintf(fp, " syscall:%-7d", node->syscall_nr); 145 146 printed += fprintf(fp, " %8u %6u %9.3f %9.3f %9.3f %9.3f %9.2f%%\n", 147 stat->count, stat->error, total, min, avg, max, 148 rel_stddev(stat)); 149 } 150 return printed; 151 } 152 153 static int update_thread_stats(struct hashmap *hash, struct syscall_key *map_key, 154 struct syscall_stats *map_data) 155 { 156 struct syscall_data *data; 157 struct syscall_node *nodes; 158 159 if (!hashmap__find(hash, map_key->cpu_or_tid, &data)) { 160 data = zalloc(sizeof(*data)); 161 if (data == NULL) 162 return -ENOMEM; 163 164 data->key = map_key->cpu_or_tid; 165 if (hashmap__add(hash, data->key, data) < 0) { 166 free(data); 167 return -ENOMEM; 168 } 169 } 170 171 /* update thread total stats */ 172 data->nr_events += map_data->count; 173 data->total_time += map_data->total_time; 174 175 nodes = reallocarray(data->nodes, data->nr_nodes + 1, sizeof(*nodes)); 176 if (nodes == NULL) 177 return -ENOMEM; 178 179 data->nodes = nodes; 180 nodes = &data->nodes[data->nr_nodes++]; 181 nodes->syscall_nr = map_key->nr; 182 183 /* each thread has an entry for each syscall, just use the stat */ 184 memcpy(&nodes->stats, map_data, sizeof(*map_data)); 185 return 0; 186 } 187 188 static int print_thread_stat(struct syscall_data *data, FILE *fp) 189 { 190 int printed = 0; 191 192 qsort(data->nodes, data->nr_nodes, sizeof(*data->nodes), nodecmp); 193 194 printed += fprintf(fp, " thread (%d), ", data->key); 195 printed += fprintf(fp, "%d events\n\n", data->nr_events); 196 197 printed += fprintf(fp, " syscall calls errors total min avg max stddev\n"); 198 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); 199 printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n"); 200 201 printed += print_common_stats(data, fp); 202 printed += fprintf(fp, "\n\n"); 203 204 return printed; 205 } 206 207 static int print_thread_stats(struct syscall_data **data, int nr_data, FILE *fp) 208 { 209 int printed = 0; 210 211 for (int i = 0; i < nr_data; i++) 212 printed += print_thread_stat(data[i], fp); 213 214 return printed; 215 } 216 217 static int update_total_stats(struct hashmap *hash, struct syscall_key *map_key, 218 struct syscall_stats *map_data) 219 { 220 struct syscall_data *data; 221 struct syscall_stats *stat; 222 223 if (!hashmap__find(hash, map_key->nr, &data)) { 224 data = zalloc(sizeof(*data)); 225 if (data == NULL) 226 return -ENOMEM; 227 228 data->nodes = zalloc(sizeof(*data->nodes)); 229 if (data->nodes == NULL) { 230 free(data); 231 return -ENOMEM; 232 } 233 234 data->nr_nodes = 1; 235 data->key = map_key->nr; 236 data->nodes->syscall_nr = data->key; 237 238 if (hashmap__add(hash, data->key, data) < 0) { 239 free(data->nodes); 240 free(data); 241 return -ENOMEM; 242 } 243 } 244 245 /* update total stats for this syscall */ 246 data->nr_events += map_data->count; 247 data->total_time += map_data->total_time; 248 249 /* This is sum of the same syscall from different CPUs */ 250 stat = &data->nodes->stats; 251 252 stat->total_time += map_data->total_time; 253 stat->squared_sum += map_data->squared_sum; 254 stat->count += map_data->count; 255 stat->error += map_data->error; 256 257 if (stat->max_time < map_data->max_time) 258 stat->max_time = map_data->max_time; 259 if (stat->min_time > map_data->min_time || stat->min_time == 0) 260 stat->min_time = map_data->min_time; 261 262 return 0; 263 } 264 265 static int print_total_stats(struct syscall_data **data, int nr_data, FILE *fp) 266 { 267 int printed = 0; 268 int nr_events = 0; 269 270 for (int i = 0; i < nr_data; i++) 271 nr_events += data[i]->nr_events; 272 273 printed += fprintf(fp, " total, %d events\n\n", nr_events); 274 275 printed += fprintf(fp, " syscall calls errors total min avg max stddev\n"); 276 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); 277 printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n"); 278 279 for (int i = 0; i < nr_data; i++) 280 printed += print_common_stats(data[i], fp); 281 282 printed += fprintf(fp, "\n\n"); 283 return printed; 284 } 285 286 int trace_print_bpf_summary(FILE *fp) 287 { 288 struct bpf_map *map = skel->maps.syscall_stats_map; 289 struct syscall_key *prev_key, key; 290 struct syscall_data **data = NULL; 291 struct hashmap schash; 292 struct hashmap_entry *entry; 293 int nr_data = 0; 294 int printed = 0; 295 int i; 296 size_t bkt; 297 298 hashmap__init(&schash, sc_node_hash, sc_node_equal, /*ctx=*/NULL); 299 300 printed = fprintf(fp, "\n Summary of events:\n\n"); 301 302 /* get stats from the bpf map */ 303 prev_key = NULL; 304 while (!bpf_map__get_next_key(map, prev_key, &key, sizeof(key))) { 305 struct syscall_stats stat; 306 307 if (!bpf_map__lookup_elem(map, &key, sizeof(key), &stat, sizeof(stat), 0)) { 308 if (skel->rodata->aggr_mode == SYSCALL_AGGR_THREAD) 309 update_thread_stats(&schash, &key, &stat); 310 else 311 update_total_stats(&schash, &key, &stat); 312 } 313 314 prev_key = &key; 315 } 316 317 nr_data = hashmap__size(&schash); 318 data = calloc(nr_data, sizeof(*data)); 319 if (data == NULL) 320 goto out; 321 322 i = 0; 323 hashmap__for_each_entry(&schash, entry, bkt) 324 data[i++] = entry->pvalue; 325 326 qsort(data, nr_data, sizeof(*data), datacmp); 327 328 if (skel->rodata->aggr_mode == SYSCALL_AGGR_THREAD) 329 printed += print_thread_stats(data, nr_data, fp); 330 else 331 printed += print_total_stats(data, nr_data, fp); 332 333 for (i = 0; i < nr_data && data; i++) { 334 free(data[i]->nodes); 335 free(data[i]); 336 } 337 free(data); 338 339 out: 340 hashmap__clear(&schash); 341 return printed; 342 } 343 344 void trace_cleanup_bpf_summary(void) 345 { 346 syscall_summary_bpf__destroy(skel); 347 } 348