1 // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
2 /* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
3 #define _GNU_SOURCE
4 #include <argp.h>
5 #include <libgen.h>
6 #include <string.h>
7 #include <stdlib.h>
8 #include <sched.h>
9 #include <pthread.h>
10 #include <dirent.h>
11 #include <signal.h>
12 #include <fcntl.h>
13 #include <unistd.h>
14 #include <sys/time.h>
15 #include <sys/sysinfo.h>
16 #include <sys/stat.h>
17 #include <bpf/libbpf.h>
18 #include <bpf/btf.h>
19 #include <bpf/bpf.h>
20 #include <libelf.h>
21 #include <gelf.h>
22 #include <float.h>
23 #include <math.h>
24 #include <limits.h>
25
/* Number of elements in an array object. Only meaningful for real arrays:
 * for a pointer the division yields an unrelated value.
 */
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif

/* Larger of two values. The selected argument is evaluated twice, so don't
 * pass expressions with side effects.
 */
#ifndef max
#define max(a, b) ((a) > (b) ? (a) : (b))
#endif

/* Smaller of two values; same double-evaluation caveat as max(). */
#ifndef min
#define min(a, b) ((a) < (b) ? (a) : (b))
#endif
37
/* Identifiers of all known stats. Numeric stats come first, so that their IDs
 * can be used directly as indices into verif_stats.stats[]; string-valued
 * file and program name IDs follow them.
 */
enum stat_id {
	VERDICT,
	DURATION,
	TOTAL_INSNS,
	TOTAL_STATES,
	PEAK_STATES,
	MAX_STATES_PER_INSN,
	MARK_READ_MAX_LEN,
	SIZE,
	JITED_SIZE,
	STACK,
	PROG_TYPE,
	ATTACH_TYPE,

	/* string-valued stats, kept outside of verif_stats.stats[] */
	FILE_NAME,
	PROG_NAME,

	/* total number of stat IDs (numeric and string-valued) */
	ALL_STATS_CNT,
	/* number of numeric stats, i.e., everything before FILE_NAME */
	NUM_STATS_CNT = FILE_NAME - VERDICT,
};
58
59 /* In comparison mode each stat can specify up to four different values:
60 * - A side value;
61 * - B side value;
62 * - absolute diff value;
63 * - relative (percentage) diff value.
64 *
65 * When specifying stat specs in comparison mode, user can use one of the
66 * following variant suffixes to specify which exact variant should be used for
67 * ordering or filtering:
68 * - `_a` for A side value;
69 * - `_b` for B side value;
70 * - `_diff` for absolute diff value;
71 * - `_pct` for relative (percentage) diff value.
72 *
73 * If no variant suffix is provided, then `_b` (control data) is assumed.
74 *
75 * As an example, let's say instructions stat has the following output:
76 *
77 * Insns (A) Insns (B) Insns (DIFF)
78 * --------- --------- --------------
79 * 21547 20920 -627 (-2.91%)
80 *
81 * Then:
82 * - 21547 is A side value (insns_a);
83 * - 20920 is B side value (insns_b);
84 * - -627 is absolute diff value (insns_diff);
85 * - -2.91% is relative diff value (insns_pct).
86 *
87 * For verdict there is no verdict_pct variant.
88 * For file and program name, _a and _b variants are equivalent and there are
89 * no _diff or _pct variants.
90 */
enum stat_variant {
	VARIANT_A,	/* `_a` suffix */
	VARIANT_B,	/* `_b` suffix; also the default when no suffix is given */
	VARIANT_DIFF,	/* `_diff` suffix */
	VARIANT_PCT,	/* `_pct` suffix */
};
97
/* Stats record of a single program. */
struct verif_stats {
	/* heap-allocated strings, released by free_verif_stats() */
	char *file_name;
	char *prog_name;

	/* numeric stats, indexed by the numeric part of enum stat_id */
	long stats[NUM_STATS_CNT];
};
104
/* joined comparison mode stats: one file/program pair together with
 * read-only pointers to its A side and B side stats records
 */
struct verif_stats_join {
	char *file_name;
	char *prog_name;

	const struct verif_stats *stats_a;
	const struct verif_stats *stats_b;
};
113
/* Ordered list of stat specifiers, as filled in by parse_stat(). Entry N of
 * each per-stat array describes the N-th specified stat.
 */
struct stat_specs {
	int spec_cnt;				/* number of valid entries */
	enum stat_id ids[ALL_STATS_CNT];	/* which stat */
	enum stat_variant variants[ALL_STATS_CNT]; /* which comparison-mode variant */
	bool asc[ALL_STATS_CNT];		/* ascending order? */
	bool abs[ALL_STATS_CNT];		/* stat was given as |<stat>| */
	int lens[ALL_STATS_CNT * 3]; /* 3x for comparison mode */
};
122
/* Result output format; RESFMT_TABLE and RESFMT_CSV are selectable with the
 * `-o table` and `-o csv` command-line options, respectively.
 */
enum resfmt {
	RESFMT_TABLE,
	RESFMT_TABLE_CALCLEN, /* fake format to pre-calculate table's column widths */
	RESFMT_CSV,
};
128
/* Kind of a filter expression, as determined by append_filter(). */
enum filter_kind {
	FILTER_NAME,	/* file and/or program name glob */
	FILTER_STAT,	/* <stat><op><value> comparison on a numeric stat */
};
133
/* Comparison operators accepted in stat filters; their textual spellings are
 * listed in the `operators` table.
 */
enum operator_kind {
	OP_EQ, /* == or = */
	OP_NEQ, /* != or <> */
	OP_LT, /* < */
	OP_LE, /* <= */
	OP_GT, /* > */
	OP_GE, /* >= */
};
142
/* A single parsed filter expression. Depending on `kind`, only one of the two
 * groups of fields below is meaningful; append_filter() zeroes the rest.
 */
struct filter {
	enum filter_kind kind;
	/* FILTER_NAME */
	char *any_glob;		/* '<glob>' form, matched against file and program names */
	char *file_glob;	/* file part of '<file-glob>/<prog-glob>', NULL if empty */
	char *prog_glob;	/* program part of '<file-glob>/<prog-glob>', NULL if empty */
	/* FILTER_STAT */
	enum operator_kind op;
	int stat_id;		/* one of the numeric enum stat_id values */
	enum stat_variant stat_var;
	long value;		/* right-hand side of the comparison */
	bool abs;		/* NOTE(review): presumably "compare absolute value"; consumer isn't visible here */
};
156
/* Global tool state: command-line settings filled in by parse_arg() plus
 * collected stats and counters.
 */
static struct env {
	/* positional arguments (heap-allocated copies) */
	char **filenames;
	int filename_cnt;
	bool verbose;			/* -v, also implied by -d */
	bool debug;			/* -d */
	bool quiet;			/* -q */
	bool force_checkpoints;		/* -t */
	bool force_reg_invariants;	/* -r */
	enum resfmt out_fmt;		/* -o */
	bool show_version;		/* -V */
	bool comparison_mode;		/* -C */
	bool replay_mode;		/* -R */
	int top_n;			/* -n */

	int log_level;			/* -l */
	int log_size;			/* --log-size */
	bool log_fixed;			/* --log-fixed */

	struct verif_stats *prog_stats;
	int prog_stat_cnt;

	/* baseline_stats is allocated and used only in comparison mode */
	struct verif_stats *baseline_stats;
	int baseline_stat_cnt;

	struct verif_stats_join *join_stats;
	int join_stat_cnt;

	struct stat_specs output_spec;	/* -e */
	struct stat_specs sort_spec;	/* -s */

	/* -f expressions; those prefixed with '!' go into deny_filters */
	struct filter *allow_filters;
	struct filter *deny_filters;
	int allow_filter_cnt;
	int deny_filter_cnt;

	int files_processed;
	int files_skipped;
	int progs_processed;
	int progs_skipped;
	int top_src_lines;		/* -S */
} env;
199
libbpf_print_fn(enum libbpf_print_level level,const char * format,va_list args)200 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
201 {
202 if (!env.verbose)
203 return 0;
204 if (level == LIBBPF_DEBUG && !env.debug)
205 return 0;
206 return vfprintf(stderr, format, args);
207 }
208
/* Fallback version string, used when the build doesn't define one. */
#ifndef VERISTAT_VERSION
#define VERISTAT_VERSION "<kernel>"
#endif

/* Well-known globals picked up by argp for --version and --help output. */
const char *argp_program_version = "veristat v" VERISTAT_VERSION;
const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
/* Usage text handed to argp through `argp.doc`. */
const char argp_program_doc[] =
"veristat    BPF verifier stats collection and comparison tool.\n"
"\n"
"USAGE: veristat <obj-file> [<obj-file>...]\n"
"   OR: veristat -C <baseline.csv> <comparison.csv>\n"
"   OR: veristat -R <results.csv>\n"
"   OR: veristat -vl2 <to_analyze.bpf.o>\n";
222
/* Keys of options that have only a long form; the values lie outside of the
 * character range used for short option keys.
 */
enum {
	OPT_LOG_FIXED = 1000,
	OPT_LOG_SIZE = 1001,
};
227
/* Command-line option table; each key is handled in parse_arg(). The empty
 * entry at the end terminates the table.
 */
static const struct argp_option opts[] = {
	{ NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
	{ "version", 'V', NULL, 0, "Print version" },
	{ "verbose", 'v', NULL, 0, "Verbose mode" },
	{ "debug", 'd', NULL, 0, "Debug mode (turns on libbpf debug logging)" },
	{ "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode, 2 for full verification log)" },
	{ "log-fixed", OPT_LOG_FIXED, NULL, 0, "Disable verifier log rotation" },
	{ "log-size", OPT_LOG_SIZE, "BYTES", 0, "Customize verifier log size (default to 16MB)" },
	{ "top-n", 'n', "N", 0, "Emit only up to first N results." },
	{ "quiet", 'q', NULL, 0, "Quiet mode" },
	{ "emit", 'e', "SPEC", 0, "Specify stats to be emitted" },
	{ "sort", 's', "SPEC", 0, "Specify sort order" },
	{ "output-format", 'o', "FMT", 0, "Result output format (table, csv), default is table." },
	{ "compare", 'C', NULL, 0, "Comparison mode" },
	{ "replay", 'R', NULL, 0, "Replay mode" },
	{ "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)." },
	{ "test-states", 't', NULL, 0,
	  "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" },
	{ "test-reg-invariants", 'r', NULL, 0,
	  "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" },
	{ "top-src-lines", 'S', "N", 0, "Emit N most frequent source code lines" },
	{},
};
251
/* Forward declarations of helpers that parse_arg() calls before they are
 * defined.
 */
static int parse_stats(const char *stats_str, struct stat_specs *specs);
static int append_filter(struct filter **filters, int *cnt, const char *str);
static int append_filter_file(const char *path);
255
parse_arg(int key,char * arg,struct argp_state * state)256 static error_t parse_arg(int key, char *arg, struct argp_state *state)
257 {
258 void *tmp;
259 int err;
260
261 switch (key) {
262 case 'h':
263 argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
264 break;
265 case 'V':
266 env.show_version = true;
267 break;
268 case 'v':
269 env.verbose = true;
270 break;
271 case 'd':
272 env.debug = true;
273 env.verbose = true;
274 break;
275 case 'q':
276 env.quiet = true;
277 break;
278 case 'e':
279 err = parse_stats(arg, &env.output_spec);
280 if (err)
281 return err;
282 break;
283 case 's':
284 err = parse_stats(arg, &env.sort_spec);
285 if (err)
286 return err;
287 break;
288 case 'o':
289 if (strcmp(arg, "table") == 0) {
290 env.out_fmt = RESFMT_TABLE;
291 } else if (strcmp(arg, "csv") == 0) {
292 env.out_fmt = RESFMT_CSV;
293 } else {
294 fprintf(stderr, "Unrecognized output format '%s'\n", arg);
295 return -EINVAL;
296 }
297 break;
298 case 'l':
299 errno = 0;
300 env.log_level = strtol(arg, NULL, 10);
301 if (errno) {
302 fprintf(stderr, "invalid log level: %s\n", arg);
303 argp_usage(state);
304 }
305 break;
306 case OPT_LOG_FIXED:
307 env.log_fixed = true;
308 break;
309 case OPT_LOG_SIZE:
310 errno = 0;
311 env.log_size = strtol(arg, NULL, 10);
312 if (errno) {
313 fprintf(stderr, "invalid log size: %s\n", arg);
314 argp_usage(state);
315 }
316 break;
317 case 't':
318 env.force_checkpoints = true;
319 break;
320 case 'r':
321 env.force_reg_invariants = true;
322 break;
323 case 'n':
324 errno = 0;
325 env.top_n = strtol(arg, NULL, 10);
326 if (errno) {
327 fprintf(stderr, "invalid top N specifier: %s\n", arg);
328 argp_usage(state);
329 }
330 case 'C':
331 env.comparison_mode = true;
332 break;
333 case 'R':
334 env.replay_mode = true;
335 break;
336 case 'f':
337 if (arg[0] == '@')
338 err = append_filter_file(arg + 1);
339 else if (arg[0] == '!')
340 err = append_filter(&env.deny_filters, &env.deny_filter_cnt, arg + 1);
341 else
342 err = append_filter(&env.allow_filters, &env.allow_filter_cnt, arg);
343 if (err) {
344 fprintf(stderr, "Failed to collect program filter expressions: %d\n", err);
345 return err;
346 }
347 break;
348 case 'S':
349 errno = 0;
350 env.top_src_lines = strtol(arg, NULL, 10);
351 if (errno) {
352 fprintf(stderr, "invalid top lines N specifier: %s\n", arg);
353 argp_usage(state);
354 }
355 break;
356 case ARGP_KEY_ARG:
357 tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames));
358 if (!tmp)
359 return -ENOMEM;
360 env.filenames = tmp;
361 env.filenames[env.filename_cnt] = strdup(arg);
362 if (!env.filenames[env.filename_cnt])
363 return -ENOMEM;
364 env.filename_cnt++;
365 break;
366 default:
367 return ARGP_ERR_UNKNOWN;
368 }
369 return 0;
370 }
371
/* argp parser description tying together the option table, the per-option
 * callback and the usage text.
 */
static const struct argp argp = {
	.options = opts,
	.parser = parse_arg,
	.doc = argp_program_doc,
};
377
378
/* Adapted from perf/util/string.c
 *
 * Check whether the whole of `str` matches glob `pat`, in which '*' stands for
 * any (possibly empty) sequence of characters and every other character
 * matches only itself.
 */
static bool glob_matches(const char *str, const char *pat)
{
	/* literal part has to match character by character */
	for (; *str && *pat && *pat != '*'; str++, pat++) {
		if (*pat != *str)
			return false;
	}

	/* without a wild card both strings have to end right here */
	if (*pat != '*')
		return *str == '\0' && *pat == '\0';

	/* a run of wild cards is the same as a single one */
	do {
		pat++;
	} while (*pat == '*');

	/* tail wild card matches all */
	if (*pat == '\0')
		return true;

	/* try to match the rest of the pattern at every remaining position */
	for (; *str; str++) {
		if (glob_matches(str, pat))
			return true;
	}

	/* string is exhausted, but some non-wild-card pattern is left */
	return false;
}
400
is_bpf_obj_file(const char * path)401 static bool is_bpf_obj_file(const char *path) {
402 Elf64_Ehdr *ehdr;
403 int fd, err = -EINVAL;
404 Elf *elf = NULL;
405
406 fd = open(path, O_RDONLY | O_CLOEXEC);
407 if (fd < 0)
408 return true; /* we'll fail later and propagate error */
409
410 /* ensure libelf is initialized */
411 (void)elf_version(EV_CURRENT);
412
413 elf = elf_begin(fd, ELF_C_READ, NULL);
414 if (!elf)
415 goto cleanup;
416
417 if (elf_kind(elf) != ELF_K_ELF || gelf_getclass(elf) != ELFCLASS64)
418 goto cleanup;
419
420 ehdr = elf64_getehdr(elf);
421 /* Old LLVM set e_machine to EM_NONE */
422 if (!ehdr || ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF))
423 goto cleanup;
424
425 err = 0;
426 cleanup:
427 if (elf)
428 elf_end(elf);
429 close(fd);
430 return err == 0;
431 }
432
should_process_file_prog(const char * filename,const char * prog_name)433 static bool should_process_file_prog(const char *filename, const char *prog_name)
434 {
435 struct filter *f;
436 int i, allow_cnt = 0;
437
438 for (i = 0; i < env.deny_filter_cnt; i++) {
439 f = &env.deny_filters[i];
440 if (f->kind != FILTER_NAME)
441 continue;
442
443 if (f->any_glob && glob_matches(filename, f->any_glob))
444 return false;
445 if (f->any_glob && prog_name && glob_matches(prog_name, f->any_glob))
446 return false;
447 if (f->file_glob && glob_matches(filename, f->file_glob))
448 return false;
449 if (f->prog_glob && prog_name && glob_matches(prog_name, f->prog_glob))
450 return false;
451 }
452
453 for (i = 0; i < env.allow_filter_cnt; i++) {
454 f = &env.allow_filters[i];
455 if (f->kind != FILTER_NAME)
456 continue;
457
458 allow_cnt++;
459 if (f->any_glob) {
460 if (glob_matches(filename, f->any_glob))
461 return true;
462 /* If we don't know program name yet, any_glob filter
463 * has to assume that current BPF object file might be
464 * relevant; we'll check again later on after opening
465 * BPF object file, at which point program name will
466 * be known finally.
467 */
468 if (!prog_name || glob_matches(prog_name, f->any_glob))
469 return true;
470 } else {
471 if (f->file_glob && !glob_matches(filename, f->file_glob))
472 continue;
473 if (f->prog_glob && prog_name && !glob_matches(prog_name, f->prog_glob))
474 continue;
475 return true;
476 }
477 }
478
479 /* if there are no file/prog name allow filters, allow all progs,
480 * unless they are denied earlier explicitly
481 */
482 return allow_cnt == 0;
483 }
484
/* Textual spellings of comparison operators recognized in stat filters.
 * append_filter() tries them in array order and uses the first one found
 * anywhere in the filter string.
 */
static struct {
	enum operator_kind op_kind;
	const char *op_str;
} operators[] = {
	/* Order of these definitions matter to avoid situations like '<'
	 * matching part of what is actually a '<>' operator. That is,
	 * substrings should go last.
	 */
	{ OP_EQ, "==" },
	{ OP_NEQ, "!=" },
	{ OP_NEQ, "<>" },
	{ OP_LE, "<=" },
	{ OP_LT, "<" },
	{ OP_GE, ">=" },
	{ OP_GT, ">" },
	{ OP_EQ, "=" },
};
502
/* Forward declaration; append_filter() calls it before its definition. */
static bool parse_stat_id_var(const char *name, size_t len, int *id,
			      enum stat_variant *var, bool *is_abs);
505
append_filter(struct filter ** filters,int * cnt,const char * str)506 static int append_filter(struct filter **filters, int *cnt, const char *str)
507 {
508 struct filter *f;
509 void *tmp;
510 const char *p;
511 int i;
512
513 tmp = realloc(*filters, (*cnt + 1) * sizeof(**filters));
514 if (!tmp)
515 return -ENOMEM;
516 *filters = tmp;
517
518 f = &(*filters)[*cnt];
519 memset(f, 0, sizeof(*f));
520
521 /* First, let's check if it's a stats filter of the following form:
522 * <stat><op><value, where:
523 * - <stat> is one of supported numerical stats (verdict is also
524 * considered numerical, failure == 0, success == 1);
525 * - <op> is comparison operator (see `operators` definitions);
526 * - <value> is an integer (or failure/success, or false/true as
527 * special aliases for 0 and 1, respectively).
528 * If the form doesn't match what user provided, we assume file/prog
529 * glob filter.
530 */
531 for (i = 0; i < ARRAY_SIZE(operators); i++) {
532 enum stat_variant var;
533 int id;
534 long val;
535 const char *end = str;
536 const char *op_str;
537 bool is_abs;
538
539 op_str = operators[i].op_str;
540 p = strstr(str, op_str);
541 if (!p)
542 continue;
543
544 if (!parse_stat_id_var(str, p - str, &id, &var, &is_abs)) {
545 fprintf(stderr, "Unrecognized stat name in '%s'!\n", str);
546 return -EINVAL;
547 }
548 if (id >= FILE_NAME) {
549 fprintf(stderr, "Non-integer stat is specified in '%s'!\n", str);
550 return -EINVAL;
551 }
552
553 p += strlen(op_str);
554
555 if (strcasecmp(p, "true") == 0 ||
556 strcasecmp(p, "t") == 0 ||
557 strcasecmp(p, "success") == 0 ||
558 strcasecmp(p, "succ") == 0 ||
559 strcasecmp(p, "s") == 0 ||
560 strcasecmp(p, "match") == 0 ||
561 strcasecmp(p, "m") == 0) {
562 val = 1;
563 } else if (strcasecmp(p, "false") == 0 ||
564 strcasecmp(p, "f") == 0 ||
565 strcasecmp(p, "failure") == 0 ||
566 strcasecmp(p, "fail") == 0 ||
567 strcasecmp(p, "mismatch") == 0 ||
568 strcasecmp(p, "mis") == 0) {
569 val = 0;
570 } else {
571 errno = 0;
572 val = strtol(p, (char **)&end, 10);
573 if (errno || end == p || *end != '\0' ) {
574 fprintf(stderr, "Invalid integer value in '%s'!\n", str);
575 return -EINVAL;
576 }
577 }
578
579 f->kind = FILTER_STAT;
580 f->stat_id = id;
581 f->stat_var = var;
582 f->op = operators[i].op_kind;
583 f->abs = true;
584 f->value = val;
585
586 *cnt += 1;
587 return 0;
588 }
589
590 /* File/prog filter can be specified either as '<glob>' or
591 * '<file-glob>/<prog-glob>'. In the former case <glob> is applied to
592 * both file and program names. This seems to be way more useful in
593 * practice. If user needs full control, they can use '/<prog-glob>'
594 * form to glob just program name, or '<file-glob>/' to glob only file
595 * name. But usually common <glob> seems to be the most useful and
596 * ergonomic way.
597 */
598 f->kind = FILTER_NAME;
599 p = strchr(str, '/');
600 if (!p) {
601 f->any_glob = strdup(str);
602 if (!f->any_glob)
603 return -ENOMEM;
604 } else {
605 if (str != p) {
606 /* non-empty file glob */
607 f->file_glob = strndup(str, p - str);
608 if (!f->file_glob)
609 return -ENOMEM;
610 }
611 if (strlen(p + 1) > 0) {
612 /* non-empty prog glob */
613 f->prog_glob = strdup(p + 1);
614 if (!f->prog_glob) {
615 free(f->file_glob);
616 f->file_glob = NULL;
617 return -ENOMEM;
618 }
619 }
620 }
621
622 *cnt += 1;
623 return 0;
624 }
625
append_filter_file(const char * path)626 static int append_filter_file(const char *path)
627 {
628 char buf[1024];
629 FILE *f;
630 int err = 0;
631
632 f = fopen(path, "r");
633 if (!f) {
634 err = -errno;
635 fprintf(stderr, "Failed to open filters in '%s': %d\n", path, err);
636 return err;
637 }
638
639 while (fscanf(f, " %1023[^\n]\n", buf) == 1) {
640 /* lines starting with # are comments, skip them */
641 if (buf[0] == '\0' || buf[0] == '#')
642 continue;
643 /* lines starting with ! are negative match filters */
644 if (buf[0] == '!')
645 err = append_filter(&env.deny_filters, &env.deny_filter_cnt, buf + 1);
646 else
647 err = append_filter(&env.allow_filters, &env.allow_filter_cnt, buf);
648 if (err)
649 goto cleanup;
650 }
651
652 cleanup:
653 fclose(f);
654 return err;
655 }
656
/* Predefined set of eight output columns: file, program, verdict, duration,
 * instruction and state totals, and both program sizes.
 */
static const struct stat_specs default_output_spec = {
	.spec_cnt = 8,
	.ids = {
		FILE_NAME, PROG_NAME, VERDICT, DURATION,
		TOTAL_INSNS, TOTAL_STATES, SIZE, JITED_SIZE
	},
};
664
/* Predefined set of output columns listing every one of the 14 stats
 * (ALL_STATS_CNT); judging by the name, it's the default for CSV output.
 */
static const struct stat_specs default_csv_output_spec = {
	.spec_cnt = 14,
	.ids = {
		FILE_NAME, PROG_NAME, VERDICT, DURATION,
		TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
		MAX_STATES_PER_INSN, MARK_READ_MAX_LEN,
		SIZE, JITED_SIZE, PROG_TYPE, ATTACH_TYPE,
		STACK,
	},
};
675
/* Predefined sort order: by file name, then by program name, both ascending. */
static const struct stat_specs default_sort_spec = {
	.spec_cnt = 2,
	.ids = {
		FILE_NAME, PROG_NAME,
	},
	.asc = { true, true, },
};
683
/* sorting for comparison mode to join two data sets: by file name, then by
 * program name, both ascending
 */
static const struct stat_specs join_sort_spec = {
	.spec_cnt = 2,
	.ids = {
		FILE_NAME, PROG_NAME,
	},
	.asc = { true, true, },
};
692
/* Static description of each stat, indexed by enum stat_id. */
static struct stat_def {
	/* column header text */
	const char *header;
	/* names (aliases) under which the stat is recognized by
	 * parse_stat_id_var(); unused slots are NULL
	 */
	const char *names[4];
	/* sort order used when the user doesn't specify one explicitly */
	bool asc_by_default;
	bool left_aligned;
} stat_defs[] = {
	[FILE_NAME] = { "File", {"file_name", "filename", "file"}, true /* asc */, true /* left */ },
	[PROG_NAME] = { "Program", {"prog_name", "progname", "prog"}, true /* asc */, true /* left */ },
	[VERDICT] = { "Verdict", {"verdict"}, true /* asc: failure, success */, true /* left */ },
	[DURATION] = { "Duration (us)", {"duration", "dur"}, },
	[TOTAL_INSNS] = { "Insns", {"total_insns", "insns"}, },
	[TOTAL_STATES] = { "States", {"total_states", "states"}, },
	[PEAK_STATES] = { "Peak states", {"peak_states"}, },
	[MAX_STATES_PER_INSN] = { "Max states per insn", {"max_states_per_insn"}, },
	[MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, },
	[SIZE] = { "Program size", {"prog_size"}, },
	[JITED_SIZE] = { "Jited size", {"prog_size_jited"}, },
	[STACK] = {"Stack depth", {"stack_depth", "stack"}, },
	[PROG_TYPE] = { "Program type", {"prog_type"}, },
	[ATTACH_TYPE] = { "Attach type", {"attach_type", }, },
};
714
parse_stat_id_var(const char * name,size_t len,int * id,enum stat_variant * var,bool * is_abs)715 static bool parse_stat_id_var(const char *name, size_t len, int *id,
716 enum stat_variant *var, bool *is_abs)
717 {
718 static const char *var_sfxs[] = {
719 [VARIANT_A] = "_a",
720 [VARIANT_B] = "_b",
721 [VARIANT_DIFF] = "_diff",
722 [VARIANT_PCT] = "_pct",
723 };
724 int i, j, k;
725
726 /* |<stat>| means we take absolute value of given stat */
727 *is_abs = false;
728 if (len > 2 && name[0] == '|' && name[len - 1] == '|') {
729 *is_abs = true;
730 name += 1;
731 len -= 2;
732 }
733
734 for (i = 0; i < ARRAY_SIZE(stat_defs); i++) {
735 struct stat_def *def = &stat_defs[i];
736 size_t alias_len, sfx_len;
737 const char *alias;
738
739 for (j = 0; j < ARRAY_SIZE(stat_defs[i].names); j++) {
740 alias = def->names[j];
741 if (!alias)
742 continue;
743
744 alias_len = strlen(alias);
745 if (strncmp(name, alias, alias_len) != 0)
746 continue;
747
748 if (alias_len == len) {
749 /* If no variant suffix is specified, we
750 * assume control group (just in case we are
751 * in comparison mode. Variant is ignored in
752 * non-comparison mode.
753 */
754 *var = VARIANT_B;
755 *id = i;
756 return true;
757 }
758
759 for (k = 0; k < ARRAY_SIZE(var_sfxs); k++) {
760 sfx_len = strlen(var_sfxs[k]);
761 if (alias_len + sfx_len != len)
762 continue;
763
764 if (strncmp(name + alias_len, var_sfxs[k], sfx_len) == 0) {
765 *var = (enum stat_variant)k;
766 *id = i;
767 return true;
768 }
769 }
770 }
771 }
772
773 return false;
774 }
775
/* Is `c` the suffix character that requests ascending order? */
static bool is_asc_sym(char c)
{
	switch (c) {
	case '^':
		return true;
	default:
		return false;
	}
}
780
/* Is `c` one of the suffix characters that request descending order? */
static bool is_desc_sym(char c)
{
	switch (c) {
	case 'v':
	case 'V':
	case '.':
	case '!':
	case '_':
		return true;
	default:
		return false;
	}
}
785
parse_stat(const char * stat_name,struct stat_specs * specs)786 static int parse_stat(const char *stat_name, struct stat_specs *specs)
787 {
788 int id;
789 bool has_order = false, is_asc = false, is_abs = false;
790 size_t len = strlen(stat_name);
791 enum stat_variant var;
792
793 if (specs->spec_cnt >= ARRAY_SIZE(specs->ids)) {
794 fprintf(stderr, "Can't specify more than %zd stats\n", ARRAY_SIZE(specs->ids));
795 return -E2BIG;
796 }
797
798 if (len > 1 && (is_asc_sym(stat_name[len - 1]) || is_desc_sym(stat_name[len - 1]))) {
799 has_order = true;
800 is_asc = is_asc_sym(stat_name[len - 1]);
801 len -= 1;
802 }
803
804 if (!parse_stat_id_var(stat_name, len, &id, &var, &is_abs)) {
805 fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name);
806 return -ESRCH;
807 }
808
809 specs->ids[specs->spec_cnt] = id;
810 specs->variants[specs->spec_cnt] = var;
811 specs->asc[specs->spec_cnt] = has_order ? is_asc : stat_defs[id].asc_by_default;
812 specs->abs[specs->spec_cnt] = is_abs;
813 specs->spec_cnt++;
814
815 return 0;
816 }
817
/* Parse a comma-separated list of stat specifiers and append each of them to
 * `specs`. Input string is left untouched, a private copy is tokenized.
 *
 * Returns 0 on success, -ENOMEM if the copy can't be allocated, or the first
 * error returned by parse_stat(); specifiers parsed before the failing one
 * stay in `specs`.
 */
static int parse_stats(const char *stats_str, struct stat_specs *specs)
{
	char *copy, *tok, *save = NULL;
	int rc = 0;

	copy = strdup(stats_str);
	if (!copy)
		return -ENOMEM;

	for (tok = strtok_r(copy, ",", &save); tok; tok = strtok_r(NULL, ",", &save)) {
		rc = parse_stat(tok, specs);
		if (rc)
			break;
	}

	free(copy);
	return rc;
}
838
free_verif_stats(struct verif_stats * stats,size_t stat_cnt)839 static void free_verif_stats(struct verif_stats *stats, size_t stat_cnt)
840 {
841 int i;
842
843 if (!stats)
844 return;
845
846 for (i = 0; i < stat_cnt; i++) {
847 free(stats[i].file_name);
848 free(stats[i].prog_name);
849 }
850 free(stats);
851 }
852
/* 64KB scratch buffer; presumably it receives the verifier log (users are
 * outside of this part of the file).
 */
static char verif_log_buf[64 * 1024];

/* parse_verif_log() looks at no more than this many lines, counting from the
 * end of the log
 */
#define MAX_PARSED_LOG_LINES 100
856
parse_verif_log(char * const buf,size_t buf_sz,struct verif_stats * s)857 static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *s)
858 {
859 const char *cur;
860 int pos, lines, sub_stack, cnt = 0;
861 char *state = NULL, *token, stack[512];
862
863 buf[buf_sz - 1] = '\0';
864
865 for (pos = strlen(buf) - 1, lines = 0; pos >= 0 && lines < MAX_PARSED_LOG_LINES; lines++) {
866 /* find previous endline or otherwise take the start of log buf */
867 for (cur = &buf[pos]; cur > buf && cur[0] != '\n'; cur--, pos--) {
868 }
869 /* next time start from end of previous line (or pos goes to <0) */
870 pos--;
871 /* if we found endline, point right after endline symbol;
872 * otherwise, stay at the beginning of log buf
873 */
874 if (cur[0] == '\n')
875 cur++;
876
877 if (1 == sscanf(cur, "verification time %ld usec\n", &s->stats[DURATION]))
878 continue;
879 if (5 == sscanf(cur, "processed %ld insns (limit %*d) max_states_per_insn %ld total_states %ld peak_states %ld mark_read %ld",
880 &s->stats[TOTAL_INSNS],
881 &s->stats[MAX_STATES_PER_INSN],
882 &s->stats[TOTAL_STATES],
883 &s->stats[PEAK_STATES],
884 &s->stats[MARK_READ_MAX_LEN]))
885 continue;
886
887 if (1 == sscanf(cur, "stack depth %511s", stack))
888 continue;
889 }
890 while ((token = strtok_r(cnt++ ? NULL : stack, "+", &state))) {
891 if (sscanf(token, "%d", &sub_stack) == 0)
892 break;
893 s->stats[STACK] += sub_stack;
894 }
895 return 0;
896 }
897
/* A line of text and the number of times it occurs; used by
 * print_top_src_lines() to build a frequency table.
 */
struct line_cnt {
	char *line;	/* not owned, points into the buffer being parsed */
	int cnt;
};
902
/* qsort() comparator for an array of C strings (elements are `char *`):
 * orders them the way strcmp() does.
 */
static int str_cmp(const void *a, const void *b)
{
	const char *lhs = *(const char **)a;
	const char *rhs = *(const char **)b;

	return strcmp(lhs, rhs);
}
910
line_cnt_cmp(const void * a,const void * b)911 static int line_cnt_cmp(const void *a, const void *b)
912 {
913 const struct line_cnt *a_cnt = (const struct line_cnt *)a;
914 const struct line_cnt *b_cnt = (const struct line_cnt *)b;
915
916 if (a_cnt->cnt != b_cnt->cnt)
917 return a_cnt->cnt > b_cnt->cnt ? -1 : 1;
918 return strcmp(a_cnt->line, b_cnt->line);
919 }
920
print_top_src_lines(char * const buf,size_t buf_sz,const char * prog_name)921 static int print_top_src_lines(char * const buf, size_t buf_sz, const char *prog_name)
922 {
923 int lines_cap = 0;
924 int lines_size = 0;
925 char **lines = NULL;
926 char *line = NULL;
927 char *state;
928 struct line_cnt *freq = NULL;
929 struct line_cnt *cur;
930 int unique_lines;
931 int err = 0;
932 int i;
933
934 while ((line = strtok_r(line ? NULL : buf, "\n", &state))) {
935 if (strncmp(line, "; ", 2) != 0)
936 continue;
937 line += 2;
938
939 if (lines_size == lines_cap) {
940 char **tmp;
941
942 lines_cap = max(16, lines_cap * 2);
943 tmp = realloc(lines, lines_cap * sizeof(*tmp));
944 if (!tmp) {
945 err = -ENOMEM;
946 goto cleanup;
947 }
948 lines = tmp;
949 }
950 lines[lines_size] = line;
951 lines_size++;
952 }
953
954 if (lines_size == 0)
955 goto cleanup;
956
957 qsort(lines, lines_size, sizeof(*lines), str_cmp);
958
959 freq = calloc(lines_size, sizeof(*freq));
960 if (!freq) {
961 err = -ENOMEM;
962 goto cleanup;
963 }
964
965 cur = freq;
966 cur->line = lines[0];
967 cur->cnt = 1;
968 for (i = 1; i < lines_size; ++i) {
969 if (strcmp(lines[i], cur->line) != 0) {
970 cur++;
971 cur->line = lines[i];
972 cur->cnt = 0;
973 }
974 cur->cnt++;
975 }
976 unique_lines = cur - freq + 1;
977
978 qsort(freq, unique_lines, sizeof(struct line_cnt), line_cnt_cmp);
979
980 printf("Top source lines (%s):\n", prog_name);
981 for (i = 0; i < min(unique_lines, env.top_src_lines); ++i) {
982 const char *src_code = freq[i].line;
983 const char *src_line = NULL;
984 char *split = strrchr(freq[i].line, '@');
985
986 if (split) {
987 src_line = split + 1;
988
989 while (*src_line && isspace(*src_line))
990 src_line++;
991
992 while (split > src_code && isspace(*split))
993 split--;
994 *split = '\0';
995 }
996
997 if (src_line)
998 printf("%5d: (%s)\t%s\n", freq[i].cnt, src_line, src_code);
999 else
1000 printf("%5d: %s\n", freq[i].cnt, src_code);
1001 }
1002 printf("\n");
1003
1004 cleanup:
1005 free(freq);
1006 free(lines);
1007 return err;
1008 }
1009
guess_prog_type_by_ctx_name(const char * ctx_name,enum bpf_prog_type * prog_type,enum bpf_attach_type * attach_type)1010 static int guess_prog_type_by_ctx_name(const char *ctx_name,
1011 enum bpf_prog_type *prog_type,
1012 enum bpf_attach_type *attach_type)
1013 {
1014 /* We need to guess program type based on its declared context type.
1015 * This guess can't be perfect as many different program types might
1016 * share the same context type. So we can only hope to reasonably
1017 * well guess this and get lucky.
1018 *
1019 * Just in case, we support both UAPI-side type names and
1020 * kernel-internal names.
1021 */
1022 static struct {
1023 const char *uapi_name;
1024 const char *kern_name;
1025 enum bpf_prog_type prog_type;
1026 enum bpf_attach_type attach_type;
1027 } ctx_map[] = {
1028 /* __sk_buff is most ambiguous, we assume TC program */
1029 { "__sk_buff", "sk_buff", BPF_PROG_TYPE_SCHED_CLS },
1030 { "bpf_sock", "sock", BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND },
1031 { "bpf_sock_addr", "bpf_sock_addr_kern", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND },
1032 { "bpf_sock_ops", "bpf_sock_ops_kern", BPF_PROG_TYPE_SOCK_OPS, BPF_CGROUP_SOCK_OPS },
1033 { "sk_msg_md", "sk_msg", BPF_PROG_TYPE_SK_MSG, BPF_SK_MSG_VERDICT },
1034 { "bpf_cgroup_dev_ctx", "bpf_cgroup_dev_ctx", BPF_PROG_TYPE_CGROUP_DEVICE, BPF_CGROUP_DEVICE },
1035 { "bpf_sysctl", "bpf_sysctl_kern", BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_CGROUP_SYSCTL },
1036 { "bpf_sockopt", "bpf_sockopt_kern", BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT },
1037 { "sk_reuseport_md", "sk_reuseport_kern", BPF_PROG_TYPE_SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE },
1038 { "bpf_sk_lookup", "bpf_sk_lookup_kern", BPF_PROG_TYPE_SK_LOOKUP, BPF_SK_LOOKUP },
1039 { "xdp_md", "xdp_buff", BPF_PROG_TYPE_XDP, BPF_XDP },
1040 /* tracing types with no expected attach type */
1041 { "bpf_user_pt_regs_t", "pt_regs", BPF_PROG_TYPE_KPROBE },
1042 { "bpf_perf_event_data", "bpf_perf_event_data_kern", BPF_PROG_TYPE_PERF_EVENT },
1043 /* raw_tp programs use u64[] from kernel side, we don't want
1044 * to match on that, probably; so NULL for kern-side type
1045 */
1046 { "bpf_raw_tracepoint_args", NULL, BPF_PROG_TYPE_RAW_TRACEPOINT },
1047 };
1048 int i;
1049
1050 if (!ctx_name)
1051 return -EINVAL;
1052
1053 for (i = 0; i < ARRAY_SIZE(ctx_map); i++) {
1054 if (strcmp(ctx_map[i].uapi_name, ctx_name) == 0 ||
1055 (ctx_map[i].kern_name && strcmp(ctx_map[i].kern_name, ctx_name) == 0)) {
1056 *prog_type = ctx_map[i].prog_type;
1057 *attach_type = ctx_map[i].attach_type;
1058 return 0;
1059 }
1060 }
1061
1062 return -ESRCH;
1063 }
1064
1065 /* Make sure only target program is referenced from struct_ops map,
1066 * otherwise libbpf would automatically set autocreate for all
1067 * referenced programs.
1068 * See libbpf.c:bpf_object_adjust_struct_ops_autoload.
1069 */
mask_unrelated_struct_ops_progs(struct bpf_object * obj,struct bpf_map * map,struct bpf_program * prog)1070 static void mask_unrelated_struct_ops_progs(struct bpf_object *obj,
1071 struct bpf_map *map,
1072 struct bpf_program *prog)
1073 {
1074 struct btf *btf = bpf_object__btf(obj);
1075 const struct btf_type *t, *mt;
1076 struct btf_member *m;
1077 int i, moff;
1078 size_t data_sz, ptr_sz = sizeof(void *);
1079 void *data;
1080
1081 t = btf__type_by_id(btf, bpf_map__btf_value_type_id(map));
1082 if (!btf_is_struct(t))
1083 return;
1084
1085 data = bpf_map__initial_value(map, &data_sz);
1086 for (i = 0; i < btf_vlen(t); i++) {
1087 m = &btf_members(t)[i];
1088 mt = btf__type_by_id(btf, m->type);
1089 if (!btf_is_ptr(mt))
1090 continue;
1091 moff = m->offset / 8;
1092 if (moff + ptr_sz > data_sz)
1093 continue;
1094 if (memcmp(data + moff, &prog, ptr_sz) == 0)
1095 continue;
1096 memset(data + moff, 0, ptr_sz);
1097 }
1098 }
1099
fixup_obj(struct bpf_object * obj,struct bpf_program * prog,const char * filename)1100 static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const char *filename)
1101 {
1102 struct bpf_map *map;
1103
1104 bpf_object__for_each_map(map, obj) {
1105 /* disable pinning */
1106 bpf_map__set_pin_path(map, NULL);
1107
1108 /* fix up map size, if necessary */
1109 switch (bpf_map__type(map)) {
1110 case BPF_MAP_TYPE_SK_STORAGE:
1111 case BPF_MAP_TYPE_TASK_STORAGE:
1112 case BPF_MAP_TYPE_INODE_STORAGE:
1113 case BPF_MAP_TYPE_CGROUP_STORAGE:
1114 break;
1115 case BPF_MAP_TYPE_STRUCT_OPS:
1116 mask_unrelated_struct_ops_progs(obj, map, prog);
1117 break;
1118 default:
1119 if (bpf_map__max_entries(map) == 0)
1120 bpf_map__set_max_entries(map, 1);
1121 }
1122 }
1123
1124 /* SEC(freplace) programs can't be loaded with veristat as is,
1125 * but we can try guessing their target program's expected type by
1126 * looking at the type of program's first argument and substituting
1127 * corresponding program type
1128 */
1129 if (bpf_program__type(prog) == BPF_PROG_TYPE_EXT) {
1130 const struct btf *btf = bpf_object__btf(obj);
1131 const char *prog_name = bpf_program__name(prog);
1132 enum bpf_prog_type prog_type;
1133 enum bpf_attach_type attach_type;
1134 const struct btf_type *t;
1135 const char *ctx_name;
1136 int id;
1137
1138 if (!btf)
1139 goto skip_freplace_fixup;
1140
1141 id = btf__find_by_name_kind(btf, prog_name, BTF_KIND_FUNC);
1142 t = btf__type_by_id(btf, id);
1143 t = btf__type_by_id(btf, t->type);
1144 if (!btf_is_func_proto(t) || btf_vlen(t) != 1)
1145 goto skip_freplace_fixup;
1146
1147 /* context argument is a pointer to a struct/typedef */
1148 t = btf__type_by_id(btf, btf_params(t)[0].type);
1149 while (t && btf_is_mod(t))
1150 t = btf__type_by_id(btf, t->type);
1151 if (!t || !btf_is_ptr(t))
1152 goto skip_freplace_fixup;
1153 t = btf__type_by_id(btf, t->type);
1154 while (t && btf_is_mod(t))
1155 t = btf__type_by_id(btf, t->type);
1156 if (!t)
1157 goto skip_freplace_fixup;
1158
1159 ctx_name = btf__name_by_offset(btf, t->name_off);
1160
1161 if (guess_prog_type_by_ctx_name(ctx_name, &prog_type, &attach_type) == 0) {
1162 bpf_program__set_type(prog, prog_type);
1163 bpf_program__set_expected_attach_type(prog, attach_type);
1164
1165 if (!env.quiet) {
1166 printf("Using guessed program type '%s' for %s/%s...\n",
1167 libbpf_bpf_prog_type_str(prog_type),
1168 filename, prog_name);
1169 }
1170 } else {
1171 if (!env.quiet) {
1172 printf("Failed to guess program type for freplace program with context type name '%s' for %s/%s. Consider using canonical type names to help veristat...\n",
1173 ctx_name, filename, prog_name);
1174 }
1175 }
1176 }
1177 skip_freplace_fixup:
1178 return;
1179 }
1180
max_verifier_log_size(void)1181 static int max_verifier_log_size(void)
1182 {
1183 const int SMALL_LOG_SIZE = UINT_MAX >> 8;
1184 const int BIG_LOG_SIZE = UINT_MAX >> 2;
1185 struct bpf_insn insns[] = {
1186 { .code = BPF_ALU | BPF_MOV | BPF_X, .dst_reg = BPF_REG_0, },
1187 { .code = BPF_JMP | BPF_EXIT, },
1188 };
1189 LIBBPF_OPTS(bpf_prog_load_opts, opts,
1190 .log_size = BIG_LOG_SIZE,
1191 .log_buf = (void *)-1,
1192 .log_level = 4
1193 );
1194 int ret, insn_cnt = ARRAY_SIZE(insns);
1195 static int log_size;
1196
1197 if (log_size != 0)
1198 return log_size;
1199
1200 ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
1201
1202 if (ret == -EFAULT)
1203 log_size = BIG_LOG_SIZE;
1204 else /* ret == -EINVAL, big log size is not supported by the verifier */
1205 log_size = SMALL_LOG_SIZE;
1206
1207 return log_size;
1208 }
1209
process_prog(const char * filename,struct bpf_object * obj,struct bpf_program * prog)1210 static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog)
1211 {
1212 const char *base_filename = basename(strdupa(filename));
1213 const char *prog_name = bpf_program__name(prog);
1214 char *buf;
1215 int buf_sz, log_level;
1216 struct verif_stats *stats;
1217 struct bpf_prog_info info;
1218 __u32 info_len = sizeof(info);
1219 int err = 0;
1220 void *tmp;
1221 int fd;
1222
1223 if (!should_process_file_prog(base_filename, bpf_program__name(prog))) {
1224 env.progs_skipped++;
1225 return 0;
1226 }
1227
1228 tmp = realloc(env.prog_stats, (env.prog_stat_cnt + 1) * sizeof(*env.prog_stats));
1229 if (!tmp)
1230 return -ENOMEM;
1231 env.prog_stats = tmp;
1232 stats = &env.prog_stats[env.prog_stat_cnt++];
1233 memset(stats, 0, sizeof(*stats));
1234
1235 if (env.verbose || env.top_src_lines > 0) {
1236 buf_sz = env.log_size ? env.log_size : max_verifier_log_size();
1237 buf = malloc(buf_sz);
1238 if (!buf)
1239 return -ENOMEM;
1240 /* ensure we always request stats */
1241 log_level = env.log_level | 4 | (env.log_fixed ? 8 : 0);
1242 /* --top-src-lines needs verifier log */
1243 if (env.top_src_lines > 0 && env.log_level == 0)
1244 log_level |= 2;
1245 } else {
1246 buf = verif_log_buf;
1247 buf_sz = sizeof(verif_log_buf);
1248 /* request only verifier stats */
1249 log_level = 4 | (env.log_fixed ? 8 : 0);
1250 }
1251 verif_log_buf[0] = '\0';
1252
1253 bpf_program__set_log_buf(prog, buf, buf_sz);
1254 bpf_program__set_log_level(prog, log_level);
1255
1256 /* increase chances of successful BPF object loading */
1257 fixup_obj(obj, prog, base_filename);
1258
1259 if (env.force_checkpoints)
1260 bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_STATE_FREQ);
1261 if (env.force_reg_invariants)
1262 bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_REG_INVARIANTS);
1263
1264 err = bpf_object__load(obj);
1265 env.progs_processed++;
1266
1267 stats->file_name = strdup(base_filename);
1268 stats->prog_name = strdup(bpf_program__name(prog));
1269 stats->stats[VERDICT] = err == 0; /* 1 - success, 0 - failure */
1270 stats->stats[SIZE] = bpf_program__insn_cnt(prog);
1271 stats->stats[PROG_TYPE] = bpf_program__type(prog);
1272 stats->stats[ATTACH_TYPE] = bpf_program__expected_attach_type(prog);
1273
1274 memset(&info, 0, info_len);
1275 fd = bpf_program__fd(prog);
1276 if (fd > 0 && bpf_prog_get_info_by_fd(fd, &info, &info_len) == 0)
1277 stats->stats[JITED_SIZE] = info.jited_prog_len;
1278
1279 parse_verif_log(buf, buf_sz, stats);
1280
1281 if (env.verbose) {
1282 printf("PROCESSING %s/%s, DURATION US: %ld, VERDICT: %s, VERIFIER LOG:\n%s\n",
1283 filename, prog_name, stats->stats[DURATION],
1284 err ? "failure" : "success", buf);
1285 }
1286 if (env.top_src_lines > 0)
1287 print_top_src_lines(buf, buf_sz, stats->prog_name);
1288
1289 if (verif_log_buf != buf)
1290 free(buf);
1291
1292 return 0;
1293 };
1294
process_obj(const char * filename)1295 static int process_obj(const char *filename)
1296 {
1297 const char *base_filename = basename(strdupa(filename));
1298 struct bpf_object *obj = NULL, *tobj;
1299 struct bpf_program *prog, *tprog, *lprog;
1300 libbpf_print_fn_t old_libbpf_print_fn;
1301 LIBBPF_OPTS(bpf_object_open_opts, opts);
1302 int err = 0, prog_cnt = 0;
1303
1304 if (!should_process_file_prog(base_filename, NULL)) {
1305 if (env.verbose)
1306 printf("Skipping '%s' due to filters...\n", filename);
1307 env.files_skipped++;
1308 return 0;
1309 }
1310 if (!is_bpf_obj_file(filename)) {
1311 if (env.verbose)
1312 printf("Skipping '%s' as it's not a BPF object file...\n", filename);
1313 env.files_skipped++;
1314 return 0;
1315 }
1316
1317 if (!env.quiet && env.out_fmt == RESFMT_TABLE)
1318 printf("Processing '%s'...\n", base_filename);
1319
1320 old_libbpf_print_fn = libbpf_set_print(libbpf_print_fn);
1321 obj = bpf_object__open_file(filename, &opts);
1322 if (!obj) {
1323 /* if libbpf can't open BPF object file, it could be because
1324 * that BPF object file is incomplete and has to be statically
1325 * linked into a final BPF object file; instead of bailing
1326 * out, report it into stderr, mark it as skipped, and
1327 * proceed
1328 */
1329 fprintf(stderr, "Failed to open '%s': %d\n", filename, -errno);
1330 env.files_skipped++;
1331 err = 0;
1332 goto cleanup;
1333 }
1334
1335 env.files_processed++;
1336
1337 bpf_object__for_each_program(prog, obj) {
1338 prog_cnt++;
1339 }
1340
1341 if (prog_cnt == 1) {
1342 prog = bpf_object__next_program(obj, NULL);
1343 bpf_program__set_autoload(prog, true);
1344 process_prog(filename, obj, prog);
1345 goto cleanup;
1346 }
1347
1348 bpf_object__for_each_program(prog, obj) {
1349 const char *prog_name = bpf_program__name(prog);
1350
1351 tobj = bpf_object__open_file(filename, &opts);
1352 if (!tobj) {
1353 err = -errno;
1354 fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
1355 goto cleanup;
1356 }
1357
1358 lprog = NULL;
1359 bpf_object__for_each_program(tprog, tobj) {
1360 const char *tprog_name = bpf_program__name(tprog);
1361
1362 if (strcmp(prog_name, tprog_name) == 0) {
1363 bpf_program__set_autoload(tprog, true);
1364 lprog = tprog;
1365 } else {
1366 bpf_program__set_autoload(tprog, false);
1367 }
1368 }
1369
1370 process_prog(filename, tobj, lprog);
1371 bpf_object__close(tobj);
1372 }
1373
1374 cleanup:
1375 bpf_object__close(obj);
1376 libbpf_set_print(old_libbpf_print_fn);
1377 return err;
1378 }
1379
cmp_stat(const struct verif_stats * s1,const struct verif_stats * s2,enum stat_id id,bool asc,bool abs)1380 static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2,
1381 enum stat_id id, bool asc, bool abs)
1382 {
1383 int cmp = 0;
1384
1385 switch (id) {
1386 case FILE_NAME:
1387 cmp = strcmp(s1->file_name, s2->file_name);
1388 break;
1389 case PROG_NAME:
1390 cmp = strcmp(s1->prog_name, s2->prog_name);
1391 break;
1392 case ATTACH_TYPE:
1393 case PROG_TYPE:
1394 case SIZE:
1395 case JITED_SIZE:
1396 case STACK:
1397 case VERDICT:
1398 case DURATION:
1399 case TOTAL_INSNS:
1400 case TOTAL_STATES:
1401 case PEAK_STATES:
1402 case MAX_STATES_PER_INSN:
1403 case MARK_READ_MAX_LEN: {
1404 long v1 = s1->stats[id];
1405 long v2 = s2->stats[id];
1406
1407 if (abs) {
1408 v1 = v1 < 0 ? -v1 : v1;
1409 v2 = v2 < 0 ? -v2 : v2;
1410 }
1411
1412 if (v1 != v2)
1413 cmp = v1 < v2 ? -1 : 1;
1414 break;
1415 }
1416 default:
1417 fprintf(stderr, "Unrecognized stat #%d\n", id);
1418 exit(1);
1419 }
1420
1421 return asc ? cmp : -cmp;
1422 }
1423
cmp_prog_stats(const void * v1,const void * v2)1424 static int cmp_prog_stats(const void *v1, const void *v2)
1425 {
1426 const struct verif_stats *s1 = v1, *s2 = v2;
1427 int i, cmp;
1428
1429 for (i = 0; i < env.sort_spec.spec_cnt; i++) {
1430 cmp = cmp_stat(s1, s2, env.sort_spec.ids[i],
1431 env.sort_spec.asc[i], env.sort_spec.abs[i]);
1432 if (cmp != 0)
1433 return cmp;
1434 }
1435
1436 /* always disambiguate with file+prog, which are unique */
1437 cmp = strcmp(s1->file_name, s2->file_name);
1438 if (cmp != 0)
1439 return cmp;
1440 return strcmp(s1->prog_name, s2->prog_name);
1441 }
1442
fetch_join_stat_value(const struct verif_stats_join * s,enum stat_id id,enum stat_variant var,const char ** str_val,double * num_val)1443 static void fetch_join_stat_value(const struct verif_stats_join *s,
1444 enum stat_id id, enum stat_variant var,
1445 const char **str_val,
1446 double *num_val)
1447 {
1448 long v1, v2;
1449
1450 if (id == FILE_NAME) {
1451 *str_val = s->file_name;
1452 return;
1453 }
1454 if (id == PROG_NAME) {
1455 *str_val = s->prog_name;
1456 return;
1457 }
1458
1459 v1 = s->stats_a ? s->stats_a->stats[id] : 0;
1460 v2 = s->stats_b ? s->stats_b->stats[id] : 0;
1461
1462 switch (var) {
1463 case VARIANT_A:
1464 if (!s->stats_a)
1465 *num_val = -DBL_MAX;
1466 else
1467 *num_val = s->stats_a->stats[id];
1468 return;
1469 case VARIANT_B:
1470 if (!s->stats_b)
1471 *num_val = -DBL_MAX;
1472 else
1473 *num_val = s->stats_b->stats[id];
1474 return;
1475 case VARIANT_DIFF:
1476 if (!s->stats_a || !s->stats_b)
1477 *num_val = -DBL_MAX;
1478 else if (id == VERDICT)
1479 *num_val = v1 == v2 ? 1.0 /* MATCH */ : 0.0 /* MISMATCH */;
1480 else
1481 *num_val = (double)(v2 - v1);
1482 return;
1483 case VARIANT_PCT:
1484 if (!s->stats_a || !s->stats_b) {
1485 *num_val = -DBL_MAX;
1486 } else if (v1 == 0) {
1487 if (v1 == v2)
1488 *num_val = 0.0;
1489 else
1490 *num_val = v2 < v1 ? -100.0 : 100.0;
1491 } else {
1492 *num_val = (v2 - v1) * 100.0 / v1;
1493 }
1494 return;
1495 }
1496 }
1497
/* Compare two joined entries by one variant of one stat.
 *
 * String-valued stats are compared with strcmp(); numeric ones as doubles,
 * by absolute value if @abs is set. The result is negated when @asc is
 * false.
 */
static int cmp_join_stat(const struct verif_stats_join *s1,
			 const struct verif_stats_join *s2,
			 enum stat_id id, enum stat_variant var,
			 bool asc, bool abs)
{
	const char *lhs_str = NULL, *rhs_str = NULL;
	double lhs_num = 0.0, rhs_num = 0.0;
	int res = 0;

	fetch_join_stat_value(s1, id, var, &lhs_str, &lhs_num);
	fetch_join_stat_value(s2, id, var, &rhs_str, &rhs_num);

	if (lhs_str) {
		res = strcmp(lhs_str, rhs_str);
	} else {
		if (abs) {
			lhs_num = fabs(lhs_num);
			rhs_num = fabs(rhs_num);
		}
		if (lhs_num != rhs_num)
			res = lhs_num < rhs_num ? -1 : 1;
	}

	if (!asc)
		res = -res;
	return res;
}
1522
cmp_join_stats(const void * v1,const void * v2)1523 static int cmp_join_stats(const void *v1, const void *v2)
1524 {
1525 const struct verif_stats_join *s1 = v1, *s2 = v2;
1526 int i, cmp;
1527
1528 for (i = 0; i < env.sort_spec.spec_cnt; i++) {
1529 cmp = cmp_join_stat(s1, s2,
1530 env.sort_spec.ids[i],
1531 env.sort_spec.variants[i],
1532 env.sort_spec.asc[i],
1533 env.sort_spec.abs[i]);
1534 if (cmp != 0)
1535 return cmp;
1536 }
1537
1538 /* always disambiguate with file+prog, which are unique */
1539 cmp = strcmp(s1->file_name, s2->file_name);
1540 if (cmp != 0)
1541 return cmp;
1542 return strcmp(s1->prog_name, s2->prog_name);
1543 }
1544
1545 #define HEADER_CHAR '-'
1546 #define COLUMN_SEP " "
1547
output_header_underlines(void)1548 static void output_header_underlines(void)
1549 {
1550 int i, j, len;
1551
1552 for (i = 0; i < env.output_spec.spec_cnt; i++) {
1553 len = env.output_spec.lens[i];
1554
1555 printf("%s", i == 0 ? "" : COLUMN_SEP);
1556 for (j = 0; j < len; j++)
1557 printf("%c", HEADER_CHAR);
1558 }
1559 printf("\n");
1560 }
1561
output_headers(enum resfmt fmt)1562 static void output_headers(enum resfmt fmt)
1563 {
1564 const char *fmt_str;
1565 int i, len;
1566
1567 for (i = 0; i < env.output_spec.spec_cnt; i++) {
1568 int id = env.output_spec.ids[i];
1569 int *max_len = &env.output_spec.lens[i];
1570
1571 switch (fmt) {
1572 case RESFMT_TABLE_CALCLEN:
1573 len = snprintf(NULL, 0, "%s", stat_defs[id].header);
1574 if (len > *max_len)
1575 *max_len = len;
1576 break;
1577 case RESFMT_TABLE:
1578 fmt_str = stat_defs[id].left_aligned ? "%s%-*s" : "%s%*s";
1579 printf(fmt_str, i == 0 ? "" : COLUMN_SEP, *max_len, stat_defs[id].header);
1580 if (i == env.output_spec.spec_cnt - 1)
1581 printf("\n");
1582 break;
1583 case RESFMT_CSV:
1584 printf("%s%s", i == 0 ? "" : ",", stat_defs[id].names[0]);
1585 if (i == env.output_spec.spec_cnt - 1)
1586 printf("\n");
1587 break;
1588 }
1589 }
1590
1591 if (fmt == RESFMT_TABLE)
1592 output_header_underlines();
1593 }
1594
prepare_value(const struct verif_stats * s,enum stat_id id,const char ** str,long * val)1595 static void prepare_value(const struct verif_stats *s, enum stat_id id,
1596 const char **str, long *val)
1597 {
1598 switch (id) {
1599 case FILE_NAME:
1600 *str = s ? s->file_name : "N/A";
1601 break;
1602 case PROG_NAME:
1603 *str = s ? s->prog_name : "N/A";
1604 break;
1605 case VERDICT:
1606 if (!s)
1607 *str = "N/A";
1608 else
1609 *str = s->stats[VERDICT] ? "success" : "failure";
1610 break;
1611 case ATTACH_TYPE:
1612 if (!s)
1613 *str = "N/A";
1614 else
1615 *str = libbpf_bpf_attach_type_str(s->stats[ATTACH_TYPE]) ?: "N/A";
1616 break;
1617 case PROG_TYPE:
1618 if (!s)
1619 *str = "N/A";
1620 else
1621 *str = libbpf_bpf_prog_type_str(s->stats[PROG_TYPE]) ?: "N/A";
1622 break;
1623 case DURATION:
1624 case TOTAL_INSNS:
1625 case TOTAL_STATES:
1626 case PEAK_STATES:
1627 case MAX_STATES_PER_INSN:
1628 case MARK_READ_MAX_LEN:
1629 case STACK:
1630 case SIZE:
1631 case JITED_SIZE:
1632 *val = s ? s->stats[id] : 0;
1633 break;
1634 default:
1635 fprintf(stderr, "Unrecognized stat #%d\n", id);
1636 exit(1);
1637 }
1638 }
1639
output_stats(const struct verif_stats * s,enum resfmt fmt,bool last)1640 static void output_stats(const struct verif_stats *s, enum resfmt fmt, bool last)
1641 {
1642 int i;
1643
1644 for (i = 0; i < env.output_spec.spec_cnt; i++) {
1645 int id = env.output_spec.ids[i];
1646 int *max_len = &env.output_spec.lens[i], len;
1647 const char *str = NULL;
1648 long val = 0;
1649
1650 prepare_value(s, id, &str, &val);
1651
1652 switch (fmt) {
1653 case RESFMT_TABLE_CALCLEN:
1654 if (str)
1655 len = snprintf(NULL, 0, "%s", str);
1656 else
1657 len = snprintf(NULL, 0, "%ld", val);
1658 if (len > *max_len)
1659 *max_len = len;
1660 break;
1661 case RESFMT_TABLE:
1662 if (str)
1663 printf("%s%-*s", i == 0 ? "" : COLUMN_SEP, *max_len, str);
1664 else
1665 printf("%s%*ld", i == 0 ? "" : COLUMN_SEP, *max_len, val);
1666 if (i == env.output_spec.spec_cnt - 1)
1667 printf("\n");
1668 break;
1669 case RESFMT_CSV:
1670 if (str)
1671 printf("%s%s", i == 0 ? "" : ",", str);
1672 else
1673 printf("%s%ld", i == 0 ? "" : ",", val);
1674 if (i == env.output_spec.spec_cnt - 1)
1675 printf("\n");
1676 break;
1677 }
1678 }
1679
1680 if (last && fmt == RESFMT_TABLE) {
1681 output_header_underlines();
1682 printf("Done. Processed %d files, %d programs. Skipped %d files, %d programs.\n",
1683 env.files_processed, env.files_skipped, env.progs_processed, env.progs_skipped);
1684 }
1685 }
1686
parse_stat_value(const char * str,enum stat_id id,struct verif_stats * st)1687 static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats *st)
1688 {
1689 switch (id) {
1690 case FILE_NAME:
1691 st->file_name = strdup(str);
1692 if (!st->file_name)
1693 return -ENOMEM;
1694 break;
1695 case PROG_NAME:
1696 st->prog_name = strdup(str);
1697 if (!st->prog_name)
1698 return -ENOMEM;
1699 break;
1700 case VERDICT:
1701 if (strcmp(str, "success") == 0) {
1702 st->stats[VERDICT] = true;
1703 } else if (strcmp(str, "failure") == 0) {
1704 st->stats[VERDICT] = false;
1705 } else {
1706 fprintf(stderr, "Unrecognized verification verdict '%s'\n", str);
1707 return -EINVAL;
1708 }
1709 break;
1710 case DURATION:
1711 case TOTAL_INSNS:
1712 case TOTAL_STATES:
1713 case PEAK_STATES:
1714 case MAX_STATES_PER_INSN:
1715 case MARK_READ_MAX_LEN:
1716 case SIZE:
1717 case JITED_SIZE:
1718 case STACK: {
1719 long val;
1720 int err, n;
1721
1722 if (sscanf(str, "%ld %n", &val, &n) != 1 || n != strlen(str)) {
1723 err = -errno;
1724 fprintf(stderr, "Failed to parse '%s' as integer\n", str);
1725 return err;
1726 }
1727
1728 st->stats[id] = val;
1729 break;
1730 }
1731 case PROG_TYPE: {
1732 enum bpf_prog_type prog_type = 0;
1733 const char *type;
1734
1735 while ((type = libbpf_bpf_prog_type_str(prog_type))) {
1736 if (strcmp(type, str) == 0) {
1737 st->stats[id] = prog_type;
1738 break;
1739 }
1740 prog_type++;
1741 }
1742
1743 if (!type) {
1744 fprintf(stderr, "Unrecognized prog type %s\n", str);
1745 return -EINVAL;
1746 }
1747 break;
1748 }
1749 case ATTACH_TYPE: {
1750 enum bpf_attach_type attach_type = 0;
1751 const char *type;
1752
1753 while ((type = libbpf_bpf_attach_type_str(attach_type))) {
1754 if (strcmp(type, str) == 0) {
1755 st->stats[id] = attach_type;
1756 break;
1757 }
1758 attach_type++;
1759 }
1760
1761 if (!type) {
1762 fprintf(stderr, "Unrecognized attach type %s\n", str);
1763 return -EINVAL;
1764 }
1765 break;
1766 }
1767 default:
1768 fprintf(stderr, "Unrecognized stat #%d\n", id);
1769 return -EINVAL;
1770 }
1771 return 0;
1772 }
1773
parse_stats_csv(const char * filename,struct stat_specs * specs,struct verif_stats ** statsp,int * stat_cntp)1774 static int parse_stats_csv(const char *filename, struct stat_specs *specs,
1775 struct verif_stats **statsp, int *stat_cntp)
1776 {
1777 char line[4096];
1778 FILE *f;
1779 int err = 0;
1780 bool header = true;
1781
1782 f = fopen(filename, "r");
1783 if (!f) {
1784 err = -errno;
1785 fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
1786 return err;
1787 }
1788
1789 *stat_cntp = 0;
1790
1791 while (fgets(line, sizeof(line), f)) {
1792 char *input = line, *state = NULL, *next;
1793 struct verif_stats *st = NULL;
1794 int col = 0, cnt = 0;
1795
1796 if (!header) {
1797 void *tmp;
1798
1799 tmp = realloc(*statsp, (*stat_cntp + 1) * sizeof(**statsp));
1800 if (!tmp) {
1801 err = -ENOMEM;
1802 goto cleanup;
1803 }
1804 *statsp = tmp;
1805
1806 st = &(*statsp)[*stat_cntp];
1807 memset(st, 0, sizeof(*st));
1808
1809 *stat_cntp += 1;
1810 }
1811
1812 while ((next = strtok_r(cnt++ ? NULL : input, ",\n", &state))) {
1813 if (header) {
1814 /* for the first line, set up spec stats */
1815 err = parse_stat(next, specs);
1816 if (err)
1817 goto cleanup;
1818 continue;
1819 }
1820
1821 /* for all other lines, parse values based on spec */
1822 if (col >= specs->spec_cnt) {
1823 fprintf(stderr, "Found extraneous column #%d in row #%d of '%s'\n",
1824 col, *stat_cntp, filename);
1825 err = -EINVAL;
1826 goto cleanup;
1827 }
1828 err = parse_stat_value(next, specs->ids[col], st);
1829 if (err)
1830 goto cleanup;
1831 col++;
1832 }
1833
1834 if (header) {
1835 header = false;
1836 continue;
1837 }
1838
1839 if (col < specs->spec_cnt) {
1840 fprintf(stderr, "Not enough columns in row #%d in '%s'\n",
1841 *stat_cntp, filename);
1842 err = -EINVAL;
1843 goto cleanup;
1844 }
1845
1846 if (!st->file_name || !st->prog_name) {
1847 fprintf(stderr, "Row #%d in '%s' is missing file and/or program name\n",
1848 *stat_cntp, filename);
1849 err = -EINVAL;
1850 goto cleanup;
1851 }
1852
1853 /* in comparison mode we can only check filters after we
1854 * parsed entire line; if row should be ignored we pretend we
1855 * never parsed it
1856 */
1857 if (!should_process_file_prog(st->file_name, st->prog_name)) {
1858 free(st->file_name);
1859 free(st->prog_name);
1860 *stat_cntp -= 1;
1861 }
1862 }
1863
1864 if (!feof(f)) {
1865 err = -errno;
1866 fprintf(stderr, "Failed I/O for '%s': %d\n", filename, err);
1867 }
1868
1869 cleanup:
1870 fclose(f);
1871 return err;
1872 }
1873
/* Empty/zero stats for mismatched rows: file and program names are empty
 * strings and every other field is zero-initialized. Comparison mode uses
 * the address of this object as a stand-in entry once one of the two data
 * sets has no more rows to join.
 */
static const struct verif_stats fallback_stats = { .file_name = "", .prog_name = "" };
1876
is_key_stat(enum stat_id id)1877 static bool is_key_stat(enum stat_id id)
1878 {
1879 return id == FILE_NAME || id == PROG_NAME;
1880 }
1881
output_comp_header_underlines(void)1882 static void output_comp_header_underlines(void)
1883 {
1884 int i, j, k;
1885
1886 for (i = 0; i < env.output_spec.spec_cnt; i++) {
1887 int id = env.output_spec.ids[i];
1888 int max_j = is_key_stat(id) ? 1 : 3;
1889
1890 for (j = 0; j < max_j; j++) {
1891 int len = env.output_spec.lens[3 * i + j];
1892
1893 printf("%s", i + j == 0 ? "" : COLUMN_SEP);
1894
1895 for (k = 0; k < len; k++)
1896 printf("%c", HEADER_CHAR);
1897 }
1898 }
1899 printf("\n");
1900 }
1901
output_comp_headers(enum resfmt fmt)1902 static void output_comp_headers(enum resfmt fmt)
1903 {
1904 static const char *table_sfxs[3] = {" (A)", " (B)", " (DIFF)"};
1905 static const char *name_sfxs[3] = {"_base", "_comp", "_diff"};
1906 int i, j, len;
1907
1908 for (i = 0; i < env.output_spec.spec_cnt; i++) {
1909 int id = env.output_spec.ids[i];
1910 /* key stats don't have A/B/DIFF columns, they are common for both data sets */
1911 int max_j = is_key_stat(id) ? 1 : 3;
1912
1913 for (j = 0; j < max_j; j++) {
1914 int *max_len = &env.output_spec.lens[3 * i + j];
1915 bool last = (i == env.output_spec.spec_cnt - 1) && (j == max_j - 1);
1916 const char *sfx;
1917
1918 switch (fmt) {
1919 case RESFMT_TABLE_CALCLEN:
1920 sfx = is_key_stat(id) ? "" : table_sfxs[j];
1921 len = snprintf(NULL, 0, "%s%s", stat_defs[id].header, sfx);
1922 if (len > *max_len)
1923 *max_len = len;
1924 break;
1925 case RESFMT_TABLE:
1926 sfx = is_key_stat(id) ? "" : table_sfxs[j];
1927 printf("%s%-*s%s", i + j == 0 ? "" : COLUMN_SEP,
1928 *max_len - (int)strlen(sfx), stat_defs[id].header, sfx);
1929 if (last)
1930 printf("\n");
1931 break;
1932 case RESFMT_CSV:
1933 sfx = is_key_stat(id) ? "" : name_sfxs[j];
1934 printf("%s%s%s", i + j == 0 ? "" : ",", stat_defs[id].names[0], sfx);
1935 if (last)
1936 printf("\n");
1937 break;
1938 }
1939 }
1940 }
1941
1942 if (fmt == RESFMT_TABLE)
1943 output_comp_header_underlines();
1944 }
1945
output_comp_stats(const struct verif_stats_join * join_stats,enum resfmt fmt,bool last)1946 static void output_comp_stats(const struct verif_stats_join *join_stats,
1947 enum resfmt fmt, bool last)
1948 {
1949 const struct verif_stats *base = join_stats->stats_a;
1950 const struct verif_stats *comp = join_stats->stats_b;
1951 char base_buf[1024] = {}, comp_buf[1024] = {}, diff_buf[1024] = {};
1952 int i;
1953
1954 for (i = 0; i < env.output_spec.spec_cnt; i++) {
1955 int id = env.output_spec.ids[i], len;
1956 int *max_len_base = &env.output_spec.lens[3 * i + 0];
1957 int *max_len_comp = &env.output_spec.lens[3 * i + 1];
1958 int *max_len_diff = &env.output_spec.lens[3 * i + 2];
1959 const char *base_str = NULL, *comp_str = NULL;
1960 long base_val = 0, comp_val = 0, diff_val = 0;
1961
1962 prepare_value(base, id, &base_str, &base_val);
1963 prepare_value(comp, id, &comp_str, &comp_val);
1964
1965 /* normalize all the outputs to be in string buffers for simplicity */
1966 if (is_key_stat(id)) {
1967 /* key stats (file and program name) are always strings */
1968 if (base)
1969 snprintf(base_buf, sizeof(base_buf), "%s", base_str);
1970 else
1971 snprintf(base_buf, sizeof(base_buf), "%s", comp_str);
1972 } else if (base_str) {
1973 snprintf(base_buf, sizeof(base_buf), "%s", base_str);
1974 snprintf(comp_buf, sizeof(comp_buf), "%s", comp_str);
1975 if (!base || !comp)
1976 snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A");
1977 else if (strcmp(base_str, comp_str) == 0)
1978 snprintf(diff_buf, sizeof(diff_buf), "%s", "MATCH");
1979 else
1980 snprintf(diff_buf, sizeof(diff_buf), "%s", "MISMATCH");
1981 } else {
1982 double p = 0.0;
1983
1984 if (base)
1985 snprintf(base_buf, sizeof(base_buf), "%ld", base_val);
1986 else
1987 snprintf(base_buf, sizeof(base_buf), "%s", "N/A");
1988 if (comp)
1989 snprintf(comp_buf, sizeof(comp_buf), "%ld", comp_val);
1990 else
1991 snprintf(comp_buf, sizeof(comp_buf), "%s", "N/A");
1992
1993 diff_val = comp_val - base_val;
1994 if (!base || !comp) {
1995 snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A");
1996 } else {
1997 if (base_val == 0) {
1998 if (comp_val == base_val)
1999 p = 0.0; /* avoid +0 (+100%) case */
2000 else
2001 p = comp_val < base_val ? -100.0 : 100.0;
2002 } else {
2003 p = diff_val * 100.0 / base_val;
2004 }
2005 snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)", diff_val, p);
2006 }
2007 }
2008
2009 switch (fmt) {
2010 case RESFMT_TABLE_CALCLEN:
2011 len = strlen(base_buf);
2012 if (len > *max_len_base)
2013 *max_len_base = len;
2014 if (!is_key_stat(id)) {
2015 len = strlen(comp_buf);
2016 if (len > *max_len_comp)
2017 *max_len_comp = len;
2018 len = strlen(diff_buf);
2019 if (len > *max_len_diff)
2020 *max_len_diff = len;
2021 }
2022 break;
2023 case RESFMT_TABLE: {
2024 /* string outputs are left-aligned, number outputs are right-aligned */
2025 const char *fmt = base_str ? "%s%-*s" : "%s%*s";
2026
2027 printf(fmt, i == 0 ? "" : COLUMN_SEP, *max_len_base, base_buf);
2028 if (!is_key_stat(id)) {
2029 printf(fmt, COLUMN_SEP, *max_len_comp, comp_buf);
2030 printf(fmt, COLUMN_SEP, *max_len_diff, diff_buf);
2031 }
2032 if (i == env.output_spec.spec_cnt - 1)
2033 printf("\n");
2034 break;
2035 }
2036 case RESFMT_CSV:
2037 printf("%s%s", i == 0 ? "" : ",", base_buf);
2038 if (!is_key_stat(id)) {
2039 printf("%s%s", i == 0 ? "" : ",", comp_buf);
2040 printf("%s%s", i == 0 ? "" : ",", diff_buf);
2041 }
2042 if (i == env.output_spec.spec_cnt - 1)
2043 printf("\n");
2044 break;
2045 }
2046 }
2047
2048 if (last && fmt == RESFMT_TABLE)
2049 output_comp_header_underlines();
2050 }
2051
cmp_stats_key(const struct verif_stats * base,const struct verif_stats * comp)2052 static int cmp_stats_key(const struct verif_stats *base, const struct verif_stats *comp)
2053 {
2054 int r;
2055
2056 r = strcmp(base->file_name, comp->file_name);
2057 if (r != 0)
2058 return r;
2059 return strcmp(base->prog_name, comp->prog_name);
2060 }
2061
is_join_stat_filter_matched(struct filter * f,const struct verif_stats_join * stats)2062 static bool is_join_stat_filter_matched(struct filter *f, const struct verif_stats_join *stats)
2063 {
2064 static const double eps = 1e-9;
2065 const char *str = NULL;
2066 double value = 0.0;
2067
2068 fetch_join_stat_value(stats, f->stat_id, f->stat_var, &str, &value);
2069
2070 if (f->abs)
2071 value = fabs(value);
2072
2073 switch (f->op) {
2074 case OP_EQ: return value > f->value - eps && value < f->value + eps;
2075 case OP_NEQ: return value < f->value - eps || value > f->value + eps;
2076 case OP_LT: return value < f->value - eps;
2077 case OP_LE: return value <= f->value + eps;
2078 case OP_GT: return value > f->value + eps;
2079 case OP_GE: return value >= f->value - eps;
2080 }
2081
2082 fprintf(stderr, "BUG: unknown filter op %d!\n", f->op);
2083 return false;
2084 }
2085
should_output_join_stats(const struct verif_stats_join * stats)2086 static bool should_output_join_stats(const struct verif_stats_join *stats)
2087 {
2088 struct filter *f;
2089 int i, allow_cnt = 0;
2090
2091 for (i = 0; i < env.deny_filter_cnt; i++) {
2092 f = &env.deny_filters[i];
2093 if (f->kind != FILTER_STAT)
2094 continue;
2095
2096 if (is_join_stat_filter_matched(f, stats))
2097 return false;
2098 }
2099
2100 for (i = 0; i < env.allow_filter_cnt; i++) {
2101 f = &env.allow_filters[i];
2102 if (f->kind != FILTER_STAT)
2103 continue;
2104 allow_cnt++;
2105
2106 if (is_join_stat_filter_matched(f, stats))
2107 return true;
2108 }
2109
2110 /* if there are no stat allowed filters, pass everything through */
2111 return allow_cnt == 0;
2112 }
2113
handle_comparison_mode(void)2114 static int handle_comparison_mode(void)
2115 {
2116 struct stat_specs base_specs = {}, comp_specs = {};
2117 struct stat_specs tmp_sort_spec;
2118 enum resfmt cur_fmt;
2119 int err, i, j, last_idx, cnt;
2120
2121 if (env.filename_cnt != 2) {
2122 fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n\n");
2123 argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2124 return -EINVAL;
2125 }
2126
2127 err = parse_stats_csv(env.filenames[0], &base_specs,
2128 &env.baseline_stats, &env.baseline_stat_cnt);
2129 if (err) {
2130 fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err);
2131 return err;
2132 }
2133 err = parse_stats_csv(env.filenames[1], &comp_specs,
2134 &env.prog_stats, &env.prog_stat_cnt);
2135 if (err) {
2136 fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[1], err);
2137 return err;
2138 }
2139
2140 /* To keep it simple we validate that the set and order of stats in
2141 * both CSVs are exactly the same. This can be lifted with a bit more
2142 * pre-processing later.
2143 */
2144 if (base_specs.spec_cnt != comp_specs.spec_cnt) {
2145 fprintf(stderr, "Number of stats in '%s' and '%s' differs (%d != %d)!\n",
2146 env.filenames[0], env.filenames[1],
2147 base_specs.spec_cnt, comp_specs.spec_cnt);
2148 return -EINVAL;
2149 }
2150 for (i = 0; i < base_specs.spec_cnt; i++) {
2151 if (base_specs.ids[i] != comp_specs.ids[i]) {
2152 fprintf(stderr, "Stats composition differs between '%s' and '%s' (%s != %s)!\n",
2153 env.filenames[0], env.filenames[1],
2154 stat_defs[base_specs.ids[i]].names[0],
2155 stat_defs[comp_specs.ids[i]].names[0]);
2156 return -EINVAL;
2157 }
2158 }
2159
2160 /* Replace user-specified sorting spec with file+prog sorting rule to
2161 * be able to join two datasets correctly. Once we are done, we will
2162 * restore the original sort spec.
2163 */
2164 tmp_sort_spec = env.sort_spec;
2165 env.sort_spec = join_sort_spec;
2166 qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
2167 qsort(env.baseline_stats, env.baseline_stat_cnt, sizeof(*env.baseline_stats), cmp_prog_stats);
2168 env.sort_spec = tmp_sort_spec;
2169
2170 /* Join two datasets together. If baseline and comparison datasets
2171 * have different subset of rows (we match by 'object + prog' as
2172 * a unique key) then assume empty/missing/zero value for rows that
2173 * are missing in the opposite data set.
2174 */
2175 i = j = 0;
2176 while (i < env.baseline_stat_cnt || j < env.prog_stat_cnt) {
2177 const struct verif_stats *base, *comp;
2178 struct verif_stats_join *join;
2179 void *tmp;
2180 int r;
2181
2182 base = i < env.baseline_stat_cnt ? &env.baseline_stats[i] : &fallback_stats;
2183 comp = j < env.prog_stat_cnt ? &env.prog_stats[j] : &fallback_stats;
2184
2185 if (!base->file_name || !base->prog_name) {
2186 fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n",
2187 i, env.filenames[0]);
2188 return -EINVAL;
2189 }
2190 if (!comp->file_name || !comp->prog_name) {
2191 fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n",
2192 j, env.filenames[1]);
2193 return -EINVAL;
2194 }
2195
2196 tmp = realloc(env.join_stats, (env.join_stat_cnt + 1) * sizeof(*env.join_stats));
2197 if (!tmp)
2198 return -ENOMEM;
2199 env.join_stats = tmp;
2200
2201 join = &env.join_stats[env.join_stat_cnt];
2202 memset(join, 0, sizeof(*join));
2203
2204 r = cmp_stats_key(base, comp);
2205 if (r == 0) {
2206 join->file_name = base->file_name;
2207 join->prog_name = base->prog_name;
2208 join->stats_a = base;
2209 join->stats_b = comp;
2210 i++;
2211 j++;
2212 } else if (base != &fallback_stats && (comp == &fallback_stats || r < 0)) {
2213 join->file_name = base->file_name;
2214 join->prog_name = base->prog_name;
2215 join->stats_a = base;
2216 join->stats_b = NULL;
2217 i++;
2218 } else if (comp != &fallback_stats && (base == &fallback_stats || r > 0)) {
2219 join->file_name = comp->file_name;
2220 join->prog_name = comp->prog_name;
2221 join->stats_a = NULL;
2222 join->stats_b = comp;
2223 j++;
2224 } else {
2225 fprintf(stderr, "%s:%d: should never reach here i=%i, j=%i",
2226 __FILE__, __LINE__, i, j);
2227 return -EINVAL;
2228 }
2229 env.join_stat_cnt += 1;
2230 }
2231
2232 /* now sort joined results according to sort spec */
2233 qsort(env.join_stats, env.join_stat_cnt, sizeof(*env.join_stats), cmp_join_stats);
2234
2235 /* for human-readable table output we need to do extra pass to
2236 * calculate column widths, so we substitute current output format
2237 * with RESFMT_TABLE_CALCLEN and later revert it back to RESFMT_TABLE
2238 * and do everything again.
2239 */
2240 if (env.out_fmt == RESFMT_TABLE)
2241 cur_fmt = RESFMT_TABLE_CALCLEN;
2242 else
2243 cur_fmt = env.out_fmt;
2244
2245 one_more_time:
2246 output_comp_headers(cur_fmt);
2247
2248 last_idx = -1;
2249 cnt = 0;
2250 for (i = 0; i < env.join_stat_cnt; i++) {
2251 const struct verif_stats_join *join = &env.join_stats[i];
2252
2253 if (!should_output_join_stats(join))
2254 continue;
2255
2256 if (env.top_n && cnt >= env.top_n)
2257 break;
2258
2259 if (cur_fmt == RESFMT_TABLE_CALCLEN)
2260 last_idx = i;
2261
2262 output_comp_stats(join, cur_fmt, i == last_idx);
2263
2264 cnt++;
2265 }
2266
2267 if (cur_fmt == RESFMT_TABLE_CALCLEN) {
2268 cur_fmt = RESFMT_TABLE;
2269 goto one_more_time; /* ... this time with feeling */
2270 }
2271
2272 return 0;
2273 }
2274
is_stat_filter_matched(struct filter * f,const struct verif_stats * stats)2275 static bool is_stat_filter_matched(struct filter *f, const struct verif_stats *stats)
2276 {
2277 long value = stats->stats[f->stat_id];
2278
2279 if (f->abs)
2280 value = value < 0 ? -value : value;
2281
2282 switch (f->op) {
2283 case OP_EQ: return value == f->value;
2284 case OP_NEQ: return value != f->value;
2285 case OP_LT: return value < f->value;
2286 case OP_LE: return value <= f->value;
2287 case OP_GT: return value > f->value;
2288 case OP_GE: return value >= f->value;
2289 }
2290
2291 fprintf(stderr, "BUG: unknown filter op %d!\n", f->op);
2292 return false;
2293 }
2294
should_output_stats(const struct verif_stats * stats)2295 static bool should_output_stats(const struct verif_stats *stats)
2296 {
2297 struct filter *f;
2298 int i, allow_cnt = 0;
2299
2300 for (i = 0; i < env.deny_filter_cnt; i++) {
2301 f = &env.deny_filters[i];
2302 if (f->kind != FILTER_STAT)
2303 continue;
2304
2305 if (is_stat_filter_matched(f, stats))
2306 return false;
2307 }
2308
2309 for (i = 0; i < env.allow_filter_cnt; i++) {
2310 f = &env.allow_filters[i];
2311 if (f->kind != FILTER_STAT)
2312 continue;
2313 allow_cnt++;
2314
2315 if (is_stat_filter_matched(f, stats))
2316 return true;
2317 }
2318
2319 /* if there are no stat allowed filters, pass everything through */
2320 return allow_cnt == 0;
2321 }
2322
output_prog_stats(void)2323 static void output_prog_stats(void)
2324 {
2325 const struct verif_stats *stats;
2326 int i, last_stat_idx = 0, cnt = 0;
2327
2328 if (env.out_fmt == RESFMT_TABLE) {
2329 /* calculate column widths */
2330 output_headers(RESFMT_TABLE_CALCLEN);
2331 for (i = 0; i < env.prog_stat_cnt; i++) {
2332 stats = &env.prog_stats[i];
2333 if (!should_output_stats(stats))
2334 continue;
2335 output_stats(stats, RESFMT_TABLE_CALCLEN, false);
2336 last_stat_idx = i;
2337 }
2338 }
2339
2340 /* actually output the table */
2341 output_headers(env.out_fmt);
2342 for (i = 0; i < env.prog_stat_cnt; i++) {
2343 stats = &env.prog_stats[i];
2344 if (!should_output_stats(stats))
2345 continue;
2346 if (env.top_n && cnt >= env.top_n)
2347 break;
2348 output_stats(stats, env.out_fmt, i == last_stat_idx);
2349 cnt++;
2350 }
2351 }
2352
handle_verif_mode(void)2353 static int handle_verif_mode(void)
2354 {
2355 int i, err;
2356
2357 if (env.filename_cnt == 0) {
2358 fprintf(stderr, "Please provide path to BPF object file!\n\n");
2359 argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2360 return -EINVAL;
2361 }
2362
2363 for (i = 0; i < env.filename_cnt; i++) {
2364 err = process_obj(env.filenames[i]);
2365 if (err) {
2366 fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err);
2367 return err;
2368 }
2369 }
2370
2371 qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
2372
2373 output_prog_stats();
2374
2375 return 0;
2376 }
2377
handle_replay_mode(void)2378 static int handle_replay_mode(void)
2379 {
2380 struct stat_specs specs = {};
2381 int err;
2382
2383 if (env.filename_cnt != 1) {
2384 fprintf(stderr, "Replay mode expects exactly one input CSV file!\n\n");
2385 argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2386 return -EINVAL;
2387 }
2388
2389 err = parse_stats_csv(env.filenames[0], &specs,
2390 &env.prog_stats, &env.prog_stat_cnt);
2391 if (err) {
2392 fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err);
2393 return err;
2394 }
2395
2396 qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
2397
2398 output_prog_stats();
2399
2400 return 0;
2401 }
2402
main(int argc,char ** argv)2403 int main(int argc, char **argv)
2404 {
2405 int err = 0, i;
2406
2407 if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
2408 return 1;
2409
2410 if (env.show_version) {
2411 printf("%s\n", argp_program_version);
2412 return 0;
2413 }
2414
2415 if (env.verbose && env.quiet) {
2416 fprintf(stderr, "Verbose and quiet modes are incompatible, please specify just one or neither!\n\n");
2417 argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2418 return 1;
2419 }
2420 if (env.verbose && env.log_level == 0)
2421 env.log_level = 1;
2422
2423 if (env.output_spec.spec_cnt == 0) {
2424 if (env.out_fmt == RESFMT_CSV)
2425 env.output_spec = default_csv_output_spec;
2426 else
2427 env.output_spec = default_output_spec;
2428 }
2429 if (env.sort_spec.spec_cnt == 0)
2430 env.sort_spec = default_sort_spec;
2431
2432 if (env.comparison_mode && env.replay_mode) {
2433 fprintf(stderr, "Can't specify replay and comparison mode at the same time!\n\n");
2434 argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2435 return 1;
2436 }
2437
2438 if (env.comparison_mode)
2439 err = handle_comparison_mode();
2440 else if (env.replay_mode)
2441 err = handle_replay_mode();
2442 else
2443 err = handle_verif_mode();
2444
2445 free_verif_stats(env.prog_stats, env.prog_stat_cnt);
2446 free_verif_stats(env.baseline_stats, env.baseline_stat_cnt);
2447 free(env.join_stats);
2448 for (i = 0; i < env.filename_cnt; i++)
2449 free(env.filenames[i]);
2450 free(env.filenames);
2451 for (i = 0; i < env.allow_filter_cnt; i++) {
2452 free(env.allow_filters[i].any_glob);
2453 free(env.allow_filters[i].file_glob);
2454 free(env.allow_filters[i].prog_glob);
2455 }
2456 free(env.allow_filters);
2457 for (i = 0; i < env.deny_filter_cnt; i++) {
2458 free(env.deny_filters[i].any_glob);
2459 free(env.deny_filters[i].file_glob);
2460 free(env.deny_filters[i].prog_glob);
2461 }
2462 free(env.deny_filters);
2463 return -err;
2464 }
2465