1 // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
2 /* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
3 #define _GNU_SOURCE
4 #include <argp.h>
5 #include <libgen.h>
6 #include <string.h>
7 #include <stdlib.h>
8 #include <sched.h>
9 #include <pthread.h>
10 #include <dirent.h>
11 #include <signal.h>
12 #include <fcntl.h>
13 #include <unistd.h>
14 #include <sys/time.h>
15 #include <sys/sysinfo.h>
16 #include <sys/stat.h>
17 #include <bpf/libbpf.h>
18 #include <bpf/btf.h>
19 #include <bpf/bpf.h>
20 #include <libelf.h>
21 #include <gelf.h>
22 #include <float.h>
23 #include <math.h>
24
25 #ifndef ARRAY_SIZE
26 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
27 #endif
28
/* Identifiers of all stats known to veristat.
 *
 * Numeric stats are listed first (VERDICT .. MARK_READ_MAX_LEN) and double as
 * indices into verif_stats.stats[]; string-valued stats (FILE_NAME,
 * PROG_NAME) follow them.
 */
enum stat_id {
	VERDICT,
	DURATION,
	TOTAL_INSNS,
	TOTAL_STATES,
	PEAK_STATES,
	MAX_STATES_PER_INSN,
	MARK_READ_MAX_LEN,

	FILE_NAME,
	PROG_NAME,

	/* total number of stat IDs (numeric and string ones) */
	ALL_STATS_CNT,
	/* number of numeric stats only, i.e., everything before FILE_NAME */
	NUM_STATS_CNT = FILE_NAME - VERDICT,
};
44
45 /* In comparison mode each stat can specify up to four different values:
46 * - A side value;
47 * - B side value;
48 * - absolute diff value;
49 * - relative (percentage) diff value.
50 *
51 * When specifying stat specs in comparison mode, user can use one of the
52 * following variant suffixes to specify which exact variant should be used for
53 * ordering or filtering:
54 * - `_a` for A side value;
55 * - `_b` for B side value;
56 * - `_diff` for absolute diff value;
57 * - `_pct` for relative (percentage) diff value.
58 *
59 * If no variant suffix is provided, then `_b` (control data) is assumed.
60 *
61 * As an example, let's say instructions stat has the following output:
62 *
63 * Insns (A) Insns (B) Insns (DIFF)
64 * --------- --------- --------------
65 * 21547 20920 -627 (-2.91%)
66 *
67 * Then:
68 * - 21547 is A side value (insns_a);
69 * - 20920 is B side value (insns_b);
70 * - -627 is absolute diff value (insns_diff);
71 * - -2.91% is relative diff value (insns_pct).
72 *
73 * For verdict there is no verdict_pct variant.
74 * For file and program name, _a and _b variants are equivalent and there are
75 * no _diff or _pct variants.
76 */
/* Which of the comparison-mode values of a stat is being referred to. The
 * enumerator values are also used as indices into the suffix table in
 * parse_stat_id_var().
 */
enum stat_variant {
	VARIANT_A,	/* `_a` suffix: A side value */
	VARIANT_B,	/* `_b` suffix (and the default): B side value */
	VARIANT_DIFF,	/* `_diff` suffix: absolute diff value */
	VARIANT_PCT,	/* `_pct` suffix: relative (percentage) diff value */
};
83
/* Stats recorded for a single BPF program. */
struct verif_stats {
	/* both strings are released by free_verif_stats() */
	char *file_name;
	char *prog_name;

	/* numeric stats, indexed by the numeric part of enum stat_id */
	long stats[NUM_STATS_CNT];
};
90
/* joined comparison mode stats: one row pairing the A side and B side stats
 * of a given file/program
 */
struct verif_stats_join {
	char *file_name;
	char *prog_name;

	/* NOTE(review): presumably either side can be NULL when the program
	 * is present in only one of the data sets -- confirm against the
	 * code that builds the join.
	 */
	const struct verif_stats *stats_a;
	const struct verif_stats *stats_b;
};
99
/* An ordered list of stat specifications, as produced by parse_stats(). It is
 * used both for the set of emitted columns and for the sort order. Entries
 * [0, spec_cnt) of ids/variants/asc/abs are parallel arrays.
 */
struct stat_specs {
	int spec_cnt;				/* number of valid entries */
	enum stat_id ids[ALL_STATS_CNT];	/* which stat */
	enum stat_variant variants[ALL_STATS_CNT]; /* which comparison-mode variant */
	bool asc[ALL_STATS_CNT];		/* ascending order requested */
	bool abs[ALL_STATS_CNT];		/* |stat| syntax was used */
	int lens[ALL_STATS_CNT * 3]; /* 3x for comparison mode */
};
108
/* Result output formats; selected with -o (table or csv). */
enum resfmt {
	RESFMT_TABLE,
	RESFMT_TABLE_CALCLEN, /* fake format to pre-calculate table's column widths */
	RESFMT_CSV,
};
114
/* Kinds of filter expressions accepted by -f, see append_filter(). */
enum filter_kind {
	FILTER_NAME,	/* file and/or program name glob */
	FILTER_STAT,	/* <stat><op><value> numeric comparison */
};
119
/* Comparison operators usable in stat filters; their textual spellings are
 * listed in the `operators` table.
 */
enum operator_kind {
	OP_EQ,  /* == or = */
	OP_NEQ, /* != or <> */
	OP_LT,  /* < */
	OP_LE,  /* <= */
	OP_GT,  /* > */
	OP_GE,  /* >= */
};
128
/* A single parsed filter expression. Only the group of fields matching `kind`
 * is meaningful; append_filter() zeroes the whole struct before filling it.
 */
struct filter {
	enum filter_kind kind;
	/* FILTER_NAME */
	char *any_glob;		/* glob applied to both file and program name */
	char *file_glob;	/* glob applied to file name only */
	char *prog_glob;	/* glob applied to program name only */
	/* FILTER_STAT */
	enum operator_kind op;
	int stat_id;
	enum stat_variant stat_var;
	long value;		/* right-hand side of the comparison */
	bool abs;
};
142
/* Global tool state: parsed command line settings, collected stats and
 * processing counters.
 */
static struct env {
	/* positional arguments, each string is strdup()'ed by parse_arg() */
	char **filenames;
	int filename_cnt;
	bool verbose;			/* -v, also implied by -d */
	bool debug;			/* -d */
	bool quiet;			/* -q */
	bool force_checkpoints;		/* -t */
	bool force_reg_invariants;	/* -r */
	enum resfmt out_fmt;		/* -o */
	bool show_version;		/* -V */
	bool comparison_mode;		/* -C */
	bool replay_mode;		/* -R */
	int top_n;			/* -n */

	int log_level;			/* -l */
	int log_size;			/* --log-size; 0 means "not specified" */
	bool log_fixed;			/* --log-fixed */

	struct verif_stats *prog_stats;
	int prog_stat_cnt;

	/* baseline_stats is allocated and used only in comparison mode */
	struct verif_stats *baseline_stats;
	int baseline_stat_cnt;

	struct verif_stats_join *join_stats;
	int join_stat_cnt;

	struct stat_specs output_spec;	/* -e */
	struct stat_specs sort_spec;	/* -s */

	/* -f filters; expressions prefixed with '!' go into deny_filters */
	struct filter *allow_filters;
	struct filter *deny_filters;
	int allow_filter_cnt;
	int deny_filter_cnt;

	int files_processed;
	int files_skipped;
	int progs_processed;
	int progs_skipped;
	int top_src_lines;		/* -S */
} env;
185
libbpf_print_fn(enum libbpf_print_level level,const char * format,va_list args)186 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
187 {
188 if (!env.verbose)
189 return 0;
190 if (level == LIBBPF_DEBUG && !env.debug)
191 return 0;
192 return vfprintf(stderr, format, args);
193 }
194
/* Version string fallback used when the build doesn't provide one */
#ifndef VERISTAT_VERSION
#define VERISTAT_VERSION "<kernel>"
#endif

/* Well-known globals picked up by argp for --version and --help output */
const char *argp_program_version = "veristat v" VERISTAT_VERSION;
const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
const char argp_program_doc[] =
"veristat    BPF verifier stats collection and comparison tool.\n"
"\n"
"USAGE: veristat <obj-file> [<obj-file>...]\n"
"   OR: veristat -C <baseline.csv> <comparison.csv>\n"
"   OR: veristat -R <results.csv>\n";
207
/* Keys of options that have only a long form; values are chosen outside of
 * the printable character range used for short option keys.
 */
enum {
	OPT_LOG_FIXED = 1000,
	OPT_LOG_SIZE = 1001,
};
212
/* Command line options understood by veristat; each key is handled in
 * parse_arg(). The table is terminated by an empty entry.
 */
static const struct argp_option opts[] = {
	{ NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
	{ "version", 'V', NULL, 0, "Print version" },
	{ "verbose", 'v', NULL, 0, "Verbose mode" },
	{ "debug", 'd', NULL, 0, "Debug mode (turns on libbpf debug logging)" },
	{ "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" },
	{ "log-fixed", OPT_LOG_FIXED, NULL, 0, "Disable verifier log rotation" },
	{ "log-size", OPT_LOG_SIZE, "BYTES", 0, "Customize verifier log size (default to 16MB)" },
	{ "top-n", 'n', "N", 0, "Emit only up to first N results." },
	{ "quiet", 'q', NULL, 0, "Quiet mode" },
	{ "emit", 'e', "SPEC", 0, "Specify stats to be emitted" },
	{ "sort", 's', "SPEC", 0, "Specify sort order" },
	{ "output-format", 'o', "FMT", 0, "Result output format (table, csv), default is table." },
	{ "compare", 'C', NULL, 0, "Comparison mode" },
	{ "replay", 'R', NULL, 0, "Replay mode" },
	{ "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)." },
	{ "test-states", 't', NULL, 0,
	  "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" },
	{ "test-reg-invariants", 'r', NULL, 0,
	  "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" },
	{ "top-src-lines", 'S', "N", 0, "Emit N most frequent source code lines" },
	{},
};
236
/* Forward declarations of helpers used by parse_arg() and defined below */
static int parse_stats(const char *stats_str, struct stat_specs *specs);
static int append_filter(struct filter **filters, int *cnt, const char *str);
static int append_filter_file(const char *path);
240
parse_arg(int key,char * arg,struct argp_state * state)241 static error_t parse_arg(int key, char *arg, struct argp_state *state)
242 {
243 void *tmp;
244 int err;
245
246 switch (key) {
247 case 'h':
248 argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
249 break;
250 case 'V':
251 env.show_version = true;
252 break;
253 case 'v':
254 env.verbose = true;
255 break;
256 case 'd':
257 env.debug = true;
258 env.verbose = true;
259 break;
260 case 'q':
261 env.quiet = true;
262 break;
263 case 'e':
264 err = parse_stats(arg, &env.output_spec);
265 if (err)
266 return err;
267 break;
268 case 's':
269 err = parse_stats(arg, &env.sort_spec);
270 if (err)
271 return err;
272 break;
273 case 'o':
274 if (strcmp(arg, "table") == 0) {
275 env.out_fmt = RESFMT_TABLE;
276 } else if (strcmp(arg, "csv") == 0) {
277 env.out_fmt = RESFMT_CSV;
278 } else {
279 fprintf(stderr, "Unrecognized output format '%s'\n", arg);
280 return -EINVAL;
281 }
282 break;
283 case 'l':
284 errno = 0;
285 env.log_level = strtol(arg, NULL, 10);
286 if (errno) {
287 fprintf(stderr, "invalid log level: %s\n", arg);
288 argp_usage(state);
289 }
290 break;
291 case OPT_LOG_FIXED:
292 env.log_fixed = true;
293 break;
294 case OPT_LOG_SIZE:
295 errno = 0;
296 env.log_size = strtol(arg, NULL, 10);
297 if (errno) {
298 fprintf(stderr, "invalid log size: %s\n", arg);
299 argp_usage(state);
300 }
301 break;
302 case 't':
303 env.force_checkpoints = true;
304 break;
305 case 'r':
306 env.force_reg_invariants = true;
307 break;
308 case 'n':
309 errno = 0;
310 env.top_n = strtol(arg, NULL, 10);
311 if (errno) {
312 fprintf(stderr, "invalid top N specifier: %s\n", arg);
313 argp_usage(state);
314 }
315 case 'C':
316 env.comparison_mode = true;
317 break;
318 case 'R':
319 env.replay_mode = true;
320 break;
321 case 'f':
322 if (arg[0] == '@')
323 err = append_filter_file(arg + 1);
324 else if (arg[0] == '!')
325 err = append_filter(&env.deny_filters, &env.deny_filter_cnt, arg + 1);
326 else
327 err = append_filter(&env.allow_filters, &env.allow_filter_cnt, arg);
328 if (err) {
329 fprintf(stderr, "Failed to collect program filter expressions: %d\n", err);
330 return err;
331 }
332 break;
333 case 'S':
334 errno = 0;
335 env.top_src_lines = strtol(arg, NULL, 10);
336 if (errno) {
337 fprintf(stderr, "invalid top lines N specifier: %s\n", arg);
338 argp_usage(state);
339 }
340 break;
341 case ARGP_KEY_ARG:
342 tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames));
343 if (!tmp)
344 return -ENOMEM;
345 env.filenames = tmp;
346 env.filenames[env.filename_cnt] = strdup(arg);
347 if (!env.filenames[env.filename_cnt])
348 return -ENOMEM;
349 env.filename_cnt++;
350 break;
351 default:
352 return ARGP_ERR_UNKNOWN;
353 }
354 return 0;
355 }
356
/* argp parser definition tying together option table, parser callback and
 * usage text
 */
static const struct argp argp = {
	.options = opts,
	.parser = parse_arg,
	.doc = argp_program_doc,
};
362
363
/* Adapted from perf/util/string.c
 *
 * Minimal glob matcher in which '*' (matching any, possibly empty, sequence
 * of characters) is the only special character; everything else has to match
 * literally. The whole of @str has to be covered by @pat.
 *
 * Returns true if @str matches @pat, false otherwise.
 */
static bool glob_matches(const char *str, const char *pat)
{
	const char *s = str, *p = pat;

	/* literal part preceding the first wildcard has to match exactly */
	for (; *s != '\0' && *p != '\0' && *p != '*'; s++, p++) {
		if (*s != *p)
			return false;
	}

	/* no wildcard ahead: both string and pattern have to be exhausted */
	if (*p != '*')
		return *s == '\0' && *p == '\0';

	/* a run of consecutive wildcards is equivalent to a single one */
	do {
		p++;
	} while (*p == '*');

	/* Tail wild card matches all */
	if (*p == '\0')
		return true;

	/* try to match the rest of the pattern against every suffix */
	for (; *s != '\0'; s++) {
		if (glob_matches(s, p))
			return true;
	}

	return false;
}
385
is_bpf_obj_file(const char * path)386 static bool is_bpf_obj_file(const char *path) {
387 Elf64_Ehdr *ehdr;
388 int fd, err = -EINVAL;
389 Elf *elf = NULL;
390
391 fd = open(path, O_RDONLY | O_CLOEXEC);
392 if (fd < 0)
393 return true; /* we'll fail later and propagate error */
394
395 /* ensure libelf is initialized */
396 (void)elf_version(EV_CURRENT);
397
398 elf = elf_begin(fd, ELF_C_READ, NULL);
399 if (!elf)
400 goto cleanup;
401
402 if (elf_kind(elf) != ELF_K_ELF || gelf_getclass(elf) != ELFCLASS64)
403 goto cleanup;
404
405 ehdr = elf64_getehdr(elf);
406 /* Old LLVM set e_machine to EM_NONE */
407 if (!ehdr || ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF))
408 goto cleanup;
409
410 err = 0;
411 cleanup:
412 if (elf)
413 elf_end(elf);
414 close(fd);
415 return err == 0;
416 }
417
should_process_file_prog(const char * filename,const char * prog_name)418 static bool should_process_file_prog(const char *filename, const char *prog_name)
419 {
420 struct filter *f;
421 int i, allow_cnt = 0;
422
423 for (i = 0; i < env.deny_filter_cnt; i++) {
424 f = &env.deny_filters[i];
425 if (f->kind != FILTER_NAME)
426 continue;
427
428 if (f->any_glob && glob_matches(filename, f->any_glob))
429 return false;
430 if (f->any_glob && prog_name && glob_matches(prog_name, f->any_glob))
431 return false;
432 if (f->file_glob && glob_matches(filename, f->file_glob))
433 return false;
434 if (f->prog_glob && prog_name && glob_matches(prog_name, f->prog_glob))
435 return false;
436 }
437
438 for (i = 0; i < env.allow_filter_cnt; i++) {
439 f = &env.allow_filters[i];
440 if (f->kind != FILTER_NAME)
441 continue;
442
443 allow_cnt++;
444 if (f->any_glob) {
445 if (glob_matches(filename, f->any_glob))
446 return true;
447 /* If we don't know program name yet, any_glob filter
448 * has to assume that current BPF object file might be
449 * relevant; we'll check again later on after opening
450 * BPF object file, at which point program name will
451 * be known finally.
452 */
453 if (!prog_name || glob_matches(prog_name, f->any_glob))
454 return true;
455 } else {
456 if (f->file_glob && !glob_matches(filename, f->file_glob))
457 continue;
458 if (f->prog_glob && prog_name && !glob_matches(prog_name, f->prog_glob))
459 continue;
460 return true;
461 }
462 }
463
464 /* if there are no file/prog name allow filters, allow all progs,
465 * unless they are denied earlier explicitly
466 */
467 return allow_cnt == 0;
468 }
469
/* Textual spellings of comparison operators recognized in stat filters,
 * scanned in array order by append_filter().
 */
static struct {
	enum operator_kind op_kind;
	const char *op_str;
} operators[] = {
	/* Order of these definitions matter to avoid situations like '<'
	 * matching part of what is actually a '<>' operator. That is,
	 * substrings should go last.
	 */
	{ OP_EQ,  "==" },
	{ OP_NEQ, "!=" },
	{ OP_NEQ, "<>" },
	{ OP_LE,  "<=" },
	{ OP_LT,  "<" },
	{ OP_GE,  ">=" },
	{ OP_GT,  ">" },
	{ OP_EQ,  "=" },
};
487
/* Forward declaration, needed by append_filter(); defined further below */
static bool parse_stat_id_var(const char *name, size_t len, int *id,
			      enum stat_variant *var, bool *is_abs);
490
append_filter(struct filter ** filters,int * cnt,const char * str)491 static int append_filter(struct filter **filters, int *cnt, const char *str)
492 {
493 struct filter *f;
494 void *tmp;
495 const char *p;
496 int i;
497
498 tmp = realloc(*filters, (*cnt + 1) * sizeof(**filters));
499 if (!tmp)
500 return -ENOMEM;
501 *filters = tmp;
502
503 f = &(*filters)[*cnt];
504 memset(f, 0, sizeof(*f));
505
506 /* First, let's check if it's a stats filter of the following form:
507 * <stat><op><value, where:
508 * - <stat> is one of supported numerical stats (verdict is also
509 * considered numerical, failure == 0, success == 1);
510 * - <op> is comparison operator (see `operators` definitions);
511 * - <value> is an integer (or failure/success, or false/true as
512 * special aliases for 0 and 1, respectively).
513 * If the form doesn't match what user provided, we assume file/prog
514 * glob filter.
515 */
516 for (i = 0; i < ARRAY_SIZE(operators); i++) {
517 enum stat_variant var;
518 int id;
519 long val;
520 const char *end = str;
521 const char *op_str;
522 bool is_abs;
523
524 op_str = operators[i].op_str;
525 p = strstr(str, op_str);
526 if (!p)
527 continue;
528
529 if (!parse_stat_id_var(str, p - str, &id, &var, &is_abs)) {
530 fprintf(stderr, "Unrecognized stat name in '%s'!\n", str);
531 return -EINVAL;
532 }
533 if (id >= FILE_NAME) {
534 fprintf(stderr, "Non-integer stat is specified in '%s'!\n", str);
535 return -EINVAL;
536 }
537
538 p += strlen(op_str);
539
540 if (strcasecmp(p, "true") == 0 ||
541 strcasecmp(p, "t") == 0 ||
542 strcasecmp(p, "success") == 0 ||
543 strcasecmp(p, "succ") == 0 ||
544 strcasecmp(p, "s") == 0 ||
545 strcasecmp(p, "match") == 0 ||
546 strcasecmp(p, "m") == 0) {
547 val = 1;
548 } else if (strcasecmp(p, "false") == 0 ||
549 strcasecmp(p, "f") == 0 ||
550 strcasecmp(p, "failure") == 0 ||
551 strcasecmp(p, "fail") == 0 ||
552 strcasecmp(p, "mismatch") == 0 ||
553 strcasecmp(p, "mis") == 0) {
554 val = 0;
555 } else {
556 errno = 0;
557 val = strtol(p, (char **)&end, 10);
558 if (errno || end == p || *end != '\0' ) {
559 fprintf(stderr, "Invalid integer value in '%s'!\n", str);
560 return -EINVAL;
561 }
562 }
563
564 f->kind = FILTER_STAT;
565 f->stat_id = id;
566 f->stat_var = var;
567 f->op = operators[i].op_kind;
568 f->abs = true;
569 f->value = val;
570
571 *cnt += 1;
572 return 0;
573 }
574
575 /* File/prog filter can be specified either as '<glob>' or
576 * '<file-glob>/<prog-glob>'. In the former case <glob> is applied to
577 * both file and program names. This seems to be way more useful in
578 * practice. If user needs full control, they can use '/<prog-glob>'
579 * form to glob just program name, or '<file-glob>/' to glob only file
580 * name. But usually common <glob> seems to be the most useful and
581 * ergonomic way.
582 */
583 f->kind = FILTER_NAME;
584 p = strchr(str, '/');
585 if (!p) {
586 f->any_glob = strdup(str);
587 if (!f->any_glob)
588 return -ENOMEM;
589 } else {
590 if (str != p) {
591 /* non-empty file glob */
592 f->file_glob = strndup(str, p - str);
593 if (!f->file_glob)
594 return -ENOMEM;
595 }
596 if (strlen(p + 1) > 0) {
597 /* non-empty prog glob */
598 f->prog_glob = strdup(p + 1);
599 if (!f->prog_glob) {
600 free(f->file_glob);
601 f->file_glob = NULL;
602 return -ENOMEM;
603 }
604 }
605 }
606
607 *cnt += 1;
608 return 0;
609 }
610
append_filter_file(const char * path)611 static int append_filter_file(const char *path)
612 {
613 char buf[1024];
614 FILE *f;
615 int err = 0;
616
617 f = fopen(path, "r");
618 if (!f) {
619 err = -errno;
620 fprintf(stderr, "Failed to open filters in '%s': %d\n", path, err);
621 return err;
622 }
623
624 while (fscanf(f, " %1023[^\n]\n", buf) == 1) {
625 /* lines starting with # are comments, skip them */
626 if (buf[0] == '\0' || buf[0] == '#')
627 continue;
628 /* lines starting with ! are negative match filters */
629 if (buf[0] == '!')
630 err = append_filter(&env.deny_filters, &env.deny_filter_cnt, buf + 1);
631 else
632 err = append_filter(&env.allow_filters, &env.allow_filter_cnt, buf);
633 if (err)
634 goto cleanup;
635 }
636
637 cleanup:
638 fclose(f);
639 return err;
640 }
641
/* Default set of emitted stats (seven of them): file, program, verdict,
 * duration, total insns, total states and peak states.
 * NOTE(review): presumably used for table output when -e isn't given --
 * confirm against the code that selects the output spec.
 */
static const struct stat_specs default_output_spec = {
	.spec_cnt = 7,
	.ids = {
		FILE_NAME, PROG_NAME, VERDICT, DURATION,
		TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
	},
};
649
/* Default set of stats for CSV output: all nine known stats */
static const struct stat_specs default_csv_output_spec = {
	.spec_cnt = 9,
	.ids = {
		FILE_NAME, PROG_NAME, VERDICT, DURATION,
		TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
		MAX_STATES_PER_INSN, MARK_READ_MAX_LEN,
	},
};
658
/* Default sort order: by file name, then by program name, both ascending */
static const struct stat_specs default_sort_spec = {
	.spec_cnt = 2,
	.ids = {
		FILE_NAME, PROG_NAME,
	},
	.asc = { true, true, },
};
666
/* sorting for comparison mode to join two data sets: by file name, then by
 * program name, both ascending
 */
static const struct stat_specs join_sort_spec = {
	.spec_cnt = 2,
	.ids = {
		FILE_NAME, PROG_NAME,
	},
	.asc = { true, true, },
};
675
/* Per-stat metadata, indexed by enum stat_id. */
static struct stat_def {
	/* human-readable column header */
	const char *header;
	/* up to four names accepted in stat specs and filters; unused slots
	 * are NULL
	 */
	const char *names[4];
	/* sort direction used when a stat spec has no explicit order symbol */
	bool asc_by_default;
	/* NOTE(review): presumably selects left alignment of the column in
	 * table output -- confirm against the output code
	 */
	bool left_aligned;
} stat_defs[] = {
	[FILE_NAME] = { "File", {"file_name", "filename", "file"}, true /* asc */, true /* left */ },
	[PROG_NAME] = { "Program", {"prog_name", "progname", "prog"}, true /* asc */, true /* left */ },
	[VERDICT] = { "Verdict", {"verdict"}, true /* asc: failure, success */, true /* left */ },
	[DURATION] = { "Duration (us)", {"duration", "dur"}, },
	[TOTAL_INSNS] = { "Insns", {"total_insns", "insns"}, },
	[TOTAL_STATES] = { "States", {"total_states", "states"}, },
	[PEAK_STATES] = { "Peak states", {"peak_states"}, },
	[MAX_STATES_PER_INSN] = { "Max states per insn", {"max_states_per_insn"}, },
	[MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, },
};
692
parse_stat_id_var(const char * name,size_t len,int * id,enum stat_variant * var,bool * is_abs)693 static bool parse_stat_id_var(const char *name, size_t len, int *id,
694 enum stat_variant *var, bool *is_abs)
695 {
696 static const char *var_sfxs[] = {
697 [VARIANT_A] = "_a",
698 [VARIANT_B] = "_b",
699 [VARIANT_DIFF] = "_diff",
700 [VARIANT_PCT] = "_pct",
701 };
702 int i, j, k;
703
704 /* |<stat>| means we take absolute value of given stat */
705 *is_abs = false;
706 if (len > 2 && name[0] == '|' && name[len - 1] == '|') {
707 *is_abs = true;
708 name += 1;
709 len -= 2;
710 }
711
712 for (i = 0; i < ARRAY_SIZE(stat_defs); i++) {
713 struct stat_def *def = &stat_defs[i];
714 size_t alias_len, sfx_len;
715 const char *alias;
716
717 for (j = 0; j < ARRAY_SIZE(stat_defs[i].names); j++) {
718 alias = def->names[j];
719 if (!alias)
720 continue;
721
722 alias_len = strlen(alias);
723 if (strncmp(name, alias, alias_len) != 0)
724 continue;
725
726 if (alias_len == len) {
727 /* If no variant suffix is specified, we
728 * assume control group (just in case we are
729 * in comparison mode. Variant is ignored in
730 * non-comparison mode.
731 */
732 *var = VARIANT_B;
733 *id = i;
734 return true;
735 }
736
737 for (k = 0; k < ARRAY_SIZE(var_sfxs); k++) {
738 sfx_len = strlen(var_sfxs[k]);
739 if (alias_len + sfx_len != len)
740 continue;
741
742 if (strncmp(name + alias_len, var_sfxs[k], sfx_len) == 0) {
743 *var = (enum stat_variant)k;
744 *id = i;
745 return true;
746 }
747 }
748 }
749 }
750
751 return false;
752 }
753
/* Is @c the symbol requesting ascending order in a stat spec? */
static bool is_asc_sym(char c)
{
	switch (c) {
	case '^':
		return true;
	default:
		return false;
	}
}
758
/* Is @c one of the symbols requesting descending order in a stat spec? */
static bool is_desc_sym(char c)
{
	switch (c) {
	case 'v':
	case 'V':
	case '.':
	case '!':
	case '_':
		return true;
	default:
		return false;
	}
}
763
parse_stat(const char * stat_name,struct stat_specs * specs)764 static int parse_stat(const char *stat_name, struct stat_specs *specs)
765 {
766 int id;
767 bool has_order = false, is_asc = false, is_abs = false;
768 size_t len = strlen(stat_name);
769 enum stat_variant var;
770
771 if (specs->spec_cnt >= ARRAY_SIZE(specs->ids)) {
772 fprintf(stderr, "Can't specify more than %zd stats\n", ARRAY_SIZE(specs->ids));
773 return -E2BIG;
774 }
775
776 if (len > 1 && (is_asc_sym(stat_name[len - 1]) || is_desc_sym(stat_name[len - 1]))) {
777 has_order = true;
778 is_asc = is_asc_sym(stat_name[len - 1]);
779 len -= 1;
780 }
781
782 if (!parse_stat_id_var(stat_name, len, &id, &var, &is_abs)) {
783 fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name);
784 return -ESRCH;
785 }
786
787 specs->ids[specs->spec_cnt] = id;
788 specs->variants[specs->spec_cnt] = var;
789 specs->asc[specs->spec_cnt] = has_order ? is_asc : stat_defs[id].asc_by_default;
790 specs->abs[specs->spec_cnt] = is_abs;
791 specs->spec_cnt++;
792
793 return 0;
794 }
795
/* Parse comma-separated list of stat specs @stats_str, appending each of
 * them to @specs by means of parse_stat().
 *
 * Returns 0 on success, -ENOMEM if the input can't be duplicated, or the
 * first error reported by parse_stat(). Specs appended before a failure are
 * left in @specs.
 */
static int parse_stats(const char *stats_str, struct stat_specs *specs)
{
	char *copy, *saveptr = NULL, *tok;
	int rc = 0;

	/* strtok_r() modifies its input, so work on a private copy */
	copy = strdup(stats_str);
	if (!copy)
		return -ENOMEM;

	for (tok = strtok_r(copy, ",", &saveptr); tok; tok = strtok_r(NULL, ",", &saveptr)) {
		rc = parse_stat(tok, specs);
		if (rc)
			break;
	}

	free(copy);
	return rc;
}
816
free_verif_stats(struct verif_stats * stats,size_t stat_cnt)817 static void free_verif_stats(struct verif_stats *stats, size_t stat_cnt)
818 {
819 int i;
820
821 if (!stats)
822 return;
823
824 for (i = 0; i < stat_cnt; i++) {
825 free(stats[i].file_name);
826 free(stats[i].prog_name);
827 }
828 free(stats);
829 }
830
/* Fixed-size (64KB) static buffer.
 * NOTE(review): presumably receives verifier log when no bigger buffer is
 * allocated dynamically -- confirm against process_prog().
 */
static char verif_log_buf[64 * 1024];

/* upper bound on the number of log lines inspected by parse_verif_log() */
#define MAX_PARSED_LOG_LINES 100
834
parse_verif_log(char * const buf,size_t buf_sz,struct verif_stats * s)835 static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *s)
836 {
837 const char *cur;
838 int pos, lines;
839
840 buf[buf_sz - 1] = '\0';
841
842 for (pos = strlen(buf) - 1, lines = 0; pos >= 0 && lines < MAX_PARSED_LOG_LINES; lines++) {
843 /* find previous endline or otherwise take the start of log buf */
844 for (cur = &buf[pos]; cur > buf && cur[0] != '\n'; cur--, pos--) {
845 }
846 /* next time start from end of previous line (or pos goes to <0) */
847 pos--;
848 /* if we found endline, point right after endline symbol;
849 * otherwise, stay at the beginning of log buf
850 */
851 if (cur[0] == '\n')
852 cur++;
853
854 if (1 == sscanf(cur, "verification time %ld usec\n", &s->stats[DURATION]))
855 continue;
856 if (6 == sscanf(cur, "processed %ld insns (limit %*d) max_states_per_insn %ld total_states %ld peak_states %ld mark_read %ld",
857 &s->stats[TOTAL_INSNS],
858 &s->stats[MAX_STATES_PER_INSN],
859 &s->stats[TOTAL_STATES],
860 &s->stats[PEAK_STATES],
861 &s->stats[MARK_READ_MAX_LEN]))
862 continue;
863 }
864
865 return 0;
866 }
867
/* A source code line together with the number of its occurrences */
struct line_cnt {
	char *line;
	int cnt;
};

/* qsort() comparator for an array of C strings (char *): plain strcmp()
 * order.
 */
static int str_cmp(const void *a, const void *b)
{
	const char **str1 = (const char **)a;
	const char **str2 = (const char **)b;

	return strcmp(*str1, *str2);
}

/* qsort() comparator for an array of struct line_cnt: entries with higher
 * count go first, so that the most frequent lines end up at the beginning of
 * the array; ties are broken by strcmp() order of the lines to keep the
 * result deterministic.
 */
static int line_cnt_cmp(const void *a, const void *b)
{
	const struct line_cnt *a_cnt = (const struct line_cnt *)a;
	const struct line_cnt *b_cnt = (const struct line_cnt *)b;

	if (a_cnt->cnt != b_cnt->cnt)
		return a_cnt->cnt > b_cnt->cnt ? -1 : 1;
	return strcmp(a_cnt->line, b_cnt->line);
}
890
print_top_src_lines(char * const buf,size_t buf_sz,const char * prog_name)891 static int print_top_src_lines(char * const buf, size_t buf_sz, const char *prog_name)
892 {
893 int lines_cap = 0;
894 int lines_size = 0;
895 char **lines = NULL;
896 char *line = NULL;
897 char *state;
898 struct line_cnt *freq = NULL;
899 struct line_cnt *cur;
900 int unique_lines;
901 int err = 0;
902 int i;
903
904 while ((line = strtok_r(line ? NULL : buf, "\n", &state))) {
905 if (strncmp(line, "; ", 2) != 0)
906 continue;
907 line += 2;
908
909 if (lines_size == lines_cap) {
910 char **tmp;
911
912 lines_cap = max(16, lines_cap * 2);
913 tmp = realloc(lines, lines_cap * sizeof(*tmp));
914 if (!tmp) {
915 err = -ENOMEM;
916 goto cleanup;
917 }
918 lines = tmp;
919 }
920 lines[lines_size] = line;
921 lines_size++;
922 }
923
924 if (lines_size == 0)
925 goto cleanup;
926
927 qsort(lines, lines_size, sizeof(*lines), str_cmp);
928
929 freq = calloc(lines_size, sizeof(*freq));
930 if (!freq) {
931 err = -ENOMEM;
932 goto cleanup;
933 }
934
935 cur = freq;
936 cur->line = lines[0];
937 cur->cnt = 1;
938 for (i = 1; i < lines_size; ++i) {
939 if (strcmp(lines[i], cur->line) != 0) {
940 cur++;
941 cur->line = lines[i];
942 cur->cnt = 0;
943 }
944 cur->cnt++;
945 }
946 unique_lines = cur - freq + 1;
947
948 qsort(freq, unique_lines, sizeof(struct line_cnt), line_cnt_cmp);
949
950 printf("Top source lines (%s):\n", prog_name);
951 for (i = 0; i < min(unique_lines, env.top_src_lines); ++i) {
952 const char *src_code = freq[i].line;
953 const char *src_line = NULL;
954 char *split = strrchr(freq[i].line, '@');
955
956 if (split) {
957 src_line = split + 1;
958
959 while (*src_line && isspace(*src_line))
960 src_line++;
961
962 while (split > src_code && isspace(*split))
963 split--;
964 *split = '\0';
965 }
966
967 if (src_line)
968 printf("%5d: (%s)\t%s\n", freq[i].cnt, src_line, src_code);
969 else
970 printf("%5d: %s\n", freq[i].cnt, src_code);
971 }
972 printf("\n");
973
974 cleanup:
975 free(freq);
976 free(lines);
977 return err;
978 }
979
guess_prog_type_by_ctx_name(const char * ctx_name,enum bpf_prog_type * prog_type,enum bpf_attach_type * attach_type)980 static int guess_prog_type_by_ctx_name(const char *ctx_name,
981 enum bpf_prog_type *prog_type,
982 enum bpf_attach_type *attach_type)
983 {
984 /* We need to guess program type based on its declared context type.
985 * This guess can't be perfect as many different program types might
986 * share the same context type. So we can only hope to reasonably
987 * well guess this and get lucky.
988 *
989 * Just in case, we support both UAPI-side type names and
990 * kernel-internal names.
991 */
992 static struct {
993 const char *uapi_name;
994 const char *kern_name;
995 enum bpf_prog_type prog_type;
996 enum bpf_attach_type attach_type;
997 } ctx_map[] = {
998 /* __sk_buff is most ambiguous, we assume TC program */
999 { "__sk_buff", "sk_buff", BPF_PROG_TYPE_SCHED_CLS },
1000 { "bpf_sock", "sock", BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND },
1001 { "bpf_sock_addr", "bpf_sock_addr_kern", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND },
1002 { "bpf_sock_ops", "bpf_sock_ops_kern", BPF_PROG_TYPE_SOCK_OPS, BPF_CGROUP_SOCK_OPS },
1003 { "sk_msg_md", "sk_msg", BPF_PROG_TYPE_SK_MSG, BPF_SK_MSG_VERDICT },
1004 { "bpf_cgroup_dev_ctx", "bpf_cgroup_dev_ctx", BPF_PROG_TYPE_CGROUP_DEVICE, BPF_CGROUP_DEVICE },
1005 { "bpf_sysctl", "bpf_sysctl_kern", BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_CGROUP_SYSCTL },
1006 { "bpf_sockopt", "bpf_sockopt_kern", BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT },
1007 { "sk_reuseport_md", "sk_reuseport_kern", BPF_PROG_TYPE_SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE },
1008 { "bpf_sk_lookup", "bpf_sk_lookup_kern", BPF_PROG_TYPE_SK_LOOKUP, BPF_SK_LOOKUP },
1009 { "xdp_md", "xdp_buff", BPF_PROG_TYPE_XDP, BPF_XDP },
1010 /* tracing types with no expected attach type */
1011 { "bpf_user_pt_regs_t", "pt_regs", BPF_PROG_TYPE_KPROBE },
1012 { "bpf_perf_event_data", "bpf_perf_event_data_kern", BPF_PROG_TYPE_PERF_EVENT },
1013 /* raw_tp programs use u64[] from kernel side, we don't want
1014 * to match on that, probably; so NULL for kern-side type
1015 */
1016 { "bpf_raw_tracepoint_args", NULL, BPF_PROG_TYPE_RAW_TRACEPOINT },
1017 };
1018 int i;
1019
1020 if (!ctx_name)
1021 return -EINVAL;
1022
1023 for (i = 0; i < ARRAY_SIZE(ctx_map); i++) {
1024 if (strcmp(ctx_map[i].uapi_name, ctx_name) == 0 ||
1025 (ctx_map[i].kern_name && strcmp(ctx_map[i].kern_name, ctx_name) == 0)) {
1026 *prog_type = ctx_map[i].prog_type;
1027 *attach_type = ctx_map[i].attach_type;
1028 return 0;
1029 }
1030 }
1031
1032 return -ESRCH;
1033 }
1034
fixup_obj(struct bpf_object * obj,struct bpf_program * prog,const char * filename)1035 static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const char *filename)
1036 {
1037 struct bpf_map *map;
1038
1039 bpf_object__for_each_map(map, obj) {
1040 /* disable pinning */
1041 bpf_map__set_pin_path(map, NULL);
1042
1043 /* fix up map size, if necessary */
1044 switch (bpf_map__type(map)) {
1045 case BPF_MAP_TYPE_SK_STORAGE:
1046 case BPF_MAP_TYPE_TASK_STORAGE:
1047 case BPF_MAP_TYPE_INODE_STORAGE:
1048 case BPF_MAP_TYPE_CGROUP_STORAGE:
1049 break;
1050 default:
1051 if (bpf_map__max_entries(map) == 0)
1052 bpf_map__set_max_entries(map, 1);
1053 }
1054 }
1055
1056 /* SEC(freplace) programs can't be loaded with veristat as is,
1057 * but we can try guessing their target program's expected type by
1058 * looking at the type of program's first argument and substituting
1059 * corresponding program type
1060 */
1061 if (bpf_program__type(prog) == BPF_PROG_TYPE_EXT) {
1062 const struct btf *btf = bpf_object__btf(obj);
1063 const char *prog_name = bpf_program__name(prog);
1064 enum bpf_prog_type prog_type;
1065 enum bpf_attach_type attach_type;
1066 const struct btf_type *t;
1067 const char *ctx_name;
1068 int id;
1069
1070 if (!btf)
1071 goto skip_freplace_fixup;
1072
1073 id = btf__find_by_name_kind(btf, prog_name, BTF_KIND_FUNC);
1074 t = btf__type_by_id(btf, id);
1075 t = btf__type_by_id(btf, t->type);
1076 if (!btf_is_func_proto(t) || btf_vlen(t) != 1)
1077 goto skip_freplace_fixup;
1078
1079 /* context argument is a pointer to a struct/typedef */
1080 t = btf__type_by_id(btf, btf_params(t)[0].type);
1081 while (t && btf_is_mod(t))
1082 t = btf__type_by_id(btf, t->type);
1083 if (!t || !btf_is_ptr(t))
1084 goto skip_freplace_fixup;
1085 t = btf__type_by_id(btf, t->type);
1086 while (t && btf_is_mod(t))
1087 t = btf__type_by_id(btf, t->type);
1088 if (!t)
1089 goto skip_freplace_fixup;
1090
1091 ctx_name = btf__name_by_offset(btf, t->name_off);
1092
1093 if (guess_prog_type_by_ctx_name(ctx_name, &prog_type, &attach_type) == 0) {
1094 bpf_program__set_type(prog, prog_type);
1095 bpf_program__set_expected_attach_type(prog, attach_type);
1096
1097 if (!env.quiet) {
1098 printf("Using guessed program type '%s' for %s/%s...\n",
1099 libbpf_bpf_prog_type_str(prog_type),
1100 filename, prog_name);
1101 }
1102 } else {
1103 if (!env.quiet) {
1104 printf("Failed to guess program type for freplace program with context type name '%s' for %s/%s. Consider using canonical type names to help veristat...\n",
1105 ctx_name, filename, prog_name);
1106 }
1107 }
1108 }
1109 skip_freplace_fixup:
1110 return;
1111 }
1112
max_verifier_log_size(void)1113 static int max_verifier_log_size(void)
1114 {
1115 const int SMALL_LOG_SIZE = UINT_MAX >> 8;
1116 const int BIG_LOG_SIZE = UINT_MAX >> 2;
1117 struct bpf_insn insns[] = {
1118 { .code = BPF_ALU | BPF_MOV | BPF_X, .dst_reg = BPF_REG_0, },
1119 { .code = BPF_JMP | BPF_EXIT, },
1120 };
1121 LIBBPF_OPTS(bpf_prog_load_opts, opts,
1122 .log_size = BIG_LOG_SIZE,
1123 .log_buf = (void *)-1,
1124 .log_level = 4
1125 );
1126 int ret, insn_cnt = ARRAY_SIZE(insns);
1127 static int log_size;
1128
1129 if (log_size != 0)
1130 return log_size;
1131
1132 ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
1133
1134 if (ret == -EFAULT)
1135 log_size = BIG_LOG_SIZE;
1136 else /* ret == -EINVAL, big log size is not supported by the verifier */
1137 log_size = SMALL_LOG_SIZE;
1138
1139 return log_size;
1140 }
1141
process_prog(const char * filename,struct bpf_object * obj,struct bpf_program * prog)1142 static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog)
1143 {
1144 const char *base_filename = basename(strdupa(filename));
1145 const char *prog_name = bpf_program__name(prog);
1146 char *buf;
1147 int buf_sz, log_level;
1148 struct verif_stats *stats;
1149 int err = 0;
1150 void *tmp;
1151
1152 if (!should_process_file_prog(base_filename, bpf_program__name(prog))) {
1153 env.progs_skipped++;
1154 return 0;
1155 }
1156
1157 tmp = realloc(env.prog_stats, (env.prog_stat_cnt + 1) * sizeof(*env.prog_stats));
1158 if (!tmp)
1159 return -ENOMEM;
1160 env.prog_stats = tmp;
1161 stats = &env.prog_stats[env.prog_stat_cnt++];
1162 memset(stats, 0, sizeof(*stats));
1163
1164 if (env.verbose || env.top_src_lines > 0) {
1165 buf_sz = env.log_size ? env.log_size : max_verifier_log_size();
1166 buf = malloc(buf_sz);
1167 if (!buf)
1168 return -ENOMEM;
1169 /* ensure we always request stats */
1170 log_level = env.log_level | 4 | (env.log_fixed ? 8 : 0);
1171 /* --top-src-lines needs verifier log */
1172 if (env.top_src_lines > 0 && env.log_level == 0)
1173 log_level |= 2;
1174 } else {
1175 buf = verif_log_buf;
1176 buf_sz = sizeof(verif_log_buf);
1177 /* request only verifier stats */
1178 log_level = 4 | (env.log_fixed ? 8 : 0);
1179 }
1180 verif_log_buf[0] = '\0';
1181
1182 bpf_program__set_log_buf(prog, buf, buf_sz);
1183 bpf_program__set_log_level(prog, log_level);
1184
1185 /* increase chances of successful BPF object loading */
1186 fixup_obj(obj, prog, base_filename);
1187
1188 if (env.force_checkpoints)
1189 bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_STATE_FREQ);
1190 if (env.force_reg_invariants)
1191 bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_REG_INVARIANTS);
1192
1193 err = bpf_object__load(obj);
1194 env.progs_processed++;
1195
1196 stats->file_name = strdup(base_filename);
1197 stats->prog_name = strdup(bpf_program__name(prog));
1198 stats->stats[VERDICT] = err == 0; /* 1 - success, 0 - failure */
1199 parse_verif_log(buf, buf_sz, stats);
1200
1201 if (env.verbose) {
1202 printf("PROCESSING %s/%s, DURATION US: %ld, VERDICT: %s, VERIFIER LOG:\n%s\n",
1203 filename, prog_name, stats->stats[DURATION],
1204 err ? "failure" : "success", buf);
1205 }
1206 if (env.top_src_lines > 0)
1207 print_top_src_lines(buf, buf_sz, stats->prog_name);
1208
1209 if (verif_log_buf != buf)
1210 free(buf);
1211
1212 return 0;
1213 };
1214
process_obj(const char * filename)1215 static int process_obj(const char *filename)
1216 {
1217 const char *base_filename = basename(strdupa(filename));
1218 struct bpf_object *obj = NULL, *tobj;
1219 struct bpf_program *prog, *tprog, *lprog;
1220 libbpf_print_fn_t old_libbpf_print_fn;
1221 LIBBPF_OPTS(bpf_object_open_opts, opts);
1222 int err = 0, prog_cnt = 0;
1223
1224 if (!should_process_file_prog(base_filename, NULL)) {
1225 if (env.verbose)
1226 printf("Skipping '%s' due to filters...\n", filename);
1227 env.files_skipped++;
1228 return 0;
1229 }
1230 if (!is_bpf_obj_file(filename)) {
1231 if (env.verbose)
1232 printf("Skipping '%s' as it's not a BPF object file...\n", filename);
1233 env.files_skipped++;
1234 return 0;
1235 }
1236
1237 if (!env.quiet && env.out_fmt == RESFMT_TABLE)
1238 printf("Processing '%s'...\n", base_filename);
1239
1240 old_libbpf_print_fn = libbpf_set_print(libbpf_print_fn);
1241 obj = bpf_object__open_file(filename, &opts);
1242 if (!obj) {
1243 /* if libbpf can't open BPF object file, it could be because
1244 * that BPF object file is incomplete and has to be statically
1245 * linked into a final BPF object file; instead of bailing
1246 * out, report it into stderr, mark it as skipped, and
1247 * proceed
1248 */
1249 fprintf(stderr, "Failed to open '%s': %d\n", filename, -errno);
1250 env.files_skipped++;
1251 err = 0;
1252 goto cleanup;
1253 }
1254
1255 env.files_processed++;
1256
1257 bpf_object__for_each_program(prog, obj) {
1258 prog_cnt++;
1259 }
1260
1261 if (prog_cnt == 1) {
1262 prog = bpf_object__next_program(obj, NULL);
1263 bpf_program__set_autoload(prog, true);
1264 process_prog(filename, obj, prog);
1265 goto cleanup;
1266 }
1267
1268 bpf_object__for_each_program(prog, obj) {
1269 const char *prog_name = bpf_program__name(prog);
1270
1271 tobj = bpf_object__open_file(filename, &opts);
1272 if (!tobj) {
1273 err = -errno;
1274 fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
1275 goto cleanup;
1276 }
1277
1278 lprog = NULL;
1279 bpf_object__for_each_program(tprog, tobj) {
1280 const char *tprog_name = bpf_program__name(tprog);
1281
1282 if (strcmp(prog_name, tprog_name) == 0) {
1283 bpf_program__set_autoload(tprog, true);
1284 lprog = tprog;
1285 } else {
1286 bpf_program__set_autoload(tprog, false);
1287 }
1288 }
1289
1290 process_prog(filename, tobj, lprog);
1291 bpf_object__close(tobj);
1292 }
1293
1294 cleanup:
1295 bpf_object__close(obj);
1296 libbpf_set_print(old_libbpf_print_fn);
1297 return err;
1298 }
1299
cmp_stat(const struct verif_stats * s1,const struct verif_stats * s2,enum stat_id id,bool asc,bool abs)1300 static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2,
1301 enum stat_id id, bool asc, bool abs)
1302 {
1303 int cmp = 0;
1304
1305 switch (id) {
1306 case FILE_NAME:
1307 cmp = strcmp(s1->file_name, s2->file_name);
1308 break;
1309 case PROG_NAME:
1310 cmp = strcmp(s1->prog_name, s2->prog_name);
1311 break;
1312 case VERDICT:
1313 case DURATION:
1314 case TOTAL_INSNS:
1315 case TOTAL_STATES:
1316 case PEAK_STATES:
1317 case MAX_STATES_PER_INSN:
1318 case MARK_READ_MAX_LEN: {
1319 long v1 = s1->stats[id];
1320 long v2 = s2->stats[id];
1321
1322 if (abs) {
1323 v1 = v1 < 0 ? -v1 : v1;
1324 v2 = v2 < 0 ? -v2 : v2;
1325 }
1326
1327 if (v1 != v2)
1328 cmp = v1 < v2 ? -1 : 1;
1329 break;
1330 }
1331 default:
1332 fprintf(stderr, "Unrecognized stat #%d\n", id);
1333 exit(1);
1334 }
1335
1336 return asc ? cmp : -cmp;
1337 }
1338
cmp_prog_stats(const void * v1,const void * v2)1339 static int cmp_prog_stats(const void *v1, const void *v2)
1340 {
1341 const struct verif_stats *s1 = v1, *s2 = v2;
1342 int i, cmp;
1343
1344 for (i = 0; i < env.sort_spec.spec_cnt; i++) {
1345 cmp = cmp_stat(s1, s2, env.sort_spec.ids[i],
1346 env.sort_spec.asc[i], env.sort_spec.abs[i]);
1347 if (cmp != 0)
1348 return cmp;
1349 }
1350
1351 /* always disambiguate with file+prog, which are unique */
1352 cmp = strcmp(s1->file_name, s2->file_name);
1353 if (cmp != 0)
1354 return cmp;
1355 return strcmp(s1->prog_name, s2->prog_name);
1356 }
1357
fetch_join_stat_value(const struct verif_stats_join * s,enum stat_id id,enum stat_variant var,const char ** str_val,double * num_val)1358 static void fetch_join_stat_value(const struct verif_stats_join *s,
1359 enum stat_id id, enum stat_variant var,
1360 const char **str_val,
1361 double *num_val)
1362 {
1363 long v1, v2;
1364
1365 if (id == FILE_NAME) {
1366 *str_val = s->file_name;
1367 return;
1368 }
1369 if (id == PROG_NAME) {
1370 *str_val = s->prog_name;
1371 return;
1372 }
1373
1374 v1 = s->stats_a ? s->stats_a->stats[id] : 0;
1375 v2 = s->stats_b ? s->stats_b->stats[id] : 0;
1376
1377 switch (var) {
1378 case VARIANT_A:
1379 if (!s->stats_a)
1380 *num_val = -DBL_MAX;
1381 else
1382 *num_val = s->stats_a->stats[id];
1383 return;
1384 case VARIANT_B:
1385 if (!s->stats_b)
1386 *num_val = -DBL_MAX;
1387 else
1388 *num_val = s->stats_b->stats[id];
1389 return;
1390 case VARIANT_DIFF:
1391 if (!s->stats_a || !s->stats_b)
1392 *num_val = -DBL_MAX;
1393 else if (id == VERDICT)
1394 *num_val = v1 == v2 ? 1.0 /* MATCH */ : 0.0 /* MISMATCH */;
1395 else
1396 *num_val = (double)(v2 - v1);
1397 return;
1398 case VARIANT_PCT:
1399 if (!s->stats_a || !s->stats_b) {
1400 *num_val = -DBL_MAX;
1401 } else if (v1 == 0) {
1402 if (v1 == v2)
1403 *num_val = 0.0;
1404 else
1405 *num_val = v2 < v1 ? -100.0 : 100.0;
1406 } else {
1407 *num_val = (v2 - v1) * 100.0 / v1;
1408 }
1409 return;
1410 }
1411 }
1412
/* Compare one stat variant of two joined A/B records.
 *
 * String-valued stats (file and program name) are compared with strcmp();
 * everything else is compared as a double, optionally by absolute value when
 * @abs is set. The result is inverted when @asc is false.
 */
static int cmp_join_stat(const struct verif_stats_join *s1,
			 const struct verif_stats_join *s2,
			 enum stat_id id, enum stat_variant var,
			 bool asc, bool abs)
{
	const char *lstr = NULL, *rstr = NULL;
	double lnum = 0.0, rnum = 0.0;
	int res;

	fetch_join_stat_value(s1, id, var, &lstr, &lnum);
	fetch_join_stat_value(s2, id, var, &rstr, &rnum);

	if (abs) {
		lnum = fabs(lnum);
		rnum = fabs(rnum);
	}

	if (lstr)
		res = strcmp(lstr, rstr);
	else if (lnum == rnum)
		res = 0;
	else
		res = lnum < rnum ? -1 : 1;

	if (!asc)
		res = -res;
	return res;
}
1437
cmp_join_stats(const void * v1,const void * v2)1438 static int cmp_join_stats(const void *v1, const void *v2)
1439 {
1440 const struct verif_stats_join *s1 = v1, *s2 = v2;
1441 int i, cmp;
1442
1443 for (i = 0; i < env.sort_spec.spec_cnt; i++) {
1444 cmp = cmp_join_stat(s1, s2,
1445 env.sort_spec.ids[i],
1446 env.sort_spec.variants[i],
1447 env.sort_spec.asc[i],
1448 env.sort_spec.abs[i]);
1449 if (cmp != 0)
1450 return cmp;
1451 }
1452
1453 /* always disambiguate with file+prog, which are unique */
1454 cmp = strcmp(s1->file_name, s2->file_name);
1455 if (cmp != 0)
1456 return cmp;
1457 return strcmp(s1->prog_name, s2->prog_name);
1458 }
1459
/* character repeated to draw the underline row of a table */
#define HEADER_CHAR '-'
/* text printed between adjacent table columns */
#define COLUMN_SEP " "
1462
output_header_underlines(void)1463 static void output_header_underlines(void)
1464 {
1465 int i, j, len;
1466
1467 for (i = 0; i < env.output_spec.spec_cnt; i++) {
1468 len = env.output_spec.lens[i];
1469
1470 printf("%s", i == 0 ? "" : COLUMN_SEP);
1471 for (j = 0; j < len; j++)
1472 printf("%c", HEADER_CHAR);
1473 }
1474 printf("\n");
1475 }
1476
output_headers(enum resfmt fmt)1477 static void output_headers(enum resfmt fmt)
1478 {
1479 const char *fmt_str;
1480 int i, len;
1481
1482 for (i = 0; i < env.output_spec.spec_cnt; i++) {
1483 int id = env.output_spec.ids[i];
1484 int *max_len = &env.output_spec.lens[i];
1485
1486 switch (fmt) {
1487 case RESFMT_TABLE_CALCLEN:
1488 len = snprintf(NULL, 0, "%s", stat_defs[id].header);
1489 if (len > *max_len)
1490 *max_len = len;
1491 break;
1492 case RESFMT_TABLE:
1493 fmt_str = stat_defs[id].left_aligned ? "%s%-*s" : "%s%*s";
1494 printf(fmt_str, i == 0 ? "" : COLUMN_SEP, *max_len, stat_defs[id].header);
1495 if (i == env.output_spec.spec_cnt - 1)
1496 printf("\n");
1497 break;
1498 case RESFMT_CSV:
1499 printf("%s%s", i == 0 ? "" : ",", stat_defs[id].names[0]);
1500 if (i == env.output_spec.spec_cnt - 1)
1501 printf("\n");
1502 break;
1503 }
1504 }
1505
1506 if (fmt == RESFMT_TABLE)
1507 output_header_underlines();
1508 }
1509
prepare_value(const struct verif_stats * s,enum stat_id id,const char ** str,long * val)1510 static void prepare_value(const struct verif_stats *s, enum stat_id id,
1511 const char **str, long *val)
1512 {
1513 switch (id) {
1514 case FILE_NAME:
1515 *str = s ? s->file_name : "N/A";
1516 break;
1517 case PROG_NAME:
1518 *str = s ? s->prog_name : "N/A";
1519 break;
1520 case VERDICT:
1521 if (!s)
1522 *str = "N/A";
1523 else
1524 *str = s->stats[VERDICT] ? "success" : "failure";
1525 break;
1526 case DURATION:
1527 case TOTAL_INSNS:
1528 case TOTAL_STATES:
1529 case PEAK_STATES:
1530 case MAX_STATES_PER_INSN:
1531 case MARK_READ_MAX_LEN:
1532 *val = s ? s->stats[id] : 0;
1533 break;
1534 default:
1535 fprintf(stderr, "Unrecognized stat #%d\n", id);
1536 exit(1);
1537 }
1538 }
1539
output_stats(const struct verif_stats * s,enum resfmt fmt,bool last)1540 static void output_stats(const struct verif_stats *s, enum resfmt fmt, bool last)
1541 {
1542 int i;
1543
1544 for (i = 0; i < env.output_spec.spec_cnt; i++) {
1545 int id = env.output_spec.ids[i];
1546 int *max_len = &env.output_spec.lens[i], len;
1547 const char *str = NULL;
1548 long val = 0;
1549
1550 prepare_value(s, id, &str, &val);
1551
1552 switch (fmt) {
1553 case RESFMT_TABLE_CALCLEN:
1554 if (str)
1555 len = snprintf(NULL, 0, "%s", str);
1556 else
1557 len = snprintf(NULL, 0, "%ld", val);
1558 if (len > *max_len)
1559 *max_len = len;
1560 break;
1561 case RESFMT_TABLE:
1562 if (str)
1563 printf("%s%-*s", i == 0 ? "" : COLUMN_SEP, *max_len, str);
1564 else
1565 printf("%s%*ld", i == 0 ? "" : COLUMN_SEP, *max_len, val);
1566 if (i == env.output_spec.spec_cnt - 1)
1567 printf("\n");
1568 break;
1569 case RESFMT_CSV:
1570 if (str)
1571 printf("%s%s", i == 0 ? "" : ",", str);
1572 else
1573 printf("%s%ld", i == 0 ? "" : ",", val);
1574 if (i == env.output_spec.spec_cnt - 1)
1575 printf("\n");
1576 break;
1577 }
1578 }
1579
1580 if (last && fmt == RESFMT_TABLE) {
1581 output_header_underlines();
1582 printf("Done. Processed %d files, %d programs. Skipped %d files, %d programs.\n",
1583 env.files_processed, env.files_skipped, env.progs_processed, env.progs_skipped);
1584 }
1585 }
1586
parse_stat_value(const char * str,enum stat_id id,struct verif_stats * st)1587 static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats *st)
1588 {
1589 switch (id) {
1590 case FILE_NAME:
1591 st->file_name = strdup(str);
1592 if (!st->file_name)
1593 return -ENOMEM;
1594 break;
1595 case PROG_NAME:
1596 st->prog_name = strdup(str);
1597 if (!st->prog_name)
1598 return -ENOMEM;
1599 break;
1600 case VERDICT:
1601 if (strcmp(str, "success") == 0) {
1602 st->stats[VERDICT] = true;
1603 } else if (strcmp(str, "failure") == 0) {
1604 st->stats[VERDICT] = false;
1605 } else {
1606 fprintf(stderr, "Unrecognized verification verdict '%s'\n", str);
1607 return -EINVAL;
1608 }
1609 break;
1610 case DURATION:
1611 case TOTAL_INSNS:
1612 case TOTAL_STATES:
1613 case PEAK_STATES:
1614 case MAX_STATES_PER_INSN:
1615 case MARK_READ_MAX_LEN: {
1616 long val;
1617 int err, n;
1618
1619 if (sscanf(str, "%ld %n", &val, &n) != 1 || n != strlen(str)) {
1620 err = -errno;
1621 fprintf(stderr, "Failed to parse '%s' as integer\n", str);
1622 return err;
1623 }
1624
1625 st->stats[id] = val;
1626 break;
1627 }
1628 default:
1629 fprintf(stderr, "Unrecognized stat #%d\n", id);
1630 return -EINVAL;
1631 }
1632 return 0;
1633 }
1634
/* Read a stats CSV file into an array of struct verif_stats.
 *
 * @filename:  path of the CSV file
 * @specs:     filled from the first (header) line, one parse_stat() call per
 *             column name
 * @statsp:    array that is grown with realloc() by one entry per data row
 * @stat_cntp: reset to 0, then set to the number of rows kept
 *
 * Every data row must have exactly specs->spec_cnt columns and must provide
 * both a file name and a program name. Rows rejected by
 * should_process_file_prog() are dropped again after parsing.
 *
 * Returns 0 on success or a negative error code; the file is closed on every
 * path past the initial fopen().
 */
static int parse_stats_csv(const char *filename, struct stat_specs *specs,
			   struct verif_stats **statsp, int *stat_cntp)
{
	char line[4096];
	FILE *f;
	int err = 0;
	bool header = true;

	f = fopen(filename, "r");
	if (!f) {
		err = -errno;
		fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
		return err;
	}

	*stat_cntp = 0;

	while (fgets(line, sizeof(line), f)) {
		char *input = line, *state = NULL, *next;
		struct verif_stats *st = NULL;
		int col = 0, cnt = 0;

		/* data rows get a fresh zeroed slot up front; the header line
		 * does not produce a stats entry
		 */
		if (!header) {
			void *tmp;

			tmp = realloc(*statsp, (*stat_cntp + 1) * sizeof(**statsp));
			if (!tmp) {
				err = -ENOMEM;
				goto cleanup;
			}
			*statsp = tmp;

			st = &(*statsp)[*stat_cntp];
			memset(st, 0, sizeof(*st));

			*stat_cntp += 1;
		}

		/* strtok_r() takes the buffer on the first call only and NULL
		 * afterwards; cnt++ distinguishes the two
		 */
		while ((next = strtok_r(cnt++ ? NULL : input, ",\n", &state))) {
			if (header) {
				/* for the first line, set up spec stats */
				err = parse_stat(next, specs);
				if (err)
					goto cleanup;
				continue;
			}

			/* for all other lines, parse values based on spec */
			if (col >= specs->spec_cnt) {
				fprintf(stderr, "Found extraneous column #%d in row #%d of '%s'\n",
					col, *stat_cntp, filename);
				err = -EINVAL;
				goto cleanup;
			}
			err = parse_stat_value(next, specs->ids[col], st);
			if (err)
				goto cleanup;
			col++;
		}

		if (header) {
			header = false;
			continue;
		}

		if (col < specs->spec_cnt) {
			fprintf(stderr, "Not enough columns in row #%d in '%s'\n",
				*stat_cntp, filename);
			err = -EINVAL;
			goto cleanup;
		}

		if (!st->file_name || !st->prog_name) {
			fprintf(stderr, "Row #%d in '%s' is missing file and/or program name\n",
				*stat_cntp, filename);
			err = -EINVAL;
			goto cleanup;
		}

		/* in comparison mode we can only check filters after we
		 * parsed entire line; if row should be ignored we pretend we
		 * never parsed it
		 */
		if (!should_process_file_prog(st->file_name, st->prog_name)) {
			free(st->file_name);
			free(st->prog_name);
			*stat_cntp -= 1;
		}
	}

	/* fgets() stopped without reaching end of file: report it as an I/O error */
	if (!feof(f)) {
		err = -errno;
		fprintf(stderr, "Failed I/O for '%s': %d\n", filename, err);
	}

cleanup:
	fclose(f);
	return err;
}
1734
/* Empty/zero stats for mismatched rows: handle_comparison_mode() substitutes
 * this record for a data set that has no more rows left, and compares record
 * addresses against it to tell which side is exhausted.
 */
static const struct verif_stats fallback_stats = { .file_name = "", .prog_name = "" };
1737
is_key_stat(enum stat_id id)1738 static bool is_key_stat(enum stat_id id)
1739 {
1740 return id == FILE_NAME || id == PROG_NAME;
1741 }
1742
output_comp_header_underlines(void)1743 static void output_comp_header_underlines(void)
1744 {
1745 int i, j, k;
1746
1747 for (i = 0; i < env.output_spec.spec_cnt; i++) {
1748 int id = env.output_spec.ids[i];
1749 int max_j = is_key_stat(id) ? 1 : 3;
1750
1751 for (j = 0; j < max_j; j++) {
1752 int len = env.output_spec.lens[3 * i + j];
1753
1754 printf("%s", i + j == 0 ? "" : COLUMN_SEP);
1755
1756 for (k = 0; k < len; k++)
1757 printf("%c", HEADER_CHAR);
1758 }
1759 }
1760 printf("\n");
1761 }
1762
output_comp_headers(enum resfmt fmt)1763 static void output_comp_headers(enum resfmt fmt)
1764 {
1765 static const char *table_sfxs[3] = {" (A)", " (B)", " (DIFF)"};
1766 static const char *name_sfxs[3] = {"_base", "_comp", "_diff"};
1767 int i, j, len;
1768
1769 for (i = 0; i < env.output_spec.spec_cnt; i++) {
1770 int id = env.output_spec.ids[i];
1771 /* key stats don't have A/B/DIFF columns, they are common for both data sets */
1772 int max_j = is_key_stat(id) ? 1 : 3;
1773
1774 for (j = 0; j < max_j; j++) {
1775 int *max_len = &env.output_spec.lens[3 * i + j];
1776 bool last = (i == env.output_spec.spec_cnt - 1) && (j == max_j - 1);
1777 const char *sfx;
1778
1779 switch (fmt) {
1780 case RESFMT_TABLE_CALCLEN:
1781 sfx = is_key_stat(id) ? "" : table_sfxs[j];
1782 len = snprintf(NULL, 0, "%s%s", stat_defs[id].header, sfx);
1783 if (len > *max_len)
1784 *max_len = len;
1785 break;
1786 case RESFMT_TABLE:
1787 sfx = is_key_stat(id) ? "" : table_sfxs[j];
1788 printf("%s%-*s%s", i + j == 0 ? "" : COLUMN_SEP,
1789 *max_len - (int)strlen(sfx), stat_defs[id].header, sfx);
1790 if (last)
1791 printf("\n");
1792 break;
1793 case RESFMT_CSV:
1794 sfx = is_key_stat(id) ? "" : name_sfxs[j];
1795 printf("%s%s%s", i + j == 0 ? "" : ",", stat_defs[id].names[0], sfx);
1796 if (last)
1797 printf("\n");
1798 break;
1799 }
1800 }
1801 }
1802
1803 if (fmt == RESFMT_TABLE)
1804 output_comp_header_underlines();
1805 }
1806
output_comp_stats(const struct verif_stats_join * join_stats,enum resfmt fmt,bool last)1807 static void output_comp_stats(const struct verif_stats_join *join_stats,
1808 enum resfmt fmt, bool last)
1809 {
1810 const struct verif_stats *base = join_stats->stats_a;
1811 const struct verif_stats *comp = join_stats->stats_b;
1812 char base_buf[1024] = {}, comp_buf[1024] = {}, diff_buf[1024] = {};
1813 int i;
1814
1815 for (i = 0; i < env.output_spec.spec_cnt; i++) {
1816 int id = env.output_spec.ids[i], len;
1817 int *max_len_base = &env.output_spec.lens[3 * i + 0];
1818 int *max_len_comp = &env.output_spec.lens[3 * i + 1];
1819 int *max_len_diff = &env.output_spec.lens[3 * i + 2];
1820 const char *base_str = NULL, *comp_str = NULL;
1821 long base_val = 0, comp_val = 0, diff_val = 0;
1822
1823 prepare_value(base, id, &base_str, &base_val);
1824 prepare_value(comp, id, &comp_str, &comp_val);
1825
1826 /* normalize all the outputs to be in string buffers for simplicity */
1827 if (is_key_stat(id)) {
1828 /* key stats (file and program name) are always strings */
1829 if (base)
1830 snprintf(base_buf, sizeof(base_buf), "%s", base_str);
1831 else
1832 snprintf(base_buf, sizeof(base_buf), "%s", comp_str);
1833 } else if (base_str) {
1834 snprintf(base_buf, sizeof(base_buf), "%s", base_str);
1835 snprintf(comp_buf, sizeof(comp_buf), "%s", comp_str);
1836 if (!base || !comp)
1837 snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A");
1838 else if (strcmp(base_str, comp_str) == 0)
1839 snprintf(diff_buf, sizeof(diff_buf), "%s", "MATCH");
1840 else
1841 snprintf(diff_buf, sizeof(diff_buf), "%s", "MISMATCH");
1842 } else {
1843 double p = 0.0;
1844
1845 if (base)
1846 snprintf(base_buf, sizeof(base_buf), "%ld", base_val);
1847 else
1848 snprintf(base_buf, sizeof(base_buf), "%s", "N/A");
1849 if (comp)
1850 snprintf(comp_buf, sizeof(comp_buf), "%ld", comp_val);
1851 else
1852 snprintf(comp_buf, sizeof(comp_buf), "%s", "N/A");
1853
1854 diff_val = comp_val - base_val;
1855 if (!base || !comp) {
1856 snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A");
1857 } else {
1858 if (base_val == 0) {
1859 if (comp_val == base_val)
1860 p = 0.0; /* avoid +0 (+100%) case */
1861 else
1862 p = comp_val < base_val ? -100.0 : 100.0;
1863 } else {
1864 p = diff_val * 100.0 / base_val;
1865 }
1866 snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)", diff_val, p);
1867 }
1868 }
1869
1870 switch (fmt) {
1871 case RESFMT_TABLE_CALCLEN:
1872 len = strlen(base_buf);
1873 if (len > *max_len_base)
1874 *max_len_base = len;
1875 if (!is_key_stat(id)) {
1876 len = strlen(comp_buf);
1877 if (len > *max_len_comp)
1878 *max_len_comp = len;
1879 len = strlen(diff_buf);
1880 if (len > *max_len_diff)
1881 *max_len_diff = len;
1882 }
1883 break;
1884 case RESFMT_TABLE: {
1885 /* string outputs are left-aligned, number outputs are right-aligned */
1886 const char *fmt = base_str ? "%s%-*s" : "%s%*s";
1887
1888 printf(fmt, i == 0 ? "" : COLUMN_SEP, *max_len_base, base_buf);
1889 if (!is_key_stat(id)) {
1890 printf(fmt, COLUMN_SEP, *max_len_comp, comp_buf);
1891 printf(fmt, COLUMN_SEP, *max_len_diff, diff_buf);
1892 }
1893 if (i == env.output_spec.spec_cnt - 1)
1894 printf("\n");
1895 break;
1896 }
1897 case RESFMT_CSV:
1898 printf("%s%s", i == 0 ? "" : ",", base_buf);
1899 if (!is_key_stat(id)) {
1900 printf("%s%s", i == 0 ? "" : ",", comp_buf);
1901 printf("%s%s", i == 0 ? "" : ",", diff_buf);
1902 }
1903 if (i == env.output_spec.spec_cnt - 1)
1904 printf("\n");
1905 break;
1906 }
1907 }
1908
1909 if (last && fmt == RESFMT_TABLE)
1910 output_comp_header_underlines();
1911 }
1912
cmp_stats_key(const struct verif_stats * base,const struct verif_stats * comp)1913 static int cmp_stats_key(const struct verif_stats *base, const struct verif_stats *comp)
1914 {
1915 int r;
1916
1917 r = strcmp(base->file_name, comp->file_name);
1918 if (r != 0)
1919 return r;
1920 return strcmp(base->prog_name, comp->prog_name);
1921 }
1922
is_join_stat_filter_matched(struct filter * f,const struct verif_stats_join * stats)1923 static bool is_join_stat_filter_matched(struct filter *f, const struct verif_stats_join *stats)
1924 {
1925 static const double eps = 1e-9;
1926 const char *str = NULL;
1927 double value = 0.0;
1928
1929 fetch_join_stat_value(stats, f->stat_id, f->stat_var, &str, &value);
1930
1931 if (f->abs)
1932 value = fabs(value);
1933
1934 switch (f->op) {
1935 case OP_EQ: return value > f->value - eps && value < f->value + eps;
1936 case OP_NEQ: return value < f->value - eps || value > f->value + eps;
1937 case OP_LT: return value < f->value - eps;
1938 case OP_LE: return value <= f->value + eps;
1939 case OP_GT: return value > f->value + eps;
1940 case OP_GE: return value >= f->value - eps;
1941 }
1942
1943 fprintf(stderr, "BUG: unknown filter op %d!\n", f->op);
1944 return false;
1945 }
1946
should_output_join_stats(const struct verif_stats_join * stats)1947 static bool should_output_join_stats(const struct verif_stats_join *stats)
1948 {
1949 struct filter *f;
1950 int i, allow_cnt = 0;
1951
1952 for (i = 0; i < env.deny_filter_cnt; i++) {
1953 f = &env.deny_filters[i];
1954 if (f->kind != FILTER_STAT)
1955 continue;
1956
1957 if (is_join_stat_filter_matched(f, stats))
1958 return false;
1959 }
1960
1961 for (i = 0; i < env.allow_filter_cnt; i++) {
1962 f = &env.allow_filters[i];
1963 if (f->kind != FILTER_STAT)
1964 continue;
1965 allow_cnt++;
1966
1967 if (is_join_stat_filter_matched(f, stats))
1968 return true;
1969 }
1970
1971 /* if there are no stat allowed filters, pass everything through */
1972 return allow_cnt == 0;
1973 }
1974
handle_comparison_mode(void)1975 static int handle_comparison_mode(void)
1976 {
1977 struct stat_specs base_specs = {}, comp_specs = {};
1978 struct stat_specs tmp_sort_spec;
1979 enum resfmt cur_fmt;
1980 int err, i, j, last_idx, cnt;
1981
1982 if (env.filename_cnt != 2) {
1983 fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n\n");
1984 argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
1985 return -EINVAL;
1986 }
1987
1988 err = parse_stats_csv(env.filenames[0], &base_specs,
1989 &env.baseline_stats, &env.baseline_stat_cnt);
1990 if (err) {
1991 fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err);
1992 return err;
1993 }
1994 err = parse_stats_csv(env.filenames[1], &comp_specs,
1995 &env.prog_stats, &env.prog_stat_cnt);
1996 if (err) {
1997 fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[1], err);
1998 return err;
1999 }
2000
2001 /* To keep it simple we validate that the set and order of stats in
2002 * both CSVs are exactly the same. This can be lifted with a bit more
2003 * pre-processing later.
2004 */
2005 if (base_specs.spec_cnt != comp_specs.spec_cnt) {
2006 fprintf(stderr, "Number of stats in '%s' and '%s' differs (%d != %d)!\n",
2007 env.filenames[0], env.filenames[1],
2008 base_specs.spec_cnt, comp_specs.spec_cnt);
2009 return -EINVAL;
2010 }
2011 for (i = 0; i < base_specs.spec_cnt; i++) {
2012 if (base_specs.ids[i] != comp_specs.ids[i]) {
2013 fprintf(stderr, "Stats composition differs between '%s' and '%s' (%s != %s)!\n",
2014 env.filenames[0], env.filenames[1],
2015 stat_defs[base_specs.ids[i]].names[0],
2016 stat_defs[comp_specs.ids[i]].names[0]);
2017 return -EINVAL;
2018 }
2019 }
2020
2021 /* Replace user-specified sorting spec with file+prog sorting rule to
2022 * be able to join two datasets correctly. Once we are done, we will
2023 * restore the original sort spec.
2024 */
2025 tmp_sort_spec = env.sort_spec;
2026 env.sort_spec = join_sort_spec;
2027 qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
2028 qsort(env.baseline_stats, env.baseline_stat_cnt, sizeof(*env.baseline_stats), cmp_prog_stats);
2029 env.sort_spec = tmp_sort_spec;
2030
2031 /* Join two datasets together. If baseline and comparison datasets
2032 * have different subset of rows (we match by 'object + prog' as
2033 * a unique key) then assume empty/missing/zero value for rows that
2034 * are missing in the opposite data set.
2035 */
2036 i = j = 0;
2037 while (i < env.baseline_stat_cnt || j < env.prog_stat_cnt) {
2038 const struct verif_stats *base, *comp;
2039 struct verif_stats_join *join;
2040 void *tmp;
2041 int r;
2042
2043 base = i < env.baseline_stat_cnt ? &env.baseline_stats[i] : &fallback_stats;
2044 comp = j < env.prog_stat_cnt ? &env.prog_stats[j] : &fallback_stats;
2045
2046 if (!base->file_name || !base->prog_name) {
2047 fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n",
2048 i, env.filenames[0]);
2049 return -EINVAL;
2050 }
2051 if (!comp->file_name || !comp->prog_name) {
2052 fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n",
2053 j, env.filenames[1]);
2054 return -EINVAL;
2055 }
2056
2057 tmp = realloc(env.join_stats, (env.join_stat_cnt + 1) * sizeof(*env.join_stats));
2058 if (!tmp)
2059 return -ENOMEM;
2060 env.join_stats = tmp;
2061
2062 join = &env.join_stats[env.join_stat_cnt];
2063 memset(join, 0, sizeof(*join));
2064
2065 r = cmp_stats_key(base, comp);
2066 if (r == 0) {
2067 join->file_name = base->file_name;
2068 join->prog_name = base->prog_name;
2069 join->stats_a = base;
2070 join->stats_b = comp;
2071 i++;
2072 j++;
2073 } else if (base != &fallback_stats && (comp == &fallback_stats || r < 0)) {
2074 join->file_name = base->file_name;
2075 join->prog_name = base->prog_name;
2076 join->stats_a = base;
2077 join->stats_b = NULL;
2078 i++;
2079 } else if (comp != &fallback_stats && (base == &fallback_stats || r > 0)) {
2080 join->file_name = comp->file_name;
2081 join->prog_name = comp->prog_name;
2082 join->stats_a = NULL;
2083 join->stats_b = comp;
2084 j++;
2085 } else {
2086 fprintf(stderr, "%s:%d: should never reach here i=%i, j=%i",
2087 __FILE__, __LINE__, i, j);
2088 return -EINVAL;
2089 }
2090 env.join_stat_cnt += 1;
2091 }
2092
2093 /* now sort joined results according to sort spec */
2094 qsort(env.join_stats, env.join_stat_cnt, sizeof(*env.join_stats), cmp_join_stats);
2095
2096 /* for human-readable table output we need to do extra pass to
2097 * calculate column widths, so we substitute current output format
2098 * with RESFMT_TABLE_CALCLEN and later revert it back to RESFMT_TABLE
2099 * and do everything again.
2100 */
2101 if (env.out_fmt == RESFMT_TABLE)
2102 cur_fmt = RESFMT_TABLE_CALCLEN;
2103 else
2104 cur_fmt = env.out_fmt;
2105
2106 one_more_time:
2107 output_comp_headers(cur_fmt);
2108
2109 last_idx = -1;
2110 cnt = 0;
2111 for (i = 0; i < env.join_stat_cnt; i++) {
2112 const struct verif_stats_join *join = &env.join_stats[i];
2113
2114 if (!should_output_join_stats(join))
2115 continue;
2116
2117 if (env.top_n && cnt >= env.top_n)
2118 break;
2119
2120 if (cur_fmt == RESFMT_TABLE_CALCLEN)
2121 last_idx = i;
2122
2123 output_comp_stats(join, cur_fmt, i == last_idx);
2124
2125 cnt++;
2126 }
2127
2128 if (cur_fmt == RESFMT_TABLE_CALCLEN) {
2129 cur_fmt = RESFMT_TABLE;
2130 goto one_more_time; /* ... this time with feeling */
2131 }
2132
2133 return 0;
2134 }
2135
is_stat_filter_matched(struct filter * f,const struct verif_stats * stats)2136 static bool is_stat_filter_matched(struct filter *f, const struct verif_stats *stats)
2137 {
2138 long value = stats->stats[f->stat_id];
2139
2140 if (f->abs)
2141 value = value < 0 ? -value : value;
2142
2143 switch (f->op) {
2144 case OP_EQ: return value == f->value;
2145 case OP_NEQ: return value != f->value;
2146 case OP_LT: return value < f->value;
2147 case OP_LE: return value <= f->value;
2148 case OP_GT: return value > f->value;
2149 case OP_GE: return value >= f->value;
2150 }
2151
2152 fprintf(stderr, "BUG: unknown filter op %d!\n", f->op);
2153 return false;
2154 }
2155
should_output_stats(const struct verif_stats * stats)2156 static bool should_output_stats(const struct verif_stats *stats)
2157 {
2158 struct filter *f;
2159 int i, allow_cnt = 0;
2160
2161 for (i = 0; i < env.deny_filter_cnt; i++) {
2162 f = &env.deny_filters[i];
2163 if (f->kind != FILTER_STAT)
2164 continue;
2165
2166 if (is_stat_filter_matched(f, stats))
2167 return false;
2168 }
2169
2170 for (i = 0; i < env.allow_filter_cnt; i++) {
2171 f = &env.allow_filters[i];
2172 if (f->kind != FILTER_STAT)
2173 continue;
2174 allow_cnt++;
2175
2176 if (is_stat_filter_matched(f, stats))
2177 return true;
2178 }
2179
2180 /* if there are no stat allowed filters, pass everything through */
2181 return allow_cnt == 0;
2182 }
2183
output_prog_stats(void)2184 static void output_prog_stats(void)
2185 {
2186 const struct verif_stats *stats;
2187 int i, last_stat_idx = 0, cnt = 0;
2188
2189 if (env.out_fmt == RESFMT_TABLE) {
2190 /* calculate column widths */
2191 output_headers(RESFMT_TABLE_CALCLEN);
2192 for (i = 0; i < env.prog_stat_cnt; i++) {
2193 stats = &env.prog_stats[i];
2194 if (!should_output_stats(stats))
2195 continue;
2196 output_stats(stats, RESFMT_TABLE_CALCLEN, false);
2197 last_stat_idx = i;
2198 }
2199 }
2200
2201 /* actually output the table */
2202 output_headers(env.out_fmt);
2203 for (i = 0; i < env.prog_stat_cnt; i++) {
2204 stats = &env.prog_stats[i];
2205 if (!should_output_stats(stats))
2206 continue;
2207 if (env.top_n && cnt >= env.top_n)
2208 break;
2209 output_stats(stats, env.out_fmt, i == last_stat_idx);
2210 cnt++;
2211 }
2212 }
2213
handle_verif_mode(void)2214 static int handle_verif_mode(void)
2215 {
2216 int i, err;
2217
2218 if (env.filename_cnt == 0) {
2219 fprintf(stderr, "Please provide path to BPF object file!\n\n");
2220 argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2221 return -EINVAL;
2222 }
2223
2224 for (i = 0; i < env.filename_cnt; i++) {
2225 err = process_obj(env.filenames[i]);
2226 if (err) {
2227 fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err);
2228 return err;
2229 }
2230 }
2231
2232 qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
2233
2234 output_prog_stats();
2235
2236 return 0;
2237 }
2238
handle_replay_mode(void)2239 static int handle_replay_mode(void)
2240 {
2241 struct stat_specs specs = {};
2242 int err;
2243
2244 if (env.filename_cnt != 1) {
2245 fprintf(stderr, "Replay mode expects exactly one input CSV file!\n\n");
2246 argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2247 return -EINVAL;
2248 }
2249
2250 err = parse_stats_csv(env.filenames[0], &specs,
2251 &env.prog_stats, &env.prog_stat_cnt);
2252 if (err) {
2253 fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err);
2254 return err;
2255 }
2256
2257 qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
2258
2259 output_prog_stats();
2260
2261 return 0;
2262 }
2263
main(int argc,char ** argv)2264 int main(int argc, char **argv)
2265 {
2266 int err = 0, i;
2267
2268 if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
2269 return 1;
2270
2271 if (env.show_version) {
2272 printf("%s\n", argp_program_version);
2273 return 0;
2274 }
2275
2276 if (env.verbose && env.quiet) {
2277 fprintf(stderr, "Verbose and quiet modes are incompatible, please specify just one or neither!\n\n");
2278 argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2279 return 1;
2280 }
2281 if (env.verbose && env.log_level == 0)
2282 env.log_level = 1;
2283
2284 if (env.output_spec.spec_cnt == 0) {
2285 if (env.out_fmt == RESFMT_CSV)
2286 env.output_spec = default_csv_output_spec;
2287 else
2288 env.output_spec = default_output_spec;
2289 }
2290 if (env.sort_spec.spec_cnt == 0)
2291 env.sort_spec = default_sort_spec;
2292
2293 if (env.comparison_mode && env.replay_mode) {
2294 fprintf(stderr, "Can't specify replay and comparison mode at the same time!\n\n");
2295 argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2296 return 1;
2297 }
2298
2299 if (env.comparison_mode)
2300 err = handle_comparison_mode();
2301 else if (env.replay_mode)
2302 err = handle_replay_mode();
2303 else
2304 err = handle_verif_mode();
2305
2306 free_verif_stats(env.prog_stats, env.prog_stat_cnt);
2307 free_verif_stats(env.baseline_stats, env.baseline_stat_cnt);
2308 free(env.join_stats);
2309 for (i = 0; i < env.filename_cnt; i++)
2310 free(env.filenames[i]);
2311 free(env.filenames);
2312 for (i = 0; i < env.allow_filter_cnt; i++) {
2313 free(env.allow_filters[i].any_glob);
2314 free(env.allow_filters[i].file_glob);
2315 free(env.allow_filters[i].prog_glob);
2316 }
2317 free(env.allow_filters);
2318 for (i = 0; i < env.deny_filter_cnt; i++) {
2319 free(env.deny_filters[i].any_glob);
2320 free(env.deny_filters[i].file_glob);
2321 free(env.deny_filters[i].prog_glob);
2322 }
2323 free(env.deny_filters);
2324 return -err;
2325 }
2326