xref: /linux/tools/testing/selftests/bpf/veristat.c (revision 001821b0e79716c4e17c71d8e053a23599a7a508)
1 // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
2 /* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
3 #define _GNU_SOURCE
4 #include <argp.h>
5 #include <string.h>
6 #include <stdlib.h>
7 #include <sched.h>
8 #include <pthread.h>
9 #include <dirent.h>
10 #include <signal.h>
11 #include <fcntl.h>
12 #include <unistd.h>
13 #include <sys/time.h>
14 #include <sys/sysinfo.h>
15 #include <sys/stat.h>
16 #include <bpf/libbpf.h>
17 #include <bpf/btf.h>
18 #include <libelf.h>
19 #include <gelf.h>
20 #include <float.h>
21 #include <math.h>
22 
/* Number of elements in a true array (not a pointer); only defined here if
 * no earlier header provided it.
 */
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif
26 
/* Identifiers of all stats known to veristat.
 *
 * Numeric stats come first, so their IDs can be used directly as indices
 * into verif_stats.stats[]; the string-valued FILE_NAME and PROG_NAME
 * follow them.
 */
enum stat_id {
	VERDICT, /* stored as 1 for success, 0 for failure */
	DURATION,
	TOTAL_INSNS,
	TOTAL_STATES,
	PEAK_STATES,
	MAX_STATES_PER_INSN,
	MARK_READ_MAX_LEN,

	FILE_NAME,
	PROG_NAME,

	ALL_STATS_CNT, /* count of all stat IDs above (numeric and string) */
	NUM_STATS_CNT = FILE_NAME - VERDICT, /* count of numeric stats only */
};
42 
43 /* In comparison mode each stat can specify up to four different values:
44  *   - A side value;
45  *   - B side value;
46  *   - absolute diff value;
47  *   - relative (percentage) diff value.
48  *
49  * When specifying stat specs in comparison mode, user can use one of the
50  * following variant suffixes to specify which exact variant should be used for
51  * ordering or filtering:
52  *   - `_a` for A side value;
53  *   - `_b` for B side value;
54  *   - `_diff` for absolute diff value;
55  *   - `_pct` for relative (percentage) diff value.
56  *
57  * If no variant suffix is provided, then `_b` (control data) is assumed.
58  *
59  * As an example, let's say instructions stat has the following output:
60  *
61  * Insns (A)  Insns (B)  Insns   (DIFF)
62  * ---------  ---------  --------------
63  * 21547      20920       -627 (-2.91%)
64  *
65  * Then:
66  *   - 21547 is A side value (insns_a);
67  *   - 20920 is B side value (insns_b);
68  *   - -627 is absolute diff value (insns_diff);
69  *   - -2.91% is relative diff value (insns_pct).
70  *
71  * For verdict there is no verdict_pct variant.
72  * For file and program name, _a and _b variants are equivalent and there are
73  * no _diff or _pct variants.
74  */
/* Which of the comparison-mode values of a stat is being referred to. The
 * enumerator order is relied upon by the suffix table in
 * parse_stat_id_var(), which is indexed by these values.
 */
enum stat_variant {
	VARIANT_A,	/* `_a` suffix: A side value */
	VARIANT_B,	/* `_b` suffix (also the default): B side value */
	VARIANT_DIFF,	/* `_diff` suffix: absolute diff value */
	VARIANT_PCT,	/* `_pct` suffix: relative (percentage) diff value */
};
81 
/* Verification results of a single BPF program. */
struct verif_stats {
	/* heap-allocated strings, released by free_verif_stats() */
	char *file_name;
	char *prog_name;

	/* numeric stats, indexed by the numeric members of enum stat_id */
	long stats[NUM_STATS_CNT];
};
88 
89 /* joined comparison mode stats */
struct verif_stats_join {
	char *file_name;
	char *prog_name;

	/* NOTE(review): presumably the A side (baseline) and B side
	 * (comparison) entries for the same file/program; either side being
	 * absent is not ruled out by anything visible here -- confirm against
	 * the code that builds the join.
	 */
	const struct verif_stats *stats_a;
	const struct verif_stats *stats_b;
};
97 
/* An ordered list of stat specifications, as parsed by parse_stat(); used
 * for both output columns and sort keys. Entries [0, spec_cnt) of the
 * parallel arrays below are valid.
 */
struct stat_specs {
	int spec_cnt;				/* number of valid entries */
	enum stat_id ids[ALL_STATS_CNT];	/* which stat */
	enum stat_variant variants[ALL_STATS_CNT]; /* which comparison-mode variant */
	bool asc[ALL_STATS_CNT];		/* true for ascending order */
	bool abs[ALL_STATS_CNT];		/* true if |<stat>| syntax was used */
	int lens[ALL_STATS_CNT * 3]; /* 3x for comparison mode */
};
106 
/* Result output format; RESFMT_TABLE and RESFMT_CSV are selectable with
 * -o/--output-format ("table" and "csv", respectively).
 */
enum resfmt {
	RESFMT_TABLE,
	RESFMT_TABLE_CALCLEN, /* fake format to pre-calculate table's column widths */
	RESFMT_CSV,
};
112 
/* Discriminator for struct filter. */
enum filter_kind {
	FILTER_NAME,	/* file and/or program name glob */
	FILTER_STAT,	/* <stat><op><value> numeric condition */
};
117 
/* Comparison operators accepted in stat filters; their textual spellings
 * are listed in the `operators` table.
 */
enum operator_kind {
	OP_EQ,		/* == or = */
	OP_NEQ,		/* != or <> */
	OP_LT,		/* < */
	OP_LE,		/* <= */
	OP_GT,		/* > */
	OP_GE,		/* >= */
};
126 
/* A single allow/deny filter, filled in by append_filter(). Which group of
 * fields is meaningful depends on `kind`.
 */
struct filter {
	enum filter_kind kind;
	/* FILTER_NAME */
	char *any_glob;		/* '<glob>' form: applied to file and prog names */
	char *file_glob;	/* '<file-glob>/...' part, NULL if empty */
	char *prog_glob;	/* '.../<prog-glob>' part, NULL if empty */
	/* FILTER_STAT */
	enum operator_kind op;
	int stat_id;		/* always a numeric stat (below FILE_NAME) */
	enum stat_variant stat_var;
	long value;		/* right-hand side of the comparison */
	bool abs;
};
140 
/* Global tool state: command-line settings filled in by parse_arg(), plus
 * collected results and processing counters.
 */
static struct env {
	/* positional arguments, each one strdup()'ed */
	char **filenames;
	int filename_cnt;
	bool verbose;			/* -v (also implied by -d) */
	bool debug;			/* -d */
	bool quiet;			/* -q */
	bool force_checkpoints;		/* -t */
	bool force_reg_invariants;	/* -r */
	enum resfmt out_fmt;		/* -o */
	bool show_version;		/* -V */
	bool comparison_mode;		/* -C */
	bool replay_mode;		/* -R */
	int top_n;			/* -n */

	int log_level;			/* -l */
	int log_size;			/* --log-size, 0 means "use default" */
	bool log_fixed;			/* --log-fixed */

	/* one entry per processed program, appended by process_prog() */
	struct verif_stats *prog_stats;
	int prog_stat_cnt;

	/* baseline_stats is allocated and used only in comparison mode */
	struct verif_stats *baseline_stats;
	int baseline_stat_cnt;

	struct verif_stats_join *join_stats;
	int join_stat_cnt;

	struct stat_specs output_spec;	/* -e */
	struct stat_specs sort_spec;	/* -s */

	/* -f filters; a leading '!' puts a filter into deny_filters */
	struct filter *allow_filters;
	struct filter *deny_filters;
	int allow_filter_cnt;
	int deny_filter_cnt;

	/* processing counters */
	int files_processed;
	int files_skipped;
	int progs_processed;
	int progs_skipped;
} env;
182 
183 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
184 {
185 	if (!env.verbose)
186 		return 0;
187 	if (level == LIBBPF_DEBUG  && !env.debug)
188 		return 0;
189 	return vfprintf(stderr, format, args);
190 }
191 
/* Version suffix; can be overridden at build time, defaults to "<kernel>". */
#ifndef VERISTAT_VERSION
#define VERISTAT_VERSION "<kernel>"
#endif

/* Global names follow glibc argp conventions for version and bug address. */
const char *argp_program_version = "veristat v" VERISTAT_VERSION;
const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
/* Usage text, wired into `argp` as its .doc member. */
const char argp_program_doc[] =
"veristat    BPF verifier stats collection and comparison tool.\n"
"\n"
"USAGE: veristat <obj-file> [<obj-file>...]\n"
"   OR: veristat -C <baseline.csv> <comparison.csv>\n"
"   OR: veristat -R <results.csv>\n";
204 
/* Keys for long-only options (no single-character short form). */
enum {
	OPT_LOG_FIXED = 1000,
	OPT_LOG_SIZE = 1001,
};
209 
/* Command-line option table; each key is handled in parse_arg(). */
static const struct argp_option opts[] = {
	{ NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
	{ "version", 'V', NULL, 0, "Print version" },
	{ "verbose", 'v', NULL, 0, "Verbose mode" },
	{ "debug", 'd', NULL, 0, "Debug mode (turns on libbpf debug logging)" },
	{ "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" },
	{ "log-fixed", OPT_LOG_FIXED, NULL, 0, "Disable verifier log rotation" },
	{ "log-size", OPT_LOG_SIZE, "BYTES", 0, "Customize verifier log size (default to 16MB)" },
	{ "top-n", 'n', "N", 0, "Emit only up to first N results." },
	{ "quiet", 'q', NULL, 0, "Quiet mode" },
	{ "emit", 'e', "SPEC", 0, "Specify stats to be emitted" },
	{ "sort", 's', "SPEC", 0, "Specify sort order" },
	{ "output-format", 'o', "FMT", 0, "Result output format (table, csv), default is table." },
	{ "compare", 'C', NULL, 0, "Comparison mode" },
	{ "replay", 'R', NULL, 0, "Replay mode" },
	{ "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)." },
	{ "test-states", 't', NULL, 0,
	  "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" },
	{ "test-reg-invariants", 'r', NULL, 0,
	  "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" },
	{}, /* terminator */
};
232 
/* Forward declarations of helpers used by parse_arg() and defined below. */
static int parse_stats(const char *stats_str, struct stat_specs *specs);
static int append_filter(struct filter **filters, int *cnt, const char *str);
static int append_filter_file(const char *path);
236 
237 static error_t parse_arg(int key, char *arg, struct argp_state *state)
238 {
239 	void *tmp;
240 	int err;
241 
242 	switch (key) {
243 	case 'h':
244 		argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
245 		break;
246 	case 'V':
247 		env.show_version = true;
248 		break;
249 	case 'v':
250 		env.verbose = true;
251 		break;
252 	case 'd':
253 		env.debug = true;
254 		env.verbose = true;
255 		break;
256 	case 'q':
257 		env.quiet = true;
258 		break;
259 	case 'e':
260 		err = parse_stats(arg, &env.output_spec);
261 		if (err)
262 			return err;
263 		break;
264 	case 's':
265 		err = parse_stats(arg, &env.sort_spec);
266 		if (err)
267 			return err;
268 		break;
269 	case 'o':
270 		if (strcmp(arg, "table") == 0) {
271 			env.out_fmt = RESFMT_TABLE;
272 		} else if (strcmp(arg, "csv") == 0) {
273 			env.out_fmt = RESFMT_CSV;
274 		} else {
275 			fprintf(stderr, "Unrecognized output format '%s'\n", arg);
276 			return -EINVAL;
277 		}
278 		break;
279 	case 'l':
280 		errno = 0;
281 		env.log_level = strtol(arg, NULL, 10);
282 		if (errno) {
283 			fprintf(stderr, "invalid log level: %s\n", arg);
284 			argp_usage(state);
285 		}
286 		break;
287 	case OPT_LOG_FIXED:
288 		env.log_fixed = true;
289 		break;
290 	case OPT_LOG_SIZE:
291 		errno = 0;
292 		env.log_size = strtol(arg, NULL, 10);
293 		if (errno) {
294 			fprintf(stderr, "invalid log size: %s\n", arg);
295 			argp_usage(state);
296 		}
297 		break;
298 	case 't':
299 		env.force_checkpoints = true;
300 		break;
301 	case 'r':
302 		env.force_reg_invariants = true;
303 		break;
304 	case 'n':
305 		errno = 0;
306 		env.top_n = strtol(arg, NULL, 10);
307 		if (errno) {
308 			fprintf(stderr, "invalid top N specifier: %s\n", arg);
309 			argp_usage(state);
310 		}
311 	case 'C':
312 		env.comparison_mode = true;
313 		break;
314 	case 'R':
315 		env.replay_mode = true;
316 		break;
317 	case 'f':
318 		if (arg[0] == '@')
319 			err = append_filter_file(arg + 1);
320 		else if (arg[0] == '!')
321 			err = append_filter(&env.deny_filters, &env.deny_filter_cnt, arg + 1);
322 		else
323 			err = append_filter(&env.allow_filters, &env.allow_filter_cnt, arg);
324 		if (err) {
325 			fprintf(stderr, "Failed to collect program filter expressions: %d\n", err);
326 			return err;
327 		}
328 		break;
329 	case ARGP_KEY_ARG:
330 		tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames));
331 		if (!tmp)
332 			return -ENOMEM;
333 		env.filenames = tmp;
334 		env.filenames[env.filename_cnt] = strdup(arg);
335 		if (!env.filenames[env.filename_cnt])
336 			return -ENOMEM;
337 		env.filename_cnt++;
338 		break;
339 	default:
340 		return ARGP_ERR_UNKNOWN;
341 	}
342 	return 0;
343 }
344 
/* argp parser description tying together the option table, the per-option
 * callback and the usage text.
 */
static const struct argp argp = {
	.options = opts,
	.parser = parse_arg,
	.doc = argp_program_doc,
};
350 
351 
/* Adapted from perf/util/string.c
 *
 * Match @str against glob pattern @pat, in which '*' matches any (possibly
 * empty) sequence of characters and every other character matches itself.
 * The whole string has to match. Returns true on match.
 */
static bool glob_matches(const char *str, const char *pat)
{
	const char *s = str, *g = pat;

	/* the literal part of the pattern has to match char for char */
	for (; *s != '\0' && *g != '\0' && *g != '*'; s++, g++) {
		if (*s != *g)
			return false;
	}

	/* no wild card reached: both have to be exhausted together */
	if (*g != '*')
		return *s == '\0' && *g == '\0';

	/* a run of consecutive wild cards is the same as a single one */
	do {
		g++;
	} while (*g == '*');

	/* tail wild card matches everything that is left */
	if (*g == '\0')
		return true;

	/* try to match the rest of the pattern at every remaining position */
	for (; *s != '\0'; s++) {
		if (glob_matches(s, g))
			return true;
	}

	/* string is exhausted, but non-wildcard pattern chars remain */
	return false;
}
373 
374 static bool is_bpf_obj_file(const char *path) {
375 	Elf64_Ehdr *ehdr;
376 	int fd, err = -EINVAL;
377 	Elf *elf = NULL;
378 
379 	fd = open(path, O_RDONLY | O_CLOEXEC);
380 	if (fd < 0)
381 		return true; /* we'll fail later and propagate error */
382 
383 	/* ensure libelf is initialized */
384 	(void)elf_version(EV_CURRENT);
385 
386 	elf = elf_begin(fd, ELF_C_READ, NULL);
387 	if (!elf)
388 		goto cleanup;
389 
390 	if (elf_kind(elf) != ELF_K_ELF || gelf_getclass(elf) != ELFCLASS64)
391 		goto cleanup;
392 
393 	ehdr = elf64_getehdr(elf);
394 	/* Old LLVM set e_machine to EM_NONE */
395 	if (!ehdr || ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF))
396 		goto cleanup;
397 
398 	err = 0;
399 cleanup:
400 	if (elf)
401 		elf_end(elf);
402 	close(fd);
403 	return err == 0;
404 }
405 
406 static bool should_process_file_prog(const char *filename, const char *prog_name)
407 {
408 	struct filter *f;
409 	int i, allow_cnt = 0;
410 
411 	for (i = 0; i < env.deny_filter_cnt; i++) {
412 		f = &env.deny_filters[i];
413 		if (f->kind != FILTER_NAME)
414 			continue;
415 
416 		if (f->any_glob && glob_matches(filename, f->any_glob))
417 			return false;
418 		if (f->any_glob && prog_name && glob_matches(prog_name, f->any_glob))
419 			return false;
420 		if (f->file_glob && glob_matches(filename, f->file_glob))
421 			return false;
422 		if (f->prog_glob && prog_name && glob_matches(prog_name, f->prog_glob))
423 			return false;
424 	}
425 
426 	for (i = 0; i < env.allow_filter_cnt; i++) {
427 		f = &env.allow_filters[i];
428 		if (f->kind != FILTER_NAME)
429 			continue;
430 
431 		allow_cnt++;
432 		if (f->any_glob) {
433 			if (glob_matches(filename, f->any_glob))
434 				return true;
435 			/* If we don't know program name yet, any_glob filter
436 			 * has to assume that current BPF object file might be
437 			 * relevant; we'll check again later on after opening
438 			 * BPF object file, at which point program name will
439 			 * be known finally.
440 			 */
441 			if (!prog_name || glob_matches(prog_name, f->any_glob))
442 				return true;
443 		} else {
444 			if (f->file_glob && !glob_matches(filename, f->file_glob))
445 				continue;
446 			if (f->prog_glob && prog_name && !glob_matches(prog_name, f->prog_glob))
447 				continue;
448 			return true;
449 		}
450 	}
451 
452 	/* if there are no file/prog name allow filters, allow all progs,
453 	 * unless they are denied earlier explicitly
454 	 */
455 	return allow_cnt == 0;
456 }
457 
/* Textual spellings of stat filter operators; append_filter() tries them
 * in array order and uses the first one found in the filter string.
 */
static struct {
	enum operator_kind op_kind;
	const char *op_str;
} operators[] = {
	/* Order of these definitions matter to avoid situations like '<'
	 * matching part of what is actually a '<>' operator. That is,
	 * substrings should go last.
	 */
	{ OP_EQ, "==" },
	{ OP_NEQ, "!=" },
	{ OP_NEQ, "<>" },
	{ OP_LE, "<=" },
	{ OP_LT, "<" },
	{ OP_GE, ">=" },
	{ OP_GT, ">" },
	{ OP_EQ, "=" },
};
475 
/* Forward declaration; needed by append_filter(), defined further below. */
static bool parse_stat_id_var(const char *name, size_t len, int *id,
			      enum stat_variant *var, bool *is_abs);
478 
479 static int append_filter(struct filter **filters, int *cnt, const char *str)
480 {
481 	struct filter *f;
482 	void *tmp;
483 	const char *p;
484 	int i;
485 
486 	tmp = realloc(*filters, (*cnt + 1) * sizeof(**filters));
487 	if (!tmp)
488 		return -ENOMEM;
489 	*filters = tmp;
490 
491 	f = &(*filters)[*cnt];
492 	memset(f, 0, sizeof(*f));
493 
494 	/* First, let's check if it's a stats filter of the following form:
495 	 * <stat><op><value, where:
496 	 *   - <stat> is one of supported numerical stats (verdict is also
497 	 *     considered numerical, failure == 0, success == 1);
498 	 *   - <op> is comparison operator (see `operators` definitions);
499 	 *   - <value> is an integer (or failure/success, or false/true as
500 	 *     special aliases for 0 and 1, respectively).
501 	 * If the form doesn't match what user provided, we assume file/prog
502 	 * glob filter.
503 	 */
504 	for (i = 0; i < ARRAY_SIZE(operators); i++) {
505 		enum stat_variant var;
506 		int id;
507 		long val;
508 		const char *end = str;
509 		const char *op_str;
510 		bool is_abs;
511 
512 		op_str = operators[i].op_str;
513 		p = strstr(str, op_str);
514 		if (!p)
515 			continue;
516 
517 		if (!parse_stat_id_var(str, p - str, &id, &var, &is_abs)) {
518 			fprintf(stderr, "Unrecognized stat name in '%s'!\n", str);
519 			return -EINVAL;
520 		}
521 		if (id >= FILE_NAME) {
522 			fprintf(stderr, "Non-integer stat is specified in '%s'!\n", str);
523 			return -EINVAL;
524 		}
525 
526 		p += strlen(op_str);
527 
528 		if (strcasecmp(p, "true") == 0 ||
529 		    strcasecmp(p, "t") == 0 ||
530 		    strcasecmp(p, "success") == 0 ||
531 		    strcasecmp(p, "succ") == 0 ||
532 		    strcasecmp(p, "s") == 0 ||
533 		    strcasecmp(p, "match") == 0 ||
534 		    strcasecmp(p, "m") == 0) {
535 			val = 1;
536 		} else if (strcasecmp(p, "false") == 0 ||
537 			   strcasecmp(p, "f") == 0 ||
538 			   strcasecmp(p, "failure") == 0 ||
539 			   strcasecmp(p, "fail") == 0 ||
540 			   strcasecmp(p, "mismatch") == 0 ||
541 			   strcasecmp(p, "mis") == 0) {
542 			val = 0;
543 		} else {
544 			errno = 0;
545 			val = strtol(p, (char **)&end, 10);
546 			if (errno || end == p || *end != '\0' ) {
547 				fprintf(stderr, "Invalid integer value in '%s'!\n", str);
548 				return -EINVAL;
549 			}
550 		}
551 
552 		f->kind = FILTER_STAT;
553 		f->stat_id = id;
554 		f->stat_var = var;
555 		f->op = operators[i].op_kind;
556 		f->abs = true;
557 		f->value = val;
558 
559 		*cnt += 1;
560 		return 0;
561 	}
562 
563 	/* File/prog filter can be specified either as '<glob>' or
564 	 * '<file-glob>/<prog-glob>'. In the former case <glob> is applied to
565 	 * both file and program names. This seems to be way more useful in
566 	 * practice. If user needs full control, they can use '/<prog-glob>'
567 	 * form to glob just program name, or '<file-glob>/' to glob only file
568 	 * name. But usually common <glob> seems to be the most useful and
569 	 * ergonomic way.
570 	 */
571 	f->kind = FILTER_NAME;
572 	p = strchr(str, '/');
573 	if (!p) {
574 		f->any_glob = strdup(str);
575 		if (!f->any_glob)
576 			return -ENOMEM;
577 	} else {
578 		if (str != p) {
579 			/* non-empty file glob */
580 			f->file_glob = strndup(str, p - str);
581 			if (!f->file_glob)
582 				return -ENOMEM;
583 		}
584 		if (strlen(p + 1) > 0) {
585 			/* non-empty prog glob */
586 			f->prog_glob = strdup(p + 1);
587 			if (!f->prog_glob) {
588 				free(f->file_glob);
589 				f->file_glob = NULL;
590 				return -ENOMEM;
591 			}
592 		}
593 	}
594 
595 	*cnt += 1;
596 	return 0;
597 }
598 
599 static int append_filter_file(const char *path)
600 {
601 	char buf[1024];
602 	FILE *f;
603 	int err = 0;
604 
605 	f = fopen(path, "r");
606 	if (!f) {
607 		err = -errno;
608 		fprintf(stderr, "Failed to open filters in '%s': %d\n", path, err);
609 		return err;
610 	}
611 
612 	while (fscanf(f, " %1023[^\n]\n", buf) == 1) {
613 		/* lines starting with # are comments, skip them */
614 		if (buf[0] == '\0' || buf[0] == '#')
615 			continue;
616 		/* lines starting with ! are negative match filters */
617 		if (buf[0] == '!')
618 			err = append_filter(&env.deny_filters, &env.deny_filter_cnt, buf + 1);
619 		else
620 			err = append_filter(&env.allow_filters, &env.allow_filter_cnt, buf);
621 		if (err)
622 			goto cleanup;
623 	}
624 
625 cleanup:
626 	fclose(f);
627 	return err;
628 }
629 
/* Output spec listing seven stats; judging by its name, presumably used
 * when no -e spec is given -- the selection logic is not visible here.
 */
static const struct stat_specs default_output_spec = {
	.spec_cnt = 7,
	.ids = {
		FILE_NAME, PROG_NAME, VERDICT, DURATION,
		TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
	},
};
637 
/* Output spec listing all nine stats; judging by its name, presumably the
 * CSV counterpart of default_output_spec -- confirm against its users.
 */
static const struct stat_specs default_csv_output_spec = {
	.spec_cnt = 9,
	.ids = {
		FILE_NAME, PROG_NAME, VERDICT, DURATION,
		TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
		MAX_STATES_PER_INSN, MARK_READ_MAX_LEN,
	},
};
646 
/* Sort spec: by file name, then by program name, both ascending. */
static const struct stat_specs default_sort_spec = {
	.spec_cnt = 2,
	.ids = {
		FILE_NAME, PROG_NAME,
	},
	.asc = { true, true, },
};
654 
655 /* sorting for comparison mode to join two data sets */
static const struct stat_specs join_sort_spec = {
	.spec_cnt = 2,
	/* file name first, then program name */
	.ids = {
		FILE_NAME, PROG_NAME,
	},
	/* both keys in ascending order */
	.asc = { true, true, },
};
663 
/* Static description of every stat, indexed by enum stat_id. */
static struct stat_def {
	const char *header;	/* human-readable column header */
	const char *names[4];	/* accepted spellings; unused slots are NULL */
	bool asc_by_default;	/* sort order if none is given explicitly */
	bool left_aligned;
} stat_defs[] = {
	[FILE_NAME] = { "File", {"file_name", "filename", "file"}, true /* asc */, true /* left */ },
	[PROG_NAME] = { "Program", {"prog_name", "progname", "prog"}, true /* asc */, true /* left */ },
	[VERDICT] = { "Verdict", {"verdict"}, true /* asc: failure, success */, true /* left */ },
	[DURATION] = { "Duration (us)", {"duration", "dur"}, },
	[TOTAL_INSNS] = { "Insns", {"total_insns", "insns"}, },
	[TOTAL_STATES] = { "States", {"total_states", "states"}, },
	[PEAK_STATES] = { "Peak states", {"peak_states"}, },
	[MAX_STATES_PER_INSN] = { "Max states per insn", {"max_states_per_insn"}, },
	[MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, },
};
680 
681 static bool parse_stat_id_var(const char *name, size_t len, int *id,
682 			      enum stat_variant *var, bool *is_abs)
683 {
684 	static const char *var_sfxs[] = {
685 		[VARIANT_A] = "_a",
686 		[VARIANT_B] = "_b",
687 		[VARIANT_DIFF] = "_diff",
688 		[VARIANT_PCT] = "_pct",
689 	};
690 	int i, j, k;
691 
692 	/* |<stat>| means we take absolute value of given stat */
693 	*is_abs = false;
694 	if (len > 2 && name[0] == '|' && name[len - 1] == '|') {
695 		*is_abs = true;
696 		name += 1;
697 		len -= 2;
698 	}
699 
700 	for (i = 0; i < ARRAY_SIZE(stat_defs); i++) {
701 		struct stat_def *def = &stat_defs[i];
702 		size_t alias_len, sfx_len;
703 		const char *alias;
704 
705 		for (j = 0; j < ARRAY_SIZE(stat_defs[i].names); j++) {
706 			alias = def->names[j];
707 			if (!alias)
708 				continue;
709 
710 			alias_len = strlen(alias);
711 			if (strncmp(name, alias, alias_len) != 0)
712 				continue;
713 
714 			if (alias_len == len) {
715 				/* If no variant suffix is specified, we
716 				 * assume control group (just in case we are
717 				 * in comparison mode. Variant is ignored in
718 				 * non-comparison mode.
719 				 */
720 				*var = VARIANT_B;
721 				*id = i;
722 				return true;
723 			}
724 
725 			for (k = 0; k < ARRAY_SIZE(var_sfxs); k++) {
726 				sfx_len = strlen(var_sfxs[k]);
727 				if (alias_len + sfx_len != len)
728 					continue;
729 
730 				if (strncmp(name + alias_len, var_sfxs[k], sfx_len) == 0) {
731 					*var = (enum stat_variant)k;
732 					*id = i;
733 					return true;
734 				}
735 			}
736 		}
737 	}
738 
739 	return false;
740 }
741 
/* Is @c the trailing symbol ('^') requesting ascending sort order? */
static bool is_asc_sym(char c)
{
	switch (c) {
	case '^':
		return true;
	default:
		return false;
	}
}
746 
/* Is @c one of the trailing symbols requesting descending sort order? */
static bool is_desc_sym(char c)
{
	switch (c) {
	case 'v':
	case 'V':
	case '.':
	case '!':
	case '_':
		return true;
	default:
		return false;
	}
}
751 
752 static int parse_stat(const char *stat_name, struct stat_specs *specs)
753 {
754 	int id;
755 	bool has_order = false, is_asc = false, is_abs = false;
756 	size_t len = strlen(stat_name);
757 	enum stat_variant var;
758 
759 	if (specs->spec_cnt >= ARRAY_SIZE(specs->ids)) {
760 		fprintf(stderr, "Can't specify more than %zd stats\n", ARRAY_SIZE(specs->ids));
761 		return -E2BIG;
762 	}
763 
764 	if (len > 1 && (is_asc_sym(stat_name[len - 1]) || is_desc_sym(stat_name[len - 1]))) {
765 		has_order = true;
766 		is_asc = is_asc_sym(stat_name[len - 1]);
767 		len -= 1;
768 	}
769 
770 	if (!parse_stat_id_var(stat_name, len, &id, &var, &is_abs)) {
771 		fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name);
772 		return -ESRCH;
773 	}
774 
775 	specs->ids[specs->spec_cnt] = id;
776 	specs->variants[specs->spec_cnt] = var;
777 	specs->asc[specs->spec_cnt] = has_order ? is_asc : stat_defs[id].asc_by_default;
778 	specs->abs[specs->spec_cnt] = is_abs;
779 	specs->spec_cnt++;
780 
781 	return 0;
782 }
783 
/* Parse a comma-separated list of stat specs and append each of them to
 * @specs (see parse_stat() for the format of a single spec).
 *
 * Returns 0 on success, -ENOMEM on allocation failure, or the first error
 * returned by parse_stat(); specs parsed before the failing one stay in
 * @specs.
 */
static int parse_stats(const char *stats_str, struct stat_specs *specs)
{
	char *input, *state = NULL, *next;
	int err = 0;

	/* strtok_r() modifies its input, so work on a private copy */
	input = strdup(stats_str);
	if (!input)
		return -ENOMEM;

	/* The save pointer is opaque and must not be used to tell the first
	 * call from the subsequent ones: some libc implementations reset it
	 * to NULL once the last token is returned, which would restart
	 * tokenization from the beginning.
	 */
	for (next = strtok_r(input, ",", &state); next; next = strtok_r(NULL, ",", &state)) {
		err = parse_stat(next, specs);
		if (err)
			break;
	}

	free(input);
	return err;
}
804 
805 static void free_verif_stats(struct verif_stats *stats, size_t stat_cnt)
806 {
807 	int i;
808 
809 	if (!stats)
810 		return;
811 
812 	for (i = 0; i < stat_cnt; i++) {
813 		free(stats[i].file_name);
814 		free(stats[i].prog_name);
815 	}
816 	free(stats);
817 }
818 
/* Static verifier log buffer used by process_prog() outside of verbose
 * mode, when only the verifier stats are requested.
 */
static char verif_log_buf[64 * 1024];

/* Upper bound on the number of log lines parse_verif_log() examines,
 * counted from the end of the log.
 */
#define MAX_PARSED_LOG_LINES 100
822 
823 static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *s)
824 {
825 	const char *cur;
826 	int pos, lines;
827 
828 	buf[buf_sz - 1] = '\0';
829 
830 	for (pos = strlen(buf) - 1, lines = 0; pos >= 0 && lines < MAX_PARSED_LOG_LINES; lines++) {
831 		/* find previous endline or otherwise take the start of log buf */
832 		for (cur = &buf[pos]; cur > buf && cur[0] != '\n'; cur--, pos--) {
833 		}
834 		/* next time start from end of previous line (or pos goes to <0) */
835 		pos--;
836 		/* if we found endline, point right after endline symbol;
837 		 * otherwise, stay at the beginning of log buf
838 		 */
839 		if (cur[0] == '\n')
840 			cur++;
841 
842 		if (1 == sscanf(cur, "verification time %ld usec\n", &s->stats[DURATION]))
843 			continue;
844 		if (6 == sscanf(cur, "processed %ld insns (limit %*d) max_states_per_insn %ld total_states %ld peak_states %ld mark_read %ld",
845 				&s->stats[TOTAL_INSNS],
846 				&s->stats[MAX_STATES_PER_INSN],
847 				&s->stats[TOTAL_STATES],
848 				&s->stats[PEAK_STATES],
849 				&s->stats[MARK_READ_MAX_LEN]))
850 			continue;
851 	}
852 
853 	return 0;
854 }
855 
856 static int guess_prog_type_by_ctx_name(const char *ctx_name,
857 				       enum bpf_prog_type *prog_type,
858 				       enum bpf_attach_type *attach_type)
859 {
860 	/* We need to guess program type based on its declared context type.
861 	 * This guess can't be perfect as many different program types might
862 	 * share the same context type.  So we can only hope to reasonably
863 	 * well guess this and get lucky.
864 	 *
865 	 * Just in case, we support both UAPI-side type names and
866 	 * kernel-internal names.
867 	 */
868 	static struct {
869 		const char *uapi_name;
870 		const char *kern_name;
871 		enum bpf_prog_type prog_type;
872 		enum bpf_attach_type attach_type;
873 	} ctx_map[] = {
874 		/* __sk_buff is most ambiguous, we assume TC program */
875 		{ "__sk_buff", "sk_buff", BPF_PROG_TYPE_SCHED_CLS },
876 		{ "bpf_sock", "sock", BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND },
877 		{ "bpf_sock_addr", "bpf_sock_addr_kern",  BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND },
878 		{ "bpf_sock_ops", "bpf_sock_ops_kern", BPF_PROG_TYPE_SOCK_OPS, BPF_CGROUP_SOCK_OPS },
879 		{ "sk_msg_md", "sk_msg", BPF_PROG_TYPE_SK_MSG, BPF_SK_MSG_VERDICT },
880 		{ "bpf_cgroup_dev_ctx", "bpf_cgroup_dev_ctx", BPF_PROG_TYPE_CGROUP_DEVICE, BPF_CGROUP_DEVICE },
881 		{ "bpf_sysctl", "bpf_sysctl_kern", BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_CGROUP_SYSCTL },
882 		{ "bpf_sockopt", "bpf_sockopt_kern", BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT },
883 		{ "sk_reuseport_md", "sk_reuseport_kern", BPF_PROG_TYPE_SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE },
884 		{ "bpf_sk_lookup", "bpf_sk_lookup_kern", BPF_PROG_TYPE_SK_LOOKUP, BPF_SK_LOOKUP },
885 		{ "xdp_md", "xdp_buff", BPF_PROG_TYPE_XDP, BPF_XDP },
886 		/* tracing types with no expected attach type */
887 		{ "bpf_user_pt_regs_t", "pt_regs", BPF_PROG_TYPE_KPROBE },
888 		{ "bpf_perf_event_data", "bpf_perf_event_data_kern", BPF_PROG_TYPE_PERF_EVENT },
889 		/* raw_tp programs use u64[] from kernel side, we don't want
890 		 * to match on that, probably; so NULL for kern-side type
891 		 */
892 		{ "bpf_raw_tracepoint_args", NULL, BPF_PROG_TYPE_RAW_TRACEPOINT },
893 	};
894 	int i;
895 
896 	if (!ctx_name)
897 		return -EINVAL;
898 
899 	for (i = 0; i < ARRAY_SIZE(ctx_map); i++) {
900 		if (strcmp(ctx_map[i].uapi_name, ctx_name) == 0 ||
901 		    (ctx_map[i].kern_name && strcmp(ctx_map[i].kern_name, ctx_name) == 0)) {
902 			*prog_type = ctx_map[i].prog_type;
903 			*attach_type = ctx_map[i].attach_type;
904 			return 0;
905 		}
906 	}
907 
908 	return -ESRCH;
909 }
910 
911 static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const char *filename)
912 {
913 	struct bpf_map *map;
914 
915 	bpf_object__for_each_map(map, obj) {
916 		/* disable pinning */
917 		bpf_map__set_pin_path(map, NULL);
918 
919 		/* fix up map size, if necessary */
920 		switch (bpf_map__type(map)) {
921 		case BPF_MAP_TYPE_SK_STORAGE:
922 		case BPF_MAP_TYPE_TASK_STORAGE:
923 		case BPF_MAP_TYPE_INODE_STORAGE:
924 		case BPF_MAP_TYPE_CGROUP_STORAGE:
925 			break;
926 		default:
927 			if (bpf_map__max_entries(map) == 0)
928 				bpf_map__set_max_entries(map, 1);
929 		}
930 	}
931 
932 	/* SEC(freplace) programs can't be loaded with veristat as is,
933 	 * but we can try guessing their target program's expected type by
934 	 * looking at the type of program's first argument and substituting
935 	 * corresponding program type
936 	 */
937 	if (bpf_program__type(prog) == BPF_PROG_TYPE_EXT) {
938 		const struct btf *btf = bpf_object__btf(obj);
939 		const char *prog_name = bpf_program__name(prog);
940 		enum bpf_prog_type prog_type;
941 		enum bpf_attach_type attach_type;
942 		const struct btf_type *t;
943 		const char *ctx_name;
944 		int id;
945 
946 		if (!btf)
947 			goto skip_freplace_fixup;
948 
949 		id = btf__find_by_name_kind(btf, prog_name, BTF_KIND_FUNC);
950 		t = btf__type_by_id(btf, id);
951 		t = btf__type_by_id(btf, t->type);
952 		if (!btf_is_func_proto(t) || btf_vlen(t) != 1)
953 			goto skip_freplace_fixup;
954 
955 		/* context argument is a pointer to a struct/typedef */
956 		t = btf__type_by_id(btf, btf_params(t)[0].type);
957 		while (t && btf_is_mod(t))
958 			t = btf__type_by_id(btf, t->type);
959 		if (!t || !btf_is_ptr(t))
960 			goto skip_freplace_fixup;
961 		t = btf__type_by_id(btf, t->type);
962 		while (t && btf_is_mod(t))
963 			t = btf__type_by_id(btf, t->type);
964 		if (!t)
965 			goto skip_freplace_fixup;
966 
967 		ctx_name = btf__name_by_offset(btf, t->name_off);
968 
969 		if (guess_prog_type_by_ctx_name(ctx_name, &prog_type, &attach_type) == 0) {
970 			bpf_program__set_type(prog, prog_type);
971 			bpf_program__set_expected_attach_type(prog, attach_type);
972 
973 			if (!env.quiet) {
974 				printf("Using guessed program type '%s' for %s/%s...\n",
975 					libbpf_bpf_prog_type_str(prog_type),
976 					filename, prog_name);
977 			}
978 		} else {
979 			if (!env.quiet) {
980 				printf("Failed to guess program type for freplace program with context type name '%s' for %s/%s. Consider using canonical type names to help veristat...\n",
981 					ctx_name, filename, prog_name);
982 			}
983 		}
984 	}
985 skip_freplace_fixup:
986 	return;
987 }
988 
989 static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog)
990 {
991 	const char *prog_name = bpf_program__name(prog);
992 	const char *base_filename = basename(filename);
993 	char *buf;
994 	int buf_sz, log_level;
995 	struct verif_stats *stats;
996 	int err = 0;
997 	void *tmp;
998 
999 	if (!should_process_file_prog(base_filename, bpf_program__name(prog))) {
1000 		env.progs_skipped++;
1001 		return 0;
1002 	}
1003 
1004 	tmp = realloc(env.prog_stats, (env.prog_stat_cnt + 1) * sizeof(*env.prog_stats));
1005 	if (!tmp)
1006 		return -ENOMEM;
1007 	env.prog_stats = tmp;
1008 	stats = &env.prog_stats[env.prog_stat_cnt++];
1009 	memset(stats, 0, sizeof(*stats));
1010 
1011 	if (env.verbose) {
1012 		buf_sz = env.log_size ? env.log_size : 16 * 1024 * 1024;
1013 		buf = malloc(buf_sz);
1014 		if (!buf)
1015 			return -ENOMEM;
1016 		/* ensure we always request stats */
1017 		log_level = env.log_level | 4 | (env.log_fixed ? 8 : 0);
1018 	} else {
1019 		buf = verif_log_buf;
1020 		buf_sz = sizeof(verif_log_buf);
1021 		/* request only verifier stats */
1022 		log_level = 4 | (env.log_fixed ? 8 : 0);
1023 	}
1024 	verif_log_buf[0] = '\0';
1025 
1026 	bpf_program__set_log_buf(prog, buf, buf_sz);
1027 	bpf_program__set_log_level(prog, log_level);
1028 
1029 	/* increase chances of successful BPF object loading */
1030 	fixup_obj(obj, prog, base_filename);
1031 
1032 	if (env.force_checkpoints)
1033 		bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_STATE_FREQ);
1034 	if (env.force_reg_invariants)
1035 		bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_REG_INVARIANTS);
1036 
1037 	err = bpf_object__load(obj);
1038 	env.progs_processed++;
1039 
1040 	stats->file_name = strdup(base_filename);
1041 	stats->prog_name = strdup(bpf_program__name(prog));
1042 	stats->stats[VERDICT] = err == 0; /* 1 - success, 0 - failure */
1043 	parse_verif_log(buf, buf_sz, stats);
1044 
1045 	if (env.verbose) {
1046 		printf("PROCESSING %s/%s, DURATION US: %ld, VERDICT: %s, VERIFIER LOG:\n%s\n",
1047 		       filename, prog_name, stats->stats[DURATION],
1048 		       err ? "failure" : "success", buf);
1049 	}
1050 
1051 	if (verif_log_buf != buf)
1052 		free(buf);
1053 
1054 	return 0;
1055 };
1056 
1057 static int process_obj(const char *filename)
1058 {
1059 	struct bpf_object *obj = NULL, *tobj;
1060 	struct bpf_program *prog, *tprog, *lprog;
1061 	libbpf_print_fn_t old_libbpf_print_fn;
1062 	LIBBPF_OPTS(bpf_object_open_opts, opts);
1063 	int err = 0, prog_cnt = 0;
1064 
1065 	if (!should_process_file_prog(basename(filename), NULL)) {
1066 		if (env.verbose)
1067 			printf("Skipping '%s' due to filters...\n", filename);
1068 		env.files_skipped++;
1069 		return 0;
1070 	}
1071 	if (!is_bpf_obj_file(filename)) {
1072 		if (env.verbose)
1073 			printf("Skipping '%s' as it's not a BPF object file...\n", filename);
1074 		env.files_skipped++;
1075 		return 0;
1076 	}
1077 
1078 	if (!env.quiet && env.out_fmt == RESFMT_TABLE)
1079 		printf("Processing '%s'...\n", basename(filename));
1080 
1081 	old_libbpf_print_fn = libbpf_set_print(libbpf_print_fn);
1082 	obj = bpf_object__open_file(filename, &opts);
1083 	if (!obj) {
1084 		/* if libbpf can't open BPF object file, it could be because
1085 		 * that BPF object file is incomplete and has to be statically
1086 		 * linked into a final BPF object file; instead of bailing
1087 		 * out, report it into stderr, mark it as skipped, and
1088 		 * proceed
1089 		 */
1090 		fprintf(stderr, "Failed to open '%s': %d\n", filename, -errno);
1091 		env.files_skipped++;
1092 		err = 0;
1093 		goto cleanup;
1094 	}
1095 
1096 	env.files_processed++;
1097 
1098 	bpf_object__for_each_program(prog, obj) {
1099 		prog_cnt++;
1100 	}
1101 
1102 	if (prog_cnt == 1) {
1103 		prog = bpf_object__next_program(obj, NULL);
1104 		bpf_program__set_autoload(prog, true);
1105 		process_prog(filename, obj, prog);
1106 		goto cleanup;
1107 	}
1108 
1109 	bpf_object__for_each_program(prog, obj) {
1110 		const char *prog_name = bpf_program__name(prog);
1111 
1112 		tobj = bpf_object__open_file(filename, &opts);
1113 		if (!tobj) {
1114 			err = -errno;
1115 			fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
1116 			goto cleanup;
1117 		}
1118 
1119 		lprog = NULL;
1120 		bpf_object__for_each_program(tprog, tobj) {
1121 			const char *tprog_name = bpf_program__name(tprog);
1122 
1123 			if (strcmp(prog_name, tprog_name) == 0) {
1124 				bpf_program__set_autoload(tprog, true);
1125 				lprog = tprog;
1126 			} else {
1127 				bpf_program__set_autoload(tprog, false);
1128 			}
1129 		}
1130 
1131 		process_prog(filename, tobj, lprog);
1132 		bpf_object__close(tobj);
1133 	}
1134 
1135 cleanup:
1136 	bpf_object__close(obj);
1137 	libbpf_set_print(old_libbpf_print_fn);
1138 	return err;
1139 }
1140 
1141 static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2,
1142 		    enum stat_id id, bool asc, bool abs)
1143 {
1144 	int cmp = 0;
1145 
1146 	switch (id) {
1147 	case FILE_NAME:
1148 		cmp = strcmp(s1->file_name, s2->file_name);
1149 		break;
1150 	case PROG_NAME:
1151 		cmp = strcmp(s1->prog_name, s2->prog_name);
1152 		break;
1153 	case VERDICT:
1154 	case DURATION:
1155 	case TOTAL_INSNS:
1156 	case TOTAL_STATES:
1157 	case PEAK_STATES:
1158 	case MAX_STATES_PER_INSN:
1159 	case MARK_READ_MAX_LEN: {
1160 		long v1 = s1->stats[id];
1161 		long v2 = s2->stats[id];
1162 
1163 		if (abs) {
1164 			v1 = v1 < 0 ? -v1 : v1;
1165 			v2 = v2 < 0 ? -v2 : v2;
1166 		}
1167 
1168 		if (v1 != v2)
1169 			cmp = v1 < v2 ? -1 : 1;
1170 		break;
1171 	}
1172 	default:
1173 		fprintf(stderr, "Unrecognized stat #%d\n", id);
1174 		exit(1);
1175 	}
1176 
1177 	return asc ? cmp : -cmp;
1178 }
1179 
1180 static int cmp_prog_stats(const void *v1, const void *v2)
1181 {
1182 	const struct verif_stats *s1 = v1, *s2 = v2;
1183 	int i, cmp;
1184 
1185 	for (i = 0; i < env.sort_spec.spec_cnt; i++) {
1186 		cmp = cmp_stat(s1, s2, env.sort_spec.ids[i],
1187 			       env.sort_spec.asc[i], env.sort_spec.abs[i]);
1188 		if (cmp != 0)
1189 			return cmp;
1190 	}
1191 
1192 	/* always disambiguate with file+prog, which are unique */
1193 	cmp = strcmp(s1->file_name, s2->file_name);
1194 	if (cmp != 0)
1195 		return cmp;
1196 	return strcmp(s1->prog_name, s2->prog_name);
1197 }
1198 
1199 static void fetch_join_stat_value(const struct verif_stats_join *s,
1200 				  enum stat_id id, enum stat_variant var,
1201 				  const char **str_val,
1202 				  double *num_val)
1203 {
1204 	long v1, v2;
1205 
1206 	if (id == FILE_NAME) {
1207 		*str_val = s->file_name;
1208 		return;
1209 	}
1210 	if (id == PROG_NAME) {
1211 		*str_val = s->prog_name;
1212 		return;
1213 	}
1214 
1215 	v1 = s->stats_a ? s->stats_a->stats[id] : 0;
1216 	v2 = s->stats_b ? s->stats_b->stats[id] : 0;
1217 
1218 	switch (var) {
1219 	case VARIANT_A:
1220 		if (!s->stats_a)
1221 			*num_val = -DBL_MAX;
1222 		else
1223 			*num_val = s->stats_a->stats[id];
1224 		return;
1225 	case VARIANT_B:
1226 		if (!s->stats_b)
1227 			*num_val = -DBL_MAX;
1228 		else
1229 			*num_val = s->stats_b->stats[id];
1230 		return;
1231 	case VARIANT_DIFF:
1232 		if (!s->stats_a || !s->stats_b)
1233 			*num_val = -DBL_MAX;
1234 		else if (id == VERDICT)
1235 			*num_val = v1 == v2 ? 1.0 /* MATCH */ : 0.0 /* MISMATCH */;
1236 		else
1237 			*num_val = (double)(v2 - v1);
1238 		return;
1239 	case VARIANT_PCT:
1240 		if (!s->stats_a || !s->stats_b) {
1241 			*num_val = -DBL_MAX;
1242 		} else if (v1 == 0) {
1243 			if (v1 == v2)
1244 				*num_val = 0.0;
1245 			else
1246 				*num_val = v2 < v1 ? -100.0 : 100.0;
1247 		} else {
1248 			 *num_val = (v2 - v1) * 100.0 / v1;
1249 		}
1250 		return;
1251 	}
1252 }
1253 
1254 static int cmp_join_stat(const struct verif_stats_join *s1,
1255 			 const struct verif_stats_join *s2,
1256 			 enum stat_id id, enum stat_variant var,
1257 			 bool asc, bool abs)
1258 {
1259 	const char *str1 = NULL, *str2 = NULL;
1260 	double v1 = 0.0, v2 = 0.0;
1261 	int cmp = 0;
1262 
1263 	fetch_join_stat_value(s1, id, var, &str1, &v1);
1264 	fetch_join_stat_value(s2, id, var, &str2, &v2);
1265 
1266 	if (abs) {
1267 		v1 = fabs(v1);
1268 		v2 = fabs(v2);
1269 	}
1270 
1271 	if (str1)
1272 		cmp = strcmp(str1, str2);
1273 	else if (v1 != v2)
1274 		cmp = v1 < v2 ? -1 : 1;
1275 
1276 	return asc ? cmp : -cmp;
1277 }
1278 
1279 static int cmp_join_stats(const void *v1, const void *v2)
1280 {
1281 	const struct verif_stats_join *s1 = v1, *s2 = v2;
1282 	int i, cmp;
1283 
1284 	for (i = 0; i < env.sort_spec.spec_cnt; i++) {
1285 		cmp = cmp_join_stat(s1, s2,
1286 				    env.sort_spec.ids[i],
1287 				    env.sort_spec.variants[i],
1288 				    env.sort_spec.asc[i],
1289 				    env.sort_spec.abs[i]);
1290 		if (cmp != 0)
1291 			return cmp;
1292 	}
1293 
1294 	/* always disambiguate with file+prog, which are unique */
1295 	cmp = strcmp(s1->file_name, s2->file_name);
1296 	if (cmp != 0)
1297 		return cmp;
1298 	return strcmp(s1->prog_name, s2->prog_name);
1299 }
1300 
1301 #define HEADER_CHAR '-'
1302 #define COLUMN_SEP "  "
1303 
1304 static void output_header_underlines(void)
1305 {
1306 	int i, j, len;
1307 
1308 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1309 		len = env.output_spec.lens[i];
1310 
1311 		printf("%s", i == 0 ? "" : COLUMN_SEP);
1312 		for (j = 0; j < len; j++)
1313 			printf("%c", HEADER_CHAR);
1314 	}
1315 	printf("\n");
1316 }
1317 
1318 static void output_headers(enum resfmt fmt)
1319 {
1320 	const char *fmt_str;
1321 	int i, len;
1322 
1323 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1324 		int id = env.output_spec.ids[i];
1325 		int *max_len = &env.output_spec.lens[i];
1326 
1327 		switch (fmt) {
1328 		case RESFMT_TABLE_CALCLEN:
1329 			len = snprintf(NULL, 0, "%s", stat_defs[id].header);
1330 			if (len > *max_len)
1331 				*max_len = len;
1332 			break;
1333 		case RESFMT_TABLE:
1334 			fmt_str = stat_defs[id].left_aligned ? "%s%-*s" : "%s%*s";
1335 			printf(fmt_str, i == 0 ? "" : COLUMN_SEP,  *max_len, stat_defs[id].header);
1336 			if (i == env.output_spec.spec_cnt - 1)
1337 				printf("\n");
1338 			break;
1339 		case RESFMT_CSV:
1340 			printf("%s%s", i == 0 ? "" : ",", stat_defs[id].names[0]);
1341 			if (i == env.output_spec.spec_cnt - 1)
1342 				printf("\n");
1343 			break;
1344 		}
1345 	}
1346 
1347 	if (fmt == RESFMT_TABLE)
1348 		output_header_underlines();
1349 }
1350 
1351 static void prepare_value(const struct verif_stats *s, enum stat_id id,
1352 			  const char **str, long *val)
1353 {
1354 	switch (id) {
1355 	case FILE_NAME:
1356 		*str = s ? s->file_name : "N/A";
1357 		break;
1358 	case PROG_NAME:
1359 		*str = s ? s->prog_name : "N/A";
1360 		break;
1361 	case VERDICT:
1362 		if (!s)
1363 			*str = "N/A";
1364 		else
1365 			*str = s->stats[VERDICT] ? "success" : "failure";
1366 		break;
1367 	case DURATION:
1368 	case TOTAL_INSNS:
1369 	case TOTAL_STATES:
1370 	case PEAK_STATES:
1371 	case MAX_STATES_PER_INSN:
1372 	case MARK_READ_MAX_LEN:
1373 		*val = s ? s->stats[id] : 0;
1374 		break;
1375 	default:
1376 		fprintf(stderr, "Unrecognized stat #%d\n", id);
1377 		exit(1);
1378 	}
1379 }
1380 
1381 static void output_stats(const struct verif_stats *s, enum resfmt fmt, bool last)
1382 {
1383 	int i;
1384 
1385 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1386 		int id = env.output_spec.ids[i];
1387 		int *max_len = &env.output_spec.lens[i], len;
1388 		const char *str = NULL;
1389 		long val = 0;
1390 
1391 		prepare_value(s, id, &str, &val);
1392 
1393 		switch (fmt) {
1394 		case RESFMT_TABLE_CALCLEN:
1395 			if (str)
1396 				len = snprintf(NULL, 0, "%s", str);
1397 			else
1398 				len = snprintf(NULL, 0, "%ld", val);
1399 			if (len > *max_len)
1400 				*max_len = len;
1401 			break;
1402 		case RESFMT_TABLE:
1403 			if (str)
1404 				printf("%s%-*s", i == 0 ? "" : COLUMN_SEP, *max_len, str);
1405 			else
1406 				printf("%s%*ld", i == 0 ? "" : COLUMN_SEP,  *max_len, val);
1407 			if (i == env.output_spec.spec_cnt - 1)
1408 				printf("\n");
1409 			break;
1410 		case RESFMT_CSV:
1411 			if (str)
1412 				printf("%s%s", i == 0 ? "" : ",", str);
1413 			else
1414 				printf("%s%ld", i == 0 ? "" : ",", val);
1415 			if (i == env.output_spec.spec_cnt - 1)
1416 				printf("\n");
1417 			break;
1418 		}
1419 	}
1420 
1421 	if (last && fmt == RESFMT_TABLE) {
1422 		output_header_underlines();
1423 		printf("Done. Processed %d files, %d programs. Skipped %d files, %d programs.\n",
1424 		       env.files_processed, env.files_skipped, env.progs_processed, env.progs_skipped);
1425 	}
1426 }
1427 
1428 static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats *st)
1429 {
1430 	switch (id) {
1431 	case FILE_NAME:
1432 		st->file_name = strdup(str);
1433 		if (!st->file_name)
1434 			return -ENOMEM;
1435 		break;
1436 	case PROG_NAME:
1437 		st->prog_name = strdup(str);
1438 		if (!st->prog_name)
1439 			return -ENOMEM;
1440 		break;
1441 	case VERDICT:
1442 		if (strcmp(str, "success") == 0) {
1443 			st->stats[VERDICT] = true;
1444 		} else if (strcmp(str, "failure") == 0) {
1445 			st->stats[VERDICT] = false;
1446 		} else {
1447 			fprintf(stderr, "Unrecognized verification verdict '%s'\n", str);
1448 			return -EINVAL;
1449 		}
1450 		break;
1451 	case DURATION:
1452 	case TOTAL_INSNS:
1453 	case TOTAL_STATES:
1454 	case PEAK_STATES:
1455 	case MAX_STATES_PER_INSN:
1456 	case MARK_READ_MAX_LEN: {
1457 		long val;
1458 		int err, n;
1459 
1460 		if (sscanf(str, "%ld %n", &val, &n) != 1 || n != strlen(str)) {
1461 			err = -errno;
1462 			fprintf(stderr, "Failed to parse '%s' as integer\n", str);
1463 			return err;
1464 		}
1465 
1466 		st->stats[id] = val;
1467 		break;
1468 	}
1469 	default:
1470 		fprintf(stderr, "Unrecognized stat #%d\n", id);
1471 		return -EINVAL;
1472 	}
1473 	return 0;
1474 }
1475 
1476 static int parse_stats_csv(const char *filename, struct stat_specs *specs,
1477 			   struct verif_stats **statsp, int *stat_cntp)
1478 {
1479 	char line[4096];
1480 	FILE *f;
1481 	int err = 0;
1482 	bool header = true;
1483 
1484 	f = fopen(filename, "r");
1485 	if (!f) {
1486 		err = -errno;
1487 		fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
1488 		return err;
1489 	}
1490 
1491 	*stat_cntp = 0;
1492 
1493 	while (fgets(line, sizeof(line), f)) {
1494 		char *input = line, *state = NULL, *next;
1495 		struct verif_stats *st = NULL;
1496 		int col = 0;
1497 
1498 		if (!header) {
1499 			void *tmp;
1500 
1501 			tmp = realloc(*statsp, (*stat_cntp + 1) * sizeof(**statsp));
1502 			if (!tmp) {
1503 				err = -ENOMEM;
1504 				goto cleanup;
1505 			}
1506 			*statsp = tmp;
1507 
1508 			st = &(*statsp)[*stat_cntp];
1509 			memset(st, 0, sizeof(*st));
1510 
1511 			*stat_cntp += 1;
1512 		}
1513 
1514 		while ((next = strtok_r(state ? NULL : input, ",\n", &state))) {
1515 			if (header) {
1516 				/* for the first line, set up spec stats */
1517 				err = parse_stat(next, specs);
1518 				if (err)
1519 					goto cleanup;
1520 				continue;
1521 			}
1522 
1523 			/* for all other lines, parse values based on spec */
1524 			if (col >= specs->spec_cnt) {
1525 				fprintf(stderr, "Found extraneous column #%d in row #%d of '%s'\n",
1526 					col, *stat_cntp, filename);
1527 				err = -EINVAL;
1528 				goto cleanup;
1529 			}
1530 			err = parse_stat_value(next, specs->ids[col], st);
1531 			if (err)
1532 				goto cleanup;
1533 			col++;
1534 		}
1535 
1536 		if (header) {
1537 			header = false;
1538 			continue;
1539 		}
1540 
1541 		if (col < specs->spec_cnt) {
1542 			fprintf(stderr, "Not enough columns in row #%d in '%s'\n",
1543 				*stat_cntp, filename);
1544 			err = -EINVAL;
1545 			goto cleanup;
1546 		}
1547 
1548 		if (!st->file_name || !st->prog_name) {
1549 			fprintf(stderr, "Row #%d in '%s' is missing file and/or program name\n",
1550 				*stat_cntp, filename);
1551 			err = -EINVAL;
1552 			goto cleanup;
1553 		}
1554 
1555 		/* in comparison mode we can only check filters after we
1556 		 * parsed entire line; if row should be ignored we pretend we
1557 		 * never parsed it
1558 		 */
1559 		if (!should_process_file_prog(st->file_name, st->prog_name)) {
1560 			free(st->file_name);
1561 			free(st->prog_name);
1562 			*stat_cntp -= 1;
1563 		}
1564 	}
1565 
1566 	if (!feof(f)) {
1567 		err = -errno;
1568 		fprintf(stderr, "Failed I/O for '%s': %d\n", filename, err);
1569 	}
1570 
1571 cleanup:
1572 	fclose(f);
1573 	return err;
1574 }
1575 
1576 /* empty/zero stats for mismatched rows */
1577 static const struct verif_stats fallback_stats = { .file_name = "", .prog_name = "" };
1578 
1579 static bool is_key_stat(enum stat_id id)
1580 {
1581 	return id == FILE_NAME || id == PROG_NAME;
1582 }
1583 
1584 static void output_comp_header_underlines(void)
1585 {
1586 	int i, j, k;
1587 
1588 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1589 		int id = env.output_spec.ids[i];
1590 		int max_j = is_key_stat(id) ? 1 : 3;
1591 
1592 		for (j = 0; j < max_j; j++) {
1593 			int len = env.output_spec.lens[3 * i + j];
1594 
1595 			printf("%s", i + j == 0 ? "" : COLUMN_SEP);
1596 
1597 			for (k = 0; k < len; k++)
1598 				printf("%c", HEADER_CHAR);
1599 		}
1600 	}
1601 	printf("\n");
1602 }
1603 
1604 static void output_comp_headers(enum resfmt fmt)
1605 {
1606 	static const char *table_sfxs[3] = {" (A)", " (B)", " (DIFF)"};
1607 	static const char *name_sfxs[3] = {"_base", "_comp", "_diff"};
1608 	int i, j, len;
1609 
1610 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1611 		int id = env.output_spec.ids[i];
1612 		/* key stats don't have A/B/DIFF columns, they are common for both data sets */
1613 		int max_j = is_key_stat(id) ? 1 : 3;
1614 
1615 		for (j = 0; j < max_j; j++) {
1616 			int *max_len = &env.output_spec.lens[3 * i + j];
1617 			bool last = (i == env.output_spec.spec_cnt - 1) && (j == max_j - 1);
1618 			const char *sfx;
1619 
1620 			switch (fmt) {
1621 			case RESFMT_TABLE_CALCLEN:
1622 				sfx = is_key_stat(id) ? "" : table_sfxs[j];
1623 				len = snprintf(NULL, 0, "%s%s", stat_defs[id].header, sfx);
1624 				if (len > *max_len)
1625 					*max_len = len;
1626 				break;
1627 			case RESFMT_TABLE:
1628 				sfx = is_key_stat(id) ? "" : table_sfxs[j];
1629 				printf("%s%-*s%s", i + j == 0 ? "" : COLUMN_SEP,
1630 				       *max_len - (int)strlen(sfx), stat_defs[id].header, sfx);
1631 				if (last)
1632 					printf("\n");
1633 				break;
1634 			case RESFMT_CSV:
1635 				sfx = is_key_stat(id) ? "" : name_sfxs[j];
1636 				printf("%s%s%s", i + j == 0 ? "" : ",", stat_defs[id].names[0], sfx);
1637 				if (last)
1638 					printf("\n");
1639 				break;
1640 			}
1641 		}
1642 	}
1643 
1644 	if (fmt == RESFMT_TABLE)
1645 		output_comp_header_underlines();
1646 }
1647 
1648 static void output_comp_stats(const struct verif_stats_join *join_stats,
1649 			      enum resfmt fmt, bool last)
1650 {
1651 	const struct verif_stats *base = join_stats->stats_a;
1652 	const struct verif_stats *comp = join_stats->stats_b;
1653 	char base_buf[1024] = {}, comp_buf[1024] = {}, diff_buf[1024] = {};
1654 	int i;
1655 
1656 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1657 		int id = env.output_spec.ids[i], len;
1658 		int *max_len_base = &env.output_spec.lens[3 * i + 0];
1659 		int *max_len_comp = &env.output_spec.lens[3 * i + 1];
1660 		int *max_len_diff = &env.output_spec.lens[3 * i + 2];
1661 		const char *base_str = NULL, *comp_str = NULL;
1662 		long base_val = 0, comp_val = 0, diff_val = 0;
1663 
1664 		prepare_value(base, id, &base_str, &base_val);
1665 		prepare_value(comp, id, &comp_str, &comp_val);
1666 
1667 		/* normalize all the outputs to be in string buffers for simplicity */
1668 		if (is_key_stat(id)) {
1669 			/* key stats (file and program name) are always strings */
1670 			if (base)
1671 				snprintf(base_buf, sizeof(base_buf), "%s", base_str);
1672 			else
1673 				snprintf(base_buf, sizeof(base_buf), "%s", comp_str);
1674 		} else if (base_str) {
1675 			snprintf(base_buf, sizeof(base_buf), "%s", base_str);
1676 			snprintf(comp_buf, sizeof(comp_buf), "%s", comp_str);
1677 			if (!base || !comp)
1678 				snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A");
1679 			else if (strcmp(base_str, comp_str) == 0)
1680 				snprintf(diff_buf, sizeof(diff_buf), "%s", "MATCH");
1681 			else
1682 				snprintf(diff_buf, sizeof(diff_buf), "%s", "MISMATCH");
1683 		} else {
1684 			double p = 0.0;
1685 
1686 			if (base)
1687 				snprintf(base_buf, sizeof(base_buf), "%ld", base_val);
1688 			else
1689 				snprintf(base_buf, sizeof(base_buf), "%s", "N/A");
1690 			if (comp)
1691 				snprintf(comp_buf, sizeof(comp_buf), "%ld", comp_val);
1692 			else
1693 				snprintf(comp_buf, sizeof(comp_buf), "%s", "N/A");
1694 
1695 			diff_val = comp_val - base_val;
1696 			if (!base || !comp) {
1697 				snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A");
1698 			} else {
1699 				if (base_val == 0) {
1700 					if (comp_val == base_val)
1701 						p = 0.0; /* avoid +0 (+100%) case */
1702 					else
1703 						p = comp_val < base_val ? -100.0 : 100.0;
1704 				} else {
1705 					 p = diff_val * 100.0 / base_val;
1706 				}
1707 				snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)", diff_val, p);
1708 			}
1709 		}
1710 
1711 		switch (fmt) {
1712 		case RESFMT_TABLE_CALCLEN:
1713 			len = strlen(base_buf);
1714 			if (len > *max_len_base)
1715 				*max_len_base = len;
1716 			if (!is_key_stat(id)) {
1717 				len = strlen(comp_buf);
1718 				if (len > *max_len_comp)
1719 					*max_len_comp = len;
1720 				len = strlen(diff_buf);
1721 				if (len > *max_len_diff)
1722 					*max_len_diff = len;
1723 			}
1724 			break;
1725 		case RESFMT_TABLE: {
1726 			/* string outputs are left-aligned, number outputs are right-aligned */
1727 			const char *fmt = base_str ? "%s%-*s" : "%s%*s";
1728 
1729 			printf(fmt, i == 0 ? "" : COLUMN_SEP, *max_len_base, base_buf);
1730 			if (!is_key_stat(id)) {
1731 				printf(fmt, COLUMN_SEP, *max_len_comp, comp_buf);
1732 				printf(fmt, COLUMN_SEP, *max_len_diff, diff_buf);
1733 			}
1734 			if (i == env.output_spec.spec_cnt - 1)
1735 				printf("\n");
1736 			break;
1737 		}
1738 		case RESFMT_CSV:
1739 			printf("%s%s", i == 0 ? "" : ",", base_buf);
1740 			if (!is_key_stat(id)) {
1741 				printf("%s%s", i == 0 ? "" : ",", comp_buf);
1742 				printf("%s%s", i == 0 ? "" : ",", diff_buf);
1743 			}
1744 			if (i == env.output_spec.spec_cnt - 1)
1745 				printf("\n");
1746 			break;
1747 		}
1748 	}
1749 
1750 	if (last && fmt == RESFMT_TABLE)
1751 		output_comp_header_underlines();
1752 }
1753 
1754 static int cmp_stats_key(const struct verif_stats *base, const struct verif_stats *comp)
1755 {
1756 	int r;
1757 
1758 	r = strcmp(base->file_name, comp->file_name);
1759 	if (r != 0)
1760 		return r;
1761 	return strcmp(base->prog_name, comp->prog_name);
1762 }
1763 
1764 static bool is_join_stat_filter_matched(struct filter *f, const struct verif_stats_join *stats)
1765 {
1766 	static const double eps = 1e-9;
1767 	const char *str = NULL;
1768 	double value = 0.0;
1769 
1770 	fetch_join_stat_value(stats, f->stat_id, f->stat_var, &str, &value);
1771 
1772 	if (f->abs)
1773 		value = fabs(value);
1774 
1775 	switch (f->op) {
1776 	case OP_EQ: return value > f->value - eps && value < f->value + eps;
1777 	case OP_NEQ: return value < f->value - eps || value > f->value + eps;
1778 	case OP_LT: return value < f->value - eps;
1779 	case OP_LE: return value <= f->value + eps;
1780 	case OP_GT: return value > f->value + eps;
1781 	case OP_GE: return value >= f->value - eps;
1782 	}
1783 
1784 	fprintf(stderr, "BUG: unknown filter op %d!\n", f->op);
1785 	return false;
1786 }
1787 
1788 static bool should_output_join_stats(const struct verif_stats_join *stats)
1789 {
1790 	struct filter *f;
1791 	int i, allow_cnt = 0;
1792 
1793 	for (i = 0; i < env.deny_filter_cnt; i++) {
1794 		f = &env.deny_filters[i];
1795 		if (f->kind != FILTER_STAT)
1796 			continue;
1797 
1798 		if (is_join_stat_filter_matched(f, stats))
1799 			return false;
1800 	}
1801 
1802 	for (i = 0; i < env.allow_filter_cnt; i++) {
1803 		f = &env.allow_filters[i];
1804 		if (f->kind != FILTER_STAT)
1805 			continue;
1806 		allow_cnt++;
1807 
1808 		if (is_join_stat_filter_matched(f, stats))
1809 			return true;
1810 	}
1811 
1812 	/* if there are no stat allowed filters, pass everything through */
1813 	return allow_cnt == 0;
1814 }
1815 
1816 static int handle_comparison_mode(void)
1817 {
1818 	struct stat_specs base_specs = {}, comp_specs = {};
1819 	struct stat_specs tmp_sort_spec;
1820 	enum resfmt cur_fmt;
1821 	int err, i, j, last_idx, cnt;
1822 
1823 	if (env.filename_cnt != 2) {
1824 		fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n\n");
1825 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
1826 		return -EINVAL;
1827 	}
1828 
1829 	err = parse_stats_csv(env.filenames[0], &base_specs,
1830 			      &env.baseline_stats, &env.baseline_stat_cnt);
1831 	if (err) {
1832 		fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err);
1833 		return err;
1834 	}
1835 	err = parse_stats_csv(env.filenames[1], &comp_specs,
1836 			      &env.prog_stats, &env.prog_stat_cnt);
1837 	if (err) {
1838 		fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[1], err);
1839 		return err;
1840 	}
1841 
1842 	/* To keep it simple we validate that the set and order of stats in
1843 	 * both CSVs are exactly the same. This can be lifted with a bit more
1844 	 * pre-processing later.
1845 	 */
1846 	if (base_specs.spec_cnt != comp_specs.spec_cnt) {
1847 		fprintf(stderr, "Number of stats in '%s' and '%s' differs (%d != %d)!\n",
1848 			env.filenames[0], env.filenames[1],
1849 			base_specs.spec_cnt, comp_specs.spec_cnt);
1850 		return -EINVAL;
1851 	}
1852 	for (i = 0; i < base_specs.spec_cnt; i++) {
1853 		if (base_specs.ids[i] != comp_specs.ids[i]) {
1854 			fprintf(stderr, "Stats composition differs between '%s' and '%s' (%s != %s)!\n",
1855 				env.filenames[0], env.filenames[1],
1856 				stat_defs[base_specs.ids[i]].names[0],
1857 				stat_defs[comp_specs.ids[i]].names[0]);
1858 			return -EINVAL;
1859 		}
1860 	}
1861 
1862 	/* Replace user-specified sorting spec with file+prog sorting rule to
1863 	 * be able to join two datasets correctly. Once we are done, we will
1864 	 * restore the original sort spec.
1865 	 */
1866 	tmp_sort_spec = env.sort_spec;
1867 	env.sort_spec = join_sort_spec;
1868 	qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
1869 	qsort(env.baseline_stats, env.baseline_stat_cnt, sizeof(*env.baseline_stats), cmp_prog_stats);
1870 	env.sort_spec = tmp_sort_spec;
1871 
1872 	/* Join two datasets together. If baseline and comparison datasets
1873 	 * have different subset of rows (we match by 'object + prog' as
1874 	 * a unique key) then assume empty/missing/zero value for rows that
1875 	 * are missing in the opposite data set.
1876 	 */
1877 	i = j = 0;
1878 	while (i < env.baseline_stat_cnt || j < env.prog_stat_cnt) {
1879 		const struct verif_stats *base, *comp;
1880 		struct verif_stats_join *join;
1881 		void *tmp;
1882 		int r;
1883 
1884 		base = i < env.baseline_stat_cnt ? &env.baseline_stats[i] : &fallback_stats;
1885 		comp = j < env.prog_stat_cnt ? &env.prog_stats[j] : &fallback_stats;
1886 
1887 		if (!base->file_name || !base->prog_name) {
1888 			fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n",
1889 				i, env.filenames[0]);
1890 			return -EINVAL;
1891 		}
1892 		if (!comp->file_name || !comp->prog_name) {
1893 			fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n",
1894 				j, env.filenames[1]);
1895 			return -EINVAL;
1896 		}
1897 
1898 		tmp = realloc(env.join_stats, (env.join_stat_cnt + 1) * sizeof(*env.join_stats));
1899 		if (!tmp)
1900 			return -ENOMEM;
1901 		env.join_stats = tmp;
1902 
1903 		join = &env.join_stats[env.join_stat_cnt];
1904 		memset(join, 0, sizeof(*join));
1905 
1906 		r = cmp_stats_key(base, comp);
1907 		if (r == 0) {
1908 			join->file_name = base->file_name;
1909 			join->prog_name = base->prog_name;
1910 			join->stats_a = base;
1911 			join->stats_b = comp;
1912 			i++;
1913 			j++;
1914 		} else if (base != &fallback_stats && (comp == &fallback_stats || r < 0)) {
1915 			join->file_name = base->file_name;
1916 			join->prog_name = base->prog_name;
1917 			join->stats_a = base;
1918 			join->stats_b = NULL;
1919 			i++;
1920 		} else if (comp != &fallback_stats && (base == &fallback_stats || r > 0)) {
1921 			join->file_name = comp->file_name;
1922 			join->prog_name = comp->prog_name;
1923 			join->stats_a = NULL;
1924 			join->stats_b = comp;
1925 			j++;
1926 		} else {
1927 			fprintf(stderr, "%s:%d: should never reach here i=%i, j=%i",
1928 				__FILE__, __LINE__, i, j);
1929 			return -EINVAL;
1930 		}
1931 		env.join_stat_cnt += 1;
1932 	}
1933 
1934 	/* now sort joined results according to sort spec */
1935 	qsort(env.join_stats, env.join_stat_cnt, sizeof(*env.join_stats), cmp_join_stats);
1936 
1937 	/* for human-readable table output we need to do extra pass to
1938 	 * calculate column widths, so we substitute current output format
1939 	 * with RESFMT_TABLE_CALCLEN and later revert it back to RESFMT_TABLE
1940 	 * and do everything again.
1941 	 */
1942 	if (env.out_fmt == RESFMT_TABLE)
1943 		cur_fmt = RESFMT_TABLE_CALCLEN;
1944 	else
1945 		cur_fmt = env.out_fmt;
1946 
1947 one_more_time:
1948 	output_comp_headers(cur_fmt);
1949 
1950 	last_idx = -1;
1951 	cnt = 0;
1952 	for (i = 0; i < env.join_stat_cnt; i++) {
1953 		const struct verif_stats_join *join = &env.join_stats[i];
1954 
1955 		if (!should_output_join_stats(join))
1956 			continue;
1957 
1958 		if (env.top_n && cnt >= env.top_n)
1959 			break;
1960 
1961 		if (cur_fmt == RESFMT_TABLE_CALCLEN)
1962 			last_idx = i;
1963 
1964 		output_comp_stats(join, cur_fmt, i == last_idx);
1965 
1966 		cnt++;
1967 	}
1968 
1969 	if (cur_fmt == RESFMT_TABLE_CALCLEN) {
1970 		cur_fmt = RESFMT_TABLE;
1971 		goto one_more_time; /* ... this time with feeling */
1972 	}
1973 
1974 	return 0;
1975 }
1976 
1977 static bool is_stat_filter_matched(struct filter *f, const struct verif_stats *stats)
1978 {
1979 	long value = stats->stats[f->stat_id];
1980 
1981 	if (f->abs)
1982 		value = value < 0 ? -value : value;
1983 
1984 	switch (f->op) {
1985 	case OP_EQ: return value == f->value;
1986 	case OP_NEQ: return value != f->value;
1987 	case OP_LT: return value < f->value;
1988 	case OP_LE: return value <= f->value;
1989 	case OP_GT: return value > f->value;
1990 	case OP_GE: return value >= f->value;
1991 	}
1992 
1993 	fprintf(stderr, "BUG: unknown filter op %d!\n", f->op);
1994 	return false;
1995 }
1996 
1997 static bool should_output_stats(const struct verif_stats *stats)
1998 {
1999 	struct filter *f;
2000 	int i, allow_cnt = 0;
2001 
2002 	for (i = 0; i < env.deny_filter_cnt; i++) {
2003 		f = &env.deny_filters[i];
2004 		if (f->kind != FILTER_STAT)
2005 			continue;
2006 
2007 		if (is_stat_filter_matched(f, stats))
2008 			return false;
2009 	}
2010 
2011 	for (i = 0; i < env.allow_filter_cnt; i++) {
2012 		f = &env.allow_filters[i];
2013 		if (f->kind != FILTER_STAT)
2014 			continue;
2015 		allow_cnt++;
2016 
2017 		if (is_stat_filter_matched(f, stats))
2018 			return true;
2019 	}
2020 
2021 	/* if there are no stat allowed filters, pass everything through */
2022 	return allow_cnt == 0;
2023 }
2024 
2025 static void output_prog_stats(void)
2026 {
2027 	const struct verif_stats *stats;
2028 	int i, last_stat_idx = 0, cnt = 0;
2029 
2030 	if (env.out_fmt == RESFMT_TABLE) {
2031 		/* calculate column widths */
2032 		output_headers(RESFMT_TABLE_CALCLEN);
2033 		for (i = 0; i < env.prog_stat_cnt; i++) {
2034 			stats = &env.prog_stats[i];
2035 			if (!should_output_stats(stats))
2036 				continue;
2037 			output_stats(stats, RESFMT_TABLE_CALCLEN, false);
2038 			last_stat_idx = i;
2039 		}
2040 	}
2041 
2042 	/* actually output the table */
2043 	output_headers(env.out_fmt);
2044 	for (i = 0; i < env.prog_stat_cnt; i++) {
2045 		stats = &env.prog_stats[i];
2046 		if (!should_output_stats(stats))
2047 			continue;
2048 		if (env.top_n && cnt >= env.top_n)
2049 			break;
2050 		output_stats(stats, env.out_fmt, i == last_stat_idx);
2051 		cnt++;
2052 	}
2053 }
2054 
2055 static int handle_verif_mode(void)
2056 {
2057 	int i, err;
2058 
2059 	if (env.filename_cnt == 0) {
2060 		fprintf(stderr, "Please provide path to BPF object file!\n\n");
2061 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2062 		return -EINVAL;
2063 	}
2064 
2065 	for (i = 0; i < env.filename_cnt; i++) {
2066 		err = process_obj(env.filenames[i]);
2067 		if (err) {
2068 			fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err);
2069 			return err;
2070 		}
2071 	}
2072 
2073 	qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
2074 
2075 	output_prog_stats();
2076 
2077 	return 0;
2078 }
2079 
2080 static int handle_replay_mode(void)
2081 {
2082 	struct stat_specs specs = {};
2083 	int err;
2084 
2085 	if (env.filename_cnt != 1) {
2086 		fprintf(stderr, "Replay mode expects exactly one input CSV file!\n\n");
2087 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2088 		return -EINVAL;
2089 	}
2090 
2091 	err = parse_stats_csv(env.filenames[0], &specs,
2092 			      &env.prog_stats, &env.prog_stat_cnt);
2093 	if (err) {
2094 		fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err);
2095 		return err;
2096 	}
2097 
2098 	qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
2099 
2100 	output_prog_stats();
2101 
2102 	return 0;
2103 }
2104 
2105 int main(int argc, char **argv)
2106 {
2107 	int err = 0, i;
2108 
2109 	if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
2110 		return 1;
2111 
2112 	if (env.show_version) {
2113 		printf("%s\n", argp_program_version);
2114 		return 0;
2115 	}
2116 
2117 	if (env.verbose && env.quiet) {
2118 		fprintf(stderr, "Verbose and quiet modes are incompatible, please specify just one or neither!\n\n");
2119 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2120 		return 1;
2121 	}
2122 	if (env.verbose && env.log_level == 0)
2123 		env.log_level = 1;
2124 
2125 	if (env.output_spec.spec_cnt == 0) {
2126 		if (env.out_fmt == RESFMT_CSV)
2127 			env.output_spec = default_csv_output_spec;
2128 		else
2129 			env.output_spec = default_output_spec;
2130 	}
2131 	if (env.sort_spec.spec_cnt == 0)
2132 		env.sort_spec = default_sort_spec;
2133 
2134 	if (env.comparison_mode && env.replay_mode) {
2135 		fprintf(stderr, "Can't specify replay and comparison mode at the same time!\n\n");
2136 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2137 		return 1;
2138 	}
2139 
2140 	if (env.comparison_mode)
2141 		err = handle_comparison_mode();
2142 	else if (env.replay_mode)
2143 		err = handle_replay_mode();
2144 	else
2145 		err = handle_verif_mode();
2146 
2147 	free_verif_stats(env.prog_stats, env.prog_stat_cnt);
2148 	free_verif_stats(env.baseline_stats, env.baseline_stat_cnt);
2149 	free(env.join_stats);
2150 	for (i = 0; i < env.filename_cnt; i++)
2151 		free(env.filenames[i]);
2152 	free(env.filenames);
2153 	for (i = 0; i < env.allow_filter_cnt; i++) {
2154 		free(env.allow_filters[i].any_glob);
2155 		free(env.allow_filters[i].file_glob);
2156 		free(env.allow_filters[i].prog_glob);
2157 	}
2158 	free(env.allow_filters);
2159 	for (i = 0; i < env.deny_filter_cnt; i++) {
2160 		free(env.deny_filters[i].any_glob);
2161 		free(env.deny_filters[i].file_glob);
2162 		free(env.deny_filters[i].prog_glob);
2163 	}
2164 	free(env.deny_filters);
2165 	return -err;
2166 }
2167