xref: /linux/tools/testing/selftests/bpf/veristat.c (revision 7f71507851fc7764b36a3221839607d3a45c2025)
1 // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
2 /* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
3 #define _GNU_SOURCE
4 #include <argp.h>
5 #include <libgen.h>
6 #include <string.h>
7 #include <stdlib.h>
8 #include <sched.h>
9 #include <pthread.h>
10 #include <dirent.h>
11 #include <signal.h>
12 #include <fcntl.h>
13 #include <unistd.h>
14 #include <sys/time.h>
15 #include <sys/sysinfo.h>
16 #include <sys/stat.h>
17 #include <bpf/libbpf.h>
18 #include <bpf/btf.h>
19 #include <bpf/bpf.h>
20 #include <libelf.h>
21 #include <gelf.h>
22 #include <float.h>
23 #include <math.h>
24 
25 #ifndef ARRAY_SIZE
26 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
27 #endif
28 
/* Identifiers of all per-program stats known to veristat.
 *
 * Numeric stats are listed first so their ids can index verif_stats.stats[]
 * directly; the string-valued stats (file and program name) follow them.
 */
enum stat_id {
	/* numeric stats */
	VERDICT = 0,
	DURATION,
	TOTAL_INSNS,
	TOTAL_STATES,
	PEAK_STATES,
	MAX_STATES_PER_INSN,
	MARK_READ_MAX_LEN,

	/* string stats */
	FILE_NAME,
	PROG_NAME,

	/* number of all stat ids above */
	ALL_STATS_CNT,
	/* number of numeric stats, i.e. everything preceding FILE_NAME */
	NUM_STATS_CNT = FILE_NAME - VERDICT,
};
44 
/* Comparison mode exposes up to four values per stat:
 *   - the A side value;
 *   - the B side value;
 *   - the absolute difference;
 *   - the relative (percentage) difference.
 *
 * A stat spec used for ordering or filtering in comparison mode selects one
 * of them through a suffix appended to the stat name:
 *   - `_a`    selects the A side value;
 *   - `_b`    selects the B side value;
 *   - `_diff` selects the absolute difference;
 *   - `_pct`  selects the relative (percentage) difference.
 *
 * A spec without a suffix is treated as `_b` (control data).
 *
 * For example, given this output for the instructions stat:
 *
 * Insns (A)  Insns (B)  Insns   (DIFF)
 * ---------  ---------  --------------
 * 21547      20920       -627 (-2.91%)
 *
 * the variants are:
 *   - insns_a    == 21547;
 *   - insns_b    == 20920;
 *   - insns_diff == -627;
 *   - insns_pct  == -2.91%.
 *
 * Verdict has no verdict_pct variant. File and program name have equivalent
 * _a and _b variants and no _diff or _pct variants.
 */
enum stat_variant {
	VARIANT_A = 0,
	VARIANT_B,
	VARIANT_DIFF,
	VARIANT_PCT,
};
83 
84 struct verif_stats {
85 	char *file_name;
86 	char *prog_name;
87 
88 	long stats[NUM_STATS_CNT];
89 };
90 
/* One row of joined comparison mode stats: a file/program pair together with
 * pointers to its A side and B side stats.
 */
struct verif_stats_join {
	char *file_name, *prog_name;

	const struct verif_stats *stats_a, *stats_b;
};
99 
100 struct stat_specs {
101 	int spec_cnt;
102 	enum stat_id ids[ALL_STATS_CNT];
103 	enum stat_variant variants[ALL_STATS_CNT];
104 	bool asc[ALL_STATS_CNT];
105 	bool abs[ALL_STATS_CNT];
106 	int lens[ALL_STATS_CNT * 3]; /* 3x for comparison mode */
107 };
108 
/* Result output formats */
enum resfmt {
	RESFMT_TABLE = 0,
	/* fake format to pre-calculate table's column widths */
	RESFMT_TABLE_CALCLEN,
	RESFMT_CSV,
};
114 
/* What a filter expression matches on */
enum filter_kind {
	FILTER_NAME = 0,	/* file and/or program name glob */
	FILTER_STAT,		/* <stat><op><value> comparison */
};
119 
/* Comparison operators accepted in stat filters */
enum operator_kind {
	OP_EQ = 0,	/* == or = */
	OP_NEQ,		/* != or <> */
	OP_LT,		/* < */
	OP_LE,		/* <= */
	OP_GT,		/* > */
	OP_GE,		/* >= */
};
128 
129 struct filter {
130 	enum filter_kind kind;
131 	/* FILTER_NAME */
132 	char *any_glob;
133 	char *file_glob;
134 	char *prog_glob;
135 	/* FILTER_STAT */
136 	enum operator_kind op;
137 	int stat_id;
138 	enum stat_variant stat_var;
139 	long value;
140 	bool abs;
141 };
142 
143 static struct env {
144 	char **filenames;
145 	int filename_cnt;
146 	bool verbose;
147 	bool debug;
148 	bool quiet;
149 	bool force_checkpoints;
150 	bool force_reg_invariants;
151 	enum resfmt out_fmt;
152 	bool show_version;
153 	bool comparison_mode;
154 	bool replay_mode;
155 	int top_n;
156 
157 	int log_level;
158 	int log_size;
159 	bool log_fixed;
160 
161 	struct verif_stats *prog_stats;
162 	int prog_stat_cnt;
163 
164 	/* baseline_stats is allocated and used only in comparison mode */
165 	struct verif_stats *baseline_stats;
166 	int baseline_stat_cnt;
167 
168 	struct verif_stats_join *join_stats;
169 	int join_stat_cnt;
170 
171 	struct stat_specs output_spec;
172 	struct stat_specs sort_spec;
173 
174 	struct filter *allow_filters;
175 	struct filter *deny_filters;
176 	int allow_filter_cnt;
177 	int deny_filter_cnt;
178 
179 	int files_processed;
180 	int files_skipped;
181 	int progs_processed;
182 	int progs_skipped;
183 	int top_src_lines;
184 } env;
185 
186 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
187 {
188 	if (!env.verbose)
189 		return 0;
190 	if (level == LIBBPF_DEBUG  && !env.debug)
191 		return 0;
192 	return vfprintf(stderr, format, args);
193 }
194 
/* Version string; "<kernel>" unless the build defines VERISTAT_VERSION */
#ifndef VERISTAT_VERSION
#define VERISTAT_VERSION "<kernel>"
#endif

/* Program identification strings picked up by argp */
const char *argp_program_version = "veristat v" VERISTAT_VERSION;
const char *argp_program_bug_address = "<bpf@vger.kernel.org>";

/* Usage summary, wired into argp through the .doc field of `argp` */
const char argp_program_doc[] =
	"veristat    BPF verifier stats collection and comparison tool.\n\n"
	"USAGE: veristat <obj-file> [<obj-file>...]\n"
	"   OR: veristat -C <baseline.csv> <comparison.csv>\n"
	"   OR: veristat -R <results.csv>\n";
207 
/* Keys of long-only options (no single-character short form) */
enum {
	OPT_LOG_FIXED = 1000,
	OPT_LOG_SIZE,
};
212 
213 static const struct argp_option opts[] = {
214 	{ NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
215 	{ "version", 'V', NULL, 0, "Print version" },
216 	{ "verbose", 'v', NULL, 0, "Verbose mode" },
217 	{ "debug", 'd', NULL, 0, "Debug mode (turns on libbpf debug logging)" },
218 	{ "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" },
219 	{ "log-fixed", OPT_LOG_FIXED, NULL, 0, "Disable verifier log rotation" },
220 	{ "log-size", OPT_LOG_SIZE, "BYTES", 0, "Customize verifier log size (default to 16MB)" },
221 	{ "top-n", 'n', "N", 0, "Emit only up to first N results." },
222 	{ "quiet", 'q', NULL, 0, "Quiet mode" },
223 	{ "emit", 'e', "SPEC", 0, "Specify stats to be emitted" },
224 	{ "sort", 's', "SPEC", 0, "Specify sort order" },
225 	{ "output-format", 'o', "FMT", 0, "Result output format (table, csv), default is table." },
226 	{ "compare", 'C', NULL, 0, "Comparison mode" },
227 	{ "replay", 'R', NULL, 0, "Replay mode" },
228 	{ "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)." },
229 	{ "test-states", 't', NULL, 0,
230 	  "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" },
231 	{ "test-reg-invariants", 'r', NULL, 0,
232 	  "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" },
233 	{ "top-src-lines", 'S', "N", 0, "Emit N most frequent source code lines" },
234 	{},
235 };
236 
237 static int parse_stats(const char *stats_str, struct stat_specs *specs);
238 static int append_filter(struct filter **filters, int *cnt, const char *str);
239 static int append_filter_file(const char *path);
240 
241 static error_t parse_arg(int key, char *arg, struct argp_state *state)
242 {
243 	void *tmp;
244 	int err;
245 
246 	switch (key) {
247 	case 'h':
248 		argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
249 		break;
250 	case 'V':
251 		env.show_version = true;
252 		break;
253 	case 'v':
254 		env.verbose = true;
255 		break;
256 	case 'd':
257 		env.debug = true;
258 		env.verbose = true;
259 		break;
260 	case 'q':
261 		env.quiet = true;
262 		break;
263 	case 'e':
264 		err = parse_stats(arg, &env.output_spec);
265 		if (err)
266 			return err;
267 		break;
268 	case 's':
269 		err = parse_stats(arg, &env.sort_spec);
270 		if (err)
271 			return err;
272 		break;
273 	case 'o':
274 		if (strcmp(arg, "table") == 0) {
275 			env.out_fmt = RESFMT_TABLE;
276 		} else if (strcmp(arg, "csv") == 0) {
277 			env.out_fmt = RESFMT_CSV;
278 		} else {
279 			fprintf(stderr, "Unrecognized output format '%s'\n", arg);
280 			return -EINVAL;
281 		}
282 		break;
283 	case 'l':
284 		errno = 0;
285 		env.log_level = strtol(arg, NULL, 10);
286 		if (errno) {
287 			fprintf(stderr, "invalid log level: %s\n", arg);
288 			argp_usage(state);
289 		}
290 		break;
291 	case OPT_LOG_FIXED:
292 		env.log_fixed = true;
293 		break;
294 	case OPT_LOG_SIZE:
295 		errno = 0;
296 		env.log_size = strtol(arg, NULL, 10);
297 		if (errno) {
298 			fprintf(stderr, "invalid log size: %s\n", arg);
299 			argp_usage(state);
300 		}
301 		break;
302 	case 't':
303 		env.force_checkpoints = true;
304 		break;
305 	case 'r':
306 		env.force_reg_invariants = true;
307 		break;
308 	case 'n':
309 		errno = 0;
310 		env.top_n = strtol(arg, NULL, 10);
311 		if (errno) {
312 			fprintf(stderr, "invalid top N specifier: %s\n", arg);
313 			argp_usage(state);
314 		}
315 	case 'C':
316 		env.comparison_mode = true;
317 		break;
318 	case 'R':
319 		env.replay_mode = true;
320 		break;
321 	case 'f':
322 		if (arg[0] == '@')
323 			err = append_filter_file(arg + 1);
324 		else if (arg[0] == '!')
325 			err = append_filter(&env.deny_filters, &env.deny_filter_cnt, arg + 1);
326 		else
327 			err = append_filter(&env.allow_filters, &env.allow_filter_cnt, arg);
328 		if (err) {
329 			fprintf(stderr, "Failed to collect program filter expressions: %d\n", err);
330 			return err;
331 		}
332 		break;
333 	case 'S':
334 		errno = 0;
335 		env.top_src_lines = strtol(arg, NULL, 10);
336 		if (errno) {
337 			fprintf(stderr, "invalid top lines N specifier: %s\n", arg);
338 			argp_usage(state);
339 		}
340 		break;
341 	case ARGP_KEY_ARG:
342 		tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames));
343 		if (!tmp)
344 			return -ENOMEM;
345 		env.filenames = tmp;
346 		env.filenames[env.filename_cnt] = strdup(arg);
347 		if (!env.filenames[env.filename_cnt])
348 			return -ENOMEM;
349 		env.filename_cnt++;
350 		break;
351 	default:
352 		return ARGP_ERR_UNKNOWN;
353 	}
354 	return 0;
355 }
356 
357 static const struct argp argp = {
358 	.options = opts,
359 	.parser = parse_arg,
360 	.doc = argp_program_doc,
361 };
362 
363 
/* Adapted from perf/util/string.c
 *
 * Match @str against the glob @pat, in which '*' stands for any (possibly
 * empty) run of characters and every other character matches itself.
 * Returns true only if the whole of @str is matched by the whole of @pat.
 */
static bool glob_matches(const char *str, const char *pat)
{
	/* consume the literal part of the pattern that precedes any '*' */
	for (; *str && *pat && *pat != '*'; str++, pat++) {
		if (*str != *pat)
			return false;
	}

	/* no wildcard here: both strings have to be exhausted together */
	if (*pat != '*')
		return *str == '\0' && *pat == '\0';

	/* a run of '*' is equivalent to a single one */
	do {
		pat++;
	} while (*pat == '*');

	/* tail wildcard matches everything that is left */
	if (*pat == '\0')
		return true;

	/* try to match the rest of the pattern at every remaining position */
	for (; *str; str++) {
		if (glob_matches(str, pat))
			return true;
	}

	/* input is exhausted but the pattern is not */
	return false;
}
385 
386 static bool is_bpf_obj_file(const char *path) {
387 	Elf64_Ehdr *ehdr;
388 	int fd, err = -EINVAL;
389 	Elf *elf = NULL;
390 
391 	fd = open(path, O_RDONLY | O_CLOEXEC);
392 	if (fd < 0)
393 		return true; /* we'll fail later and propagate error */
394 
395 	/* ensure libelf is initialized */
396 	(void)elf_version(EV_CURRENT);
397 
398 	elf = elf_begin(fd, ELF_C_READ, NULL);
399 	if (!elf)
400 		goto cleanup;
401 
402 	if (elf_kind(elf) != ELF_K_ELF || gelf_getclass(elf) != ELFCLASS64)
403 		goto cleanup;
404 
405 	ehdr = elf64_getehdr(elf);
406 	/* Old LLVM set e_machine to EM_NONE */
407 	if (!ehdr || ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF))
408 		goto cleanup;
409 
410 	err = 0;
411 cleanup:
412 	if (elf)
413 		elf_end(elf);
414 	close(fd);
415 	return err == 0;
416 }
417 
418 static bool should_process_file_prog(const char *filename, const char *prog_name)
419 {
420 	struct filter *f;
421 	int i, allow_cnt = 0;
422 
423 	for (i = 0; i < env.deny_filter_cnt; i++) {
424 		f = &env.deny_filters[i];
425 		if (f->kind != FILTER_NAME)
426 			continue;
427 
428 		if (f->any_glob && glob_matches(filename, f->any_glob))
429 			return false;
430 		if (f->any_glob && prog_name && glob_matches(prog_name, f->any_glob))
431 			return false;
432 		if (f->file_glob && glob_matches(filename, f->file_glob))
433 			return false;
434 		if (f->prog_glob && prog_name && glob_matches(prog_name, f->prog_glob))
435 			return false;
436 	}
437 
438 	for (i = 0; i < env.allow_filter_cnt; i++) {
439 		f = &env.allow_filters[i];
440 		if (f->kind != FILTER_NAME)
441 			continue;
442 
443 		allow_cnt++;
444 		if (f->any_glob) {
445 			if (glob_matches(filename, f->any_glob))
446 				return true;
447 			/* If we don't know program name yet, any_glob filter
448 			 * has to assume that current BPF object file might be
449 			 * relevant; we'll check again later on after opening
450 			 * BPF object file, at which point program name will
451 			 * be known finally.
452 			 */
453 			if (!prog_name || glob_matches(prog_name, f->any_glob))
454 				return true;
455 		} else {
456 			if (f->file_glob && !glob_matches(filename, f->file_glob))
457 				continue;
458 			if (f->prog_glob && prog_name && !glob_matches(prog_name, f->prog_glob))
459 				continue;
460 			return true;
461 		}
462 	}
463 
464 	/* if there are no file/prog name allow filters, allow all progs,
465 	 * unless they are denied earlier explicitly
466 	 */
467 	return allow_cnt == 0;
468 }
469 
470 static struct {
471 	enum operator_kind op_kind;
472 	const char *op_str;
473 } operators[] = {
474 	/* Order of these definitions matter to avoid situations like '<'
475 	 * matching part of what is actually a '<>' operator. That is,
476 	 * substrings should go last.
477 	 */
478 	{ OP_EQ, "==" },
479 	{ OP_NEQ, "!=" },
480 	{ OP_NEQ, "<>" },
481 	{ OP_LE, "<=" },
482 	{ OP_LT, "<" },
483 	{ OP_GE, ">=" },
484 	{ OP_GT, ">" },
485 	{ OP_EQ, "=" },
486 };
487 
488 static bool parse_stat_id_var(const char *name, size_t len, int *id,
489 			      enum stat_variant *var, bool *is_abs);
490 
491 static int append_filter(struct filter **filters, int *cnt, const char *str)
492 {
493 	struct filter *f;
494 	void *tmp;
495 	const char *p;
496 	int i;
497 
498 	tmp = realloc(*filters, (*cnt + 1) * sizeof(**filters));
499 	if (!tmp)
500 		return -ENOMEM;
501 	*filters = tmp;
502 
503 	f = &(*filters)[*cnt];
504 	memset(f, 0, sizeof(*f));
505 
506 	/* First, let's check if it's a stats filter of the following form:
507 	 * <stat><op><value, where:
508 	 *   - <stat> is one of supported numerical stats (verdict is also
509 	 *     considered numerical, failure == 0, success == 1);
510 	 *   - <op> is comparison operator (see `operators` definitions);
511 	 *   - <value> is an integer (or failure/success, or false/true as
512 	 *     special aliases for 0 and 1, respectively).
513 	 * If the form doesn't match what user provided, we assume file/prog
514 	 * glob filter.
515 	 */
516 	for (i = 0; i < ARRAY_SIZE(operators); i++) {
517 		enum stat_variant var;
518 		int id;
519 		long val;
520 		const char *end = str;
521 		const char *op_str;
522 		bool is_abs;
523 
524 		op_str = operators[i].op_str;
525 		p = strstr(str, op_str);
526 		if (!p)
527 			continue;
528 
529 		if (!parse_stat_id_var(str, p - str, &id, &var, &is_abs)) {
530 			fprintf(stderr, "Unrecognized stat name in '%s'!\n", str);
531 			return -EINVAL;
532 		}
533 		if (id >= FILE_NAME) {
534 			fprintf(stderr, "Non-integer stat is specified in '%s'!\n", str);
535 			return -EINVAL;
536 		}
537 
538 		p += strlen(op_str);
539 
540 		if (strcasecmp(p, "true") == 0 ||
541 		    strcasecmp(p, "t") == 0 ||
542 		    strcasecmp(p, "success") == 0 ||
543 		    strcasecmp(p, "succ") == 0 ||
544 		    strcasecmp(p, "s") == 0 ||
545 		    strcasecmp(p, "match") == 0 ||
546 		    strcasecmp(p, "m") == 0) {
547 			val = 1;
548 		} else if (strcasecmp(p, "false") == 0 ||
549 			   strcasecmp(p, "f") == 0 ||
550 			   strcasecmp(p, "failure") == 0 ||
551 			   strcasecmp(p, "fail") == 0 ||
552 			   strcasecmp(p, "mismatch") == 0 ||
553 			   strcasecmp(p, "mis") == 0) {
554 			val = 0;
555 		} else {
556 			errno = 0;
557 			val = strtol(p, (char **)&end, 10);
558 			if (errno || end == p || *end != '\0' ) {
559 				fprintf(stderr, "Invalid integer value in '%s'!\n", str);
560 				return -EINVAL;
561 			}
562 		}
563 
564 		f->kind = FILTER_STAT;
565 		f->stat_id = id;
566 		f->stat_var = var;
567 		f->op = operators[i].op_kind;
568 		f->abs = true;
569 		f->value = val;
570 
571 		*cnt += 1;
572 		return 0;
573 	}
574 
575 	/* File/prog filter can be specified either as '<glob>' or
576 	 * '<file-glob>/<prog-glob>'. In the former case <glob> is applied to
577 	 * both file and program names. This seems to be way more useful in
578 	 * practice. If user needs full control, they can use '/<prog-glob>'
579 	 * form to glob just program name, or '<file-glob>/' to glob only file
580 	 * name. But usually common <glob> seems to be the most useful and
581 	 * ergonomic way.
582 	 */
583 	f->kind = FILTER_NAME;
584 	p = strchr(str, '/');
585 	if (!p) {
586 		f->any_glob = strdup(str);
587 		if (!f->any_glob)
588 			return -ENOMEM;
589 	} else {
590 		if (str != p) {
591 			/* non-empty file glob */
592 			f->file_glob = strndup(str, p - str);
593 			if (!f->file_glob)
594 				return -ENOMEM;
595 		}
596 		if (strlen(p + 1) > 0) {
597 			/* non-empty prog glob */
598 			f->prog_glob = strdup(p + 1);
599 			if (!f->prog_glob) {
600 				free(f->file_glob);
601 				f->file_glob = NULL;
602 				return -ENOMEM;
603 			}
604 		}
605 	}
606 
607 	*cnt += 1;
608 	return 0;
609 }
610 
611 static int append_filter_file(const char *path)
612 {
613 	char buf[1024];
614 	FILE *f;
615 	int err = 0;
616 
617 	f = fopen(path, "r");
618 	if (!f) {
619 		err = -errno;
620 		fprintf(stderr, "Failed to open filters in '%s': %d\n", path, err);
621 		return err;
622 	}
623 
624 	while (fscanf(f, " %1023[^\n]\n", buf) == 1) {
625 		/* lines starting with # are comments, skip them */
626 		if (buf[0] == '\0' || buf[0] == '#')
627 			continue;
628 		/* lines starting with ! are negative match filters */
629 		if (buf[0] == '!')
630 			err = append_filter(&env.deny_filters, &env.deny_filter_cnt, buf + 1);
631 		else
632 			err = append_filter(&env.allow_filters, &env.allow_filter_cnt, buf);
633 		if (err)
634 			goto cleanup;
635 	}
636 
637 cleanup:
638 	fclose(f);
639 	return err;
640 }
641 
642 static const struct stat_specs default_output_spec = {
643 	.spec_cnt = 7,
644 	.ids = {
645 		FILE_NAME, PROG_NAME, VERDICT, DURATION,
646 		TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
647 	},
648 };
649 
650 static const struct stat_specs default_csv_output_spec = {
651 	.spec_cnt = 9,
652 	.ids = {
653 		FILE_NAME, PROG_NAME, VERDICT, DURATION,
654 		TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
655 		MAX_STATES_PER_INSN, MARK_READ_MAX_LEN,
656 	},
657 };
658 
659 static const struct stat_specs default_sort_spec = {
660 	.spec_cnt = 2,
661 	.ids = {
662 		FILE_NAME, PROG_NAME,
663 	},
664 	.asc = { true, true, },
665 };
666 
667 /* sorting for comparison mode to join two data sets */
668 static const struct stat_specs join_sort_spec = {
669 	.spec_cnt = 2,
670 	.ids = {
671 		FILE_NAME, PROG_NAME,
672 	},
673 	.asc = { true, true, },
674 };
675 
676 static struct stat_def {
677 	const char *header;
678 	const char *names[4];
679 	bool asc_by_default;
680 	bool left_aligned;
681 } stat_defs[] = {
682 	[FILE_NAME] = { "File", {"file_name", "filename", "file"}, true /* asc */, true /* left */ },
683 	[PROG_NAME] = { "Program", {"prog_name", "progname", "prog"}, true /* asc */, true /* left */ },
684 	[VERDICT] = { "Verdict", {"verdict"}, true /* asc: failure, success */, true /* left */ },
685 	[DURATION] = { "Duration (us)", {"duration", "dur"}, },
686 	[TOTAL_INSNS] = { "Insns", {"total_insns", "insns"}, },
687 	[TOTAL_STATES] = { "States", {"total_states", "states"}, },
688 	[PEAK_STATES] = { "Peak states", {"peak_states"}, },
689 	[MAX_STATES_PER_INSN] = { "Max states per insn", {"max_states_per_insn"}, },
690 	[MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, },
691 };
692 
693 static bool parse_stat_id_var(const char *name, size_t len, int *id,
694 			      enum stat_variant *var, bool *is_abs)
695 {
696 	static const char *var_sfxs[] = {
697 		[VARIANT_A] = "_a",
698 		[VARIANT_B] = "_b",
699 		[VARIANT_DIFF] = "_diff",
700 		[VARIANT_PCT] = "_pct",
701 	};
702 	int i, j, k;
703 
704 	/* |<stat>| means we take absolute value of given stat */
705 	*is_abs = false;
706 	if (len > 2 && name[0] == '|' && name[len - 1] == '|') {
707 		*is_abs = true;
708 		name += 1;
709 		len -= 2;
710 	}
711 
712 	for (i = 0; i < ARRAY_SIZE(stat_defs); i++) {
713 		struct stat_def *def = &stat_defs[i];
714 		size_t alias_len, sfx_len;
715 		const char *alias;
716 
717 		for (j = 0; j < ARRAY_SIZE(stat_defs[i].names); j++) {
718 			alias = def->names[j];
719 			if (!alias)
720 				continue;
721 
722 			alias_len = strlen(alias);
723 			if (strncmp(name, alias, alias_len) != 0)
724 				continue;
725 
726 			if (alias_len == len) {
727 				/* If no variant suffix is specified, we
728 				 * assume control group (just in case we are
729 				 * in comparison mode. Variant is ignored in
730 				 * non-comparison mode.
731 				 */
732 				*var = VARIANT_B;
733 				*id = i;
734 				return true;
735 			}
736 
737 			for (k = 0; k < ARRAY_SIZE(var_sfxs); k++) {
738 				sfx_len = strlen(var_sfxs[k]);
739 				if (alias_len + sfx_len != len)
740 					continue;
741 
742 				if (strncmp(name + alias_len, var_sfxs[k], sfx_len) == 0) {
743 					*var = (enum stat_variant)k;
744 					*id = i;
745 					return true;
746 				}
747 			}
748 		}
749 	}
750 
751 	return false;
752 }
753 
/* True if @c is the trailing marker that requests ascending order */
static bool is_asc_sym(char c)
{
	switch (c) {
	case '^':
		return true;
	default:
		return false;
	}
}
758 
/* True if @c is one of the trailing markers that request descending order */
static bool is_desc_sym(char c)
{
	switch (c) {
	case 'v':
	case 'V':
	case '.':
	case '!':
	case '_':
		return true;
	default:
		return false;
	}
}
763 
764 static int parse_stat(const char *stat_name, struct stat_specs *specs)
765 {
766 	int id;
767 	bool has_order = false, is_asc = false, is_abs = false;
768 	size_t len = strlen(stat_name);
769 	enum stat_variant var;
770 
771 	if (specs->spec_cnt >= ARRAY_SIZE(specs->ids)) {
772 		fprintf(stderr, "Can't specify more than %zd stats\n", ARRAY_SIZE(specs->ids));
773 		return -E2BIG;
774 	}
775 
776 	if (len > 1 && (is_asc_sym(stat_name[len - 1]) || is_desc_sym(stat_name[len - 1]))) {
777 		has_order = true;
778 		is_asc = is_asc_sym(stat_name[len - 1]);
779 		len -= 1;
780 	}
781 
782 	if (!parse_stat_id_var(stat_name, len, &id, &var, &is_abs)) {
783 		fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name);
784 		return -ESRCH;
785 	}
786 
787 	specs->ids[specs->spec_cnt] = id;
788 	specs->variants[specs->spec_cnt] = var;
789 	specs->asc[specs->spec_cnt] = has_order ? is_asc : stat_defs[id].asc_by_default;
790 	specs->abs[specs->spec_cnt] = is_abs;
791 	specs->spec_cnt++;
792 
793 	return 0;
794 }
795 
/* Parse a comma-separated list of stat specs and append each one to @specs.
 *
 * Works on a private copy of @stats_str, so the input is left untouched.
 * Returns 0 on success, -ENOMEM if the copy cannot be allocated, or the first
 * error reported by parse_stat().
 */
static int parse_stats(const char *stats_str, struct stat_specs *specs)
{
	char *copy, *tok, *save = NULL;
	int err = 0;

	copy = strdup(stats_str);
	if (!copy)
		return -ENOMEM;

	for (tok = strtok_r(copy, ",", &save); tok; tok = strtok_r(NULL, ",", &save)) {
		err = parse_stat(tok, specs);
		if (err)
			break;
	}

	free(copy);
	return err;
}
816 
817 static void free_verif_stats(struct verif_stats *stats, size_t stat_cnt)
818 {
819 	int i;
820 
821 	if (!stats)
822 		return;
823 
824 	for (i = 0; i < stat_cnt; i++) {
825 		free(stats[i].file_name);
826 		free(stats[i].prog_name);
827 	}
828 	free(stats);
829 }
830 
831 static char verif_log_buf[64 * 1024];
832 
833 #define MAX_PARSED_LOG_LINES 100
834 
835 static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *s)
836 {
837 	const char *cur;
838 	int pos, lines;
839 
840 	buf[buf_sz - 1] = '\0';
841 
842 	for (pos = strlen(buf) - 1, lines = 0; pos >= 0 && lines < MAX_PARSED_LOG_LINES; lines++) {
843 		/* find previous endline or otherwise take the start of log buf */
844 		for (cur = &buf[pos]; cur > buf && cur[0] != '\n'; cur--, pos--) {
845 		}
846 		/* next time start from end of previous line (or pos goes to <0) */
847 		pos--;
848 		/* if we found endline, point right after endline symbol;
849 		 * otherwise, stay at the beginning of log buf
850 		 */
851 		if (cur[0] == '\n')
852 			cur++;
853 
854 		if (1 == sscanf(cur, "verification time %ld usec\n", &s->stats[DURATION]))
855 			continue;
856 		if (6 == sscanf(cur, "processed %ld insns (limit %*d) max_states_per_insn %ld total_states %ld peak_states %ld mark_read %ld",
857 				&s->stats[TOTAL_INSNS],
858 				&s->stats[MAX_STATES_PER_INSN],
859 				&s->stats[TOTAL_STATES],
860 				&s->stats[PEAK_STATES],
861 				&s->stats[MARK_READ_MAX_LEN]))
862 			continue;
863 	}
864 
865 	return 0;
866 }
867 
/* A source line together with the number of times it occurs */
struct line_cnt {
	char *line;
	int cnt;
};

/* qsort() comparator for an array of C strings (char *), ordering them with
 * strcmp().
 */
static int str_cmp(const void *a, const void *b)
{
	const char **str1 = (const char **)a;
	const char **str2 = (const char **)b;

	return strcmp(*str1, *str2);
}

/* qsort() comparator for struct line_cnt: most frequent lines first, with
 * ties broken by the line text in strcmp() order. Descending count is what
 * lets the caller take the first N entries as the "top" lines.
 */
static int line_cnt_cmp(const void *a, const void *b)
{
	const struct line_cnt *a_cnt = (const struct line_cnt *)a;
	const struct line_cnt *b_cnt = (const struct line_cnt *)b;

	if (a_cnt->cnt != b_cnt->cnt)
		return a_cnt->cnt > b_cnt->cnt ? -1 : 1;
	return strcmp(a_cnt->line, b_cnt->line);
}
890 
891 static int print_top_src_lines(char * const buf, size_t buf_sz, const char *prog_name)
892 {
893 	int lines_cap = 0;
894 	int lines_size = 0;
895 	char **lines = NULL;
896 	char *line = NULL;
897 	char *state;
898 	struct line_cnt *freq = NULL;
899 	struct line_cnt *cur;
900 	int unique_lines;
901 	int err = 0;
902 	int i;
903 
904 	while ((line = strtok_r(line ? NULL : buf, "\n", &state))) {
905 		if (strncmp(line, "; ", 2) != 0)
906 			continue;
907 		line += 2;
908 
909 		if (lines_size == lines_cap) {
910 			char **tmp;
911 
912 			lines_cap = max(16, lines_cap * 2);
913 			tmp = realloc(lines, lines_cap * sizeof(*tmp));
914 			if (!tmp) {
915 				err = -ENOMEM;
916 				goto cleanup;
917 			}
918 			lines = tmp;
919 		}
920 		lines[lines_size] = line;
921 		lines_size++;
922 	}
923 
924 	if (lines_size == 0)
925 		goto cleanup;
926 
927 	qsort(lines, lines_size, sizeof(*lines), str_cmp);
928 
929 	freq = calloc(lines_size, sizeof(*freq));
930 	if (!freq) {
931 		err = -ENOMEM;
932 		goto cleanup;
933 	}
934 
935 	cur = freq;
936 	cur->line = lines[0];
937 	cur->cnt = 1;
938 	for (i = 1; i < lines_size; ++i) {
939 		if (strcmp(lines[i], cur->line) != 0) {
940 			cur++;
941 			cur->line = lines[i];
942 			cur->cnt = 0;
943 		}
944 		cur->cnt++;
945 	}
946 	unique_lines = cur - freq + 1;
947 
948 	qsort(freq, unique_lines, sizeof(struct line_cnt), line_cnt_cmp);
949 
950 	printf("Top source lines (%s):\n", prog_name);
951 	for (i = 0; i < min(unique_lines, env.top_src_lines); ++i) {
952 		const char *src_code = freq[i].line;
953 		const char *src_line = NULL;
954 		char *split = strrchr(freq[i].line, '@');
955 
956 		if (split) {
957 			src_line = split + 1;
958 
959 			while (*src_line && isspace(*src_line))
960 				src_line++;
961 
962 			while (split > src_code && isspace(*split))
963 				split--;
964 			*split = '\0';
965 		}
966 
967 		if (src_line)
968 			printf("%5d: (%s)\t%s\n", freq[i].cnt, src_line, src_code);
969 		else
970 			printf("%5d: %s\n", freq[i].cnt, src_code);
971 	}
972 	printf("\n");
973 
974 cleanup:
975 	free(freq);
976 	free(lines);
977 	return err;
978 }
979 
980 static int guess_prog_type_by_ctx_name(const char *ctx_name,
981 				       enum bpf_prog_type *prog_type,
982 				       enum bpf_attach_type *attach_type)
983 {
984 	/* We need to guess program type based on its declared context type.
985 	 * This guess can't be perfect as many different program types might
986 	 * share the same context type.  So we can only hope to reasonably
987 	 * well guess this and get lucky.
988 	 *
989 	 * Just in case, we support both UAPI-side type names and
990 	 * kernel-internal names.
991 	 */
992 	static struct {
993 		const char *uapi_name;
994 		const char *kern_name;
995 		enum bpf_prog_type prog_type;
996 		enum bpf_attach_type attach_type;
997 	} ctx_map[] = {
998 		/* __sk_buff is most ambiguous, we assume TC program */
999 		{ "__sk_buff", "sk_buff", BPF_PROG_TYPE_SCHED_CLS },
1000 		{ "bpf_sock", "sock", BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND },
1001 		{ "bpf_sock_addr", "bpf_sock_addr_kern",  BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND },
1002 		{ "bpf_sock_ops", "bpf_sock_ops_kern", BPF_PROG_TYPE_SOCK_OPS, BPF_CGROUP_SOCK_OPS },
1003 		{ "sk_msg_md", "sk_msg", BPF_PROG_TYPE_SK_MSG, BPF_SK_MSG_VERDICT },
1004 		{ "bpf_cgroup_dev_ctx", "bpf_cgroup_dev_ctx", BPF_PROG_TYPE_CGROUP_DEVICE, BPF_CGROUP_DEVICE },
1005 		{ "bpf_sysctl", "bpf_sysctl_kern", BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_CGROUP_SYSCTL },
1006 		{ "bpf_sockopt", "bpf_sockopt_kern", BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT },
1007 		{ "sk_reuseport_md", "sk_reuseport_kern", BPF_PROG_TYPE_SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE },
1008 		{ "bpf_sk_lookup", "bpf_sk_lookup_kern", BPF_PROG_TYPE_SK_LOOKUP, BPF_SK_LOOKUP },
1009 		{ "xdp_md", "xdp_buff", BPF_PROG_TYPE_XDP, BPF_XDP },
1010 		/* tracing types with no expected attach type */
1011 		{ "bpf_user_pt_regs_t", "pt_regs", BPF_PROG_TYPE_KPROBE },
1012 		{ "bpf_perf_event_data", "bpf_perf_event_data_kern", BPF_PROG_TYPE_PERF_EVENT },
1013 		/* raw_tp programs use u64[] from kernel side, we don't want
1014 		 * to match on that, probably; so NULL for kern-side type
1015 		 */
1016 		{ "bpf_raw_tracepoint_args", NULL, BPF_PROG_TYPE_RAW_TRACEPOINT },
1017 	};
1018 	int i;
1019 
1020 	if (!ctx_name)
1021 		return -EINVAL;
1022 
1023 	for (i = 0; i < ARRAY_SIZE(ctx_map); i++) {
1024 		if (strcmp(ctx_map[i].uapi_name, ctx_name) == 0 ||
1025 		    (ctx_map[i].kern_name && strcmp(ctx_map[i].kern_name, ctx_name) == 0)) {
1026 			*prog_type = ctx_map[i].prog_type;
1027 			*attach_type = ctx_map[i].attach_type;
1028 			return 0;
1029 		}
1030 	}
1031 
1032 	return -ESRCH;
1033 }
1034 
1035 static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const char *filename)
1036 {
1037 	struct bpf_map *map;
1038 
1039 	bpf_object__for_each_map(map, obj) {
1040 		/* disable pinning */
1041 		bpf_map__set_pin_path(map, NULL);
1042 
1043 		/* fix up map size, if necessary */
1044 		switch (bpf_map__type(map)) {
1045 		case BPF_MAP_TYPE_SK_STORAGE:
1046 		case BPF_MAP_TYPE_TASK_STORAGE:
1047 		case BPF_MAP_TYPE_INODE_STORAGE:
1048 		case BPF_MAP_TYPE_CGROUP_STORAGE:
1049 			break;
1050 		default:
1051 			if (bpf_map__max_entries(map) == 0)
1052 				bpf_map__set_max_entries(map, 1);
1053 		}
1054 	}
1055 
1056 	/* SEC(freplace) programs can't be loaded with veristat as is,
1057 	 * but we can try guessing their target program's expected type by
1058 	 * looking at the type of program's first argument and substituting
1059 	 * corresponding program type
1060 	 */
1061 	if (bpf_program__type(prog) == BPF_PROG_TYPE_EXT) {
1062 		const struct btf *btf = bpf_object__btf(obj);
1063 		const char *prog_name = bpf_program__name(prog);
1064 		enum bpf_prog_type prog_type;
1065 		enum bpf_attach_type attach_type;
1066 		const struct btf_type *t;
1067 		const char *ctx_name;
1068 		int id;
1069 
1070 		if (!btf)
1071 			goto skip_freplace_fixup;
1072 
1073 		id = btf__find_by_name_kind(btf, prog_name, BTF_KIND_FUNC);
1074 		t = btf__type_by_id(btf, id);
1075 		t = btf__type_by_id(btf, t->type);
1076 		if (!btf_is_func_proto(t) || btf_vlen(t) != 1)
1077 			goto skip_freplace_fixup;
1078 
1079 		/* context argument is a pointer to a struct/typedef */
1080 		t = btf__type_by_id(btf, btf_params(t)[0].type);
1081 		while (t && btf_is_mod(t))
1082 			t = btf__type_by_id(btf, t->type);
1083 		if (!t || !btf_is_ptr(t))
1084 			goto skip_freplace_fixup;
1085 		t = btf__type_by_id(btf, t->type);
1086 		while (t && btf_is_mod(t))
1087 			t = btf__type_by_id(btf, t->type);
1088 		if (!t)
1089 			goto skip_freplace_fixup;
1090 
1091 		ctx_name = btf__name_by_offset(btf, t->name_off);
1092 
1093 		if (guess_prog_type_by_ctx_name(ctx_name, &prog_type, &attach_type) == 0) {
1094 			bpf_program__set_type(prog, prog_type);
1095 			bpf_program__set_expected_attach_type(prog, attach_type);
1096 
1097 			if (!env.quiet) {
1098 				printf("Using guessed program type '%s' for %s/%s...\n",
1099 					libbpf_bpf_prog_type_str(prog_type),
1100 					filename, prog_name);
1101 			}
1102 		} else {
1103 			if (!env.quiet) {
1104 				printf("Failed to guess program type for freplace program with context type name '%s' for %s/%s. Consider using canonical type names to help veristat...\n",
1105 					ctx_name, filename, prog_name);
1106 			}
1107 		}
1108 	}
1109 skip_freplace_fixup:
1110 	return;
1111 }
1112 
1113 static int max_verifier_log_size(void)
1114 {
1115 	const int SMALL_LOG_SIZE = UINT_MAX >> 8;
1116 	const int BIG_LOG_SIZE = UINT_MAX >> 2;
1117 	struct bpf_insn insns[] = {
1118 		{ .code = BPF_ALU | BPF_MOV | BPF_X, .dst_reg = BPF_REG_0, },
1119 		{ .code  = BPF_JMP | BPF_EXIT, },
1120 	};
1121 	LIBBPF_OPTS(bpf_prog_load_opts, opts,
1122 		    .log_size = BIG_LOG_SIZE,
1123 		    .log_buf = (void *)-1,
1124 		    .log_level = 4
1125 	);
1126 	int ret, insn_cnt = ARRAY_SIZE(insns);
1127 	static int log_size;
1128 
1129 	if (log_size != 0)
1130 		return log_size;
1131 
1132 	ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
1133 
1134 	if (ret == -EFAULT)
1135 		log_size = BIG_LOG_SIZE;
1136 	else /* ret == -EINVAL, big log size is not supported by the verifier */
1137 		log_size = SMALL_LOG_SIZE;
1138 
1139 	return log_size;
1140 }
1141 
1142 static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog)
1143 {
1144 	const char *base_filename = basename(strdupa(filename));
1145 	const char *prog_name = bpf_program__name(prog);
1146 	char *buf;
1147 	int buf_sz, log_level;
1148 	struct verif_stats *stats;
1149 	int err = 0;
1150 	void *tmp;
1151 
1152 	if (!should_process_file_prog(base_filename, bpf_program__name(prog))) {
1153 		env.progs_skipped++;
1154 		return 0;
1155 	}
1156 
1157 	tmp = realloc(env.prog_stats, (env.prog_stat_cnt + 1) * sizeof(*env.prog_stats));
1158 	if (!tmp)
1159 		return -ENOMEM;
1160 	env.prog_stats = tmp;
1161 	stats = &env.prog_stats[env.prog_stat_cnt++];
1162 	memset(stats, 0, sizeof(*stats));
1163 
1164 	if (env.verbose || env.top_src_lines > 0) {
1165 		buf_sz = env.log_size ? env.log_size : max_verifier_log_size();
1166 		buf = malloc(buf_sz);
1167 		if (!buf)
1168 			return -ENOMEM;
1169 		/* ensure we always request stats */
1170 		log_level = env.log_level | 4 | (env.log_fixed ? 8 : 0);
1171 		/* --top-src-lines needs verifier log */
1172 		if (env.top_src_lines > 0 && env.log_level == 0)
1173 			log_level |= 2;
1174 	} else {
1175 		buf = verif_log_buf;
1176 		buf_sz = sizeof(verif_log_buf);
1177 		/* request only verifier stats */
1178 		log_level = 4 | (env.log_fixed ? 8 : 0);
1179 	}
1180 	verif_log_buf[0] = '\0';
1181 
1182 	bpf_program__set_log_buf(prog, buf, buf_sz);
1183 	bpf_program__set_log_level(prog, log_level);
1184 
1185 	/* increase chances of successful BPF object loading */
1186 	fixup_obj(obj, prog, base_filename);
1187 
1188 	if (env.force_checkpoints)
1189 		bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_STATE_FREQ);
1190 	if (env.force_reg_invariants)
1191 		bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_REG_INVARIANTS);
1192 
1193 	err = bpf_object__load(obj);
1194 	env.progs_processed++;
1195 
1196 	stats->file_name = strdup(base_filename);
1197 	stats->prog_name = strdup(bpf_program__name(prog));
1198 	stats->stats[VERDICT] = err == 0; /* 1 - success, 0 - failure */
1199 	parse_verif_log(buf, buf_sz, stats);
1200 
1201 	if (env.verbose) {
1202 		printf("PROCESSING %s/%s, DURATION US: %ld, VERDICT: %s, VERIFIER LOG:\n%s\n",
1203 		       filename, prog_name, stats->stats[DURATION],
1204 		       err ? "failure" : "success", buf);
1205 	}
1206 	if (env.top_src_lines > 0)
1207 		print_top_src_lines(buf, buf_sz, stats->prog_name);
1208 
1209 	if (verif_log_buf != buf)
1210 		free(buf);
1211 
1212 	return 0;
1213 };
1214 
1215 static int process_obj(const char *filename)
1216 {
1217 	const char *base_filename = basename(strdupa(filename));
1218 	struct bpf_object *obj = NULL, *tobj;
1219 	struct bpf_program *prog, *tprog, *lprog;
1220 	libbpf_print_fn_t old_libbpf_print_fn;
1221 	LIBBPF_OPTS(bpf_object_open_opts, opts);
1222 	int err = 0, prog_cnt = 0;
1223 
1224 	if (!should_process_file_prog(base_filename, NULL)) {
1225 		if (env.verbose)
1226 			printf("Skipping '%s' due to filters...\n", filename);
1227 		env.files_skipped++;
1228 		return 0;
1229 	}
1230 	if (!is_bpf_obj_file(filename)) {
1231 		if (env.verbose)
1232 			printf("Skipping '%s' as it's not a BPF object file...\n", filename);
1233 		env.files_skipped++;
1234 		return 0;
1235 	}
1236 
1237 	if (!env.quiet && env.out_fmt == RESFMT_TABLE)
1238 		printf("Processing '%s'...\n", base_filename);
1239 
1240 	old_libbpf_print_fn = libbpf_set_print(libbpf_print_fn);
1241 	obj = bpf_object__open_file(filename, &opts);
1242 	if (!obj) {
1243 		/* if libbpf can't open BPF object file, it could be because
1244 		 * that BPF object file is incomplete and has to be statically
1245 		 * linked into a final BPF object file; instead of bailing
1246 		 * out, report it into stderr, mark it as skipped, and
1247 		 * proceed
1248 		 */
1249 		fprintf(stderr, "Failed to open '%s': %d\n", filename, -errno);
1250 		env.files_skipped++;
1251 		err = 0;
1252 		goto cleanup;
1253 	}
1254 
1255 	env.files_processed++;
1256 
1257 	bpf_object__for_each_program(prog, obj) {
1258 		prog_cnt++;
1259 	}
1260 
1261 	if (prog_cnt == 1) {
1262 		prog = bpf_object__next_program(obj, NULL);
1263 		bpf_program__set_autoload(prog, true);
1264 		process_prog(filename, obj, prog);
1265 		goto cleanup;
1266 	}
1267 
1268 	bpf_object__for_each_program(prog, obj) {
1269 		const char *prog_name = bpf_program__name(prog);
1270 
1271 		tobj = bpf_object__open_file(filename, &opts);
1272 		if (!tobj) {
1273 			err = -errno;
1274 			fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
1275 			goto cleanup;
1276 		}
1277 
1278 		lprog = NULL;
1279 		bpf_object__for_each_program(tprog, tobj) {
1280 			const char *tprog_name = bpf_program__name(tprog);
1281 
1282 			if (strcmp(prog_name, tprog_name) == 0) {
1283 				bpf_program__set_autoload(tprog, true);
1284 				lprog = tprog;
1285 			} else {
1286 				bpf_program__set_autoload(tprog, false);
1287 			}
1288 		}
1289 
1290 		process_prog(filename, tobj, lprog);
1291 		bpf_object__close(tobj);
1292 	}
1293 
1294 cleanup:
1295 	bpf_object__close(obj);
1296 	libbpf_set_print(old_libbpf_print_fn);
1297 	return err;
1298 }
1299 
1300 static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2,
1301 		    enum stat_id id, bool asc, bool abs)
1302 {
1303 	int cmp = 0;
1304 
1305 	switch (id) {
1306 	case FILE_NAME:
1307 		cmp = strcmp(s1->file_name, s2->file_name);
1308 		break;
1309 	case PROG_NAME:
1310 		cmp = strcmp(s1->prog_name, s2->prog_name);
1311 		break;
1312 	case VERDICT:
1313 	case DURATION:
1314 	case TOTAL_INSNS:
1315 	case TOTAL_STATES:
1316 	case PEAK_STATES:
1317 	case MAX_STATES_PER_INSN:
1318 	case MARK_READ_MAX_LEN: {
1319 		long v1 = s1->stats[id];
1320 		long v2 = s2->stats[id];
1321 
1322 		if (abs) {
1323 			v1 = v1 < 0 ? -v1 : v1;
1324 			v2 = v2 < 0 ? -v2 : v2;
1325 		}
1326 
1327 		if (v1 != v2)
1328 			cmp = v1 < v2 ? -1 : 1;
1329 		break;
1330 	}
1331 	default:
1332 		fprintf(stderr, "Unrecognized stat #%d\n", id);
1333 		exit(1);
1334 	}
1335 
1336 	return asc ? cmp : -cmp;
1337 }
1338 
1339 static int cmp_prog_stats(const void *v1, const void *v2)
1340 {
1341 	const struct verif_stats *s1 = v1, *s2 = v2;
1342 	int i, cmp;
1343 
1344 	for (i = 0; i < env.sort_spec.spec_cnt; i++) {
1345 		cmp = cmp_stat(s1, s2, env.sort_spec.ids[i],
1346 			       env.sort_spec.asc[i], env.sort_spec.abs[i]);
1347 		if (cmp != 0)
1348 			return cmp;
1349 	}
1350 
1351 	/* always disambiguate with file+prog, which are unique */
1352 	cmp = strcmp(s1->file_name, s2->file_name);
1353 	if (cmp != 0)
1354 		return cmp;
1355 	return strcmp(s1->prog_name, s2->prog_name);
1356 }
1357 
1358 static void fetch_join_stat_value(const struct verif_stats_join *s,
1359 				  enum stat_id id, enum stat_variant var,
1360 				  const char **str_val,
1361 				  double *num_val)
1362 {
1363 	long v1, v2;
1364 
1365 	if (id == FILE_NAME) {
1366 		*str_val = s->file_name;
1367 		return;
1368 	}
1369 	if (id == PROG_NAME) {
1370 		*str_val = s->prog_name;
1371 		return;
1372 	}
1373 
1374 	v1 = s->stats_a ? s->stats_a->stats[id] : 0;
1375 	v2 = s->stats_b ? s->stats_b->stats[id] : 0;
1376 
1377 	switch (var) {
1378 	case VARIANT_A:
1379 		if (!s->stats_a)
1380 			*num_val = -DBL_MAX;
1381 		else
1382 			*num_val = s->stats_a->stats[id];
1383 		return;
1384 	case VARIANT_B:
1385 		if (!s->stats_b)
1386 			*num_val = -DBL_MAX;
1387 		else
1388 			*num_val = s->stats_b->stats[id];
1389 		return;
1390 	case VARIANT_DIFF:
1391 		if (!s->stats_a || !s->stats_b)
1392 			*num_val = -DBL_MAX;
1393 		else if (id == VERDICT)
1394 			*num_val = v1 == v2 ? 1.0 /* MATCH */ : 0.0 /* MISMATCH */;
1395 		else
1396 			*num_val = (double)(v2 - v1);
1397 		return;
1398 	case VARIANT_PCT:
1399 		if (!s->stats_a || !s->stats_b) {
1400 			*num_val = -DBL_MAX;
1401 		} else if (v1 == 0) {
1402 			if (v1 == v2)
1403 				*num_val = 0.0;
1404 			else
1405 				*num_val = v2 < v1 ? -100.0 : 100.0;
1406 		} else {
1407 			 *num_val = (v2 - v1) * 100.0 / v1;
1408 		}
1409 		return;
1410 	}
1411 }
1412 
1413 static int cmp_join_stat(const struct verif_stats_join *s1,
1414 			 const struct verif_stats_join *s2,
1415 			 enum stat_id id, enum stat_variant var,
1416 			 bool asc, bool abs)
1417 {
1418 	const char *str1 = NULL, *str2 = NULL;
1419 	double v1 = 0.0, v2 = 0.0;
1420 	int cmp = 0;
1421 
1422 	fetch_join_stat_value(s1, id, var, &str1, &v1);
1423 	fetch_join_stat_value(s2, id, var, &str2, &v2);
1424 
1425 	if (abs) {
1426 		v1 = fabs(v1);
1427 		v2 = fabs(v2);
1428 	}
1429 
1430 	if (str1)
1431 		cmp = strcmp(str1, str2);
1432 	else if (v1 != v2)
1433 		cmp = v1 < v2 ? -1 : 1;
1434 
1435 	return asc ? cmp : -cmp;
1436 }
1437 
1438 static int cmp_join_stats(const void *v1, const void *v2)
1439 {
1440 	const struct verif_stats_join *s1 = v1, *s2 = v2;
1441 	int i, cmp;
1442 
1443 	for (i = 0; i < env.sort_spec.spec_cnt; i++) {
1444 		cmp = cmp_join_stat(s1, s2,
1445 				    env.sort_spec.ids[i],
1446 				    env.sort_spec.variants[i],
1447 				    env.sort_spec.asc[i],
1448 				    env.sort_spec.abs[i]);
1449 		if (cmp != 0)
1450 			return cmp;
1451 	}
1452 
1453 	/* always disambiguate with file+prog, which are unique */
1454 	cmp = strcmp(s1->file_name, s2->file_name);
1455 	if (cmp != 0)
1456 		return cmp;
1457 	return strcmp(s1->prog_name, s2->prog_name);
1458 }
1459 
1460 #define HEADER_CHAR '-'
1461 #define COLUMN_SEP "  "
1462 
1463 static void output_header_underlines(void)
1464 {
1465 	int i, j, len;
1466 
1467 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1468 		len = env.output_spec.lens[i];
1469 
1470 		printf("%s", i == 0 ? "" : COLUMN_SEP);
1471 		for (j = 0; j < len; j++)
1472 			printf("%c", HEADER_CHAR);
1473 	}
1474 	printf("\n");
1475 }
1476 
1477 static void output_headers(enum resfmt fmt)
1478 {
1479 	const char *fmt_str;
1480 	int i, len;
1481 
1482 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1483 		int id = env.output_spec.ids[i];
1484 		int *max_len = &env.output_spec.lens[i];
1485 
1486 		switch (fmt) {
1487 		case RESFMT_TABLE_CALCLEN:
1488 			len = snprintf(NULL, 0, "%s", stat_defs[id].header);
1489 			if (len > *max_len)
1490 				*max_len = len;
1491 			break;
1492 		case RESFMT_TABLE:
1493 			fmt_str = stat_defs[id].left_aligned ? "%s%-*s" : "%s%*s";
1494 			printf(fmt_str, i == 0 ? "" : COLUMN_SEP,  *max_len, stat_defs[id].header);
1495 			if (i == env.output_spec.spec_cnt - 1)
1496 				printf("\n");
1497 			break;
1498 		case RESFMT_CSV:
1499 			printf("%s%s", i == 0 ? "" : ",", stat_defs[id].names[0]);
1500 			if (i == env.output_spec.spec_cnt - 1)
1501 				printf("\n");
1502 			break;
1503 		}
1504 	}
1505 
1506 	if (fmt == RESFMT_TABLE)
1507 		output_header_underlines();
1508 }
1509 
1510 static void prepare_value(const struct verif_stats *s, enum stat_id id,
1511 			  const char **str, long *val)
1512 {
1513 	switch (id) {
1514 	case FILE_NAME:
1515 		*str = s ? s->file_name : "N/A";
1516 		break;
1517 	case PROG_NAME:
1518 		*str = s ? s->prog_name : "N/A";
1519 		break;
1520 	case VERDICT:
1521 		if (!s)
1522 			*str = "N/A";
1523 		else
1524 			*str = s->stats[VERDICT] ? "success" : "failure";
1525 		break;
1526 	case DURATION:
1527 	case TOTAL_INSNS:
1528 	case TOTAL_STATES:
1529 	case PEAK_STATES:
1530 	case MAX_STATES_PER_INSN:
1531 	case MARK_READ_MAX_LEN:
1532 		*val = s ? s->stats[id] : 0;
1533 		break;
1534 	default:
1535 		fprintf(stderr, "Unrecognized stat #%d\n", id);
1536 		exit(1);
1537 	}
1538 }
1539 
1540 static void output_stats(const struct verif_stats *s, enum resfmt fmt, bool last)
1541 {
1542 	int i;
1543 
1544 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1545 		int id = env.output_spec.ids[i];
1546 		int *max_len = &env.output_spec.lens[i], len;
1547 		const char *str = NULL;
1548 		long val = 0;
1549 
1550 		prepare_value(s, id, &str, &val);
1551 
1552 		switch (fmt) {
1553 		case RESFMT_TABLE_CALCLEN:
1554 			if (str)
1555 				len = snprintf(NULL, 0, "%s", str);
1556 			else
1557 				len = snprintf(NULL, 0, "%ld", val);
1558 			if (len > *max_len)
1559 				*max_len = len;
1560 			break;
1561 		case RESFMT_TABLE:
1562 			if (str)
1563 				printf("%s%-*s", i == 0 ? "" : COLUMN_SEP, *max_len, str);
1564 			else
1565 				printf("%s%*ld", i == 0 ? "" : COLUMN_SEP,  *max_len, val);
1566 			if (i == env.output_spec.spec_cnt - 1)
1567 				printf("\n");
1568 			break;
1569 		case RESFMT_CSV:
1570 			if (str)
1571 				printf("%s%s", i == 0 ? "" : ",", str);
1572 			else
1573 				printf("%s%ld", i == 0 ? "" : ",", val);
1574 			if (i == env.output_spec.spec_cnt - 1)
1575 				printf("\n");
1576 			break;
1577 		}
1578 	}
1579 
1580 	if (last && fmt == RESFMT_TABLE) {
1581 		output_header_underlines();
1582 		printf("Done. Processed %d files, %d programs. Skipped %d files, %d programs.\n",
1583 		       env.files_processed, env.files_skipped, env.progs_processed, env.progs_skipped);
1584 	}
1585 }
1586 
1587 static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats *st)
1588 {
1589 	switch (id) {
1590 	case FILE_NAME:
1591 		st->file_name = strdup(str);
1592 		if (!st->file_name)
1593 			return -ENOMEM;
1594 		break;
1595 	case PROG_NAME:
1596 		st->prog_name = strdup(str);
1597 		if (!st->prog_name)
1598 			return -ENOMEM;
1599 		break;
1600 	case VERDICT:
1601 		if (strcmp(str, "success") == 0) {
1602 			st->stats[VERDICT] = true;
1603 		} else if (strcmp(str, "failure") == 0) {
1604 			st->stats[VERDICT] = false;
1605 		} else {
1606 			fprintf(stderr, "Unrecognized verification verdict '%s'\n", str);
1607 			return -EINVAL;
1608 		}
1609 		break;
1610 	case DURATION:
1611 	case TOTAL_INSNS:
1612 	case TOTAL_STATES:
1613 	case PEAK_STATES:
1614 	case MAX_STATES_PER_INSN:
1615 	case MARK_READ_MAX_LEN: {
1616 		long val;
1617 		int err, n;
1618 
1619 		if (sscanf(str, "%ld %n", &val, &n) != 1 || n != strlen(str)) {
1620 			err = -errno;
1621 			fprintf(stderr, "Failed to parse '%s' as integer\n", str);
1622 			return err;
1623 		}
1624 
1625 		st->stats[id] = val;
1626 		break;
1627 	}
1628 	default:
1629 		fprintf(stderr, "Unrecognized stat #%d\n", id);
1630 		return -EINVAL;
1631 	}
1632 	return 0;
1633 }
1634 
1635 static int parse_stats_csv(const char *filename, struct stat_specs *specs,
1636 			   struct verif_stats **statsp, int *stat_cntp)
1637 {
1638 	char line[4096];
1639 	FILE *f;
1640 	int err = 0;
1641 	bool header = true;
1642 
1643 	f = fopen(filename, "r");
1644 	if (!f) {
1645 		err = -errno;
1646 		fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
1647 		return err;
1648 	}
1649 
1650 	*stat_cntp = 0;
1651 
1652 	while (fgets(line, sizeof(line), f)) {
1653 		char *input = line, *state = NULL, *next;
1654 		struct verif_stats *st = NULL;
1655 		int col = 0, cnt = 0;
1656 
1657 		if (!header) {
1658 			void *tmp;
1659 
1660 			tmp = realloc(*statsp, (*stat_cntp + 1) * sizeof(**statsp));
1661 			if (!tmp) {
1662 				err = -ENOMEM;
1663 				goto cleanup;
1664 			}
1665 			*statsp = tmp;
1666 
1667 			st = &(*statsp)[*stat_cntp];
1668 			memset(st, 0, sizeof(*st));
1669 
1670 			*stat_cntp += 1;
1671 		}
1672 
1673 		while ((next = strtok_r(cnt++ ? NULL : input, ",\n", &state))) {
1674 			if (header) {
1675 				/* for the first line, set up spec stats */
1676 				err = parse_stat(next, specs);
1677 				if (err)
1678 					goto cleanup;
1679 				continue;
1680 			}
1681 
1682 			/* for all other lines, parse values based on spec */
1683 			if (col >= specs->spec_cnt) {
1684 				fprintf(stderr, "Found extraneous column #%d in row #%d of '%s'\n",
1685 					col, *stat_cntp, filename);
1686 				err = -EINVAL;
1687 				goto cleanup;
1688 			}
1689 			err = parse_stat_value(next, specs->ids[col], st);
1690 			if (err)
1691 				goto cleanup;
1692 			col++;
1693 		}
1694 
1695 		if (header) {
1696 			header = false;
1697 			continue;
1698 		}
1699 
1700 		if (col < specs->spec_cnt) {
1701 			fprintf(stderr, "Not enough columns in row #%d in '%s'\n",
1702 				*stat_cntp, filename);
1703 			err = -EINVAL;
1704 			goto cleanup;
1705 		}
1706 
1707 		if (!st->file_name || !st->prog_name) {
1708 			fprintf(stderr, "Row #%d in '%s' is missing file and/or program name\n",
1709 				*stat_cntp, filename);
1710 			err = -EINVAL;
1711 			goto cleanup;
1712 		}
1713 
1714 		/* in comparison mode we can only check filters after we
1715 		 * parsed entire line; if row should be ignored we pretend we
1716 		 * never parsed it
1717 		 */
1718 		if (!should_process_file_prog(st->file_name, st->prog_name)) {
1719 			free(st->file_name);
1720 			free(st->prog_name);
1721 			*stat_cntp -= 1;
1722 		}
1723 	}
1724 
1725 	if (!feof(f)) {
1726 		err = -errno;
1727 		fprintf(stderr, "Failed I/O for '%s': %d\n", filename, err);
1728 	}
1729 
1730 cleanup:
1731 	fclose(f);
1732 	return err;
1733 }
1734 
1735 /* empty/zero stats for mismatched rows */
1736 static const struct verif_stats fallback_stats = { .file_name = "", .prog_name = "" };
1737 
1738 static bool is_key_stat(enum stat_id id)
1739 {
1740 	return id == FILE_NAME || id == PROG_NAME;
1741 }
1742 
1743 static void output_comp_header_underlines(void)
1744 {
1745 	int i, j, k;
1746 
1747 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1748 		int id = env.output_spec.ids[i];
1749 		int max_j = is_key_stat(id) ? 1 : 3;
1750 
1751 		for (j = 0; j < max_j; j++) {
1752 			int len = env.output_spec.lens[3 * i + j];
1753 
1754 			printf("%s", i + j == 0 ? "" : COLUMN_SEP);
1755 
1756 			for (k = 0; k < len; k++)
1757 				printf("%c", HEADER_CHAR);
1758 		}
1759 	}
1760 	printf("\n");
1761 }
1762 
1763 static void output_comp_headers(enum resfmt fmt)
1764 {
1765 	static const char *table_sfxs[3] = {" (A)", " (B)", " (DIFF)"};
1766 	static const char *name_sfxs[3] = {"_base", "_comp", "_diff"};
1767 	int i, j, len;
1768 
1769 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1770 		int id = env.output_spec.ids[i];
1771 		/* key stats don't have A/B/DIFF columns, they are common for both data sets */
1772 		int max_j = is_key_stat(id) ? 1 : 3;
1773 
1774 		for (j = 0; j < max_j; j++) {
1775 			int *max_len = &env.output_spec.lens[3 * i + j];
1776 			bool last = (i == env.output_spec.spec_cnt - 1) && (j == max_j - 1);
1777 			const char *sfx;
1778 
1779 			switch (fmt) {
1780 			case RESFMT_TABLE_CALCLEN:
1781 				sfx = is_key_stat(id) ? "" : table_sfxs[j];
1782 				len = snprintf(NULL, 0, "%s%s", stat_defs[id].header, sfx);
1783 				if (len > *max_len)
1784 					*max_len = len;
1785 				break;
1786 			case RESFMT_TABLE:
1787 				sfx = is_key_stat(id) ? "" : table_sfxs[j];
1788 				printf("%s%-*s%s", i + j == 0 ? "" : COLUMN_SEP,
1789 				       *max_len - (int)strlen(sfx), stat_defs[id].header, sfx);
1790 				if (last)
1791 					printf("\n");
1792 				break;
1793 			case RESFMT_CSV:
1794 				sfx = is_key_stat(id) ? "" : name_sfxs[j];
1795 				printf("%s%s%s", i + j == 0 ? "" : ",", stat_defs[id].names[0], sfx);
1796 				if (last)
1797 					printf("\n");
1798 				break;
1799 			}
1800 		}
1801 	}
1802 
1803 	if (fmt == RESFMT_TABLE)
1804 		output_comp_header_underlines();
1805 }
1806 
1807 static void output_comp_stats(const struct verif_stats_join *join_stats,
1808 			      enum resfmt fmt, bool last)
1809 {
1810 	const struct verif_stats *base = join_stats->stats_a;
1811 	const struct verif_stats *comp = join_stats->stats_b;
1812 	char base_buf[1024] = {}, comp_buf[1024] = {}, diff_buf[1024] = {};
1813 	int i;
1814 
1815 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1816 		int id = env.output_spec.ids[i], len;
1817 		int *max_len_base = &env.output_spec.lens[3 * i + 0];
1818 		int *max_len_comp = &env.output_spec.lens[3 * i + 1];
1819 		int *max_len_diff = &env.output_spec.lens[3 * i + 2];
1820 		const char *base_str = NULL, *comp_str = NULL;
1821 		long base_val = 0, comp_val = 0, diff_val = 0;
1822 
1823 		prepare_value(base, id, &base_str, &base_val);
1824 		prepare_value(comp, id, &comp_str, &comp_val);
1825 
1826 		/* normalize all the outputs to be in string buffers for simplicity */
1827 		if (is_key_stat(id)) {
1828 			/* key stats (file and program name) are always strings */
1829 			if (base)
1830 				snprintf(base_buf, sizeof(base_buf), "%s", base_str);
1831 			else
1832 				snprintf(base_buf, sizeof(base_buf), "%s", comp_str);
1833 		} else if (base_str) {
1834 			snprintf(base_buf, sizeof(base_buf), "%s", base_str);
1835 			snprintf(comp_buf, sizeof(comp_buf), "%s", comp_str);
1836 			if (!base || !comp)
1837 				snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A");
1838 			else if (strcmp(base_str, comp_str) == 0)
1839 				snprintf(diff_buf, sizeof(diff_buf), "%s", "MATCH");
1840 			else
1841 				snprintf(diff_buf, sizeof(diff_buf), "%s", "MISMATCH");
1842 		} else {
1843 			double p = 0.0;
1844 
1845 			if (base)
1846 				snprintf(base_buf, sizeof(base_buf), "%ld", base_val);
1847 			else
1848 				snprintf(base_buf, sizeof(base_buf), "%s", "N/A");
1849 			if (comp)
1850 				snprintf(comp_buf, sizeof(comp_buf), "%ld", comp_val);
1851 			else
1852 				snprintf(comp_buf, sizeof(comp_buf), "%s", "N/A");
1853 
1854 			diff_val = comp_val - base_val;
1855 			if (!base || !comp) {
1856 				snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A");
1857 			} else {
1858 				if (base_val == 0) {
1859 					if (comp_val == base_val)
1860 						p = 0.0; /* avoid +0 (+100%) case */
1861 					else
1862 						p = comp_val < base_val ? -100.0 : 100.0;
1863 				} else {
1864 					 p = diff_val * 100.0 / base_val;
1865 				}
1866 				snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)", diff_val, p);
1867 			}
1868 		}
1869 
1870 		switch (fmt) {
1871 		case RESFMT_TABLE_CALCLEN:
1872 			len = strlen(base_buf);
1873 			if (len > *max_len_base)
1874 				*max_len_base = len;
1875 			if (!is_key_stat(id)) {
1876 				len = strlen(comp_buf);
1877 				if (len > *max_len_comp)
1878 					*max_len_comp = len;
1879 				len = strlen(diff_buf);
1880 				if (len > *max_len_diff)
1881 					*max_len_diff = len;
1882 			}
1883 			break;
1884 		case RESFMT_TABLE: {
1885 			/* string outputs are left-aligned, number outputs are right-aligned */
1886 			const char *fmt = base_str ? "%s%-*s" : "%s%*s";
1887 
1888 			printf(fmt, i == 0 ? "" : COLUMN_SEP, *max_len_base, base_buf);
1889 			if (!is_key_stat(id)) {
1890 				printf(fmt, COLUMN_SEP, *max_len_comp, comp_buf);
1891 				printf(fmt, COLUMN_SEP, *max_len_diff, diff_buf);
1892 			}
1893 			if (i == env.output_spec.spec_cnt - 1)
1894 				printf("\n");
1895 			break;
1896 		}
1897 		case RESFMT_CSV:
1898 			printf("%s%s", i == 0 ? "" : ",", base_buf);
1899 			if (!is_key_stat(id)) {
1900 				printf("%s%s", i == 0 ? "" : ",", comp_buf);
1901 				printf("%s%s", i == 0 ? "" : ",", diff_buf);
1902 			}
1903 			if (i == env.output_spec.spec_cnt - 1)
1904 				printf("\n");
1905 			break;
1906 		}
1907 	}
1908 
1909 	if (last && fmt == RESFMT_TABLE)
1910 		output_comp_header_underlines();
1911 }
1912 
1913 static int cmp_stats_key(const struct verif_stats *base, const struct verif_stats *comp)
1914 {
1915 	int r;
1916 
1917 	r = strcmp(base->file_name, comp->file_name);
1918 	if (r != 0)
1919 		return r;
1920 	return strcmp(base->prog_name, comp->prog_name);
1921 }
1922 
1923 static bool is_join_stat_filter_matched(struct filter *f, const struct verif_stats_join *stats)
1924 {
1925 	static const double eps = 1e-9;
1926 	const char *str = NULL;
1927 	double value = 0.0;
1928 
1929 	fetch_join_stat_value(stats, f->stat_id, f->stat_var, &str, &value);
1930 
1931 	if (f->abs)
1932 		value = fabs(value);
1933 
1934 	switch (f->op) {
1935 	case OP_EQ: return value > f->value - eps && value < f->value + eps;
1936 	case OP_NEQ: return value < f->value - eps || value > f->value + eps;
1937 	case OP_LT: return value < f->value - eps;
1938 	case OP_LE: return value <= f->value + eps;
1939 	case OP_GT: return value > f->value + eps;
1940 	case OP_GE: return value >= f->value - eps;
1941 	}
1942 
1943 	fprintf(stderr, "BUG: unknown filter op %d!\n", f->op);
1944 	return false;
1945 }
1946 
1947 static bool should_output_join_stats(const struct verif_stats_join *stats)
1948 {
1949 	struct filter *f;
1950 	int i, allow_cnt = 0;
1951 
1952 	for (i = 0; i < env.deny_filter_cnt; i++) {
1953 		f = &env.deny_filters[i];
1954 		if (f->kind != FILTER_STAT)
1955 			continue;
1956 
1957 		if (is_join_stat_filter_matched(f, stats))
1958 			return false;
1959 	}
1960 
1961 	for (i = 0; i < env.allow_filter_cnt; i++) {
1962 		f = &env.allow_filters[i];
1963 		if (f->kind != FILTER_STAT)
1964 			continue;
1965 		allow_cnt++;
1966 
1967 		if (is_join_stat_filter_matched(f, stats))
1968 			return true;
1969 	}
1970 
1971 	/* if there are no stat allowed filters, pass everything through */
1972 	return allow_cnt == 0;
1973 }
1974 
1975 static int handle_comparison_mode(void)
1976 {
1977 	struct stat_specs base_specs = {}, comp_specs = {};
1978 	struct stat_specs tmp_sort_spec;
1979 	enum resfmt cur_fmt;
1980 	int err, i, j, last_idx, cnt;
1981 
1982 	if (env.filename_cnt != 2) {
1983 		fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n\n");
1984 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
1985 		return -EINVAL;
1986 	}
1987 
1988 	err = parse_stats_csv(env.filenames[0], &base_specs,
1989 			      &env.baseline_stats, &env.baseline_stat_cnt);
1990 	if (err) {
1991 		fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err);
1992 		return err;
1993 	}
1994 	err = parse_stats_csv(env.filenames[1], &comp_specs,
1995 			      &env.prog_stats, &env.prog_stat_cnt);
1996 	if (err) {
1997 		fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[1], err);
1998 		return err;
1999 	}
2000 
2001 	/* To keep it simple we validate that the set and order of stats in
2002 	 * both CSVs are exactly the same. This can be lifted with a bit more
2003 	 * pre-processing later.
2004 	 */
2005 	if (base_specs.spec_cnt != comp_specs.spec_cnt) {
2006 		fprintf(stderr, "Number of stats in '%s' and '%s' differs (%d != %d)!\n",
2007 			env.filenames[0], env.filenames[1],
2008 			base_specs.spec_cnt, comp_specs.spec_cnt);
2009 		return -EINVAL;
2010 	}
2011 	for (i = 0; i < base_specs.spec_cnt; i++) {
2012 		if (base_specs.ids[i] != comp_specs.ids[i]) {
2013 			fprintf(stderr, "Stats composition differs between '%s' and '%s' (%s != %s)!\n",
2014 				env.filenames[0], env.filenames[1],
2015 				stat_defs[base_specs.ids[i]].names[0],
2016 				stat_defs[comp_specs.ids[i]].names[0]);
2017 			return -EINVAL;
2018 		}
2019 	}
2020 
2021 	/* Replace user-specified sorting spec with file+prog sorting rule to
2022 	 * be able to join two datasets correctly. Once we are done, we will
2023 	 * restore the original sort spec.
2024 	 */
2025 	tmp_sort_spec = env.sort_spec;
2026 	env.sort_spec = join_sort_spec;
2027 	qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
2028 	qsort(env.baseline_stats, env.baseline_stat_cnt, sizeof(*env.baseline_stats), cmp_prog_stats);
2029 	env.sort_spec = tmp_sort_spec;
2030 
2031 	/* Join two datasets together. If baseline and comparison datasets
2032 	 * have different subset of rows (we match by 'object + prog' as
2033 	 * a unique key) then assume empty/missing/zero value for rows that
2034 	 * are missing in the opposite data set.
2035 	 */
2036 	i = j = 0;
2037 	while (i < env.baseline_stat_cnt || j < env.prog_stat_cnt) {
2038 		const struct verif_stats *base, *comp;
2039 		struct verif_stats_join *join;
2040 		void *tmp;
2041 		int r;
2042 
2043 		base = i < env.baseline_stat_cnt ? &env.baseline_stats[i] : &fallback_stats;
2044 		comp = j < env.prog_stat_cnt ? &env.prog_stats[j] : &fallback_stats;
2045 
2046 		if (!base->file_name || !base->prog_name) {
2047 			fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n",
2048 				i, env.filenames[0]);
2049 			return -EINVAL;
2050 		}
2051 		if (!comp->file_name || !comp->prog_name) {
2052 			fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n",
2053 				j, env.filenames[1]);
2054 			return -EINVAL;
2055 		}
2056 
2057 		tmp = realloc(env.join_stats, (env.join_stat_cnt + 1) * sizeof(*env.join_stats));
2058 		if (!tmp)
2059 			return -ENOMEM;
2060 		env.join_stats = tmp;
2061 
2062 		join = &env.join_stats[env.join_stat_cnt];
2063 		memset(join, 0, sizeof(*join));
2064 
2065 		r = cmp_stats_key(base, comp);
2066 		if (r == 0) {
2067 			join->file_name = base->file_name;
2068 			join->prog_name = base->prog_name;
2069 			join->stats_a = base;
2070 			join->stats_b = comp;
2071 			i++;
2072 			j++;
2073 		} else if (base != &fallback_stats && (comp == &fallback_stats || r < 0)) {
2074 			join->file_name = base->file_name;
2075 			join->prog_name = base->prog_name;
2076 			join->stats_a = base;
2077 			join->stats_b = NULL;
2078 			i++;
2079 		} else if (comp != &fallback_stats && (base == &fallback_stats || r > 0)) {
2080 			join->file_name = comp->file_name;
2081 			join->prog_name = comp->prog_name;
2082 			join->stats_a = NULL;
2083 			join->stats_b = comp;
2084 			j++;
2085 		} else {
2086 			fprintf(stderr, "%s:%d: should never reach here i=%i, j=%i",
2087 				__FILE__, __LINE__, i, j);
2088 			return -EINVAL;
2089 		}
2090 		env.join_stat_cnt += 1;
2091 	}
2092 
2093 	/* now sort joined results according to sort spec */
2094 	qsort(env.join_stats, env.join_stat_cnt, sizeof(*env.join_stats), cmp_join_stats);
2095 
2096 	/* for human-readable table output we need to do extra pass to
2097 	 * calculate column widths, so we substitute current output format
2098 	 * with RESFMT_TABLE_CALCLEN and later revert it back to RESFMT_TABLE
2099 	 * and do everything again.
2100 	 */
2101 	if (env.out_fmt == RESFMT_TABLE)
2102 		cur_fmt = RESFMT_TABLE_CALCLEN;
2103 	else
2104 		cur_fmt = env.out_fmt;
2105 
2106 one_more_time:
2107 	output_comp_headers(cur_fmt);
2108 
2109 	last_idx = -1;
2110 	cnt = 0;
2111 	for (i = 0; i < env.join_stat_cnt; i++) {
2112 		const struct verif_stats_join *join = &env.join_stats[i];
2113 
2114 		if (!should_output_join_stats(join))
2115 			continue;
2116 
2117 		if (env.top_n && cnt >= env.top_n)
2118 			break;
2119 
2120 		if (cur_fmt == RESFMT_TABLE_CALCLEN)
2121 			last_idx = i;
2122 
2123 		output_comp_stats(join, cur_fmt, i == last_idx);
2124 
2125 		cnt++;
2126 	}
2127 
2128 	if (cur_fmt == RESFMT_TABLE_CALCLEN) {
2129 		cur_fmt = RESFMT_TABLE;
2130 		goto one_more_time; /* ... this time with feeling */
2131 	}
2132 
2133 	return 0;
2134 }
2135 
2136 static bool is_stat_filter_matched(struct filter *f, const struct verif_stats *stats)
2137 {
2138 	long value = stats->stats[f->stat_id];
2139 
2140 	if (f->abs)
2141 		value = value < 0 ? -value : value;
2142 
2143 	switch (f->op) {
2144 	case OP_EQ: return value == f->value;
2145 	case OP_NEQ: return value != f->value;
2146 	case OP_LT: return value < f->value;
2147 	case OP_LE: return value <= f->value;
2148 	case OP_GT: return value > f->value;
2149 	case OP_GE: return value >= f->value;
2150 	}
2151 
2152 	fprintf(stderr, "BUG: unknown filter op %d!\n", f->op);
2153 	return false;
2154 }
2155 
2156 static bool should_output_stats(const struct verif_stats *stats)
2157 {
2158 	struct filter *f;
2159 	int i, allow_cnt = 0;
2160 
2161 	for (i = 0; i < env.deny_filter_cnt; i++) {
2162 		f = &env.deny_filters[i];
2163 		if (f->kind != FILTER_STAT)
2164 			continue;
2165 
2166 		if (is_stat_filter_matched(f, stats))
2167 			return false;
2168 	}
2169 
2170 	for (i = 0; i < env.allow_filter_cnt; i++) {
2171 		f = &env.allow_filters[i];
2172 		if (f->kind != FILTER_STAT)
2173 			continue;
2174 		allow_cnt++;
2175 
2176 		if (is_stat_filter_matched(f, stats))
2177 			return true;
2178 	}
2179 
2180 	/* if there are no stat allowed filters, pass everything through */
2181 	return allow_cnt == 0;
2182 }
2183 
2184 static void output_prog_stats(void)
2185 {
2186 	const struct verif_stats *stats;
2187 	int i, last_stat_idx = 0, cnt = 0;
2188 
2189 	if (env.out_fmt == RESFMT_TABLE) {
2190 		/* calculate column widths */
2191 		output_headers(RESFMT_TABLE_CALCLEN);
2192 		for (i = 0; i < env.prog_stat_cnt; i++) {
2193 			stats = &env.prog_stats[i];
2194 			if (!should_output_stats(stats))
2195 				continue;
2196 			output_stats(stats, RESFMT_TABLE_CALCLEN, false);
2197 			last_stat_idx = i;
2198 		}
2199 	}
2200 
2201 	/* actually output the table */
2202 	output_headers(env.out_fmt);
2203 	for (i = 0; i < env.prog_stat_cnt; i++) {
2204 		stats = &env.prog_stats[i];
2205 		if (!should_output_stats(stats))
2206 			continue;
2207 		if (env.top_n && cnt >= env.top_n)
2208 			break;
2209 		output_stats(stats, env.out_fmt, i == last_stat_idx);
2210 		cnt++;
2211 	}
2212 }
2213 
2214 static int handle_verif_mode(void)
2215 {
2216 	int i, err;
2217 
2218 	if (env.filename_cnt == 0) {
2219 		fprintf(stderr, "Please provide path to BPF object file!\n\n");
2220 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2221 		return -EINVAL;
2222 	}
2223 
2224 	for (i = 0; i < env.filename_cnt; i++) {
2225 		err = process_obj(env.filenames[i]);
2226 		if (err) {
2227 			fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err);
2228 			return err;
2229 		}
2230 	}
2231 
2232 	qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
2233 
2234 	output_prog_stats();
2235 
2236 	return 0;
2237 }
2238 
2239 static int handle_replay_mode(void)
2240 {
2241 	struct stat_specs specs = {};
2242 	int err;
2243 
2244 	if (env.filename_cnt != 1) {
2245 		fprintf(stderr, "Replay mode expects exactly one input CSV file!\n\n");
2246 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2247 		return -EINVAL;
2248 	}
2249 
2250 	err = parse_stats_csv(env.filenames[0], &specs,
2251 			      &env.prog_stats, &env.prog_stat_cnt);
2252 	if (err) {
2253 		fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err);
2254 		return err;
2255 	}
2256 
2257 	qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
2258 
2259 	output_prog_stats();
2260 
2261 	return 0;
2262 }
2263 
2264 int main(int argc, char **argv)
2265 {
2266 	int err = 0, i;
2267 
2268 	if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
2269 		return 1;
2270 
2271 	if (env.show_version) {
2272 		printf("%s\n", argp_program_version);
2273 		return 0;
2274 	}
2275 
2276 	if (env.verbose && env.quiet) {
2277 		fprintf(stderr, "Verbose and quiet modes are incompatible, please specify just one or neither!\n\n");
2278 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2279 		return 1;
2280 	}
2281 	if (env.verbose && env.log_level == 0)
2282 		env.log_level = 1;
2283 
2284 	if (env.output_spec.spec_cnt == 0) {
2285 		if (env.out_fmt == RESFMT_CSV)
2286 			env.output_spec = default_csv_output_spec;
2287 		else
2288 			env.output_spec = default_output_spec;
2289 	}
2290 	if (env.sort_spec.spec_cnt == 0)
2291 		env.sort_spec = default_sort_spec;
2292 
2293 	if (env.comparison_mode && env.replay_mode) {
2294 		fprintf(stderr, "Can't specify replay and comparison mode at the same time!\n\n");
2295 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2296 		return 1;
2297 	}
2298 
2299 	if (env.comparison_mode)
2300 		err = handle_comparison_mode();
2301 	else if (env.replay_mode)
2302 		err = handle_replay_mode();
2303 	else
2304 		err = handle_verif_mode();
2305 
2306 	free_verif_stats(env.prog_stats, env.prog_stat_cnt);
2307 	free_verif_stats(env.baseline_stats, env.baseline_stat_cnt);
2308 	free(env.join_stats);
2309 	for (i = 0; i < env.filename_cnt; i++)
2310 		free(env.filenames[i]);
2311 	free(env.filenames);
2312 	for (i = 0; i < env.allow_filter_cnt; i++) {
2313 		free(env.allow_filters[i].any_glob);
2314 		free(env.allow_filters[i].file_glob);
2315 		free(env.allow_filters[i].prog_glob);
2316 	}
2317 	free(env.allow_filters);
2318 	for (i = 0; i < env.deny_filter_cnt; i++) {
2319 		free(env.deny_filters[i].any_glob);
2320 		free(env.deny_filters[i].file_glob);
2321 		free(env.deny_filters[i].prog_glob);
2322 	}
2323 	free(env.deny_filters);
2324 	return -err;
2325 }
2326