xref: /linux/tools/testing/selftests/bpf/veristat.c (revision 58ecb3a789fdc2b015112a31a91aa674c040a5ba)
1 // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
2 /* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
3 #define _GNU_SOURCE
4 #include <argp.h>
5 #include <libgen.h>
6 #include <string.h>
7 #include <stdlib.h>
8 #include <sched.h>
9 #include <pthread.h>
10 #include <dirent.h>
11 #include <signal.h>
12 #include <fcntl.h>
13 #include <unistd.h>
14 #include <sys/time.h>
15 #include <sys/sysinfo.h>
16 #include <sys/stat.h>
17 #include <bpf/libbpf.h>
18 #include <bpf/btf.h>
19 #include <bpf/bpf.h>
20 #include <libelf.h>
21 #include <gelf.h>
22 #include <float.h>
23 #include <math.h>
24 
/* Element count of a real array (not a pointer); defined here only if no
 * earlier header already provided it.
 */
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif
28 
/* Identifiers of all stats veristat knows about.
 *
 * Every enumerator that precedes FILE_NAME is an integer stat stored in
 * verif_stats.stats[] (which has NUM_STATS_CNT slots); FILE_NAME and
 * PROG_NAME are kept as separate string fields of struct verif_stats.
 */
enum stat_id {
	VERDICT,
	DURATION,
	TOTAL_INSNS,
	TOTAL_STATES,
	PEAK_STATES,
	MAX_STATES_PER_INSN,
	MARK_READ_MAX_LEN,
	SIZE,
	JITED_SIZE,
	STACK,
	PROG_TYPE,
	ATTACH_TYPE,

	/* string-valued stats start here */
	FILE_NAME,
	PROG_NAME,

	/* number of all stat ids, string-valued ones included */
	ALL_STATS_CNT,
	/* number of stat ids that precede FILE_NAME */
	NUM_STATS_CNT = FILE_NAME - VERDICT,
};
49 
50 /* In comparison mode each stat can specify up to four different values:
51  *   - A side value;
52  *   - B side value;
53  *   - absolute diff value;
54  *   - relative (percentage) diff value.
55  *
56  * When specifying stat specs in comparison mode, user can use one of the
57  * following variant suffixes to specify which exact variant should be used for
58  * ordering or filtering:
59  *   - `_a` for A side value;
60  *   - `_b` for B side value;
61  *   - `_diff` for absolute diff value;
62  *   - `_pct` for relative (percentage) diff value.
63  *
64  * If no variant suffix is provided, then `_b` (control data) is assumed.
65  *
66  * As an example, let's say instructions stat has the following output:
67  *
68  * Insns (A)  Insns (B)  Insns   (DIFF)
69  * ---------  ---------  --------------
70  * 21547      20920       -627 (-2.91%)
71  *
72  * Then:
73  *   - 21547 is A side value (insns_a);
74  *   - 20920 is B side value (insns_b);
75  *   - -627 is absolute diff value (insns_diff);
76  *   - -2.91% is relative diff value (insns_pct).
77  *
78  * For verdict there is no verdict_pct variant.
79  * For file and program name, _a and _b variants are equivalent and there are
80  * no _diff or _pct variants.
81  */
enum stat_variant {
	VARIANT_A,	/* `_a` suffix: A side value */
	VARIANT_B,	/* `_b` suffix: B side value; also used when no suffix is given */
	VARIANT_DIFF,	/* `_diff` suffix: absolute diff value */
	VARIANT_PCT,	/* `_pct` suffix: relative (percentage) diff value */
};
88 
/* Stats of one program in one file: two name strings plus one integer slot
 * per stat id that precedes FILE_NAME in enum stat_id.
 */
struct verif_stats {
	char *file_name;	/* released with free() by free_verif_stats() */
	char *prog_name;	/* released with free() by free_verif_stats() */

	long stats[NUM_STATS_CNT];	/* indexed by enum stat_id, e.g. stats[DURATION] */
};
95 
/* joined comparison mode stats */
struct verif_stats_join {
	char *file_name;
	char *prog_name;

	/* NOTE(review): presumably the A side and B side records being
	 * compared; the code filling them in is outside this chunk.
	 */
	const struct verif_stats *stats_a;
	const struct verif_stats *stats_b;
};
104 
/* An ordered list of stat specs, filled in one entry at a time by
 * parse_stat(). Entry i of every per-spec array describes the i-th spec;
 * only the first spec_cnt entries are meaningful.
 */
struct stat_specs {
	int spec_cnt;					/* number of specs stored so far */
	enum stat_id ids[ALL_STATS_CNT];		/* which stat the spec refers to */
	enum stat_variant variants[ALL_STATS_CNT];	/* which comparison-mode variant */
	bool asc[ALL_STATS_CNT];			/* true for ascending order */
	bool abs[ALL_STATS_CNT];			/* true if written as |<stat>| */
	int lens[ALL_STATS_CNT * 3]; /* 3x for comparison mode */
};
113 
/* Result output format; `-o table` and `-o csv` select the two real ones. */
enum resfmt {
	RESFMT_TABLE,
	RESFMT_TABLE_CALCLEN, /* fake format to pre-calculate table's column widths */
	RESFMT_CSV,
};
119 
/* What a struct filter matches on. */
enum filter_kind {
	FILTER_NAME,	/* file and/or program name globs */
	FILTER_STAT,	/* <stat><op><value> comparison */
};
124 
/* Comparison operators accepted in stat filters; the textual spellings are
 * listed in the `operators` table.
 */
enum operator_kind {
	OP_EQ,		/* == or = */
	OP_NEQ,		/* != or <> */
	OP_LT,		/* < */
	OP_LE,		/* <= */
	OP_GT,		/* > */
	OP_GE,		/* >= */
};
133 
/* One allow/deny filter as built by append_filter(). Depending on `kind`,
 * either the glob fields or the stat comparison fields are in use.
 */
struct filter {
	enum filter_kind kind;
	/* FILTER_NAME */
	char *any_glob;		/* '<glob>' form: applied to both file and prog names */
	char *file_glob;	/* part before '/' in '<file-glob>/<prog-glob>', or NULL */
	char *prog_glob;	/* part after '/' in '<file-glob>/<prog-glob>', or NULL */
	/* FILTER_STAT */
	enum operator_kind op;
	int stat_id;		/* enum stat_id of an integer stat (below FILE_NAME) */
	enum stat_variant stat_var;
	long value;		/* right-hand side of the comparison */
	bool abs;		/* compare absolute value of the stat */
};
147 
/* Global tool state: command-line settings filled in by parse_arg() plus
 * result arrays and counters.
 */
static struct env {
	char **filenames;	/* positional arguments, each strdup()'ed */
	int filename_cnt;
	bool verbose;		/* -v, also implied by -d */
	bool debug;		/* -d */
	bool quiet;		/* -q */
	bool force_checkpoints;	/* -t */
	bool force_reg_invariants; /* -r */
	enum resfmt out_fmt;	/* -o */
	bool show_version;	/* -V */
	bool comparison_mode;	/* -C */
	bool replay_mode;	/* -R */
	int top_n;		/* -n */

	int log_level;		/* -l */
	int log_size;		/* --log-size */
	bool log_fixed;		/* --log-fixed */

	struct verif_stats *prog_stats;
	int prog_stat_cnt;

	/* baseline_stats is allocated and used only in comparison mode */
	struct verif_stats *baseline_stats;
	int baseline_stat_cnt;

	struct verif_stats_join *join_stats;
	int join_stat_cnt;

	struct stat_specs output_spec;	/* -e */
	struct stat_specs sort_spec;	/* -s */

	/* -f: expressions with a leading '!' go to deny_filters, the rest to
	 * allow_filters
	 */
	struct filter *allow_filters;
	struct filter *deny_filters;
	int allow_filter_cnt;
	int deny_filter_cnt;

	int files_processed;
	int files_skipped;
	int progs_processed;
	int progs_skipped;
	int top_src_lines;	/* -S */
} env;
190 
191 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
192 {
193 	if (!env.verbose)
194 		return 0;
195 	if (level == LIBBPF_DEBUG  && !env.debug)
196 		return 0;
197 	return vfprintf(stderr, format, args);
198 }
199 
/* Version string; a build may override it by defining VERISTAT_VERSION. */
#ifndef VERISTAT_VERSION
#define VERISTAT_VERSION "<kernel>"
#endif

/* argp_program_version and argp_program_bug_address are the well-known
 * globals the argp library consults; argp_program_doc is passed to it through
 * the .doc member of `argp`.
 */
const char *argp_program_version = "veristat v" VERISTAT_VERSION;
const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
const char argp_program_doc[] =
"veristat    BPF verifier stats collection and comparison tool.\n"
"\n"
"USAGE: veristat <obj-file> [<obj-file>...]\n"
"   OR: veristat -C <baseline.csv> <comparison.csv>\n"
"   OR: veristat -R <results.csv>\n";
212 
/* Option keys used in opts[] for --log-fixed and --log-size, which have no
 * one-letter short form.
 */
enum {
	OPT_LOG_FIXED = 1000,
	OPT_LOG_SIZE = 1001,
};
217 
/* Command-line options understood by parse_arg(). Each entry is
 * { long name, key, argument name (NULL if none), flags, help text };
 * the empty entry terminates the table.
 */
static const struct argp_option opts[] = {
	{ NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
	{ "version", 'V', NULL, 0, "Print version" },
	{ "verbose", 'v', NULL, 0, "Verbose mode" },
	{ "debug", 'd', NULL, 0, "Debug mode (turns on libbpf debug logging)" },
	{ "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" },
	{ "log-fixed", OPT_LOG_FIXED, NULL, 0, "Disable verifier log rotation" },
	{ "log-size", OPT_LOG_SIZE, "BYTES", 0, "Customize verifier log size (default to 16MB)" },
	{ "top-n", 'n', "N", 0, "Emit only up to first N results." },
	{ "quiet", 'q', NULL, 0, "Quiet mode" },
	{ "emit", 'e', "SPEC", 0, "Specify stats to be emitted" },
	{ "sort", 's', "SPEC", 0, "Specify sort order" },
	{ "output-format", 'o', "FMT", 0, "Result output format (table, csv), default is table." },
	{ "compare", 'C', NULL, 0, "Comparison mode" },
	{ "replay", 'R', NULL, 0, "Replay mode" },
	{ "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)." },
	{ "test-states", 't', NULL, 0,
	  "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" },
	{ "test-reg-invariants", 'r', NULL, 0,
	  "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" },
	{ "top-src-lines", 'S', "N", 0, "Emit N most frequent source code lines" },
	{},
};
241 
/* Forward declarations of helpers that parse_arg() calls before they are
 * defined.
 */
static int parse_stats(const char *stats_str, struct stat_specs *specs);
static int append_filter(struct filter **filters, int *cnt, const char *str);
static int append_filter_file(const char *path);
245 
246 static error_t parse_arg(int key, char *arg, struct argp_state *state)
247 {
248 	void *tmp;
249 	int err;
250 
251 	switch (key) {
252 	case 'h':
253 		argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
254 		break;
255 	case 'V':
256 		env.show_version = true;
257 		break;
258 	case 'v':
259 		env.verbose = true;
260 		break;
261 	case 'd':
262 		env.debug = true;
263 		env.verbose = true;
264 		break;
265 	case 'q':
266 		env.quiet = true;
267 		break;
268 	case 'e':
269 		err = parse_stats(arg, &env.output_spec);
270 		if (err)
271 			return err;
272 		break;
273 	case 's':
274 		err = parse_stats(arg, &env.sort_spec);
275 		if (err)
276 			return err;
277 		break;
278 	case 'o':
279 		if (strcmp(arg, "table") == 0) {
280 			env.out_fmt = RESFMT_TABLE;
281 		} else if (strcmp(arg, "csv") == 0) {
282 			env.out_fmt = RESFMT_CSV;
283 		} else {
284 			fprintf(stderr, "Unrecognized output format '%s'\n", arg);
285 			return -EINVAL;
286 		}
287 		break;
288 	case 'l':
289 		errno = 0;
290 		env.log_level = strtol(arg, NULL, 10);
291 		if (errno) {
292 			fprintf(stderr, "invalid log level: %s\n", arg);
293 			argp_usage(state);
294 		}
295 		break;
296 	case OPT_LOG_FIXED:
297 		env.log_fixed = true;
298 		break;
299 	case OPT_LOG_SIZE:
300 		errno = 0;
301 		env.log_size = strtol(arg, NULL, 10);
302 		if (errno) {
303 			fprintf(stderr, "invalid log size: %s\n", arg);
304 			argp_usage(state);
305 		}
306 		break;
307 	case 't':
308 		env.force_checkpoints = true;
309 		break;
310 	case 'r':
311 		env.force_reg_invariants = true;
312 		break;
313 	case 'n':
314 		errno = 0;
315 		env.top_n = strtol(arg, NULL, 10);
316 		if (errno) {
317 			fprintf(stderr, "invalid top N specifier: %s\n", arg);
318 			argp_usage(state);
319 		}
320 	case 'C':
321 		env.comparison_mode = true;
322 		break;
323 	case 'R':
324 		env.replay_mode = true;
325 		break;
326 	case 'f':
327 		if (arg[0] == '@')
328 			err = append_filter_file(arg + 1);
329 		else if (arg[0] == '!')
330 			err = append_filter(&env.deny_filters, &env.deny_filter_cnt, arg + 1);
331 		else
332 			err = append_filter(&env.allow_filters, &env.allow_filter_cnt, arg);
333 		if (err) {
334 			fprintf(stderr, "Failed to collect program filter expressions: %d\n", err);
335 			return err;
336 		}
337 		break;
338 	case 'S':
339 		errno = 0;
340 		env.top_src_lines = strtol(arg, NULL, 10);
341 		if (errno) {
342 			fprintf(stderr, "invalid top lines N specifier: %s\n", arg);
343 			argp_usage(state);
344 		}
345 		break;
346 	case ARGP_KEY_ARG:
347 		tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames));
348 		if (!tmp)
349 			return -ENOMEM;
350 		env.filenames = tmp;
351 		env.filenames[env.filename_cnt] = strdup(arg);
352 		if (!env.filenames[env.filename_cnt])
353 			return -ENOMEM;
354 		env.filename_cnt++;
355 		break;
356 	default:
357 		return ARGP_ERR_UNKNOWN;
358 	}
359 	return 0;
360 }
361 
/* argp parser description tying together the option table, the per-option
 * callback and the usage text.
 */
static const struct argp argp = {
	.options = opts,
	.parser = parse_arg,
	.doc = argp_program_doc,
};
367 
368 
/* Adapted from perf/util/string.c */
/* Check whether @str matches glob pattern @pat. '*' in the pattern matches
 * any (possibly empty) run of characters; every other pattern character
 * matches only itself. The whole string has to be matched.
 */
static bool glob_matches(const char *str, const char *pat)
{
	/* consume the literal part of the pattern up to the first '*' */
	for (; *str && *pat && *pat != '*'; str++, pat++) {
		if (*str != *pat)
			return false;
	}

	/* no wild card: both string and pattern must be exhausted */
	if (*pat != '*')
		return !*str && !*pat;

	/* a run of '*' is the same as a single one */
	do {
		pat++;
	} while (*pat == '*');

	/* Tail wild card matches all */
	if (!*pat)
		return true;

	/* try to match the rest of the pattern at every remaining position */
	for (; *str; str++) {
		if (glob_matches(str, pat))
			return true;
	}

	/* string ran out while the pattern still has characters left */
	return false;
}
390 
391 static bool is_bpf_obj_file(const char *path) {
392 	Elf64_Ehdr *ehdr;
393 	int fd, err = -EINVAL;
394 	Elf *elf = NULL;
395 
396 	fd = open(path, O_RDONLY | O_CLOEXEC);
397 	if (fd < 0)
398 		return true; /* we'll fail later and propagate error */
399 
400 	/* ensure libelf is initialized */
401 	(void)elf_version(EV_CURRENT);
402 
403 	elf = elf_begin(fd, ELF_C_READ, NULL);
404 	if (!elf)
405 		goto cleanup;
406 
407 	if (elf_kind(elf) != ELF_K_ELF || gelf_getclass(elf) != ELFCLASS64)
408 		goto cleanup;
409 
410 	ehdr = elf64_getehdr(elf);
411 	/* Old LLVM set e_machine to EM_NONE */
412 	if (!ehdr || ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF))
413 		goto cleanup;
414 
415 	err = 0;
416 cleanup:
417 	if (elf)
418 		elf_end(elf);
419 	close(fd);
420 	return err == 0;
421 }
422 
423 static bool should_process_file_prog(const char *filename, const char *prog_name)
424 {
425 	struct filter *f;
426 	int i, allow_cnt = 0;
427 
428 	for (i = 0; i < env.deny_filter_cnt; i++) {
429 		f = &env.deny_filters[i];
430 		if (f->kind != FILTER_NAME)
431 			continue;
432 
433 		if (f->any_glob && glob_matches(filename, f->any_glob))
434 			return false;
435 		if (f->any_glob && prog_name && glob_matches(prog_name, f->any_glob))
436 			return false;
437 		if (f->file_glob && glob_matches(filename, f->file_glob))
438 			return false;
439 		if (f->prog_glob && prog_name && glob_matches(prog_name, f->prog_glob))
440 			return false;
441 	}
442 
443 	for (i = 0; i < env.allow_filter_cnt; i++) {
444 		f = &env.allow_filters[i];
445 		if (f->kind != FILTER_NAME)
446 			continue;
447 
448 		allow_cnt++;
449 		if (f->any_glob) {
450 			if (glob_matches(filename, f->any_glob))
451 				return true;
452 			/* If we don't know program name yet, any_glob filter
453 			 * has to assume that current BPF object file might be
454 			 * relevant; we'll check again later on after opening
455 			 * BPF object file, at which point program name will
456 			 * be known finally.
457 			 */
458 			if (!prog_name || glob_matches(prog_name, f->any_glob))
459 				return true;
460 		} else {
461 			if (f->file_glob && !glob_matches(filename, f->file_glob))
462 				continue;
463 			if (f->prog_glob && prog_name && !glob_matches(prog_name, f->prog_glob))
464 				continue;
465 			return true;
466 		}
467 	}
468 
469 	/* if there are no file/prog name allow filters, allow all progs,
470 	 * unless they are denied earlier explicitly
471 	 */
472 	return allow_cnt == 0;
473 }
474 
/* Textual spellings of comparison operators recognized in stat filters,
 * scanned in array order by append_filter().
 */
static struct {
	enum operator_kind op_kind;
	const char *op_str;
} operators[] = {
	/* Order of these definitions matter to avoid situations like '<'
	 * matching part of what is actually a '<>' operator. That is,
	 * substrings should go last.
	 */
	{ OP_EQ, "==" },
	{ OP_NEQ, "!=" },
	{ OP_NEQ, "<>" },
	{ OP_LE, "<=" },
	{ OP_LT, "<" },
	{ OP_GE, ">=" },
	{ OP_GT, ">" },
	{ OP_EQ, "=" },
};
492 
/* Forward declaration: append_filter() uses it before its definition. */
static bool parse_stat_id_var(const char *name, size_t len, int *id,
			      enum stat_variant *var, bool *is_abs);
495 
496 static int append_filter(struct filter **filters, int *cnt, const char *str)
497 {
498 	struct filter *f;
499 	void *tmp;
500 	const char *p;
501 	int i;
502 
503 	tmp = realloc(*filters, (*cnt + 1) * sizeof(**filters));
504 	if (!tmp)
505 		return -ENOMEM;
506 	*filters = tmp;
507 
508 	f = &(*filters)[*cnt];
509 	memset(f, 0, sizeof(*f));
510 
511 	/* First, let's check if it's a stats filter of the following form:
512 	 * <stat><op><value, where:
513 	 *   - <stat> is one of supported numerical stats (verdict is also
514 	 *     considered numerical, failure == 0, success == 1);
515 	 *   - <op> is comparison operator (see `operators` definitions);
516 	 *   - <value> is an integer (or failure/success, or false/true as
517 	 *     special aliases for 0 and 1, respectively).
518 	 * If the form doesn't match what user provided, we assume file/prog
519 	 * glob filter.
520 	 */
521 	for (i = 0; i < ARRAY_SIZE(operators); i++) {
522 		enum stat_variant var;
523 		int id;
524 		long val;
525 		const char *end = str;
526 		const char *op_str;
527 		bool is_abs;
528 
529 		op_str = operators[i].op_str;
530 		p = strstr(str, op_str);
531 		if (!p)
532 			continue;
533 
534 		if (!parse_stat_id_var(str, p - str, &id, &var, &is_abs)) {
535 			fprintf(stderr, "Unrecognized stat name in '%s'!\n", str);
536 			return -EINVAL;
537 		}
538 		if (id >= FILE_NAME) {
539 			fprintf(stderr, "Non-integer stat is specified in '%s'!\n", str);
540 			return -EINVAL;
541 		}
542 
543 		p += strlen(op_str);
544 
545 		if (strcasecmp(p, "true") == 0 ||
546 		    strcasecmp(p, "t") == 0 ||
547 		    strcasecmp(p, "success") == 0 ||
548 		    strcasecmp(p, "succ") == 0 ||
549 		    strcasecmp(p, "s") == 0 ||
550 		    strcasecmp(p, "match") == 0 ||
551 		    strcasecmp(p, "m") == 0) {
552 			val = 1;
553 		} else if (strcasecmp(p, "false") == 0 ||
554 			   strcasecmp(p, "f") == 0 ||
555 			   strcasecmp(p, "failure") == 0 ||
556 			   strcasecmp(p, "fail") == 0 ||
557 			   strcasecmp(p, "mismatch") == 0 ||
558 			   strcasecmp(p, "mis") == 0) {
559 			val = 0;
560 		} else {
561 			errno = 0;
562 			val = strtol(p, (char **)&end, 10);
563 			if (errno || end == p || *end != '\0' ) {
564 				fprintf(stderr, "Invalid integer value in '%s'!\n", str);
565 				return -EINVAL;
566 			}
567 		}
568 
569 		f->kind = FILTER_STAT;
570 		f->stat_id = id;
571 		f->stat_var = var;
572 		f->op = operators[i].op_kind;
573 		f->abs = true;
574 		f->value = val;
575 
576 		*cnt += 1;
577 		return 0;
578 	}
579 
580 	/* File/prog filter can be specified either as '<glob>' or
581 	 * '<file-glob>/<prog-glob>'. In the former case <glob> is applied to
582 	 * both file and program names. This seems to be way more useful in
583 	 * practice. If user needs full control, they can use '/<prog-glob>'
584 	 * form to glob just program name, or '<file-glob>/' to glob only file
585 	 * name. But usually common <glob> seems to be the most useful and
586 	 * ergonomic way.
587 	 */
588 	f->kind = FILTER_NAME;
589 	p = strchr(str, '/');
590 	if (!p) {
591 		f->any_glob = strdup(str);
592 		if (!f->any_glob)
593 			return -ENOMEM;
594 	} else {
595 		if (str != p) {
596 			/* non-empty file glob */
597 			f->file_glob = strndup(str, p - str);
598 			if (!f->file_glob)
599 				return -ENOMEM;
600 		}
601 		if (strlen(p + 1) > 0) {
602 			/* non-empty prog glob */
603 			f->prog_glob = strdup(p + 1);
604 			if (!f->prog_glob) {
605 				free(f->file_glob);
606 				f->file_glob = NULL;
607 				return -ENOMEM;
608 			}
609 		}
610 	}
611 
612 	*cnt += 1;
613 	return 0;
614 }
615 
616 static int append_filter_file(const char *path)
617 {
618 	char buf[1024];
619 	FILE *f;
620 	int err = 0;
621 
622 	f = fopen(path, "r");
623 	if (!f) {
624 		err = -errno;
625 		fprintf(stderr, "Failed to open filters in '%s': %d\n", path, err);
626 		return err;
627 	}
628 
629 	while (fscanf(f, " %1023[^\n]\n", buf) == 1) {
630 		/* lines starting with # are comments, skip them */
631 		if (buf[0] == '\0' || buf[0] == '#')
632 			continue;
633 		/* lines starting with ! are negative match filters */
634 		if (buf[0] == '!')
635 			err = append_filter(&env.deny_filters, &env.deny_filter_cnt, buf + 1);
636 		else
637 			err = append_filter(&env.allow_filters, &env.allow_filter_cnt, buf);
638 		if (err)
639 			goto cleanup;
640 	}
641 
642 cleanup:
643 	fclose(f);
644 	return err;
645 }
646 
/* Default set of eight output columns.
 * NOTE(review): judging by the name of the CSV counterpart this is meant for
 * table output; the code selecting it is outside this chunk.
 */
static const struct stat_specs default_output_spec = {
	.spec_cnt = 8,
	.ids = {
		FILE_NAME, PROG_NAME, VERDICT, DURATION,
		TOTAL_INSNS, TOTAL_STATES, SIZE, JITED_SIZE
	},
};
654 
/* Default CSV column set: lists every stat id defined in enum stat_id
 * (spec_cnt has to stay in sync with the number of ids listed).
 */
static const struct stat_specs default_csv_output_spec = {
	.spec_cnt = 14,
	.ids = {
		FILE_NAME, PROG_NAME, VERDICT, DURATION,
		TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
		MAX_STATES_PER_INSN, MARK_READ_MAX_LEN,
		SIZE, JITED_SIZE, PROG_TYPE, ATTACH_TYPE,
		STACK,
	},
};
665 
/* Default ordering: by file name, then by program name, both ascending. */
static const struct stat_specs default_sort_spec = {
	.spec_cnt = 2,
	.ids = {
		FILE_NAME, PROG_NAME,
	},
	.asc = { true, true, },
};
673 
/* sorting for comparison mode to join two data sets: by file name, then by
 * program name, both ascending
 */
static const struct stat_specs join_sort_spec = {
	.spec_cnt = 2,
	.ids = {
		FILE_NAME, PROG_NAME,
	},
	.asc = { true, true, },
};
682 
/* Per-stat metadata, indexed by enum stat_id. */
static struct stat_def {
	const char *header;	/* human-readable column title */
	const char *names[4];	/* aliases accepted by parse_stat_id_var(); unused slots are NULL */
	bool asc_by_default;	/* sort direction used when a spec gives no order symbol */
	bool left_aligned;
} stat_defs[] = {
	[FILE_NAME] = { "File", {"file_name", "filename", "file"}, true /* asc */, true /* left */ },
	[PROG_NAME] = { "Program", {"prog_name", "progname", "prog"}, true /* asc */, true /* left */ },
	[VERDICT] = { "Verdict", {"verdict"}, true /* asc: failure, success */, true /* left */ },
	[DURATION] = { "Duration (us)", {"duration", "dur"}, },
	[TOTAL_INSNS] = { "Insns", {"total_insns", "insns"}, },
	[TOTAL_STATES] = { "States", {"total_states", "states"}, },
	[PEAK_STATES] = { "Peak states", {"peak_states"}, },
	[MAX_STATES_PER_INSN] = { "Max states per insn", {"max_states_per_insn"}, },
	[MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, },
	[SIZE] = { "Program size", {"prog_size"}, },
	[JITED_SIZE] = { "Jited size", {"prog_size_jited"}, },
	[STACK] = {"Stack depth", {"stack_depth", "stack"}, },
	[PROG_TYPE] = { "Program type", {"prog_type"}, },
	[ATTACH_TYPE] = { "Attach type", {"attach_type", }, },
};
704 
705 static bool parse_stat_id_var(const char *name, size_t len, int *id,
706 			      enum stat_variant *var, bool *is_abs)
707 {
708 	static const char *var_sfxs[] = {
709 		[VARIANT_A] = "_a",
710 		[VARIANT_B] = "_b",
711 		[VARIANT_DIFF] = "_diff",
712 		[VARIANT_PCT] = "_pct",
713 	};
714 	int i, j, k;
715 
716 	/* |<stat>| means we take absolute value of given stat */
717 	*is_abs = false;
718 	if (len > 2 && name[0] == '|' && name[len - 1] == '|') {
719 		*is_abs = true;
720 		name += 1;
721 		len -= 2;
722 	}
723 
724 	for (i = 0; i < ARRAY_SIZE(stat_defs); i++) {
725 		struct stat_def *def = &stat_defs[i];
726 		size_t alias_len, sfx_len;
727 		const char *alias;
728 
729 		for (j = 0; j < ARRAY_SIZE(stat_defs[i].names); j++) {
730 			alias = def->names[j];
731 			if (!alias)
732 				continue;
733 
734 			alias_len = strlen(alias);
735 			if (strncmp(name, alias, alias_len) != 0)
736 				continue;
737 
738 			if (alias_len == len) {
739 				/* If no variant suffix is specified, we
740 				 * assume control group (just in case we are
741 				 * in comparison mode. Variant is ignored in
742 				 * non-comparison mode.
743 				 */
744 				*var = VARIANT_B;
745 				*id = i;
746 				return true;
747 			}
748 
749 			for (k = 0; k < ARRAY_SIZE(var_sfxs); k++) {
750 				sfx_len = strlen(var_sfxs[k]);
751 				if (alias_len + sfx_len != len)
752 					continue;
753 
754 				if (strncmp(name + alias_len, var_sfxs[k], sfx_len) == 0) {
755 					*var = (enum stat_variant)k;
756 					*id = i;
757 					return true;
758 				}
759 			}
760 		}
761 	}
762 
763 	return false;
764 }
765 
/* True if @c is the symbol requesting ascending order ('^'). */
static bool is_asc_sym(char c)
{
	switch (c) {
	case '^':
		return true;
	default:
		return false;
	}
}
770 
/* True if @c is one of the symbols requesting descending order:
 * 'v', 'V', '.', '!' or '_'.
 */
static bool is_desc_sym(char c)
{
	switch (c) {
	case 'v':
	case 'V':
	case '.':
	case '!':
	case '_':
		return true;
	default:
		return false;
	}
}
775 
776 static int parse_stat(const char *stat_name, struct stat_specs *specs)
777 {
778 	int id;
779 	bool has_order = false, is_asc = false, is_abs = false;
780 	size_t len = strlen(stat_name);
781 	enum stat_variant var;
782 
783 	if (specs->spec_cnt >= ARRAY_SIZE(specs->ids)) {
784 		fprintf(stderr, "Can't specify more than %zd stats\n", ARRAY_SIZE(specs->ids));
785 		return -E2BIG;
786 	}
787 
788 	if (len > 1 && (is_asc_sym(stat_name[len - 1]) || is_desc_sym(stat_name[len - 1]))) {
789 		has_order = true;
790 		is_asc = is_asc_sym(stat_name[len - 1]);
791 		len -= 1;
792 	}
793 
794 	if (!parse_stat_id_var(stat_name, len, &id, &var, &is_abs)) {
795 		fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name);
796 		return -ESRCH;
797 	}
798 
799 	specs->ids[specs->spec_cnt] = id;
800 	specs->variants[specs->spec_cnt] = var;
801 	specs->asc[specs->spec_cnt] = has_order ? is_asc : stat_defs[id].asc_by_default;
802 	specs->abs[specs->spec_cnt] = is_abs;
803 	specs->spec_cnt++;
804 
805 	return 0;
806 }
807 
/* Parse a comma-separated list of stat specs and append each to @specs.
 *
 * Returns 0 on success, -ENOMEM if the working copy of @stats_str can't be
 * allocated, or the first error reported by parse_stat().
 */
static int parse_stats(const char *stats_str, struct stat_specs *specs)
{
	char *copy, *tok, *save = NULL;
	int err = 0;

	/* strtok_r() modifies its input, so work on a private copy */
	copy = strdup(stats_str);
	if (!copy)
		return -ENOMEM;

	for (tok = strtok_r(copy, ",", &save); tok; tok = strtok_r(NULL, ",", &save)) {
		err = parse_stat(tok, specs);
		if (err)
			break;
	}

	free(copy);
	return err;
}
828 
829 static void free_verif_stats(struct verif_stats *stats, size_t stat_cnt)
830 {
831 	int i;
832 
833 	if (!stats)
834 		return;
835 
836 	for (i = 0; i < stat_cnt; i++) {
837 		free(stats[i].file_name);
838 		free(stats[i].prog_name);
839 	}
840 	free(stats);
841 }
842 
/* Static 64KB buffer.
 * NOTE(review): presumably receives the verifier log that parse_verif_log()
 * later inspects; its users are outside this chunk.
 */
static char verif_log_buf[64 * 1024];

/* Upper bound on how many lines, counted from the end of the log,
 * parse_verif_log() looks at.
 */
#define MAX_PARSED_LOG_LINES 100
846 
847 static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *s)
848 {
849 	const char *cur;
850 	int pos, lines, sub_stack, cnt = 0;
851 	char *state = NULL, *token, stack[512];
852 
853 	buf[buf_sz - 1] = '\0';
854 
855 	for (pos = strlen(buf) - 1, lines = 0; pos >= 0 && lines < MAX_PARSED_LOG_LINES; lines++) {
856 		/* find previous endline or otherwise take the start of log buf */
857 		for (cur = &buf[pos]; cur > buf && cur[0] != '\n'; cur--, pos--) {
858 		}
859 		/* next time start from end of previous line (or pos goes to <0) */
860 		pos--;
861 		/* if we found endline, point right after endline symbol;
862 		 * otherwise, stay at the beginning of log buf
863 		 */
864 		if (cur[0] == '\n')
865 			cur++;
866 
867 		if (1 == sscanf(cur, "verification time %ld usec\n", &s->stats[DURATION]))
868 			continue;
869 		if (5 == sscanf(cur, "processed %ld insns (limit %*d) max_states_per_insn %ld total_states %ld peak_states %ld mark_read %ld",
870 				&s->stats[TOTAL_INSNS],
871 				&s->stats[MAX_STATES_PER_INSN],
872 				&s->stats[TOTAL_STATES],
873 				&s->stats[PEAK_STATES],
874 				&s->stats[MARK_READ_MAX_LEN]))
875 			continue;
876 
877 		if (1 == sscanf(cur, "stack depth %511s", stack))
878 			continue;
879 	}
880 	while ((token = strtok_r(cnt++ ? NULL : stack, "+", &state))) {
881 		if (sscanf(token, "%d", &sub_stack) == 0)
882 			break;
883 		s->stats[STACK] += sub_stack;
884 	}
885 	return 0;
886 }
887 
/* A source line together with the number of times it was seen. */
struct line_cnt {
	char *line;
	int cnt;
};

/* qsort() comparator for an array of C strings (char *): plain strcmp()
 * order.
 */
static int str_cmp(const void *a, const void *b)
{
	const char **str1 = (const char **)a;
	const char **str2 = (const char **)b;

	return strcmp(*str1, *str2);
}

/* qsort() comparator for struct line_cnt: most frequent lines come first, so
 * that the head of the sorted array holds the "top" lines; entries with equal
 * counts are ordered by line text.
 */
static int line_cnt_cmp(const void *a, const void *b)
{
	const struct line_cnt *a_cnt = (const struct line_cnt *)a;
	const struct line_cnt *b_cnt = (const struct line_cnt *)b;

	/* descending by count */
	if (a_cnt->cnt != b_cnt->cnt)
		return a_cnt->cnt > b_cnt->cnt ? -1 : 1;
	return strcmp(a_cnt->line, b_cnt->line);
}
910 
911 static int print_top_src_lines(char * const buf, size_t buf_sz, const char *prog_name)
912 {
913 	int lines_cap = 0;
914 	int lines_size = 0;
915 	char **lines = NULL;
916 	char *line = NULL;
917 	char *state;
918 	struct line_cnt *freq = NULL;
919 	struct line_cnt *cur;
920 	int unique_lines;
921 	int err = 0;
922 	int i;
923 
924 	while ((line = strtok_r(line ? NULL : buf, "\n", &state))) {
925 		if (strncmp(line, "; ", 2) != 0)
926 			continue;
927 		line += 2;
928 
929 		if (lines_size == lines_cap) {
930 			char **tmp;
931 
932 			lines_cap = max(16, lines_cap * 2);
933 			tmp = realloc(lines, lines_cap * sizeof(*tmp));
934 			if (!tmp) {
935 				err = -ENOMEM;
936 				goto cleanup;
937 			}
938 			lines = tmp;
939 		}
940 		lines[lines_size] = line;
941 		lines_size++;
942 	}
943 
944 	if (lines_size == 0)
945 		goto cleanup;
946 
947 	qsort(lines, lines_size, sizeof(*lines), str_cmp);
948 
949 	freq = calloc(lines_size, sizeof(*freq));
950 	if (!freq) {
951 		err = -ENOMEM;
952 		goto cleanup;
953 	}
954 
955 	cur = freq;
956 	cur->line = lines[0];
957 	cur->cnt = 1;
958 	for (i = 1; i < lines_size; ++i) {
959 		if (strcmp(lines[i], cur->line) != 0) {
960 			cur++;
961 			cur->line = lines[i];
962 			cur->cnt = 0;
963 		}
964 		cur->cnt++;
965 	}
966 	unique_lines = cur - freq + 1;
967 
968 	qsort(freq, unique_lines, sizeof(struct line_cnt), line_cnt_cmp);
969 
970 	printf("Top source lines (%s):\n", prog_name);
971 	for (i = 0; i < min(unique_lines, env.top_src_lines); ++i) {
972 		const char *src_code = freq[i].line;
973 		const char *src_line = NULL;
974 		char *split = strrchr(freq[i].line, '@');
975 
976 		if (split) {
977 			src_line = split + 1;
978 
979 			while (*src_line && isspace(*src_line))
980 				src_line++;
981 
982 			while (split > src_code && isspace(*split))
983 				split--;
984 			*split = '\0';
985 		}
986 
987 		if (src_line)
988 			printf("%5d: (%s)\t%s\n", freq[i].cnt, src_line, src_code);
989 		else
990 			printf("%5d: %s\n", freq[i].cnt, src_code);
991 	}
992 	printf("\n");
993 
994 cleanup:
995 	free(freq);
996 	free(lines);
997 	return err;
998 }
999 
1000 static int guess_prog_type_by_ctx_name(const char *ctx_name,
1001 				       enum bpf_prog_type *prog_type,
1002 				       enum bpf_attach_type *attach_type)
1003 {
1004 	/* We need to guess program type based on its declared context type.
1005 	 * This guess can't be perfect as many different program types might
1006 	 * share the same context type.  So we can only hope to reasonably
1007 	 * well guess this and get lucky.
1008 	 *
1009 	 * Just in case, we support both UAPI-side type names and
1010 	 * kernel-internal names.
1011 	 */
1012 	static struct {
1013 		const char *uapi_name;
1014 		const char *kern_name;
1015 		enum bpf_prog_type prog_type;
1016 		enum bpf_attach_type attach_type;
1017 	} ctx_map[] = {
1018 		/* __sk_buff is most ambiguous, we assume TC program */
1019 		{ "__sk_buff", "sk_buff", BPF_PROG_TYPE_SCHED_CLS },
1020 		{ "bpf_sock", "sock", BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND },
1021 		{ "bpf_sock_addr", "bpf_sock_addr_kern",  BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND },
1022 		{ "bpf_sock_ops", "bpf_sock_ops_kern", BPF_PROG_TYPE_SOCK_OPS, BPF_CGROUP_SOCK_OPS },
1023 		{ "sk_msg_md", "sk_msg", BPF_PROG_TYPE_SK_MSG, BPF_SK_MSG_VERDICT },
1024 		{ "bpf_cgroup_dev_ctx", "bpf_cgroup_dev_ctx", BPF_PROG_TYPE_CGROUP_DEVICE, BPF_CGROUP_DEVICE },
1025 		{ "bpf_sysctl", "bpf_sysctl_kern", BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_CGROUP_SYSCTL },
1026 		{ "bpf_sockopt", "bpf_sockopt_kern", BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT },
1027 		{ "sk_reuseport_md", "sk_reuseport_kern", BPF_PROG_TYPE_SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE },
1028 		{ "bpf_sk_lookup", "bpf_sk_lookup_kern", BPF_PROG_TYPE_SK_LOOKUP, BPF_SK_LOOKUP },
1029 		{ "xdp_md", "xdp_buff", BPF_PROG_TYPE_XDP, BPF_XDP },
1030 		/* tracing types with no expected attach type */
1031 		{ "bpf_user_pt_regs_t", "pt_regs", BPF_PROG_TYPE_KPROBE },
1032 		{ "bpf_perf_event_data", "bpf_perf_event_data_kern", BPF_PROG_TYPE_PERF_EVENT },
1033 		/* raw_tp programs use u64[] from kernel side, we don't want
1034 		 * to match on that, probably; so NULL for kern-side type
1035 		 */
1036 		{ "bpf_raw_tracepoint_args", NULL, BPF_PROG_TYPE_RAW_TRACEPOINT },
1037 	};
1038 	int i;
1039 
1040 	if (!ctx_name)
1041 		return -EINVAL;
1042 
1043 	for (i = 0; i < ARRAY_SIZE(ctx_map); i++) {
1044 		if (strcmp(ctx_map[i].uapi_name, ctx_name) == 0 ||
1045 		    (ctx_map[i].kern_name && strcmp(ctx_map[i].kern_name, ctx_name) == 0)) {
1046 			*prog_type = ctx_map[i].prog_type;
1047 			*attach_type = ctx_map[i].attach_type;
1048 			return 0;
1049 		}
1050 	}
1051 
1052 	return -ESRCH;
1053 }
1054 
1055 static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const char *filename)
1056 {
1057 	struct bpf_map *map;
1058 
1059 	bpf_object__for_each_map(map, obj) {
1060 		/* disable pinning */
1061 		bpf_map__set_pin_path(map, NULL);
1062 
1063 		/* fix up map size, if necessary */
1064 		switch (bpf_map__type(map)) {
1065 		case BPF_MAP_TYPE_SK_STORAGE:
1066 		case BPF_MAP_TYPE_TASK_STORAGE:
1067 		case BPF_MAP_TYPE_INODE_STORAGE:
1068 		case BPF_MAP_TYPE_CGROUP_STORAGE:
1069 			break;
1070 		default:
1071 			if (bpf_map__max_entries(map) == 0)
1072 				bpf_map__set_max_entries(map, 1);
1073 		}
1074 	}
1075 
1076 	/* SEC(freplace) programs can't be loaded with veristat as is,
1077 	 * but we can try guessing their target program's expected type by
1078 	 * looking at the type of program's first argument and substituting
1079 	 * corresponding program type
1080 	 */
1081 	if (bpf_program__type(prog) == BPF_PROG_TYPE_EXT) {
1082 		const struct btf *btf = bpf_object__btf(obj);
1083 		const char *prog_name = bpf_program__name(prog);
1084 		enum bpf_prog_type prog_type;
1085 		enum bpf_attach_type attach_type;
1086 		const struct btf_type *t;
1087 		const char *ctx_name;
1088 		int id;
1089 
1090 		if (!btf)
1091 			goto skip_freplace_fixup;
1092 
1093 		id = btf__find_by_name_kind(btf, prog_name, BTF_KIND_FUNC);
1094 		t = btf__type_by_id(btf, id);
1095 		t = btf__type_by_id(btf, t->type);
1096 		if (!btf_is_func_proto(t) || btf_vlen(t) != 1)
1097 			goto skip_freplace_fixup;
1098 
1099 		/* context argument is a pointer to a struct/typedef */
1100 		t = btf__type_by_id(btf, btf_params(t)[0].type);
1101 		while (t && btf_is_mod(t))
1102 			t = btf__type_by_id(btf, t->type);
1103 		if (!t || !btf_is_ptr(t))
1104 			goto skip_freplace_fixup;
1105 		t = btf__type_by_id(btf, t->type);
1106 		while (t && btf_is_mod(t))
1107 			t = btf__type_by_id(btf, t->type);
1108 		if (!t)
1109 			goto skip_freplace_fixup;
1110 
1111 		ctx_name = btf__name_by_offset(btf, t->name_off);
1112 
1113 		if (guess_prog_type_by_ctx_name(ctx_name, &prog_type, &attach_type) == 0) {
1114 			bpf_program__set_type(prog, prog_type);
1115 			bpf_program__set_expected_attach_type(prog, attach_type);
1116 
1117 			if (!env.quiet) {
1118 				printf("Using guessed program type '%s' for %s/%s...\n",
1119 					libbpf_bpf_prog_type_str(prog_type),
1120 					filename, prog_name);
1121 			}
1122 		} else {
1123 			if (!env.quiet) {
1124 				printf("Failed to guess program type for freplace program with context type name '%s' for %s/%s. Consider using canonical type names to help veristat...\n",
1125 					ctx_name, filename, prog_name);
1126 			}
1127 		}
1128 	}
1129 skip_freplace_fixup:
1130 	return;
1131 }
1132 
1133 static int max_verifier_log_size(void)
1134 {
1135 	const int SMALL_LOG_SIZE = UINT_MAX >> 8;
1136 	const int BIG_LOG_SIZE = UINT_MAX >> 2;
1137 	struct bpf_insn insns[] = {
1138 		{ .code = BPF_ALU | BPF_MOV | BPF_X, .dst_reg = BPF_REG_0, },
1139 		{ .code  = BPF_JMP | BPF_EXIT, },
1140 	};
1141 	LIBBPF_OPTS(bpf_prog_load_opts, opts,
1142 		    .log_size = BIG_LOG_SIZE,
1143 		    .log_buf = (void *)-1,
1144 		    .log_level = 4
1145 	);
1146 	int ret, insn_cnt = ARRAY_SIZE(insns);
1147 	static int log_size;
1148 
1149 	if (log_size != 0)
1150 		return log_size;
1151 
1152 	ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
1153 
1154 	if (ret == -EFAULT)
1155 		log_size = BIG_LOG_SIZE;
1156 	else /* ret == -EINVAL, big log size is not supported by the verifier */
1157 		log_size = SMALL_LOG_SIZE;
1158 
1159 	return log_size;
1160 }
1161 
1162 static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog)
1163 {
1164 	const char *base_filename = basename(strdupa(filename));
1165 	const char *prog_name = bpf_program__name(prog);
1166 	char *buf;
1167 	int buf_sz, log_level;
1168 	struct verif_stats *stats;
1169 	struct bpf_prog_info info;
1170 	__u32 info_len = sizeof(info);
1171 	int err = 0;
1172 	void *tmp;
1173 	int fd;
1174 
1175 	if (!should_process_file_prog(base_filename, bpf_program__name(prog))) {
1176 		env.progs_skipped++;
1177 		return 0;
1178 	}
1179 
1180 	tmp = realloc(env.prog_stats, (env.prog_stat_cnt + 1) * sizeof(*env.prog_stats));
1181 	if (!tmp)
1182 		return -ENOMEM;
1183 	env.prog_stats = tmp;
1184 	stats = &env.prog_stats[env.prog_stat_cnt++];
1185 	memset(stats, 0, sizeof(*stats));
1186 
1187 	if (env.verbose || env.top_src_lines > 0) {
1188 		buf_sz = env.log_size ? env.log_size : max_verifier_log_size();
1189 		buf = malloc(buf_sz);
1190 		if (!buf)
1191 			return -ENOMEM;
1192 		/* ensure we always request stats */
1193 		log_level = env.log_level | 4 | (env.log_fixed ? 8 : 0);
1194 		/* --top-src-lines needs verifier log */
1195 		if (env.top_src_lines > 0 && env.log_level == 0)
1196 			log_level |= 2;
1197 	} else {
1198 		buf = verif_log_buf;
1199 		buf_sz = sizeof(verif_log_buf);
1200 		/* request only verifier stats */
1201 		log_level = 4 | (env.log_fixed ? 8 : 0);
1202 	}
1203 	verif_log_buf[0] = '\0';
1204 
1205 	bpf_program__set_log_buf(prog, buf, buf_sz);
1206 	bpf_program__set_log_level(prog, log_level);
1207 
1208 	/* increase chances of successful BPF object loading */
1209 	fixup_obj(obj, prog, base_filename);
1210 
1211 	if (env.force_checkpoints)
1212 		bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_STATE_FREQ);
1213 	if (env.force_reg_invariants)
1214 		bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_REG_INVARIANTS);
1215 
1216 	err = bpf_object__load(obj);
1217 	env.progs_processed++;
1218 
1219 	stats->file_name = strdup(base_filename);
1220 	stats->prog_name = strdup(bpf_program__name(prog));
1221 	stats->stats[VERDICT] = err == 0; /* 1 - success, 0 - failure */
1222 	stats->stats[SIZE] = bpf_program__insn_cnt(prog);
1223 	stats->stats[PROG_TYPE] = bpf_program__type(prog);
1224 	stats->stats[ATTACH_TYPE] = bpf_program__expected_attach_type(prog);
1225 
1226 	memset(&info, 0, info_len);
1227 	fd = bpf_program__fd(prog);
1228 	if (fd > 0 && bpf_prog_get_info_by_fd(fd, &info, &info_len) == 0)
1229 		stats->stats[JITED_SIZE] = info.jited_prog_len;
1230 
1231 	parse_verif_log(buf, buf_sz, stats);
1232 
1233 	if (env.verbose) {
1234 		printf("PROCESSING %s/%s, DURATION US: %ld, VERDICT: %s, VERIFIER LOG:\n%s\n",
1235 		       filename, prog_name, stats->stats[DURATION],
1236 		       err ? "failure" : "success", buf);
1237 	}
1238 	if (env.top_src_lines > 0)
1239 		print_top_src_lines(buf, buf_sz, stats->prog_name);
1240 
1241 	if (verif_log_buf != buf)
1242 		free(buf);
1243 
1244 	return 0;
1245 };
1246 
1247 static int process_obj(const char *filename)
1248 {
1249 	const char *base_filename = basename(strdupa(filename));
1250 	struct bpf_object *obj = NULL, *tobj;
1251 	struct bpf_program *prog, *tprog, *lprog;
1252 	libbpf_print_fn_t old_libbpf_print_fn;
1253 	LIBBPF_OPTS(bpf_object_open_opts, opts);
1254 	int err = 0, prog_cnt = 0;
1255 
1256 	if (!should_process_file_prog(base_filename, NULL)) {
1257 		if (env.verbose)
1258 			printf("Skipping '%s' due to filters...\n", filename);
1259 		env.files_skipped++;
1260 		return 0;
1261 	}
1262 	if (!is_bpf_obj_file(filename)) {
1263 		if (env.verbose)
1264 			printf("Skipping '%s' as it's not a BPF object file...\n", filename);
1265 		env.files_skipped++;
1266 		return 0;
1267 	}
1268 
1269 	if (!env.quiet && env.out_fmt == RESFMT_TABLE)
1270 		printf("Processing '%s'...\n", base_filename);
1271 
1272 	old_libbpf_print_fn = libbpf_set_print(libbpf_print_fn);
1273 	obj = bpf_object__open_file(filename, &opts);
1274 	if (!obj) {
1275 		/* if libbpf can't open BPF object file, it could be because
1276 		 * that BPF object file is incomplete and has to be statically
1277 		 * linked into a final BPF object file; instead of bailing
1278 		 * out, report it into stderr, mark it as skipped, and
1279 		 * proceed
1280 		 */
1281 		fprintf(stderr, "Failed to open '%s': %d\n", filename, -errno);
1282 		env.files_skipped++;
1283 		err = 0;
1284 		goto cleanup;
1285 	}
1286 
1287 	env.files_processed++;
1288 
1289 	bpf_object__for_each_program(prog, obj) {
1290 		prog_cnt++;
1291 	}
1292 
1293 	if (prog_cnt == 1) {
1294 		prog = bpf_object__next_program(obj, NULL);
1295 		bpf_program__set_autoload(prog, true);
1296 		process_prog(filename, obj, prog);
1297 		goto cleanup;
1298 	}
1299 
1300 	bpf_object__for_each_program(prog, obj) {
1301 		const char *prog_name = bpf_program__name(prog);
1302 
1303 		tobj = bpf_object__open_file(filename, &opts);
1304 		if (!tobj) {
1305 			err = -errno;
1306 			fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
1307 			goto cleanup;
1308 		}
1309 
1310 		lprog = NULL;
1311 		bpf_object__for_each_program(tprog, tobj) {
1312 			const char *tprog_name = bpf_program__name(tprog);
1313 
1314 			if (strcmp(prog_name, tprog_name) == 0) {
1315 				bpf_program__set_autoload(tprog, true);
1316 				lprog = tprog;
1317 			} else {
1318 				bpf_program__set_autoload(tprog, false);
1319 			}
1320 		}
1321 
1322 		process_prog(filename, tobj, lprog);
1323 		bpf_object__close(tobj);
1324 	}
1325 
1326 cleanup:
1327 	bpf_object__close(obj);
1328 	libbpf_set_print(old_libbpf_print_fn);
1329 	return err;
1330 }
1331 
1332 static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2,
1333 		    enum stat_id id, bool asc, bool abs)
1334 {
1335 	int cmp = 0;
1336 
1337 	switch (id) {
1338 	case FILE_NAME:
1339 		cmp = strcmp(s1->file_name, s2->file_name);
1340 		break;
1341 	case PROG_NAME:
1342 		cmp = strcmp(s1->prog_name, s2->prog_name);
1343 		break;
1344 	case ATTACH_TYPE:
1345 	case PROG_TYPE:
1346 	case SIZE:
1347 	case JITED_SIZE:
1348 	case STACK:
1349 	case VERDICT:
1350 	case DURATION:
1351 	case TOTAL_INSNS:
1352 	case TOTAL_STATES:
1353 	case PEAK_STATES:
1354 	case MAX_STATES_PER_INSN:
1355 	case MARK_READ_MAX_LEN: {
1356 		long v1 = s1->stats[id];
1357 		long v2 = s2->stats[id];
1358 
1359 		if (abs) {
1360 			v1 = v1 < 0 ? -v1 : v1;
1361 			v2 = v2 < 0 ? -v2 : v2;
1362 		}
1363 
1364 		if (v1 != v2)
1365 			cmp = v1 < v2 ? -1 : 1;
1366 		break;
1367 	}
1368 	default:
1369 		fprintf(stderr, "Unrecognized stat #%d\n", id);
1370 		exit(1);
1371 	}
1372 
1373 	return asc ? cmp : -cmp;
1374 }
1375 
1376 static int cmp_prog_stats(const void *v1, const void *v2)
1377 {
1378 	const struct verif_stats *s1 = v1, *s2 = v2;
1379 	int i, cmp;
1380 
1381 	for (i = 0; i < env.sort_spec.spec_cnt; i++) {
1382 		cmp = cmp_stat(s1, s2, env.sort_spec.ids[i],
1383 			       env.sort_spec.asc[i], env.sort_spec.abs[i]);
1384 		if (cmp != 0)
1385 			return cmp;
1386 	}
1387 
1388 	/* always disambiguate with file+prog, which are unique */
1389 	cmp = strcmp(s1->file_name, s2->file_name);
1390 	if (cmp != 0)
1391 		return cmp;
1392 	return strcmp(s1->prog_name, s2->prog_name);
1393 }
1394 
1395 static void fetch_join_stat_value(const struct verif_stats_join *s,
1396 				  enum stat_id id, enum stat_variant var,
1397 				  const char **str_val,
1398 				  double *num_val)
1399 {
1400 	long v1, v2;
1401 
1402 	if (id == FILE_NAME) {
1403 		*str_val = s->file_name;
1404 		return;
1405 	}
1406 	if (id == PROG_NAME) {
1407 		*str_val = s->prog_name;
1408 		return;
1409 	}
1410 
1411 	v1 = s->stats_a ? s->stats_a->stats[id] : 0;
1412 	v2 = s->stats_b ? s->stats_b->stats[id] : 0;
1413 
1414 	switch (var) {
1415 	case VARIANT_A:
1416 		if (!s->stats_a)
1417 			*num_val = -DBL_MAX;
1418 		else
1419 			*num_val = s->stats_a->stats[id];
1420 		return;
1421 	case VARIANT_B:
1422 		if (!s->stats_b)
1423 			*num_val = -DBL_MAX;
1424 		else
1425 			*num_val = s->stats_b->stats[id];
1426 		return;
1427 	case VARIANT_DIFF:
1428 		if (!s->stats_a || !s->stats_b)
1429 			*num_val = -DBL_MAX;
1430 		else if (id == VERDICT)
1431 			*num_val = v1 == v2 ? 1.0 /* MATCH */ : 0.0 /* MISMATCH */;
1432 		else
1433 			*num_val = (double)(v2 - v1);
1434 		return;
1435 	case VARIANT_PCT:
1436 		if (!s->stats_a || !s->stats_b) {
1437 			*num_val = -DBL_MAX;
1438 		} else if (v1 == 0) {
1439 			if (v1 == v2)
1440 				*num_val = 0.0;
1441 			else
1442 				*num_val = v2 < v1 ? -100.0 : 100.0;
1443 		} else {
1444 			 *num_val = (v2 - v1) * 100.0 / v1;
1445 		}
1446 		return;
1447 	}
1448 }
1449 
1450 static int cmp_join_stat(const struct verif_stats_join *s1,
1451 			 const struct verif_stats_join *s2,
1452 			 enum stat_id id, enum stat_variant var,
1453 			 bool asc, bool abs)
1454 {
1455 	const char *str1 = NULL, *str2 = NULL;
1456 	double v1 = 0.0, v2 = 0.0;
1457 	int cmp = 0;
1458 
1459 	fetch_join_stat_value(s1, id, var, &str1, &v1);
1460 	fetch_join_stat_value(s2, id, var, &str2, &v2);
1461 
1462 	if (abs) {
1463 		v1 = fabs(v1);
1464 		v2 = fabs(v2);
1465 	}
1466 
1467 	if (str1)
1468 		cmp = strcmp(str1, str2);
1469 	else if (v1 != v2)
1470 		cmp = v1 < v2 ? -1 : 1;
1471 
1472 	return asc ? cmp : -cmp;
1473 }
1474 
1475 static int cmp_join_stats(const void *v1, const void *v2)
1476 {
1477 	const struct verif_stats_join *s1 = v1, *s2 = v2;
1478 	int i, cmp;
1479 
1480 	for (i = 0; i < env.sort_spec.spec_cnt; i++) {
1481 		cmp = cmp_join_stat(s1, s2,
1482 				    env.sort_spec.ids[i],
1483 				    env.sort_spec.variants[i],
1484 				    env.sort_spec.asc[i],
1485 				    env.sort_spec.abs[i]);
1486 		if (cmp != 0)
1487 			return cmp;
1488 	}
1489 
1490 	/* always disambiguate with file+prog, which are unique */
1491 	cmp = strcmp(s1->file_name, s2->file_name);
1492 	if (cmp != 0)
1493 		return cmp;
1494 	return strcmp(s1->prog_name, s2->prog_name);
1495 }
1496 
1497 #define HEADER_CHAR '-'
1498 #define COLUMN_SEP "  "
1499 
1500 static void output_header_underlines(void)
1501 {
1502 	int i, j, len;
1503 
1504 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1505 		len = env.output_spec.lens[i];
1506 
1507 		printf("%s", i == 0 ? "" : COLUMN_SEP);
1508 		for (j = 0; j < len; j++)
1509 			printf("%c", HEADER_CHAR);
1510 	}
1511 	printf("\n");
1512 }
1513 
1514 static void output_headers(enum resfmt fmt)
1515 {
1516 	const char *fmt_str;
1517 	int i, len;
1518 
1519 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1520 		int id = env.output_spec.ids[i];
1521 		int *max_len = &env.output_spec.lens[i];
1522 
1523 		switch (fmt) {
1524 		case RESFMT_TABLE_CALCLEN:
1525 			len = snprintf(NULL, 0, "%s", stat_defs[id].header);
1526 			if (len > *max_len)
1527 				*max_len = len;
1528 			break;
1529 		case RESFMT_TABLE:
1530 			fmt_str = stat_defs[id].left_aligned ? "%s%-*s" : "%s%*s";
1531 			printf(fmt_str, i == 0 ? "" : COLUMN_SEP,  *max_len, stat_defs[id].header);
1532 			if (i == env.output_spec.spec_cnt - 1)
1533 				printf("\n");
1534 			break;
1535 		case RESFMT_CSV:
1536 			printf("%s%s", i == 0 ? "" : ",", stat_defs[id].names[0]);
1537 			if (i == env.output_spec.spec_cnt - 1)
1538 				printf("\n");
1539 			break;
1540 		}
1541 	}
1542 
1543 	if (fmt == RESFMT_TABLE)
1544 		output_header_underlines();
1545 }
1546 
1547 static void prepare_value(const struct verif_stats *s, enum stat_id id,
1548 			  const char **str, long *val)
1549 {
1550 	switch (id) {
1551 	case FILE_NAME:
1552 		*str = s ? s->file_name : "N/A";
1553 		break;
1554 	case PROG_NAME:
1555 		*str = s ? s->prog_name : "N/A";
1556 		break;
1557 	case VERDICT:
1558 		if (!s)
1559 			*str = "N/A";
1560 		else
1561 			*str = s->stats[VERDICT] ? "success" : "failure";
1562 		break;
1563 	case ATTACH_TYPE:
1564 		if (!s)
1565 			*str = "N/A";
1566 		else
1567 			*str = libbpf_bpf_attach_type_str(s->stats[ATTACH_TYPE]) ?: "N/A";
1568 		break;
1569 	case PROG_TYPE:
1570 		if (!s)
1571 			*str = "N/A";
1572 		else
1573 			*str = libbpf_bpf_prog_type_str(s->stats[PROG_TYPE]) ?: "N/A";
1574 		break;
1575 	case DURATION:
1576 	case TOTAL_INSNS:
1577 	case TOTAL_STATES:
1578 	case PEAK_STATES:
1579 	case MAX_STATES_PER_INSN:
1580 	case MARK_READ_MAX_LEN:
1581 	case STACK:
1582 	case SIZE:
1583 	case JITED_SIZE:
1584 		*val = s ? s->stats[id] : 0;
1585 		break;
1586 	default:
1587 		fprintf(stderr, "Unrecognized stat #%d\n", id);
1588 		exit(1);
1589 	}
1590 }
1591 
1592 static void output_stats(const struct verif_stats *s, enum resfmt fmt, bool last)
1593 {
1594 	int i;
1595 
1596 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1597 		int id = env.output_spec.ids[i];
1598 		int *max_len = &env.output_spec.lens[i], len;
1599 		const char *str = NULL;
1600 		long val = 0;
1601 
1602 		prepare_value(s, id, &str, &val);
1603 
1604 		switch (fmt) {
1605 		case RESFMT_TABLE_CALCLEN:
1606 			if (str)
1607 				len = snprintf(NULL, 0, "%s", str);
1608 			else
1609 				len = snprintf(NULL, 0, "%ld", val);
1610 			if (len > *max_len)
1611 				*max_len = len;
1612 			break;
1613 		case RESFMT_TABLE:
1614 			if (str)
1615 				printf("%s%-*s", i == 0 ? "" : COLUMN_SEP, *max_len, str);
1616 			else
1617 				printf("%s%*ld", i == 0 ? "" : COLUMN_SEP,  *max_len, val);
1618 			if (i == env.output_spec.spec_cnt - 1)
1619 				printf("\n");
1620 			break;
1621 		case RESFMT_CSV:
1622 			if (str)
1623 				printf("%s%s", i == 0 ? "" : ",", str);
1624 			else
1625 				printf("%s%ld", i == 0 ? "" : ",", val);
1626 			if (i == env.output_spec.spec_cnt - 1)
1627 				printf("\n");
1628 			break;
1629 		}
1630 	}
1631 
1632 	if (last && fmt == RESFMT_TABLE) {
1633 		output_header_underlines();
1634 		printf("Done. Processed %d files, %d programs. Skipped %d files, %d programs.\n",
1635 		       env.files_processed, env.files_skipped, env.progs_processed, env.progs_skipped);
1636 	}
1637 }
1638 
1639 static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats *st)
1640 {
1641 	switch (id) {
1642 	case FILE_NAME:
1643 		st->file_name = strdup(str);
1644 		if (!st->file_name)
1645 			return -ENOMEM;
1646 		break;
1647 	case PROG_NAME:
1648 		st->prog_name = strdup(str);
1649 		if (!st->prog_name)
1650 			return -ENOMEM;
1651 		break;
1652 	case VERDICT:
1653 		if (strcmp(str, "success") == 0) {
1654 			st->stats[VERDICT] = true;
1655 		} else if (strcmp(str, "failure") == 0) {
1656 			st->stats[VERDICT] = false;
1657 		} else {
1658 			fprintf(stderr, "Unrecognized verification verdict '%s'\n", str);
1659 			return -EINVAL;
1660 		}
1661 		break;
1662 	case DURATION:
1663 	case TOTAL_INSNS:
1664 	case TOTAL_STATES:
1665 	case PEAK_STATES:
1666 	case MAX_STATES_PER_INSN:
1667 	case MARK_READ_MAX_LEN: {
1668 		long val;
1669 		int err, n;
1670 
1671 		if (sscanf(str, "%ld %n", &val, &n) != 1 || n != strlen(str)) {
1672 			err = -errno;
1673 			fprintf(stderr, "Failed to parse '%s' as integer\n", str);
1674 			return err;
1675 		}
1676 
1677 		st->stats[id] = val;
1678 		break;
1679 	}
1680 	default:
1681 		fprintf(stderr, "Unrecognized stat #%d\n", id);
1682 		return -EINVAL;
1683 	}
1684 	return 0;
1685 }
1686 
1687 static int parse_stats_csv(const char *filename, struct stat_specs *specs,
1688 			   struct verif_stats **statsp, int *stat_cntp)
1689 {
1690 	char line[4096];
1691 	FILE *f;
1692 	int err = 0;
1693 	bool header = true;
1694 
1695 	f = fopen(filename, "r");
1696 	if (!f) {
1697 		err = -errno;
1698 		fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
1699 		return err;
1700 	}
1701 
1702 	*stat_cntp = 0;
1703 
1704 	while (fgets(line, sizeof(line), f)) {
1705 		char *input = line, *state = NULL, *next;
1706 		struct verif_stats *st = NULL;
1707 		int col = 0, cnt = 0;
1708 
1709 		if (!header) {
1710 			void *tmp;
1711 
1712 			tmp = realloc(*statsp, (*stat_cntp + 1) * sizeof(**statsp));
1713 			if (!tmp) {
1714 				err = -ENOMEM;
1715 				goto cleanup;
1716 			}
1717 			*statsp = tmp;
1718 
1719 			st = &(*statsp)[*stat_cntp];
1720 			memset(st, 0, sizeof(*st));
1721 
1722 			*stat_cntp += 1;
1723 		}
1724 
1725 		while ((next = strtok_r(cnt++ ? NULL : input, ",\n", &state))) {
1726 			if (header) {
1727 				/* for the first line, set up spec stats */
1728 				err = parse_stat(next, specs);
1729 				if (err)
1730 					goto cleanup;
1731 				continue;
1732 			}
1733 
1734 			/* for all other lines, parse values based on spec */
1735 			if (col >= specs->spec_cnt) {
1736 				fprintf(stderr, "Found extraneous column #%d in row #%d of '%s'\n",
1737 					col, *stat_cntp, filename);
1738 				err = -EINVAL;
1739 				goto cleanup;
1740 			}
1741 			err = parse_stat_value(next, specs->ids[col], st);
1742 			if (err)
1743 				goto cleanup;
1744 			col++;
1745 		}
1746 
1747 		if (header) {
1748 			header = false;
1749 			continue;
1750 		}
1751 
1752 		if (col < specs->spec_cnt) {
1753 			fprintf(stderr, "Not enough columns in row #%d in '%s'\n",
1754 				*stat_cntp, filename);
1755 			err = -EINVAL;
1756 			goto cleanup;
1757 		}
1758 
1759 		if (!st->file_name || !st->prog_name) {
1760 			fprintf(stderr, "Row #%d in '%s' is missing file and/or program name\n",
1761 				*stat_cntp, filename);
1762 			err = -EINVAL;
1763 			goto cleanup;
1764 		}
1765 
1766 		/* in comparison mode we can only check filters after we
1767 		 * parsed entire line; if row should be ignored we pretend we
1768 		 * never parsed it
1769 		 */
1770 		if (!should_process_file_prog(st->file_name, st->prog_name)) {
1771 			free(st->file_name);
1772 			free(st->prog_name);
1773 			*stat_cntp -= 1;
1774 		}
1775 	}
1776 
1777 	if (!feof(f)) {
1778 		err = -errno;
1779 		fprintf(stderr, "Failed I/O for '%s': %d\n", filename, err);
1780 	}
1781 
1782 cleanup:
1783 	fclose(f);
1784 	return err;
1785 }
1786 
1787 /* empty/zero stats for mismatched rows */
1788 static const struct verif_stats fallback_stats = { .file_name = "", .prog_name = "" };
1789 
1790 static bool is_key_stat(enum stat_id id)
1791 {
1792 	return id == FILE_NAME || id == PROG_NAME;
1793 }
1794 
1795 static void output_comp_header_underlines(void)
1796 {
1797 	int i, j, k;
1798 
1799 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1800 		int id = env.output_spec.ids[i];
1801 		int max_j = is_key_stat(id) ? 1 : 3;
1802 
1803 		for (j = 0; j < max_j; j++) {
1804 			int len = env.output_spec.lens[3 * i + j];
1805 
1806 			printf("%s", i + j == 0 ? "" : COLUMN_SEP);
1807 
1808 			for (k = 0; k < len; k++)
1809 				printf("%c", HEADER_CHAR);
1810 		}
1811 	}
1812 	printf("\n");
1813 }
1814 
1815 static void output_comp_headers(enum resfmt fmt)
1816 {
1817 	static const char *table_sfxs[3] = {" (A)", " (B)", " (DIFF)"};
1818 	static const char *name_sfxs[3] = {"_base", "_comp", "_diff"};
1819 	int i, j, len;
1820 
1821 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1822 		int id = env.output_spec.ids[i];
1823 		/* key stats don't have A/B/DIFF columns, they are common for both data sets */
1824 		int max_j = is_key_stat(id) ? 1 : 3;
1825 
1826 		for (j = 0; j < max_j; j++) {
1827 			int *max_len = &env.output_spec.lens[3 * i + j];
1828 			bool last = (i == env.output_spec.spec_cnt - 1) && (j == max_j - 1);
1829 			const char *sfx;
1830 
1831 			switch (fmt) {
1832 			case RESFMT_TABLE_CALCLEN:
1833 				sfx = is_key_stat(id) ? "" : table_sfxs[j];
1834 				len = snprintf(NULL, 0, "%s%s", stat_defs[id].header, sfx);
1835 				if (len > *max_len)
1836 					*max_len = len;
1837 				break;
1838 			case RESFMT_TABLE:
1839 				sfx = is_key_stat(id) ? "" : table_sfxs[j];
1840 				printf("%s%-*s%s", i + j == 0 ? "" : COLUMN_SEP,
1841 				       *max_len - (int)strlen(sfx), stat_defs[id].header, sfx);
1842 				if (last)
1843 					printf("\n");
1844 				break;
1845 			case RESFMT_CSV:
1846 				sfx = is_key_stat(id) ? "" : name_sfxs[j];
1847 				printf("%s%s%s", i + j == 0 ? "" : ",", stat_defs[id].names[0], sfx);
1848 				if (last)
1849 					printf("\n");
1850 				break;
1851 			}
1852 		}
1853 	}
1854 
1855 	if (fmt == RESFMT_TABLE)
1856 		output_comp_header_underlines();
1857 }
1858 
1859 static void output_comp_stats(const struct verif_stats_join *join_stats,
1860 			      enum resfmt fmt, bool last)
1861 {
1862 	const struct verif_stats *base = join_stats->stats_a;
1863 	const struct verif_stats *comp = join_stats->stats_b;
1864 	char base_buf[1024] = {}, comp_buf[1024] = {}, diff_buf[1024] = {};
1865 	int i;
1866 
1867 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1868 		int id = env.output_spec.ids[i], len;
1869 		int *max_len_base = &env.output_spec.lens[3 * i + 0];
1870 		int *max_len_comp = &env.output_spec.lens[3 * i + 1];
1871 		int *max_len_diff = &env.output_spec.lens[3 * i + 2];
1872 		const char *base_str = NULL, *comp_str = NULL;
1873 		long base_val = 0, comp_val = 0, diff_val = 0;
1874 
1875 		prepare_value(base, id, &base_str, &base_val);
1876 		prepare_value(comp, id, &comp_str, &comp_val);
1877 
1878 		/* normalize all the outputs to be in string buffers for simplicity */
1879 		if (is_key_stat(id)) {
1880 			/* key stats (file and program name) are always strings */
1881 			if (base)
1882 				snprintf(base_buf, sizeof(base_buf), "%s", base_str);
1883 			else
1884 				snprintf(base_buf, sizeof(base_buf), "%s", comp_str);
1885 		} else if (base_str) {
1886 			snprintf(base_buf, sizeof(base_buf), "%s", base_str);
1887 			snprintf(comp_buf, sizeof(comp_buf), "%s", comp_str);
1888 			if (!base || !comp)
1889 				snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A");
1890 			else if (strcmp(base_str, comp_str) == 0)
1891 				snprintf(diff_buf, sizeof(diff_buf), "%s", "MATCH");
1892 			else
1893 				snprintf(diff_buf, sizeof(diff_buf), "%s", "MISMATCH");
1894 		} else {
1895 			double p = 0.0;
1896 
1897 			if (base)
1898 				snprintf(base_buf, sizeof(base_buf), "%ld", base_val);
1899 			else
1900 				snprintf(base_buf, sizeof(base_buf), "%s", "N/A");
1901 			if (comp)
1902 				snprintf(comp_buf, sizeof(comp_buf), "%ld", comp_val);
1903 			else
1904 				snprintf(comp_buf, sizeof(comp_buf), "%s", "N/A");
1905 
1906 			diff_val = comp_val - base_val;
1907 			if (!base || !comp) {
1908 				snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A");
1909 			} else {
1910 				if (base_val == 0) {
1911 					if (comp_val == base_val)
1912 						p = 0.0; /* avoid +0 (+100%) case */
1913 					else
1914 						p = comp_val < base_val ? -100.0 : 100.0;
1915 				} else {
1916 					 p = diff_val * 100.0 / base_val;
1917 				}
1918 				snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)", diff_val, p);
1919 			}
1920 		}
1921 
1922 		switch (fmt) {
1923 		case RESFMT_TABLE_CALCLEN:
1924 			len = strlen(base_buf);
1925 			if (len > *max_len_base)
1926 				*max_len_base = len;
1927 			if (!is_key_stat(id)) {
1928 				len = strlen(comp_buf);
1929 				if (len > *max_len_comp)
1930 					*max_len_comp = len;
1931 				len = strlen(diff_buf);
1932 				if (len > *max_len_diff)
1933 					*max_len_diff = len;
1934 			}
1935 			break;
1936 		case RESFMT_TABLE: {
1937 			/* string outputs are left-aligned, number outputs are right-aligned */
1938 			const char *fmt = base_str ? "%s%-*s" : "%s%*s";
1939 
1940 			printf(fmt, i == 0 ? "" : COLUMN_SEP, *max_len_base, base_buf);
1941 			if (!is_key_stat(id)) {
1942 				printf(fmt, COLUMN_SEP, *max_len_comp, comp_buf);
1943 				printf(fmt, COLUMN_SEP, *max_len_diff, diff_buf);
1944 			}
1945 			if (i == env.output_spec.spec_cnt - 1)
1946 				printf("\n");
1947 			break;
1948 		}
1949 		case RESFMT_CSV:
1950 			printf("%s%s", i == 0 ? "" : ",", base_buf);
1951 			if (!is_key_stat(id)) {
1952 				printf("%s%s", i == 0 ? "" : ",", comp_buf);
1953 				printf("%s%s", i == 0 ? "" : ",", diff_buf);
1954 			}
1955 			if (i == env.output_spec.spec_cnt - 1)
1956 				printf("\n");
1957 			break;
1958 		}
1959 	}
1960 
1961 	if (last && fmt == RESFMT_TABLE)
1962 		output_comp_header_underlines();
1963 }
1964 
1965 static int cmp_stats_key(const struct verif_stats *base, const struct verif_stats *comp)
1966 {
1967 	int r;
1968 
1969 	r = strcmp(base->file_name, comp->file_name);
1970 	if (r != 0)
1971 		return r;
1972 	return strcmp(base->prog_name, comp->prog_name);
1973 }
1974 
1975 static bool is_join_stat_filter_matched(struct filter *f, const struct verif_stats_join *stats)
1976 {
1977 	static const double eps = 1e-9;
1978 	const char *str = NULL;
1979 	double value = 0.0;
1980 
1981 	fetch_join_stat_value(stats, f->stat_id, f->stat_var, &str, &value);
1982 
1983 	if (f->abs)
1984 		value = fabs(value);
1985 
1986 	switch (f->op) {
1987 	case OP_EQ: return value > f->value - eps && value < f->value + eps;
1988 	case OP_NEQ: return value < f->value - eps || value > f->value + eps;
1989 	case OP_LT: return value < f->value - eps;
1990 	case OP_LE: return value <= f->value + eps;
1991 	case OP_GT: return value > f->value + eps;
1992 	case OP_GE: return value >= f->value - eps;
1993 	}
1994 
1995 	fprintf(stderr, "BUG: unknown filter op %d!\n", f->op);
1996 	return false;
1997 }
1998 
1999 static bool should_output_join_stats(const struct verif_stats_join *stats)
2000 {
2001 	struct filter *f;
2002 	int i, allow_cnt = 0;
2003 
2004 	for (i = 0; i < env.deny_filter_cnt; i++) {
2005 		f = &env.deny_filters[i];
2006 		if (f->kind != FILTER_STAT)
2007 			continue;
2008 
2009 		if (is_join_stat_filter_matched(f, stats))
2010 			return false;
2011 	}
2012 
2013 	for (i = 0; i < env.allow_filter_cnt; i++) {
2014 		f = &env.allow_filters[i];
2015 		if (f->kind != FILTER_STAT)
2016 			continue;
2017 		allow_cnt++;
2018 
2019 		if (is_join_stat_filter_matched(f, stats))
2020 			return true;
2021 	}
2022 
2023 	/* if there are no stat allowed filters, pass everything through */
2024 	return allow_cnt == 0;
2025 }
2026 
2027 static int handle_comparison_mode(void)
2028 {
2029 	struct stat_specs base_specs = {}, comp_specs = {};
2030 	struct stat_specs tmp_sort_spec;
2031 	enum resfmt cur_fmt;
2032 	int err, i, j, last_idx, cnt;
2033 
2034 	if (env.filename_cnt != 2) {
2035 		fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n\n");
2036 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2037 		return -EINVAL;
2038 	}
2039 
2040 	err = parse_stats_csv(env.filenames[0], &base_specs,
2041 			      &env.baseline_stats, &env.baseline_stat_cnt);
2042 	if (err) {
2043 		fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err);
2044 		return err;
2045 	}
2046 	err = parse_stats_csv(env.filenames[1], &comp_specs,
2047 			      &env.prog_stats, &env.prog_stat_cnt);
2048 	if (err) {
2049 		fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[1], err);
2050 		return err;
2051 	}
2052 
2053 	/* To keep it simple we validate that the set and order of stats in
2054 	 * both CSVs are exactly the same. This can be lifted with a bit more
2055 	 * pre-processing later.
2056 	 */
2057 	if (base_specs.spec_cnt != comp_specs.spec_cnt) {
2058 		fprintf(stderr, "Number of stats in '%s' and '%s' differs (%d != %d)!\n",
2059 			env.filenames[0], env.filenames[1],
2060 			base_specs.spec_cnt, comp_specs.spec_cnt);
2061 		return -EINVAL;
2062 	}
2063 	for (i = 0; i < base_specs.spec_cnt; i++) {
2064 		if (base_specs.ids[i] != comp_specs.ids[i]) {
2065 			fprintf(stderr, "Stats composition differs between '%s' and '%s' (%s != %s)!\n",
2066 				env.filenames[0], env.filenames[1],
2067 				stat_defs[base_specs.ids[i]].names[0],
2068 				stat_defs[comp_specs.ids[i]].names[0]);
2069 			return -EINVAL;
2070 		}
2071 	}
2072 
2073 	/* Replace user-specified sorting spec with file+prog sorting rule to
2074 	 * be able to join two datasets correctly. Once we are done, we will
2075 	 * restore the original sort spec.
2076 	 */
2077 	tmp_sort_spec = env.sort_spec;
2078 	env.sort_spec = join_sort_spec;
2079 	qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
2080 	qsort(env.baseline_stats, env.baseline_stat_cnt, sizeof(*env.baseline_stats), cmp_prog_stats);
2081 	env.sort_spec = tmp_sort_spec;
2082 
2083 	/* Join two datasets together. If baseline and comparison datasets
2084 	 * have different subset of rows (we match by 'object + prog' as
2085 	 * a unique key) then assume empty/missing/zero value for rows that
2086 	 * are missing in the opposite data set.
2087 	 */
2088 	i = j = 0;
2089 	while (i < env.baseline_stat_cnt || j < env.prog_stat_cnt) {
2090 		const struct verif_stats *base, *comp;
2091 		struct verif_stats_join *join;
2092 		void *tmp;
2093 		int r;
2094 
2095 		base = i < env.baseline_stat_cnt ? &env.baseline_stats[i] : &fallback_stats;
2096 		comp = j < env.prog_stat_cnt ? &env.prog_stats[j] : &fallback_stats;
2097 
2098 		if (!base->file_name || !base->prog_name) {
2099 			fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n",
2100 				i, env.filenames[0]);
2101 			return -EINVAL;
2102 		}
2103 		if (!comp->file_name || !comp->prog_name) {
2104 			fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n",
2105 				j, env.filenames[1]);
2106 			return -EINVAL;
2107 		}
2108 
2109 		tmp = realloc(env.join_stats, (env.join_stat_cnt + 1) * sizeof(*env.join_stats));
2110 		if (!tmp)
2111 			return -ENOMEM;
2112 		env.join_stats = tmp;
2113 
2114 		join = &env.join_stats[env.join_stat_cnt];
2115 		memset(join, 0, sizeof(*join));
2116 
2117 		r = cmp_stats_key(base, comp);
2118 		if (r == 0) {
2119 			join->file_name = base->file_name;
2120 			join->prog_name = base->prog_name;
2121 			join->stats_a = base;
2122 			join->stats_b = comp;
2123 			i++;
2124 			j++;
2125 		} else if (base != &fallback_stats && (comp == &fallback_stats || r < 0)) {
2126 			join->file_name = base->file_name;
2127 			join->prog_name = base->prog_name;
2128 			join->stats_a = base;
2129 			join->stats_b = NULL;
2130 			i++;
2131 		} else if (comp != &fallback_stats && (base == &fallback_stats || r > 0)) {
2132 			join->file_name = comp->file_name;
2133 			join->prog_name = comp->prog_name;
2134 			join->stats_a = NULL;
2135 			join->stats_b = comp;
2136 			j++;
2137 		} else {
2138 			fprintf(stderr, "%s:%d: should never reach here i=%i, j=%i",
2139 				__FILE__, __LINE__, i, j);
2140 			return -EINVAL;
2141 		}
2142 		env.join_stat_cnt += 1;
2143 	}
2144 
2145 	/* now sort joined results according to sort spec */
2146 	qsort(env.join_stats, env.join_stat_cnt, sizeof(*env.join_stats), cmp_join_stats);
2147 
2148 	/* for human-readable table output we need to do extra pass to
2149 	 * calculate column widths, so we substitute current output format
2150 	 * with RESFMT_TABLE_CALCLEN and later revert it back to RESFMT_TABLE
2151 	 * and do everything again.
2152 	 */
2153 	if (env.out_fmt == RESFMT_TABLE)
2154 		cur_fmt = RESFMT_TABLE_CALCLEN;
2155 	else
2156 		cur_fmt = env.out_fmt;
2157 
2158 one_more_time:
2159 	output_comp_headers(cur_fmt);
2160 
2161 	last_idx = -1;
2162 	cnt = 0;
2163 	for (i = 0; i < env.join_stat_cnt; i++) {
2164 		const struct verif_stats_join *join = &env.join_stats[i];
2165 
2166 		if (!should_output_join_stats(join))
2167 			continue;
2168 
2169 		if (env.top_n && cnt >= env.top_n)
2170 			break;
2171 
2172 		if (cur_fmt == RESFMT_TABLE_CALCLEN)
2173 			last_idx = i;
2174 
2175 		output_comp_stats(join, cur_fmt, i == last_idx);
2176 
2177 		cnt++;
2178 	}
2179 
2180 	if (cur_fmt == RESFMT_TABLE_CALCLEN) {
2181 		cur_fmt = RESFMT_TABLE;
2182 		goto one_more_time; /* ... this time with feeling */
2183 	}
2184 
2185 	return 0;
2186 }
2187 
2188 static bool is_stat_filter_matched(struct filter *f, const struct verif_stats *stats)
2189 {
2190 	long value = stats->stats[f->stat_id];
2191 
2192 	if (f->abs)
2193 		value = value < 0 ? -value : value;
2194 
2195 	switch (f->op) {
2196 	case OP_EQ: return value == f->value;
2197 	case OP_NEQ: return value != f->value;
2198 	case OP_LT: return value < f->value;
2199 	case OP_LE: return value <= f->value;
2200 	case OP_GT: return value > f->value;
2201 	case OP_GE: return value >= f->value;
2202 	}
2203 
2204 	fprintf(stderr, "BUG: unknown filter op %d!\n", f->op);
2205 	return false;
2206 }
2207 
2208 static bool should_output_stats(const struct verif_stats *stats)
2209 {
2210 	struct filter *f;
2211 	int i, allow_cnt = 0;
2212 
2213 	for (i = 0; i < env.deny_filter_cnt; i++) {
2214 		f = &env.deny_filters[i];
2215 		if (f->kind != FILTER_STAT)
2216 			continue;
2217 
2218 		if (is_stat_filter_matched(f, stats))
2219 			return false;
2220 	}
2221 
2222 	for (i = 0; i < env.allow_filter_cnt; i++) {
2223 		f = &env.allow_filters[i];
2224 		if (f->kind != FILTER_STAT)
2225 			continue;
2226 		allow_cnt++;
2227 
2228 		if (is_stat_filter_matched(f, stats))
2229 			return true;
2230 	}
2231 
2232 	/* if there are no stat allowed filters, pass everything through */
2233 	return allow_cnt == 0;
2234 }
2235 
2236 static void output_prog_stats(void)
2237 {
2238 	const struct verif_stats *stats;
2239 	int i, last_stat_idx = 0, cnt = 0;
2240 
2241 	if (env.out_fmt == RESFMT_TABLE) {
2242 		/* calculate column widths */
2243 		output_headers(RESFMT_TABLE_CALCLEN);
2244 		for (i = 0; i < env.prog_stat_cnt; i++) {
2245 			stats = &env.prog_stats[i];
2246 			if (!should_output_stats(stats))
2247 				continue;
2248 			output_stats(stats, RESFMT_TABLE_CALCLEN, false);
2249 			last_stat_idx = i;
2250 		}
2251 	}
2252 
2253 	/* actually output the table */
2254 	output_headers(env.out_fmt);
2255 	for (i = 0; i < env.prog_stat_cnt; i++) {
2256 		stats = &env.prog_stats[i];
2257 		if (!should_output_stats(stats))
2258 			continue;
2259 		if (env.top_n && cnt >= env.top_n)
2260 			break;
2261 		output_stats(stats, env.out_fmt, i == last_stat_idx);
2262 		cnt++;
2263 	}
2264 }
2265 
2266 static int handle_verif_mode(void)
2267 {
2268 	int i, err;
2269 
2270 	if (env.filename_cnt == 0) {
2271 		fprintf(stderr, "Please provide path to BPF object file!\n\n");
2272 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2273 		return -EINVAL;
2274 	}
2275 
2276 	for (i = 0; i < env.filename_cnt; i++) {
2277 		err = process_obj(env.filenames[i]);
2278 		if (err) {
2279 			fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err);
2280 			return err;
2281 		}
2282 	}
2283 
2284 	qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
2285 
2286 	output_prog_stats();
2287 
2288 	return 0;
2289 }
2290 
2291 static int handle_replay_mode(void)
2292 {
2293 	struct stat_specs specs = {};
2294 	int err;
2295 
2296 	if (env.filename_cnt != 1) {
2297 		fprintf(stderr, "Replay mode expects exactly one input CSV file!\n\n");
2298 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2299 		return -EINVAL;
2300 	}
2301 
2302 	err = parse_stats_csv(env.filenames[0], &specs,
2303 			      &env.prog_stats, &env.prog_stat_cnt);
2304 	if (err) {
2305 		fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err);
2306 		return err;
2307 	}
2308 
2309 	qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
2310 
2311 	output_prog_stats();
2312 
2313 	return 0;
2314 }
2315 
2316 int main(int argc, char **argv)
2317 {
2318 	int err = 0, i;
2319 
2320 	if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
2321 		return 1;
2322 
2323 	if (env.show_version) {
2324 		printf("%s\n", argp_program_version);
2325 		return 0;
2326 	}
2327 
2328 	if (env.verbose && env.quiet) {
2329 		fprintf(stderr, "Verbose and quiet modes are incompatible, please specify just one or neither!\n\n");
2330 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2331 		return 1;
2332 	}
2333 	if (env.verbose && env.log_level == 0)
2334 		env.log_level = 1;
2335 
2336 	if (env.output_spec.spec_cnt == 0) {
2337 		if (env.out_fmt == RESFMT_CSV)
2338 			env.output_spec = default_csv_output_spec;
2339 		else
2340 			env.output_spec = default_output_spec;
2341 	}
2342 	if (env.sort_spec.spec_cnt == 0)
2343 		env.sort_spec = default_sort_spec;
2344 
2345 	if (env.comparison_mode && env.replay_mode) {
2346 		fprintf(stderr, "Can't specify replay and comparison mode at the same time!\n\n");
2347 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2348 		return 1;
2349 	}
2350 
2351 	if (env.comparison_mode)
2352 		err = handle_comparison_mode();
2353 	else if (env.replay_mode)
2354 		err = handle_replay_mode();
2355 	else
2356 		err = handle_verif_mode();
2357 
2358 	free_verif_stats(env.prog_stats, env.prog_stat_cnt);
2359 	free_verif_stats(env.baseline_stats, env.baseline_stat_cnt);
2360 	free(env.join_stats);
2361 	for (i = 0; i < env.filename_cnt; i++)
2362 		free(env.filenames[i]);
2363 	free(env.filenames);
2364 	for (i = 0; i < env.allow_filter_cnt; i++) {
2365 		free(env.allow_filters[i].any_glob);
2366 		free(env.allow_filters[i].file_glob);
2367 		free(env.allow_filters[i].prog_glob);
2368 	}
2369 	free(env.allow_filters);
2370 	for (i = 0; i < env.deny_filter_cnt; i++) {
2371 		free(env.deny_filters[i].any_glob);
2372 		free(env.deny_filters[i].file_glob);
2373 		free(env.deny_filters[i].prog_glob);
2374 	}
2375 	free(env.deny_filters);
2376 	return -err;
2377 }
2378