xref: /linux/tools/testing/selftests/bpf/veristat.c (revision b6a1af0362b3232c7b474b9b46e49b862602018c)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
3 #define _GNU_SOURCE
4 #include <argp.h>
5 #include <string.h>
6 #include <stdlib.h>
7 #include <linux/compiler.h>
8 #include <sched.h>
9 #include <pthread.h>
10 #include <dirent.h>
11 #include <signal.h>
12 #include <fcntl.h>
13 #include <unistd.h>
14 #include <sys/time.h>
15 #include <sys/sysinfo.h>
16 #include <sys/stat.h>
17 #include <bpf/libbpf.h>
18 #include <libelf.h>
19 #include <gelf.h>
20 #include <float.h>
21 
/* Identifiers of every stat the tool knows about. The numeric stats are listed
 * first so that their ids can index verif_stats.stats[] directly; the two
 * string-valued ids (file and program name) follow them.
 */
enum stat_id {
	VERDICT,
	DURATION,
	TOTAL_INSNS,
	TOTAL_STATES,
	PEAK_STATES,
	MAX_STATES_PER_INSN,
	MARK_READ_MAX_LEN,

	/* string-valued ids, not stored in verif_stats.stats[] */
	FILE_NAME,
	PROG_NAME,

	/* total number of stat ids (numeric and string) */
	ALL_STATS_CNT,
	/* number of numeric stats, i.e. size of verif_stats.stats[] */
	NUM_STATS_CNT = FILE_NAME - VERDICT,
};
37 
38 /* In comparison mode each stat can specify up to four different values:
39  *   - A side value;
40  *   - B side value;
41  *   - absolute diff value;
42  *   - relative (percentage) diff value.
43  *
44  * When specifying stat specs in comparison mode, user can use one of the
45  * following variant suffixes to specify which exact variant should be used for
46  * ordering or filtering:
47  *   - `_a` for A side value;
48  *   - `_b` for B side value;
49  *   - `_diff` for absolute diff value;
50  *   - `_pct` for relative (percentage) diff value.
51  *
52  * If no variant suffix is provided, then `_b` (control data) is assumed.
53  *
54  * As an example, let's say instructions stat has the following output:
55  *
56  * Insns (A)  Insns (B)  Insns   (DIFF)
57  * ---------  ---------  --------------
58  * 21547      20920       -627 (-2.91%)
59  *
60  * Then:
61  *   - 21547 is A side value (insns_a);
62  *   - 20920 is B side value (insns_b);
63  *   - -627 is absolute diff value (insns_diff);
64  *   - -2.91% is relative diff value (insns_pct).
65  *
66  * For verdict there is no verdict_pct variant.
67  * For file and program name, _a and _b variants are equivalent and there are
68  * no _diff or _pct variants.
69  */
enum stat_variant {
	VARIANT_A,	/* `_a` suffix: A side value */
	VARIANT_B,	/* `_b` suffix (also the default): B side value */
	VARIANT_DIFF,	/* `_diff` suffix: absolute diff value */
	VARIANT_PCT,	/* `_pct` suffix: relative (percentage) diff value */
};
76 
/* Verification results for a single BPF program. */
struct verif_stats {
	/* both names are heap-allocated and released by free_verif_stats() */
	char *file_name;
	char *prog_name;

	/* numeric stats, indexed by the numeric enum stat_id values */
	long stats[NUM_STATS_CNT];
};
83 
/* joined comparison mode stats */
struct verif_stats_join {
	char *file_name;
	char *prog_name;

	/* A and B side stats; consumers check each pointer for NULL, which
	 * stands for "no data on that side"
	 */
	const struct verif_stats *stats_a;
	const struct verif_stats *stats_b;
};
92 
/* An ordered list of stats, used both as an output (column) specification and
 * as a sort specification. Entries 0..spec_cnt-1 of the parallel arrays are
 * valid.
 */
struct stat_specs {
	int spec_cnt;				/* number of valid entries */
	enum stat_id ids[ALL_STATS_CNT];	/* which stat */
	enum stat_variant variants[ALL_STATS_CNT]; /* which variant (comparison mode) */
	bool asc[ALL_STATS_CNT];		/* ascending sort order? */
	int lens[ALL_STATS_CNT * 3]; /* 3x for comparison mode */
};
100 
/* Output formats understood by the output_*() helpers. */
enum resfmt {
	RESFMT_TABLE,
	RESFMT_TABLE_CALCLEN, /* fake format to pre-calculate table's column widths */
	RESFMT_CSV,
};
106 
/* Discriminator for struct filter. */
enum filter_kind {
	FILTER_NAME,	/* file/program name glob filter */
	FILTER_STAT,	/* <stat><op><value> numeric filter */
};
111 
/* Comparison operators usable in stat filters; the comments list the textual
 * spellings accepted for each operator.
 */
enum operator_kind {
	OP_EQ,		/* == or = */
	OP_NEQ,		/* != or <> */
	OP_LT,		/* < */
	OP_LE,		/* <= */
	OP_GT,		/* > */
	OP_GE,		/* >= */
};
120 
/* A single allow/deny filter. Which group of fields is meaningful depends on
 * @kind; append_filter() zeroes the whole struct before filling it in.
 */
struct filter {
	enum filter_kind kind;
	/* FILTER_NAME */
	char *any_glob;		/* glob applied to both file and program names */
	char *file_glob;	/* glob applied to the file name only */
	char *prog_glob;	/* glob applied to the program name only */
	/* FILTER_STAT */
	enum operator_kind op;
	int stat_id;
	enum stat_variant stat_var;
	long value;		/* right-hand side of the comparison */
};
133 
/* Global tool state: settings parsed from the command line, collected stats
 * and processing counters.
 */
static struct env {
	/* positional arguments, each one a strdup()'ed copy */
	char **filenames;
	int filename_cnt;
	bool verbose;
	bool quiet;
	int log_level;
	enum resfmt out_fmt;
	bool comparison_mode;
	bool replay_mode;

	/* one entry per processed program, grown with realloc() */
	struct verif_stats *prog_stats;
	int prog_stat_cnt;

	/* baseline_stats is allocated and used only in comparison mode */
	struct verif_stats *baseline_stats;
	int baseline_stat_cnt;

	struct verif_stats_join *join_stats;
	int join_stat_cnt;

	struct stat_specs output_spec;
	struct stat_specs sort_spec;

	/* filters collected from -f options and filter files */
	struct filter *allow_filters;
	struct filter *deny_filters;
	int allow_filter_cnt;
	int deny_filter_cnt;

	/* processing counters */
	int files_processed;
	int files_skipped;
	int progs_processed;
	int progs_skipped;
} env;
167 
168 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
169 {
170 	if (!env.verbose)
171 		return 0;
172 	if (level == LIBBPF_DEBUG /* && !env.verbose */)
173 		return 0;
174 	return vfprintf(stderr, format, args);
175 }
176 
/* Program identification and usage text for argp. argp_program_version and
 * argp_program_bug_address use the names glibc's argp looks up for its
 * version and bug-report address; argp_program_doc is wired into the argp
 * parser definition below.
 */
const char *argp_program_version = "veristat";
const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
const char argp_program_doc[] =
"veristat    BPF verifier stats collection and comparison tool.\n"
"\n"
"USAGE: veristat <obj-file> [<obj-file>...]\n"
"   OR: veristat -C <baseline.csv> <comparison.csv>\n";
184 
/* Command-line options; each short key is handled by the matching case in
 * parse_arg(). The table is terminated by an empty entry.
 */
static const struct argp_option opts[] = {
	{ NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
	{ "verbose", 'v', NULL, 0, "Verbose mode" },
	{ "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" },
	{ "quiet", 'q', NULL, 0, "Quiet mode" },
	{ "emit", 'e', "SPEC", 0, "Specify stats to be emitted" },
	{ "sort", 's', "SPEC", 0, "Specify sort order" },
	{ "output-format", 'o', "FMT", 0, "Result output format (table, csv), default is table." },
	{ "compare", 'C', NULL, 0, "Comparison mode" },
	{ "replay", 'R', NULL, 0, "Replay mode" },
	{ "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)." },
	{},
};
198 
/* Forward declarations of helpers used by parse_arg() and defined further down. */
static int parse_stats(const char *stats_str, struct stat_specs *specs);
static int append_filter(struct filter **filters, int *cnt, const char *str);
static int append_filter_file(const char *path);
202 
203 static error_t parse_arg(int key, char *arg, struct argp_state *state)
204 {
205 	void *tmp;
206 	int err;
207 
208 	switch (key) {
209 	case 'h':
210 		argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
211 		break;
212 	case 'v':
213 		env.verbose = true;
214 		break;
215 	case 'q':
216 		env.quiet = true;
217 		break;
218 	case 'e':
219 		err = parse_stats(arg, &env.output_spec);
220 		if (err)
221 			return err;
222 		break;
223 	case 's':
224 		err = parse_stats(arg, &env.sort_spec);
225 		if (err)
226 			return err;
227 		break;
228 	case 'o':
229 		if (strcmp(arg, "table") == 0) {
230 			env.out_fmt = RESFMT_TABLE;
231 		} else if (strcmp(arg, "csv") == 0) {
232 			env.out_fmt = RESFMT_CSV;
233 		} else {
234 			fprintf(stderr, "Unrecognized output format '%s'\n", arg);
235 			return -EINVAL;
236 		}
237 		break;
238 	case 'l':
239 		errno = 0;
240 		env.log_level = strtol(arg, NULL, 10);
241 		if (errno) {
242 			fprintf(stderr, "invalid log level: %s\n", arg);
243 			argp_usage(state);
244 		}
245 		break;
246 	case 'C':
247 		env.comparison_mode = true;
248 		break;
249 	case 'R':
250 		env.replay_mode = true;
251 		break;
252 	case 'f':
253 		if (arg[0] == '@')
254 			err = append_filter_file(arg + 1);
255 		else if (arg[0] == '!')
256 			err = append_filter(&env.deny_filters, &env.deny_filter_cnt, arg + 1);
257 		else
258 			err = append_filter(&env.allow_filters, &env.allow_filter_cnt, arg);
259 		if (err) {
260 			fprintf(stderr, "Failed to collect program filter expressions: %d\n", err);
261 			return err;
262 		}
263 		break;
264 	case ARGP_KEY_ARG:
265 		tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames));
266 		if (!tmp)
267 			return -ENOMEM;
268 		env.filenames = tmp;
269 		env.filenames[env.filename_cnt] = strdup(arg);
270 		if (!env.filenames[env.filename_cnt])
271 			return -ENOMEM;
272 		env.filename_cnt++;
273 		break;
274 	default:
275 		return ARGP_ERR_UNKNOWN;
276 	}
277 	return 0;
278 }
279 
/* argp parser definition tying together the option table, the per-argument
 * callback and the usage text.
 */
static const struct argp argp = {
	.options = opts,
	.parser = parse_arg,
	.doc = argp_program_doc,
};
285 
286 
/* Adapted from perf/util/string.c
 *
 * Check whether @str matches glob pattern @pat. The only special character
 * is '*', which matches any (possibly empty) run of characters; everything
 * else has to match literally and the whole string must be consumed.
 */
static bool glob_matches(const char *str, const char *pat)
{
	/* consume the literal prefix preceding the first wildcard */
	for (; *str != '\0' && *pat != '\0' && *pat != '*'; str++, pat++) {
		if (*pat != *str)
			return false;
	}

	/* without a wildcard both string and pattern must be exhausted */
	if (*pat != '*')
		return *str == '\0' && *pat == '\0';

	/* collapse a run of consecutive wildcards */
	do {
		pat++;
	} while (*pat == '*');

	/* a trailing wildcard swallows whatever is left of the string */
	if (*pat == '\0')
		return true;

	/* try the rest of the pattern against every remaining suffix */
	for (; *str != '\0'; str++) {
		if (glob_matches(str, pat))
			return true;
	}

	return false;
}
308 
309 static bool is_bpf_obj_file(const char *path) {
310 	Elf64_Ehdr *ehdr;
311 	int fd, err = -EINVAL;
312 	Elf *elf = NULL;
313 
314 	fd = open(path, O_RDONLY | O_CLOEXEC);
315 	if (fd < 0)
316 		return true; /* we'll fail later and propagate error */
317 
318 	/* ensure libelf is initialized */
319 	(void)elf_version(EV_CURRENT);
320 
321 	elf = elf_begin(fd, ELF_C_READ, NULL);
322 	if (!elf)
323 		goto cleanup;
324 
325 	if (elf_kind(elf) != ELF_K_ELF || gelf_getclass(elf) != ELFCLASS64)
326 		goto cleanup;
327 
328 	ehdr = elf64_getehdr(elf);
329 	/* Old LLVM set e_machine to EM_NONE */
330 	if (!ehdr || ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF))
331 		goto cleanup;
332 
333 	err = 0;
334 cleanup:
335 	if (elf)
336 		elf_end(elf);
337 	close(fd);
338 	return err == 0;
339 }
340 
341 static bool should_process_file_prog(const char *filename, const char *prog_name)
342 {
343 	struct filter *f;
344 	int i, allow_cnt = 0;
345 
346 	for (i = 0; i < env.deny_filter_cnt; i++) {
347 		f = &env.deny_filters[i];
348 		if (f->kind != FILTER_NAME)
349 			continue;
350 
351 		if (f->any_glob && glob_matches(filename, f->any_glob))
352 			return false;
353 		if (f->any_glob && prog_name && glob_matches(prog_name, f->any_glob))
354 			return false;
355 		if (f->file_glob && glob_matches(filename, f->file_glob))
356 			return false;
357 		if (f->prog_glob && prog_name && glob_matches(prog_name, f->prog_glob))
358 			return false;
359 	}
360 
361 	for (i = 0; i < env.allow_filter_cnt; i++) {
362 		f = &env.allow_filters[i];
363 		if (f->kind != FILTER_NAME)
364 			continue;
365 
366 		allow_cnt++;
367 		if (f->any_glob) {
368 			if (glob_matches(filename, f->any_glob))
369 				return true;
370 			/* If we don't know program name yet, any_glob filter
371 			 * has to assume that current BPF object file might be
372 			 * relevant; we'll check again later on after opening
373 			 * BPF object file, at which point program name will
374 			 * be known finally.
375 			 */
376 			if (!prog_name || glob_matches(prog_name, f->any_glob))
377 				return true;
378 		} else {
379 			if (f->file_glob && !glob_matches(filename, f->file_glob))
380 				continue;
381 			if (f->prog_glob && prog_name && !glob_matches(prog_name, f->prog_glob))
382 				continue;
383 			return true;
384 		}
385 	}
386 
387 	/* if there are no file/prog name allow filters, allow all progs,
388 	 * unless they are denied earlier explicitly
389 	 */
390 	return allow_cnt == 0;
391 }
392 
/* Textual spellings of the stat filter operators, mapped to operator kinds.
 * append_filter() walks this table in order and uses the first spelling
 * found in the filter string.
 */
static struct {
	enum operator_kind op_kind;
	const char *op_str;
} operators[] = {
	/* Order of these definitions matter to avoid situations like '<'
	 * matching part of what is actually a '<>' operator. That is,
	 * substrings should go last.
	 */
	{ OP_EQ, "==" },
	{ OP_NEQ, "!=" },
	{ OP_NEQ, "<>" },
	{ OP_LE, "<=" },
	{ OP_LT, "<" },
	{ OP_GE, ">=" },
	{ OP_GT, ">" },
	{ OP_EQ, "=" },
};
410 
/* Forward declaration: needed by append_filter(), defined after stat_defs. */
static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var);
412 
/* Parse filter expression @str and append the resulting filter to the
 * *@filters array, growing it by one element and bumping *@cnt on success.
 *
 * @str is first tried as a stat filter and, if it contains no operator, is
 * treated as a file/program name glob filter.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, or -EINVAL if a stat
 * filter names an unknown/non-integer stat or has a malformed value. On
 * failure *@cnt is left unchanged.
 */
static int append_filter(struct filter **filters, int *cnt, const char *str)
{
	struct filter *f;
	void *tmp;
	const char *p;
	int i;

	tmp = realloc(*filters, (*cnt + 1) * sizeof(**filters));
	if (!tmp)
		return -ENOMEM;
	*filters = tmp;

	f = &(*filters)[*cnt];
	memset(f, 0, sizeof(*f));

	/* First, let's check if it's a stats filter of the following form:
	 * <stat><op><value>, where:
	 *   - <stat> is one of supported numerical stats (verdict is also
	 *     considered numerical, failure == 0, success == 1);
	 *   - <op> is comparison operator (see `operators` definitions);
	 *   - <value> is an integer (or failure/success, or false/true as
	 *     special aliases for 0 and 1, respectively).
	 * If the form doesn't match what user provided, we assume file/prog
	 * glob filter.
	 */
	for (i = 0; i < ARRAY_SIZE(operators); i++) {
		enum stat_variant var;
		int id;
		long val;
		const char *end = str;
		const char *op_str;

		op_str = operators[i].op_str;
		p = strstr(str, op_str);
		if (!p)
			continue;

		/* everything before the operator is the stat name */
		if (!parse_stat_id_var(str, p - str, &id, &var)) {
			fprintf(stderr, "Unrecognized stat name in '%s'!\n", str);
			return -EINVAL;
		}
		if (id >= FILE_NAME) {
			fprintf(stderr, "Non-integer stat is specified in '%s'!\n", str);
			return -EINVAL;
		}

		/* everything after the operator is the value */
		p += strlen(op_str);

		if (strcasecmp(p, "true") == 0 ||
		    strcasecmp(p, "t") == 0 ||
		    strcasecmp(p, "success") == 0 ||
		    strcasecmp(p, "succ") == 0 ||
		    strcasecmp(p, "s") == 0 ||
		    strcasecmp(p, "match") == 0 ||
		    strcasecmp(p, "m") == 0) {
			val = 1;
		} else if (strcasecmp(p, "false") == 0 ||
			   strcasecmp(p, "f") == 0 ||
			   strcasecmp(p, "failure") == 0 ||
			   strcasecmp(p, "fail") == 0 ||
			   strcasecmp(p, "mismatch") == 0 ||
			   strcasecmp(p, "mis") == 0) {
			val = 0;
		} else {
			/* reject out-of-range values, empty values and trailing garbage */
			errno = 0;
			val = strtol(p, (char **)&end, 10);
			if (errno || end == p || *end != '\0' ) {
				fprintf(stderr, "Invalid integer value in '%s'!\n", str);
				return -EINVAL;
			}
		}

		f->kind = FILTER_STAT;
		f->stat_id = id;
		f->stat_var = var;
		f->op = operators[i].op_kind;
		f->value = val;

		*cnt += 1;
		return 0;
	}

	/* File/prog filter can be specified either as '<glob>' or
	 * '<file-glob>/<prog-glob>'. In the former case <glob> is applied to
	 * both file and program names. This seems to be way more useful in
	 * practice. If user needs full control, they can use '/<prog-glob>'
	 * form to glob just program name, or '<file-glob>/' to glob only file
	 * name. But usually common <glob> seems to be the most useful and
	 * ergonomic way.
	 */
	f->kind = FILTER_NAME;
	p = strchr(str, '/');
	if (!p) {
		f->any_glob = strdup(str);
		if (!f->any_glob)
			return -ENOMEM;
	} else {
		if (str != p) {
			/* non-empty file glob */
			f->file_glob = strndup(str, p - str);
			if (!f->file_glob)
				return -ENOMEM;
		}
		if (strlen(p + 1) > 0) {
			/* non-empty prog glob */
			f->prog_glob = strdup(p + 1);
			if (!f->prog_glob) {
				/* don't leak the file glob allocated just above */
				free(f->file_glob);
				f->file_glob = NULL;
				return -ENOMEM;
			}
		}
	}

	*cnt += 1;
	return 0;
}
530 
531 static int append_filter_file(const char *path)
532 {
533 	char buf[1024];
534 	FILE *f;
535 	int err = 0;
536 
537 	f = fopen(path, "r");
538 	if (!f) {
539 		err = -errno;
540 		fprintf(stderr, "Failed to open filters in '%s': %d\n", path, err);
541 		return err;
542 	}
543 
544 	while (fscanf(f, " %1023[^\n]\n", buf) == 1) {
545 		/* lines starting with # are comments, skip them */
546 		if (buf[0] == '\0' || buf[0] == '#')
547 			continue;
548 		/* lines starting with ! are negative match filters */
549 		if (buf[0] == '!')
550 			err = append_filter(&env.deny_filters, &env.deny_filter_cnt, buf + 1);
551 		else
552 			err = append_filter(&env.allow_filters, &env.allow_filter_cnt, buf);
553 		if (err)
554 			goto cleanup;
555 	}
556 
557 cleanup:
558 	fclose(f);
559 	return err;
560 }
561 
/* Stat columns listed for output by default: seven of them, names first. */
static const struct stat_specs default_output_spec = {
	.spec_cnt = 7,
	.ids = {
		FILE_NAME, PROG_NAME, VERDICT, DURATION,
		TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
	},
};
569 
/* Default stat columns for CSV output: the seven default columns plus
 * MAX_STATES_PER_INSN and MARK_READ_MAX_LEN.
 */
static const struct stat_specs default_csv_output_spec = {
	.spec_cnt = 9,
	.ids = {
		FILE_NAME, PROG_NAME, VERDICT, DURATION,
		TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
		MAX_STATES_PER_INSN, MARK_READ_MAX_LEN,
	},
};
578 
/* Default sort order: by file name, then by program name, both ascending. */
static const struct stat_specs default_sort_spec = {
	.spec_cnt = 2,
	.ids = {
		FILE_NAME, PROG_NAME,
	},
	.asc = { true, true, },
};
586 
/* sorting for comparison mode to join two data sets: file name, then program
 * name, both ascending
 */
static const struct stat_specs join_sort_spec = {
	.spec_cnt = 2,
	.ids = {
		FILE_NAME, PROG_NAME,
	},
	.asc = { true, true, },
};
595 
/* Static description of every stat, indexed by enum stat_id. */
static struct stat_def {
	/* column title used for table output */
	const char *header;
	/* names accepted on the command line; names[0] also serves as the
	 * CSV column name, unused slots are NULL
	 */
	const char *names[4];
	/* sort direction used when the user doesn't specify one */
	bool asc_by_default;
	/* whether the table header is left-aligned (right-aligned otherwise) */
	bool left_aligned;
} stat_defs[] = {
	[FILE_NAME] = { "File", {"file_name", "filename", "file"}, true /* asc */, true /* left */ },
	[PROG_NAME] = { "Program", {"prog_name", "progname", "prog"}, true /* asc */, true /* left */ },
	[VERDICT] = { "Verdict", {"verdict"}, true /* asc: failure, success */, true /* left */ },
	[DURATION] = { "Duration (us)", {"duration", "dur"}, },
	[TOTAL_INSNS] = { "Insns", {"total_insns", "insns"}, },
	[TOTAL_STATES] = { "States", {"total_states", "states"}, },
	[PEAK_STATES] = { "Peak states", {"peak_states"}, },
	[MAX_STATES_PER_INSN] = { "Max states per insn", {"max_states_per_insn"}, },
	[MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, },
};
612 
613 static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var)
614 {
615 	static const char *var_sfxs[] = {
616 		[VARIANT_A] = "_a",
617 		[VARIANT_B] = "_b",
618 		[VARIANT_DIFF] = "_diff",
619 		[VARIANT_PCT] = "_pct",
620 	};
621 	int i, j, k;
622 
623 	for (i = 0; i < ARRAY_SIZE(stat_defs); i++) {
624 		struct stat_def *def = &stat_defs[i];
625 		size_t alias_len, sfx_len;
626 		const char *alias;
627 
628 		for (j = 0; j < ARRAY_SIZE(stat_defs[i].names); j++) {
629 			alias = def->names[j];
630 			if (!alias)
631 				continue;
632 
633 			alias_len = strlen(alias);
634 			if (strncmp(name, alias, alias_len) != 0)
635 				continue;
636 
637 			if (alias_len == len) {
638 				/* If no variant suffix is specified, we
639 				 * assume control group (just in case we are
640 				 * in comparison mode. Variant is ignored in
641 				 * non-comparison mode.
642 				 */
643 				*var = VARIANT_B;
644 				*id = i;
645 				return true;
646 			}
647 
648 			for (k = 0; k < ARRAY_SIZE(var_sfxs); k++) {
649 				sfx_len = strlen(var_sfxs[k]);
650 				if (alias_len + sfx_len != len)
651 					continue;
652 
653 				if (strncmp(name + alias_len, var_sfxs[k], sfx_len) == 0) {
654 					*var = (enum stat_variant)k;
655 					*id = i;
656 					return true;
657 				}
658 			}
659 		}
660 	}
661 
662 	return false;
663 }
664 
/* Is @c the trailing symbol requesting ascending order? */
static bool is_asc_sym(char c)
{
	const char asc_marker = '^';

	return asc_marker == c;
}
669 
/* Is @c one of the trailing symbols requesting descending order? */
static bool is_desc_sym(char c)
{
	switch (c) {
	case 'v':
	case 'V':
	case '.':
	case '!':
	case '_':
		return true;
	default:
		return false;
	}
}
674 
675 static int parse_stat(const char *stat_name, struct stat_specs *specs)
676 {
677 	int id;
678 	bool has_order = false, is_asc = false;
679 	size_t len = strlen(stat_name);
680 	enum stat_variant var;
681 
682 	if (specs->spec_cnt >= ARRAY_SIZE(specs->ids)) {
683 		fprintf(stderr, "Can't specify more than %zd stats\n", ARRAY_SIZE(specs->ids));
684 		return -E2BIG;
685 	}
686 
687 	if (len > 1 && (is_asc_sym(stat_name[len - 1]) || is_desc_sym(stat_name[len - 1]))) {
688 		has_order = true;
689 		is_asc = is_asc_sym(stat_name[len - 1]);
690 		len -= 1;
691 	}
692 
693 	if (!parse_stat_id_var(stat_name, len, &id, &var)) {
694 		fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name);
695 		return -ESRCH;
696 	}
697 
698 	specs->ids[specs->spec_cnt] = id;
699 	specs->variants[specs->spec_cnt] = var;
700 	specs->asc[specs->spec_cnt] = has_order ? is_asc : stat_defs[id].asc_by_default;
701 	specs->spec_cnt++;
702 
703 	return 0;
704 }
705 
/* Parse a comma-separated list of stat specs and append each one to @specs.
 *
 * @stats_str is left untouched: tokenization happens on a private copy,
 * which is released before returning on every path.
 *
 * Returns 0 on success, -ENOMEM if the copy can't be allocated, or the error
 * reported by parse_stat() for the first spec that fails to parse.
 */
static int parse_stats(const char *stats_str, struct stat_specs *specs)
{
	char *input, *state = NULL, *next;
	int err = 0;

	/* strtok_r() modifies its input, so work on a copy */
	input = strdup(stats_str);
	if (!input)
		return -ENOMEM;

	while ((next = strtok_r(state ? NULL : input, ",", &state))) {
		err = parse_stat(next, specs);
		if (err)
			break;
	}

	/* parse_stat() keeps no pointers into the copy, so it can go now */
	free(input);
	return err;
}
723 
724 static void free_verif_stats(struct verif_stats *stats, size_t stat_cnt)
725 {
726 	int i;
727 
728 	if (!stats)
729 		return;
730 
731 	for (i = 0; i < stat_cnt; i++) {
732 		free(stats[i].file_name);
733 		free(stats[i].prog_name);
734 	}
735 	free(stats);
736 }
737 
/* Default buffer receiving the verifier log; process_prog() switches to a
 * larger heap buffer in verbose mode.
 */
static char verif_log_buf[64 * 1024];

/* Upper bound on how many trailing log lines parse_verif_log() inspects. */
#define MAX_PARSED_LOG_LINES 100
741 
742 static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *s)
743 {
744 	const char *cur;
745 	int pos, lines;
746 
747 	buf[buf_sz - 1] = '\0';
748 
749 	for (pos = strlen(buf) - 1, lines = 0; pos >= 0 && lines < MAX_PARSED_LOG_LINES; lines++) {
750 		/* find previous endline or otherwise take the start of log buf */
751 		for (cur = &buf[pos]; cur > buf && cur[0] != '\n'; cur--, pos--) {
752 		}
753 		/* next time start from end of previous line (or pos goes to <0) */
754 		pos--;
755 		/* if we found endline, point right after endline symbol;
756 		 * otherwise, stay at the beginning of log buf
757 		 */
758 		if (cur[0] == '\n')
759 			cur++;
760 
761 		if (1 == sscanf(cur, "verification time %ld usec\n", &s->stats[DURATION]))
762 			continue;
763 		if (6 == sscanf(cur, "processed %ld insns (limit %*d) max_states_per_insn %ld total_states %ld peak_states %ld mark_read %ld",
764 				&s->stats[TOTAL_INSNS],
765 				&s->stats[MAX_STATES_PER_INSN],
766 				&s->stats[TOTAL_STATES],
767 				&s->stats[PEAK_STATES],
768 				&s->stats[MARK_READ_MAX_LEN]))
769 			continue;
770 	}
771 
772 	return 0;
773 }
774 
775 static void fixup_obj(struct bpf_object *obj)
776 {
777 	struct bpf_map *map;
778 
779 	bpf_object__for_each_map(map, obj) {
780 		/* disable pinning */
781 		bpf_map__set_pin_path(map, NULL);
782 
783 		/* fix up map size, if necessary */
784 		switch (bpf_map__type(map)) {
785 		case BPF_MAP_TYPE_SK_STORAGE:
786 		case BPF_MAP_TYPE_TASK_STORAGE:
787 		case BPF_MAP_TYPE_INODE_STORAGE:
788 		case BPF_MAP_TYPE_CGROUP_STORAGE:
789 			break;
790 		default:
791 			if (bpf_map__max_entries(map) == 0)
792 				bpf_map__set_max_entries(map, 1);
793 		}
794 	}
795 }
796 
797 static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog)
798 {
799 	const char *prog_name = bpf_program__name(prog);
800 	size_t buf_sz = sizeof(verif_log_buf);
801 	char *buf = verif_log_buf;
802 	struct verif_stats *stats;
803 	int err = 0;
804 	void *tmp;
805 
806 	if (!should_process_file_prog(basename(filename), bpf_program__name(prog))) {
807 		env.progs_skipped++;
808 		return 0;
809 	}
810 
811 	tmp = realloc(env.prog_stats, (env.prog_stat_cnt + 1) * sizeof(*env.prog_stats));
812 	if (!tmp)
813 		return -ENOMEM;
814 	env.prog_stats = tmp;
815 	stats = &env.prog_stats[env.prog_stat_cnt++];
816 	memset(stats, 0, sizeof(*stats));
817 
818 	if (env.verbose) {
819 		buf_sz = 16 * 1024 * 1024;
820 		buf = malloc(buf_sz);
821 		if (!buf)
822 			return -ENOMEM;
823 		bpf_program__set_log_buf(prog, buf, buf_sz);
824 		bpf_program__set_log_level(prog, env.log_level | 4); /* stats + log */
825 	} else {
826 		bpf_program__set_log_buf(prog, buf, buf_sz);
827 		bpf_program__set_log_level(prog, 4); /* only verifier stats */
828 	}
829 	verif_log_buf[0] = '\0';
830 
831 	/* increase chances of successful BPF object loading */
832 	fixup_obj(obj);
833 
834 	err = bpf_object__load(obj);
835 	env.progs_processed++;
836 
837 	stats->file_name = strdup(basename(filename));
838 	stats->prog_name = strdup(bpf_program__name(prog));
839 	stats->stats[VERDICT] = err == 0; /* 1 - success, 0 - failure */
840 	parse_verif_log(buf, buf_sz, stats);
841 
842 	if (env.verbose) {
843 		printf("PROCESSING %s/%s, DURATION US: %ld, VERDICT: %s, VERIFIER LOG:\n%s\n",
844 		       filename, prog_name, stats->stats[DURATION],
845 		       err ? "failure" : "success", buf);
846 	}
847 
848 	if (verif_log_buf != buf)
849 		free(buf);
850 
851 	return 0;
852 };
853 
854 static int process_obj(const char *filename)
855 {
856 	struct bpf_object *obj = NULL, *tobj;
857 	struct bpf_program *prog, *tprog, *lprog;
858 	libbpf_print_fn_t old_libbpf_print_fn;
859 	LIBBPF_OPTS(bpf_object_open_opts, opts);
860 	int err = 0, prog_cnt = 0;
861 
862 	if (!should_process_file_prog(basename(filename), NULL)) {
863 		if (env.verbose)
864 			printf("Skipping '%s' due to filters...\n", filename);
865 		env.files_skipped++;
866 		return 0;
867 	}
868 	if (!is_bpf_obj_file(filename)) {
869 		if (env.verbose)
870 			printf("Skipping '%s' as it's not a BPF object file...\n", filename);
871 		env.files_skipped++;
872 		return 0;
873 	}
874 
875 	if (!env.quiet && env.out_fmt == RESFMT_TABLE)
876 		printf("Processing '%s'...\n", basename(filename));
877 
878 	old_libbpf_print_fn = libbpf_set_print(libbpf_print_fn);
879 	obj = bpf_object__open_file(filename, &opts);
880 	if (!obj) {
881 		/* if libbpf can't open BPF object file, it could be because
882 		 * that BPF object file is incomplete and has to be statically
883 		 * linked into a final BPF object file; instead of bailing
884 		 * out, report it into stderr, mark it as skipped, and
885 		 * proceeed
886 		 */
887 		fprintf(stderr, "Failed to open '%s': %d\n", filename, -errno);
888 		env.files_skipped++;
889 		err = 0;
890 		goto cleanup;
891 	}
892 
893 	env.files_processed++;
894 
895 	bpf_object__for_each_program(prog, obj) {
896 		prog_cnt++;
897 	}
898 
899 	if (prog_cnt == 1) {
900 		prog = bpf_object__next_program(obj, NULL);
901 		bpf_program__set_autoload(prog, true);
902 		process_prog(filename, obj, prog);
903 		goto cleanup;
904 	}
905 
906 	bpf_object__for_each_program(prog, obj) {
907 		const char *prog_name = bpf_program__name(prog);
908 
909 		tobj = bpf_object__open_file(filename, &opts);
910 		if (!tobj) {
911 			err = -errno;
912 			fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
913 			goto cleanup;
914 		}
915 
916 		bpf_object__for_each_program(tprog, tobj) {
917 			const char *tprog_name = bpf_program__name(tprog);
918 
919 			if (strcmp(prog_name, tprog_name) == 0) {
920 				bpf_program__set_autoload(tprog, true);
921 				lprog = tprog;
922 			} else {
923 				bpf_program__set_autoload(tprog, false);
924 			}
925 		}
926 
927 		process_prog(filename, tobj, lprog);
928 		bpf_object__close(tobj);
929 	}
930 
931 cleanup:
932 	bpf_object__close(obj);
933 	libbpf_set_print(old_libbpf_print_fn);
934 	return err;
935 }
936 
937 static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2,
938 		    enum stat_id id, bool asc)
939 {
940 	int cmp = 0;
941 
942 	switch (id) {
943 	case FILE_NAME:
944 		cmp = strcmp(s1->file_name, s2->file_name);
945 		break;
946 	case PROG_NAME:
947 		cmp = strcmp(s1->prog_name, s2->prog_name);
948 		break;
949 	case VERDICT:
950 	case DURATION:
951 	case TOTAL_INSNS:
952 	case TOTAL_STATES:
953 	case PEAK_STATES:
954 	case MAX_STATES_PER_INSN:
955 	case MARK_READ_MAX_LEN: {
956 		long v1 = s1->stats[id];
957 		long v2 = s2->stats[id];
958 
959 		if (v1 != v2)
960 			cmp = v1 < v2 ? -1 : 1;
961 		break;
962 	}
963 	default:
964 		fprintf(stderr, "Unrecognized stat #%d\n", id);
965 		exit(1);
966 	}
967 
968 	return asc ? cmp : -cmp;
969 }
970 
971 static int cmp_prog_stats(const void *v1, const void *v2)
972 {
973 	const struct verif_stats *s1 = v1, *s2 = v2;
974 	int i, cmp;
975 
976 	for (i = 0; i < env.sort_spec.spec_cnt; i++) {
977 		cmp = cmp_stat(s1, s2, env.sort_spec.ids[i], env.sort_spec.asc[i]);
978 		if (cmp != 0)
979 			return cmp;
980 	}
981 
982 	/* always disambiguate with file+prog, which are unique */
983 	cmp = strcmp(s1->file_name, s2->file_name);
984 	if (cmp != 0)
985 		return cmp;
986 	return strcmp(s1->prog_name, s2->prog_name);
987 }
988 
989 static void fetch_join_stat_value(const struct verif_stats_join *s,
990 				  enum stat_id id, enum stat_variant var,
991 				  const char **str_val,
992 				  double *num_val)
993 {
994 	long v1, v2;
995 
996 	if (id == FILE_NAME) {
997 		*str_val = s->file_name;
998 		return;
999 	}
1000 	if (id == PROG_NAME) {
1001 		*str_val = s->prog_name;
1002 		return;
1003 	}
1004 
1005 	v1 = s->stats_a ? s->stats_a->stats[id] : 0;
1006 	v2 = s->stats_b ? s->stats_b->stats[id] : 0;
1007 
1008 	switch (var) {
1009 	case VARIANT_A:
1010 		if (!s->stats_a)
1011 			*num_val = -DBL_MAX;
1012 		else
1013 			*num_val = s->stats_a->stats[id];
1014 		return;
1015 	case VARIANT_B:
1016 		if (!s->stats_b)
1017 			*num_val = -DBL_MAX;
1018 		else
1019 			*num_val = s->stats_b->stats[id];
1020 		return;
1021 	case VARIANT_DIFF:
1022 		if (!s->stats_a || !s->stats_b)
1023 			*num_val = -DBL_MAX;
1024 		else if (id == VERDICT)
1025 			*num_val = v1 == v2 ? 1.0 /* MATCH */ : 0.0 /* MISMATCH */;
1026 		else
1027 			*num_val = (double)(v2 - v1);
1028 		return;
1029 	case VARIANT_PCT:
1030 		if (!s->stats_a || !s->stats_b) {
1031 			*num_val = -DBL_MAX;
1032 		} else if (v1 == 0) {
1033 			if (v1 == v2)
1034 				*num_val = 0.0;
1035 			else
1036 				*num_val = v2 < v1 ? -100.0 : 100.0;
1037 		} else {
1038 			 *num_val = (v2 - v1) * 100.0 / v1;
1039 		}
1040 		return;
1041 	}
1042 }
1043 
1044 static int cmp_join_stat(const struct verif_stats_join *s1,
1045 			 const struct verif_stats_join *s2,
1046 			 enum stat_id id, enum stat_variant var, bool asc)
1047 {
1048 	const char *str1 = NULL, *str2 = NULL;
1049 	double v1, v2;
1050 	int cmp = 0;
1051 
1052 	fetch_join_stat_value(s1, id, var, &str1, &v1);
1053 	fetch_join_stat_value(s2, id, var, &str2, &v2);
1054 
1055 	if (str1)
1056 		cmp = strcmp(str1, str2);
1057 	else if (v1 != v2)
1058 		cmp = v1 < v2 ? -1 : 1;
1059 
1060 	return asc ? cmp : -cmp;
1061 }
1062 
1063 static int cmp_join_stats(const void *v1, const void *v2)
1064 {
1065 	const struct verif_stats_join *s1 = v1, *s2 = v2;
1066 	int i, cmp;
1067 
1068 	for (i = 0; i < env.sort_spec.spec_cnt; i++) {
1069 		cmp = cmp_join_stat(s1, s2,
1070 				    env.sort_spec.ids[i],
1071 				    env.sort_spec.variants[i],
1072 				    env.sort_spec.asc[i]);
1073 		if (cmp != 0)
1074 			return cmp;
1075 	}
1076 
1077 	/* always disambiguate with file+prog, which are unique */
1078 	cmp = strcmp(s1->file_name, s2->file_name);
1079 	if (cmp != 0)
1080 		return cmp;
1081 	return strcmp(s1->prog_name, s2->prog_name);
1082 }
1083 
/* character the table header underline is made of */
#define HEADER_CHAR '-'
/* separator printed between adjacent table columns */
#define COLUMN_SEP "  "
1086 
1087 static void output_header_underlines(void)
1088 {
1089 	int i, j, len;
1090 
1091 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1092 		len = env.output_spec.lens[i];
1093 
1094 		printf("%s", i == 0 ? "" : COLUMN_SEP);
1095 		for (j = 0; j < len; j++)
1096 			printf("%c", HEADER_CHAR);
1097 	}
1098 	printf("\n");
1099 }
1100 
1101 static void output_headers(enum resfmt fmt)
1102 {
1103 	const char *fmt_str;
1104 	int i, len;
1105 
1106 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1107 		int id = env.output_spec.ids[i];
1108 		int *max_len = &env.output_spec.lens[i];
1109 
1110 		switch (fmt) {
1111 		case RESFMT_TABLE_CALCLEN:
1112 			len = snprintf(NULL, 0, "%s", stat_defs[id].header);
1113 			if (len > *max_len)
1114 				*max_len = len;
1115 			break;
1116 		case RESFMT_TABLE:
1117 			fmt_str = stat_defs[id].left_aligned ? "%s%-*s" : "%s%*s";
1118 			printf(fmt_str, i == 0 ? "" : COLUMN_SEP,  *max_len, stat_defs[id].header);
1119 			if (i == env.output_spec.spec_cnt - 1)
1120 				printf("\n");
1121 			break;
1122 		case RESFMT_CSV:
1123 			printf("%s%s", i == 0 ? "" : ",", stat_defs[id].names[0]);
1124 			if (i == env.output_spec.spec_cnt - 1)
1125 				printf("\n");
1126 			break;
1127 		}
1128 	}
1129 
1130 	if (fmt == RESFMT_TABLE)
1131 		output_header_underlines();
1132 }
1133 
1134 static void prepare_value(const struct verif_stats *s, enum stat_id id,
1135 			  const char **str, long *val)
1136 {
1137 	switch (id) {
1138 	case FILE_NAME:
1139 		*str = s ? s->file_name : "N/A";
1140 		break;
1141 	case PROG_NAME:
1142 		*str = s ? s->prog_name : "N/A";
1143 		break;
1144 	case VERDICT:
1145 		if (!s)
1146 			*str = "N/A";
1147 		else
1148 			*str = s->stats[VERDICT] ? "success" : "failure";
1149 		break;
1150 	case DURATION:
1151 	case TOTAL_INSNS:
1152 	case TOTAL_STATES:
1153 	case PEAK_STATES:
1154 	case MAX_STATES_PER_INSN:
1155 	case MARK_READ_MAX_LEN:
1156 		*val = s ? s->stats[id] : 0;
1157 		break;
1158 	default:
1159 		fprintf(stderr, "Unrecognized stat #%d\n", id);
1160 		exit(1);
1161 	}
1162 }
1163 
1164 static void output_stats(const struct verif_stats *s, enum resfmt fmt, bool last)
1165 {
1166 	int i;
1167 
1168 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1169 		int id = env.output_spec.ids[i];
1170 		int *max_len = &env.output_spec.lens[i], len;
1171 		const char *str = NULL;
1172 		long val = 0;
1173 
1174 		prepare_value(s, id, &str, &val);
1175 
1176 		switch (fmt) {
1177 		case RESFMT_TABLE_CALCLEN:
1178 			if (str)
1179 				len = snprintf(NULL, 0, "%s", str);
1180 			else
1181 				len = snprintf(NULL, 0, "%ld", val);
1182 			if (len > *max_len)
1183 				*max_len = len;
1184 			break;
1185 		case RESFMT_TABLE:
1186 			if (str)
1187 				printf("%s%-*s", i == 0 ? "" : COLUMN_SEP, *max_len, str);
1188 			else
1189 				printf("%s%*ld", i == 0 ? "" : COLUMN_SEP,  *max_len, val);
1190 			if (i == env.output_spec.spec_cnt - 1)
1191 				printf("\n");
1192 			break;
1193 		case RESFMT_CSV:
1194 			if (str)
1195 				printf("%s%s", i == 0 ? "" : ",", str);
1196 			else
1197 				printf("%s%ld", i == 0 ? "" : ",", val);
1198 			if (i == env.output_spec.spec_cnt - 1)
1199 				printf("\n");
1200 			break;
1201 		}
1202 	}
1203 
1204 	if (last && fmt == RESFMT_TABLE) {
1205 		output_header_underlines();
1206 		printf("Done. Processed %d files, %d programs. Skipped %d files, %d programs.\n",
1207 		       env.files_processed, env.files_skipped, env.progs_processed, env.progs_skipped);
1208 	}
1209 }
1210 
1211 static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats *st)
1212 {
1213 	switch (id) {
1214 	case FILE_NAME:
1215 		st->file_name = strdup(str);
1216 		if (!st->file_name)
1217 			return -ENOMEM;
1218 		break;
1219 	case PROG_NAME:
1220 		st->prog_name = strdup(str);
1221 		if (!st->prog_name)
1222 			return -ENOMEM;
1223 		break;
1224 	case VERDICT:
1225 		if (strcmp(str, "success") == 0) {
1226 			st->stats[VERDICT] = true;
1227 		} else if (strcmp(str, "failure") == 0) {
1228 			st->stats[VERDICT] = false;
1229 		} else {
1230 			fprintf(stderr, "Unrecognized verification verdict '%s'\n", str);
1231 			return -EINVAL;
1232 		}
1233 		break;
1234 	case DURATION:
1235 	case TOTAL_INSNS:
1236 	case TOTAL_STATES:
1237 	case PEAK_STATES:
1238 	case MAX_STATES_PER_INSN:
1239 	case MARK_READ_MAX_LEN: {
1240 		long val;
1241 		int err, n;
1242 
1243 		if (sscanf(str, "%ld %n", &val, &n) != 1 || n != strlen(str)) {
1244 			err = -errno;
1245 			fprintf(stderr, "Failed to parse '%s' as integer\n", str);
1246 			return err;
1247 		}
1248 
1249 		st->stats[id] = val;
1250 		break;
1251 	}
1252 	default:
1253 		fprintf(stderr, "Unrecognized stat #%d\n", id);
1254 		return -EINVAL;
1255 	}
1256 	return 0;
1257 }
1258 
1259 static int parse_stats_csv(const char *filename, struct stat_specs *specs,
1260 			   struct verif_stats **statsp, int *stat_cntp)
1261 {
1262 	char line[4096];
1263 	FILE *f;
1264 	int err = 0;
1265 	bool header = true;
1266 
1267 	f = fopen(filename, "r");
1268 	if (!f) {
1269 		err = -errno;
1270 		fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
1271 		return err;
1272 	}
1273 
1274 	*stat_cntp = 0;
1275 
1276 	while (fgets(line, sizeof(line), f)) {
1277 		char *input = line, *state = NULL, *next;
1278 		struct verif_stats *st = NULL;
1279 		int col = 0;
1280 
1281 		if (!header) {
1282 			void *tmp;
1283 
1284 			tmp = realloc(*statsp, (*stat_cntp + 1) * sizeof(**statsp));
1285 			if (!tmp) {
1286 				err = -ENOMEM;
1287 				goto cleanup;
1288 			}
1289 			*statsp = tmp;
1290 
1291 			st = &(*statsp)[*stat_cntp];
1292 			memset(st, 0, sizeof(*st));
1293 
1294 			*stat_cntp += 1;
1295 		}
1296 
1297 		while ((next = strtok_r(state ? NULL : input, ",\n", &state))) {
1298 			if (header) {
1299 				/* for the first line, set up spec stats */
1300 				err = parse_stat(next, specs);
1301 				if (err)
1302 					goto cleanup;
1303 				continue;
1304 			}
1305 
1306 			/* for all other lines, parse values based on spec */
1307 			if (col >= specs->spec_cnt) {
1308 				fprintf(stderr, "Found extraneous column #%d in row #%d of '%s'\n",
1309 					col, *stat_cntp, filename);
1310 				err = -EINVAL;
1311 				goto cleanup;
1312 			}
1313 			err = parse_stat_value(next, specs->ids[col], st);
1314 			if (err)
1315 				goto cleanup;
1316 			col++;
1317 		}
1318 
1319 		if (header) {
1320 			header = false;
1321 			continue;
1322 		}
1323 
1324 		if (col < specs->spec_cnt) {
1325 			fprintf(stderr, "Not enough columns in row #%d in '%s'\n",
1326 				*stat_cntp, filename);
1327 			err = -EINVAL;
1328 			goto cleanup;
1329 		}
1330 
1331 		if (!st->file_name || !st->prog_name) {
1332 			fprintf(stderr, "Row #%d in '%s' is missing file and/or program name\n",
1333 				*stat_cntp, filename);
1334 			err = -EINVAL;
1335 			goto cleanup;
1336 		}
1337 
1338 		/* in comparison mode we can only check filters after we
1339 		 * parsed entire line; if row should be ignored we pretend we
1340 		 * never parsed it
1341 		 */
1342 		if (!should_process_file_prog(st->file_name, st->prog_name)) {
1343 			free(st->file_name);
1344 			free(st->prog_name);
1345 			*stat_cntp -= 1;
1346 		}
1347 	}
1348 
1349 	if (!feof(f)) {
1350 		err = -errno;
1351 		fprintf(stderr, "Failed I/O for '%s': %d\n", filename, err);
1352 	}
1353 
1354 cleanup:
1355 	fclose(f);
1356 	return err;
1357 }
1358 
1359 /* empty/zero stats for mismatched rows */
1360 static const struct verif_stats fallback_stats = { .file_name = "", .prog_name = "" };
1361 
1362 static bool is_key_stat(enum stat_id id)
1363 {
1364 	return id == FILE_NAME || id == PROG_NAME;
1365 }
1366 
1367 static void output_comp_header_underlines(void)
1368 {
1369 	int i, j, k;
1370 
1371 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1372 		int id = env.output_spec.ids[i];
1373 		int max_j = is_key_stat(id) ? 1 : 3;
1374 
1375 		for (j = 0; j < max_j; j++) {
1376 			int len = env.output_spec.lens[3 * i + j];
1377 
1378 			printf("%s", i + j == 0 ? "" : COLUMN_SEP);
1379 
1380 			for (k = 0; k < len; k++)
1381 				printf("%c", HEADER_CHAR);
1382 		}
1383 	}
1384 	printf("\n");
1385 }
1386 
1387 static void output_comp_headers(enum resfmt fmt)
1388 {
1389 	static const char *table_sfxs[3] = {" (A)", " (B)", " (DIFF)"};
1390 	static const char *name_sfxs[3] = {"_base", "_comp", "_diff"};
1391 	int i, j, len;
1392 
1393 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1394 		int id = env.output_spec.ids[i];
1395 		/* key stats don't have A/B/DIFF columns, they are common for both data sets */
1396 		int max_j = is_key_stat(id) ? 1 : 3;
1397 
1398 		for (j = 0; j < max_j; j++) {
1399 			int *max_len = &env.output_spec.lens[3 * i + j];
1400 			bool last = (i == env.output_spec.spec_cnt - 1) && (j == max_j - 1);
1401 			const char *sfx;
1402 
1403 			switch (fmt) {
1404 			case RESFMT_TABLE_CALCLEN:
1405 				sfx = is_key_stat(id) ? "" : table_sfxs[j];
1406 				len = snprintf(NULL, 0, "%s%s", stat_defs[id].header, sfx);
1407 				if (len > *max_len)
1408 					*max_len = len;
1409 				break;
1410 			case RESFMT_TABLE:
1411 				sfx = is_key_stat(id) ? "" : table_sfxs[j];
1412 				printf("%s%-*s%s", i + j == 0 ? "" : COLUMN_SEP,
1413 				       *max_len - (int)strlen(sfx), stat_defs[id].header, sfx);
1414 				if (last)
1415 					printf("\n");
1416 				break;
1417 			case RESFMT_CSV:
1418 				sfx = is_key_stat(id) ? "" : name_sfxs[j];
1419 				printf("%s%s%s", i + j == 0 ? "" : ",", stat_defs[id].names[0], sfx);
1420 				if (last)
1421 					printf("\n");
1422 				break;
1423 			}
1424 		}
1425 	}
1426 
1427 	if (fmt == RESFMT_TABLE)
1428 		output_comp_header_underlines();
1429 }
1430 
1431 static void output_comp_stats(const struct verif_stats_join *join_stats,
1432 			      enum resfmt fmt, bool last)
1433 {
1434 	const struct verif_stats *base = join_stats->stats_a;
1435 	const struct verif_stats *comp = join_stats->stats_b;
1436 	char base_buf[1024] = {}, comp_buf[1024] = {}, diff_buf[1024] = {};
1437 	int i;
1438 
1439 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1440 		int id = env.output_spec.ids[i], len;
1441 		int *max_len_base = &env.output_spec.lens[3 * i + 0];
1442 		int *max_len_comp = &env.output_spec.lens[3 * i + 1];
1443 		int *max_len_diff = &env.output_spec.lens[3 * i + 2];
1444 		const char *base_str = NULL, *comp_str = NULL;
1445 		long base_val = 0, comp_val = 0, diff_val = 0;
1446 
1447 		prepare_value(base, id, &base_str, &base_val);
1448 		prepare_value(comp, id, &comp_str, &comp_val);
1449 
1450 		/* normalize all the outputs to be in string buffers for simplicity */
1451 		if (is_key_stat(id)) {
1452 			/* key stats (file and program name) are always strings */
1453 			if (base)
1454 				snprintf(base_buf, sizeof(base_buf), "%s", base_str);
1455 			else
1456 				snprintf(base_buf, sizeof(base_buf), "%s", comp_str);
1457 		} else if (base_str) {
1458 			snprintf(base_buf, sizeof(base_buf), "%s", base_str);
1459 			snprintf(comp_buf, sizeof(comp_buf), "%s", comp_str);
1460 			if (!base || !comp)
1461 				snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A");
1462 			else if (strcmp(base_str, comp_str) == 0)
1463 				snprintf(diff_buf, sizeof(diff_buf), "%s", "MATCH");
1464 			else
1465 				snprintf(diff_buf, sizeof(diff_buf), "%s", "MISMATCH");
1466 		} else {
1467 			double p = 0.0;
1468 
1469 			if (base)
1470 				snprintf(base_buf, sizeof(base_buf), "%ld", base_val);
1471 			else
1472 				snprintf(base_buf, sizeof(base_buf), "%s", "N/A");
1473 			if (comp)
1474 				snprintf(comp_buf, sizeof(comp_buf), "%ld", comp_val);
1475 			else
1476 				snprintf(comp_buf, sizeof(comp_buf), "%s", "N/A");
1477 
1478 			diff_val = comp_val - base_val;
1479 			if (!base || !comp) {
1480 				snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A");
1481 			} else {
1482 				if (base_val == 0) {
1483 					if (comp_val == base_val)
1484 						p = 0.0; /* avoid +0 (+100%) case */
1485 					else
1486 						p = comp_val < base_val ? -100.0 : 100.0;
1487 				} else {
1488 					 p = diff_val * 100.0 / base_val;
1489 				}
1490 				snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)", diff_val, p);
1491 			}
1492 		}
1493 
1494 		switch (fmt) {
1495 		case RESFMT_TABLE_CALCLEN:
1496 			len = strlen(base_buf);
1497 			if (len > *max_len_base)
1498 				*max_len_base = len;
1499 			if (!is_key_stat(id)) {
1500 				len = strlen(comp_buf);
1501 				if (len > *max_len_comp)
1502 					*max_len_comp = len;
1503 				len = strlen(diff_buf);
1504 				if (len > *max_len_diff)
1505 					*max_len_diff = len;
1506 			}
1507 			break;
1508 		case RESFMT_TABLE: {
1509 			/* string outputs are left-aligned, number outputs are right-aligned */
1510 			const char *fmt = base_str ? "%s%-*s" : "%s%*s";
1511 
1512 			printf(fmt, i == 0 ? "" : COLUMN_SEP, *max_len_base, base_buf);
1513 			if (!is_key_stat(id)) {
1514 				printf(fmt, COLUMN_SEP, *max_len_comp, comp_buf);
1515 				printf(fmt, COLUMN_SEP, *max_len_diff, diff_buf);
1516 			}
1517 			if (i == env.output_spec.spec_cnt - 1)
1518 				printf("\n");
1519 			break;
1520 		}
1521 		case RESFMT_CSV:
1522 			printf("%s%s", i == 0 ? "" : ",", base_buf);
1523 			if (!is_key_stat(id)) {
1524 				printf("%s%s", i == 0 ? "" : ",", comp_buf);
1525 				printf("%s%s", i == 0 ? "" : ",", diff_buf);
1526 			}
1527 			if (i == env.output_spec.spec_cnt - 1)
1528 				printf("\n");
1529 			break;
1530 		}
1531 	}
1532 
1533 	if (last && fmt == RESFMT_TABLE)
1534 		output_comp_header_underlines();
1535 }
1536 
1537 static int cmp_stats_key(const struct verif_stats *base, const struct verif_stats *comp)
1538 {
1539 	int r;
1540 
1541 	r = strcmp(base->file_name, comp->file_name);
1542 	if (r != 0)
1543 		return r;
1544 	return strcmp(base->prog_name, comp->prog_name);
1545 }
1546 
1547 static bool is_join_stat_filter_matched(struct filter *f, const struct verif_stats_join *stats)
1548 {
1549 	static const double eps = 1e-9;
1550 	const char *str = NULL;
1551 	double value = 0.0;
1552 
1553 	fetch_join_stat_value(stats, f->stat_id, f->stat_var, &str, &value);
1554 
1555 	switch (f->op) {
1556 	case OP_EQ: return value > f->value - eps && value < f->value + eps;
1557 	case OP_NEQ: return value < f->value - eps || value > f->value + eps;
1558 	case OP_LT: return value < f->value - eps;
1559 	case OP_LE: return value <= f->value + eps;
1560 	case OP_GT: return value > f->value + eps;
1561 	case OP_GE: return value >= f->value - eps;
1562 	}
1563 
1564 	fprintf(stderr, "BUG: unknown filter op %d!\n", f->op);
1565 	return false;
1566 }
1567 
1568 static bool should_output_join_stats(const struct verif_stats_join *stats)
1569 {
1570 	struct filter *f;
1571 	int i, allow_cnt = 0;
1572 
1573 	for (i = 0; i < env.deny_filter_cnt; i++) {
1574 		f = &env.deny_filters[i];
1575 		if (f->kind != FILTER_STAT)
1576 			continue;
1577 
1578 		if (is_join_stat_filter_matched(f, stats))
1579 			return false;
1580 	}
1581 
1582 	for (i = 0; i < env.allow_filter_cnt; i++) {
1583 		f = &env.allow_filters[i];
1584 		if (f->kind != FILTER_STAT)
1585 			continue;
1586 		allow_cnt++;
1587 
1588 		if (is_join_stat_filter_matched(f, stats))
1589 			return true;
1590 	}
1591 
1592 	/* if there are no stat allowed filters, pass everything through */
1593 	return allow_cnt == 0;
1594 }
1595 
1596 static int handle_comparison_mode(void)
1597 {
1598 	struct stat_specs base_specs = {}, comp_specs = {};
1599 	struct stat_specs tmp_sort_spec;
1600 	enum resfmt cur_fmt;
1601 	int err, i, j, last_idx;
1602 
1603 	if (env.filename_cnt != 2) {
1604 		fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n\n");
1605 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
1606 		return -EINVAL;
1607 	}
1608 
1609 	err = parse_stats_csv(env.filenames[0], &base_specs,
1610 			      &env.baseline_stats, &env.baseline_stat_cnt);
1611 	if (err) {
1612 		fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err);
1613 		return err;
1614 	}
1615 	err = parse_stats_csv(env.filenames[1], &comp_specs,
1616 			      &env.prog_stats, &env.prog_stat_cnt);
1617 	if (err) {
1618 		fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[1], err);
1619 		return err;
1620 	}
1621 
1622 	/* To keep it simple we validate that the set and order of stats in
1623 	 * both CSVs are exactly the same. This can be lifted with a bit more
1624 	 * pre-processing later.
1625 	 */
1626 	if (base_specs.spec_cnt != comp_specs.spec_cnt) {
1627 		fprintf(stderr, "Number of stats in '%s' and '%s' differs (%d != %d)!\n",
1628 			env.filenames[0], env.filenames[1],
1629 			base_specs.spec_cnt, comp_specs.spec_cnt);
1630 		return -EINVAL;
1631 	}
1632 	for (i = 0; i < base_specs.spec_cnt; i++) {
1633 		if (base_specs.ids[i] != comp_specs.ids[i]) {
1634 			fprintf(stderr, "Stats composition differs between '%s' and '%s' (%s != %s)!\n",
1635 				env.filenames[0], env.filenames[1],
1636 				stat_defs[base_specs.ids[i]].names[0],
1637 				stat_defs[comp_specs.ids[i]].names[0]);
1638 			return -EINVAL;
1639 		}
1640 	}
1641 
1642 	/* Replace user-specified sorting spec with file+prog sorting rule to
1643 	 * be able to join two datasets correctly. Once we are done, we will
1644 	 * restore the original sort spec.
1645 	 */
1646 	tmp_sort_spec = env.sort_spec;
1647 	env.sort_spec = join_sort_spec;
1648 	qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
1649 	qsort(env.baseline_stats, env.baseline_stat_cnt, sizeof(*env.baseline_stats), cmp_prog_stats);
1650 	env.sort_spec = tmp_sort_spec;
1651 
1652 	/* Join two datasets together. If baseline and comparison datasets
1653 	 * have different subset of rows (we match by 'object + prog' as
1654 	 * a unique key) then assume empty/missing/zero value for rows that
1655 	 * are missing in the opposite data set.
1656 	 */
1657 	i = j = 0;
1658 	while (i < env.baseline_stat_cnt || j < env.prog_stat_cnt) {
1659 		const struct verif_stats *base, *comp;
1660 		struct verif_stats_join *join;
1661 		void *tmp;
1662 		int r;
1663 
1664 		base = i < env.baseline_stat_cnt ? &env.baseline_stats[i] : &fallback_stats;
1665 		comp = j < env.prog_stat_cnt ? &env.prog_stats[j] : &fallback_stats;
1666 
1667 		if (!base->file_name || !base->prog_name) {
1668 			fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n",
1669 				i, env.filenames[0]);
1670 			return -EINVAL;
1671 		}
1672 		if (!comp->file_name || !comp->prog_name) {
1673 			fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n",
1674 				j, env.filenames[1]);
1675 			return -EINVAL;
1676 		}
1677 
1678 		tmp = realloc(env.join_stats, (env.join_stat_cnt + 1) * sizeof(*env.join_stats));
1679 		if (!tmp)
1680 			return -ENOMEM;
1681 		env.join_stats = tmp;
1682 
1683 		join = &env.join_stats[env.join_stat_cnt];
1684 		memset(join, 0, sizeof(*join));
1685 
1686 		r = cmp_stats_key(base, comp);
1687 		if (r == 0) {
1688 			join->file_name = base->file_name;
1689 			join->prog_name = base->prog_name;
1690 			join->stats_a = base;
1691 			join->stats_b = comp;
1692 			i++;
1693 			j++;
1694 		} else if (comp == &fallback_stats || r < 0) {
1695 			join->file_name = base->file_name;
1696 			join->prog_name = base->prog_name;
1697 			join->stats_a = base;
1698 			join->stats_b = NULL;
1699 			i++;
1700 		} else {
1701 			join->file_name = comp->file_name;
1702 			join->prog_name = comp->prog_name;
1703 			join->stats_a = NULL;
1704 			join->stats_b = comp;
1705 			j++;
1706 		}
1707 		env.join_stat_cnt += 1;
1708 	}
1709 
1710 	/* now sort joined results accorsing to sort spec */
1711 	qsort(env.join_stats, env.join_stat_cnt, sizeof(*env.join_stats), cmp_join_stats);
1712 
1713 	/* for human-readable table output we need to do extra pass to
1714 	 * calculate column widths, so we substitute current output format
1715 	 * with RESFMT_TABLE_CALCLEN and later revert it back to RESFMT_TABLE
1716 	 * and do everything again.
1717 	 */
1718 	if (env.out_fmt == RESFMT_TABLE)
1719 		cur_fmt = RESFMT_TABLE_CALCLEN;
1720 	else
1721 		cur_fmt = env.out_fmt;
1722 
1723 one_more_time:
1724 	output_comp_headers(cur_fmt);
1725 
1726 	for (i = 0; i < env.join_stat_cnt; i++) {
1727 		const struct verif_stats_join *join = &env.join_stats[i];
1728 
1729 		if (!should_output_join_stats(join))
1730 			continue;
1731 
1732 		if (cur_fmt == RESFMT_TABLE_CALCLEN)
1733 			last_idx = i;
1734 
1735 		output_comp_stats(join, cur_fmt, i == last_idx);
1736 	}
1737 
1738 	if (cur_fmt == RESFMT_TABLE_CALCLEN) {
1739 		cur_fmt = RESFMT_TABLE;
1740 		goto one_more_time; /* ... this time with feeling */
1741 	}
1742 
1743 	return 0;
1744 }
1745 
1746 static bool is_stat_filter_matched(struct filter *f, const struct verif_stats *stats)
1747 {
1748 	long value = stats->stats[f->stat_id];
1749 
1750 	switch (f->op) {
1751 	case OP_EQ: return value == f->value;
1752 	case OP_NEQ: return value != f->value;
1753 	case OP_LT: return value < f->value;
1754 	case OP_LE: return value <= f->value;
1755 	case OP_GT: return value > f->value;
1756 	case OP_GE: return value >= f->value;
1757 	}
1758 
1759 	fprintf(stderr, "BUG: unknown filter op %d!\n", f->op);
1760 	return false;
1761 }
1762 
1763 static bool should_output_stats(const struct verif_stats *stats)
1764 {
1765 	struct filter *f;
1766 	int i, allow_cnt = 0;
1767 
1768 	for (i = 0; i < env.deny_filter_cnt; i++) {
1769 		f = &env.deny_filters[i];
1770 		if (f->kind != FILTER_STAT)
1771 			continue;
1772 
1773 		if (is_stat_filter_matched(f, stats))
1774 			return false;
1775 	}
1776 
1777 	for (i = 0; i < env.allow_filter_cnt; i++) {
1778 		f = &env.allow_filters[i];
1779 		if (f->kind != FILTER_STAT)
1780 			continue;
1781 		allow_cnt++;
1782 
1783 		if (is_stat_filter_matched(f, stats))
1784 			return true;
1785 	}
1786 
1787 	/* if there are no stat allowed filters, pass everything through */
1788 	return allow_cnt == 0;
1789 }
1790 
1791 static void output_prog_stats(void)
1792 {
1793 	const struct verif_stats *stats;
1794 	int i, last_stat_idx = 0;
1795 
1796 	if (env.out_fmt == RESFMT_TABLE) {
1797 		/* calculate column widths */
1798 		output_headers(RESFMT_TABLE_CALCLEN);
1799 		for (i = 0; i < env.prog_stat_cnt; i++) {
1800 			stats = &env.prog_stats[i];
1801 			if (!should_output_stats(stats))
1802 				continue;
1803 			output_stats(stats, RESFMT_TABLE_CALCLEN, false);
1804 			last_stat_idx = i;
1805 		}
1806 	}
1807 
1808 	/* actually output the table */
1809 	output_headers(env.out_fmt);
1810 	for (i = 0; i < env.prog_stat_cnt; i++) {
1811 		stats = &env.prog_stats[i];
1812 		if (!should_output_stats(stats))
1813 			continue;
1814 		output_stats(stats, env.out_fmt, i == last_stat_idx);
1815 	}
1816 }
1817 
1818 static int handle_verif_mode(void)
1819 {
1820 	int i, err;
1821 
1822 	if (env.filename_cnt == 0) {
1823 		fprintf(stderr, "Please provide path to BPF object file!\n\n");
1824 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
1825 		return -EINVAL;
1826 	}
1827 
1828 	for (i = 0; i < env.filename_cnt; i++) {
1829 		err = process_obj(env.filenames[i]);
1830 		if (err) {
1831 			fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err);
1832 			return err;
1833 		}
1834 	}
1835 
1836 	qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
1837 
1838 	output_prog_stats();
1839 
1840 	return 0;
1841 }
1842 
1843 static int handle_replay_mode(void)
1844 {
1845 	struct stat_specs specs = {};
1846 	int err;
1847 
1848 	if (env.filename_cnt != 1) {
1849 		fprintf(stderr, "Replay mode expects exactly one input CSV file!\n\n");
1850 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
1851 		return -EINVAL;
1852 	}
1853 
1854 	err = parse_stats_csv(env.filenames[0], &specs,
1855 			      &env.prog_stats, &env.prog_stat_cnt);
1856 	if (err) {
1857 		fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err);
1858 		return err;
1859 	}
1860 
1861 	qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
1862 
1863 	output_prog_stats();
1864 
1865 	return 0;
1866 }
1867 
1868 int main(int argc, char **argv)
1869 {
1870 	int err = 0, i;
1871 
1872 	if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
1873 		return 1;
1874 
1875 	if (env.verbose && env.quiet) {
1876 		fprintf(stderr, "Verbose and quiet modes are incompatible, please specify just one or neither!\n\n");
1877 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
1878 		return 1;
1879 	}
1880 	if (env.verbose && env.log_level == 0)
1881 		env.log_level = 1;
1882 
1883 	if (env.output_spec.spec_cnt == 0) {
1884 		if (env.out_fmt == RESFMT_CSV)
1885 			env.output_spec = default_csv_output_spec;
1886 		else
1887 			env.output_spec = default_output_spec;
1888 	}
1889 	if (env.sort_spec.spec_cnt == 0)
1890 		env.sort_spec = default_sort_spec;
1891 
1892 	if (env.comparison_mode && env.replay_mode) {
1893 		fprintf(stderr, "Can't specify replay and comparison mode at the same time!\n\n");
1894 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
1895 		return 1;
1896 	}
1897 
1898 	if (env.comparison_mode)
1899 		err = handle_comparison_mode();
1900 	else if (env.replay_mode)
1901 		err = handle_replay_mode();
1902 	else
1903 		err = handle_verif_mode();
1904 
1905 	free_verif_stats(env.prog_stats, env.prog_stat_cnt);
1906 	free_verif_stats(env.baseline_stats, env.baseline_stat_cnt);
1907 	free(env.join_stats);
1908 	for (i = 0; i < env.filename_cnt; i++)
1909 		free(env.filenames[i]);
1910 	free(env.filenames);
1911 	for (i = 0; i < env.allow_filter_cnt; i++) {
1912 		free(env.allow_filters[i].any_glob);
1913 		free(env.allow_filters[i].file_glob);
1914 		free(env.allow_filters[i].prog_glob);
1915 	}
1916 	free(env.allow_filters);
1917 	for (i = 0; i < env.deny_filter_cnt; i++) {
1918 		free(env.deny_filters[i].any_glob);
1919 		free(env.deny_filters[i].file_glob);
1920 		free(env.deny_filters[i].prog_glob);
1921 	}
1922 	free(env.deny_filters);
1923 	return -err;
1924 }
1925