xref: /linux/tools/testing/selftests/bpf/veristat.c (revision d0d106a2bd21499901299160744e5fe9f4c83ddb)
1 // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
2 /* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
3 #define _GNU_SOURCE
4 #include <argp.h>
5 #include <libgen.h>
6 #include <string.h>
7 #include <stdlib.h>
8 #include <sched.h>
9 #include <pthread.h>
10 #include <dirent.h>
11 #include <signal.h>
12 #include <fcntl.h>
13 #include <unistd.h>
14 #include <sys/time.h>
15 #include <sys/sysinfo.h>
16 #include <sys/stat.h>
17 #include <bpf/libbpf.h>
18 #include <bpf/btf.h>
19 #include <bpf/bpf.h>
20 #include <libelf.h>
21 #include <gelf.h>
22 #include <float.h>
23 #include <math.h>
24 #include <limits.h>
25 
/* Number of elements in an array; only valid for real arrays, not pointers */
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif

/* Larger of two values; note that an argument may be evaluated twice */
#ifndef max
#define max(a, b) ((a) > (b) ? (a) : (b))
#endif

/* Smaller of two values; note that an argument may be evaluated twice */
#ifndef min
#define min(a, b) ((a) < (b) ? (a) : (b))
#endif
37 
/* Identifiers of all stats known to the tool.
 *
 * Ids preceding FILE_NAME are numeric and index the stats[] array of
 * struct verif_stats; FILE_NAME and PROG_NAME are string-valued and are
 * kept in dedicated fields instead.
 */
enum stat_id {
	VERDICT,
	DURATION,
	TOTAL_INSNS,
	TOTAL_STATES,
	PEAK_STATES,
	MAX_STATES_PER_INSN,
	MARK_READ_MAX_LEN,
	SIZE,
	JITED_SIZE,
	STACK,
	PROG_TYPE,
	ATTACH_TYPE,

	/* string-valued stats start here */
	FILE_NAME,
	PROG_NAME,

	/* total number of stat ids, string-valued ones included */
	ALL_STATS_CNT,
	/* number of numeric stat ids, i.e. those preceding FILE_NAME */
	NUM_STATS_CNT = FILE_NAME - VERDICT,
};
58 
59 /* In comparison mode each stat can specify up to four different values:
60  *   - A side value;
61  *   - B side value;
62  *   - absolute diff value;
63  *   - relative (percentage) diff value.
64  *
65  * When specifying stat specs in comparison mode, user can use one of the
66  * following variant suffixes to specify which exact variant should be used for
67  * ordering or filtering:
68  *   - `_a` for A side value;
69  *   - `_b` for B side value;
70  *   - `_diff` for absolute diff value;
71  *   - `_pct` for relative (percentage) diff value.
72  *
73  * If no variant suffix is provided, then `_b` (control data) is assumed.
74  *
75  * As an example, let's say instructions stat has the following output:
76  *
77  * Insns (A)  Insns (B)  Insns   (DIFF)
78  * ---------  ---------  --------------
79  * 21547      20920       -627 (-2.91%)
80  *
81  * Then:
82  *   - 21547 is A side value (insns_a);
83  *   - 20920 is B side value (insns_b);
84  *   - -627 is absolute diff value (insns_diff);
85  *   - -2.91% is relative diff value (insns_pct).
86  *
87  * For verdict there is no verdict_pct variant.
88  * For file and program name, _a and _b variants are equivalent and there are
89  * no _diff or _pct variants.
90  */
enum stat_variant {
	VARIANT_A,	/* `_a` suffix: A side value */
	VARIANT_B,	/* `_b` suffix: B side value; assumed when no suffix is given */
	VARIANT_DIFF,	/* `_diff` suffix: absolute diff value */
	VARIANT_PCT,	/* `_pct` suffix: relative (percentage) diff value */
};
97 
/* One stats record, identified by file and program name. Both strings are
 * released by free_verif_stats().
 */
struct verif_stats {
	char *file_name;
	char *prog_name;

	/* numeric stats, indexed by enum stat_id values preceding FILE_NAME */
	long stats[NUM_STATS_CNT];
};
104 
105 /* joined comparison mode stats */
struct verif_stats_join {
	char *file_name;
	char *prog_name;

	/* A side and B side stats records of the joined pair */
	const struct verif_stats *stats_a;
	const struct verif_stats *stats_b;
};
113 
/* Ordered list of stat specs as filled in by parse_stat(); spec number i is
 * described by ids[i], variants[i], asc[i] and abs[i].
 */
struct stat_specs {
	/* number of valid specs in the arrays below */
	int spec_cnt;
	enum stat_id ids[ALL_STATS_CNT];
	enum stat_variant variants[ALL_STATS_CNT];
	/* true if ascending order was requested (or is the stat's default) */
	bool asc[ALL_STATS_CNT];
	/* true if stat was given in |<stat>| (absolute value) form */
	bool abs[ALL_STATS_CNT];
	int lens[ALL_STATS_CNT * 3]; /* 3x for comparison mode */
};
122 
/* Result output format; table and csv are selectable with -o/--output-format */
enum resfmt {
	RESFMT_TABLE,
	RESFMT_TABLE_CALCLEN, /* fake format to pre-calculate table's column widths */
	RESFMT_CSV,
};
128 
/* Kind of a filter expression, as determined by append_filter() */
enum filter_kind {
	FILTER_NAME,	/* file and/or program name glob */
	FILTER_STAT,	/* <stat><op><value> numeric comparison */
};
133 
/* Comparison operator of a stat filter; accepted spellings are noted below */
enum operator_kind {
	OP_EQ,		/* == or = */
	OP_NEQ,		/* != or <> */
	OP_LT,		/* < */
	OP_LE,		/* <= */
	OP_GT,		/* > */
	OP_GE,		/* >= */
};
142 
/* A single allow/deny filter. Which group of fields is meaningful depends on
 * kind; append_filter() zeroes the whole struct before filling it in.
 */
struct filter {
	enum filter_kind kind;
	/* FILTER_NAME */
	char *any_glob;		/* glob applied to both file and program name */
	char *file_glob;	/* glob applied to file name only */
	char *prog_glob;	/* glob applied to program name only */
	/* FILTER_STAT */
	enum operator_kind op;
	int stat_id;
	enum stat_variant stat_var;
	long value;		/* right-hand side of the comparison */
	bool abs;
};
156 
/* Global tool state: settings collected by parse_arg() plus data sets and
 * counters.
 */
static struct env {
	/* positional arguments, each one strdup()'ed */
	char **filenames;
	int filename_cnt;
	bool verbose;			/* -v, also implied by -d */
	bool debug;			/* -d */
	bool quiet;			/* -q */
	bool force_checkpoints;		/* -t */
	bool force_reg_invariants;	/* -r */
	enum resfmt out_fmt;		/* -o */
	bool show_version;		/* -V */
	bool comparison_mode;		/* -C */
	bool replay_mode;		/* -R */
	int top_n;			/* -n */

	int log_level;			/* -l */
	int log_size;			/* --log-size */
	bool log_fixed;			/* --log-fixed */

	struct verif_stats *prog_stats;
	int prog_stat_cnt;

	/* baseline_stats is allocated and used only in comparison mode */
	struct verif_stats *baseline_stats;
	int baseline_stat_cnt;

	struct verif_stats_join *join_stats;
	int join_stat_cnt;

	struct stat_specs output_spec;	/* -e */
	struct stat_specs sort_spec;	/* -s */

	/* -f filters; expressions prefixed with '!' go to deny_filters */
	struct filter *allow_filters;
	struct filter *deny_filters;
	int allow_filter_cnt;
	int deny_filter_cnt;

	int files_processed;
	int files_skipped;
	int progs_processed;
	int progs_skipped;
	int top_src_lines;		/* -S */
} env;
199 
libbpf_print_fn(enum libbpf_print_level level,const char * format,va_list args)200 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
201 {
202 	if (!env.verbose)
203 		return 0;
204 	if (level == LIBBPF_DEBUG  && !env.debug)
205 		return 0;
206 	return vfprintf(stderr, format, args);
207 }
208 
/* Version string; can be overridden by defining VERISTAT_VERSION at build time */
#ifndef VERISTAT_VERSION
#define VERISTAT_VERSION "<kernel>"
#endif

/* Well-known argp globals: program version and bug report address */
const char *argp_program_version = "veristat v" VERISTAT_VERSION;
const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
/* Usage text, wired into argp through the .doc field of `argp` */
const char argp_program_doc[] =
"veristat    BPF verifier stats collection and comparison tool.\n"
"\n"
"USAGE: veristat <obj-file> [<obj-file>...]\n"
"   OR: veristat -C <baseline.csv> <comparison.csv>\n"
"   OR: veristat -R <results.csv>\n"
"   OR: veristat -vl2 <to_analyze.bpf.o>\n";
222 
/* Keys of options that have only a long form (--log-fixed, --log-size) */
enum {
	OPT_LOG_FIXED = 1000,
	OPT_LOG_SIZE = 1001,
};
227 
/* Command-line option table; each key is handled by parse_arg() */
static const struct argp_option opts[] = {
	{ NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
	{ "version", 'V', NULL, 0, "Print version" },
	{ "verbose", 'v', NULL, 0, "Verbose mode" },
	{ "debug", 'd', NULL, 0, "Debug mode (turns on libbpf debug logging)" },
	{ "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode, 2 for full verification log)" },
	{ "log-fixed", OPT_LOG_FIXED, NULL, 0, "Disable verifier log rotation" },
	{ "log-size", OPT_LOG_SIZE, "BYTES", 0, "Customize verifier log size (default to 16MB)" },
	{ "top-n", 'n', "N", 0, "Emit only up to first N results." },
	{ "quiet", 'q', NULL, 0, "Quiet mode" },
	{ "emit", 'e', "SPEC", 0, "Specify stats to be emitted" },
	{ "sort", 's', "SPEC", 0, "Specify sort order" },
	{ "output-format", 'o', "FMT", 0, "Result output format (table, csv), default is table." },
	{ "compare", 'C', NULL, 0, "Comparison mode" },
	{ "replay", 'R', NULL, 0, "Replay mode" },
	{ "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)." },
	{ "test-states", 't', NULL, 0,
	  "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" },
	{ "test-reg-invariants", 'r', NULL, 0,
	  "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" },
	{ "top-src-lines", 'S', "N", 0, "Emit N most frequent source code lines" },
	/* all-zero entry terminates the table */
	{},
};
251 
252 static int parse_stats(const char *stats_str, struct stat_specs *specs);
253 static int append_filter(struct filter **filters, int *cnt, const char *str);
254 static int append_filter_file(const char *path);
255 
parse_arg(int key,char * arg,struct argp_state * state)256 static error_t parse_arg(int key, char *arg, struct argp_state *state)
257 {
258 	void *tmp;
259 	int err;
260 
261 	switch (key) {
262 	case 'h':
263 		argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
264 		break;
265 	case 'V':
266 		env.show_version = true;
267 		break;
268 	case 'v':
269 		env.verbose = true;
270 		break;
271 	case 'd':
272 		env.debug = true;
273 		env.verbose = true;
274 		break;
275 	case 'q':
276 		env.quiet = true;
277 		break;
278 	case 'e':
279 		err = parse_stats(arg, &env.output_spec);
280 		if (err)
281 			return err;
282 		break;
283 	case 's':
284 		err = parse_stats(arg, &env.sort_spec);
285 		if (err)
286 			return err;
287 		break;
288 	case 'o':
289 		if (strcmp(arg, "table") == 0) {
290 			env.out_fmt = RESFMT_TABLE;
291 		} else if (strcmp(arg, "csv") == 0) {
292 			env.out_fmt = RESFMT_CSV;
293 		} else {
294 			fprintf(stderr, "Unrecognized output format '%s'\n", arg);
295 			return -EINVAL;
296 		}
297 		break;
298 	case 'l':
299 		errno = 0;
300 		env.log_level = strtol(arg, NULL, 10);
301 		if (errno) {
302 			fprintf(stderr, "invalid log level: %s\n", arg);
303 			argp_usage(state);
304 		}
305 		break;
306 	case OPT_LOG_FIXED:
307 		env.log_fixed = true;
308 		break;
309 	case OPT_LOG_SIZE:
310 		errno = 0;
311 		env.log_size = strtol(arg, NULL, 10);
312 		if (errno) {
313 			fprintf(stderr, "invalid log size: %s\n", arg);
314 			argp_usage(state);
315 		}
316 		break;
317 	case 't':
318 		env.force_checkpoints = true;
319 		break;
320 	case 'r':
321 		env.force_reg_invariants = true;
322 		break;
323 	case 'n':
324 		errno = 0;
325 		env.top_n = strtol(arg, NULL, 10);
326 		if (errno) {
327 			fprintf(stderr, "invalid top N specifier: %s\n", arg);
328 			argp_usage(state);
329 		}
330 	case 'C':
331 		env.comparison_mode = true;
332 		break;
333 	case 'R':
334 		env.replay_mode = true;
335 		break;
336 	case 'f':
337 		if (arg[0] == '@')
338 			err = append_filter_file(arg + 1);
339 		else if (arg[0] == '!')
340 			err = append_filter(&env.deny_filters, &env.deny_filter_cnt, arg + 1);
341 		else
342 			err = append_filter(&env.allow_filters, &env.allow_filter_cnt, arg);
343 		if (err) {
344 			fprintf(stderr, "Failed to collect program filter expressions: %d\n", err);
345 			return err;
346 		}
347 		break;
348 	case 'S':
349 		errno = 0;
350 		env.top_src_lines = strtol(arg, NULL, 10);
351 		if (errno) {
352 			fprintf(stderr, "invalid top lines N specifier: %s\n", arg);
353 			argp_usage(state);
354 		}
355 		break;
356 	case ARGP_KEY_ARG:
357 		tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames));
358 		if (!tmp)
359 			return -ENOMEM;
360 		env.filenames = tmp;
361 		env.filenames[env.filename_cnt] = strdup(arg);
362 		if (!env.filenames[env.filename_cnt])
363 			return -ENOMEM;
364 		env.filename_cnt++;
365 		break;
366 	default:
367 		return ARGP_ERR_UNKNOWN;
368 	}
369 	return 0;
370 }
371 
/* argp parser description: option table, per-option callback and usage text */
static const struct argp argp = {
	.options = opts,
	.parser = parse_arg,
	.doc = argp_program_doc,
};
377 
378 
379 /* Adapted from perf/util/string.c */
/* Check whether the whole of `str` matches glob pattern `pat`.
 *
 * The only special pattern character is '*', which matches any (possibly
 * empty) sequence of characters; everything else has to match literally.
 *
 * Returns true on a match, false otherwise.
 */
static bool glob_matches(const char *str, const char *pat)
{
	/* consume the literal part of the pattern up to the first '*' */
	for (; *str && *pat && *pat != '*'; str++, pat++) {
		if (*str != *pat)
			return false;
	}

	/* no wildcard at this point: both strings must be fully consumed */
	if (*pat != '*')
		return *str == '\0' && *pat == '\0';

	/* a run of '*' is equivalent to a single one */
	while (*pat == '*')
		pat++;
	/* Tail wild card matches all */
	if (*pat == '\0')
		return true;

	/* try to match the rest of the pattern at every remaining position */
	for (; *str; str++) {
		if (glob_matches(str, pat))
			return true;
	}

	/* input is exhausted, but pattern still expects more characters */
	return false;
}
400 
is_bpf_obj_file(const char * path)401 static bool is_bpf_obj_file(const char *path) {
402 	Elf64_Ehdr *ehdr;
403 	int fd, err = -EINVAL;
404 	Elf *elf = NULL;
405 
406 	fd = open(path, O_RDONLY | O_CLOEXEC);
407 	if (fd < 0)
408 		return true; /* we'll fail later and propagate error */
409 
410 	/* ensure libelf is initialized */
411 	(void)elf_version(EV_CURRENT);
412 
413 	elf = elf_begin(fd, ELF_C_READ, NULL);
414 	if (!elf)
415 		goto cleanup;
416 
417 	if (elf_kind(elf) != ELF_K_ELF || gelf_getclass(elf) != ELFCLASS64)
418 		goto cleanup;
419 
420 	ehdr = elf64_getehdr(elf);
421 	/* Old LLVM set e_machine to EM_NONE */
422 	if (!ehdr || ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF))
423 		goto cleanup;
424 
425 	err = 0;
426 cleanup:
427 	if (elf)
428 		elf_end(elf);
429 	close(fd);
430 	return err == 0;
431 }
432 
should_process_file_prog(const char * filename,const char * prog_name)433 static bool should_process_file_prog(const char *filename, const char *prog_name)
434 {
435 	struct filter *f;
436 	int i, allow_cnt = 0;
437 
438 	for (i = 0; i < env.deny_filter_cnt; i++) {
439 		f = &env.deny_filters[i];
440 		if (f->kind != FILTER_NAME)
441 			continue;
442 
443 		if (f->any_glob && glob_matches(filename, f->any_glob))
444 			return false;
445 		if (f->any_glob && prog_name && glob_matches(prog_name, f->any_glob))
446 			return false;
447 		if (f->file_glob && glob_matches(filename, f->file_glob))
448 			return false;
449 		if (f->prog_glob && prog_name && glob_matches(prog_name, f->prog_glob))
450 			return false;
451 	}
452 
453 	for (i = 0; i < env.allow_filter_cnt; i++) {
454 		f = &env.allow_filters[i];
455 		if (f->kind != FILTER_NAME)
456 			continue;
457 
458 		allow_cnt++;
459 		if (f->any_glob) {
460 			if (glob_matches(filename, f->any_glob))
461 				return true;
462 			/* If we don't know program name yet, any_glob filter
463 			 * has to assume that current BPF object file might be
464 			 * relevant; we'll check again later on after opening
465 			 * BPF object file, at which point program name will
466 			 * be known finally.
467 			 */
468 			if (!prog_name || glob_matches(prog_name, f->any_glob))
469 				return true;
470 		} else {
471 			if (f->file_glob && !glob_matches(filename, f->file_glob))
472 				continue;
473 			if (f->prog_glob && prog_name && !glob_matches(prog_name, f->prog_glob))
474 				continue;
475 			return true;
476 		}
477 	}
478 
479 	/* if there are no file/prog name allow filters, allow all progs,
480 	 * unless they are denied earlier explicitly
481 	 */
482 	return allow_cnt == 0;
483 }
484 
/* Spellings of comparison operators and the operator kinds they stand for;
 * append_filter() tries the entries in the order listed.
 */
static struct {
	enum operator_kind op_kind;
	const char *op_str;
} operators[] = {
	/* Order of these definitions matter to avoid situations like '<'
	 * matching part of what is actually a '<>' operator. That is,
	 * substrings should go last.
	 */
	{ OP_EQ, "==" },
	{ OP_NEQ, "!=" },
	{ OP_NEQ, "<>" },
	{ OP_LE, "<=" },
	{ OP_LT, "<" },
	{ OP_GE, ">=" },
	{ OP_GT, ">" },
	{ OP_EQ, "=" },
};
502 
503 static bool parse_stat_id_var(const char *name, size_t len, int *id,
504 			      enum stat_variant *var, bool *is_abs);
505 
append_filter(struct filter ** filters,int * cnt,const char * str)506 static int append_filter(struct filter **filters, int *cnt, const char *str)
507 {
508 	struct filter *f;
509 	void *tmp;
510 	const char *p;
511 	int i;
512 
513 	tmp = realloc(*filters, (*cnt + 1) * sizeof(**filters));
514 	if (!tmp)
515 		return -ENOMEM;
516 	*filters = tmp;
517 
518 	f = &(*filters)[*cnt];
519 	memset(f, 0, sizeof(*f));
520 
521 	/* First, let's check if it's a stats filter of the following form:
522 	 * <stat><op><value, where:
523 	 *   - <stat> is one of supported numerical stats (verdict is also
524 	 *     considered numerical, failure == 0, success == 1);
525 	 *   - <op> is comparison operator (see `operators` definitions);
526 	 *   - <value> is an integer (or failure/success, or false/true as
527 	 *     special aliases for 0 and 1, respectively).
528 	 * If the form doesn't match what user provided, we assume file/prog
529 	 * glob filter.
530 	 */
531 	for (i = 0; i < ARRAY_SIZE(operators); i++) {
532 		enum stat_variant var;
533 		int id;
534 		long val;
535 		const char *end = str;
536 		const char *op_str;
537 		bool is_abs;
538 
539 		op_str = operators[i].op_str;
540 		p = strstr(str, op_str);
541 		if (!p)
542 			continue;
543 
544 		if (!parse_stat_id_var(str, p - str, &id, &var, &is_abs)) {
545 			fprintf(stderr, "Unrecognized stat name in '%s'!\n", str);
546 			return -EINVAL;
547 		}
548 		if (id >= FILE_NAME) {
549 			fprintf(stderr, "Non-integer stat is specified in '%s'!\n", str);
550 			return -EINVAL;
551 		}
552 
553 		p += strlen(op_str);
554 
555 		if (strcasecmp(p, "true") == 0 ||
556 		    strcasecmp(p, "t") == 0 ||
557 		    strcasecmp(p, "success") == 0 ||
558 		    strcasecmp(p, "succ") == 0 ||
559 		    strcasecmp(p, "s") == 0 ||
560 		    strcasecmp(p, "match") == 0 ||
561 		    strcasecmp(p, "m") == 0) {
562 			val = 1;
563 		} else if (strcasecmp(p, "false") == 0 ||
564 			   strcasecmp(p, "f") == 0 ||
565 			   strcasecmp(p, "failure") == 0 ||
566 			   strcasecmp(p, "fail") == 0 ||
567 			   strcasecmp(p, "mismatch") == 0 ||
568 			   strcasecmp(p, "mis") == 0) {
569 			val = 0;
570 		} else {
571 			errno = 0;
572 			val = strtol(p, (char **)&end, 10);
573 			if (errno || end == p || *end != '\0' ) {
574 				fprintf(stderr, "Invalid integer value in '%s'!\n", str);
575 				return -EINVAL;
576 			}
577 		}
578 
579 		f->kind = FILTER_STAT;
580 		f->stat_id = id;
581 		f->stat_var = var;
582 		f->op = operators[i].op_kind;
583 		f->abs = true;
584 		f->value = val;
585 
586 		*cnt += 1;
587 		return 0;
588 	}
589 
590 	/* File/prog filter can be specified either as '<glob>' or
591 	 * '<file-glob>/<prog-glob>'. In the former case <glob> is applied to
592 	 * both file and program names. This seems to be way more useful in
593 	 * practice. If user needs full control, they can use '/<prog-glob>'
594 	 * form to glob just program name, or '<file-glob>/' to glob only file
595 	 * name. But usually common <glob> seems to be the most useful and
596 	 * ergonomic way.
597 	 */
598 	f->kind = FILTER_NAME;
599 	p = strchr(str, '/');
600 	if (!p) {
601 		f->any_glob = strdup(str);
602 		if (!f->any_glob)
603 			return -ENOMEM;
604 	} else {
605 		if (str != p) {
606 			/* non-empty file glob */
607 			f->file_glob = strndup(str, p - str);
608 			if (!f->file_glob)
609 				return -ENOMEM;
610 		}
611 		if (strlen(p + 1) > 0) {
612 			/* non-empty prog glob */
613 			f->prog_glob = strdup(p + 1);
614 			if (!f->prog_glob) {
615 				free(f->file_glob);
616 				f->file_glob = NULL;
617 				return -ENOMEM;
618 			}
619 		}
620 	}
621 
622 	*cnt += 1;
623 	return 0;
624 }
625 
append_filter_file(const char * path)626 static int append_filter_file(const char *path)
627 {
628 	char buf[1024];
629 	FILE *f;
630 	int err = 0;
631 
632 	f = fopen(path, "r");
633 	if (!f) {
634 		err = -errno;
635 		fprintf(stderr, "Failed to open filters in '%s': %d\n", path, err);
636 		return err;
637 	}
638 
639 	while (fscanf(f, " %1023[^\n]\n", buf) == 1) {
640 		/* lines starting with # are comments, skip them */
641 		if (buf[0] == '\0' || buf[0] == '#')
642 			continue;
643 		/* lines starting with ! are negative match filters */
644 		if (buf[0] == '!')
645 			err = append_filter(&env.deny_filters, &env.deny_filter_cnt, buf + 1);
646 		else
647 			err = append_filter(&env.allow_filters, &env.allow_filter_cnt, buf);
648 		if (err)
649 			goto cleanup;
650 	}
651 
652 cleanup:
653 	fclose(f);
654 	return err;
655 }
656 
/* Default list of stats to output: eight columns, in the order listed.
 * NOTE(review): presumably used for table output when no -e spec is given;
 * the use site is outside of this view.
 */
static const struct stat_specs default_output_spec = {
	.spec_cnt = 8,
	.ids = {
		FILE_NAME, PROG_NAME, VERDICT, DURATION,
		TOTAL_INSNS, TOTAL_STATES, SIZE, JITED_SIZE
	},
};
664 
/* Default list of stats for CSV: every stat id defined in enum stat_id.
 * NOTE(review): presumably used for CSV output when no -e spec is given;
 * the use site is outside of this view.
 */
static const struct stat_specs default_csv_output_spec = {
	.spec_cnt = 14,
	.ids = {
		FILE_NAME, PROG_NAME, VERDICT, DURATION,
		TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
		MAX_STATES_PER_INSN, MARK_READ_MAX_LEN,
		SIZE, JITED_SIZE, PROG_TYPE, ATTACH_TYPE,
		STACK,
	},
};
675 
/* Default ordering: by file name, then by program name, both ascending.
 * NOTE(review): presumably used when no -s spec is given; the use site is
 * outside of this view.
 */
static const struct stat_specs default_sort_spec = {
	.spec_cnt = 2,
	.ids = {
		FILE_NAME, PROG_NAME,
	},
	.asc = { true, true, },
};
683 
684 /* sorting for comparison mode to join two data sets */
static const struct stat_specs join_sort_spec = {
	.spec_cnt = 2,
	/* order by file name first, then by program name */
	.ids = {
		FILE_NAME, PROG_NAME,
	},
	/* both keys in ascending order */
	.asc = { true, true, },
};
692 
/* Per-stat metadata, indexed by enum stat_id */
static struct stat_def {
	/* human-readable stat title */
	const char *header;
	/* names accepted by parse_stat_id_var(); unused slots are NULL */
	const char *names[4];
	/* ordering parse_stat() picks when the spec has no ordering symbol */
	bool asc_by_default;
	/* NOTE(review): presumably table column alignment; use site not in view */
	bool left_aligned;
} stat_defs[] = {
	[FILE_NAME] = { "File", {"file_name", "filename", "file"}, true /* asc */, true /* left */ },
	[PROG_NAME] = { "Program", {"prog_name", "progname", "prog"}, true /* asc */, true /* left */ },
	[VERDICT] = { "Verdict", {"verdict"}, true /* asc: failure, success */, true /* left */ },
	[DURATION] = { "Duration (us)", {"duration", "dur"}, },
	[TOTAL_INSNS] = { "Insns", {"total_insns", "insns"}, },
	[TOTAL_STATES] = { "States", {"total_states", "states"}, },
	[PEAK_STATES] = { "Peak states", {"peak_states"}, },
	[MAX_STATES_PER_INSN] = { "Max states per insn", {"max_states_per_insn"}, },
	[MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, },
	[SIZE] = { "Program size", {"prog_size"}, },
	[JITED_SIZE] = { "Jited size", {"prog_size_jited"}, },
	[STACK] = {"Stack depth", {"stack_depth", "stack"}, },
	[PROG_TYPE] = { "Program type", {"prog_type"}, },
	[ATTACH_TYPE] = { "Attach type", {"attach_type", }, },
};
714 
parse_stat_id_var(const char * name,size_t len,int * id,enum stat_variant * var,bool * is_abs)715 static bool parse_stat_id_var(const char *name, size_t len, int *id,
716 			      enum stat_variant *var, bool *is_abs)
717 {
718 	static const char *var_sfxs[] = {
719 		[VARIANT_A] = "_a",
720 		[VARIANT_B] = "_b",
721 		[VARIANT_DIFF] = "_diff",
722 		[VARIANT_PCT] = "_pct",
723 	};
724 	int i, j, k;
725 
726 	/* |<stat>| means we take absolute value of given stat */
727 	*is_abs = false;
728 	if (len > 2 && name[0] == '|' && name[len - 1] == '|') {
729 		*is_abs = true;
730 		name += 1;
731 		len -= 2;
732 	}
733 
734 	for (i = 0; i < ARRAY_SIZE(stat_defs); i++) {
735 		struct stat_def *def = &stat_defs[i];
736 		size_t alias_len, sfx_len;
737 		const char *alias;
738 
739 		for (j = 0; j < ARRAY_SIZE(stat_defs[i].names); j++) {
740 			alias = def->names[j];
741 			if (!alias)
742 				continue;
743 
744 			alias_len = strlen(alias);
745 			if (strncmp(name, alias, alias_len) != 0)
746 				continue;
747 
748 			if (alias_len == len) {
749 				/* If no variant suffix is specified, we
750 				 * assume control group (just in case we are
751 				 * in comparison mode. Variant is ignored in
752 				 * non-comparison mode.
753 				 */
754 				*var = VARIANT_B;
755 				*id = i;
756 				return true;
757 			}
758 
759 			for (k = 0; k < ARRAY_SIZE(var_sfxs); k++) {
760 				sfx_len = strlen(var_sfxs[k]);
761 				if (alias_len + sfx_len != len)
762 					continue;
763 
764 				if (strncmp(name + alias_len, var_sfxs[k], sfx_len) == 0) {
765 					*var = (enum stat_variant)k;
766 					*id = i;
767 					return true;
768 				}
769 			}
770 		}
771 	}
772 
773 	return false;
774 }
775 
/* Returns true if `c` is the symbol requesting ascending order ('^') */
static bool is_asc_sym(char c)
{
	if (c != '^')
		return false;
	return true;
}
780 
/* Returns true if `c` is one of the symbols requesting descending order:
 * 'v', 'V', '.', '!' or '_'
 */
static bool is_desc_sym(char c)
{
	switch (c) {
	case 'v':
	case 'V':
	case '.':
	case '!':
	case '_':
		return true;
	default:
		return false;
	}
}
785 
parse_stat(const char * stat_name,struct stat_specs * specs)786 static int parse_stat(const char *stat_name, struct stat_specs *specs)
787 {
788 	int id;
789 	bool has_order = false, is_asc = false, is_abs = false;
790 	size_t len = strlen(stat_name);
791 	enum stat_variant var;
792 
793 	if (specs->spec_cnt >= ARRAY_SIZE(specs->ids)) {
794 		fprintf(stderr, "Can't specify more than %zd stats\n", ARRAY_SIZE(specs->ids));
795 		return -E2BIG;
796 	}
797 
798 	if (len > 1 && (is_asc_sym(stat_name[len - 1]) || is_desc_sym(stat_name[len - 1]))) {
799 		has_order = true;
800 		is_asc = is_asc_sym(stat_name[len - 1]);
801 		len -= 1;
802 	}
803 
804 	if (!parse_stat_id_var(stat_name, len, &id, &var, &is_abs)) {
805 		fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name);
806 		return -ESRCH;
807 	}
808 
809 	specs->ids[specs->spec_cnt] = id;
810 	specs->variants[specs->spec_cnt] = var;
811 	specs->asc[specs->spec_cnt] = has_order ? is_asc : stat_defs[id].asc_by_default;
812 	specs->abs[specs->spec_cnt] = is_abs;
813 	specs->spec_cnt++;
814 
815 	return 0;
816 }
817 
/* Parse a comma-separated list of stat specs and append each of them to
 * `specs`. The input string itself is not modified, a private copy is
 * tokenized instead.
 *
 * Returns 0 on success, -ENOMEM if the copy can't be allocated, or the first
 * error returned by parse_stat(), at which point parsing stops.
 */
static int parse_stats(const char *stats_str, struct stat_specs *specs)
{
	char *copy, *saveptr = NULL, *tok;
	int rc = 0;

	copy = strdup(stats_str);
	if (!copy)
		return -ENOMEM;

	for (tok = strtok_r(copy, ",", &saveptr); tok; tok = strtok_r(NULL, ",", &saveptr)) {
		rc = parse_stat(tok, specs);
		if (rc)
			break;
	}

	free(copy);
	return rc;
}
838 
free_verif_stats(struct verif_stats * stats,size_t stat_cnt)839 static void free_verif_stats(struct verif_stats *stats, size_t stat_cnt)
840 {
841 	int i;
842 
843 	if (!stats)
844 		return;
845 
846 	for (i = 0; i < stat_cnt; i++) {
847 		free(stats[i].file_name);
848 		free(stats[i].prog_name);
849 	}
850 	free(stats);
851 }
852 
/* 64KB static buffer.
 * NOTE(review): presumably receives the verifier log; use site not in view.
 */
static char verif_log_buf[64 * 1024];

/* Upper bound on the number of trailing log lines parse_verif_log() inspects */
#define MAX_PARSED_LOG_LINES 100
856 
parse_verif_log(char * const buf,size_t buf_sz,struct verif_stats * s)857 static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *s)
858 {
859 	const char *cur;
860 	int pos, lines, sub_stack, cnt = 0;
861 	char *state = NULL, *token, stack[512];
862 
863 	buf[buf_sz - 1] = '\0';
864 
865 	for (pos = strlen(buf) - 1, lines = 0; pos >= 0 && lines < MAX_PARSED_LOG_LINES; lines++) {
866 		/* find previous endline or otherwise take the start of log buf */
867 		for (cur = &buf[pos]; cur > buf && cur[0] != '\n'; cur--, pos--) {
868 		}
869 		/* next time start from end of previous line (or pos goes to <0) */
870 		pos--;
871 		/* if we found endline, point right after endline symbol;
872 		 * otherwise, stay at the beginning of log buf
873 		 */
874 		if (cur[0] == '\n')
875 			cur++;
876 
877 		if (1 == sscanf(cur, "verification time %ld usec\n", &s->stats[DURATION]))
878 			continue;
879 		if (5 == sscanf(cur, "processed %ld insns (limit %*d) max_states_per_insn %ld total_states %ld peak_states %ld mark_read %ld",
880 				&s->stats[TOTAL_INSNS],
881 				&s->stats[MAX_STATES_PER_INSN],
882 				&s->stats[TOTAL_STATES],
883 				&s->stats[PEAK_STATES],
884 				&s->stats[MARK_READ_MAX_LEN]))
885 			continue;
886 
887 		if (1 == sscanf(cur, "stack depth %511s", stack))
888 			continue;
889 	}
890 	while ((token = strtok_r(cnt++ ? NULL : stack, "+", &state))) {
891 		if (sscanf(token, "%d", &sub_stack) == 0)
892 			break;
893 		s->stats[STACK] += sub_stack;
894 	}
895 	return 0;
896 }
897 
/* A line of text and the number of times it occurs; used by
 * print_top_src_lines() to rank source lines by frequency
 */
struct line_cnt {
	char *line;
	int cnt;
};
902 
/* qsort() comparator for an array of C strings: `a` and `b` point to the
 * array elements (string pointers), which are compared with strcmp()
 */
static int str_cmp(const void *a, const void *b)
{
	const char *lhs = *(const char * const *)a;
	const char *rhs = *(const char * const *)b;

	return strcmp(lhs, rhs);
}
910 
line_cnt_cmp(const void * a,const void * b)911 static int line_cnt_cmp(const void *a, const void *b)
912 {
913 	const struct line_cnt *a_cnt = (const struct line_cnt *)a;
914 	const struct line_cnt *b_cnt = (const struct line_cnt *)b;
915 
916 	if (a_cnt->cnt != b_cnt->cnt)
917 		return a_cnt->cnt > b_cnt->cnt ? -1 : 1;
918 	return strcmp(a_cnt->line, b_cnt->line);
919 }
920 
print_top_src_lines(char * const buf,size_t buf_sz,const char * prog_name)921 static int print_top_src_lines(char * const buf, size_t buf_sz, const char *prog_name)
922 {
923 	int lines_cap = 0;
924 	int lines_size = 0;
925 	char **lines = NULL;
926 	char *line = NULL;
927 	char *state;
928 	struct line_cnt *freq = NULL;
929 	struct line_cnt *cur;
930 	int unique_lines;
931 	int err = 0;
932 	int i;
933 
934 	while ((line = strtok_r(line ? NULL : buf, "\n", &state))) {
935 		if (strncmp(line, "; ", 2) != 0)
936 			continue;
937 		line += 2;
938 
939 		if (lines_size == lines_cap) {
940 			char **tmp;
941 
942 			lines_cap = max(16, lines_cap * 2);
943 			tmp = realloc(lines, lines_cap * sizeof(*tmp));
944 			if (!tmp) {
945 				err = -ENOMEM;
946 				goto cleanup;
947 			}
948 			lines = tmp;
949 		}
950 		lines[lines_size] = line;
951 		lines_size++;
952 	}
953 
954 	if (lines_size == 0)
955 		goto cleanup;
956 
957 	qsort(lines, lines_size, sizeof(*lines), str_cmp);
958 
959 	freq = calloc(lines_size, sizeof(*freq));
960 	if (!freq) {
961 		err = -ENOMEM;
962 		goto cleanup;
963 	}
964 
965 	cur = freq;
966 	cur->line = lines[0];
967 	cur->cnt = 1;
968 	for (i = 1; i < lines_size; ++i) {
969 		if (strcmp(lines[i], cur->line) != 0) {
970 			cur++;
971 			cur->line = lines[i];
972 			cur->cnt = 0;
973 		}
974 		cur->cnt++;
975 	}
976 	unique_lines = cur - freq + 1;
977 
978 	qsort(freq, unique_lines, sizeof(struct line_cnt), line_cnt_cmp);
979 
980 	printf("Top source lines (%s):\n", prog_name);
981 	for (i = 0; i < min(unique_lines, env.top_src_lines); ++i) {
982 		const char *src_code = freq[i].line;
983 		const char *src_line = NULL;
984 		char *split = strrchr(freq[i].line, '@');
985 
986 		if (split) {
987 			src_line = split + 1;
988 
989 			while (*src_line && isspace(*src_line))
990 				src_line++;
991 
992 			while (split > src_code && isspace(*split))
993 				split--;
994 			*split = '\0';
995 		}
996 
997 		if (src_line)
998 			printf("%5d: (%s)\t%s\n", freq[i].cnt, src_line, src_code);
999 		else
1000 			printf("%5d: %s\n", freq[i].cnt, src_code);
1001 	}
1002 	printf("\n");
1003 
1004 cleanup:
1005 	free(freq);
1006 	free(lines);
1007 	return err;
1008 }
1009 
guess_prog_type_by_ctx_name(const char * ctx_name,enum bpf_prog_type * prog_type,enum bpf_attach_type * attach_type)1010 static int guess_prog_type_by_ctx_name(const char *ctx_name,
1011 				       enum bpf_prog_type *prog_type,
1012 				       enum bpf_attach_type *attach_type)
1013 {
1014 	/* We need to guess program type based on its declared context type.
1015 	 * This guess can't be perfect as many different program types might
1016 	 * share the same context type.  So we can only hope to reasonably
1017 	 * well guess this and get lucky.
1018 	 *
1019 	 * Just in case, we support both UAPI-side type names and
1020 	 * kernel-internal names.
1021 	 */
1022 	static struct {
1023 		const char *uapi_name;
1024 		const char *kern_name;
1025 		enum bpf_prog_type prog_type;
1026 		enum bpf_attach_type attach_type;
1027 	} ctx_map[] = {
1028 		/* __sk_buff is most ambiguous, we assume TC program */
1029 		{ "__sk_buff", "sk_buff", BPF_PROG_TYPE_SCHED_CLS },
1030 		{ "bpf_sock", "sock", BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND },
1031 		{ "bpf_sock_addr", "bpf_sock_addr_kern",  BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND },
1032 		{ "bpf_sock_ops", "bpf_sock_ops_kern", BPF_PROG_TYPE_SOCK_OPS, BPF_CGROUP_SOCK_OPS },
1033 		{ "sk_msg_md", "sk_msg", BPF_PROG_TYPE_SK_MSG, BPF_SK_MSG_VERDICT },
1034 		{ "bpf_cgroup_dev_ctx", "bpf_cgroup_dev_ctx", BPF_PROG_TYPE_CGROUP_DEVICE, BPF_CGROUP_DEVICE },
1035 		{ "bpf_sysctl", "bpf_sysctl_kern", BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_CGROUP_SYSCTL },
1036 		{ "bpf_sockopt", "bpf_sockopt_kern", BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT },
1037 		{ "sk_reuseport_md", "sk_reuseport_kern", BPF_PROG_TYPE_SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE },
1038 		{ "bpf_sk_lookup", "bpf_sk_lookup_kern", BPF_PROG_TYPE_SK_LOOKUP, BPF_SK_LOOKUP },
1039 		{ "xdp_md", "xdp_buff", BPF_PROG_TYPE_XDP, BPF_XDP },
1040 		/* tracing types with no expected attach type */
1041 		{ "bpf_user_pt_regs_t", "pt_regs", BPF_PROG_TYPE_KPROBE },
1042 		{ "bpf_perf_event_data", "bpf_perf_event_data_kern", BPF_PROG_TYPE_PERF_EVENT },
1043 		/* raw_tp programs use u64[] from kernel side, we don't want
1044 		 * to match on that, probably; so NULL for kern-side type
1045 		 */
1046 		{ "bpf_raw_tracepoint_args", NULL, BPF_PROG_TYPE_RAW_TRACEPOINT },
1047 	};
1048 	int i;
1049 
1050 	if (!ctx_name)
1051 		return -EINVAL;
1052 
1053 	for (i = 0; i < ARRAY_SIZE(ctx_map); i++) {
1054 		if (strcmp(ctx_map[i].uapi_name, ctx_name) == 0 ||
1055 		    (ctx_map[i].kern_name && strcmp(ctx_map[i].kern_name, ctx_name) == 0)) {
1056 			*prog_type = ctx_map[i].prog_type;
1057 			*attach_type = ctx_map[i].attach_type;
1058 			return 0;
1059 		}
1060 	}
1061 
1062 	return -ESRCH;
1063 }
1064 
1065 /* Make sure only target program is referenced from struct_ops map,
1066  * otherwise libbpf would automatically set autocreate for all
1067  * referenced programs.
1068  * See libbpf.c:bpf_object_adjust_struct_ops_autoload.
1069  */
mask_unrelated_struct_ops_progs(struct bpf_object * obj,struct bpf_map * map,struct bpf_program * prog)1070 static void mask_unrelated_struct_ops_progs(struct bpf_object *obj,
1071 					    struct bpf_map *map,
1072 					    struct bpf_program *prog)
1073 {
1074 	struct btf *btf = bpf_object__btf(obj);
1075 	const struct btf_type *t, *mt;
1076 	struct btf_member *m;
1077 	int i, moff;
1078 	size_t data_sz, ptr_sz = sizeof(void *);
1079 	void *data;
1080 
1081 	t = btf__type_by_id(btf, bpf_map__btf_value_type_id(map));
1082 	if (!btf_is_struct(t))
1083 		return;
1084 
1085 	data = bpf_map__initial_value(map, &data_sz);
1086 	for (i = 0; i < btf_vlen(t); i++) {
1087 		m = &btf_members(t)[i];
1088 		mt = btf__type_by_id(btf, m->type);
1089 		if (!btf_is_ptr(mt))
1090 			continue;
1091 		moff = m->offset / 8;
1092 		if (moff + ptr_sz > data_sz)
1093 			continue;
1094 		if (memcmp(data + moff, &prog, ptr_sz) == 0)
1095 			continue;
1096 		memset(data + moff, 0, ptr_sz);
1097 	}
1098 }
1099 
fixup_obj(struct bpf_object * obj,struct bpf_program * prog,const char * filename)1100 static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const char *filename)
1101 {
1102 	struct bpf_map *map;
1103 
1104 	bpf_object__for_each_map(map, obj) {
1105 		/* disable pinning */
1106 		bpf_map__set_pin_path(map, NULL);
1107 
1108 		/* fix up map size, if necessary */
1109 		switch (bpf_map__type(map)) {
1110 		case BPF_MAP_TYPE_SK_STORAGE:
1111 		case BPF_MAP_TYPE_TASK_STORAGE:
1112 		case BPF_MAP_TYPE_INODE_STORAGE:
1113 		case BPF_MAP_TYPE_CGROUP_STORAGE:
1114 			break;
1115 		case BPF_MAP_TYPE_STRUCT_OPS:
1116 			mask_unrelated_struct_ops_progs(obj, map, prog);
1117 			break;
1118 		default:
1119 			if (bpf_map__max_entries(map) == 0)
1120 				bpf_map__set_max_entries(map, 1);
1121 		}
1122 	}
1123 
1124 	/* SEC(freplace) programs can't be loaded with veristat as is,
1125 	 * but we can try guessing their target program's expected type by
1126 	 * looking at the type of program's first argument and substituting
1127 	 * corresponding program type
1128 	 */
1129 	if (bpf_program__type(prog) == BPF_PROG_TYPE_EXT) {
1130 		const struct btf *btf = bpf_object__btf(obj);
1131 		const char *prog_name = bpf_program__name(prog);
1132 		enum bpf_prog_type prog_type;
1133 		enum bpf_attach_type attach_type;
1134 		const struct btf_type *t;
1135 		const char *ctx_name;
1136 		int id;
1137 
1138 		if (!btf)
1139 			goto skip_freplace_fixup;
1140 
1141 		id = btf__find_by_name_kind(btf, prog_name, BTF_KIND_FUNC);
1142 		t = btf__type_by_id(btf, id);
1143 		t = btf__type_by_id(btf, t->type);
1144 		if (!btf_is_func_proto(t) || btf_vlen(t) != 1)
1145 			goto skip_freplace_fixup;
1146 
1147 		/* context argument is a pointer to a struct/typedef */
1148 		t = btf__type_by_id(btf, btf_params(t)[0].type);
1149 		while (t && btf_is_mod(t))
1150 			t = btf__type_by_id(btf, t->type);
1151 		if (!t || !btf_is_ptr(t))
1152 			goto skip_freplace_fixup;
1153 		t = btf__type_by_id(btf, t->type);
1154 		while (t && btf_is_mod(t))
1155 			t = btf__type_by_id(btf, t->type);
1156 		if (!t)
1157 			goto skip_freplace_fixup;
1158 
1159 		ctx_name = btf__name_by_offset(btf, t->name_off);
1160 
1161 		if (guess_prog_type_by_ctx_name(ctx_name, &prog_type, &attach_type) == 0) {
1162 			bpf_program__set_type(prog, prog_type);
1163 			bpf_program__set_expected_attach_type(prog, attach_type);
1164 
1165 			if (!env.quiet) {
1166 				printf("Using guessed program type '%s' for %s/%s...\n",
1167 					libbpf_bpf_prog_type_str(prog_type),
1168 					filename, prog_name);
1169 			}
1170 		} else {
1171 			if (!env.quiet) {
1172 				printf("Failed to guess program type for freplace program with context type name '%s' for %s/%s. Consider using canonical type names to help veristat...\n",
1173 					ctx_name, filename, prog_name);
1174 			}
1175 		}
1176 	}
1177 skip_freplace_fixup:
1178 	return;
1179 }
1180 
max_verifier_log_size(void)1181 static int max_verifier_log_size(void)
1182 {
1183 	const int SMALL_LOG_SIZE = UINT_MAX >> 8;
1184 	const int BIG_LOG_SIZE = UINT_MAX >> 2;
1185 	struct bpf_insn insns[] = {
1186 		{ .code = BPF_ALU | BPF_MOV | BPF_X, .dst_reg = BPF_REG_0, },
1187 		{ .code  = BPF_JMP | BPF_EXIT, },
1188 	};
1189 	LIBBPF_OPTS(bpf_prog_load_opts, opts,
1190 		    .log_size = BIG_LOG_SIZE,
1191 		    .log_buf = (void *)-1,
1192 		    .log_level = 4
1193 	);
1194 	int ret, insn_cnt = ARRAY_SIZE(insns);
1195 	static int log_size;
1196 
1197 	if (log_size != 0)
1198 		return log_size;
1199 
1200 	ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
1201 
1202 	if (ret == -EFAULT)
1203 		log_size = BIG_LOG_SIZE;
1204 	else /* ret == -EINVAL, big log size is not supported by the verifier */
1205 		log_size = SMALL_LOG_SIZE;
1206 
1207 	return log_size;
1208 }
1209 
process_prog(const char * filename,struct bpf_object * obj,struct bpf_program * prog)1210 static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog)
1211 {
1212 	const char *base_filename = basename(strdupa(filename));
1213 	const char *prog_name = bpf_program__name(prog);
1214 	char *buf;
1215 	int buf_sz, log_level;
1216 	struct verif_stats *stats;
1217 	struct bpf_prog_info info;
1218 	__u32 info_len = sizeof(info);
1219 	int err = 0;
1220 	void *tmp;
1221 	int fd;
1222 
1223 	if (!should_process_file_prog(base_filename, bpf_program__name(prog))) {
1224 		env.progs_skipped++;
1225 		return 0;
1226 	}
1227 
1228 	tmp = realloc(env.prog_stats, (env.prog_stat_cnt + 1) * sizeof(*env.prog_stats));
1229 	if (!tmp)
1230 		return -ENOMEM;
1231 	env.prog_stats = tmp;
1232 	stats = &env.prog_stats[env.prog_stat_cnt++];
1233 	memset(stats, 0, sizeof(*stats));
1234 
1235 	if (env.verbose || env.top_src_lines > 0) {
1236 		buf_sz = env.log_size ? env.log_size : max_verifier_log_size();
1237 		buf = malloc(buf_sz);
1238 		if (!buf)
1239 			return -ENOMEM;
1240 		/* ensure we always request stats */
1241 		log_level = env.log_level | 4 | (env.log_fixed ? 8 : 0);
1242 		/* --top-src-lines needs verifier log */
1243 		if (env.top_src_lines > 0 && env.log_level == 0)
1244 			log_level |= 2;
1245 	} else {
1246 		buf = verif_log_buf;
1247 		buf_sz = sizeof(verif_log_buf);
1248 		/* request only verifier stats */
1249 		log_level = 4 | (env.log_fixed ? 8 : 0);
1250 	}
1251 	verif_log_buf[0] = '\0';
1252 
1253 	bpf_program__set_log_buf(prog, buf, buf_sz);
1254 	bpf_program__set_log_level(prog, log_level);
1255 
1256 	/* increase chances of successful BPF object loading */
1257 	fixup_obj(obj, prog, base_filename);
1258 
1259 	if (env.force_checkpoints)
1260 		bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_STATE_FREQ);
1261 	if (env.force_reg_invariants)
1262 		bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_REG_INVARIANTS);
1263 
1264 	err = bpf_object__load(obj);
1265 	env.progs_processed++;
1266 
1267 	stats->file_name = strdup(base_filename);
1268 	stats->prog_name = strdup(bpf_program__name(prog));
1269 	stats->stats[VERDICT] = err == 0; /* 1 - success, 0 - failure */
1270 	stats->stats[SIZE] = bpf_program__insn_cnt(prog);
1271 	stats->stats[PROG_TYPE] = bpf_program__type(prog);
1272 	stats->stats[ATTACH_TYPE] = bpf_program__expected_attach_type(prog);
1273 
1274 	memset(&info, 0, info_len);
1275 	fd = bpf_program__fd(prog);
1276 	if (fd > 0 && bpf_prog_get_info_by_fd(fd, &info, &info_len) == 0)
1277 		stats->stats[JITED_SIZE] = info.jited_prog_len;
1278 
1279 	parse_verif_log(buf, buf_sz, stats);
1280 
1281 	if (env.verbose) {
1282 		printf("PROCESSING %s/%s, DURATION US: %ld, VERDICT: %s, VERIFIER LOG:\n%s\n",
1283 		       filename, prog_name, stats->stats[DURATION],
1284 		       err ? "failure" : "success", buf);
1285 	}
1286 	if (env.top_src_lines > 0)
1287 		print_top_src_lines(buf, buf_sz, stats->prog_name);
1288 
1289 	if (verif_log_buf != buf)
1290 		free(buf);
1291 
1292 	return 0;
1293 };
1294 
process_obj(const char * filename)1295 static int process_obj(const char *filename)
1296 {
1297 	const char *base_filename = basename(strdupa(filename));
1298 	struct bpf_object *obj = NULL, *tobj;
1299 	struct bpf_program *prog, *tprog, *lprog;
1300 	libbpf_print_fn_t old_libbpf_print_fn;
1301 	LIBBPF_OPTS(bpf_object_open_opts, opts);
1302 	int err = 0, prog_cnt = 0;
1303 
1304 	if (!should_process_file_prog(base_filename, NULL)) {
1305 		if (env.verbose)
1306 			printf("Skipping '%s' due to filters...\n", filename);
1307 		env.files_skipped++;
1308 		return 0;
1309 	}
1310 	if (!is_bpf_obj_file(filename)) {
1311 		if (env.verbose)
1312 			printf("Skipping '%s' as it's not a BPF object file...\n", filename);
1313 		env.files_skipped++;
1314 		return 0;
1315 	}
1316 
1317 	if (!env.quiet && env.out_fmt == RESFMT_TABLE)
1318 		printf("Processing '%s'...\n", base_filename);
1319 
1320 	old_libbpf_print_fn = libbpf_set_print(libbpf_print_fn);
1321 	obj = bpf_object__open_file(filename, &opts);
1322 	if (!obj) {
1323 		/* if libbpf can't open BPF object file, it could be because
1324 		 * that BPF object file is incomplete and has to be statically
1325 		 * linked into a final BPF object file; instead of bailing
1326 		 * out, report it into stderr, mark it as skipped, and
1327 		 * proceed
1328 		 */
1329 		fprintf(stderr, "Failed to open '%s': %d\n", filename, -errno);
1330 		env.files_skipped++;
1331 		err = 0;
1332 		goto cleanup;
1333 	}
1334 
1335 	env.files_processed++;
1336 
1337 	bpf_object__for_each_program(prog, obj) {
1338 		prog_cnt++;
1339 	}
1340 
1341 	if (prog_cnt == 1) {
1342 		prog = bpf_object__next_program(obj, NULL);
1343 		bpf_program__set_autoload(prog, true);
1344 		process_prog(filename, obj, prog);
1345 		goto cleanup;
1346 	}
1347 
1348 	bpf_object__for_each_program(prog, obj) {
1349 		const char *prog_name = bpf_program__name(prog);
1350 
1351 		tobj = bpf_object__open_file(filename, &opts);
1352 		if (!tobj) {
1353 			err = -errno;
1354 			fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
1355 			goto cleanup;
1356 		}
1357 
1358 		lprog = NULL;
1359 		bpf_object__for_each_program(tprog, tobj) {
1360 			const char *tprog_name = bpf_program__name(tprog);
1361 
1362 			if (strcmp(prog_name, tprog_name) == 0) {
1363 				bpf_program__set_autoload(tprog, true);
1364 				lprog = tprog;
1365 			} else {
1366 				bpf_program__set_autoload(tprog, false);
1367 			}
1368 		}
1369 
1370 		process_prog(filename, tobj, lprog);
1371 		bpf_object__close(tobj);
1372 	}
1373 
1374 cleanup:
1375 	bpf_object__close(obj);
1376 	libbpf_set_print(old_libbpf_print_fn);
1377 	return err;
1378 }
1379 
cmp_stat(const struct verif_stats * s1,const struct verif_stats * s2,enum stat_id id,bool asc,bool abs)1380 static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2,
1381 		    enum stat_id id, bool asc, bool abs)
1382 {
1383 	int cmp = 0;
1384 
1385 	switch (id) {
1386 	case FILE_NAME:
1387 		cmp = strcmp(s1->file_name, s2->file_name);
1388 		break;
1389 	case PROG_NAME:
1390 		cmp = strcmp(s1->prog_name, s2->prog_name);
1391 		break;
1392 	case ATTACH_TYPE:
1393 	case PROG_TYPE:
1394 	case SIZE:
1395 	case JITED_SIZE:
1396 	case STACK:
1397 	case VERDICT:
1398 	case DURATION:
1399 	case TOTAL_INSNS:
1400 	case TOTAL_STATES:
1401 	case PEAK_STATES:
1402 	case MAX_STATES_PER_INSN:
1403 	case MARK_READ_MAX_LEN: {
1404 		long v1 = s1->stats[id];
1405 		long v2 = s2->stats[id];
1406 
1407 		if (abs) {
1408 			v1 = v1 < 0 ? -v1 : v1;
1409 			v2 = v2 < 0 ? -v2 : v2;
1410 		}
1411 
1412 		if (v1 != v2)
1413 			cmp = v1 < v2 ? -1 : 1;
1414 		break;
1415 	}
1416 	default:
1417 		fprintf(stderr, "Unrecognized stat #%d\n", id);
1418 		exit(1);
1419 	}
1420 
1421 	return asc ? cmp : -cmp;
1422 }
1423 
cmp_prog_stats(const void * v1,const void * v2)1424 static int cmp_prog_stats(const void *v1, const void *v2)
1425 {
1426 	const struct verif_stats *s1 = v1, *s2 = v2;
1427 	int i, cmp;
1428 
1429 	for (i = 0; i < env.sort_spec.spec_cnt; i++) {
1430 		cmp = cmp_stat(s1, s2, env.sort_spec.ids[i],
1431 			       env.sort_spec.asc[i], env.sort_spec.abs[i]);
1432 		if (cmp != 0)
1433 			return cmp;
1434 	}
1435 
1436 	/* always disambiguate with file+prog, which are unique */
1437 	cmp = strcmp(s1->file_name, s2->file_name);
1438 	if (cmp != 0)
1439 		return cmp;
1440 	return strcmp(s1->prog_name, s2->prog_name);
1441 }
1442 
fetch_join_stat_value(const struct verif_stats_join * s,enum stat_id id,enum stat_variant var,const char ** str_val,double * num_val)1443 static void fetch_join_stat_value(const struct verif_stats_join *s,
1444 				  enum stat_id id, enum stat_variant var,
1445 				  const char **str_val,
1446 				  double *num_val)
1447 {
1448 	long v1, v2;
1449 
1450 	if (id == FILE_NAME) {
1451 		*str_val = s->file_name;
1452 		return;
1453 	}
1454 	if (id == PROG_NAME) {
1455 		*str_val = s->prog_name;
1456 		return;
1457 	}
1458 
1459 	v1 = s->stats_a ? s->stats_a->stats[id] : 0;
1460 	v2 = s->stats_b ? s->stats_b->stats[id] : 0;
1461 
1462 	switch (var) {
1463 	case VARIANT_A:
1464 		if (!s->stats_a)
1465 			*num_val = -DBL_MAX;
1466 		else
1467 			*num_val = s->stats_a->stats[id];
1468 		return;
1469 	case VARIANT_B:
1470 		if (!s->stats_b)
1471 			*num_val = -DBL_MAX;
1472 		else
1473 			*num_val = s->stats_b->stats[id];
1474 		return;
1475 	case VARIANT_DIFF:
1476 		if (!s->stats_a || !s->stats_b)
1477 			*num_val = -DBL_MAX;
1478 		else if (id == VERDICT)
1479 			*num_val = v1 == v2 ? 1.0 /* MATCH */ : 0.0 /* MISMATCH */;
1480 		else
1481 			*num_val = (double)(v2 - v1);
1482 		return;
1483 	case VARIANT_PCT:
1484 		if (!s->stats_a || !s->stats_b) {
1485 			*num_val = -DBL_MAX;
1486 		} else if (v1 == 0) {
1487 			if (v1 == v2)
1488 				*num_val = 0.0;
1489 			else
1490 				*num_val = v2 < v1 ? -100.0 : 100.0;
1491 		} else {
1492 			 *num_val = (v2 - v1) * 100.0 / v1;
1493 		}
1494 		return;
1495 	}
1496 }
1497 
/* Compare one stat (@id, @var) of two joined stats records.
 *
 * String-valued stats are compared with strcmp(); numeric ones are compared
 * as doubles, by magnitude if @abs is set. Result is negated when @asc is
 * false.
 */
static int cmp_join_stat(const struct verif_stats_join *s1,
			 const struct verif_stats_join *s2,
			 enum stat_id id, enum stat_variant var,
			 bool asc, bool abs)
{
	const char *lhs_str = NULL, *rhs_str = NULL;
	double lhs_num = 0.0, rhs_num = 0.0;
	int res = 0;

	fetch_join_stat_value(s1, id, var, &lhs_str, &lhs_num);
	fetch_join_stat_value(s2, id, var, &rhs_str, &rhs_num);

	if (lhs_str) {
		res = strcmp(lhs_str, rhs_str);
	} else {
		if (abs) {
			lhs_num = fabs(lhs_num);
			rhs_num = fabs(rhs_num);
		}
		if (lhs_num != rhs_num)
			res = lhs_num < rhs_num ? -1 : 1;
	}

	if (!asc)
		res = -res;
	return res;
}
1522 
cmp_join_stats(const void * v1,const void * v2)1523 static int cmp_join_stats(const void *v1, const void *v2)
1524 {
1525 	const struct verif_stats_join *s1 = v1, *s2 = v2;
1526 	int i, cmp;
1527 
1528 	for (i = 0; i < env.sort_spec.spec_cnt; i++) {
1529 		cmp = cmp_join_stat(s1, s2,
1530 				    env.sort_spec.ids[i],
1531 				    env.sort_spec.variants[i],
1532 				    env.sort_spec.asc[i],
1533 				    env.sort_spec.abs[i]);
1534 		if (cmp != 0)
1535 			return cmp;
1536 	}
1537 
1538 	/* always disambiguate with file+prog, which are unique */
1539 	cmp = strcmp(s1->file_name, s2->file_name);
1540 	if (cmp != 0)
1541 		return cmp;
1542 	return strcmp(s1->prog_name, s2->prog_name);
1543 }
1544 
/* character repeated to draw the underline beneath each table column */
#define HEADER_CHAR '-'
/* string printed between adjacent table columns */
#define COLUMN_SEP "  "
1547 
output_header_underlines(void)1548 static void output_header_underlines(void)
1549 {
1550 	int i, j, len;
1551 
1552 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1553 		len = env.output_spec.lens[i];
1554 
1555 		printf("%s", i == 0 ? "" : COLUMN_SEP);
1556 		for (j = 0; j < len; j++)
1557 			printf("%c", HEADER_CHAR);
1558 	}
1559 	printf("\n");
1560 }
1561 
output_headers(enum resfmt fmt)1562 static void output_headers(enum resfmt fmt)
1563 {
1564 	const char *fmt_str;
1565 	int i, len;
1566 
1567 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1568 		int id = env.output_spec.ids[i];
1569 		int *max_len = &env.output_spec.lens[i];
1570 
1571 		switch (fmt) {
1572 		case RESFMT_TABLE_CALCLEN:
1573 			len = snprintf(NULL, 0, "%s", stat_defs[id].header);
1574 			if (len > *max_len)
1575 				*max_len = len;
1576 			break;
1577 		case RESFMT_TABLE:
1578 			fmt_str = stat_defs[id].left_aligned ? "%s%-*s" : "%s%*s";
1579 			printf(fmt_str, i == 0 ? "" : COLUMN_SEP,  *max_len, stat_defs[id].header);
1580 			if (i == env.output_spec.spec_cnt - 1)
1581 				printf("\n");
1582 			break;
1583 		case RESFMT_CSV:
1584 			printf("%s%s", i == 0 ? "" : ",", stat_defs[id].names[0]);
1585 			if (i == env.output_spec.spec_cnt - 1)
1586 				printf("\n");
1587 			break;
1588 		}
1589 	}
1590 
1591 	if (fmt == RESFMT_TABLE)
1592 		output_header_underlines();
1593 }
1594 
prepare_value(const struct verif_stats * s,enum stat_id id,const char ** str,long * val)1595 static void prepare_value(const struct verif_stats *s, enum stat_id id,
1596 			  const char **str, long *val)
1597 {
1598 	switch (id) {
1599 	case FILE_NAME:
1600 		*str = s ? s->file_name : "N/A";
1601 		break;
1602 	case PROG_NAME:
1603 		*str = s ? s->prog_name : "N/A";
1604 		break;
1605 	case VERDICT:
1606 		if (!s)
1607 			*str = "N/A";
1608 		else
1609 			*str = s->stats[VERDICT] ? "success" : "failure";
1610 		break;
1611 	case ATTACH_TYPE:
1612 		if (!s)
1613 			*str = "N/A";
1614 		else
1615 			*str = libbpf_bpf_attach_type_str(s->stats[ATTACH_TYPE]) ?: "N/A";
1616 		break;
1617 	case PROG_TYPE:
1618 		if (!s)
1619 			*str = "N/A";
1620 		else
1621 			*str = libbpf_bpf_prog_type_str(s->stats[PROG_TYPE]) ?: "N/A";
1622 		break;
1623 	case DURATION:
1624 	case TOTAL_INSNS:
1625 	case TOTAL_STATES:
1626 	case PEAK_STATES:
1627 	case MAX_STATES_PER_INSN:
1628 	case MARK_READ_MAX_LEN:
1629 	case STACK:
1630 	case SIZE:
1631 	case JITED_SIZE:
1632 		*val = s ? s->stats[id] : 0;
1633 		break;
1634 	default:
1635 		fprintf(stderr, "Unrecognized stat #%d\n", id);
1636 		exit(1);
1637 	}
1638 }
1639 
output_stats(const struct verif_stats * s,enum resfmt fmt,bool last)1640 static void output_stats(const struct verif_stats *s, enum resfmt fmt, bool last)
1641 {
1642 	int i;
1643 
1644 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1645 		int id = env.output_spec.ids[i];
1646 		int *max_len = &env.output_spec.lens[i], len;
1647 		const char *str = NULL;
1648 		long val = 0;
1649 
1650 		prepare_value(s, id, &str, &val);
1651 
1652 		switch (fmt) {
1653 		case RESFMT_TABLE_CALCLEN:
1654 			if (str)
1655 				len = snprintf(NULL, 0, "%s", str);
1656 			else
1657 				len = snprintf(NULL, 0, "%ld", val);
1658 			if (len > *max_len)
1659 				*max_len = len;
1660 			break;
1661 		case RESFMT_TABLE:
1662 			if (str)
1663 				printf("%s%-*s", i == 0 ? "" : COLUMN_SEP, *max_len, str);
1664 			else
1665 				printf("%s%*ld", i == 0 ? "" : COLUMN_SEP,  *max_len, val);
1666 			if (i == env.output_spec.spec_cnt - 1)
1667 				printf("\n");
1668 			break;
1669 		case RESFMT_CSV:
1670 			if (str)
1671 				printf("%s%s", i == 0 ? "" : ",", str);
1672 			else
1673 				printf("%s%ld", i == 0 ? "" : ",", val);
1674 			if (i == env.output_spec.spec_cnt - 1)
1675 				printf("\n");
1676 			break;
1677 		}
1678 	}
1679 
1680 	if (last && fmt == RESFMT_TABLE) {
1681 		output_header_underlines();
1682 		printf("Done. Processed %d files, %d programs. Skipped %d files, %d programs.\n",
1683 		       env.files_processed, env.files_skipped, env.progs_processed, env.progs_skipped);
1684 	}
1685 }
1686 
parse_stat_value(const char * str,enum stat_id id,struct verif_stats * st)1687 static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats *st)
1688 {
1689 	switch (id) {
1690 	case FILE_NAME:
1691 		st->file_name = strdup(str);
1692 		if (!st->file_name)
1693 			return -ENOMEM;
1694 		break;
1695 	case PROG_NAME:
1696 		st->prog_name = strdup(str);
1697 		if (!st->prog_name)
1698 			return -ENOMEM;
1699 		break;
1700 	case VERDICT:
1701 		if (strcmp(str, "success") == 0) {
1702 			st->stats[VERDICT] = true;
1703 		} else if (strcmp(str, "failure") == 0) {
1704 			st->stats[VERDICT] = false;
1705 		} else {
1706 			fprintf(stderr, "Unrecognized verification verdict '%s'\n", str);
1707 			return -EINVAL;
1708 		}
1709 		break;
1710 	case DURATION:
1711 	case TOTAL_INSNS:
1712 	case TOTAL_STATES:
1713 	case PEAK_STATES:
1714 	case MAX_STATES_PER_INSN:
1715 	case MARK_READ_MAX_LEN:
1716 	case SIZE:
1717 	case JITED_SIZE:
1718 	case STACK: {
1719 		long val;
1720 		int err, n;
1721 
1722 		if (sscanf(str, "%ld %n", &val, &n) != 1 || n != strlen(str)) {
1723 			err = -errno;
1724 			fprintf(stderr, "Failed to parse '%s' as integer\n", str);
1725 			return err;
1726 		}
1727 
1728 		st->stats[id] = val;
1729 		break;
1730 	}
1731 	case PROG_TYPE: {
1732 		enum bpf_prog_type prog_type = 0;
1733 		const char *type;
1734 
1735 		while ((type = libbpf_bpf_prog_type_str(prog_type)))  {
1736 			if (strcmp(type, str) == 0) {
1737 				st->stats[id] = prog_type;
1738 				break;
1739 			}
1740 			prog_type++;
1741 		}
1742 
1743 		if (!type) {
1744 			fprintf(stderr, "Unrecognized prog type %s\n", str);
1745 			return -EINVAL;
1746 		}
1747 		break;
1748 	}
1749 	case ATTACH_TYPE: {
1750 		enum bpf_attach_type attach_type = 0;
1751 		const char *type;
1752 
1753 		while ((type = libbpf_bpf_attach_type_str(attach_type)))  {
1754 			if (strcmp(type, str) == 0) {
1755 				st->stats[id] = attach_type;
1756 				break;
1757 			}
1758 			attach_type++;
1759 		}
1760 
1761 		if (!type) {
1762 			fprintf(stderr, "Unrecognized attach type %s\n", str);
1763 			return -EINVAL;
1764 		}
1765 		break;
1766 	}
1767 	default:
1768 		fprintf(stderr, "Unrecognized stat #%d\n", id);
1769 		return -EINVAL;
1770 	}
1771 	return 0;
1772 }
1773 
parse_stats_csv(const char * filename,struct stat_specs * specs,struct verif_stats ** statsp,int * stat_cntp)1774 static int parse_stats_csv(const char *filename, struct stat_specs *specs,
1775 			   struct verif_stats **statsp, int *stat_cntp)
1776 {
1777 	char line[4096];
1778 	FILE *f;
1779 	int err = 0;
1780 	bool header = true;
1781 
1782 	f = fopen(filename, "r");
1783 	if (!f) {
1784 		err = -errno;
1785 		fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
1786 		return err;
1787 	}
1788 
1789 	*stat_cntp = 0;
1790 
1791 	while (fgets(line, sizeof(line), f)) {
1792 		char *input = line, *state = NULL, *next;
1793 		struct verif_stats *st = NULL;
1794 		int col = 0, cnt = 0;
1795 
1796 		if (!header) {
1797 			void *tmp;
1798 
1799 			tmp = realloc(*statsp, (*stat_cntp + 1) * sizeof(**statsp));
1800 			if (!tmp) {
1801 				err = -ENOMEM;
1802 				goto cleanup;
1803 			}
1804 			*statsp = tmp;
1805 
1806 			st = &(*statsp)[*stat_cntp];
1807 			memset(st, 0, sizeof(*st));
1808 
1809 			*stat_cntp += 1;
1810 		}
1811 
1812 		while ((next = strtok_r(cnt++ ? NULL : input, ",\n", &state))) {
1813 			if (header) {
1814 				/* for the first line, set up spec stats */
1815 				err = parse_stat(next, specs);
1816 				if (err)
1817 					goto cleanup;
1818 				continue;
1819 			}
1820 
1821 			/* for all other lines, parse values based on spec */
1822 			if (col >= specs->spec_cnt) {
1823 				fprintf(stderr, "Found extraneous column #%d in row #%d of '%s'\n",
1824 					col, *stat_cntp, filename);
1825 				err = -EINVAL;
1826 				goto cleanup;
1827 			}
1828 			err = parse_stat_value(next, specs->ids[col], st);
1829 			if (err)
1830 				goto cleanup;
1831 			col++;
1832 		}
1833 
1834 		if (header) {
1835 			header = false;
1836 			continue;
1837 		}
1838 
1839 		if (col < specs->spec_cnt) {
1840 			fprintf(stderr, "Not enough columns in row #%d in '%s'\n",
1841 				*stat_cntp, filename);
1842 			err = -EINVAL;
1843 			goto cleanup;
1844 		}
1845 
1846 		if (!st->file_name || !st->prog_name) {
1847 			fprintf(stderr, "Row #%d in '%s' is missing file and/or program name\n",
1848 				*stat_cntp, filename);
1849 			err = -EINVAL;
1850 			goto cleanup;
1851 		}
1852 
1853 		/* in comparison mode we can only check filters after we
1854 		 * parsed entire line; if row should be ignored we pretend we
1855 		 * never parsed it
1856 		 */
1857 		if (!should_process_file_prog(st->file_name, st->prog_name)) {
1858 			free(st->file_name);
1859 			free(st->prog_name);
1860 			*stat_cntp -= 1;
1861 		}
1862 	}
1863 
1864 	if (!feof(f)) {
1865 		err = -errno;
1866 		fprintf(stderr, "Failed I/O for '%s': %d\n", filename, err);
1867 	}
1868 
1869 cleanup:
1870 	fclose(f);
1871 	return err;
1872 }
1873 
/* empty/zero stats for mismatched rows: both names are empty strings and
 * every other field is implicitly zero-initialized
 */
static const struct verif_stats fallback_stats = { .file_name = "", .prog_name = "" };
1876 
is_key_stat(enum stat_id id)1877 static bool is_key_stat(enum stat_id id)
1878 {
1879 	return id == FILE_NAME || id == PROG_NAME;
1880 }
1881 
output_comp_header_underlines(void)1882 static void output_comp_header_underlines(void)
1883 {
1884 	int i, j, k;
1885 
1886 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1887 		int id = env.output_spec.ids[i];
1888 		int max_j = is_key_stat(id) ? 1 : 3;
1889 
1890 		for (j = 0; j < max_j; j++) {
1891 			int len = env.output_spec.lens[3 * i + j];
1892 
1893 			printf("%s", i + j == 0 ? "" : COLUMN_SEP);
1894 
1895 			for (k = 0; k < len; k++)
1896 				printf("%c", HEADER_CHAR);
1897 		}
1898 	}
1899 	printf("\n");
1900 }
1901 
output_comp_headers(enum resfmt fmt)1902 static void output_comp_headers(enum resfmt fmt)
1903 {
1904 	static const char *table_sfxs[3] = {" (A)", " (B)", " (DIFF)"};
1905 	static const char *name_sfxs[3] = {"_base", "_comp", "_diff"};
1906 	int i, j, len;
1907 
1908 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1909 		int id = env.output_spec.ids[i];
1910 		/* key stats don't have A/B/DIFF columns, they are common for both data sets */
1911 		int max_j = is_key_stat(id) ? 1 : 3;
1912 
1913 		for (j = 0; j < max_j; j++) {
1914 			int *max_len = &env.output_spec.lens[3 * i + j];
1915 			bool last = (i == env.output_spec.spec_cnt - 1) && (j == max_j - 1);
1916 			const char *sfx;
1917 
1918 			switch (fmt) {
1919 			case RESFMT_TABLE_CALCLEN:
1920 				sfx = is_key_stat(id) ? "" : table_sfxs[j];
1921 				len = snprintf(NULL, 0, "%s%s", stat_defs[id].header, sfx);
1922 				if (len > *max_len)
1923 					*max_len = len;
1924 				break;
1925 			case RESFMT_TABLE:
1926 				sfx = is_key_stat(id) ? "" : table_sfxs[j];
1927 				printf("%s%-*s%s", i + j == 0 ? "" : COLUMN_SEP,
1928 				       *max_len - (int)strlen(sfx), stat_defs[id].header, sfx);
1929 				if (last)
1930 					printf("\n");
1931 				break;
1932 			case RESFMT_CSV:
1933 				sfx = is_key_stat(id) ? "" : name_sfxs[j];
1934 				printf("%s%s%s", i + j == 0 ? "" : ",", stat_defs[id].names[0], sfx);
1935 				if (last)
1936 					printf("\n");
1937 				break;
1938 			}
1939 		}
1940 	}
1941 
1942 	if (fmt == RESFMT_TABLE)
1943 		output_comp_header_underlines();
1944 }
1945 
output_comp_stats(const struct verif_stats_join * join_stats,enum resfmt fmt,bool last)1946 static void output_comp_stats(const struct verif_stats_join *join_stats,
1947 			      enum resfmt fmt, bool last)
1948 {
1949 	const struct verif_stats *base = join_stats->stats_a;
1950 	const struct verif_stats *comp = join_stats->stats_b;
1951 	char base_buf[1024] = {}, comp_buf[1024] = {}, diff_buf[1024] = {};
1952 	int i;
1953 
1954 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1955 		int id = env.output_spec.ids[i], len;
1956 		int *max_len_base = &env.output_spec.lens[3 * i + 0];
1957 		int *max_len_comp = &env.output_spec.lens[3 * i + 1];
1958 		int *max_len_diff = &env.output_spec.lens[3 * i + 2];
1959 		const char *base_str = NULL, *comp_str = NULL;
1960 		long base_val = 0, comp_val = 0, diff_val = 0;
1961 
1962 		prepare_value(base, id, &base_str, &base_val);
1963 		prepare_value(comp, id, &comp_str, &comp_val);
1964 
1965 		/* normalize all the outputs to be in string buffers for simplicity */
1966 		if (is_key_stat(id)) {
1967 			/* key stats (file and program name) are always strings */
1968 			if (base)
1969 				snprintf(base_buf, sizeof(base_buf), "%s", base_str);
1970 			else
1971 				snprintf(base_buf, sizeof(base_buf), "%s", comp_str);
1972 		} else if (base_str) {
1973 			snprintf(base_buf, sizeof(base_buf), "%s", base_str);
1974 			snprintf(comp_buf, sizeof(comp_buf), "%s", comp_str);
1975 			if (!base || !comp)
1976 				snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A");
1977 			else if (strcmp(base_str, comp_str) == 0)
1978 				snprintf(diff_buf, sizeof(diff_buf), "%s", "MATCH");
1979 			else
1980 				snprintf(diff_buf, sizeof(diff_buf), "%s", "MISMATCH");
1981 		} else {
1982 			double p = 0.0;
1983 
1984 			if (base)
1985 				snprintf(base_buf, sizeof(base_buf), "%ld", base_val);
1986 			else
1987 				snprintf(base_buf, sizeof(base_buf), "%s", "N/A");
1988 			if (comp)
1989 				snprintf(comp_buf, sizeof(comp_buf), "%ld", comp_val);
1990 			else
1991 				snprintf(comp_buf, sizeof(comp_buf), "%s", "N/A");
1992 
1993 			diff_val = comp_val - base_val;
1994 			if (!base || !comp) {
1995 				snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A");
1996 			} else {
1997 				if (base_val == 0) {
1998 					if (comp_val == base_val)
1999 						p = 0.0; /* avoid +0 (+100%) case */
2000 					else
2001 						p = comp_val < base_val ? -100.0 : 100.0;
2002 				} else {
2003 					 p = diff_val * 100.0 / base_val;
2004 				}
2005 				snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)", diff_val, p);
2006 			}
2007 		}
2008 
2009 		switch (fmt) {
2010 		case RESFMT_TABLE_CALCLEN:
2011 			len = strlen(base_buf);
2012 			if (len > *max_len_base)
2013 				*max_len_base = len;
2014 			if (!is_key_stat(id)) {
2015 				len = strlen(comp_buf);
2016 				if (len > *max_len_comp)
2017 					*max_len_comp = len;
2018 				len = strlen(diff_buf);
2019 				if (len > *max_len_diff)
2020 					*max_len_diff = len;
2021 			}
2022 			break;
2023 		case RESFMT_TABLE: {
2024 			/* string outputs are left-aligned, number outputs are right-aligned */
2025 			const char *fmt = base_str ? "%s%-*s" : "%s%*s";
2026 
2027 			printf(fmt, i == 0 ? "" : COLUMN_SEP, *max_len_base, base_buf);
2028 			if (!is_key_stat(id)) {
2029 				printf(fmt, COLUMN_SEP, *max_len_comp, comp_buf);
2030 				printf(fmt, COLUMN_SEP, *max_len_diff, diff_buf);
2031 			}
2032 			if (i == env.output_spec.spec_cnt - 1)
2033 				printf("\n");
2034 			break;
2035 		}
2036 		case RESFMT_CSV:
2037 			printf("%s%s", i == 0 ? "" : ",", base_buf);
2038 			if (!is_key_stat(id)) {
2039 				printf("%s%s", i == 0 ? "" : ",", comp_buf);
2040 				printf("%s%s", i == 0 ? "" : ",", diff_buf);
2041 			}
2042 			if (i == env.output_spec.spec_cnt - 1)
2043 				printf("\n");
2044 			break;
2045 		}
2046 	}
2047 
2048 	if (last && fmt == RESFMT_TABLE)
2049 		output_comp_header_underlines();
2050 }
2051 
cmp_stats_key(const struct verif_stats * base,const struct verif_stats * comp)2052 static int cmp_stats_key(const struct verif_stats *base, const struct verif_stats *comp)
2053 {
2054 	int r;
2055 
2056 	r = strcmp(base->file_name, comp->file_name);
2057 	if (r != 0)
2058 		return r;
2059 	return strcmp(base->prog_name, comp->prog_name);
2060 }
2061 
is_join_stat_filter_matched(struct filter * f,const struct verif_stats_join * stats)2062 static bool is_join_stat_filter_matched(struct filter *f, const struct verif_stats_join *stats)
2063 {
2064 	static const double eps = 1e-9;
2065 	const char *str = NULL;
2066 	double value = 0.0;
2067 
2068 	fetch_join_stat_value(stats, f->stat_id, f->stat_var, &str, &value);
2069 
2070 	if (f->abs)
2071 		value = fabs(value);
2072 
2073 	switch (f->op) {
2074 	case OP_EQ: return value > f->value - eps && value < f->value + eps;
2075 	case OP_NEQ: return value < f->value - eps || value > f->value + eps;
2076 	case OP_LT: return value < f->value - eps;
2077 	case OP_LE: return value <= f->value + eps;
2078 	case OP_GT: return value > f->value + eps;
2079 	case OP_GE: return value >= f->value - eps;
2080 	}
2081 
2082 	fprintf(stderr, "BUG: unknown filter op %d!\n", f->op);
2083 	return false;
2084 }
2085 
should_output_join_stats(const struct verif_stats_join * stats)2086 static bool should_output_join_stats(const struct verif_stats_join *stats)
2087 {
2088 	struct filter *f;
2089 	int i, allow_cnt = 0;
2090 
2091 	for (i = 0; i < env.deny_filter_cnt; i++) {
2092 		f = &env.deny_filters[i];
2093 		if (f->kind != FILTER_STAT)
2094 			continue;
2095 
2096 		if (is_join_stat_filter_matched(f, stats))
2097 			return false;
2098 	}
2099 
2100 	for (i = 0; i < env.allow_filter_cnt; i++) {
2101 		f = &env.allow_filters[i];
2102 		if (f->kind != FILTER_STAT)
2103 			continue;
2104 		allow_cnt++;
2105 
2106 		if (is_join_stat_filter_matched(f, stats))
2107 			return true;
2108 	}
2109 
2110 	/* if there are no stat allowed filters, pass everything through */
2111 	return allow_cnt == 0;
2112 }
2113 
handle_comparison_mode(void)2114 static int handle_comparison_mode(void)
2115 {
2116 	struct stat_specs base_specs = {}, comp_specs = {};
2117 	struct stat_specs tmp_sort_spec;
2118 	enum resfmt cur_fmt;
2119 	int err, i, j, last_idx, cnt;
2120 
2121 	if (env.filename_cnt != 2) {
2122 		fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n\n");
2123 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2124 		return -EINVAL;
2125 	}
2126 
2127 	err = parse_stats_csv(env.filenames[0], &base_specs,
2128 			      &env.baseline_stats, &env.baseline_stat_cnt);
2129 	if (err) {
2130 		fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err);
2131 		return err;
2132 	}
2133 	err = parse_stats_csv(env.filenames[1], &comp_specs,
2134 			      &env.prog_stats, &env.prog_stat_cnt);
2135 	if (err) {
2136 		fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[1], err);
2137 		return err;
2138 	}
2139 
2140 	/* To keep it simple we validate that the set and order of stats in
2141 	 * both CSVs are exactly the same. This can be lifted with a bit more
2142 	 * pre-processing later.
2143 	 */
2144 	if (base_specs.spec_cnt != comp_specs.spec_cnt) {
2145 		fprintf(stderr, "Number of stats in '%s' and '%s' differs (%d != %d)!\n",
2146 			env.filenames[0], env.filenames[1],
2147 			base_specs.spec_cnt, comp_specs.spec_cnt);
2148 		return -EINVAL;
2149 	}
2150 	for (i = 0; i < base_specs.spec_cnt; i++) {
2151 		if (base_specs.ids[i] != comp_specs.ids[i]) {
2152 			fprintf(stderr, "Stats composition differs between '%s' and '%s' (%s != %s)!\n",
2153 				env.filenames[0], env.filenames[1],
2154 				stat_defs[base_specs.ids[i]].names[0],
2155 				stat_defs[comp_specs.ids[i]].names[0]);
2156 			return -EINVAL;
2157 		}
2158 	}
2159 
2160 	/* Replace user-specified sorting spec with file+prog sorting rule to
2161 	 * be able to join two datasets correctly. Once we are done, we will
2162 	 * restore the original sort spec.
2163 	 */
2164 	tmp_sort_spec = env.sort_spec;
2165 	env.sort_spec = join_sort_spec;
2166 	qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
2167 	qsort(env.baseline_stats, env.baseline_stat_cnt, sizeof(*env.baseline_stats), cmp_prog_stats);
2168 	env.sort_spec = tmp_sort_spec;
2169 
2170 	/* Join two datasets together. If baseline and comparison datasets
2171 	 * have different subset of rows (we match by 'object + prog' as
2172 	 * a unique key) then assume empty/missing/zero value for rows that
2173 	 * are missing in the opposite data set.
2174 	 */
2175 	i = j = 0;
2176 	while (i < env.baseline_stat_cnt || j < env.prog_stat_cnt) {
2177 		const struct verif_stats *base, *comp;
2178 		struct verif_stats_join *join;
2179 		void *tmp;
2180 		int r;
2181 
2182 		base = i < env.baseline_stat_cnt ? &env.baseline_stats[i] : &fallback_stats;
2183 		comp = j < env.prog_stat_cnt ? &env.prog_stats[j] : &fallback_stats;
2184 
2185 		if (!base->file_name || !base->prog_name) {
2186 			fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n",
2187 				i, env.filenames[0]);
2188 			return -EINVAL;
2189 		}
2190 		if (!comp->file_name || !comp->prog_name) {
2191 			fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n",
2192 				j, env.filenames[1]);
2193 			return -EINVAL;
2194 		}
2195 
2196 		tmp = realloc(env.join_stats, (env.join_stat_cnt + 1) * sizeof(*env.join_stats));
2197 		if (!tmp)
2198 			return -ENOMEM;
2199 		env.join_stats = tmp;
2200 
2201 		join = &env.join_stats[env.join_stat_cnt];
2202 		memset(join, 0, sizeof(*join));
2203 
2204 		r = cmp_stats_key(base, comp);
2205 		if (r == 0) {
2206 			join->file_name = base->file_name;
2207 			join->prog_name = base->prog_name;
2208 			join->stats_a = base;
2209 			join->stats_b = comp;
2210 			i++;
2211 			j++;
2212 		} else if (base != &fallback_stats && (comp == &fallback_stats || r < 0)) {
2213 			join->file_name = base->file_name;
2214 			join->prog_name = base->prog_name;
2215 			join->stats_a = base;
2216 			join->stats_b = NULL;
2217 			i++;
2218 		} else if (comp != &fallback_stats && (base == &fallback_stats || r > 0)) {
2219 			join->file_name = comp->file_name;
2220 			join->prog_name = comp->prog_name;
2221 			join->stats_a = NULL;
2222 			join->stats_b = comp;
2223 			j++;
2224 		} else {
2225 			fprintf(stderr, "%s:%d: should never reach here i=%i, j=%i",
2226 				__FILE__, __LINE__, i, j);
2227 			return -EINVAL;
2228 		}
2229 		env.join_stat_cnt += 1;
2230 	}
2231 
2232 	/* now sort joined results according to sort spec */
2233 	qsort(env.join_stats, env.join_stat_cnt, sizeof(*env.join_stats), cmp_join_stats);
2234 
2235 	/* for human-readable table output we need to do extra pass to
2236 	 * calculate column widths, so we substitute current output format
2237 	 * with RESFMT_TABLE_CALCLEN and later revert it back to RESFMT_TABLE
2238 	 * and do everything again.
2239 	 */
2240 	if (env.out_fmt == RESFMT_TABLE)
2241 		cur_fmt = RESFMT_TABLE_CALCLEN;
2242 	else
2243 		cur_fmt = env.out_fmt;
2244 
2245 one_more_time:
2246 	output_comp_headers(cur_fmt);
2247 
2248 	last_idx = -1;
2249 	cnt = 0;
2250 	for (i = 0; i < env.join_stat_cnt; i++) {
2251 		const struct verif_stats_join *join = &env.join_stats[i];
2252 
2253 		if (!should_output_join_stats(join))
2254 			continue;
2255 
2256 		if (env.top_n && cnt >= env.top_n)
2257 			break;
2258 
2259 		if (cur_fmt == RESFMT_TABLE_CALCLEN)
2260 			last_idx = i;
2261 
2262 		output_comp_stats(join, cur_fmt, i == last_idx);
2263 
2264 		cnt++;
2265 	}
2266 
2267 	if (cur_fmt == RESFMT_TABLE_CALCLEN) {
2268 		cur_fmt = RESFMT_TABLE;
2269 		goto one_more_time; /* ... this time with feeling */
2270 	}
2271 
2272 	return 0;
2273 }
2274 
is_stat_filter_matched(struct filter * f,const struct verif_stats * stats)2275 static bool is_stat_filter_matched(struct filter *f, const struct verif_stats *stats)
2276 {
2277 	long value = stats->stats[f->stat_id];
2278 
2279 	if (f->abs)
2280 		value = value < 0 ? -value : value;
2281 
2282 	switch (f->op) {
2283 	case OP_EQ: return value == f->value;
2284 	case OP_NEQ: return value != f->value;
2285 	case OP_LT: return value < f->value;
2286 	case OP_LE: return value <= f->value;
2287 	case OP_GT: return value > f->value;
2288 	case OP_GE: return value >= f->value;
2289 	}
2290 
2291 	fprintf(stderr, "BUG: unknown filter op %d!\n", f->op);
2292 	return false;
2293 }
2294 
should_output_stats(const struct verif_stats * stats)2295 static bool should_output_stats(const struct verif_stats *stats)
2296 {
2297 	struct filter *f;
2298 	int i, allow_cnt = 0;
2299 
2300 	for (i = 0; i < env.deny_filter_cnt; i++) {
2301 		f = &env.deny_filters[i];
2302 		if (f->kind != FILTER_STAT)
2303 			continue;
2304 
2305 		if (is_stat_filter_matched(f, stats))
2306 			return false;
2307 	}
2308 
2309 	for (i = 0; i < env.allow_filter_cnt; i++) {
2310 		f = &env.allow_filters[i];
2311 		if (f->kind != FILTER_STAT)
2312 			continue;
2313 		allow_cnt++;
2314 
2315 		if (is_stat_filter_matched(f, stats))
2316 			return true;
2317 	}
2318 
2319 	/* if there are no stat allowed filters, pass everything through */
2320 	return allow_cnt == 0;
2321 }
2322 
output_prog_stats(void)2323 static void output_prog_stats(void)
2324 {
2325 	const struct verif_stats *stats;
2326 	int i, last_stat_idx = 0, cnt = 0;
2327 
2328 	if (env.out_fmt == RESFMT_TABLE) {
2329 		/* calculate column widths */
2330 		output_headers(RESFMT_TABLE_CALCLEN);
2331 		for (i = 0; i < env.prog_stat_cnt; i++) {
2332 			stats = &env.prog_stats[i];
2333 			if (!should_output_stats(stats))
2334 				continue;
2335 			output_stats(stats, RESFMT_TABLE_CALCLEN, false);
2336 			last_stat_idx = i;
2337 		}
2338 	}
2339 
2340 	/* actually output the table */
2341 	output_headers(env.out_fmt);
2342 	for (i = 0; i < env.prog_stat_cnt; i++) {
2343 		stats = &env.prog_stats[i];
2344 		if (!should_output_stats(stats))
2345 			continue;
2346 		if (env.top_n && cnt >= env.top_n)
2347 			break;
2348 		output_stats(stats, env.out_fmt, i == last_stat_idx);
2349 		cnt++;
2350 	}
2351 }
2352 
handle_verif_mode(void)2353 static int handle_verif_mode(void)
2354 {
2355 	int i, err;
2356 
2357 	if (env.filename_cnt == 0) {
2358 		fprintf(stderr, "Please provide path to BPF object file!\n\n");
2359 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2360 		return -EINVAL;
2361 	}
2362 
2363 	for (i = 0; i < env.filename_cnt; i++) {
2364 		err = process_obj(env.filenames[i]);
2365 		if (err) {
2366 			fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err);
2367 			return err;
2368 		}
2369 	}
2370 
2371 	qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
2372 
2373 	output_prog_stats();
2374 
2375 	return 0;
2376 }
2377 
handle_replay_mode(void)2378 static int handle_replay_mode(void)
2379 {
2380 	struct stat_specs specs = {};
2381 	int err;
2382 
2383 	if (env.filename_cnt != 1) {
2384 		fprintf(stderr, "Replay mode expects exactly one input CSV file!\n\n");
2385 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2386 		return -EINVAL;
2387 	}
2388 
2389 	err = parse_stats_csv(env.filenames[0], &specs,
2390 			      &env.prog_stats, &env.prog_stat_cnt);
2391 	if (err) {
2392 		fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err);
2393 		return err;
2394 	}
2395 
2396 	qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
2397 
2398 	output_prog_stats();
2399 
2400 	return 0;
2401 }
2402 
main(int argc,char ** argv)2403 int main(int argc, char **argv)
2404 {
2405 	int err = 0, i;
2406 
2407 	if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
2408 		return 1;
2409 
2410 	if (env.show_version) {
2411 		printf("%s\n", argp_program_version);
2412 		return 0;
2413 	}
2414 
2415 	if (env.verbose && env.quiet) {
2416 		fprintf(stderr, "Verbose and quiet modes are incompatible, please specify just one or neither!\n\n");
2417 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2418 		return 1;
2419 	}
2420 	if (env.verbose && env.log_level == 0)
2421 		env.log_level = 1;
2422 
2423 	if (env.output_spec.spec_cnt == 0) {
2424 		if (env.out_fmt == RESFMT_CSV)
2425 			env.output_spec = default_csv_output_spec;
2426 		else
2427 			env.output_spec = default_output_spec;
2428 	}
2429 	if (env.sort_spec.spec_cnt == 0)
2430 		env.sort_spec = default_sort_spec;
2431 
2432 	if (env.comparison_mode && env.replay_mode) {
2433 		fprintf(stderr, "Can't specify replay and comparison mode at the same time!\n\n");
2434 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2435 		return 1;
2436 	}
2437 
2438 	if (env.comparison_mode)
2439 		err = handle_comparison_mode();
2440 	else if (env.replay_mode)
2441 		err = handle_replay_mode();
2442 	else
2443 		err = handle_verif_mode();
2444 
2445 	free_verif_stats(env.prog_stats, env.prog_stat_cnt);
2446 	free_verif_stats(env.baseline_stats, env.baseline_stat_cnt);
2447 	free(env.join_stats);
2448 	for (i = 0; i < env.filename_cnt; i++)
2449 		free(env.filenames[i]);
2450 	free(env.filenames);
2451 	for (i = 0; i < env.allow_filter_cnt; i++) {
2452 		free(env.allow_filters[i].any_glob);
2453 		free(env.allow_filters[i].file_glob);
2454 		free(env.allow_filters[i].prog_glob);
2455 	}
2456 	free(env.allow_filters);
2457 	for (i = 0; i < env.deny_filter_cnt; i++) {
2458 		free(env.deny_filters[i].any_glob);
2459 		free(env.deny_filters[i].file_glob);
2460 		free(env.deny_filters[i].prog_glob);
2461 	}
2462 	free(env.deny_filters);
2463 	return -err;
2464 }
2465