/* SPDX-License-Identifier: GPL-2.0 */
/**
 * Generic event filter for sampling events in BPF.
 *
 * The BPF program is fixed; it just reads the filter expressions in the
 * 'filters' map and compares them against the sample data in order to reject
 * samples that don't match.  Each filter expression contains a sample flag
 * (term) to compare, an operation (==, >=, and so on) and a value.
 *
 * Note that each entry holds an array of filter expressions and it only
 * succeeds when all of the expressions are satisfied.  Logical OR is supported
 * through a GROUP operation, which is satisfied when any of its member
 * expressions evaluates to true.  Nested GROUP operations are not allowed for
 * now.
 *
 * To support non-root users, the filters map can be loaded and pinned in the
 * BPF filesystem by root (perf record --setup-filter pin).  Each user then
 * gets a new entry in the shared filters map to fill with filter expressions,
 * and the BPF program finds the filter using (task-id, event-id) as a key.
 *
 * The pinned BPF object (shared for regular users) has:
 *
 *                  event_hash                   |
 *                  |        |                   |
 *   event->id ---> |   id   | ---+   idx_hash   |     filters
 *                  |        |    |   |      |   |    |       |
 *                  |  ....  |    +-> |  idx | --+--> | exprs | --->  perf_bpf_filter_entry[]
 *                                |   |      |   |    |       |               .op
 *   task id (tgid) --------------+   | .... |   |    |  ...  |               .term (+ part)
 *                                               |                            .value
 *                                               |
 *   ======= (root would skip this part) ========                     (compares it in a loop)
 *
 * This is used for per-task use cases, while system-wide profiling (normally
 * done by root) uses a separate copy of the program and the maps of its own,
 * so that it can proceed even if a lot of non-root users are using the
 * filters at the same time.  In this case the filters map has a single entry
 * and there is no need to use the hash maps to get the index (key) of the
 * filters map (IOW it's always 0).
 *
 * The BPF program returns 1 to accept the sample or 0 to drop it.
 * The 'dropped' map keeps track of how many samples were dropped by the
 * filter and the count is reported as lost samples.
 */
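
/*
 * Illustrative usage (example only, not code used by this file): a filter
 * string is given per event on the perf record command line and is parsed by
 * perf_bpf_filter__parse() below, e.g.
 *
 *   $ perf record -e cycles --filter 'period > 1000' -- ./workload
 *
 * For non-root users, root must have pinned the shared objects first with
 * 'perf record --setup-filter pin' as described above.
 */
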
#include <stdlib.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/stat.h>

#include <bpf/bpf.h>
#include <linux/err.h>
#include <linux/list.h>
#include <api/fs/fs.h>
#include <internal/xyarray.h>
#include <perf/threadmap.h>

#include "util/cap.h"
#include "util/debug.h"
#include "util/evsel.h"
#include "util/target.h"
#include "util/bpf-utils.h"

#include "util/bpf-filter.h"
#include <util/bpf-filter-flex.h>
#include <util/bpf-filter-bison.h>

#include "bpf_skel/sample-filter.h"
#include "bpf_skel/sample_filter.skel.h"

#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))

#define __PERF_SAMPLE_TYPE(tt, st, opt)	{ tt, #st, opt }
#define PERF_SAMPLE_TYPE(_st, opt)	__PERF_SAMPLE_TYPE(PBF_TERM_##_st, PERF_SAMPLE_##_st, opt)

/* Index in the pinned 'filters' map.  Should be released after use. */
struct pinned_filter_idx {
	struct list_head list;
	struct evsel *evsel;
	u64 event_id;
	int hash_idx;
};

static LIST_HEAD(pinned_filters);

static const struct perf_sample_info {
	enum perf_bpf_filter_term type;
	const char *name;
	const char *option;
} sample_table[] = {
	/* default sample flags */
	PERF_SAMPLE_TYPE(IP, NULL),
	PERF_SAMPLE_TYPE(TID, NULL),
	PERF_SAMPLE_TYPE(PERIOD, NULL),
	/* flags mostly set by default, but still have options */
	PERF_SAMPLE_TYPE(ID, "--sample-identifier"),
	PERF_SAMPLE_TYPE(CPU, "--sample-cpu"),
	PERF_SAMPLE_TYPE(TIME, "-T"),
	/* optional sample flags */
	PERF_SAMPLE_TYPE(ADDR, "-d"),
	PERF_SAMPLE_TYPE(DATA_SRC, "-d"),
	PERF_SAMPLE_TYPE(PHYS_ADDR, "--phys-data"),
	PERF_SAMPLE_TYPE(WEIGHT, "-W"),
	PERF_SAMPLE_TYPE(WEIGHT_STRUCT, "-W"),
	PERF_SAMPLE_TYPE(TRANSACTION, "--transaction"),
	PERF_SAMPLE_TYPE(CODE_PAGE_SIZE, "--code-page-size"),
	PERF_SAMPLE_TYPE(DATA_PAGE_SIZE, "--data-page-size"),
	PERF_SAMPLE_TYPE(CGROUP, "--all-cgroups"),
};
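
/*
 * For illustration, the macros above expand an entry like
 * PERF_SAMPLE_TYPE(CPU, "--sample-cpu") into
 *
 *   { PBF_TERM_CPU, "PERF_SAMPLE_CPU", "--sample-cpu" }
 *
 * i.e. a filter term, the stringified sample flag used in error messages,
 * and the perf record option that enables the flag.
 */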

static int get_pinned_fd(const char *name);

static const struct perf_sample_info *get_sample_info(enum perf_bpf_filter_term type)
{
	size_t i;

	for (i = 0; i < ARRAY_SIZE(sample_table); i++) {
		if (sample_table[i].type == type)
			return &sample_table[i];
	}
	return NULL;
}

static int check_sample_flags(struct evsel *evsel, struct perf_bpf_filter_expr *expr)
{
	const struct perf_sample_info *info;

	if (expr->term >= PBF_TERM_SAMPLE_START && expr->term <= PBF_TERM_SAMPLE_END &&
	    (evsel->core.attr.sample_type & (1 << (expr->term - PBF_TERM_SAMPLE_START))))
		return 0;

	if (expr->term == PBF_TERM_UID || expr->term == PBF_TERM_GID) {
		/* Not dependent on the sample_type as computed from a BPF helper. */
		return 0;
	}

	if (expr->op == PBF_OP_GROUP_BEGIN) {
		struct perf_bpf_filter_expr *group;

		list_for_each_entry(group, &expr->groups, list) {
			if (check_sample_flags(evsel, group) < 0)
				return -1;
		}
		return 0;
	}

	info = get_sample_info(expr->term);
	if (info == NULL) {
		pr_err("Error: %s event does not have sample flags %d\n",
		       evsel__name(evsel), expr->term);
		return -1;
	}

	pr_err("Error: %s event does not have %s\n", evsel__name(evsel), info->name);
	if (info->option)
		pr_err(" Hint: please add %s option to perf record\n", info->option);
	return -1;
}

static int get_filter_entries(struct evsel *evsel, struct perf_bpf_filter_entry *entry)
{
	int i = 0;
	struct perf_bpf_filter_expr *expr;

	list_for_each_entry(expr, &evsel->bpf_filters, list) {
		if (check_sample_flags(evsel, expr) < 0)
			return -EINVAL;

		if (i == MAX_FILTERS)
			return -E2BIG;

		entry[i].op = expr->op;
		entry[i].part = expr->part;
		entry[i].term = expr->term;
		entry[i].value = expr->val;
		i++;

		if (expr->op == PBF_OP_GROUP_BEGIN) {
			struct perf_bpf_filter_expr *group;

			list_for_each_entry(group, &expr->groups, list) {
				if (i == MAX_FILTERS)
					return -E2BIG;

				entry[i].op = group->op;
				entry[i].part = group->part;
				entry[i].term = group->term;
				entry[i].value = group->val;
				i++;
			}

			if (i == MAX_FILTERS)
				return -E2BIG;

			entry[i].op = PBF_OP_GROUP_END;
			i++;
		}
	}

	if (i < MAX_FILTERS) {
		/* to terminate the loop early */
		entry[i].op = PBF_OP_DONE;
		i++;
	}
	return 0;
}
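
/*
 * Illustration of the resulting layout: for a filter with one plain
 * expression A followed by an OR group of B and C, get_filter_entries()
 * above fills the entry array as
 *
 *   entry[0] = A                      (top-level expressions are ANDed)
 *   entry[1] = the group expression   (op == PBF_OP_GROUP_BEGIN)
 *   entry[2] = B
 *   entry[3] = C
 *   entry[4] = PBF_OP_GROUP_END
 *   entry[5] = PBF_OP_DONE            (terminator, if there is room left)
 */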

static int convert_to_tgid(int tid)
{
	char path[128];
	char *buf, *p, *q;
	int tgid;
	size_t len;

	scnprintf(path, sizeof(path), "%d/status", tid);
	if (procfs__read_str(path, &buf, &len) < 0)
		return -1;

	p = strstr(buf, "Tgid:");
	if (p == NULL) {
		free(buf);
		return -1;
	}

	tgid = strtol(p + 6, &q, 0);
	free(buf);
	if (*q != '\n')
		return -1;

	return tgid;
}

/*
 * The event might be closed already so we cannot get the list of ids using FD
 * like in create_event_hash() below.  Instead, iterate the event_hash map and
 * delete all entries that have the event id as a key.
 */
static void destroy_event_hash(u64 event_id)
{
	int fd;
	u64 key, *prev_key = NULL;
	int num = 0, alloced = 32;
	u64 *ids = calloc(alloced, sizeof(*ids));

	if (ids == NULL)
		return;

	fd = get_pinned_fd("event_hash");
	if (fd < 0) {
		pr_debug("cannot get fd for 'event_hash' map\n");
		free(ids);
		return;
	}

	/* Iterate the whole map to collect keys for the event id. */
	while (!bpf_map_get_next_key(fd, prev_key, &key)) {
		u64 id;

		if (bpf_map_lookup_elem(fd, &key, &id) == 0 && id == event_id) {
			if (num == alloced) {
				void *tmp;

				alloced *= 2;
				tmp = realloc(ids, alloced * sizeof(*ids));
				if (tmp == NULL)
					break;

				ids = tmp;
			}
			ids[num++] = key;
		}

		prev_key = &key;
	}

	for (int i = 0; i < num; i++)
		bpf_map_delete_elem(fd, &ids[i]);

	free(ids);
	close(fd);
}

/*
 * Return a representative id if ok, or 0 for failures.
 *
 * The perf_event->id is good for this, but an evsel would have multiple
 * instances for CPUs and tasks.  So pick up the first id and setup a hash
 * from id of each instance to the representative id (the first one).
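 *
 * For example (illustrative numbers), an evsel opened on 4 CPUs might get
 * the ids {10, 11, 12, 13}; each of them is then mapped to the
 * representative id 10 in the event_hash map.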
 */
static u64 create_event_hash(struct evsel *evsel)
{
	int x, y, fd;
	u64 the_id = 0, id;

	fd = get_pinned_fd("event_hash");
	if (fd < 0) {
		pr_err("cannot get fd for 'event_hash' map\n");
		return 0;
	}

	for (x = 0; x < xyarray__max_x(evsel->core.fd); x++) {
		for (y = 0; y < xyarray__max_y(evsel->core.fd); y++) {
			int ret = ioctl(FD(evsel, x, y), PERF_EVENT_IOC_ID, &id);

			if (ret < 0) {
				pr_err("Failed to get the event id\n");
				if (the_id)
					destroy_event_hash(the_id);
				return 0;
			}

			if (the_id == 0)
				the_id = id;

			bpf_map_update_elem(fd, &id, &the_id, BPF_ANY);
		}
	}

	close(fd);
	return the_id;
}

static void destroy_idx_hash(struct pinned_filter_idx *pfi)
{
	int fd, nr;
	struct perf_thread_map *threads;

	fd = get_pinned_fd("filters");
	bpf_map_delete_elem(fd, &pfi->hash_idx);
	close(fd);

	if (pfi->event_id)
		destroy_event_hash(pfi->event_id);

	threads = perf_evsel__threads(&pfi->evsel->core);
	if (threads == NULL)
		return;

	fd = get_pinned_fd("idx_hash");
	nr = perf_thread_map__nr(threads);
	for (int i = 0; i < nr; i++) {
		/* The target task might be dead already, just try the pid */
		struct idx_hash_key key = {
			.evt_id = pfi->event_id,
			.tgid = perf_thread_map__pid(threads, i),
		};

		bpf_map_delete_elem(fd, &key);
	}
	close(fd);
}

/* Maintain a hashmap from (tgid, event-id) to filter index */
static int create_idx_hash(struct evsel *evsel, struct perf_bpf_filter_entry *entry)
{
	int filter_idx;
	int fd, nr, last;
	u64 event_id = 0;
	struct pinned_filter_idx *pfi = NULL;
	struct perf_thread_map *threads;

	fd = get_pinned_fd("filters");
	if (fd < 0) {
		pr_err("cannot get fd for 'filters' map\n");
		return fd;
	}

	/* Find the first available entry in the filters map */
	for (filter_idx = 0; filter_idx < MAX_FILTERS; filter_idx++) {
		if (bpf_map_update_elem(fd, &filter_idx, entry, BPF_NOEXIST) == 0)
			break;
	}
	close(fd);

	if (filter_idx == MAX_FILTERS) {
		pr_err("Too many users for the filter map\n");
		return -EBUSY;
	}

	pfi = zalloc(sizeof(*pfi));
	if (pfi == NULL) {
		pr_err("Cannot save pinned filter index\n");
		return -ENOMEM;
	}

	pfi->evsel = evsel;
	pfi->hash_idx = filter_idx;

	event_id = create_event_hash(evsel);
	if (event_id == 0) {
		pr_err("Cannot update the event hash\n");
		goto err;
	}

	pfi->event_id = event_id;

	threads = perf_evsel__threads(&evsel->core);
	if (threads == NULL) {
		pr_err("Cannot get the thread list of the event\n");
		goto err;
	}

	/* save the index to a hash map */
	fd = get_pinned_fd("idx_hash");
	if (fd < 0) {
		pr_err("cannot get fd for 'idx_hash' map\n");
		goto err;
	}

	last = -1;
	nr = perf_thread_map__nr(threads);
	for (int i = 0; i < nr; i++) {
		int pid = perf_thread_map__pid(threads, i);
		int tgid;
		struct idx_hash_key key = {
			.evt_id = event_id,
		};

		/* it actually needs the tgid, so get it from /proc. */
		tgid = convert_to_tgid(pid);
		if (tgid < 0) {
			/* the thread may be dead, ignore. */
			continue;
		}

		if (tgid == last)
			continue;
		last = tgid;
		key.tgid = tgid;

		if (bpf_map_update_elem(fd, &key, &filter_idx, BPF_ANY) < 0) {
			pr_err("Failed to update the idx_hash\n");
			close(fd);
			goto err;
		}
		pr_debug("bpf-filter: idx_hash (task=%d,%s) -> %d\n",
			 tgid, evsel__name(evsel), filter_idx);
	}

	list_add(&pfi->list, &pinned_filters);
	close(fd);
	return filter_idx;

err:
	destroy_idx_hash(pfi);
	free(pfi);
	return -1;
}

int perf_bpf_filter__prepare(struct evsel *evsel, struct target *target)
{
	int i, x, y, fd, ret;
	struct sample_filter_bpf *skel = NULL;
	struct bpf_program *prog;
	struct bpf_link *link;
	struct perf_bpf_filter_entry *entry;
	bool needs_idx_hash = !target__has_cpu(target);
#if LIBBPF_CURRENT_VERSION_GEQ(1, 7)
	DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts,
			    .dont_enable = true);
#else
	DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
#endif

	entry = calloc(MAX_FILTERS, sizeof(*entry));
	if (entry == NULL)
		return -1;

	ret = get_filter_entries(evsel, entry);
	if (ret < 0) {
		pr_err("Failed to process filter entries\n");
		goto err;
	}

	if (needs_idx_hash && geteuid() != 0) {
		int zero = 0;

		/* The filters map is shared among other processes */
		ret = create_idx_hash(evsel, entry);
		if (ret < 0)
			goto err;

		fd = get_pinned_fd("dropped");
		if (fd < 0) {
			ret = fd;
			goto err;
		}

		/* Reset the lost count */
		bpf_map_update_elem(fd, &ret, &zero, BPF_ANY);
		close(fd);

		fd = get_pinned_fd("perf_sample_filter");
		if (fd < 0) {
			ret = fd;
			goto err;
		}

		for (x = 0; x < xyarray__max_x(evsel->core.fd); x++) {
			for (y = 0; y < xyarray__max_y(evsel->core.fd); y++) {
				ret = ioctl(FD(evsel, x, y), PERF_EVENT_IOC_SET_BPF, fd);
				if (ret < 0) {
					pr_err("Failed to attach perf sample-filter\n");
					close(fd);
					goto err;
				}
			}
		}

		close(fd);
		free(entry);
		return 0;
	}

	skel = sample_filter_bpf__open_and_load();
	if (!skel) {
		ret = -errno;
		pr_err("Failed to load perf sample-filter BPF skeleton\n");
		goto err;
	}

	i = 0;
	fd = bpf_map__fd(skel->maps.filters);

	/* The filters map has only one entry in this case */
	if (bpf_map_update_elem(fd, &i, entry, BPF_ANY) < 0) {
		ret = -errno;
		pr_err("Failed to update the filter map\n");
		goto err;
	}

	prog = skel->progs.perf_sample_filter;
	for (x = 0; x < xyarray__max_x(evsel->core.fd); x++) {
		for (y = 0; y < xyarray__max_y(evsel->core.fd); y++) {
			link = bpf_program__attach_perf_event_opts(prog, FD(evsel, x, y),
								   &pe_opts);
			if (IS_ERR(link)) {
				pr_err("Failed to attach perf sample-filter program\n");
				ret = PTR_ERR(link);
				goto err;
			}
		}
	}
	free(entry);
	evsel->bpf_skel = skel;
	return 0;

err:
	free(entry);
	if (!list_empty(&pinned_filters)) {
		struct pinned_filter_idx *pfi, *tmp;

		list_for_each_entry_safe(pfi, tmp, &pinned_filters, list) {
			destroy_idx_hash(pfi);
			list_del(&pfi->list);
			free(pfi);
		}
	}
	sample_filter_bpf__destroy(skel);
	return ret;
}

int perf_bpf_filter__destroy(struct evsel *evsel)
{
	struct perf_bpf_filter_expr *expr, *tmp;
	struct pinned_filter_idx *pfi, *pos;

	list_for_each_entry_safe(expr, tmp, &evsel->bpf_filters, list) {
		list_del(&expr->list);
		free(expr);
	}
	sample_filter_bpf__destroy(evsel->bpf_skel);

	list_for_each_entry_safe(pfi, pos, &pinned_filters, list) {
		destroy_idx_hash(pfi);
		list_del(&pfi->list);
		free(pfi);
	}
	return 0;
}

u64 perf_bpf_filter__lost_count(struct evsel *evsel)
{
	int count = 0;

	if (list_empty(&evsel->bpf_filters))
		return 0;

	if (!list_empty(&pinned_filters)) {
		int fd = get_pinned_fd("dropped");
		struct pinned_filter_idx *pfi;

		if (fd < 0)
			return 0;

		list_for_each_entry(pfi, &pinned_filters, list) {
			if (pfi->evsel != evsel)
				continue;

			bpf_map_lookup_elem(fd, &pfi->hash_idx, &count);
			break;
		}
		close(fd);
	} else if (evsel->bpf_skel) {
		struct sample_filter_bpf *skel = evsel->bpf_skel;
		int fd = bpf_map__fd(skel->maps.dropped);
		int idx = 0;

		bpf_map_lookup_elem(fd, &idx, &count);
	}

	return count;
}

struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(enum perf_bpf_filter_term term,
						       int part,
						       enum perf_bpf_filter_op op,
						       unsigned long val)
{
	struct perf_bpf_filter_expr *expr;

	expr = malloc(sizeof(*expr));
	if (expr != NULL) {
		expr->term = term;
		expr->part = part;
		expr->op = op;
		expr->val = val;
		INIT_LIST_HEAD(&expr->groups);
	}
	return expr;
}

static bool check_bpf_filter_capable(void)
{
	bool used_root;

	if (perf_cap__capable(CAP_BPF, &used_root))
		return true;

	if (!used_root) {
		/* Check if root already pinned the filter programs and maps */
		int fd = get_pinned_fd("filters");

		if (fd >= 0) {
			close(fd);
			return true;
		}
	}

	pr_err("Error: BPF filter only works for %s!\n"
	       "\tPlease run 'perf record --setup-filter pin' as root first.\n",
	       used_root ? "root" : "users with the CAP_BPF capability");

	return false;
}

int perf_bpf_filter__parse(struct list_head *expr_head, const char *str)
{
	YY_BUFFER_STATE buffer;
	int ret;

	if (!check_bpf_filter_capable())
		return -EPERM;

	buffer = perf_bpf_filter__scan_string(str);

	ret = perf_bpf_filter_parse(expr_head);

	perf_bpf_filter__flush_buffer(buffer);
	perf_bpf_filter__delete_buffer(buffer);
	perf_bpf_filter_lex_destroy();

	return ret;
}

int perf_bpf_filter__pin(void)
{
	struct sample_filter_bpf *skel;
	char *path = NULL;
	int dir_fd, ret = -1;

	skel = sample_filter_bpf__open();
	if (!skel) {
		ret = -errno;
		pr_err("Failed to open perf sample-filter BPF skeleton\n");
		goto err;
	}

	/* pinned program will use pid-hash */
	bpf_map__set_max_entries(skel->maps.filters, MAX_FILTERS);
	bpf_map__set_max_entries(skel->maps.event_hash, MAX_EVT_HASH);
	bpf_map__set_max_entries(skel->maps.idx_hash, MAX_IDX_HASH);
	bpf_map__set_max_entries(skel->maps.dropped, MAX_FILTERS);
	skel->rodata->use_idx_hash = 1;

	if (sample_filter_bpf__load(skel) < 0) {
		ret = -errno;
		pr_err("Failed to load perf sample-filter BPF skeleton\n");
		goto err;
	}

	if (asprintf(&path, "%s/fs/bpf/%s", sysfs__mountpoint(),
		     PERF_BPF_FILTER_PIN_PATH) < 0) {
		ret = -errno;
		pr_err("Failed to allocate pathname in the BPF-fs\n");
		goto err;
	}

	ret = bpf_object__pin(skel->obj, path);
	if (ret < 0) {
		pr_err("Failed to pin BPF filter objects\n");
		goto err;
	}

	/* setup access permissions for the pinned objects */
	dir_fd = open(path, O_PATH);
	if (dir_fd < 0) {
		bpf_object__unpin(skel->obj, path);
		ret = dir_fd;
		goto err;
	}

	/* BPF-fs root has the sticky bit */
	if (fchmodat(dir_fd, "..", 01755, 0) < 0) {
		pr_debug("chmod for BPF-fs failed\n");
		ret = -errno;
		goto err_close;
	}

	/* perf_filter directory */
	if (fchmodat(dir_fd, ".", 0755, 0) < 0) {
		pr_debug("chmod for perf_filter directory failed?\n");
		ret = -errno;
		goto err_close;
	}

	/* programs need write permission for some reason */
	if (fchmodat(dir_fd, "perf_sample_filter", 0777, 0) < 0) {
		pr_debug("chmod for perf_sample_filter failed\n");
		ret = -errno;
	}
	/* maps */
	if (fchmodat(dir_fd, "filters", 0666, 0) < 0) {
		pr_debug("chmod for filters failed\n");
		ret = -errno;
	}
	if (fchmodat(dir_fd, "event_hash", 0666, 0) < 0) {
		pr_debug("chmod for event_hash failed\n");
		ret = -errno;
	}
	if (fchmodat(dir_fd, "idx_hash", 0666, 0) < 0) {
		pr_debug("chmod for idx_hash failed\n");
		ret = -errno;
	}
	if (fchmodat(dir_fd, "dropped", 0666, 0) < 0) {
		pr_debug("chmod for dropped failed\n");
		ret = -errno;
	}

err_close:
	close(dir_fd);

err:
	free(path);
	sample_filter_bpf__destroy(skel);
	return ret;
}
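
/*
 * After a successful perf_bpf_filter__pin(), the BPF filesystem (typically
 * mounted at /sys/fs/bpf) contains the pinned program and maps under
 * PERF_BPF_FILTER_PIN_PATH with the permissions set above:
 *
 *   perf_sample_filter  (0777)
 *   filters             (0666)
 *   event_hash          (0666)
 *   idx_hash            (0666)
 *   dropped             (0666)
 *
 * These are the objects that get_pinned_fd() below looks up for non-root
 * users.
 */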

int perf_bpf_filter__unpin(void)
{
	struct sample_filter_bpf *skel;
	char *path = NULL;
	int ret = -1;

	skel = sample_filter_bpf__open_and_load();
	if (!skel) {
		ret = -errno;
		pr_err("Failed to open perf sample-filter BPF skeleton\n");
		goto err;
	}

	if (asprintf(&path, "%s/fs/bpf/%s", sysfs__mountpoint(),
		     PERF_BPF_FILTER_PIN_PATH) < 0) {
		ret = -errno;
		pr_err("Failed to allocate pathname in the BPF-fs\n");
		goto err;
	}

	ret = bpf_object__unpin(skel->obj, path);

err:
	free(path);
	sample_filter_bpf__destroy(skel);
	return ret;
}

static int get_pinned_fd(const char *name)
{
	char *path = NULL;
	int fd;

	if (asprintf(&path, "%s/fs/bpf/%s/%s", sysfs__mountpoint(),
		     PERF_BPF_FILTER_PIN_PATH, name) < 0)
		return -1;

	fd = bpf_obj_get(path);

	free(path);
	return fd;
}