// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
// Copyright (c) 2023 Google
3 #include "vmlinux.h"
4 #include <bpf/bpf_helpers.h>
5 #include <bpf/bpf_tracing.h>
6 #include <bpf/bpf_core_read.h>
7
8 #include "sample-filter.h"
9
10 /* BPF map that will be filled by user space */
11 struct filters {
12 __uint(type, BPF_MAP_TYPE_HASH);
13 __type(key, int);
14 __type(value, struct perf_bpf_filter_entry[MAX_FILTERS]);
15 __uint(max_entries, 1);
16 } filters SEC(".maps");
17
18 /*
19 * An evsel has multiple instances for each CPU or task but we need a single
20 * id to be used as a key for the idx_hash. This hashmap would translate the
21 * instance's ID to a representative ID.
22 */
23 struct event_hash {
24 __uint(type, BPF_MAP_TYPE_HASH);
25 __type(key, __u64);
26 __type(value, __u64);
27 __uint(max_entries, 1);
28 } event_hash SEC(".maps");
29
30 /* tgid/evtid to filter index */
31 struct idx_hash {
32 __uint(type, BPF_MAP_TYPE_HASH);
33 __type(key, struct idx_hash_key);
34 __type(value, int);
35 __uint(max_entries, 1);
36 } idx_hash SEC(".maps");
37
38 /* tgid to filter index */
39 struct lost_count {
40 __uint(type, BPF_MAP_TYPE_ARRAY);
41 __type(key, int);
42 __type(value, int);
43 __uint(max_entries, 1);
44 } dropped SEC(".maps");
45
46 volatile const int use_idx_hash;
47
48 void *bpf_cast_to_kern_ctx(void *) __ksym;
49
50 /* new kernel perf_sample_data definition */
51 struct perf_sample_data___new {
52 __u64 sample_flags;
53 } __attribute__((preserve_access_index));
54
55 /* new kernel perf_mem_data_src definition */
56 union perf_mem_data_src___new {
57 __u64 val;
58 struct {
59 __u64 mem_op:5, /* type of opcode */
60 mem_lvl:14, /* memory hierarchy level */
61 mem_snoop:5, /* snoop mode */
62 mem_lock:2, /* lock instr */
63 mem_dtlb:7, /* tlb access */
64 mem_lvl_num:4, /* memory hierarchy level number */
65 mem_remote:1, /* remote */
66 mem_snoopx:2, /* snoop mode, ext */
67 mem_blk:3, /* access blocked */
68 mem_hops:3, /* hop level */
69 mem_rsvd:18;
70 };
71 };
72
73 /* helper function to return the given perf sample data */
perf_get_sample(struct bpf_perf_event_data_kern * kctx,struct perf_bpf_filter_entry * entry)74 static inline __u64 perf_get_sample(struct bpf_perf_event_data_kern *kctx,
75 struct perf_bpf_filter_entry *entry)
76 {
77 struct perf_sample_data___new *data = (void *)kctx->data;
78
79 if (!bpf_core_field_exists(data->sample_flags))
80 return 0;
81
82 #define BUILD_CHECK_SAMPLE(x) \
83 _Static_assert((1 << (PBF_TERM_##x - PBF_TERM_SAMPLE_START)) == PERF_SAMPLE_##x, \
84 "Mismatched PBF term to sample bit " #x)
85 BUILD_CHECK_SAMPLE(IP);
86 BUILD_CHECK_SAMPLE(TID);
87 BUILD_CHECK_SAMPLE(TIME);
88 BUILD_CHECK_SAMPLE(ADDR);
89 BUILD_CHECK_SAMPLE(ID);
90 BUILD_CHECK_SAMPLE(CPU);
91 BUILD_CHECK_SAMPLE(PERIOD);
92 BUILD_CHECK_SAMPLE(WEIGHT);
93 BUILD_CHECK_SAMPLE(DATA_SRC);
94 BUILD_CHECK_SAMPLE(TRANSACTION);
95 BUILD_CHECK_SAMPLE(PHYS_ADDR);
96 BUILD_CHECK_SAMPLE(CGROUP);
97 BUILD_CHECK_SAMPLE(DATA_PAGE_SIZE);
98 BUILD_CHECK_SAMPLE(CODE_PAGE_SIZE);
99 BUILD_CHECK_SAMPLE(WEIGHT_STRUCT);
100 #undef BUILD_CHECK_SAMPLE
101
102 /* For sample terms check the sample bit is set. */
103 if (entry->term >= PBF_TERM_SAMPLE_START && entry->term <= PBF_TERM_SAMPLE_END &&
104 (data->sample_flags & (1 << (entry->term - PBF_TERM_SAMPLE_START))) == 0)
105 return 0;
106
107 switch (entry->term) {
108 case PBF_TERM_IP:
109 return kctx->data->ip;
110 case PBF_TERM_ID:
111 return kctx->data->id;
112 case PBF_TERM_TID:
113 if (entry->part)
114 return kctx->data->tid_entry.pid;
115 else
116 return kctx->data->tid_entry.tid;
117 case PBF_TERM_CPU:
118 return kctx->data->cpu_entry.cpu;
119 case PBF_TERM_TIME:
120 return kctx->data->time;
121 case PBF_TERM_ADDR:
122 return kctx->data->addr;
123 case PBF_TERM_PERIOD:
124 return kctx->data->period;
125 case PBF_TERM_TRANSACTION:
126 return kctx->data->txn;
127 case PBF_TERM_WEIGHT_STRUCT:
128 if (entry->part == 1)
129 return kctx->data->weight.var1_dw;
130 if (entry->part == 2)
131 return kctx->data->weight.var2_w;
132 if (entry->part == 3)
133 return kctx->data->weight.var3_w;
134 /* fall through */
135 case PBF_TERM_WEIGHT:
136 return kctx->data->weight.full;
137 case PBF_TERM_PHYS_ADDR:
138 return kctx->data->phys_addr;
139 case PBF_TERM_CGROUP:
140 return kctx->data->cgroup;
141 case PBF_TERM_CODE_PAGE_SIZE:
142 return kctx->data->code_page_size;
143 case PBF_TERM_DATA_PAGE_SIZE:
144 return kctx->data->data_page_size;
145 case PBF_TERM_DATA_SRC:
146 if (entry->part == 1)
147 return kctx->data->data_src.mem_op;
148 if (entry->part == 2)
149 return kctx->data->data_src.mem_lvl_num;
150 if (entry->part == 3) {
151 __u32 snoop = kctx->data->data_src.mem_snoop;
152 __u32 snoopx = kctx->data->data_src.mem_snoopx;
153
154 return (snoopx << 5) | snoop;
155 }
156 if (entry->part == 4)
157 return kctx->data->data_src.mem_remote;
158 if (entry->part == 5)
159 return kctx->data->data_src.mem_lock;
160 if (entry->part == 6)
161 return kctx->data->data_src.mem_dtlb;
162 if (entry->part == 7)
163 return kctx->data->data_src.mem_blk;
164 if (entry->part == 8) {
165 union perf_mem_data_src___new *data = (void *)&kctx->data->data_src;
166
167 if (__builtin_preserve_field_info(data->mem_hops, BPF_FIELD_EXISTS))
168 return data->mem_hops;
169
170 return 0;
171 }
172 /* return the whole word */
173 return kctx->data->data_src.val;
174 case PBF_TERM_UID:
175 return bpf_get_current_uid_gid() & 0xFFFFFFFF;
176 case PBF_TERM_GID:
177 return bpf_get_current_uid_gid() >> 32;
178 case PBF_TERM_NONE:
179 case __PBF_UNUSED_TERM4:
180 case __PBF_UNUSED_TERM5:
181 case __PBF_UNUSED_TERM9:
182 case __PBF_UNUSED_TERM10:
183 case __PBF_UNUSED_TERM11:
184 case __PBF_UNUSED_TERM12:
185 case __PBF_UNUSED_TERM13:
186 case __PBF_UNUSED_TERM16:
187 case __PBF_UNUSED_TERM18:
188 case __PBF_UNUSED_TERM20:
189 default:
190 break;
191 }
192 return 0;
193 }
194
/*
 * Evaluate "data op val" for one filter entry.
 *
 * Outside a group a false result jumps to the enclosing function's drop
 * label. Inside a group a false result is ignored and a true result sets
 * group_result, so a group passes if any of its entries matched.
 *
 * Expects in_group, group_result and a drop label in the calling scope.
 * Arguments are parenthesised so compound expressions keep their meaning.
 * The expansion is left as a bare if/else because the call sites in this
 * file invoke the macro without a trailing semicolon.
 */
#define CHECK_RESULT(data, op, val)			\
	if (!((data) op (val))) {			\
		if (!in_group)				\
			goto drop;			\
	} else if (in_group) {				\
		group_result = 1;			\
	}
202
203 /* BPF program to be called from perf event overflow handler */
204 SEC("perf_event")
perf_sample_filter(void * ctx)205 int perf_sample_filter(void *ctx)
206 {
207 struct bpf_perf_event_data_kern *kctx;
208 struct perf_bpf_filter_entry *entry;
209 __u64 sample_data;
210 int in_group = 0;
211 int group_result = 0;
212 int i, k;
213 int *losts;
214
215 kctx = bpf_cast_to_kern_ctx(ctx);
216
217 k = 0;
218
219 if (use_idx_hash) {
220 struct idx_hash_key key = {
221 .tgid = bpf_get_current_pid_tgid() >> 32,
222 };
223 __u64 eid = kctx->event->id;
224 __u64 *key_id;
225 int *idx;
226
227 /* get primary_event_id */
228 if (kctx->event->parent)
229 eid = kctx->event->parent->id;
230
231 key_id = bpf_map_lookup_elem(&event_hash, &eid);
232 if (key_id == NULL)
233 goto drop;
234
235 key.evt_id = *key_id;
236
237 idx = bpf_map_lookup_elem(&idx_hash, &key);
238 if (idx)
239 k = *idx;
240 else
241 goto drop;
242 }
243
244 entry = bpf_map_lookup_elem(&filters, &k);
245 if (entry == NULL)
246 goto drop;
247
248 for (i = 0; i < MAX_FILTERS; i++) {
249 sample_data = perf_get_sample(kctx, &entry[i]);
250
251 switch (entry[i].op) {
252 case PBF_OP_EQ:
253 CHECK_RESULT(sample_data, ==, entry[i].value)
254 break;
255 case PBF_OP_NEQ:
256 CHECK_RESULT(sample_data, !=, entry[i].value)
257 break;
258 case PBF_OP_GT:
259 CHECK_RESULT(sample_data, >, entry[i].value)
260 break;
261 case PBF_OP_GE:
262 CHECK_RESULT(sample_data, >=, entry[i].value)
263 break;
264 case PBF_OP_LT:
265 CHECK_RESULT(sample_data, <, entry[i].value)
266 break;
267 case PBF_OP_LE:
268 CHECK_RESULT(sample_data, <=, entry[i].value)
269 break;
270 case PBF_OP_AND:
271 CHECK_RESULT(sample_data, &, entry[i].value)
272 break;
273 case PBF_OP_GROUP_BEGIN:
274 in_group = 1;
275 group_result = 0;
276 break;
277 case PBF_OP_GROUP_END:
278 if (group_result == 0)
279 goto drop;
280 in_group = 0;
281 break;
282 case PBF_OP_DONE:
283 /* no failures so far, accept it */
284 return 1;
285 }
286 }
287 /* generate sample data */
288 return 1;
289
290 drop:
291 losts = bpf_map_lookup_elem(&dropped, &k);
292 if (losts != NULL)
293 __sync_fetch_and_add(losts, 1);
294
295 return 0;
296 }
297
298 char LICENSE[] SEC("license") = "Dual BSD/GPL";
299