xref: /linux/kernel/bpf/trampoline.c (revision 9d106c6dd81bb26ad7fc3ee89cb1d62557c8e2c9)
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2019 Facebook */
#include <linux/hash.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/ftrace.h>
#include <linux/rbtree_latch.h>
#include <linux/perf_event.h>

/* dummy _ops. The verifier will operate on the target program's ops. */
const struct bpf_verifier_ops bpf_extension_verifier_ops = {
};
const struct bpf_prog_ops bpf_extension_prog_ops = {
};

/* btf_vmlinux has ~22k attachable functions. 1k htab is enough. */
#define TRAMPOLINE_HASH_BITS 10
#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)

static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];

/* serializes access to trampoline_table */
static DEFINE_MUTEX(trampoline_mutex);

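/* Allocate one executable, writable page to hold a generated image.
 * set_vm_flush_reset_perms() makes sure the page's permissions and the
 * direct map are restored when the page is eventually freed via
 * bpf_jit_free_exec().
 */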
void *bpf_jit_alloc_exec_page(void)
{
	void *image;

	image = bpf_jit_alloc_exec(PAGE_SIZE);
	if (!image)
		return NULL;

	set_vm_flush_reset_perms(image);
	/* Keep image as writable. The alternative is to keep flipping ro/rw
	 * every time a new program is attached or detached.
	 */
	set_memory_x((long)image, 1);
	return image;
}

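/* Publish/unpublish an image as a BPF ksym spanning one page, so it is
 * visible in /proc/kallsyms and a PERF_RECORD_KSYMBOL event is emitted
 * for profilers that need to symbolize samples landing in the image.
 */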
void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym)
{
	ksym->start = (unsigned long) data;
	ksym->end = ksym->start + PAGE_SIZE;
	bpf_ksym_add(ksym);
	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
			   PAGE_SIZE, false, ksym->name);
}

void bpf_image_ksym_del(struct bpf_ksym *ksym)
{
	bpf_ksym_del(ksym);
	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
			   PAGE_SIZE, true, ksym->name);
}

static void bpf_trampoline_ksym_add(struct bpf_trampoline *tr)
{
	struct bpf_ksym *ksym = &tr->ksym;

	snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", tr->key);
	bpf_image_ksym_add(tr->image, ksym);
}

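/* Find the trampoline for @key or create a new one. The key is chosen by
 * the caller; at this revision the verifier derives it from the attach
 * target's BTF id (combined with the target prog id for extension
 * programs), so all programs tracing the same kernel function share one
 * trampoline. Returns a referenced trampoline, or NULL on allocation
 * failure; release it with bpf_trampoline_put().
 */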
struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
{
	struct bpf_trampoline *tr;
	struct hlist_head *head;
	void *image;
	int i;

	mutex_lock(&trampoline_mutex);
	head = &trampoline_table[hash_64(key, TRAMPOLINE_HASH_BITS)];
	hlist_for_each_entry(tr, head, hlist) {
		if (tr->key == key) {
			refcount_inc(&tr->refcnt);
			goto out;
		}
	}
	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
	if (!tr)
		goto out;

	/* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
	image = bpf_jit_alloc_exec_page();
	if (!image) {
		kfree(tr);
		tr = NULL;
		goto out;
	}

	tr->key = key;
	INIT_HLIST_NODE(&tr->hlist);
	hlist_add_head(&tr->hlist, head);
	refcount_set(&tr->refcnt, 1);
	mutex_init(&tr->mutex);
	for (i = 0; i < BPF_TRAMP_MAX; i++)
		INIT_HLIST_HEAD(&tr->progs_hlist[i]);
	tr->image = image;
	INIT_LIST_HEAD_RCU(&tr->ksym.lnode);
	bpf_trampoline_ksym_add(tr);
out:
	mutex_unlock(&trampoline_mutex);
	return tr;
}

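/* Returns 1 if @ip is a patch site managed by ftrace (so the ftrace
 * direct call API must be used), 0 if it is not, and -EFAULT if ftrace
 * reports a patch site that is not exactly at @ip.
 */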
static int is_ftrace_location(void *ip)
{
	long addr;

	addr = ftrace_location((long)ip);
	if (!addr)
		return 0;
	if (WARN_ON_ONCE(addr != (long)ip))
		return -EFAULT;
	return 1;
}

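/* The helpers below install, update and remove the call to the trampoline
 * at the target function's entry. When the site is managed by ftrace, the
 * ftrace direct-call API is used so the attachment cooperates with other
 * ftrace users; otherwise the instruction at the function entry is patched
 * directly via bpf_arch_text_poke().
 */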
static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
{
	void *ip = tr->func.addr;
	int ret;

	if (tr->func.ftrace_managed)
		ret = unregister_ftrace_direct((long)ip, (long)old_addr);
	else
		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL);
	return ret;
}

static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr)
{
	void *ip = tr->func.addr;
	int ret;

	if (tr->func.ftrace_managed)
		ret = modify_ftrace_direct((long)ip, (long)old_addr, (long)new_addr);
	else
		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, new_addr);
	return ret;
}

/* first time registering */
static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
{
	void *ip = tr->func.addr;
	int ret;

	ret = is_ftrace_location(ip);
	if (ret < 0)
		return ret;
	tr->func.ftrace_managed = ret;

	if (tr->func.ftrace_managed)
		ret = register_ftrace_direct((long)ip, (long)new_addr);
	else
		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr);
	return ret;
}

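/* Collect the programs currently linked to @tr into a kcalloc'ed
 * bpf_tramp_progs array indexed by BPF_TRAMP_* kind and report the
 * overall count via @total. Called with tr->mutex held, so the per-kind
 * lists are stable. The caller must kfree() the returned array.
 */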
static struct bpf_tramp_progs *
bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total)
{
	const struct bpf_prog_aux *aux;
	struct bpf_tramp_progs *tprogs;
	struct bpf_prog **progs;
	int kind;

	*total = 0;
	tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL);
	if (!tprogs)
		return ERR_PTR(-ENOMEM);

	for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
		tprogs[kind].nr_progs = tr->progs_cnt[kind];
		*total += tr->progs_cnt[kind];
		progs = tprogs[kind].progs;

		hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist)
			*progs++ = aux->prog;
	}
	return tprogs;
}

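/* Regenerate the trampoline and (re)attach it to the target function.
 * The image page is double-buffered: the low bit of tr->selector picks
 * the half of the page that receives the new trampoline while the other
 * half keeps the code that is currently live. Once the new half is
 * prepared, the call site is switched over and the selector is bumped so
 * the next update reuses the now idle half.
 */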
static int bpf_trampoline_update(struct bpf_trampoline *tr)
{
	void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE / 2;
	void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE / 2;
	struct bpf_tramp_progs *tprogs;
	u32 flags = BPF_TRAMP_F_RESTORE_REGS;
	int err, total;

	tprogs = bpf_trampoline_get_progs(tr, &total);
	if (IS_ERR(tprogs))
		return PTR_ERR(tprogs);

	if (total == 0) {
		err = unregister_fentry(tr, old_image);
		tr->selector = 0;
		goto out;
	}

	if (tprogs[BPF_TRAMP_FEXIT].nr_progs ||
	    tprogs[BPF_TRAMP_MODIFY_RETURN].nr_progs)
		flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;

	/* Though the second half of the trampoline page is unused, a task
	 * could be preempted in the middle of the first half of the
	 * trampoline, and two updates to the trampoline would then change
	 * the code underneath the preempted task. Hence wait for tasks to
	 * voluntarily schedule or go to userspace.
	 */
	synchronize_rcu_tasks();

	err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2,
					  &tr->func.model, flags, tprogs,
					  tr->func.addr);
	if (err < 0)
		goto out;

	if (tr->selector)
		/* progs already running at this address */
		err = modify_fentry(tr, old_image, new_image);
	else
		/* first time registering */
		err = register_fentry(tr, new_image);
	if (err)
		goto out;
	tr->selector++;
out:
	kfree(tprogs);
	return err;
}

static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(enum bpf_attach_type t)
{
	switch (t) {
	case BPF_TRACE_FENTRY:
		return BPF_TRAMP_FENTRY;
	case BPF_MODIFY_RETURN:
		return BPF_TRAMP_MODIFY_RETURN;
	case BPF_TRACE_FEXIT:
		return BPF_TRAMP_FEXIT;
	default:
		return BPF_TRAMP_REPLACE;
	}
}

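/* Link @prog to its trampoline and regenerate the trampoline code.
 * Extension programs take over the target entirely via a direct jump, so
 * they are mutually exclusive with fentry/fexit programs on the same
 * target, and at most BPF_MAX_TRAMP_PROGS fentry+fexit programs can be
 * attached to one trampoline.
 */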
int bpf_trampoline_link_prog(struct bpf_prog *prog)
{
	enum bpf_tramp_prog_type kind;
	struct bpf_trampoline *tr;
	int err = 0;
	int cnt;

	tr = prog->aux->trampoline;
	kind = bpf_attach_type_to_tramp(prog->expected_attach_type);
	mutex_lock(&tr->mutex);
	if (tr->extension_prog) {
		/* cannot attach fentry/fexit if extension prog is attached.
		 * cannot overwrite extension prog either.
		 */
		err = -EBUSY;
		goto out;
	}
	cnt = tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT];
	if (kind == BPF_TRAMP_REPLACE) {
		/* Cannot attach extension if fentry/fexit are in use. */
		if (cnt) {
			err = -EBUSY;
			goto out;
		}
		tr->extension_prog = prog;
		err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
					 prog->bpf_func);
		goto out;
	}
	if (cnt >= BPF_MAX_TRAMP_PROGS) {
		err = -E2BIG;
		goto out;
	}
	if (!hlist_unhashed(&prog->aux->tramp_hlist)) {
		/* prog already linked */
		err = -EBUSY;
		goto out;
	}
	hlist_add_head(&prog->aux->tramp_hlist, &tr->progs_hlist[kind]);
	tr->progs_cnt[kind]++;
	err = bpf_trampoline_update(prog->aux->trampoline);
	if (err) {
		hlist_del(&prog->aux->tramp_hlist);
		tr->progs_cnt[kind]--;
	}
out:
	mutex_unlock(&tr->mutex);
	return err;
}

/* bpf_trampoline_unlink_prog() should never fail. */
int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
{
	enum bpf_tramp_prog_type kind;
	struct bpf_trampoline *tr;
	int err;

	tr = prog->aux->trampoline;
	kind = bpf_attach_type_to_tramp(prog->expected_attach_type);
	mutex_lock(&tr->mutex);
	if (kind == BPF_TRAMP_REPLACE) {
		WARN_ON_ONCE(!tr->extension_prog);
		err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP,
					 tr->extension_prog->bpf_func, NULL);
		tr->extension_prog = NULL;
		goto out;
	}
	hlist_del(&prog->aux->tramp_hlist);
	tr->progs_cnt[kind]--;
	err = bpf_trampoline_update(prog->aux->trampoline);
out:
	mutex_unlock(&tr->mutex);
	return err;
}

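/* Drop a reference to @tr. On the final put the trampoline is removed
 * from kallsyms and its image page is freed, but only after
 * synchronize_rcu_tasks() guarantees that no task is still preempted
 * inside the trampoline code.
 */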
void bpf_trampoline_put(struct bpf_trampoline *tr)
{
	if (!tr)
		return;
	mutex_lock(&trampoline_mutex);
	if (!refcount_dec_and_test(&tr->refcnt))
		goto out;
	WARN_ON_ONCE(mutex_is_locked(&tr->mutex));
	if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FENTRY])))
		goto out;
	if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
		goto out;
	bpf_image_ksym_del(&tr->ksym);
	/* wait for tasks to get out of trampoline before freeing it */
	synchronize_rcu_tasks();
	bpf_jit_free_exec(tr->image);
	hlist_del(&tr->hlist);
	kfree(tr);
out:
	mutex_unlock(&trampoline_mutex);
}

/* The logic is similar to BPF_PROG_RUN, but with an explicit
 * rcu_read_lock() and migrate_disable() which are required
 * for the trampoline. In the trampoline that logic is split into
 *   call __bpf_prog_enter
 *   call prog->bpf_func
 *   call __bpf_prog_exit
 * __bpf_prog_enter() returns the start timestamp, which the trampoline
 * passes back to __bpf_prog_exit() to update the program's run stats.
 */
u64 notrace __bpf_prog_enter(void)
	__acquires(RCU)
{
	u64 start = 0;

	rcu_read_lock();
	migrate_disable();
	if (static_branch_unlikely(&bpf_stats_enabled_key))
		start = sched_clock();
	return start;
}

void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
	__releases(RCU)
{
	struct bpf_prog_stats *stats;

	if (static_branch_unlikely(&bpf_stats_enabled_key) &&
	    /* static_key could be enabled in __bpf_prog_enter
	     * and disabled in __bpf_prog_exit.
	     * And vice versa.
	     * Hence check that 'start' is not zero.
	     */
	    start) {
		stats = this_cpu_ptr(prog->aux->stats);
		u64_stats_update_begin(&stats->syncp);
		stats->cnt++;
		stats->nsecs += sched_clock() - start;
		u64_stats_update_end(&stats->syncp);
	}
	migrate_enable();
	rcu_read_unlock();
}

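/* Weak stub for architectures without BPF trampoline support. Arch code
 * (only x86-64 implements this at this revision) overrides it to emit the
 * actual trampoline instructions into the [image, image_end) half page.
 */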
int __weak
arch_prepare_bpf_trampoline(void *image, void *image_end,
			    const struct btf_func_model *m, u32 flags,
			    struct bpf_tramp_progs *tprogs,
			    void *orig_call)
{
	return -ENOTSUPP;
}

static int __init init_trampolines(void)
{
	int i;

	for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++)
		INIT_HLIST_HEAD(&trampoline_table[i]);
	return 0;
}
late_initcall(init_trampolines);