xref: /linux/kernel/bpf/trampoline.c (revision ba042ed6446fc524c1d804227765b45616f9cba3)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2019 Facebook */
3 #include <linux/hash.h>
4 #include <linux/bpf.h>
5 #include <linux/filter.h>
6 #include <linux/ftrace.h>
7 #include <linux/rbtree_latch.h>
8 #include <linux/perf_event.h>
9 #include <linux/btf.h>
10 #include <linux/rcupdate_trace.h>
11 #include <linux/rcupdate_wait.h>
12 #include <linux/static_call.h>
13 #include <linux/bpf_verifier.h>
14 #include <linux/bpf_lsm.h>
15 #include <linux/delay.h>
16 
17 /* dummy _ops. The verifier will operate on target program's ops. */
18 const struct bpf_verifier_ops bpf_extension_verifier_ops = {
19 };
20 const struct bpf_prog_ops bpf_extension_prog_ops = {
21 };
22 
23 /* btf_vmlinux has ~22k attachable functions. 1k htab is enough. */
24 #define TRAMPOLINE_HASH_BITS 10
25 #define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)
26 
27 static struct hlist_head trampoline_key_table[TRAMPOLINE_TABLE_SIZE];
28 static struct hlist_head trampoline_ip_table[TRAMPOLINE_TABLE_SIZE];
29 
30 /* serializes access to trampoline tables */
31 static DEFINE_MUTEX(trampoline_mutex);
32 
33 /*
34  * Keep 32 trampoline locks (5 bits) in the pool so trampoline_lock_all()
35  * stays below MAX_LOCK_DEPTH.  Each pool slot has a distinct lockdep
36  * class because trampoline_lock_all() takes all pool mutexes at once;
37  * otherwise lockdep would report recursive locking on same-class mutexes.
38  */
39 #define TRAMPOLINE_LOCKS_BITS 5
40 #define TRAMPOLINE_LOCKS_TABLE_SIZE (1 << TRAMPOLINE_LOCKS_BITS)
41 
42 static struct {
43 	struct mutex mutex;
44 	struct lock_class_key key;
45 } trampoline_locks[TRAMPOLINE_LOCKS_TABLE_SIZE];
46 
47 static struct mutex *select_trampoline_lock(struct bpf_trampoline *tr)
48 {
49 	return &trampoline_locks[hash_ptr(tr, TRAMPOLINE_LOCKS_BITS)].mutex;
50 }
51 
/* Take the pool mutex selected for @tr. */
static void trampoline_lock(struct bpf_trampoline *tr)
{
	struct mutex *lock = select_trampoline_lock(tr);

	mutex_lock(lock);
}
56 
/* Release the pool mutex selected for @tr. */
static void trampoline_unlock(struct bpf_trampoline *tr)
{
	struct mutex *lock = select_trampoline_lock(tr);

	mutex_unlock(lock);
}
61 
62 struct bpf_trampoline_ops {
63 	int (*register_fentry)(struct bpf_trampoline *tr, struct bpf_tramp_image *im, void *data);
64 	int (*unregister_fentry)(struct bpf_trampoline *tr, u32 orig_flags, void *data);
65 	int (*modify_fentry)(struct bpf_trampoline *tr, u32 orig_flags, struct bpf_tramp_image *im,
66 			     bool lock_direct_mutex, void *data);
67 };
68 
69 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
70 static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex,
71 				 const struct bpf_trampoline_ops *ops, void *data);
72 static const struct bpf_trampoline_ops trampoline_ops;
73 
74 #ifdef CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS
75 static struct bpf_trampoline *direct_ops_ip_lookup(struct ftrace_ops *ops, unsigned long ip)
76 {
77 	struct hlist_head *head_ip;
78 	struct bpf_trampoline *tr;
79 
80 	mutex_lock(&trampoline_mutex);
81 	head_ip = &trampoline_ip_table[hash_64(ip, TRAMPOLINE_HASH_BITS)];
82 	hlist_for_each_entry(tr, head_ip, hlist_ip) {
83 		if (tr->ip == ip)
84 			goto out;
85 	}
86 	tr = NULL;
87 out:
88 	mutex_unlock(&trampoline_mutex);
89 	return tr;
90 }
91 #else
92 static struct bpf_trampoline *direct_ops_ip_lookup(struct ftrace_ops *ops, unsigned long ip)
93 {
94 	return ops->private;
95 }
96 #endif /* CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS */
97 
98 static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, unsigned long ip,
99 				     enum ftrace_ops_cmd cmd)
100 {
101 	struct bpf_trampoline *tr;
102 	int ret = 0;
103 
104 	tr = direct_ops_ip_lookup(ops, ip);
105 	if (!tr)
106 		return -EINVAL;
107 
108 	if (cmd == FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF) {
109 		/* This is called inside register_ftrace_direct_multi(), so
110 		 * trampoline's mutex is already locked.
111 		 */
112 		lockdep_assert_held_once(select_trampoline_lock(tr));
113 
114 		/* Instead of updating the trampoline here, we propagate
115 		 * -EAGAIN to register_ftrace_direct(). Then we can
116 		 * retry register_ftrace_direct() after updating the
117 		 * trampoline.
118 		 */
119 		if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) &&
120 		    !(tr->flags & BPF_TRAMP_F_ORIG_STACK)) {
121 			if (WARN_ON_ONCE(tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY))
122 				return -EBUSY;
123 
124 			tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY;
125 			return -EAGAIN;
126 		}
127 
128 		return 0;
129 	}
130 
131 	/* The normal locking order is
132 	 *    select_trampoline_lock(tr) => direct_mutex (ftrace.c) => ftrace_lock (ftrace.c)
133 	 *
134 	 * The following two commands are called from
135 	 *
136 	 *   prepare_direct_functions_for_ipmodify
137 	 *   cleanup_direct_functions_after_ipmodify
138 	 *
139 	 * In both cases, direct_mutex is already locked. Use
140 	 * mutex_trylock(select_trampoline_lock(tr)) to avoid deadlock in race condition
141 	 * (something else holds the same pool lock).
142 	 */
143 	if (!mutex_trylock(select_trampoline_lock(tr))) {
144 		/* sleep 1 ms to make sure whatever holding select_trampoline_lock(tr)
145 		 * makes some progress.
146 		 */
147 		msleep(1);
148 		return -EAGAIN;
149 	}
150 
151 	switch (cmd) {
152 	case FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER:
153 		tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY;
154 
155 		if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) &&
156 		    !(tr->flags & BPF_TRAMP_F_ORIG_STACK))
157 			ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */,
158 						    &trampoline_ops, NULL);
159 		break;
160 	case FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER:
161 		tr->flags &= ~BPF_TRAMP_F_SHARE_IPMODIFY;
162 
163 		if (tr->flags & BPF_TRAMP_F_ORIG_STACK)
164 			ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */,
165 						    &trampoline_ops, NULL);
166 		break;
167 	default:
168 		ret = -EINVAL;
169 		break;
170 	}
171 
172 	trampoline_unlock(tr);
173 	return ret;
174 }
175 #endif
176 
177 bool bpf_prog_has_trampoline(const struct bpf_prog *prog)
178 {
179 	enum bpf_attach_type eatype = prog->expected_attach_type;
180 	enum bpf_prog_type ptype = prog->type;
181 
182 	switch (ptype) {
183 	case BPF_PROG_TYPE_TRACING:
184 		if (eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT ||
185 		    eatype == BPF_MODIFY_RETURN || eatype == BPF_TRACE_FSESSION ||
186 		    eatype == BPF_TRACE_FENTRY_MULTI || eatype == BPF_TRACE_FEXIT_MULTI ||
187 		    eatype == BPF_TRACE_FSESSION_MULTI)
188 			return true;
189 		return false;
190 	case BPF_PROG_TYPE_LSM:
191 		return eatype == BPF_LSM_MAC;
192 	default:
193 		return false;
194 	}
195 }
196 
197 void bpf_image_ksym_init(void *data, unsigned int size, struct bpf_ksym *ksym)
198 {
199 	ksym->start = (unsigned long) data;
200 	ksym->end = ksym->start + size;
201 }
202 
203 void bpf_image_ksym_add(struct bpf_ksym *ksym)
204 {
205 	bpf_ksym_add(ksym);
206 	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
207 			   PAGE_SIZE, false, ksym->name);
208 }
209 
210 void bpf_image_ksym_del(struct bpf_ksym *ksym)
211 {
212 	bpf_ksym_del(ksym);
213 	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
214 			   PAGE_SIZE, true, ksym->name);
215 }
216 
217 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
218 #ifdef CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS
219 /*
220  * We have only single direct_ops which contains all the direct call
221  * sites and is the only global ftrace_ops for all trampolines.
222  *
223  * We use 'update_ftrace_direct_*' api for attachment.
224  */
225 struct ftrace_ops direct_ops = {
226 	.ops_func = bpf_tramp_ftrace_ops_func,
227 };
228 
229 static int direct_ops_alloc(struct bpf_trampoline *tr)
230 {
231 	tr->fops = &direct_ops;
232 	return 0;
233 }
234 
235 static void direct_ops_free(struct bpf_trampoline *tr) { }
236 
237 static struct ftrace_hash *hash_from_ip(struct bpf_trampoline *tr, void *ptr)
238 {
239 	unsigned long ip, addr = (unsigned long) ptr;
240 	struct ftrace_hash *hash;
241 
242 	ip = ftrace_location(tr->ip);
243 	if (!ip)
244 		return NULL;
245 	hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS);
246 	if (!hash)
247 		return NULL;
248 	if (bpf_trampoline_use_jmp(tr->flags))
249 		addr = ftrace_jmp_set(addr);
250 	if (!add_ftrace_hash_entry_direct(hash, ip, addr)) {
251 		free_ftrace_hash(hash);
252 		return NULL;
253 	}
254 	return hash;
255 }
256 
257 static int direct_ops_add(struct bpf_trampoline *tr, void *addr)
258 {
259 	struct ftrace_hash *hash = hash_from_ip(tr, addr);
260 	int err;
261 
262 	if (!hash)
263 		return -ENOMEM;
264 	err = update_ftrace_direct_add(tr->fops, hash);
265 	free_ftrace_hash(hash);
266 	return err;
267 }
268 
269 static int direct_ops_del(struct bpf_trampoline *tr, void *addr)
270 {
271 	struct ftrace_hash *hash = hash_from_ip(tr, addr);
272 	int err;
273 
274 	if (!hash)
275 		return -ENOMEM;
276 	err = update_ftrace_direct_del(tr->fops, hash);
277 	free_ftrace_hash(hash);
278 	return err;
279 }
280 
281 static int direct_ops_mod(struct bpf_trampoline *tr, void *addr, bool lock_direct_mutex)
282 {
283 	struct ftrace_hash *hash = hash_from_ip(tr, addr);
284 	int err;
285 
286 	if (!hash)
287 		return -ENOMEM;
288 	err = update_ftrace_direct_mod(tr->fops, hash, lock_direct_mutex);
289 	free_ftrace_hash(hash);
290 	return err;
291 }
292 #else
293 /*
294  * We allocate ftrace_ops object for each trampoline and it contains
295  * call site specific for that trampoline.
296  *
297  * We use *_ftrace_direct api for attachment.
298  */
299 static int direct_ops_alloc(struct bpf_trampoline *tr)
300 {
301 	tr->fops = kzalloc_obj(struct ftrace_ops);
302 	if (!tr->fops)
303 		return -ENOMEM;
304 	tr->fops->private = tr;
305 	tr->fops->ops_func = bpf_tramp_ftrace_ops_func;
306 	return 0;
307 }
308 
309 static void direct_ops_free(struct bpf_trampoline *tr)
310 {
311 	if (!tr->fops)
312 		return;
313 	ftrace_free_filter(tr->fops);
314 	kfree(tr->fops);
315 }
316 
317 static int direct_ops_add(struct bpf_trampoline *tr, void *ptr)
318 {
319 	unsigned long addr = (unsigned long) ptr;
320 	struct ftrace_ops *ops = tr->fops;
321 	int ret;
322 
323 	if (bpf_trampoline_use_jmp(tr->flags))
324 		addr = ftrace_jmp_set(addr);
325 
326 	ret = ftrace_set_filter_ip(ops, tr->ip, 0, 1);
327 	if (ret)
328 		return ret;
329 	return register_ftrace_direct(ops, addr);
330 }
331 
332 static int direct_ops_del(struct bpf_trampoline *tr, void *addr)
333 {
334 	return unregister_ftrace_direct(tr->fops, (long)addr, false);
335 }
336 
337 static int direct_ops_mod(struct bpf_trampoline *tr, void *ptr, bool lock_direct_mutex)
338 {
339 	unsigned long addr = (unsigned long) ptr;
340 	struct ftrace_ops *ops = tr->fops;
341 
342 	if (bpf_trampoline_use_jmp(tr->flags))
343 		addr = ftrace_jmp_set(addr);
344 	if (lock_direct_mutex)
345 		return modify_ftrace_direct(ops, addr);
346 	return modify_ftrace_direct_nolock(ops, addr);
347 }
348 #endif /* CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS */
349 #else
350 static void direct_ops_free(struct bpf_trampoline *tr) { }
351 
352 static int direct_ops_alloc(struct bpf_trampoline *tr)
353 {
354 	return 0;
355 }
356 
357 static int direct_ops_add(struct bpf_trampoline *tr, void *addr)
358 {
359 	return -ENODEV;
360 }
361 
362 static int direct_ops_del(struct bpf_trampoline *tr, void *addr)
363 {
364 	return -ENODEV;
365 }
366 
367 static int direct_ops_mod(struct bpf_trampoline *tr, void *ptr, bool lock_direct_mutex)
368 {
369 	return -ENODEV;
370 }
371 #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
372 
373 static struct bpf_trampoline *bpf_trampoline_lookup(u64 key, unsigned long ip)
374 {
375 	struct bpf_trampoline *tr;
376 	struct hlist_head *head;
377 	int i;
378 
379 	mutex_lock(&trampoline_mutex);
380 	head = &trampoline_key_table[hash_64(key, TRAMPOLINE_HASH_BITS)];
381 	hlist_for_each_entry(tr, head, hlist_key) {
382 		if (tr->key == key) {
383 			refcount_inc(&tr->refcnt);
384 			goto out;
385 		}
386 	}
387 	tr = kzalloc_obj(*tr);
388 	if (!tr)
389 		goto out;
390 	if (direct_ops_alloc(tr)) {
391 		kfree(tr);
392 		tr = NULL;
393 		goto out;
394 	}
395 
396 	tr->key = key;
397 	tr->ip = ftrace_location(ip);
398 	INIT_HLIST_NODE(&tr->hlist_key);
399 	INIT_HLIST_NODE(&tr->hlist_ip);
400 	hlist_add_head(&tr->hlist_key, head);
401 	head = &trampoline_ip_table[hash_64(tr->ip, TRAMPOLINE_HASH_BITS)];
402 	hlist_add_head(&tr->hlist_ip, head);
403 	refcount_set(&tr->refcnt, 1);
404 	for (i = 0; i < BPF_TRAMP_MAX; i++)
405 		INIT_HLIST_HEAD(&tr->progs_hlist[i]);
406 out:
407 	mutex_unlock(&trampoline_mutex);
408 	return tr;
409 }
410 
411 static int bpf_trampoline_update_fentry(struct bpf_trampoline *tr, u32 orig_flags,
412 					void *old_addr, void *new_addr)
413 {
414 	enum bpf_text_poke_type new_t = BPF_MOD_CALL, old_t = BPF_MOD_CALL;
415 	void *ip = tr->func.addr;
416 
417 	if (!new_addr)
418 		new_t = BPF_MOD_NOP;
419 	else if (bpf_trampoline_use_jmp(tr->flags))
420 		new_t = BPF_MOD_JUMP;
421 
422 	if (!old_addr)
423 		old_t = BPF_MOD_NOP;
424 	else if (bpf_trampoline_use_jmp(orig_flags))
425 		old_t = BPF_MOD_JUMP;
426 
427 	return bpf_arch_text_poke(ip, old_t, new_t, old_addr, new_addr);
428 }
429 
430 static void bpf_tramp_image_put(struct bpf_tramp_image *im);
431 
432 static int unregister_fentry(struct bpf_trampoline *tr, u32 orig_flags, void *data __maybe_unused)
433 {
434 	void *old_addr = tr->cur_image->image;
435 	int ret;
436 
437 	if (tr->func.ftrace_managed)
438 		ret = direct_ops_del(tr, old_addr);
439 	else
440 		ret = bpf_trampoline_update_fentry(tr, orig_flags, old_addr, NULL);
441 
442 	if (ret)
443 		return ret;
444 
445 	bpf_tramp_image_put(tr->cur_image);
446 	tr->cur_image = NULL;
447 	return 0;
448 }
449 
450 static int modify_fentry(struct bpf_trampoline *tr, u32 orig_flags, struct bpf_tramp_image *im,
451 			 bool lock_direct_mutex, void *data __maybe_unused)
452 {
453 	void *old_addr = tr->cur_image->image;
454 	void *new_addr = im->image;
455 	int ret;
456 
457 	if (tr->func.ftrace_managed) {
458 		ret = direct_ops_mod(tr, new_addr, lock_direct_mutex);
459 	} else {
460 		ret = bpf_trampoline_update_fentry(tr, orig_flags, old_addr,
461 						   new_addr);
462 	}
463 
464 	if (ret)
465 		return ret;
466 
467 	bpf_tramp_image_put(tr->cur_image);
468 	tr->cur_image = im;
469 	return 0;
470 }
471 
472 /* first time registering */
473 static int register_fentry(struct bpf_trampoline *tr, struct bpf_tramp_image *im,
474 			   void *data __maybe_unused)
475 {
476 	void *new_addr = im->image;
477 	void *ip = tr->func.addr;
478 	unsigned long faddr;
479 	int ret;
480 
481 	faddr = ftrace_location((unsigned long)ip);
482 	if (faddr) {
483 		if (!tr->fops)
484 			return -ENOTSUPP;
485 		tr->func.ftrace_managed = true;
486 	}
487 
488 	if (tr->func.ftrace_managed) {
489 		ret = direct_ops_add(tr, new_addr);
490 	} else {
491 		ret = bpf_trampoline_update_fentry(tr, 0, NULL, new_addr);
492 	}
493 
494 	if (ret)
495 		return ret;
496 
497 	tr->cur_image = im;
498 	return 0;
499 }
500 
501 static const struct bpf_trampoline_ops trampoline_ops = {
502 	.register_fentry   = register_fentry,
503 	.unregister_fentry = unregister_fentry,
504 	.modify_fentry     = modify_fentry,
505 };
506 
507 static struct bpf_tramp_nodes *
508 bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total, bool *ip_arg)
509 {
510 	struct bpf_tramp_node *node, **nodes;
511 	struct bpf_tramp_nodes *tnodes;
512 	int kind;
513 
514 	*total = 0;
515 	tnodes = kzalloc_objs(*tnodes, BPF_TRAMP_MAX);
516 	if (!tnodes)
517 		return ERR_PTR(-ENOMEM);
518 
519 	for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
520 		tnodes[kind].nr_nodes = tr->progs_cnt[kind];
521 		*total += tr->progs_cnt[kind];
522 		nodes = tnodes[kind].nodes;
523 
524 		hlist_for_each_entry(node, &tr->progs_hlist[kind], tramp_hlist) {
525 			*ip_arg |= node->link->prog->call_get_func_ip;
526 			*nodes++ = node;
527 		}
528 	}
529 	return tnodes;
530 }
531 
532 static void bpf_tramp_image_free(struct bpf_tramp_image *im)
533 {
534 	bpf_image_ksym_del(&im->ksym);
535 	arch_free_bpf_trampoline(im->image, im->size);
536 	bpf_jit_uncharge_modmem(im->size);
537 	percpu_ref_exit(&im->pcref);
538 	kfree_rcu(im, rcu);
539 }
540 
541 static void __bpf_tramp_image_put_deferred(struct work_struct *work)
542 {
543 	struct bpf_tramp_image *im;
544 
545 	im = container_of(work, struct bpf_tramp_image, work);
546 	bpf_tramp_image_free(im);
547 }
548 
549 /* callback, fexit step 3 or fentry step 2 */
550 static void __bpf_tramp_image_put_rcu(struct rcu_head *rcu)
551 {
552 	struct bpf_tramp_image *im;
553 
554 	im = container_of(rcu, struct bpf_tramp_image, rcu);
555 	INIT_WORK(&im->work, __bpf_tramp_image_put_deferred);
556 	schedule_work(&im->work);
557 }
558 
559 /* callback, fexit step 2. Called after percpu_ref_kill confirms. */
560 static void __bpf_tramp_image_release(struct percpu_ref *pcref)
561 {
562 	struct bpf_tramp_image *im;
563 
564 	im = container_of(pcref, struct bpf_tramp_image, pcref);
565 	call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu);
566 }
567 
568 /* callback, fexit or fentry step 1 */
569 static void __bpf_tramp_image_put_rcu_tasks(struct rcu_head *rcu)
570 {
571 	struct bpf_tramp_image *im;
572 
573 	im = container_of(rcu, struct bpf_tramp_image, rcu);
574 	if (im->ip_after_call)
575 		/* the case of fmod_ret/fexit trampoline and CONFIG_PREEMPTION=y */
576 		percpu_ref_kill(&im->pcref);
577 	else
578 		/* the case of fentry trampoline */
579 		call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu);
580 }
581 
582 static void bpf_tramp_image_put(struct bpf_tramp_image *im)
583 {
584 	/* The trampoline image that calls original function is using:
585 	 * rcu_read_lock_trace to protect sleepable bpf progs
586 	 * rcu_read_lock to protect normal bpf progs
587 	 * percpu_ref to protect trampoline itself
588 	 * rcu tasks to protect trampoline asm not covered by percpu_ref
589 	 * (which are few asm insns before __bpf_tramp_enter and
590 	 *  after __bpf_tramp_exit)
591 	 *
592 	 * The trampoline is unreachable before bpf_tramp_image_put().
593 	 *
594 	 * First, patch the trampoline to avoid calling into fexit progs.
595 	 * The progs will be freed even if the original function is still
596 	 * executing or sleeping.
597 	 * In case of CONFIG_PREEMPT=y use call_rcu_tasks() to wait on
598 	 * first few asm instructions to execute and call into
599 	 * __bpf_tramp_enter->percpu_ref_get.
600 	 * Then use percpu_ref_kill to wait for the trampoline and the original
601 	 * function to finish.
602 	 * Then use call_rcu_tasks() to make sure few asm insns in
603 	 * the trampoline epilogue are done as well.
604 	 *
605 	 * In !PREEMPT case the task that got interrupted in the first asm
606 	 * insns won't go through an RCU quiescent state which the
607 	 * percpu_ref_kill will be waiting for. Hence the first
608 	 * call_rcu_tasks() is not necessary.
609 	 */
610 	if (im->ip_after_call) {
611 		int err = bpf_arch_text_poke(im->ip_after_call, BPF_MOD_NOP,
612 					     BPF_MOD_JUMP, NULL,
613 					     im->ip_epilogue);
614 		WARN_ON(err);
615 		if (IS_ENABLED(CONFIG_TASKS_RCU))
616 			call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu_tasks);
617 		else
618 			percpu_ref_kill(&im->pcref);
619 		return;
620 	}
621 
622 	/* The trampoline without fexit and fmod_ret progs doesn't call original
623 	 * function and doesn't use percpu_ref.
624 	 * Use call_rcu_tasks_trace() to wait for sleepable progs to finish.
625 	 * Then use call_rcu_tasks() to wait for the rest of trampoline asm
626 	 * and normal progs.
627 	 */
628 	call_rcu_tasks_trace(&im->rcu, __bpf_tramp_image_put_rcu_tasks);
629 }
630 
631 static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, int size)
632 {
633 	struct bpf_tramp_image *im;
634 	struct bpf_ksym *ksym;
635 	void *image;
636 	int err = -ENOMEM;
637 
638 	im = kzalloc_obj(*im);
639 	if (!im)
640 		goto out;
641 
642 	err = bpf_jit_charge_modmem(size);
643 	if (err)
644 		goto out_free_im;
645 	im->size = size;
646 
647 	err = -ENOMEM;
648 	im->image = image = arch_alloc_bpf_trampoline(size);
649 	if (!image)
650 		goto out_uncharge;
651 
652 	err = percpu_ref_init(&im->pcref, __bpf_tramp_image_release, 0, GFP_KERNEL);
653 	if (err)
654 		goto out_free_image;
655 
656 	ksym = &im->ksym;
657 	INIT_LIST_HEAD_RCU(&ksym->lnode);
658 	snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", key);
659 	bpf_image_ksym_init(image, size, ksym);
660 	bpf_image_ksym_add(ksym);
661 	return im;
662 
663 out_free_image:
664 	arch_free_bpf_trampoline(im->image, im->size);
665 out_uncharge:
666 	bpf_jit_uncharge_modmem(size);
667 out_free_im:
668 	kfree(im);
669 out:
670 	return ERR_PTR(err);
671 }
672 
673 static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex,
674 				 const struct bpf_trampoline_ops *ops, void *data)
675 {
676 	struct bpf_tramp_image *im;
677 	struct bpf_tramp_nodes *tnodes;
678 	u32 orig_flags = tr->flags;
679 	bool ip_arg = false;
680 	int err, total, size;
681 
682 	tnodes = bpf_trampoline_get_progs(tr, &total, &ip_arg);
683 	if (IS_ERR(tnodes))
684 		return PTR_ERR(tnodes);
685 
686 	if (total == 0) {
687 		err = ops->unregister_fentry(tr, orig_flags, data);
688 		goto out;
689 	}
690 
691 	/* clear all bits except SHARE_IPMODIFY and TAIL_CALL_CTX */
692 	tr->flags &= (BPF_TRAMP_F_SHARE_IPMODIFY | BPF_TRAMP_F_TAIL_CALL_CTX);
693 
694 	if (tnodes[BPF_TRAMP_FEXIT].nr_nodes ||
695 	    tnodes[BPF_TRAMP_MODIFY_RETURN].nr_nodes) {
696 		/* NOTE: BPF_TRAMP_F_RESTORE_REGS and BPF_TRAMP_F_SKIP_FRAME
697 		 * should not be set together.
698 		 */
699 		tr->flags |= BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
700 	} else {
701 		tr->flags |= BPF_TRAMP_F_RESTORE_REGS;
702 	}
703 
704 	if (ip_arg)
705 		tr->flags |= BPF_TRAMP_F_IP_ARG;
706 
707 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
708 again:
709 	if (tr->flags & BPF_TRAMP_F_CALL_ORIG) {
710 		if (tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY) {
711 			/* The BPF_TRAMP_F_SKIP_FRAME can be cleared in the
712 			 * first try, reset it in the second try.
713 			 */
714 			tr->flags |= BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SKIP_FRAME;
715 		} else if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_JMP)) {
716 			/* Use "jmp" instead of "call" for the trampoline
717 			 * in the origin call case, and we don't need to
718 			 * skip the frame.
719 			 */
720 			tr->flags &= ~BPF_TRAMP_F_SKIP_FRAME;
721 		}
722 	}
723 #endif
724 
725 	size = arch_bpf_trampoline_size(&tr->func.model, tr->flags,
726 					tnodes, tr->func.addr);
727 	if (size < 0) {
728 		err = size;
729 		goto out;
730 	}
731 
732 	if (size > PAGE_SIZE) {
733 		err = -E2BIG;
734 		goto out;
735 	}
736 
737 	im = bpf_tramp_image_alloc(tr->key, size);
738 	if (IS_ERR(im)) {
739 		err = PTR_ERR(im);
740 		goto out;
741 	}
742 
743 	err = arch_prepare_bpf_trampoline(im, im->image, im->image + size,
744 					  &tr->func.model, tr->flags, tnodes,
745 					  tr->func.addr);
746 	if (err < 0)
747 		goto out_free;
748 
749 	err = arch_protect_bpf_trampoline(im->image, im->size);
750 	if (err)
751 		goto out_free;
752 
753 	if (tr->cur_image)
754 		/* progs already running at this address */
755 		err = ops->modify_fentry(tr, orig_flags, im, lock_direct_mutex, data);
756 	else
757 		/* first time registering */
758 		err = ops->register_fentry(tr, im, data);
759 
760 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
761 	if (err == -EAGAIN) {
762 		/* -EAGAIN from bpf_tramp_ftrace_ops_func. Now
763 		 * BPF_TRAMP_F_SHARE_IPMODIFY is set, we can generate the
764 		 * trampoline again, and retry register.
765 		 */
766 		bpf_tramp_image_free(im);
767 		goto again;
768 	}
769 #endif
770 
771 out_free:
772 	if (err)
773 		bpf_tramp_image_free(im);
774 out:
775 	/* If any error happens, restore previous flags */
776 	if (err)
777 		tr->flags = orig_flags;
778 	kfree(tnodes);
779 	return err;
780 }
781 
782 static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
783 {
784 	switch (prog->expected_attach_type) {
785 	case BPF_TRACE_FENTRY:
786 	case BPF_TRACE_FENTRY_MULTI:
787 		return BPF_TRAMP_FENTRY;
788 	case BPF_MODIFY_RETURN:
789 		return BPF_TRAMP_MODIFY_RETURN;
790 	case BPF_TRACE_FEXIT:
791 	case BPF_TRACE_FEXIT_MULTI:
792 		return BPF_TRAMP_FEXIT;
793 	case BPF_TRACE_FSESSION:
794 	case BPF_TRACE_FSESSION_MULTI:
795 		return BPF_TRAMP_FSESSION;
796 	case BPF_LSM_MAC:
797 		if (!prog->aux->attach_func_proto->type)
798 			/* The function returns void, we cannot modify its
799 			 * return value.
800 			 */
801 			return BPF_TRAMP_FEXIT;
802 		else
803 			return BPF_TRAMP_MODIFY_RETURN;
804 	default:
805 		return BPF_TRAMP_REPLACE;
806 	}
807 }
808 
809 static int bpf_freplace_check_tgt_prog(struct bpf_prog *tgt_prog)
810 {
811 	struct bpf_prog_aux *aux = tgt_prog->aux;
812 
813 	guard(mutex)(&aux->ext_mutex);
814 	if (aux->prog_array_member_cnt)
815 		/* Program extensions can not extend target prog when the target
816 		 * prog has been updated to any prog_array map as tail callee.
817 		 * It's to prevent a potential infinite loop like:
818 		 * tgt prog entry -> tgt prog subprog -> freplace prog entry
819 		 * --tailcall-> tgt prog entry.
820 		 */
821 		return -EBUSY;
822 
823 	aux->is_extended = true;
824 	return 0;
825 }
826 
827 static struct bpf_tramp_node *fsession_exit(struct bpf_tramp_node *node)
828 {
829 	if (node->link->type == BPF_LINK_TYPE_TRACING) {
830 		struct bpf_tracing_link *link;
831 
832 		link = container_of(node->link, struct bpf_tracing_link, link.link);
833 		return &link->fexit;
834 	} else if (node->link->type == BPF_LINK_TYPE_TRACING_MULTI) {
835 		struct bpf_tracing_multi_link *link;
836 		struct bpf_tracing_multi_node *mnode;
837 
838 		link = container_of(node->link, struct bpf_tracing_multi_link, link);
839 		mnode = container_of(node, struct bpf_tracing_multi_node, node);
840 		return &link->fexits[mnode - link->nodes];
841 	}
842 	return NULL;
843 }
844 
845 static int bpf_trampoline_add_prog(struct bpf_trampoline *tr,
846 				   struct bpf_tramp_node *node,
847 				   int cnt)
848 {
849 	enum bpf_tramp_prog_type kind;
850 	struct bpf_tramp_node *node_existing, *fexit;
851 	struct hlist_head *prog_list;
852 
853 	kind = bpf_attach_type_to_tramp(node->link->prog);
854 	if (kind == BPF_TRAMP_FSESSION) {
855 		prog_list = &tr->progs_hlist[BPF_TRAMP_FENTRY];
856 		cnt++;
857 	} else {
858 		prog_list = &tr->progs_hlist[kind];
859 	}
860 	if (cnt >= BPF_MAX_TRAMP_LINKS)
861 		return -E2BIG;
862 	if (!hlist_unhashed(&node->tramp_hlist))
863 		/* prog already linked */
864 		return -EBUSY;
865 	hlist_for_each_entry(node_existing, prog_list, tramp_hlist) {
866 		if (node_existing->link->prog != node->link->prog)
867 			continue;
868 		/* prog already linked */
869 		return -EBUSY;
870 	}
871 
872 	hlist_add_head(&node->tramp_hlist, prog_list);
873 	if (kind == BPF_TRAMP_FSESSION) {
874 		tr->progs_cnt[BPF_TRAMP_FENTRY]++;
875 		fexit = fsession_exit(node);
876 		if (WARN_ON_ONCE(!fexit))
877 			return -EINVAL;
878 		hlist_add_head(&fexit->tramp_hlist, &tr->progs_hlist[BPF_TRAMP_FEXIT]);
879 		tr->progs_cnt[BPF_TRAMP_FEXIT]++;
880 	} else {
881 		tr->progs_cnt[kind]++;
882 	}
883 	return 0;
884 }
885 
886 static void bpf_trampoline_remove_prog(struct bpf_trampoline *tr,
887 				       struct bpf_tramp_node *node)
888 {
889 	enum bpf_tramp_prog_type kind;
890 	struct bpf_tramp_node *fexit;
891 
892 	kind = bpf_attach_type_to_tramp(node->link->prog);
893 	if (kind == BPF_TRAMP_FSESSION) {
894 		fexit = fsession_exit(node);
895 		if (WARN_ON_ONCE(!fexit))
896 			return;
897 		hlist_del_init(&fexit->tramp_hlist);
898 		tr->progs_cnt[BPF_TRAMP_FEXIT]--;
899 		kind = BPF_TRAMP_FENTRY;
900 	}
901 	hlist_del_init(&node->tramp_hlist);
902 	tr->progs_cnt[kind]--;
903 }
904 
905 static int __bpf_trampoline_link_prog(struct bpf_tramp_node *node,
906 				      struct bpf_trampoline *tr,
907 				      struct bpf_prog *tgt_prog,
908 				      const struct bpf_trampoline_ops *ops,
909 				      void *data)
910 {
911 	enum bpf_tramp_prog_type kind;
912 	int err = 0;
913 	int cnt = 0, i;
914 
915 	kind = bpf_attach_type_to_tramp(node->link->prog);
916 	if (tr->extension_prog)
917 		/* cannot attach fentry/fexit if extension prog is attached.
918 		 * cannot overwrite extension prog either.
919 		 */
920 		return -EBUSY;
921 
922 	for (i = 0; i < BPF_TRAMP_MAX; i++)
923 		cnt += tr->progs_cnt[i];
924 
925 	if (kind == BPF_TRAMP_REPLACE) {
926 		/* Cannot attach extension if fentry/fexit are in use. */
927 		if (cnt)
928 			return -EBUSY;
929 		err = bpf_freplace_check_tgt_prog(tgt_prog);
930 		if (err)
931 			return err;
932 		tr->extension_prog = node->link->prog;
933 		return bpf_arch_text_poke(tr->func.addr, BPF_MOD_NOP,
934 					  BPF_MOD_JUMP, NULL,
935 					  node->link->prog->bpf_func);
936 	}
937 	err = bpf_trampoline_add_prog(tr, node, cnt);
938 	if (err)
939 		return err;
940 	err = bpf_trampoline_update(tr, true /* lock_direct_mutex */, ops, data);
941 	if (err)
942 		bpf_trampoline_remove_prog(tr, node);
943 	return err;
944 }
945 
946 int bpf_trampoline_link_prog(struct bpf_tramp_node *node,
947 			     struct bpf_trampoline *tr,
948 			     struct bpf_prog *tgt_prog)
949 {
950 	int err;
951 
952 	trampoline_lock(tr);
953 	err = __bpf_trampoline_link_prog(node, tr, tgt_prog, &trampoline_ops, NULL);
954 	trampoline_unlock(tr);
955 	return err;
956 }
957 
958 static int __bpf_trampoline_unlink_prog(struct bpf_tramp_node *node,
959 					struct bpf_trampoline *tr,
960 					struct bpf_prog *tgt_prog,
961 					const struct bpf_trampoline_ops *ops,
962 					void *data)
963 {
964 	enum bpf_tramp_prog_type kind;
965 	int err;
966 
967 	kind = bpf_attach_type_to_tramp(node->link->prog);
968 	if (kind == BPF_TRAMP_REPLACE) {
969 		WARN_ON_ONCE(!tr->extension_prog);
970 		err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP,
971 					 BPF_MOD_NOP,
972 					 tr->extension_prog->bpf_func, NULL);
973 		tr->extension_prog = NULL;
974 		guard(mutex)(&tgt_prog->aux->ext_mutex);
975 		tgt_prog->aux->is_extended = false;
976 		return err;
977 	}
978 	bpf_trampoline_remove_prog(tr, node);
979 	return bpf_trampoline_update(tr, true /* lock_direct_mutex */, ops, data);
980 }
981 
982 /* bpf_trampoline_unlink_prog() should never fail. */
983 int bpf_trampoline_unlink_prog(struct bpf_tramp_node *node,
984 			       struct bpf_trampoline *tr,
985 			       struct bpf_prog *tgt_prog)
986 {
987 	int err;
988 
989 	trampoline_lock(tr);
990 	err = __bpf_trampoline_unlink_prog(node, tr, tgt_prog, &trampoline_ops, NULL);
991 	trampoline_unlock(tr);
992 	return err;
993 }
994 
995 #if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM)
996 static void bpf_shim_tramp_link_release(struct bpf_link *link)
997 {
998 	struct bpf_shim_tramp_link *shim_link =
999 		container_of(link, struct bpf_shim_tramp_link, link.link);
1000 
1001 	/* paired with 'shim_link->trampoline = tr' in bpf_trampoline_link_cgroup_shim */
1002 	if (!shim_link->trampoline)
1003 		return;
1004 
1005 	WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link.node, shim_link->trampoline, NULL));
1006 	bpf_trampoline_put(shim_link->trampoline);
1007 }
1008 
1009 static void bpf_shim_tramp_link_dealloc(struct bpf_link *link)
1010 {
1011 	struct bpf_shim_tramp_link *shim_link =
1012 		container_of(link, struct bpf_shim_tramp_link, link.link);
1013 
1014 	kfree(shim_link);
1015 }
1016 
1017 static const struct bpf_link_ops bpf_shim_tramp_link_lops = {
1018 	.release = bpf_shim_tramp_link_release,
1019 	.dealloc = bpf_shim_tramp_link_dealloc,
1020 };
1021 
1022 static struct bpf_shim_tramp_link *cgroup_shim_alloc(const struct bpf_prog *prog,
1023 						     bpf_func_t bpf_func,
1024 						     int cgroup_atype,
1025 						     enum bpf_attach_type attach_type)
1026 {
1027 	struct bpf_shim_tramp_link *shim_link = NULL;
1028 	struct bpf_prog *p;
1029 
1030 	shim_link = kzalloc_obj(*shim_link, GFP_USER);
1031 	if (!shim_link)
1032 		return NULL;
1033 
1034 	p = bpf_prog_alloc(1, 0);
1035 	if (!p) {
1036 		kfree(shim_link);
1037 		return NULL;
1038 	}
1039 
1040 	p->jited = false;
1041 	p->bpf_func = bpf_func;
1042 
1043 	p->aux->cgroup_atype = cgroup_atype;
1044 	p->aux->attach_func_proto = prog->aux->attach_func_proto;
1045 	p->aux->attach_btf_id = prog->aux->attach_btf_id;
1046 	p->aux->attach_btf = prog->aux->attach_btf;
1047 	btf_get(p->aux->attach_btf);
1048 	p->type = BPF_PROG_TYPE_LSM;
1049 	p->expected_attach_type = BPF_LSM_MAC;
1050 	bpf_prog_inc(p);
1051 	bpf_tramp_link_init(&shim_link->link, BPF_LINK_TYPE_UNSPEC,
1052 		      &bpf_shim_tramp_link_lops, p, attach_type, 0);
1053 	bpf_cgroup_atype_get(p->aux->attach_btf_id, cgroup_atype);
1054 
1055 	return shim_link;
1056 }
1057 
1058 static struct bpf_shim_tramp_link *cgroup_shim_find(struct bpf_trampoline *tr,
1059 						    bpf_func_t bpf_func)
1060 {
1061 	struct bpf_tramp_node *node;
1062 	int kind;
1063 
1064 	for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
1065 		hlist_for_each_entry(node, &tr->progs_hlist[kind], tramp_hlist) {
1066 			struct bpf_prog *p = node->link->prog;
1067 
1068 			if (p->bpf_func == bpf_func)
1069 				return container_of(node, struct bpf_shim_tramp_link, link.node);
1070 		}
1071 	}
1072 
1073 	return NULL;
1074 }
1075 
1076 int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
1077 				    int cgroup_atype,
1078 				    enum bpf_attach_type attach_type)
1079 {
1080 	struct bpf_shim_tramp_link *shim_link = NULL;
1081 	struct bpf_attach_target_info tgt_info = {};
1082 	struct bpf_trampoline *tr;
1083 	bpf_func_t bpf_func;
1084 	u64 key;
1085 	int err;
1086 
1087 	err = bpf_check_attach_target(NULL, prog, NULL,
1088 				      prog->aux->attach_btf_id,
1089 				      &tgt_info);
1090 	if (err)
1091 		return err;
1092 
1093 	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
1094 					 prog->aux->attach_btf_id);
1095 
1096 	bpf_lsm_find_cgroup_shim(prog, &bpf_func);
1097 	tr = bpf_trampoline_get(key, &tgt_info);
1098 	if (!tr)
1099 		return  -ENOMEM;
1100 
1101 	trampoline_lock(tr);
1102 
1103 	shim_link = cgroup_shim_find(tr, bpf_func);
1104 	if (shim_link && !IS_ERR(bpf_link_inc_not_zero(&shim_link->link.link))) {
1105 		/* Reusing existing shim attached by the other program. */
1106 		trampoline_unlock(tr);
1107 		bpf_trampoline_put(tr); /* bpf_trampoline_get above */
1108 		return 0;
1109 	}
1110 
1111 	/* Allocate and install new shim. */
1112 
1113 	shim_link = cgroup_shim_alloc(prog, bpf_func, cgroup_atype, attach_type);
1114 	if (!shim_link) {
1115 		err = -ENOMEM;
1116 		goto err;
1117 	}
1118 
1119 	err = __bpf_trampoline_link_prog(&shim_link->link.node, tr, NULL, &trampoline_ops, NULL);
1120 	if (err)
1121 		goto err;
1122 
1123 	shim_link->trampoline = tr;
1124 	/* note, we're still holding tr refcnt from above */
1125 
1126 	trampoline_unlock(tr);
1127 
1128 	return 0;
1129 err:
1130 	trampoline_unlock(tr);
1131 
1132 	if (shim_link)
1133 		bpf_link_put(&shim_link->link.link);
1134 
1135 	/* have to release tr while _not_ holding pool mutex for trampoline */
1136 	bpf_trampoline_put(tr); /* bpf_trampoline_get above */
1137 
1138 	return err;
1139 }
1140 
1141 void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog)
1142 {
1143 	struct bpf_shim_tramp_link *shim_link = NULL;
1144 	struct bpf_trampoline *tr;
1145 	bpf_func_t bpf_func;
1146 	u64 key;
1147 
1148 	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
1149 					 prog->aux->attach_btf_id);
1150 
1151 	bpf_lsm_find_cgroup_shim(prog, &bpf_func);
1152 	tr = bpf_trampoline_lookup(key, 0);
1153 	if (WARN_ON_ONCE(!tr))
1154 		return;
1155 
1156 	trampoline_lock(tr);
1157 	shim_link = cgroup_shim_find(tr, bpf_func);
1158 	trampoline_unlock(tr);
1159 
1160 	if (shim_link)
1161 		bpf_link_put(&shim_link->link.link);
1162 
1163 	bpf_trampoline_put(tr); /* bpf_trampoline_lookup above */
1164 }
1165 #endif
1166 
1167 struct bpf_trampoline *bpf_trampoline_get(u64 key,
1168 					  struct bpf_attach_target_info *tgt_info)
1169 {
1170 	struct bpf_trampoline *tr;
1171 
1172 	tr = bpf_trampoline_lookup(key, tgt_info->tgt_addr);
1173 	if (!tr)
1174 		return NULL;
1175 
1176 	trampoline_lock(tr);
1177 	if (tr->func.addr)
1178 		goto out;
1179 
1180 	memcpy(&tr->func.model, &tgt_info->fmodel, sizeof(tgt_info->fmodel));
1181 	tr->func.addr = (void *)tgt_info->tgt_addr;
1182 out:
1183 	trampoline_unlock(tr);
1184 	return tr;
1185 }
1186 
1187 void bpf_trampoline_put(struct bpf_trampoline *tr)
1188 {
1189 	int i;
1190 
1191 	if (!tr)
1192 		return;
1193 	mutex_lock(&trampoline_mutex);
1194 	if (!refcount_dec_and_test(&tr->refcnt))
1195 		goto out;
1196 
1197 	for (i = 0; i < BPF_TRAMP_MAX; i++)
1198 		if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[i])))
1199 			goto out;
1200 
1201 	/* This code will be executed even when the last bpf_tramp_image
1202 	 * is alive. All progs are detached from the trampoline and the
1203 	 * trampoline image is patched with jmp into epilogue to skip
1204 	 * fexit progs. The fentry-only trampoline will be freed via
1205 	 * multiple rcu callbacks.
1206 	 */
1207 	hlist_del(&tr->hlist_key);
1208 	hlist_del(&tr->hlist_ip);
1209 	direct_ops_free(tr);
1210 	kfree(tr);
1211 out:
1212 	mutex_unlock(&trampoline_mutex);
1213 }
1214 
1215 #define NO_START_TIME 1
1216 static __always_inline u64 notrace bpf_prog_start_time(void)
1217 {
1218 	u64 start = NO_START_TIME;
1219 
1220 	if (static_branch_unlikely(&bpf_stats_enabled_key)) {
1221 		start = sched_clock();
1222 		if (unlikely(!start))
1223 			start = NO_START_TIME;
1224 	}
1225 	return start;
1226 }
1227 
1228 /* The logic is similar to bpf_prog_run(), but with an explicit
1229  * rcu_read_lock() and migrate_disable() which are required
1230  * for the trampoline. The macro is split into
1231  * call __bpf_prog_enter
1232  * call prog->bpf_func
1233  * call __bpf_prog_exit
1234  *
1235  * __bpf_prog_enter returns:
1236  * 0 - skip execution of the bpf prog
1237  * 1 - execute bpf prog
1238  * [2..MAX_U64] - execute bpf prog and record execution time.
1239  *     This is start time.
1240  */
1241 static u64 notrace __bpf_prog_enter_recur(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
1242 	__acquires(RCU)
1243 {
1244 	rcu_read_lock_dont_migrate();
1245 
1246 	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
1247 
1248 	if (unlikely(!bpf_prog_get_recursion_context(prog))) {
1249 		bpf_prog_inc_misses_counter(prog);
1250 		if (prog->aux->recursion_detected)
1251 			prog->aux->recursion_detected(prog);
1252 		return 0;
1253 	}
1254 	return bpf_prog_start_time();
1255 }
1256 
1257 static void notrace __update_prog_stats(struct bpf_prog *prog, u64 start)
1258 {
1259 	struct bpf_prog_stats *stats;
1260 	unsigned long flags;
1261 	u64 duration;
1262 
1263 	/*
1264 	 * static_key could be enabled in __bpf_prog_enter* and disabled in
1265 	 * __bpf_prog_exit*. And vice versa. Check that 'start' is valid.
1266 	 */
1267 	if (start <= NO_START_TIME)
1268 		return;
1269 
1270 	duration = sched_clock() - start;
1271 	stats = this_cpu_ptr(prog->stats);
1272 	flags = u64_stats_update_begin_irqsave(&stats->syncp);
1273 	u64_stats_inc(&stats->cnt);
1274 	u64_stats_add(&stats->nsecs, duration);
1275 	u64_stats_update_end_irqrestore(&stats->syncp, flags);
1276 }
1277 
1278 static __always_inline void notrace update_prog_stats(struct bpf_prog *prog,
1279 						      u64 start)
1280 {
1281 	if (static_branch_unlikely(&bpf_stats_enabled_key))
1282 		__update_prog_stats(prog, start);
1283 }
1284 
/*
 * Exit handler paired with __bpf_prog_enter_recur(): restore the run context
 * saved on entry, account the run, release the program's recursion context
 * and finally leave the RCU read-side section via rcu_read_unlock_migrate().
 */
static void notrace __bpf_prog_exit_recur(struct bpf_prog *prog, u64 start,
					  struct bpf_tramp_run_ctx *run_ctx)
	__releases(RCU)
{
	bpf_reset_run_ctx(run_ctx->saved_run_ctx);

	update_prog_stats(prog, start);
	bpf_prog_put_recursion_context(prog);
	rcu_read_unlock_migrate();
}
1295 
/*
 * Enter handler selected by bpf_trampoline_enter() for LSM programs with
 * expected_attach_type BPF_LSM_CGROUP.  Always returns NO_START_TIME, so the
 * program is executed and no start time is recorded.
 */
static u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
					       struct bpf_tramp_run_ctx *run_ctx)
	__acquires(RCU)
{
	/* Runtime stats are exported via actual BPF_LSM_CGROUP
	 * programs, not the shims.
	 */
	rcu_read_lock_dont_migrate();

	/* Install this run's context; the previous one is restored on exit. */
	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

	return NO_START_TIME;
}
1309 
/*
 * Exit handler paired with __bpf_prog_enter_lsm_cgroup(): restore the saved
 * run context and call rcu_read_unlock_migrate().  @prog and @start are not
 * used; no stats are updated here.
 */
static void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start,
					       struct bpf_tramp_run_ctx *run_ctx)
	__releases(RCU)
{
	bpf_reset_run_ctx(run_ctx->saved_run_ctx);

	rcu_read_unlock_migrate();
}
1318 
1319 u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
1320 					     struct bpf_tramp_run_ctx *run_ctx)
1321 {
1322 	rcu_read_lock_trace();
1323 	migrate_disable();
1324 	might_fault();
1325 
1326 	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
1327 
1328 	if (unlikely(!bpf_prog_get_recursion_context(prog))) {
1329 		bpf_prog_inc_misses_counter(prog);
1330 		if (prog->aux->recursion_detected)
1331 			prog->aux->recursion_detected(prog);
1332 		return 0;
1333 	}
1334 	return bpf_prog_start_time();
1335 }
1336 
/*
 * Exit handler paired with __bpf_prog_enter_sleepable_recur(): restore the
 * saved run context, account the run, release the recursion context, then
 * undo migrate_disable() and rcu_read_lock_trace() in reverse order of entry.
 */
void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start,
					     struct bpf_tramp_run_ctx *run_ctx)
{
	bpf_reset_run_ctx(run_ctx->saved_run_ctx);

	update_prog_stats(prog, start);
	bpf_prog_put_recursion_context(prog);
	migrate_enable();
	rcu_read_unlock_trace();
}
1347 
/*
 * Enter handler for sleepable programs when bpf_prog_check_recur() does not
 * ask for recursion protection (see bpf_trampoline_enter()).  Returns the
 * start time from bpf_prog_start_time(), which is never 0, so the program
 * always runs.
 */
static u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog,
					      struct bpf_tramp_run_ctx *run_ctx)
{
	rcu_read_lock_trace();
	migrate_disable();
	might_fault();

	/* Install this run's context; the previous one is restored on exit. */
	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

	return bpf_prog_start_time();
}
1359 
/*
 * Exit handler paired with __bpf_prog_enter_sleepable(): restore the saved
 * run context, account the run, then undo migrate_disable() and
 * rcu_read_lock_trace() in reverse order of entry.
 */
static void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
					      struct bpf_tramp_run_ctx *run_ctx)
{
	bpf_reset_run_ctx(run_ctx->saved_run_ctx);

	update_prog_stats(prog, start);
	migrate_enable();
	rcu_read_unlock_trace();
}
1369 
/*
 * Default enter handler (non-sleepable, no recursion protection; see
 * bpf_trampoline_enter()).  Returns the start time from
 * bpf_prog_start_time(), which is never 0, so the program always runs.
 */
static u64 notrace __bpf_prog_enter(struct bpf_prog *prog,
				    struct bpf_tramp_run_ctx *run_ctx)
	__acquires(RCU)
{
	rcu_read_lock_dont_migrate();

	/* Install this run's context; the previous one is restored on exit. */
	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

	return bpf_prog_start_time();
}
1380 
/*
 * Exit handler paired with __bpf_prog_enter(): restore the saved run
 * context, account the run and call rcu_read_unlock_migrate().
 */
static void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start,
				    struct bpf_tramp_run_ctx *run_ctx)
	__releases(RCU)
{
	bpf_reset_run_ctx(run_ctx->saved_run_ctx);

	update_prog_stats(prog, start);
	rcu_read_unlock_migrate();
}
1390 
/* Take a reference on the trampoline image's percpu refcount (tr->pcref). */
void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr)
{
	percpu_ref_get(&tr->pcref);
}
1395 
/* Drop the percpu refcount reference taken by __bpf_tramp_enter(). */
void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr)
{
	percpu_ref_put(&tr->pcref);
}
1400 
1401 bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog)
1402 {
1403 	bool sleepable = prog->sleepable;
1404 
1405 	if (bpf_prog_check_recur(prog))
1406 		return sleepable ? __bpf_prog_enter_sleepable_recur :
1407 			__bpf_prog_enter_recur;
1408 
1409 	if (resolve_prog_type(prog) == BPF_PROG_TYPE_LSM &&
1410 	    prog->expected_attach_type == BPF_LSM_CGROUP)
1411 		return __bpf_prog_enter_lsm_cgroup;
1412 
1413 	return sleepable ? __bpf_prog_enter_sleepable : __bpf_prog_enter;
1414 }
1415 
1416 bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog)
1417 {
1418 	bool sleepable = prog->sleepable;
1419 
1420 	if (bpf_prog_check_recur(prog))
1421 		return sleepable ? __bpf_prog_exit_sleepable_recur :
1422 			__bpf_prog_exit_recur;
1423 
1424 	if (resolve_prog_type(prog) == BPF_PROG_TYPE_LSM &&
1425 	    prog->expected_attach_type == BPF_LSM_CGROUP)
1426 		return __bpf_prog_exit_lsm_cgroup;
1427 
1428 	return sleepable ? __bpf_prog_exit_sleepable : __bpf_prog_exit;
1429 }
1430 
/*
 * Weak default used when no other definition of this symbol is linked in:
 * reports that building a trampoline is unsupported by returning -ENOTSUPP.
 */
int __weak
arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
			    const struct btf_func_model *m, u32 flags,
			    struct bpf_tramp_nodes *tnodes,
			    void *func_addr)
{
	return -ENOTSUPP;
}
1439 
1440 void * __weak arch_alloc_bpf_trampoline(unsigned int size)
1441 {
1442 	void *image;
1443 
1444 	if (WARN_ON_ONCE(size > PAGE_SIZE))
1445 		return NULL;
1446 	image = bpf_jit_alloc_exec(PAGE_SIZE);
1447 	if (image)
1448 		set_vm_flush_reset_perms(image);
1449 	return image;
1450 }
1451 
/*
 * Weak default for releasing a trampoline image: warns once if @size exceeds
 * PAGE_SIZE and frees @image with bpf_jit_free_exec().
 */
void __weak arch_free_bpf_trampoline(void *image, unsigned int size)
{
	WARN_ON_ONCE(size > PAGE_SIZE);
	/* bpf_jit_free_exec doesn't need "size", but
	 * bpf_prog_pack_free() needs it.
	 */
	bpf_jit_free_exec(image);
}
1460 
/*
 * Weak default for protecting a trampoline image: warns once if @size
 * exceeds PAGE_SIZE, then calls set_memory_rox() on @image with a page count
 * of 1 and returns its result.
 */
int __weak arch_protect_bpf_trampoline(void *image, unsigned int size)
{
	WARN_ON_ONCE(size > PAGE_SIZE);
	return set_memory_rox((long)image, 1);
}
1466 
/*
 * Weak default used when no other definition of this symbol is linked in:
 * reports that sizing a trampoline is unsupported by returning -ENOTSUPP.
 */
int __weak arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
				    struct bpf_tramp_nodes *tnodes, void *func_addr)
{
	return -ENOTSUPP;
}
1472 
1473 #if defined(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS) && \
1474     defined(CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS) && \
1475     defined(CONFIG_BPF_SYSCALL)
1476 
1477 static void trampoline_lock_all(void)
1478 {
1479 	int i;
1480 
1481 	for (i = 0; i < TRAMPOLINE_LOCKS_TABLE_SIZE; i++)
1482 		mutex_lock(&trampoline_locks[i].mutex);
1483 }
1484 
1485 static void trampoline_unlock_all(void)
1486 {
1487 	int i;
1488 
1489 	for (i = 0; i < TRAMPOLINE_LOCKS_TABLE_SIZE; i++)
1490 		mutex_unlock(&trampoline_locks[i].mutex);
1491 }
1492 
1493 static void remove_tracing_multi_data(struct bpf_tracing_multi_data *data)
1494 {
1495 	ftrace_hash_remove(data->reg);
1496 	ftrace_hash_remove(data->unreg);
1497 	ftrace_hash_remove(data->modify);
1498 }
1499 
1500 static void clear_tracing_multi_data(struct bpf_tracing_multi_data *data)
1501 {
1502 	remove_tracing_multi_data(data);
1503 
1504 	free_ftrace_hash(data->reg);
1505 	free_ftrace_hash(data->unreg);
1506 	free_ftrace_hash(data->modify);
1507 }
1508 
1509 static int init_tracing_multi_data(struct bpf_tracing_multi_data *data)
1510 {
1511 	data->reg    = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS);
1512 	data->unreg  = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS);
1513 	data->modify = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS);
1514 
1515 	if (!data->reg || !data->unreg || !data->modify) {
1516 		clear_tracing_multi_data(data);
1517 		return -ENOMEM;
1518 	}
1519 	return 0;
1520 }
1521 
/*
 * Fill in @entry with the patch site @ip and the direct-call address
 * @direct, then insert it into @hash with add_ftrace_hash_entry().
 */
static void ftrace_hash_add(struct ftrace_hash *hash, struct ftrace_func_entry *entry,
			    unsigned long ip, unsigned long direct)
{
	entry->ip = ip;
	entry->direct = direct;
	add_ftrace_hash_entry(hash, entry);
}
1529 
1530 static int register_fentry_multi(struct bpf_trampoline *tr, struct bpf_tramp_image *im, void *ptr)
1531 {
1532 	unsigned long addr = (unsigned long) im->image;
1533 	unsigned long ip = ftrace_location(tr->ip);
1534 	struct bpf_tracing_multi_data *data = ptr;
1535 
1536 	if (bpf_trampoline_use_jmp(tr->flags))
1537 		addr = ftrace_jmp_set(addr);
1538 
1539 	ftrace_hash_add(data->reg, data->entry, ip, addr);
1540 	tr->cur_image = im;
1541 	return 0;
1542 }
1543 
1544 static int unregister_fentry_multi(struct bpf_trampoline *tr, u32 orig_flags, void *ptr)
1545 {
1546 	unsigned long addr = (unsigned long) tr->cur_image->image;
1547 	unsigned long ip = ftrace_location(tr->ip);
1548 	struct bpf_tracing_multi_data *data = ptr;
1549 
1550 	if (bpf_trampoline_use_jmp(tr->flags))
1551 		addr = ftrace_jmp_set(addr);
1552 
1553 	ftrace_hash_add(data->unreg, data->entry, ip, addr);
1554 	tr->cur_image = NULL;
1555 	return 0;
1556 }
1557 
1558 static int modify_fentry_multi(struct bpf_trampoline *tr, u32 orig_flags, struct bpf_tramp_image *im,
1559 			       bool lock_direct_mutex, void *ptr)
1560 {
1561 	unsigned long addr = (unsigned long) im->image;
1562 	unsigned long ip = ftrace_location(tr->ip);
1563 	struct bpf_tracing_multi_data *data = ptr;
1564 
1565 	if (bpf_trampoline_use_jmp(tr->flags))
1566 		addr = ftrace_jmp_set(addr);
1567 
1568 	ftrace_hash_add(data->modify, data->entry, ip, addr);
1569 	tr->cur_image = im;
1570 	return 0;
1571 }
1572 
/*
 * fentry callbacks used by bpf_trampoline_multi_attach()/_detach().  Each
 * callback only queues an entry in the matching hash (reg/unreg/modify) of
 * the bpf_tracing_multi_data it is given; the queued entries are applied
 * afterwards by the multi attach/detach code.
 */
static const struct bpf_trampoline_ops trampoline_multi_ops = {
	.register_fentry   = register_fentry_multi,
	.unregister_fentry = unregister_fentry_multi,
	.modify_fentry     = modify_fentry_multi,
};
1578 
/*
 * Snapshot the trampoline's current image and flags into tr->multi_attach
 * before a batched update, so the update can later be finished with
 * bpf_trampoline_multi_attach_free() or undone with
 * bpf_trampoline_multi_attach_rollback().
 */
static void bpf_trampoline_multi_attach_init(struct bpf_trampoline *tr)
{
	tr->multi_attach.old_image = tr->cur_image;
	tr->multi_attach.old_flags = tr->flags;
}
1584 
1585 static void bpf_trampoline_multi_attach_free(struct bpf_trampoline *tr)
1586 {
1587 	if (tr->multi_attach.old_image)
1588 		bpf_tramp_image_put(tr->multi_attach.old_image);
1589 
1590 	tr->multi_attach.old_image = NULL;
1591 	tr->multi_attach.old_flags = 0;
1592 }
1593 
1594 static void bpf_trampoline_multi_attach_rollback(struct bpf_trampoline *tr)
1595 {
1596 	if (tr->cur_image)
1597 		bpf_tramp_image_put(tr->cur_image);
1598 	tr->cur_image = tr->multi_attach.old_image;
1599 	tr->flags = tr->multi_attach.old_flags;
1600 
1601 	tr->multi_attach.old_image = NULL;
1602 	tr->multi_attach.old_flags = 0;
1603 }
1604 
/*
 * Iterate over the first @cnt elements of @link->nodes[], pointing @mnode at
 * each element in turn.
 *
 * The caller must have an integer variable named 'i' in scope; it is used as
 * the loop index and equals the number of elements visited once the loop
 * runs to completion.  @link and @cnt are evaluated on every iteration.
 * All arguments are parenthesized so that expressions such as '&obj' or
 * 'a ? b : c' can be passed safely.
 */
#define for_each_mnode_cnt(mnode, link, cnt) \
	for (i = 0, (mnode) = &(link)->nodes[i]; i < (cnt); i++, (mnode) = &(link)->nodes[i])

/* Iterate over all @link->nodes_cnt elements of @link->nodes[]. */
#define for_each_mnode(mnode, link) \
	for_each_mnode_cnt(mnode, link, (link)->nodes_cnt)
1610 
/*
 * Attach @prog to every function listed in @ids (one per node of @link) as a
 * single batch.
 *
 * Phase 1, without pool locks: for each node, validate the BTF id, get the
 * matching trampoline and fill in the node (link, cookie and, for
 * BPF_TRACE_FSESSION_MULTI, the fexit node).
 *
 * Phase 2, with all pool mutexes held: link each node into its trampoline
 * using trampoline_multi_ops, which only queues the required changes in
 * @link->data, then apply the queued 'reg' entries with
 * update_ftrace_direct_add() and the queued 'modify' entries with
 * update_ftrace_direct_mod().
 *
 * On any failure the work done so far is undone: linked nodes are removed
 * and their trampolines restored, the hashes are freed and every trampoline
 * reference taken in phase 1 is dropped.
 *
 * Returns 0 on success or a negative errno.
 */
int bpf_trampoline_multi_attach(struct bpf_prog *prog, u32 *ids,
				struct bpf_tracing_multi_link *link)
{
	struct bpf_tracing_multi_data *data = &link->data;
	struct bpf_attach_target_info tgt_info = {};
	struct btf *btf = prog->aux->attach_btf;
	struct bpf_tracing_multi_node *mnode;
	struct bpf_trampoline *tr;
	int i, err, rollback_cnt;
	u64 key;

	for_each_mnode(mnode, link) {
		/* nodes [0, i) already hold a trampoline reference */
		rollback_cnt = i;

		err = bpf_check_attach_btf_id_multi(btf, prog, ids[i], &tgt_info);
		if (err)
			goto rollback_put;

		key = bpf_trampoline_compute_key(NULL, btf, ids[i]);

		tr = bpf_trampoline_get(key, &tgt_info);
		if (!tr) {
			err = -ENOMEM;
			goto rollback_put;
		}

		mnode->trampoline = tr;
		mnode->node.link = &link->link;
		mnode->node.cookie = link->cookies ? link->cookies[i] : 0;

		if (prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI) {
			link->fexits[i].link = &link->link;
			link->fexits[i].cookie = link->cookies ? link->cookies[i] : 0;
		}

		cond_resched();
	}

	err = init_tracing_multi_data(data);
	if (err) {
		/* every node holds a trampoline reference at this point */
		rollback_cnt = link->nodes_cnt;
		goto rollback_put;
	}

	trampoline_lock_all();

	for_each_mnode(mnode, link) {
		/* remember the current image/flags so the change can be undone */
		bpf_trampoline_multi_attach_init(mnode->trampoline);

		/* the multi ops queue this node's embedded entry in @data */
		data->entry = &mnode->entry;
		err = __bpf_trampoline_link_prog(&mnode->node, mnode->trampoline, NULL,
						 &trampoline_multi_ops, data);
		if (err) {
			/* only nodes [0, i) were linked and need unlinking */
			rollback_cnt = i;
			goto rollback_unlink;
		}
	}

	/* from here on a failure has to unlink all nodes */
	rollback_cnt = link->nodes_cnt;
	if (ftrace_hash_count(data->reg)) {
		err = update_ftrace_direct_add(&direct_ops, data->reg);
		if (err)
			goto rollback_unlink;
	}

	if (ftrace_hash_count(data->modify)) {
		err = update_ftrace_direct_mod(&direct_ops, data->modify, true);
		if (err) {
			/* take back the registrations applied just above */
			if (ftrace_hash_count(data->reg))
				WARN_ON_ONCE(update_ftrace_direct_del(&direct_ops, data->reg));
			goto rollback_unlink;
		}
	}

	/* success: release the images saved by bpf_trampoline_multi_attach_init() */
	for_each_mnode(mnode, link)
		bpf_trampoline_multi_attach_free(mnode->trampoline);

	trampoline_unlock_all();

	/*
	 * The hashes themselves are not freed here;
	 * bpf_trampoline_multi_detach() uses and frees them.
	 * NOTE(review): presumably this detaches the queued entries from the
	 * hashes - confirm against ftrace_hash_remove().
	 */
	remove_tracing_multi_data(data);
	return 0;

rollback_unlink:
	for_each_mnode_cnt(mnode, link, rollback_cnt) {
		bpf_trampoline_remove_prog(mnode->trampoline, &mnode->node);
		bpf_trampoline_multi_attach_rollback(mnode->trampoline);
	}

	trampoline_unlock_all();

	clear_tracing_multi_data(data);
	/* all nodes got a trampoline reference in phase 1 */
	rollback_cnt = link->nodes_cnt;

rollback_put:
	/* trampolines are put without holding any pool mutex */
	for_each_mnode_cnt(mnode, link, rollback_cnt)
		bpf_trampoline_put(mnode->trampoline);

	return err;
}
1710 
/*
 * Detach every node of @link from its trampoline as a single batch.
 *
 * With all pool mutexes held, each node is unlinked using
 * trampoline_multi_ops, which only queues the required changes in
 * @link->data; the queued 'unreg' entries are then applied with
 * update_ftrace_direct_del() and the queued 'modify' entries with
 * update_ftrace_direct_mod().  Failures are reported through WARN_ON_ONCE()
 * only.  Afterwards the trampoline references are dropped and the hashes in
 * @link->data are freed.
 *
 * @prog is not used.  Always returns 0.
 */
int bpf_trampoline_multi_detach(struct bpf_prog *prog, struct bpf_tracing_multi_link *link)
{
	struct bpf_tracing_multi_data *data = &link->data;
	struct bpf_tracing_multi_node *mnode;
	int i;

	trampoline_lock_all();

	for_each_mnode(mnode, link) {
		/* the multi ops queue this node's embedded entry in @data */
		data->entry = &mnode->entry;
		/* remember the image being replaced; it is put further below */
		bpf_trampoline_multi_attach_init(mnode->trampoline);
		WARN_ON_ONCE(__bpf_trampoline_unlink_prog(&mnode->node, mnode->trampoline,
					NULL, &trampoline_multi_ops, data));
	}

	if (ftrace_hash_count(data->unreg))
		WARN_ON_ONCE(update_ftrace_direct_del(&direct_ops, data->unreg));
	if (ftrace_hash_count(data->modify))
		WARN_ON_ONCE(update_ftrace_direct_mod(&direct_ops, data->modify, true));

	/* release the images saved by bpf_trampoline_multi_attach_init() */
	for_each_mnode(mnode, link)
		bpf_trampoline_multi_attach_free(mnode->trampoline);

	trampoline_unlock_all();

	/* trampolines are put without holding any pool mutex */
	for_each_mnode(mnode, link)
		bpf_trampoline_put(mnode->trampoline);

	clear_tracing_multi_data(data);
	return 0;
}
1742 
1743 #undef for_each_mnode_cnt
1744 #undef for_each_mnode
1745 
1746 #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS &&
1747 	  CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS &&
1748 	  CONFIG_BPF_SYSCALL */
1749 
1750 static int __init init_trampolines(void)
1751 {
1752 	int i;
1753 
1754 	for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++)
1755 		INIT_HLIST_HEAD(&trampoline_key_table[i]);
1756 	for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++)
1757 		INIT_HLIST_HEAD(&trampoline_ip_table[i]);
1758 	for (i = 0; i < TRAMPOLINE_LOCKS_TABLE_SIZE; i++)
1759 		__mutex_init(&trampoline_locks[i].mutex, "trampoline_lock", &trampoline_locks[i].key);
1760 	return 0;
1761 }
1762 late_initcall(init_trampolines);
1763