xref: /linux/kernel/trace/bpf_trace.c (revision 68f4e480b089abae26fbab0c38c3df3cbac3d79d)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
3  * Copyright (c) 2016 Facebook
4  */
5 #include <linux/kernel.h>
6 #include <linux/types.h>
7 #include <linux/slab.h>
8 #include <linux/bpf.h>
9 #include <linux/bpf_verifier.h>
10 #include <linux/bpf_perf_event.h>
11 #include <linux/btf.h>
12 #include <linux/filter.h>
13 #include <linux/uaccess.h>
14 #include <linux/ctype.h>
15 #include <linux/kprobes.h>
16 #include <linux/spinlock.h>
17 #include <linux/syscalls.h>
18 #include <linux/error-injection.h>
19 #include <linux/btf_ids.h>
20 #include <linux/bpf_lsm.h>
21 #include <linux/fprobe.h>
22 #include <linux/bsearch.h>
23 #include <linux/sort.h>
24 #include <linux/key.h>
25 #include <linux/namei.h>
26 
27 #include <net/bpf_sk_storage.h>
28 
29 #include <uapi/linux/bpf.h>
30 #include <uapi/linux/btf.h>
31 
32 #include <asm/tlb.h>
33 
34 #include "trace_probe.h"
35 #include "trace.h"
36 
37 #define CREATE_TRACE_POINTS
38 #include "bpf_trace.h"
39 
40 #define bpf_event_rcu_dereference(p)					\
41 	rcu_dereference_protected(p, lockdep_is_held(&bpf_event_mutex))
42 
43 #define MAX_UPROBE_MULTI_CNT (1U << 20)
44 #define MAX_KPROBE_MULTI_CNT (1U << 20)
45 #define MAX_TRACING_MULTI_CNT (1U << 20)
46 
47 #ifdef CONFIG_MODULES
/* One element of the bpf_trace_modules list: a module plus its list linkage. */
struct bpf_trace_module {
	struct module *module;	/* module whose bpf_raw_events[] is searched */
	struct list_head list;	/* linkage on bpf_trace_modules */
};

/* List of struct bpf_trace_module entries searched for raw tracepoints. */
static LIST_HEAD(bpf_trace_modules);
/* Taken by bpf_get_raw_tracepoint_module() around its walk of the list. */
static DEFINE_MUTEX(bpf_module_mutex);
55 
56 static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
57 {
58 	struct bpf_raw_event_map *btp, *ret = NULL;
59 	struct bpf_trace_module *btm;
60 	unsigned int i;
61 
62 	mutex_lock(&bpf_module_mutex);
63 	list_for_each_entry(btm, &bpf_trace_modules, list) {
64 		for (i = 0; i < btm->module->num_bpf_raw_events; ++i) {
65 			btp = &btm->module->bpf_raw_events[i];
66 			if (!strcmp(btp->tp->name, name)) {
67 				if (try_module_get(btm->module))
68 					ret = btp;
69 				goto out;
70 			}
71 		}
72 	}
73 out:
74 	mutex_unlock(&bpf_module_mutex);
75 	return ret;
76 }
77 #else
/*
 * Variant built when CONFIG_MODULES is not defined: there are no module
 * tracepoints to search, so the lookup always yields NULL.
 */
static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
{
	return NULL;
}
82 #endif /* CONFIG_MODULES */
83 
/* Prototypes of the five-u64-argument helper entry points. */
u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);

/* Forward declaration; used by the BTF printing helpers before its definition. */
static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
				  u64 flags, const struct btf **btf,
				  s32 *btf_id);
/* Forward declarations of the kprobe-multi accessors taking a run context. */
static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx);
static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx);

/* Forward declarations of the uprobe-multi accessors taking a run context. */
static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx);
static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx);
95 
96 /**
97  * trace_call_bpf - invoke BPF program
98  * @call: tracepoint event
99  * @ctx: opaque context pointer
100  *
101  * kprobe handlers execute BPF programs via this helper.
102  * Can be used from static tracepoints in the future.
103  *
104  * Return: BPF programs always return an integer which is interpreted by
105  * kprobe handler as:
106  * 0 - return from kprobe (event is filtered out)
107  * 1 - store kprobe event into ring buffer
108  * Other values are reserved and currently alias to 1
109  */
110 unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
111 {
112 	unsigned int ret;
113 
114 	cant_sleep();
115 
116 	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
117 		/*
118 		 * since some bpf program is already running on this cpu,
119 		 * don't call into another bpf program (same or different)
120 		 * and don't send kprobe event into ring-buffer,
121 		 * so return zero here
122 		 */
123 		rcu_read_lock();
124 		bpf_prog_inc_misses_counters(rcu_dereference(call->prog_array));
125 		rcu_read_unlock();
126 		ret = 0;
127 		goto out;
128 	}
129 
130 	/*
131 	 * Instead of moving rcu_read_lock/rcu_dereference/rcu_read_unlock
132 	 * to all call sites, we did a bpf_prog_array_valid() there to check
133 	 * whether call->prog_array is empty or not, which is
134 	 * a heuristic to speed up execution.
135 	 *
136 	 * If bpf_prog_array_valid() fetched prog_array was
137 	 * non-NULL, we go into trace_call_bpf() and do the actual
138 	 * proper rcu_dereference() under RCU lock.
139 	 * If it turns out that prog_array is NULL then, we bail out.
140 	 * For the opposite, if the bpf_prog_array_valid() fetched pointer
141 	 * was NULL, you'll skip the prog_array with the risk of missing
142 	 * out of events when it was updated in between this and the
143 	 * rcu_dereference() which is accepted risk.
144 	 */
145 	rcu_read_lock();
146 	ret = bpf_prog_run_array(rcu_dereference(call->prog_array),
147 				 ctx, bpf_prog_run);
148 	rcu_read_unlock();
149 
150  out:
151 	__this_cpu_dec(bpf_prog_active);
152 
153 	return ret;
154 }
155 
156 /**
157  * trace_call_bpf_faultable - invoke BPF program in faultable context
158  * @call: tracepoint event
159  * @ctx: opaque context pointer
160  *
161  * Variant of trace_call_bpf() for faultable tracepoints (syscall
162  * tracepoints). Supports sleepable BPF programs by using rcu_tasks_trace
163  * for lifetime protection and bpf_prog_run_array_sleepable() for per-program
164  * RCU flavor selection, following the uprobe pattern.
165  *
166  * Per-program recursion protection is provided by
167  * bpf_prog_run_array_sleepable(). Global bpf_prog_active is not
168  * needed because syscall tracepoints cannot self-recurse.
169  *
170  * Must be called from a faultable/preemptible context.
171  */
172 unsigned int trace_call_bpf_faultable(struct trace_event_call *call, void *ctx)
173 {
174 	struct bpf_prog_array *prog_array;
175 
176 	might_fault();
177 	guard(rcu_tasks_trace)();
178 
179 	prog_array = rcu_dereference_check(call->prog_array,
180 					   rcu_read_lock_trace_held());
181 	return bpf_prog_run_array_sleepable(prog_array, ctx, bpf_prog_run);
182 }
183 
184 #ifdef CONFIG_BPF_KPROBE_OVERRIDE
/*
 * BPF helper: store @rc as the return value in @regs, then call
 * override_function_with_return() on the same registers.  Always returns 0.
 */
BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
{
	regs_set_return_value(regs, rc);
	override_function_with_return(regs);
	return 0;
}
191 
/*
 * bpf_func_proto descriptor for bpf_override_return(): GPL-only, integer
 * return; arg1 is the program context (ARG_PTR_TO_CTX), arg2 is
 * ARG_ANYTHING.
 */
static const struct bpf_func_proto bpf_override_return_proto = {
	.func		= bpf_override_return,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
};
199 #endif
200 
201 static __always_inline int
202 bpf_probe_read_user_common(void *dst, u32 size, const void __user *unsafe_ptr)
203 {
204 	int ret;
205 
206 	ret = copy_from_user_nofault(dst, unsafe_ptr, size);
207 	if (unlikely(ret < 0))
208 		memset(dst, 0, size);
209 	return ret;
210 }
211 
/*
 * BPF helper: read @size bytes from user address @unsafe_ptr into @dst.
 * Thin wrapper returning the result of bpf_probe_read_user_common().
 */
BPF_CALL_3(bpf_probe_read_user, void *, dst, u32, size,
	   const void __user *, unsafe_ptr)
{
	return bpf_probe_read_user_common(dst, size, unsafe_ptr);
}
217 
/*
 * bpf_func_proto descriptor for bpf_probe_read_user(): GPL-only, integer
 * return; arg1 is the destination (ARG_PTR_TO_UNINIT_MEM), arg2 its size
 * (ARG_CONST_SIZE_OR_ZERO), arg3 the source address (ARG_ANYTHING).
 */
const struct bpf_func_proto bpf_probe_read_user_proto = {
	.func		= bpf_probe_read_user,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};
226 
227 static __always_inline int
228 bpf_probe_read_user_str_common(void *dst, u32 size,
229 			       const void __user *unsafe_ptr)
230 {
231 	int ret;
232 
233 	/*
234 	 * NB: We rely on strncpy_from_user() not copying junk past the NUL
235 	 * terminator into `dst`.
236 	 *
237 	 * strncpy_from_user() does long-sized strides in the fast path. If the
238 	 * strncpy does not mask out the bytes after the NUL in `unsafe_ptr`,
239 	 * then there could be junk after the NUL in `dst`. If user takes `dst`
240 	 * and keys a hash map with it, then semantically identical strings can
241 	 * occupy multiple entries in the map.
242 	 */
243 	ret = strncpy_from_user_nofault(dst, unsafe_ptr, size);
244 	if (unlikely(ret < 0))
245 		memset(dst, 0, size);
246 	return ret;
247 }
248 
/*
 * BPF helper: read a string from user address @unsafe_ptr into @dst.
 * Thin wrapper returning the result of bpf_probe_read_user_str_common().
 */
BPF_CALL_3(bpf_probe_read_user_str, void *, dst, u32, size,
	   const void __user *, unsafe_ptr)
{
	return bpf_probe_read_user_str_common(dst, size, unsafe_ptr);
}
254 
/*
 * bpf_func_proto descriptor for bpf_probe_read_user_str(): GPL-only,
 * integer return; arg1 is the destination (ARG_PTR_TO_UNINIT_MEM), arg2
 * its size (ARG_CONST_SIZE_OR_ZERO), arg3 the source address
 * (ARG_ANYTHING).
 */
const struct bpf_func_proto bpf_probe_read_user_str_proto = {
	.func		= bpf_probe_read_user_str,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};
263 
/*
 * BPF helper: read @size bytes from kernel address @unsafe_ptr into @dst.
 * Thin wrapper returning the result of bpf_probe_read_kernel_common(),
 * which is not defined in this part of the file.
 */
BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
	   const void *, unsafe_ptr)
{
	return bpf_probe_read_kernel_common(dst, size, unsafe_ptr);
}
269 
/*
 * bpf_func_proto descriptor for bpf_probe_read_kernel(): GPL-only, integer
 * return; arg1 is the destination (ARG_PTR_TO_UNINIT_MEM), arg2 its size
 * (ARG_CONST_SIZE_OR_ZERO), arg3 the source address (ARG_ANYTHING).
 */
const struct bpf_func_proto bpf_probe_read_kernel_proto = {
	.func		= bpf_probe_read_kernel,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};
278 
279 static __always_inline int
280 bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr)
281 {
282 	int ret;
283 
284 	/*
285 	 * The strncpy_from_kernel_nofault() call will likely not fill the
286 	 * entire buffer, but that's okay in this circumstance as we're probing
287 	 * arbitrary memory anyway similar to bpf_probe_read_*() and might
288 	 * as well probe the stack. Thus, memory is explicitly cleared
289 	 * only in error case, so that improper users ignoring return
290 	 * code altogether don't copy garbage; otherwise length of string
291 	 * is returned that can be used for bpf_perf_event_output() et al.
292 	 */
293 	ret = strncpy_from_kernel_nofault(dst, unsafe_ptr, size);
294 	if (unlikely(ret < 0))
295 		memset(dst, 0, size);
296 	return ret;
297 }
298 
/*
 * BPF helper: read a string from kernel address @unsafe_ptr into @dst.
 * Thin wrapper returning the result of bpf_probe_read_kernel_str_common().
 */
BPF_CALL_3(bpf_probe_read_kernel_str, void *, dst, u32, size,
	   const void *, unsafe_ptr)
{
	return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr);
}
304 
/*
 * bpf_func_proto descriptor for bpf_probe_read_kernel_str(): GPL-only,
 * integer return; arg1 is the destination (ARG_PTR_TO_UNINIT_MEM), arg2
 * its size (ARG_CONST_SIZE_OR_ZERO), arg3 the source address
 * (ARG_ANYTHING).
 */
const struct bpf_func_proto bpf_probe_read_kernel_str_proto = {
	.func		= bpf_probe_read_kernel_str,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};
313 
314 #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
315 BPF_CALL_3(bpf_probe_read_compat, void *, dst, u32, size,
316 	   const void *, unsafe_ptr)
317 {
318 	if ((unsigned long)unsafe_ptr < TASK_SIZE) {
319 		return bpf_probe_read_user_common(dst, size,
320 				(__force void __user *)unsafe_ptr);
321 	}
322 	return bpf_probe_read_kernel_common(dst, size, unsafe_ptr);
323 }
324 
/*
 * bpf_func_proto descriptor for bpf_probe_read_compat(): GPL-only, integer
 * return; arg1 is the destination (ARG_PTR_TO_UNINIT_MEM), arg2 its size
 * (ARG_CONST_SIZE_OR_ZERO), arg3 the source address (ARG_ANYTHING).
 */
static const struct bpf_func_proto bpf_probe_read_compat_proto = {
	.func		= bpf_probe_read_compat,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};
333 
334 BPF_CALL_3(bpf_probe_read_compat_str, void *, dst, u32, size,
335 	   const void *, unsafe_ptr)
336 {
337 	if ((unsigned long)unsafe_ptr < TASK_SIZE) {
338 		return bpf_probe_read_user_str_common(dst, size,
339 				(__force void __user *)unsafe_ptr);
340 	}
341 	return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr);
342 }
343 
/*
 * bpf_func_proto descriptor for bpf_probe_read_compat_str(): GPL-only,
 * integer return; arg1 is the destination (ARG_PTR_TO_UNINIT_MEM), arg2
 * its size (ARG_CONST_SIZE_OR_ZERO), arg3 the source address
 * (ARG_ANYTHING).
 */
static const struct bpf_func_proto bpf_probe_read_compat_str_proto = {
	.func		= bpf_probe_read_compat_str,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};
352 #endif /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */
353 
354 BPF_CALL_3(bpf_probe_write_user, void __user *, unsafe_ptr, const void *, src,
355 	   u32, size)
356 {
357 	/*
358 	 * Ensure we're in user context which is safe for the helper to
359 	 * run. This helper has no business in a kthread.
360 	 *
361 	 * access_ok() should prevent writing to non-user memory, but in
362 	 * some situations (nommu, temporary switch, etc) access_ok() does
363 	 * not provide enough validation, hence the check on KERNEL_DS.
364 	 *
365 	 * nmi_uaccess_okay() ensures the probe is not run in an interim
366 	 * state, when the task or mm are switched. This is specifically
367 	 * required to prevent the use of temporary mm.
368 	 */
369 
370 	if (unlikely(in_interrupt() ||
371 		     current->flags & (PF_KTHREAD | PF_EXITING)))
372 		return -EPERM;
373 	if (unlikely(!nmi_uaccess_okay()))
374 		return -EPERM;
375 
376 	return copy_to_user_nofault(unsafe_ptr, src, size);
377 }
378 
/*
 * bpf_func_proto descriptor for bpf_probe_write_user(): GPL-only, integer
 * return; arg1 is the destination address (ARG_ANYTHING), arg2 a read-only
 * source buffer (ARG_PTR_TO_MEM | MEM_RDONLY), arg3 its size
 * (ARG_CONST_SIZE).
 */
static const struct bpf_func_proto bpf_probe_write_user_proto = {
	.func		= bpf_probe_write_user,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type	= ARG_CONST_SIZE,
};
387 
/* Number of u64 arguments bpf_trace_printk() hands to bpf_bprintf_prepare(). */
#define MAX_TRACE_PRINTK_VARARGS	3
/* NOTE(review): not referenced in the visible part of this file - confirm use. */
#define BPF_TRACE_PRINTK_SIZE		1024
390 
391 BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
392 	   u64, arg2, u64, arg3)
393 {
394 	u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 };
395 	struct bpf_bprintf_data data = {
396 		.get_bin_args	= true,
397 		.get_buf	= true,
398 	};
399 	int ret;
400 
401 	ret = bpf_bprintf_prepare(fmt, fmt_size, args,
402 				  MAX_TRACE_PRINTK_VARARGS, &data);
403 	if (ret < 0)
404 		return ret;
405 
406 	ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args);
407 
408 	trace_bpf_trace_printk(data.buf);
409 
410 	bpf_bprintf_cleanup(&data);
411 
412 	return ret;
413 }
414 
/*
 * bpf_func_proto descriptor for bpf_trace_printk(): GPL-only, integer
 * return; arg1 is the read-only format buffer (ARG_PTR_TO_MEM |
 * MEM_RDONLY) and arg2 its size (ARG_CONST_SIZE).  No types are given for
 * the remaining arguments.
 */
static const struct bpf_func_proto bpf_trace_printk_proto = {
	.func		= bpf_trace_printk,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg2_type	= ARG_CONST_SIZE,
};
422 
/*
 * Work handler that (re-)enables the bpf_trace/bpf_trace_printk trace
 * event.  @work is unused.  A rate-limited warning is logged when
 * trace_set_clr_event() reports failure.
 */
static void __set_printk_clr_event(struct work_struct *work)
{
	/*
	 * This program might be calling bpf_trace_printk,
	 * so enable the associated bpf_trace/bpf_trace_printk event.
	 * Repeat this each time as it is possible a user has
	 * disabled bpf_trace_printk events.  By loading a program
	 * calling bpf_trace_printk() however the user has expressed
	 * the intent to see such events.
	 */
	if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1))
		/* Terminate the record so it is not left open for continuation. */
		pr_warn_ratelimited("could not enable bpf_trace_printk events\n");
}
/* Work item queued by the printk proto getters to run the handler above. */
static DECLARE_WORK(set_printk_work, __set_printk_clr_event);
437 
/*
 * Return the bpf_trace_printk() prototype.  As a side effect, queue
 * set_printk_work so the bpf_trace_printk trace event gets enabled.
 */
const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
{
	schedule_work(&set_printk_work);
	return &bpf_trace_printk_proto;
}
443 
444 BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, args,
445 	   u32, data_len)
446 {
447 	struct bpf_bprintf_data data = {
448 		.get_bin_args	= true,
449 		.get_buf	= true,
450 	};
451 	int ret, num_args;
452 
453 	if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
454 	    (data_len && !args))
455 		return -EINVAL;
456 	num_args = data_len / 8;
457 
458 	ret = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data);
459 	if (ret < 0)
460 		return ret;
461 
462 	ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args);
463 
464 	trace_bpf_trace_printk(data.buf);
465 
466 	bpf_bprintf_cleanup(&data);
467 
468 	return ret;
469 }
470 
/*
 * bpf_func_proto descriptor for bpf_trace_vprintk(): GPL-only, integer
 * return; arg1/arg2 are the read-only format buffer and its size
 * (ARG_CONST_SIZE); arg3/arg4 are the read-only, possibly NULL argument
 * buffer and its size (ARG_CONST_SIZE_OR_ZERO).
 */
static const struct bpf_func_proto bpf_trace_vprintk_proto = {
	.func		= bpf_trace_vprintk,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg2_type	= ARG_CONST_SIZE,
	.arg3_type	= ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
	.arg4_type	= ARG_CONST_SIZE_OR_ZERO,
};
480 
/*
 * Return the bpf_trace_vprintk() prototype.  As a side effect, queue
 * set_printk_work so the bpf_trace_printk trace event gets enabled.
 */
const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void)
{
	schedule_work(&set_printk_work);
	return &bpf_trace_vprintk_proto;
}
486 
487 BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
488 	   const void *, args, u32, data_len)
489 {
490 	struct bpf_bprintf_data data = {
491 		.get_bin_args	= true,
492 	};
493 	int err, num_args;
494 
495 	if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
496 	    (data_len && !args))
497 		return -EINVAL;
498 	num_args = data_len / 8;
499 
500 	err = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data);
501 	if (err < 0)
502 		return err;
503 
504 	seq_bprintf(m, fmt, data.bin_args);
505 
506 	bpf_bprintf_cleanup(&data);
507 
508 	return seq_has_overflowed(m) ? -EOVERFLOW : 0;
509 }
510 
/* Single-entry BTF ID list for struct seq_file; referenced as arg1_btf_id below. */
BTF_ID_LIST_SINGLE(btf_seq_file_ids, struct, seq_file)
512 
/*
 * bpf_func_proto descriptor for bpf_seq_printf(): GPL-only, integer return;
 * arg1 is a BTF pointer typed via btf_seq_file_ids[0]; arg2/arg3 are the
 * read-only format buffer and its size (ARG_CONST_SIZE); arg4/arg5 are the
 * read-only, possibly NULL argument buffer and its size
 * (ARG_CONST_SIZE_OR_ZERO).
 */
static const struct bpf_func_proto bpf_seq_printf_proto = {
	.func		= bpf_seq_printf,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &btf_seq_file_ids[0],
	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type	= ARG_CONST_SIZE,
	.arg4_type      = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
	.arg5_type      = ARG_CONST_SIZE_OR_ZERO,
};
524 
525 BPF_CALL_3(bpf_seq_write, struct seq_file *, m, const void *, data, u32, len)
526 {
527 	return seq_write(m, data, len) ? -EOVERFLOW : 0;
528 }
529 
/*
 * bpf_func_proto descriptor for bpf_seq_write(): GPL-only, integer return;
 * arg1 is a BTF pointer typed via btf_seq_file_ids[0]; arg2/arg3 are the
 * read-only data buffer and its size (ARG_CONST_SIZE_OR_ZERO).
 */
static const struct bpf_func_proto bpf_seq_write_proto = {
	.func		= bpf_seq_write,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &btf_seq_file_ids[0],
	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
};
539 
540 BPF_CALL_4(bpf_seq_printf_btf, struct seq_file *, m, struct btf_ptr *, ptr,
541 	   u32, btf_ptr_size, u64, flags)
542 {
543 	const struct btf *btf;
544 	s32 btf_id;
545 	int ret;
546 
547 	ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id);
548 	if (ret)
549 		return ret;
550 
551 	return btf_type_seq_show_flags(btf, btf_id, ptr->ptr, m, flags);
552 }
553 
/*
 * bpf_func_proto descriptor for bpf_seq_printf_btf(): GPL-only, integer
 * return; arg1 is a BTF pointer typed via btf_seq_file_ids[0]; arg2/arg3
 * are the read-only struct btf_ptr buffer and its size
 * (ARG_CONST_SIZE_OR_ZERO); arg4, the flags, is ARG_ANYTHING.
 */
static const struct bpf_func_proto bpf_seq_printf_btf_proto = {
	.func		= bpf_seq_printf_btf,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &btf_seq_file_ids[0],
	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};
564 
565 static __always_inline int
566 get_map_perf_counter(struct bpf_map *map, u64 flags,
567 		     u64 *value, u64 *enabled, u64 *running)
568 {
569 	struct bpf_array *array = container_of(map, struct bpf_array, map);
570 	unsigned int cpu = smp_processor_id();
571 	u64 index = flags & BPF_F_INDEX_MASK;
572 	struct bpf_event_entry *ee;
573 
574 	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
575 		return -EINVAL;
576 	if (index == BPF_F_CURRENT_CPU)
577 		index = cpu;
578 	if (unlikely(index >= array->map.max_entries))
579 		return -E2BIG;
580 
581 	ee = READ_ONCE(array->ptrs[index]);
582 	if (!ee)
583 		return -ENOENT;
584 
585 	return perf_event_read_local(ee->event, value, enabled, running);
586 }
587 
588 BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
589 {
590 	u64 value = 0;
591 	int err;
592 
593 	err = get_map_perf_counter(map, flags, &value, NULL, NULL);
594 	/*
595 	 * this api is ugly since we miss [-22..-2] range of valid
596 	 * counter values, but that's uapi
597 	 */
598 	if (err)
599 		return err;
600 	return value;
601 }
602 
/*
 * bpf_func_proto descriptor for bpf_perf_event_read(): GPL-only, integer
 * return; arg1 is a map pointer (ARG_CONST_MAP_PTR), arg2, the flags, is
 * ARG_ANYTHING.
 */
const struct bpf_func_proto bpf_perf_event_read_proto = {
	.func		= bpf_perf_event_read,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
};
610 
611 BPF_CALL_4(bpf_perf_event_read_value, struct bpf_map *, map, u64, flags,
612 	   struct bpf_perf_event_value *, buf, u32, size)
613 {
614 	int err = -EINVAL;
615 
616 	if (unlikely(size != sizeof(struct bpf_perf_event_value)))
617 		goto clear;
618 	err = get_map_perf_counter(map, flags, &buf->counter, &buf->enabled,
619 				   &buf->running);
620 	if (unlikely(err))
621 		goto clear;
622 	return 0;
623 clear:
624 	memset(buf, 0, size);
625 	return err;
626 }
627 
/*
 * bpf_func_proto descriptor for bpf_perf_event_read_value(): GPL-only,
 * integer return; arg1 is a map pointer (ARG_CONST_MAP_PTR), arg2 the flags
 * (ARG_ANYTHING), arg3/arg4 the output buffer (ARG_PTR_TO_UNINIT_MEM) and
 * its size (ARG_CONST_SIZE).
 */
static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
	.func		= bpf_perf_event_read_value,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg4_type	= ARG_CONST_SIZE,
};
637 
/* Accessor exposing the file-local bpf_perf_event_read_value_proto. */
const struct bpf_func_proto *bpf_get_perf_event_read_value_proto(void)
{
	return &bpf_perf_event_read_value_proto;
}
642 
643 static __always_inline u64
644 __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
645 			u64 flags, struct perf_raw_record *raw,
646 			struct perf_sample_data *sd)
647 {
648 	struct bpf_array *array = container_of(map, struct bpf_array, map);
649 	unsigned int cpu = smp_processor_id();
650 	u64 index = flags & BPF_F_INDEX_MASK;
651 	struct bpf_event_entry *ee;
652 	struct perf_event *event;
653 
654 	if (index == BPF_F_CURRENT_CPU)
655 		index = cpu;
656 	if (unlikely(index >= array->map.max_entries))
657 		return -E2BIG;
658 
659 	ee = READ_ONCE(array->ptrs[index]);
660 	if (!ee)
661 		return -ENOENT;
662 
663 	event = ee->event;
664 	if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE ||
665 		     event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
666 		return -EINVAL;
667 
668 	if (unlikely(event->oncpu != cpu))
669 		return -EOPNOTSUPP;
670 
671 	perf_sample_save_raw_data(sd, event, raw);
672 
673 	return perf_event_output(event, sd, regs);
674 }
675 
/*
 * Support executing tracepoints in normal, irq, and nmi context that each
 * call bpf_perf_event_output: one perf_sample_data slot per nesting level,
 * three levels in total.
 */
struct bpf_trace_sample_data {
	struct perf_sample_data sds[3];
};

/* Per-CPU sample data slots used by bpf_perf_event_output(). */
static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_trace_sds);
/* Per-CPU nesting depth of bpf_perf_event_output(); selects the slot. */
static DEFINE_PER_CPU(int, bpf_trace_nest_level);
686 BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
687 	   u64, flags, void *, data, u64, size)
688 {
689 	struct bpf_trace_sample_data *sds;
690 	struct perf_raw_record raw = {
691 		.frag = {
692 			.size = size,
693 			.data = data,
694 		},
695 	};
696 	struct perf_sample_data *sd;
697 	int nest_level, err;
698 
699 	preempt_disable();
700 	sds = this_cpu_ptr(&bpf_trace_sds);
701 	nest_level = this_cpu_inc_return(bpf_trace_nest_level);
702 
703 	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(sds->sds))) {
704 		err = -EBUSY;
705 		goto out;
706 	}
707 
708 	sd = &sds->sds[nest_level - 1];
709 
710 	if (unlikely(flags & ~(BPF_F_INDEX_MASK))) {
711 		err = -EINVAL;
712 		goto out;
713 	}
714 
715 	perf_sample_data_init(sd, 0, 0);
716 
717 	err = __bpf_perf_event_output(regs, map, flags, &raw, sd);
718 out:
719 	this_cpu_dec(bpf_trace_nest_level);
720 	preempt_enable();
721 	return err;
722 }
723 
/*
 * bpf_func_proto descriptor for bpf_perf_event_output(): GPL-only, integer
 * return; arg1 is the program context (ARG_PTR_TO_CTX), arg2 a map pointer
 * (ARG_CONST_MAP_PTR), arg3 the flags (ARG_ANYTHING), arg4/arg5 the
 * read-only data buffer and its size (ARG_CONST_SIZE_OR_ZERO).
 */
static const struct bpf_func_proto bpf_perf_event_output_proto = {
	.func		= bpf_perf_event_output,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};
734 
/* Per-CPU nesting depth of bpf_event_output(); selects the slots below. */
static DEFINE_PER_CPU(int, bpf_event_output_nest_level);
/* One pt_regs slot per nesting level of bpf_event_output(), three in total. */
struct bpf_nested_pt_regs {
	struct pt_regs regs[3];
};
/* Per-CPU register slots filled by perf_fetch_caller_regs() in bpf_event_output(). */
static DEFINE_PER_CPU(struct bpf_nested_pt_regs, bpf_pt_regs);
/* Per-CPU sample data slots used by bpf_event_output(). */
static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_misc_sds);
741 
742 u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
743 		     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
744 {
745 	struct perf_raw_frag frag = {
746 		.copy		= ctx_copy,
747 		.size		= ctx_size,
748 		.data		= ctx,
749 	};
750 	struct perf_raw_record raw = {
751 		.frag = {
752 			{
753 				.next	= ctx_size ? &frag : NULL,
754 			},
755 			.size	= meta_size,
756 			.data	= meta,
757 		},
758 	};
759 	struct perf_sample_data *sd;
760 	struct pt_regs *regs;
761 	int nest_level;
762 	u64 ret;
763 
764 	preempt_disable();
765 	nest_level = this_cpu_inc_return(bpf_event_output_nest_level);
766 
767 	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bpf_misc_sds.sds))) {
768 		ret = -EBUSY;
769 		goto out;
770 	}
771 	sd = this_cpu_ptr(&bpf_misc_sds.sds[nest_level - 1]);
772 	regs = this_cpu_ptr(&bpf_pt_regs.regs[nest_level - 1]);
773 
774 	perf_fetch_caller_regs(regs);
775 	perf_sample_data_init(sd, 0, 0);
776 
777 	ret = __bpf_perf_event_output(regs, map, flags, &raw, sd);
778 out:
779 	this_cpu_dec(bpf_event_output_nest_level);
780 	preempt_enable();
781 	return ret;
782 }
783 
/* BPF helper: return the `current` task pointer cast to long. */
BPF_CALL_0(bpf_get_current_task)
{
	return (long) current;
}
788 
/*
 * bpf_func_proto descriptor for bpf_get_current_task(): GPL-only, no
 * arguments, result typed as a plain integer (RET_INTEGER).
 */
const struct bpf_func_proto bpf_get_current_task_proto = {
	.func		= bpf_get_current_task,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
};
794 
/* BPF helper: return the `current` task pointer cast to unsigned long. */
BPF_CALL_0(bpf_get_current_task_btf)
{
	return (unsigned long) current;
}
799 
/*
 * bpf_func_proto descriptor for bpf_get_current_task_btf(): GPL-only, no
 * arguments, result typed RET_PTR_TO_BTF_ID_TRUSTED with the BTF id taken
 * from btf_tracing_ids[BTF_TRACING_TYPE_TASK].
 */
const struct bpf_func_proto bpf_get_current_task_btf_proto = {
	.func		= bpf_get_current_task_btf,
	.gpl_only	= true,
	.ret_type	= RET_PTR_TO_BTF_ID_TRUSTED,
	.ret_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
};
806 
/* BPF helper: return task_pt_regs(@task) cast to unsigned long. */
BPF_CALL_1(bpf_task_pt_regs, struct task_struct *, task)
{
	return (unsigned long) task_pt_regs(task);
}
811 
/* Single-entry BTF ID list for struct pt_regs; the return type of bpf_task_pt_regs(). */
BTF_ID_LIST_SINGLE(bpf_task_pt_regs_ids, struct, pt_regs)
813 
/*
 * bpf_func_proto descriptor for bpf_task_pt_regs(): GPL-only; arg1 is a BTF
 * pointer typed via btf_tracing_ids[BTF_TRACING_TYPE_TASK]; the result is
 * a BTF pointer (RET_PTR_TO_BTF_ID) typed via bpf_task_pt_regs_ids[0].
 */
const struct bpf_func_proto bpf_task_pt_regs_proto = {
	.func		= bpf_task_pt_regs,
	.gpl_only	= true,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
	.ret_type	= RET_PTR_TO_BTF_ID,
	.ret_btf_id	= &bpf_task_pt_regs_ids[0],
};
822 
/*
 * State handed from bpf_send_signal_common() to do_bpf_send_signal() when
 * signal delivery is deferred to irq_work.
 */
struct send_signal_irq_work {
	struct irq_work irq_work;	/* the queued work item */
	struct task_struct *task;	/* target; reference dropped by the handler */
	u32 sig;			/* signal number */
	enum pid_type type;		/* passed to group_send_sig_info() */
	bool has_siginfo;		/* true: use @info, false: SEND_SIG_PRIV */
	struct kernel_siginfo info;	/* copy of the caller's siginfo */
};

/* One deferred-signal slot per CPU. */
static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work);
833 
834 static void do_bpf_send_signal(struct irq_work *entry)
835 {
836 	struct send_signal_irq_work *work;
837 	struct kernel_siginfo *siginfo;
838 
839 	work = container_of(entry, struct send_signal_irq_work, irq_work);
840 	siginfo = work->has_siginfo ? &work->info : SEND_SIG_PRIV;
841 
842 	group_send_sig_info(work->sig, siginfo, work->task, work->type);
843 	put_task_struct(work->task);
844 }
845 
846 static int bpf_send_signal_common(u32 sig, enum pid_type type, struct task_struct *task, u64 value)
847 {
848 	struct send_signal_irq_work *work = NULL;
849 	struct kernel_siginfo info;
850 	struct kernel_siginfo *siginfo;
851 
852 	if (!task) {
853 		task = current;
854 		siginfo = SEND_SIG_PRIV;
855 	} else {
856 		clear_siginfo(&info);
857 		info.si_signo = sig;
858 		info.si_errno = 0;
859 		info.si_code = SI_KERNEL;
860 		info.si_pid = 0;
861 		info.si_uid = 0;
862 		info.si_value.sival_ptr = (void __user __force *)(unsigned long)value;
863 		siginfo = &info;
864 	}
865 
866 	/* Similar to bpf_probe_write_user, task needs to be
867 	 * in a sound condition and kernel memory access be
868 	 * permitted in order to send signal to the current
869 	 * task.
870 	 */
871 	if (unlikely(task->flags & (PF_KTHREAD | PF_EXITING)))
872 		return -EPERM;
873 	if (unlikely(!nmi_uaccess_okay()))
874 		return -EPERM;
875 	/* Task should not be pid=1 to avoid kernel panic. */
876 	if (unlikely(is_global_init(task)))
877 		return -EPERM;
878 
879 	if (preempt_count() != 0 || irqs_disabled()) {
880 		/* Do an early check on signal validity. Otherwise,
881 		 * the error is lost in deferred irq_work.
882 		 */
883 		if (unlikely(!valid_signal(sig)))
884 			return -EINVAL;
885 
886 		work = this_cpu_ptr(&send_signal_work);
887 		if (irq_work_is_busy(&work->irq_work))
888 			return -EBUSY;
889 
890 		/* Add the current task, which is the target of sending signal,
891 		 * to the irq_work. The current task may change when queued
892 		 * irq works get executed.
893 		 */
894 		work->task = get_task_struct(task);
895 		work->has_siginfo = siginfo == &info;
896 		if (work->has_siginfo)
897 			copy_siginfo(&work->info, &info);
898 		work->sig = sig;
899 		work->type = type;
900 		irq_work_queue(&work->irq_work);
901 		return 0;
902 	}
903 
904 	return group_send_sig_info(sig, siginfo, task, type);
905 }
906 
/*
 * BPF helper: send @sig to the current task with PIDTYPE_TGID, via
 * bpf_send_signal_common() with no explicit task and a zero value.
 */
BPF_CALL_1(bpf_send_signal, u32, sig)
{
	return bpf_send_signal_common(sig, PIDTYPE_TGID, NULL, 0);
}
911 
/*
 * bpf_func_proto descriptor for bpf_send_signal(): not GPL-only, integer
 * return; arg1, the signal number, is ARG_ANYTHING.
 */
const struct bpf_func_proto bpf_send_signal_proto = {
	.func		= bpf_send_signal,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
};
918 
/*
 * BPF helper: send @sig to the current task with PIDTYPE_PID, via
 * bpf_send_signal_common() with no explicit task and a zero value.
 */
BPF_CALL_1(bpf_send_signal_thread, u32, sig)
{
	return bpf_send_signal_common(sig, PIDTYPE_PID, NULL, 0);
}
923 
/*
 * bpf_func_proto descriptor for bpf_send_signal_thread(): not GPL-only,
 * integer return; arg1, the signal number, is ARG_ANYTHING.
 */
const struct bpf_func_proto bpf_send_signal_thread_proto = {
	.func		= bpf_send_signal_thread,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
};
930 
931 BPF_CALL_3(bpf_d_path, const struct path *, path, char *, buf, u32, sz)
932 {
933 	struct path copy;
934 	long len;
935 	char *p;
936 
937 	if (!sz)
938 		return 0;
939 
940 	/*
941 	 * The path pointer is verified as trusted and safe to use,
942 	 * but let's double check it's valid anyway to workaround
943 	 * potentially broken verifier.
944 	 */
945 	len = copy_from_kernel_nofault(&copy, path, sizeof(*path));
946 	if (len < 0)
947 		return len;
948 
949 	p = d_path(&copy, buf, sz);
950 	if (IS_ERR(p)) {
951 		len = PTR_ERR(p);
952 	} else {
953 		len = buf + sz - p;
954 		memmove(buf, p, len);
955 	}
956 
957 	return len;
958 }
959 
/*
 * BTF ID set of functions consulted by bpf_d_path_allowed().  Some entries
 * exist only with CONFIG_SECURITY or CONFIG_SECURITY_PATH.
 */
BTF_SET_START(btf_allowlist_d_path)
#ifdef CONFIG_SECURITY
BTF_ID(func, security_file_permission)
BTF_ID(func, security_inode_getattr)
BTF_ID(func, security_file_open)
#endif
#ifdef CONFIG_SECURITY_PATH
BTF_ID(func, security_path_truncate)
#endif
BTF_ID(func, vfs_truncate)
BTF_ID(func, vfs_fallocate)
BTF_ID(func, dentry_open)
BTF_ID(func, vfs_getattr)
BTF_ID(func, filp_close)
BTF_SET_END(btf_allowlist_d_path)
975 
976 static bool bpf_d_path_allowed(const struct bpf_prog *prog)
977 {
978 	if (prog->type == BPF_PROG_TYPE_TRACING &&
979 	    prog->expected_attach_type == BPF_TRACE_ITER)
980 		return true;
981 
982 	if (prog->type == BPF_PROG_TYPE_LSM)
983 		return bpf_lsm_is_sleepable_hook(prog->aux->attach_btf_id);
984 
985 	return btf_id_set_contains(&btf_allowlist_d_path,
986 				   prog->aux->attach_btf_id);
987 }
988 
/* Single-entry BTF ID list for struct path; the arg1 type of bpf_d_path(). */
BTF_ID_LIST_SINGLE(bpf_d_path_btf_ids, struct, path)
990 
/*
 * bpf_func_proto descriptor for bpf_d_path(): not GPL-only, integer return;
 * arg1 is a BTF pointer typed via bpf_d_path_btf_ids[0]; arg2/arg3 are the
 * writable output buffer (ARG_PTR_TO_MEM | MEM_WRITE) and its size
 * (ARG_CONST_SIZE_OR_ZERO).  Use is gated by bpf_d_path_allowed().
 */
static const struct bpf_func_proto bpf_d_path_proto = {
	.func		= bpf_d_path,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &bpf_d_path_btf_ids[0],
	.arg2_type	= ARG_PTR_TO_MEM | MEM_WRITE,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.allowed	= bpf_d_path_allowed,
};
1001 
/* Every flag bit bpf_btf_printf_prepare() accepts; any other bit is -EINVAL. */
#define BTF_F_ALL	(BTF_F_COMPACT  | BTF_F_NONAME | \
			 BTF_F_PTR_RAW | BTF_F_ZERO)
1004 
1005 static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
1006 				  u64 flags, const struct btf **btf,
1007 				  s32 *btf_id)
1008 {
1009 	const struct btf_type *t;
1010 
1011 	if (unlikely(flags & ~(BTF_F_ALL)))
1012 		return -EINVAL;
1013 
1014 	if (btf_ptr_size != sizeof(struct btf_ptr))
1015 		return -EINVAL;
1016 
1017 	*btf = bpf_get_btf_vmlinux();
1018 
1019 	if (IS_ERR_OR_NULL(*btf))
1020 		return IS_ERR(*btf) ? PTR_ERR(*btf) : -EINVAL;
1021 
1022 	if (ptr->type_id > 0)
1023 		*btf_id = ptr->type_id;
1024 	else
1025 		return -EINVAL;
1026 
1027 	if (*btf_id > 0)
1028 		t = btf_type_by_id(*btf, *btf_id);
1029 	if (*btf_id <= 0 || !t)
1030 		return -ENOENT;
1031 
1032 	return 0;
1033 }
1034 
1035 BPF_CALL_5(bpf_snprintf_btf, char *, str, u32, str_size, struct btf_ptr *, ptr,
1036 	   u32, btf_ptr_size, u64, flags)
1037 {
1038 	const struct btf *btf;
1039 	s32 btf_id;
1040 	int ret;
1041 
1042 	ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id);
1043 	if (ret)
1044 		return ret;
1045 
1046 	return btf_type_snprintf_show(btf, btf_id, ptr->ptr, str, str_size,
1047 				      flags);
1048 }
1049 
/*
 * bpf_func_proto descriptor for bpf_snprintf_btf(): not GPL-only, integer
 * return; arg1/arg2 are the writable output buffer and its size
 * (ARG_CONST_SIZE); arg3/arg4 are the read-only struct btf_ptr buffer and
 * its size (ARG_CONST_SIZE); arg5, the flags, is ARG_ANYTHING.
 */
const struct bpf_func_proto bpf_snprintf_btf_proto = {
	.func		= bpf_snprintf_btf,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM | MEM_WRITE,
	.arg2_type	= ARG_CONST_SIZE,
	.arg3_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg4_type	= ARG_CONST_SIZE,
	.arg5_type	= ARG_ANYTHING,
};
1060 
1061 BPF_CALL_1(bpf_get_func_ip_tracing, void *, ctx)
1062 {
1063 	/* This helper call is inlined by verifier. */
1064 	return ((u64 *)ctx)[-2];
1065 }
1066 
1067 static const struct bpf_func_proto bpf_get_func_ip_proto_tracing = {
1068 	.func		= bpf_get_func_ip_tracing,
1069 	.gpl_only	= true,
1070 	.ret_type	= RET_INTEGER,
1071 	.arg1_type	= ARG_PTR_TO_CTX,
1072 };
1073 
/*
 * Translate an fentry address into the function entry address. With
 * CONFIG_X86_KERNEL_IBT, if is_endbr() matches the ENDBR_INSN_SIZE bytes
 * right before fentry_ip, the address of that instruction is returned;
 * otherwise fentry_ip is returned unchanged.
 */
static inline unsigned long get_entry_ip(unsigned long fentry_ip)
{
#ifdef CONFIG_X86_KERNEL_IBT
	unsigned long endbr_ip = fentry_ip - ENDBR_INSN_SIZE;

	if (is_endbr((void *)endbr_ip))
		return endbr_ip;
#endif
	return fentry_ip;
}
1082 
1083 BPF_CALL_1(bpf_get_func_ip_kprobe, struct pt_regs *, regs)
1084 {
1085 	struct bpf_trace_run_ctx *run_ctx __maybe_unused;
1086 	struct kprobe *kp;
1087 
1088 #ifdef CONFIG_UPROBES
1089 	run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx);
1090 	if (run_ctx->is_uprobe)
1091 		return ((struct uprobe_dispatch_data *)current->utask->vaddr)->bp_addr;
1092 #endif
1093 
1094 	kp = kprobe_running();
1095 
1096 	if (!kp || !(kp->flags & KPROBE_FLAG_ON_FUNC_ENTRY))
1097 		return 0;
1098 
1099 	return get_entry_ip((uintptr_t)kp->addr);
1100 }
1101 
1102 static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe = {
1103 	.func		= bpf_get_func_ip_kprobe,
1104 	.gpl_only	= true,
1105 	.ret_type	= RET_INTEGER,
1106 	.arg1_type	= ARG_PTR_TO_CTX,
1107 };
1108 
/*
 * bpf_get_func_ip() for kprobe-multi programs: hands the current task's
 * bpf_ctx to bpf_kprobe_multi_entry_ip() and returns its result. The regs
 * argument is not used.
 */
BPF_CALL_1(bpf_get_func_ip_kprobe_multi, struct pt_regs *, regs)
{
	return bpf_kprobe_multi_entry_ip(current->bpf_ctx);
}

static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe_multi = {
	.func		= bpf_get_func_ip_kprobe_multi,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};
1120 
/*
 * bpf_get_attach_cookie() for kprobe-multi programs: hands the current
 * task's bpf_ctx to bpf_kprobe_multi_cookie() and returns its result. The
 * regs argument is not used.
 */
BPF_CALL_1(bpf_get_attach_cookie_kprobe_multi, struct pt_regs *, regs)
{
	return bpf_kprobe_multi_cookie(current->bpf_ctx);
}

static const struct bpf_func_proto bpf_get_attach_cookie_proto_kmulti = {
	.func		= bpf_get_attach_cookie_kprobe_multi,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};
1132 
/*
 * bpf_get_func_ip() for uprobe-multi programs: hands the current task's
 * bpf_ctx to bpf_uprobe_multi_entry_ip() and returns its result. The regs
 * argument is not used.
 */
BPF_CALL_1(bpf_get_func_ip_uprobe_multi, struct pt_regs *, regs)
{
	return bpf_uprobe_multi_entry_ip(current->bpf_ctx);
}

static const struct bpf_func_proto bpf_get_func_ip_proto_uprobe_multi = {
	.func		= bpf_get_func_ip_uprobe_multi,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};
1144 
/*
 * bpf_get_attach_cookie() for uprobe-multi programs: hands the current
 * task's bpf_ctx to bpf_uprobe_multi_cookie() and returns its result. The
 * regs argument is not used.
 */
BPF_CALL_1(bpf_get_attach_cookie_uprobe_multi, struct pt_regs *, regs)
{
	return bpf_uprobe_multi_cookie(current->bpf_ctx);
}

static const struct bpf_func_proto bpf_get_attach_cookie_proto_umulti = {
	.func		= bpf_get_attach_cookie_uprobe_multi,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};
1156 
1157 BPF_CALL_1(bpf_get_attach_cookie_trace, void *, ctx)
1158 {
1159 	struct bpf_trace_run_ctx *run_ctx;
1160 
1161 	run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx);
1162 	return run_ctx->bpf_cookie;
1163 }
1164 
1165 static const struct bpf_func_proto bpf_get_attach_cookie_proto_trace = {
1166 	.func		= bpf_get_attach_cookie_trace,
1167 	.gpl_only	= false,
1168 	.ret_type	= RET_INTEGER,
1169 	.arg1_type	= ARG_PTR_TO_CTX,
1170 };
1171 
/*
 * bpf_get_attach_cookie() for perf_event programs: the cookie is read from
 * the event referenced by the program context.
 */
BPF_CALL_1(bpf_get_attach_cookie_pe, struct bpf_perf_event_data_kern *, ctx)
{
	return ctx->event->bpf_cookie;
}

static const struct bpf_func_proto bpf_get_attach_cookie_proto_pe = {
	.func		= bpf_get_attach_cookie_pe,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};
1183 
1184 BPF_CALL_1(bpf_get_attach_cookie_tracing, void *, ctx)
1185 {
1186 	struct bpf_trace_run_ctx *run_ctx;
1187 
1188 	run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx);
1189 	return run_ctx->bpf_cookie;
1190 }
1191 
1192 static const struct bpf_func_proto bpf_get_attach_cookie_proto_tracing = {
1193 	.func		= bpf_get_attach_cookie_tracing,
1194 	.gpl_only	= false,
1195 	.ret_type	= RET_INTEGER,
1196 	.arg1_type	= ARG_PTR_TO_CTX,
1197 };
1198 
1199 BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags)
1200 {
1201 	static const u32 br_entry_size = sizeof(struct perf_branch_entry);
1202 	u32 entry_cnt = size / br_entry_size;
1203 
1204 	entry_cnt = static_call(perf_snapshot_branch_stack)(buf, entry_cnt);
1205 
1206 	if (unlikely(flags))
1207 		return -EINVAL;
1208 
1209 	if (!entry_cnt)
1210 		return -ENOENT;
1211 
1212 	return entry_cnt * br_entry_size;
1213 }
1214 
1215 const struct bpf_func_proto bpf_get_branch_snapshot_proto = {
1216 	.func		= bpf_get_branch_snapshot,
1217 	.gpl_only	= true,
1218 	.ret_type	= RET_INTEGER,
1219 	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
1220 	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
1221 };
1222 
1223 BPF_CALL_3(get_func_arg, void *, ctx, u32, n, u64 *, value)
1224 {
1225 	/* This helper call is inlined by verifier. */
1226 	u64 nr_args = ((u64 *)ctx)[-1] & 0xFF;
1227 
1228 	if ((u64) n >= nr_args)
1229 		return -EINVAL;
1230 	*value = ((u64 *)ctx)[n];
1231 	return 0;
1232 }
1233 
1234 static const struct bpf_func_proto bpf_get_func_arg_proto = {
1235 	.func		= get_func_arg,
1236 	.ret_type	= RET_INTEGER,
1237 	.arg1_type	= ARG_PTR_TO_CTX,
1238 	.arg2_type	= ARG_ANYTHING,
1239 	.arg3_type	= ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
1240 	.arg3_size	= sizeof(u64),
1241 };
1242 
1243 BPF_CALL_2(get_func_ret, void *, ctx, u64 *, value)
1244 {
1245 	/* This helper call is inlined by verifier. */
1246 	u64 nr_args = ((u64 *)ctx)[-1] & 0xFF;
1247 
1248 	*value = ((u64 *)ctx)[nr_args];
1249 	return 0;
1250 }
1251 
1252 static const struct bpf_func_proto bpf_get_func_ret_proto = {
1253 	.func		= get_func_ret,
1254 	.ret_type	= RET_INTEGER,
1255 	.arg1_type	= ARG_PTR_TO_CTX,
1256 	.arg2_type	= ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
1257 	.arg2_size	= sizeof(u64),
1258 };
1259 
1260 BPF_CALL_1(get_func_arg_cnt, void *, ctx)
1261 {
1262 	/* This helper call is inlined by verifier. */
1263 	return ((u64 *)ctx)[-1] & 0xFF;
1264 }
1265 
1266 static const struct bpf_func_proto bpf_get_func_arg_cnt_proto = {
1267 	.func		= get_func_arg_cnt,
1268 	.ret_type	= RET_INTEGER,
1269 	.arg1_type	= ARG_PTR_TO_CTX,
1270 };
1271 
1272 static const struct bpf_func_proto *
1273 bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
1274 {
1275 	const struct bpf_func_proto *func_proto;
1276 
1277 	switch (func_id) {
1278 	case BPF_FUNC_get_smp_processor_id:
1279 		return &bpf_get_smp_processor_id_proto;
1280 #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
1281 	case BPF_FUNC_probe_read:
1282 		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
1283 		       NULL : &bpf_probe_read_compat_proto;
1284 	case BPF_FUNC_probe_read_str:
1285 		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
1286 		       NULL : &bpf_probe_read_compat_str_proto;
1287 #endif
1288 	case BPF_FUNC_get_func_ip:
1289 		return &bpf_get_func_ip_proto_tracing;
1290 	default:
1291 		break;
1292 	}
1293 
1294 	func_proto = bpf_base_func_proto(func_id, prog);
1295 	if (func_proto)
1296 		return func_proto;
1297 
1298 	if (!bpf_token_capable(prog->aux->token, CAP_SYS_ADMIN))
1299 		return NULL;
1300 
1301 	switch (func_id) {
1302 	case BPF_FUNC_probe_write_user:
1303 		return security_locked_down(LOCKDOWN_BPF_WRITE_USER) < 0 ?
1304 		       NULL : &bpf_probe_write_user_proto;
1305 	default:
1306 		return NULL;
1307 	}
1308 }
1309 
1310 static bool is_kprobe_multi(const struct bpf_prog *prog)
1311 {
1312 	return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI ||
1313 	       prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION;
1314 }
1315 
1316 static inline bool is_kprobe_session(const struct bpf_prog *prog)
1317 {
1318 	return prog->type == BPF_PROG_TYPE_KPROBE &&
1319 	       prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION;
1320 }
1321 
1322 static inline bool is_uprobe_multi(const struct bpf_prog *prog)
1323 {
1324 	return prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI ||
1325 	       prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION;
1326 }
1327 
1328 static inline bool is_uprobe_session(const struct bpf_prog *prog)
1329 {
1330 	return prog->type == BPF_PROG_TYPE_KPROBE &&
1331 	       prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION;
1332 }
1333 
1334 static inline bool is_trace_fsession(const struct bpf_prog *prog)
1335 {
1336 	return prog->type == BPF_PROG_TYPE_TRACING &&
1337 	       (prog->expected_attach_type == BPF_TRACE_FSESSION ||
1338 		prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI);
1339 }
1340 
1341 static const struct bpf_func_proto *
1342 kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
1343 {
1344 	switch (func_id) {
1345 	case BPF_FUNC_perf_event_output:
1346 		return &bpf_perf_event_output_proto;
1347 	case BPF_FUNC_get_stackid:
1348 		return &bpf_get_stackid_proto;
1349 	case BPF_FUNC_get_stack:
1350 		return prog->sleepable ? &bpf_get_stack_sleepable_proto : &bpf_get_stack_proto;
1351 #ifdef CONFIG_BPF_KPROBE_OVERRIDE
1352 	case BPF_FUNC_override_return:
1353 		return &bpf_override_return_proto;
1354 #endif
1355 	case BPF_FUNC_get_func_ip:
1356 		if (is_kprobe_multi(prog))
1357 			return &bpf_get_func_ip_proto_kprobe_multi;
1358 		if (is_uprobe_multi(prog))
1359 			return &bpf_get_func_ip_proto_uprobe_multi;
1360 		return &bpf_get_func_ip_proto_kprobe;
1361 	case BPF_FUNC_get_attach_cookie:
1362 		if (is_kprobe_multi(prog))
1363 			return &bpf_get_attach_cookie_proto_kmulti;
1364 		if (is_uprobe_multi(prog))
1365 			return &bpf_get_attach_cookie_proto_umulti;
1366 		return &bpf_get_attach_cookie_proto_trace;
1367 	default:
1368 		return bpf_tracing_func_proto(func_id, prog);
1369 	}
1370 }
1371 
1372 /* bpf+kprobe programs can access fields of 'struct pt_regs' */
1373 static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
1374 					const struct bpf_prog *prog,
1375 					struct bpf_insn_access_aux *info)
1376 {
1377 	if (off < 0 || off >= sizeof(struct pt_regs))
1378 		return false;
1379 	if (off % size != 0)
1380 		return false;
1381 	/*
1382 	 * Assertion for 32 bit to make sure last 8 byte access
1383 	 * (BPF_DW) to the last 4 byte member is disallowed.
1384 	 */
1385 	if (off + size > sizeof(struct pt_regs))
1386 		return false;
1387 
1388 	if (type == BPF_WRITE)
1389 		prog->aux->kprobe_write_ctx = true;
1390 
1391 	return true;
1392 }
1393 
/* Verifier callbacks for kprobe programs: helper lookup and context access check. */
const struct bpf_verifier_ops kprobe_verifier_ops = {
	.get_func_proto  = kprobe_prog_func_proto,
	.is_valid_access = kprobe_prog_is_valid_access,
};

/* Kprobe programs define no program ops. */
const struct bpf_prog_ops kprobe_prog_ops = {
};
1401 
1402 BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map,
1403 	   u64, flags, void *, data, u64, size)
1404 {
1405 	struct pt_regs *regs = *(struct pt_regs **)tp_buff;
1406 
1407 	/*
1408 	 * r1 points to perf tracepoint buffer where first 8 bytes are hidden
1409 	 * from bpf program and contain a pointer to 'struct pt_regs'. Fetch it
1410 	 * from there and call the same bpf_perf_event_output() helper inline.
1411 	 */
1412 	return ____bpf_perf_event_output(regs, map, flags, data, size);
1413 }
1414 
1415 static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
1416 	.func		= bpf_perf_event_output_tp,
1417 	.gpl_only	= true,
1418 	.ret_type	= RET_INTEGER,
1419 	.arg1_type	= ARG_PTR_TO_CTX,
1420 	.arg2_type	= ARG_CONST_MAP_PTR,
1421 	.arg3_type	= ARG_ANYTHING,
1422 	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
1423 	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
1424 };
1425 
1426 BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map,
1427 	   u64, flags)
1428 {
1429 	struct pt_regs *regs = *(struct pt_regs **)tp_buff;
1430 
1431 	/*
1432 	 * Same comment as in bpf_perf_event_output_tp(), only that this time
1433 	 * the other helper's function body cannot be inlined due to being
1434 	 * external, thus we need to call raw helper function.
1435 	 */
1436 	return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
1437 			       flags, 0, 0);
1438 }
1439 
1440 static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
1441 	.func		= bpf_get_stackid_tp,
1442 	.gpl_only	= true,
1443 	.ret_type	= RET_INTEGER,
1444 	.arg1_type	= ARG_PTR_TO_CTX,
1445 	.arg2_type	= ARG_CONST_MAP_PTR,
1446 	.arg3_type	= ARG_ANYTHING,
1447 };
1448 
1449 BPF_CALL_4(bpf_get_stack_tp, void *, tp_buff, void *, buf, u32, size,
1450 	   u64, flags)
1451 {
1452 	struct pt_regs *regs = *(struct pt_regs **)tp_buff;
1453 
1454 	return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
1455 			     (unsigned long) size, flags, 0);
1456 }
1457 
1458 static const struct bpf_func_proto bpf_get_stack_proto_tp = {
1459 	.func		= bpf_get_stack_tp,
1460 	.gpl_only	= true,
1461 	.ret_type	= RET_INTEGER,
1462 	.arg1_type	= ARG_PTR_TO_CTX,
1463 	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
1464 	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
1465 	.arg4_type	= ARG_ANYTHING,
1466 };
1467 
1468 static const struct bpf_func_proto *
1469 tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
1470 {
1471 	switch (func_id) {
1472 	case BPF_FUNC_perf_event_output:
1473 		return &bpf_perf_event_output_proto_tp;
1474 	case BPF_FUNC_get_stackid:
1475 		return &bpf_get_stackid_proto_tp;
1476 	case BPF_FUNC_get_stack:
1477 		return &bpf_get_stack_proto_tp;
1478 	case BPF_FUNC_get_attach_cookie:
1479 		return &bpf_get_attach_cookie_proto_trace;
1480 	default:
1481 		return bpf_tracing_func_proto(func_id, prog);
1482 	}
1483 }
1484 
1485 static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
1486 				    const struct bpf_prog *prog,
1487 				    struct bpf_insn_access_aux *info)
1488 {
1489 	if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
1490 		return false;
1491 	if (type != BPF_READ)
1492 		return false;
1493 	if (off % size != 0)
1494 		return false;
1495 
1496 	BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(__u64));
1497 	return true;
1498 }
1499 
/* Verifier callbacks for tracepoint programs: helper lookup and context access check. */
const struct bpf_verifier_ops tracepoint_verifier_ops = {
	.get_func_proto  = tp_prog_func_proto,
	.is_valid_access = tp_prog_is_valid_access,
};

/* Tracepoint programs define no program ops. */
const struct bpf_prog_ops tracepoint_prog_ops = {
};
1507 
1508 BPF_CALL_3(bpf_perf_prog_read_value, struct bpf_perf_event_data_kern *, ctx,
1509 	   struct bpf_perf_event_value *, buf, u32, size)
1510 {
1511 	int err = -EINVAL;
1512 
1513 	if (unlikely(size != sizeof(struct bpf_perf_event_value)))
1514 		goto clear;
1515 	err = perf_event_read_local(ctx->event, &buf->counter, &buf->enabled,
1516 				    &buf->running);
1517 	if (unlikely(err))
1518 		goto clear;
1519 	return 0;
1520 clear:
1521 	memset(buf, 0, size);
1522 	return err;
1523 }
1524 
1525 static const struct bpf_func_proto bpf_perf_prog_read_value_proto = {
1526          .func           = bpf_perf_prog_read_value,
1527          .gpl_only       = true,
1528          .ret_type       = RET_INTEGER,
1529          .arg1_type      = ARG_PTR_TO_CTX,
1530          .arg2_type      = ARG_PTR_TO_UNINIT_MEM,
1531          .arg3_type      = ARG_CONST_SIZE,
1532 };
1533 
1534 BPF_CALL_4(bpf_read_branch_records, struct bpf_perf_event_data_kern *, ctx,
1535 	   void *, buf, u32, size, u64, flags)
1536 {
1537 	static const u32 br_entry_size = sizeof(struct perf_branch_entry);
1538 	struct perf_branch_stack *br_stack = ctx->data->br_stack;
1539 	u32 to_copy;
1540 
1541 	if (unlikely(flags & ~BPF_F_GET_BRANCH_RECORDS_SIZE))
1542 		return -EINVAL;
1543 
1544 	if (unlikely(!(ctx->data->sample_flags & PERF_SAMPLE_BRANCH_STACK)))
1545 		return -ENOENT;
1546 
1547 	if (unlikely(!br_stack))
1548 		return -ENOENT;
1549 
1550 	if (flags & BPF_F_GET_BRANCH_RECORDS_SIZE)
1551 		return br_stack->nr * br_entry_size;
1552 
1553 	if (!buf || (size % br_entry_size != 0))
1554 		return -EINVAL;
1555 
1556 	to_copy = min_t(u32, br_stack->nr * br_entry_size, size);
1557 	memcpy(buf, br_stack->entries, to_copy);
1558 
1559 	return to_copy;
1560 }
1561 
1562 static const struct bpf_func_proto bpf_read_branch_records_proto = {
1563 	.func           = bpf_read_branch_records,
1564 	.gpl_only       = true,
1565 	.ret_type       = RET_INTEGER,
1566 	.arg1_type      = ARG_PTR_TO_CTX,
1567 	.arg2_type      = ARG_PTR_TO_MEM_OR_NULL | MEM_WRITE,
1568 	.arg3_type      = ARG_CONST_SIZE_OR_ZERO,
1569 	.arg4_type      = ARG_ANYTHING,
1570 };
1571 
1572 static const struct bpf_func_proto *
1573 pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
1574 {
1575 	switch (func_id) {
1576 	case BPF_FUNC_perf_event_output:
1577 		return &bpf_perf_event_output_proto_tp;
1578 	case BPF_FUNC_get_stackid:
1579 		return &bpf_get_stackid_proto_pe;
1580 	case BPF_FUNC_get_stack:
1581 		return &bpf_get_stack_proto_pe;
1582 	case BPF_FUNC_perf_prog_read_value:
1583 		return &bpf_perf_prog_read_value_proto;
1584 	case BPF_FUNC_read_branch_records:
1585 		return &bpf_read_branch_records_proto;
1586 	case BPF_FUNC_get_attach_cookie:
1587 		return &bpf_get_attach_cookie_proto_pe;
1588 	default:
1589 		return bpf_tracing_func_proto(func_id, prog);
1590 	}
1591 }
1592 
/*
 * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp
 * to avoid potential recursive reuse issue when/if tracepoints are added
 * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack.
 *
 * Since raw tracepoints run despite bpf_prog_active, support concurrent usage
 * in normal, irq, and nmi context.
 *
 * One pt_regs slot per nesting level; the per-CPU bpf_raw_tp_nest_level
 * counter selects the slot (see get_bpf_raw_tp_regs()).
 */
struct bpf_raw_tp_regs {
	struct pt_regs regs[3];
};
static DEFINE_PER_CPU(struct bpf_raw_tp_regs, bpf_raw_tp_regs);
static DEFINE_PER_CPU(int, bpf_raw_tp_nest_level);
1606 static struct pt_regs *get_bpf_raw_tp_regs(void)
1607 {
1608 	struct bpf_raw_tp_regs *tp_regs = this_cpu_ptr(&bpf_raw_tp_regs);
1609 	int nest_level = this_cpu_inc_return(bpf_raw_tp_nest_level);
1610 
1611 	if (nest_level > ARRAY_SIZE(tp_regs->regs)) {
1612 		this_cpu_dec(bpf_raw_tp_nest_level);
1613 		return ERR_PTR(-EBUSY);
1614 	}
1615 
1616 	return &tp_regs->regs[nest_level - 1];
1617 }
1618 
/* Release the slot taken by get_bpf_raw_tp_regs() by decrementing the per-CPU nesting counter. */
static void put_bpf_raw_tp_regs(void)
{
	this_cpu_dec(bpf_raw_tp_nest_level);
}
1623 
1624 BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args,
1625 	   struct bpf_map *, map, u64, flags, void *, data, u64, size)
1626 {
1627 	struct pt_regs *regs = get_bpf_raw_tp_regs();
1628 	int ret;
1629 
1630 	if (IS_ERR(regs))
1631 		return PTR_ERR(regs);
1632 
1633 	perf_fetch_caller_regs(regs);
1634 	ret = ____bpf_perf_event_output(regs, map, flags, data, size);
1635 
1636 	put_bpf_raw_tp_regs();
1637 	return ret;
1638 }
1639 
1640 static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
1641 	.func		= bpf_perf_event_output_raw_tp,
1642 	.gpl_only	= true,
1643 	.ret_type	= RET_INTEGER,
1644 	.arg1_type	= ARG_PTR_TO_CTX,
1645 	.arg2_type	= ARG_CONST_MAP_PTR,
1646 	.arg3_type	= ARG_ANYTHING,
1647 	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
1648 	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
1649 };
1650 
/* Helper prototypes defined outside this file. */
extern const struct bpf_func_proto bpf_skb_output_proto;
extern const struct bpf_func_proto bpf_xdp_output_proto;
extern const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto;
1654 
1655 BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
1656 	   struct bpf_map *, map, u64, flags)
1657 {
1658 	struct pt_regs *regs = get_bpf_raw_tp_regs();
1659 	int ret;
1660 
1661 	if (IS_ERR(regs))
1662 		return PTR_ERR(regs);
1663 
1664 	perf_fetch_caller_regs(regs);
1665 	/* similar to bpf_perf_event_output_tp, but pt_regs fetched differently */
1666 	ret = bpf_get_stackid((unsigned long) regs, (unsigned long) map,
1667 			      flags, 0, 0);
1668 	put_bpf_raw_tp_regs();
1669 	return ret;
1670 }
1671 
1672 static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
1673 	.func		= bpf_get_stackid_raw_tp,
1674 	.gpl_only	= true,
1675 	.ret_type	= RET_INTEGER,
1676 	.arg1_type	= ARG_PTR_TO_CTX,
1677 	.arg2_type	= ARG_CONST_MAP_PTR,
1678 	.arg3_type	= ARG_ANYTHING,
1679 };
1680 
1681 BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args,
1682 	   void *, buf, u32, size, u64, flags)
1683 {
1684 	struct pt_regs *regs = get_bpf_raw_tp_regs();
1685 	int ret;
1686 
1687 	if (IS_ERR(regs))
1688 		return PTR_ERR(regs);
1689 
1690 	perf_fetch_caller_regs(regs);
1691 	ret = bpf_get_stack((unsigned long) regs, (unsigned long) buf,
1692 			    (unsigned long) size, flags, 0);
1693 	put_bpf_raw_tp_regs();
1694 	return ret;
1695 }
1696 
1697 static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
1698 	.func		= bpf_get_stack_raw_tp,
1699 	.gpl_only	= true,
1700 	.ret_type	= RET_INTEGER,
1701 	.arg1_type	= ARG_PTR_TO_CTX,
1702 	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
1703 	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
1704 	.arg4_type	= ARG_ANYTHING,
1705 };
1706 
1707 static const struct bpf_func_proto *
1708 raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
1709 {
1710 	switch (func_id) {
1711 	case BPF_FUNC_perf_event_output:
1712 		return &bpf_perf_event_output_proto_raw_tp;
1713 	case BPF_FUNC_get_stackid:
1714 		return &bpf_get_stackid_proto_raw_tp;
1715 	case BPF_FUNC_get_stack:
1716 		return &bpf_get_stack_proto_raw_tp;
1717 	case BPF_FUNC_get_attach_cookie:
1718 		return &bpf_get_attach_cookie_proto_tracing;
1719 	default:
1720 		return bpf_tracing_func_proto(func_id, prog);
1721 	}
1722 }
1723 
1724 const struct bpf_func_proto *
1725 tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
1726 {
1727 	const struct bpf_func_proto *fn;
1728 
1729 	switch (func_id) {
1730 #ifdef CONFIG_NET
1731 	case BPF_FUNC_skb_output:
1732 		return &bpf_skb_output_proto;
1733 	case BPF_FUNC_xdp_output:
1734 		return &bpf_xdp_output_proto;
1735 	case BPF_FUNC_skc_to_tcp6_sock:
1736 		return &bpf_skc_to_tcp6_sock_proto;
1737 	case BPF_FUNC_skc_to_tcp_sock:
1738 		return &bpf_skc_to_tcp_sock_proto;
1739 	case BPF_FUNC_skc_to_tcp_timewait_sock:
1740 		return &bpf_skc_to_tcp_timewait_sock_proto;
1741 	case BPF_FUNC_skc_to_tcp_request_sock:
1742 		return &bpf_skc_to_tcp_request_sock_proto;
1743 	case BPF_FUNC_skc_to_udp6_sock:
1744 		return &bpf_skc_to_udp6_sock_proto;
1745 	case BPF_FUNC_skc_to_unix_sock:
1746 		return &bpf_skc_to_unix_sock_proto;
1747 	case BPF_FUNC_skc_to_mptcp_sock:
1748 		return &bpf_skc_to_mptcp_sock_proto;
1749 	case BPF_FUNC_sk_storage_get:
1750 		return &bpf_sk_storage_get_tracing_proto;
1751 	case BPF_FUNC_sk_storage_delete:
1752 		return &bpf_sk_storage_delete_tracing_proto;
1753 	case BPF_FUNC_sock_from_file:
1754 		return &bpf_sock_from_file_proto;
1755 	case BPF_FUNC_get_socket_cookie:
1756 		return &bpf_get_socket_ptr_cookie_proto;
1757 	case BPF_FUNC_xdp_get_buff_len:
1758 		return &bpf_xdp_get_buff_len_trace_proto;
1759 #endif
1760 	case BPF_FUNC_seq_printf:
1761 		return prog->expected_attach_type == BPF_TRACE_ITER ?
1762 		       &bpf_seq_printf_proto :
1763 		       NULL;
1764 	case BPF_FUNC_seq_write:
1765 		return prog->expected_attach_type == BPF_TRACE_ITER ?
1766 		       &bpf_seq_write_proto :
1767 		       NULL;
1768 	case BPF_FUNC_seq_printf_btf:
1769 		return prog->expected_attach_type == BPF_TRACE_ITER ?
1770 		       &bpf_seq_printf_btf_proto :
1771 		       NULL;
1772 	case BPF_FUNC_d_path:
1773 		return &bpf_d_path_proto;
1774 	case BPF_FUNC_get_func_arg:
1775 		if (bpf_prog_has_trampoline(prog) ||
1776 		    prog->expected_attach_type == BPF_TRACE_RAW_TP)
1777 			return &bpf_get_func_arg_proto;
1778 		return NULL;
1779 	case BPF_FUNC_get_func_ret:
1780 		return bpf_prog_has_trampoline(prog) ? &bpf_get_func_ret_proto : NULL;
1781 	case BPF_FUNC_get_func_arg_cnt:
1782 		if (bpf_prog_has_trampoline(prog) ||
1783 		    prog->expected_attach_type == BPF_TRACE_RAW_TP)
1784 			return &bpf_get_func_arg_cnt_proto;
1785 		return NULL;
1786 	case BPF_FUNC_get_attach_cookie:
1787 		if (prog->type == BPF_PROG_TYPE_TRACING &&
1788 		    prog->expected_attach_type == BPF_TRACE_RAW_TP)
1789 			return &bpf_get_attach_cookie_proto_tracing;
1790 		return bpf_prog_has_trampoline(prog) ? &bpf_get_attach_cookie_proto_tracing : NULL;
1791 	default:
1792 		fn = raw_tp_prog_func_proto(func_id, prog);
1793 		if (!fn && prog->expected_attach_type == BPF_TRACE_ITER)
1794 			fn = bpf_iter_get_func_proto(func_id, prog);
1795 		return fn;
1796 	}
1797 }
1798 
/*
 * Context access check for raw tracepoint programs: delegates to
 * bpf_tracing_ctx_access(); prog and info are not used.
 */
static bool raw_tp_prog_is_valid_access(int off, int size,
					enum bpf_access_type type,
					const struct bpf_prog *prog,
					struct bpf_insn_access_aux *info)
{
	return bpf_tracing_ctx_access(off, size, type);
}
1806 
/*
 * Context access check for tracing programs: delegates to
 * bpf_tracing_btf_ctx_access() with all arguments passed through.
 */
static bool tracing_prog_is_valid_access(int off, int size,
					 enum bpf_access_type type,
					 const struct bpf_prog *prog,
					 struct bpf_insn_access_aux *info)
{
	return bpf_tracing_btf_ctx_access(off, size, type, prog, info);
}
1814 
/*
 * Weak default for the tracing test_run hook: always fails with -ENOTSUPP.
 * Being __weak, it is replaced when another definition is linked in.
 */
int __weak bpf_prog_test_run_tracing(struct bpf_prog *prog,
				     const union bpf_attr *kattr,
				     union bpf_attr __user *uattr)
{
	return -ENOTSUPP;
}
1821 
/* Verifier callbacks for raw tracepoint programs. */
const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
	.get_func_proto  = raw_tp_prog_func_proto,
	.is_valid_access = raw_tp_prog_is_valid_access,
};

/* Raw tracepoint program ops; test_run is only wired up with CONFIG_NET. */
const struct bpf_prog_ops raw_tracepoint_prog_ops = {
#ifdef CONFIG_NET
	.test_run = bpf_prog_test_run_raw_tp,
#endif
};

/* Verifier callbacks for tracing programs. */
const struct bpf_verifier_ops tracing_verifier_ops = {
	.get_func_proto  = tracing_prog_func_proto,
	.is_valid_access = tracing_prog_is_valid_access,
};

/* Tracing program ops. */
const struct bpf_prog_ops tracing_prog_ops = {
	.test_run = bpf_prog_test_run_tracing,
};
1841 
1842 static bool raw_tp_writable_prog_is_valid_access(int off, int size,
1843 						 enum bpf_access_type type,
1844 						 const struct bpf_prog *prog,
1845 						 struct bpf_insn_access_aux *info)
1846 {
1847 	if (off == 0) {
1848 		if (size != sizeof(u64) || type != BPF_READ)
1849 			return false;
1850 		info->reg_type = PTR_TO_TP_BUFFER;
1851 	}
1852 	return raw_tp_prog_is_valid_access(off, size, type, prog, info);
1853 }
1854 
/*
 * Verifier callbacks for writable raw tracepoint programs: same helper
 * lookup as raw tracepoints, with the writable-aware context access check.
 */
const struct bpf_verifier_ops raw_tracepoint_writable_verifier_ops = {
	.get_func_proto  = raw_tp_prog_func_proto,
	.is_valid_access = raw_tp_writable_prog_is_valid_access,
};

/* Writable raw tracepoint programs define no program ops. */
const struct bpf_prog_ops raw_tracepoint_writable_prog_ops = {
};
1862 
1863 static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
1864 				    const struct bpf_prog *prog,
1865 				    struct bpf_insn_access_aux *info)
1866 {
1867 	const int size_u64 = sizeof(u64);
1868 
1869 	if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
1870 		return false;
1871 	if (type != BPF_READ)
1872 		return false;
1873 	if (off % size != 0) {
1874 		if (sizeof(unsigned long) != 4)
1875 			return false;
1876 		if (size != 8)
1877 			return false;
1878 		if (off % size != 4)
1879 			return false;
1880 	}
1881 
1882 	switch (off) {
1883 	case bpf_ctx_range(struct bpf_perf_event_data, sample_period):
1884 		bpf_ctx_record_field_size(info, size_u64);
1885 		if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
1886 			return false;
1887 		break;
1888 	case bpf_ctx_range(struct bpf_perf_event_data, addr):
1889 		bpf_ctx_record_field_size(info, size_u64);
1890 		if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
1891 			return false;
1892 		break;
1893 	default:
1894 		if (size != sizeof(long))
1895 			return false;
1896 	}
1897 
1898 	return true;
1899 }
1900 
1901 static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
1902 				      const struct bpf_insn *si,
1903 				      struct bpf_insn *insn_buf,
1904 				      struct bpf_prog *prog, u32 *target_size)
1905 {
1906 	struct bpf_insn *insn = insn_buf;
1907 
1908 	switch (si->off) {
1909 	case offsetof(struct bpf_perf_event_data, sample_period):
1910 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
1911 						       data), si->dst_reg, si->src_reg,
1912 				      offsetof(struct bpf_perf_event_data_kern, data));
1913 		*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
1914 				      bpf_target_off(struct perf_sample_data, period, 8,
1915 						     target_size));
1916 		break;
1917 	case offsetof(struct bpf_perf_event_data, addr):
1918 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
1919 						       data), si->dst_reg, si->src_reg,
1920 				      offsetof(struct bpf_perf_event_data_kern, data));
1921 		*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
1922 				      bpf_target_off(struct perf_sample_data, addr, 8,
1923 						     target_size));
1924 		break;
1925 	default:
1926 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
1927 						       regs), si->dst_reg, si->src_reg,
1928 				      offsetof(struct bpf_perf_event_data_kern, regs));
1929 		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), si->dst_reg, si->dst_reg,
1930 				      si->off);
1931 		break;
1932 	}
1933 
1934 	return insn - insn_buf;
1935 }
1936 
/*
 * Verifier callbacks for perf_event programs: helper lookup, context access
 * check and rewriting of context loads.
 */
const struct bpf_verifier_ops perf_event_verifier_ops = {
	.get_func_proto		= pe_prog_func_proto,
	.is_valid_access	= pe_prog_is_valid_access,
	.convert_ctx_access	= pe_prog_convert_ctx_access,
};

/* perf_event programs define no program ops. */
const struct bpf_prog_ops perf_event_prog_ops = {
};
1945 
/* Held while a perf event's tp_event->prog_array and event->prog are read or updated below. */
static DEFINE_MUTEX(bpf_event_mutex);

/* Upper bound on programs in one prog_array and on ids a query may request. */
#define BPF_TRACE_MAX_PROGS 64
1949 
1950 int perf_event_attach_bpf_prog(struct perf_event *event,
1951 			       struct bpf_prog *prog,
1952 			       u64 bpf_cookie)
1953 {
1954 	struct bpf_prog_array *old_array;
1955 	struct bpf_prog_array *new_array;
1956 	int ret = -EEXIST;
1957 
1958 	/*
1959 	 * Kprobe override only works if they are on the function entry,
1960 	 * and only if they are on the opt-in list.
1961 	 */
1962 	if (prog->kprobe_override &&
1963 	    (!trace_kprobe_on_func_entry(event->tp_event) ||
1964 	     !trace_kprobe_error_injectable(event->tp_event)))
1965 		return -EINVAL;
1966 
1967 	mutex_lock(&bpf_event_mutex);
1968 
1969 	if (event->prog)
1970 		goto unlock;
1971 
1972 	old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
1973 	if (old_array &&
1974 	    bpf_prog_array_length(old_array) >= BPF_TRACE_MAX_PROGS) {
1975 		ret = -E2BIG;
1976 		goto unlock;
1977 	}
1978 
1979 	ret = bpf_prog_array_copy(old_array, NULL, prog, bpf_cookie, &new_array);
1980 	if (ret < 0)
1981 		goto unlock;
1982 
1983 	/* set the new array to event->tp_event and set event->prog */
1984 	event->prog = prog;
1985 	event->bpf_cookie = bpf_cookie;
1986 	rcu_assign_pointer(event->tp_event->prog_array, new_array);
1987 	bpf_prog_array_free_sleepable(old_array);
1988 
1989 unlock:
1990 	mutex_unlock(&bpf_event_mutex);
1991 	return ret;
1992 }
1993 
1994 void perf_event_detach_bpf_prog(struct perf_event *event)
1995 {
1996 	struct bpf_prog_array *old_array;
1997 	struct bpf_prog_array *new_array;
1998 	struct bpf_prog *prog = NULL;
1999 	int ret;
2000 
2001 	mutex_lock(&bpf_event_mutex);
2002 
2003 	if (!event->prog)
2004 		goto unlock;
2005 
2006 	old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
2007 	if (!old_array)
2008 		goto put;
2009 
2010 	ret = bpf_prog_array_copy(old_array, event->prog, NULL, 0, &new_array);
2011 	if (ret < 0) {
2012 		bpf_prog_array_delete_safe(old_array, event->prog);
2013 	} else {
2014 		rcu_assign_pointer(event->tp_event->prog_array, new_array);
2015 		bpf_prog_array_free_sleepable(old_array);
2016 	}
2017 
2018 put:
2019 	prog = event->prog;
2020 	event->prog = NULL;
2021 
2022 unlock:
2023 	mutex_unlock(&bpf_event_mutex);
2024 
2025 	if (prog) {
2026 		/*
2027 		 * It could be that the bpf_prog is not sleepable (and will be freed
2028 		 * via normal RCU), but is called from a point that supports sleepable
2029 		 * programs and uses tasks-trace-RCU.
2030 		 */
2031 		synchronize_rcu_tasks_trace();
2032 
2033 		bpf_prog_put(prog);
2034 	}
2035 }
2036 
2037 int perf_event_query_prog_array(struct perf_event *event, void __user *info)
2038 {
2039 	struct perf_event_query_bpf __user *uquery = info;
2040 	struct perf_event_query_bpf query = {};
2041 	struct bpf_prog_array *progs;
2042 	u32 *ids, prog_cnt, ids_len;
2043 	int ret;
2044 
2045 	if (!perfmon_capable())
2046 		return -EPERM;
2047 	if (event->attr.type != PERF_TYPE_TRACEPOINT)
2048 		return -EINVAL;
2049 	if (copy_from_user(&query, uquery, sizeof(query)))
2050 		return -EFAULT;
2051 
2052 	ids_len = query.ids_len;
2053 	if (ids_len > BPF_TRACE_MAX_PROGS)
2054 		return -E2BIG;
2055 	ids = kcalloc(ids_len, sizeof(u32), GFP_USER | __GFP_NOWARN);
2056 	if (!ids)
2057 		return -ENOMEM;
2058 	/*
2059 	 * The above kcalloc returns ZERO_SIZE_PTR when ids_len = 0, which
2060 	 * is required when user only wants to check for uquery->prog_cnt.
2061 	 * There is no need to check for it since the case is handled
2062 	 * gracefully in bpf_prog_array_copy_info.
2063 	 */
2064 
2065 	mutex_lock(&bpf_event_mutex);
2066 	progs = bpf_event_rcu_dereference(event->tp_event->prog_array);
2067 	ret = bpf_prog_array_copy_info(progs, ids, ids_len, &prog_cnt);
2068 	mutex_unlock(&bpf_event_mutex);
2069 
2070 	if (copy_to_user(&uquery->prog_cnt, &prog_cnt, sizeof(prog_cnt)) ||
2071 	    copy_to_user(uquery->ids, ids, ids_len * sizeof(u32)))
2072 		ret = -EFAULT;
2073 
2074 	kfree(ids);
2075 	return ret;
2076 }
2077 
/*
 * Bounds of the built-in bpf_raw_event_map table; bpf_get_raw_tracepoint()
 * walks [__start__bpf_raw_tp, __stop__bpf_raw_tp).
 * NOTE(review): presumably provided by the linker script -- confirm.
 */
extern struct bpf_raw_event_map __start__bpf_raw_tp[];
extern struct bpf_raw_event_map __stop__bpf_raw_tp[];
2080 
2081 struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name)
2082 {
2083 	struct bpf_raw_event_map *btp = __start__bpf_raw_tp;
2084 
2085 	for (; btp < __stop__bpf_raw_tp; btp++) {
2086 		if (!strcmp(btp->tp->name, name))
2087 			return btp;
2088 	}
2089 
2090 	return bpf_get_raw_tracepoint_module(name);
2091 }
2092 
2093 void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
2094 {
2095 	struct module *mod;
2096 
2097 	guard(rcu)();
2098 	mod = __module_address((unsigned long)btp);
2099 	module_put(mod);
2100 }
2101 
/*
 * Run the BPF program attached through @link with the raw tracepoint
 * arguments in @args.
 *
 * Sleepable programs run inside rcu_read_lock_tasks_trace() with migration
 * disabled; all other programs run inside rcu_read_lock_dont_migrate().
 * If the recursion context cannot be taken the program is skipped and its
 * miss counter is incremented. The program's return value is ignored.
 */
static __always_inline
void __bpf_trace_run(struct bpf_raw_tp_link *link, u64 *args)
{
	struct srcu_ctr __percpu *scp = NULL;
	struct bpf_prog *prog = link->link.prog;
	bool sleepable = prog->sleepable;
	struct bpf_run_ctx *old_run_ctx;
	struct bpf_trace_run_ctx run_ctx;

	if (sleepable) {
		scp = rcu_read_lock_tasks_trace();
		migrate_disable();
	} else {
		rcu_read_lock_dont_migrate();
	}

	if (unlikely(!bpf_prog_get_recursion_context(prog))) {
		bpf_prog_inc_misses_counter(prog);
		goto out;
	}

	/* Make the link's cookie visible to the program through the run ctx. */
	run_ctx.bpf_cookie = link->cookie;
	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);

	(void)bpf_prog_run(prog, args);

	bpf_reset_run_ctx(old_run_ctx);
out:
	/* Reached on both paths, including when the get above failed. */
	bpf_prog_put_recursion_context(prog);

	/* Undo the entry protection in reverse order. */
	if (sleepable) {
		migrate_enable();
		rcu_read_unlock_tasks_trace(scp);
	} else {
		rcu_read_unlock_migrate();
	}
}
2139 
/*
 * Macro machinery that generates bpf_trace_run1() .. bpf_trace_run12().
 *
 * REPEAT(N, FN, DL, list...) expands to FN() applied to the first N items
 * of the list, separated by the delimiter DL. DL is passed wrapped in
 * parentheses (see __DL_COM / __DL_SEM) and UNPACK strips them.
 */
#define UNPACK(...)			__VA_ARGS__
#define REPEAT_1(FN, DL, X, ...)	FN(X)
#define REPEAT_2(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_1(FN, DL, __VA_ARGS__)
#define REPEAT_3(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_2(FN, DL, __VA_ARGS__)
#define REPEAT_4(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_3(FN, DL, __VA_ARGS__)
#define REPEAT_5(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_4(FN, DL, __VA_ARGS__)
#define REPEAT_6(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_5(FN, DL, __VA_ARGS__)
#define REPEAT_7(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_6(FN, DL, __VA_ARGS__)
#define REPEAT_8(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_7(FN, DL, __VA_ARGS__)
#define REPEAT_9(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_8(FN, DL, __VA_ARGS__)
#define REPEAT_10(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_9(FN, DL, __VA_ARGS__)
#define REPEAT_11(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_10(FN, DL, __VA_ARGS__)
#define REPEAT_12(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_11(FN, DL, __VA_ARGS__)
#define REPEAT(X, FN, DL, ...)		REPEAT_##X(FN, DL, __VA_ARGS__)

/* SARG(X) declares parameter "u64 argX"; COPY(X) stores argX into args[X]. */
#define SARG(X)		u64 arg##X
#define COPY(X)		args[X] = arg##X

/* Parenthesized delimiters for REPEAT(): comma and semicolon. */
#define __DL_COM	(,)
#define __DL_SEM	(;)

/* Index list fed to REPEAT(); supports up to 12 arguments. */
#define __SEQ_0_11	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11

/*
 * Define and export bpf_trace_run<x>(link, arg0, ..., arg<x-1>): gather the
 * x u64 arguments into an on-stack array and pass it to __bpf_trace_run().
 */
#define BPF_TRACE_DEFN_x(x)						\
	void bpf_trace_run##x(struct bpf_raw_tp_link *link,		\
			      REPEAT(x, SARG, __DL_COM, __SEQ_0_11))	\
	{								\
		u64 args[x];						\
		REPEAT(x, COPY, __DL_SEM, __SEQ_0_11);			\
		__bpf_trace_run(link, args);				\
	}								\
	EXPORT_SYMBOL_GPL(bpf_trace_run##x)
BPF_TRACE_DEFN_x(1);
BPF_TRACE_DEFN_x(2);
BPF_TRACE_DEFN_x(3);
BPF_TRACE_DEFN_x(4);
BPF_TRACE_DEFN_x(5);
BPF_TRACE_DEFN_x(6);
BPF_TRACE_DEFN_x(7);
BPF_TRACE_DEFN_x(8);
BPF_TRACE_DEFN_x(9);
BPF_TRACE_DEFN_x(10);
BPF_TRACE_DEFN_x(11);
BPF_TRACE_DEFN_x(12);
2184 
2185 int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link)
2186 {
2187 	struct tracepoint *tp = btp->tp;
2188 	struct bpf_prog *prog = link->link.prog;
2189 
2190 	/*
2191 	 * check that program doesn't access arguments beyond what's
2192 	 * available in this tracepoint
2193 	 */
2194 	if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64))
2195 		return -EINVAL;
2196 
2197 	if (prog->aux->max_tp_access > btp->writable_size)
2198 		return -EINVAL;
2199 
2200 	return tracepoint_probe_register_may_exist(tp, (void *)btp->bpf_func, link);
2201 }
2202 
/*
 * Remove the probe (btp->bpf_func with @link as its data) from @btp's
 * tracepoint. Returns the result of tracepoint_probe_unregister().
 */
int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link)
{
	return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, link);
}
2207 
2208 int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
2209 			    u32 *fd_type, const char **buf,
2210 			    u64 *probe_offset, u64 *probe_addr,
2211 			    unsigned long *missed)
2212 {
2213 	bool is_tracepoint, is_syscall_tp;
2214 	struct bpf_prog *prog;
2215 	int flags, err = 0;
2216 
2217 	prog = event->prog;
2218 	if (!prog)
2219 		return -ENOENT;
2220 
2221 	/* not supporting BPF_PROG_TYPE_PERF_EVENT yet */
2222 	if (prog->type == BPF_PROG_TYPE_PERF_EVENT)
2223 		return -EOPNOTSUPP;
2224 
2225 	*prog_id = prog->aux->id;
2226 	flags = event->tp_event->flags;
2227 	is_tracepoint = flags & TRACE_EVENT_FL_TRACEPOINT;
2228 	is_syscall_tp = is_syscall_trace_event(event->tp_event);
2229 
2230 	if (is_tracepoint || is_syscall_tp) {
2231 		*buf = is_tracepoint ? event->tp_event->tp->name
2232 				     : event->tp_event->name;
2233 		/* We allow NULL pointer for tracepoint */
2234 		if (fd_type)
2235 			*fd_type = BPF_FD_TYPE_TRACEPOINT;
2236 		if (probe_offset)
2237 			*probe_offset = 0x0;
2238 		if (probe_addr)
2239 			*probe_addr = 0x0;
2240 	} else {
2241 		/* kprobe/uprobe */
2242 		err = -EOPNOTSUPP;
2243 #ifdef CONFIG_KPROBE_EVENTS
2244 		if (flags & TRACE_EVENT_FL_KPROBE)
2245 			err = bpf_get_kprobe_info(event, fd_type, buf,
2246 						  probe_offset, probe_addr, missed,
2247 						  event->attr.type == PERF_TYPE_TRACEPOINT);
2248 #endif
2249 #ifdef CONFIG_UPROBE_EVENTS
2250 		if (flags & TRACE_EVENT_FL_UPROBE)
2251 			err = bpf_get_uprobe_info(event, fd_type, buf,
2252 						  probe_offset, probe_addr,
2253 						  event->attr.type == PERF_TYPE_TRACEPOINT);
2254 #endif
2255 	}
2256 
2257 	return err;
2258 }
2259 
2260 static int __init send_signal_irq_work_init(void)
2261 {
2262 	int cpu;
2263 	struct send_signal_irq_work *work;
2264 
2265 	for_each_possible_cpu(cpu) {
2266 		work = per_cpu_ptr(&send_signal_work, cpu);
2267 		init_irq_work(&work->irq_work, do_bpf_send_signal);
2268 	}
2269 	return 0;
2270 }
2271 
2272 subsys_initcall(send_signal_irq_work_init);
2273 
2274 #ifdef CONFIG_MODULES
2275 static int bpf_event_notify(struct notifier_block *nb, unsigned long op,
2276 			    void *module)
2277 {
2278 	struct bpf_trace_module *btm, *tmp;
2279 	struct module *mod = module;
2280 	int ret = 0;
2281 
2282 	if (mod->num_bpf_raw_events == 0 ||
2283 	    (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING))
2284 		goto out;
2285 
2286 	mutex_lock(&bpf_module_mutex);
2287 
2288 	switch (op) {
2289 	case MODULE_STATE_COMING:
2290 		btm = kzalloc_obj(*btm);
2291 		if (btm) {
2292 			btm->module = module;
2293 			list_add(&btm->list, &bpf_trace_modules);
2294 		} else {
2295 			ret = -ENOMEM;
2296 		}
2297 		break;
2298 	case MODULE_STATE_GOING:
2299 		list_for_each_entry_safe(btm, tmp, &bpf_trace_modules, list) {
2300 			if (btm->module == module) {
2301 				list_del(&btm->list);
2302 				kfree(btm);
2303 				break;
2304 			}
2305 		}
2306 		break;
2307 	}
2308 
2309 	mutex_unlock(&bpf_module_mutex);
2310 
2311 out:
2312 	return notifier_from_errno(ret);
2313 }
2314 
/* Notifier block that routes module state changes to bpf_event_notify(). */
static struct notifier_block bpf_module_nb = {
	.notifier_call = bpf_event_notify,
};
2318 
/*
 * Register bpf_module_nb with the module notifier chain at fs_initcall
 * time. The result of register_module_notifier() is not checked; this
 * function always returns 0.
 */
static int __init bpf_event_init(void)
{
	register_module_notifier(&bpf_module_nb);
	return 0;
}

fs_initcall(bpf_event_init);
2326 #endif /* CONFIG_MODULES */
2327 
/*
 * Run context shared by the kprobe-multi and uprobe-multi run contexts,
 * which embed it as their first member (session_ctx).
 */
struct bpf_session_run_ctx {
	struct bpf_run_ctx run_ctx;	/* base context installed via bpf_set_run_ctx() */
	bool is_return;			/* is_return flag passed in by the probe handler */
	void *data;			/* opaque data pointer passed in by the probe handler */
};
2333 
2334 #ifdef CONFIG_FPROBE
/* BPF link that attaches one program to many kernel functions via fprobe. */
struct bpf_kprobe_multi_link {
	struct bpf_link link;	/* generic link; container_of() anchor */
	struct fprobe fp;	/* fprobe registered on addrs[] */
	unsigned long *addrs;	/* probed addresses; sorted when cookies are used */
	u64 *cookies;		/* optional per-address cookies, parallel to addrs[]; may be NULL */
	u32 cnt;		/* number of entries in addrs[] (and cookies[]) */
	u32 mods_cnt;		/* number of entries in mods[] */
	struct module **mods;	/* modules holding a reference for the link's lifetime */
};
2344 
/* Per-invocation run context set up by kprobe_multi_link_prog_run(). */
struct bpf_kprobe_multi_run_ctx {
	struct bpf_session_run_ctx session_ctx;	/* embedded session/base run context */
	struct bpf_kprobe_multi_link *link;	/* link whose program is running */
	unsigned long entry_ip;			/* entry address of the probed function */
};
2350 
/* Symbol names copied from user space by copy_user_syms(). */
struct user_syms {
	const char **syms;	/* array of pointers into buf, one per symbol */
	char *buf;		/* backing storage holding the NUL-terminated names */
};
2355 
/*
 * bpf_kprobe_multi_pt_regs_ptr() is the second argument passed to
 * ftrace_partial_regs() / ftrace_partial_regs_update(). Without
 * CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS it is this CPU's instance of a
 * per-CPU struct pt_regs; with that option it is NULL.
 */
#ifndef CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS
static DEFINE_PER_CPU(struct pt_regs, bpf_kprobe_multi_pt_regs);
#define bpf_kprobe_multi_pt_regs_ptr()	this_cpu_ptr(&bpf_kprobe_multi_pt_regs)
#else
#define bpf_kprobe_multi_pt_regs_ptr()	(NULL)
#endif
2362 
/*
 * Translate an fentry address to the address reported as the function's
 * entry: the result of ftrace_get_symaddr() when it is non-zero, otherwise
 * @fentry_ip itself.
 */
static unsigned long ftrace_get_entry_ip(unsigned long fentry_ip)
{
	unsigned long symaddr = ftrace_get_symaddr(fentry_ip);

	if (!symaddr)
		return fentry_ip;
	return symaddr;
}
2369 
2370 static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, u32 cnt)
2371 {
2372 	unsigned long __user usymbol;
2373 	const char **syms = NULL;
2374 	char *buf = NULL, *p;
2375 	int err = -ENOMEM;
2376 	unsigned int i;
2377 
2378 	syms = kvmalloc_array(cnt, sizeof(*syms), GFP_KERNEL);
2379 	if (!syms)
2380 		goto error;
2381 
2382 	buf = kvmalloc_array(cnt, KSYM_NAME_LEN, GFP_KERNEL);
2383 	if (!buf)
2384 		goto error;
2385 
2386 	for (p = buf, i = 0; i < cnt; i++) {
2387 		if (__get_user(usymbol, usyms + i)) {
2388 			err = -EFAULT;
2389 			goto error;
2390 		}
2391 		err = strncpy_from_user(p, (const char __user *) usymbol, KSYM_NAME_LEN);
2392 		if (err == KSYM_NAME_LEN)
2393 			err = -E2BIG;
2394 		if (err < 0)
2395 			goto error;
2396 		syms[i] = p;
2397 		p += err + 1;
2398 	}
2399 
2400 	us->syms = syms;
2401 	us->buf = buf;
2402 	return 0;
2403 
2404 error:
2405 	if (err) {
2406 		kvfree(syms);
2407 		kvfree(buf);
2408 	}
2409 	return err;
2410 }
2411 
2412 static void kprobe_multi_put_modules(struct module **mods, u32 cnt)
2413 {
2414 	u32 i;
2415 
2416 	for (i = 0; i < cnt; i++)
2417 		module_put(mods[i]);
2418 }
2419 
/* Release both allocations held by @us (the pointer array and the name buffer). */
static void free_user_syms(struct user_syms *us)
{
	kvfree(us->syms);
	kvfree(us->buf);
}
2425 
2426 static void bpf_kprobe_multi_link_release(struct bpf_link *link)
2427 {
2428 	struct bpf_kprobe_multi_link *kmulti_link;
2429 
2430 	kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
2431 	/* Don't wait for RCU GP here. */
2432 	unregister_fprobe_async(&kmulti_link->fp);
2433 	kprobe_multi_put_modules(kmulti_link->mods, kmulti_link->mods_cnt);
2434 }
2435 
2436 static void bpf_kprobe_multi_link_dealloc(struct bpf_link *link)
2437 {
2438 	struct bpf_kprobe_multi_link *kmulti_link;
2439 
2440 	kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
2441 	kvfree(kmulti_link->addrs);
2442 	kvfree(kmulti_link->cookies);
2443 	kfree(kmulti_link->mods);
2444 	kfree(kmulti_link);
2445 }
2446 
2447 static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link,
2448 						struct bpf_link_info *info)
2449 {
2450 	u64 __user *ucookies = u64_to_user_ptr(info->kprobe_multi.cookies);
2451 	u64 __user *uaddrs = u64_to_user_ptr(info->kprobe_multi.addrs);
2452 	struct bpf_kprobe_multi_link *kmulti_link;
2453 	u32 ucount = info->kprobe_multi.count;
2454 	int err = 0, i;
2455 
2456 	if (!uaddrs ^ !ucount)
2457 		return -EINVAL;
2458 	if (ucookies && !ucount)
2459 		return -EINVAL;
2460 
2461 	kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
2462 	info->kprobe_multi.count = kmulti_link->cnt;
2463 	info->kprobe_multi.flags = kmulti_link->link.flags;
2464 	info->kprobe_multi.missed = kmulti_link->fp.nmissed;
2465 
2466 	if (!uaddrs)
2467 		return 0;
2468 	if (ucount < kmulti_link->cnt)
2469 		err = -ENOSPC;
2470 	else
2471 		ucount = kmulti_link->cnt;
2472 
2473 	if (ucookies) {
2474 		if (kmulti_link->cookies) {
2475 			if (copy_to_user(ucookies, kmulti_link->cookies, ucount * sizeof(u64)))
2476 				return -EFAULT;
2477 		} else {
2478 			for (i = 0; i < ucount; i++) {
2479 				if (put_user(0, ucookies + i))
2480 					return -EFAULT;
2481 			}
2482 		}
2483 	}
2484 
2485 	if (kallsyms_show_value(current_cred())) {
2486 		if (copy_to_user(uaddrs, kmulti_link->addrs, ucount * sizeof(u64)))
2487 			return -EFAULT;
2488 	} else {
2489 		for (i = 0; i < ucount; i++) {
2490 			if (put_user(0, uaddrs + i))
2491 				return -EFAULT;
2492 		}
2493 	}
2494 	return err;
2495 }
2496 
2497 #ifdef CONFIG_PROC_FS
2498 static void bpf_kprobe_multi_show_fdinfo(const struct bpf_link *link,
2499 					 struct seq_file *seq)
2500 {
2501 	struct bpf_kprobe_multi_link *kmulti_link;
2502 	bool has_cookies;
2503 
2504 	kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
2505 	has_cookies = !!kmulti_link->cookies;
2506 
2507 	seq_printf(seq,
2508 		   "kprobe_cnt:\t%u\n"
2509 		   "missed:\t%lu\n",
2510 		   kmulti_link->cnt,
2511 		   kmulti_link->fp.nmissed);
2512 
2513 	seq_printf(seq, "%s\t %s\n", "cookie", "func");
2514 	for (int i = 0; i < kmulti_link->cnt; i++) {
2515 		seq_printf(seq,
2516 			   "%llu\t %pS\n",
2517 			   has_cookies ? kmulti_link->cookies[i] : 0,
2518 			   (void *)kmulti_link->addrs[i]);
2519 	}
2520 }
2521 #endif
2522 
/*
 * Link operations for BPF_LINK_TYPE_KPROBE_MULTI links; installed by
 * bpf_link_init() in bpf_kprobe_multi_link_attach(). show_fdinfo is only
 * provided with CONFIG_PROC_FS.
 */
static const struct bpf_link_ops bpf_kprobe_multi_link_lops = {
	.release = bpf_kprobe_multi_link_release,
	.dealloc_deferred = bpf_kprobe_multi_link_dealloc,
	.fill_link_info = bpf_kprobe_multi_link_fill_link_info,
#ifdef CONFIG_PROC_FS
	.show_fdinfo = bpf_kprobe_multi_show_fdinfo,
#endif
};
2531 
2532 static void bpf_kprobe_multi_cookie_swap(void *a, void *b, int size, const void *priv)
2533 {
2534 	const struct bpf_kprobe_multi_link *link = priv;
2535 	unsigned long *addr_a = a, *addr_b = b;
2536 	u64 *cookie_a, *cookie_b;
2537 
2538 	cookie_a = link->cookies + (addr_a - link->addrs);
2539 	cookie_b = link->cookies + (addr_b - link->addrs);
2540 
2541 	/* swap addr_a/addr_b and cookie_a/cookie_b values */
2542 	swap(*addr_a, *addr_b);
2543 	swap(*cookie_a, *cookie_b);
2544 }
2545 
/*
 * Three-way comparison of two unsigned long addresses for bsearch()/sort:
 * returns -1, 0 or 1 when *a is less than, equal to or greater than *b.
 */
static int bpf_kprobe_multi_addrs_cmp(const void *a, const void *b)
{
	const unsigned long lhs = *(const unsigned long *)a;
	const unsigned long rhs = *(const unsigned long *)b;

	if (lhs < rhs)
		return -1;
	if (lhs > rhs)
		return 1;
	return 0;
}
2554 
/*
 * sort_r() comparison callback: adapts bpf_kprobe_multi_addrs_cmp() to the
 * three-argument signature; @priv is unused.
 */
static int bpf_kprobe_multi_cookie_cmp(const void *a, const void *b, const void *priv)
{
	return bpf_kprobe_multi_addrs_cmp(a, b);
}
2559 
2560 static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx)
2561 {
2562 	struct bpf_kprobe_multi_run_ctx *run_ctx;
2563 	struct bpf_kprobe_multi_link *link;
2564 	u64 *cookie, entry_ip;
2565 	unsigned long *addr;
2566 
2567 	if (WARN_ON_ONCE(!ctx))
2568 		return 0;
2569 	run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx,
2570 			       session_ctx.run_ctx);
2571 	link = run_ctx->link;
2572 	if (!link->cookies)
2573 		return 0;
2574 	entry_ip = run_ctx->entry_ip;
2575 	addr = bsearch(&entry_ip, link->addrs, link->cnt, sizeof(entry_ip),
2576 		       bpf_kprobe_multi_addrs_cmp);
2577 	if (!addr)
2578 		return 0;
2579 	cookie = link->cookies + (addr - link->addrs);
2580 	return *cookie;
2581 }
2582 
/*
 * Return the entry address recorded in the current kprobe-multi run
 * context. The context is taken from current->bpf_ctx; @ctx is not used.
 */
static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
{
	struct bpf_kprobe_multi_run_ctx *run_ctx;

	run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx,
			       session_ctx.run_ctx);
	return run_ctx->entry_ip;
}
2591 
/*
 * Run @link's program for one fprobe hit.
 *
 * @entry_ip:  entry address of the probed function, stored in the run ctx
 * @fregs:     ftrace register state, converted to pt_regs for the program
 * @is_return: recorded in the session context
 * @data:      opaque pointer recorded in the session context
 *
 * Returns the program's return value, or 1 when the program was skipped
 * because the per-CPU bpf_prog_active counter was already non-zero (the
 * program's miss counter is incremented in that case).
 */
static __always_inline int
kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
			   unsigned long entry_ip, struct ftrace_regs *fregs,
			   bool is_return, void *data)
{
	struct bpf_kprobe_multi_run_ctx run_ctx = {
		.session_ctx = {
			.is_return = is_return,
			.data = data,
		},
		.link = link,
		.entry_ip = entry_ip,
	};
	struct bpf_run_ctx *old_run_ctx;
	struct pt_regs *regs;
	int err;

	/*
	 * graph tracer framework ensures we won't migrate, so there is no need
	 * to use migrate_disable for bpf_prog_run again. The check here just for
	 * __this_cpu_inc_return.
	 */
	cant_sleep();

	/* Only run when no other BPF program is active on this CPU. */
	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
		bpf_prog_inc_misses_counter(link->link.prog);
		err = 1;
		goto out;
	}

	rcu_read_lock();
	regs = ftrace_partial_regs(fregs, bpf_kprobe_multi_pt_regs_ptr());
	old_run_ctx = bpf_set_run_ctx(&run_ctx.session_ctx.run_ctx);
	err = bpf_prog_run(link->link.prog, regs);
	bpf_reset_run_ctx(old_run_ctx);
	/* Called after the program has run, with the same pt_regs storage. */
	ftrace_partial_regs_update(fregs, bpf_kprobe_multi_pt_regs_ptr());
	rcu_read_unlock();

 out:
	/* Balances the increment above on both the run and the skip path. */
	__this_cpu_dec(bpf_prog_active);
	return err;
}
2634 
2635 static int
2636 kprobe_multi_link_handler(struct fprobe *fp, unsigned long fentry_ip,
2637 			  unsigned long ret_ip, struct ftrace_regs *fregs,
2638 			  void *data)
2639 {
2640 	struct bpf_kprobe_multi_link *link;
2641 	int err;
2642 
2643 	link = container_of(fp, struct bpf_kprobe_multi_link, fp);
2644 	err = kprobe_multi_link_prog_run(link, ftrace_get_entry_ip(fentry_ip),
2645 					 fregs, false, data);
2646 	return is_kprobe_session(link->link.prog) ? err : 0;
2647 }
2648 
2649 static void
2650 kprobe_multi_link_exit_handler(struct fprobe *fp, unsigned long fentry_ip,
2651 			       unsigned long ret_ip, struct ftrace_regs *fregs,
2652 			       void *data)
2653 {
2654 	struct bpf_kprobe_multi_link *link;
2655 
2656 	link = container_of(fp, struct bpf_kprobe_multi_link, fp);
2657 	kprobe_multi_link_prog_run(link, ftrace_get_entry_ip(fentry_ip),
2658 				   fregs, true, data);
2659 }
2660 
/*
 * sort_r() comparison callback for an array of C-string pointers: orders
 * the strings with strcmp(). @priv is unused.
 */
static int symbols_cmp_r(const void *a, const void *b, const void *priv)
{
	const char *const *lhs = a;
	const char *const *rhs = b;

	return strcmp(*lhs, *rhs);
}
2668 
/* Private data handed to symbols_swap_r() while sorting symbol names. */
struct multi_symbols_sort {
	const char **funcs;	/* base of the name array being sorted */
	u64 *cookies;		/* cookies parallel to funcs[]; NULL when unused */
};
2673 
2674 static void symbols_swap_r(void *a, void *b, int size, const void *priv)
2675 {
2676 	const struct multi_symbols_sort *data = priv;
2677 	const char **name_a = a, **name_b = b;
2678 
2679 	swap(*name_a, *name_b);
2680 
2681 	/* If defined, swap also related cookies. */
2682 	if (data->cookies) {
2683 		u64 *cookie_a, *cookie_b;
2684 
2685 		cookie_a = data->cookies + (name_a - data->funcs);
2686 		cookie_b = data->cookies + (name_b - data->funcs);
2687 		swap(*cookie_a, *cookie_b);
2688 	}
2689 }
2690 
/* Growable array of module pointers built by get_modules_for_addrs(). */
struct modules_array {
	struct module **mods;	/* storage, (re)allocated by add_module() */
	int mods_cnt;		/* number of entries in use */
	int mods_cap;		/* number of entries allocated */
};
2696 
2697 static int add_module(struct modules_array *arr, struct module *mod)
2698 {
2699 	struct module **mods;
2700 
2701 	if (arr->mods_cnt == arr->mods_cap) {
2702 		arr->mods_cap = max(16, arr->mods_cap * 3 / 2);
2703 		mods = krealloc_array(arr->mods, arr->mods_cap, sizeof(*mods), GFP_KERNEL);
2704 		if (!mods)
2705 			return -ENOMEM;
2706 		arr->mods = mods;
2707 	}
2708 
2709 	arr->mods[arr->mods_cnt] = mod;
2710 	arr->mods_cnt++;
2711 	return 0;
2712 }
2713 
2714 static bool has_module(struct modules_array *arr, struct module *mod)
2715 {
2716 	int i;
2717 
2718 	for (i = arr->mods_cnt - 1; i >= 0; i--) {
2719 		if (arr->mods[i] == mod)
2720 			return true;
2721 	}
2722 	return false;
2723 }
2724 
2725 static int get_modules_for_addrs(struct module ***mods, unsigned long *addrs, u32 addrs_cnt)
2726 {
2727 	struct modules_array arr = {};
2728 	u32 i, err = 0;
2729 
2730 	for (i = 0; i < addrs_cnt; i++) {
2731 		bool skip_add = false;
2732 		struct module *mod;
2733 
2734 		scoped_guard(rcu) {
2735 			mod = __module_address(addrs[i]);
2736 			/* Either no module or it's already stored  */
2737 			if (!mod || has_module(&arr, mod)) {
2738 				skip_add = true;
2739 				break; /* scoped_guard */
2740 			}
2741 			if (!try_module_get(mod))
2742 				err = -EINVAL;
2743 		}
2744 		if (skip_add)
2745 			continue;
2746 		if (err)
2747 			break;
2748 		err = add_module(&arr, mod);
2749 		if (err) {
2750 			module_put(mod);
2751 			break;
2752 		}
2753 	}
2754 
2755 	/* We return either err < 0 in case of error, ... */
2756 	if (err) {
2757 		kprobe_multi_put_modules(arr.mods, arr.mods_cnt);
2758 		kfree(arr.mods);
2759 		return err;
2760 	}
2761 
2762 	/* or number of modules found if everything is ok. */
2763 	*mods = arr.mods;
2764 	return arr.mods_cnt;
2765 }
2766 
2767 static int addrs_check_error_injection_list(unsigned long *addrs, u32 cnt)
2768 {
2769 	u32 i;
2770 
2771 	for (i = 0; i < cnt; i++) {
2772 		if (!within_error_injection_list(addrs[i]))
2773 			return -EINVAL;
2774 	}
2775 	return 0;
2776 }
2777 
2778 int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
2779 {
2780 	struct bpf_kprobe_multi_link *link = NULL;
2781 	struct bpf_link_primer link_primer;
2782 	void __user *ucookies;
2783 	unsigned long *addrs;
2784 	u32 flags, cnt, size;
2785 	void __user *uaddrs;
2786 	u64 *cookies = NULL;
2787 	void __user *usyms;
2788 	int err;
2789 
2790 	/* no support for 32bit archs yet */
2791 	if (sizeof(u64) != sizeof(void *))
2792 		return -EOPNOTSUPP;
2793 
2794 	if (attr->link_create.flags)
2795 		return -EINVAL;
2796 
2797 	if (!is_kprobe_multi(prog))
2798 		return -EINVAL;
2799 
2800 	/* kprobe_multi is not allowed to be sleepable. */
2801 	if (prog->sleepable)
2802 		return -EINVAL;
2803 
2804 	/* Writing to context is not allowed for kprobes. */
2805 	if (prog->aux->kprobe_write_ctx)
2806 		return -EINVAL;
2807 
2808 	flags = attr->link_create.kprobe_multi.flags;
2809 	if (flags & ~BPF_F_KPROBE_MULTI_RETURN)
2810 		return -EINVAL;
2811 
2812 	uaddrs = u64_to_user_ptr(attr->link_create.kprobe_multi.addrs);
2813 	usyms = u64_to_user_ptr(attr->link_create.kprobe_multi.syms);
2814 	if (!!uaddrs == !!usyms)
2815 		return -EINVAL;
2816 
2817 	cnt = attr->link_create.kprobe_multi.cnt;
2818 	if (!cnt)
2819 		return -EINVAL;
2820 	if (cnt > MAX_KPROBE_MULTI_CNT)
2821 		return -E2BIG;
2822 
2823 	size = cnt * sizeof(*addrs);
2824 	addrs = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL);
2825 	if (!addrs)
2826 		return -ENOMEM;
2827 
2828 	ucookies = u64_to_user_ptr(attr->link_create.kprobe_multi.cookies);
2829 	if (ucookies) {
2830 		cookies = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL);
2831 		if (!cookies) {
2832 			err = -ENOMEM;
2833 			goto error;
2834 		}
2835 		if (copy_from_user(cookies, ucookies, size)) {
2836 			err = -EFAULT;
2837 			goto error;
2838 		}
2839 	}
2840 
2841 	if (uaddrs) {
2842 		if (copy_from_user(addrs, uaddrs, size)) {
2843 			err = -EFAULT;
2844 			goto error;
2845 		}
2846 	} else {
2847 		struct multi_symbols_sort data = {
2848 			.cookies = cookies,
2849 		};
2850 		struct user_syms us;
2851 
2852 		err = copy_user_syms(&us, usyms, cnt);
2853 		if (err)
2854 			goto error;
2855 
2856 		if (cookies)
2857 			data.funcs = us.syms;
2858 
2859 		sort_r(us.syms, cnt, sizeof(*us.syms), symbols_cmp_r,
2860 		       symbols_swap_r, &data);
2861 
2862 		err = ftrace_lookup_symbols(us.syms, cnt, addrs);
2863 		free_user_syms(&us);
2864 		if (err)
2865 			goto error;
2866 	}
2867 
2868 	if (prog->kprobe_override && addrs_check_error_injection_list(addrs, cnt)) {
2869 		err = -EINVAL;
2870 		goto error;
2871 	}
2872 
2873 	link = kzalloc_obj(*link);
2874 	if (!link) {
2875 		err = -ENOMEM;
2876 		goto error;
2877 	}
2878 
2879 	bpf_link_init(&link->link, BPF_LINK_TYPE_KPROBE_MULTI,
2880 		      &bpf_kprobe_multi_link_lops, prog, attr->link_create.attach_type);
2881 
2882 	err = bpf_link_prime(&link->link, &link_primer);
2883 	if (err)
2884 		goto error;
2885 
2886 	if (!(flags & BPF_F_KPROBE_MULTI_RETURN))
2887 		link->fp.entry_handler = kprobe_multi_link_handler;
2888 	if ((flags & BPF_F_KPROBE_MULTI_RETURN) || is_kprobe_session(prog))
2889 		link->fp.exit_handler = kprobe_multi_link_exit_handler;
2890 	if (is_kprobe_session(prog))
2891 		link->fp.entry_data_size = sizeof(u64);
2892 
2893 	link->addrs = addrs;
2894 	link->cookies = cookies;
2895 	link->cnt = cnt;
2896 	link->link.flags = flags;
2897 
2898 	if (cookies) {
2899 		/*
2900 		 * Sorting addresses will trigger sorting cookies as well
2901 		 * (check bpf_kprobe_multi_cookie_swap). This way we can
2902 		 * find cookie based on the address in bpf_get_attach_cookie
2903 		 * helper.
2904 		 */
2905 		sort_r(addrs, cnt, sizeof(*addrs),
2906 		       bpf_kprobe_multi_cookie_cmp,
2907 		       bpf_kprobe_multi_cookie_swap,
2908 		       link);
2909 	}
2910 
2911 	err = get_modules_for_addrs(&link->mods, addrs, cnt);
2912 	if (err < 0) {
2913 		bpf_link_cleanup(&link_primer);
2914 		return err;
2915 	}
2916 	link->mods_cnt = err;
2917 
2918 	err = register_fprobe_ips(&link->fp, addrs, cnt);
2919 	if (err) {
2920 		kprobe_multi_put_modules(link->mods, link->mods_cnt);
2921 		bpf_link_cleanup(&link_primer);
2922 		return err;
2923 	}
2924 
2925 	return bpf_link_settle(&link_primer);
2926 
2927 error:
2928 	kfree(link);
2929 	kvfree(addrs);
2930 	kvfree(cookies);
2931 	return err;
2932 }
2933 #else /* !CONFIG_FPROBE */
/* Without CONFIG_FPROBE kprobe-multi links cannot be created. */
int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	return -EOPNOTSUPP;
}
/* Stub for builds without CONFIG_FPROBE: there is never a cookie. */
static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx)
{
	return 0;
}
/* Stub for builds without CONFIG_FPROBE: there is never an entry ip. */
static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
{
	return 0;
}
2946 #endif
2947 
2948 #ifdef CONFIG_UPROBES
2949 struct bpf_uprobe_multi_link;
2950 
/* One probe location belonging to a uprobe-multi link. */
struct bpf_uprobe {
	struct bpf_uprobe_multi_link *link;	/* owning link; supplies program and task filter */
	loff_t offset;				/* reported as "offset" in link info / fdinfo */
	unsigned long ref_ctr_offset;		/* reported as "ref_ctr_offset" in link info / fdinfo */
	u64 cookie;				/* reported as "cookie" in link info / fdinfo */
	struct uprobe *uprobe;			/* handle passed to uprobe_unregister_nosync() */
	struct uprobe_consumer consumer;	/* consumer passed to uprobe_unregister_nosync() */
	bool session;				/* NOTE(review): presumably marks session programs -- confirm */
};
2960 
/* BPF link that attaches one program to many uprobes. */
struct bpf_uprobe_multi_link {
	struct path path;		/* printed with d_path(); released with path_put() */
	struct bpf_link link;		/* generic link; container_of() anchor */
	u32 cnt;			/* number of entries in uprobes[] */
	struct bpf_uprobe *uprobes;	/* per-location state, freed with kvfree() */
	struct task_struct *task;	/* optional: restrict runs to this task's thread group */
};
2968 
/* Per-invocation run context set up by uprobe_prog_run(). */
struct bpf_uprobe_multi_run_ctx {
	struct bpf_session_run_ctx session_ctx;	/* embedded session/base run context */
	unsigned long entry_ip;			/* entry_ip argument of uprobe_prog_run() */
	struct bpf_uprobe *uprobe;		/* probe location that fired */
};
2974 
2975 static void bpf_uprobe_unregister(struct bpf_uprobe *uprobes, u32 cnt)
2976 {
2977 	u32 i;
2978 
2979 	for (i = 0; i < cnt; i++)
2980 		uprobe_unregister_nosync(uprobes[i].uprobe, &uprobes[i].consumer);
2981 
2982 	if (cnt)
2983 		uprobe_unregister_sync();
2984 }
2985 
2986 static void bpf_uprobe_multi_link_release(struct bpf_link *link)
2987 {
2988 	struct bpf_uprobe_multi_link *umulti_link;
2989 
2990 	umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
2991 	bpf_uprobe_unregister(umulti_link->uprobes, umulti_link->cnt);
2992 	if (umulti_link->task)
2993 		put_task_struct(umulti_link->task);
2994 	path_put(&umulti_link->path);
2995 }
2996 
2997 static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link)
2998 {
2999 	struct bpf_uprobe_multi_link *umulti_link;
3000 
3001 	umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
3002 	kvfree(umulti_link->uprobes);
3003 	kfree(umulti_link);
3004 }
3005 
3006 static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link,
3007 						struct bpf_link_info *info)
3008 {
3009 	u64 __user *uref_ctr_offsets = u64_to_user_ptr(info->uprobe_multi.ref_ctr_offsets);
3010 	u64 __user *ucookies = u64_to_user_ptr(info->uprobe_multi.cookies);
3011 	u64 __user *uoffsets = u64_to_user_ptr(info->uprobe_multi.offsets);
3012 	u64 __user *upath = u64_to_user_ptr(info->uprobe_multi.path);
3013 	u32 upath_size = info->uprobe_multi.path_size;
3014 	struct bpf_uprobe_multi_link *umulti_link;
3015 	u32 ucount = info->uprobe_multi.count;
3016 	int err = 0, i;
3017 	char *p, *buf;
3018 	long left = 0;
3019 
3020 	if (!upath ^ !upath_size)
3021 		return -EINVAL;
3022 
3023 	if ((uoffsets || uref_ctr_offsets || ucookies) && !ucount)
3024 		return -EINVAL;
3025 
3026 	umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
3027 	info->uprobe_multi.count = umulti_link->cnt;
3028 	info->uprobe_multi.flags = umulti_link->link.flags;
3029 	info->uprobe_multi.pid = umulti_link->task ?
3030 				 task_pid_nr_ns(umulti_link->task, task_active_pid_ns(current)) : 0;
3031 
3032 	upath_size = upath_size ? min_t(u32, upath_size, PATH_MAX) : PATH_MAX;
3033 	buf = kmalloc(upath_size, GFP_KERNEL);
3034 	if (!buf)
3035 		return -ENOMEM;
3036 	p = d_path(&umulti_link->path, buf, upath_size);
3037 	if (IS_ERR(p)) {
3038 		kfree(buf);
3039 		return PTR_ERR(p);
3040 	}
3041 	upath_size = buf + upath_size - p;
3042 
3043 	if (upath)
3044 		left = copy_to_user(upath, p, upath_size);
3045 	kfree(buf);
3046 	if (left)
3047 		return -EFAULT;
3048 	info->uprobe_multi.path_size = upath_size;
3049 
3050 	if (!uoffsets && !ucookies && !uref_ctr_offsets)
3051 		return 0;
3052 
3053 	if (ucount < umulti_link->cnt)
3054 		err = -ENOSPC;
3055 	else
3056 		ucount = umulti_link->cnt;
3057 
3058 	for (i = 0; i < ucount; i++) {
3059 		if (uoffsets &&
3060 		    put_user(umulti_link->uprobes[i].offset, uoffsets + i))
3061 			return -EFAULT;
3062 		if (uref_ctr_offsets &&
3063 		    put_user(umulti_link->uprobes[i].ref_ctr_offset, uref_ctr_offsets + i))
3064 			return -EFAULT;
3065 		if (ucookies &&
3066 		    put_user(umulti_link->uprobes[i].cookie, ucookies + i))
3067 			return -EFAULT;
3068 	}
3069 
3070 	return err;
3071 }
3072 
3073 #ifdef CONFIG_PROC_FS
3074 static void bpf_uprobe_multi_show_fdinfo(const struct bpf_link *link,
3075 					 struct seq_file *seq)
3076 {
3077 	struct bpf_uprobe_multi_link *umulti_link;
3078 	char *p, *buf;
3079 	pid_t pid;
3080 
3081 	umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
3082 
3083 	buf = kmalloc(PATH_MAX, GFP_KERNEL);
3084 	if (!buf)
3085 		return;
3086 
3087 	p = d_path(&umulti_link->path, buf, PATH_MAX);
3088 	if (IS_ERR(p)) {
3089 		kfree(buf);
3090 		return;
3091 	}
3092 
3093 	pid = umulti_link->task ?
3094 	      task_pid_nr_ns(umulti_link->task, task_active_pid_ns(current)) : 0;
3095 	seq_printf(seq,
3096 		   "uprobe_cnt:\t%u\n"
3097 		   "pid:\t%u\n"
3098 		   "path:\t%s\n",
3099 		   umulti_link->cnt, pid, p);
3100 
3101 	seq_printf(seq, "%s\t %s\t %s\n", "cookie", "offset", "ref_ctr_offset");
3102 	for (int i = 0; i < umulti_link->cnt; i++) {
3103 		seq_printf(seq,
3104 			   "%llu\t %#llx\t %#lx\n",
3105 			   umulti_link->uprobes[i].cookie,
3106 			   umulti_link->uprobes[i].offset,
3107 			   umulti_link->uprobes[i].ref_ctr_offset);
3108 	}
3109 
3110 	kfree(buf);
3111 }
3112 #endif
3113 
/* Link operations installed on every uprobe_multi link by bpf_link_init(). */
static const struct bpf_link_ops bpf_uprobe_multi_link_lops = {
	.release = bpf_uprobe_multi_link_release,
	.dealloc_deferred = bpf_uprobe_multi_link_dealloc,
	.fill_link_info = bpf_uprobe_multi_link_fill_link_info,
#ifdef CONFIG_PROC_FS
	/* The fdinfo printer is only compiled when procfs is enabled. */
	.show_fdinfo = bpf_uprobe_multi_show_fdinfo,
#endif
};
3122 
3123 static int uprobe_prog_run(struct bpf_uprobe *uprobe,
3124 			   unsigned long entry_ip,
3125 			   struct pt_regs *regs,
3126 			   bool is_return, void *data)
3127 {
3128 	struct bpf_uprobe_multi_link *link = uprobe->link;
3129 	struct bpf_uprobe_multi_run_ctx run_ctx = {
3130 		.session_ctx = {
3131 			.is_return = is_return,
3132 			.data = data,
3133 		},
3134 		.entry_ip = entry_ip,
3135 		.uprobe = uprobe,
3136 	};
3137 	struct bpf_prog *prog = link->link.prog;
3138 	bool sleepable = prog->sleepable;
3139 	struct bpf_run_ctx *old_run_ctx;
3140 	int err;
3141 
3142 	if (link->task && !same_thread_group(current, link->task))
3143 		return 0;
3144 
3145 	if (sleepable)
3146 		rcu_read_lock_trace();
3147 	else
3148 		rcu_read_lock();
3149 
3150 	migrate_disable();
3151 
3152 	old_run_ctx = bpf_set_run_ctx(&run_ctx.session_ctx.run_ctx);
3153 	err = bpf_prog_run(link->link.prog, regs);
3154 	bpf_reset_run_ctx(old_run_ctx);
3155 
3156 	migrate_enable();
3157 
3158 	if (sleepable)
3159 		rcu_read_unlock_trace();
3160 	else
3161 		rcu_read_unlock();
3162 	return err;
3163 }
3164 
3165 static bool
3166 uprobe_multi_link_filter(struct uprobe_consumer *con, struct mm_struct *mm)
3167 {
3168 	struct bpf_uprobe *uprobe;
3169 
3170 	uprobe = container_of(con, struct bpf_uprobe, consumer);
3171 	return uprobe->link->task->mm == mm;
3172 }
3173 
3174 static int
3175 uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs,
3176 			  __u64 *data)
3177 {
3178 	struct bpf_uprobe *uprobe;
3179 	int ret;
3180 
3181 	uprobe = container_of(con, struct bpf_uprobe, consumer);
3182 	ret = uprobe_prog_run(uprobe, instruction_pointer(regs), regs, false, data);
3183 	if (uprobe->session)
3184 		return ret ? UPROBE_HANDLER_IGNORE : 0;
3185 	return 0;
3186 }
3187 
3188 static int
3189 uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs,
3190 			      __u64 *data)
3191 {
3192 	struct bpf_uprobe *uprobe;
3193 
3194 	uprobe = container_of(con, struct bpf_uprobe, consumer);
3195 	uprobe_prog_run(uprobe, func, regs, true, data);
3196 	return 0;
3197 }
3198 
3199 static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
3200 {
3201 	struct bpf_uprobe_multi_run_ctx *run_ctx;
3202 
3203 	run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx,
3204 			       session_ctx.run_ctx);
3205 	return run_ctx->entry_ip;
3206 }
3207 
3208 static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx)
3209 {
3210 	struct bpf_uprobe_multi_run_ctx *run_ctx;
3211 
3212 	run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx,
3213 			       session_ctx.run_ctx);
3214 	return run_ctx->uprobe->cookie;
3215 }
3216 
3217 int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
3218 {
3219 	struct bpf_uprobe_multi_link *link = NULL;
3220 	unsigned long __user *uref_ctr_offsets;
3221 	struct bpf_link_primer link_primer;
3222 	struct bpf_uprobe *uprobes = NULL;
3223 	struct task_struct *task = NULL;
3224 	unsigned long __user *uoffsets;
3225 	u64 __user *ucookies;
3226 	void __user *upath;
3227 	u32 flags, cnt, i;
3228 	struct path path;
3229 	char *name;
3230 	pid_t pid;
3231 	int err;
3232 
3233 	/* no support for 32bit archs yet */
3234 	if (sizeof(u64) != sizeof(void *))
3235 		return -EOPNOTSUPP;
3236 
3237 	if (attr->link_create.flags)
3238 		return -EINVAL;
3239 
3240 	if (!is_uprobe_multi(prog))
3241 		return -EINVAL;
3242 
3243 	flags = attr->link_create.uprobe_multi.flags;
3244 	if (flags & ~BPF_F_UPROBE_MULTI_RETURN)
3245 		return -EINVAL;
3246 
3247 	/*
3248 	 * path, offsets and cnt are mandatory,
3249 	 * ref_ctr_offsets and cookies are optional
3250 	 */
3251 	upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path);
3252 	uoffsets = u64_to_user_ptr(attr->link_create.uprobe_multi.offsets);
3253 	cnt = attr->link_create.uprobe_multi.cnt;
3254 	pid = attr->link_create.uprobe_multi.pid;
3255 
3256 	if (!upath || !uoffsets || !cnt || pid < 0)
3257 		return -EINVAL;
3258 	if (cnt > MAX_UPROBE_MULTI_CNT)
3259 		return -E2BIG;
3260 
3261 	uref_ctr_offsets = u64_to_user_ptr(attr->link_create.uprobe_multi.ref_ctr_offsets);
3262 	ucookies = u64_to_user_ptr(attr->link_create.uprobe_multi.cookies);
3263 
3264 	name = strndup_user(upath, PATH_MAX);
3265 	if (IS_ERR(name)) {
3266 		err = PTR_ERR(name);
3267 		return err;
3268 	}
3269 
3270 	err = kern_path(name, LOOKUP_FOLLOW, &path);
3271 	kfree(name);
3272 	if (err)
3273 		return err;
3274 
3275 	if (!d_is_reg(path.dentry)) {
3276 		err = -EBADF;
3277 		goto error_path_put;
3278 	}
3279 
3280 	if (pid) {
3281 		rcu_read_lock();
3282 		task = get_pid_task(find_vpid(pid), PIDTYPE_TGID);
3283 		rcu_read_unlock();
3284 		if (!task) {
3285 			err = -ESRCH;
3286 			goto error_path_put;
3287 		}
3288 	}
3289 
3290 	err = -ENOMEM;
3291 
3292 	link = kzalloc_obj(*link);
3293 	uprobes = kvzalloc_objs(*uprobes, cnt);
3294 
3295 	if (!uprobes || !link)
3296 		goto error_free;
3297 
3298 	for (i = 0; i < cnt; i++) {
3299 		if (__get_user(uprobes[i].offset, uoffsets + i)) {
3300 			err = -EFAULT;
3301 			goto error_free;
3302 		}
3303 		if (uprobes[i].offset < 0) {
3304 			err = -EINVAL;
3305 			goto error_free;
3306 		}
3307 		if (uref_ctr_offsets && __get_user(uprobes[i].ref_ctr_offset, uref_ctr_offsets + i)) {
3308 			err = -EFAULT;
3309 			goto error_free;
3310 		}
3311 		if (ucookies && __get_user(uprobes[i].cookie, ucookies + i)) {
3312 			err = -EFAULT;
3313 			goto error_free;
3314 		}
3315 
3316 		uprobes[i].link = link;
3317 
3318 		if (!(flags & BPF_F_UPROBE_MULTI_RETURN))
3319 			uprobes[i].consumer.handler = uprobe_multi_link_handler;
3320 		if (flags & BPF_F_UPROBE_MULTI_RETURN || is_uprobe_session(prog))
3321 			uprobes[i].consumer.ret_handler = uprobe_multi_link_ret_handler;
3322 		if (is_uprobe_session(prog))
3323 			uprobes[i].session = true;
3324 		if (pid)
3325 			uprobes[i].consumer.filter = uprobe_multi_link_filter;
3326 	}
3327 
3328 	link->cnt = cnt;
3329 	link->uprobes = uprobes;
3330 	link->path = path;
3331 	link->task = task;
3332 	link->link.flags = flags;
3333 
3334 	bpf_link_init(&link->link, BPF_LINK_TYPE_UPROBE_MULTI,
3335 		      &bpf_uprobe_multi_link_lops, prog, attr->link_create.attach_type);
3336 
3337 	for (i = 0; i < cnt; i++) {
3338 		uprobes[i].uprobe = uprobe_register(d_real_inode(link->path.dentry),
3339 						    uprobes[i].offset,
3340 						    uprobes[i].ref_ctr_offset,
3341 						    &uprobes[i].consumer);
3342 		if (IS_ERR(uprobes[i].uprobe)) {
3343 			err = PTR_ERR(uprobes[i].uprobe);
3344 			link->cnt = i;
3345 			goto error_unregister;
3346 		}
3347 	}
3348 
3349 	err = bpf_link_prime(&link->link, &link_primer);
3350 	if (err)
3351 		goto error_unregister;
3352 
3353 	return bpf_link_settle(&link_primer);
3354 
3355 error_unregister:
3356 	bpf_uprobe_unregister(uprobes, link->cnt);
3357 
3358 error_free:
3359 	kvfree(uprobes);
3360 	kfree(link);
3361 	if (task)
3362 		put_task_struct(task);
3363 error_path_put:
3364 	path_put(&path);
3365 	return err;
3366 }
3367 #else /* !CONFIG_UPROBES */
/* Stub for builds without CONFIG_UPROBES: attaching always fails with -EOPNOTSUPP. */
int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	return -EOPNOTSUPP;
}
/* Stub for builds without CONFIG_UPROBES: the cookie is always 0. */
static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx)
{
	return 0;
}
/* Stub for builds without CONFIG_UPROBES: the entry ip is always 0. */
static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
{
	return 0;
}
3380 #endif /* CONFIG_UPROBES */
3381 
3382 __bpf_kfunc_start_defs();
3383 
3384 __bpf_kfunc bool bpf_session_is_return(void *ctx)
3385 {
3386 	struct bpf_session_run_ctx *session_ctx;
3387 
3388 	session_ctx = container_of(current->bpf_ctx, struct bpf_session_run_ctx, run_ctx);
3389 	return session_ctx->is_return;
3390 }
3391 
3392 __bpf_kfunc __u64 *bpf_session_cookie(void *ctx)
3393 {
3394 	struct bpf_session_run_ctx *session_ctx;
3395 
3396 	session_ctx = container_of(current->bpf_ctx, struct bpf_session_run_ctx, run_ctx);
3397 	return session_ctx->data;
3398 }
3399 
3400 __bpf_kfunc_end_defs();
3401 
/* BTF id set listing the session kfuncs; bpf_session_filter() gates access to it. */
BTF_KFUNCS_START(session_kfunc_set_ids)
BTF_ID_FLAGS(func, bpf_session_is_return)
BTF_ID_FLAGS(func, bpf_session_cookie)
BTF_KFUNCS_END(session_kfunc_set_ids)
3406 
3407 static int bpf_session_filter(const struct bpf_prog *prog, u32 kfunc_id)
3408 {
3409 	if (!btf_id_set8_contains(&session_kfunc_set_ids, kfunc_id))
3410 		return 0;
3411 
3412 	if (!is_kprobe_session(prog) && !is_uprobe_session(prog) && !is_trace_fsession(prog))
3413 		return -EACCES;
3414 
3415 	return 0;
3416 }
3417 
/*
 * kfunc id set registered by bpf_trace_kfuncs_init(); the filter callback
 * decides per program whether a session kfunc may be used.
 */
static const struct btf_kfunc_id_set bpf_session_kfunc_set = {
	.owner = THIS_MODULE,
	.set = &session_kfunc_set_ids,
	.filter = bpf_session_filter,
};
3423 
3424 static int __init bpf_trace_kfuncs_init(void)
3425 {
3426 	int err = 0;
3427 
3428 	err = err ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_KPROBE, &bpf_session_kfunc_set);
3429 	err = err ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_session_kfunc_set);
3430 
3431 	return err;
3432 }
3433 
3434 late_initcall(bpf_trace_kfuncs_init);
3435 
/*
 * Callback type used by __bpf_dynptr_copy() and __bpf_dynptr_copy_str() to
 * read up to @size bytes from @src into @dst; @tsk is forwarded unchanged
 * from the caller and may be NULL.
 */
typedef int (*copy_fn_t)(void *dst, const void *src, u32 size, struct task_struct *tsk);
3437 
3438 /*
3439  * The __always_inline is to make sure the compiler doesn't
3440  * generate indirect calls into callbacks, which is expensive,
3441  * on some kernel configurations. This allows compiler to put
3442  * direct calls into all the specific callback implementations
3443  * (copy_user_data_sleepable, copy_user_data_nofault, and so on)
3444  */
3445 static __always_inline int __bpf_dynptr_copy_str(const struct bpf_dynptr *dptr, u64 doff, u64 size,
3446 						 const void *unsafe_src,
3447 						 copy_fn_t str_copy_fn,
3448 						 struct task_struct *tsk)
3449 {
3450 	const struct bpf_dynptr_kern *dst;
3451 	u64 chunk_sz, off;
3452 	void *dst_slice;
3453 	int cnt, err;
3454 	char buf[256];
3455 
3456 	dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size);
3457 	if (likely(dst_slice))
3458 		return str_copy_fn(dst_slice, unsafe_src, size, tsk);
3459 
3460 	dst = (struct bpf_dynptr_kern *)dptr;
3461 	if (bpf_dynptr_check_off_len(dst, doff, size))
3462 		return -E2BIG;
3463 
3464 	for (off = 0; off < size; off += chunk_sz - 1) {
3465 		chunk_sz = min_t(u64, sizeof(buf), size - off);
3466 		/* Expect str_copy_fn to return count of copied bytes, including
3467 		 * zero terminator. Next iteration increment off by chunk_sz - 1 to
3468 		 * overwrite NUL.
3469 		 */
3470 		cnt = str_copy_fn(buf, unsafe_src + off, chunk_sz, tsk);
3471 		if (cnt < 0)
3472 			return cnt;
3473 		err = __bpf_dynptr_write(dst, doff + off, buf, cnt, 0);
3474 		if (err)
3475 			return err;
3476 		if (cnt < chunk_sz || chunk_sz == 1) /* we are done */
3477 			return off + cnt;
3478 	}
3479 	return off;
3480 }
3481 
3482 static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u64 doff,
3483 					     u64 size, const void *unsafe_src,
3484 					     copy_fn_t copy_fn, struct task_struct *tsk)
3485 {
3486 	const struct bpf_dynptr_kern *dst;
3487 	void *dst_slice;
3488 	char buf[256];
3489 	u64 off, chunk_sz;
3490 	int err;
3491 
3492 	dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size);
3493 	if (likely(dst_slice))
3494 		return copy_fn(dst_slice, unsafe_src, size, tsk);
3495 
3496 	dst = (struct bpf_dynptr_kern *)dptr;
3497 	if (bpf_dynptr_check_off_len(dst, doff, size))
3498 		return -E2BIG;
3499 
3500 	for (off = 0; off < size; off += chunk_sz) {
3501 		chunk_sz = min_t(u64, sizeof(buf), size - off);
3502 		err = copy_fn(buf, unsafe_src + off, chunk_sz, tsk);
3503 		if (err)
3504 			return err;
3505 		err = __bpf_dynptr_write(dst, doff + off, buf, chunk_sz, 0);
3506 		if (err)
3507 			return err;
3508 	}
3509 	return 0;
3510 }
3511 
/* copy_fn_t adapter around copy_from_user_nofault(); @tsk is ignored. */
static __always_inline int copy_user_data_nofault(void *dst, const void *unsafe_src,
						  u32 size, struct task_struct *tsk)
{
	return copy_from_user_nofault(dst, (const void __user *)unsafe_src, size);
}
3517 
3518 static __always_inline int copy_user_data_sleepable(void *dst, const void *unsafe_src,
3519 						    u32 size, struct task_struct *tsk)
3520 {
3521 	int ret;
3522 
3523 	if (!tsk) { /* Read from the current task */
3524 		ret = copy_from_user(dst, (const void __user *)unsafe_src, size);
3525 		if (ret)
3526 			return -EFAULT;
3527 		return 0;
3528 	}
3529 
3530 	ret = access_process_vm(tsk, (unsigned long)unsafe_src, dst, size, 0);
3531 	if (ret != size)
3532 		return -EFAULT;
3533 	return 0;
3534 }
3535 
/* copy_fn_t adapter around copy_from_kernel_nofault(); @tsk is ignored. */
static __always_inline int copy_kernel_data_nofault(void *dst, const void *unsafe_src,
						    u32 size, struct task_struct *tsk)
{
	return copy_from_kernel_nofault(dst, unsafe_src, size);
}
3541 
/* copy_fn_t adapter around strncpy_from_user_nofault(); @tsk is ignored. */
static __always_inline int copy_user_str_nofault(void *dst, const void *unsafe_src,
						 u32 size, struct task_struct *tsk)
{
	return strncpy_from_user_nofault(dst, (const void __user *)unsafe_src, size);
}
3547 
3548 static __always_inline int copy_user_str_sleepable(void *dst, const void *unsafe_src,
3549 						   u32 size, struct task_struct *tsk)
3550 {
3551 	int ret;
3552 
3553 	if (unlikely(size == 0))
3554 		return 0;
3555 
3556 	if (tsk) {
3557 		ret = copy_remote_vm_str(tsk, (unsigned long)unsafe_src, dst, size, 0);
3558 	} else {
3559 		ret = strncpy_from_user(dst, (const void __user *)unsafe_src, size - 1);
3560 		/* strncpy_from_user does not guarantee NUL termination */
3561 		if (ret >= 0)
3562 			((char *)dst)[ret] = '\0';
3563 	}
3564 
3565 	if (ret < 0)
3566 		return ret;
3567 	return ret + 1;
3568 }
3569 
/* copy_fn_t adapter around strncpy_from_kernel_nofault(); @tsk is ignored. */
static __always_inline int copy_kernel_str_nofault(void *dst, const void *unsafe_src,
						   u32 size, struct task_struct *tsk)
{
	return strncpy_from_kernel_nofault(dst, unsafe_src, size);
}
3575 
3576 __bpf_kfunc_start_defs();
3577 
3578 __bpf_kfunc int bpf_send_signal_task(struct task_struct *task, int sig, enum pid_type type,
3579 				     u64 value)
3580 {
3581 	if (type != PIDTYPE_PID && type != PIDTYPE_TGID)
3582 		return -EINVAL;
3583 
3584 	return bpf_send_signal_common(sig, type, task, value);
3585 }
3586 
/*
 * kfunc: copy @size bytes of user memory at @unsafe_ptr__ign into @dptr at
 * offset @off, using the non-faulting user copy callback.
 */
__bpf_kfunc int bpf_probe_read_user_dynptr(const struct bpf_dynptr *dptr, u64 off,
					   u64 size, const void __user *unsafe_ptr__ign)
{
	return __bpf_dynptr_copy(dptr, off, size, (const void __force *)unsafe_ptr__ign,
				 copy_user_data_nofault, NULL);
}
3593 
/*
 * kfunc: copy @size bytes of kernel memory at @unsafe_ptr__ign into @dptr
 * at offset @off, using the non-faulting kernel copy callback.
 */
__bpf_kfunc int bpf_probe_read_kernel_dynptr(const struct bpf_dynptr *dptr, u64 off,
					     u64 size, const void *unsafe_ptr__ign)
{
	return __bpf_dynptr_copy(dptr, off, size, unsafe_ptr__ign,
				 copy_kernel_data_nofault, NULL);
}
3600 
/*
 * kfunc: copy a user string at @unsafe_ptr__ign into @dptr at offset @off,
 * limited to @size bytes, using the non-faulting user string callback.
 */
__bpf_kfunc int bpf_probe_read_user_str_dynptr(const struct bpf_dynptr *dptr, u64 off,
					       u64 size, const void __user *unsafe_ptr__ign)
{
	return __bpf_dynptr_copy_str(dptr, off, size, (const void __force *)unsafe_ptr__ign,
				     copy_user_str_nofault, NULL);
}
3607 
/*
 * kfunc: copy a kernel string at @unsafe_ptr__ign into @dptr at offset
 * @off, limited to @size bytes, using the non-faulting kernel string
 * callback.
 */
__bpf_kfunc int bpf_probe_read_kernel_str_dynptr(const struct bpf_dynptr *dptr, u64 off,
						 u64 size, const void *unsafe_ptr__ign)
{
	return __bpf_dynptr_copy_str(dptr, off, size, unsafe_ptr__ign,
				     copy_kernel_str_nofault, NULL);
}
3614 
/*
 * kfunc: copy @size bytes of the current task's user memory at
 * @unsafe_ptr__ign into @dptr at offset @off, using the sleepable user
 * copy callback (no task is passed).
 */
__bpf_kfunc int bpf_copy_from_user_dynptr(const struct bpf_dynptr *dptr, u64 off,
					  u64 size, const void __user *unsafe_ptr__ign)
{
	return __bpf_dynptr_copy(dptr, off, size, (const void __force *)unsafe_ptr__ign,
				 copy_user_data_sleepable, NULL);
}
3621 
/*
 * kfunc: copy a string from the current task's user memory at
 * @unsafe_ptr__ign into @dptr at offset @off, limited to @size bytes, using
 * the sleepable user string callback (no task is passed).
 */
__bpf_kfunc int bpf_copy_from_user_str_dynptr(const struct bpf_dynptr *dptr, u64 off,
					      u64 size, const void __user *unsafe_ptr__ign)
{
	return __bpf_dynptr_copy_str(dptr, off, size, (const void __force *)unsafe_ptr__ign,
				     copy_user_str_sleepable, NULL);
}
3628 
/*
 * kfunc: copy @size bytes of user memory at @unsafe_ptr__ign into @dptr at
 * offset @off, using the sleepable user copy callback with @tsk as the
 * task to read from.
 */
__bpf_kfunc int bpf_copy_from_user_task_dynptr(const struct bpf_dynptr *dptr, u64 off,
					       u64 size, const void __user *unsafe_ptr__ign,
					       struct task_struct *tsk)
{
	return __bpf_dynptr_copy(dptr, off, size, (const void __force *)unsafe_ptr__ign,
				 copy_user_data_sleepable, tsk);
}
3636 
/*
 * kfunc: copy a user string at @unsafe_ptr__ign into @dptr at offset @off,
 * limited to @size bytes, using the sleepable user string callback with
 * @tsk as the task to read from.
 */
__bpf_kfunc int bpf_copy_from_user_task_str_dynptr(const struct bpf_dynptr *dptr, u64 off,
						   u64 size, const void __user *unsafe_ptr__ign,
						   struct task_struct *tsk)
{
	return __bpf_dynptr_copy_str(dptr, off, size, (const void __force *)unsafe_ptr__ign,
				     copy_user_str_sleepable, tsk);
}
3644 
3645 __bpf_kfunc_end_defs();
3646 
3647 #if defined(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS) && \
3648     defined(CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS)
3649 
3650 static void bpf_tracing_multi_link_release(struct bpf_link *link)
3651 {
3652 	struct bpf_tracing_multi_link *tr_link =
3653 		container_of(link, struct bpf_tracing_multi_link, link);
3654 
3655 	WARN_ON_ONCE(bpf_trampoline_multi_detach(link->prog, tr_link));
3656 }
3657 
3658 static void bpf_tracing_multi_link_dealloc(struct bpf_link *link)
3659 {
3660 	struct bpf_tracing_multi_link *tr_link =
3661 		container_of(link, struct bpf_tracing_multi_link, link);
3662 
3663 	kvfree(tr_link->fexits);
3664 	kvfree(tr_link->cookies);
3665 	kvfree(tr_link);
3666 }
3667 
3668 #ifdef CONFIG_PROC_FS
3669 static void bpf_tracing_multi_show_fdinfo(const struct bpf_link *link,
3670 					  struct seq_file *seq)
3671 {
3672 	struct bpf_tracing_multi_link *tr_link =
3673 		container_of(link, struct bpf_tracing_multi_link, link);
3674 	bool has_cookies = !!tr_link->cookies;
3675 
3676 	seq_printf(seq, "attach_type:\t%u\n", tr_link->link.attach_type);
3677 	seq_printf(seq, "cnt:\t%u\n", tr_link->nodes_cnt);
3678 
3679 	seq_printf(seq, "%s\t %s\t %s\t %s\n", "obj-id", "btf-id", "cookie", "func");
3680 	for (int i = 0; i < tr_link->nodes_cnt; i++) {
3681 		struct bpf_tracing_multi_node *mnode = &tr_link->nodes[i];
3682 		u32 btf_id, obj_id;
3683 
3684 		bpf_trampoline_unpack_key(mnode->trampoline->key, &obj_id, &btf_id);
3685 		seq_printf(seq, "%u\t %u\t %llu\t %pS\n",
3686 			   obj_id, btf_id,
3687 			   has_cookies ? tr_link->cookies[i] : 0,
3688 			   (void *) mnode->trampoline->ip);
3689 
3690 		cond_resched();
3691 	}
3692 }
3693 #endif
3694 
/* Link operations installed on every tracing_multi link by bpf_link_init(). */
static const struct bpf_link_ops bpf_tracing_multi_link_lops = {
	.release = bpf_tracing_multi_link_release,
	.dealloc_deferred = bpf_tracing_multi_link_dealloc,
#ifdef CONFIG_PROC_FS
	/* The fdinfo printer is only compiled when procfs is enabled. */
	.show_fdinfo = bpf_tracing_multi_show_fdinfo,
#endif
};
3702 
3703 static int ids_cmp_r(const void *pa, const void *pb, const void *priv __maybe_unused)
3704 {
3705 	u32 a = *(u32 *) pa;
3706 	u32 b = *(u32 *) pb;
3707 
3708 	return (a > b) - (a < b);
3709 }
3710 
3711 static void ids_swap_r(void *a, void *b, int size __maybe_unused,
3712 		       const void *priv __maybe_unused)
3713 {
3714 	u64 *cookie_a, *cookie_b, *cookies;
3715 	u32 *id_a = a, *id_b = b, *ids;
3716 	void **data = (void **) priv;
3717 
3718 	ids     = data[0];
3719 	cookies = data[1];
3720 
3721 	if (cookies) {
3722 		cookie_a = cookies + (id_a - ids);
3723 		cookie_b = cookies + (id_b - ids);
3724 		swap(*cookie_a, *cookie_b);
3725 	}
3726 	swap(*id_a, *id_b);
3727 }
3728 
3729 static int check_dup_ids(u32 *ids, u64 *cookies, u32 cnt)
3730 {
3731 	void *data[2] = { ids, cookies };
3732 	int err = 0;
3733 
3734 	/*
3735 	 * Sort ids array (together with cookies array if defined)
3736 	 * and check it for duplicates. The ids and cookies arrays
3737 	 * are left sorted.
3738 	 */
3739 	sort_r_nonatomic(ids, cnt, sizeof(ids[0]), ids_cmp_r, ids_swap_r, data);
3740 
3741 	for (int i = 1; i < cnt; i++) {
3742 		if (ids[i] == ids[i - 1]) {
3743 			err = -EINVAL;
3744 			break;
3745 		}
3746 	}
3747 	return err;
3748 }
3749 
/*
 * Create a tracing_multi link for @prog and attach it to the functions
 * identified by the user-supplied id array.
 *
 * @prog: program to attach
 * @attr: link_create attributes; tracing_multi.ids and .cnt are mandatory,
 *        .cookies is optional
 *
 * Returns the result of bpf_link_settle() on success or a negative errno:
 *   -EINVAL  missing ids/cnt, non-zero flags or target_fd, duplicate ids
 *   -E2BIG   cnt exceeds MAX_TRACING_MULTI_CNT
 *   -ENOMEM  allocation failure
 *   -EFAULT  a user array could not be copied
 * plus whatever bpf_link_prime() or bpf_trampoline_multi_attach() report.
 */
int bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr)
{
	struct bpf_tracing_multi_link *link = NULL;
	struct bpf_tramp_node *fexits = NULL;
	struct bpf_link_primer link_primer;
	u32 cnt, *ids = NULL;
	u64 __user *ucookies;
	u64 *cookies = NULL;
	u32 __user *uids;
	int err;

	uids = u64_to_user_ptr(attr->link_create.tracing_multi.ids);
	cnt = attr->link_create.tracing_multi.cnt;

	if (!cnt || !uids)
		return -EINVAL;
	if (cnt > MAX_TRACING_MULTI_CNT)
		return -E2BIG;
	if (attr->link_create.flags || attr->link_create.target_fd)
		return -EINVAL;

	ids = kvmalloc_objs(*ids, cnt);
	if (!ids)
		return -ENOMEM;

	if (copy_from_user(ids, uids, cnt * sizeof(*ids))) {
		err = -EFAULT;
		goto error;
	}

	/* Cookies are optional; without them the cookies pointer stays NULL. */
	ucookies = u64_to_user_ptr(attr->link_create.tracing_multi.cookies);
	if (ucookies) {
		cookies = kvmalloc_objs(*cookies, cnt);
		if (!cookies) {
			err = -ENOMEM;
			goto error;
		}
		if (copy_from_user(cookies, ucookies, cnt * sizeof(*cookies))) {
			err = -EFAULT;
			goto error;
		}
	}

	/* Sorts ids (and cookies alongside) in place and rejects duplicates. */
	err = check_dup_ids(ids, cookies, cnt);
	if (err)
		goto error;

	/* The fexits array is only allocated for fsession multi programs. */
	if (prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI) {
		fexits = kvmalloc_objs(*fexits, cnt);
		if (!fexits) {
			err = -ENOMEM;
			goto error;
		}
	}

	link = kvzalloc_flex(*link, nodes, cnt);
	if (!link) {
		err = -ENOMEM;
		goto error;
	}

	bpf_link_init(&link->link, BPF_LINK_TYPE_TRACING_MULTI,
		      &bpf_tracing_multi_link_lops, prog, prog->expected_attach_type);

	err = bpf_link_prime(&link->link, &link_primer);
	if (err)
		goto error;

	/*
	 * Past this point the error label is no longer used: the link now
	 * holds the cookies and fexits arrays and a failure below goes
	 * through bpf_link_cleanup() instead.
	 * NOTE(review): presumably the link and both arrays are then freed
	 * via the link's dealloc callback - confirm.
	 */
	link->nodes_cnt = cnt;
	link->cookies = cookies;
	link->fexits = fexits;

	err = bpf_trampoline_multi_attach(prog, ids, link);
	/* The ids array is not stored in the link; free it on both outcomes. */
	kvfree(ids);
	if (err) {
		bpf_link_cleanup(&link_primer);
		return err;
	}
	return bpf_link_settle(&link_primer);

error:
	kvfree(fexits);
	kvfree(cookies);
	kvfree(ids);
	kvfree(link);
	return err;
}
3837 
3838 #else
3839 
/*
 * Stub for builds lacking CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS or
 * CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS: attaching always fails with
 * -EOPNOTSUPP.
 */
int bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr)
{
	return -EOPNOTSUPP;
}
3844 
3845 #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS && CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS */
3846