xref: /linux/kernel/trace/bpf_trace.c (revision 26330a9226417c9a3395db9fdb403f7d7371e6b7)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
3  * Copyright (c) 2016 Facebook
4  */
5 #include <linux/kernel.h>
6 #include <linux/types.h>
7 #include <linux/slab.h>
8 #include <linux/bpf.h>
9 #include <linux/bpf_verifier.h>
10 #include <linux/bpf_perf_event.h>
11 #include <linux/btf.h>
12 #include <linux/filter.h>
13 #include <linux/uaccess.h>
14 #include <linux/ctype.h>
15 #include <linux/kprobes.h>
16 #include <linux/spinlock.h>
17 #include <linux/syscalls.h>
18 #include <linux/error-injection.h>
19 #include <linux/btf_ids.h>
20 #include <linux/bpf_lsm.h>
21 #include <linux/fprobe.h>
22 #include <linux/bsearch.h>
23 #include <linux/sort.h>
24 #include <linux/key.h>
25 #include <linux/namei.h>
26 #include <linux/file.h>
27 
28 #include <net/bpf_sk_storage.h>
29 
30 #include <uapi/linux/bpf.h>
31 #include <uapi/linux/btf.h>
32 
33 #include <asm/tlb.h>
34 
35 #include "trace_probe.h"
36 #include "trace.h"
37 
38 #define CREATE_TRACE_POINTS
39 #include "bpf_trace.h"
40 
/*
 * Fetch an RCU-protected pointer via rcu_dereference_protected(); the lockdep
 * condition passed along is that bpf_event_mutex is held.
 */
#define bpf_event_rcu_dereference(p)					\
	rcu_dereference_protected(p, lockdep_is_held(&bpf_event_mutex))

/* Upper bounds (2^20 each); the users are not in this part of the file. */
#define MAX_UPROBE_MULTI_CNT (1U << 20)
#define MAX_KPROBE_MULTI_CNT (1U << 20)
#define MAX_TRACING_MULTI_CNT (1U << 20)
47 
48 #ifdef CONFIG_MODULES
/* One entry of the bpf_trace_modules list. */
struct bpf_trace_module {
	struct module *module;	/* module whose bpf_raw_events are searched */
	struct list_head list;	/* linkage into bpf_trace_modules */
};

/* List of struct bpf_trace_module; walked under bpf_module_mutex below. */
static LIST_HEAD(bpf_trace_modules);
static DEFINE_MUTEX(bpf_module_mutex);
56 
57 static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
58 {
59 	struct bpf_raw_event_map *btp, *ret = NULL;
60 	struct bpf_trace_module *btm;
61 	unsigned int i;
62 
63 	mutex_lock(&bpf_module_mutex);
64 	list_for_each_entry(btm, &bpf_trace_modules, list) {
65 		for (i = 0; i < btm->module->num_bpf_raw_events; ++i) {
66 			btp = &btm->module->bpf_raw_events[i];
67 			if (!strcmp(btp->tp->name, name)) {
68 				if (try_module_get(btm->module))
69 					ret = btp;
70 				goto out;
71 			}
72 		}
73 	}
74 out:
75 	mutex_unlock(&bpf_module_mutex);
76 	return ret;
77 }
78 #else
/* !CONFIG_MODULES variant: there are no module tracepoints to look up. */
static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
{
	return NULL;
}
83 #endif /* CONFIG_MODULES */
84 
/* Prototypes taking five raw u64 arguments; not defined in this part of the file. */
u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);

/* Forward declarations of static helpers used before their definitions. */
static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
				  u64 flags, const struct btf **btf,
				  s32 *btf_id);
static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx);
static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx);

static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx);
static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx);
96 
97 /**
98  * trace_call_bpf - invoke BPF program
99  * @call: tracepoint event
100  * @ctx: opaque context pointer
101  *
102  * kprobe handlers execute BPF programs via this helper.
103  * Can be used from static tracepoints in the future.
104  *
105  * Return: BPF programs always return an integer which is interpreted by
106  * kprobe handler as:
107  * 0 - return from kprobe (event is filtered out)
108  * 1 - store kprobe event into ring buffer
109  * Other values are reserved and currently alias to 1
110  */
111 unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
112 {
113 	unsigned int ret;
114 
115 	cant_sleep();
116 
117 	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
118 		/*
119 		 * since some bpf program is already running on this cpu,
120 		 * don't call into another bpf program (same or different)
121 		 * and don't send kprobe event into ring-buffer,
122 		 * so return zero here
123 		 */
124 		rcu_read_lock();
125 		bpf_prog_inc_misses_counters(rcu_dereference(call->prog_array));
126 		rcu_read_unlock();
127 		ret = 0;
128 		goto out;
129 	}
130 
131 	/*
132 	 * Instead of moving rcu_read_lock/rcu_dereference/rcu_read_unlock
133 	 * to all call sites, we did a bpf_prog_array_valid() there to check
134 	 * whether call->prog_array is empty or not, which is
135 	 * a heuristic to speed up execution.
136 	 *
137 	 * If bpf_prog_array_valid() fetched prog_array was
138 	 * non-NULL, we go into trace_call_bpf() and do the actual
139 	 * proper rcu_dereference() under RCU lock.
140 	 * If it turns out that prog_array is NULL then, we bail out.
141 	 * For the opposite, if the bpf_prog_array_valid() fetched pointer
142 	 * was NULL, you'll skip the prog_array with the risk of missing
143 	 * out of events when it was updated in between this and the
144 	 * rcu_dereference() which is accepted risk.
145 	 */
146 	rcu_read_lock();
147 	ret = bpf_prog_run_array(rcu_dereference(call->prog_array),
148 				 ctx, bpf_prog_run);
149 	rcu_read_unlock();
150 
151  out:
152 	__this_cpu_dec(bpf_prog_active);
153 
154 	return ret;
155 }
156 
157 /**
158  * trace_call_bpf_faultable - invoke BPF program in faultable context
159  * @call: tracepoint event
160  * @ctx: opaque context pointer
161  *
162  * Variant of trace_call_bpf() for faultable tracepoints (syscall
163  * tracepoints). Supports sleepable BPF programs by using rcu_tasks_trace
164  * for lifetime protection and bpf_prog_run_array_sleepable() for per-program
165  * RCU flavor selection, following the uprobe pattern.
166  *
167  * Per-program recursion protection is provided by
168  * bpf_prog_run_array_sleepable(). Global bpf_prog_active is not
169  * needed because syscall tracepoints cannot self-recurse.
170  *
171  * Must be called from a faultable/preemptible context.
172  */
173 unsigned int trace_call_bpf_faultable(struct trace_event_call *call, void *ctx)
174 {
175 	struct bpf_prog_array *prog_array;
176 
177 	might_fault();
178 	guard(rcu_tasks_trace)();
179 
180 	prog_array = rcu_dereference_check(call->prog_array,
181 					   rcu_read_lock_trace_held());
182 	return bpf_prog_run_array_sleepable(prog_array, ctx, bpf_prog_run);
183 }
184 
185 #ifdef CONFIG_BPF_KPROBE_OVERRIDE
/*
 * BPF helper: store @rc as the return value in @regs and then call
 * override_function_with_return() on @regs. Always returns 0.
 */
BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
{
	regs_set_return_value(regs, rc);
	override_function_with_return(regs);
	return 0;
}
192 
/* Prototype of bpf_override_return(): GPL-only; args are (ctx, anything). */
static const struct bpf_func_proto bpf_override_return_proto = {
	.func		= bpf_override_return,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
};
200 #endif
201 
202 static __always_inline int
203 bpf_probe_read_user_common(void *dst, u32 size, const void __user *unsafe_ptr)
204 {
205 	int ret;
206 
207 	ret = copy_from_user_nofault(dst, unsafe_ptr, size);
208 	if (unlikely(ret < 0))
209 		memset(dst, 0, size);
210 	return ret;
211 }
212 
/* BPF helper: thin wrapper around bpf_probe_read_user_common(). */
BPF_CALL_3(bpf_probe_read_user, void *, dst, u32, size,
	   const void __user *, unsafe_ptr)
{
	return bpf_probe_read_user_common(dst, size, unsafe_ptr);
}
218 
/*
 * Prototype of bpf_probe_read_user(): GPL-only; args are
 * (uninit mem, const size or zero, anything).
 */
const struct bpf_func_proto bpf_probe_read_user_proto = {
	.func		= bpf_probe_read_user,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};
227 
228 static __always_inline int
229 bpf_probe_read_user_str_common(void *dst, u32 size,
230 			       const void __user *unsafe_ptr)
231 {
232 	int ret;
233 
234 	/*
235 	 * NB: We rely on strncpy_from_user() not copying junk past the NUL
236 	 * terminator into `dst`.
237 	 *
238 	 * strncpy_from_user() does long-sized strides in the fast path. If the
239 	 * strncpy does not mask out the bytes after the NUL in `unsafe_ptr`,
240 	 * then there could be junk after the NUL in `dst`. If user takes `dst`
241 	 * and keys a hash map with it, then semantically identical strings can
242 	 * occupy multiple entries in the map.
243 	 */
244 	ret = strncpy_from_user_nofault(dst, unsafe_ptr, size);
245 	if (unlikely(ret < 0))
246 		memset(dst, 0, size);
247 	return ret;
248 }
249 
/* BPF helper: thin wrapper around bpf_probe_read_user_str_common(). */
BPF_CALL_3(bpf_probe_read_user_str, void *, dst, u32, size,
	   const void __user *, unsafe_ptr)
{
	return bpf_probe_read_user_str_common(dst, size, unsafe_ptr);
}
255 
/*
 * Prototype of bpf_probe_read_user_str(): GPL-only; args are
 * (uninit mem, const size or zero, anything).
 */
const struct bpf_func_proto bpf_probe_read_user_str_proto = {
	.func		= bpf_probe_read_user_str,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};
264 
/*
 * BPF helper: forwards to bpf_probe_read_kernel_common(), which is not
 * defined in this part of the file.
 */
BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
	   const void *, unsafe_ptr)
{
	return bpf_probe_read_kernel_common(dst, size, unsafe_ptr);
}
270 
/*
 * Prototype of bpf_probe_read_kernel(): GPL-only; args are
 * (uninit mem, const size or zero, anything).
 */
const struct bpf_func_proto bpf_probe_read_kernel_proto = {
	.func		= bpf_probe_read_kernel,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};
279 
280 static __always_inline int
281 bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr)
282 {
283 	int ret;
284 
285 	/*
286 	 * The strncpy_from_kernel_nofault() call will likely not fill the
287 	 * entire buffer, but that's okay in this circumstance as we're probing
288 	 * arbitrary memory anyway similar to bpf_probe_read_*() and might
289 	 * as well probe the stack. Thus, memory is explicitly cleared
290 	 * only in error case, so that improper users ignoring return
291 	 * code altogether don't copy garbage; otherwise length of string
292 	 * is returned that can be used for bpf_perf_event_output() et al.
293 	 */
294 	ret = strncpy_from_kernel_nofault(dst, unsafe_ptr, size);
295 	if (unlikely(ret < 0))
296 		memset(dst, 0, size);
297 	return ret;
298 }
299 
/* BPF helper: thin wrapper around bpf_probe_read_kernel_str_common(). */
BPF_CALL_3(bpf_probe_read_kernel_str, void *, dst, u32, size,
	   const void *, unsafe_ptr)
{
	return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr);
}
305 
/*
 * Prototype of bpf_probe_read_kernel_str(): GPL-only; args are
 * (uninit mem, const size or zero, anything).
 */
const struct bpf_func_proto bpf_probe_read_kernel_str_proto = {
	.func		= bpf_probe_read_kernel_str,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};
314 
315 #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
316 BPF_CALL_3(bpf_probe_read_compat, void *, dst, u32, size,
317 	   const void *, unsafe_ptr)
318 {
319 	if ((unsigned long)unsafe_ptr < TASK_SIZE) {
320 		return bpf_probe_read_user_common(dst, size,
321 				(__force void __user *)unsafe_ptr);
322 	}
323 	return bpf_probe_read_kernel_common(dst, size, unsafe_ptr);
324 }
325 
/*
 * Prototype of bpf_probe_read_compat(): GPL-only; args are
 * (uninit mem, const size or zero, anything).
 */
static const struct bpf_func_proto bpf_probe_read_compat_proto = {
	.func		= bpf_probe_read_compat,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};
334 
335 BPF_CALL_3(bpf_probe_read_compat_str, void *, dst, u32, size,
336 	   const void *, unsafe_ptr)
337 {
338 	if ((unsigned long)unsafe_ptr < TASK_SIZE) {
339 		return bpf_probe_read_user_str_common(dst, size,
340 				(__force void __user *)unsafe_ptr);
341 	}
342 	return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr);
343 }
344 
/*
 * Prototype of bpf_probe_read_compat_str(): GPL-only; args are
 * (uninit mem, const size or zero, anything).
 */
static const struct bpf_func_proto bpf_probe_read_compat_str_proto = {
	.func		= bpf_probe_read_compat_str,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};
353 #endif /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */
354 
355 BPF_CALL_3(bpf_probe_write_user, void __user *, unsafe_ptr, const void *, src,
356 	   u32, size)
357 {
358 	/*
359 	 * Ensure we're in user context which is safe for the helper to
360 	 * run. This helper has no business in a kthread.
361 	 *
362 	 * access_ok() should prevent writing to non-user memory, but in
363 	 * some situations (nommu, temporary switch, etc) access_ok() does
364 	 * not provide enough validation, hence the check on KERNEL_DS.
365 	 *
366 	 * nmi_uaccess_okay() ensures the probe is not run in an interim
367 	 * state, when the task or mm are switched. This is specifically
368 	 * required to prevent the use of temporary mm.
369 	 */
370 
371 	if (unlikely(in_interrupt() ||
372 		     current->flags & (PF_KTHREAD | PF_EXITING)))
373 		return -EPERM;
374 	if (unlikely(!nmi_uaccess_okay()))
375 		return -EPERM;
376 
377 	return copy_to_user_nofault(unsafe_ptr, src, size);
378 }
379 
/*
 * Prototype of bpf_probe_write_user(): GPL-only; args are
 * (anything, read-only mem, const size).
 */
static const struct bpf_func_proto bpf_probe_write_user_proto = {
	.func		= bpf_probe_write_user,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type	= ARG_CONST_SIZE,
};
388 
/* Number of u64 arguments bpf_trace_printk() accepts besides fmt/fmt_size. */
#define MAX_TRACE_PRINTK_VARARGS	3
/* NOTE(review): not referenced in this part of the file. */
#define BPF_TRACE_PRINTK_SIZE		1024
391 
392 BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
393 	   u64, arg2, u64, arg3)
394 {
395 	u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 };
396 	struct bpf_bprintf_data data = {
397 		.get_bin_args	= true,
398 		.get_buf	= true,
399 	};
400 	int ret;
401 
402 	ret = bpf_bprintf_prepare(fmt, fmt_size, args,
403 				  MAX_TRACE_PRINTK_VARARGS, &data);
404 	if (ret < 0)
405 		return ret;
406 
407 	ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args);
408 
409 	trace_bpf_trace_printk(data.buf);
410 
411 	bpf_bprintf_cleanup(&data);
412 
413 	return ret;
414 }
415 
/*
 * Prototype of bpf_trace_printk(): GPL-only; only the first two args
 * (read-only mem, const size) are typed here.
 */
static const struct bpf_func_proto bpf_trace_printk_proto = {
	.func		= bpf_trace_printk,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg2_type	= ARG_CONST_SIZE,
};
423 
/*
 * Work callback for set_printk_work: (re-)enable the
 * bpf_trace/bpf_trace_printk trace event and warn (rate-limited) when that
 * fails. @work is unused.
 */
static void __set_printk_clr_event(struct work_struct *work)
{
	/*
	 * This program might be calling bpf_trace_printk,
	 * so enable the associated bpf_trace/bpf_trace_printk event.
	 * Repeat this each time as it is possible a user has
	 * disabled bpf_trace_printk events.  By loading a program
	 * calling bpf_trace_printk() however the user has expressed
	 * the intent to see such events.
	 */
	if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1))
		/* Terminate with a newline so the record is not left open. */
		pr_warn_ratelimited("could not enable bpf_trace_printk events\n");
}
static DECLARE_WORK(set_printk_work, __set_printk_clr_event);
438 
/*
 * Return the bpf_trace_printk() prototype. As a side effect, schedule
 * set_printk_work so that the bpf_trace_printk trace event gets enabled.
 */
const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
{
	schedule_work(&set_printk_work);
	return &bpf_trace_printk_proto;
}
444 
445 BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, args,
446 	   u32, data_len)
447 {
448 	struct bpf_bprintf_data data = {
449 		.get_bin_args	= true,
450 		.get_buf	= true,
451 	};
452 	int ret, num_args;
453 
454 	if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
455 	    (data_len && !args))
456 		return -EINVAL;
457 	num_args = data_len / 8;
458 
459 	ret = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data);
460 	if (ret < 0)
461 		return ret;
462 
463 	ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args);
464 
465 	trace_bpf_trace_printk(data.buf);
466 
467 	bpf_bprintf_cleanup(&data);
468 
469 	return ret;
470 }
471 
/*
 * Prototype of bpf_trace_vprintk(): GPL-only; args are (read-only mem,
 * const size, read-only mem that may be NULL, const size or zero).
 */
static const struct bpf_func_proto bpf_trace_vprintk_proto = {
	.func		= bpf_trace_vprintk,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg2_type	= ARG_CONST_SIZE,
	.arg3_type	= ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
	.arg4_type	= ARG_CONST_SIZE_OR_ZERO,
};
481 
/*
 * Return the bpf_trace_vprintk() prototype. As a side effect, schedule
 * set_printk_work so that the bpf_trace_printk trace event gets enabled.
 */
const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void)
{
	schedule_work(&set_printk_work);
	return &bpf_trace_vprintk_proto;
}
487 
488 BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
489 	   const void *, args, u32, data_len)
490 {
491 	struct bpf_bprintf_data data = {
492 		.get_bin_args	= true,
493 	};
494 	int err, num_args;
495 
496 	if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
497 	    (data_len && !args))
498 		return -EINVAL;
499 	num_args = data_len / 8;
500 
501 	err = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data);
502 	if (err < 0)
503 		return err;
504 
505 	seq_bprintf(m, fmt, data.bin_args);
506 
507 	bpf_bprintf_cleanup(&data);
508 
509 	return seq_has_overflowed(m) ? -EOVERFLOW : 0;
510 }
511 
/* BTF id of struct seq_file; used as arg1_btf_id by the seq_* prototypes. */
BTF_ID_LIST_SINGLE(btf_seq_file_ids, struct, seq_file)

/*
 * Prototype of bpf_seq_printf(): GPL-only; args are (BTF pointer to
 * seq_file, read-only mem, const size, read-only mem that may be NULL,
 * const size or zero).
 */
static const struct bpf_func_proto bpf_seq_printf_proto = {
	.func		= bpf_seq_printf,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &btf_seq_file_ids[0],
	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type	= ARG_CONST_SIZE,
	.arg4_type      = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
	.arg5_type      = ARG_CONST_SIZE_OR_ZERO,
};
525 
526 BPF_CALL_3(bpf_seq_write, struct seq_file *, m, const void *, data, u32, len)
527 {
528 	return seq_write(m, data, len) ? -EOVERFLOW : 0;
529 }
530 
/*
 * Prototype of bpf_seq_write(): GPL-only; args are (BTF pointer to
 * seq_file, read-only mem, const size or zero).
 */
static const struct bpf_func_proto bpf_seq_write_proto = {
	.func		= bpf_seq_write,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &btf_seq_file_ids[0],
	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
};
540 
541 BPF_CALL_4(bpf_seq_printf_btf, struct seq_file *, m, struct btf_ptr *, ptr,
542 	   u32, btf_ptr_size, u64, flags)
543 {
544 	const struct btf *btf;
545 	s32 btf_id;
546 	int ret;
547 
548 	ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id);
549 	if (ret)
550 		return ret;
551 
552 	return btf_type_seq_show_flags(btf, btf_id, ptr->ptr, m, flags);
553 }
554 
/*
 * Prototype of bpf_seq_printf_btf(): GPL-only; args are (BTF pointer to
 * seq_file, read-only mem, const size or zero, anything).
 */
static const struct bpf_func_proto bpf_seq_printf_btf_proto = {
	.func		= bpf_seq_printf_btf,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &btf_seq_file_ids[0],
	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};
565 
566 static __always_inline int
567 get_map_perf_counter(struct bpf_map *map, u64 flags,
568 		     u64 *value, u64 *enabled, u64 *running)
569 {
570 	struct bpf_array *array = container_of(map, struct bpf_array, map);
571 	unsigned int cpu = smp_processor_id();
572 	u64 index = flags & BPF_F_INDEX_MASK;
573 	struct bpf_event_entry *ee;
574 
575 	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
576 		return -EINVAL;
577 	if (index == BPF_F_CURRENT_CPU)
578 		index = cpu;
579 	if (unlikely(index >= array->map.max_entries))
580 		return -E2BIG;
581 
582 	ee = READ_ONCE(array->ptrs[index]);
583 	if (!ee)
584 		return -ENOENT;
585 
586 	return perf_event_read_local(ee->event, value, enabled, running);
587 }
588 
589 BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
590 {
591 	u64 value = 0;
592 	int err;
593 
594 	err = get_map_perf_counter(map, flags, &value, NULL, NULL);
595 	/*
596 	 * this api is ugly since we miss [-22..-2] range of valid
597 	 * counter values, but that's uapi
598 	 */
599 	if (err)
600 		return err;
601 	return value;
602 }
603 
/* Prototype of bpf_perf_event_read(): GPL-only; args are (map, anything). */
const struct bpf_func_proto bpf_perf_event_read_proto = {
	.func		= bpf_perf_event_read,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
};
611 
612 BPF_CALL_4(bpf_perf_event_read_value, struct bpf_map *, map, u64, flags,
613 	   struct bpf_perf_event_value *, buf, u32, size)
614 {
615 	int err = -EINVAL;
616 
617 	if (unlikely(size != sizeof(struct bpf_perf_event_value)))
618 		goto clear;
619 	err = get_map_perf_counter(map, flags, &buf->counter, &buf->enabled,
620 				   &buf->running);
621 	if (unlikely(err))
622 		goto clear;
623 	return 0;
624 clear:
625 	memset(buf, 0, size);
626 	return err;
627 }
628 
/*
 * Prototype of bpf_perf_event_read_value(): GPL-only; args are
 * (map, anything, uninit mem, const size).
 */
static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
	.func		= bpf_perf_event_read_value,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg4_type	= ARG_CONST_SIZE,
};
638 
/* Accessor exposing the file-local bpf_perf_event_read_value_proto. */
const struct bpf_func_proto *bpf_get_perf_event_read_value_proto(void)
{
	return &bpf_perf_event_read_value_proto;
}
643 
644 static __always_inline u64
645 __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
646 			u64 flags, struct perf_raw_record *raw,
647 			struct perf_sample_data *sd)
648 {
649 	struct bpf_array *array = container_of(map, struct bpf_array, map);
650 	unsigned int cpu = smp_processor_id();
651 	u64 index = flags & BPF_F_INDEX_MASK;
652 	struct bpf_event_entry *ee;
653 	struct perf_event *event;
654 
655 	if (index == BPF_F_CURRENT_CPU)
656 		index = cpu;
657 	if (unlikely(index >= array->map.max_entries))
658 		return -E2BIG;
659 
660 	ee = READ_ONCE(array->ptrs[index]);
661 	if (!ee)
662 		return -ENOENT;
663 
664 	event = ee->event;
665 	if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE ||
666 		     event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
667 		return -EINVAL;
668 
669 	if (unlikely(event->oncpu != cpu))
670 		return -EOPNOTSUPP;
671 
672 	perf_sample_save_raw_data(sd, event, raw);
673 
674 	return perf_event_output(event, sd, regs);
675 }
676 
/*
 * Support executing tracepoints in normal, irq, and nmi context that each call
 * bpf_perf_event_output
 */
struct bpf_trace_sample_data {
	/* One slot per nesting level, indexed by (nest level - 1). */
	struct perf_sample_data sds[3];
};

/* Per-CPU sample data and nesting depth used by bpf_perf_event_output(). */
static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_trace_sds);
static DEFINE_PER_CPU(int, bpf_trace_nest_level);
/*
 * BPF helper: wrap @data/@size in a perf_raw_record and emit it through
 * __bpf_perf_event_output() using a per-CPU, per-nesting-level
 * perf_sample_data slot.
 *
 * Returns -EBUSY when the nesting level exceeds the number of slots,
 * -EINVAL when @flags has bits outside BPF_F_INDEX_MASK, otherwise the
 * result of __bpf_perf_event_output().
 */
BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
	   u64, flags, void *, data, u64, size)
{
	struct bpf_trace_sample_data *sds;
	struct perf_raw_record raw = {
		.frag = {
			.size = size,
			.data = data,
		},
	};
	struct perf_sample_data *sd;
	int nest_level, err;

	/* Preemption stays off while the per-CPU slot is in use. */
	preempt_disable();
	sds = this_cpu_ptr(&bpf_trace_sds);
	nest_level = this_cpu_inc_return(bpf_trace_nest_level);

	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(sds->sds))) {
		err = -EBUSY;
		goto out;
	}

	/* nest_level is 1-based after the increment above. */
	sd = &sds->sds[nest_level - 1];

	if (unlikely(flags & ~(BPF_F_INDEX_MASK))) {
		err = -EINVAL;
		goto out;
	}

	perf_sample_data_init(sd, 0, 0);

	err = __bpf_perf_event_output(regs, map, flags, &raw, sd);
out:
	/* Every exit path undoes the nesting increment. */
	this_cpu_dec(bpf_trace_nest_level);
	preempt_enable();
	return err;
}
724 
/*
 * Prototype of bpf_perf_event_output(): GPL-only; args are
 * (ctx, map, anything, read-only mem, const size or zero).
 */
static const struct bpf_func_proto bpf_perf_event_output_proto = {
	.func		= bpf_perf_event_output,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};
735 
/* Per-CPU nesting depth of bpf_event_output(). */
static DEFINE_PER_CPU(int, bpf_event_output_nest_level);
/* One pt_regs slot per nesting level, indexed by (nest level - 1). */
struct bpf_nested_pt_regs {
	struct pt_regs regs[3];
};
/* Per-CPU register and sample-data slots used by bpf_event_output(). */
static DEFINE_PER_CPU(struct bpf_nested_pt_regs, bpf_pt_regs);
static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_misc_sds);
742 
/*
 * Emit a raw record made of @meta (@meta_size bytes) and, when @ctx_size
 * is non-zero, a second fragment describing @ctx/@ctx_size with @ctx_copy
 * as its copy callback.
 *
 * The registers passed to __bpf_perf_event_output() are captured here with
 * perf_fetch_caller_regs() into a per-CPU, per-nesting-level slot.
 *
 * Returns -EBUSY when the nesting level exceeds the number of slots,
 * otherwise the result of __bpf_perf_event_output().
 */
u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
		     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
{
	struct perf_raw_frag frag = {
		.copy		= ctx_copy,
		.size		= ctx_size,
		.data		= ctx,
	};
	struct perf_raw_record raw = {
		.frag = {
			{
				/* Chain the ctx fragment only if it has data. */
				.next	= ctx_size ? &frag : NULL,
			},
			.size	= meta_size,
			.data	= meta,
		},
	};
	struct perf_sample_data *sd;
	struct pt_regs *regs;
	int nest_level;
	u64 ret;

	/* Preemption stays off while the per-CPU slots are in use. */
	preempt_disable();
	nest_level = this_cpu_inc_return(bpf_event_output_nest_level);

	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bpf_misc_sds.sds))) {
		ret = -EBUSY;
		goto out;
	}
	/* nest_level is 1-based after the increment above. */
	sd = this_cpu_ptr(&bpf_misc_sds.sds[nest_level - 1]);
	regs = this_cpu_ptr(&bpf_pt_regs.regs[nest_level - 1]);

	perf_fetch_caller_regs(regs);
	perf_sample_data_init(sd, 0, 0);

	ret = __bpf_perf_event_output(regs, map, flags, &raw, sd);
out:
	/* Every exit path undoes the nesting increment. */
	this_cpu_dec(bpf_event_output_nest_level);
	preempt_enable();
	return ret;
}
784 
/* BPF helper: return the address of `current` as an integer. */
BPF_CALL_0(bpf_get_current_task)
{
	return (long) current;
}
789 
/* Prototype of bpf_get_current_task(): GPL-only, no arguments, integer result. */
const struct bpf_func_proto bpf_get_current_task_proto = {
	.func		= bpf_get_current_task,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
};
795 
/*
 * BPF helper: return the address of `current`; the prototype types the
 * result as a trusted BTF pointer to the task type.
 */
BPF_CALL_0(bpf_get_current_task_btf)
{
	return (unsigned long) current;
}
800 
/*
 * Prototype of bpf_get_current_task_btf(): GPL-only, no arguments; the
 * result is a trusted BTF pointer typed by BTF_TRACING_TYPE_TASK.
 */
const struct bpf_func_proto bpf_get_current_task_btf_proto = {
	.func		= bpf_get_current_task_btf,
	.gpl_only	= true,
	.ret_type	= RET_PTR_TO_BTF_ID_TRUSTED,
	.ret_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
};
807 
/* BPF helper: return task_pt_regs(@task) as an integer-sized pointer value. */
BPF_CALL_1(bpf_task_pt_regs, struct task_struct *, task)
{
	return (unsigned long) task_pt_regs(task);
}
812 
/* BTF id of struct pt_regs, used as the return type id below. */
BTF_ID_LIST_SINGLE(bpf_task_pt_regs_ids, struct, pt_regs)

/*
 * Prototype of bpf_task_pt_regs(): GPL-only; takes a BTF pointer typed by
 * BTF_TRACING_TYPE_TASK and returns a BTF pointer to struct pt_regs.
 */
const struct bpf_func_proto bpf_task_pt_regs_proto = {
	.func		= bpf_task_pt_regs,
	.gpl_only	= true,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
	.ret_type	= RET_PTR_TO_BTF_ID,
	.ret_btf_id	= &bpf_task_pt_regs_ids[0],
};
823 
/* Deferred signal request handed from bpf_send_signal_common() to irq_work. */
struct send_signal_irq_work {
	struct irq_work irq_work;	/* runs do_bpf_send_signal() */
	struct task_struct *task;	/* target; reference dropped by the handler */
	u32 sig;			/* signal number */
	enum pid_type type;		/* passed to group_send_sig_info() */
	bool has_siginfo;		/* true: use @info, false: SEND_SIG_PRIV */
	struct kernel_siginfo info;	/* copy of the caller's siginfo */
};

/* One pending request per CPU. */
static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work);
834 
835 static void do_bpf_send_signal(struct irq_work *entry)
836 {
837 	struct send_signal_irq_work *work;
838 	struct kernel_siginfo *siginfo;
839 
840 	work = container_of(entry, struct send_signal_irq_work, irq_work);
841 	siginfo = work->has_siginfo ? &work->info : SEND_SIG_PRIV;
842 
843 	group_send_sig_info(work->sig, siginfo, work->task, work->type);
844 	put_task_struct(work->task);
845 }
846 
/*
 * Send signal @sig to @task, or to `current` when @task is NULL.
 *
 * With a NULL @task the signal is sent with SEND_SIG_PRIV; with an explicit
 * @task a kernel_siginfo is built (SI_KERNEL, pid/uid 0) carrying @value in
 * si_value. @type is passed through to group_send_sig_info().
 *
 * When preemption is disabled or IRQs are off, delivery is deferred to the
 * per-CPU irq_work and 0 is returned once the work is queued.
 *
 * Returns -EPERM for kthreads, exiting tasks, the global init task or when
 * nmi_uaccess_okay() fails; -EINVAL for an invalid signal on the deferred
 * path; -EBUSY if this CPU's irq_work is still busy; otherwise the result
 * of group_send_sig_info().
 */
static int bpf_send_signal_common(u32 sig, enum pid_type type, struct task_struct *task, u64 value)
{
	struct send_signal_irq_work *work = NULL;
	struct kernel_siginfo info;
	struct kernel_siginfo *siginfo;

	if (!task) {
		task = current;
		siginfo = SEND_SIG_PRIV;
	} else {
		clear_siginfo(&info);
		info.si_signo = sig;
		info.si_errno = 0;
		info.si_code = SI_KERNEL;
		info.si_pid = 0;
		info.si_uid = 0;
		info.si_value.sival_ptr = (void __user __force *)(unsigned long)value;
		siginfo = &info;
	}

	/* Similar to bpf_probe_write_user, task needs to be
	 * in a sound condition and kernel memory access be
	 * permitted in order to send signal to the current
	 * task.
	 */
	if (unlikely(task->flags & (PF_KTHREAD | PF_EXITING)))
		return -EPERM;
	if (unlikely(!nmi_uaccess_okay()))
		return -EPERM;
	/* Task should not be pid=1 to avoid kernel panic. */
	if (unlikely(is_global_init(task)))
		return -EPERM;

	if (preempt_count() != 0 || irqs_disabled()) {
		/* Do an early check on signal validity. Otherwise,
		 * the error is lost in deferred irq_work.
		 */
		if (unlikely(!valid_signal(sig)))
			return -EINVAL;

		work = this_cpu_ptr(&send_signal_work);
		if (irq_work_is_busy(&work->irq_work))
			return -EBUSY;

		/* Add the current task, which is the target of sending signal,
		 * to the irq_work. The current task may change when queued
		 * irq works get executed.
		 */
		work->task = get_task_struct(task);
		/* `info` is on this stack frame, so copy it into the work item. */
		work->has_siginfo = siginfo == &info;
		if (work->has_siginfo)
			copy_siginfo(&work->info, &info);
		work->sig = sig;
		work->type = type;
		irq_work_queue(&work->irq_work);
		return 0;
	}

	return group_send_sig_info(sig, siginfo, task, type);
}
907 
/* BPF helper: send @sig to `current` (NULL task) with PIDTYPE_TGID. */
BPF_CALL_1(bpf_send_signal, u32, sig)
{
	return bpf_send_signal_common(sig, PIDTYPE_TGID, NULL, 0);
}
912 
/* Prototype of bpf_send_signal(): not GPL-only; single unchecked argument. */
const struct bpf_func_proto bpf_send_signal_proto = {
	.func		= bpf_send_signal,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
};
919 
/* BPF helper: send @sig to `current` (NULL task) with PIDTYPE_PID. */
BPF_CALL_1(bpf_send_signal_thread, u32, sig)
{
	return bpf_send_signal_common(sig, PIDTYPE_PID, NULL, 0);
}
924 
/* Prototype of bpf_send_signal_thread(): not GPL-only; single unchecked argument. */
const struct bpf_func_proto bpf_send_signal_thread_proto = {
	.func		= bpf_send_signal_thread,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
};
931 
932 BPF_CALL_3(bpf_d_path, const struct path *, path, char *, buf, u32, sz)
933 {
934 	struct path copy;
935 	long len;
936 	char *p;
937 
938 	if (!sz)
939 		return 0;
940 
941 	/*
942 	 * The path pointer is verified as trusted and safe to use,
943 	 * but let's double check it's valid anyway to workaround
944 	 * potentially broken verifier.
945 	 */
946 	len = copy_from_kernel_nofault(&copy, path, sizeof(*path));
947 	if (len < 0)
948 		return len;
949 
950 	p = d_path(&copy, buf, sz);
951 	if (IS_ERR(p)) {
952 		len = PTR_ERR(p);
953 	} else {
954 		len = buf + sz - p;
955 		memmove(buf, p, len);
956 	}
957 
958 	return len;
959 }
960 
/*
 * Functions whose attach_btf_id makes bpf_d_path_allowed() return true for
 * programs that are neither iterators nor LSM programs.
 */
BTF_SET_START(btf_allowlist_d_path)
#ifdef CONFIG_SECURITY
BTF_ID(func, security_file_permission)
BTF_ID(func, security_inode_getattr)
BTF_ID(func, security_file_open)
#endif
#ifdef CONFIG_SECURITY_PATH
BTF_ID(func, security_path_truncate)
#endif
BTF_ID(func, vfs_truncate)
BTF_ID(func, vfs_fallocate)
BTF_ID(func, dentry_open)
BTF_ID(func, vfs_getattr)
BTF_ID(func, filp_close)
BTF_SET_END(btf_allowlist_d_path)
976 
977 static bool bpf_d_path_allowed(const struct bpf_prog *prog)
978 {
979 	if (prog->type == BPF_PROG_TYPE_TRACING &&
980 	    prog->expected_attach_type == BPF_TRACE_ITER)
981 		return true;
982 
983 	if (prog->type == BPF_PROG_TYPE_LSM)
984 		return bpf_lsm_is_sleepable_hook(prog->aux->attach_btf_id);
985 
986 	return btf_id_set_contains(&btf_allowlist_d_path,
987 				   prog->aux->attach_btf_id);
988 }
989 
/* BTF id of struct path, used as arg1_btf_id below. */
BTF_ID_LIST_SINGLE(bpf_d_path_btf_ids, struct, path)

/*
 * Prototype of bpf_d_path(): not GPL-only; args are (BTF pointer to struct
 * path, writable mem, const size or zero). Availability is gated by
 * bpf_d_path_allowed().
 */
static const struct bpf_func_proto bpf_d_path_proto = {
	.func		= bpf_d_path,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &bpf_d_path_btf_ids[0],
	.arg2_type	= ARG_PTR_TO_MEM | MEM_WRITE,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.allowed	= bpf_d_path_allowed,
};
1002 
/* Every flag bit accepted by bpf_btf_printf_prepare(). */
#define BTF_F_ALL	(BTF_F_COMPACT  | BTF_F_NONAME | \
			 BTF_F_PTR_RAW | BTF_F_ZERO)
1005 
1006 static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
1007 				  u64 flags, const struct btf **btf,
1008 				  s32 *btf_id)
1009 {
1010 	const struct btf_type *t;
1011 
1012 	if (unlikely(flags & ~(BTF_F_ALL)))
1013 		return -EINVAL;
1014 
1015 	if (btf_ptr_size != sizeof(struct btf_ptr))
1016 		return -EINVAL;
1017 
1018 	*btf = bpf_get_btf_vmlinux();
1019 
1020 	if (IS_ERR_OR_NULL(*btf))
1021 		return IS_ERR(*btf) ? PTR_ERR(*btf) : -EINVAL;
1022 
1023 	if (ptr->type_id > 0)
1024 		*btf_id = ptr->type_id;
1025 	else
1026 		return -EINVAL;
1027 
1028 	if (*btf_id > 0)
1029 		t = btf_type_by_id(*btf, *btf_id);
1030 	if (*btf_id <= 0 || !t)
1031 		return -ENOENT;
1032 
1033 	return 0;
1034 }
1035 
1036 BPF_CALL_5(bpf_snprintf_btf, char *, str, u32, str_size, struct btf_ptr *, ptr,
1037 	   u32, btf_ptr_size, u64, flags)
1038 {
1039 	const struct btf *btf;
1040 	s32 btf_id;
1041 	int ret;
1042 
1043 	ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id);
1044 	if (ret)
1045 		return ret;
1046 
1047 	return btf_type_snprintf_show(btf, btf_id, ptr->ptr, str, str_size,
1048 				      flags);
1049 }
1050 
/*
 * Prototype of bpf_snprintf_btf(): not GPL-only; args are (writable mem,
 * const size, read-only mem, const size, anything).
 */
const struct bpf_func_proto bpf_snprintf_btf_proto = {
	.func		= bpf_snprintf_btf,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM | MEM_WRITE,
	.arg2_type	= ARG_CONST_SIZE,
	.arg3_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg4_type	= ARG_CONST_SIZE,
	.arg5_type	= ARG_ANYTHING,
};
1061 
1062 BPF_CALL_1(bpf_get_func_ip_tracing, void *, ctx)
1063 {
1064 	/* This helper call is inlined by verifier. */
1065 	return ((u64 *)ctx)[-2];
1066 }
1067 
/* Verifier signature of bpf_get_func_ip_tracing(): (ctx) -> integer, GPL only. */
static const struct bpf_func_proto bpf_get_func_ip_proto_tracing = {
	.func		= bpf_get_func_ip_tracing,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};
1074 
/*
 * Translate a probe address into the function entry address.
 *
 * With CONFIG_X86_KERNEL_IBT, if the instruction located ENDBR_INSN_SIZE
 * bytes before @fentry_ip is an ENDBR, the address of that ENDBR is
 * returned; in every other case @fentry_ip is returned unchanged.
 */
static inline unsigned long get_entry_ip(unsigned long fentry_ip)
{
	unsigned long entry_ip = fentry_ip;

#ifdef CONFIG_X86_KERNEL_IBT
	if (is_endbr((void *)(fentry_ip - ENDBR_INSN_SIZE)))
		entry_ip = fentry_ip - ENDBR_INSN_SIZE;
#endif
	return entry_ip;
}
1083 
1084 BPF_CALL_1(bpf_get_func_ip_kprobe, struct pt_regs *, regs)
1085 {
1086 	struct bpf_trace_run_ctx *run_ctx __maybe_unused;
1087 	struct kprobe *kp;
1088 
1089 #ifdef CONFIG_UPROBES
1090 	run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx);
1091 	if (run_ctx->is_uprobe)
1092 		return ((struct uprobe_dispatch_data *)current->utask->vaddr)->bp_addr;
1093 #endif
1094 
1095 	kp = kprobe_running();
1096 
1097 	if (!kp || !(kp->flags & KPROBE_FLAG_ON_FUNC_ENTRY))
1098 		return 0;
1099 
1100 	return get_entry_ip((uintptr_t)kp->addr);
1101 }
1102 
/* Verifier signature of bpf_get_func_ip_kprobe(): (ctx) -> integer, GPL only. */
static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe = {
	.func		= bpf_get_func_ip_kprobe,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};
1109 
1110 BPF_CALL_1(bpf_get_func_ip_kprobe_multi, struct pt_regs *, regs)
1111 {
1112 	return bpf_kprobe_multi_entry_ip(current->bpf_ctx);
1113 }
1114 
/* Verifier signature of bpf_get_func_ip_kprobe_multi(): (ctx) -> integer. */
static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe_multi = {
	.func		= bpf_get_func_ip_kprobe_multi,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};
1121 
1122 BPF_CALL_1(bpf_get_attach_cookie_kprobe_multi, struct pt_regs *, regs)
1123 {
1124 	return bpf_kprobe_multi_cookie(current->bpf_ctx);
1125 }
1126 
/* Verifier signature of bpf_get_attach_cookie_kprobe_multi(): (ctx) -> integer. */
static const struct bpf_func_proto bpf_get_attach_cookie_proto_kmulti = {
	.func		= bpf_get_attach_cookie_kprobe_multi,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};
1133 
1134 BPF_CALL_1(bpf_get_func_ip_uprobe_multi, struct pt_regs *, regs)
1135 {
1136 	return bpf_uprobe_multi_entry_ip(current->bpf_ctx);
1137 }
1138 
/* Verifier signature of bpf_get_func_ip_uprobe_multi(): (ctx) -> integer. */
static const struct bpf_func_proto bpf_get_func_ip_proto_uprobe_multi = {
	.func		= bpf_get_func_ip_uprobe_multi,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};
1145 
1146 BPF_CALL_1(bpf_get_attach_cookie_uprobe_multi, struct pt_regs *, regs)
1147 {
1148 	return bpf_uprobe_multi_cookie(current->bpf_ctx);
1149 }
1150 
/* Verifier signature of bpf_get_attach_cookie_uprobe_multi(): (ctx) -> integer. */
static const struct bpf_func_proto bpf_get_attach_cookie_proto_umulti = {
	.func		= bpf_get_attach_cookie_uprobe_multi,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};
1157 
1158 BPF_CALL_1(bpf_get_attach_cookie_trace, void *, ctx)
1159 {
1160 	struct bpf_trace_run_ctx *run_ctx;
1161 
1162 	run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx);
1163 	return run_ctx->bpf_cookie;
1164 }
1165 
/* Verifier signature of bpf_get_attach_cookie_trace(): (ctx) -> integer. */
static const struct bpf_func_proto bpf_get_attach_cookie_proto_trace = {
	.func		= bpf_get_attach_cookie_trace,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};
1172 
1173 BPF_CALL_1(bpf_get_attach_cookie_pe, struct bpf_perf_event_data_kern *, ctx)
1174 {
1175 	return ctx->event->bpf_cookie;
1176 }
1177 
/* Verifier signature of bpf_get_attach_cookie_pe(): (ctx) -> integer. */
static const struct bpf_func_proto bpf_get_attach_cookie_proto_pe = {
	.func		= bpf_get_attach_cookie_pe,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};
1184 
1185 BPF_CALL_1(bpf_get_attach_cookie_tracing, void *, ctx)
1186 {
1187 	struct bpf_trace_run_ctx *run_ctx;
1188 
1189 	run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx);
1190 	return run_ctx->bpf_cookie;
1191 }
1192 
/* Verifier signature of bpf_get_attach_cookie_tracing(): (ctx) -> integer. */
static const struct bpf_func_proto bpf_get_attach_cookie_proto_tracing = {
	.func		= bpf_get_attach_cookie_tracing,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};
1199 
/*
 * bpf_get_branch_snapshot() helper: copy branch records into @buf.
 *
 * @buf:   destination buffer
 * @size:  size of @buf in bytes; only whole struct perf_branch_entry
 *         records are requested (size / entry size)
 * @flags: must be zero
 *
 * Return: number of bytes written (entries * entry size), -EINVAL when
 * @flags is non-zero, -ENOENT when no entry was captured.
 *
 * NOTE(review): the snapshot is taken before @flags is validated.  This
 * ordering looks deliberate (presumably so that the helper's own branches do
 * not displace useful records) - confirm before reordering.
 */
BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags)
{
	static const u32 br_entry_size = sizeof(struct perf_branch_entry);
	u32 entry_cnt = size / br_entry_size;

	entry_cnt = static_call(perf_snapshot_branch_stack)(buf, entry_cnt);

	if (unlikely(flags))
		return -EINVAL;

	if (!entry_cnt)
		return -ENOENT;

	return entry_cnt * br_entry_size;
}
1215 
/*
 * Verifier signature of bpf_get_branch_snapshot(): (uninitialised buffer,
 * its size, zero allowed) -> integer, GPL only.  No type is declared here
 * for the helper's third argument (flags).
 */
const struct bpf_func_proto bpf_get_branch_snapshot_proto = {
	.func		= bpf_get_branch_snapshot,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
};
1223 
1224 BPF_CALL_3(get_func_arg, void *, ctx, u32, n, u64 *, value)
1225 {
1226 	/* This helper call is inlined by verifier. */
1227 	u64 nr_args = ((u64 *)ctx)[-1] & 0xFF;
1228 
1229 	if ((u64) n >= nr_args)
1230 		return -EINVAL;
1231 	*value = ((u64 *)ctx)[n];
1232 	return 0;
1233 }
1234 
/*
 * Verifier signature of get_func_arg(): (ctx, index, pointer to an aligned,
 * uninitialised, writable u64) -> integer.
 */
static const struct bpf_func_proto bpf_get_func_arg_proto = {
	.func		= get_func_arg,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
	.arg3_size	= sizeof(u64),
};
1243 
1244 BPF_CALL_2(get_func_ret, void *, ctx, u64 *, value)
1245 {
1246 	/* This helper call is inlined by verifier. */
1247 	u64 nr_args = ((u64 *)ctx)[-1] & 0xFF;
1248 
1249 	*value = ((u64 *)ctx)[nr_args];
1250 	return 0;
1251 }
1252 
/*
 * Verifier signature of get_func_ret(): (ctx, pointer to an aligned,
 * uninitialised, writable u64) -> integer.
 */
static const struct bpf_func_proto bpf_get_func_ret_proto = {
	.func		= get_func_ret,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
	.arg2_size	= sizeof(u64),
};
1260 
1261 BPF_CALL_1(get_func_arg_cnt, void *, ctx)
1262 {
1263 	/* This helper call is inlined by verifier. */
1264 	return ((u64 *)ctx)[-1] & 0xFF;
1265 }
1266 
/* Verifier signature of get_func_arg_cnt(): (ctx) -> integer. */
static const struct bpf_func_proto bpf_get_func_arg_cnt_proto = {
	.func		= get_func_arg_cnt,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};
1272 
1273 static const struct bpf_func_proto *
1274 bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
1275 {
1276 	const struct bpf_func_proto *func_proto;
1277 
1278 	switch (func_id) {
1279 	case BPF_FUNC_get_smp_processor_id:
1280 		return &bpf_get_smp_processor_id_proto;
1281 #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
1282 	case BPF_FUNC_probe_read:
1283 		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
1284 		       NULL : &bpf_probe_read_compat_proto;
1285 	case BPF_FUNC_probe_read_str:
1286 		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
1287 		       NULL : &bpf_probe_read_compat_str_proto;
1288 #endif
1289 	case BPF_FUNC_get_func_ip:
1290 		return &bpf_get_func_ip_proto_tracing;
1291 	default:
1292 		break;
1293 	}
1294 
1295 	func_proto = bpf_base_func_proto(func_id, prog);
1296 	if (func_proto)
1297 		return func_proto;
1298 
1299 	if (!bpf_token_capable(prog->aux->token, CAP_SYS_ADMIN))
1300 		return NULL;
1301 
1302 	switch (func_id) {
1303 	case BPF_FUNC_probe_write_user:
1304 		return security_locked_down(LOCKDOWN_BPF_WRITE_USER) < 0 ?
1305 		       NULL : &bpf_probe_write_user_proto;
1306 	default:
1307 		return NULL;
1308 	}
1309 }
1310 
1311 static bool is_kprobe_multi(const struct bpf_prog *prog)
1312 {
1313 	return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI ||
1314 	       prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION;
1315 }
1316 
1317 static inline bool is_kprobe_session(const struct bpf_prog *prog)
1318 {
1319 	return prog->type == BPF_PROG_TYPE_KPROBE &&
1320 	       prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION;
1321 }
1322 
1323 static inline bool is_uprobe_multi(const struct bpf_prog *prog)
1324 {
1325 	return prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI ||
1326 	       prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION;
1327 }
1328 
1329 static inline bool is_uprobe_session(const struct bpf_prog *prog)
1330 {
1331 	return prog->type == BPF_PROG_TYPE_KPROBE &&
1332 	       prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION;
1333 }
1334 
1335 static inline bool is_trace_fsession(const struct bpf_prog *prog)
1336 {
1337 	return prog->type == BPF_PROG_TYPE_TRACING &&
1338 	       (prog->expected_attach_type == BPF_TRACE_FSESSION ||
1339 		prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI);
1340 }
1341 
1342 static const struct bpf_func_proto *
1343 kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
1344 {
1345 	switch (func_id) {
1346 	case BPF_FUNC_perf_event_output:
1347 		return &bpf_perf_event_output_proto;
1348 	case BPF_FUNC_get_stackid:
1349 		return &bpf_get_stackid_proto;
1350 	case BPF_FUNC_get_stack:
1351 		return prog->sleepable ? &bpf_get_stack_sleepable_proto : &bpf_get_stack_proto;
1352 #ifdef CONFIG_BPF_KPROBE_OVERRIDE
1353 	case BPF_FUNC_override_return:
1354 		return &bpf_override_return_proto;
1355 #endif
1356 	case BPF_FUNC_get_func_ip:
1357 		if (is_kprobe_multi(prog))
1358 			return &bpf_get_func_ip_proto_kprobe_multi;
1359 		if (is_uprobe_multi(prog))
1360 			return &bpf_get_func_ip_proto_uprobe_multi;
1361 		return &bpf_get_func_ip_proto_kprobe;
1362 	case BPF_FUNC_get_attach_cookie:
1363 		if (is_kprobe_multi(prog))
1364 			return &bpf_get_attach_cookie_proto_kmulti;
1365 		if (is_uprobe_multi(prog))
1366 			return &bpf_get_attach_cookie_proto_umulti;
1367 		return &bpf_get_attach_cookie_proto_trace;
1368 	default:
1369 		return bpf_tracing_func_proto(func_id, prog);
1370 	}
1371 }
1372 
1373 /* bpf+kprobe programs can access fields of 'struct pt_regs' */
1374 static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
1375 					const struct bpf_prog *prog,
1376 					struct bpf_insn_access_aux *info)
1377 {
1378 	if (off < 0 || off >= sizeof(struct pt_regs))
1379 		return false;
1380 	if (off % size != 0)
1381 		return false;
1382 	/*
1383 	 * Assertion for 32 bit to make sure last 8 byte access
1384 	 * (BPF_DW) to the last 4 byte member is disallowed.
1385 	 */
1386 	if (off + size > sizeof(struct pt_regs))
1387 		return false;
1388 
1389 	if (type == BPF_WRITE)
1390 		prog->aux->kprobe_write_ctx = true;
1391 
1392 	return true;
1393 }
1394 
/* Verifier callbacks for kprobe programs: helper lookup and ctx access check. */
const struct bpf_verifier_ops kprobe_verifier_ops = {
	.get_func_proto  = kprobe_prog_func_proto,
	.is_valid_access = kprobe_prog_is_valid_access,
};

/* No program-level operations are defined for kprobe programs. */
const struct bpf_prog_ops kprobe_prog_ops = {
};
1402 
1403 BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map,
1404 	   u64, flags, void *, data, u64, size)
1405 {
1406 	struct pt_regs *regs = *(struct pt_regs **)tp_buff;
1407 
1408 	/*
1409 	 * r1 points to perf tracepoint buffer where first 8 bytes are hidden
1410 	 * from bpf program and contain a pointer to 'struct pt_regs'. Fetch it
1411 	 * from there and call the same bpf_perf_event_output() helper inline.
1412 	 */
1413 	return ____bpf_perf_event_output(regs, map, flags, data, size);
1414 }
1415 
/*
 * Verifier signature of bpf_perf_event_output_tp(): (ctx, map, flags,
 * read-only data, data size with zero allowed) -> integer, GPL only.
 */
static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
	.func		= bpf_perf_event_output_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};
1426 
1427 BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map,
1428 	   u64, flags)
1429 {
1430 	struct pt_regs *regs = *(struct pt_regs **)tp_buff;
1431 
1432 	/*
1433 	 * Same comment as in bpf_perf_event_output_tp(), only that this time
1434 	 * the other helper's function body cannot be inlined due to being
1435 	 * external, thus we need to call raw helper function.
1436 	 */
1437 	return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
1438 			       flags, 0, 0);
1439 }
1440 
/* Verifier signature of bpf_get_stackid_tp(): (ctx, map, flags) -> integer, GPL only. */
static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
	.func		= bpf_get_stackid_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
};
1449 
1450 BPF_CALL_4(bpf_get_stack_tp, void *, tp_buff, void *, buf, u32, size,
1451 	   u64, flags)
1452 {
1453 	struct pt_regs *regs = *(struct pt_regs **)tp_buff;
1454 
1455 	return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
1456 			     (unsigned long) size, flags, 0);
1457 }
1458 
/*
 * Verifier signature of bpf_get_stack_tp(): (ctx, uninitialised buffer,
 * its size with zero allowed, flags) -> integer, GPL only.
 */
static const struct bpf_func_proto bpf_get_stack_proto_tp = {
	.func		= bpf_get_stack_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};
1468 
1469 static const struct bpf_func_proto *
1470 tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
1471 {
1472 	switch (func_id) {
1473 	case BPF_FUNC_perf_event_output:
1474 		return &bpf_perf_event_output_proto_tp;
1475 	case BPF_FUNC_get_stackid:
1476 		return &bpf_get_stackid_proto_tp;
1477 	case BPF_FUNC_get_stack:
1478 		return &bpf_get_stack_proto_tp;
1479 	case BPF_FUNC_get_attach_cookie:
1480 		return &bpf_get_attach_cookie_proto_trace;
1481 	default:
1482 		return bpf_tracing_func_proto(func_id, prog);
1483 	}
1484 }
1485 
1486 static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
1487 				    const struct bpf_prog *prog,
1488 				    struct bpf_insn_access_aux *info)
1489 {
1490 	if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
1491 		return false;
1492 	if (type != BPF_READ)
1493 		return false;
1494 	if (off % size != 0)
1495 		return false;
1496 
1497 	BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(__u64));
1498 	return true;
1499 }
1500 
/* Verifier callbacks for tracepoint programs: helper lookup and ctx access check. */
const struct bpf_verifier_ops tracepoint_verifier_ops = {
	.get_func_proto  = tp_prog_func_proto,
	.is_valid_access = tp_prog_is_valid_access,
};

/* No program-level operations are defined for tracepoint programs. */
const struct bpf_prog_ops tracepoint_prog_ops = {
};
1508 
1509 BPF_CALL_3(bpf_perf_prog_read_value, struct bpf_perf_event_data_kern *, ctx,
1510 	   struct bpf_perf_event_value *, buf, u32, size)
1511 {
1512 	int err = -EINVAL;
1513 
1514 	if (unlikely(size != sizeof(struct bpf_perf_event_value)))
1515 		goto clear;
1516 	err = perf_event_read_local(ctx->event, &buf->counter, &buf->enabled,
1517 				    &buf->running);
1518 	if (unlikely(err))
1519 		goto clear;
1520 	return 0;
1521 clear:
1522 	memset(buf, 0, size);
1523 	return err;
1524 }
1525 
1526 static const struct bpf_func_proto bpf_perf_prog_read_value_proto = {
1527          .func           = bpf_perf_prog_read_value,
1528          .gpl_only       = true,
1529          .ret_type       = RET_INTEGER,
1530          .arg1_type      = ARG_PTR_TO_CTX,
1531          .arg2_type      = ARG_PTR_TO_UNINIT_MEM,
1532          .arg3_type      = ARG_CONST_SIZE,
1533 };
1534 
1535 BPF_CALL_4(bpf_read_branch_records, struct bpf_perf_event_data_kern *, ctx,
1536 	   void *, buf, u32, size, u64, flags)
1537 {
1538 	static const u32 br_entry_size = sizeof(struct perf_branch_entry);
1539 	struct perf_branch_stack *br_stack = ctx->data->br_stack;
1540 	u32 to_copy;
1541 
1542 	if (unlikely(flags & ~BPF_F_GET_BRANCH_RECORDS_SIZE))
1543 		return -EINVAL;
1544 
1545 	if (unlikely(!(ctx->data->sample_flags & PERF_SAMPLE_BRANCH_STACK)))
1546 		return -ENOENT;
1547 
1548 	if (unlikely(!br_stack))
1549 		return -ENOENT;
1550 
1551 	if (flags & BPF_F_GET_BRANCH_RECORDS_SIZE)
1552 		return br_stack->nr * br_entry_size;
1553 
1554 	if (!buf || (size % br_entry_size != 0))
1555 		return -EINVAL;
1556 
1557 	to_copy = min_t(u32, br_stack->nr * br_entry_size, size);
1558 	memcpy(buf, br_stack->entries, to_copy);
1559 
1560 	return to_copy;
1561 }
1562 
/*
 * Verifier signature of bpf_read_branch_records(): (ctx, writable buffer
 * that may be NULL, its size with zero allowed, flags) -> integer, GPL only.
 */
static const struct bpf_func_proto bpf_read_branch_records_proto = {
	.func           = bpf_read_branch_records,
	.gpl_only       = true,
	.ret_type       = RET_INTEGER,
	.arg1_type      = ARG_PTR_TO_CTX,
	.arg2_type      = ARG_PTR_TO_MEM_OR_NULL | MEM_WRITE,
	.arg3_type      = ARG_CONST_SIZE_OR_ZERO,
	.arg4_type      = ARG_ANYTHING,
};
1572 
1573 static const struct bpf_func_proto *
1574 pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
1575 {
1576 	switch (func_id) {
1577 	case BPF_FUNC_perf_event_output:
1578 		return &bpf_perf_event_output_proto_tp;
1579 	case BPF_FUNC_get_stackid:
1580 		return &bpf_get_stackid_proto_pe;
1581 	case BPF_FUNC_get_stack:
1582 		return &bpf_get_stack_proto_pe;
1583 	case BPF_FUNC_perf_prog_read_value:
1584 		return &bpf_perf_prog_read_value_proto;
1585 	case BPF_FUNC_read_branch_records:
1586 		return &bpf_read_branch_records_proto;
1587 	case BPF_FUNC_get_attach_cookie:
1588 		return &bpf_get_attach_cookie_proto_pe;
1589 	default:
1590 		return bpf_tracing_func_proto(func_id, prog);
1591 	}
1592 }
1593 
/*
 * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp
 * to avoid potential recursive reuse issues when/if tracepoints are added
 * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack.
 *
 * Since raw tracepoints run despite bpf_prog_active, support concurrent usage
 * in normal, irq, and nmi context: one pt_regs slot per nesting level.
 */
struct bpf_raw_tp_regs {
	struct pt_regs regs[3];
};
/* Per-CPU scratch registers handed out by get_bpf_raw_tp_regs(). */
static DEFINE_PER_CPU(struct bpf_raw_tp_regs, bpf_raw_tp_regs);
/* Per-CPU count of slots currently handed out (1-based index of the top slot). */
static DEFINE_PER_CPU(int, bpf_raw_tp_nest_level);
1607 static struct pt_regs *get_bpf_raw_tp_regs(void)
1608 {
1609 	struct bpf_raw_tp_regs *tp_regs = this_cpu_ptr(&bpf_raw_tp_regs);
1610 	int nest_level = this_cpu_inc_return(bpf_raw_tp_nest_level);
1611 
1612 	if (nest_level > ARRAY_SIZE(tp_regs->regs)) {
1613 		this_cpu_dec(bpf_raw_tp_nest_level);
1614 		return ERR_PTR(-EBUSY);
1615 	}
1616 
1617 	return &tp_regs->regs[nest_level - 1];
1618 }
1619 
/*
 * Release the slot reserved by a successful get_bpf_raw_tp_regs() by
 * decrementing the per-CPU nesting level.
 */
static void put_bpf_raw_tp_regs(void)
{
	this_cpu_dec(bpf_raw_tp_nest_level);
}
1624 
1625 BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args,
1626 	   struct bpf_map *, map, u64, flags, void *, data, u64, size)
1627 {
1628 	struct pt_regs *regs = get_bpf_raw_tp_regs();
1629 	int ret;
1630 
1631 	if (IS_ERR(regs))
1632 		return PTR_ERR(regs);
1633 
1634 	perf_fetch_caller_regs(regs);
1635 	ret = ____bpf_perf_event_output(regs, map, flags, data, size);
1636 
1637 	put_bpf_raw_tp_regs();
1638 	return ret;
1639 }
1640 
/*
 * Verifier signature of bpf_perf_event_output_raw_tp(): (ctx, map, flags,
 * read-only data, data size with zero allowed) -> integer, GPL only.
 */
static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
	.func		= bpf_perf_event_output_raw_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};
1651 
/* Helper protos defined outside this file and returned by tracing_prog_func_proto(). */
extern const struct bpf_func_proto bpf_skb_output_proto;
extern const struct bpf_func_proto bpf_xdp_output_proto;
extern const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto;
1655 
1656 BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
1657 	   struct bpf_map *, map, u64, flags)
1658 {
1659 	struct pt_regs *regs = get_bpf_raw_tp_regs();
1660 	int ret;
1661 
1662 	if (IS_ERR(regs))
1663 		return PTR_ERR(regs);
1664 
1665 	perf_fetch_caller_regs(regs);
1666 	/* similar to bpf_perf_event_output_tp, but pt_regs fetched differently */
1667 	ret = bpf_get_stackid((unsigned long) regs, (unsigned long) map,
1668 			      flags, 0, 0);
1669 	put_bpf_raw_tp_regs();
1670 	return ret;
1671 }
1672 
/* Verifier signature of bpf_get_stackid_raw_tp(): (ctx, map, flags) -> integer, GPL only. */
static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
	.func		= bpf_get_stackid_raw_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
};
1681 
1682 BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args,
1683 	   void *, buf, u32, size, u64, flags)
1684 {
1685 	struct pt_regs *regs = get_bpf_raw_tp_regs();
1686 	int ret;
1687 
1688 	if (IS_ERR(regs))
1689 		return PTR_ERR(regs);
1690 
1691 	perf_fetch_caller_regs(regs);
1692 	ret = bpf_get_stack((unsigned long) regs, (unsigned long) buf,
1693 			    (unsigned long) size, flags, 0);
1694 	put_bpf_raw_tp_regs();
1695 	return ret;
1696 }
1697 
/*
 * Verifier signature of bpf_get_stack_raw_tp(): (ctx, uninitialised buffer,
 * its size with zero allowed, flags) -> integer, GPL only.
 */
static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
	.func		= bpf_get_stack_raw_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};
1707 
1708 static const struct bpf_func_proto *
1709 raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
1710 {
1711 	switch (func_id) {
1712 	case BPF_FUNC_perf_event_output:
1713 		return &bpf_perf_event_output_proto_raw_tp;
1714 	case BPF_FUNC_get_stackid:
1715 		return &bpf_get_stackid_proto_raw_tp;
1716 	case BPF_FUNC_get_stack:
1717 		return &bpf_get_stack_proto_raw_tp;
1718 	case BPF_FUNC_get_attach_cookie:
1719 		return &bpf_get_attach_cookie_proto_tracing;
1720 	default:
1721 		return bpf_tracing_func_proto(func_id, prog);
1722 	}
1723 }
1724 
1725 const struct bpf_func_proto *
1726 tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
1727 {
1728 	const struct bpf_func_proto *fn;
1729 
1730 	switch (func_id) {
1731 #ifdef CONFIG_NET
1732 	case BPF_FUNC_skb_output:
1733 		return &bpf_skb_output_proto;
1734 	case BPF_FUNC_xdp_output:
1735 		return &bpf_xdp_output_proto;
1736 	case BPF_FUNC_skc_to_tcp6_sock:
1737 		return &bpf_skc_to_tcp6_sock_proto;
1738 	case BPF_FUNC_skc_to_tcp_sock:
1739 		return &bpf_skc_to_tcp_sock_proto;
1740 	case BPF_FUNC_skc_to_tcp_timewait_sock:
1741 		return &bpf_skc_to_tcp_timewait_sock_proto;
1742 	case BPF_FUNC_skc_to_tcp_request_sock:
1743 		return &bpf_skc_to_tcp_request_sock_proto;
1744 	case BPF_FUNC_skc_to_udp6_sock:
1745 		return &bpf_skc_to_udp6_sock_proto;
1746 	case BPF_FUNC_skc_to_unix_sock:
1747 		return &bpf_skc_to_unix_sock_proto;
1748 	case BPF_FUNC_skc_to_mptcp_sock:
1749 		return &bpf_skc_to_mptcp_sock_proto;
1750 	case BPF_FUNC_sk_storage_get:
1751 		return &bpf_sk_storage_get_tracing_proto;
1752 	case BPF_FUNC_sk_storage_delete:
1753 		return &bpf_sk_storage_delete_tracing_proto;
1754 	case BPF_FUNC_sock_from_file:
1755 		return &bpf_sock_from_file_proto;
1756 	case BPF_FUNC_get_socket_cookie:
1757 		return &bpf_get_socket_ptr_cookie_proto;
1758 	case BPF_FUNC_xdp_get_buff_len:
1759 		return &bpf_xdp_get_buff_len_trace_proto;
1760 #endif
1761 	case BPF_FUNC_seq_printf:
1762 		return prog->expected_attach_type == BPF_TRACE_ITER ?
1763 		       &bpf_seq_printf_proto :
1764 		       NULL;
1765 	case BPF_FUNC_seq_write:
1766 		return prog->expected_attach_type == BPF_TRACE_ITER ?
1767 		       &bpf_seq_write_proto :
1768 		       NULL;
1769 	case BPF_FUNC_seq_printf_btf:
1770 		return prog->expected_attach_type == BPF_TRACE_ITER ?
1771 		       &bpf_seq_printf_btf_proto :
1772 		       NULL;
1773 	case BPF_FUNC_d_path:
1774 		return &bpf_d_path_proto;
1775 	case BPF_FUNC_get_func_arg:
1776 		if (bpf_prog_has_trampoline(prog) ||
1777 		    prog->expected_attach_type == BPF_TRACE_RAW_TP)
1778 			return &bpf_get_func_arg_proto;
1779 		return NULL;
1780 	case BPF_FUNC_get_func_ret:
1781 		return bpf_prog_has_trampoline(prog) ? &bpf_get_func_ret_proto : NULL;
1782 	case BPF_FUNC_get_func_arg_cnt:
1783 		if (bpf_prog_has_trampoline(prog) ||
1784 		    prog->expected_attach_type == BPF_TRACE_RAW_TP)
1785 			return &bpf_get_func_arg_cnt_proto;
1786 		return NULL;
1787 	case BPF_FUNC_get_attach_cookie:
1788 		if (prog->type == BPF_PROG_TYPE_TRACING &&
1789 		    prog->expected_attach_type == BPF_TRACE_RAW_TP)
1790 			return &bpf_get_attach_cookie_proto_tracing;
1791 		return bpf_prog_has_trampoline(prog) ? &bpf_get_attach_cookie_proto_tracing : NULL;
1792 	default:
1793 		fn = raw_tp_prog_func_proto(func_id, prog);
1794 		if (!fn && prog->expected_attach_type == BPF_TRACE_ITER)
1795 			fn = bpf_iter_get_func_proto(func_id, prog);
1796 		return fn;
1797 	}
1798 }
1799 
/*
 * Context access check for raw tracepoint programs: delegates to
 * bpf_tracing_ctx_access(); @prog and @info are not consulted.
 */
static bool raw_tp_prog_is_valid_access(int off, int size,
					enum bpf_access_type type,
					const struct bpf_prog *prog,
					struct bpf_insn_access_aux *info)
{
	return bpf_tracing_ctx_access(off, size, type);
}
1807 
/*
 * Context access check for tracing programs: delegates to
 * bpf_tracing_btf_ctx_access() with all arguments passed through.
 */
static bool tracing_prog_is_valid_access(int off, int size,
					 enum bpf_access_type type,
					 const struct bpf_prog *prog,
					 struct bpf_insn_access_aux *info)
{
	return bpf_tracing_btf_ctx_access(off, size, type, prog, info);
}
1815 
/*
 * Weak default for the tracing test_run hook: always fails with -ENOTSUPP.
 * Being __weak, it is replaced by a non-weak definition when one is linked.
 */
int __weak bpf_prog_test_run_tracing(struct bpf_prog *prog,
				     const union bpf_attr *kattr,
				     union bpf_attr __user *uattr)
{
	return -ENOTSUPP;
}
1822 
/* Verifier callbacks for raw tracepoint programs. */
const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
	.get_func_proto  = raw_tp_prog_func_proto,
	.is_valid_access = raw_tp_prog_is_valid_access,
};

/* Raw tracepoint programs support test_run only when CONFIG_NET is set. */
const struct bpf_prog_ops raw_tracepoint_prog_ops = {
#ifdef CONFIG_NET
	.test_run = bpf_prog_test_run_raw_tp,
#endif
};
1833 
/* Verifier callbacks for tracing programs. */
const struct bpf_verifier_ops tracing_verifier_ops = {
	.get_func_proto  = tracing_prog_func_proto,
	.is_valid_access = tracing_prog_is_valid_access,
};

/* Tracing programs run tests through bpf_prog_test_run_tracing(). */
const struct bpf_prog_ops tracing_prog_ops = {
	.test_run = bpf_prog_test_run_tracing,
};
1842 
1843 static bool raw_tp_writable_prog_is_valid_access(int off, int size,
1844 						 enum bpf_access_type type,
1845 						 const struct bpf_prog *prog,
1846 						 struct bpf_insn_access_aux *info)
1847 {
1848 	if (off == 0) {
1849 		if (size != sizeof(u64) || type != BPF_READ)
1850 			return false;
1851 		info->reg_type = PTR_TO_TP_BUFFER;
1852 	}
1853 	return raw_tp_prog_is_valid_access(off, size, type, prog, info);
1854 }
1855 
/*
 * Verifier callbacks for writable raw tracepoint programs: same helper
 * lookup as plain raw tracepoints, with the writable-aware access check.
 */
const struct bpf_verifier_ops raw_tracepoint_writable_verifier_ops = {
	.get_func_proto  = raw_tp_prog_func_proto,
	.is_valid_access = raw_tp_writable_prog_is_valid_access,
};

/* No program-level operations are defined for writable raw tracepoint programs. */
const struct bpf_prog_ops raw_tracepoint_writable_prog_ops = {
};
1863 
1864 static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
1865 				    const struct bpf_prog *prog,
1866 				    struct bpf_insn_access_aux *info)
1867 {
1868 	const int size_u64 = sizeof(u64);
1869 
1870 	if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
1871 		return false;
1872 	if (type != BPF_READ)
1873 		return false;
1874 	if (off % size != 0) {
1875 		if (sizeof(unsigned long) != 4)
1876 			return false;
1877 		if (size != 8)
1878 			return false;
1879 		if (off % size != 4)
1880 			return false;
1881 	}
1882 
1883 	switch (off) {
1884 	case bpf_ctx_range(struct bpf_perf_event_data, sample_period):
1885 		bpf_ctx_record_field_size(info, size_u64);
1886 		if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
1887 			return false;
1888 		break;
1889 	case bpf_ctx_range(struct bpf_perf_event_data, addr):
1890 		bpf_ctx_record_field_size(info, size_u64);
1891 		if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
1892 			return false;
1893 		break;
1894 	default:
1895 		if (size != sizeof(long))
1896 			return false;
1897 	}
1898 
1899 	return true;
1900 }
1901 
1902 static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
1903 				      const struct bpf_insn *si,
1904 				      struct bpf_insn *insn_buf,
1905 				      struct bpf_prog *prog, u32 *target_size)
1906 {
1907 	struct bpf_insn *insn = insn_buf;
1908 
1909 	switch (si->off) {
1910 	case offsetof(struct bpf_perf_event_data, sample_period):
1911 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
1912 						       data), si->dst_reg, si->src_reg,
1913 				      offsetof(struct bpf_perf_event_data_kern, data));
1914 		*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
1915 				      bpf_target_off(struct perf_sample_data, period, 8,
1916 						     target_size));
1917 		break;
1918 	case offsetof(struct bpf_perf_event_data, addr):
1919 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
1920 						       data), si->dst_reg, si->src_reg,
1921 				      offsetof(struct bpf_perf_event_data_kern, data));
1922 		*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
1923 				      bpf_target_off(struct perf_sample_data, addr, 8,
1924 						     target_size));
1925 		break;
1926 	default:
1927 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
1928 						       regs), si->dst_reg, si->src_reg,
1929 				      offsetof(struct bpf_perf_event_data_kern, regs));
1930 		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), si->dst_reg, si->dst_reg,
1931 				      si->off);
1932 		break;
1933 	}
1934 
1935 	return insn - insn_buf;
1936 }
1937 
/*
 * Verifier callbacks for perf_event programs: helper lookup, ctx access
 * check and rewriting of ctx loads to the kernel-side layout.
 */
const struct bpf_verifier_ops perf_event_verifier_ops = {
	.get_func_proto		= pe_prog_func_proto,
	.is_valid_access	= pe_prog_is_valid_access,
	.convert_ctx_access	= pe_prog_convert_ctx_access,
};

/* No program-level operations are defined for perf_event programs. */
const struct bpf_prog_ops perf_event_prog_ops = {
};
1946 
/*
 * Held by the attach, detach and query functions below while they read or
 * replace event->prog and event->tp_event->prog_array.
 */
static DEFINE_MUTEX(bpf_event_mutex);

/*
 * Upper bound on the number of programs in one tp_event prog_array, and on
 * the number of ids a single query may request.
 */
#define BPF_TRACE_MAX_PROGS 64
1950 
1951 int perf_event_attach_bpf_prog(struct perf_event *event,
1952 			       struct bpf_prog *prog,
1953 			       u64 bpf_cookie)
1954 {
1955 	struct bpf_prog_array *old_array;
1956 	struct bpf_prog_array *new_array;
1957 	int ret = -EEXIST;
1958 
1959 	/*
1960 	 * Kprobe override only works if they are on the function entry,
1961 	 * and only if they are on the opt-in list.
1962 	 */
1963 	if (prog->kprobe_override &&
1964 	    (!trace_kprobe_on_func_entry(event->tp_event) ||
1965 	     !trace_kprobe_error_injectable(event->tp_event)))
1966 		return -EINVAL;
1967 
1968 	mutex_lock(&bpf_event_mutex);
1969 
1970 	if (event->prog)
1971 		goto unlock;
1972 
1973 	old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
1974 	if (old_array &&
1975 	    bpf_prog_array_length(old_array) >= BPF_TRACE_MAX_PROGS) {
1976 		ret = -E2BIG;
1977 		goto unlock;
1978 	}
1979 
1980 	ret = bpf_prog_array_copy(old_array, NULL, prog, bpf_cookie, &new_array);
1981 	if (ret < 0)
1982 		goto unlock;
1983 
1984 	/* set the new array to event->tp_event and set event->prog */
1985 	event->prog = prog;
1986 	event->bpf_cookie = bpf_cookie;
1987 	rcu_assign_pointer(event->tp_event->prog_array, new_array);
1988 	bpf_prog_array_free_sleepable(old_array);
1989 
1990 unlock:
1991 	mutex_unlock(&bpf_event_mutex);
1992 	return ret;
1993 }
1994 
/*
 * Detach the program attached to @event, if any, and drop its reference.
 *
 * Under bpf_event_mutex a copy of the prog_array without the program is
 * published.  If building the copy fails, the program is removed from the
 * existing array in place with bpf_prog_array_delete_safe() instead.  The
 * program reference is only dropped after the mutex has been released and a
 * tasks-trace RCU grace period has elapsed.
 */
void perf_event_detach_bpf_prog(struct perf_event *event)
{
	struct bpf_prog_array *old_array;
	struct bpf_prog_array *new_array;
	struct bpf_prog *prog = NULL;
	int ret;

	mutex_lock(&bpf_event_mutex);

	/* Nothing attached: leave 'prog' NULL so nothing is put below. */
	if (!event->prog)
		goto unlock;

	old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
	if (!old_array)
		goto put;

	ret = bpf_prog_array_copy(old_array, event->prog, NULL, 0, &new_array);
	if (ret < 0) {
		/* Could not build a new array: edit the current one in place. */
		bpf_prog_array_delete_safe(old_array, event->prog);
	} else {
		rcu_assign_pointer(event->tp_event->prog_array, new_array);
		bpf_prog_array_free_sleepable(old_array);
	}

put:
	prog = event->prog;
	event->prog = NULL;

unlock:
	mutex_unlock(&bpf_event_mutex);

	if (prog) {
		/*
		 * It could be that the bpf_prog is not sleepable (and will be freed
		 * via normal RCU), but is called from a point that supports sleepable
		 * programs and uses tasks-trace-RCU.
		 */
		synchronize_rcu_tasks_trace();

		bpf_prog_put(prog);
	}
}
2037 
2038 int perf_event_query_prog_array(struct perf_event *event, void __user *info)
2039 {
2040 	struct perf_event_query_bpf __user *uquery = info;
2041 	struct perf_event_query_bpf query = {};
2042 	struct bpf_prog_array *progs;
2043 	u32 *ids, prog_cnt, ids_len;
2044 	int ret;
2045 
2046 	if (!perfmon_capable())
2047 		return -EPERM;
2048 	if (event->attr.type != PERF_TYPE_TRACEPOINT)
2049 		return -EINVAL;
2050 	if (copy_from_user(&query, uquery, sizeof(query)))
2051 		return -EFAULT;
2052 
2053 	ids_len = query.ids_len;
2054 	if (ids_len > BPF_TRACE_MAX_PROGS)
2055 		return -E2BIG;
2056 	ids = kcalloc(ids_len, sizeof(u32), GFP_USER | __GFP_NOWARN);
2057 	if (!ids)
2058 		return -ENOMEM;
2059 	/*
2060 	 * The above kcalloc returns ZERO_SIZE_PTR when ids_len = 0, which
2061 	 * is required when user only wants to check for uquery->prog_cnt.
2062 	 * There is no need to check for it since the case is handled
2063 	 * gracefully in bpf_prog_array_copy_info.
2064 	 */
2065 
2066 	mutex_lock(&bpf_event_mutex);
2067 	progs = bpf_event_rcu_dereference(event->tp_event->prog_array);
2068 	ret = bpf_prog_array_copy_info(progs, ids, ids_len, &prog_cnt);
2069 	mutex_unlock(&bpf_event_mutex);
2070 
2071 	if (copy_to_user(&uquery->prog_cnt, &prog_cnt, sizeof(prog_cnt)) ||
2072 	    copy_to_user(uquery->ids, ids, ids_len * sizeof(u32)))
2073 		ret = -EFAULT;
2074 
2075 	kfree(ids);
2076 	return ret;
2077 }
2078 
2079 extern struct bpf_raw_event_map __start__bpf_raw_tp[];
2080 extern struct bpf_raw_event_map __stop__bpf_raw_tp[];
2081 
2082 struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name)
2083 {
2084 	struct bpf_raw_event_map *btp = __start__bpf_raw_tp;
2085 
2086 	for (; btp < __stop__bpf_raw_tp; btp++) {
2087 		if (!strcmp(btp->tp->name, name))
2088 			return btp;
2089 	}
2090 
2091 	return bpf_get_raw_tracepoint_module(name);
2092 }
2093 
2094 void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
2095 {
2096 	struct module *mod;
2097 
2098 	guard(rcu)();
2099 	mod = __module_address((unsigned long)btp);
2100 	module_put(mod);
2101 }
2102 
2103 static __always_inline
2104 void __bpf_trace_run(struct bpf_raw_tp_link *link, u64 *args)
2105 {
2106 	struct srcu_ctr __percpu *scp = NULL;
2107 	struct bpf_prog *prog = link->link.prog;
2108 	bool sleepable = prog->sleepable;
2109 	struct bpf_run_ctx *old_run_ctx;
2110 	struct bpf_trace_run_ctx run_ctx;
2111 
2112 	if (sleepable) {
2113 		scp = rcu_read_lock_tasks_trace();
2114 		migrate_disable();
2115 	} else {
2116 		rcu_read_lock_dont_migrate();
2117 	}
2118 
2119 	if (unlikely(!bpf_prog_get_recursion_context(prog))) {
2120 		bpf_prog_inc_misses_counter(prog);
2121 		goto out;
2122 	}
2123 
2124 	run_ctx.bpf_cookie = link->cookie;
2125 	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
2126 
2127 	(void)bpf_prog_run(prog, args);
2128 
2129 	bpf_reset_run_ctx(old_run_ctx);
2130 out:
2131 	bpf_prog_put_recursion_context(prog);
2132 
2133 	if (sleepable) {
2134 		migrate_enable();
2135 		rcu_read_unlock_tasks_trace(scp);
2136 	} else {
2137 		rcu_read_unlock_migrate();
2138 	}
2139 }
2140 
/*
 * Preprocessor helpers used to generate the bpf_trace_run<N>() family.
 *
 * REPEAT(X, FN, DL, list...) dispatches to REPEAT_<X>, which applies FN to
 * the first X items of the list and joins the results with the contents
 * of the parenthesized delimiter DL (unwrapped through UNPACK).
 * REPEAT_1 through REPEAT_12 are defined, so X ranges from 1 to 12.
 */
2141 #define UNPACK(...)			__VA_ARGS__
2142 #define REPEAT_1(FN, DL, X, ...)	FN(X)
2143 #define REPEAT_2(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_1(FN, DL, __VA_ARGS__)
2144 #define REPEAT_3(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_2(FN, DL, __VA_ARGS__)
2145 #define REPEAT_4(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_3(FN, DL, __VA_ARGS__)
2146 #define REPEAT_5(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_4(FN, DL, __VA_ARGS__)
2147 #define REPEAT_6(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_5(FN, DL, __VA_ARGS__)
2148 #define REPEAT_7(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_6(FN, DL, __VA_ARGS__)
2149 #define REPEAT_8(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_7(FN, DL, __VA_ARGS__)
2150 #define REPEAT_9(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_8(FN, DL, __VA_ARGS__)
2151 #define REPEAT_10(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_9(FN, DL, __VA_ARGS__)
2152 #define REPEAT_11(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_10(FN, DL, __VA_ARGS__)
2153 #define REPEAT_12(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_11(FN, DL, __VA_ARGS__)
2154 #define REPEAT(X, FN, DL, ...)		REPEAT_##X(FN, DL, __VA_ARGS__)
2155 
/* SARG(X) declares parameter "u64 argX"; COPY(X) stores argX into args[X]. */
2156 #define SARG(X)		u64 arg##X
2157 #define COPY(X)		args[X] = arg##X
2158 
/* Parenthesized delimiters for REPEAT: comma and semicolon. */
2159 #define __DL_COM	(,)
2160 #define __DL_SEM	(;)
2161 
/* Index list fed to REPEAT; only the first X entries are consumed. */
2162 #define __SEQ_0_11	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
2163 
/*
 * BPF_TRACE_DEFN_x(x) defines and GPL-exports bpf_trace_run<x>(), which
 * takes a raw tracepoint link plus x u64 arguments (arg0..arg<x-1>),
 * packs them into a local u64 args[x] array and passes that array to
 * __bpf_trace_run(). Variants for 1 through 12 arguments are instantiated
 * below.
 */
2164 #define BPF_TRACE_DEFN_x(x)						\
2165 	void bpf_trace_run##x(struct bpf_raw_tp_link *link,		\
2166 			      REPEAT(x, SARG, __DL_COM, __SEQ_0_11))	\
2167 	{								\
2168 		u64 args[x];						\
2169 		REPEAT(x, COPY, __DL_SEM, __SEQ_0_11);			\
2170 		__bpf_trace_run(link, args);				\
2171 	}								\
2172 	EXPORT_SYMBOL_GPL(bpf_trace_run##x)
2173 BPF_TRACE_DEFN_x(1);
2174 BPF_TRACE_DEFN_x(2);
2175 BPF_TRACE_DEFN_x(3);
2176 BPF_TRACE_DEFN_x(4);
2177 BPF_TRACE_DEFN_x(5);
2178 BPF_TRACE_DEFN_x(6);
2179 BPF_TRACE_DEFN_x(7);
2180 BPF_TRACE_DEFN_x(8);
2181 BPF_TRACE_DEFN_x(9);
2182 BPF_TRACE_DEFN_x(10);
2183 BPF_TRACE_DEFN_x(11);
2184 BPF_TRACE_DEFN_x(12);
2185 
2186 int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link)
2187 {
2188 	struct tracepoint *tp = btp->tp;
2189 	struct bpf_prog *prog = link->link.prog;
2190 
2191 	/*
2192 	 * check that program doesn't access arguments beyond what's
2193 	 * available in this tracepoint
2194 	 */
2195 	if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64))
2196 		return -EINVAL;
2197 
2198 	if (prog->aux->max_tp_access > btp->writable_size)
2199 		return -EINVAL;
2200 
2201 	return tracepoint_probe_register_may_exist(tp, (void *)btp->bpf_func, link);
2202 }
2203 
2204 int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link)
2205 {
2206 	return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, link);
2207 }
2208 
2209 int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
2210 			    u32 *fd_type, const char **buf,
2211 			    u64 *probe_offset, u64 *probe_addr,
2212 			    unsigned long *missed)
2213 {
2214 	bool is_tracepoint, is_syscall_tp;
2215 	struct bpf_prog *prog;
2216 	int flags, err = 0;
2217 
2218 	prog = event->prog;
2219 	if (!prog)
2220 		return -ENOENT;
2221 
2222 	/* not supporting BPF_PROG_TYPE_PERF_EVENT yet */
2223 	if (prog->type == BPF_PROG_TYPE_PERF_EVENT)
2224 		return -EOPNOTSUPP;
2225 
2226 	*prog_id = prog->aux->id;
2227 	flags = event->tp_event->flags;
2228 	is_tracepoint = flags & TRACE_EVENT_FL_TRACEPOINT;
2229 	is_syscall_tp = is_syscall_trace_event(event->tp_event);
2230 
2231 	if (is_tracepoint || is_syscall_tp) {
2232 		*buf = is_tracepoint ? event->tp_event->tp->name
2233 				     : event->tp_event->name;
2234 		/* We allow NULL pointer for tracepoint */
2235 		if (fd_type)
2236 			*fd_type = BPF_FD_TYPE_TRACEPOINT;
2237 		if (probe_offset)
2238 			*probe_offset = 0x0;
2239 		if (probe_addr)
2240 			*probe_addr = 0x0;
2241 	} else {
2242 		/* kprobe/uprobe */
2243 		err = -EOPNOTSUPP;
2244 #ifdef CONFIG_KPROBE_EVENTS
2245 		if (flags & TRACE_EVENT_FL_KPROBE)
2246 			err = bpf_get_kprobe_info(event, fd_type, buf,
2247 						  probe_offset, probe_addr, missed,
2248 						  event->attr.type == PERF_TYPE_TRACEPOINT);
2249 #endif
2250 #ifdef CONFIG_UPROBE_EVENTS
2251 		if (flags & TRACE_EVENT_FL_UPROBE)
2252 			err = bpf_get_uprobe_info(event, fd_type, buf,
2253 						  probe_offset, probe_addr,
2254 						  event->attr.type == PERF_TYPE_TRACEPOINT);
2255 #endif
2256 	}
2257 
2258 	return err;
2259 }
2260 
2261 static int __init send_signal_irq_work_init(void)
2262 {
2263 	int cpu;
2264 	struct send_signal_irq_work *work;
2265 
2266 	for_each_possible_cpu(cpu) {
2267 		work = per_cpu_ptr(&send_signal_work, cpu);
2268 		init_irq_work(&work->irq_work, do_bpf_send_signal);
2269 	}
2270 	return 0;
2271 }
2272 
2273 subsys_initcall(send_signal_irq_work_init);
2274 
2275 #ifdef CONFIG_MODULES
2276 static int bpf_event_notify(struct notifier_block *nb, unsigned long op,
2277 			    void *module)
2278 {
2279 	struct bpf_trace_module *btm, *tmp;
2280 	struct module *mod = module;
2281 	int ret = 0;
2282 
2283 	if (mod->num_bpf_raw_events == 0 ||
2284 	    (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING))
2285 		goto out;
2286 
2287 	mutex_lock(&bpf_module_mutex);
2288 
2289 	switch (op) {
2290 	case MODULE_STATE_COMING:
2291 		btm = kzalloc_obj(*btm);
2292 		if (btm) {
2293 			btm->module = module;
2294 			list_add(&btm->list, &bpf_trace_modules);
2295 		} else {
2296 			ret = -ENOMEM;
2297 		}
2298 		break;
2299 	case MODULE_STATE_GOING:
2300 		list_for_each_entry_safe(btm, tmp, &bpf_trace_modules, list) {
2301 			if (btm->module == module) {
2302 				list_del(&btm->list);
2303 				kfree(btm);
2304 				break;
2305 			}
2306 		}
2307 		break;
2308 	}
2309 
2310 	mutex_unlock(&bpf_module_mutex);
2311 
2312 out:
2313 	return notifier_from_errno(ret);
2314 }
2315 
/* Module notifier block whose callback is bpf_event_notify(). */
2316 static struct notifier_block bpf_module_nb = {
2317 	.notifier_call = bpf_event_notify,
2318 };
2319 
2320 static int __init bpf_event_init(void)
2321 {
2322 	register_module_notifier(&bpf_module_nb);
2323 	return 0;
2324 }
2325 
2326 fs_initcall(bpf_event_init);
2327 #endif /* CONFIG_MODULES */
2328 
/*
 * Run context shared by the multi-probe links in this file; embedded as
 * the first member of bpf_kprobe_multi_run_ctx and bpf_uprobe_multi_run_ctx.
 */
2329 struct bpf_session_run_ctx {
2330 	struct bpf_run_ctx run_ctx;
	/* true when the program is invoked from the return/exit handler */
2331 	bool is_return;
	/* opaque per-invocation data pointer passed in by the probe handler */
2332 	void *data;
2333 };
2334 
2335 #ifdef CONFIG_FPROBE
/* State of one kprobe_multi BPF link. */
2336 struct bpf_kprobe_multi_link {
2337 	struct bpf_link link;
	/* fprobe registered on @addrs; handlers recover the link via container_of() */
2338 	struct fprobe fp;
	/* @cnt probed addresses (sorted at attach time when cookies are supplied) */
2339 	unsigned long *addrs;
	/* optional per-address cookies, parallel to @addrs; NULL if none given */
2340 	u64 *cookies;
2341 	u32 cnt;
	/* number of entries in @mods */
2342 	u32 mods_cnt;
	/* modules containing probed addresses; a reference is held on each */
2343 	struct module **mods;
2344 };
2345 
/* Per-invocation run context for kprobe_multi programs. */
2346 struct bpf_kprobe_multi_run_ctx {
2347 	struct bpf_session_run_ctx session_ctx;
	/* link whose program is being run */
2348 	struct bpf_kprobe_multi_link *link;
	/* entry address of the probed function for this invocation */
2349 	unsigned long entry_ip;
2350 };
2351 
/* Symbol names copied from user space by copy_user_syms(). */
2352 struct user_syms {
	/* array of pointers into @buf, one per symbol */
2353 	const char **syms;
	/* backing storage holding the NUL-terminated names back to back */
2354 	char *buf;
2355 };
2356 
/*
 * Scratch pt_regs handed to ftrace_partial_regs(): a per-CPU buffer when
 * CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS is not set, NULL otherwise.
 */
2357 #ifndef CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS
2358 static DEFINE_PER_CPU(struct pt_regs, bpf_kprobe_multi_pt_regs);
2359 #define bpf_kprobe_multi_pt_regs_ptr()	this_cpu_ptr(&bpf_kprobe_multi_pt_regs)
2360 #else
2361 #define bpf_kprobe_multi_pt_regs_ptr()	(NULL)
2362 #endif
2363 
/*
 * Translate @fentry_ip through ftrace_get_symaddr(); if that yields 0,
 * fall back to @fentry_ip itself.
 */
static unsigned long ftrace_get_entry_ip(unsigned long fentry_ip)
{
	unsigned long sym_addr = ftrace_get_symaddr(fentry_ip);

	if (sym_addr)
		return sym_addr;
	return fentry_ip;
}
2370 
2371 static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, u32 cnt)
2372 {
2373 	unsigned long __user usymbol;
2374 	const char **syms = NULL;
2375 	char *buf = NULL, *p;
2376 	int err = -ENOMEM;
2377 	unsigned int i;
2378 
2379 	syms = kvmalloc_array(cnt, sizeof(*syms), GFP_KERNEL);
2380 	if (!syms)
2381 		goto error;
2382 
2383 	buf = kvmalloc_array(cnt, KSYM_NAME_LEN, GFP_KERNEL);
2384 	if (!buf)
2385 		goto error;
2386 
2387 	for (p = buf, i = 0; i < cnt; i++) {
2388 		if (__get_user(usymbol, usyms + i)) {
2389 			err = -EFAULT;
2390 			goto error;
2391 		}
2392 		err = strncpy_from_user(p, (const char __user *) usymbol, KSYM_NAME_LEN);
2393 		if (err == KSYM_NAME_LEN)
2394 			err = -E2BIG;
2395 		if (err < 0)
2396 			goto error;
2397 		syms[i] = p;
2398 		p += err + 1;
2399 	}
2400 
2401 	us->syms = syms;
2402 	us->buf = buf;
2403 	return 0;
2404 
2405 error:
2406 	if (err) {
2407 		kvfree(syms);
2408 		kvfree(buf);
2409 	}
2410 	return err;
2411 }
2412 
2413 static void kprobe_multi_put_modules(struct module **mods, u32 cnt)
2414 {
2415 	u32 i;
2416 
2417 	for (i = 0; i < cnt; i++)
2418 		module_put(mods[i]);
2419 }
2420 
2421 static void free_user_syms(struct user_syms *us)
2422 {
2423 	kvfree(us->syms);
2424 	kvfree(us->buf);
2425 }
2426 
2427 static void bpf_kprobe_multi_link_release(struct bpf_link *link)
2428 {
2429 	struct bpf_kprobe_multi_link *kmulti_link;
2430 
2431 	kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
2432 	/* Don't wait for RCU GP here. */
2433 	unregister_fprobe_async(&kmulti_link->fp);
2434 	kprobe_multi_put_modules(kmulti_link->mods, kmulti_link->mods_cnt);
2435 }
2436 
2437 static void bpf_kprobe_multi_link_dealloc(struct bpf_link *link)
2438 {
2439 	struct bpf_kprobe_multi_link *kmulti_link;
2440 
2441 	kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
2442 	kvfree(kmulti_link->addrs);
2443 	kvfree(kmulti_link->cookies);
2444 	kfree(kmulti_link->mods);
2445 	kfree(kmulti_link);
2446 }
2447 
2448 static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link,
2449 						struct bpf_link_info *info)
2450 {
2451 	u64 __user *ucookies = u64_to_user_ptr(info->kprobe_multi.cookies);
2452 	u64 __user *uaddrs = u64_to_user_ptr(info->kprobe_multi.addrs);
2453 	struct bpf_kprobe_multi_link *kmulti_link;
2454 	u32 ucount = info->kprobe_multi.count;
2455 	int err = 0, i;
2456 
2457 	if (!uaddrs ^ !ucount)
2458 		return -EINVAL;
2459 	if (ucookies && !ucount)
2460 		return -EINVAL;
2461 
2462 	kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
2463 	info->kprobe_multi.count = kmulti_link->cnt;
2464 	info->kprobe_multi.flags = kmulti_link->link.flags;
2465 	info->kprobe_multi.missed = kmulti_link->fp.nmissed;
2466 
2467 	if (!uaddrs)
2468 		return 0;
2469 	if (ucount < kmulti_link->cnt)
2470 		err = -ENOSPC;
2471 	else
2472 		ucount = kmulti_link->cnt;
2473 
2474 	if (ucookies) {
2475 		if (kmulti_link->cookies) {
2476 			if (copy_to_user(ucookies, kmulti_link->cookies, ucount * sizeof(u64)))
2477 				return -EFAULT;
2478 		} else {
2479 			for (i = 0; i < ucount; i++) {
2480 				if (put_user(0, ucookies + i))
2481 					return -EFAULT;
2482 			}
2483 		}
2484 	}
2485 
2486 	if (kallsyms_show_value(current_cred())) {
2487 		if (copy_to_user(uaddrs, kmulti_link->addrs, ucount * sizeof(u64)))
2488 			return -EFAULT;
2489 	} else {
2490 		for (i = 0; i < ucount; i++) {
2491 			if (put_user(0, uaddrs + i))
2492 				return -EFAULT;
2493 		}
2494 	}
2495 	return err;
2496 }
2497 
2498 #ifdef CONFIG_PROC_FS
2499 static void bpf_kprobe_multi_show_fdinfo(const struct bpf_link *link,
2500 					 struct seq_file *seq)
2501 {
2502 	struct bpf_kprobe_multi_link *kmulti_link;
2503 	bool has_cookies;
2504 
2505 	kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
2506 	has_cookies = !!kmulti_link->cookies;
2507 
2508 	seq_printf(seq,
2509 		   "kprobe_cnt:\t%u\n"
2510 		   "missed:\t%lu\n",
2511 		   kmulti_link->cnt,
2512 		   kmulti_link->fp.nmissed);
2513 
2514 	seq_printf(seq, "%s\t %s\n", "cookie", "func");
2515 	for (int i = 0; i < kmulti_link->cnt; i++) {
2516 		seq_printf(seq,
2517 			   "%llu\t %pS\n",
2518 			   has_cookies ? kmulti_link->cookies[i] : 0,
2519 			   (void *)kmulti_link->addrs[i]);
2520 	}
2521 }
2522 #endif
2523 
/*
 * Link operations for kprobe_multi links; show_fdinfo is only provided
 * when CONFIG_PROC_FS is enabled.
 */
2524 static const struct bpf_link_ops bpf_kprobe_multi_link_lops = {
2525 	.release = bpf_kprobe_multi_link_release,
2526 	.dealloc_deferred = bpf_kprobe_multi_link_dealloc,
2527 	.fill_link_info = bpf_kprobe_multi_link_fill_link_info,
2528 #ifdef CONFIG_PROC_FS
2529 	.show_fdinfo = bpf_kprobe_multi_show_fdinfo,
2530 #endif
2531 };
2532 
2533 static void bpf_kprobe_multi_cookie_swap(void *a, void *b, int size, const void *priv)
2534 {
2535 	const struct bpf_kprobe_multi_link *link = priv;
2536 	unsigned long *addr_a = a, *addr_b = b;
2537 	u64 *cookie_a, *cookie_b;
2538 
2539 	cookie_a = link->cookies + (addr_a - link->addrs);
2540 	cookie_b = link->cookies + (addr_b - link->addrs);
2541 
2542 	/* swap addr_a/addr_b and cookie_a/cookie_b values */
2543 	swap(*addr_a, *addr_b);
2544 	swap(*cookie_a, *cookie_b);
2545 }
2546 
/*
 * Three-way comparison of two unsigned long addresses referenced by @a
 * and @b: returns -1, 0 or 1.
 */
static int bpf_kprobe_multi_addrs_cmp(const void *a, const void *b)
{
	unsigned long lhs = *(const unsigned long *)a;
	unsigned long rhs = *(const unsigned long *)b;

	if (lhs < rhs)
		return -1;
	if (lhs > rhs)
		return 1;
	return 0;
}
2555 
/*
 * sort_r() comparison callback: orders by address only; @priv is unused.
 */
static int bpf_kprobe_multi_cookie_cmp(const void *a, const void *b, const void *priv)
{
	int order = bpf_kprobe_multi_addrs_cmp(a, b);

	return order;
}
2560 
2561 static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx)
2562 {
2563 	struct bpf_kprobe_multi_run_ctx *run_ctx;
2564 	struct bpf_kprobe_multi_link *link;
2565 	u64 *cookie, entry_ip;
2566 	unsigned long *addr;
2567 
2568 	if (WARN_ON_ONCE(!ctx))
2569 		return 0;
2570 	run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx,
2571 			       session_ctx.run_ctx);
2572 	link = run_ctx->link;
2573 	if (!link->cookies)
2574 		return 0;
2575 	entry_ip = run_ctx->entry_ip;
2576 	addr = bsearch(&entry_ip, link->addrs, link->cnt, sizeof(entry_ip),
2577 		       bpf_kprobe_multi_addrs_cmp);
2578 	if (!addr)
2579 		return 0;
2580 	cookie = link->cookies + (addr - link->addrs);
2581 	return *cookie;
2582 }
2583 
2584 static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
2585 {
2586 	struct bpf_kprobe_multi_run_ctx *run_ctx;
2587 
2588 	run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx,
2589 			       session_ctx.run_ctx);
2590 	return run_ctx->entry_ip;
2591 }
2592 
2593 static __always_inline int
2594 kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
2595 			   unsigned long entry_ip, struct ftrace_regs *fregs,
2596 			   bool is_return, void *data)
2597 {
2598 	struct bpf_kprobe_multi_run_ctx run_ctx = {
2599 		.session_ctx = {
2600 			.is_return = is_return,
2601 			.data = data,
2602 		},
2603 		.link = link,
2604 		.entry_ip = entry_ip,
2605 	};
2606 	struct bpf_run_ctx *old_run_ctx;
2607 	struct pt_regs *regs;
2608 	int err;
2609 
2610 	/*
2611 	 * graph tracer framework ensures we won't migrate, so there is no need
2612 	 * to use migrate_disable for bpf_prog_run again. The check here just for
2613 	 * __this_cpu_inc_return.
2614 	 */
2615 	cant_sleep();
2616 
2617 	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
2618 		bpf_prog_inc_misses_counter(link->link.prog);
2619 		err = 1;
2620 		goto out;
2621 	}
2622 
2623 	rcu_read_lock();
2624 	regs = ftrace_partial_regs(fregs, bpf_kprobe_multi_pt_regs_ptr());
2625 	old_run_ctx = bpf_set_run_ctx(&run_ctx.session_ctx.run_ctx);
2626 	err = bpf_prog_run(link->link.prog, regs);
2627 	bpf_reset_run_ctx(old_run_ctx);
2628 	ftrace_partial_regs_update(fregs, bpf_kprobe_multi_pt_regs_ptr());
2629 	rcu_read_unlock();
2630 
2631  out:
2632 	__this_cpu_dec(bpf_prog_active);
2633 	return err;
2634 }
2635 
2636 static int
2637 kprobe_multi_link_handler(struct fprobe *fp, unsigned long fentry_ip,
2638 			  unsigned long ret_ip, struct ftrace_regs *fregs,
2639 			  void *data)
2640 {
2641 	struct bpf_kprobe_multi_link *link;
2642 	int err;
2643 
2644 	link = container_of(fp, struct bpf_kprobe_multi_link, fp);
2645 	err = kprobe_multi_link_prog_run(link, ftrace_get_entry_ip(fentry_ip),
2646 					 fregs, false, data);
2647 	return is_kprobe_session(link->link.prog) ? err : 0;
2648 }
2649 
2650 static void
2651 kprobe_multi_link_exit_handler(struct fprobe *fp, unsigned long fentry_ip,
2652 			       unsigned long ret_ip, struct ftrace_regs *fregs,
2653 			       void *data)
2654 {
2655 	struct bpf_kprobe_multi_link *link;
2656 
2657 	link = container_of(fp, struct bpf_kprobe_multi_link, fp);
2658 	kprobe_multi_link_prog_run(link, ftrace_get_entry_ip(fentry_ip),
2659 				   fregs, true, data);
2660 }
2661 
/*
 * sort_r() comparison callback for an array of C strings: @a and @b point
 * at the string pointers; ordering is that of strcmp(). @priv is unused.
 */
static int symbols_cmp_r(const void *a, const void *b, const void *priv)
{
	const char *lhs = *(const char **) a;
	const char *rhs = *(const char **) b;

	return strcmp(lhs, rhs);
}
2669 
/* Private data passed to symbols_swap_r() while sorting symbol names. */
2670 struct multi_symbols_sort {
	/* base of the symbol-name array being sorted (used to derive indices) */
2671 	const char **funcs;
	/* optional cookies parallel to @funcs; NULL when there are none */
2672 	u64 *cookies;
2673 };
2674 
2675 static void symbols_swap_r(void *a, void *b, int size, const void *priv)
2676 {
2677 	const struct multi_symbols_sort *data = priv;
2678 	const char **name_a = a, **name_b = b;
2679 
2680 	swap(*name_a, *name_b);
2681 
2682 	/* If defined, swap also related cookies. */
2683 	if (data->cookies) {
2684 		u64 *cookie_a, *cookie_b;
2685 
2686 		cookie_a = data->cookies + (name_a - data->funcs);
2687 		cookie_b = data->cookies + (name_b - data->funcs);
2688 		swap(*cookie_a, *cookie_b);
2689 	}
2690 }
2691 
/* Growable array of module pointers (see add_module()). */
2692 struct modules_array {
2693 	struct module **mods;
	/* number of entries currently stored in @mods */
2694 	int mods_cnt;
	/* number of entries @mods has room for */
2695 	int mods_cap;
2696 };
2697 
2698 static int add_module(struct modules_array *arr, struct module *mod)
2699 {
2700 	struct module **mods;
2701 
2702 	if (arr->mods_cnt == arr->mods_cap) {
2703 		arr->mods_cap = max(16, arr->mods_cap * 3 / 2);
2704 		mods = krealloc_array(arr->mods, arr->mods_cap, sizeof(*mods), GFP_KERNEL);
2705 		if (!mods)
2706 			return -ENOMEM;
2707 		arr->mods = mods;
2708 	}
2709 
2710 	arr->mods[arr->mods_cnt] = mod;
2711 	arr->mods_cnt++;
2712 	return 0;
2713 }
2714 
2715 static bool has_module(struct modules_array *arr, struct module *mod)
2716 {
2717 	int i;
2718 
2719 	for (i = arr->mods_cnt - 1; i >= 0; i--) {
2720 		if (arr->mods[i] == mod)
2721 			return true;
2722 	}
2723 	return false;
2724 }
2725 
2726 static int get_modules_for_addrs(struct module ***mods, unsigned long *addrs, u32 addrs_cnt)
2727 {
2728 	struct modules_array arr = {};
2729 	u32 i, err = 0;
2730 
2731 	for (i = 0; i < addrs_cnt; i++) {
2732 		bool skip_add = false;
2733 		struct module *mod;
2734 
2735 		scoped_guard(rcu) {
2736 			mod = __module_address(addrs[i]);
2737 			/* Either no module or it's already stored  */
2738 			if (!mod || has_module(&arr, mod)) {
2739 				skip_add = true;
2740 				break; /* scoped_guard */
2741 			}
2742 			if (!try_module_get(mod))
2743 				err = -EINVAL;
2744 		}
2745 		if (skip_add)
2746 			continue;
2747 		if (err)
2748 			break;
2749 		err = add_module(&arr, mod);
2750 		if (err) {
2751 			module_put(mod);
2752 			break;
2753 		}
2754 	}
2755 
2756 	/* We return either err < 0 in case of error, ... */
2757 	if (err) {
2758 		kprobe_multi_put_modules(arr.mods, arr.mods_cnt);
2759 		kfree(arr.mods);
2760 		return err;
2761 	}
2762 
2763 	/* or number of modules found if everything is ok. */
2764 	*mods = arr.mods;
2765 	return arr.mods_cnt;
2766 }
2767 
2768 static int addrs_check_error_injection_list(unsigned long *addrs, u32 cnt)
2769 {
2770 	u32 i;
2771 
2772 	for (i = 0; i < cnt; i++) {
2773 		if (!within_error_injection_list(addrs[i]))
2774 			return -EINVAL;
2775 	}
2776 	return 0;
2777 }
2778 
2779 int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
2780 {
2781 	struct bpf_kprobe_multi_link *link = NULL;
2782 	struct bpf_link_primer link_primer;
2783 	void __user *ucookies;
2784 	unsigned long *addrs;
2785 	u32 flags, cnt, size;
2786 	void __user *uaddrs;
2787 	u64 *cookies = NULL;
2788 	void __user *usyms;
2789 	int err;
2790 
2791 	/* no support for 32bit archs yet */
2792 	if (sizeof(u64) != sizeof(void *))
2793 		return -EOPNOTSUPP;
2794 
2795 	if (attr->link_create.flags)
2796 		return -EINVAL;
2797 
2798 	if (!is_kprobe_multi(prog))
2799 		return -EINVAL;
2800 
2801 	/* kprobe_multi is not allowed to be sleepable. */
2802 	if (prog->sleepable)
2803 		return -EINVAL;
2804 
2805 	/* Writing to context is not allowed for kprobes. */
2806 	if (prog->aux->kprobe_write_ctx)
2807 		return -EINVAL;
2808 
2809 	flags = attr->link_create.kprobe_multi.flags;
2810 	if (flags & ~BPF_F_KPROBE_MULTI_RETURN)
2811 		return -EINVAL;
2812 
2813 	uaddrs = u64_to_user_ptr(attr->link_create.kprobe_multi.addrs);
2814 	usyms = u64_to_user_ptr(attr->link_create.kprobe_multi.syms);
2815 	if (!!uaddrs == !!usyms)
2816 		return -EINVAL;
2817 
2818 	cnt = attr->link_create.kprobe_multi.cnt;
2819 	if (!cnt)
2820 		return -EINVAL;
2821 	if (cnt > MAX_KPROBE_MULTI_CNT)
2822 		return -E2BIG;
2823 
2824 	size = cnt * sizeof(*addrs);
2825 	addrs = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL);
2826 	if (!addrs)
2827 		return -ENOMEM;
2828 
2829 	ucookies = u64_to_user_ptr(attr->link_create.kprobe_multi.cookies);
2830 	if (ucookies) {
2831 		cookies = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL);
2832 		if (!cookies) {
2833 			err = -ENOMEM;
2834 			goto error;
2835 		}
2836 		if (copy_from_user(cookies, ucookies, size)) {
2837 			err = -EFAULT;
2838 			goto error;
2839 		}
2840 	}
2841 
2842 	if (uaddrs) {
2843 		if (copy_from_user(addrs, uaddrs, size)) {
2844 			err = -EFAULT;
2845 			goto error;
2846 		}
2847 	} else {
2848 		struct multi_symbols_sort data = {
2849 			.cookies = cookies,
2850 		};
2851 		struct user_syms us;
2852 
2853 		err = copy_user_syms(&us, usyms, cnt);
2854 		if (err)
2855 			goto error;
2856 
2857 		if (cookies)
2858 			data.funcs = us.syms;
2859 
2860 		sort_r(us.syms, cnt, sizeof(*us.syms), symbols_cmp_r,
2861 		       symbols_swap_r, &data);
2862 
2863 		err = ftrace_lookup_symbols(us.syms, cnt, addrs);
2864 		free_user_syms(&us);
2865 		if (err)
2866 			goto error;
2867 	}
2868 
2869 	if (prog->kprobe_override && addrs_check_error_injection_list(addrs, cnt)) {
2870 		err = -EINVAL;
2871 		goto error;
2872 	}
2873 
2874 	link = kzalloc_obj(*link);
2875 	if (!link) {
2876 		err = -ENOMEM;
2877 		goto error;
2878 	}
2879 
2880 	bpf_link_init(&link->link, BPF_LINK_TYPE_KPROBE_MULTI,
2881 		      &bpf_kprobe_multi_link_lops, prog, attr->link_create.attach_type);
2882 
2883 	err = bpf_link_prime(&link->link, &link_primer);
2884 	if (err)
2885 		goto error;
2886 
2887 	if (!(flags & BPF_F_KPROBE_MULTI_RETURN))
2888 		link->fp.entry_handler = kprobe_multi_link_handler;
2889 	if ((flags & BPF_F_KPROBE_MULTI_RETURN) || is_kprobe_session(prog))
2890 		link->fp.exit_handler = kprobe_multi_link_exit_handler;
2891 	if (is_kprobe_session(prog))
2892 		link->fp.entry_data_size = sizeof(u64);
2893 
2894 	link->addrs = addrs;
2895 	link->cookies = cookies;
2896 	link->cnt = cnt;
2897 	link->link.flags = flags;
2898 
2899 	if (cookies) {
2900 		/*
2901 		 * Sorting addresses will trigger sorting cookies as well
2902 		 * (check bpf_kprobe_multi_cookie_swap). This way we can
2903 		 * find cookie based on the address in bpf_get_attach_cookie
2904 		 * helper.
2905 		 */
2906 		sort_r(addrs, cnt, sizeof(*addrs),
2907 		       bpf_kprobe_multi_cookie_cmp,
2908 		       bpf_kprobe_multi_cookie_swap,
2909 		       link);
2910 	}
2911 
2912 	err = get_modules_for_addrs(&link->mods, addrs, cnt);
2913 	if (err < 0) {
2914 		bpf_link_cleanup(&link_primer);
2915 		return err;
2916 	}
2917 	link->mods_cnt = err;
2918 
2919 	err = register_fprobe_ips(&link->fp, addrs, cnt);
2920 	if (err) {
2921 		kprobe_multi_put_modules(link->mods, link->mods_cnt);
2922 		bpf_link_cleanup(&link_primer);
2923 		return err;
2924 	}
2925 
2926 	return bpf_link_settle(&link_primer);
2927 
2928 error:
2929 	kfree(link);
2930 	kvfree(addrs);
2931 	kvfree(cookies);
2932 	return err;
2933 }
2934 #else /* !CONFIG_FPROBE */
/* !CONFIG_FPROBE stub: kprobe_multi links are not supported. */
2935 int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
2936 {
2937 	return -EOPNOTSUPP;
2938 }
/* !CONFIG_FPROBE stub: there is never a cookie, always returns 0. */
2939 static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx)
2940 {
2941 	return 0;
2942 }
/* !CONFIG_FPROBE stub: there is never an entry address, always returns 0. */
2943 static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
2944 {
2945 	return 0;
2946 }
2947 #endif
2948 
2949 #ifdef CONFIG_UPROBES
2950 struct bpf_uprobe_multi_link;
2951 
/* One probe location belonging to a uprobe_multi link. */
2952 struct bpf_uprobe {
	/* owning link */
2953 	struct bpf_uprobe_multi_link *link;
2954 	loff_t offset;
2955 	unsigned long ref_ctr_offset;
	/* user-supplied cookie reported for this probe */
2956 	u64 cookie;
	/* handle passed to uprobe_unregister_nosync() together with @consumer */
2957 	struct uprobe *uprobe;
2958 	struct uprobe_consumer consumer;
2959 	bool session;
2960 };
2961 
/* State of one uprobe_multi BPF link. */
2962 struct bpf_uprobe_multi_link {
	/* path reported to user space via d_path(); released in link release */
2963 	struct path path;
2964 	struct bpf_link link;
	/* number of entries in @uprobes */
2965 	u32 cnt;
2966 	struct bpf_uprobe *uprobes;
	/* optional task filter; when set, only its thread group runs the program */
2967 	struct task_struct *task;
2968 };
2969 
/* Per-invocation run context for uprobe_multi programs. */
2970 struct bpf_uprobe_multi_run_ctx {
2971 	struct bpf_session_run_ctx session_ctx;
2972 	unsigned long entry_ip;
	/* probe that triggered this invocation */
2973 	struct bpf_uprobe *uprobe;
2974 };
2975 
2976 static void bpf_uprobe_unregister(struct bpf_uprobe *uprobes, u32 cnt)
2977 {
2978 	u32 i;
2979 
2980 	for (i = 0; i < cnt; i++)
2981 		uprobe_unregister_nosync(uprobes[i].uprobe, &uprobes[i].consumer);
2982 
2983 	if (cnt)
2984 		uprobe_unregister_sync();
2985 }
2986 
2987 static void bpf_uprobe_multi_link_release(struct bpf_link *link)
2988 {
2989 	struct bpf_uprobe_multi_link *umulti_link;
2990 
2991 	umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
2992 	bpf_uprobe_unregister(umulti_link->uprobes, umulti_link->cnt);
2993 	if (umulti_link->task)
2994 		put_task_struct(umulti_link->task);
2995 	path_put(&umulti_link->path);
2996 }
2997 
2998 static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link)
2999 {
3000 	struct bpf_uprobe_multi_link *umulti_link;
3001 
3002 	umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
3003 	kvfree(umulti_link->uprobes);
3004 	kfree(umulti_link);
3005 }
3006 
3007 static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link,
3008 						struct bpf_link_info *info)
3009 {
3010 	u64 __user *uref_ctr_offsets = u64_to_user_ptr(info->uprobe_multi.ref_ctr_offsets);
3011 	u64 __user *ucookies = u64_to_user_ptr(info->uprobe_multi.cookies);
3012 	u64 __user *uoffsets = u64_to_user_ptr(info->uprobe_multi.offsets);
3013 	u64 __user *upath = u64_to_user_ptr(info->uprobe_multi.path);
3014 	u32 upath_size = info->uprobe_multi.path_size;
3015 	struct bpf_uprobe_multi_link *umulti_link;
3016 	u32 ucount = info->uprobe_multi.count;
3017 	int err = 0, i;
3018 	char *p, *buf;
3019 	long left = 0;
3020 
3021 	if (!upath ^ !upath_size)
3022 		return -EINVAL;
3023 
3024 	if ((uoffsets || uref_ctr_offsets || ucookies) && !ucount)
3025 		return -EINVAL;
3026 
3027 	umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
3028 	info->uprobe_multi.count = umulti_link->cnt;
3029 	info->uprobe_multi.flags = umulti_link->link.flags;
3030 	info->uprobe_multi.pid = umulti_link->task ?
3031 				 task_pid_nr_ns(umulti_link->task, task_active_pid_ns(current)) : 0;
3032 
3033 	upath_size = upath_size ? min_t(u32, upath_size, PATH_MAX) : PATH_MAX;
3034 	buf = kmalloc(upath_size, GFP_KERNEL);
3035 	if (!buf)
3036 		return -ENOMEM;
3037 	p = d_path(&umulti_link->path, buf, upath_size);
3038 	if (IS_ERR(p)) {
3039 		kfree(buf);
3040 		return PTR_ERR(p);
3041 	}
3042 	upath_size = buf + upath_size - p;
3043 
3044 	if (upath)
3045 		left = copy_to_user(upath, p, upath_size);
3046 	kfree(buf);
3047 	if (left)
3048 		return -EFAULT;
3049 	info->uprobe_multi.path_size = upath_size;
3050 
3051 	if (!uoffsets && !ucookies && !uref_ctr_offsets)
3052 		return 0;
3053 
3054 	if (ucount < umulti_link->cnt)
3055 		err = -ENOSPC;
3056 	else
3057 		ucount = umulti_link->cnt;
3058 
3059 	for (i = 0; i < ucount; i++) {
3060 		if (uoffsets &&
3061 		    put_user(umulti_link->uprobes[i].offset, uoffsets + i))
3062 			return -EFAULT;
3063 		if (uref_ctr_offsets &&
3064 		    put_user(umulti_link->uprobes[i].ref_ctr_offset, uref_ctr_offsets + i))
3065 			return -EFAULT;
3066 		if (ucookies &&
3067 		    put_user(umulti_link->uprobes[i].cookie, ucookies + i))
3068 			return -EFAULT;
3069 	}
3070 
3071 	return err;
3072 }
3073 
3074 #ifdef CONFIG_PROC_FS
3075 static void bpf_uprobe_multi_show_fdinfo(const struct bpf_link *link,
3076 					 struct seq_file *seq)
3077 {
3078 	struct bpf_uprobe_multi_link *umulti_link;
3079 	char *p, *buf;
3080 	pid_t pid;
3081 
3082 	umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
3083 
3084 	buf = kmalloc(PATH_MAX, GFP_KERNEL);
3085 	if (!buf)
3086 		return;
3087 
3088 	p = d_path(&umulti_link->path, buf, PATH_MAX);
3089 	if (IS_ERR(p)) {
3090 		kfree(buf);
3091 		return;
3092 	}
3093 
3094 	pid = umulti_link->task ?
3095 	      task_pid_nr_ns(umulti_link->task, task_active_pid_ns(current)) : 0;
3096 	seq_printf(seq,
3097 		   "uprobe_cnt:\t%u\n"
3098 		   "pid:\t%u\n"
3099 		   "path:\t%s\n",
3100 		   umulti_link->cnt, pid, p);
3101 
3102 	seq_printf(seq, "%s\t %s\t %s\n", "cookie", "offset", "ref_ctr_offset");
3103 	for (int i = 0; i < umulti_link->cnt; i++) {
3104 		seq_printf(seq,
3105 			   "%llu\t %#llx\t %#lx\n",
3106 			   umulti_link->uprobes[i].cookie,
3107 			   umulti_link->uprobes[i].offset,
3108 			   umulti_link->uprobes[i].ref_ctr_offset);
3109 	}
3110 
3111 	kfree(buf);
3112 }
3113 #endif
3114 
3115 static const struct bpf_link_ops bpf_uprobe_multi_link_lops = {
3116 	.release = bpf_uprobe_multi_link_release,
3117 	.dealloc_deferred = bpf_uprobe_multi_link_dealloc,
3118 	.fill_link_info = bpf_uprobe_multi_link_fill_link_info,
3119 #ifdef CONFIG_PROC_FS
3120 	.show_fdinfo = bpf_uprobe_multi_show_fdinfo,
3121 #endif
3122 };
3123 
3124 static int uprobe_prog_run(struct bpf_uprobe *uprobe,
3125 			   unsigned long entry_ip,
3126 			   struct pt_regs *regs,
3127 			   bool is_return, void *data)
3128 {
3129 	struct bpf_uprobe_multi_link *link = uprobe->link;
3130 	struct bpf_uprobe_multi_run_ctx run_ctx = {
3131 		.session_ctx = {
3132 			.is_return = is_return,
3133 			.data = data,
3134 		},
3135 		.entry_ip = entry_ip,
3136 		.uprobe = uprobe,
3137 	};
3138 	struct bpf_prog *prog = link->link.prog;
3139 	bool sleepable = prog->sleepable;
3140 	struct bpf_run_ctx *old_run_ctx;
3141 	int err;
3142 
3143 	if (link->task && !same_thread_group(current, link->task))
3144 		return 0;
3145 
3146 	if (sleepable)
3147 		rcu_read_lock_trace();
3148 	else
3149 		rcu_read_lock();
3150 
3151 	migrate_disable();
3152 
3153 	old_run_ctx = bpf_set_run_ctx(&run_ctx.session_ctx.run_ctx);
3154 	err = bpf_prog_run(link->link.prog, regs);
3155 	bpf_reset_run_ctx(old_run_ctx);
3156 
3157 	migrate_enable();
3158 
3159 	if (sleepable)
3160 		rcu_read_unlock_trace();
3161 	else
3162 		rcu_read_unlock();
3163 	return err;
3164 }
3165 
3166 static bool
3167 uprobe_multi_link_filter(struct uprobe_consumer *con, struct mm_struct *mm)
3168 {
3169 	struct bpf_uprobe *uprobe;
3170 
3171 	uprobe = container_of(con, struct bpf_uprobe, consumer);
3172 	return uprobe->link->task->mm == mm;
3173 }
3174 
3175 static int
3176 uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs,
3177 			  __u64 *data)
3178 {
3179 	struct bpf_uprobe *uprobe;
3180 	int ret;
3181 
3182 	uprobe = container_of(con, struct bpf_uprobe, consumer);
3183 	ret = uprobe_prog_run(uprobe, instruction_pointer(regs), regs, false, data);
3184 	if (uprobe->session)
3185 		return ret ? UPROBE_HANDLER_IGNORE : 0;
3186 	return 0;
3187 }
3188 
3189 static int
3190 uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs,
3191 			      __u64 *data)
3192 {
3193 	struct bpf_uprobe *uprobe;
3194 
3195 	uprobe = container_of(con, struct bpf_uprobe, consumer);
3196 	uprobe_prog_run(uprobe, func, regs, true, data);
3197 	return 0;
3198 }
3199 
3200 static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
3201 {
3202 	struct bpf_uprobe_multi_run_ctx *run_ctx;
3203 
3204 	run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx,
3205 			       session_ctx.run_ctx);
3206 	return run_ctx->entry_ip;
3207 }
3208 
3209 static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx)
3210 {
3211 	struct bpf_uprobe_multi_run_ctx *run_ctx;
3212 
3213 	run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx,
3214 			       session_ctx.run_ctx);
3215 	return run_ctx->uprobe->cookie;
3216 }
3217 
3218 static int bpf_uprobe_multi_get_path(const union bpf_attr *attr, struct path *path)
3219 {
3220 	void __user *upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path);
3221 	u32 path_fd = attr->link_create.uprobe_multi.path_fd;
3222 	u32 flags = attr->link_create.uprobe_multi.flags;
3223 
3224 	if (flags & BPF_F_UPROBE_MULTI_PATH_FD) {
3225 		/*
3226 		 * When BPF_F_UPROBE_MULTI_PATH_FD is set, the executable is
3227 		 * identified by path_fd, upath must be NULL.
3228 		 */
3229 		if (upath)
3230 			return -EINVAL;
3231 
3232 		CLASS(fd, f)(path_fd);
3233 		if (fd_empty(f))
3234 			return -EBADF;
3235 		*path = fd_file(f)->f_path;
3236 		path_get(path);
3237 		return 0;
3238 	}
3239 
3240 	/*
3241 	 * When BPF_F_UPROBE_MULTI_PATH_FD is not set, the path is resolved
3242 	 * relative to the cwd (AT_FDCWD) or absolute using the upath string.
3243 	 */
3244 	if (!upath || path_fd)
3245 		return -EINVAL;
3246 
3247 	return user_path_at(AT_FDCWD, upath, LOOKUP_FOLLOW, path);
3248 }
3249 
3250 int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
3251 {
3252 	struct bpf_uprobe_multi_link *link = NULL;
3253 	unsigned long __user *uref_ctr_offsets;
3254 	struct bpf_link_primer link_primer;
3255 	struct bpf_uprobe *uprobes = NULL;
3256 	struct task_struct *task = NULL;
3257 	unsigned long __user *uoffsets;
3258 	u64 __user *ucookies;
3259 	unsigned long size;
3260 	u32 flags, cnt, i;
3261 	struct path path;
3262 	pid_t pid;
3263 	int err;
3264 
3265 	/* no support for 32bit archs yet */
3266 	if (sizeof(u64) != sizeof(void *))
3267 		return -EOPNOTSUPP;
3268 
3269 	if (attr->link_create.flags)
3270 		return -EINVAL;
3271 
3272 	if (!is_uprobe_multi(prog))
3273 		return -EINVAL;
3274 
3275 	flags = attr->link_create.uprobe_multi.flags;
3276 	if (flags & ~(BPF_F_UPROBE_MULTI_RETURN | BPF_F_UPROBE_MULTI_PATH_FD))
3277 		return -EINVAL;
3278 
3279 	/*
3280 	 * offsets and cnt are mandatory,
3281 	 * ref_ctr_offsets and cookies are optional
3282 	 */
3283 	uoffsets = u64_to_user_ptr(attr->link_create.uprobe_multi.offsets);
3284 	cnt = attr->link_create.uprobe_multi.cnt;
3285 	pid = attr->link_create.uprobe_multi.pid;
3286 
3287 	if (!uoffsets || !cnt || pid < 0)
3288 		return -EINVAL;
3289 	if (cnt > MAX_UPROBE_MULTI_CNT)
3290 		return -E2BIG;
3291 
3292 	uref_ctr_offsets = u64_to_user_ptr(attr->link_create.uprobe_multi.ref_ctr_offsets);
3293 	ucookies = u64_to_user_ptr(attr->link_create.uprobe_multi.cookies);
3294 
3295 	/*
3296 	 * All uoffsets/uref_ctr_offsets/ucookies arrays have the same value
3297 	 * size, we need to check their address range is safe for __get_user
3298 	 * calls.
3299 	 */
3300 	size = sizeof(*uoffsets) * cnt;
3301 	if (!access_ok(uoffsets, size) || !access_ok(uref_ctr_offsets, size) ||
3302 	    !access_ok(ucookies, size))
3303 		return -EFAULT;
3304 
3305 	err = bpf_uprobe_multi_get_path(attr, &path);
3306 	if (err)
3307 		return err;
3308 
3309 	if (!d_is_reg(path.dentry)) {
3310 		err = -EBADF;
3311 		goto error_path_put;
3312 	}
3313 
3314 	if (pid) {
3315 		rcu_read_lock();
3316 		task = get_pid_task(find_vpid(pid), PIDTYPE_TGID);
3317 		rcu_read_unlock();
3318 		if (!task) {
3319 			err = -ESRCH;
3320 			goto error_path_put;
3321 		}
3322 	}
3323 
3324 	err = -ENOMEM;
3325 
3326 	link = kzalloc_obj(*link);
3327 	uprobes = kvzalloc_objs(*uprobes, cnt);
3328 
3329 	if (!uprobes || !link)
3330 		goto error_free;
3331 
3332 	for (i = 0; i < cnt; i++) {
3333 		if (__get_user(uprobes[i].offset, uoffsets + i)) {
3334 			err = -EFAULT;
3335 			goto error_free;
3336 		}
3337 		if (uprobes[i].offset < 0) {
3338 			err = -EINVAL;
3339 			goto error_free;
3340 		}
3341 		if (uref_ctr_offsets && __get_user(uprobes[i].ref_ctr_offset, uref_ctr_offsets + i)) {
3342 			err = -EFAULT;
3343 			goto error_free;
3344 		}
3345 		if (ucookies && __get_user(uprobes[i].cookie, ucookies + i)) {
3346 			err = -EFAULT;
3347 			goto error_free;
3348 		}
3349 
3350 		uprobes[i].link = link;
3351 
3352 		if (!(flags & BPF_F_UPROBE_MULTI_RETURN))
3353 			uprobes[i].consumer.handler = uprobe_multi_link_handler;
3354 		if (flags & BPF_F_UPROBE_MULTI_RETURN || is_uprobe_session(prog))
3355 			uprobes[i].consumer.ret_handler = uprobe_multi_link_ret_handler;
3356 		if (is_uprobe_session(prog))
3357 			uprobes[i].session = true;
3358 		if (pid)
3359 			uprobes[i].consumer.filter = uprobe_multi_link_filter;
3360 	}
3361 
3362 	link->cnt = cnt;
3363 	link->uprobes = uprobes;
3364 	link->path = path;
3365 	link->task = task;
3366 	link->link.flags = flags;
3367 
3368 	bpf_link_init(&link->link, BPF_LINK_TYPE_UPROBE_MULTI,
3369 		      &bpf_uprobe_multi_link_lops, prog, attr->link_create.attach_type);
3370 
3371 	for (i = 0; i < cnt; i++) {
3372 		uprobes[i].uprobe = uprobe_register(d_real_inode(link->path.dentry),
3373 						    uprobes[i].offset,
3374 						    uprobes[i].ref_ctr_offset,
3375 						    &uprobes[i].consumer);
3376 		if (IS_ERR(uprobes[i].uprobe)) {
3377 			err = PTR_ERR(uprobes[i].uprobe);
3378 			link->cnt = i;
3379 			goto error_unregister;
3380 		}
3381 	}
3382 
3383 	err = bpf_link_prime(&link->link, &link_primer);
3384 	if (err)
3385 		goto error_unregister;
3386 
3387 	return bpf_link_settle(&link_primer);
3388 
3389 error_unregister:
3390 	bpf_uprobe_unregister(uprobes, link->cnt);
3391 
3392 error_free:
3393 	kvfree(uprobes);
3394 	kfree(link);
3395 	if (task)
3396 		put_task_struct(task);
3397 error_path_put:
3398 	path_put(&path);
3399 	return err;
3400 }
3401 #else /* !CONFIG_UPROBES */
3402 int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
3403 {
3404 	return -EOPNOTSUPP;
3405 }
3406 static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx)
3407 {
3408 	return 0;
3409 }
3410 static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
3411 {
3412 	return 0;
3413 }
3414 #endif /* CONFIG_UPROBES */
3415 
3416 __bpf_kfunc_start_defs();
3417 
3418 __bpf_kfunc bool bpf_session_is_return(void *ctx)
3419 {
3420 	struct bpf_session_run_ctx *session_ctx;
3421 
3422 	session_ctx = container_of(current->bpf_ctx, struct bpf_session_run_ctx, run_ctx);
3423 	return session_ctx->is_return;
3424 }
3425 
3426 __bpf_kfunc __u64 *bpf_session_cookie(void *ctx)
3427 {
3428 	struct bpf_session_run_ctx *session_ctx;
3429 
3430 	session_ctx = container_of(current->bpf_ctx, struct bpf_session_run_ctx, run_ctx);
3431 	return session_ctx->data;
3432 }
3433 
3434 __bpf_kfunc_end_defs();
3435 
3436 BTF_KFUNCS_START(session_kfunc_set_ids)
3437 BTF_ID_FLAGS(func, bpf_session_is_return)
3438 BTF_ID_FLAGS(func, bpf_session_cookie)
3439 BTF_KFUNCS_END(session_kfunc_set_ids)
3440 
3441 static int bpf_session_filter(const struct bpf_prog *prog, u32 kfunc_id)
3442 {
3443 	if (!btf_id_set8_contains(&session_kfunc_set_ids, kfunc_id))
3444 		return 0;
3445 
3446 	if (!is_kprobe_session(prog) && !is_uprobe_session(prog) && !is_trace_fsession(prog))
3447 		return -EACCES;
3448 
3449 	return 0;
3450 }
3451 
3452 static const struct btf_kfunc_id_set bpf_session_kfunc_set = {
3453 	.owner = THIS_MODULE,
3454 	.set = &session_kfunc_set_ids,
3455 	.filter = bpf_session_filter,
3456 };
3457 
3458 static int __init bpf_trace_kfuncs_init(void)
3459 {
3460 	int err = 0;
3461 
3462 	err = err ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_KPROBE, &bpf_session_kfunc_set);
3463 	err = err ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_session_kfunc_set);
3464 
3465 	return err;
3466 }
3467 
3468 late_initcall(bpf_trace_kfuncs_init);
3469 
3470 typedef int (*copy_fn_t)(void *dst, const void *src, u32 size, struct task_struct *tsk);
3471 
3472 /*
3473  * The __always_inline is to make sure the compiler doesn't
3474  * generate indirect calls into callbacks, which is expensive,
3475  * on some kernel configurations. This allows compiler to put
3476  * direct calls into all the specific callback implementations
3477  * (copy_user_data_sleepable, copy_user_data_nofault, and so on)
3478  */
3479 static __always_inline int __bpf_dynptr_copy_str(const struct bpf_dynptr *dptr, u64 doff, u64 size,
3480 						 const void *unsafe_src,
3481 						 copy_fn_t str_copy_fn,
3482 						 struct task_struct *tsk)
3483 {
3484 	const struct bpf_dynptr_kern *dst;
3485 	u64 chunk_sz, off;
3486 	void *dst_slice;
3487 	int cnt, err;
3488 	char buf[256];
3489 
3490 	dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size);
3491 	if (likely(dst_slice))
3492 		return str_copy_fn(dst_slice, unsafe_src, size, tsk);
3493 
3494 	dst = (struct bpf_dynptr_kern *)dptr;
3495 	if (bpf_dynptr_check_off_len(dst, doff, size))
3496 		return -E2BIG;
3497 
3498 	for (off = 0; off < size; off += chunk_sz - 1) {
3499 		chunk_sz = min_t(u64, sizeof(buf), size - off);
3500 		/* Expect str_copy_fn to return count of copied bytes, including
3501 		 * zero terminator. Next iteration increment off by chunk_sz - 1 to
3502 		 * overwrite NUL.
3503 		 */
3504 		cnt = str_copy_fn(buf, unsafe_src + off, chunk_sz, tsk);
3505 		if (cnt < 0)
3506 			return cnt;
3507 		err = __bpf_dynptr_write(dst, doff + off, buf, cnt, 0);
3508 		if (err)
3509 			return err;
3510 		if (cnt < chunk_sz || chunk_sz == 1) /* we are done */
3511 			return off + cnt;
3512 	}
3513 	return off;
3514 }
3515 
3516 static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u64 doff,
3517 					     u64 size, const void *unsafe_src,
3518 					     copy_fn_t copy_fn, struct task_struct *tsk)
3519 {
3520 	const struct bpf_dynptr_kern *dst;
3521 	void *dst_slice;
3522 	char buf[256];
3523 	u64 off, chunk_sz;
3524 	int err;
3525 
3526 	dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size);
3527 	if (likely(dst_slice))
3528 		return copy_fn(dst_slice, unsafe_src, size, tsk);
3529 
3530 	dst = (struct bpf_dynptr_kern *)dptr;
3531 	if (bpf_dynptr_check_off_len(dst, doff, size))
3532 		return -E2BIG;
3533 
3534 	for (off = 0; off < size; off += chunk_sz) {
3535 		chunk_sz = min_t(u64, sizeof(buf), size - off);
3536 		err = copy_fn(buf, unsafe_src + off, chunk_sz, tsk);
3537 		if (err)
3538 			return err;
3539 		err = __bpf_dynptr_write(dst, doff + off, buf, chunk_sz, 0);
3540 		if (err)
3541 			return err;
3542 	}
3543 	return 0;
3544 }
3545 
3546 static __always_inline int copy_user_data_nofault(void *dst, const void *unsafe_src,
3547 						  u32 size, struct task_struct *tsk)
3548 {
3549 	return copy_from_user_nofault(dst, (const void __user *)unsafe_src, size);
3550 }
3551 
3552 static __always_inline int copy_user_data_sleepable(void *dst, const void *unsafe_src,
3553 						    u32 size, struct task_struct *tsk)
3554 {
3555 	int ret;
3556 
3557 	if (!tsk) { /* Read from the current task */
3558 		ret = copy_from_user(dst, (const void __user *)unsafe_src, size);
3559 		if (ret)
3560 			return -EFAULT;
3561 		return 0;
3562 	}
3563 
3564 	ret = access_process_vm(tsk, (unsigned long)unsafe_src, dst, size, 0);
3565 	if (ret != size)
3566 		return -EFAULT;
3567 	return 0;
3568 }
3569 
3570 static __always_inline int copy_kernel_data_nofault(void *dst, const void *unsafe_src,
3571 						    u32 size, struct task_struct *tsk)
3572 {
3573 	return copy_from_kernel_nofault(dst, unsafe_src, size);
3574 }
3575 
3576 static __always_inline int copy_user_str_nofault(void *dst, const void *unsafe_src,
3577 						 u32 size, struct task_struct *tsk)
3578 {
3579 	return strncpy_from_user_nofault(dst, (const void __user *)unsafe_src, size);
3580 }
3581 
3582 static __always_inline int copy_user_str_sleepable(void *dst, const void *unsafe_src,
3583 						   u32 size, struct task_struct *tsk)
3584 {
3585 	int ret;
3586 
3587 	if (unlikely(size == 0))
3588 		return 0;
3589 
3590 	if (tsk) {
3591 		ret = copy_remote_vm_str(tsk, (unsigned long)unsafe_src, dst, size, 0);
3592 	} else {
3593 		ret = strncpy_from_user(dst, (const void __user *)unsafe_src, size - 1);
3594 		/* strncpy_from_user does not guarantee NUL termination */
3595 		if (ret >= 0)
3596 			((char *)dst)[ret] = '\0';
3597 	}
3598 
3599 	if (ret < 0)
3600 		return ret;
3601 	return ret + 1;
3602 }
3603 
3604 static __always_inline int copy_kernel_str_nofault(void *dst, const void *unsafe_src,
3605 						   u32 size, struct task_struct *tsk)
3606 {
3607 	return strncpy_from_kernel_nofault(dst, unsafe_src, size);
3608 }
3609 
3610 __bpf_kfunc_start_defs();
3611 
3612 __bpf_kfunc int bpf_send_signal_task(struct task_struct *task, int sig, enum pid_type type,
3613 				     u64 value)
3614 {
3615 	if (type != PIDTYPE_PID && type != PIDTYPE_TGID)
3616 		return -EINVAL;
3617 
3618 	return bpf_send_signal_common(sig, type, task, value);
3619 }
3620 
3621 __bpf_kfunc int bpf_probe_read_user_dynptr(const struct bpf_dynptr *dptr, u64 off,
3622 					   u64 size, const void __user *unsafe_ptr__ign)
3623 {
3624 	return __bpf_dynptr_copy(dptr, off, size, (const void __force *)unsafe_ptr__ign,
3625 				 copy_user_data_nofault, NULL);
3626 }
3627 
3628 __bpf_kfunc int bpf_probe_read_kernel_dynptr(const struct bpf_dynptr *dptr, u64 off,
3629 					     u64 size, const void *unsafe_ptr__ign)
3630 {
3631 	return __bpf_dynptr_copy(dptr, off, size, unsafe_ptr__ign,
3632 				 copy_kernel_data_nofault, NULL);
3633 }
3634 
3635 __bpf_kfunc int bpf_probe_read_user_str_dynptr(const struct bpf_dynptr *dptr, u64 off,
3636 					       u64 size, const void __user *unsafe_ptr__ign)
3637 {
3638 	return __bpf_dynptr_copy_str(dptr, off, size, (const void __force *)unsafe_ptr__ign,
3639 				     copy_user_str_nofault, NULL);
3640 }
3641 
3642 __bpf_kfunc int bpf_probe_read_kernel_str_dynptr(const struct bpf_dynptr *dptr, u64 off,
3643 						 u64 size, const void *unsafe_ptr__ign)
3644 {
3645 	return __bpf_dynptr_copy_str(dptr, off, size, unsafe_ptr__ign,
3646 				     copy_kernel_str_nofault, NULL);
3647 }
3648 
3649 __bpf_kfunc int bpf_copy_from_user_dynptr(const struct bpf_dynptr *dptr, u64 off,
3650 					  u64 size, const void __user *unsafe_ptr__ign)
3651 {
3652 	return __bpf_dynptr_copy(dptr, off, size, (const void __force *)unsafe_ptr__ign,
3653 				 copy_user_data_sleepable, NULL);
3654 }
3655 
3656 __bpf_kfunc int bpf_copy_from_user_str_dynptr(const struct bpf_dynptr *dptr, u64 off,
3657 					      u64 size, const void __user *unsafe_ptr__ign)
3658 {
3659 	return __bpf_dynptr_copy_str(dptr, off, size, (const void __force *)unsafe_ptr__ign,
3660 				     copy_user_str_sleepable, NULL);
3661 }
3662 
3663 __bpf_kfunc int bpf_copy_from_user_task_dynptr(const struct bpf_dynptr *dptr, u64 off,
3664 					       u64 size, const void __user *unsafe_ptr__ign,
3665 					       struct task_struct *tsk)
3666 {
3667 	return __bpf_dynptr_copy(dptr, off, size, (const void __force *)unsafe_ptr__ign,
3668 				 copy_user_data_sleepable, tsk);
3669 }
3670 
3671 __bpf_kfunc int bpf_copy_from_user_task_str_dynptr(const struct bpf_dynptr *dptr, u64 off,
3672 						   u64 size, const void __user *unsafe_ptr__ign,
3673 						   struct task_struct *tsk)
3674 {
3675 	return __bpf_dynptr_copy_str(dptr, off, size, (const void __force *)unsafe_ptr__ign,
3676 				     copy_user_str_sleepable, tsk);
3677 }
3678 
3679 __bpf_kfunc_end_defs();
3680 
3681 #if defined(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS) && \
3682     defined(CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS)
3683 
3684 static void bpf_tracing_multi_link_release(struct bpf_link *link)
3685 {
3686 	struct bpf_tracing_multi_link *tr_link =
3687 		container_of(link, struct bpf_tracing_multi_link, link);
3688 
3689 	WARN_ON_ONCE(bpf_trampoline_multi_detach(link->prog, tr_link));
3690 }
3691 
3692 static void bpf_tracing_multi_link_dealloc(struct bpf_link *link)
3693 {
3694 	struct bpf_tracing_multi_link *tr_link =
3695 		container_of(link, struct bpf_tracing_multi_link, link);
3696 
3697 	kvfree(tr_link->fexits);
3698 	kvfree(tr_link->cookies);
3699 	kvfree(tr_link);
3700 }
3701 
3702 #ifdef CONFIG_PROC_FS
3703 static void bpf_tracing_multi_show_fdinfo(const struct bpf_link *link,
3704 					  struct seq_file *seq)
3705 {
3706 	struct bpf_tracing_multi_link *tr_link =
3707 		container_of(link, struct bpf_tracing_multi_link, link);
3708 	bool has_cookies = !!tr_link->cookies;
3709 
3710 	seq_printf(seq, "attach_type:\t%u\n", tr_link->link.attach_type);
3711 	seq_printf(seq, "cnt:\t%u\n", tr_link->nodes_cnt);
3712 
3713 	seq_printf(seq, "%s\t %s\t %s\t %s\n", "obj-id", "btf-id", "cookie", "func");
3714 	for (int i = 0; i < tr_link->nodes_cnt; i++) {
3715 		struct bpf_tracing_multi_node *mnode = &tr_link->nodes[i];
3716 		u32 btf_id, obj_id;
3717 
3718 		bpf_trampoline_unpack_key(mnode->trampoline->key, &obj_id, &btf_id);
3719 		seq_printf(seq, "%u\t %u\t %llu\t %pS\n",
3720 			   obj_id, btf_id,
3721 			   has_cookies ? tr_link->cookies[i] : 0,
3722 			   (void *) mnode->trampoline->ip);
3723 
3724 		cond_resched();
3725 	}
3726 }
3727 #endif
3728 
3729 static const struct bpf_link_ops bpf_tracing_multi_link_lops = {
3730 	.release = bpf_tracing_multi_link_release,
3731 	.dealloc_deferred = bpf_tracing_multi_link_dealloc,
3732 #ifdef CONFIG_PROC_FS
3733 	.show_fdinfo = bpf_tracing_multi_show_fdinfo,
3734 #endif
3735 };
3736 
3737 static int ids_cmp_r(const void *pa, const void *pb, const void *priv __maybe_unused)
3738 {
3739 	u32 a = *(u32 *) pa;
3740 	u32 b = *(u32 *) pb;
3741 
3742 	return (a > b) - (a < b);
3743 }
3744 
3745 static void ids_swap_r(void *a, void *b, int size __maybe_unused,
3746 		       const void *priv __maybe_unused)
3747 {
3748 	u64 *cookie_a, *cookie_b, *cookies;
3749 	u32 *id_a = a, *id_b = b, *ids;
3750 	void **data = (void **) priv;
3751 
3752 	ids     = data[0];
3753 	cookies = data[1];
3754 
3755 	if (cookies) {
3756 		cookie_a = cookies + (id_a - ids);
3757 		cookie_b = cookies + (id_b - ids);
3758 		swap(*cookie_a, *cookie_b);
3759 	}
3760 	swap(*id_a, *id_b);
3761 }
3762 
3763 static int check_dup_ids(u32 *ids, u64 *cookies, u32 cnt)
3764 {
3765 	void *data[2] = { ids, cookies };
3766 	int err = 0;
3767 
3768 	/*
3769 	 * Sort ids array (together with cookies array if defined)
3770 	 * and check it for duplicates. The ids and cookies arrays
3771 	 * are left sorted.
3772 	 */
3773 	sort_r_nonatomic(ids, cnt, sizeof(ids[0]), ids_cmp_r, ids_swap_r, data);
3774 
3775 	for (int i = 1; i < cnt; i++) {
3776 		if (ids[i] == ids[i - 1]) {
3777 			err = -EINVAL;
3778 			break;
3779 		}
3780 	}
3781 	return err;
3782 }
3783 
3784 int bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr)
3785 {
3786 	struct bpf_tracing_multi_link *link = NULL;
3787 	struct bpf_tramp_node *fexits = NULL;
3788 	struct bpf_link_primer link_primer;
3789 	u32 cnt, *ids = NULL;
3790 	u64 __user *ucookies;
3791 	u64 *cookies = NULL;
3792 	u32 __user *uids;
3793 	int err;
3794 
3795 	uids = u64_to_user_ptr(attr->link_create.tracing_multi.ids);
3796 	cnt = attr->link_create.tracing_multi.cnt;
3797 
3798 	if (!cnt || !uids)
3799 		return -EINVAL;
3800 	if (cnt > MAX_TRACING_MULTI_CNT)
3801 		return -E2BIG;
3802 	if (attr->link_create.flags || attr->link_create.target_fd)
3803 		return -EINVAL;
3804 
3805 	ids = kvmalloc_objs(*ids, cnt);
3806 	if (!ids)
3807 		return -ENOMEM;
3808 
3809 	if (copy_from_user(ids, uids, cnt * sizeof(*ids))) {
3810 		err = -EFAULT;
3811 		goto error;
3812 	}
3813 
3814 	ucookies = u64_to_user_ptr(attr->link_create.tracing_multi.cookies);
3815 	if (ucookies) {
3816 		cookies = kvmalloc_objs(*cookies, cnt);
3817 		if (!cookies) {
3818 			err = -ENOMEM;
3819 			goto error;
3820 		}
3821 		if (copy_from_user(cookies, ucookies, cnt * sizeof(*cookies))) {
3822 			err = -EFAULT;
3823 			goto error;
3824 		}
3825 	}
3826 
3827 	err = check_dup_ids(ids, cookies, cnt);
3828 	if (err)
3829 		goto error;
3830 
3831 	if (prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI) {
3832 		fexits = kvmalloc_objs(*fexits, cnt);
3833 		if (!fexits) {
3834 			err = -ENOMEM;
3835 			goto error;
3836 		}
3837 	}
3838 
3839 	link = kvzalloc_flex(*link, nodes, cnt);
3840 	if (!link) {
3841 		err = -ENOMEM;
3842 		goto error;
3843 	}
3844 
3845 	bpf_link_init(&link->link, BPF_LINK_TYPE_TRACING_MULTI,
3846 		      &bpf_tracing_multi_link_lops, prog, prog->expected_attach_type);
3847 
3848 	err = bpf_link_prime(&link->link, &link_primer);
3849 	if (err)
3850 		goto error;
3851 
3852 	link->nodes_cnt = cnt;
3853 	link->cookies = cookies;
3854 	link->fexits = fexits;
3855 
3856 	err = bpf_trampoline_multi_attach(prog, ids, link);
3857 	kvfree(ids);
3858 	if (err) {
3859 		bpf_link_cleanup(&link_primer);
3860 		return err;
3861 	}
3862 	return bpf_link_settle(&link_primer);
3863 
3864 error:
3865 	kvfree(fexits);
3866 	kvfree(cookies);
3867 	kvfree(ids);
3868 	kvfree(link);
3869 	return err;
3870 }
3871 
3872 #else
3873 
3874 int bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr)
3875 {
3876 	return -EOPNOTSUPP;
3877 }
3878 
3879 #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS && CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS */
3880