xref: /linux/kernel/trace/bpf_trace.c (revision d5dc200c3a3f217de072af269dd90adddf90e48d)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
3  * Copyright (c) 2016 Facebook
4  */
5 #include <linux/kernel.h>
6 #include <linux/types.h>
7 #include <linux/slab.h>
8 #include <linux/bpf.h>
9 #include <linux/bpf_verifier.h>
10 #include <linux/bpf_perf_event.h>
11 #include <linux/btf.h>
12 #include <linux/filter.h>
13 #include <linux/uaccess.h>
14 #include <linux/ctype.h>
15 #include <linux/kprobes.h>
16 #include <linux/spinlock.h>
17 #include <linux/syscalls.h>
18 #include <linux/error-injection.h>
19 #include <linux/btf_ids.h>
20 #include <linux/bpf_lsm.h>
21 #include <linux/fprobe.h>
22 #include <linux/bsearch.h>
23 #include <linux/sort.h>
24 #include <linux/key.h>
25 #include <linux/namei.h>
26 #include <linux/file.h>
27 
28 #include <net/bpf_sk_storage.h>
29 
30 #include <uapi/linux/bpf.h>
31 #include <uapi/linux/btf.h>
32 
33 #include <asm/tlb.h>
34 
35 #include "trace_probe.h"
36 #include "trace.h"
37 
38 #define CREATE_TRACE_POINTS
39 #include "bpf_trace.h"
40 
41 #define bpf_event_rcu_dereference(p)					\
42 	rcu_dereference_protected(p, lockdep_is_held(&bpf_event_mutex))
43 
44 #define MAX_UPROBE_MULTI_CNT (1U << 20)
45 #define MAX_KPROBE_MULTI_CNT (1U << 20)
46 #define MAX_TRACING_MULTI_CNT (1U << 20)
47 
48 #ifdef CONFIG_MODULES
49 struct bpf_trace_module {
50 	struct module *module;
51 	struct list_head list;
52 };
53 
54 static LIST_HEAD(bpf_trace_modules);
55 static DEFINE_MUTEX(bpf_module_mutex);
56 
57 static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
58 {
59 	struct bpf_raw_event_map *btp, *ret = NULL;
60 	struct bpf_trace_module *btm;
61 	unsigned int i;
62 
63 	mutex_lock(&bpf_module_mutex);
64 	list_for_each_entry(btm, &bpf_trace_modules, list) {
65 		for (i = 0; i < btm->module->num_bpf_raw_events; ++i) {
66 			btp = &btm->module->bpf_raw_events[i];
67 			if (!strcmp(btp->tp->name, name)) {
68 				if (try_module_get(btm->module))
69 					ret = btp;
70 				goto out;
71 			}
72 		}
73 	}
74 out:
75 	mutex_unlock(&bpf_module_mutex);
76 	return ret;
77 }
78 #else
/*
 * Variant built when CONFIG_MODULES is not defined: there is nothing to
 * search, so every lookup yields NULL.
 */
static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
{
	return NULL;
}
83 #endif /* CONFIG_MODULES */
84 
85 u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
86 u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
87 
88 static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
89 				  u64 flags, const struct btf **btf,
90 				  s32 *btf_id);
91 static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx);
92 static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx);
93 
94 static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx);
95 static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx);
96 
97 /**
98  * trace_call_bpf - invoke BPF program
99  * @call: tracepoint event
100  * @ctx: opaque context pointer
101  *
102  * kprobe handlers execute BPF programs via this helper.
103  * Can be used from static tracepoints in the future.
104  *
105  * Return: BPF programs always return an integer which is interpreted by
106  * kprobe handler as:
107  * 0 - return from kprobe (event is filtered out)
108  * 1 - store kprobe event into ring buffer
109  * Other values are reserved and currently alias to 1
110  */
111 unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
112 {
113 	unsigned int ret;
114 
115 	cant_sleep();
116 
117 	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
118 		/*
119 		 * since some bpf program is already running on this cpu,
120 		 * don't call into another bpf program (same or different)
121 		 * and don't send kprobe event into ring-buffer,
122 		 * so return zero here
123 		 */
124 		rcu_read_lock();
125 		bpf_prog_inc_misses_counters(rcu_dereference(call->prog_array));
126 		rcu_read_unlock();
127 		ret = 0;
128 		goto out;
129 	}
130 
131 	/*
132 	 * Instead of moving rcu_read_lock/rcu_dereference/rcu_read_unlock
133 	 * to all call sites, we did a bpf_prog_array_valid() there to check
134 	 * whether call->prog_array is empty or not, which is
135 	 * a heuristic to speed up execution.
136 	 *
137 	 * If bpf_prog_array_valid() fetched prog_array was
138 	 * non-NULL, we go into trace_call_bpf() and do the actual
139 	 * proper rcu_dereference() under RCU lock.
140 	 * If it turns out that prog_array is NULL then, we bail out.
141 	 * For the opposite, if the bpf_prog_array_valid() fetched pointer
142 	 * was NULL, you'll skip the prog_array with the risk of missing
143 	 * out of events when it was updated in between this and the
144 	 * rcu_dereference() which is accepted risk.
145 	 */
146 	rcu_read_lock();
147 	ret = bpf_prog_run_array(rcu_dereference(call->prog_array),
148 				 ctx, bpf_prog_run);
149 	rcu_read_unlock();
150 
151  out:
152 	__this_cpu_dec(bpf_prog_active);
153 
154 	return ret;
155 }
156 
157 /**
158  * trace_call_bpf_faultable - invoke BPF program in faultable context
159  * @call: tracepoint event
160  * @ctx: opaque context pointer
161  *
162  * Variant of trace_call_bpf() for faultable tracepoints (syscall
163  * tracepoints). Supports sleepable BPF programs by using rcu_tasks_trace
164  * for lifetime protection and bpf_prog_run_array_sleepable() for per-program
165  * RCU flavor selection, following the uprobe pattern.
166  *
167  * Per-program recursion protection is provided by
168  * bpf_prog_run_array_sleepable(). Global bpf_prog_active is not
169  * needed because syscall tracepoints cannot self-recurse.
170  *
171  * Must be called from a faultable/preemptible context.
172  */
173 unsigned int trace_call_bpf_faultable(struct trace_event_call *call, void *ctx)
174 {
175 	struct bpf_prog_array *prog_array;
176 
177 	might_fault();
178 	guard(rcu_tasks_trace)();
179 
180 	prog_array = rcu_dereference_check(call->prog_array,
181 					   rcu_read_lock_trace_held());
182 	return bpf_prog_run_array_sleepable(prog_array, ctx, bpf_prog_run);
183 }
184 
185 #ifdef CONFIG_BPF_KPROBE_OVERRIDE
186 BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
187 {
188 	regs_set_return_value(regs, rc);
189 	override_function_with_return(regs);
190 	return 0;
191 }
192 
193 static const struct bpf_func_proto bpf_override_return_proto = {
194 	.func		= bpf_override_return,
195 	.gpl_only	= true,
196 	.ret_type	= RET_INTEGER,
197 	.arg1_type	= ARG_PTR_TO_CTX,
198 	.arg2_type	= ARG_ANYTHING,
199 };
200 #endif
201 
202 static __always_inline int
203 bpf_probe_read_user_common(void *dst, u32 size, const void __user *unsafe_ptr)
204 {
205 	int ret;
206 
207 	ret = copy_from_user_nofault(dst, unsafe_ptr, size);
208 	if (unlikely(ret < 0))
209 		memset(dst, 0, size);
210 	return ret;
211 }
212 
213 BPF_CALL_3(bpf_probe_read_user, void *, dst, u32, size,
214 	   const void __user *, unsafe_ptr)
215 {
216 	return bpf_probe_read_user_common(dst, size, unsafe_ptr);
217 }
218 
219 const struct bpf_func_proto bpf_probe_read_user_proto = {
220 	.func		= bpf_probe_read_user,
221 	.gpl_only	= true,
222 	.ret_type	= RET_INTEGER,
223 	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
224 	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
225 	.arg3_type	= ARG_ANYTHING,
226 };
227 
228 static __always_inline int
229 bpf_probe_read_user_str_common(void *dst, u32 size,
230 			       const void __user *unsafe_ptr)
231 {
232 	int ret;
233 
234 	/*
235 	 * NB: We rely on strncpy_from_user() not copying junk past the NUL
236 	 * terminator into `dst`.
237 	 *
238 	 * strncpy_from_user() does long-sized strides in the fast path. If the
239 	 * strncpy does not mask out the bytes after the NUL in `unsafe_ptr`,
240 	 * then there could be junk after the NUL in `dst`. If user takes `dst`
241 	 * and keys a hash map with it, then semantically identical strings can
242 	 * occupy multiple entries in the map.
243 	 */
244 	ret = strncpy_from_user_nofault(dst, unsafe_ptr, size);
245 	if (unlikely(ret < 0))
246 		memset(dst, 0, size);
247 	return ret;
248 }
249 
250 BPF_CALL_3(bpf_probe_read_user_str, void *, dst, u32, size,
251 	   const void __user *, unsafe_ptr)
252 {
253 	return bpf_probe_read_user_str_common(dst, size, unsafe_ptr);
254 }
255 
256 const struct bpf_func_proto bpf_probe_read_user_str_proto = {
257 	.func		= bpf_probe_read_user_str,
258 	.gpl_only	= true,
259 	.ret_type	= RET_INTEGER,
260 	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
261 	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
262 	.arg3_type	= ARG_ANYTHING,
263 };
264 
265 BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
266 	   const void *, unsafe_ptr)
267 {
268 	return bpf_probe_read_kernel_common(dst, size, unsafe_ptr);
269 }
270 
271 const struct bpf_func_proto bpf_probe_read_kernel_proto = {
272 	.func		= bpf_probe_read_kernel,
273 	.gpl_only	= true,
274 	.ret_type	= RET_INTEGER,
275 	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
276 	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
277 	.arg3_type	= ARG_ANYTHING,
278 };
279 
280 static __always_inline int
281 bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr)
282 {
283 	int ret;
284 
285 	/*
286 	 * The strncpy_from_kernel_nofault() call will likely not fill the
287 	 * entire buffer, but that's okay in this circumstance as we're probing
288 	 * arbitrary memory anyway similar to bpf_probe_read_*() and might
289 	 * as well probe the stack. Thus, memory is explicitly cleared
290 	 * only in error case, so that improper users ignoring return
291 	 * code altogether don't copy garbage; otherwise length of string
292 	 * is returned that can be used for bpf_perf_event_output() et al.
293 	 */
294 	ret = strncpy_from_kernel_nofault(dst, unsafe_ptr, size);
295 	if (unlikely(ret < 0))
296 		memset(dst, 0, size);
297 	return ret;
298 }
299 
300 BPF_CALL_3(bpf_probe_read_kernel_str, void *, dst, u32, size,
301 	   const void *, unsafe_ptr)
302 {
303 	return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr);
304 }
305 
306 const struct bpf_func_proto bpf_probe_read_kernel_str_proto = {
307 	.func		= bpf_probe_read_kernel_str,
308 	.gpl_only	= true,
309 	.ret_type	= RET_INTEGER,
310 	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
311 	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
312 	.arg3_type	= ARG_ANYTHING,
313 };
314 
315 #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
316 BPF_CALL_3(bpf_probe_read_compat, void *, dst, u32, size,
317 	   const void *, unsafe_ptr)
318 {
319 	if ((unsigned long)unsafe_ptr < TASK_SIZE) {
320 		return bpf_probe_read_user_common(dst, size,
321 				(__force void __user *)unsafe_ptr);
322 	}
323 	return bpf_probe_read_kernel_common(dst, size, unsafe_ptr);
324 }
325 
326 static const struct bpf_func_proto bpf_probe_read_compat_proto = {
327 	.func		= bpf_probe_read_compat,
328 	.gpl_only	= true,
329 	.ret_type	= RET_INTEGER,
330 	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
331 	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
332 	.arg3_type	= ARG_ANYTHING,
333 };
334 
335 BPF_CALL_3(bpf_probe_read_compat_str, void *, dst, u32, size,
336 	   const void *, unsafe_ptr)
337 {
338 	if ((unsigned long)unsafe_ptr < TASK_SIZE) {
339 		return bpf_probe_read_user_str_common(dst, size,
340 				(__force void __user *)unsafe_ptr);
341 	}
342 	return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr);
343 }
344 
345 static const struct bpf_func_proto bpf_probe_read_compat_str_proto = {
346 	.func		= bpf_probe_read_compat_str,
347 	.gpl_only	= true,
348 	.ret_type	= RET_INTEGER,
349 	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
350 	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
351 	.arg3_type	= ARG_ANYTHING,
352 };
353 #endif /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */
354 
355 BPF_CALL_3(bpf_probe_write_user, void __user *, unsafe_ptr, const void *, src,
356 	   u32, size)
357 {
358 	/*
359 	 * Ensure we're in user context which is safe for the helper to
360 	 * run. This helper has no business in a kthread.
361 	 *
362 	 * access_ok() should prevent writing to non-user memory, but in
363 	 * some situations (nommu, temporary switch, etc) access_ok() does
364 	 * not provide enough validation, hence the check on KERNEL_DS.
365 	 *
366 	 * nmi_uaccess_okay() ensures the probe is not run in an interim
367 	 * state, when the task or mm are switched. This is specifically
368 	 * required to prevent the use of temporary mm.
369 	 */
370 
371 	if (unlikely(in_interrupt() ||
372 		     current->flags & (PF_KTHREAD | PF_EXITING)))
373 		return -EPERM;
374 	if (unlikely(!nmi_uaccess_okay()))
375 		return -EPERM;
376 
377 	return copy_to_user_nofault(unsafe_ptr, src, size);
378 }
379 
380 static const struct bpf_func_proto bpf_probe_write_user_proto = {
381 	.func		= bpf_probe_write_user,
382 	.gpl_only	= true,
383 	.ret_type	= RET_INTEGER,
384 	.arg1_type	= ARG_ANYTHING,
385 	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
386 	.arg3_type	= ARG_CONST_SIZE,
387 };
388 
389 #define MAX_TRACE_PRINTK_VARARGS	3
390 #define BPF_TRACE_PRINTK_SIZE		1024
391 
392 BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
393 	   u64, arg2, u64, arg3)
394 {
395 	u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 };
396 	struct bpf_bprintf_data data = {
397 		.get_bin_args	= true,
398 		.get_buf	= true,
399 	};
400 	int ret;
401 
402 	ret = bpf_bprintf_prepare(fmt, fmt_size, args,
403 				  MAX_TRACE_PRINTK_VARARGS, &data);
404 	if (ret < 0)
405 		return ret;
406 
407 	ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args);
408 
409 	trace_bpf_trace_printk(data.buf);
410 
411 	bpf_bprintf_cleanup(&data);
412 
413 	return ret;
414 }
415 
416 static const struct bpf_func_proto bpf_trace_printk_proto = {
417 	.func		= bpf_trace_printk,
418 	.gpl_only	= true,
419 	.ret_type	= RET_INTEGER,
420 	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
421 	.arg2_type	= ARG_CONST_SIZE,
422 };
423 
/*
 * Work item body: (re-)enable the bpf_trace/bpf_trace_printk trace event.
 * A rate-limited warning is logged when trace_set_clr_event() reports
 * failure.
 */
static void __set_printk_clr_event(struct work_struct *work)
{
	/*
	 * This program might be calling bpf_trace_printk,
	 * so enable the associated bpf_trace/bpf_trace_printk event.
	 * Repeat this each time as it is possible a user has
	 * disabled bpf_trace_printk events.  By loading a program
	 * calling bpf_trace_printk() however the user has expressed
	 * the intent to see such events.
	 */
	if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1))
		/* Terminate the message so it is logged as a complete line. */
		pr_warn_ratelimited("could not enable bpf_trace_printk events\n");
}
437 static DECLARE_WORK(set_printk_work, __set_printk_clr_event);
438 
439 const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
440 {
441 	schedule_work(&set_printk_work);
442 	return &bpf_trace_printk_proto;
443 }
444 
445 BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, args,
446 	   u32, data_len)
447 {
448 	struct bpf_bprintf_data data = {
449 		.get_bin_args	= true,
450 		.get_buf	= true,
451 	};
452 	int ret, num_args;
453 
454 	if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
455 	    (data_len && !args))
456 		return -EINVAL;
457 	num_args = data_len / 8;
458 
459 	ret = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data);
460 	if (ret < 0)
461 		return ret;
462 
463 	ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args);
464 
465 	trace_bpf_trace_printk(data.buf);
466 
467 	bpf_bprintf_cleanup(&data);
468 
469 	return ret;
470 }
471 
472 static const struct bpf_func_proto bpf_trace_vprintk_proto = {
473 	.func		= bpf_trace_vprintk,
474 	.gpl_only	= true,
475 	.ret_type	= RET_INTEGER,
476 	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
477 	.arg2_type	= ARG_CONST_SIZE,
478 	.arg3_type	= ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
479 	.arg4_type	= ARG_CONST_SIZE_OR_ZERO,
480 };
481 
482 const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void)
483 {
484 	schedule_work(&set_printk_work);
485 	return &bpf_trace_vprintk_proto;
486 }
487 
488 BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
489 	   const void *, args, u32, data_len)
490 {
491 	struct bpf_bprintf_data data = {
492 		.get_bin_args	= true,
493 	};
494 	int err, num_args;
495 
496 	if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
497 	    (data_len && !args))
498 		return -EINVAL;
499 	num_args = data_len / 8;
500 
501 	err = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data);
502 	if (err < 0)
503 		return err;
504 
505 	seq_bprintf(m, fmt, data.bin_args);
506 
507 	bpf_bprintf_cleanup(&data);
508 
509 	return seq_has_overflowed(m) ? -EOVERFLOW : 0;
510 }
511 
512 BTF_ID_LIST_SINGLE(btf_seq_file_ids, struct, seq_file)
513 
514 static const struct bpf_func_proto bpf_seq_printf_proto = {
515 	.func		= bpf_seq_printf,
516 	.gpl_only	= true,
517 	.ret_type	= RET_INTEGER,
518 	.arg1_type	= ARG_PTR_TO_BTF_ID,
519 	.arg1_btf_id	= &btf_seq_file_ids[0],
520 	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
521 	.arg3_type	= ARG_CONST_SIZE,
522 	.arg4_type      = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
523 	.arg5_type      = ARG_CONST_SIZE_OR_ZERO,
524 };
525 
526 BPF_CALL_3(bpf_seq_write, struct seq_file *, m, const void *, data, u32, len)
527 {
528 	return seq_write(m, data, len) ? -EOVERFLOW : 0;
529 }
530 
531 static const struct bpf_func_proto bpf_seq_write_proto = {
532 	.func		= bpf_seq_write,
533 	.gpl_only	= true,
534 	.ret_type	= RET_INTEGER,
535 	.arg1_type	= ARG_PTR_TO_BTF_ID,
536 	.arg1_btf_id	= &btf_seq_file_ids[0],
537 	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
538 	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
539 };
540 
541 BPF_CALL_4(bpf_seq_printf_btf, struct seq_file *, m, struct btf_ptr *, ptr,
542 	   u32, btf_ptr_size, u64, flags)
543 {
544 	const struct btf *btf;
545 	s32 btf_id;
546 	int ret;
547 
548 	ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id);
549 	if (ret)
550 		return ret;
551 
552 	return btf_type_seq_show_flags(btf, btf_id, ptr->ptr, m, flags);
553 }
554 
555 static const struct bpf_func_proto bpf_seq_printf_btf_proto = {
556 	.func		= bpf_seq_printf_btf,
557 	.gpl_only	= true,
558 	.ret_type	= RET_INTEGER,
559 	.arg1_type	= ARG_PTR_TO_BTF_ID,
560 	.arg1_btf_id	= &btf_seq_file_ids[0],
561 	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
562 	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
563 	.arg4_type	= ARG_ANYTHING,
564 };
565 
566 static __always_inline int
567 get_map_perf_counter(struct bpf_map *map, u64 flags,
568 		     u64 *value, u64 *enabled, u64 *running)
569 {
570 	struct bpf_array *array = container_of(map, struct bpf_array, map);
571 	unsigned int cpu = smp_processor_id();
572 	u64 index = flags & BPF_F_INDEX_MASK;
573 	struct bpf_event_entry *ee;
574 
575 	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
576 		return -EINVAL;
577 	if (index == BPF_F_CURRENT_CPU)
578 		index = cpu;
579 	if (unlikely(index >= array->map.max_entries))
580 		return -E2BIG;
581 
582 	ee = READ_ONCE(array->ptrs[index]);
583 	if (!ee)
584 		return -ENOENT;
585 
586 	return perf_event_read_local(ee->event, value, enabled, running);
587 }
588 
589 BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
590 {
591 	u64 value = 0;
592 	int err;
593 
594 	err = get_map_perf_counter(map, flags, &value, NULL, NULL);
595 	/*
596 	 * this api is ugly since we miss [-22..-2] range of valid
597 	 * counter values, but that's uapi
598 	 */
599 	if (err)
600 		return err;
601 	return value;
602 }
603 
604 const struct bpf_func_proto bpf_perf_event_read_proto = {
605 	.func		= bpf_perf_event_read,
606 	.gpl_only	= true,
607 	.ret_type	= RET_INTEGER,
608 	.arg1_type	= ARG_CONST_MAP_PTR,
609 	.arg2_type	= ARG_ANYTHING,
610 };
611 
612 BPF_CALL_4(bpf_perf_event_read_value, struct bpf_map *, map, u64, flags,
613 	   struct bpf_perf_event_value *, buf, u32, size)
614 {
615 	int err = -EINVAL;
616 
617 	if (unlikely(size != sizeof(struct bpf_perf_event_value)))
618 		goto clear;
619 	err = get_map_perf_counter(map, flags, &buf->counter, &buf->enabled,
620 				   &buf->running);
621 	if (unlikely(err))
622 		goto clear;
623 	return 0;
624 clear:
625 	memset(buf, 0, size);
626 	return err;
627 }
628 
629 static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
630 	.func		= bpf_perf_event_read_value,
631 	.gpl_only	= true,
632 	.ret_type	= RET_INTEGER,
633 	.arg1_type	= ARG_CONST_MAP_PTR,
634 	.arg2_type	= ARG_ANYTHING,
635 	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
636 	.arg4_type	= ARG_CONST_SIZE,
637 };
638 
639 const struct bpf_func_proto *bpf_get_perf_event_read_value_proto(void)
640 {
641 	return &bpf_perf_event_read_value_proto;
642 }
643 
644 static __always_inline u64
645 __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
646 			u64 flags, struct perf_raw_record *raw,
647 			struct perf_sample_data *sd)
648 {
649 	struct bpf_array *array = container_of(map, struct bpf_array, map);
650 	unsigned int cpu = smp_processor_id();
651 	u64 index = flags & BPF_F_INDEX_MASK;
652 	struct bpf_event_entry *ee;
653 	struct perf_event *event;
654 
655 	if (index == BPF_F_CURRENT_CPU)
656 		index = cpu;
657 	if (unlikely(index >= array->map.max_entries))
658 		return -E2BIG;
659 
660 	ee = READ_ONCE(array->ptrs[index]);
661 	if (!ee)
662 		return -ENOENT;
663 
664 	event = ee->event;
665 	if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE ||
666 		     event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
667 		return -EINVAL;
668 
669 	if (unlikely(event->oncpu != cpu))
670 		return -EOPNOTSUPP;
671 
672 	perf_sample_save_raw_data(sd, event, raw);
673 
674 	return perf_event_output(event, sd, regs);
675 }
676 
677 /*
678  * Support executing tracepoints in normal, irq, and nmi context that each call
679  * bpf_perf_event_output
680  */
681 struct bpf_trace_sample_data {
682 	struct perf_sample_data sds[3];
683 };
684 
685 static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_trace_sds);
686 static DEFINE_PER_CPU(int, bpf_trace_nest_level);
687 BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
688 	   u64, flags, void *, data, u64, size)
689 {
690 	struct bpf_trace_sample_data *sds;
691 	struct perf_raw_record raw = {
692 		.frag = {
693 			.size = size,
694 			.data = data,
695 		},
696 	};
697 	struct perf_sample_data *sd;
698 	int nest_level, err;
699 
700 	preempt_disable();
701 	sds = this_cpu_ptr(&bpf_trace_sds);
702 	nest_level = this_cpu_inc_return(bpf_trace_nest_level);
703 
704 	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(sds->sds))) {
705 		err = -EBUSY;
706 		goto out;
707 	}
708 
709 	sd = &sds->sds[nest_level - 1];
710 
711 	if (unlikely(flags & ~(BPF_F_INDEX_MASK))) {
712 		err = -EINVAL;
713 		goto out;
714 	}
715 
716 	perf_sample_data_init(sd, 0, 0);
717 
718 	err = __bpf_perf_event_output(regs, map, flags, &raw, sd);
719 out:
720 	this_cpu_dec(bpf_trace_nest_level);
721 	preempt_enable();
722 	return err;
723 }
724 
725 static const struct bpf_func_proto bpf_perf_event_output_proto = {
726 	.func		= bpf_perf_event_output,
727 	.gpl_only	= true,
728 	.ret_type	= RET_INTEGER,
729 	.arg1_type	= ARG_PTR_TO_CTX,
730 	.arg2_type	= ARG_CONST_MAP_PTR,
731 	.arg3_type	= ARG_ANYTHING,
732 	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
733 	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
734 };
735 
736 static DEFINE_PER_CPU(int, bpf_event_output_nest_level);
737 struct bpf_nested_pt_regs {
738 	struct pt_regs regs[3];
739 };
740 static DEFINE_PER_CPU(struct bpf_nested_pt_regs, bpf_pt_regs);
741 static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_misc_sds);
742 
743 u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
744 		     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
745 {
746 	struct perf_raw_frag frag = {
747 		.copy		= ctx_copy,
748 		.size		= ctx_size,
749 		.data		= ctx,
750 	};
751 	struct perf_raw_record raw = {
752 		.frag = {
753 			{
754 				.next	= ctx_size ? &frag : NULL,
755 			},
756 			.size	= meta_size,
757 			.data	= meta,
758 		},
759 	};
760 	struct perf_sample_data *sd;
761 	struct pt_regs *regs;
762 	int nest_level;
763 	u64 ret;
764 
765 	preempt_disable();
766 	nest_level = this_cpu_inc_return(bpf_event_output_nest_level);
767 
768 	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bpf_misc_sds.sds))) {
769 		ret = -EBUSY;
770 		goto out;
771 	}
772 	sd = this_cpu_ptr(&bpf_misc_sds.sds[nest_level - 1]);
773 	regs = this_cpu_ptr(&bpf_pt_regs.regs[nest_level - 1]);
774 
775 	perf_fetch_caller_regs(regs);
776 	perf_sample_data_init(sd, 0, 0);
777 
778 	ret = __bpf_perf_event_output(regs, map, flags, &raw, sd);
779 out:
780 	this_cpu_dec(bpf_event_output_nest_level);
781 	preempt_enable();
782 	return ret;
783 }
784 
785 BPF_CALL_0(bpf_get_current_task)
786 {
787 	return (long) current;
788 }
789 
790 const struct bpf_func_proto bpf_get_current_task_proto = {
791 	.func		= bpf_get_current_task,
792 	.gpl_only	= true,
793 	.ret_type	= RET_INTEGER,
794 };
795 
796 BPF_CALL_0(bpf_get_current_task_btf)
797 {
798 	return (unsigned long) current;
799 }
800 
801 const struct bpf_func_proto bpf_get_current_task_btf_proto = {
802 	.func		= bpf_get_current_task_btf,
803 	.gpl_only	= true,
804 	.ret_type	= RET_PTR_TO_BTF_ID_TRUSTED,
805 	.ret_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
806 };
807 
808 BPF_CALL_1(bpf_task_pt_regs, struct task_struct *, task)
809 {
810 	return (unsigned long) task_pt_regs(task);
811 }
812 
813 BTF_ID_LIST_SINGLE(bpf_task_pt_regs_ids, struct, pt_regs)
814 
815 const struct bpf_func_proto bpf_task_pt_regs_proto = {
816 	.func		= bpf_task_pt_regs,
817 	.gpl_only	= true,
818 	.arg1_type	= ARG_PTR_TO_BTF_ID,
819 	.arg1_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
820 	.ret_type	= RET_PTR_TO_BTF_ID,
821 	.ret_btf_id	= &bpf_task_pt_regs_ids[0],
822 };
823 
824 struct send_signal_irq_work {
825 	struct irq_work irq_work;
826 	struct task_struct *task;
827 	u32 sig;
828 	enum pid_type type;
829 	bool has_siginfo;
830 	struct kernel_siginfo info;
831 };
832 
833 static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work);
834 
835 static void do_bpf_send_signal(struct irq_work *entry)
836 {
837 	struct send_signal_irq_work *work;
838 	struct kernel_siginfo *siginfo;
839 
840 	work = container_of(entry, struct send_signal_irq_work, irq_work);
841 	siginfo = work->has_siginfo ? &work->info : SEND_SIG_PRIV;
842 
843 	group_send_sig_info(work->sig, siginfo, work->task, work->type);
844 	put_task_struct(work->task);
845 }
846 
847 static int bpf_send_signal_common(u32 sig, enum pid_type type, struct task_struct *task, u64 value)
848 {
849 	struct send_signal_irq_work *work = NULL;
850 	struct kernel_siginfo info;
851 	struct kernel_siginfo *siginfo;
852 
853 	if (!task) {
854 		task = current;
855 		siginfo = SEND_SIG_PRIV;
856 	} else {
857 		clear_siginfo(&info);
858 		info.si_signo = sig;
859 		info.si_errno = 0;
860 		info.si_code = SI_KERNEL;
861 		info.si_pid = 0;
862 		info.si_uid = 0;
863 		info.si_value.sival_ptr = (void __user __force *)(unsigned long)value;
864 		siginfo = &info;
865 	}
866 
867 	/* Similar to bpf_probe_write_user, task needs to be
868 	 * in a sound condition and kernel memory access be
869 	 * permitted in order to send signal to the current
870 	 * task.
871 	 */
872 	if (unlikely(task->flags & (PF_KTHREAD | PF_EXITING)))
873 		return -EPERM;
874 	if (unlikely(!nmi_uaccess_okay()))
875 		return -EPERM;
876 	/* Task should not be pid=1 to avoid kernel panic. */
877 	if (unlikely(is_global_init(task)))
878 		return -EPERM;
879 
880 	if (preempt_count() != 0 || irqs_disabled()) {
881 		/* Do an early check on signal validity. Otherwise,
882 		 * the error is lost in deferred irq_work.
883 		 */
884 		if (unlikely(!valid_signal(sig)))
885 			return -EINVAL;
886 
887 		work = this_cpu_ptr(&send_signal_work);
888 		if (irq_work_is_busy(&work->irq_work))
889 			return -EBUSY;
890 
891 		/* Add the current task, which is the target of sending signal,
892 		 * to the irq_work. The current task may change when queued
893 		 * irq works get executed.
894 		 */
895 		work->task = get_task_struct(task);
896 		work->has_siginfo = siginfo == &info;
897 		if (work->has_siginfo)
898 			copy_siginfo(&work->info, &info);
899 		work->sig = sig;
900 		work->type = type;
901 		irq_work_queue(&work->irq_work);
902 		return 0;
903 	}
904 
905 	return group_send_sig_info(sig, siginfo, task, type);
906 }
907 
908 BPF_CALL_1(bpf_send_signal, u32, sig)
909 {
910 	return bpf_send_signal_common(sig, PIDTYPE_TGID, NULL, 0);
911 }
912 
913 const struct bpf_func_proto bpf_send_signal_proto = {
914 	.func		= bpf_send_signal,
915 	.gpl_only	= false,
916 	.ret_type	= RET_INTEGER,
917 	.arg1_type	= ARG_ANYTHING,
918 };
919 
920 BPF_CALL_1(bpf_send_signal_thread, u32, sig)
921 {
922 	return bpf_send_signal_common(sig, PIDTYPE_PID, NULL, 0);
923 }
924 
925 const struct bpf_func_proto bpf_send_signal_thread_proto = {
926 	.func		= bpf_send_signal_thread,
927 	.gpl_only	= false,
928 	.ret_type	= RET_INTEGER,
929 	.arg1_type	= ARG_ANYTHING,
930 };
931 
932 BPF_CALL_3(bpf_d_path, const struct path *, path, char *, buf, u32, sz)
933 {
934 	struct path copy;
935 	long len;
936 	char *p;
937 
938 	if (!sz)
939 		return 0;
940 
941 	/*
942 	 * The path pointer is verified as trusted and safe to use,
943 	 * but let's double check it's valid anyway to workaround
944 	 * potentially broken verifier.
945 	 */
946 	len = copy_from_kernel_nofault(&copy, path, sizeof(*path));
947 	if (len < 0)
948 		return len;
949 
950 	p = d_path(&copy, buf, sz);
951 	if (IS_ERR(p)) {
952 		len = PTR_ERR(p);
953 	} else {
954 		len = buf + sz - p;
955 		memmove(buf, p, len);
956 	}
957 
958 	return len;
959 }
960 
961 BTF_SET_START(btf_allowlist_d_path)
962 #ifdef CONFIG_SECURITY
963 BTF_ID(func, security_file_permission)
964 BTF_ID(func, security_inode_getattr)
965 BTF_ID(func, security_file_open)
966 #endif
967 #ifdef CONFIG_SECURITY_PATH
968 BTF_ID(func, security_path_truncate)
969 #endif
970 BTF_ID(func, vfs_truncate)
971 BTF_ID(func, vfs_fallocate)
972 BTF_ID(func, dentry_open)
973 BTF_ID(func, vfs_getattr)
974 BTF_ID(func, filp_close)
975 BTF_SET_END(btf_allowlist_d_path)
976 
977 static bool bpf_d_path_allowed(const struct bpf_prog *prog)
978 {
979 	if (prog->type == BPF_PROG_TYPE_TRACING &&
980 	    prog->expected_attach_type == BPF_TRACE_ITER)
981 		return true;
982 
983 	if (prog->type == BPF_PROG_TYPE_LSM)
984 		return bpf_lsm_is_sleepable_hook(prog->aux->attach_btf_id);
985 
986 	return btf_id_set_contains(&btf_allowlist_d_path,
987 				   prog->aux->attach_btf_id);
988 }
989 
990 BTF_ID_LIST_SINGLE(bpf_d_path_btf_ids, struct, path)
991 
992 static const struct bpf_func_proto bpf_d_path_proto = {
993 	.func		= bpf_d_path,
994 	.gpl_only	= false,
995 	.ret_type	= RET_INTEGER,
996 	.arg1_type	= ARG_PTR_TO_BTF_ID,
997 	.arg1_btf_id	= &bpf_d_path_btf_ids[0],
998 	.arg2_type	= ARG_PTR_TO_MEM | MEM_WRITE,
999 	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
1000 	.allowed	= bpf_d_path_allowed,
1001 };
1002 
1003 #define BTF_F_ALL	(BTF_F_COMPACT  | BTF_F_NONAME | \
1004 			 BTF_F_PTR_RAW | BTF_F_ZERO)
1005 
1006 static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
1007 				  u64 flags, const struct btf **btf,
1008 				  s32 *btf_id)
1009 {
1010 	const struct btf_type *t;
1011 
1012 	if (unlikely(flags & ~(BTF_F_ALL)))
1013 		return -EINVAL;
1014 
1015 	if (btf_ptr_size != sizeof(struct btf_ptr))
1016 		return -EINVAL;
1017 
1018 	*btf = bpf_get_btf_vmlinux();
1019 
1020 	if (IS_ERR_OR_NULL(*btf))
1021 		return IS_ERR(*btf) ? PTR_ERR(*btf) : -EINVAL;
1022 
1023 	if (ptr->type_id > 0)
1024 		*btf_id = ptr->type_id;
1025 	else
1026 		return -EINVAL;
1027 
1028 	if (*btf_id > 0)
1029 		t = btf_type_by_id(*btf, *btf_id);
1030 	if (*btf_id <= 0 || !t)
1031 		return -ENOENT;
1032 
1033 	return 0;
1034 }
1035 
1036 BPF_CALL_5(bpf_snprintf_btf, char *, str, u32, str_size, struct btf_ptr *, ptr,
1037 	   u32, btf_ptr_size, u64, flags)
1038 {
1039 	const struct btf *btf;
1040 	s32 btf_id;
1041 	int ret;
1042 
1043 	ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id);
1044 	if (ret)
1045 		return ret;
1046 
1047 	return btf_type_snprintf_show(btf, btf_id, ptr->ptr, str, str_size,
1048 				      flags);
1049 }
1050 
1051 const struct bpf_func_proto bpf_snprintf_btf_proto = {
1052 	.func		= bpf_snprintf_btf,
1053 	.gpl_only	= false,
1054 	.ret_type	= RET_INTEGER,
1055 	.arg1_type	= ARG_PTR_TO_MEM | MEM_WRITE,
1056 	.arg2_type	= ARG_CONST_SIZE,
1057 	.arg3_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
1058 	.arg4_type	= ARG_CONST_SIZE,
1059 	.arg5_type	= ARG_ANYTHING,
1060 };
1061 
1062 BPF_CALL_1(bpf_get_func_ip_tracing, void *, ctx)
1063 {
1064 	/* This helper call is inlined by verifier. */
1065 	return ((u64 *)ctx)[-2];
1066 }
1067 
1068 static const struct bpf_func_proto bpf_get_func_ip_proto_tracing = {
1069 	.func		= bpf_get_func_ip_tracing,
1070 	.gpl_only	= true,
1071 	.ret_type	= RET_INTEGER,
1072 	.arg1_type	= ARG_PTR_TO_CTX,
1073 };
1074 
/*
 * Map an fentry address to the function entry address. With
 * CONFIG_X86_KERNEL_IBT, if the ENDBR_INSN_SIZE bytes right before
 * @fentry_ip are an ENDBR instruction, the entry is that instruction;
 * otherwise @fentry_ip is returned unchanged.
 */
static inline unsigned long get_entry_ip(unsigned long fentry_ip)
{
	unsigned long entry = fentry_ip;

#ifdef CONFIG_X86_KERNEL_IBT
	if (is_endbr((void *)(fentry_ip - ENDBR_INSN_SIZE)))
		entry = fentry_ip - ENDBR_INSN_SIZE;
#endif
	return entry;
}
1083 
1084 BPF_CALL_1(bpf_get_func_ip_kprobe, struct pt_regs *, regs)
1085 {
1086 	struct bpf_trace_run_ctx *run_ctx __maybe_unused;
1087 	struct kprobe *kp;
1088 
1089 #ifdef CONFIG_UPROBES
1090 	run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx);
1091 	if (run_ctx->is_uprobe)
1092 		return ((struct uprobe_dispatch_data *)current->utask->vaddr)->bp_addr;
1093 #endif
1094 
1095 	kp = kprobe_running();
1096 
1097 	if (!kp || !(kp->flags & KPROBE_FLAG_ON_FUNC_ENTRY))
1098 		return 0;
1099 
1100 	return get_entry_ip((uintptr_t)kp->addr);
1101 }
1102 
1103 static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe = {
1104 	.func		= bpf_get_func_ip_kprobe,
1105 	.gpl_only	= true,
1106 	.ret_type	= RET_INTEGER,
1107 	.arg1_type	= ARG_PTR_TO_CTX,
1108 };
1109 
1110 BPF_CALL_1(bpf_get_func_ip_kprobe_multi, struct pt_regs *, regs)
1111 {
1112 	return bpf_kprobe_multi_entry_ip(current->bpf_ctx);
1113 }
1114 
1115 static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe_multi = {
1116 	.func		= bpf_get_func_ip_kprobe_multi,
1117 	.gpl_only	= false,
1118 	.ret_type	= RET_INTEGER,
1119 	.arg1_type	= ARG_PTR_TO_CTX,
1120 };
1121 
1122 BPF_CALL_1(bpf_get_attach_cookie_kprobe_multi, struct pt_regs *, regs)
1123 {
1124 	return bpf_kprobe_multi_cookie(current->bpf_ctx);
1125 }
1126 
1127 static const struct bpf_func_proto bpf_get_attach_cookie_proto_kmulti = {
1128 	.func		= bpf_get_attach_cookie_kprobe_multi,
1129 	.gpl_only	= false,
1130 	.ret_type	= RET_INTEGER,
1131 	.arg1_type	= ARG_PTR_TO_CTX,
1132 };
1133 
1134 BPF_CALL_1(bpf_get_func_ip_uprobe_multi, struct pt_regs *, regs)
1135 {
1136 	return bpf_uprobe_multi_entry_ip(current->bpf_ctx);
1137 }
1138 
1139 static const struct bpf_func_proto bpf_get_func_ip_proto_uprobe_multi = {
1140 	.func		= bpf_get_func_ip_uprobe_multi,
1141 	.gpl_only	= false,
1142 	.ret_type	= RET_INTEGER,
1143 	.arg1_type	= ARG_PTR_TO_CTX,
1144 };
1145 
1146 BPF_CALL_1(bpf_get_attach_cookie_uprobe_multi, struct pt_regs *, regs)
1147 {
1148 	return bpf_uprobe_multi_cookie(current->bpf_ctx);
1149 }
1150 
1151 static const struct bpf_func_proto bpf_get_attach_cookie_proto_umulti = {
1152 	.func		= bpf_get_attach_cookie_uprobe_multi,
1153 	.gpl_only	= false,
1154 	.ret_type	= RET_INTEGER,
1155 	.arg1_type	= ARG_PTR_TO_CTX,
1156 };
1157 
1158 BPF_CALL_1(bpf_get_attach_cookie_trace, void *, ctx)
1159 {
1160 	struct bpf_trace_run_ctx *run_ctx;
1161 
1162 	run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx);
1163 	return run_ctx->bpf_cookie;
1164 }
1165 
1166 static const struct bpf_func_proto bpf_get_attach_cookie_proto_trace = {
1167 	.func		= bpf_get_attach_cookie_trace,
1168 	.gpl_only	= false,
1169 	.ret_type	= RET_INTEGER,
1170 	.arg1_type	= ARG_PTR_TO_CTX,
1171 };
1172 
1173 BPF_CALL_1(bpf_get_attach_cookie_pe, struct bpf_perf_event_data_kern *, ctx)
1174 {
1175 	return ctx->event->bpf_cookie;
1176 }
1177 
1178 static const struct bpf_func_proto bpf_get_attach_cookie_proto_pe = {
1179 	.func		= bpf_get_attach_cookie_pe,
1180 	.gpl_only	= false,
1181 	.ret_type	= RET_INTEGER,
1182 	.arg1_type	= ARG_PTR_TO_CTX,
1183 };
1184 
1185 BPF_CALL_1(bpf_get_attach_cookie_tracing, void *, ctx)
1186 {
1187 	struct bpf_trace_run_ctx *run_ctx;
1188 
1189 	run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx);
1190 	return run_ctx->bpf_cookie;
1191 }
1192 
1193 static const struct bpf_func_proto bpf_get_attach_cookie_proto_tracing = {
1194 	.func		= bpf_get_attach_cookie_tracing,
1195 	.gpl_only	= false,
1196 	.ret_type	= RET_INTEGER,
1197 	.arg1_type	= ARG_PTR_TO_CTX,
1198 };
1199 
1200 BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags)
1201 {
1202 	static const u32 br_entry_size = sizeof(struct perf_branch_entry);
1203 	u32 entry_cnt = size / br_entry_size;
1204 
1205 	entry_cnt = static_call(perf_snapshot_branch_stack)(buf, entry_cnt);
1206 
1207 	if (unlikely(flags))
1208 		return -EINVAL;
1209 
1210 	if (!entry_cnt)
1211 		return -ENOENT;
1212 
1213 	return entry_cnt * br_entry_size;
1214 }
1215 
1216 const struct bpf_func_proto bpf_get_branch_snapshot_proto = {
1217 	.func		= bpf_get_branch_snapshot,
1218 	.gpl_only	= true,
1219 	.ret_type	= RET_INTEGER,
1220 	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
1221 	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
1222 };
1223 
1224 BPF_CALL_3(get_func_arg, void *, ctx, u32, n, u64 *, value)
1225 {
1226 	/* This helper call is inlined by verifier. */
1227 	u64 nr_args = ((u64 *)ctx)[-1] & 0xFF;
1228 
1229 	if ((u64) n >= nr_args)
1230 		return -EINVAL;
1231 	*value = ((u64 *)ctx)[n];
1232 	return 0;
1233 }
1234 
1235 static const struct bpf_func_proto bpf_get_func_arg_proto = {
1236 	.func		= get_func_arg,
1237 	.ret_type	= RET_INTEGER,
1238 	.arg1_type	= ARG_PTR_TO_CTX,
1239 	.arg2_type	= ARG_ANYTHING,
1240 	.arg3_type	= ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
1241 	.arg3_size	= sizeof(u64),
1242 };
1243 
1244 BPF_CALL_2(get_func_ret, void *, ctx, u64 *, value)
1245 {
1246 	/* This helper call is inlined by verifier. */
1247 	u64 nr_args = ((u64 *)ctx)[-1] & 0xFF;
1248 
1249 	*value = ((u64 *)ctx)[nr_args];
1250 	return 0;
1251 }
1252 
1253 static const struct bpf_func_proto bpf_get_func_ret_proto = {
1254 	.func		= get_func_ret,
1255 	.ret_type	= RET_INTEGER,
1256 	.arg1_type	= ARG_PTR_TO_CTX,
1257 	.arg2_type	= ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
1258 	.arg2_size	= sizeof(u64),
1259 };
1260 
1261 BPF_CALL_1(get_func_arg_cnt, void *, ctx)
1262 {
1263 	/* This helper call is inlined by verifier. */
1264 	return ((u64 *)ctx)[-1] & 0xFF;
1265 }
1266 
1267 static const struct bpf_func_proto bpf_get_func_arg_cnt_proto = {
1268 	.func		= get_func_arg_cnt,
1269 	.ret_type	= RET_INTEGER,
1270 	.arg1_type	= ARG_PTR_TO_CTX,
1271 };
1272 
1273 static const struct bpf_func_proto *
1274 bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
1275 {
1276 	const struct bpf_func_proto *func_proto;
1277 
1278 	switch (func_id) {
1279 	case BPF_FUNC_get_smp_processor_id:
1280 		return &bpf_get_smp_processor_id_proto;
1281 #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
1282 	case BPF_FUNC_probe_read:
1283 		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
1284 		       NULL : &bpf_probe_read_compat_proto;
1285 	case BPF_FUNC_probe_read_str:
1286 		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
1287 		       NULL : &bpf_probe_read_compat_str_proto;
1288 #endif
1289 	case BPF_FUNC_get_func_ip:
1290 		return &bpf_get_func_ip_proto_tracing;
1291 	default:
1292 		break;
1293 	}
1294 
1295 	func_proto = bpf_base_func_proto(func_id, prog);
1296 	if (func_proto)
1297 		return func_proto;
1298 
1299 	if (!bpf_token_capable(prog->aux->token, CAP_SYS_ADMIN))
1300 		return NULL;
1301 
1302 	switch (func_id) {
1303 	case BPF_FUNC_probe_write_user:
1304 		return security_locked_down(LOCKDOWN_BPF_WRITE_USER) < 0 ?
1305 		       NULL : &bpf_probe_write_user_proto;
1306 	default:
1307 		return NULL;
1308 	}
1309 }
1310 
1311 static bool is_kprobe_multi(const struct bpf_prog *prog)
1312 {
1313 	return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI ||
1314 	       prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION;
1315 }
1316 
1317 static inline bool is_kprobe_session(const struct bpf_prog *prog)
1318 {
1319 	return prog->type == BPF_PROG_TYPE_KPROBE &&
1320 	       prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION;
1321 }
1322 
1323 static inline bool is_uprobe_multi(const struct bpf_prog *prog)
1324 {
1325 	return prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI ||
1326 	       prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION;
1327 }
1328 
1329 static inline bool is_uprobe_session(const struct bpf_prog *prog)
1330 {
1331 	return prog->type == BPF_PROG_TYPE_KPROBE &&
1332 	       prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION;
1333 }
1334 
1335 static inline bool is_trace_fsession(const struct bpf_prog *prog)
1336 {
1337 	return prog->type == BPF_PROG_TYPE_TRACING &&
1338 	       (prog->expected_attach_type == BPF_TRACE_FSESSION ||
1339 		prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI);
1340 }
1341 
1342 static const struct bpf_func_proto *
1343 kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
1344 {
1345 	switch (func_id) {
1346 	case BPF_FUNC_perf_event_output:
1347 		return &bpf_perf_event_output_proto;
1348 	case BPF_FUNC_get_stackid:
1349 		return &bpf_get_stackid_proto;
1350 	case BPF_FUNC_get_stack:
1351 		return prog->sleepable ? &bpf_get_stack_sleepable_proto : &bpf_get_stack_proto;
1352 #ifdef CONFIG_BPF_KPROBE_OVERRIDE
1353 	case BPF_FUNC_override_return:
1354 		return &bpf_override_return_proto;
1355 #endif
1356 	case BPF_FUNC_get_func_ip:
1357 		if (is_kprobe_multi(prog))
1358 			return &bpf_get_func_ip_proto_kprobe_multi;
1359 		if (is_uprobe_multi(prog))
1360 			return &bpf_get_func_ip_proto_uprobe_multi;
1361 		return &bpf_get_func_ip_proto_kprobe;
1362 	case BPF_FUNC_get_attach_cookie:
1363 		if (is_kprobe_multi(prog))
1364 			return &bpf_get_attach_cookie_proto_kmulti;
1365 		if (is_uprobe_multi(prog))
1366 			return &bpf_get_attach_cookie_proto_umulti;
1367 		return &bpf_get_attach_cookie_proto_trace;
1368 	default:
1369 		return bpf_tracing_func_proto(func_id, prog);
1370 	}
1371 }
1372 
1373 /* bpf+kprobe programs can access fields of 'struct pt_regs' */
1374 static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
1375 					const struct bpf_prog *prog,
1376 					struct bpf_insn_access_aux *info)
1377 {
1378 	if (off < 0 || off >= sizeof(struct pt_regs))
1379 		return false;
1380 	if (off % size != 0)
1381 		return false;
1382 	/*
1383 	 * Assertion for 32 bit to make sure last 8 byte access
1384 	 * (BPF_DW) to the last 4 byte member is disallowed.
1385 	 */
1386 	if (off + size > sizeof(struct pt_regs))
1387 		return false;
1388 
1389 	if (type == BPF_WRITE)
1390 		prog->aux->kprobe_write_ctx = true;
1391 
1392 	return true;
1393 }
1394 
1395 const struct bpf_verifier_ops kprobe_verifier_ops = {
1396 	.get_func_proto  = kprobe_prog_func_proto,
1397 	.is_valid_access = kprobe_prog_is_valid_access,
1398 };
1399 
1400 const struct bpf_prog_ops kprobe_prog_ops = {
1401 };
1402 
1403 BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map,
1404 	   u64, flags, void *, data, u64, size)
1405 {
1406 	struct pt_regs *regs = *(struct pt_regs **)tp_buff;
1407 
1408 	/*
1409 	 * r1 points to perf tracepoint buffer where first 8 bytes are hidden
1410 	 * from bpf program and contain a pointer to 'struct pt_regs'. Fetch it
1411 	 * from there and call the same bpf_perf_event_output() helper inline.
1412 	 */
1413 	return ____bpf_perf_event_output(regs, map, flags, data, size);
1414 }
1415 
1416 static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
1417 	.func		= bpf_perf_event_output_tp,
1418 	.gpl_only	= true,
1419 	.ret_type	= RET_INTEGER,
1420 	.arg1_type	= ARG_PTR_TO_CTX,
1421 	.arg2_type	= ARG_CONST_MAP_PTR,
1422 	.arg3_type	= ARG_ANYTHING,
1423 	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
1424 	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
1425 };
1426 
1427 BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map,
1428 	   u64, flags)
1429 {
1430 	struct pt_regs *regs = *(struct pt_regs **)tp_buff;
1431 
1432 	/*
1433 	 * Same comment as in bpf_perf_event_output_tp(), only that this time
1434 	 * the other helper's function body cannot be inlined due to being
1435 	 * external, thus we need to call raw helper function.
1436 	 */
1437 	return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
1438 			       flags, 0, 0);
1439 }
1440 
1441 static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
1442 	.func		= bpf_get_stackid_tp,
1443 	.gpl_only	= true,
1444 	.ret_type	= RET_INTEGER,
1445 	.arg1_type	= ARG_PTR_TO_CTX,
1446 	.arg2_type	= ARG_CONST_MAP_PTR,
1447 	.arg3_type	= ARG_ANYTHING,
1448 };
1449 
1450 BPF_CALL_4(bpf_get_stack_tp, void *, tp_buff, void *, buf, u32, size,
1451 	   u64, flags)
1452 {
1453 	struct pt_regs *regs = *(struct pt_regs **)tp_buff;
1454 
1455 	return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
1456 			     (unsigned long) size, flags, 0);
1457 }
1458 
1459 static const struct bpf_func_proto bpf_get_stack_proto_tp = {
1460 	.func		= bpf_get_stack_tp,
1461 	.gpl_only	= true,
1462 	.ret_type	= RET_INTEGER,
1463 	.arg1_type	= ARG_PTR_TO_CTX,
1464 	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
1465 	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
1466 	.arg4_type	= ARG_ANYTHING,
1467 };
1468 
1469 static const struct bpf_func_proto *
1470 tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
1471 {
1472 	switch (func_id) {
1473 	case BPF_FUNC_perf_event_output:
1474 		return &bpf_perf_event_output_proto_tp;
1475 	case BPF_FUNC_get_stackid:
1476 		return &bpf_get_stackid_proto_tp;
1477 	case BPF_FUNC_get_stack:
1478 		return &bpf_get_stack_proto_tp;
1479 	case BPF_FUNC_get_attach_cookie:
1480 		return &bpf_get_attach_cookie_proto_trace;
1481 	default:
1482 		return bpf_tracing_func_proto(func_id, prog);
1483 	}
1484 }
1485 
1486 static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
1487 				    const struct bpf_prog *prog,
1488 				    struct bpf_insn_access_aux *info)
1489 {
1490 	if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
1491 		return false;
1492 	if (type != BPF_READ)
1493 		return false;
1494 	if (off % size != 0)
1495 		return false;
1496 
1497 	BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(__u64));
1498 	return true;
1499 }
1500 
1501 const struct bpf_verifier_ops tracepoint_verifier_ops = {
1502 	.get_func_proto  = tp_prog_func_proto,
1503 	.is_valid_access = tp_prog_is_valid_access,
1504 };
1505 
1506 const struct bpf_prog_ops tracepoint_prog_ops = {
1507 };
1508 
1509 BPF_CALL_3(bpf_perf_prog_read_value, struct bpf_perf_event_data_kern *, ctx,
1510 	   struct bpf_perf_event_value *, buf, u32, size)
1511 {
1512 	int err = -EINVAL;
1513 
1514 	if (unlikely(size != sizeof(struct bpf_perf_event_value)))
1515 		goto clear;
1516 	err = perf_event_read_local(ctx->event, &buf->counter, &buf->enabled,
1517 				    &buf->running);
1518 	if (unlikely(err))
1519 		goto clear;
1520 	return 0;
1521 clear:
1522 	memset(buf, 0, size);
1523 	return err;
1524 }
1525 
1526 static const struct bpf_func_proto bpf_perf_prog_read_value_proto = {
1527          .func           = bpf_perf_prog_read_value,
1528          .gpl_only       = true,
1529          .ret_type       = RET_INTEGER,
1530          .arg1_type      = ARG_PTR_TO_CTX,
1531          .arg2_type      = ARG_PTR_TO_UNINIT_MEM,
1532          .arg3_type      = ARG_CONST_SIZE,
1533 };
1534 
1535 BPF_CALL_4(bpf_read_branch_records, struct bpf_perf_event_data_kern *, ctx,
1536 	   void *, buf, u32, size, u64, flags)
1537 {
1538 	static const u32 br_entry_size = sizeof(struct perf_branch_entry);
1539 	struct perf_branch_stack *br_stack = ctx->data->br_stack;
1540 	u32 to_copy;
1541 
1542 	if (unlikely(flags & ~BPF_F_GET_BRANCH_RECORDS_SIZE))
1543 		return -EINVAL;
1544 
1545 	if (unlikely(!(ctx->data->sample_flags & PERF_SAMPLE_BRANCH_STACK)))
1546 		return -ENOENT;
1547 
1548 	if (unlikely(!br_stack))
1549 		return -ENOENT;
1550 
1551 	if (flags & BPF_F_GET_BRANCH_RECORDS_SIZE)
1552 		return br_stack->nr * br_entry_size;
1553 
1554 	if (!buf || (size % br_entry_size != 0))
1555 		return -EINVAL;
1556 
1557 	to_copy = min_t(u32, br_stack->nr * br_entry_size, size);
1558 	memcpy(buf, br_stack->entries, to_copy);
1559 
1560 	return to_copy;
1561 }
1562 
1563 static const struct bpf_func_proto bpf_read_branch_records_proto = {
1564 	.func           = bpf_read_branch_records,
1565 	.gpl_only       = true,
1566 	.ret_type       = RET_INTEGER,
1567 	.arg1_type      = ARG_PTR_TO_CTX,
1568 	.arg2_type      = ARG_PTR_TO_MEM_OR_NULL | MEM_WRITE,
1569 	.arg3_type      = ARG_CONST_SIZE_OR_ZERO,
1570 	.arg4_type      = ARG_ANYTHING,
1571 };
1572 
1573 static const struct bpf_func_proto *
1574 pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
1575 {
1576 	switch (func_id) {
1577 	case BPF_FUNC_perf_event_output:
1578 		return &bpf_perf_event_output_proto_tp;
1579 	case BPF_FUNC_get_stackid:
1580 		return &bpf_get_stackid_proto_pe;
1581 	case BPF_FUNC_get_stack:
1582 		return &bpf_get_stack_proto_pe;
1583 	case BPF_FUNC_perf_prog_read_value:
1584 		return &bpf_perf_prog_read_value_proto;
1585 	case BPF_FUNC_read_branch_records:
1586 		return &bpf_read_branch_records_proto;
1587 	case BPF_FUNC_get_attach_cookie:
1588 		return &bpf_get_attach_cookie_proto_pe;
1589 	default:
1590 		return bpf_tracing_func_proto(func_id, prog);
1591 	}
1592 }
1593 
1594 /*
1595  * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp
1596  * to avoid potential recursive reuse issue when/if tracepoints are added
1597  * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack.
1598  *
1599  * Since raw tracepoints run despite bpf_prog_active, support concurrent usage
1600  * in normal, irq, and nmi context.
1601  */
1602 struct bpf_raw_tp_regs {
1603 	struct pt_regs regs[3];
1604 };
1605 static DEFINE_PER_CPU(struct bpf_raw_tp_regs, bpf_raw_tp_regs);
1606 static DEFINE_PER_CPU(int, bpf_raw_tp_nest_level);
1607 static struct pt_regs *get_bpf_raw_tp_regs(void)
1608 {
1609 	struct bpf_raw_tp_regs *tp_regs = this_cpu_ptr(&bpf_raw_tp_regs);
1610 	int nest_level = this_cpu_inc_return(bpf_raw_tp_nest_level);
1611 
1612 	if (nest_level > ARRAY_SIZE(tp_regs->regs)) {
1613 		this_cpu_dec(bpf_raw_tp_nest_level);
1614 		return ERR_PTR(-EBUSY);
1615 	}
1616 
1617 	return &tp_regs->regs[nest_level - 1];
1618 }
1619 
1620 static void put_bpf_raw_tp_regs(void)
1621 {
1622 	this_cpu_dec(bpf_raw_tp_nest_level);
1623 }
1624 
1625 BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args,
1626 	   struct bpf_map *, map, u64, flags, void *, data, u64, size)
1627 {
1628 	struct pt_regs *regs = get_bpf_raw_tp_regs();
1629 	int ret;
1630 
1631 	if (IS_ERR(regs))
1632 		return PTR_ERR(regs);
1633 
1634 	perf_fetch_caller_regs(regs);
1635 	ret = ____bpf_perf_event_output(regs, map, flags, data, size);
1636 
1637 	put_bpf_raw_tp_regs();
1638 	return ret;
1639 }
1640 
1641 static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
1642 	.func		= bpf_perf_event_output_raw_tp,
1643 	.gpl_only	= true,
1644 	.ret_type	= RET_INTEGER,
1645 	.arg1_type	= ARG_PTR_TO_CTX,
1646 	.arg2_type	= ARG_CONST_MAP_PTR,
1647 	.arg3_type	= ARG_ANYTHING,
1648 	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
1649 	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
1650 };
1651 
/* Helper prototypes handed out by tracing_prog_func_proto(). */
extern const struct bpf_func_proto bpf_skb_output_proto;
extern const struct bpf_func_proto bpf_xdp_output_proto;
extern const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto;
1655 
1656 BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
1657 	   struct bpf_map *, map, u64, flags)
1658 {
1659 	struct pt_regs *regs = get_bpf_raw_tp_regs();
1660 	int ret;
1661 
1662 	if (IS_ERR(regs))
1663 		return PTR_ERR(regs);
1664 
1665 	perf_fetch_caller_regs(regs);
1666 	/* similar to bpf_perf_event_output_tp, but pt_regs fetched differently */
1667 	ret = bpf_get_stackid((unsigned long) regs, (unsigned long) map,
1668 			      flags, 0, 0);
1669 	put_bpf_raw_tp_regs();
1670 	return ret;
1671 }
1672 
1673 static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
1674 	.func		= bpf_get_stackid_raw_tp,
1675 	.gpl_only	= true,
1676 	.ret_type	= RET_INTEGER,
1677 	.arg1_type	= ARG_PTR_TO_CTX,
1678 	.arg2_type	= ARG_CONST_MAP_PTR,
1679 	.arg3_type	= ARG_ANYTHING,
1680 };
1681 
1682 BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args,
1683 	   void *, buf, u32, size, u64, flags)
1684 {
1685 	struct pt_regs *regs = get_bpf_raw_tp_regs();
1686 	int ret;
1687 
1688 	if (IS_ERR(regs))
1689 		return PTR_ERR(regs);
1690 
1691 	perf_fetch_caller_regs(regs);
1692 	ret = bpf_get_stack((unsigned long) regs, (unsigned long) buf,
1693 			    (unsigned long) size, flags, 0);
1694 	put_bpf_raw_tp_regs();
1695 	return ret;
1696 }
1697 
1698 static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
1699 	.func		= bpf_get_stack_raw_tp,
1700 	.gpl_only	= true,
1701 	.ret_type	= RET_INTEGER,
1702 	.arg1_type	= ARG_PTR_TO_CTX,
1703 	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
1704 	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
1705 	.arg4_type	= ARG_ANYTHING,
1706 };
1707 
1708 static const struct bpf_func_proto *
1709 raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
1710 {
1711 	switch (func_id) {
1712 	case BPF_FUNC_perf_event_output:
1713 		return &bpf_perf_event_output_proto_raw_tp;
1714 	case BPF_FUNC_get_stackid:
1715 		return &bpf_get_stackid_proto_raw_tp;
1716 	case BPF_FUNC_get_stack:
1717 		return &bpf_get_stack_proto_raw_tp;
1718 	case BPF_FUNC_get_attach_cookie:
1719 		return &bpf_get_attach_cookie_proto_tracing;
1720 	default:
1721 		return bpf_tracing_func_proto(func_id, prog);
1722 	}
1723 }
1724 
1725 const struct bpf_func_proto *
1726 tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
1727 {
1728 	const struct bpf_func_proto *fn;
1729 
1730 	switch (func_id) {
1731 #ifdef CONFIG_NET
1732 	case BPF_FUNC_skb_output:
1733 		return &bpf_skb_output_proto;
1734 	case BPF_FUNC_xdp_output:
1735 		return &bpf_xdp_output_proto;
1736 	case BPF_FUNC_skc_to_tcp6_sock:
1737 		return &bpf_skc_to_tcp6_sock_proto;
1738 	case BPF_FUNC_skc_to_tcp_sock:
1739 		return &bpf_skc_to_tcp_sock_proto;
1740 	case BPF_FUNC_skc_to_tcp_timewait_sock:
1741 		return &bpf_skc_to_tcp_timewait_sock_proto;
1742 	case BPF_FUNC_skc_to_tcp_request_sock:
1743 		return &bpf_skc_to_tcp_request_sock_proto;
1744 	case BPF_FUNC_skc_to_udp6_sock:
1745 		return &bpf_skc_to_udp6_sock_proto;
1746 	case BPF_FUNC_skc_to_unix_sock:
1747 		return &bpf_skc_to_unix_sock_proto;
1748 	case BPF_FUNC_skc_to_mptcp_sock:
1749 		return &bpf_skc_to_mptcp_sock_proto;
1750 	case BPF_FUNC_sk_storage_get:
1751 		return &bpf_sk_storage_get_tracing_proto;
1752 	case BPF_FUNC_sk_storage_delete:
1753 		return &bpf_sk_storage_delete_tracing_proto;
1754 	case BPF_FUNC_sock_from_file:
1755 		return &bpf_sock_from_file_proto;
1756 	case BPF_FUNC_get_socket_cookie:
1757 		return &bpf_get_socket_ptr_cookie_proto;
1758 	case BPF_FUNC_xdp_get_buff_len:
1759 		return &bpf_xdp_get_buff_len_trace_proto;
1760 #endif
1761 	case BPF_FUNC_seq_printf:
1762 		return prog->expected_attach_type == BPF_TRACE_ITER ?
1763 		       &bpf_seq_printf_proto :
1764 		       NULL;
1765 	case BPF_FUNC_seq_write:
1766 		return prog->expected_attach_type == BPF_TRACE_ITER ?
1767 		       &bpf_seq_write_proto :
1768 		       NULL;
1769 	case BPF_FUNC_seq_printf_btf:
1770 		return prog->expected_attach_type == BPF_TRACE_ITER ?
1771 		       &bpf_seq_printf_btf_proto :
1772 		       NULL;
1773 	case BPF_FUNC_d_path:
1774 		return &bpf_d_path_proto;
1775 	case BPF_FUNC_get_func_arg:
1776 		if (bpf_prog_has_trampoline(prog) ||
1777 		    prog->expected_attach_type == BPF_TRACE_RAW_TP)
1778 			return &bpf_get_func_arg_proto;
1779 		return NULL;
1780 	case BPF_FUNC_get_func_ret:
1781 		return bpf_prog_has_trampoline(prog) ? &bpf_get_func_ret_proto : NULL;
1782 	case BPF_FUNC_get_func_arg_cnt:
1783 		if (bpf_prog_has_trampoline(prog) ||
1784 		    prog->expected_attach_type == BPF_TRACE_RAW_TP)
1785 			return &bpf_get_func_arg_cnt_proto;
1786 		return NULL;
1787 	case BPF_FUNC_get_attach_cookie:
1788 		if (prog->type == BPF_PROG_TYPE_TRACING &&
1789 		    prog->expected_attach_type == BPF_TRACE_RAW_TP)
1790 			return &bpf_get_attach_cookie_proto_tracing;
1791 		return bpf_prog_has_trampoline(prog) ? &bpf_get_attach_cookie_proto_tracing : NULL;
1792 	default:
1793 		fn = raw_tp_prog_func_proto(func_id, prog);
1794 		if (!fn && prog->expected_attach_type == BPF_TRACE_ITER)
1795 			fn = bpf_iter_get_func_proto(func_id, prog);
1796 		return fn;
1797 	}
1798 }
1799 
1800 static bool raw_tp_prog_is_valid_access(int off, int size,
1801 					enum bpf_access_type type,
1802 					const struct bpf_prog *prog,
1803 					struct bpf_insn_access_aux *info)
1804 {
1805 	return bpf_tracing_ctx_access(off, size, type);
1806 }
1807 
1808 static bool tracing_prog_is_valid_access(int off, int size,
1809 					 enum bpf_access_type type,
1810 					 const struct bpf_prog *prog,
1811 					 struct bpf_insn_access_aux *info)
1812 {
1813 	return bpf_tracing_btf_ctx_access(off, size, type, prog, info);
1814 }
1815 
1816 int __weak bpf_prog_test_run_tracing(struct bpf_prog *prog,
1817 				     const union bpf_attr *kattr,
1818 				     union bpf_attr __user *uattr)
1819 {
1820 	return -ENOTSUPP;
1821 }
1822 
1823 const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
1824 	.get_func_proto  = raw_tp_prog_func_proto,
1825 	.is_valid_access = raw_tp_prog_is_valid_access,
1826 };
1827 
1828 const struct bpf_prog_ops raw_tracepoint_prog_ops = {
1829 #ifdef CONFIG_NET
1830 	.test_run = bpf_prog_test_run_raw_tp,
1831 #endif
1832 };
1833 
1834 const struct bpf_verifier_ops tracing_verifier_ops = {
1835 	.get_func_proto  = tracing_prog_func_proto,
1836 	.is_valid_access = tracing_prog_is_valid_access,
1837 };
1838 
1839 const struct bpf_prog_ops tracing_prog_ops = {
1840 	.test_run = bpf_prog_test_run_tracing,
1841 };
1842 
1843 static bool raw_tp_writable_prog_is_valid_access(int off, int size,
1844 						 enum bpf_access_type type,
1845 						 const struct bpf_prog *prog,
1846 						 struct bpf_insn_access_aux *info)
1847 {
1848 	if (off == 0) {
1849 		if (size != sizeof(u64) || type != BPF_READ)
1850 			return false;
1851 		info->reg_type = PTR_TO_TP_BUFFER;
1852 	}
1853 	return raw_tp_prog_is_valid_access(off, size, type, prog, info);
1854 }
1855 
1856 const struct bpf_verifier_ops raw_tracepoint_writable_verifier_ops = {
1857 	.get_func_proto  = raw_tp_prog_func_proto,
1858 	.is_valid_access = raw_tp_writable_prog_is_valid_access,
1859 };
1860 
1861 const struct bpf_prog_ops raw_tracepoint_writable_prog_ops = {
1862 };
1863 
1864 static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
1865 				    const struct bpf_prog *prog,
1866 				    struct bpf_insn_access_aux *info)
1867 {
1868 	const int size_u64 = sizeof(u64);
1869 
1870 	if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
1871 		return false;
1872 	if (type != BPF_READ)
1873 		return false;
1874 	if (off % size != 0) {
1875 		if (sizeof(unsigned long) != 4)
1876 			return false;
1877 		if (size != 8)
1878 			return false;
1879 		if (off % size != 4)
1880 			return false;
1881 	}
1882 
1883 	switch (off) {
1884 	case bpf_ctx_range(struct bpf_perf_event_data, sample_period):
1885 		bpf_ctx_record_field_size(info, size_u64);
1886 		if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
1887 			return false;
1888 		break;
1889 	case bpf_ctx_range(struct bpf_perf_event_data, addr):
1890 		bpf_ctx_record_field_size(info, size_u64);
1891 		if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
1892 			return false;
1893 		break;
1894 	default:
1895 		if (size != sizeof(long))
1896 			return false;
1897 	}
1898 
1899 	return true;
1900 }
1901 
1902 static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
1903 				      const struct bpf_insn *si,
1904 				      struct bpf_insn *insn_buf,
1905 				      struct bpf_prog *prog, u32 *target_size)
1906 {
1907 	struct bpf_insn *insn = insn_buf;
1908 
1909 	switch (si->off) {
1910 	case offsetof(struct bpf_perf_event_data, sample_period):
1911 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
1912 						       data), si->dst_reg, si->src_reg,
1913 				      offsetof(struct bpf_perf_event_data_kern, data));
1914 		*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
1915 				      bpf_target_off(struct perf_sample_data, period, 8,
1916 						     target_size));
1917 		break;
1918 	case offsetof(struct bpf_perf_event_data, addr):
1919 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
1920 						       data), si->dst_reg, si->src_reg,
1921 				      offsetof(struct bpf_perf_event_data_kern, data));
1922 		*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
1923 				      bpf_target_off(struct perf_sample_data, addr, 8,
1924 						     target_size));
1925 		break;
1926 	default:
1927 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
1928 						       regs), si->dst_reg, si->src_reg,
1929 				      offsetof(struct bpf_perf_event_data_kern, regs));
1930 		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), si->dst_reg, si->dst_reg,
1931 				      si->off);
1932 		break;
1933 	}
1934 
1935 	return insn - insn_buf;
1936 }
1937 
1938 const struct bpf_verifier_ops perf_event_verifier_ops = {
1939 	.get_func_proto		= pe_prog_func_proto,
1940 	.is_valid_access	= pe_prog_is_valid_access,
1941 	.convert_ctx_access	= pe_prog_convert_ctx_access,
1942 };
1943 
1944 const struct bpf_prog_ops perf_event_prog_ops = {
1945 };
1946 
/* Held while a perf event's tp_event->prog_array is read or replaced. */
static DEFINE_MUTEX(bpf_event_mutex);

/* Upper bound on programs per prog_array and on ids returned by a query. */
#define BPF_TRACE_MAX_PROGS 64
1950 
1951 int perf_event_attach_bpf_prog(struct perf_event *event,
1952 			       struct bpf_prog *prog,
1953 			       u64 bpf_cookie)
1954 {
1955 	struct bpf_prog_array *old_array;
1956 	struct bpf_prog_array *new_array;
1957 	int ret = -EEXIST;
1958 
1959 	/*
1960 	 * Kprobe override only works if they are on the function entry,
1961 	 * and only if they are on the opt-in list.
1962 	 */
1963 	if (prog->kprobe_override &&
1964 	    (!trace_kprobe_on_func_entry(event->tp_event) ||
1965 	     !trace_kprobe_error_injectable(event->tp_event)))
1966 		return -EINVAL;
1967 
1968 	mutex_lock(&bpf_event_mutex);
1969 
1970 	if (event->prog)
1971 		goto unlock;
1972 
1973 	old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
1974 	if (old_array &&
1975 	    bpf_prog_array_length(old_array) >= BPF_TRACE_MAX_PROGS) {
1976 		ret = -E2BIG;
1977 		goto unlock;
1978 	}
1979 
1980 	ret = bpf_prog_array_copy(old_array, NULL, prog, bpf_cookie, &new_array);
1981 	if (ret < 0)
1982 		goto unlock;
1983 
1984 	/* set the new array to event->tp_event and set event->prog */
1985 	event->prog = prog;
1986 	event->bpf_cookie = bpf_cookie;
1987 	rcu_assign_pointer(event->tp_event->prog_array, new_array);
1988 	bpf_prog_array_free_sleepable(old_array);
1989 
1990 unlock:
1991 	mutex_unlock(&bpf_event_mutex);
1992 	return ret;
1993 }
1994 
1995 void perf_event_detach_bpf_prog(struct perf_event *event)
1996 {
1997 	struct bpf_prog_array *old_array;
1998 	struct bpf_prog_array *new_array;
1999 	struct bpf_prog *prog = NULL;
2000 	int ret;
2001 
2002 	mutex_lock(&bpf_event_mutex);
2003 
2004 	if (!event->prog)
2005 		goto unlock;
2006 
2007 	old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
2008 	if (!old_array)
2009 		goto put;
2010 
2011 	ret = bpf_prog_array_copy(old_array, event->prog, NULL, 0, &new_array);
2012 	if (ret < 0) {
2013 		bpf_prog_array_delete_safe(old_array, event->prog);
2014 	} else {
2015 		rcu_assign_pointer(event->tp_event->prog_array, new_array);
2016 		bpf_prog_array_free_sleepable(old_array);
2017 	}
2018 
2019 put:
2020 	prog = event->prog;
2021 	event->prog = NULL;
2022 
2023 unlock:
2024 	mutex_unlock(&bpf_event_mutex);
2025 
2026 	if (prog) {
2027 		/*
2028 		 * It could be that the bpf_prog is not sleepable (and will be freed
2029 		 * via normal RCU), but is called from a point that supports sleepable
2030 		 * programs and uses tasks-trace-RCU.
2031 		 */
2032 		synchronize_rcu_tasks_trace();
2033 
2034 		bpf_prog_put(prog);
2035 	}
2036 }
2037 
2038 int perf_event_query_prog_array(struct perf_event *event, void __user *info)
2039 {
2040 	struct perf_event_query_bpf __user *uquery = info;
2041 	struct perf_event_query_bpf query = {};
2042 	struct bpf_prog_array *progs;
2043 	u32 *ids, prog_cnt, ids_len;
2044 	int ret;
2045 
2046 	if (!perfmon_capable())
2047 		return -EPERM;
2048 	if (event->attr.type != PERF_TYPE_TRACEPOINT)
2049 		return -EINVAL;
2050 	if (copy_from_user(&query, uquery, sizeof(query)))
2051 		return -EFAULT;
2052 
2053 	ids_len = query.ids_len;
2054 	if (ids_len > BPF_TRACE_MAX_PROGS)
2055 		return -E2BIG;
2056 	ids = kcalloc(ids_len, sizeof(u32), GFP_USER | __GFP_NOWARN);
2057 	if (!ids)
2058 		return -ENOMEM;
2059 	/*
2060 	 * The above kcalloc returns ZERO_SIZE_PTR when ids_len = 0, which
2061 	 * is required when user only wants to check for uquery->prog_cnt.
2062 	 * There is no need to check for it since the case is handled
2063 	 * gracefully in bpf_prog_array_copy_info.
2064 	 */
2065 
2066 	mutex_lock(&bpf_event_mutex);
2067 	progs = bpf_event_rcu_dereference(event->tp_event->prog_array);
2068 	ret = bpf_prog_array_copy_info(progs, ids, ids_len, &prog_cnt);
2069 	mutex_unlock(&bpf_event_mutex);
2070 
2071 	if (copy_to_user(&uquery->prog_cnt, &prog_cnt, sizeof(prog_cnt)) ||
2072 	    copy_to_user(uquery->ids, ids, ids_len * sizeof(u32)))
2073 		ret = -EFAULT;
2074 
2075 	kfree(ids);
2076 	return ret;
2077 }
2078 
/*
 * Start and end of the array of raw tracepoint maps that
 * bpf_get_raw_tracepoint() scans before falling back to modules.
 * NOTE(review): presumably linker-provided section bounds - confirm.
 */
2079 extern struct bpf_raw_event_map __start__bpf_raw_tp[];
2080 extern struct bpf_raw_event_map __stop__bpf_raw_tp[];
2081 
2082 struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name)
2083 {
2084 	struct bpf_raw_event_map *btp = __start__bpf_raw_tp;
2085 
2086 	for (; btp < __stop__bpf_raw_tp; btp++) {
2087 		if (!strcmp(btp->tp->name, name))
2088 			return btp;
2089 	}
2090 
2091 	return bpf_get_raw_tracepoint_module(name);
2092 }
2093 
2094 void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
2095 {
2096 	struct module *mod;
2097 
2098 	guard(rcu)();
2099 	mod = __module_address((unsigned long)btp);
2100 	module_put(mod);
2101 }
2102 
/*
 * Run the program of a raw tracepoint link with @args as its context.
 *
 * Sleepable programs run inside rcu_read_lock_tasks_trace() with migration
 * disabled; all others run inside rcu_read_lock_dont_migrate().  When
 * bpf_prog_get_recursion_context() fails the program is not run and its
 * miss counter is incremented instead.
 */
2103 static __always_inline
2104 void __bpf_trace_run(struct bpf_raw_tp_link *link, u64 *args)
2105 {
2106 	struct srcu_ctr __percpu *scp = NULL;
2107 	struct bpf_prog *prog = link->link.prog;
2108 	bool sleepable = prog->sleepable;
2109 	struct bpf_run_ctx *old_run_ctx;
2110 	struct bpf_trace_run_ctx run_ctx;
2111 
	/* Enter the read-side section matching the program's sleepable flag. */
2112 	if (sleepable) {
2113 		scp = rcu_read_lock_tasks_trace();
2114 		migrate_disable();
2115 	} else {
2116 		rcu_read_lock_dont_migrate();
2117 	}
2118 
2119 	if (unlikely(!bpf_prog_get_recursion_context(prog))) {
2120 		bpf_prog_inc_misses_counter(prog);
2121 		goto out;
2122 	}
2123 
	/* Make the link's cookie visible through the current run context. */
2124 	run_ctx.bpf_cookie = link->cookie;
2125 	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
2126 
2127 	(void)bpf_prog_run(prog, args);
2128 
2129 	bpf_reset_run_ctx(old_run_ctx);
2130 out:
	/*
	 * Reached on the failed-get path as well.
	 * NOTE(review): confirm put is meant to pair with a failed get.
	 */
2131 	bpf_prog_put_recursion_context(prog);
2132 
	/* Leave the read-side section in the reverse order of entry. */
2133 	if (sleepable) {
2134 		migrate_enable();
2135 		rcu_read_unlock_tasks_trace(scp);
2136 	} else {
2137 		rcu_read_unlock_migrate();
2138 	}
2139 }
2140 
/*
 * Token-repetition helpers used to generate the bpf_trace_run<N>() family.
 *
 * REPEAT(count, fn, sep, items...) applies fn() to the first 'count' items
 * and joins the results with the tokens found inside the parenthesised
 * 'sep'.  'count' must be a literal between 1 and 12 because it is pasted
 * onto REPEAT_.
 */
#define UNPACK(...)				__VA_ARGS__
#define REPEAT_1(fn, sep, head, ...)	fn(head)
#define REPEAT_2(fn, sep, head, ...)	fn(head) UNPACK sep REPEAT_1(fn, sep, __VA_ARGS__)
#define REPEAT_3(fn, sep, head, ...)	fn(head) UNPACK sep REPEAT_2(fn, sep, __VA_ARGS__)
#define REPEAT_4(fn, sep, head, ...)	fn(head) UNPACK sep REPEAT_3(fn, sep, __VA_ARGS__)
#define REPEAT_5(fn, sep, head, ...)	fn(head) UNPACK sep REPEAT_4(fn, sep, __VA_ARGS__)
#define REPEAT_6(fn, sep, head, ...)	fn(head) UNPACK sep REPEAT_5(fn, sep, __VA_ARGS__)
#define REPEAT_7(fn, sep, head, ...)	fn(head) UNPACK sep REPEAT_6(fn, sep, __VA_ARGS__)
#define REPEAT_8(fn, sep, head, ...)	fn(head) UNPACK sep REPEAT_7(fn, sep, __VA_ARGS__)
#define REPEAT_9(fn, sep, head, ...)	fn(head) UNPACK sep REPEAT_8(fn, sep, __VA_ARGS__)
#define REPEAT_10(fn, sep, head, ...)	fn(head) UNPACK sep REPEAT_9(fn, sep, __VA_ARGS__)
#define REPEAT_11(fn, sep, head, ...)	fn(head) UNPACK sep REPEAT_10(fn, sep, __VA_ARGS__)
#define REPEAT_12(fn, sep, head, ...)	fn(head) UNPACK sep REPEAT_11(fn, sep, __VA_ARGS__)
#define REPEAT(count, fn, sep, ...)	REPEAT_##count(fn, sep, __VA_ARGS__)

/* Per-item generators: a "u64 argN" parameter and an "args[N] = argN" store. */
#define SARG(idx)	u64 arg##idx
#define COPY(idx)	args[idx] = arg##idx

/* Separators, wrapped in parentheses so a comma survives as one argument. */
#define __DL_COM	(,)
#define __DL_SEM	(;)

/* Item list handed to REPEAT(); only the first 'count' entries are used. */
#define __SEQ_0_11	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
2163 
2164 #define BPF_TRACE_DEFN_x(x)						\
2165 	void bpf_trace_run##x(struct bpf_raw_tp_link *link,		\
2166 			      REPEAT(x, SARG, __DL_COM, __SEQ_0_11))	\
2167 	{								\
2168 		u64 args[x];						\
2169 		REPEAT(x, COPY, __DL_SEM, __SEQ_0_11);			\
2170 		__bpf_trace_run(link, args);				\
2171 	}								\
2172 	EXPORT_SYMBOL_GPL(bpf_trace_run##x)
2173 BPF_TRACE_DEFN_x(1);
2174 BPF_TRACE_DEFN_x(2);
2175 BPF_TRACE_DEFN_x(3);
2176 BPF_TRACE_DEFN_x(4);
2177 BPF_TRACE_DEFN_x(5);
2178 BPF_TRACE_DEFN_x(6);
2179 BPF_TRACE_DEFN_x(7);
2180 BPF_TRACE_DEFN_x(8);
2181 BPF_TRACE_DEFN_x(9);
2182 BPF_TRACE_DEFN_x(10);
2183 BPF_TRACE_DEFN_x(11);
2184 BPF_TRACE_DEFN_x(12);
2185 
2186 int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link)
2187 {
2188 	struct tracepoint *tp = btp->tp;
2189 	struct bpf_prog *prog = link->link.prog;
2190 
2191 	/*
2192 	 * check that program doesn't access arguments beyond what's
2193 	 * available in this tracepoint
2194 	 */
2195 	if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64))
2196 		return -EINVAL;
2197 
2198 	if (prog->aux->max_tp_access > btp->writable_size)
2199 		return -EINVAL;
2200 
2201 	return tracepoint_probe_register_may_exist(tp, (void *)btp->bpf_func, link);
2202 }
2203 
2204 int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link)
2205 {
2206 	return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, link);
2207 }
2208 
2209 int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
2210 			    u32 *fd_type, const char **buf,
2211 			    u64 *probe_offset, u64 *probe_addr,
2212 			    unsigned long *missed)
2213 {
2214 	bool is_tracepoint, is_syscall_tp;
2215 	struct bpf_prog *prog;
2216 	int flags, err = 0;
2217 
2218 	prog = event->prog;
2219 	if (!prog)
2220 		return -ENOENT;
2221 
2222 	/* not supporting BPF_PROG_TYPE_PERF_EVENT yet */
2223 	if (prog->type == BPF_PROG_TYPE_PERF_EVENT)
2224 		return -EOPNOTSUPP;
2225 
2226 	*prog_id = prog->aux->id;
2227 	flags = event->tp_event->flags;
2228 	is_tracepoint = flags & TRACE_EVENT_FL_TRACEPOINT;
2229 	is_syscall_tp = is_syscall_trace_event(event->tp_event);
2230 
2231 	if (is_tracepoint || is_syscall_tp) {
2232 		*buf = is_tracepoint ? event->tp_event->tp->name
2233 				     : event->tp_event->name;
2234 		/* We allow NULL pointer for tracepoint */
2235 		if (fd_type)
2236 			*fd_type = BPF_FD_TYPE_TRACEPOINT;
2237 		if (probe_offset)
2238 			*probe_offset = 0x0;
2239 		if (probe_addr)
2240 			*probe_addr = 0x0;
2241 	} else {
2242 		/* kprobe/uprobe */
2243 		err = -EOPNOTSUPP;
2244 #ifdef CONFIG_KPROBE_EVENTS
2245 		if (flags & TRACE_EVENT_FL_KPROBE)
2246 			err = bpf_get_kprobe_info(event, fd_type, buf,
2247 						  probe_offset, probe_addr, missed,
2248 						  event->attr.type == PERF_TYPE_TRACEPOINT);
2249 #endif
2250 #ifdef CONFIG_UPROBE_EVENTS
2251 		if (flags & TRACE_EVENT_FL_UPROBE)
2252 			err = bpf_get_uprobe_info(event, fd_type, buf,
2253 						  probe_offset, probe_addr,
2254 						  event->attr.type == PERF_TYPE_TRACEPOINT);
2255 #endif
2256 	}
2257 
2258 	return err;
2259 }
2260 
2261 static int __init send_signal_irq_work_init(void)
2262 {
2263 	int cpu;
2264 	struct send_signal_irq_work *work;
2265 
2266 	for_each_possible_cpu(cpu) {
2267 		work = per_cpu_ptr(&send_signal_work, cpu);
2268 		init_irq_work(&work->irq_work, do_bpf_send_signal);
2269 	}
2270 	return 0;
2271 }
2272 
2273 subsys_initcall(send_signal_irq_work_init);
2274 
2275 #ifdef CONFIG_MODULES
2276 static int bpf_event_notify(struct notifier_block *nb, unsigned long op,
2277 			    void *module)
2278 {
2279 	struct bpf_trace_module *btm, *tmp;
2280 	struct module *mod = module;
2281 	int ret = 0;
2282 
2283 	if (mod->num_bpf_raw_events == 0 ||
2284 	    (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING))
2285 		goto out;
2286 
2287 	mutex_lock(&bpf_module_mutex);
2288 
2289 	switch (op) {
2290 	case MODULE_STATE_COMING:
2291 		btm = kzalloc_obj(*btm);
2292 		if (btm) {
2293 			btm->module = module;
2294 			list_add(&btm->list, &bpf_trace_modules);
2295 		} else {
2296 			ret = -ENOMEM;
2297 		}
2298 		break;
2299 	case MODULE_STATE_GOING:
2300 		list_for_each_entry_safe(btm, tmp, &bpf_trace_modules, list) {
2301 			if (btm->module == module) {
2302 				list_del(&btm->list);
2303 				kfree(btm);
2304 				break;
2305 			}
2306 		}
2307 		break;
2308 	}
2309 
2310 	mutex_unlock(&bpf_module_mutex);
2311 
2312 out:
2313 	return notifier_from_errno(ret);
2314 }
2315 
/* Notifier block that routes module state changes to bpf_event_notify(). */
2316 static struct notifier_block bpf_module_nb = {
2317 	.notifier_call = bpf_event_notify,
2318 };
2319 
2320 static int __init bpf_event_init(void)
2321 {
2322 	register_module_notifier(&bpf_module_nb);
2323 	return 0;
2324 }
2325 
2326 fs_initcall(bpf_event_init);
2327 #endif /* CONFIG_MODULES */
2328 
/*
 * Run context shared by the kprobe_multi and uprobe_multi program runners
 * in this file; both embed it as the first member of their own run context.
 */
2329 struct bpf_session_run_ctx {
	/* generic run context installed with bpf_set_run_ctx() */
2330 	struct bpf_run_ctx run_ctx;
	/* copied from the runner's is_return argument (true from the exit handler) */
2331 	bool is_return;
	/* opaque pointer forwarded unchanged from the probe handler */
2332 	void *data;
2333 };
2334 
2335 #ifdef CONFIG_FPROBE
/* A kprobe_multi link: one fprobe attached to a set of kernel addresses. */
2336 struct bpf_kprobe_multi_link {
2337 	struct bpf_link link;
	/* handlers recover the link from this member via container_of() */
2338 	struct fprobe fp;
	/* cnt probed addresses; sorted at attach time when cookies are present */
2339 	unsigned long *addrs;
	/* optional cookies, index-aligned with addrs; NULL when none were given */
2340 	u64 *cookies;
2341 	u32 cnt;
	/* number of entries in mods */
2342 	u32 mods_cnt;
	/* modules referenced for the probed addresses; put in the release callback */
2343 	struct module **mods;
2344 };
2345 
/* Per-invocation context built on the stack by kprobe_multi_link_prog_run(). */
2346 struct bpf_kprobe_multi_run_ctx {
2347 	struct bpf_session_run_ctx session_ctx;
	/* link whose program is running; used for the cookie lookup */
2348 	struct bpf_kprobe_multi_link *link;
	/* entry address reported by bpf_kprobe_multi_entry_ip() */
2349 	unsigned long entry_ip;
2350 };
2351 
/* Symbol names copied in from user space by copy_user_syms(). */
2352 struct user_syms {
	/* one pointer per symbol, each pointing into buf */
2353 	const char **syms;
	/* backing storage holding the NUL-terminated names back to back */
2354 	char *buf;
2355 };
2356 
/*
 * Buffer argument for ftrace_partial_regs()/ftrace_partial_regs_update() in
 * kprobe_multi_link_prog_run(): a per-CPU pt_regs when
 * CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS is not set, NULL otherwise.
 * NOTE(review): presumably no scratch copy is needed when ftrace_regs
 * already contains pt_regs - confirm.
 */
2357 #ifndef CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS
2358 static DEFINE_PER_CPU(struct pt_regs, bpf_kprobe_multi_pt_regs);
2359 #define bpf_kprobe_multi_pt_regs_ptr()	this_cpu_ptr(&bpf_kprobe_multi_pt_regs)
2360 #else
2361 #define bpf_kprobe_multi_pt_regs_ptr()	(NULL)
2362 #endif
2363 
/*
 * Translate an fprobe-reported @fentry_ip through ftrace_get_symaddr(),
 * falling back to @fentry_ip itself when that lookup yields 0.
 */
static unsigned long ftrace_get_entry_ip(unsigned long fentry_ip)
{
	unsigned long symaddr = ftrace_get_symaddr(fentry_ip);

	if (!symaddr)
		return fentry_ip;
	return symaddr;
}
2370 
2371 static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, u32 cnt)
2372 {
2373 	unsigned long __user usymbol;
2374 	const char **syms = NULL;
2375 	char *buf = NULL, *p;
2376 	int err = -ENOMEM;
2377 	unsigned int i;
2378 
2379 	if (!access_ok(usyms, cnt * sizeof(*usyms)))
2380 		return -EFAULT;
2381 
2382 	syms = kvmalloc_array(cnt, sizeof(*syms), GFP_KERNEL);
2383 	if (!syms)
2384 		return -ENOMEM;
2385 
2386 	buf = kvmalloc_array(cnt, KSYM_NAME_LEN, GFP_KERNEL);
2387 	if (!buf)
2388 		goto error;
2389 
2390 	for (p = buf, i = 0; i < cnt; i++) {
2391 		if (__get_user(usymbol, usyms + i)) {
2392 			err = -EFAULT;
2393 			goto error;
2394 		}
2395 		err = strncpy_from_user(p, (const char __user *) usymbol, KSYM_NAME_LEN);
2396 		if (err == KSYM_NAME_LEN)
2397 			err = -E2BIG;
2398 		if (err < 0)
2399 			goto error;
2400 		syms[i] = p;
2401 		p += err + 1;
2402 	}
2403 
2404 	us->syms = syms;
2405 	us->buf = buf;
2406 	return 0;
2407 
2408 error:
2409 	kvfree(syms);
2410 	kvfree(buf);
2411 	return err;
2412 }
2413 
2414 static void kprobe_multi_put_modules(struct module **mods, u32 cnt)
2415 {
2416 	u32 i;
2417 
2418 	for (i = 0; i < cnt; i++)
2419 		module_put(mods[i]);
2420 }
2421 
2422 static void free_user_syms(struct user_syms *us)
2423 {
2424 	kvfree(us->syms);
2425 	kvfree(us->buf);
2426 }
2427 
2428 static void bpf_kprobe_multi_link_release(struct bpf_link *link)
2429 {
2430 	struct bpf_kprobe_multi_link *kmulti_link;
2431 
2432 	kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
2433 	/* Don't wait for RCU GP here. */
2434 	unregister_fprobe_async(&kmulti_link->fp);
2435 	kprobe_multi_put_modules(kmulti_link->mods, kmulti_link->mods_cnt);
2436 }
2437 
2438 static void bpf_kprobe_multi_link_dealloc(struct bpf_link *link)
2439 {
2440 	struct bpf_kprobe_multi_link *kmulti_link;
2441 
2442 	kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
2443 	kvfree(kmulti_link->addrs);
2444 	kvfree(kmulti_link->cookies);
2445 	kfree(kmulti_link->mods);
2446 	kfree(kmulti_link);
2447 }
2448 
2449 static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link,
2450 						struct bpf_link_info *info)
2451 {
2452 	u64 __user *ucookies = u64_to_user_ptr(info->kprobe_multi.cookies);
2453 	u64 __user *uaddrs = u64_to_user_ptr(info->kprobe_multi.addrs);
2454 	struct bpf_kprobe_multi_link *kmulti_link;
2455 	u32 ucount = info->kprobe_multi.count;
2456 	int err = 0, i;
2457 
2458 	if (!uaddrs ^ !ucount)
2459 		return -EINVAL;
2460 	if (ucookies && !ucount)
2461 		return -EINVAL;
2462 
2463 	kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
2464 	info->kprobe_multi.count = kmulti_link->cnt;
2465 	info->kprobe_multi.flags = kmulti_link->link.flags;
2466 	info->kprobe_multi.missed = kmulti_link->fp.nmissed;
2467 
2468 	if (!uaddrs)
2469 		return 0;
2470 	if (ucount < kmulti_link->cnt)
2471 		err = -ENOSPC;
2472 	else
2473 		ucount = kmulti_link->cnt;
2474 
2475 	if (ucookies) {
2476 		if (kmulti_link->cookies) {
2477 			if (copy_to_user(ucookies, kmulti_link->cookies, ucount * sizeof(u64)))
2478 				return -EFAULT;
2479 		} else {
2480 			for (i = 0; i < ucount; i++) {
2481 				if (put_user(0, ucookies + i))
2482 					return -EFAULT;
2483 			}
2484 		}
2485 	}
2486 
2487 	if (kallsyms_show_value(current_cred())) {
2488 		if (copy_to_user(uaddrs, kmulti_link->addrs, ucount * sizeof(u64)))
2489 			return -EFAULT;
2490 	} else {
2491 		for (i = 0; i < ucount; i++) {
2492 			if (put_user(0, uaddrs + i))
2493 				return -EFAULT;
2494 		}
2495 	}
2496 	return err;
2497 }
2498 
2499 #ifdef CONFIG_PROC_FS
2500 static void bpf_kprobe_multi_show_fdinfo(const struct bpf_link *link,
2501 					 struct seq_file *seq)
2502 {
2503 	struct bpf_kprobe_multi_link *kmulti_link;
2504 	bool has_cookies;
2505 
2506 	kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
2507 	has_cookies = !!kmulti_link->cookies;
2508 
2509 	seq_printf(seq,
2510 		   "kprobe_cnt:\t%u\n"
2511 		   "missed:\t%lu\n",
2512 		   kmulti_link->cnt,
2513 		   kmulti_link->fp.nmissed);
2514 
2515 	seq_printf(seq, "%s\t %s\n", "cookie", "func");
2516 	for (int i = 0; i < kmulti_link->cnt; i++) {
2517 		seq_printf(seq,
2518 			   "%llu\t %pS\n",
2519 			   has_cookies ? kmulti_link->cookies[i] : 0,
2520 			   (void *)kmulti_link->addrs[i]);
2521 	}
2522 }
2523 #endif
2524 
/*
 * Link operations for kprobe_multi links; installed by bpf_link_init() in
 * bpf_kprobe_multi_link_attach().  show_fdinfo exists only with
 * CONFIG_PROC_FS.
 */
2525 static const struct bpf_link_ops bpf_kprobe_multi_link_lops = {
2526 	.release = bpf_kprobe_multi_link_release,
2527 	.dealloc_deferred = bpf_kprobe_multi_link_dealloc,
2528 	.fill_link_info = bpf_kprobe_multi_link_fill_link_info,
2529 #ifdef CONFIG_PROC_FS
2530 	.show_fdinfo = bpf_kprobe_multi_show_fdinfo,
2531 #endif
2532 };
2533 
2534 static void bpf_kprobe_multi_cookie_swap(void *a, void *b, int size, const void *priv)
2535 {
2536 	const struct bpf_kprobe_multi_link *link = priv;
2537 	unsigned long *addr_a = a, *addr_b = b;
2538 	u64 *cookie_a, *cookie_b;
2539 
2540 	cookie_a = link->cookies + (addr_a - link->addrs);
2541 	cookie_b = link->cookies + (addr_b - link->addrs);
2542 
2543 	/* swap addr_a/addr_b and cookie_a/cookie_b values */
2544 	swap(*addr_a, *addr_b);
2545 	swap(*cookie_a, *cookie_b);
2546 }
2547 
/*
 * Three-way comparison of two unsigned long addresses, usable with
 * bsearch(): returns -1, 0 or 1.
 */
static int bpf_kprobe_multi_addrs_cmp(const void *a, const void *b)
{
	unsigned long lhs = *(const unsigned long *)a;
	unsigned long rhs = *(const unsigned long *)b;

	if (lhs < rhs)
		return -1;
	return lhs > rhs;
}
2556 
/*
 * sort_r() flavour of bpf_kprobe_multi_addrs_cmp(): same ordering, @priv is
 * ignored.
 */
2557 static int bpf_kprobe_multi_cookie_cmp(const void *a, const void *b, const void *priv)
2558 {
2559 	return bpf_kprobe_multi_addrs_cmp(a, b);
2560 }
2561 
2562 static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx)
2563 {
2564 	struct bpf_kprobe_multi_run_ctx *run_ctx;
2565 	struct bpf_kprobe_multi_link *link;
2566 	u64 *cookie, entry_ip;
2567 	unsigned long *addr;
2568 
2569 	if (WARN_ON_ONCE(!ctx))
2570 		return 0;
2571 	run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx,
2572 			       session_ctx.run_ctx);
2573 	link = run_ctx->link;
2574 	if (!link->cookies)
2575 		return 0;
2576 	entry_ip = run_ctx->entry_ip;
2577 	addr = bsearch(&entry_ip, link->addrs, link->cnt, sizeof(entry_ip),
2578 		       bpf_kprobe_multi_addrs_cmp);
2579 	if (!addr)
2580 		return 0;
2581 	cookie = link->cookies + (addr - link->addrs);
2582 	return *cookie;
2583 }
2584 
2585 static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
2586 {
2587 	struct bpf_kprobe_multi_run_ctx *run_ctx;
2588 
2589 	run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx,
2590 			       session_ctx.run_ctx);
2591 	return run_ctx->entry_ip;
2592 }
2593 
/*
 * Run @link's program for one fprobe hit at @entry_ip.
 *
 * @is_return and @data are stored in the session part of the on-stack run
 * context.  Returns the program's return value, or 1 when the program was
 * skipped because bpf_prog_active was already non-zero on this CPU (the
 * program's miss counter is incremented in that case).
 */
2594 static __always_inline int
2595 kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
2596 			   unsigned long entry_ip, struct ftrace_regs *fregs,
2597 			   bool is_return, void *data)
2598 {
2599 	struct bpf_kprobe_multi_run_ctx run_ctx = {
2600 		.session_ctx = {
2601 			.is_return = is_return,
2602 			.data = data,
2603 		},
2604 		.link = link,
2605 		.entry_ip = entry_ip,
2606 	};
2607 	struct bpf_run_ctx *old_run_ctx;
2608 	struct pt_regs *regs;
2609 	int err;
2610 
2611 	/*
2612 	 * graph tracer framework ensures we won't migrate, so there is no need
2613 	 * to use migrate_disable for bpf_prog_run again. The check here just for
2614 	 * __this_cpu_inc_return.
2615 	 */
2616 	cant_sleep();
2617 
	/* Per-CPU nesting guard: only the outermost invocation runs. */
2618 	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
2619 		bpf_prog_inc_misses_counter(link->link.prog);
2620 		err = 1;
2621 		goto out;
2622 	}
2623 
2624 	rcu_read_lock();
	/* The program takes pt_regs as context; derive them from ftrace_regs. */
2625 	regs = ftrace_partial_regs(fregs, bpf_kprobe_multi_pt_regs_ptr());
2626 	old_run_ctx = bpf_set_run_ctx(&run_ctx.session_ctx.run_ctx);
2627 	err = bpf_prog_run(link->link.prog, regs);
2628 	bpf_reset_run_ctx(old_run_ctx);
2629 	ftrace_partial_regs_update(fregs, bpf_kprobe_multi_pt_regs_ptr());
2630 	rcu_read_unlock();
2631 
2632  out:
	/* Undo the increment above; runs on the skipped path as well. */
2633 	__this_cpu_dec(bpf_prog_active);
2634 	return err;
2635 }
2636 
2637 static int
2638 kprobe_multi_link_handler(struct fprobe *fp, unsigned long fentry_ip,
2639 			  unsigned long ret_ip, struct ftrace_regs *fregs,
2640 			  void *data)
2641 {
2642 	struct bpf_kprobe_multi_link *link;
2643 	int err;
2644 
2645 	link = container_of(fp, struct bpf_kprobe_multi_link, fp);
2646 	err = kprobe_multi_link_prog_run(link, ftrace_get_entry_ip(fentry_ip),
2647 					 fregs, false, data);
2648 	return is_kprobe_session(link->link.prog) ? err : 0;
2649 }
2650 
2651 static void
2652 kprobe_multi_link_exit_handler(struct fprobe *fp, unsigned long fentry_ip,
2653 			       unsigned long ret_ip, struct ftrace_regs *fregs,
2654 			       void *data)
2655 {
2656 	struct bpf_kprobe_multi_link *link;
2657 
2658 	link = container_of(fp, struct bpf_kprobe_multi_link, fp);
2659 	kprobe_multi_link_prog_run(link, ftrace_get_entry_ip(fentry_ip),
2660 				   fregs, true, data);
2661 }
2662 
/*
 * sort_r() comparison callback for an array of C-string pointers: orders
 * the strings with strcmp().  @priv is unused.
 */
static int symbols_cmp_r(const void *a, const void *b, const void *priv)
{
	const char *lhs = *(const char * const *)a;
	const char *rhs = *(const char * const *)b;

	return strcmp(lhs, rhs);
}
2670 
/* Private data handed to symbols_swap_r() while sorting symbol names. */
2671 struct multi_symbols_sort {
	/* base of the name-pointer array being sorted; only read when cookies is set */
2672 	const char **funcs;
	/* optional cookies index-aligned with funcs; NULL when there are none */
2673 	u64 *cookies;
2674 };
2675 
2676 static void symbols_swap_r(void *a, void *b, int size, const void *priv)
2677 {
2678 	const struct multi_symbols_sort *data = priv;
2679 	const char **name_a = a, **name_b = b;
2680 
2681 	swap(*name_a, *name_b);
2682 
2683 	/* If defined, swap also related cookies. */
2684 	if (data->cookies) {
2685 		u64 *cookie_a, *cookie_b;
2686 
2687 		cookie_a = data->cookies + (name_a - data->funcs);
2688 		cookie_b = data->cookies + (name_b - data->funcs);
2689 		swap(*cookie_a, *cookie_b);
2690 	}
2691 }
2692 
/* Growable array of module pointers built by get_modules_for_addrs(). */
2693 struct modules_array {
2694 	struct module **mods;
	/* entries currently stored in mods */
2695 	int mods_cnt;
	/* entries mods has room for; grown by add_module() */
2696 	int mods_cap;
2697 };
2698 
2699 static int add_module(struct modules_array *arr, struct module *mod)
2700 {
2701 	struct module **mods;
2702 
2703 	if (arr->mods_cnt == arr->mods_cap) {
2704 		arr->mods_cap = max(16, arr->mods_cap * 3 / 2);
2705 		mods = krealloc_array(arr->mods, arr->mods_cap, sizeof(*mods), GFP_KERNEL);
2706 		if (!mods)
2707 			return -ENOMEM;
2708 		arr->mods = mods;
2709 	}
2710 
2711 	arr->mods[arr->mods_cnt] = mod;
2712 	arr->mods_cnt++;
2713 	return 0;
2714 }
2715 
2716 static bool has_module(struct modules_array *arr, struct module *mod)
2717 {
2718 	int i;
2719 
2720 	for (i = arr->mods_cnt - 1; i >= 0; i--) {
2721 		if (arr->mods[i] == mod)
2722 			return true;
2723 	}
2724 	return false;
2725 }
2726 
2727 static int get_modules_for_addrs(struct module ***mods, unsigned long *addrs, u32 addrs_cnt)
2728 {
2729 	struct modules_array arr = {};
2730 	u32 i, err = 0;
2731 
2732 	for (i = 0; i < addrs_cnt; i++) {
2733 		bool skip_add = false;
2734 		struct module *mod;
2735 
2736 		scoped_guard(rcu) {
2737 			mod = __module_address(addrs[i]);
2738 			/* Either no module or it's already stored  */
2739 			if (!mod || has_module(&arr, mod)) {
2740 				skip_add = true;
2741 				break; /* scoped_guard */
2742 			}
2743 			if (!try_module_get(mod))
2744 				err = -EINVAL;
2745 		}
2746 		if (skip_add)
2747 			continue;
2748 		if (err)
2749 			break;
2750 		err = add_module(&arr, mod);
2751 		if (err) {
2752 			module_put(mod);
2753 			break;
2754 		}
2755 	}
2756 
2757 	/* We return either err < 0 in case of error, ... */
2758 	if (err) {
2759 		kprobe_multi_put_modules(arr.mods, arr.mods_cnt);
2760 		kfree(arr.mods);
2761 		return err;
2762 	}
2763 
2764 	/* or number of modules found if everything is ok. */
2765 	*mods = arr.mods;
2766 	return arr.mods_cnt;
2767 }
2768 
2769 static int addrs_check_error_injection_list(unsigned long *addrs, u32 cnt)
2770 {
2771 	u32 i;
2772 
2773 	for (i = 0; i < cnt; i++) {
2774 		if (!within_error_injection_list(addrs[i]))
2775 			return -EINVAL;
2776 	}
2777 	return 0;
2778 }
2779 
2780 int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
2781 {
2782 	struct bpf_kprobe_multi_link *link = NULL;
2783 	struct bpf_link_primer link_primer;
2784 	void __user *ucookies;
2785 	unsigned long *addrs;
2786 	u32 flags, cnt, size;
2787 	void __user *uaddrs;
2788 	u64 *cookies = NULL;
2789 	void __user *usyms;
2790 	int err;
2791 
2792 	/* no support for 32bit archs yet */
2793 	if (sizeof(u64) != sizeof(void *))
2794 		return -EOPNOTSUPP;
2795 
2796 	if (attr->link_create.flags)
2797 		return -EINVAL;
2798 
2799 	if (!is_kprobe_multi(prog))
2800 		return -EINVAL;
2801 
2802 	/* kprobe_multi is not allowed to be sleepable. */
2803 	if (prog->sleepable)
2804 		return -EINVAL;
2805 
2806 	/* Writing to context is not allowed for kprobes. */
2807 	if (prog->aux->kprobe_write_ctx)
2808 		return -EINVAL;
2809 
2810 	flags = attr->link_create.kprobe_multi.flags;
2811 	if (flags & ~BPF_F_KPROBE_MULTI_RETURN)
2812 		return -EINVAL;
2813 
2814 	uaddrs = u64_to_user_ptr(attr->link_create.kprobe_multi.addrs);
2815 	usyms = u64_to_user_ptr(attr->link_create.kprobe_multi.syms);
2816 	if (!!uaddrs == !!usyms)
2817 		return -EINVAL;
2818 
2819 	cnt = attr->link_create.kprobe_multi.cnt;
2820 	if (!cnt)
2821 		return -EINVAL;
2822 	if (cnt > MAX_KPROBE_MULTI_CNT)
2823 		return -E2BIG;
2824 
2825 	size = cnt * sizeof(*addrs);
2826 	addrs = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL);
2827 	if (!addrs)
2828 		return -ENOMEM;
2829 
2830 	ucookies = u64_to_user_ptr(attr->link_create.kprobe_multi.cookies);
2831 	if (ucookies) {
2832 		cookies = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL);
2833 		if (!cookies) {
2834 			err = -ENOMEM;
2835 			goto error;
2836 		}
2837 		if (copy_from_user(cookies, ucookies, size)) {
2838 			err = -EFAULT;
2839 			goto error;
2840 		}
2841 	}
2842 
2843 	if (uaddrs) {
2844 		if (copy_from_user(addrs, uaddrs, size)) {
2845 			err = -EFAULT;
2846 			goto error;
2847 		}
2848 	} else {
2849 		struct multi_symbols_sort data = {
2850 			.cookies = cookies,
2851 		};
2852 		struct user_syms us;
2853 
2854 		err = copy_user_syms(&us, usyms, cnt);
2855 		if (err)
2856 			goto error;
2857 
2858 		if (cookies)
2859 			data.funcs = us.syms;
2860 
2861 		sort_r(us.syms, cnt, sizeof(*us.syms), symbols_cmp_r,
2862 		       symbols_swap_r, &data);
2863 
2864 		err = ftrace_lookup_symbols(us.syms, cnt, addrs);
2865 		free_user_syms(&us);
2866 		if (err)
2867 			goto error;
2868 	}
2869 
2870 	if (prog->kprobe_override && addrs_check_error_injection_list(addrs, cnt)) {
2871 		err = -EINVAL;
2872 		goto error;
2873 	}
2874 
2875 	link = kzalloc_obj(*link);
2876 	if (!link) {
2877 		err = -ENOMEM;
2878 		goto error;
2879 	}
2880 
2881 	bpf_link_init(&link->link, BPF_LINK_TYPE_KPROBE_MULTI,
2882 		      &bpf_kprobe_multi_link_lops, prog, attr->link_create.attach_type);
2883 
2884 	err = bpf_link_prime(&link->link, &link_primer);
2885 	if (err)
2886 		goto error;
2887 
2888 	if (!(flags & BPF_F_KPROBE_MULTI_RETURN))
2889 		link->fp.entry_handler = kprobe_multi_link_handler;
2890 	if ((flags & BPF_F_KPROBE_MULTI_RETURN) || is_kprobe_session(prog))
2891 		link->fp.exit_handler = kprobe_multi_link_exit_handler;
2892 	if (is_kprobe_session(prog))
2893 		link->fp.entry_data_size = sizeof(u64);
2894 
2895 	link->addrs = addrs;
2896 	link->cookies = cookies;
2897 	link->cnt = cnt;
2898 	link->link.flags = flags;
2899 
2900 	if (cookies) {
2901 		/*
2902 		 * Sorting addresses will trigger sorting cookies as well
2903 		 * (check bpf_kprobe_multi_cookie_swap). This way we can
2904 		 * find cookie based on the address in bpf_get_attach_cookie
2905 		 * helper.
2906 		 */
2907 		sort_r(addrs, cnt, sizeof(*addrs),
2908 		       bpf_kprobe_multi_cookie_cmp,
2909 		       bpf_kprobe_multi_cookie_swap,
2910 		       link);
2911 	}
2912 
2913 	err = get_modules_for_addrs(&link->mods, addrs, cnt);
2914 	if (err < 0) {
2915 		bpf_link_cleanup(&link_primer);
2916 		return err;
2917 	}
2918 	link->mods_cnt = err;
2919 
2920 	err = register_fprobe_ips(&link->fp, addrs, cnt);
2921 	if (err) {
2922 		kprobe_multi_put_modules(link->mods, link->mods_cnt);
2923 		bpf_link_cleanup(&link_primer);
2924 		return err;
2925 	}
2926 
2927 	return bpf_link_settle(&link_primer);
2928 
2929 error:
2930 	kfree(link);
2931 	kvfree(addrs);
2932 	kvfree(cookies);
2933 	return err;
2934 }
2935 #else /* !CONFIG_FPROBE */
/*
 * Stubs used when CONFIG_FPROBE is not set: attaching is rejected with
 * -EOPNOTSUPP and the cookie / entry-ip helpers report 0.
 */
2936 int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
2937 {
2938 	return -EOPNOTSUPP;
2939 }
2940 static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx)
2941 {
2942 	return 0;
2943 }
2944 static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
2945 {
2946 	return 0;
2947 }
2948 #endif
2949 
2950 #ifdef CONFIG_UPROBES
/* Forward declaration: struct bpf_uprobe points back at its owning link. */
2951 struct bpf_uprobe_multi_link;
2952 
/* One probe location belonging to a uprobe_multi link. */
2953 struct bpf_uprobe {
	/* owning link; uprobe_prog_run() takes the program and task filter from it */
2954 	struct bpf_uprobe_multi_link *link;
	/* reported to user space through the link info "offsets" array */
2955 	loff_t offset;
	/* reported through the link info "ref_ctr_offsets" array */
2956 	unsigned long ref_ctr_offset;
	/* reported through the link info "cookies" array */
2957 	u64 cookie;
	/* handle and consumer passed to uprobe_unregister_nosync() */
2958 	struct uprobe *uprobe;
2959 	struct uprobe_consumer consumer;
	/* NOTE(review): not referenced in the visible code - confirm meaning */
2960 	bool session;
2961 };
2962 
/* A uprobe_multi link: a set of uprobes that all run link.prog. */
2963 struct bpf_uprobe_multi_link {
	/* rendered with d_path() for link info/fdinfo; dropped with path_put() on release */
2964 	struct path path;
2965 	struct bpf_link link;
	/* number of entries in uprobes */
2966 	u32 cnt;
2967 	struct bpf_uprobe *uprobes;
	/* optional task filter: when set, only its thread group runs the program */
2968 	struct task_struct *task;
2969 };
2970 
/* Per-invocation context built on the stack by uprobe_prog_run(). */
2971 struct bpf_uprobe_multi_run_ctx {
2972 	struct bpf_session_run_ctx session_ctx;
	/* copied from uprobe_prog_run()'s entry_ip argument */
2973 	unsigned long entry_ip;
	/* the probe that fired */
2974 	struct bpf_uprobe *uprobe;
2975 };
2976 
2977 static void bpf_uprobe_unregister(struct bpf_uprobe *uprobes, u32 cnt)
2978 {
2979 	u32 i;
2980 
2981 	for (i = 0; i < cnt; i++)
2982 		uprobe_unregister_nosync(uprobes[i].uprobe, &uprobes[i].consumer);
2983 
2984 	if (cnt)
2985 		uprobe_unregister_sync();
2986 }
2987 
2988 static void bpf_uprobe_multi_link_release(struct bpf_link *link)
2989 {
2990 	struct bpf_uprobe_multi_link *umulti_link;
2991 
2992 	umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
2993 	bpf_uprobe_unregister(umulti_link->uprobes, umulti_link->cnt);
2994 	if (umulti_link->task)
2995 		put_task_struct(umulti_link->task);
2996 	path_put(&umulti_link->path);
2997 }
2998 
2999 static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link)
3000 {
3001 	struct bpf_uprobe_multi_link *umulti_link;
3002 
3003 	umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
3004 	kvfree(umulti_link->uprobes);
3005 	kfree(umulti_link);
3006 }
3007 
3008 static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link,
3009 						struct bpf_link_info *info)
3010 {
3011 	u64 __user *uref_ctr_offsets = u64_to_user_ptr(info->uprobe_multi.ref_ctr_offsets);
3012 	u64 __user *ucookies = u64_to_user_ptr(info->uprobe_multi.cookies);
3013 	u64 __user *uoffsets = u64_to_user_ptr(info->uprobe_multi.offsets);
3014 	u64 __user *upath = u64_to_user_ptr(info->uprobe_multi.path);
3015 	u32 upath_size = info->uprobe_multi.path_size;
3016 	struct bpf_uprobe_multi_link *umulti_link;
3017 	u32 ucount = info->uprobe_multi.count;
3018 	int err = 0, i;
3019 	char *p, *buf;
3020 	long left = 0;
3021 
3022 	if (!upath ^ !upath_size)
3023 		return -EINVAL;
3024 
3025 	if ((uoffsets || uref_ctr_offsets || ucookies) && !ucount)
3026 		return -EINVAL;
3027 
3028 	umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
3029 	info->uprobe_multi.count = umulti_link->cnt;
3030 	info->uprobe_multi.flags = umulti_link->link.flags;
3031 	info->uprobe_multi.pid = umulti_link->task ?
3032 				 task_pid_nr_ns(umulti_link->task, task_active_pid_ns(current)) : 0;
3033 
3034 	upath_size = upath_size ? min_t(u32, upath_size, PATH_MAX) : PATH_MAX;
3035 	buf = kmalloc(upath_size, GFP_KERNEL);
3036 	if (!buf)
3037 		return -ENOMEM;
3038 	p = d_path(&umulti_link->path, buf, upath_size);
3039 	if (IS_ERR(p)) {
3040 		kfree(buf);
3041 		return PTR_ERR(p);
3042 	}
3043 	upath_size = buf + upath_size - p;
3044 
3045 	if (upath)
3046 		left = copy_to_user(upath, p, upath_size);
3047 	kfree(buf);
3048 	if (left)
3049 		return -EFAULT;
3050 	info->uprobe_multi.path_size = upath_size;
3051 
3052 	if (!uoffsets && !ucookies && !uref_ctr_offsets)
3053 		return 0;
3054 
3055 	if (ucount < umulti_link->cnt)
3056 		err = -ENOSPC;
3057 	else
3058 		ucount = umulti_link->cnt;
3059 
3060 	for (i = 0; i < ucount; i++) {
3061 		if (uoffsets &&
3062 		    put_user(umulti_link->uprobes[i].offset, uoffsets + i))
3063 			return -EFAULT;
3064 		if (uref_ctr_offsets &&
3065 		    put_user(umulti_link->uprobes[i].ref_ctr_offset, uref_ctr_offsets + i))
3066 			return -EFAULT;
3067 		if (ucookies &&
3068 		    put_user(umulti_link->uprobes[i].cookie, ucookies + i))
3069 			return -EFAULT;
3070 	}
3071 
3072 	return err;
3073 }
3074 
3075 #ifdef CONFIG_PROC_FS
3076 static void bpf_uprobe_multi_show_fdinfo(const struct bpf_link *link,
3077 					 struct seq_file *seq)
3078 {
3079 	struct bpf_uprobe_multi_link *umulti_link;
3080 	char *p, *buf;
3081 	pid_t pid;
3082 
3083 	umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
3084 
3085 	buf = kmalloc(PATH_MAX, GFP_KERNEL);
3086 	if (!buf)
3087 		return;
3088 
3089 	p = d_path(&umulti_link->path, buf, PATH_MAX);
3090 	if (IS_ERR(p)) {
3091 		kfree(buf);
3092 		return;
3093 	}
3094 
3095 	pid = umulti_link->task ?
3096 	      task_pid_nr_ns(umulti_link->task, task_active_pid_ns(current)) : 0;
3097 	seq_printf(seq,
3098 		   "uprobe_cnt:\t%u\n"
3099 		   "pid:\t%u\n"
3100 		   "path:\t%s\n",
3101 		   umulti_link->cnt, pid, p);
3102 
3103 	seq_printf(seq, "%s\t %s\t %s\n", "cookie", "offset", "ref_ctr_offset");
3104 	for (int i = 0; i < umulti_link->cnt; i++) {
3105 		seq_printf(seq,
3106 			   "%llu\t %#llx\t %#lx\n",
3107 			   umulti_link->uprobes[i].cookie,
3108 			   umulti_link->uprobes[i].offset,
3109 			   umulti_link->uprobes[i].ref_ctr_offset);
3110 	}
3111 
3112 	kfree(buf);
3113 }
3114 #endif
3115 
/*
 * Link operations handed to bpf_link_init() when a uprobe-multi link is
 * created. The fdinfo callback only exists with CONFIG_PROC_FS.
 */
static const struct bpf_link_ops bpf_uprobe_multi_link_lops = {
	.release = bpf_uprobe_multi_link_release,
	.dealloc_deferred = bpf_uprobe_multi_link_dealloc,
	.fill_link_info = bpf_uprobe_multi_link_fill_link_info,
#ifdef CONFIG_PROC_FS
	.show_fdinfo = bpf_uprobe_multi_show_fdinfo,
#endif
};
3124 
3125 static int uprobe_prog_run(struct bpf_uprobe *uprobe,
3126 			   unsigned long entry_ip,
3127 			   struct pt_regs *regs,
3128 			   bool is_return, void *data)
3129 {
3130 	struct bpf_uprobe_multi_link *link = uprobe->link;
3131 	struct bpf_uprobe_multi_run_ctx run_ctx = {
3132 		.session_ctx = {
3133 			.is_return = is_return,
3134 			.data = data,
3135 		},
3136 		.entry_ip = entry_ip,
3137 		.uprobe = uprobe,
3138 	};
3139 	struct bpf_prog *prog = link->link.prog;
3140 	bool sleepable = prog->sleepable;
3141 	struct bpf_run_ctx *old_run_ctx;
3142 	int err;
3143 
3144 	if (link->task && !same_thread_group(current, link->task))
3145 		return 0;
3146 
3147 	if (sleepable)
3148 		rcu_read_lock_trace();
3149 	else
3150 		rcu_read_lock();
3151 
3152 	migrate_disable();
3153 
3154 	old_run_ctx = bpf_set_run_ctx(&run_ctx.session_ctx.run_ctx);
3155 	err = bpf_prog_run(link->link.prog, regs);
3156 	bpf_reset_run_ctx(old_run_ctx);
3157 
3158 	migrate_enable();
3159 
3160 	if (sleepable)
3161 		rcu_read_unlock_trace();
3162 	else
3163 		rcu_read_unlock();
3164 	return err;
3165 }
3166 
3167 static bool
3168 uprobe_multi_link_filter(struct uprobe_consumer *con, struct mm_struct *mm)
3169 {
3170 	struct bpf_uprobe *uprobe;
3171 
3172 	uprobe = container_of(con, struct bpf_uprobe, consumer);
3173 	return uprobe->link->task->mm == mm;
3174 }
3175 
3176 static int
3177 uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs,
3178 			  __u64 *data)
3179 {
3180 	struct bpf_uprobe *uprobe;
3181 	int ret;
3182 
3183 	uprobe = container_of(con, struct bpf_uprobe, consumer);
3184 	ret = uprobe_prog_run(uprobe, instruction_pointer(regs), regs, false, data);
3185 	if (uprobe->session)
3186 		return ret ? UPROBE_HANDLER_IGNORE : 0;
3187 	return 0;
3188 }
3189 
3190 static int
3191 uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs,
3192 			      __u64 *data)
3193 {
3194 	struct bpf_uprobe *uprobe;
3195 
3196 	uprobe = container_of(con, struct bpf_uprobe, consumer);
3197 	uprobe_prog_run(uprobe, func, regs, true, data);
3198 	return 0;
3199 }
3200 
3201 static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
3202 {
3203 	struct bpf_uprobe_multi_run_ctx *run_ctx;
3204 
3205 	run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx,
3206 			       session_ctx.run_ctx);
3207 	return run_ctx->entry_ip;
3208 }
3209 
3210 static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx)
3211 {
3212 	struct bpf_uprobe_multi_run_ctx *run_ctx;
3213 
3214 	run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx,
3215 			       session_ctx.run_ctx);
3216 	return run_ctx->uprobe->cookie;
3217 }
3218 
/*
 * Resolve the file targeted by a uprobe-multi link_create request.
 *
 * With BPF_F_UPROBE_MULTI_PATH_FD the path is taken from the open file
 * behind path_fd; without it the user-supplied path string is looked up
 * starting at AT_FDCWD, following symlinks (LOOKUP_FOLLOW).
 *
 * On success the result is stored in *path; the attach function in this
 * file releases it with path_put().
 *
 * Return: 0 on success, -EINVAL when the path / path_fd / flag
 * combination is inconsistent, -EBADF when path_fd yields no file, or
 * whatever user_path_at() returns.
 */
static int bpf_uprobe_multi_get_path(const union bpf_attr *attr, struct path *path)
{
	void __user *upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path);
	u32 path_fd = attr->link_create.uprobe_multi.path_fd;
	u32 flags = attr->link_create.uprobe_multi.flags;

	if (flags & BPF_F_UPROBE_MULTI_PATH_FD) {
		/*
		 * When BPF_F_UPROBE_MULTI_PATH_FD is set, the executable is
		 * identified by path_fd, upath must be NULL.
		 */
		if (upath)
			return -EINVAL;

		CLASS(fd, f)(path_fd);
		if (fd_empty(f))
			return -EBADF;
		/* Copy the file's path and take a reference on the copy. */
		*path = fd_file(f)->f_path;
		path_get(path);
		return 0;
	}

	/*
	 * When BPF_F_UPROBE_MULTI_PATH_FD is not set, the path is resolved
	 * relative to the cwd (AT_FDCWD) or absolute using the upath string.
	 */
	if (!upath || path_fd)
		return -EINVAL;

	return user_path_at(AT_FDCWD, upath, LOOKUP_FOLLOW, path);
}
3250 
/*
 * Create a uprobe-multi link: register one uprobe consumer per offset in
 * attr->link_create.uprobe_multi on a single file and tie them to @prog.
 * A non-zero pid additionally binds the link to that task.
 *
 * Return: the result of bpf_link_settle() on success, otherwise a
 * negative errno:
 *   -EOPNOTSUPP  pointers are not 64 bits wide
 *   -EINVAL      unsupported flags, wrong program type, missing offsets
 *                or cnt, negative pid, or a negative offset value
 *   -E2BIG       cnt exceeds MAX_UPROBE_MULTI_CNT
 *   -EFAULT      a user array fails access_ok() or cannot be read
 *   -EBADF       the resolved path is not a regular file
 *   -ESRCH       pid does not resolve to a task
 *   -ENOMEM      link or uprobes array allocation failed
 * or the error reported by bpf_uprobe_multi_get_path(),
 * uprobe_register() or bpf_link_prime().
 */
int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	struct bpf_uprobe_multi_link *link = NULL;
	unsigned long __user *uref_ctr_offsets;
	struct bpf_link_primer link_primer;
	struct bpf_uprobe *uprobes = NULL;
	struct task_struct *task = NULL;
	unsigned long __user *uoffsets;
	u64 __user *ucookies;
	unsigned long size;
	u32 flags, cnt, i;
	struct path path;
	pid_t pid;
	int err;

	/* no support for 32bit archs yet */
	if (sizeof(u64) != sizeof(void *))
		return -EOPNOTSUPP;

	if (attr->link_create.flags)
		return -EINVAL;

	if (!is_uprobe_multi(prog))
		return -EINVAL;

	flags = attr->link_create.uprobe_multi.flags;
	if (flags & ~(BPF_F_UPROBE_MULTI_RETURN | BPF_F_UPROBE_MULTI_PATH_FD))
		return -EINVAL;

	/*
	 * offsets and cnt are mandatory,
	 * ref_ctr_offsets and cookies are optional
	 */
	uoffsets = u64_to_user_ptr(attr->link_create.uprobe_multi.offsets);
	cnt = attr->link_create.uprobe_multi.cnt;
	pid = attr->link_create.uprobe_multi.pid;

	if (!uoffsets || !cnt || pid < 0)
		return -EINVAL;
	if (cnt > MAX_UPROBE_MULTI_CNT)
		return -E2BIG;

	uref_ctr_offsets = u64_to_user_ptr(attr->link_create.uprobe_multi.ref_ctr_offsets);
	ucookies = u64_to_user_ptr(attr->link_create.uprobe_multi.cookies);

	/*
	 * All uoffsets/uref_ctr_offsets/ucookies arrays have the same value
	 * size, we need to check their address range is safe for __get_user
	 * calls.
	 */
	size = sizeof(*uoffsets) * cnt;
	if (!access_ok(uoffsets, size) || !access_ok(uref_ctr_offsets, size) ||
	    !access_ok(ucookies, size))
		return -EFAULT;

	/* From here on the path must be dropped on every exit. */
	err = bpf_uprobe_multi_get_path(attr, &path);
	if (err)
		return err;

	if (!d_is_reg(path.dentry)) {
		err = -EBADF;
		goto error_path_put;
	}

	if (pid) {
		rcu_read_lock();
		task = get_pid_task(find_vpid(pid), PIDTYPE_TGID);
		rcu_read_unlock();
		if (!task) {
			err = -ESRCH;
			goto error_path_put;
		}
	}

	err = -ENOMEM;

	/* Both allocations are checked together; the error path frees either. */
	link = kzalloc_obj(*link);
	uprobes = kvzalloc_objs(*uprobes, cnt);

	if (!uprobes || !link)
		goto error_free;

	/* Pass 1: read the per-probe user data and wire up the consumers. */
	for (i = 0; i < cnt; i++) {
		if (__get_user(uprobes[i].offset, uoffsets + i)) {
			err = -EFAULT;
			goto error_free;
		}
		if (uprobes[i].offset < 0) {
			err = -EINVAL;
			goto error_free;
		}
		if (uref_ctr_offsets && __get_user(uprobes[i].ref_ctr_offset, uref_ctr_offsets + i)) {
			err = -EFAULT;
			goto error_free;
		}
		if (ucookies && __get_user(uprobes[i].cookie, ucookies + i)) {
			err = -EFAULT;
			goto error_free;
		}

		uprobes[i].link = link;

		/*
		 * Entry handler unless this is a pure return probe; return
		 * handler for return probes and for session programs.
		 */
		if (!(flags & BPF_F_UPROBE_MULTI_RETURN))
			uprobes[i].consumer.handler = uprobe_multi_link_handler;
		if (flags & BPF_F_UPROBE_MULTI_RETURN || is_uprobe_session(prog))
			uprobes[i].consumer.ret_handler = uprobe_multi_link_ret_handler;
		if (is_uprobe_session(prog))
			uprobes[i].session = true;
		if (pid)
			uprobes[i].consumer.filter = uprobe_multi_link_filter;
	}

	link->cnt = cnt;
	link->uprobes = uprobes;
	link->path = path;
	link->task = task;
	link->link.flags = flags;

	bpf_link_init(&link->link, BPF_LINK_TYPE_UPROBE_MULTI,
		      &bpf_uprobe_multi_link_lops, prog, attr->link_create.attach_type);

	/* Pass 2: register every consumer with the uprobe layer. */
	for (i = 0; i < cnt; i++) {
		uprobes[i].uprobe = uprobe_register(d_real_inode(link->path.dentry),
						    uprobes[i].offset,
						    uprobes[i].ref_ctr_offset,
						    &uprobes[i].consumer);
		if (IS_ERR(uprobes[i].uprobe)) {
			err = PTR_ERR(uprobes[i].uprobe);
			/* Only the first i probes were registered; unwind just those. */
			link->cnt = i;
			goto error_unregister;
		}
	}

	err = bpf_link_prime(&link->link, &link_primer);
	if (err)
		goto error_unregister;

	return bpf_link_settle(&link_primer);

error_unregister:
	bpf_uprobe_unregister(uprobes, link->cnt);

error_free:
	kvfree(uprobes);
	kfree(link);
	if (task)
		put_task_struct(task);
error_path_put:
	path_put(&path);
	return err;
}
3402 #else /* !CONFIG_UPROBES */
/* Stub for kernels built without CONFIG_UPROBES: attaching always fails with -EOPNOTSUPP. */
int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	return -EOPNOTSUPP;
}
/* Stub for kernels built without CONFIG_UPROBES: the cookie is always 0. */
static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx)
{
	return 0;
}
/* Stub for kernels built without CONFIG_UPROBES: the entry IP is always 0. */
static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
{
	return 0;
}
3415 #endif /* CONFIG_UPROBES */
3416 
3417 __bpf_kfunc_start_defs();
3418 
3419 __bpf_kfunc bool bpf_session_is_return(void *ctx)
3420 {
3421 	struct bpf_session_run_ctx *session_ctx;
3422 
3423 	session_ctx = container_of(current->bpf_ctx, struct bpf_session_run_ctx, run_ctx);
3424 	return session_ctx->is_return;
3425 }
3426 
3427 __bpf_kfunc __u64 *bpf_session_cookie(void *ctx)
3428 {
3429 	struct bpf_session_run_ctx *session_ctx;
3430 
3431 	session_ctx = container_of(current->bpf_ctx, struct bpf_session_run_ctx, run_ctx);
3432 	return session_ctx->data;
3433 }
3434 
3435 __bpf_kfunc_end_defs();
3436 
/*
 * BTF id set holding the two session kfuncs. bpf_session_filter() uses
 * membership in this set to decide whether a kfunc needs the session
 * program check.
 */
BTF_KFUNCS_START(session_kfunc_set_ids)
BTF_ID_FLAGS(func, bpf_session_is_return)
BTF_ID_FLAGS(func, bpf_session_cookie)
BTF_KFUNCS_END(session_kfunc_set_ids)
3441 
3442 static int bpf_session_filter(const struct bpf_prog *prog, u32 kfunc_id)
3443 {
3444 	if (!btf_id_set8_contains(&session_kfunc_set_ids, kfunc_id))
3445 		return 0;
3446 
3447 	if (!is_kprobe_session(prog) && !is_uprobe_session(prog) && !is_trace_fsession(prog))
3448 		return -EACCES;
3449 
3450 	return 0;
3451 }
3452 
/*
 * kfunc id set descriptor for the session kfuncs, registered by
 * bpf_trace_kfuncs_init() with bpf_session_filter() as its filter.
 */
static const struct btf_kfunc_id_set bpf_session_kfunc_set = {
	.owner = THIS_MODULE,
	.set = &session_kfunc_set_ids,
	.filter = bpf_session_filter,
};
3458 
3459 static int __init bpf_trace_kfuncs_init(void)
3460 {
3461 	int err = 0;
3462 
3463 	err = err ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_KPROBE, &bpf_session_kfunc_set);
3464 	err = err ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_session_kfunc_set);
3465 
3466 	return err;
3467 }
3468 
3469 late_initcall(bpf_trace_kfuncs_init);
3470 
/*
 * Common signature of the copy callbacks used by the dynptr copy helpers:
 * copy up to @size bytes from @src into @dst. @tsk is only consulted by
 * the sleepable user-space callbacks.
 */
typedef int (*copy_fn_t)(void *dst, const void *src, u32 size, struct task_struct *tsk);
3472 
/*
 * Copy a NUL-terminated string of at most @size bytes from @unsafe_src
 * into @dptr at offset @doff, using @str_copy_fn to read the source.
 *
 * When bpf_dynptr_slice_rdwr() yields a slice for the destination range,
 * the string is copied straight into it and the callback's return value
 * is returned. Otherwise the string is copied through a 256-byte stack
 * buffer, one chunk at a time, and written with __bpf_dynptr_write().
 *
 * Return on the chunked path: total number of bytes written including
 * the NUL terminator, -E2BIG when bpf_dynptr_check_off_len() rejects
 * [@doff, @doff + @size), or the negative error reported by
 * @str_copy_fn or __bpf_dynptr_write().
 *
 * The __always_inline is to make sure the compiler doesn't
 * generate indirect calls into callbacks, which is expensive,
 * on some kernel configurations. This allows compiler to put
 * direct calls into all the specific callback implementations
 * (copy_user_data_sleepable, copy_user_data_nofault, and so on)
 */
static __always_inline int __bpf_dynptr_copy_str(const struct bpf_dynptr *dptr, u64 doff, u64 size,
						 const void *unsafe_src,
						 copy_fn_t str_copy_fn,
						 struct task_struct *tsk)
{
	const struct bpf_dynptr_kern *dst;
	u64 chunk_sz, off;
	void *dst_slice;
	int cnt, err;
	char buf[256];

	dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size);
	if (likely(dst_slice))
		return str_copy_fn(dst_slice, unsafe_src, size, tsk);

	dst = (struct bpf_dynptr_kern *)dptr;
	if (bpf_dynptr_check_off_len(dst, doff, size))
		return -E2BIG;

	for (off = 0; off < size; off += chunk_sz - 1) {
		chunk_sz = min_t(u64, sizeof(buf), size - off);
		/* Expect str_copy_fn to return count of copied bytes, including
		 * zero terminator. Next iteration increment off by chunk_sz - 1 to
		 * overwrite NUL.
		 */
		cnt = str_copy_fn(buf, unsafe_src + off, chunk_sz, tsk);
		if (cnt < 0)
			return cnt;
		err = __bpf_dynptr_write(dst, doff + off, buf, cnt, 0);
		if (err)
			return err;
		/*
		 * A short chunk means the source string ended; a one-byte
		 * chunk means only the room for the terminator was left.
		 */
		if (cnt < chunk_sz || chunk_sz == 1) /* we are done */
			return off + cnt;
	}
	return off;
}
3516 
/*
 * Copy exactly @size bytes from @unsafe_src into @dptr at offset @doff,
 * using @copy_fn to read the source.
 *
 * When bpf_dynptr_slice_rdwr() yields a slice for the destination range,
 * the data is copied straight into it and the callback's return value is
 * returned. Otherwise the data is moved in chunks of up to 256 bytes
 * through a stack buffer and written with __bpf_dynptr_write().
 *
 * Return on the chunked path: 0 on success, -E2BIG when
 * bpf_dynptr_check_off_len() rejects [@doff, @doff + @size), or the
 * first non-zero value returned by @copy_fn or __bpf_dynptr_write().
 */
static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u64 doff,
					     u64 size, const void *unsafe_src,
					     copy_fn_t copy_fn, struct task_struct *tsk)
{
	const struct bpf_dynptr_kern *dst;
	void *dst_slice;
	char buf[256];
	u64 off, chunk_sz;
	int err;

	dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size);
	if (likely(dst_slice))
		return copy_fn(dst_slice, unsafe_src, size, tsk);

	dst = (struct bpf_dynptr_kern *)dptr;
	if (bpf_dynptr_check_off_len(dst, doff, size))
		return -E2BIG;

	for (off = 0; off < size; off += chunk_sz) {
		/* Never read more than the bounce buffer holds or than is left. */
		chunk_sz = min_t(u64, sizeof(buf), size - off);
		err = copy_fn(buf, unsafe_src + off, chunk_sz, tsk);
		if (err)
			return err;
		err = __bpf_dynptr_write(dst, doff + off, buf, chunk_sz, 0);
		if (err)
			return err;
	}
	return 0;
}
3546 
/* copy_fn_t callback: read user memory with copy_from_user_nofault(); @tsk is ignored. */
static __always_inline int copy_user_data_nofault(void *dst, const void *unsafe_src,
						  u32 size, struct task_struct *tsk)
{
	return copy_from_user_nofault(dst, (const void __user *)unsafe_src, size);
}
3552 
3553 static __always_inline int copy_user_data_sleepable(void *dst, const void *unsafe_src,
3554 						    u32 size, struct task_struct *tsk)
3555 {
3556 	int ret;
3557 
3558 	if (!tsk) { /* Read from the current task */
3559 		ret = copy_from_user(dst, (const void __user *)unsafe_src, size);
3560 		if (ret)
3561 			return -EFAULT;
3562 		return 0;
3563 	}
3564 
3565 	ret = access_process_vm(tsk, (unsigned long)unsafe_src, dst, size, 0);
3566 	if (ret != size)
3567 		return -EFAULT;
3568 	return 0;
3569 }
3570 
/* copy_fn_t callback: read kernel memory with copy_from_kernel_nofault(); @tsk is ignored. */
static __always_inline int copy_kernel_data_nofault(void *dst, const void *unsafe_src,
						    u32 size, struct task_struct *tsk)
{
	return copy_from_kernel_nofault(dst, unsafe_src, size);
}
3576 
/* copy_fn_t callback: read a user string with strncpy_from_user_nofault(); @tsk is ignored. */
static __always_inline int copy_user_str_nofault(void *dst, const void *unsafe_src,
						 u32 size, struct task_struct *tsk)
{
	return strncpy_from_user_nofault(dst, (const void __user *)unsafe_src, size);
}
3582 
3583 static __always_inline int copy_user_str_sleepable(void *dst, const void *unsafe_src,
3584 						   u32 size, struct task_struct *tsk)
3585 {
3586 	int ret;
3587 
3588 	if (unlikely(size == 0))
3589 		return 0;
3590 
3591 	if (tsk) {
3592 		ret = copy_remote_vm_str(tsk, (unsigned long)unsafe_src, dst, size, 0);
3593 	} else {
3594 		ret = strncpy_from_user(dst, (const void __user *)unsafe_src, size - 1);
3595 		/* strncpy_from_user does not guarantee NUL termination */
3596 		if (ret >= 0)
3597 			((char *)dst)[ret] = '\0';
3598 	}
3599 
3600 	if (ret < 0)
3601 		return ret;
3602 	return ret + 1;
3603 }
3604 
/* copy_fn_t callback: read a kernel string with strncpy_from_kernel_nofault(); @tsk is ignored. */
static __always_inline int copy_kernel_str_nofault(void *dst, const void *unsafe_src,
						   u32 size, struct task_struct *tsk)
{
	return strncpy_from_kernel_nofault(dst, unsafe_src, size);
}
3610 
3611 __bpf_kfunc_start_defs();
3612 
3613 __bpf_kfunc int bpf_send_signal_task(struct task_struct *task, int sig, enum pid_type type,
3614 				     u64 value)
3615 {
3616 	if (type != PIDTYPE_PID && type != PIDTYPE_TGID)
3617 		return -EINVAL;
3618 
3619 	return bpf_send_signal_common(sig, type, task, value);
3620 }
3621 
/*
 * kfunc: copy @size bytes from user address @unsafe_ptr__ign into @dptr
 * at offset @off, reading with copy_user_data_nofault().
 */
__bpf_kfunc int bpf_probe_read_user_dynptr(const struct bpf_dynptr *dptr, u64 off,
					   u64 size, const void __user *unsafe_ptr__ign)
{
	return __bpf_dynptr_copy(dptr, off, size, (const void __force *)unsafe_ptr__ign,
				 copy_user_data_nofault, NULL);
}
3628 
/*
 * kfunc: copy @size bytes from kernel address @unsafe_ptr__ign into
 * @dptr at offset @off, reading with copy_kernel_data_nofault().
 */
__bpf_kfunc int bpf_probe_read_kernel_dynptr(const struct bpf_dynptr *dptr, u64 off,
					     u64 size, const void *unsafe_ptr__ign)
{
	return __bpf_dynptr_copy(dptr, off, size, unsafe_ptr__ign,
				 copy_kernel_data_nofault, NULL);
}
3635 
/*
 * kfunc: copy a string of at most @size bytes from user address
 * @unsafe_ptr__ign into @dptr at offset @off, reading with
 * copy_user_str_nofault().
 */
__bpf_kfunc int bpf_probe_read_user_str_dynptr(const struct bpf_dynptr *dptr, u64 off,
					       u64 size, const void __user *unsafe_ptr__ign)
{
	return __bpf_dynptr_copy_str(dptr, off, size, (const void __force *)unsafe_ptr__ign,
				     copy_user_str_nofault, NULL);
}
3642 
/*
 * kfunc: copy a string of at most @size bytes from kernel address
 * @unsafe_ptr__ign into @dptr at offset @off, reading with
 * copy_kernel_str_nofault().
 */
__bpf_kfunc int bpf_probe_read_kernel_str_dynptr(const struct bpf_dynptr *dptr, u64 off,
						 u64 size, const void *unsafe_ptr__ign)
{
	return __bpf_dynptr_copy_str(dptr, off, size, unsafe_ptr__ign,
				     copy_kernel_str_nofault, NULL);
}
3649 
/*
 * kfunc: copy @size bytes from user address @unsafe_ptr__ign into @dptr
 * at offset @off, reading with copy_user_data_sleepable() and a NULL
 * task, i.e. from the current task.
 */
__bpf_kfunc int bpf_copy_from_user_dynptr(const struct bpf_dynptr *dptr, u64 off,
					  u64 size, const void __user *unsafe_ptr__ign)
{
	return __bpf_dynptr_copy(dptr, off, size, (const void __force *)unsafe_ptr__ign,
				 copy_user_data_sleepable, NULL);
}
3656 
/*
 * kfunc: copy a string of at most @size bytes from user address
 * @unsafe_ptr__ign into @dptr at offset @off, reading with
 * copy_user_str_sleepable() and a NULL task, i.e. from the current task.
 */
__bpf_kfunc int bpf_copy_from_user_str_dynptr(const struct bpf_dynptr *dptr, u64 off,
					      u64 size, const void __user *unsafe_ptr__ign)
{
	return __bpf_dynptr_copy_str(dptr, off, size, (const void __force *)unsafe_ptr__ign,
				     copy_user_str_sleepable, NULL);
}
3663 
/*
 * kfunc: copy @size bytes from address @unsafe_ptr__ign in the address
 * space of @tsk into @dptr at offset @off, reading with
 * copy_user_data_sleepable().
 */
__bpf_kfunc int bpf_copy_from_user_task_dynptr(const struct bpf_dynptr *dptr, u64 off,
					       u64 size, const void __user *unsafe_ptr__ign,
					       struct task_struct *tsk)
{
	return __bpf_dynptr_copy(dptr, off, size, (const void __force *)unsafe_ptr__ign,
				 copy_user_data_sleepable, tsk);
}
3671 
/*
 * kfunc: copy a string of at most @size bytes from address
 * @unsafe_ptr__ign in the address space of @tsk into @dptr at offset
 * @off, reading with copy_user_str_sleepable().
 */
__bpf_kfunc int bpf_copy_from_user_task_str_dynptr(const struct bpf_dynptr *dptr, u64 off,
						   u64 size, const void __user *unsafe_ptr__ign,
						   struct task_struct *tsk)
{
	return __bpf_dynptr_copy_str(dptr, off, size, (const void __force *)unsafe_ptr__ign,
				     copy_user_str_sleepable, tsk);
}
3679 
3680 __bpf_kfunc_end_defs();
3681 
3682 #if defined(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS) && \
3683     defined(CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS)
3684 
3685 static void bpf_tracing_multi_link_release(struct bpf_link *link)
3686 {
3687 	struct bpf_tracing_multi_link *tr_link =
3688 		container_of(link, struct bpf_tracing_multi_link, link);
3689 
3690 	WARN_ON_ONCE(bpf_trampoline_multi_detach(link->prog, tr_link));
3691 }
3692 
3693 static void bpf_tracing_multi_link_dealloc(struct bpf_link *link)
3694 {
3695 	struct bpf_tracing_multi_link *tr_link =
3696 		container_of(link, struct bpf_tracing_multi_link, link);
3697 
3698 	kvfree(tr_link->fexits);
3699 	kvfree(tr_link->cookies);
3700 	kvfree(tr_link);
3701 }
3702 
3703 #ifdef CONFIG_PROC_FS
3704 static void bpf_tracing_multi_show_fdinfo(const struct bpf_link *link,
3705 					  struct seq_file *seq)
3706 {
3707 	struct bpf_tracing_multi_link *tr_link =
3708 		container_of(link, struct bpf_tracing_multi_link, link);
3709 	bool has_cookies = !!tr_link->cookies;
3710 
3711 	seq_printf(seq, "attach_type:\t%u\n", tr_link->link.attach_type);
3712 	seq_printf(seq, "cnt:\t%u\n", tr_link->nodes_cnt);
3713 
3714 	seq_printf(seq, "%s\t %s\t %s\t %s\n", "obj-id", "btf-id", "cookie", "func");
3715 	for (int i = 0; i < tr_link->nodes_cnt; i++) {
3716 		struct bpf_tracing_multi_node *mnode = &tr_link->nodes[i];
3717 		u32 btf_id, obj_id;
3718 
3719 		bpf_trampoline_unpack_key(mnode->trampoline->key, &obj_id, &btf_id);
3720 		seq_printf(seq, "%u\t %u\t %llu\t %pS\n",
3721 			   obj_id, btf_id,
3722 			   has_cookies ? tr_link->cookies[i] : 0,
3723 			   (void *) mnode->trampoline->ip);
3724 
3725 		cond_resched();
3726 	}
3727 }
3728 #endif
3729 
/*
 * Link operations handed to bpf_link_init() when a tracing-multi link is
 * created. The fdinfo callback only exists with CONFIG_PROC_FS.
 */
static const struct bpf_link_ops bpf_tracing_multi_link_lops = {
	.release = bpf_tracing_multi_link_release,
	.dealloc_deferred = bpf_tracing_multi_link_dealloc,
#ifdef CONFIG_PROC_FS
	.show_fdinfo = bpf_tracing_multi_show_fdinfo,
#endif
};
3737 
3738 static int ids_cmp_r(const void *pa, const void *pb, const void *priv __maybe_unused)
3739 {
3740 	u32 a = *(u32 *) pa;
3741 	u32 b = *(u32 *) pb;
3742 
3743 	return (a > b) - (a < b);
3744 }
3745 
3746 static void ids_swap_r(void *a, void *b, int size __maybe_unused,
3747 		       const void *priv __maybe_unused)
3748 {
3749 	u64 *cookie_a, *cookie_b, *cookies;
3750 	u32 *id_a = a, *id_b = b, *ids;
3751 	void **data = (void **) priv;
3752 
3753 	ids     = data[0];
3754 	cookies = data[1];
3755 
3756 	if (cookies) {
3757 		cookie_a = cookies + (id_a - ids);
3758 		cookie_b = cookies + (id_b - ids);
3759 		swap(*cookie_a, *cookie_b);
3760 	}
3761 	swap(*id_a, *id_b);
3762 }
3763 
3764 static int check_dup_ids(u32 *ids, u64 *cookies, u32 cnt)
3765 {
3766 	void *data[2] = { ids, cookies };
3767 	int err = 0;
3768 
3769 	/*
3770 	 * Sort ids array (together with cookies array if defined)
3771 	 * and check it for duplicates. The ids and cookies arrays
3772 	 * are left sorted.
3773 	 */
3774 	sort_r_nonatomic(ids, cnt, sizeof(ids[0]), ids_cmp_r, ids_swap_r, data);
3775 
3776 	for (int i = 1; i < cnt; i++) {
3777 		if (ids[i] == ids[i - 1]) {
3778 			err = -EINVAL;
3779 			break;
3780 		}
3781 	}
3782 	return err;
3783 }
3784 
/*
 * Create a tracing-multi link: attach @prog to the
 * attr->link_create.tracing_multi.cnt BTF ids supplied by user space,
 * with an optional per-id cookie.
 *
 * Return: the result of bpf_link_settle() on success, otherwise a
 * negative errno:
 *   -EINVAL  cnt or ids missing, link_create.flags or target_fd set, or
 *            the ids array contains duplicates
 *   -E2BIG   cnt exceeds MAX_TRACING_MULTI_CNT
 *   -ENOMEM  an allocation failed
 *   -EFAULT  the ids or cookies array cannot be copied from user space
 * or the error reported by bpf_link_prime() or
 * bpf_trampoline_multi_attach().
 */
int bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr)
{
	struct bpf_tracing_multi_link *link = NULL;
	struct bpf_tramp_node *fexits = NULL;
	struct bpf_link_primer link_primer;
	u32 cnt, *ids = NULL;
	u64 __user *ucookies;
	u64 *cookies = NULL;
	u32 __user *uids;
	int err;

	uids = u64_to_user_ptr(attr->link_create.tracing_multi.ids);
	cnt = attr->link_create.tracing_multi.cnt;

	if (!cnt || !uids)
		return -EINVAL;
	if (cnt > MAX_TRACING_MULTI_CNT)
		return -E2BIG;
	if (attr->link_create.flags || attr->link_create.target_fd)
		return -EINVAL;

	ids = kvmalloc_objs(*ids, cnt);
	if (!ids)
		return -ENOMEM;

	if (copy_from_user(ids, uids, cnt * sizeof(*ids))) {
		err = -EFAULT;
		goto error;
	}

	/* Cookies are optional; without them the cookies pointer stays NULL. */
	ucookies = u64_to_user_ptr(attr->link_create.tracing_multi.cookies);
	if (ucookies) {
		cookies = kvmalloc_objs(*cookies, cnt);
		if (!cookies) {
			err = -ENOMEM;
			goto error;
		}
		if (copy_from_user(cookies, ucookies, cnt * sizeof(*cookies))) {
			err = -EFAULT;
			goto error;
		}
	}

	/* Sorts ids (and cookies with them) and rejects duplicate ids. */
	err = check_dup_ids(ids, cookies, cnt);
	if (err)
		goto error;

	/* The fexits array is only allocated for BPF_TRACE_FSESSION_MULTI programs. */
	if (prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI) {
		fexits = kvmalloc_objs(*fexits, cnt);
		if (!fexits) {
			err = -ENOMEM;
			goto error;
		}
	}

	link = kvzalloc_flex(*link, nodes, cnt);
	if (!link) {
		err = -ENOMEM;
		goto error;
	}

	bpf_link_init(&link->link, BPF_LINK_TYPE_TRACING_MULTI,
		      &bpf_tracing_multi_link_lops, prog, prog->expected_attach_type);

	err = bpf_link_prime(&link->link, &link_primer);
	if (err)
		goto error;

	/*
	 * cookies and fexits now hang off the link, whose dealloc callback
	 * frees them; from here on the error label must not be used.
	 */
	link->nodes_cnt = cnt;
	link->cookies = cookies;
	link->fexits = fexits;

	err = bpf_trampoline_multi_attach(prog, ids, link);
	/* ids is not stored in the link, so it is freed either way. */
	kvfree(ids);
	if (err) {
		/*
		 * NOTE(review): the link, cookies and fexits are presumably
		 * freed through the link's dealloc callback once
		 * bpf_link_cleanup() drops the primed link - confirm.
		 */
		bpf_link_cleanup(&link_primer);
		return err;
	}
	return bpf_link_settle(&link_primer);

error:
	kvfree(fexits);
	kvfree(cookies);
	kvfree(ids);
	kvfree(link);
	return err;
}
3872 
3873 #else
3874 
/*
 * Stub used when CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS and
 * CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS are not both enabled: attaching
 * always fails with -EOPNOTSUPP.
 */
int bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr)
{
	return -EOPNOTSUPP;
}
3879 
3880 #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS && CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS */
3881