xref: /linux/kernel/bpf/helpers.c (revision 40863f4d6ef2c34bb00dd1070dfaf9d5f27a497e)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
3  */
4 #include <linux/bpf.h>
5 #include <linux/btf.h>
6 #include <linux/bpf-cgroup.h>
7 #include <linux/cgroup.h>
8 #include <linux/rcupdate.h>
9 #include <linux/random.h>
10 #include <linux/smp.h>
11 #include <linux/topology.h>
12 #include <linux/ktime.h>
13 #include <linux/sched.h>
14 #include <linux/uidgid.h>
15 #include <linux/filter.h>
16 #include <linux/ctype.h>
17 #include <linux/jiffies.h>
18 #include <linux/pid_namespace.h>
19 #include <linux/poison.h>
20 #include <linux/proc_ns.h>
21 #include <linux/sched/task.h>
22 #include <linux/security.h>
23 #include <linux/btf_ids.h>
24 #include <linux/bpf_mem_alloc.h>
25 #include <linux/kasan.h>
26 #include <linux/bpf_verifier.h>
27 #include <linux/uaccess.h>
28 #include <linux/verification.h>
29 
30 #include "../../lib/kstrtox.h"
31 
32 /* If a kernel subsystem allows eBPF programs to call this function, its
33  * verifier_ops->get_func_proto() callback should return
34  * bpf_map_lookup_elem_proto, so that the verifier can properly check the arguments.
35  *
36  * Different map implementations rely on RCU in their lookup/update/delete
37  * methods, therefore eBPF programs must run under an RCU lock whenever they
38  * are allowed to access maps, hence the rcu_read_lock_held() or
39  * rcu_read_lock_trace_held() checks in all three functions.
40  */
41 BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
42 {
43 	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
44 		     !rcu_read_lock_bh_held());
45 	return (unsigned long) map->ops->map_lookup_elem(map, key);
46 }
47 
48 const struct bpf_func_proto bpf_map_lookup_elem_proto = {
49 	.func		= bpf_map_lookup_elem,
50 	.gpl_only	= false,
51 	.pkt_access	= true,
52 	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
53 	.arg1_type	= ARG_CONST_MAP_PTR,
54 	.arg2_type	= ARG_PTR_TO_MAP_KEY,
55 };
56 
57 BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
58 	   void *, value, u64, flags)
59 {
60 	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
61 		     !rcu_read_lock_bh_held());
62 	return map->ops->map_update_elem(map, key, value, flags);
63 }
64 
65 const struct bpf_func_proto bpf_map_update_elem_proto = {
66 	.func		= bpf_map_update_elem,
67 	.gpl_only	= false,
68 	.pkt_access	= true,
69 	.ret_type	= RET_INTEGER,
70 	.arg1_type	= ARG_CONST_MAP_PTR,
71 	.arg2_type	= ARG_PTR_TO_MAP_KEY,
72 	.arg3_type	= ARG_PTR_TO_MAP_VALUE,
73 	.arg4_type	= ARG_ANYTHING,
74 };
75 
76 BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
77 {
78 	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
79 		     !rcu_read_lock_bh_held());
80 	return map->ops->map_delete_elem(map, key);
81 }
82 
83 const struct bpf_func_proto bpf_map_delete_elem_proto = {
84 	.func		= bpf_map_delete_elem,
85 	.gpl_only	= false,
86 	.pkt_access	= true,
87 	.ret_type	= RET_INTEGER,
88 	.arg1_type	= ARG_CONST_MAP_PTR,
89 	.arg2_type	= ARG_PTR_TO_MAP_KEY,
90 };
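
/* Illustrative sketch (not part of this file): from the BPF program side the
 * three helpers above operate on a declared map. The map name and value
 * layout below are made up for the example:
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_HASH);
 *		__uint(max_entries, 128);
 *		__type(key, __u32);
 *		__type(value, __u64);
 *	} counters SEC(".maps");
 *
 *	__u32 key = 0;
 *	__u64 one = 1, *val;
 *
 *	val = bpf_map_lookup_elem(&counters, &key);
 *	if (val)
 *		__sync_fetch_and_add(val, 1);
 *	else
 *		bpf_map_update_elem(&counters, &key, &one, BPF_ANY);
 *	// bpf_map_delete_elem(&counters, &key) would remove the entry again
 */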
91 
92 BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags)
93 {
94 	return map->ops->map_push_elem(map, value, flags);
95 }
96 
97 const struct bpf_func_proto bpf_map_push_elem_proto = {
98 	.func		= bpf_map_push_elem,
99 	.gpl_only	= false,
100 	.pkt_access	= true,
101 	.ret_type	= RET_INTEGER,
102 	.arg1_type	= ARG_CONST_MAP_PTR,
103 	.arg2_type	= ARG_PTR_TO_MAP_VALUE,
104 	.arg3_type	= ARG_ANYTHING,
105 };
106 
107 BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value)
108 {
109 	return map->ops->map_pop_elem(map, value);
110 }
111 
112 const struct bpf_func_proto bpf_map_pop_elem_proto = {
113 	.func		= bpf_map_pop_elem,
114 	.gpl_only	= false,
115 	.ret_type	= RET_INTEGER,
116 	.arg1_type	= ARG_CONST_MAP_PTR,
117 	.arg2_type	= ARG_PTR_TO_MAP_VALUE | MEM_UNINIT | MEM_WRITE,
118 };
119 
120 BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value)
121 {
122 	return map->ops->map_peek_elem(map, value);
123 }
124 
125 const struct bpf_func_proto bpf_map_peek_elem_proto = {
126 	.func		= bpf_map_peek_elem,
127 	.gpl_only	= false,
128 	.ret_type	= RET_INTEGER,
129 	.arg1_type	= ARG_CONST_MAP_PTR,
130 	.arg2_type	= ARG_PTR_TO_MAP_VALUE | MEM_UNINIT | MEM_WRITE,
131 };
132 
133 BPF_CALL_3(bpf_map_lookup_percpu_elem, struct bpf_map *, map, void *, key, u32, cpu)
134 {
135 	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
136 		     !rcu_read_lock_bh_held());
137 	return (unsigned long) map->ops->map_lookup_percpu_elem(map, key, cpu);
138 }
139 
140 const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto = {
141 	.func		= bpf_map_lookup_percpu_elem,
142 	.gpl_only	= false,
143 	.pkt_access	= true,
144 	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
145 	.arg1_type	= ARG_CONST_MAP_PTR,
146 	.arg2_type	= ARG_PTR_TO_MAP_KEY,
147 	.arg3_type	= ARG_ANYTHING,
148 };
149 
150 const struct bpf_func_proto bpf_get_prandom_u32_proto = {
151 	.func		= bpf_user_rnd_u32,
152 	.gpl_only	= false,
153 	.ret_type	= RET_INTEGER,
154 };
155 
156 BPF_CALL_0(bpf_get_smp_processor_id)
157 {
158 	return smp_processor_id();
159 }
160 
161 const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
162 	.func		= bpf_get_smp_processor_id,
163 	.gpl_only	= false,
164 	.ret_type	= RET_INTEGER,
165 	.allow_fastcall	= true,
166 };
167 
168 BPF_CALL_0(bpf_get_numa_node_id)
169 {
170 	return numa_node_id();
171 }
172 
173 const struct bpf_func_proto bpf_get_numa_node_id_proto = {
174 	.func		= bpf_get_numa_node_id,
175 	.gpl_only	= false,
176 	.ret_type	= RET_INTEGER,
177 };
178 
179 BPF_CALL_0(bpf_ktime_get_ns)
180 {
181 	/* NMI safe access to clock monotonic */
182 	return ktime_get_mono_fast_ns();
183 }
184 
185 const struct bpf_func_proto bpf_ktime_get_ns_proto = {
186 	.func		= bpf_ktime_get_ns,
187 	.gpl_only	= false,
188 	.ret_type	= RET_INTEGER,
189 };
190 
191 BPF_CALL_0(bpf_ktime_get_boot_ns)
192 {
193 	/* NMI safe access to clock boottime */
194 	return ktime_get_boot_fast_ns();
195 }
196 
197 const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = {
198 	.func		= bpf_ktime_get_boot_ns,
199 	.gpl_only	= false,
200 	.ret_type	= RET_INTEGER,
201 };
202 
203 BPF_CALL_0(bpf_ktime_get_coarse_ns)
204 {
205 	return ktime_get_coarse_ns();
206 }
207 
208 const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto = {
209 	.func		= bpf_ktime_get_coarse_ns,
210 	.gpl_only	= false,
211 	.ret_type	= RET_INTEGER,
212 };
213 
214 BPF_CALL_0(bpf_ktime_get_tai_ns)
215 {
216 	/* NMI safe access to clock tai */
217 	return ktime_get_tai_fast_ns();
218 }
219 
220 const struct bpf_func_proto bpf_ktime_get_tai_ns_proto = {
221 	.func		= bpf_ktime_get_tai_ns,
222 	.gpl_only	= false,
223 	.ret_type	= RET_INTEGER,
224 };
225 
226 BPF_CALL_0(bpf_get_current_pid_tgid)
227 {
228 	struct task_struct *task = current;
229 
230 	if (unlikely(!task))
231 		return -EINVAL;
232 
233 	return (u64) task->tgid << 32 | task->pid;
234 }
235 
236 const struct bpf_func_proto bpf_get_current_pid_tgid_proto = {
237 	.func		= bpf_get_current_pid_tgid,
238 	.gpl_only	= false,
239 	.ret_type	= RET_INTEGER,
240 };
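
/* Illustrative sketch (not part of this file): the returned u64 packs the
 * thread group id in the upper 32 bits and the thread id in the lower 32
 * bits, so BPF programs typically split it as follows:
 *
 *	__u64 id = bpf_get_current_pid_tgid();
 *	__u32 tgid = id >> 32;	// process id as seen from user space
 *	__u32 pid = (__u32)id;	// thread id
 */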
241 
242 BPF_CALL_0(bpf_get_current_uid_gid)
243 {
244 	struct task_struct *task = current;
245 	kuid_t uid;
246 	kgid_t gid;
247 
248 	if (unlikely(!task))
249 		return -EINVAL;
250 
251 	current_uid_gid(&uid, &gid);
252 	return (u64) from_kgid(&init_user_ns, gid) << 32 |
253 		     from_kuid(&init_user_ns, uid);
254 }
255 
256 const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
257 	.func		= bpf_get_current_uid_gid,
258 	.gpl_only	= false,
259 	.ret_type	= RET_INTEGER,
260 };
261 
262 BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size)
263 {
264 	struct task_struct *task = current;
265 
266 	if (unlikely(!task))
267 		goto err_clear;
268 
269 	/* Verifier guarantees that size > 0 */
270 	strscpy_pad(buf, task->comm, size);
271 	return 0;
272 err_clear:
273 	memset(buf, 0, size);
274 	return -EINVAL;
275 }
276 
277 const struct bpf_func_proto bpf_get_current_comm_proto = {
278 	.func		= bpf_get_current_comm,
279 	.gpl_only	= false,
280 	.ret_type	= RET_INTEGER,
281 	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
282 	.arg2_type	= ARG_CONST_SIZE,
283 };
284 
285 #if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK)
286 
287 static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
288 {
289 	arch_spinlock_t *l = (void *)lock;
290 	union {
291 		__u32 val;
292 		arch_spinlock_t lock;
293 	} u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED };
294 
295 	compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0");
296 	BUILD_BUG_ON(sizeof(*l) != sizeof(__u32));
297 	BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32));
298 	preempt_disable();
299 	arch_spin_lock(l);
300 }
301 
302 static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
303 {
304 	arch_spinlock_t *l = (void *)lock;
305 
306 	arch_spin_unlock(l);
307 	preempt_enable();
308 }
309 
310 #else
311 
312 static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
313 {
314 	atomic_t *l = (void *)lock;
315 
316 	BUILD_BUG_ON(sizeof(*l) != sizeof(*lock));
317 	do {
318 		atomic_cond_read_relaxed(l, !VAL);
319 	} while (atomic_xchg(l, 1));
320 }
321 
322 static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
323 {
324 	atomic_t *l = (void *)lock;
325 
326 	atomic_set_release(l, 0);
327 }
328 
329 #endif
330 
331 static DEFINE_PER_CPU(unsigned long, irqsave_flags);
332 
333 static inline void __bpf_spin_lock_irqsave(struct bpf_spin_lock *lock)
334 {
335 	unsigned long flags;
336 
337 	local_irq_save(flags);
338 	__bpf_spin_lock(lock);
339 	__this_cpu_write(irqsave_flags, flags);
340 }
341 
342 NOTRACE_BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
343 {
344 	__bpf_spin_lock_irqsave(lock);
345 	return 0;
346 }
347 
348 const struct bpf_func_proto bpf_spin_lock_proto = {
349 	.func		= bpf_spin_lock,
350 	.gpl_only	= false,
351 	.ret_type	= RET_VOID,
352 	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
353 	.arg1_btf_id    = BPF_PTR_POISON,
354 };
355 
356 static inline void __bpf_spin_unlock_irqrestore(struct bpf_spin_lock *lock)
357 {
358 	unsigned long flags;
359 
360 	flags = __this_cpu_read(irqsave_flags);
361 	__bpf_spin_unlock(lock);
362 	local_irq_restore(flags);
363 }
364 
365 NOTRACE_BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
366 {
367 	__bpf_spin_unlock_irqrestore(lock);
368 	return 0;
369 }
370 
371 const struct bpf_func_proto bpf_spin_unlock_proto = {
372 	.func		= bpf_spin_unlock,
373 	.gpl_only	= false,
374 	.ret_type	= RET_VOID,
375 	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
376 	.arg1_btf_id    = BPF_PTR_POISON,
377 };
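
/* Illustrative sketch (not part of this file): from a BPF program the lock is
 * embedded in the map value and protects the value's other fields (struct and
 * map names are made up for the example):
 *
 *	struct val { struct bpf_spin_lock lock; long cnt; } *v;
 *
 *	v = bpf_map_lookup_elem(&vals, &key);
 *	if (!v)
 *		return 0;
 *	bpf_spin_lock(&v->lock);
 *	v->cnt++;
 *	bpf_spin_unlock(&v->lock);
 */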
378 
379 void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
380 			   bool lock_src)
381 {
382 	struct bpf_spin_lock *lock;
383 
384 	if (lock_src)
385 		lock = src + map->record->spin_lock_off;
386 	else
387 		lock = dst + map->record->spin_lock_off;
388 	preempt_disable();
389 	__bpf_spin_lock_irqsave(lock);
390 	copy_map_value(map, dst, src);
391 	__bpf_spin_unlock_irqrestore(lock);
392 	preempt_enable();
393 }
394 
395 BPF_CALL_0(bpf_jiffies64)
396 {
397 	return get_jiffies_64();
398 }
399 
400 const struct bpf_func_proto bpf_jiffies64_proto = {
401 	.func		= bpf_jiffies64,
402 	.gpl_only	= false,
403 	.ret_type	= RET_INTEGER,
404 };
405 
406 #ifdef CONFIG_CGROUPS
407 BPF_CALL_0(bpf_get_current_cgroup_id)
408 {
409 	struct cgroup *cgrp;
410 	u64 cgrp_id;
411 
412 	rcu_read_lock();
413 	cgrp = task_dfl_cgroup(current);
414 	cgrp_id = cgroup_id(cgrp);
415 	rcu_read_unlock();
416 
417 	return cgrp_id;
418 }
419 
420 const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
421 	.func		= bpf_get_current_cgroup_id,
422 	.gpl_only	= false,
423 	.ret_type	= RET_INTEGER,
424 };
425 
426 BPF_CALL_1(bpf_get_current_ancestor_cgroup_id, int, ancestor_level)
427 {
428 	struct cgroup *cgrp;
429 	struct cgroup *ancestor;
430 	u64 cgrp_id;
431 
432 	rcu_read_lock();
433 	cgrp = task_dfl_cgroup(current);
434 	ancestor = cgroup_ancestor(cgrp, ancestor_level);
435 	cgrp_id = ancestor ? cgroup_id(ancestor) : 0;
436 	rcu_read_unlock();
437 
438 	return cgrp_id;
439 }
440 
441 const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
442 	.func		= bpf_get_current_ancestor_cgroup_id,
443 	.gpl_only	= false,
444 	.ret_type	= RET_INTEGER,
445 	.arg1_type	= ARG_ANYTHING,
446 };
447 #endif /* CONFIG_CGROUPS */
448 
449 #define BPF_STRTOX_BASE_MASK 0x1F
450 
451 static int __bpf_strtoull(const char *buf, size_t buf_len, u64 flags,
452 			  unsigned long long *res, bool *is_negative)
453 {
454 	unsigned int base = flags & BPF_STRTOX_BASE_MASK;
455 	const char *cur_buf = buf;
456 	size_t cur_len = buf_len;
457 	unsigned int consumed;
458 	size_t val_len;
459 	char str[64];
460 
461 	if (!buf || !buf_len || !res || !is_negative)
462 		return -EINVAL;
463 
464 	if (base != 0 && base != 8 && base != 10 && base != 16)
465 		return -EINVAL;
466 
467 	if (flags & ~BPF_STRTOX_BASE_MASK)
468 		return -EINVAL;
469 
470 	while (cur_buf < buf + buf_len && isspace(*cur_buf))
471 		++cur_buf;
472 
473 	*is_negative = (cur_buf < buf + buf_len && *cur_buf == '-');
474 	if (*is_negative)
475 		++cur_buf;
476 
477 	consumed = cur_buf - buf;
478 	cur_len -= consumed;
479 	if (!cur_len)
480 		return -EINVAL;
481 
482 	cur_len = min(cur_len, sizeof(str) - 1);
483 	memcpy(str, cur_buf, cur_len);
484 	str[cur_len] = '\0';
485 	cur_buf = str;
486 
487 	cur_buf = _parse_integer_fixup_radix(cur_buf, &base);
488 	val_len = _parse_integer(cur_buf, base, res);
489 
490 	if (val_len & KSTRTOX_OVERFLOW)
491 		return -ERANGE;
492 
493 	if (val_len == 0)
494 		return -EINVAL;
495 
496 	cur_buf += val_len;
497 	consumed += cur_buf - str;
498 
499 	return consumed;
500 }
501 
502 static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags,
503 			 long long *res)
504 {
505 	unsigned long long _res;
506 	bool is_negative;
507 	int err;
508 
509 	err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
510 	if (err < 0)
511 		return err;
512 	if (is_negative) {
513 		if ((long long)-_res > 0)
514 			return -ERANGE;
515 		*res = -_res;
516 	} else {
517 		if ((long long)_res < 0)
518 			return -ERANGE;
519 		*res = _res;
520 	}
521 	return err;
522 }
523 
524 BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags,
525 	   s64 *, res)
526 {
527 	long long _res;
528 	int err;
529 
530 	*res = 0;
531 	err = __bpf_strtoll(buf, buf_len, flags, &_res);
532 	if (err < 0)
533 		return err;
534 	*res = _res;
535 	return err;
536 }
537 
538 const struct bpf_func_proto bpf_strtol_proto = {
539 	.func		= bpf_strtol,
540 	.gpl_only	= false,
541 	.ret_type	= RET_INTEGER,
542 	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
543 	.arg2_type	= ARG_CONST_SIZE,
544 	.arg3_type	= ARG_ANYTHING,
545 	.arg4_type	= ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
546 	.arg4_size	= sizeof(s64),
547 };
548 
549 BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags,
550 	   u64 *, res)
551 {
552 	unsigned long long _res;
553 	bool is_negative;
554 	int err;
555 
556 	*res = 0;
557 	err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
558 	if (err < 0)
559 		return err;
560 	if (is_negative)
561 		return -EINVAL;
562 	*res = _res;
563 	return err;
564 }
565 
566 const struct bpf_func_proto bpf_strtoul_proto = {
567 	.func		= bpf_strtoul,
568 	.gpl_only	= false,
569 	.ret_type	= RET_INTEGER,
570 	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
571 	.arg2_type	= ARG_CONST_SIZE,
572 	.arg3_type	= ARG_ANYTHING,
573 	.arg4_type	= ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
574 	.arg4_size	= sizeof(u64),
575 };
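
/* Illustrative sketch (not part of this file): a BPF program parsing an
 * integer out of a buffer it has read (buffer name and size are made up).
 * A zero base in flags selects auto-detection (decimal, octal or hex):
 *
 *	long n;
 *	int ret = bpf_strtol(buf, sizeof(buf), 0, &n);
 *
 *	if (ret < 0)
 *		return ret;	// -EINVAL or -ERANGE on bad input
 *	// ret is the number of characters consumed, n holds the parsed value
 */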
576 
577 BPF_CALL_3(bpf_strncmp, const char *, s1, u32, s1_sz, const char *, s2)
578 {
579 	return strncmp(s1, s2, s1_sz);
580 }
581 
582 static const struct bpf_func_proto bpf_strncmp_proto = {
583 	.func		= bpf_strncmp,
584 	.gpl_only	= false,
585 	.ret_type	= RET_INTEGER,
586 	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
587 	.arg2_type	= ARG_CONST_SIZE,
588 	.arg3_type	= ARG_PTR_TO_CONST_STR,
589 };
590 
591 BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino,
592 	   struct bpf_pidns_info *, nsdata, u32, size)
593 {
594 	struct task_struct *task = current;
595 	struct pid_namespace *pidns;
596 	int err = -EINVAL;
597 
598 	if (unlikely(size != sizeof(struct bpf_pidns_info)))
599 		goto clear;
600 
601 	if (unlikely((u64)(dev_t)dev != dev))
602 		goto clear;
603 
604 	if (unlikely(!task))
605 		goto clear;
606 
607 	pidns = task_active_pid_ns(task);
608 	if (unlikely(!pidns)) {
609 		err = -ENOENT;
610 		goto clear;
611 	}
612 
613 	if (!ns_match(&pidns->ns, (dev_t)dev, ino))
614 		goto clear;
615 
616 	nsdata->pid = task_pid_nr_ns(task, pidns);
617 	nsdata->tgid = task_tgid_nr_ns(task, pidns);
618 	return 0;
619 clear:
620 	memset((void *)nsdata, 0, (size_t) size);
621 	return err;
622 }
623 
624 const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = {
625 	.func		= bpf_get_ns_current_pid_tgid,
626 	.gpl_only	= false,
627 	.ret_type	= RET_INTEGER,
628 	.arg1_type	= ARG_ANYTHING,
629 	.arg2_type	= ARG_ANYTHING,
630 	.arg3_type      = ARG_PTR_TO_UNINIT_MEM,
631 	.arg4_type      = ARG_CONST_SIZE,
632 };
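
/* Illustrative sketch (not part of this file): dev/ino identify a pid
 * namespace; user space typically stat()s /proc/self/ns/pid and hands
 * st_dev/st_ino to the program, e.g. through read-only globals (the names
 * "dev" and "ino" below are made up):
 *
 *	struct bpf_pidns_info ns;
 *
 *	if (!bpf_get_ns_current_pid_tgid(dev, ino, &ns, sizeof(ns)))
 *		bpf_printk("pid %u tgid %u in target ns", ns.pid, ns.tgid);
 */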
633 
634 static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = {
635 	.func		= bpf_get_raw_cpu_id,
636 	.gpl_only	= false,
637 	.ret_type	= RET_INTEGER,
638 };
639 
640 BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map,
641 	   u64, flags, void *, data, u64, size)
642 {
643 	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
644 		return -EINVAL;
645 
646 	return bpf_event_output(map, flags, data, size, NULL, 0, NULL);
647 }
648 
649 const struct bpf_func_proto bpf_event_output_data_proto =  {
650 	.func		= bpf_event_output_data,
651 	.gpl_only       = true,
652 	.ret_type       = RET_INTEGER,
653 	.arg1_type      = ARG_PTR_TO_CTX,
654 	.arg2_type      = ARG_CONST_MAP_PTR,
655 	.arg3_type      = ARG_ANYTHING,
656 	.arg4_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
657 	.arg5_type      = ARG_CONST_SIZE_OR_ZERO,
658 };
659 
660 BPF_CALL_3(bpf_copy_from_user, void *, dst, u32, size,
661 	   const void __user *, user_ptr)
662 {
663 	int ret = copy_from_user(dst, user_ptr, size);
664 
665 	if (unlikely(ret)) {
666 		memset(dst, 0, size);
667 		ret = -EFAULT;
668 	}
669 
670 	return ret;
671 }
672 
673 const struct bpf_func_proto bpf_copy_from_user_proto = {
674 	.func		= bpf_copy_from_user,
675 	.gpl_only	= false,
676 	.might_sleep	= true,
677 	.ret_type	= RET_INTEGER,
678 	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
679 	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
680 	.arg3_type	= ARG_ANYTHING,
681 };
682 
683 BPF_CALL_5(bpf_copy_from_user_task, void *, dst, u32, size,
684 	   const void __user *, user_ptr, struct task_struct *, tsk, u64, flags)
685 {
686 	int ret;
687 
688 	/* flags is not used yet */
689 	if (unlikely(flags))
690 		return -EINVAL;
691 
692 	if (unlikely(!size))
693 		return 0;
694 
695 	ret = access_process_vm(tsk, (unsigned long)user_ptr, dst, size, 0);
696 	if (ret == size)
697 		return 0;
698 
699 	memset(dst, 0, size);
700 	/* Return -EFAULT for partial read */
701 	return ret < 0 ? ret : -EFAULT;
702 }
703 
704 const struct bpf_func_proto bpf_copy_from_user_task_proto = {
705 	.func		= bpf_copy_from_user_task,
706 	.gpl_only	= true,
707 	.might_sleep	= true,
708 	.ret_type	= RET_INTEGER,
709 	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
710 	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
711 	.arg3_type	= ARG_ANYTHING,
712 	.arg4_type	= ARG_PTR_TO_BTF_ID,
713 	.arg4_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
714 	.arg5_type	= ARG_ANYTHING
715 };
716 
717 BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
718 {
719 	if (cpu >= nr_cpu_ids)
720 		return (unsigned long)NULL;
721 
722 	return (unsigned long)per_cpu_ptr((const void __percpu *)(const uintptr_t)ptr, cpu);
723 }
724 
725 const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
726 	.func		= bpf_per_cpu_ptr,
727 	.gpl_only	= false,
728 	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID | PTR_MAYBE_NULL | MEM_RDONLY,
729 	.arg1_type	= ARG_PTR_TO_PERCPU_BTF_ID,
730 	.arg2_type	= ARG_ANYTHING,
731 };
732 
733 BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr)
734 {
735 	return (unsigned long)this_cpu_ptr((const void __percpu *)(const uintptr_t)percpu_ptr);
736 }
737 
738 const struct bpf_func_proto bpf_this_cpu_ptr_proto = {
739 	.func		= bpf_this_cpu_ptr,
740 	.gpl_only	= false,
741 	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID | MEM_RDONLY,
742 	.arg1_type	= ARG_PTR_TO_PERCPU_BTF_ID,
743 };
744 
745 static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
746 		size_t bufsz)
747 {
748 	void __user *user_ptr = (__force void __user *)unsafe_ptr;
749 
750 	buf[0] = 0;
751 
752 	switch (fmt_ptype) {
753 	case 's':
754 #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
755 		if ((unsigned long)unsafe_ptr < TASK_SIZE)
756 			return strncpy_from_user_nofault(buf, user_ptr, bufsz);
757 		fallthrough;
758 #endif
759 	case 'k':
760 		return strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz);
761 	case 'u':
762 		return strncpy_from_user_nofault(buf, user_ptr, bufsz);
763 	}
764 
765 	return -EINVAL;
766 }
767 
768 /* Support executing three nested bprintf helper calls on a given CPU */
769 #define MAX_BPRINTF_NEST_LEVEL	3
770 
771 static DEFINE_PER_CPU(struct bpf_bprintf_buffers[MAX_BPRINTF_NEST_LEVEL], bpf_bprintf_bufs);
772 static DEFINE_PER_CPU(int, bpf_bprintf_nest_level);
773 
774 int bpf_try_get_buffers(struct bpf_bprintf_buffers **bufs)
775 {
776 	int nest_level;
777 
778 	nest_level = this_cpu_inc_return(bpf_bprintf_nest_level);
779 	if (WARN_ON_ONCE(nest_level > MAX_BPRINTF_NEST_LEVEL)) {
780 		this_cpu_dec(bpf_bprintf_nest_level);
781 		return -EBUSY;
782 	}
783 	*bufs = this_cpu_ptr(&bpf_bprintf_bufs[nest_level - 1]);
784 
785 	return 0;
786 }
787 
788 void bpf_put_buffers(void)
789 {
790 	if (WARN_ON_ONCE(this_cpu_read(bpf_bprintf_nest_level) == 0))
791 		return;
792 	this_cpu_dec(bpf_bprintf_nest_level);
793 }
794 
795 void bpf_bprintf_cleanup(struct bpf_bprintf_data *data)
796 {
797 	if (!data->bin_args && !data->buf)
798 		return;
799 	bpf_put_buffers();
800 }
801 
802 /*
803  * bpf_bprintf_prepare - Generic pass on format strings for bprintf-like helpers
804  *
805  * Returns a negative value if fmt is an invalid format string or 0 otherwise.
806  *
807  * This can be used in two ways:
808  * - Format string verification only: when data->get_bin_args is false
809  * - Arguments preparation: in addition to the above verification, it writes in
810  *   data->bin_args a binary representation of arguments usable by bstr_printf
811  *   where pointers from BPF have been sanitized.
812  *
813  * In argument preparation mode, if 0 is returned, safe temporary buffers are
814  * allocated and bpf_bprintf_cleanup should be called to free them after use.
815  */
816 int bpf_bprintf_prepare(const char *fmt, u32 fmt_size, const u64 *raw_args,
817 			u32 num_args, struct bpf_bprintf_data *data)
818 {
819 	bool get_buffers = (data->get_bin_args && num_args) || data->get_buf;
820 	char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end;
821 	struct bpf_bprintf_buffers *buffers = NULL;
822 	size_t sizeof_cur_arg, sizeof_cur_ip;
823 	int err, i, num_spec = 0;
824 	u64 cur_arg;
825 	char fmt_ptype, cur_ip[16], ip_spec[] = "%pXX";
826 
827 	fmt_end = strnchr(fmt, fmt_size, 0);
828 	if (!fmt_end)
829 		return -EINVAL;
830 	fmt_size = fmt_end - fmt;
831 
832 	if (get_buffers && bpf_try_get_buffers(&buffers))
833 		return -EBUSY;
834 
835 	if (data->get_bin_args) {
836 		if (num_args)
837 			tmp_buf = buffers->bin_args;
838 		tmp_buf_end = tmp_buf + MAX_BPRINTF_BIN_ARGS;
839 		data->bin_args = (u32 *)tmp_buf;
840 	}
841 
842 	if (data->get_buf)
843 		data->buf = buffers->buf;
844 
845 	for (i = 0; i < fmt_size; i++) {
846 		if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
847 			err = -EINVAL;
848 			goto out;
849 		}
850 
851 		if (fmt[i] != '%')
852 			continue;
853 
854 		if (fmt[i + 1] == '%') {
855 			i++;
856 			continue;
857 		}
858 
859 		if (num_spec >= num_args) {
860 			err = -EINVAL;
861 			goto out;
862 		}
863 
864 		/* The string is zero-terminated, so if fmt[i] != 0 we can
865 		 * always access fmt[i + 1]; in the worst case it will be a 0.
866 		 */
867 		i++;
868 
869 		/* skip optional "[0 +-][num]" width formatting field */
870 		while (fmt[i] == '0' || fmt[i] == '+'  || fmt[i] == '-' ||
871 		       fmt[i] == ' ')
872 			i++;
873 		if (fmt[i] >= '1' && fmt[i] <= '9') {
874 			i++;
875 			while (fmt[i] >= '0' && fmt[i] <= '9')
876 				i++;
877 		}
878 
879 		if (fmt[i] == 'p') {
880 			sizeof_cur_arg = sizeof(long);
881 
882 			if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) ||
883 			    ispunct(fmt[i + 1])) {
884 				if (tmp_buf)
885 					cur_arg = raw_args[num_spec];
886 				goto nocopy_fmt;
887 			}
888 
889 			if ((fmt[i + 1] == 'k' || fmt[i + 1] == 'u') &&
890 			    fmt[i + 2] == 's') {
891 				fmt_ptype = fmt[i + 1];
892 				i += 2;
893 				goto fmt_str;
894 			}
895 
896 			if (fmt[i + 1] == 'K' ||
897 			    fmt[i + 1] == 'x' || fmt[i + 1] == 's' ||
898 			    fmt[i + 1] == 'S') {
899 				if (tmp_buf)
900 					cur_arg = raw_args[num_spec];
901 				i++;
902 				goto nocopy_fmt;
903 			}
904 
905 			if (fmt[i + 1] == 'B') {
906 				if (tmp_buf)  {
907 					err = snprintf(tmp_buf,
908 						       (tmp_buf_end - tmp_buf),
909 						       "%pB",
910 						       (void *)(long)raw_args[num_spec]);
911 					tmp_buf += (err + 1);
912 				}
913 
914 				i++;
915 				num_spec++;
916 				continue;
917 			}
918 
919 			/* only support "%pI4", "%pi4", "%pI6" and "%pi6". */
920 			if ((fmt[i + 1] != 'i' && fmt[i + 1] != 'I') ||
921 			    (fmt[i + 2] != '4' && fmt[i + 2] != '6')) {
922 				err = -EINVAL;
923 				goto out;
924 			}
925 
926 			i += 2;
927 			if (!tmp_buf)
928 				goto nocopy_fmt;
929 
930 			sizeof_cur_ip = (fmt[i] == '4') ? 4 : 16;
931 			if (tmp_buf_end - tmp_buf < sizeof_cur_ip) {
932 				err = -ENOSPC;
933 				goto out;
934 			}
935 
936 			unsafe_ptr = (char *)(long)raw_args[num_spec];
937 			err = copy_from_kernel_nofault(cur_ip, unsafe_ptr,
938 						       sizeof_cur_ip);
939 			if (err < 0)
940 				memset(cur_ip, 0, sizeof_cur_ip);
941 
942 			/* hack: bstr_printf expects IP addresses to be
943 			 * pre-formatted as strings; ironically, the easiest way
944 			 * to do that is to call snprintf.
945 			 */
946 			ip_spec[2] = fmt[i - 1];
947 			ip_spec[3] = fmt[i];
948 			err = snprintf(tmp_buf, tmp_buf_end - tmp_buf,
949 				       ip_spec, &cur_ip);
950 
951 			tmp_buf += err + 1;
952 			num_spec++;
953 
954 			continue;
955 		} else if (fmt[i] == 's') {
956 			fmt_ptype = fmt[i];
957 fmt_str:
958 			if (fmt[i + 1] != 0 &&
959 			    !isspace(fmt[i + 1]) &&
960 			    !ispunct(fmt[i + 1])) {
961 				err = -EINVAL;
962 				goto out;
963 			}
964 
965 			if (!tmp_buf)
966 				goto nocopy_fmt;
967 
968 			if (tmp_buf_end == tmp_buf) {
969 				err = -ENOSPC;
970 				goto out;
971 			}
972 
973 			unsafe_ptr = (char *)(long)raw_args[num_spec];
974 			err = bpf_trace_copy_string(tmp_buf, unsafe_ptr,
975 						    fmt_ptype,
976 						    tmp_buf_end - tmp_buf);
977 			if (err < 0) {
978 				tmp_buf[0] = '\0';
979 				err = 1;
980 			}
981 
982 			tmp_buf += err;
983 			num_spec++;
984 
985 			continue;
986 		} else if (fmt[i] == 'c') {
987 			if (!tmp_buf)
988 				goto nocopy_fmt;
989 
990 			if (tmp_buf_end == tmp_buf) {
991 				err = -ENOSPC;
992 				goto out;
993 			}
994 
995 			*tmp_buf = raw_args[num_spec];
996 			tmp_buf++;
997 			num_spec++;
998 
999 			continue;
1000 		}
1001 
1002 		sizeof_cur_arg = sizeof(int);
1003 
1004 		if (fmt[i] == 'l') {
1005 			sizeof_cur_arg = sizeof(long);
1006 			i++;
1007 		}
1008 		if (fmt[i] == 'l') {
1009 			sizeof_cur_arg = sizeof(long long);
1010 			i++;
1011 		}
1012 
1013 		if (fmt[i] != 'i' && fmt[i] != 'd' && fmt[i] != 'u' &&
1014 		    fmt[i] != 'x' && fmt[i] != 'X') {
1015 			err = -EINVAL;
1016 			goto out;
1017 		}
1018 
1019 		if (tmp_buf)
1020 			cur_arg = raw_args[num_spec];
1021 nocopy_fmt:
1022 		if (tmp_buf) {
1023 			tmp_buf = PTR_ALIGN(tmp_buf, sizeof(u32));
1024 			if (tmp_buf_end - tmp_buf < sizeof_cur_arg) {
1025 				err = -ENOSPC;
1026 				goto out;
1027 			}
1028 
1029 			if (sizeof_cur_arg == 8) {
1030 				*(u32 *)tmp_buf = *(u32 *)&cur_arg;
1031 				*(u32 *)(tmp_buf + 4) = *((u32 *)&cur_arg + 1);
1032 			} else {
1033 				*(u32 *)tmp_buf = (u32)(long)cur_arg;
1034 			}
1035 			tmp_buf += sizeof_cur_arg;
1036 		}
1037 		num_spec++;
1038 	}
1039 
1040 	err = 0;
1041 out:
1042 	if (err)
1043 		bpf_bprintf_cleanup(data);
1044 	return err;
1045 }
1046 
1047 BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt,
1048 	   const void *, args, u32, data_len)
1049 {
1050 	struct bpf_bprintf_data data = {
1051 		.get_bin_args	= true,
1052 	};
1053 	int err, num_args;
1054 
1055 	if (data_len % 8 || data_len > MAX_BPRINTF_VARARGS * 8 ||
1056 	    (data_len && !args))
1057 		return -EINVAL;
1058 	num_args = data_len / 8;
1059 
1060 	/* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we
1061 	 * can safely give an unbounded size.
1062 	 */
1063 	err = bpf_bprintf_prepare(fmt, UINT_MAX, args, num_args, &data);
1064 	if (err < 0)
1065 		return err;
1066 
1067 	err = bstr_printf(str, str_size, fmt, data.bin_args);
1068 
1069 	bpf_bprintf_cleanup(&data);
1070 
1071 	return err + 1;
1072 }
1073 
1074 const struct bpf_func_proto bpf_snprintf_proto = {
1075 	.func		= bpf_snprintf,
1076 	.gpl_only	= true,
1077 	.ret_type	= RET_INTEGER,
1078 	.arg1_type	= ARG_PTR_TO_MEM_OR_NULL,
1079 	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
1080 	.arg3_type	= ARG_PTR_TO_CONST_STR,
1081 	.arg4_type	= ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
1082 	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
1083 };
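
/* Illustrative sketch (not part of this file): bpf_snprintf() receives its
 * variadic arguments as an array of u64 (the raw_args consumed by
 * bpf_bprintf_prepare() above). Variable names below are made up:
 *
 *	char out[64];
 *	__u64 args[] = { (__u64)pid, (__u64)(long)name };
 *
 *	bpf_snprintf(out, sizeof(out), "pid %d comm %s", args, sizeof(args));
 */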
1084 
1085 struct bpf_async_cb {
1086 	struct bpf_map *map;
1087 	struct bpf_prog *prog;
1088 	void __rcu *callback_fn;
1089 	void *value;
1090 	union {
1091 		struct rcu_head rcu;
1092 		struct work_struct delete_work;
1093 	};
1094 	u64 flags;
1095 };
1096 
1097 /* BPF map elements can contain 'struct bpf_timer'.
1098  * Such a map owns all of its BPF timers.
1099  * 'struct bpf_timer' is allocated as part of map element allocation
1100  * and it's zero initialized.
1101  * That space is used to keep 'struct bpf_async_kern'.
1102  * bpf_timer_init() allocates 'struct bpf_hrtimer', inits the hrtimer, and
1103  * remembers the 'struct bpf_map *' pointer it's part of.
1104  * bpf_timer_set_callback() increments the prog refcnt and assigns the bpf callback_fn.
1105  * bpf_timer_start() arms the timer.
1106  * If the user space reference to a map goes to zero at this point, the
1107  * ops->map_release_uref callback is responsible for cancelling the timers,
1108  * freeing their memory, and decrementing the progs' refcnts.
1109  * bpf_timer_cancel() cancels the timer and decrements the prog's refcnt.
1110  * Inner maps can contain bpf timers as well. ops->map_release_uref frees
1111  * the timers when an inner map is replaced or deleted by user space.
1112  */
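
/* Illustrative sketch (not part of this file): the lifecycle described above,
 * as seen from a BPF program (map, struct and callback names are made up):
 *
 *	struct elem { struct bpf_timer t; } *val;
 *
 *	static int timer_cb(void *map, int *key, struct elem *val)
 *	{
 *		return 0;	// the verifier requires a zero return value
 *	}
 *
 *	val = bpf_map_lookup_elem(&timer_map, &key);
 *	if (!val)
 *		return 0;
 *	bpf_timer_init(&val->t, &timer_map, CLOCK_MONOTONIC);
 *	bpf_timer_set_callback(&val->t, timer_cb);
 *	bpf_timer_start(&val->t, 1000000, 0);	// fires in ~1ms
 */
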
1113 struct bpf_hrtimer {
1114 	struct bpf_async_cb cb;
1115 	struct hrtimer timer;
1116 	atomic_t cancelling;
1117 };
1118 
1119 struct bpf_work {
1120 	struct bpf_async_cb cb;
1121 	struct work_struct work;
1122 	struct work_struct delete_work;
1123 };
1124 
1125 /* the actual struct hidden inside uapi struct bpf_timer and bpf_wq */
1126 struct bpf_async_kern {
1127 	union {
1128 		struct bpf_async_cb *cb;
1129 		struct bpf_hrtimer *timer;
1130 		struct bpf_work *work;
1131 	};
1132 	/* bpf_spin_lock is used here instead of spinlock_t to make
1133 	 * sure that it always fits into space reserved by struct bpf_timer
1134 	 * regardless of LOCKDEP and spinlock debug flags.
1135 	 */
1136 	struct bpf_spin_lock lock;
1137 } __attribute__((aligned(8)));
1138 
1139 enum bpf_async_type {
1140 	BPF_ASYNC_TYPE_TIMER = 0,
1141 	BPF_ASYNC_TYPE_WQ,
1142 };
1143 
1144 static DEFINE_PER_CPU(struct bpf_hrtimer *, hrtimer_running);
1145 
1146 static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer)
1147 {
1148 	struct bpf_hrtimer *t = container_of(hrtimer, struct bpf_hrtimer, timer);
1149 	struct bpf_map *map = t->cb.map;
1150 	void *value = t->cb.value;
1151 	bpf_callback_t callback_fn;
1152 	void *key;
1153 	u32 idx;
1154 
1155 	BTF_TYPE_EMIT(struct bpf_timer);
1156 	callback_fn = rcu_dereference_check(t->cb.callback_fn, rcu_read_lock_bh_held());
1157 	if (!callback_fn)
1158 		goto out;
1159 
1160 	/* bpf_timer_cb() runs in hrtimer_run_softirq. It doesn't migrate and
1161 	 * cannot be preempted by another bpf_timer_cb() on the same cpu.
1162 	 * Remember the timer this callback is servicing to prevent
1163 	 * deadlock if callback_fn() calls bpf_timer_cancel() or
1164 	 * bpf_map_delete_elem() on the same timer.
1165 	 */
1166 	this_cpu_write(hrtimer_running, t);
1167 	if (map->map_type == BPF_MAP_TYPE_ARRAY) {
1168 		struct bpf_array *array = container_of(map, struct bpf_array, map);
1169 
1170 		/* compute the key */
1171 		idx = ((char *)value - array->value) / array->elem_size;
1172 		key = &idx;
1173 	} else { /* hash or lru */
1174 		key = value - round_up(map->key_size, 8);
1175 	}
1176 
1177 	callback_fn((u64)(long)map, (u64)(long)key, (u64)(long)value, 0, 0);
1178 	/* The verifier checked that return value is zero. */
1179 
1180 	this_cpu_write(hrtimer_running, NULL);
1181 out:
1182 	return HRTIMER_NORESTART;
1183 }
1184 
1185 static void bpf_wq_work(struct work_struct *work)
1186 {
1187 	struct bpf_work *w = container_of(work, struct bpf_work, work);
1188 	struct bpf_async_cb *cb = &w->cb;
1189 	struct bpf_map *map = cb->map;
1190 	bpf_callback_t callback_fn;
1191 	void *value = cb->value;
1192 	void *key;
1193 	u32 idx;
1194 
1195 	BTF_TYPE_EMIT(struct bpf_wq);
1196 
1197 	callback_fn = READ_ONCE(cb->callback_fn);
1198 	if (!callback_fn)
1199 		return;
1200 
1201 	if (map->map_type == BPF_MAP_TYPE_ARRAY) {
1202 		struct bpf_array *array = container_of(map, struct bpf_array, map);
1203 
1204 		/* compute the key */
1205 		idx = ((char *)value - array->value) / array->elem_size;
1206 		key = &idx;
1207 	} else { /* hash or lru */
1208 		key = value - round_up(map->key_size, 8);
1209 	}
1210 
1211 	rcu_read_lock_trace();
1212 	migrate_disable();
1213 
1214 	callback_fn((u64)(long)map, (u64)(long)key, (u64)(long)value, 0, 0);
1215 
1216 	migrate_enable();
1217 	rcu_read_unlock_trace();
1218 }
1219 
1220 static void bpf_wq_delete_work(struct work_struct *work)
1221 {
1222 	struct bpf_work *w = container_of(work, struct bpf_work, delete_work);
1223 
1224 	cancel_work_sync(&w->work);
1225 
1226 	kfree_rcu(w, cb.rcu);
1227 }
1228 
1229 static void bpf_timer_delete_work(struct work_struct *work)
1230 {
1231 	struct bpf_hrtimer *t = container_of(work, struct bpf_hrtimer, cb.delete_work);
1232 
1233 	/* Cancel the timer and wait for the callback to complete if it was running.
1234 	 * If hrtimer_cancel() can be safely called, it's safe to call
1235 	 * kfree_rcu(t) right after for both preallocated and non-preallocated
1236 	 * maps.  The async->cb = NULL was already done and no code path can see
1237 	 * address 't' anymore. Any timer armed on the existing bpf_hrtimer before
1238 	 * bpf_timer_cancel_and_free() will have been cancelled.
1239 	 */
1240 	hrtimer_cancel(&t->timer);
1241 	kfree_rcu(t, cb.rcu);
1242 }
1243 
1244 static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u64 flags,
1245 			    enum bpf_async_type type)
1246 {
1247 	struct bpf_async_cb *cb;
1248 	struct bpf_hrtimer *t;
1249 	struct bpf_work *w;
1250 	clockid_t clockid;
1251 	size_t size;
1252 	int ret = 0;
1253 
1254 	if (in_nmi())
1255 		return -EOPNOTSUPP;
1256 
1257 	switch (type) {
1258 	case BPF_ASYNC_TYPE_TIMER:
1259 		size = sizeof(struct bpf_hrtimer);
1260 		break;
1261 	case BPF_ASYNC_TYPE_WQ:
1262 		size = sizeof(struct bpf_work);
1263 		break;
1264 	default:
1265 		return -EINVAL;
1266 	}
1267 
1268 	__bpf_spin_lock_irqsave(&async->lock);
1269 	t = async->timer;
1270 	if (t) {
1271 		ret = -EBUSY;
1272 		goto out;
1273 	}
1274 
1275 	/* Allocate via bpf_map_kmalloc_node() for memcg accounting. Until
1276 	 * kmalloc_nolock() is available, avoid locking issues by using
1277 	 * __GFP_HIGH (GFP_ATOMIC & ~__GFP_RECLAIM).
1278 	 */
1279 	cb = bpf_map_kmalloc_node(map, size, __GFP_HIGH, map->numa_node);
1280 	if (!cb) {
1281 		ret = -ENOMEM;
1282 		goto out;
1283 	}
1284 
1285 	switch (type) {
1286 	case BPF_ASYNC_TYPE_TIMER:
1287 		clockid = flags & (MAX_CLOCKS - 1);
1288 		t = (struct bpf_hrtimer *)cb;
1289 
1290 		atomic_set(&t->cancelling, 0);
1291 		INIT_WORK(&t->cb.delete_work, bpf_timer_delete_work);
1292 		hrtimer_setup(&t->timer, bpf_timer_cb, clockid, HRTIMER_MODE_REL_SOFT);
1293 		cb->value = (void *)async - map->record->timer_off;
1294 		break;
1295 	case BPF_ASYNC_TYPE_WQ:
1296 		w = (struct bpf_work *)cb;
1297 
1298 		INIT_WORK(&w->work, bpf_wq_work);
1299 		INIT_WORK(&w->delete_work, bpf_wq_delete_work);
1300 		cb->value = (void *)async - map->record->wq_off;
1301 		break;
1302 	}
1303 	cb->map = map;
1304 	cb->prog = NULL;
1305 	cb->flags = flags;
1306 	rcu_assign_pointer(cb->callback_fn, NULL);
1307 
1308 	WRITE_ONCE(async->cb, cb);
1309 	/* Guarantee the order between async->cb and map->usercnt. So
1310 	 * when there are concurrent uref release and bpf timer init, either
1311 	 * bpf_timer_cancel_and_free() called by uref release reads a non-NULL
1312 	 * timer or atomic64_read() below returns a zero usercnt.
1313 	 */
1314 	smp_mb();
1315 	if (!atomic64_read(&map->usercnt)) {
1316 		/* maps with timers must be either held by user space
1317 		 * or pinned in bpffs.
1318 		 */
1319 		WRITE_ONCE(async->cb, NULL);
1320 		kfree(cb);
1321 		ret = -EPERM;
1322 	}
1323 out:
1324 	__bpf_spin_unlock_irqrestore(&async->lock);
1325 	return ret;
1326 }
1327 
1328 BPF_CALL_3(bpf_timer_init, struct bpf_async_kern *, timer, struct bpf_map *, map,
1329 	   u64, flags)
1330 {
1331 	clock_t clockid = flags & (MAX_CLOCKS - 1);
1332 
1333 	BUILD_BUG_ON(MAX_CLOCKS != 16);
1334 	BUILD_BUG_ON(sizeof(struct bpf_async_kern) > sizeof(struct bpf_timer));
1335 	BUILD_BUG_ON(__alignof__(struct bpf_async_kern) != __alignof__(struct bpf_timer));
1336 
1337 	if (flags >= MAX_CLOCKS ||
1338 	    /* similar to timerfd except _ALARM variants are not supported */
1339 	    (clockid != CLOCK_MONOTONIC &&
1340 	     clockid != CLOCK_REALTIME &&
1341 	     clockid != CLOCK_BOOTTIME))
1342 		return -EINVAL;
1343 
1344 	return __bpf_async_init(timer, map, flags, BPF_ASYNC_TYPE_TIMER);
1345 }
1346 
1347 static const struct bpf_func_proto bpf_timer_init_proto = {
1348 	.func		= bpf_timer_init,
1349 	.gpl_only	= true,
1350 	.ret_type	= RET_INTEGER,
1351 	.arg1_type	= ARG_PTR_TO_TIMER,
1352 	.arg2_type	= ARG_CONST_MAP_PTR,
1353 	.arg3_type	= ARG_ANYTHING,
1354 };
1355 
1356 static int __bpf_async_set_callback(struct bpf_async_kern *async, void *callback_fn,
1357 				    struct bpf_prog_aux *aux, unsigned int flags,
1358 				    enum bpf_async_type type)
1359 {
1360 	struct bpf_prog *prev, *prog = aux->prog;
1361 	struct bpf_async_cb *cb;
1362 	int ret = 0;
1363 
1364 	if (in_nmi())
1365 		return -EOPNOTSUPP;
1366 	__bpf_spin_lock_irqsave(&async->lock);
1367 	cb = async->cb;
1368 	if (!cb) {
1369 		ret = -EINVAL;
1370 		goto out;
1371 	}
1372 	if (!atomic64_read(&cb->map->usercnt)) {
1373 		/* maps with timers must be either held by user space
1374 		 * or pinned in bpffs. Otherwise timer might still be
1375 		 * running even when bpf prog is detached and user space
1376 		 * is gone, since map_release_uref won't ever be called.
1377 		 */
1378 		ret = -EPERM;
1379 		goto out;
1380 	}
1381 	prev = cb->prog;
1382 	if (prev != prog) {
1383 		/* Bump prog refcnt once. Every bpf_timer_set_callback()
1384 		 * can pick different callback_fn-s within the same prog.
1385 		 */
1386 		prog = bpf_prog_inc_not_zero(prog);
1387 		if (IS_ERR(prog)) {
1388 			ret = PTR_ERR(prog);
1389 			goto out;
1390 		}
1391 		if (prev)
1392 			/* Drop prev prog refcnt when swapping with new prog */
1393 			bpf_prog_put(prev);
1394 		cb->prog = prog;
1395 	}
1396 	rcu_assign_pointer(cb->callback_fn, callback_fn);
1397 out:
1398 	__bpf_spin_unlock_irqrestore(&async->lock);
1399 	return ret;
1400 }
1401 
1402 BPF_CALL_3(bpf_timer_set_callback, struct bpf_async_kern *, timer, void *, callback_fn,
1403 	   struct bpf_prog_aux *, aux)
1404 {
1405 	return __bpf_async_set_callback(timer, callback_fn, aux, 0, BPF_ASYNC_TYPE_TIMER);
1406 }
1407 
1408 static const struct bpf_func_proto bpf_timer_set_callback_proto = {
1409 	.func		= bpf_timer_set_callback,
1410 	.gpl_only	= true,
1411 	.ret_type	= RET_INTEGER,
1412 	.arg1_type	= ARG_PTR_TO_TIMER,
1413 	.arg2_type	= ARG_PTR_TO_FUNC,
1414 };
1415 
1416 BPF_CALL_3(bpf_timer_start, struct bpf_async_kern *, timer, u64, nsecs, u64, flags)
1417 {
1418 	struct bpf_hrtimer *t;
1419 	int ret = 0;
1420 	enum hrtimer_mode mode;
1421 
1422 	if (in_nmi())
1423 		return -EOPNOTSUPP;
1424 	if (flags & ~(BPF_F_TIMER_ABS | BPF_F_TIMER_CPU_PIN))
1425 		return -EINVAL;
1426 	__bpf_spin_lock_irqsave(&timer->lock);
1427 	t = timer->timer;
1428 	if (!t || !t->cb.prog) {
1429 		ret = -EINVAL;
1430 		goto out;
1431 	}
1432 
1433 	if (flags & BPF_F_TIMER_ABS)
1434 		mode = HRTIMER_MODE_ABS_SOFT;
1435 	else
1436 		mode = HRTIMER_MODE_REL_SOFT;
1437 
1438 	if (flags & BPF_F_TIMER_CPU_PIN)
1439 		mode |= HRTIMER_MODE_PINNED;
1440 
1441 	hrtimer_start(&t->timer, ns_to_ktime(nsecs), mode);
1442 out:
1443 	__bpf_spin_unlock_irqrestore(&timer->lock);
1444 	return ret;
1445 }
1446 
1447 static const struct bpf_func_proto bpf_timer_start_proto = {
1448 	.func		= bpf_timer_start,
1449 	.gpl_only	= true,
1450 	.ret_type	= RET_INTEGER,
1451 	.arg1_type	= ARG_PTR_TO_TIMER,
1452 	.arg2_type	= ARG_ANYTHING,
1453 	.arg3_type	= ARG_ANYTHING,
1454 };
1455 
1456 static void drop_prog_refcnt(struct bpf_async_cb *async)
1457 {
1458 	struct bpf_prog *prog = async->prog;
1459 
1460 	if (prog) {
1461 		bpf_prog_put(prog);
1462 		async->prog = NULL;
1463 		rcu_assign_pointer(async->callback_fn, NULL);
1464 	}
1465 }
1466 
1467 BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, timer)
1468 {
1469 	struct bpf_hrtimer *t, *cur_t;
1470 	bool inc = false;
1471 	int ret = 0;
1472 
1473 	if (in_nmi())
1474 		return -EOPNOTSUPP;
1475 	rcu_read_lock();
1476 	__bpf_spin_lock_irqsave(&timer->lock);
1477 	t = timer->timer;
1478 	if (!t) {
1479 		ret = -EINVAL;
1480 		goto out;
1481 	}
1482 
1483 	cur_t = this_cpu_read(hrtimer_running);
1484 	if (cur_t == t) {
1485 		/* If bpf callback_fn is trying to bpf_timer_cancel()
1486 		 * its own timer the hrtimer_cancel() will deadlock
1487 		 * since it waits for callback_fn to finish.
1488 		 */
1489 		ret = -EDEADLK;
1490 		goto out;
1491 	}
1492 
1493 	/* Only account in-flight cancellations when invoked from a timer
1494 	 * callback, since we only need to avoid waiting when other _callbacks_
1495 	 * are waiting on us, to avoid introducing lockups. Non-callback paths
1496 	 * are ok, since nobody would synchronously wait for their completion.
1497 	 */
1498 	if (!cur_t)
1499 		goto drop;
1500 	atomic_inc(&t->cancelling);
1501 	/* Need full barrier after relaxed atomic_inc */
1502 	smp_mb__after_atomic();
1503 	inc = true;
1504 	if (atomic_read(&cur_t->cancelling)) {
1505 		/* We're cancelling timer t, while some other timer callback is
1506 		 * attempting to cancel us. In such a case, it might be possible
1507 		 * that timer t belongs to the other callback, or some other
1508 		 * callback waiting upon it (creating transitive dependencies
1509 		 * upon us), and we will enter a deadlock if we continue
1510 		 * cancelling and waiting for it synchronously, since it might
1511 		 * do the same. Bail!
1512 		 */
1513 		ret = -EDEADLK;
1514 		goto out;
1515 	}
1516 drop:
1517 	drop_prog_refcnt(&t->cb);
1518 out:
1519 	__bpf_spin_unlock_irqrestore(&timer->lock);
1520 	/* Cancel the timer and wait for associated callback to finish
1521 	 * if it was running.
1522 	 */
1523 	ret = ret ?: hrtimer_cancel(&t->timer);
1524 	if (inc)
1525 		atomic_dec(&t->cancelling);
1526 	rcu_read_unlock();
1527 	return ret;
1528 }
1529 
1530 static const struct bpf_func_proto bpf_timer_cancel_proto = {
1531 	.func		= bpf_timer_cancel,
1532 	.gpl_only	= true,
1533 	.ret_type	= RET_INTEGER,
1534 	.arg1_type	= ARG_PTR_TO_TIMER,
1535 };
1536 
1537 static struct bpf_async_cb *__bpf_async_cancel_and_free(struct bpf_async_kern *async)
1538 {
1539 	struct bpf_async_cb *cb;
1540 
1541 	/* Performance optimization: read async->cb without lock first. */
1542 	if (!READ_ONCE(async->cb))
1543 		return NULL;
1544 
1545 	__bpf_spin_lock_irqsave(&async->lock);
1546 	/* re-read it under lock */
1547 	cb = async->cb;
1548 	if (!cb)
1549 		goto out;
1550 	drop_prog_refcnt(cb);
1551 	/* The subsequent bpf_timer_start/cancel() helpers won't be able to use
1552 	 * this timer, since it won't be initialized.
1553 	 */
1554 	WRITE_ONCE(async->cb, NULL);
1555 out:
1556 	__bpf_spin_unlock_irqrestore(&async->lock);
1557 	return cb;
1558 }
1559 
1560 /* This function is called by map_delete/update_elem for individual element and
1561  * by ops->map_release_uref when the user space reference to a map reaches zero.
1562  */
1563 void bpf_timer_cancel_and_free(void *val)
1564 {
1565 	struct bpf_hrtimer *t;
1566 
1567 	t = (struct bpf_hrtimer *)__bpf_async_cancel_and_free(val);
1568 
1569 	if (!t)
1570 		return;
1571 	/* We check that bpf_map_delete/update_elem() was called from timer
1572 	 * callback_fn. In such a case we don't call hrtimer_cancel() (since it
1573 	 * will deadlock) and don't call hrtimer_try_to_cancel() (since it will
1574 	 * just return -1). Though callback_fn is still running on this cpu it's
1575 	 * safe to do kfree(t) because bpf_timer_cb() read everything it needed
1576 	 * from 't'. The bpf subprog callback_fn won't be able to access 't',
1577 	 * since async->cb = NULL was already done. The timer will be
1578 	 * effectively cancelled because bpf_timer_cb() will return
1579 	 * HRTIMER_NORESTART.
1580 	 *
1581 	 * However, it is possible the timer callback_fn calling us armed the
1582 	 * timer _before_ calling us, such that failing to cancel it here will
1583 	 * cause it to possibly use struct hrtimer after freeing bpf_hrtimer.
1584 	 * Therefore, we _need_ to cancel any outstanding timers before we do
1585 	 * kfree_rcu, even though no more timers can be armed.
1586 	 *
1587 	 * Moreover, we need to schedule work even if timer does not belong to
1588 	 * the calling callback_fn, as on two different CPUs, we can end up in a
1589 	 * situation where both sides run in parallel, try to cancel one
1590 	 * another, and we end up waiting on both sides in hrtimer_cancel
1591 	 * without making forward progress, since timer1 depends on timer2
1592 	 * callback to finish, and vice versa.
1593 	 *
1594 	 *  CPU 1 (timer1_cb)			CPU 2 (timer2_cb)
1595 	 *  bpf_timer_cancel_and_free(timer2)	bpf_timer_cancel_and_free(timer1)
1596 	 *
1597 	 * To avoid these issues, punt to workqueue context when we are in a
1598 	 * timer callback.
1599 	 */
1600 	if (this_cpu_read(hrtimer_running)) {
1601 		queue_work(system_dfl_wq, &t->cb.delete_work);
1602 		return;
1603 	}
1604 
1605 	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
1606 		/* If the timer is running on other CPU, also use a kworker to
1607 		 * wait for the completion of the timer instead of trying to
1608 		 * acquire a sleepable lock in hrtimer_cancel() to wait for its
1609 		 * completion.
1610 		 */
1611 		if (hrtimer_try_to_cancel(&t->timer) >= 0)
1612 			kfree_rcu(t, cb.rcu);
1613 		else
1614 			queue_work(system_dfl_wq, &t->cb.delete_work);
1615 	} else {
1616 		bpf_timer_delete_work(&t->cb.delete_work);
1617 	}
1618 }
1619 
1620 /* This function is called by map_delete/update_elem for individual element and
1621  * by ops->map_release_uref when the user space reference to a map reaches zero.
1622  */
1623 void bpf_wq_cancel_and_free(void *val)
1624 {
1625 	struct bpf_work *work;
1626 
1627 	BTF_TYPE_EMIT(struct bpf_wq);
1628 
1629 	work = (struct bpf_work *)__bpf_async_cancel_and_free(val);
1630 	if (!work)
1631 		return;
1632 	/* Trigger cancel of the sleepable work, but *do not* wait for
1633 	 * it to finish if it was running as we might not be in a
1634 	 * sleepable context.
1635 	 * kfree will be called once the work has finished.
1636 	 */
1637 	schedule_work(&work->delete_work);
1638 }
1639 
1640 BPF_CALL_2(bpf_kptr_xchg, void *, dst, void *, ptr)
1641 {
1642 	unsigned long *kptr = dst;
1643 
1644 	/* This helper may be inlined by verifier. */
1645 	return xchg(kptr, (unsigned long)ptr);
1646 }
1647 
1648 /* Unlike other PTR_TO_BTF_ID helpers the btf_id in bpf_kptr_xchg()
1649  * helper is determined dynamically by the verifier. Use BPF_PTR_POISON to
1650  * denote type that verifier will determine.
1651  */
1652 static const struct bpf_func_proto bpf_kptr_xchg_proto = {
1653 	.func         = bpf_kptr_xchg,
1654 	.gpl_only     = false,
1655 	.ret_type     = RET_PTR_TO_BTF_ID_OR_NULL,
1656 	.ret_btf_id   = BPF_PTR_POISON,
1657 	.arg1_type    = ARG_KPTR_XCHG_DEST,
1658 	.arg2_type    = ARG_PTR_TO_BTF_ID_OR_NULL | OBJ_RELEASE,
1659 	.arg2_btf_id  = BPF_PTR_POISON,
1660 };
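
/* Illustrative sketch (not part of this file): a typical pattern is to swap an
 * acquired pointer into a __kptr field of a map value and release whatever was
 * stored there before (field and variable names are made up; bpf_task_release()
 * is the matching release kfunc for task_struct kptrs):
 *
 *	struct val { struct task_struct __kptr *task; } *v;
 *	struct task_struct *old;
 *
 *	old = bpf_kptr_xchg(&v->task, acquired);
 *	if (old)
 *		bpf_task_release(old);
 */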
1661 
1662 /* Since the upper 8 bits of dynptr->size are reserved, the
1663  * maximum supported size is 2^24 - 1.
1664  */
1665 #define DYNPTR_MAX_SIZE	((1UL << 24) - 1)
1666 #define DYNPTR_TYPE_SHIFT	28
1667 #define DYNPTR_SIZE_MASK	0xFFFFFF
1668 #define DYNPTR_RDONLY_BIT	BIT(31)
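
/* Resulting layout of the packed dynptr->size word (a sketch derived from the
 * masks above, not a uapi guarantee):
 *
 *	bit  31		DYNPTR_RDONLY_BIT
 *	bits 28-30	dynptr type (shifted by DYNPTR_TYPE_SHIFT)
 *	bits 24-27	currently unused
 *	bits  0-23	size (DYNPTR_SIZE_MASK, at most DYNPTR_MAX_SIZE)
 */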
1669 
1670 bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr)
1671 {
1672 	return ptr->size & DYNPTR_RDONLY_BIT;
1673 }
1674 
1675 void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
1676 {
1677 	ptr->size |= DYNPTR_RDONLY_BIT;
1678 }
1679 
1680 static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_type type)
1681 {
1682 	ptr->size |= type << DYNPTR_TYPE_SHIFT;
1683 }
1684 
1685 static enum bpf_dynptr_type bpf_dynptr_get_type(const struct bpf_dynptr_kern *ptr)
1686 {
1687 	return (ptr->size & ~(DYNPTR_RDONLY_BIT)) >> DYNPTR_TYPE_SHIFT;
1688 }
1689 
1690 u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr)
1691 {
1692 	return ptr->size & DYNPTR_SIZE_MASK;
1693 }
1694 
1695 static void bpf_dynptr_set_size(struct bpf_dynptr_kern *ptr, u32 new_size)
1696 {
1697 	u32 metadata = ptr->size & ~DYNPTR_SIZE_MASK;
1698 
1699 	ptr->size = new_size | metadata;
1700 }
1701 
1702 int bpf_dynptr_check_size(u32 size)
1703 {
1704 	return size > DYNPTR_MAX_SIZE ? -E2BIG : 0;
1705 }
1706 
1707 void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
1708 		     enum bpf_dynptr_type type, u32 offset, u32 size)
1709 {
1710 	ptr->data = data;
1711 	ptr->offset = offset;
1712 	ptr->size = size;
1713 	bpf_dynptr_set_type(ptr, type);
1714 }
1715 
1716 void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
1717 {
1718 	memset(ptr, 0, sizeof(*ptr));
1719 }
1720 
1721 BPF_CALL_4(bpf_dynptr_from_mem, void *, data, u32, size, u64, flags, struct bpf_dynptr_kern *, ptr)
1722 {
1723 	int err;
1724 
1725 	BTF_TYPE_EMIT(struct bpf_dynptr);
1726 
1727 	err = bpf_dynptr_check_size(size);
1728 	if (err)
1729 		goto error;
1730 
1731 	/* flags is currently unsupported */
1732 	if (flags) {
1733 		err = -EINVAL;
1734 		goto error;
1735 	}
1736 
1737 	bpf_dynptr_init(ptr, data, BPF_DYNPTR_TYPE_LOCAL, 0, size);
1738 
1739 	return 0;
1740 
1741 error:
1742 	bpf_dynptr_set_null(ptr);
1743 	return err;
1744 }
1745 
1746 static const struct bpf_func_proto bpf_dynptr_from_mem_proto = {
1747 	.func		= bpf_dynptr_from_mem,
1748 	.gpl_only	= false,
1749 	.ret_type	= RET_INTEGER,
1750 	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
1751 	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
1752 	.arg3_type	= ARG_ANYTHING,
1753 	.arg4_type	= ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT | MEM_WRITE,
1754 };
1755 
1756 static int __bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr_kern *src,
1757 			     u32 offset, u64 flags)
1758 {
1759 	enum bpf_dynptr_type type;
1760 	int err;
1761 
1762 	if (!src->data || flags)
1763 		return -EINVAL;
1764 
1765 	err = bpf_dynptr_check_off_len(src, offset, len);
1766 	if (err)
1767 		return err;
1768 
1769 	type = bpf_dynptr_get_type(src);
1770 
1771 	switch (type) {
1772 	case BPF_DYNPTR_TYPE_LOCAL:
1773 	case BPF_DYNPTR_TYPE_RINGBUF:
1774 		/* Source and destination may possibly overlap, hence use memmove to
1775 		 * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr
1776 		 * pointing to overlapping PTR_TO_MAP_VALUE regions.
1777 		 */
1778 		memmove(dst, src->data + src->offset + offset, len);
1779 		return 0;
1780 	case BPF_DYNPTR_TYPE_SKB:
1781 		return __bpf_skb_load_bytes(src->data, src->offset + offset, dst, len);
1782 	case BPF_DYNPTR_TYPE_XDP:
1783 		return __bpf_xdp_load_bytes(src->data, src->offset + offset, dst, len);
1784 	case BPF_DYNPTR_TYPE_SKB_META:
1785 		memmove(dst, bpf_skb_meta_pointer(src->data, src->offset + offset), len);
1786 		return 0;
1787 	default:
1788 		WARN_ONCE(true, "bpf_dynptr_read: unknown dynptr type %d\n", type);
1789 		return -EFAULT;
1790 	}
1791 }
1792 
1793 BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src,
1794 	   u32, offset, u64, flags)
1795 {
1796 	return __bpf_dynptr_read(dst, len, src, offset, flags);
1797 }
1798 
1799 static const struct bpf_func_proto bpf_dynptr_read_proto = {
1800 	.func		= bpf_dynptr_read,
1801 	.gpl_only	= false,
1802 	.ret_type	= RET_INTEGER,
1803 	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
1804 	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
1805 	.arg3_type	= ARG_PTR_TO_DYNPTR | MEM_RDONLY,
1806 	.arg4_type	= ARG_ANYTHING,
1807 	.arg5_type	= ARG_ANYTHING,
1808 };
1809 
1810 int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, void *src,
1811 		       u32 len, u64 flags)
1812 {
1813 	enum bpf_dynptr_type type;
1814 	int err;
1815 
1816 	if (!dst->data || __bpf_dynptr_is_rdonly(dst))
1817 		return -EINVAL;
1818 
1819 	err = bpf_dynptr_check_off_len(dst, offset, len);
1820 	if (err)
1821 		return err;
1822 
1823 	type = bpf_dynptr_get_type(dst);
1824 
1825 	switch (type) {
1826 	case BPF_DYNPTR_TYPE_LOCAL:
1827 	case BPF_DYNPTR_TYPE_RINGBUF:
1828 		if (flags)
1829 			return -EINVAL;
1830 		/* Source and destination may possibly overlap, hence use memmove to
1831 		 * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr
1832 		 * pointing to overlapping PTR_TO_MAP_VALUE regions.
1833 		 */
1834 		memmove(dst->data + dst->offset + offset, src, len);
1835 		return 0;
1836 	case BPF_DYNPTR_TYPE_SKB:
1837 		return __bpf_skb_store_bytes(dst->data, dst->offset + offset, src, len,
1838 					     flags);
1839 	case BPF_DYNPTR_TYPE_XDP:
1840 		if (flags)
1841 			return -EINVAL;
1842 		return __bpf_xdp_store_bytes(dst->data, dst->offset + offset, src, len);
1843 	case BPF_DYNPTR_TYPE_SKB_META:
1844 		if (flags)
1845 			return -EINVAL;
1846 		memmove(bpf_skb_meta_pointer(dst->data, dst->offset + offset), src, len);
1847 		return 0;
1848 	default:
1849 		WARN_ONCE(true, "bpf_dynptr_write: unknown dynptr type %d\n", type);
1850 		return -EFAULT;
1851 	}
1852 }
1853 
1854 BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u32, offset, void *, src,
1855 	   u32, len, u64, flags)
1856 {
1857 	return __bpf_dynptr_write(dst, offset, src, len, flags);
1858 }
1859 
1860 static const struct bpf_func_proto bpf_dynptr_write_proto = {
1861 	.func		= bpf_dynptr_write,
1862 	.gpl_only	= false,
1863 	.ret_type	= RET_INTEGER,
1864 	.arg1_type	= ARG_PTR_TO_DYNPTR | MEM_RDONLY,
1865 	.arg2_type	= ARG_ANYTHING,
1866 	.arg3_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
1867 	.arg4_type	= ARG_CONST_SIZE_OR_ZERO,
1868 	.arg5_type	= ARG_ANYTHING,
1869 };
1870 
1871 BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u32, len)
1872 {
1873 	enum bpf_dynptr_type type;
1874 	int err;
1875 
1876 	if (!ptr->data)
1877 		return 0;
1878 
1879 	err = bpf_dynptr_check_off_len(ptr, offset, len);
1880 	if (err)
1881 		return 0;
1882 
1883 	if (__bpf_dynptr_is_rdonly(ptr))
1884 		return 0;
1885 
1886 	type = bpf_dynptr_get_type(ptr);
1887 
1888 	switch (type) {
1889 	case BPF_DYNPTR_TYPE_LOCAL:
1890 	case BPF_DYNPTR_TYPE_RINGBUF:
1891 		return (unsigned long)(ptr->data + ptr->offset + offset);
1892 	case BPF_DYNPTR_TYPE_SKB:
1893 	case BPF_DYNPTR_TYPE_XDP:
1894 	case BPF_DYNPTR_TYPE_SKB_META:
1895 		/* skb and xdp dynptrs should use bpf_dynptr_slice / bpf_dynptr_slice_rdwr */
1896 		return 0;
1897 	default:
1898 		WARN_ONCE(true, "bpf_dynptr_data: unknown dynptr type %d\n", type);
1899 		return 0;
1900 	}
1901 }
1902 
1903 static const struct bpf_func_proto bpf_dynptr_data_proto = {
1904 	.func		= bpf_dynptr_data,
1905 	.gpl_only	= false,
1906 	.ret_type	= RET_PTR_TO_DYNPTR_MEM_OR_NULL,
1907 	.arg1_type	= ARG_PTR_TO_DYNPTR | MEM_RDONLY,
1908 	.arg2_type	= ARG_ANYTHING,
1909 	.arg3_type	= ARG_CONST_ALLOC_SIZE_OR_ZERO,
1910 };
1911 
1912 const struct bpf_func_proto bpf_get_current_task_proto __weak;
1913 const struct bpf_func_proto bpf_get_current_task_btf_proto __weak;
1914 const struct bpf_func_proto bpf_probe_read_user_proto __weak;
1915 const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
1916 const struct bpf_func_proto bpf_probe_read_kernel_proto __weak;
1917 const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak;
1918 const struct bpf_func_proto bpf_task_pt_regs_proto __weak;
1919 const struct bpf_func_proto bpf_perf_event_read_proto __weak;
1920 const struct bpf_func_proto bpf_send_signal_proto __weak;
1921 const struct bpf_func_proto bpf_send_signal_thread_proto __weak;
1922 const struct bpf_func_proto bpf_get_task_stack_sleepable_proto __weak;
1923 const struct bpf_func_proto bpf_get_task_stack_proto __weak;
1924 const struct bpf_func_proto bpf_get_branch_snapshot_proto __weak;
1925 
1926 const struct bpf_func_proto *
1927 bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
1928 {
1929 	switch (func_id) {
1930 	case BPF_FUNC_map_lookup_elem:
1931 		return &bpf_map_lookup_elem_proto;
1932 	case BPF_FUNC_map_update_elem:
1933 		return &bpf_map_update_elem_proto;
1934 	case BPF_FUNC_map_delete_elem:
1935 		return &bpf_map_delete_elem_proto;
1936 	case BPF_FUNC_map_push_elem:
1937 		return &bpf_map_push_elem_proto;
1938 	case BPF_FUNC_map_pop_elem:
1939 		return &bpf_map_pop_elem_proto;
1940 	case BPF_FUNC_map_peek_elem:
1941 		return &bpf_map_peek_elem_proto;
1942 	case BPF_FUNC_map_lookup_percpu_elem:
1943 		return &bpf_map_lookup_percpu_elem_proto;
1944 	case BPF_FUNC_get_prandom_u32:
1945 		return &bpf_get_prandom_u32_proto;
1946 	case BPF_FUNC_get_smp_processor_id:
1947 		return &bpf_get_raw_smp_processor_id_proto;
1948 	case BPF_FUNC_get_numa_node_id:
1949 		return &bpf_get_numa_node_id_proto;
1950 	case BPF_FUNC_tail_call:
1951 		return &bpf_tail_call_proto;
1952 	case BPF_FUNC_ktime_get_ns:
1953 		return &bpf_ktime_get_ns_proto;
1954 	case BPF_FUNC_ktime_get_boot_ns:
1955 		return &bpf_ktime_get_boot_ns_proto;
1956 	case BPF_FUNC_ktime_get_tai_ns:
1957 		return &bpf_ktime_get_tai_ns_proto;
1958 	case BPF_FUNC_ringbuf_output:
1959 		return &bpf_ringbuf_output_proto;
1960 	case BPF_FUNC_ringbuf_reserve:
1961 		return &bpf_ringbuf_reserve_proto;
1962 	case BPF_FUNC_ringbuf_submit:
1963 		return &bpf_ringbuf_submit_proto;
1964 	case BPF_FUNC_ringbuf_discard:
1965 		return &bpf_ringbuf_discard_proto;
1966 	case BPF_FUNC_ringbuf_query:
1967 		return &bpf_ringbuf_query_proto;
1968 	case BPF_FUNC_strncmp:
1969 		return &bpf_strncmp_proto;
1970 	case BPF_FUNC_strtol:
1971 		return &bpf_strtol_proto;
1972 	case BPF_FUNC_strtoul:
1973 		return &bpf_strtoul_proto;
1974 	case BPF_FUNC_get_current_pid_tgid:
1975 		return &bpf_get_current_pid_tgid_proto;
1976 	case BPF_FUNC_get_ns_current_pid_tgid:
1977 		return &bpf_get_ns_current_pid_tgid_proto;
1978 	case BPF_FUNC_get_current_uid_gid:
1979 		return &bpf_get_current_uid_gid_proto;
1980 	default:
1981 		break;
1982 	}
1983 
1984 	if (!bpf_token_capable(prog->aux->token, CAP_BPF))
1985 		return NULL;
1986 
1987 	switch (func_id) {
1988 	case BPF_FUNC_spin_lock:
1989 		return &bpf_spin_lock_proto;
1990 	case BPF_FUNC_spin_unlock:
1991 		return &bpf_spin_unlock_proto;
1992 	case BPF_FUNC_jiffies64:
1993 		return &bpf_jiffies64_proto;
1994 	case BPF_FUNC_per_cpu_ptr:
1995 		return &bpf_per_cpu_ptr_proto;
1996 	case BPF_FUNC_this_cpu_ptr:
1997 		return &bpf_this_cpu_ptr_proto;
1998 	case BPF_FUNC_timer_init:
1999 		return &bpf_timer_init_proto;
2000 	case BPF_FUNC_timer_set_callback:
2001 		return &bpf_timer_set_callback_proto;
2002 	case BPF_FUNC_timer_start:
2003 		return &bpf_timer_start_proto;
2004 	case BPF_FUNC_timer_cancel:
2005 		return &bpf_timer_cancel_proto;
2006 	case BPF_FUNC_kptr_xchg:
2007 		return &bpf_kptr_xchg_proto;
2008 	case BPF_FUNC_for_each_map_elem:
2009 		return &bpf_for_each_map_elem_proto;
2010 	case BPF_FUNC_loop:
2011 		return &bpf_loop_proto;
2012 	case BPF_FUNC_user_ringbuf_drain:
2013 		return &bpf_user_ringbuf_drain_proto;
2014 	case BPF_FUNC_ringbuf_reserve_dynptr:
2015 		return &bpf_ringbuf_reserve_dynptr_proto;
2016 	case BPF_FUNC_ringbuf_submit_dynptr:
2017 		return &bpf_ringbuf_submit_dynptr_proto;
2018 	case BPF_FUNC_ringbuf_discard_dynptr:
2019 		return &bpf_ringbuf_discard_dynptr_proto;
2020 	case BPF_FUNC_dynptr_from_mem:
2021 		return &bpf_dynptr_from_mem_proto;
2022 	case BPF_FUNC_dynptr_read:
2023 		return &bpf_dynptr_read_proto;
2024 	case BPF_FUNC_dynptr_write:
2025 		return &bpf_dynptr_write_proto;
2026 	case BPF_FUNC_dynptr_data:
2027 		return &bpf_dynptr_data_proto;
2028 #ifdef CONFIG_CGROUPS
2029 	case BPF_FUNC_cgrp_storage_get:
2030 		return &bpf_cgrp_storage_get_proto;
2031 	case BPF_FUNC_cgrp_storage_delete:
2032 		return &bpf_cgrp_storage_delete_proto;
2033 	case BPF_FUNC_get_current_cgroup_id:
2034 		return &bpf_get_current_cgroup_id_proto;
2035 	case BPF_FUNC_get_current_ancestor_cgroup_id:
2036 		return &bpf_get_current_ancestor_cgroup_id_proto;
2037 	case BPF_FUNC_current_task_under_cgroup:
2038 		return &bpf_current_task_under_cgroup_proto;
2039 #endif
2040 #ifdef CONFIG_CGROUP_NET_CLASSID
2041 	case BPF_FUNC_get_cgroup_classid:
2042 		return &bpf_get_cgroup_classid_curr_proto;
2043 #endif
2044 	case BPF_FUNC_task_storage_get:
2045 		if (bpf_prog_check_recur(prog))
2046 			return &bpf_task_storage_get_recur_proto;
2047 		return &bpf_task_storage_get_proto;
2048 	case BPF_FUNC_task_storage_delete:
2049 		if (bpf_prog_check_recur(prog))
2050 			return &bpf_task_storage_delete_recur_proto;
2051 		return &bpf_task_storage_delete_proto;
2052 	default:
2053 		break;
2054 	}
2055 
2056 	if (!bpf_token_capable(prog->aux->token, CAP_PERFMON))
2057 		return NULL;
2058 
2059 	switch (func_id) {
2060 	case BPF_FUNC_trace_printk:
2061 		return bpf_get_trace_printk_proto();
2062 	case BPF_FUNC_get_current_task:
2063 		return &bpf_get_current_task_proto;
2064 	case BPF_FUNC_get_current_task_btf:
2065 		return &bpf_get_current_task_btf_proto;
2066 	case BPF_FUNC_get_current_comm:
2067 		return &bpf_get_current_comm_proto;
2068 	case BPF_FUNC_probe_read_user:
2069 		return &bpf_probe_read_user_proto;
2070 	case BPF_FUNC_probe_read_kernel:
2071 		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
2072 		       NULL : &bpf_probe_read_kernel_proto;
2073 	case BPF_FUNC_probe_read_user_str:
2074 		return &bpf_probe_read_user_str_proto;
2075 	case BPF_FUNC_probe_read_kernel_str:
2076 		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
2077 		       NULL : &bpf_probe_read_kernel_str_proto;
2078 	case BPF_FUNC_copy_from_user:
2079 		return &bpf_copy_from_user_proto;
2080 	case BPF_FUNC_copy_from_user_task:
2081 		return &bpf_copy_from_user_task_proto;
2082 	case BPF_FUNC_snprintf_btf:
2083 		return &bpf_snprintf_btf_proto;
2084 	case BPF_FUNC_snprintf:
2085 		return &bpf_snprintf_proto;
2086 	case BPF_FUNC_task_pt_regs:
2087 		return &bpf_task_pt_regs_proto;
2088 	case BPF_FUNC_trace_vprintk:
2089 		return bpf_get_trace_vprintk_proto();
2090 	case BPF_FUNC_perf_event_read_value:
2091 		return bpf_get_perf_event_read_value_proto();
2092 	case BPF_FUNC_perf_event_read:
2093 		return &bpf_perf_event_read_proto;
2094 	case BPF_FUNC_send_signal:
2095 		return &bpf_send_signal_proto;
2096 	case BPF_FUNC_send_signal_thread:
2097 		return &bpf_send_signal_thread_proto;
2098 	case BPF_FUNC_get_task_stack:
2099 		return prog->sleepable ? &bpf_get_task_stack_sleepable_proto
2100 				       : &bpf_get_task_stack_proto;
2101 	case BPF_FUNC_get_branch_snapshot:
2102 		return &bpf_get_branch_snapshot_proto;
2103 	case BPF_FUNC_find_vma:
2104 		return &bpf_find_vma_proto;
2105 	default:
2106 		return NULL;
2107 	}
2108 }
2109 EXPORT_SYMBOL_GPL(bpf_base_func_proto);
2110 
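/* A minimal illustrative sketch of how a subsystem's .get_func_proto callback
 * can fall back to the base set above; "my_func_proto" and the subsystem
 * specific cases are hypothetical, shown only to illustrate the delegation:
 *
 *	static const struct bpf_func_proto *
 *	my_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 *	{
 *		switch (func_id) {
 *		... subsystem-specific cases ...
 *		default:
 *			return bpf_base_func_proto(func_id, prog);
 *		}
 *	}
 */
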
2111 void bpf_list_head_free(const struct btf_field *field, void *list_head,
2112 			struct bpf_spin_lock *spin_lock)
2113 {
2114 	struct list_head *head = list_head, *orig_head = list_head;
2115 
2116 	BUILD_BUG_ON(sizeof(struct list_head) > sizeof(struct bpf_list_head));
2117 	BUILD_BUG_ON(__alignof__(struct list_head) > __alignof__(struct bpf_list_head));
2118 
2119 	/* Do the actual list draining outside the lock to not hold the lock for
2120 	 * too long, and also prevent deadlocks if tracing programs end up
2121 	 * executing on entry/exit of functions called inside the critical
2122 	 * section, and end up doing map ops that call bpf_list_head_free for
2123 	 * the same map value again.
2124 	 */
2125 	__bpf_spin_lock_irqsave(spin_lock);
2126 	if (!head->next || list_empty(head))
2127 		goto unlock;
2128 	head = head->next;
2129 unlock:
2130 	INIT_LIST_HEAD(orig_head);
2131 	__bpf_spin_unlock_irqrestore(spin_lock);
2132 
2133 	while (head != orig_head) {
2134 		void *obj = head;
2135 
2136 		obj -= field->graph_root.node_offset;
2137 		head = head->next;
2138 		/* The contained type can also have resources, including a
2139 		 * bpf_list_head which needs to be freed.
2140 		 */
2141 		__bpf_obj_drop_impl(obj, field->graph_root.value_rec, false);
2142 	}
2143 }
2144 
2145 /* Like rbtree_postorder_for_each_entry_safe, but 'pos' and 'n' are
2146  * 'rb_node *', so field name of rb_node within containing struct is not
2147  * needed.
2148  *
2149  * Since bpf_rb_tree's node type has a corresponding struct btf_field with
2150  * graph_root.node_offset, it's not necessary to know field name
2151  * or type of node struct
2152  */
2153 #define bpf_rbtree_postorder_for_each_entry_safe(pos, n, root) \
2154 	for (pos = rb_first_postorder(root); \
2155 	    pos && ({ n = rb_next_postorder(pos); 1; }); \
2156 	    pos = n)
2157 
2158 void bpf_rb_root_free(const struct btf_field *field, void *rb_root,
2159 		      struct bpf_spin_lock *spin_lock)
2160 {
2161 	struct rb_root_cached orig_root, *root = rb_root;
2162 	struct rb_node *pos, *n;
2163 	void *obj;
2164 
2165 	BUILD_BUG_ON(sizeof(struct rb_root_cached) > sizeof(struct bpf_rb_root));
2166 	BUILD_BUG_ON(__alignof__(struct rb_root_cached) > __alignof__(struct bpf_rb_root));
2167 
2168 	__bpf_spin_lock_irqsave(spin_lock);
2169 	orig_root = *root;
2170 	*root = RB_ROOT_CACHED;
2171 	__bpf_spin_unlock_irqrestore(spin_lock);
2172 
2173 	bpf_rbtree_postorder_for_each_entry_safe(pos, n, &orig_root.rb_root) {
2174 		obj = pos;
2175 		obj -= field->graph_root.node_offset;
2176 
2177 
2178 		__bpf_obj_drop_impl(obj, field->graph_root.value_rec, false);
2179 	}
2180 }
2181 
2182 __bpf_kfunc_start_defs();
2183 
2184 __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign)
2185 {
2186 	struct btf_struct_meta *meta = meta__ign;
2187 	u64 size = local_type_id__k;
2188 	void *p;
2189 
2190 	p = bpf_mem_alloc(&bpf_global_ma, size);
2191 	if (!p)
2192 		return NULL;
2193 	if (meta)
2194 		bpf_obj_init(meta->record, p);
2195 	return p;
2196 }
2197 
2198 __bpf_kfunc void *bpf_percpu_obj_new_impl(u64 local_type_id__k, void *meta__ign)
2199 {
2200 	u64 size = local_type_id__k;
2201 
2202 	/* The verifier has ensured that meta__ign must be NULL */
2203 	return bpf_mem_alloc(&bpf_global_percpu_ma, size);
2204 }
2205 
2206 /* Must be called under migrate_disable(), as required by bpf_mem_free */
2207 void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu)
2208 {
2209 	struct bpf_mem_alloc *ma;
2210 
2211 	if (rec && rec->refcount_off >= 0 &&
2212 	    !refcount_dec_and_test((refcount_t *)(p + rec->refcount_off))) {
2213 		/* Object is refcounted and refcount_dec didn't result in 0
2214 		 * refcount. Return without freeing the object
2215 		 */
2216 		return;
2217 	}
2218 
2219 	if (rec)
2220 		bpf_obj_free_fields(rec, p);
2221 
2222 	if (percpu)
2223 		ma = &bpf_global_percpu_ma;
2224 	else
2225 		ma = &bpf_global_ma;
2226 	bpf_mem_free_rcu(ma, p);
2227 }
2228 
2229 __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
2230 {
2231 	struct btf_struct_meta *meta = meta__ign;
2232 	void *p = p__alloc;
2233 
2234 	__bpf_obj_drop_impl(p, meta ? meta->record : NULL, false);
2235 }
2236 
2237 __bpf_kfunc void bpf_percpu_obj_drop_impl(void *p__alloc, void *meta__ign)
2238 {
2239 	/* The verifier has ensured that meta__ign must be NULL */
2240 	bpf_mem_free_rcu(&bpf_global_percpu_ma, p__alloc);
2241 }
2242 
2243 __bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta__ign)
2244 {
2245 	struct btf_struct_meta *meta = meta__ign;
2246 	struct bpf_refcount *ref;
2247 
2248 	/* Could just cast directly to refcount_t *, but need some code using
2249 	 * bpf_refcount type so that it is emitted in vmlinux BTF
2250 	 */
2251 	ref = (struct bpf_refcount *)(p__refcounted_kptr + meta->record->refcount_off);
2252 	if (!refcount_inc_not_zero((refcount_t *)ref))
2253 		return NULL;
2254 
2255 	/* Verifier strips KF_RET_NULL if input is owned ref, see is_kfunc_ret_null
2256 	 * in verifier.c
2257 	 */
2258 	return (void *)p__refcounted_kptr;
2259 }
2260 
2261 static int __bpf_list_add(struct bpf_list_node_kern *node,
2262 			  struct bpf_list_head *head,
2263 			  bool tail, struct btf_record *rec, u64 off)
2264 {
2265 	struct list_head *n = &node->list_head, *h = (void *)head;
2266 
2267 	/* If list_head was 0-initialized by map, bpf_obj_init_field wasn't
2268 	 * called on its fields, so init here
2269 	 */
2270 	if (unlikely(!h->next))
2271 		INIT_LIST_HEAD(h);
2272 
2273 	/* node->owner != NULL implies !list_empty(n), no need to separately
2274 	 * check the latter
2275 	 */
2276 	if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) {
2277 		/* Only called from BPF prog, no need to migrate_disable */
2278 		__bpf_obj_drop_impl((void *)n - off, rec, false);
2279 		return -EINVAL;
2280 	}
2281 
2282 	tail ? list_add_tail(n, h) : list_add(n, h);
2283 	WRITE_ONCE(node->owner, head);
2284 
2285 	return 0;
2286 }
2287 
2288 __bpf_kfunc int bpf_list_push_front_impl(struct bpf_list_head *head,
2289 					 struct bpf_list_node *node,
2290 					 void *meta__ign, u64 off)
2291 {
2292 	struct bpf_list_node_kern *n = (void *)node;
2293 	struct btf_struct_meta *meta = meta__ign;
2294 
2295 	return __bpf_list_add(n, head, false, meta ? meta->record : NULL, off);
2296 }
2297 
2298 __bpf_kfunc int bpf_list_push_back_impl(struct bpf_list_head *head,
2299 					struct bpf_list_node *node,
2300 					void *meta__ign, u64 off)
2301 {
2302 	struct bpf_list_node_kern *n = (void *)node;
2303 	struct btf_struct_meta *meta = meta__ign;
2304 
2305 	return __bpf_list_add(n, head, true, meta ? meta->record : NULL, off);
2306 }
2307 
2308 static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tail)
2309 {
2310 	struct list_head *n, *h = (void *)head;
2311 	struct bpf_list_node_kern *node;
2312 
2313 	/* If list_head was 0-initialized by map, bpf_obj_init_field wasn't
2314 	 * called on its fields, so init here
2315 	 */
2316 	if (unlikely(!h->next))
2317 		INIT_LIST_HEAD(h);
2318 	if (list_empty(h))
2319 		return NULL;
2320 
2321 	n = tail ? h->prev : h->next;
2322 	node = container_of(n, struct bpf_list_node_kern, list_head);
2323 	if (WARN_ON_ONCE(READ_ONCE(node->owner) != head))
2324 		return NULL;
2325 
2326 	list_del_init(n);
2327 	WRITE_ONCE(node->owner, NULL);
2328 	return (struct bpf_list_node *)n;
2329 }
2330 
2331 __bpf_kfunc struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head)
2332 {
2333 	return __bpf_list_del(head, false);
2334 }
2335 
2336 __bpf_kfunc struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head)
2337 {
2338 	return __bpf_list_del(head, true);
2339 }
2340 
2341 __bpf_kfunc struct bpf_list_node *bpf_list_front(struct bpf_list_head *head)
2342 {
2343 	struct list_head *h = (struct list_head *)head;
2344 
2345 	if (list_empty(h) || unlikely(!h->next))
2346 		return NULL;
2347 
2348 	return (struct bpf_list_node *)h->next;
2349 }
2350 
2351 __bpf_kfunc struct bpf_list_node *bpf_list_back(struct bpf_list_head *head)
2352 {
2353 	struct list_head *h = (struct list_head *)head;
2354 
2355 	if (list_empty(h) || unlikely(!h->next))
2356 		return NULL;
2357 
2358 	return (struct bpf_list_node *)h->prev;
2359 }
2360 
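/* Illustrative BPF-side usage sketch for the list kfuncs above, assuming the
 * bpf_obj_new()/bpf_list_push_front() convenience wrappers from the selftests'
 * bpf_experimental.h (which expand to the *_impl kfuncs); "struct node",
 * "head" and "lock" are hypothetical program-defined objects, with the
 * bpf_list_head and its associated bpf_spin_lock living in the same map value:
 *
 *	struct node { struct bpf_list_node n; int val; };
 *
 *	struct node *m = bpf_obj_new(typeof(*m));
 *	if (!m)
 *		return 0;
 *	bpf_spin_lock(&lock);
 *	bpf_list_push_front(&head, &m->n);	// the list now owns the node
 *	bpf_spin_unlock(&lock);
 */
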
2361 __bpf_kfunc struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
2362 						  struct bpf_rb_node *node)
2363 {
2364 	struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern *)node;
2365 	struct rb_root_cached *r = (struct rb_root_cached *)root;
2366 	struct rb_node *n = &node_internal->rb_node;
2367 
2368 	/* node_internal->owner != root implies either RB_EMPTY_NODE(n) or
2369 	 * n is owned by some other tree. No need to check RB_EMPTY_NODE(n)
2370 	 */
2371 	if (READ_ONCE(node_internal->owner) != root)
2372 		return NULL;
2373 
2374 	rb_erase_cached(n, r);
2375 	RB_CLEAR_NODE(n);
2376 	WRITE_ONCE(node_internal->owner, NULL);
2377 	return (struct bpf_rb_node *)n;
2378 }
2379 
2380 /* Need to copy rbtree_add_cached's logic here because our 'less' is a BPF
2381  * program
2382  */
2383 static int __bpf_rbtree_add(struct bpf_rb_root *root,
2384 			    struct bpf_rb_node_kern *node,
2385 			    void *less, struct btf_record *rec, u64 off)
2386 {
2387 	struct rb_node **link = &((struct rb_root_cached *)root)->rb_root.rb_node;
2388 	struct rb_node *parent = NULL, *n = &node->rb_node;
2389 	bpf_callback_t cb = (bpf_callback_t)less;
2390 	bool leftmost = true;
2391 
2392 	/* node->owner != NULL implies !RB_EMPTY_NODE(n), no need to separately
2393 	 * check the latter
2394 	 */
2395 	if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) {
2396 		/* Only called from BPF prog, no need to migrate_disable */
2397 		__bpf_obj_drop_impl((void *)n - off, rec, false);
2398 		return -EINVAL;
2399 	}
2400 
2401 	while (*link) {
2402 		parent = *link;
2403 		if (cb((uintptr_t)node, (uintptr_t)parent, 0, 0, 0)) {
2404 			link = &parent->rb_left;
2405 		} else {
2406 			link = &parent->rb_right;
2407 			leftmost = false;
2408 		}
2409 	}
2410 
2411 	rb_link_node(n, parent, link);
2412 	rb_insert_color_cached(n, (struct rb_root_cached *)root, leftmost);
2413 	WRITE_ONCE(node->owner, root);
2414 	return 0;
2415 }
2416 
2417 __bpf_kfunc int bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
2418 				    bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b),
2419 				    void *meta__ign, u64 off)
2420 {
2421 	struct btf_struct_meta *meta = meta__ign;
2422 	struct bpf_rb_node_kern *n = (void *)node;
2423 
2424 	return __bpf_rbtree_add(root, n, (void *)less, meta ? meta->record : NULL, off);
2425 }
2426 
2427 __bpf_kfunc struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root)
2428 {
2429 	struct rb_root_cached *r = (struct rb_root_cached *)root;
2430 
2431 	return (struct bpf_rb_node *)rb_first_cached(r);
2432 }
2433 
2434 __bpf_kfunc struct bpf_rb_node *bpf_rbtree_root(struct bpf_rb_root *root)
2435 {
2436 	struct rb_root_cached *r = (struct rb_root_cached *)root;
2437 
2438 	return (struct bpf_rb_node *)r->rb_root.rb_node;
2439 }
2440 
2441 __bpf_kfunc struct bpf_rb_node *bpf_rbtree_left(struct bpf_rb_root *root, struct bpf_rb_node *node)
2442 {
2443 	struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern *)node;
2444 
2445 	if (READ_ONCE(node_internal->owner) != root)
2446 		return NULL;
2447 
2448 	return (struct bpf_rb_node *)node_internal->rb_node.rb_left;
2449 }
2450 
2451 __bpf_kfunc struct bpf_rb_node *bpf_rbtree_right(struct bpf_rb_root *root, struct bpf_rb_node *node)
2452 {
2453 	struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern *)node;
2454 
2455 	if (READ_ONCE(node_internal->owner) != root)
2456 		return NULL;
2457 
2458 	return (struct bpf_rb_node *)node_internal->rb_node.rb_right;
2459 }
2460 
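/* Illustrative BPF-side usage sketch for the rbtree kfuncs above, assuming the
 * bpf_obj_new()/bpf_rbtree_add() wrappers and the container_of() helper from
 * the selftests' headers; "struct node", "groot" and "glock" are hypothetical
 * program-defined objects (the bpf_rb_root and its bpf_spin_lock must live in
 * the same map value or allocation):
 *
 *	struct node { struct bpf_rb_node rb; u32 key; };
 *
 *	static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
 *	{
 *		return container_of(a, struct node, rb)->key <
 *		       container_of(b, struct node, rb)->key;
 *	}
 *
 *	struct node *m = bpf_obj_new(typeof(*m));
 *	if (!m)
 *		return 0;
 *	bpf_spin_lock(&glock);
 *	bpf_rbtree_add(&groot, &m->rb, less);	// the tree takes ownership
 *	bpf_spin_unlock(&glock);
 */
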
2461 /**
2462  * bpf_task_acquire - Acquire a reference to a task. A task acquired by this
2463  * kfunc which is not stored in a map as a kptr, must be released by calling
2464  * bpf_task_release().
2465  * @p: The task on which a reference is being acquired.
2466  */
2467 __bpf_kfunc struct task_struct *bpf_task_acquire(struct task_struct *p)
2468 {
2469 	if (refcount_inc_not_zero(&p->rcu_users))
2470 		return p;
2471 	return NULL;
2472 }
2473 
2474 /**
2475  * bpf_task_release - Release the reference acquired on a task.
2476  * @p: The task on which a reference is being released.
2477  */
2478 __bpf_kfunc void bpf_task_release(struct task_struct *p)
2479 {
2480 	put_task_struct_rcu_user(p);
2481 }
2482 
2483 __bpf_kfunc void bpf_task_release_dtor(void *p)
2484 {
2485 	put_task_struct_rcu_user(p);
2486 }
2487 CFI_NOSEAL(bpf_task_release_dtor);
2488 
2489 #ifdef CONFIG_CGROUPS
2490 /**
2491  * bpf_cgroup_acquire - Acquire a reference to a cgroup. A cgroup acquired by
2492  * this kfunc which is not stored in a map as a kptr, must be released by
2493  * calling bpf_cgroup_release().
2494  * @cgrp: The cgroup on which a reference is being acquired.
2495  */
2496 __bpf_kfunc struct cgroup *bpf_cgroup_acquire(struct cgroup *cgrp)
2497 {
2498 	return cgroup_tryget(cgrp) ? cgrp : NULL;
2499 }
2500 
2501 /**
2502  * bpf_cgroup_release - Release the reference acquired on a cgroup.
2503  * If this kfunc is invoked in an RCU read region, the cgroup is guaranteed to
2504  * not be freed until the current grace period has ended, even if its refcount
2505  * drops to 0.
2506  * @cgrp: The cgroup on which a reference is being released.
2507  */
2508 __bpf_kfunc void bpf_cgroup_release(struct cgroup *cgrp)
2509 {
2510 	cgroup_put(cgrp);
2511 }
2512 
2513 __bpf_kfunc void bpf_cgroup_release_dtor(void *cgrp)
2514 {
2515 	cgroup_put(cgrp);
2516 }
2517 CFI_NOSEAL(bpf_cgroup_release_dtor);
2518 
2519 /**
2520  * bpf_cgroup_ancestor - Perform a lookup on an entry in a cgroup's ancestor
2521  * array. A cgroup returned by this kfunc which is not subsequently stored in a
2522  * map, must be released by calling bpf_cgroup_release().
2523  * @cgrp: The cgroup for which we're performing a lookup.
2524  * @level: The level of ancestor to look up.
2525  */
2526 __bpf_kfunc struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level)
2527 {
2528 	struct cgroup *ancestor;
2529 
2530 	if (level > cgrp->level || level < 0)
2531 		return NULL;
2532 
2533 	/* cgrp's refcnt could be 0 here, but ancestors can still be accessed */
2534 	ancestor = cgrp->ancestors[level];
2535 	if (!cgroup_tryget(ancestor))
2536 		return NULL;
2537 	return ancestor;
2538 }
2539 
2540 /**
2541  * bpf_cgroup_from_id - Find a cgroup from its ID. A cgroup returned by this
2542  * kfunc which is not subsequently stored in a map, must be released by calling
2543  * bpf_cgroup_release().
2544  * @cgid: cgroup id.
2545  */
2546 __bpf_kfunc struct cgroup *bpf_cgroup_from_id(u64 cgid)
2547 {
2548 	struct cgroup *cgrp;
2549 
2550 	cgrp = __cgroup_get_from_id(cgid);
2551 	if (IS_ERR(cgrp))
2552 		return NULL;
2553 	return cgrp;
2554 }
2555 
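/* Illustrative BPF-side sketch: references returned by the cgroup lookup
 * kfuncs above must be released with bpf_cgroup_release() unless they are
 * stored in a map as kptrs; "cgid" is a hypothetical cgroup ID:
 *
 *	struct cgroup *cgrp = bpf_cgroup_from_id(cgid);
 *	if (!cgrp)
 *		return 0;
 *	... inspect the cgroup ...
 *	bpf_cgroup_release(cgrp);
 */
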
2556 /**
2557  * bpf_task_under_cgroup - wrap task_under_cgroup_hierarchy() as a kfunc to
2558  * test a task's membership of a cgroup ancestry.
2559  * @task: the task to be tested
2560  * @ancestor: possible ancestor of @task's cgroup
2561  *
2562  * Tests whether @task's default cgroup hierarchy is a descendant of @ancestor.
2563  * It follows all the same rules as cgroup_is_descendant, and only applies
2564  * to the default hierarchy.
2565  */
2566 __bpf_kfunc long bpf_task_under_cgroup(struct task_struct *task,
2567 				       struct cgroup *ancestor)
2568 {
2569 	long ret;
2570 
2571 	rcu_read_lock();
2572 	ret = task_under_cgroup_hierarchy(task, ancestor);
2573 	rcu_read_unlock();
2574 	return ret;
2575 }
2576 
2577 BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
2578 {
2579 	struct bpf_array *array = container_of(map, struct bpf_array, map);
2580 	struct cgroup *cgrp;
2581 
2582 	if (unlikely(idx >= array->map.max_entries))
2583 		return -E2BIG;
2584 
2585 	cgrp = READ_ONCE(array->ptrs[idx]);
2586 	if (unlikely(!cgrp))
2587 		return -EAGAIN;
2588 
2589 	return task_under_cgroup_hierarchy(current, cgrp);
2590 }
2591 
2592 const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
2593 	.func           = bpf_current_task_under_cgroup,
2594 	.gpl_only       = false,
2595 	.ret_type       = RET_INTEGER,
2596 	.arg1_type      = ARG_CONST_MAP_PTR,
2597 	.arg2_type      = ARG_ANYTHING,
2598 };
2599 
2600 /**
2601  * bpf_task_get_cgroup1 - Acquires the associated cgroup of a task within a
2602  * specific cgroup1 hierarchy. The cgroup1 hierarchy is identified by its
2603  * hierarchy ID.
2604  * @task: The target task
2605  * @hierarchy_id: The ID of a cgroup1 hierarchy
2606  *
2607  * On success, the cgroup is returned. On failure, NULL is returned.
2608  */
2609 __bpf_kfunc struct cgroup *
2610 bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id)
2611 {
2612 	struct cgroup *cgrp = task_get_cgroup1(task, hierarchy_id);
2613 
2614 	if (IS_ERR(cgrp))
2615 		return NULL;
2616 	return cgrp;
2617 }
2618 #endif /* CONFIG_CGROUPS */
2619 
2620 /**
2621  * bpf_task_from_pid - Find a struct task_struct from its pid by looking it up
2622  * in the root pid namespace idr. If a task is returned, it must either be
2623  * stored in a map, or released with bpf_task_release().
2624  * @pid: The pid of the task being looked up.
2625  */
2626 __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid)
2627 {
2628 	struct task_struct *p;
2629 
2630 	rcu_read_lock();
2631 	p = find_task_by_pid_ns(pid, &init_pid_ns);
2632 	if (p)
2633 		p = bpf_task_acquire(p);
2634 	rcu_read_unlock();
2635 
2636 	return p;
2637 }
2638 
2639 /**
2640  * bpf_task_from_vpid - Find a struct task_struct from its vpid by looking it up
2641  * in the pid namespace of the current task. If a task is returned, it must
2642  * either be stored in a map, or released with bpf_task_release().
2643  * @vpid: The vpid of the task being looked up.
2644  */
2645 __bpf_kfunc struct task_struct *bpf_task_from_vpid(s32 vpid)
2646 {
2647 	struct task_struct *p;
2648 
2649 	rcu_read_lock();
2650 	p = find_task_by_vpid(vpid);
2651 	if (p)
2652 		p = bpf_task_acquire(p);
2653 	rcu_read_unlock();
2654 
2655 	return p;
2656 }
2657 
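/* Illustrative BPF-side sketch: task references returned by the lookup kfuncs
 * above must be released with bpf_task_release() unless they are stored in a
 * map as kptrs; "pid" is a hypothetical pid of interest:
 *
 *	struct task_struct *p = bpf_task_from_pid(pid);
 *	if (!p)
 *		return 0;
 *	... inspect the task ...
 *	bpf_task_release(p);
 */
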
2658 /**
2659  * bpf_dynptr_slice() - Obtain a read-only pointer to the dynptr data.
2660  * @p: The dynptr whose data slice to retrieve
2661  * @offset: Offset into the dynptr
2662  * @buffer__opt: User-provided buffer to copy contents into.  May be NULL
2663  * @buffer__szk: Size (in bytes) of the buffer if present. This is the
2664  *               length of the requested slice. This must be a constant.
2665  *
2666  * For non-skb and non-xdp type dynptrs, there is no difference between
2667  * bpf_dynptr_slice and bpf_dynptr_data.
2668  *
2669  * If buffer__opt is NULL, the call will fail if buffer__opt was needed.
2670  *
2671  * If the intention is to write to the data slice, please use
2672  * bpf_dynptr_slice_rdwr.
2673  *
2674  * The user must check that the returned pointer is not null before using it.
2675  *
2676  * Please note that in the case of skb and xdp dynptrs, bpf_dynptr_slice
2677  * does not change the underlying packet data pointers, so a call to
2678  * bpf_dynptr_slice will not invalidate any ctx->data/data_end pointers in
2679  * the bpf program.
2680  *
2681  * Return: NULL if the call failed (eg invalid dynptr), pointer to a read-only
2682  * data slice (can be either direct pointer to the data or a pointer to the user
2683  * provided buffer, with its contents containing the data, if unable to obtain
2684  * direct pointer)
2685  */
2686 __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u32 offset,
2687 				   void *buffer__opt, u32 buffer__szk)
2688 {
2689 	const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
2690 	enum bpf_dynptr_type type;
2691 	u32 len = buffer__szk;
2692 	int err;
2693 
2694 	if (!ptr->data)
2695 		return NULL;
2696 
2697 	err = bpf_dynptr_check_off_len(ptr, offset, len);
2698 	if (err)
2699 		return NULL;
2700 
2701 	type = bpf_dynptr_get_type(ptr);
2702 
2703 	switch (type) {
2704 	case BPF_DYNPTR_TYPE_LOCAL:
2705 	case BPF_DYNPTR_TYPE_RINGBUF:
2706 		return ptr->data + ptr->offset + offset;
2707 	case BPF_DYNPTR_TYPE_SKB:
2708 		if (buffer__opt)
2709 			return skb_header_pointer(ptr->data, ptr->offset + offset, len, buffer__opt);
2710 		else
2711 			return skb_pointer_if_linear(ptr->data, ptr->offset + offset, len);
2712 	case BPF_DYNPTR_TYPE_XDP:
2713 	{
2714 		void *xdp_ptr = bpf_xdp_pointer(ptr->data, ptr->offset + offset, len);
2715 		if (!IS_ERR_OR_NULL(xdp_ptr))
2716 			return xdp_ptr;
2717 
2718 		if (!buffer__opt)
2719 			return NULL;
2720 		bpf_xdp_copy_buf(ptr->data, ptr->offset + offset, buffer__opt, len, false);
2721 		return buffer__opt;
2722 	}
2723 	case BPF_DYNPTR_TYPE_SKB_META:
2724 		return bpf_skb_meta_pointer(ptr->data, ptr->offset + offset);
2725 	default:
2726 		WARN_ONCE(true, "unknown dynptr type %d\n", type);
2727 		return NULL;
2728 	}
2729 }
2730 
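/* Illustrative BPF-side sketch of the read-only slice pattern described above;
 * "ptr" is a hypothetical skb- or xdp-backed dynptr and "buf" a program-local
 * scratch buffer:
 *
 *	struct ethhdr buf, *eth;
 *
 *	eth = bpf_dynptr_slice(&ptr, 0, &buf, sizeof(buf));
 *	if (!eth)
 *		return TC_ACT_SHOT;
 *	// eth may point into the packet or into buf; either way sizeof(buf)
 *	// bytes are readable through it
 */
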
2731 /**
2732  * bpf_dynptr_slice_rdwr() - Obtain a writable pointer to the dynptr data.
2733  * @p: The dynptr whose data slice to retrieve
2734  * @offset: Offset into the dynptr
2735  * @buffer__opt: User-provided buffer to copy contents into. May be NULL
2736  * @buffer__szk: Size (in bytes) of the buffer if present. This is the
2737  *               length of the requested slice. This must be a constant.
2738  *
2739  * For non-skb and non-xdp type dynptrs, there is no difference between
2740  * bpf_dynptr_slice and bpf_dynptr_data.
2741  *
2742  * If buffer__opt is NULL, the call will fail if buffer__opt was needed.
2743  *
2744  * The returned pointer is writable and may point either directly to the dynptr
2745  * data at the requested offset or to the buffer if a direct data pointer cannot
2746  * be obtained (example: the requested slice is in the paged area of an skb
2747  * packet). In the case where the returned pointer is to the buffer, the user
2748  * is responsible for persisting writes through calling bpf_dynptr_write(). This
2749  * usually looks something like this pattern:
2750  *
2751  * struct eth_hdr *eth = bpf_dynptr_slice_rdwr(&dynptr, 0, buffer, sizeof(buffer));
2752  * if (!eth)
2753  *	return TC_ACT_SHOT;
2754  *
2755  * // mutate eth header //
2756  *
2757  * if (eth == buffer)
2758  *	bpf_dynptr_write(&ptr, 0, buffer, sizeof(buffer), 0);
2759  *
2760  * Please note that, as in the example above, the user must check that the
2761  * returned pointer is not null before using it.
2762  *
2763  * Please also note that in the case of skb and xdp dynptrs, bpf_dynptr_slice_rdwr
2764  * does not change the underlying packet data pointers, so a call to
2765  * bpf_dynptr_slice_rdwr will not invalidate any ctx->data/data_end pointers in
2766  * the bpf program.
2767  *
2768  * Return: NULL if the call failed (eg invalid dynptr), pointer to a
2769  * data slice (can be either direct pointer to the data or a pointer to the user
2770  * provided buffer, with its contents containing the data, if unable to obtain
2771  * direct pointer)
2772  */
2773 __bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset,
2774 					void *buffer__opt, u32 buffer__szk)
2775 {
2776 	const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
2777 
2778 	if (!ptr->data || __bpf_dynptr_is_rdonly(ptr))
2779 		return NULL;
2780 
2781 	/* bpf_dynptr_slice_rdwr is the same logic as bpf_dynptr_slice.
2782 	 *
2783 	 * For skb-type dynptrs, it is safe to write into the returned pointer
2784 	 * if the bpf program allows skb data writes. There are two possibilities
2785 	 * that may occur when calling bpf_dynptr_slice_rdwr:
2786 	 *
2787 	 * 1) The requested slice is in the head of the skb. In this case, the
2788 	 * returned pointer is directly to skb data, and if the skb is cloned, the
2789 	 * verifier will have uncloned it (see bpf_unclone_prologue()) already.
2790 	 * The pointer can be directly written into.
2791 	 *
2792 	 * 2) Some portion of the requested slice is in the paged buffer area.
2793 	 * In this case, the requested data will be copied out into the buffer
2794 	 * and the returned pointer will be a pointer to the buffer. The skb
2795 	 * will not be pulled. To persist the write, the user will need to call
2796 	 * bpf_dynptr_write(), which will pull the skb and commit the write.
2797 	 *
2798 	 * Similarly for xdp programs, if the requested slice is not across xdp
2799 	 * fragments, then a direct pointer will be returned, otherwise the data
2800 	 * will be copied out into the buffer and the user will need to call
2801 	 * bpf_dynptr_write() to commit changes.
2802 	 */
2803 	return bpf_dynptr_slice(p, offset, buffer__opt, buffer__szk);
2804 }
2805 
2806 __bpf_kfunc int bpf_dynptr_adjust(const struct bpf_dynptr *p, u32 start, u32 end)
2807 {
2808 	struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
2809 	u32 size;
2810 
2811 	if (!ptr->data || start > end)
2812 		return -EINVAL;
2813 
2814 	size = __bpf_dynptr_size(ptr);
2815 
2816 	if (start > size || end > size)
2817 		return -ERANGE;
2818 
2819 	ptr->offset += start;
2820 	bpf_dynptr_set_size(ptr, end - start);
2821 
2822 	return 0;
2823 }
2824 
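/* Illustrative sketch: bpf_dynptr_adjust() narrows the dynptr's view in place.
 * For a hypothetical dynptr "ptr" currently covering 100 bytes:
 *
 *	bpf_dynptr_adjust(&ptr, 4, 36);	// view is now bytes [4, 36) of the old view
 *	bpf_dynptr_size(&ptr);		// returns 32
 */
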
2825 __bpf_kfunc bool bpf_dynptr_is_null(const struct bpf_dynptr *p)
2826 {
2827 	struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
2828 
2829 	return !ptr->data;
2830 }
2831 
2832 __bpf_kfunc bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *p)
2833 {
2834 	struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
2835 
2836 	if (!ptr->data)
2837 		return false;
2838 
2839 	return __bpf_dynptr_is_rdonly(ptr);
2840 }
2841 
2842 __bpf_kfunc __u32 bpf_dynptr_size(const struct bpf_dynptr *p)
2843 {
2844 	struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
2845 
2846 	if (!ptr->data)
2847 		return -EINVAL;
2848 
2849 	return __bpf_dynptr_size(ptr);
2850 }
2851 
2852 __bpf_kfunc int bpf_dynptr_clone(const struct bpf_dynptr *p,
2853 				 struct bpf_dynptr *clone__uninit)
2854 {
2855 	struct bpf_dynptr_kern *clone = (struct bpf_dynptr_kern *)clone__uninit;
2856 	struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
2857 
2858 	if (!ptr->data) {
2859 		bpf_dynptr_set_null(clone);
2860 		return -EINVAL;
2861 	}
2862 
2863 	*clone = *ptr;
2864 
2865 	return 0;
2866 }
2867 
2868 /**
2869  * bpf_dynptr_copy() - Copy data from one dynptr to another.
2870  * @dst_ptr: Destination dynptr - where data should be copied to
2871  * @dst_off: Offset into the destination dynptr
2872  * @src_ptr: Source dynptr - where data should be copied from
2873  * @src_off: Offset into the source dynptr
2874  * @size: Length of the data to copy from source to destination
2875  *
2876  * Copies data from source dynptr to destination dynptr.
2877  * Returns 0 on success; negative error, otherwise.
2878  */
2879 __bpf_kfunc int bpf_dynptr_copy(struct bpf_dynptr *dst_ptr, u32 dst_off,
2880 				struct bpf_dynptr *src_ptr, u32 src_off, u32 size)
2881 {
2882 	struct bpf_dynptr_kern *dst = (struct bpf_dynptr_kern *)dst_ptr;
2883 	struct bpf_dynptr_kern *src = (struct bpf_dynptr_kern *)src_ptr;
2884 	void *src_slice, *dst_slice;
2885 	char buf[256];
2886 	u32 off;
2887 
2888 	src_slice = bpf_dynptr_slice(src_ptr, src_off, NULL, size);
2889 	dst_slice = bpf_dynptr_slice_rdwr(dst_ptr, dst_off, NULL, size);
2890 
2891 	if (src_slice && dst_slice) {
2892 		memmove(dst_slice, src_slice, size);
2893 		return 0;
2894 	}
2895 
2896 	if (src_slice)
2897 		return __bpf_dynptr_write(dst, dst_off, src_slice, size, 0);
2898 
2899 	if (dst_slice)
2900 		return __bpf_dynptr_read(dst_slice, size, src, src_off, 0);
2901 
2902 	if (bpf_dynptr_check_off_len(dst, dst_off, size) ||
2903 	    bpf_dynptr_check_off_len(src, src_off, size))
2904 		return -E2BIG;
2905 
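	/* Neither a source nor a destination slice could be obtained: fall back
	 * to bouncing the data through an on-stack buffer in chunks.
	 */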
2906 	off = 0;
2907 	while (off < size) {
2908 		u32 chunk_sz = min_t(u32, sizeof(buf), size - off);
2909 		int err;
2910 
2911 		err = __bpf_dynptr_read(buf, chunk_sz, src, src_off + off, 0);
2912 		if (err)
2913 			return err;
2914 		err = __bpf_dynptr_write(dst, dst_off + off, buf, chunk_sz, 0);
2915 		if (err)
2916 			return err;
2917 
2918 		off += chunk_sz;
2919 	}
2920 	return 0;
2921 }
2922 
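/* Illustrative BPF-side sketch: bpf_dynptr_copy() works across dynptr types
 * (e.g. from a packet-backed dynptr "src" into a ringbuf-backed dynptr "dst",
 * both hypothetical here); when neither side can provide a contiguous slice,
 * the chunked read/write fallback above is used transparently:
 *
 *	if (bpf_dynptr_copy(&dst, 0, &src, 0, len))
 *		return 0;	// copy failed
 */
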
2923 /**
2924  * bpf_dynptr_memset() - Fill dynptr memory with a constant byte.
2925  * @p: Destination dynptr - where data will be filled
2926  * @offset: Offset into the dynptr to start filling from
2927  * @size: Number of bytes to fill
2928  * @val: Constant byte to fill the memory with
2929  *
2930  * Fills the @size bytes of the memory area pointed to by @p
2931  * at @offset with the constant byte @val.
2932  * Returns 0 on success; negative error, otherwise.
2933  */
2934 __bpf_kfunc int bpf_dynptr_memset(struct bpf_dynptr *p, u32 offset, u32 size, u8 val)
2935 {
2936 	struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
2937 	u32 chunk_sz, write_off;
2938 	char buf[256];
2939 	void *slice;
2940 	int err;
2941 
2942 	slice = bpf_dynptr_slice_rdwr(p, offset, NULL, size);
2943 	if (likely(slice)) {
2944 		memset(slice, val, size);
2945 		return 0;
2946 	}
2947 
2948 	if (__bpf_dynptr_is_rdonly(ptr))
2949 		return -EINVAL;
2950 
2951 	err = bpf_dynptr_check_off_len(ptr, offset, size);
2952 	if (err)
2953 		return err;
2954 
2955 	/* Non-linear data under the dynptr, write from a local buffer */
2956 	chunk_sz = min_t(u32, sizeof(buf), size);
2957 	memset(buf, val, chunk_sz);
2958 
2959 	for (write_off = 0; write_off < size; write_off += chunk_sz) {
2960 		chunk_sz = min_t(u32, sizeof(buf), size - write_off);
2961 		err = __bpf_dynptr_write(ptr, offset + write_off, buf, chunk_sz, 0);
2962 		if (err)
2963 			return err;
2964 	}
2965 
2966 	return 0;
2967 }
2968 
2969 __bpf_kfunc void *bpf_cast_to_kern_ctx(void *obj)
2970 {
2971 	return obj;
2972 }
2973 
2974 __bpf_kfunc void *bpf_rdonly_cast(const void *obj__ign, u32 btf_id__k)
2975 {
2976 	return (void *)obj__ign;
2977 }
2978 
2979 __bpf_kfunc void bpf_rcu_read_lock(void)
2980 {
2981 	rcu_read_lock();
2982 }
2983 
2984 __bpf_kfunc void bpf_rcu_read_unlock(void)
2985 {
2986 	rcu_read_unlock();
2987 }
2988 
2989 struct bpf_throw_ctx {
2990 	struct bpf_prog_aux *aux;
2991 	u64 sp;
2992 	u64 bp;
2993 	int cnt;
2994 };
2995 
2996 static bool bpf_stack_walker(void *cookie, u64 ip, u64 sp, u64 bp)
2997 {
2998 	struct bpf_throw_ctx *ctx = cookie;
2999 	struct bpf_prog *prog;
3000 
3001 	/*
3002 	 * The RCU read lock is held to safely traverse the latch tree, but we
3003 	 * don't need its protection when accessing the prog, since it has an
3004 	 * active stack frame on the current stack trace, and won't disappear.
3005 	 */
3006 	rcu_read_lock();
3007 	prog = bpf_prog_ksym_find(ip);
3008 	rcu_read_unlock();
3009 	if (!prog)
3010 		return !ctx->cnt;
3011 	ctx->cnt++;
3012 	if (bpf_is_subprog(prog))
3013 		return true;
3014 	ctx->aux = prog->aux;
3015 	ctx->sp = sp;
3016 	ctx->bp = bp;
3017 	return false;
3018 }
3019 
3020 __bpf_kfunc void bpf_throw(u64 cookie)
3021 {
3022 	struct bpf_throw_ctx ctx = {};
3023 
3024 	arch_bpf_stack_walk(bpf_stack_walker, &ctx);
3025 	WARN_ON_ONCE(!ctx.aux);
3026 	if (ctx.aux)
3027 		WARN_ON_ONCE(!ctx.aux->exception_boundary);
3028 	WARN_ON_ONCE(!ctx.bp);
3029 	WARN_ON_ONCE(!ctx.cnt);
3030 	/* Prevent KASAN false positives for CONFIG_KASAN_STACK by unpoisoning
3031 	 * deeper stack depths than ctx.sp as we do not return from bpf_throw,
3032 	 * which skips compiler generated instrumentation to do the same.
3033 	 */
3034 	kasan_unpoison_task_stack_below((void *)(long)ctx.sp);
3035 	ctx.aux->bpf_exception_cb(cookie, ctx.sp, ctx.bp, 0, 0);
3036 	WARN(1, "A call to BPF exception callback should never return\n");
3037 }
3038 
3039 __bpf_kfunc int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags)
3040 {
3041 	struct bpf_async_kern *async = (struct bpf_async_kern *)wq;
3042 	struct bpf_map *map = p__map;
3043 
3044 	BUILD_BUG_ON(sizeof(struct bpf_async_kern) > sizeof(struct bpf_wq));
3045 	BUILD_BUG_ON(__alignof__(struct bpf_async_kern) != __alignof__(struct bpf_wq));
3046 
3047 	if (flags)
3048 		return -EINVAL;
3049 
3050 	return __bpf_async_init(async, map, flags, BPF_ASYNC_TYPE_WQ);
3051 }
3052 
3053 __bpf_kfunc int bpf_wq_start(struct bpf_wq *wq, unsigned int flags)
3054 {
3055 	struct bpf_async_kern *async = (struct bpf_async_kern *)wq;
3056 	struct bpf_work *w;
3057 
3058 	if (in_nmi())
3059 		return -EOPNOTSUPP;
3060 	if (flags)
3061 		return -EINVAL;
3062 	w = READ_ONCE(async->work);
3063 	if (!w || !READ_ONCE(w->cb.prog))
3064 		return -EINVAL;
3065 
3066 	schedule_work(&w->work);
3067 	return 0;
3068 }
3069 
3070 __bpf_kfunc int bpf_wq_set_callback_impl(struct bpf_wq *wq,
3071 					 int (callback_fn)(void *map, int *key, void *value),
3072 					 unsigned int flags,
3073 					 void *aux__prog)
3074 {
3075 	struct bpf_prog_aux *aux = (struct bpf_prog_aux *)aux__prog;
3076 	struct bpf_async_kern *async = (struct bpf_async_kern *)wq;
3077 
3078 	if (flags)
3079 		return -EINVAL;
3080 
3081 	return __bpf_async_set_callback(async, callback_fn, aux, flags, BPF_ASYNC_TYPE_WQ);
3082 }
3083 
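/* Illustrative BPF-side sketch of the workqueue pattern, assuming the
 * bpf_wq_set_callback() wrapper from the selftests' bpf_experimental.h (its
 * aux__prog argument is filled in by the verifier); "elem", "my_map" and
 * "wq_cb" are hypothetical program-defined objects:
 *
 *	// elem is a map value embedding a struct bpf_wq named "wq"
 *	if (bpf_wq_init(&elem->wq, &my_map, 0))
 *		return 0;
 *	if (bpf_wq_set_callback(&elem->wq, wq_cb, 0))
 *		return 0;
 *	bpf_wq_start(&elem->wq, 0);
 */
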
3084 __bpf_kfunc void bpf_preempt_disable(void)
3085 {
3086 	preempt_disable();
3087 }
3088 
3089 __bpf_kfunc void bpf_preempt_enable(void)
3090 {
3091 	preempt_enable();
3092 }
3093 
3094 struct bpf_iter_bits {
3095 	__u64 __opaque[2];
3096 } __aligned(8);
3097 
3098 #define BITS_ITER_NR_WORDS_MAX 511
3099 
3100 struct bpf_iter_bits_kern {
3101 	union {
3102 		__u64 *bits;
3103 		__u64 bits_copy;
3104 	};
3105 	int nr_bits;
3106 	int bit;
3107 } __aligned(8);
3108 
3109 /* On 64-bit hosts, unsigned long and u64 have the same size, so passing
3110  * a u64 pointer and an unsigned long pointer to find_next_bit() will
3111  * return the same result, as both point to the same 8-byte area.
3112  *
3113  * For 32-bit little-endian hosts, using a u64 pointer or unsigned long
3114  * pointer also makes no difference. This is because the first iterated
3115  * unsigned long is composed of bits 0-31 of the u64 and the second unsigned
3116  * long is composed of bits 32-63 of the u64.
3117  *
3118  * However, for 32-bit big-endian hosts, this is not the case. The first
3119  * iterated unsigned long will be bits 32-63 of the u64, so swap these two
3120  * ulong values within the u64.
3121  */
3122 static void swap_ulong_in_u64(u64 *bits, unsigned int nr)
3123 {
3124 #if (BITS_PER_LONG == 32) && defined(__BIG_ENDIAN)
3125 	unsigned int i;
3126 
3127 	for (i = 0; i < nr; i++)
3128 		bits[i] = (bits[i] >> 32) | ((u64)(u32)bits[i] << 32);
3129 #endif
3130 }
3131 
3132 /**
3133  * bpf_iter_bits_new() - Initialize a new bits iterator for a given memory area
3134  * @it: The new bpf_iter_bits to be created
3135  * @unsafe_ptr__ign: A pointer to a memory area to be iterated over
3136  * @nr_words: The size of the specified memory area, measured in 8-byte units.
3137  * The maximum value of @nr_words is @BITS_ITER_NR_WORDS_MAX. This limit may be
3138  * further reduced by the BPF memory allocator implementation.
3139  *
3140  * This function initializes a new bpf_iter_bits structure for iterating over
3141  * a memory area which is specified by the @unsafe_ptr__ign and @nr_words. It
3142  * copies the data of the memory area to the newly created bpf_iter_bits @it for
3143  * subsequent iteration operations.
3144  *
3145  * On success, 0 is returned. On failure, a negative error code is returned.
3146  */
3147 __bpf_kfunc int
3148 bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_words)
3149 {
3150 	struct bpf_iter_bits_kern *kit = (void *)it;
3151 	u32 nr_bytes = nr_words * sizeof(u64);
3152 	u32 nr_bits = BYTES_TO_BITS(nr_bytes);
3153 	int err;
3154 
3155 	BUILD_BUG_ON(sizeof(struct bpf_iter_bits_kern) != sizeof(struct bpf_iter_bits));
3156 	BUILD_BUG_ON(__alignof__(struct bpf_iter_bits_kern) !=
3157 		     __alignof__(struct bpf_iter_bits));
3158 
3159 	kit->nr_bits = 0;
3160 	kit->bits_copy = 0;
3161 	kit->bit = -1;
3162 
3163 	if (!unsafe_ptr__ign || !nr_words)
3164 		return -EINVAL;
3165 	if (nr_words > BITS_ITER_NR_WORDS_MAX)
3166 		return -E2BIG;
3167 
3168 	/* Optimization for u64 mask */
3169 	if (nr_bits == 64) {
3170 		err = bpf_probe_read_kernel_common(&kit->bits_copy, nr_bytes, unsafe_ptr__ign);
3171 		if (err)
3172 			return -EFAULT;
3173 
3174 		swap_ulong_in_u64(&kit->bits_copy, nr_words);
3175 
3176 		kit->nr_bits = nr_bits;
3177 		return 0;
3178 	}
3179 
3180 	if (bpf_mem_alloc_check_size(false, nr_bytes))
3181 		return -E2BIG;
3182 
3183 	/* Fallback to memalloc */
3184 	kit->bits = bpf_mem_alloc(&bpf_global_ma, nr_bytes);
3185 	if (!kit->bits)
3186 		return -ENOMEM;
3187 
3188 	err = bpf_probe_read_kernel_common(kit->bits, nr_bytes, unsafe_ptr__ign);
3189 	if (err) {
3190 		bpf_mem_free(&bpf_global_ma, kit->bits);
3191 		return err;
3192 	}
3193 
3194 	swap_ulong_in_u64(kit->bits, nr_words);
3195 
3196 	kit->nr_bits = nr_bits;
3197 	return 0;
3198 }
3199 
3200 /**
3201  * bpf_iter_bits_next() - Get the next bit in a bpf_iter_bits
3202  * @it: The bpf_iter_bits to be checked
3203  *
3204  * This function returns a pointer to a number representing the index of the
3205  * next set bit in the bit area.
3206  *
3207  * If there are no further bits available, it returns NULL.
3208  */
3209 __bpf_kfunc int *bpf_iter_bits_next(struct bpf_iter_bits *it)
3210 {
3211 	struct bpf_iter_bits_kern *kit = (void *)it;
3212 	int bit = kit->bit, nr_bits = kit->nr_bits;
3213 	const void *bits;
3214 
3215 	if (!nr_bits || bit >= nr_bits)
3216 		return NULL;
3217 
3218 	bits = nr_bits == 64 ? &kit->bits_copy : kit->bits;
3219 	bit = find_next_bit(bits, nr_bits, bit + 1);
3220 	if (bit >= nr_bits) {
3221 		kit->bit = bit;
3222 		return NULL;
3223 	}
3224 
3225 	kit->bit = bit;
3226 	return &kit->bit;
3227 }
3228 
3229 /**
3230  * bpf_iter_bits_destroy() - Destroy a bpf_iter_bits
3231  * @it: The bpf_iter_bits to be destroyed
3232  *
3233  * Destroy the resource associated with the bpf_iter_bits.
3234  */
3235 __bpf_kfunc void bpf_iter_bits_destroy(struct bpf_iter_bits *it)
3236 {
3237 	struct bpf_iter_bits_kern *kit = (void *)it;
3238 
3239 	if (kit->nr_bits <= 64)
3240 		return;
3241 	bpf_mem_free(&bpf_global_ma, kit->bits);
3242 }
3243 
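/* Illustrative BPF-side sketch of the open-coded iterator contract: a call to
 * bpf_iter_bits_new() must always be paired with bpf_iter_bits_destroy();
 * "mask" and "nr_words" are hypothetical (e.g. a cpumask and its word count):
 *
 *	struct bpf_iter_bits it;
 *	int *bit, cnt = 0;
 *
 *	bpf_iter_bits_new(&it, mask, nr_words);
 *	while ((bit = bpf_iter_bits_next(&it)))
 *		cnt++;			// *bit is the index of a set bit
 *	bpf_iter_bits_destroy(&it);
 */
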
3244 /**
3245  * bpf_copy_from_user_str() - Copy a string from an unsafe user address
3246  * @dst:             Destination address, in kernel space.  This buffer must be
3247  *                   at least @dst__sz bytes long.
3248  * @dst__sz:         Maximum number of bytes to copy, includes the trailing NUL.
3249  * @unsafe_ptr__ign: Source address, in user space.
3250  * @flags:           The only supported flag is BPF_F_PAD_ZEROS
3251  *
3252  * Copies a NUL-terminated string from userspace to BPF space. If the user
3253  * string is too long, this will still ensure zero termination in the @dst
3254  * buffer unless the buffer size is 0.
3255  *
3256  * If BPF_F_PAD_ZEROS flag is set, memset the tail of @dst to 0 on success and
3257  * memset all of @dst on failure.
3258  */
3259 __bpf_kfunc int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void __user *unsafe_ptr__ign, u64 flags)
3260 {
3261 	int ret;
3262 
3263 	if (unlikely(flags & ~BPF_F_PAD_ZEROS))
3264 		return -EINVAL;
3265 
3266 	if (unlikely(!dst__sz))
3267 		return 0;
3268 
3269 	ret = strncpy_from_user(dst, unsafe_ptr__ign, dst__sz - 1);
3270 	if (ret < 0) {
3271 		if (flags & BPF_F_PAD_ZEROS)
3272 			memset((char *)dst, 0, dst__sz);
3273 
3274 		return ret;
3275 	}
3276 
3277 	if (flags & BPF_F_PAD_ZEROS)
3278 		memset((char *)dst + ret, 0, dst__sz - ret);
3279 	else
3280 		((char *)dst)[ret] = '\0';
3281 
3282 	return ret + 1;
3283 }
3284 
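/* Illustrative BPF-side sketch ("uptr" is a hypothetical __user pointer; like
 * the other copy_from_user helpers this is intended for sleepable programs):
 *
 *	char name[64];
 *	int n;
 *
 *	n = bpf_copy_from_user_str(name, sizeof(name), uptr, BPF_F_PAD_ZEROS);
 *	if (n < 0)
 *		return 0;
 *	// name[] is NUL-terminated and zero-padded; n includes the NUL
 */
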
3285 /**
3286  * bpf_copy_from_user_task_str() - Copy a string from a task's address space
3287  * @dst:             Destination address, in kernel space.  This buffer must be
3288  *                   at least @dst__sz bytes long.
3289  * @dst__sz:         Maximum number of bytes to copy, includes the trailing NUL.
3290  * @unsafe_ptr__ign: Source address in the task's address space.
3291  * @tsk:             The task whose address space will be used
3292  * @flags:           The only supported flag is BPF_F_PAD_ZEROS
3293  *
3294  * Copies a NUL terminated string from a task's address space to the @dst
3295  * buffer. If the string is too long, this will still ensure zero termination
3296  * in the @dst buffer unless the buffer size is 0.
3297  *
3298  * If the BPF_F_PAD_ZEROS flag is set, memset the tail of @dst to 0 on success
3299  * and memset all of @dst on failure.
3300  *
3301  * Return: The number of copied bytes on success including the NUL terminator.
3302  * A negative error code on failure.
3303  */
3304 __bpf_kfunc int bpf_copy_from_user_task_str(void *dst, u32 dst__sz,
3305 					    const void __user *unsafe_ptr__ign,
3306 					    struct task_struct *tsk, u64 flags)
3307 {
3308 	int ret;
3309 
3310 	if (unlikely(flags & ~BPF_F_PAD_ZEROS))
3311 		return -EINVAL;
3312 
3313 	if (unlikely(dst__sz == 0))
3314 		return 0;
3315 
3316 	ret = copy_remote_vm_str(tsk, (unsigned long)unsafe_ptr__ign, dst, dst__sz, 0);
3317 	if (ret < 0) {
3318 		if (flags & BPF_F_PAD_ZEROS)
3319 			memset(dst, 0, dst__sz);
3320 		return ret;
3321 	}
3322 
3323 	if (flags & BPF_F_PAD_ZEROS)
3324 		memset(dst + ret, 0, dst__sz - ret);
3325 
3326 	return ret + 1;
3327 }
3328 
3329 /* Keep unsigned long in prototype so that kfunc is usable when emitted to
3330  * vmlinux.h in BPF programs directly, but note that while in a BPF program the
3331  * unsigned long always points to an 8-byte region on the stack, the kernel may
3332  * only read and write 4 of those bytes on 32-bit.
3333  */
3334 __bpf_kfunc void bpf_local_irq_save(unsigned long *flags__irq_flag)
3335 {
3336 	local_irq_save(*flags__irq_flag);
3337 }
3338 
3339 __bpf_kfunc void bpf_local_irq_restore(unsigned long *flags__irq_flag)
3340 {
3341 	local_irq_restore(*flags__irq_flag);
3342 }
3343 
3344 __bpf_kfunc void __bpf_trap(void)
3345 {
3346 }
3347 
3348 /*
3349  * Kfuncs for string operations.
3350  *
3351  * Since strings are not necessarily %NUL-terminated, we cannot directly call
3352  * in-kernel implementations. Instead, we open-code the implementations using
3353  * __get_kernel_nofault instead of plain dereference to make them safe.
3354  */
3355 
3356 static int __bpf_strcasecmp(const char *s1, const char *s2, bool ignore_case)
3357 {
3358 	char c1, c2;
3359 	int i;
3360 
3361 	if (!copy_from_kernel_nofault_allowed(s1, 1) ||
3362 	    !copy_from_kernel_nofault_allowed(s2, 1)) {
3363 		return -ERANGE;
3364 	}
3365 
3366 	guard(pagefault)();
3367 	for (i = 0; i < XATTR_SIZE_MAX; i++) {
3368 		__get_kernel_nofault(&c1, s1, char, err_out);
3369 		__get_kernel_nofault(&c2, s2, char, err_out);
3370 		if (ignore_case) {
3371 			c1 = tolower(c1);
3372 			c2 = tolower(c2);
3373 		}
3374 		if (c1 != c2)
3375 			return c1 < c2 ? -1 : 1;
3376 		if (c1 == '\0')
3377 			return 0;
3378 		s1++;
3379 		s2++;
3380 	}
3381 	return -E2BIG;
3382 err_out:
3383 	return -EFAULT;
3384 }
3385 
3386 /**
3387  * bpf_strcmp - Compare two strings
3388  * @s1__ign: One string
3389  * @s2__ign: Another string
3390  *
3391  * Return:
3392  * * %0       - Strings are equal
3393  * * %-1      - @s1__ign is smaller
3394  * * %1       - @s2__ign is smaller
3395  * * %-EFAULT - Cannot read one of the strings
3396  * * %-E2BIG  - One of strings is too large
3397  * * %-ERANGE - One of strings is outside of kernel address space
3398  */
3399 __bpf_kfunc int bpf_strcmp(const char *s1__ign, const char *s2__ign)
3400 {
3401 	return __bpf_strcasecmp(s1__ign, s2__ign, false);
3402 }
3403 
3404 /**
3405  * bpf_strcasecmp - Compare two strings, ignoring the case of the characters
3406  * @s1__ign: One string
3407  * @s2__ign: Another string
3408  *
3409  * Return:
3410  * * %0       - Strings are equal
3411  * * %-1      - @s1__ign is smaller
3412  * * %1       - @s2__ign is smaller
3413  * * %-EFAULT - Cannot read one of the strings
3414  * * %-E2BIG  - One of strings is too large
3415  * * %-ERANGE - One of strings is outside of kernel address space
3416  */
3417 __bpf_kfunc int bpf_strcasecmp(const char *s1__ign, const char *s2__ign)
3418 {
3419 	return __bpf_strcasecmp(s1__ign, s2__ign, true);
3420 }
3421 
3422 /**
3423  * bpf_strnchr - Find a character in a length limited string
3424  * @s__ign: The string to be searched
3425  * @count: The number of characters to be searched
3426  * @c: The character to search for
3427  *
3428  * Note that the %NUL-terminator is considered part of the string, and can
3429  * be searched for.
3430  *
3431  * Return:
3432  * * >=0      - Index of the first occurrence of @c within @s__ign
3433  * * %-ENOENT - @c not found in the first @count characters of @s__ign
3434  * * %-EFAULT - Cannot read @s__ign
3435  * * %-E2BIG  - @s__ign is too large
3436  * * %-ERANGE - @s__ign is outside of kernel address space
3437  */
3438 __bpf_kfunc int bpf_strnchr(const char *s__ign, size_t count, char c)
3439 {
3440 	char sc;
3441 	int i;
3442 
3443 	if (!copy_from_kernel_nofault_allowed(s__ign, 1))
3444 		return -ERANGE;
3445 
3446 	guard(pagefault)();
3447 	for (i = 0; i < count && i < XATTR_SIZE_MAX; i++) {
3448 		__get_kernel_nofault(&sc, s__ign, char, err_out);
3449 		if (sc == c)
3450 			return i;
3451 		if (sc == '\0')
3452 			return -ENOENT;
3453 		s__ign++;
3454 	}
3455 	return i == XATTR_SIZE_MAX ? -E2BIG : -ENOENT;
3456 err_out:
3457 	return -EFAULT;
3458 }
3459 
3460 /**
3461  * bpf_strchr - Find the first occurrence of a character in a string
3462  * @s__ign: The string to be searched
3463  * @c: The character to search for
3464  *
3465  * Note that the %NUL-terminator is considered part of the string, and can
3466  * be searched for.
3467  *
3468  * Return:
3469  * * >=0      - The index of the first occurrence of @c within @s__ign
3470  * * %-ENOENT - @c not found in @s__ign
3471  * * %-EFAULT - Cannot read @s__ign
3472  * * %-E2BIG  - @s__ign is too large
3473  * * %-ERANGE - @s__ign is outside of kernel address space
3474  */
3475 __bpf_kfunc int bpf_strchr(const char *s__ign, char c)
3476 {
3477 	return bpf_strnchr(s__ign, XATTR_SIZE_MAX, c);
3478 }
3479 
3480 /**
3481  * bpf_strchrnul - Find and return a character in a string, or end of string
3482  * @s__ign: The string to be searched
3483  * @c: The character to search for
3484  *
3485  * Return:
3486  * * >=0      - Index of the first occurrence of @c within @s__ign or index of
3487  *              the null byte at the end of @s__ign when @c is not found
3488  * * %-EFAULT - Cannot read @s__ign
3489  * * %-E2BIG  - @s__ign is too large
3490  * * %-ERANGE - @s__ign is outside of kernel address space
3491  */
3492 __bpf_kfunc int bpf_strchrnul(const char *s__ign, char c)
3493 {
3494 	char sc;
3495 	int i;
3496 
3497 	if (!copy_from_kernel_nofault_allowed(s__ign, 1))
3498 		return -ERANGE;
3499 
3500 	guard(pagefault)();
3501 	for (i = 0; i < XATTR_SIZE_MAX; i++) {
3502 		__get_kernel_nofault(&sc, s__ign, char, err_out);
3503 		if (sc == '\0' || sc == c)
3504 			return i;
3505 		s__ign++;
3506 	}
3507 	return -E2BIG;
3508 err_out:
3509 	return -EFAULT;
3510 }
3511 
3512 /**
3513  * bpf_strrchr - Find the last occurrence of a character in a string
3514  * @s__ign: The string to be searched
3515  * @c: The character to search for
3516  *
3517  * Return:
3518  * * >=0      - Index of the last occurrence of @c within @s__ign
3519  * * %-ENOENT - @c not found in @s__ign
3520  * * %-EFAULT - Cannot read @s__ign
3521  * * %-E2BIG  - @s__ign is too large
3522  * * %-ERANGE - @s__ign is outside of kernel address space
3523  */
3524 __bpf_kfunc int bpf_strrchr(const char *s__ign, int c)
3525 {
3526 	char sc;
3527 	int i, last = -ENOENT;
3528 
3529 	if (!copy_from_kernel_nofault_allowed(s__ign, 1))
3530 		return -ERANGE;
3531 
3532 	guard(pagefault)();
3533 	for (i = 0; i < XATTR_SIZE_MAX; i++) {
3534 		__get_kernel_nofault(&sc, s__ign, char, err_out);
3535 		if (sc == c)
3536 			last = i;
3537 		if (sc == '\0')
3538 			return last;
3539 		s__ign++;
3540 	}
3541 	return -E2BIG;
3542 err_out:
3543 	return -EFAULT;
3544 }
3545 
3546 /**
3547  * bpf_strnlen - Calculate the length of a length-limited string
3548  * @s__ign: The string
3549  * @count: The maximum number of characters to count
3550  *
3551  * Return:
3552  * * >=0      - The length of @s__ign
3553  * * %-EFAULT - Cannot read @s__ign
3554  * * %-E2BIG  - @s__ign is too large
3555  * * %-ERANGE - @s__ign is outside of kernel address space
3556  */
3557 __bpf_kfunc int bpf_strnlen(const char *s__ign, size_t count)
3558 {
3559 	char c;
3560 	int i;
3561 
3562 	if (!copy_from_kernel_nofault_allowed(s__ign, 1))
3563 		return -ERANGE;
3564 
3565 	guard(pagefault)();
3566 	for (i = 0; i < count && i < XATTR_SIZE_MAX; i++) {
3567 		__get_kernel_nofault(&c, s__ign, char, err_out);
3568 		if (c == '\0')
3569 			return i;
3570 		s__ign++;
3571 	}
3572 	return i == XATTR_SIZE_MAX ? -E2BIG : i;
3573 err_out:
3574 	return -EFAULT;
3575 }
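
/*
 * Worked example (illustrative): at most @count bytes are examined, so the
 * result is the string length capped at @count.  For s = "hello":
 *
 *    bpf_strnlen(s, 16) == 5
 *    bpf_strnlen(s, 3)  == 3    // no NUL within the first 3 bytes
 *    bpf_strnlen(s, 0)  == 0
 */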
3576 
3577 /**
3578  * bpf_strlen - Calculate the length of a string
3579  * @s__ign: The string
3580  *
3581  * Return:
3582  * * >=0      - The length of @s__ign
3583  * * %-EFAULT - Cannot read @s__ign
3584  * * %-E2BIG  - @s__ign is too large
3585  * * %-ERANGE - @s__ign is outside of kernel address space
3586  */
3587 __bpf_kfunc int bpf_strlen(const char *s__ign)
3588 {
3589 	return bpf_strnlen(s__ign, XATTR_SIZE_MAX);
3590 }
3591 
3592 /**
3593  * bpf_strspn - Calculate the length of the initial substring of @s__ign which
3594  *              only contains characters in @accept__ign
3595  * @s__ign: The string to be searched
3596  * @accept__ign: The set of characters to accept
3597  *
3598  * Return:
3599  * * >=0      - The length of the initial substring of @s__ign which only
3600  *              contains characters from @accept__ign
3601  * * %-EFAULT - Cannot read one of the strings
3602  * * %-E2BIG  - One of the strings is too large
3603  * * %-ERANGE - One of the strings is outside of kernel address space
3604  */
3605 __bpf_kfunc int bpf_strspn(const char *s__ign, const char *accept__ign)
3606 {
3607 	char cs, ca;
3608 	int i, j;
3609 
3610 	if (!copy_from_kernel_nofault_allowed(s__ign, 1) ||
3611 	    !copy_from_kernel_nofault_allowed(accept__ign, 1)) {
3612 		return -ERANGE;
3613 	}
3614 
3615 	guard(pagefault)();
3616 	for (i = 0; i < XATTR_SIZE_MAX; i++) {
3617 		__get_kernel_nofault(&cs, s__ign, char, err_out);
3618 		if (cs == '\0')
3619 			return i;
3620 		for (j = 0; j < XATTR_SIZE_MAX; j++) {
3621 			__get_kernel_nofault(&ca, accept__ign + j, char, err_out);
3622 			if (cs == ca || ca == '\0')
3623 				break;
3624 		}
3625 		if (j == XATTR_SIZE_MAX)
3626 			return -E2BIG;
3627 		if (ca == '\0')
3628 			return i;
3629 		s__ign++;
3630 	}
3631 	return -E2BIG;
3632 err_out:
3633 	return -EFAULT;
3634 }
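
/*
 * Worked example (illustrative): for s = "bpf_map_update":
 *
 *    bpf_strspn(s, "_abpf") == 4    // "bpf_" is accepted, 'm' is not
 *    bpf_strspn(s, "xyz")   == 0
 */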
3635 
3636 /**
3637  * bpf_strcspn - Calculate the length of the initial substring of @s__ign which
3638  *               does not contain characters in @reject__ign
3639  * @s__ign: The string to be searched
3640  * @reject__ign: The set of characters to reject
3641  *
3642  * Return:
3643  * * >=0      - The length of the initial substring of @s__ign which does not
3644  *              contain characters from @reject__ign
3645  * * %-EFAULT - Cannot read one of the strings
3646  * * %-E2BIG  - One of the strings is too large
3647  * * %-ERANGE - One of the strings is outside of kernel address space
3648  */
3649 __bpf_kfunc int bpf_strcspn(const char *s__ign, const char *reject__ign)
3650 {
3651 	char cs, cr;
3652 	int i, j;
3653 
3654 	if (!copy_from_kernel_nofault_allowed(s__ign, 1) ||
3655 	    !copy_from_kernel_nofault_allowed(reject__ign, 1)) {
3656 		return -ERANGE;
3657 	}
3658 
3659 	guard(pagefault)();
3660 	for (i = 0; i < XATTR_SIZE_MAX; i++) {
3661 		__get_kernel_nofault(&cs, s__ign, char, err_out);
3662 		if (cs == '\0')
3663 			return i;
3664 		for (j = 0; j < XATTR_SIZE_MAX; j++) {
3665 			__get_kernel_nofault(&cr, reject__ign + j, char, err_out);
3666 			if (cs == cr || cr == '\0')
3667 				break;
3668 		}
3669 		if (j == XATTR_SIZE_MAX)
3670 			return -E2BIG;
3671 		if (cr != '\0')
3672 			return i;
3673 		s__ign++;
3674 	}
3675 	return -E2BIG;
3676 err_out:
3677 	return -EFAULT;
3678 }
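
/*
 * Worked example (illustrative): bpf_strcspn() is the complement of
 * bpf_strspn().  For s = "key=value":
 *
 *    bpf_strcspn(s, "=")  == 3    // length of the key before the first '='
 *    bpf_strcspn(s, "kK") == 0    // 'k' is rejected immediately
 */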
3679 
3680 /**
3681  * bpf_strnstr - Find the first substring in a length-limited string
3682  * @s1__ign: The string to be searched
3683  * @s2__ign: The string to search for
3684  * @len: The maximum number of characters to search
3685  *
3686  * Return:
3687  * * >=0      - Index of the first character of the first occurrence of @s2__ign
3688  *              within the first @len characters of @s1__ign
3689  * * %-ENOENT - @s2__ign not found in the first @len characters of @s1__ign
3690  * * %-EFAULT - Cannot read one of the strings
3691  * * %-E2BIG  - One of the strings is too large
3692  * * %-ERANGE - One of the strings is outside of kernel address space
3693  */
3694 __bpf_kfunc int bpf_strnstr(const char *s1__ign, const char *s2__ign, size_t len)
3695 {
3696 	char c1, c2;
3697 	int i, j;
3698 
3699 	if (!copy_from_kernel_nofault_allowed(s1__ign, 1) ||
3700 	    !copy_from_kernel_nofault_allowed(s2__ign, 1)) {
3701 		return -ERANGE;
3702 	}
3703 
3704 	guard(pagefault)();
3705 	for (i = 0; i < XATTR_SIZE_MAX; i++) {
3706 		for (j = 0; i + j <= len && j < XATTR_SIZE_MAX; j++) {
3707 			__get_kernel_nofault(&c2, s2__ign + j, char, err_out);
3708 			if (c2 == '\0')
3709 				return i;
3710 			/*
3711 			 * We allow reading an extra byte from s2 (note the
3712 			 * `i + j <= len` above) to cover the case when s2 is
3713 			 * a suffix of the first len chars of s1.
3714 			 */
3715 			if (i + j == len)
3716 				break;
3717 			__get_kernel_nofault(&c1, s1__ign + j, char, err_out);
3718 			if (c1 == '\0')
3719 				return -ENOENT;
3720 			if (c1 != c2)
3721 				break;
3722 		}
3723 		if (j == XATTR_SIZE_MAX)
3724 			return -E2BIG;
3725 		if (i + j == len)
3726 			return -ENOENT;
3727 		s1__ign++;
3728 	}
3729 	return -E2BIG;
3730 err_out:
3731 	return -EFAULT;
3732 }
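
/*
 * Worked example (illustrative): only the first @len bytes of @s1__ign are
 * searched, but a match that ends exactly at that boundary is still
 * reported (the "extra byte" case handled above).  For s1 = "bpf_helpers":
 *
 *    bpf_strnstr(s1, "help", 8)     == 4    // "help" ends at offset 8
 *    bpf_strnstr(s1, "helpers", 8)  == -ENOENT
 *    bpf_strnstr(s1, "helpers", 11) == 4
 */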
3733 
3734 /**
3735  * bpf_strstr - Find the first substring in a string
3736  * @s1__ign: The string to be searched
3737  * @s2__ign: The string to search for
3738  *
3739  * Return:
3740  * * >=0      - Index of the first character of the first occurrence of @s2__ign
3741  *              within @s1__ign
3742  * * %-ENOENT - @s2__ign is not a substring of @s1__ign
3743  * * %-EFAULT - Cannot read one of the strings
3744  * * %-E2BIG  - One of the strings is too large
3745  * * %-ERANGE - One of the strings is outside of kernel address space
3746  */
3747 __bpf_kfunc int bpf_strstr(const char *s1__ign, const char *s2__ign)
3748 {
3749 	return bpf_strnstr(s1__ign, s2__ign, XATTR_SIZE_MAX);
3750 }
3751 #ifdef CONFIG_KEYS
3752 /**
3753  * bpf_lookup_user_key - lookup a key by its serial
3754  * @serial: key handle serial number
3755  * @flags: lookup-specific flags
3756  *
3757  * Search for a key with the given *serial* and the provided *flags*.
3758  * If found, increment the reference count of the key by one, and
3759  * return it in the bpf_key structure.
3760  *
3761  * The bpf_key structure must be passed to bpf_key_put() when done
3762  * with it, so that the key reference count is decremented and the
3763  * bpf_key structure is freed.
3764  *
3765  * Permission checks are deferred to the time the key is used by
3766  * one of the available key-specific kfuncs.
3767  *
3768  * Set *flags* with KEY_LOOKUP_CREATE to attempt creating a requested
3769  * special keyring (e.g. the session keyring) if it doesn't yet exist.
3770  * Set *flags* with KEY_LOOKUP_PARTIAL to look up a key without waiting
3771  * for key construction, and to retrieve uninstantiated keys (keys
3772  * without data attached to them).
3773  *
3774  * Return: a bpf_key pointer with a valid key pointer if the key is found, a
3775  *         NULL pointer otherwise.
3776  */
3777 __bpf_kfunc struct bpf_key *bpf_lookup_user_key(s32 serial, u64 flags)
3778 {
3779 	key_ref_t key_ref;
3780 	struct bpf_key *bkey;
3781 
3782 	if (flags & ~KEY_LOOKUP_ALL)
3783 		return NULL;
3784 
3785 	/*
3786 	 * Permission check is deferred until the key is used, as the
3787 	 * intent of the caller is unknown here.
3788 	 */
3789 	key_ref = lookup_user_key(serial, flags, KEY_DEFER_PERM_CHECK);
3790 	if (IS_ERR(key_ref))
3791 		return NULL;
3792 
3793 	bkey = kmalloc(sizeof(*bkey), GFP_KERNEL);
3794 	if (!bkey) {
3795 		key_put(key_ref_to_ptr(key_ref));
3796 		return NULL;
3797 	}
3798 
3799 	bkey->key = key_ref_to_ptr(key_ref);
3800 	bkey->has_ref = true;
3801 
3802 	return bkey;
3803 }
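
/*
 * Usage sketch (illustrative, modelled on the in-tree selftests): acquiring
 * and releasing a user key from a sleepable BPF LSM program.  The __ksym
 * externs and the attach point are assumptions about the loader
 * environment; KEY_SPEC_SESSION_KEYRING and KEY_LOOKUP_CREATE come from the
 * keys headers.
 *
 *    extern struct bpf_key *bpf_lookup_user_key(s32 serial, u64 flags) __ksym;
 *    extern void bpf_key_put(struct bpf_key *bkey) __ksym;
 *
 *    SEC("lsm.s/bpf")
 *    int BPF_PROG(check_bpf, int cmd, union bpf_attr *attr, unsigned int size)
 *    {
 *        struct bpf_key *bkey;
 *
 *        bkey = bpf_lookup_user_key(KEY_SPEC_SESSION_KEYRING,
 *                                   KEY_LOOKUP_CREATE);
 *        if (!bkey)
 *            return 0;
 *        // ... use the key with a key-specific kfunc ...
 *        bpf_key_put(bkey);
 *        return 0;
 *    }
 */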
3804 
3805 /**
3806  * bpf_lookup_system_key - lookup a key by a system-defined ID
3807  * @id: key ID
3808  *
3809  * Obtain a bpf_key structure with a key pointer set to the passed key ID.
3810  * The key pointer is marked as invalid, to prevent bpf_key_put() from
3811  * attempting to decrement the key reference count on that pointer. The key
3812  * pointer set in this way is currently understood only by
3813  * verify_pkcs7_signature().
3814  *
3815  * Set *id* to one of the values defined in include/linux/verification.h:
3816  * 0 for the primary keyring (immutable keyring of system keys);
3817  * VERIFY_USE_SECONDARY_KEYRING for both the primary and secondary keyring
3818  * (where keys can be added only if they are vouched for by existing keys
3819  * in those keyrings); VERIFY_USE_PLATFORM_KEYRING for the platform
3820  * keyring (primarily used by the integrity subsystem to verify a kexec'ed
3821  * kernel image and, possibly, the initramfs signature).
3822  *
3823  * Return: a bpf_key pointer with an invalid key pointer set from the
3824  *         pre-determined ID on success, a NULL pointer otherwise
3825  */
3826 __bpf_kfunc struct bpf_key *bpf_lookup_system_key(u64 id)
3827 {
3828 	struct bpf_key *bkey;
3829 
3830 	if (system_keyring_id_check(id) < 0)
3831 		return NULL;
3832 
3833 	bkey = kmalloc(sizeof(*bkey), GFP_ATOMIC);
3834 	if (!bkey)
3835 		return NULL;
3836 
3837 	bkey->key = (struct key *)(unsigned long)id;
3838 	bkey->has_ref = false;
3839 
3840 	return bkey;
3841 }
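
/*
 * Usage sketch (illustrative): referencing the secondary trusted keyring.
 * VERIFY_USE_SECONDARY_KEYRING comes from include/linux/verification.h as
 * noted above; the __ksym externs are assumptions about how a BPF program
 * imports the kfuncs.
 *
 *    extern struct bpf_key *bpf_lookup_system_key(u64 id) __ksym;
 *    extern void bpf_key_put(struct bpf_key *bkey) __ksym;
 *
 *    struct bpf_key *bkey = bpf_lookup_system_key(VERIFY_USE_SECONDARY_KEYRING);
 *
 *    if (bkey) {
 *        // No key reference is held, so bpf_key_put() only frees the wrapper.
 *        bpf_key_put(bkey);
 *    }
 */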
3842 
3843 /**
3844  * bpf_key_put - decrement key reference count if key is valid and free bpf_key
3845  * @bkey: bpf_key structure
3846  *
3847  * Decrement the reference count of the key inside *bkey*, if the pointer
3848  * is valid, and free *bkey*.
3849  */
3850 __bpf_kfunc void bpf_key_put(struct bpf_key *bkey)
3851 {
3852 	if (bkey->has_ref)
3853 		key_put(bkey->key);
3854 
3855 	kfree(bkey);
3856 }
3857 
3858 /**
3859  * bpf_verify_pkcs7_signature - verify a PKCS#7 signature
3860  * @data_p: data to verify
3861  * @sig_p: signature of the data
3862  * @trusted_keyring: keyring with keys trusted for signature verification
3863  *
3864  * Verify the PKCS#7 signature *sig_p* against the supplied *data_p*
3865  * with keys in a keyring referenced by *trusted_keyring*.
3866  *
3867  * Return: 0 on success, a negative value on error.
3868  */
3869 __bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p,
3870 			       struct bpf_dynptr *sig_p,
3871 			       struct bpf_key *trusted_keyring)
3872 {
3873 #ifdef CONFIG_SYSTEM_DATA_VERIFICATION
3874 	struct bpf_dynptr_kern *data_ptr = (struct bpf_dynptr_kern *)data_p;
3875 	struct bpf_dynptr_kern *sig_ptr = (struct bpf_dynptr_kern *)sig_p;
3876 	const void *data, *sig;
3877 	u32 data_len, sig_len;
3878 	int ret;
3879 
3880 	if (trusted_keyring->has_ref) {
3881 		/*
3882 		 * Do the permission check deferred in bpf_lookup_user_key().
3883 		 * See bpf_lookup_user_key() for more details.
3884 		 *
3885 		 * A call to key_task_permission() here would be redundant, as
3886 		 * it is already done by keyring_search() called by
3887 		 * find_asymmetric_key().
3888 		 */
3889 		ret = key_validate(trusted_keyring->key);
3890 		if (ret < 0)
3891 			return ret;
3892 	}
3893 
3894 	data_len = __bpf_dynptr_size(data_ptr);
3895 	data = __bpf_dynptr_data(data_ptr, data_len);
3896 	sig_len = __bpf_dynptr_size(sig_ptr);
3897 	sig = __bpf_dynptr_data(sig_ptr, sig_len);
3898 
3899 	return verify_pkcs7_signature(data, data_len, sig, sig_len,
3900 				      trusted_keyring->key,
3901 				      VERIFYING_BPF_SIGNATURE, NULL,
3902 				      NULL);
3903 #else
3904 	return -EOPNOTSUPP;
3905 #endif /* CONFIG_SYSTEM_DATA_VERIFICATION */
3906 }
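
/*
 * Usage sketch (illustrative): verifying a PKCS#7 signature from a
 * sleepable BPF program.  bpf_dynptr_from_mem() is the existing dynptr
 * helper; data_val/sig_val and their lengths are hypothetical buffers
 * (e.g. map values) supplied by the program.
 *
 *    struct bpf_dynptr data_ptr, sig_ptr;
 *    struct bpf_key *trusted_keyring;
 *    int ret;
 *
 *    trusted_keyring = bpf_lookup_system_key(VERIFY_USE_SECONDARY_KEYRING);
 *    if (!trusted_keyring)
 *        return 0;
 *
 *    bpf_dynptr_from_mem(data_val, data_len, 0, &data_ptr);
 *    bpf_dynptr_from_mem(sig_val, sig_len, 0, &sig_ptr);
 *
 *    ret = bpf_verify_pkcs7_signature(&data_ptr, &sig_ptr, trusted_keyring);
 *
 *    bpf_key_put(trusted_keyring);
 *    return ret;
 */
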
3907 #endif /* CONFIG_KEYS */
3908 
3909 __bpf_kfunc_end_defs();
3910 
3911 BTF_KFUNCS_START(generic_btf_ids)
3912 #ifdef CONFIG_CRASH_DUMP
3913 BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
3914 #endif
3915 BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
3916 BTF_ID_FLAGS(func, bpf_percpu_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
3917 BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE)
3918 BTF_ID_FLAGS(func, bpf_percpu_obj_drop_impl, KF_RELEASE)
3919 BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL | KF_RCU)
3920 BTF_ID_FLAGS(func, bpf_list_push_front_impl)
3921 BTF_ID_FLAGS(func, bpf_list_push_back_impl)
3922 BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL)
3923 BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL)
3924 BTF_ID_FLAGS(func, bpf_list_front, KF_RET_NULL)
3925 BTF_ID_FLAGS(func, bpf_list_back, KF_RET_NULL)
3926 BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
3927 BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE)
3928 BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE | KF_RET_NULL)
3929 BTF_ID_FLAGS(func, bpf_rbtree_add_impl)
3930 BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL)
3931 BTF_ID_FLAGS(func, bpf_rbtree_root, KF_RET_NULL)
3932 BTF_ID_FLAGS(func, bpf_rbtree_left, KF_RET_NULL)
3933 BTF_ID_FLAGS(func, bpf_rbtree_right, KF_RET_NULL)
3934 
3935 #ifdef CONFIG_CGROUPS
3936 BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
3937 BTF_ID_FLAGS(func, bpf_cgroup_release, KF_RELEASE)
3938 BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
3939 BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL)
3940 BTF_ID_FLAGS(func, bpf_task_under_cgroup, KF_RCU)
3941 BTF_ID_FLAGS(func, bpf_task_get_cgroup1, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
3942 #endif
3943 BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL)
3944 BTF_ID_FLAGS(func, bpf_task_from_vpid, KF_ACQUIRE | KF_RET_NULL)
3945 BTF_ID_FLAGS(func, bpf_throw)
3946 #ifdef CONFIG_BPF_EVENTS
3947 BTF_ID_FLAGS(func, bpf_send_signal_task, KF_TRUSTED_ARGS)
3948 #endif
3949 #ifdef CONFIG_KEYS
3950 BTF_ID_FLAGS(func, bpf_lookup_user_key, KF_ACQUIRE | KF_RET_NULL | KF_SLEEPABLE)
3951 BTF_ID_FLAGS(func, bpf_lookup_system_key, KF_ACQUIRE | KF_RET_NULL)
3952 BTF_ID_FLAGS(func, bpf_key_put, KF_RELEASE)
3953 #ifdef CONFIG_SYSTEM_DATA_VERIFICATION
3954 BTF_ID_FLAGS(func, bpf_verify_pkcs7_signature, KF_SLEEPABLE)
3955 #endif
3956 #endif
3957 BTF_KFUNCS_END(generic_btf_ids)
3958 
3959 static const struct btf_kfunc_id_set generic_kfunc_set = {
3960 	.owner = THIS_MODULE,
3961 	.set   = &generic_btf_ids,
3962 };
3963 
3964 
3965 BTF_ID_LIST(generic_dtor_ids)
3966 BTF_ID(struct, task_struct)
3967 BTF_ID(func, bpf_task_release_dtor)
3968 #ifdef CONFIG_CGROUPS
3969 BTF_ID(struct, cgroup)
3970 BTF_ID(func, bpf_cgroup_release_dtor)
3971 #endif
3972 
3973 BTF_KFUNCS_START(common_btf_ids)
3974 BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx, KF_FASTCALL)
3975 BTF_ID_FLAGS(func, bpf_rdonly_cast, KF_FASTCALL)
3976 BTF_ID_FLAGS(func, bpf_rcu_read_lock)
3977 BTF_ID_FLAGS(func, bpf_rcu_read_unlock)
3978 BTF_ID_FLAGS(func, bpf_dynptr_slice, KF_RET_NULL)
3979 BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL)
3980 BTF_ID_FLAGS(func, bpf_iter_num_new, KF_ITER_NEW)
3981 BTF_ID_FLAGS(func, bpf_iter_num_next, KF_ITER_NEXT | KF_RET_NULL)
3982 BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY)
3983 BTF_ID_FLAGS(func, bpf_iter_task_vma_new, KF_ITER_NEW | KF_RCU)
3984 BTF_ID_FLAGS(func, bpf_iter_task_vma_next, KF_ITER_NEXT | KF_RET_NULL)
3985 BTF_ID_FLAGS(func, bpf_iter_task_vma_destroy, KF_ITER_DESTROY)
3986 #ifdef CONFIG_CGROUPS
3987 BTF_ID_FLAGS(func, bpf_iter_css_task_new, KF_ITER_NEW | KF_TRUSTED_ARGS)
3988 BTF_ID_FLAGS(func, bpf_iter_css_task_next, KF_ITER_NEXT | KF_RET_NULL)
3989 BTF_ID_FLAGS(func, bpf_iter_css_task_destroy, KF_ITER_DESTROY)
3990 BTF_ID_FLAGS(func, bpf_iter_css_new, KF_ITER_NEW | KF_TRUSTED_ARGS | KF_RCU_PROTECTED)
3991 BTF_ID_FLAGS(func, bpf_iter_css_next, KF_ITER_NEXT | KF_RET_NULL)
3992 BTF_ID_FLAGS(func, bpf_iter_css_destroy, KF_ITER_DESTROY)
3993 #endif
3994 BTF_ID_FLAGS(func, bpf_iter_task_new, KF_ITER_NEW | KF_TRUSTED_ARGS | KF_RCU_PROTECTED)
3995 BTF_ID_FLAGS(func, bpf_iter_task_next, KF_ITER_NEXT | KF_RET_NULL)
3996 BTF_ID_FLAGS(func, bpf_iter_task_destroy, KF_ITER_DESTROY)
3997 BTF_ID_FLAGS(func, bpf_dynptr_adjust)
3998 BTF_ID_FLAGS(func, bpf_dynptr_is_null)
3999 BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly)
4000 BTF_ID_FLAGS(func, bpf_dynptr_size)
4001 BTF_ID_FLAGS(func, bpf_dynptr_clone)
4002 BTF_ID_FLAGS(func, bpf_dynptr_copy)
4003 BTF_ID_FLAGS(func, bpf_dynptr_memset)
4004 #ifdef CONFIG_NET
4005 BTF_ID_FLAGS(func, bpf_modify_return_test_tp)
4006 #endif
4007 BTF_ID_FLAGS(func, bpf_wq_init)
4008 BTF_ID_FLAGS(func, bpf_wq_set_callback_impl)
4009 BTF_ID_FLAGS(func, bpf_wq_start)
4010 BTF_ID_FLAGS(func, bpf_preempt_disable)
4011 BTF_ID_FLAGS(func, bpf_preempt_enable)
4012 BTF_ID_FLAGS(func, bpf_iter_bits_new, KF_ITER_NEW)
4013 BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL)
4014 BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY)
4015 BTF_ID_FLAGS(func, bpf_copy_from_user_str, KF_SLEEPABLE)
4016 BTF_ID_FLAGS(func, bpf_copy_from_user_task_str, KF_SLEEPABLE)
4017 BTF_ID_FLAGS(func, bpf_get_kmem_cache)
4018 BTF_ID_FLAGS(func, bpf_iter_kmem_cache_new, KF_ITER_NEW | KF_SLEEPABLE)
4019 BTF_ID_FLAGS(func, bpf_iter_kmem_cache_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE)
4020 BTF_ID_FLAGS(func, bpf_iter_kmem_cache_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
4021 BTF_ID_FLAGS(func, bpf_local_irq_save)
4022 BTF_ID_FLAGS(func, bpf_local_irq_restore)
4023 BTF_ID_FLAGS(func, bpf_probe_read_user_dynptr)
4024 BTF_ID_FLAGS(func, bpf_probe_read_kernel_dynptr)
4025 BTF_ID_FLAGS(func, bpf_probe_read_user_str_dynptr)
4026 BTF_ID_FLAGS(func, bpf_probe_read_kernel_str_dynptr)
4027 BTF_ID_FLAGS(func, bpf_copy_from_user_dynptr, KF_SLEEPABLE)
4028 BTF_ID_FLAGS(func, bpf_copy_from_user_str_dynptr, KF_SLEEPABLE)
4029 BTF_ID_FLAGS(func, bpf_copy_from_user_task_dynptr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
4030 BTF_ID_FLAGS(func, bpf_copy_from_user_task_str_dynptr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
4031 #ifdef CONFIG_DMA_SHARED_BUFFER
4032 BTF_ID_FLAGS(func, bpf_iter_dmabuf_new, KF_ITER_NEW | KF_SLEEPABLE)
4033 BTF_ID_FLAGS(func, bpf_iter_dmabuf_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE)
4034 BTF_ID_FLAGS(func, bpf_iter_dmabuf_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
4035 #endif
4036 BTF_ID_FLAGS(func, __bpf_trap)
4037 BTF_ID_FLAGS(func, bpf_strcmp)
4038 BTF_ID_FLAGS(func, bpf_strcasecmp)
4039 BTF_ID_FLAGS(func, bpf_strchr)
4040 BTF_ID_FLAGS(func, bpf_strchrnul)
4041 BTF_ID_FLAGS(func, bpf_strnchr)
4042 BTF_ID_FLAGS(func, bpf_strrchr)
4043 BTF_ID_FLAGS(func, bpf_strlen)
4044 BTF_ID_FLAGS(func, bpf_strnlen)
4045 BTF_ID_FLAGS(func, bpf_strspn)
4046 BTF_ID_FLAGS(func, bpf_strcspn)
4047 BTF_ID_FLAGS(func, bpf_strstr)
4048 BTF_ID_FLAGS(func, bpf_strnstr)
4049 #if defined(CONFIG_BPF_LSM) && defined(CONFIG_CGROUPS)
4050 BTF_ID_FLAGS(func, bpf_cgroup_read_xattr, KF_RCU)
4051 #endif
4052 BTF_ID_FLAGS(func, bpf_stream_vprintk, KF_TRUSTED_ARGS)
4053 BTF_KFUNCS_END(common_btf_ids)
4054 
4055 static const struct btf_kfunc_id_set common_kfunc_set = {
4056 	.owner = THIS_MODULE,
4057 	.set   = &common_btf_ids,
4058 };
4059 
4060 static int __init kfunc_init(void)
4061 {
4062 	int ret;
4063 	const struct btf_id_dtor_kfunc generic_dtors[] = {
4064 		{
4065 			.btf_id       = generic_dtor_ids[0],
4066 			.kfunc_btf_id = generic_dtor_ids[1]
4067 		},
4068 #ifdef CONFIG_CGROUPS
4069 		{
4070 			.btf_id       = generic_dtor_ids[2],
4071 			.kfunc_btf_id = generic_dtor_ids[3]
4072 		},
4073 #endif
4074 	};
4075 
4076 	ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &generic_kfunc_set);
4077 	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &generic_kfunc_set);
4078 	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &generic_kfunc_set);
4079 	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &generic_kfunc_set);
4080 	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &generic_kfunc_set);
4081 	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SKB, &generic_kfunc_set);
4082 	ret = ret ?: register_btf_id_dtor_kfuncs(generic_dtors,
4083 						  ARRAY_SIZE(generic_dtors),
4084 						  THIS_MODULE);
4085 	return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &common_kfunc_set);
4086 }
4087 
4088 late_initcall(kfunc_init);
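
/*
 * Registration sketch (illustrative): any other subsystem or kernel module
 * exposing kfuncs can follow the same pattern as kfunc_init() above.  The
 * names my_kfunc/my_kfunc_ids/my_init are hypothetical placeholders.
 *
 *    BTF_KFUNCS_START(my_kfunc_ids)
 *    BTF_ID_FLAGS(func, my_kfunc, KF_TRUSTED_ARGS)
 *    BTF_KFUNCS_END(my_kfunc_ids)
 *
 *    static const struct btf_kfunc_id_set my_kfunc_set = {
 *        .owner = THIS_MODULE,
 *        .set   = &my_kfunc_ids,
 *    };
 *
 *    static int __init my_init(void)
 *    {
 *        return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING,
 *                                         &my_kfunc_set);
 *    }
 *    late_initcall(my_init);
 */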
4089 
4090 /* Get a pointer to dynptr data up to len bytes for read-only access. If
4091  * the dynptr doesn't have contiguous data up to len bytes, return NULL.
4092  */
4093 const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len)
4094 {
4095 	const struct bpf_dynptr *p = (struct bpf_dynptr *)ptr;
4096 
4097 	return bpf_dynptr_slice(p, 0, NULL, len);
4098 }
4099 
4100 /* Get a pointer to dynptr data up to len bytes for read-write access. If
4101  * the dynptr doesn't have contiguous data up to len bytes, or the dynptr
4102  * is read-only, return NULL.
4103  */
4104 void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len)
4105 {
4106 	if (__bpf_dynptr_is_rdonly(ptr))
4107 		return NULL;
4108 	return (void *)__bpf_dynptr_data(ptr, len);
4109 }
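
/*
 * Usage sketch (illustrative): the typical in-kernel caller pattern,
 * mirroring bpf_verify_pkcs7_signature() above.  The NULL check matters
 * because a dynptr without contiguous data cannot be exposed as one slice.
 *
 *    u32 len = __bpf_dynptr_size(ptr);
 *    void *buf = __bpf_dynptr_data_rw(ptr, len);
 *
 *    if (!buf)
 *        return -EINVAL;
 *    memset(buf, 0, len);
 */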
4110