xref: /linux/tools/testing/selftests/bpf/bpf_experimental.h (revision 8c1e1c33fe5ad867bc0b6ba121911d70e7881d88)
1 #ifndef __BPF_EXPERIMENTAL__
2 #define __BPF_EXPERIMENTAL__
3 
4 #include <vmlinux.h>
5 #include <bpf/bpf_tracing.h>
6 #include <bpf/bpf_helpers.h>
7 #include <bpf/bpf_core_read.h>
8 #include <bpf_may_goto.h>
9 
/* Attach a BTF declaration tag of the form "contains:<name>:<node>" to the
 * annotated declaration. Both arguments are stringified verbatim.
 */
#define __contains(name, node) __attribute__((btf_decl_tag("contains:" #name ":" #node)))
11 
/* Convenience macro to wrap over bpf_obj_new.
 *
 * Expands to a call of bpf_obj_new with bpf_core_type_id_local(type) as its
 * only argument and casts the result to 'type *'. A function-like macro is
 * not re-expanded inside its own expansion, so the inner bpf_obj_new refers
 * to a function of the same name, presumably declared in one of the included
 * headers (not visible here).
 */
#define bpf_obj_new(type) ((type *)bpf_obj_new(bpf_core_type_id_local(type)))
14 
/* Convenience macro to wrap over bpf_percpu_obj_new.
 *
 * Expands to a call of bpf_percpu_obj_new with bpf_core_type_id_local(type)
 * as its only argument and casts the result to 'type __percpu_kptr *'. The
 * inner bpf_percpu_obj_new is not re-expanded as this macro; it refers to a
 * function of the same name, presumably declared in one of the included
 * headers (not visible here).
 */
#define bpf_percpu_obj_new(type) ((type __percpu_kptr *)bpf_percpu_obj_new(bpf_core_type_id_local(type)))
17 
/* Forward declaration; this header only passes pointers to the iterator. */
struct bpf_iter_task_vma;

/* Kernel symbols (__ksym) for the task VMA iterator. Judging by names and
 * signatures: _new() prepares 'it' for 'task' starting at 'addr', _next()
 * yields the next vm_area_struct, and _destroy() releases the iterator.
 * NOTE(review): the exact semantics are defined on the kernel side; confirm
 * there.
 */
extern int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it,
				 struct task_struct *task,
				 __u64 addr) __ksym;
extern struct vm_area_struct *bpf_iter_task_vma_next(struct bpf_iter_task_vma *it) __ksym;
extern void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it) __ksym;
25 
/* Description
 *	Throw a BPF exception from the program, immediately terminating its
 *	execution and unwinding the stack. When the default exception callback
 *	is in use, the supplied 'cookie' parameter becomes the return value of
 *	the program. Otherwise, if an exception callback is set using the
 *	'__exception_cb(callback)' declaration tag on the main program, the
 *	'cookie' parameter will be the callback's only input argument.
 *
 *	Thus, in case of default exception callback, 'cookie' is subjected to
 *	constraints on the program's return value (as with R0 on exit).
 *	Otherwise, the return value of the marked exception callback will be
 *	subjected to the same checks.
 *
 *	Note that throwing an exception with lingering resources (locks,
 *	references, etc.) will lead to a verification error.
 *
 *	Note that callbacks *cannot* call this helper.
 * Returns
 *	Never.
 * Throws
 *	An exception with the specified 'cookie' value.
 */
extern void bpf_throw(u64 cookie) __ksym;
50 
/* Description
 *	Acquire a reference on the exe_file member field belonging to the
 *	mm_struct that is nested within the supplied task_struct. The supplied
 *	task_struct must be trusted/referenced.
 * Returns
 *	A referenced file pointer pointing to the exe_file member field of the
 *	mm_struct nested in the supplied task_struct, or NULL.
 *	NOTE(review): a non-NULL result presumably has to be released with
 *	bpf_put_file(); confirm against the kernel-side documentation.
 */
extern struct file *bpf_get_task_exe_file(struct task_struct *task) __ksym;
60 
/* Description
 *	Release a reference on the supplied file. The supplied file must be
 *	acquired.
 * Returns
 *	Void.
 */
extern void bpf_put_file(struct file *file) __ksym;
66 
/* Description
 *	Resolve a pathname for the supplied path and store it in the supplied
 *	buffer. The supplied path must be trusted/referenced. 'buf__sz' is
 *	presumably the size of 'buf' in bytes (TODO confirm).
 * Returns
 *	A positive integer corresponding to the length of the resolved pathname,
 *	including the terminating NUL character, stored in the supplied
 *	buffer. On error, a negative integer is returned.
 */
extern int bpf_path_d_path(const struct path *path, char *buf, size_t buf__sz) __ksym;
76 
/* This macro must be used to mark the exception callback corresponding to the
 * main program. It expands to a BTF declaration tag of the form
 * "exception_callback:<name>". For example:
 *
 * int exception_cb(u64 cookie) {
 *	return cookie;
 * }
 *
 * SEC("tc")
 * __exception_cb(exception_cb)
 * int main_prog(struct __sk_buff *ctx) {
 *	...
 *	return TC_ACT_OK;
 * }
 *
 * Here, the exception callback for the main program will be 'exception_cb'.
 * Note that this attribute can only be used once; specifying multiple
 * exception callbacks for the main program will lead to a verification error.
 */
#define __exception_cb(name) __attribute__((btf_decl_tag("exception_callback:" #name)))
96 
/* Compile-time signedness probe for 8-byte integer expressions: yields 1 for
 * (signed) long / long long and 0 for their unsigned counterparts. There is
 * no default association, so any other type is rejected by the compiler.
 */
#define __bpf_assert_signed(expr)		\
	_Generic((expr),			\
		 signed long long: 1,		\
		 signed long: 1,		\
		 unsigned long long: 0,		\
		 unsigned long: 0)
103 
/* Compile-time sanity checks shared by the __bpf_assert_* macros:
 *  - LHS must be something whose address can be taken (an lvalue),
 *  - LHS must be exactly 8 bytes wide,
 *  - LHS must have one of the types accepted by __bpf_assert_signed(),
 *  - RHS must be a compile-time constant.
 * The 'op' argument is accepted but not used by the checks. The expansion
 * has no trailing semicolon; the caller supplies it.
 */
#define __bpf_assert_check(LHS, op, RHS)								 \
	_Static_assert(sizeof(&(LHS)), "1st argument must be an lvalue expression");			 \
	_Static_assert(sizeof(LHS) == 8, "Only 8-byte integers are supported\n");			 \
	_Static_assert(__builtin_constant_p(__bpf_assert_signed(LHS)), "internal static assert");	 \
	_Static_assert(__builtin_constant_p((RHS)), "2nd argument must be a constant expression")
109 
/* Emit the assertion as inline BPF assembly: if "LHS op RHS" holds, jump over
 * the next two instructions; otherwise load VAL into r1 and call bpf_throw.
 * 'op' is the string form of the jump condition and 'cons' is the asm
 * constraint string used for RHS.
 *
 * The "(void)bpf_throw" statement only references the symbol; presumably it
 * keeps the extern referenced even though the call itself appears solely
 * inside the asm string (TODO confirm).
 */
#define __bpf_assert(LHS, op, cons, RHS, VAL)							\
	({											\
		(void)bpf_throw;								\
		asm volatile ("if %[lhs] " op " %[rhs] goto +2; r1 = %[value]; call bpf_throw"	\
			       : : [lhs] "r"(LHS), [rhs] cons(RHS), [value] "ri"(VAL) : );	\
	})
116 
/* Run the compile-time checks, then emit the assertion with either the
 * "s"-prefixed (signed) or the plain form of 'op'. The signed form is chosen
 * when LHS has a signed type and 'supp_sign' is false.
 */
#define __bpf_assert_op_sign(LHS, op, cons, RHS, VAL, supp_sign)			\
	({										\
		__bpf_assert_check(LHS, op, RHS);					\
		if (__bpf_assert_signed(LHS) && !(supp_sign))				\
			__bpf_assert(LHS, "s" #op, cons, RHS, VAL);			\
		else									\
			__bpf_assert(LHS, #op, cons, RHS, VAL);				\
	 })
125 
/* Select the asm constraint for RHS by its size: an 8-byte RHS is first
 * copied into a const local and passed with the "r" (register) constraint;
 * anything else is passed directly with the "i" (immediate) constraint.
 */
#define __bpf_assert_op(LHS, op, RHS, VAL, supp_sign)					\
	({										\
		if (sizeof(typeof(RHS)) == 8) {						\
			const typeof(RHS) rhs_var = (RHS);				\
			__bpf_assert_op_sign(LHS, op, "r", rhs_var, VAL, supp_sign);	\
		} else {								\
			__bpf_assert_op_sign(LHS, op, "i", RHS, VAL, supp_sign);	\
		}									\
	 })
135 
/* Evaluates to non-zero when the operator token 'x' is one of ==, != or &.
 * _bpf_cmp() uses the plain jump, not the "s"-prefixed one, for these
 * operators regardless of the operand type.
 *
 * The whole expansion is parenthesized so that the macro behaves as a single
 * operand: without the outer parentheses, "!__cmp_cannot_be_signed(op)" or
 * "__cmp_cannot_be_signed(op) && cond" would bind to only part of the
 * || chain.
 */
#define __cmp_cannot_be_signed(x) \
	(__builtin_strcmp(#x, "==") == 0 || __builtin_strcmp(#x, "!=") == 0 || \
	 __builtin_strcmp(#x, "&") == 0)
139 
/* Non-zero when 'type' is a signed type: -1 converted to 'type' compares
 * below 1 only if the type can represent negative values.
 */
#define __is_signed_type(type) (((type)1) > ((type)(-1)))
141 
/* Core of the bpf_cmp_* macros: a single conditional jump in inline asm.
 *
 * 'ret' starts out as DEFAULT. When "LHS OP RHS" holds, the asm goto jumps
 * straight to l_true and 'ret' keeps that value; otherwise execution falls
 * through and 'ret' becomes !DEFAULT. OP is the string form of the jump
 * condition and PRED is the asm constraint string for RHS.
 *
 * NOTE(review): LHS is passed through a (short) cast; the reason is not
 * evident from this file -- confirm before touching it.
 */
#define __bpf_cmp(LHS, OP, PRED, RHS, DEFAULT)						\
	({											\
		__label__ l_true;								\
		bool ret = DEFAULT;								\
		asm volatile goto("if %[lhs] " OP " %[rhs] goto %l[l_true]"		\
				  :: [lhs] "r"((short)LHS), [rhs] PRED (RHS) :: l_true);	\
		ret = !DEFAULT;									\
l_true:												\
		ret;										\
       })
152 
/* C type conversions coupled with comparison operators are tricky.
 * Make sure the BPF program is compiled with -Wsign-compare; then the
 * "__lhs OP __rhs" expression below will catch the mistake.
 * Be aware that only __lhs is checked to figure out the sign of the compare.
 *
 * Both operands are evaluated once into typed locals. The plain jump is used
 * when OP is one of ==, != or &, or when __lhs has an unsigned type;
 * otherwise the "s"-prefixed (signed) jump is used. UNLIKELY is forwarded to
 * __bpf_cmp() as its DEFAULT value.
 */
#define _bpf_cmp(LHS, OP, RHS, UNLIKELY)								\
	({											\
		typeof(LHS) __lhs = (LHS);							\
		typeof(RHS) __rhs = (RHS);							\
		bool ret;									\
		_Static_assert(sizeof(&(LHS)), "1st argument must be an lvalue expression");	\
		(void)(__lhs OP __rhs);								\
		if (__cmp_cannot_be_signed(OP) || !__is_signed_type(typeof(__lhs))) {		\
			if (sizeof(__rhs) == 8)							\
				/* "i" will truncate 64-bit constant into s32,			\
				 * so we have to use extra register via "r".			\
				 */								\
				ret = __bpf_cmp(__lhs, #OP, "r", __rhs, UNLIKELY);		\
			else									\
				ret = __bpf_cmp(__lhs, #OP, "ri", __rhs, UNLIKELY);		\
		} else {									\
			if (sizeof(__rhs) == 8)							\
				ret = __bpf_cmp(__lhs, "s"#OP, "r", __rhs, UNLIKELY);		\
			else									\
				ret = __bpf_cmp(__lhs, "s"#OP, "ri", __rhs, UNLIKELY);		\
		}										\
		ret;										\
       })
181 
/* Evaluates to true when "LHS OP RHS" holds. The comparison is emitted with
 * OP as given, so the conditional jump is taken when the condition is true.
 * Can be overridden by defining bpf_cmp_unlikely before this point.
 */
#ifndef bpf_cmp_unlikely
#define bpf_cmp_unlikely(LHS, OP, RHS) _bpf_cmp(LHS, OP, RHS, true)
#endif
185 
/* Evaluates to true when "LHS OP RHS" holds. Unlike the direct form, the
 * emitted jump tests the inverted operator (== vs !=, <= vs >, < vs >=) with
 * a default of false, so the jump is taken when the condition is false.
 * Any other OP ends up in the final else branch, whose asm text is not a
 * valid instruction; presumably this is meant to break the build with a
 * recognizable message. Can be overridden by defining bpf_cmp_likely before
 * this point.
 */
#ifndef bpf_cmp_likely
#define bpf_cmp_likely(LHS, OP, RHS)								\
	({											\
		bool ret = 0;									\
		if (__builtin_strcmp(#OP, "==") == 0)						\
			ret = _bpf_cmp(LHS, !=, RHS, false);					\
		else if (__builtin_strcmp(#OP, "!=") == 0)					\
			ret = _bpf_cmp(LHS, ==, RHS, false);					\
		else if (__builtin_strcmp(#OP, "<=") == 0)					\
			ret = _bpf_cmp(LHS, >, RHS, false);					\
		else if (__builtin_strcmp(#OP, "<") == 0)					\
			ret = _bpf_cmp(LHS, >=, RHS, false);					\
		else if (__builtin_strcmp(#OP, ">") == 0)					\
			ret = _bpf_cmp(LHS, <=, RHS, false);					\
		else if (__builtin_strcmp(#OP, ">=") == 0)					\
			ret = _bpf_cmp(LHS, <, RHS, false);					\
		else										\
			asm volatile("r0 " #OP " invalid compare");				\
		ret;										\
       })
#endif
207 
/* Emit a move of a register to itself ("rX=rX") for the register holding
 * 'var'. Can be overridden by defining bpf_nop_mov before this point.
 * NOTE(review): the purpose of the (short) cast is not evident from this
 * file -- confirm before changing it.
 */
#ifndef bpf_nop_mov
#define bpf_nop_mov(var) \
	asm volatile("%[reg]=%[reg]"::[reg]"r"((short)var))
#endif
212 
/* emit instruction:
 * rX = rX .off = BPF_ADDR_SPACE_CAST .imm32 = (dst_as << 16) | src_as
 *
 * The instruction is assembled from raw bytes: a 0xBF byte, then a register
 * byte selected by the .ifc chain (both nibbles carry the number of the
 * register the compiler picked for 'var', e.g. 0x33 for r3), then the 16-bit
 * offset and the 32-bit immediate. 'var' is a read-write ("+r") operand.
 * Note that the expansion already ends in a semicolon.
 */
#ifndef bpf_addr_space_cast
#define bpf_addr_space_cast(var, dst_as, src_as)\
	asm volatile(".byte 0xBF;		\
		     .ifc %[reg], r0;		\
		     .byte 0x00;		\
		     .endif;			\
		     .ifc %[reg], r1;		\
		     .byte 0x11;		\
		     .endif;			\
		     .ifc %[reg], r2;		\
		     .byte 0x22;		\
		     .endif;			\
		     .ifc %[reg], r3;		\
		     .byte 0x33;		\
		     .endif;			\
		     .ifc %[reg], r4;		\
		     .byte 0x44;		\
		     .endif;			\
		     .ifc %[reg], r5;		\
		     .byte 0x55;		\
		     .endif;			\
		     .ifc %[reg], r6;		\
		     .byte 0x66;		\
		     .endif;			\
		     .ifc %[reg], r7;		\
		     .byte 0x77;		\
		     .endif;			\
		     .ifc %[reg], r8;		\
		     .byte 0x88;		\
		     .endif;			\
		     .ifc %[reg], r9;		\
		     .byte 0x99;		\
		     .endif;			\
		     .short %[off];		\
		     .long %[as]"		\
		     : [reg]"+r"(var)		\
		     : [off]"i"(BPF_ADDR_SPACE_CAST) \
		     , [as]"i"((dst_as << 16) | src_as));
#endif
255 
/* Weak kernel symbols used by the preemption guard helpers in this header.
 * Going by their names they disable and re-enable preemption; the exact
 * semantics are defined on the kernel side.
 */
void bpf_preempt_disable(void) __weak __ksym;
void bpf_preempt_enable(void) __weak __ksym;
258 
/* Empty marker type: it carries no data and exists only so that a variable
 * of this type can have a cleanup handler attached (see bpf_guard_preempt).
 */
typedef struct {
} __bpf_preempt_t;
261 
262 static inline __bpf_preempt_t __bpf_preempt_constructor(void)
263 {
264 	__bpf_preempt_t ret = {};
265 
266 	bpf_preempt_disable();
267 	return ret;
268 }
/* Cleanup handler for a preemption guard variable: calls
 * bpf_preempt_enable(). The pointer argument is required by the cleanup
 * attribute's calling convention and is not used.
 */
static inline void __bpf_preempt_destructor(__bpf_preempt_t *t)
{
	bpf_preempt_enable();
}
/* Declare a guard variable that is initialized by __bpf_preempt_constructor()
 * and has __bpf_preempt_destructor() attached as its cleanup handler, so the
 * destructor runs when the variable goes out of scope. The variable name
 * comes from ___bpf_apply(preempt, __COUNTER__), which is defined outside
 * this header -- presumably it yields a distinct identifier per use.
 */
#define bpf_guard_preempt() \
	__bpf_preempt_t ___bpf_apply(preempt, __COUNTER__)			\
	__attribute__((__unused__, __cleanup__(__bpf_preempt_destructor))) =	\
	__bpf_preempt_constructor()
277 
/* Description
 *	Assert that a conditional expression is true.
 *
 *	Note: this expands to a bare 'if' statement that already ends in a
 *	semicolon. Wrap the call in braces when using it as the body of an
 *	if/else, otherwise a following 'else' will not parse as intended.
 * Returns
 *	Void.
 * Throws
 *	An exception with the value zero when the assertion fails.
 */
#define bpf_assert(cond) if (!(cond)) bpf_throw(0);
286 
/* Description
 *	Assert that a conditional expression is true.
 *
 *	Note: this expands to a bare 'if' statement that already ends in a
 *	semicolon. Wrap the call in braces when using it as the body of an
 *	if/else, otherwise a following 'else' will not parse as intended.
 * Returns
 *	Void.
 * Throws
 *	An exception with the specified value when the assertion fails.
 */
#define bpf_assert_with(cond, value) if (!(cond)) bpf_throw(value);
295 
/* Description
 *	Assert that LHS is in the range [BEG, END] (inclusive of both). This
 *	statement updates the known bounds of LHS during verification. Note
 *	that both BEG and END must be constant values, and must fit within the
 *	data type of LHS.
 *
 *	The bounds are checked by two separate assertions (LHS >= BEG, then
 *	LHS <= END); for a signed LHS the signed comparison is used. BEG <= END
 *	is verified at compile time.
 * Returns
 *	Void.
 * Throws
 *	An exception with the value zero when the assertion fails.
 */
#define bpf_assert_range(LHS, BEG, END)					\
	({								\
		_Static_assert(BEG <= END, "BEG must be <= END");	\
		barrier_var(LHS);					\
		__bpf_assert_op(LHS, >=, BEG, 0, false);		\
		__bpf_assert_op(LHS, <=, END, 0, false);		\
	})
313 
/* Description
 *	Assert that LHS is in the range [BEG, END] (inclusive of both). This
 *	statement updates the known bounds of LHS during verification. Note
 *	that both BEG and END must be constant values, and must fit within the
 *	data type of LHS.
 *
 *	The bounds are checked by two separate assertions (LHS >= BEG, then
 *	LHS <= END); for a signed LHS the signed comparison is used. BEG <= END
 *	is verified at compile time.
 * Returns
 *	Void.
 * Throws
 *	An exception with the specified value when the assertion fails.
 */
#define bpf_assert_range_with(LHS, BEG, END, value)			\
	({								\
		_Static_assert(BEG <= END, "BEG must be <= END");	\
		barrier_var(LHS);					\
		__bpf_assert_op(LHS, >=, BEG, value, false);		\
		__bpf_assert_op(LHS, <=, END, value, false);		\
	})
331 
/* Weak kernel symbols for three iterator families. Each family follows the
 * same new/next/destroy shape; the meaning of 'flags' and the iteration
 * order are defined on the kernel side and are not visible here.
 */

/* Iterator yielding task_struct pointers for a cgroup_subsys_state. */
struct bpf_iter_css_task;
struct cgroup_subsys_state;
extern int bpf_iter_css_task_new(struct bpf_iter_css_task *it,
		struct cgroup_subsys_state *css, unsigned int flags) __weak __ksym;
extern struct task_struct *bpf_iter_css_task_next(struct bpf_iter_css_task *it) __weak __ksym;
extern void bpf_iter_css_task_destroy(struct bpf_iter_css_task *it) __weak __ksym;

/* Iterator yielding task_struct pointers, set up from a task_struct. */
struct bpf_iter_task;
extern int bpf_iter_task_new(struct bpf_iter_task *it,
		struct task_struct *task, unsigned int flags) __weak __ksym;
extern struct task_struct *bpf_iter_task_next(struct bpf_iter_task *it) __weak __ksym;
extern void bpf_iter_task_destroy(struct bpf_iter_task *it) __weak __ksym;

/* Iterator yielding cgroup_subsys_state pointers, beginning at 'start'. */
struct bpf_iter_css;
extern int bpf_iter_css_new(struct bpf_iter_css *it,
				struct cgroup_subsys_state *start, unsigned int flags) __weak __ksym;
extern struct cgroup_subsys_state *bpf_iter_css_next(struct bpf_iter_css *it) __weak __ksym;
extern void bpf_iter_css_destroy(struct bpf_iter_css *it) __weak __ksym;
350 
/* Weak kernel symbols operating on a struct bpf_wq. Going by the names,
 * bpf_wq_init() initializes 'wq' (with 'p__map' presumably being the map
 * that holds it) and bpf_wq_start() starts it -- TODO confirm against the
 * kernel-side documentation.
 */
extern int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags) __weak __ksym;
extern int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) __weak __ksym;
353 
/* Weak kernel symbols for an iterator yielding struct kmem_cache pointers.
 * _new() takes no argument besides the iterator itself.
 */
struct bpf_iter_kmem_cache;
extern int bpf_iter_kmem_cache_new(struct bpf_iter_kmem_cache *it) __weak __ksym;
extern struct kmem_cache *bpf_iter_kmem_cache_next(struct bpf_iter_kmem_cache *it) __weak __ksym;
extern void bpf_iter_kmem_cache_destroy(struct bpf_iter_kmem_cache *it) __weak __ksym;

/* Weak kernel symbols for an iterator yielding struct dma_buf pointers.
 * _new() takes no argument besides the iterator itself.
 */
struct bpf_iter_dmabuf;
extern int bpf_iter_dmabuf_new(struct bpf_iter_dmabuf *it) __weak __ksym;
extern struct dma_buf *bpf_iter_dmabuf_next(struct bpf_iter_dmabuf *it) __weak __ksym;
extern void bpf_iter_dmabuf_destroy(struct bpf_iter_dmabuf *it) __weak __ksym;
363 
/* Weak kernel symbol. Going by the name and signature, reads the extended
 * attribute 'name__str' of 'cgroup' into the dynptr 'value_p' -- TODO confirm
 * the return-value convention against the kernel-side documentation.
 */
extern int bpf_cgroup_read_xattr(struct cgroup *cgroup, const char *name__str,
				 struct bpf_dynptr *value_p) __weak __ksym;
366 
/* Bit layout of the preemption counter as used by the bpf_in_*() helpers
 * in this header. With the widths below the fields occupy:
 *	bits  0- 7: PREEMPT
 *	bits  8-15: SOFTIRQ
 *	bits 16-19: HARDIRQ
 *	bits 20-23: NMI
 */
#define PREEMPT_BITS	8
#define SOFTIRQ_BITS	8
#define HARDIRQ_BITS	4
#define NMI_BITS	4

/* Each field starts where the previous one ends. */
#define PREEMPT_SHIFT	0
#define SOFTIRQ_SHIFT	(PREEMPT_SHIFT + PREEMPT_BITS)
#define HARDIRQ_SHIFT	(SOFTIRQ_SHIFT + SOFTIRQ_BITS)
#define NMI_SHIFT	(HARDIRQ_SHIFT + HARDIRQ_BITS)

/* Mask with the low 'x' bits set. */
#define __IRQ_MASK(x)	((1UL << (x))-1)

#define SOFTIRQ_MASK	(__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
#define HARDIRQ_MASK	(__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
#define NMI_MASK	(__IRQ_MASK(NMI_BITS)     << NMI_SHIFT)

/* Lowest bit of the SOFTIRQ field. */
#define SOFTIRQ_OFFSET	(1UL << SOFTIRQ_SHIFT)
384 
/* Kernel configuration value (weak __kconfig extern); the bpf_in_*() helpers
 * in this header branch on it.
 */
extern bool CONFIG_PREEMPT_RT __kconfig __weak;
#ifdef bpf_target_x86
/* Two possible x86 sources of the preemption counter, both weak so that
 * get_preempt_count() can probe which one is available.
 */
extern const int __preempt_count __ksym __weak;

/* Local view of struct pcpu_hot containing only the field read here. */
struct pcpu_hot___local {
	int preempt_count;
} __attribute__((preserve_access_index));

extern struct pcpu_hot___local pcpu_hot __ksym __weak;
#endif

/* Local view of task_struct containing only softirq_disable_cnt, which the
 * bpf_in_*() helpers read when CONFIG_PREEMPT_RT is set.
 */
struct task_struct___preempt_rt {
	int softirq_disable_cnt;
} __attribute__((preserve_access_index));

#ifdef bpf_target_s390
/* Weak kernel symbol; get_preempt_count() reads preempt_count through it. */
extern struct lowcore *bpf_get_lowcore(void) __weak __ksym;
#endif
403 
/* Fetch the preemption counter.
 *
 * Where the counter lives depends on the target architecture selected at
 * compile time. On targets without a branch below, and on x86 when neither
 * probed symbol is available, 0 is returned.
 */
static inline int get_preempt_count(void)
{
#if defined(bpf_target_x86)
	/* Preferred source: the per-CPU __preempt_count variable. */
	if (bpf_ksym_exists(&__preempt_count)) {
		int *count = (int *) bpf_this_cpu_ptr(&__preempt_count);

		return *count;
	}

	/*
	 * Fallback when __preempt_count is absent: preempt_count kept inside
	 * struct pcpu_hot. Between v6.1 and v6.14 -- more specifically,
	 * [64701838bf057, 46e8fff6d45fe), preempt_count had been managed
	 * under struct pcpu_hot.
	 */
	if (bpf_core_field_exists(pcpu_hot.preempt_count)) {
		struct pcpu_hot___local *hot;

		hot = (struct pcpu_hot___local *) bpf_this_cpu_ptr(&pcpu_hot);
		return hot->preempt_count;
	}
#elif defined(bpf_target_arm64)
	return bpf_get_current_task_btf()->thread_info.preempt.count;
#elif defined(bpf_target_powerpc)
	return bpf_get_current_task_btf()->thread_info.preempt_count;
#elif defined(bpf_target_s390)
	return bpf_get_lowcore()->preempt_count;
#endif
	/* No known source for the counter. */
	return 0;
}
429 
430 /* Description
431  *	Report whether it is in interrupt context. Only works on the following archs:
432  *	* x86
433  *	* arm64
434  *	* powerpc64
435  *	* s390x
436  */
437 static inline int bpf_in_interrupt(void)
438 {
439 	struct task_struct___preempt_rt *tsk;
440 	int pcnt;
441 
442 	pcnt = get_preempt_count();
443 	if (!CONFIG_PREEMPT_RT)
444 		return pcnt & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_MASK);
445 
446 	tsk = (void *) bpf_get_current_task_btf();
447 	return (pcnt & (NMI_MASK | HARDIRQ_MASK)) |
448 	       (tsk->softirq_disable_cnt & SOFTIRQ_MASK);
449 }
450 
451 /* Description
452  *	Report whether it is in NMI context. Only works on the following archs:
453  *	* x86
454  *	* arm64
455  *	* powerpc64
456  *	* s390x
457  */
458 static inline int bpf_in_nmi(void)
459 {
460 	return get_preempt_count() & NMI_MASK;
461 }
462 
463 /* Description
464  *	Report whether it is in hard IRQ context. Only works on the following archs:
465  *	* x86
466  *	* arm64
467  *	* powerpc64
468  *	* s390x
469  */
470 static inline int bpf_in_hardirq(void)
471 {
472 	return get_preempt_count() & HARDIRQ_MASK;
473 }
474 
475 /* Description
476  *	Report whether it is in softirq context. Only works on the following archs:
477  *	* x86
478  *	* arm64
479  *	* powerpc64
480  *	* s390x
481  */
482 static inline int bpf_in_serving_softirq(void)
483 {
484 	struct task_struct___preempt_rt *tsk;
485 	int pcnt;
486 
487 	pcnt = get_preempt_count();
488 	if (!CONFIG_PREEMPT_RT)
489 		return (pcnt & SOFTIRQ_MASK) & SOFTIRQ_OFFSET;
490 
491 	tsk = (void *) bpf_get_current_task_btf();
492 	return (tsk->softirq_disable_cnt & SOFTIRQ_MASK) & SOFTIRQ_OFFSET;
493 }
494 
495 /* Description
496  *	Report whether it is in task context. Only works on the following archs:
497  *	* x86
498  *	* arm64
499  *	* powerpc64
500  *	* s390x
501  */
502 static inline int bpf_in_task(void)
503 {
504 	struct task_struct___preempt_rt *tsk;
505 	int pcnt;
506 
507 	pcnt = get_preempt_count();
508 	if (!CONFIG_PREEMPT_RT)
509 		return !(pcnt & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET));
510 
511 	tsk = (void *) bpf_get_current_task_btf();
512 	return !((pcnt & (NMI_MASK | HARDIRQ_MASK)) |
513 		 ((tsk->softirq_disable_cnt & SOFTIRQ_MASK) & SOFTIRQ_OFFSET));
514 }
515 #endif
516