1 #ifndef __BPF_EXPERIMENTAL__ 2 #define __BPF_EXPERIMENTAL__ 3 4 #include <vmlinux.h> 5 #include <bpf/bpf_tracing.h> 6 #include <bpf/bpf_helpers.h> 7 #include <bpf/bpf_core_read.h> 8 #include <bpf_may_goto.h> 9 10 #define __contains(name, node) __attribute__((btf_decl_tag("contains:" #name ":" #node))) 11 12 /* Convenience macro to wrap over bpf_obj_new */ 13 #define bpf_obj_new(type) ((type *)bpf_obj_new(bpf_core_type_id_local(type))) 14 15 /* Convenience macro to wrap over bpf_percpu_obj_new */ 16 #define bpf_percpu_obj_new(type) ((type __percpu_kptr *)bpf_percpu_obj_new(bpf_core_type_id_local(type))) 17 18 struct bpf_iter_task_vma; 19 20 extern int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it, 21 struct task_struct *task, 22 __u64 addr) __ksym; 23 extern struct vm_area_struct *bpf_iter_task_vma_next(struct bpf_iter_task_vma *it) __ksym; 24 extern void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it) __ksym; 25 26 /* Description 27 * Throw a BPF exception from the program, immediately terminating its 28 * execution and unwinding the stack. The supplied 'cookie' parameter 29 * will be the return value of the program when an exception is thrown, 30 * and the default exception callback is used. Otherwise, if an exception 31 * callback is set using the '__exception_cb(callback)' declaration tag 32 * on the main program, the 'cookie' parameter will be the callback's only 33 * input argument. 34 * 35 * Thus, in case of default exception callback, 'cookie' is subjected to 36 * constraints on the program's return value (as with R0 on exit). 37 * Otherwise, the return value of the marked exception callback will be 38 * subjected to the same checks. 39 * 40 * Note that throwing an exception with lingering resources (locks, 41 * references, etc.) will lead to a verification error. 42 * 43 * Note that callbacks *cannot* call this helper. 44 * Returns 45 * Never. 46 * Throws 47 * An exception with the specified 'cookie' value. 48 */ 49 extern void bpf_throw(u64 cookie) __ksym; 50 51 /* Description 52 * Acquire a reference on the exe_file member field belonging to the 53 * mm_struct that is nested within the supplied task_struct. The supplied 54 * task_struct must be trusted/referenced. 55 * Returns 56 * A referenced file pointer pointing to the exe_file member field of the 57 * mm_struct nested in the supplied task_struct, or NULL. 58 */ 59 extern struct file *bpf_get_task_exe_file(struct task_struct *task) __ksym; 60 61 /* Description 62 * Release a reference on the supplied file. The supplied file must be 63 * acquired. 64 */ 65 extern void bpf_put_file(struct file *file) __ksym; 66 67 /* Description 68 * Resolve a pathname for the supplied path and store it in the supplied 69 * buffer. The supplied path must be trusted/referenced. 70 * Returns 71 * A positive integer corresponding to the length of the resolved pathname, 72 * including the NULL termination character, stored in the supplied 73 * buffer. On error, a negative integer is returned. 74 */ 75 extern int bpf_path_d_path(const struct path *path, char *buf, size_t buf__sz) __ksym; 76 77 /* This macro must be used to mark the exception callback corresponding to the 78 * main program. For example: 79 * 80 * int exception_cb(u64 cookie) { 81 * return cookie; 82 * } 83 * 84 * SEC("tc") 85 * __exception_cb(exception_cb) 86 * int main_prog(struct __sk_buff *ctx) { 87 * ... 88 * return TC_ACT_OK; 89 * } 90 * 91 * Here, exception callback for the main program will be 'exception_cb'. Note 92 * that this attribute can only be used once, and multiple exception callbacks 93 * specified for the main program will lead to verification error. 94 */ 95 #define __exception_cb(name) __attribute__((btf_decl_tag("exception_callback:" #name))) 96 97 #define __bpf_assert_signed(x) _Generic((x), \ 98 unsigned long: 0, \ 99 unsigned long long: 0, \ 100 signed long: 1, \ 101 signed long long: 1 \ 102 ) 103 104 #define __bpf_assert_check(LHS, op, RHS) \ 105 _Static_assert(sizeof(&(LHS)), "1st argument must be an lvalue expression"); \ 106 _Static_assert(sizeof(LHS) == 8, "Only 8-byte integers are supported\n"); \ 107 _Static_assert(__builtin_constant_p(__bpf_assert_signed(LHS)), "internal static assert"); \ 108 _Static_assert(__builtin_constant_p((RHS)), "2nd argument must be a constant expression") 109 110 #define __bpf_assert(LHS, op, cons, RHS, VAL) \ 111 ({ \ 112 (void)bpf_throw; \ 113 asm volatile ("if %[lhs] " op " %[rhs] goto +2; r1 = %[value]; call bpf_throw" \ 114 : : [lhs] "r"(LHS), [rhs] cons(RHS), [value] "ri"(VAL) : ); \ 115 }) 116 117 #define __bpf_assert_op_sign(LHS, op, cons, RHS, VAL, supp_sign) \ 118 ({ \ 119 __bpf_assert_check(LHS, op, RHS); \ 120 if (__bpf_assert_signed(LHS) && !(supp_sign)) \ 121 __bpf_assert(LHS, "s" #op, cons, RHS, VAL); \ 122 else \ 123 __bpf_assert(LHS, #op, cons, RHS, VAL); \ 124 }) 125 126 #define __bpf_assert_op(LHS, op, RHS, VAL, supp_sign) \ 127 ({ \ 128 if (sizeof(typeof(RHS)) == 8) { \ 129 const typeof(RHS) rhs_var = (RHS); \ 130 __bpf_assert_op_sign(LHS, op, "r", rhs_var, VAL, supp_sign); \ 131 } else { \ 132 __bpf_assert_op_sign(LHS, op, "i", RHS, VAL, supp_sign); \ 133 } \ 134 }) 135 136 #define __cmp_cannot_be_signed(x) \ 137 __builtin_strcmp(#x, "==") == 0 || __builtin_strcmp(#x, "!=") == 0 || \ 138 __builtin_strcmp(#x, "&") == 0 139 140 #define __is_signed_type(type) (((type)(-1)) < (type)1) 141 142 #define __bpf_cmp(LHS, OP, PRED, RHS, DEFAULT) \ 143 ({ \ 144 __label__ l_true; \ 145 bool ret = DEFAULT; \ 146 asm volatile goto("if %[lhs] " OP " %[rhs] goto %l[l_true]" \ 147 :: [lhs] "r"((short)LHS), [rhs] PRED (RHS) :: l_true); \ 148 ret = !DEFAULT; \ 149 l_true: \ 150 ret; \ 151 }) 152 153 /* C type conversions coupled with comparison operator are tricky. 154 * Make sure BPF program is compiled with -Wsign-compare then 155 * __lhs OP __rhs below will catch the mistake. 156 * Be aware that we check only __lhs to figure out the sign of compare. 157 */ 158 #define _bpf_cmp(LHS, OP, RHS, UNLIKELY) \ 159 ({ \ 160 typeof(LHS) __lhs = (LHS); \ 161 typeof(RHS) __rhs = (RHS); \ 162 bool ret; \ 163 _Static_assert(sizeof(&(LHS)), "1st argument must be an lvalue expression"); \ 164 (void)(__lhs OP __rhs); \ 165 if (__cmp_cannot_be_signed(OP) || !__is_signed_type(typeof(__lhs))) { \ 166 if (sizeof(__rhs) == 8) \ 167 /* "i" will truncate 64-bit constant into s32, \ 168 * so we have to use extra register via "r". \ 169 */ \ 170 ret = __bpf_cmp(__lhs, #OP, "r", __rhs, UNLIKELY); \ 171 else \ 172 ret = __bpf_cmp(__lhs, #OP, "ri", __rhs, UNLIKELY); \ 173 } else { \ 174 if (sizeof(__rhs) == 8) \ 175 ret = __bpf_cmp(__lhs, "s"#OP, "r", __rhs, UNLIKELY); \ 176 else \ 177 ret = __bpf_cmp(__lhs, "s"#OP, "ri", __rhs, UNLIKELY); \ 178 } \ 179 ret; \ 180 }) 181 182 #ifndef bpf_cmp_unlikely 183 #define bpf_cmp_unlikely(LHS, OP, RHS) _bpf_cmp(LHS, OP, RHS, true) 184 #endif 185 186 #ifndef bpf_cmp_likely 187 #define bpf_cmp_likely(LHS, OP, RHS) \ 188 ({ \ 189 bool ret = 0; \ 190 if (__builtin_strcmp(#OP, "==") == 0) \ 191 ret = _bpf_cmp(LHS, !=, RHS, false); \ 192 else if (__builtin_strcmp(#OP, "!=") == 0) \ 193 ret = _bpf_cmp(LHS, ==, RHS, false); \ 194 else if (__builtin_strcmp(#OP, "<=") == 0) \ 195 ret = _bpf_cmp(LHS, >, RHS, false); \ 196 else if (__builtin_strcmp(#OP, "<") == 0) \ 197 ret = _bpf_cmp(LHS, >=, RHS, false); \ 198 else if (__builtin_strcmp(#OP, ">") == 0) \ 199 ret = _bpf_cmp(LHS, <=, RHS, false); \ 200 else if (__builtin_strcmp(#OP, ">=") == 0) \ 201 ret = _bpf_cmp(LHS, <, RHS, false); \ 202 else \ 203 asm volatile("r0 " #OP " invalid compare"); \ 204 ret; \ 205 }) 206 #endif 207 208 #ifndef bpf_nop_mov 209 #define bpf_nop_mov(var) \ 210 asm volatile("%[reg]=%[reg]"::[reg]"r"((short)var)) 211 #endif 212 213 /* emit instruction: 214 * rX = rX .off = BPF_ADDR_SPACE_CAST .imm32 = (dst_as << 16) | src_as 215 */ 216 #ifndef bpf_addr_space_cast 217 #define bpf_addr_space_cast(var, dst_as, src_as)\ 218 asm volatile(".byte 0xBF; \ 219 .ifc %[reg], r0; \ 220 .byte 0x00; \ 221 .endif; \ 222 .ifc %[reg], r1; \ 223 .byte 0x11; \ 224 .endif; \ 225 .ifc %[reg], r2; \ 226 .byte 0x22; \ 227 .endif; \ 228 .ifc %[reg], r3; \ 229 .byte 0x33; \ 230 .endif; \ 231 .ifc %[reg], r4; \ 232 .byte 0x44; \ 233 .endif; \ 234 .ifc %[reg], r5; \ 235 .byte 0x55; \ 236 .endif; \ 237 .ifc %[reg], r6; \ 238 .byte 0x66; \ 239 .endif; \ 240 .ifc %[reg], r7; \ 241 .byte 0x77; \ 242 .endif; \ 243 .ifc %[reg], r8; \ 244 .byte 0x88; \ 245 .endif; \ 246 .ifc %[reg], r9; \ 247 .byte 0x99; \ 248 .endif; \ 249 .short %[off]; \ 250 .long %[as]" \ 251 : [reg]"+r"(var) \ 252 : [off]"i"(BPF_ADDR_SPACE_CAST) \ 253 , [as]"i"((dst_as << 16) | src_as)); 254 #endif 255 256 void bpf_preempt_disable(void) __weak __ksym; 257 void bpf_preempt_enable(void) __weak __ksym; 258 259 typedef struct { 260 } __bpf_preempt_t; 261 262 static inline __bpf_preempt_t __bpf_preempt_constructor(void) 263 { 264 __bpf_preempt_t ret = {}; 265 266 bpf_preempt_disable(); 267 return ret; 268 } 269 static inline void __bpf_preempt_destructor(__bpf_preempt_t *t) 270 { 271 bpf_preempt_enable(); 272 } 273 #define bpf_guard_preempt() \ 274 __bpf_preempt_t ___bpf_apply(preempt, __COUNTER__) \ 275 __attribute__((__unused__, __cleanup__(__bpf_preempt_destructor))) = \ 276 __bpf_preempt_constructor() 277 278 /* Description 279 * Assert that a conditional expression is true. 280 * Returns 281 * Void. 282 * Throws 283 * An exception with the value zero when the assertion fails. 284 */ 285 #define bpf_assert(cond) if (!(cond)) bpf_throw(0); 286 287 /* Description 288 * Assert that a conditional expression is true. 289 * Returns 290 * Void. 291 * Throws 292 * An exception with the specified value when the assertion fails. 293 */ 294 #define bpf_assert_with(cond, value) if (!(cond)) bpf_throw(value); 295 296 /* Description 297 * Assert that LHS is in the range [BEG, END] (inclusive of both). This 298 * statement updates the known bounds of LHS during verification. Note 299 * that both BEG and END must be constant values, and must fit within the 300 * data type of LHS. 301 * Returns 302 * Void. 303 * Throws 304 * An exception with the value zero when the assertion fails. 305 */ 306 #define bpf_assert_range(LHS, BEG, END) \ 307 ({ \ 308 _Static_assert(BEG <= END, "BEG must be <= END"); \ 309 barrier_var(LHS); \ 310 __bpf_assert_op(LHS, >=, BEG, 0, false); \ 311 __bpf_assert_op(LHS, <=, END, 0, false); \ 312 }) 313 314 /* Description 315 * Assert that LHS is in the range [BEG, END] (inclusive of both). This 316 * statement updates the known bounds of LHS during verification. Note 317 * that both BEG and END must be constant values, and must fit within the 318 * data type of LHS. 319 * Returns 320 * Void. 321 * Throws 322 * An exception with the specified value when the assertion fails. 323 */ 324 #define bpf_assert_range_with(LHS, BEG, END, value) \ 325 ({ \ 326 _Static_assert(BEG <= END, "BEG must be <= END"); \ 327 barrier_var(LHS); \ 328 __bpf_assert_op(LHS, >=, BEG, value, false); \ 329 __bpf_assert_op(LHS, <=, END, value, false); \ 330 }) 331 332 struct bpf_iter_css_task; 333 struct cgroup_subsys_state; 334 extern int bpf_iter_css_task_new(struct bpf_iter_css_task *it, 335 struct cgroup_subsys_state *css, unsigned int flags) __weak __ksym; 336 extern struct task_struct *bpf_iter_css_task_next(struct bpf_iter_css_task *it) __weak __ksym; 337 extern void bpf_iter_css_task_destroy(struct bpf_iter_css_task *it) __weak __ksym; 338 339 struct bpf_iter_task; 340 extern int bpf_iter_task_new(struct bpf_iter_task *it, 341 struct task_struct *task, unsigned int flags) __weak __ksym; 342 extern struct task_struct *bpf_iter_task_next(struct bpf_iter_task *it) __weak __ksym; 343 extern void bpf_iter_task_destroy(struct bpf_iter_task *it) __weak __ksym; 344 345 struct bpf_iter_css; 346 extern int bpf_iter_css_new(struct bpf_iter_css *it, 347 struct cgroup_subsys_state *start, unsigned int flags) __weak __ksym; 348 extern struct cgroup_subsys_state *bpf_iter_css_next(struct bpf_iter_css *it) __weak __ksym; 349 extern void bpf_iter_css_destroy(struct bpf_iter_css *it) __weak __ksym; 350 351 extern int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags) __weak __ksym; 352 extern int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) __weak __ksym; 353 354 struct bpf_iter_kmem_cache; 355 extern int bpf_iter_kmem_cache_new(struct bpf_iter_kmem_cache *it) __weak __ksym; 356 extern struct kmem_cache *bpf_iter_kmem_cache_next(struct bpf_iter_kmem_cache *it) __weak __ksym; 357 extern void bpf_iter_kmem_cache_destroy(struct bpf_iter_kmem_cache *it) __weak __ksym; 358 359 struct bpf_iter_dmabuf; 360 extern int bpf_iter_dmabuf_new(struct bpf_iter_dmabuf *it) __weak __ksym; 361 extern struct dma_buf *bpf_iter_dmabuf_next(struct bpf_iter_dmabuf *it) __weak __ksym; 362 extern void bpf_iter_dmabuf_destroy(struct bpf_iter_dmabuf *it) __weak __ksym; 363 364 extern int bpf_cgroup_read_xattr(struct cgroup *cgroup, const char *name__str, 365 struct bpf_dynptr *value_p) __weak __ksym; 366 367 #define PREEMPT_BITS 8 368 #define SOFTIRQ_BITS 8 369 #define HARDIRQ_BITS 4 370 #define NMI_BITS 4 371 372 #define PREEMPT_SHIFT 0 373 #define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS) 374 #define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS) 375 #define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS) 376 377 #define __IRQ_MASK(x) ((1UL << (x))-1) 378 379 #define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT) 380 #define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT) 381 #define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT) 382 383 #define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT) 384 385 extern bool CONFIG_PREEMPT_RT __kconfig __weak; 386 #ifdef bpf_target_x86 387 extern const int __preempt_count __ksym __weak; 388 389 struct pcpu_hot___local { 390 int preempt_count; 391 } __attribute__((preserve_access_index)); 392 393 extern struct pcpu_hot___local pcpu_hot __ksym __weak; 394 #endif 395 396 struct task_struct___preempt_rt { 397 int softirq_disable_cnt; 398 } __attribute__((preserve_access_index)); 399 400 #ifdef bpf_target_s390 401 extern struct lowcore *bpf_get_lowcore(void) __weak __ksym; 402 #endif 403 404 static inline int get_preempt_count(void) 405 { 406 #if defined(bpf_target_x86) 407 /* By default, read the per-CPU __preempt_count. */ 408 if (bpf_ksym_exists(&__preempt_count)) 409 return *(int *) bpf_this_cpu_ptr(&__preempt_count); 410 411 /* 412 * If __preempt_count does not exist, try to read preempt_count under 413 * struct pcpu_hot. Between v6.1 and v6.14 -- more specifically, 414 * [64701838bf057, 46e8fff6d45fe), preempt_count had been managed 415 * under struct pcpu_hot. 416 */ 417 if (bpf_core_field_exists(pcpu_hot.preempt_count)) 418 return ((struct pcpu_hot___local *) 419 bpf_this_cpu_ptr(&pcpu_hot))->preempt_count; 420 #elif defined(bpf_target_arm64) 421 return bpf_get_current_task_btf()->thread_info.preempt.count; 422 #elif defined(bpf_target_powerpc) 423 return bpf_get_current_task_btf()->thread_info.preempt_count; 424 #elif defined(bpf_target_s390) 425 return bpf_get_lowcore()->preempt_count; 426 #endif 427 return 0; 428 } 429 430 /* Description 431 * Report whether it is in interrupt context. Only works on the following archs: 432 * * x86 433 * * arm64 434 * * powerpc64 435 * * s390x 436 */ 437 static inline int bpf_in_interrupt(void) 438 { 439 struct task_struct___preempt_rt *tsk; 440 int pcnt; 441 442 pcnt = get_preempt_count(); 443 if (!CONFIG_PREEMPT_RT) 444 return pcnt & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_MASK); 445 446 tsk = (void *) bpf_get_current_task_btf(); 447 return (pcnt & (NMI_MASK | HARDIRQ_MASK)) | 448 (tsk->softirq_disable_cnt & SOFTIRQ_MASK); 449 } 450 451 /* Description 452 * Report whether it is in NMI context. Only works on the following archs: 453 * * x86 454 * * arm64 455 * * powerpc64 456 * * s390x 457 */ 458 static inline int bpf_in_nmi(void) 459 { 460 return get_preempt_count() & NMI_MASK; 461 } 462 463 /* Description 464 * Report whether it is in hard IRQ context. Only works on the following archs: 465 * * x86 466 * * arm64 467 * * powerpc64 468 * * s390x 469 */ 470 static inline int bpf_in_hardirq(void) 471 { 472 return get_preempt_count() & HARDIRQ_MASK; 473 } 474 475 /* Description 476 * Report whether it is in softirq context. Only works on the following archs: 477 * * x86 478 * * arm64 479 * * powerpc64 480 * * s390x 481 */ 482 static inline int bpf_in_serving_softirq(void) 483 { 484 struct task_struct___preempt_rt *tsk; 485 int pcnt; 486 487 pcnt = get_preempt_count(); 488 if (!CONFIG_PREEMPT_RT) 489 return (pcnt & SOFTIRQ_MASK) & SOFTIRQ_OFFSET; 490 491 tsk = (void *) bpf_get_current_task_btf(); 492 return (tsk->softirq_disable_cnt & SOFTIRQ_MASK) & SOFTIRQ_OFFSET; 493 } 494 495 /* Description 496 * Report whether it is in task context. Only works on the following archs: 497 * * x86 498 * * arm64 499 * * powerpc64 500 * * s390x 501 */ 502 static inline int bpf_in_task(void) 503 { 504 struct task_struct___preempt_rt *tsk; 505 int pcnt; 506 507 pcnt = get_preempt_count(); 508 if (!CONFIG_PREEMPT_RT) 509 return !(pcnt & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)); 510 511 tsk = (void *) bpf_get_current_task_btf(); 512 return !((pcnt & (NMI_MASK | HARDIRQ_MASK)) | 513 ((tsk->softirq_disable_cnt & SOFTIRQ_MASK) & SOFTIRQ_OFFSET)); 514 } 515 #endif 516