/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
 * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
 * Copyright (c) 2022 David Vernet <dvernet@meta.com>
 */
#ifndef __SCX_COMMON_BPF_H
#define __SCX_COMMON_BPF_H

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <asm-generic/errno.h>
#include "user_exit_info.h"

#define PF_WQ_WORKER			0x00000020	/* I'm a workqueue worker */
#define PF_KTHREAD			0x00200000	/* I am a kernel thread */
#define PF_EXITING			0x00000004
#define CLOCK_MONOTONIC			1

/*
 * Earlier versions of clang/pahole lost the upper 32 bits in 64-bit enums
 * which can lead to really confusing misbehaviors. Let's trigger a build
 * failure.
 */
static inline void ___vmlinux_h_sanity_check___(void)
{
	_Static_assert(SCX_DSQ_FLAG_BUILTIN,
		       "bpftool generated vmlinux.h is missing high bits for 64bit enums, upgrade clang and pahole");
}

s32 scx_bpf_create_dsq(u64 dsq_id, s32 node) __ksym;
s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, bool *is_idle) __ksym;
void scx_bpf_dispatch(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_flags) __ksym;
void scx_bpf_dispatch_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym;
u32 scx_bpf_dispatch_nr_slots(void) __ksym;
void scx_bpf_dispatch_cancel(void) __ksym;
bool scx_bpf_consume(u64 dsq_id) __ksym;
u32 scx_bpf_reenqueue_local(void) __ksym;
void scx_bpf_kick_cpu(s32 cpu, u64 flags) __ksym;
s32 scx_bpf_dsq_nr_queued(u64 dsq_id) __ksym;
void scx_bpf_destroy_dsq(u64 dsq_id) __ksym;
void scx_bpf_exit_bstr(s64 exit_code, char *fmt, unsigned long long *data, u32 data__sz) __ksym __weak;
void scx_bpf_error_bstr(char *fmt, unsigned long long *data, u32 data__sz) __ksym;
void scx_bpf_dump_bstr(char *fmt, unsigned long long *data, u32 data__sz) __ksym __weak;
u32 scx_bpf_cpuperf_cap(s32 cpu) __ksym __weak;
u32 scx_bpf_cpuperf_cur(s32 cpu) __ksym __weak;
void scx_bpf_cpuperf_set(s32 cpu, u32 perf) __ksym __weak;
u32 scx_bpf_nr_cpu_ids(void) __ksym __weak;
const struct cpumask *scx_bpf_get_possible_cpumask(void) __ksym __weak;
const struct cpumask *scx_bpf_get_online_cpumask(void) __ksym __weak;
void scx_bpf_put_cpumask(const struct cpumask *cpumask) __ksym __weak;
const struct cpumask *scx_bpf_get_idle_cpumask(void) __ksym;
const struct cpumask *scx_bpf_get_idle_smtmask(void) __ksym;
void scx_bpf_put_idle_cpumask(const struct cpumask *cpumask) __ksym;
bool scx_bpf_test_and_clear_cpu_idle(s32 cpu) __ksym;
s32 scx_bpf_pick_idle_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym;
s32 scx_bpf_pick_any_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym;
bool scx_bpf_task_running(const struct task_struct *p) __ksym;
s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym;
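
/*
 * Example usage of the kfuncs above: a minimal sketch, assuming a hypothetical
 * ops.select_cpu() callback. Kept as a comment so this header adds no code to
 * schedulers that include it; everything other than the kfuncs and the SCX_*
 * constants is illustrative.
 *
 *	s32 example_select_cpu(struct task_struct *p, s32 prev_cpu, u64 wake_flags)
 *	{
 *		bool is_idle;
 *		s32 cpu = scx_bpf_select_cpu_dfl(p, prev_cpu, wake_flags, &is_idle);
 *
 *		// An idle CPU was found; dispatch straight to its local DSQ.
 *		if (is_idle)
 *			scx_bpf_dispatch(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);
 *		return cpu;
 *	}
 */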

static inline __attribute__((format(printf, 1, 2)))
void ___scx_bpf_bstr_format_checker(const char *fmt, ...) {}

/*
 * Helper macro for initializing the fmt and variadic argument inputs to the
 * bstr kfuncs. Users of this macro should use ___fmt and ___param to refer to
 * the initialized list of inputs to the bstr kfunc.
 */
#define scx_bpf_bstr_preamble(fmt, args...)				\
	static char ___fmt[] = fmt;					\
	/*								\
	 * Note that ___param[] must have at least one			\
	 * element to keep the verifier happy.				\
	 */								\
	unsigned long long ___param[___bpf_narg(args) ?: 1] = {};	\
									\
	_Pragma("GCC diagnostic push")					\
	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")		\
	___bpf_fill(___param, args);					\
	_Pragma("GCC diagnostic pop")					\

/*
 * scx_bpf_exit() wraps the scx_bpf_exit_bstr() kfunc with variadic arguments
 * instead of an array of u64. Using this macro will cause the scheduler to
 * exit cleanly with the specified exit code being passed to user space.
 */
#define scx_bpf_exit(code, fmt, args...)				\
({									\
	scx_bpf_bstr_preamble(fmt, args)				\
	scx_bpf_exit_bstr(code, ___fmt, ___param, sizeof(___param));	\
	___scx_bpf_bstr_format_checker(fmt, ##args);			\
})

/*
 * scx_bpf_error() wraps the scx_bpf_error_bstr() kfunc with variadic arguments
 * instead of an array of u64. Invoking this macro will cause the scheduler to
 * exit in an erroneous state, with diagnostic information being passed to the
 * user.
 */
#define scx_bpf_error(fmt, args...)					\
({									\
	scx_bpf_bstr_preamble(fmt, args)				\
	scx_bpf_error_bstr(___fmt, ___param, sizeof(___param));		\
	___scx_bpf_bstr_format_checker(fmt, ##args);			\
})

/*
 * scx_bpf_dump() wraps the scx_bpf_dump_bstr() kfunc with variadic arguments
 * instead of an array of u64. To be used from ops.dump() and friends.
 */
#define scx_bpf_dump(fmt, args...)					\
({									\
	scx_bpf_bstr_preamble(fmt, args)				\
	scx_bpf_dump_bstr(___fmt, ___param, sizeof(___param));		\
	___scx_bpf_bstr_format_checker(fmt, ##args);			\
})

#define BPF_STRUCT_OPS(name, args...)					\
SEC("struct_ops/"#name)							\
BPF_PROG(name, ##args)

#define BPF_STRUCT_OPS_SLEEPABLE(name, args...)				\
SEC("struct_ops.s/"#name)						\
BPF_PROG(name, ##args)
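
/*
 * Example usage of scx_bpf_error() and the struct_ops wrappers above: a
 * minimal sketch of a sleepable ops.init() callback. MY_DSQ_ID is a
 * hypothetical scheduler-defined DSQ id; the snippet is illustrative only.
 *
 *	s32 BPF_STRUCT_OPS_SLEEPABLE(example_init)
 *	{
 *		s32 ret = scx_bpf_create_dsq(MY_DSQ_ID, -1);
 *
 *		if (ret)	// abort the scheduler with a diagnostic
 *			scx_bpf_error("failed to create DSQ: %d", ret);
 *		return ret;
 *	}
 */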

/**
 * RESIZABLE_ARRAY - Generates annotations for an array that may be resized
 * @elfsec: the data section of the BPF program in which to place the array
 * @arr: the name of the array
 *
 * libbpf has an API for setting map value sizes. Since data sections (i.e.
 * bss, data, rodata) themselves are maps, a data section can be resized. If
 * a data section has an array as its last element, the BTF info for that
 * array will be adjusted so that the length of the array is extended to meet
 * the new length of the data section. This macro annotates an array to have
 * an element count of one with the assumption that this array can be resized
 * within the userspace program. It also annotates the section specifier so
 * this array exists in a custom sub data section which can be resized
 * independently.
 *
 * See RESIZE_ARRAY() for the userspace convenience macro for resizing an
 * array declared with RESIZABLE_ARRAY().
 */
#define RESIZABLE_ARRAY(elfsec, arr) arr[1] SEC("."#elfsec"."#arr)

/**
 * MEMBER_VPTR - Obtain the verified pointer to a struct or array member
 * @base: struct or array to index
 * @member: dereferenced member (e.g. .field, [idx0][idx1], .field[idx0] ...)
 *
 * The verifier often gets confused by the instruction sequence the compiler
 * generates for indexing struct fields or arrays. This macro forces the
 * compiler to generate a code sequence which first calculates the byte offset,
 * checks it against the struct or array size and adds that byte offset to
 * generate the pointer to the member to help the verifier.
 *
 * Ideally, we want to abort if the calculated offset is out-of-bounds. However,
 * BPF currently doesn't support abort, so evaluate to %NULL instead. The caller
 * must check for %NULL and take appropriate action to appease the verifier. To
 * avoid confusing the verifier, it's best to check for %NULL and dereference
 * immediately.
 *
 *	vptr = MEMBER_VPTR(my_array, [i][j]);
 *	if (!vptr)
 *		return error;
 *	*vptr = new_value;
 *
 * sizeof(@base) should encompass the memory area to be accessed and thus can't
 * be a pointer to the area. Use `MEMBER_VPTR(*ptr, .member)` instead of
 * `MEMBER_VPTR(ptr, ->member)`.
 */
#define MEMBER_VPTR(base, member) (typeof((base) member) *)		\
({									\
	u64 __base = (u64)&(base);					\
	u64 __addr = (u64)&((base) member) - __base;			\
	_Static_assert(sizeof(base) >= sizeof((base) member),		\
		       "@base is smaller than @member, is @base a pointer?"); \
	asm volatile (							\
		"if %0 <= %[max] goto +2\n"				\
		"%0 = 0\n"						\
		"goto +1\n"						\
		"%0 += %1\n"						\
		: "+r"(__addr)						\
		: "r"(__base),						\
		  [max]"i"(sizeof(base) - sizeof((base) member)));	\
	__addr;								\
})

/**
 * ARRAY_ELEM_PTR - Obtain the verified pointer to an array element
 * @arr: array to index into
 * @i: array index
 * @n: number of elements in array
 *
 * Similar to MEMBER_VPTR() but is intended for use with arrays where the
 * element count needs to be explicit. It can be used in cases where a global
 * array is defined with an initial size but is intended to be resized before
 * loading the BPF program. Without this version of the macro, MEMBER_VPTR()
 * will use the compile time size of the array to compute the max, which will
 * result in rejection by the verifier.
 */
#define ARRAY_ELEM_PTR(arr, i, n) (typeof(arr[i]) *)			\
({									\
	u64 __base = (u64)arr;						\
	u64 __addr = (u64)&(arr[i]) - __base;				\
	asm volatile (							\
		"if %0 <= %[max] goto +2\n"				\
		"%0 = 0\n"						\
		"goto +1\n"						\
		"%0 += %1\n"						\
		: "+r"(__addr)						\
		: "r"(__base),						\
		  [max]"r"(sizeof(arr[0]) * ((n) - 1)));		\
	__addr;								\
})
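
/*
 * Example tying RESIZABLE_ARRAY() and ARRAY_ELEM_PTR() together: a minimal
 * sketch assuming a hypothetical per-CPU statistics array whose final size,
 * nr_cpu_ids, is set from user space before the program is loaded.
 *
 *	u64 RESIZABLE_ARRAY(data, cpu_stats);
 *	const volatile u32 nr_cpu_ids = 1;	// resized before load
 *
 *	static void bump_stat(s32 cpu)
 *	{
 *		u64 *cnt = ARRAY_ELEM_PTR(cpu_stats, cpu, nr_cpu_ids);
 *
 *		if (cnt)	// NULL if @cpu is out of bounds
 *			(*cnt)++;
 *	}
 */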


/*
 * BPF declarations and helpers
 */

/* list and rbtree */
#define __contains(name, node) __attribute__((btf_decl_tag("contains:" #name ":" #node)))
#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))

void *bpf_obj_new_impl(__u64 local_type_id, void *meta) __ksym;
void bpf_obj_drop_impl(void *kptr, void *meta) __ksym;

#define bpf_obj_new(type) ((type *)bpf_obj_new_impl(bpf_core_type_id_local(type), NULL))
#define bpf_obj_drop(kptr) bpf_obj_drop_impl(kptr, NULL)

void bpf_list_push_front(struct bpf_list_head *head, struct bpf_list_node *node) __ksym;
void bpf_list_push_back(struct bpf_list_head *head, struct bpf_list_node *node) __ksym;
struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head) __ksym;
struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head) __ksym;
struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
				      struct bpf_rb_node *node) __ksym;
int bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
			bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b),
			void *meta, __u64 off) __ksym;
#define bpf_rbtree_add(head, node, less) bpf_rbtree_add_impl(head, node, less, NULL, 0)

struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root) __ksym;

void *bpf_refcount_acquire_impl(void *kptr, void *meta) __ksym;
#define bpf_refcount_acquire(kptr) bpf_refcount_acquire_impl(kptr, NULL)

/* task */
struct task_struct *bpf_task_from_pid(s32 pid) __ksym;
struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym;
void bpf_task_release(struct task_struct *p) __ksym;
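
/*
 * Example of the acquire/release discipline for the task kfuncs above: a
 * minimal sketch; the helper name and its use are illustrative only.
 *
 *	static bool example_pid_is_running(s32 pid)
 *	{
 *		struct task_struct *p = bpf_task_from_pid(pid);
 *		bool running;
 *
 *		if (!p)		// no such task, nothing to release
 *			return false;
 *		running = scx_bpf_task_running(p);
 *		bpf_task_release(p);	// every acquired task must be released
 *		return running;
 *	}
 */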

/* cgroup */
struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) __ksym;
void bpf_cgroup_release(struct cgroup *cgrp) __ksym;
struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;

/* css iteration */
struct bpf_iter_css;
struct cgroup_subsys_state;
extern int bpf_iter_css_new(struct bpf_iter_css *it,
			    struct cgroup_subsys_state *start,
			    unsigned int flags) __weak __ksym;
extern struct cgroup_subsys_state *
bpf_iter_css_next(struct bpf_iter_css *it) __weak __ksym;
extern void bpf_iter_css_destroy(struct bpf_iter_css *it) __weak __ksym;

/* cpumask */
struct bpf_cpumask *bpf_cpumask_create(void) __ksym;
struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask) __ksym;
void bpf_cpumask_release(struct bpf_cpumask *cpumask) __ksym;
u32 bpf_cpumask_first(const struct cpumask *cpumask) __ksym;
u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym;
void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym;
void bpf_cpumask_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym;
bool bpf_cpumask_test_cpu(u32 cpu, const struct cpumask *cpumask) __ksym;
bool bpf_cpumask_test_and_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym;
bool bpf_cpumask_test_and_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym;
void bpf_cpumask_setall(struct bpf_cpumask *cpumask) __ksym;
void bpf_cpumask_clear(struct bpf_cpumask *cpumask) __ksym;
bool bpf_cpumask_and(struct bpf_cpumask *dst, const struct cpumask *src1,
		     const struct cpumask *src2) __ksym;
void bpf_cpumask_or(struct bpf_cpumask *dst, const struct cpumask *src1,
		    const struct cpumask *src2) __ksym;
void bpf_cpumask_xor(struct bpf_cpumask *dst, const struct cpumask *src1,
		     const struct cpumask *src2) __ksym;
bool bpf_cpumask_equal(const struct cpumask *src1, const struct cpumask *src2) __ksym;
bool bpf_cpumask_intersects(const struct cpumask *src1, const struct cpumask *src2) __ksym;
bool bpf_cpumask_subset(const struct cpumask *src1, const struct cpumask *src2) __ksym;
bool bpf_cpumask_empty(const struct cpumask *cpumask) __ksym;
bool bpf_cpumask_full(const struct cpumask *cpumask) __ksym;
void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym;
u32 bpf_cpumask_any_distribute(const struct cpumask *cpumask) __ksym;
u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1,
				   const struct cpumask *src2) __ksym;

/* rcu */
void bpf_rcu_read_lock(void) __ksym;
void bpf_rcu_read_unlock(void) __ksym;


/*
 * Other helpers
 */

/* useful compiler attributes */
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#define __maybe_unused __attribute__((__unused__))

/*
 * READ/WRITE_ONCE() are from the kernel (include/asm-generic/rwonce.h). They
 * prevent the compiler from caching, redoing or reordering reads or writes.
 */
typedef __u8  __attribute__((__may_alias__)) __u8_alias_t;
typedef __u16 __attribute__((__may_alias__)) __u16_alias_t;
typedef __u32 __attribute__((__may_alias__)) __u32_alias_t;
typedef __u64 __attribute__((__may_alias__)) __u64_alias_t;

static __always_inline void __read_once_size(const volatile void *p, void *res, int size)
{
	switch (size) {
	case 1: *(__u8_alias_t  *) res = *(volatile __u8_alias_t  *) p; break;
	case 2: *(__u16_alias_t *) res = *(volatile __u16_alias_t *) p; break;
	case 4: *(__u32_alias_t *) res = *(volatile __u32_alias_t *) p; break;
	case 8: *(__u64_alias_t *) res = *(volatile __u64_alias_t *) p; break;
	default:
		barrier();
		__builtin_memcpy((void *)res, (const void *)p, size);
		barrier();
	}
}

static __always_inline void __write_once_size(volatile void *p, void *res, int size)
{
	switch (size) {
	case 1: *(volatile __u8_alias_t  *) p = *(__u8_alias_t  *) res; break;
	case 2: *(volatile __u16_alias_t *) p = *(__u16_alias_t *) res; break;
	case 4: *(volatile __u32_alias_t *) p = *(__u32_alias_t *) res; break;
	case 8: *(volatile __u64_alias_t *) p = *(__u64_alias_t *) res; break;
	default:
		barrier();
		__builtin_memcpy((void *)p, (const void *)res, size);
		barrier();
	}
}

#define READ_ONCE(x)							\
({									\
	union { typeof(x) __val; char __c[1]; } __u =			\
		{ .__c = { 0 } };					\
	__read_once_size(&(x), __u.__c, sizeof(x));			\
	__u.__val;							\
})

#define WRITE_ONCE(x, val)						\
({									\
	union { typeof(x) __val; char __c[1]; } __u =			\
		{ .__val = (val) };					\
	__write_once_size(&(x), __u.__c, sizeof(x));			\
	__u.__val;							\
})
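
/*
 * Example usage of READ_ONCE()/WRITE_ONCE(): a minimal sketch, assuming a
 * hypothetical global counter that user space polls while the scheduler is
 * running.
 *
 *	u64 nr_enqueued;	// read concurrently from user space
 *
 *	static void count_enqueue(void)
 *	{
 *		// force a single, untorn load and store of the shared counter
 *		WRITE_ONCE(nr_enqueued, READ_ONCE(nr_enqueued) + 1);
 *	}
 */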

/*
 * log2_u32 - Compute the base 2 logarithm of a 32-bit exponential value.
 * @v: The value for which we're computing the base 2 logarithm.
 */
static inline u32 log2_u32(u32 v)
{
	u32 r;
	u32 shift;

	r = (v > 0xFFFF) << 4; v >>= r;
	shift = (v > 0xFF) << 3; v >>= shift; r |= shift;
	shift = (v > 0xF) << 2; v >>= shift; r |= shift;
	shift = (v > 0x3) << 1; v >>= shift; r |= shift;
	r |= (v >> 1);
	return r;
}

/*
 * log2_u64 - Compute the base 2 logarithm of a 64-bit exponential value.
 * @v: The value for which we're computing the base 2 logarithm.
 *
 * Note that this returns floor(log2(@v)) + 1, i.e. the bit width of @v,
 * which is one more than what log2_u32() returns for the same value.
 */
static inline u32 log2_u64(u64 v)
{
	u32 hi = v >> 32;
	if (hi)
		return log2_u32(hi) + 32 + 1;
	else
		return log2_u32(v) + 1;
}

#include "compat.bpf.h"

#endif	/* __SCX_COMMON_BPF_H */