// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
//
// BPF-side Python stack profiler ("pyperf" selftest): on a raw tracepoint it
// walks the target process's CPython frame list via bpf_probe_read_user(),
// interns each (function name, file name) pair in a symbol map, and emits an
// Event with the symbolized user-level Python stack through a perf buffer.
// All CPython struct offsets are supplied by userspace in PidData, so the
// program is independent of the exact interpreter version.
#include <linux/sched.h>
#include <linux/ptrace.h>
#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
#include "bpf_compiler.h"

#define FUNCTION_NAME_LEN 64
#define FILE_NAME_LEN 128
#define TASK_COMM_LEN 16

// Byte offsets into the traced process's CPython objects, filled in by
// userspace for the interpreter version being profiled.
typedef struct {
	int PyThreadState_frame;
	int PyThreadState_thread;
	int PyFrameObject_back;
	int PyFrameObject_code;
	int PyFrameObject_lineno;
	int PyCodeObject_filename;
	int PyCodeObject_name;
	int String_data;
	int String_size;
} OffsetConfig;

// Per-PID profiling configuration pushed by userspace (pidmap value).
typedef struct {
	uintptr_t current_state_addr; // address of CPython's current-thread-state pointer
	uintptr_t tls_key_addr;       // address of the pthread TLS key holding PyThreadState
	OffsetConfig offsets;
	bool use_tls;                 // resolve PyThreadState via TLS instead of current_state_addr
} PidData;

typedef struct {
	uint32_t success;
} Stats;

// Key for symbolmap: one interned Python function identity.
typedef struct {
	char name[FUNCTION_NAME_LEN];
	char file[FILE_NAME_LEN];
} Symbol;

// Sample emitted to userspace via perfmap. Only the bytes up to
// offsetof(Event, metadata) are sent (see bpf_perf_event_output below).
typedef struct {
	uint32_t pid;
	uint32_t tid;
	char comm[TASK_COMM_LEN];
	int32_t kernel_stack_id;
	int32_t user_stack_id;
	bool thread_current;  // sampled thread is CPython's "current" thread
	bool pthread_match;   // TLS-derived pthread id matches the sampled thread
	bool stack_complete;  // frame walk reached the bottom (f_back == NULL)
	int16_t stack_len;
	int32_t stack[STACK_MAX_LEN]; // symbol ids, innermost frame first

	int has_meta;
	int metadata;
	char dummy_safeguard;
} Event;


typedef int pid_t;

// Scratch copy of the per-frame pointers read out of the target process.
typedef struct {
	void* f_back;       // PyFrameObject.f_back, previous frame
	void* f_code;       // PyFrameObject.f_code, pointer to PyCodeObject
	void* co_filename;  // PyCodeObject.co_filename
	void* co_name;      // PyCodeObject.co_name
} FrameData;

#ifdef SUBPROGS
__noinline
#else
__always_inline
#endif
// Resolve the PyThreadState pointer for the sampled thread out of its TLS
// area: read the pthread key index from userspace, then index into the
// thread-specific-data array relative to tls_base (the task_struct address,
// used here as the thread pointer).
// NOTE(review): 0x310 / 0x10 / 0x08 hard-code the glibc pthread TSD layout
// (struct pthread specific_1stblock entry size and value offset) — confirm
// against the target libc version.
static void *get_thread_state(void *tls_base, PidData *pidData)
{
	void* thread_state;
	int key;

	bpf_probe_read_user(&key, sizeof(key), (void*)(long)pidData->tls_key_addr);
	bpf_probe_read_user(&thread_state, sizeof(thread_state),
			    tls_base + 0x310 + key * 0x10 + 0x08);
	return thread_state;
}

// Read one PyFrameObject from the traced process: its back/code pointers into
// *frame, and the function/file name strings into *symbol. Returns false when
// the frame has no code object (walk cannot continue).
static __always_inline bool get_frame_data(void *frame_ptr, PidData *pidData,
					   FrameData *frame, Symbol *symbol)
{
	// read data from PyFrameObject
	bpf_probe_read_user(&frame->f_back,
			    sizeof(frame->f_back),
			    frame_ptr + pidData->offsets.PyFrameObject_back);
	bpf_probe_read_user(&frame->f_code,
			    sizeof(frame->f_code),
			    frame_ptr + pidData->offsets.PyFrameObject_code);

	// read data from PyCodeObject
	if (!frame->f_code)
		return false;
	bpf_probe_read_user(&frame->co_filename,
			    sizeof(frame->co_filename),
			    frame->f_code + pidData->offsets.PyCodeObject_filename);
	bpf_probe_read_user(&frame->co_name,
			    sizeof(frame->co_name),
			    frame->f_code + pidData->offsets.PyCodeObject_name);
	// read actual names into symbol
	if (frame->co_filename)
		bpf_probe_read_user_str(&symbol->file,
					sizeof(symbol->file),
					frame->co_filename +
					pidData->offsets.String_data);
	if (frame->co_name)
		bpf_probe_read_user_str(&symbol->name,
					sizeof(symbol->name),
					frame->co_name +
					pidData->offsets.String_data);
	return true;
}

// pid -> PidData: which processes to profile and how to parse them.
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, PidData);
} pidmap SEC(".maps");

// Single scratch Event slot (key 0) used instead of stack space, since Event
// is far larger than the BPF stack allows.
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, Event);
} eventmap SEC(".maps");

// Symbol -> interned symbol id. The entry for the all-zero Symbol doubles as
// the per-map symbol counter (see __on_event).
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1);
	__type(key, Symbol);
	__type(value, int);
} symbolmap SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, Stats);
} statsmap SEC(".maps");

// Per-CPU perf ring buffer used to ship Events to userspace.
struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(max_entries, 32);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
} perfmap SEC(".maps");

// Native (kernel + user) stack traces captured with bpf_get_stackid().
struct {
	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
	__uint(max_entries, 1000);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(long long) * 127);
} stackmap SEC(".maps");

#ifdef USE_BPF_LOOP
// Shared state for one bpf_loop() frame walk (bpf_loop passes a single ctx
// pointer to the callback on every iteration).
struct process_frame_ctx {
	int cur_cpu;
	int32_t *symbol_counter;
	void *frame_ptr;
	FrameData *frame;
	PidData *pidData;
	Symbol *sym;
	Event *event;
	bool done; // set on unrecoverable map error; caller must bail out
};

// bpf_loop() callback: process one Python frame. Returns 1 to stop the loop
// (error or index out of range), 0 to continue.
static int process_frame_callback(__u32 i, struct process_frame_ctx *ctx)
{
	int zero = 0;
	void *frame_ptr = ctx->frame_ptr;
	PidData *pidData = ctx->pidData;
	FrameData *frame = ctx->frame;
	int32_t *symbol_counter = ctx->symbol_counter;
	int cur_cpu = ctx->cur_cpu;
	Event *event = ctx->event;
	Symbol *sym = ctx->sym;

	if (frame_ptr && get_frame_data(frame_ptr, pidData, frame, sym)) {
		// Candidate id for a newly seen symbol; spread across CPUs so
		// concurrent CPUs never mint the same id.
		int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
		int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, sym);

		if (!symbol_id) {
			bpf_map_update_elem(&symbolmap, sym, &zero, 0);
			symbol_id = bpf_map_lookup_elem(&symbolmap, sym);
			if (!symbol_id) {
				ctx->done = true;
				return 1;
			}
		}
		if (*symbol_id == new_symbol_id)
			(*symbol_counter)++;

		// barrier_var() keeps the verifier from losing the bounds
		// check on i through optimization.
		barrier_var(i);
		if (i >= STACK_MAX_LEN)
			return 1;

		event->stack[i] = *symbol_id;

		event->stack_len = i + 1;
		frame_ptr = frame->f_back;
	}
	return 0;
}
#endif /* USE_BPF_LOOP */

#ifdef GLOBAL_FUNC
__noinline
#elif defined(SUBPROGS)
static __noinline
#else
static __always_inline
#endif
// Core handler: resolve the Python thread state for the current task, walk
// its frame list (via bpf_loop, bpf_for, or an unrolled for-loop depending on
// build flags), and emit the populated Event. Always returns 0.
int __on_event(struct bpf_raw_tracepoint_args *ctx)
{
	uint64_t pid_tgid = bpf_get_current_pid_tgid();
	pid_t pid = (pid_t)(pid_tgid >> 32);
	PidData* pidData = bpf_map_lookup_elem(&pidmap, &pid);
	if (!pidData)
		return 0; // process not selected for profiling

	int zero = 0;
	Event* event = bpf_map_lookup_elem(&eventmap, &zero);
	if (!event)
		return 0;

	event->pid = pid;

	event->tid = (pid_t)pid_tgid;
	bpf_get_current_comm(&event->comm, sizeof(event->comm));

	event->user_stack_id = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);
	event->kernel_stack_id = bpf_get_stackid(ctx, &stackmap, 0);

	// CPython's notion of the currently running thread state.
	void* thread_state_current = (void*)0;
	bpf_probe_read_user(&thread_state_current,
			    sizeof(thread_state_current),
			    (void*)(long)pidData->current_state_addr);

	// task_struct address doubles as the TLS base for get_thread_state().
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
	void* tls_base = (void*)task;

	void* thread_state = pidData->use_tls ? get_thread_state(tls_base, pidData)
		: thread_state_current;
	event->thread_current = thread_state == thread_state_current;

	if (pidData->use_tls) {
		// Cross-check: the pthread id recorded in the PyThreadState
		// must match the pthread id read out of this thread's TLS.
		uint64_t pthread_created;
		uint64_t pthread_self;
		bpf_probe_read_user(&pthread_self, sizeof(pthread_self),
				    tls_base + 0x10);

		bpf_probe_read_user(&pthread_created,
				    sizeof(pthread_created),
				    thread_state +
				    pidData->offsets.PyThreadState_thread);
		event->pthread_match = pthread_created == pthread_self;
	} else {
		event->pthread_match = 1;
	}

	if (event->pthread_match || !pidData->use_tls) {
		void* frame_ptr;
		FrameData frame;
		Symbol sym = {};
		int cur_cpu = bpf_get_smp_processor_id();

		bpf_probe_read_user(&frame_ptr,
				    sizeof(frame_ptr),
				    thread_state +
				    pidData->offsets.PyThreadState_frame);

		// The all-zero Symbol entry stores the interning counter.
		int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
		if (symbol_counter == NULL)
			return 0;
#ifdef USE_BPF_LOOP
		struct process_frame_ctx ctx = {
			.cur_cpu = cur_cpu,
			.symbol_counter = symbol_counter,
			.frame_ptr = frame_ptr,
			.frame = &frame,
			.pidData = pidData,
			.sym = &sym,
			.event = event,
		};

		bpf_loop(STACK_MAX_LEN, process_frame_callback, &ctx, 0);
		if (ctx.done)
			return 0;
#else
#if defined(USE_ITER)
/* no for loop, no unrolling */
#elif defined(NO_UNROLL)
		__pragma_loop_no_unroll
#elif defined(UNROLL_COUNT)
		__pragma_loop_unroll_count(UNROLL_COUNT)
#else
		__pragma_loop_unroll_full
#endif /* NO_UNROLL */
		/* Unwind python stack */
#ifdef USE_ITER
		int i;
		bpf_for(i, 0, STACK_MAX_LEN) {
#else /* !USE_ITER */
		for (int i = 0; i < STACK_MAX_LEN; ++i) {
#endif
			if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) {
				// Same CPU-sharded interning scheme as
				// process_frame_callback() above.
				int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
				int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
				if (!symbol_id) {
					bpf_map_update_elem(&symbolmap, &sym, &zero, 0);
					symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
					if (!symbol_id)
						return 0;
				}
				if (*symbol_id == new_symbol_id)
					(*symbol_counter)++;
				event->stack[i] = *symbol_id;
				event->stack_len = i + 1;
				frame_ptr = frame.f_back;
			}
		}
#endif /* USE_BPF_LOOP */
		event->stack_complete = frame_ptr == NULL;
	} else {
		event->stack_complete = 1;
	}

	Stats* stats = bpf_map_lookup_elem(&statsmap, &zero);
	if (stats)
		stats->success++;

	event->has_meta = 0;
	// Ship only the fields up to (not including) metadata.
	bpf_perf_event_output(ctx, &perfmap, 0, event, offsetof(Event, metadata));
	return 0;
}

SEC("raw_tracepoint/kfree_skb")
// Entry point. Calls __on_event repeatedly — presumably to inflate the
// program size / verifier workload for this selftest (each call returns 0,
// so ret stays 0); confirm intent against the selftest harness.
int on_event(struct bpf_raw_tracepoint_args* ctx)
{
	int ret = 0;
	ret |= __on_event(ctx);
	ret |= __on_event(ctx);
	ret |= __on_event(ctx);
	ret |= __on_event(ctx);
	ret |= __on_event(ctx);
	return ret;
}

char _license[] SEC("license") = "GPL";