// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
//
// BPF-side of the "pyperf" selftest: from a raw tracepoint, locate the
// traced process's CPython thread state, walk its PyFrameObject chain via
// user-memory probes, intern each (function, file) pair as a 32-bit symbol
// id, and emit the resulting stack as an Event through a perf ring.
// NOTE(review): STACK_MAX_LEN is expected to be #defined by the including
// translation unit (this header is compiled at several stack depths).
#include <linux/sched.h>
#include <linux/ptrace.h>
#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"

/* Fixed sizes for the strings copied out of the traced process. */
#define FUNCTION_NAME_LEN 64
#define FILE_NAME_LEN 128
#define TASK_COMM_LEN 16

/* Byte offsets of the CPython-internal struct fields this program
 * dereferences. They are supplied by user space (via pidmap) because they
 * differ across Python versions/builds.
 */
typedef struct {
	int PyThreadState_frame;
	int PyThreadState_thread;
	int PyFrameObject_back;
	int PyFrameObject_code;
	int PyFrameObject_lineno;
	int PyCodeObject_filename;
	int PyCodeObject_name;
	int String_data;
	int String_size;
} OffsetConfig;

/* Per-PID tracing configuration installed by user space. */
typedef struct {
	uintptr_t current_state_addr; /* tracee address of the "current thread state" pointer */
	uintptr_t tls_key_addr;       /* tracee address of the pthread TLS key (when use_tls) */
	OffsetConfig offsets;
	bool use_tls;                 /* resolve thread state via TLS instead of the global */
} PidData;

typedef struct {
	uint32_t success; /* number of events successfully assembled */
} Stats;

/* Key type for symbolmap: one resolved Python frame. */
typedef struct {
	char name[FUNCTION_NAME_LEN];
	char file[FILE_NAME_LEN];
} Symbol;

/* Sample shipped to user space through perfmap. */
typedef struct {
	uint32_t pid;
	uint32_t tid;
	char comm[TASK_COMM_LEN];
	int32_t kernel_stack_id;      /* id into stackmap, or negative error */
	int32_t user_stack_id;        /* id into stackmap, or negative error */
	bool thread_current;          /* thread state matched the interpreter's current one */
	bool pthread_match;           /* pthread id stored in the thread state matched self */
	bool stack_complete;          /* reached the bottom frame before running out of slots */
	int16_t stack_len;            /* number of valid entries in stack[] */
	int32_t stack[STACK_MAX_LEN]; /* interned symbol ids, innermost frame first */

	int has_meta;                 /* always 0 here; output is truncated before metadata */
	int metadata;
	char dummy_safeguard;
} Event;


typedef int pid_t;

/* Scratch copy of the pointers probed out of one PyFrameObject. */
typedef struct {
	void* f_back;       // PyFrameObject.f_back, previous frame
	void* f_code;       // PyFrameObject.f_code, pointer to PyCodeObject
	void* co_filename;  // PyCodeObject.co_filename
	void* co_name;      // PyCodeObject.co_name
} FrameData;

/* Fetch the tracee's thread-state pointer out of pthread thread-local
 * storage. The constants 0x310, 0x10 and 0x08 encode the glibc
 * struct-pthread specific-data layout (key array base, slot stride, value
 * offset within a slot) -- NOTE(review): assumed stable for the traced
 * libc; confirm against the target glibc version.
 */
#ifdef SUBPROGS
__noinline
#else
__always_inline
#endif
static void *get_thread_state(void *tls_base, PidData *pidData)
{
	void* thread_state;
	int key;

	/* pthread key under which the interpreter stored its thread state */
	bpf_probe_read_user(&key, sizeof(key), (void*)(long)pidData->tls_key_addr);
	bpf_probe_read_user(&thread_state, sizeof(thread_state),
			    tls_base + 0x310 + key * 0x10 + 0x08);
	return thread_state;
}

/* Probe one frame's pointers into *frame and copy its code object's
 * name/file strings into *symbol. Returns false when the frame has no
 * code object (caller must stop walking); true otherwise. All reads are
 * best-effort probes of user memory and may leave fields unchanged on
 * fault.
 */
static __always_inline bool
get_frame_data(void *frame_ptr, PidData *pidData,
	       FrameData *frame, Symbol *symbol)
{
	// read data from PyFrameObject
	bpf_probe_read_user(&frame->f_back,
			    sizeof(frame->f_back),
			    frame_ptr + pidData->offsets.PyFrameObject_back);
	bpf_probe_read_user(&frame->f_code,
			    sizeof(frame->f_code),
			    frame_ptr + pidData->offsets.PyFrameObject_code);

	// read data from PyCodeObject
	if (!frame->f_code)
		return false;
	bpf_probe_read_user(&frame->co_filename,
			    sizeof(frame->co_filename),
			    frame->f_code + pidData->offsets.PyCodeObject_filename);
	bpf_probe_read_user(&frame->co_name,
			    sizeof(frame->co_name),
			    frame->f_code + pidData->offsets.PyCodeObject_name);
	// read actual names into symbol
	if (frame->co_filename)
		bpf_probe_read_user_str(&symbol->file,
					sizeof(symbol->file),
					frame->co_filename +
					pidData->offsets.String_data);
	if (frame->co_name)
		bpf_probe_read_user_str(&symbol->name,
					sizeof(symbol->name),
					frame->co_name +
					pidData->offsets.String_data);
	return true;
}

/* PID -> tracing configuration, populated by user space. */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, PidData);
} pidmap SEC(".maps");

/* Single-slot scratch Event, looked up at key 0 -- presumably used as
 * map-backed storage because Event (with its STACK_MAX_LEN array) is far
 * too large for the BPF stack.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, Event);
} eventmap SEC(".maps");

/* Symbol -> interned 32-bit id. The entry for the all-zero Symbol doubles
 * as the per-run id counter (see the frame-walk loops below).
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1);
	__type(key, Symbol);
	__type(value, int);
} symbolmap SEC(".maps");

/* Success counter read back by the test harness. */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, Stats);
} statsmap SEC(".maps");

/* Perf event ring used to ship Events to user space. */
struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(max_entries, 32);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
} perfmap SEC(".maps");

/* Kernel/user native stack traces referenced by Event.*_stack_id. */
struct {
	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
	__uint(max_entries, 1000);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(long long) * 127);
} stackmap SEC(".maps");

#ifdef USE_BPF_LOOP
/* State threaded through bpf_loop() when the frame walk is driven by the
 * bpf_loop helper instead of an (un)rolled for-loop.
 */
struct process_frame_ctx {
	int cur_cpu;
	int32_t *symbol_counter; /* points at the counter slot in symbolmap */
	void *frame_ptr;         /* frame to process on this iteration */
	FrameData *frame;
	PidData *pidData;
	Symbol *sym;
	Event *event;
	bool done;               /* set when the walk must abort (map update failed) */
};

/* bpf_loop() callback: resolve and record the i-th Python frame.
 * Returns 1 to stop iterating, 0 to continue.
 */
static int process_frame_callback(__u32 i, struct process_frame_ctx *ctx)
{
	int zero = 0;
	void *frame_ptr = ctx->frame_ptr;
	PidData *pidData = ctx->pidData;
	FrameData *frame = ctx->frame;
	int32_t *symbol_counter = ctx->symbol_counter;
	int cur_cpu = ctx->cur_cpu;
	Event *event = ctx->event;
	Symbol *sym = ctx->sym;

	if (frame_ptr && get_frame_data(frame_ptr, pidData, frame, sym)) {
		/* candidate id; the *64+cpu encoding keeps ids unique per CPU */
		int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
		int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, sym);

		if (!symbol_id) {
			/* first sighting of this symbol: intern it */
			bpf_map_update_elem(&symbolmap, sym, &zero, 0);
			symbol_id = bpf_map_lookup_elem(&symbolmap, sym);
			if (!symbol_id) {
				ctx->done = true;
				return 1;
			}
		}
		if (*symbol_id == new_symbol_id)
			(*symbol_counter)++;

		/* barrier_var + explicit bound keep the array index provably
		 * in range for the BPF verifier
		 */
		barrier_var(i);
		if (i >= STACK_MAX_LEN)
			return 1;

		event->stack[i] = *symbol_id;

		event->stack_len = i + 1;
		/* NOTE(review): this assigns the LOCAL copy, not
		 * ctx->frame_ptr, so the next bpf_loop iteration re-reads the
		 * same frame. Looks unintended vs. the non-loop path below --
		 * confirm whether it matters for this (verifier-scalability)
		 * test before changing.
		 */
		frame_ptr = frame->f_back;
	}
	return 0;
}
#endif /* USE_BPF_LOOP */

/* Core handler: build and emit one Event for the current task if its PID
 * is registered in pidmap. Always returns 0. Compiled as a global func,
 * static subprog, or fully inlined depending on the test variant.
 */
#ifdef GLOBAL_FUNC
__noinline
#elif defined(SUBPROGS)
static __noinline
#else
static __always_inline
#endif
int __on_event(struct bpf_raw_tracepoint_args *ctx)
{
	uint64_t pid_tgid = bpf_get_current_pid_tgid();
	pid_t pid = (pid_t)(pid_tgid >> 32); /* upper 32 bits = tgid (process id) */
	PidData* pidData = bpf_map_lookup_elem(&pidmap, &pid);
	if (!pidData)
		return 0; /* not a traced process */

	int zero = 0;
	/* Event is too large for the BPF stack; use map-backed scratch space */
	Event* event = bpf_map_lookup_elem(&eventmap, &zero);
	if (!event)
		return 0;

	event->pid = pid;

	event->tid = (pid_t)pid_tgid; /* lower 32 bits = thread id */
	bpf_get_current_comm(&event->comm, sizeof(event->comm));

	/* capture the native (non-Python) stacks as stackmap ids */
	event->user_stack_id = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);
	event->kernel_stack_id = bpf_get_stackid(ctx, &stackmap, 0);

	void* thread_state_current = (void*)0;
	bpf_probe_read_user(&thread_state_current,
			    sizeof(thread_state_current),
			    (void*)(long)pidData->current_state_addr);

	/* NOTE(review): the raw task_struct pointer is used directly as the
	 * TLS base address here -- presumably a fixture of how this selftest
	 * sets up the traced process; confirm before reusing elsewhere.
	 */
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
	void* tls_base = (void*)task;

	void* thread_state = pidData->use_tls ? get_thread_state(tls_base, pidData)
					      : thread_state_current;
	event->thread_current = thread_state == thread_state_current;

	if (pidData->use_tls) {
		/* cross-check: pthread id recorded in the Python thread state
		 * must match this thread's own pthread id
		 */
		uint64_t pthread_created;
		uint64_t pthread_self;
		/* 0x10: offset of the pthread self-pointer in TLS -- TODO confirm */
		bpf_probe_read_user(&pthread_self, sizeof(pthread_self),
				    tls_base + 0x10);

		bpf_probe_read_user(&pthread_created,
				    sizeof(pthread_created),
				    thread_state +
				    pidData->offsets.PyThreadState_thread);
		event->pthread_match = pthread_created == pthread_self;
	} else {
		event->pthread_match = 1;
	}

	if (event->pthread_match || !pidData->use_tls) {
		void* frame_ptr;
		FrameData frame;
		Symbol sym = {};
		int cur_cpu = bpf_get_smp_processor_id();

		/* innermost frame of the interpreter's current call stack */
		bpf_probe_read_user(&frame_ptr,
				    sizeof(frame_ptr),
				    thread_state +
				    pidData->offsets.PyThreadState_frame);

		/* the all-zero Symbol slot holds the symbol-id counter */
		int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
		if (symbol_counter == NULL)
			return 0;
#ifdef USE_BPF_LOOP
		struct process_frame_ctx ctx = {
			.cur_cpu = cur_cpu,
			.symbol_counter = symbol_counter,
			.frame_ptr = frame_ptr,
			.frame = &frame,
			.pidData = pidData,
			.sym = &sym,
			.event = event,
		};

		bpf_loop(STACK_MAX_LEN, process_frame_callback, &ctx, 0);
		if (ctx.done)
			return 0;
#else
#if defined(USE_ITER)
/* no for loop, no unrolling */
#elif defined(NO_UNROLL)
#pragma clang loop unroll(disable)
#elif defined(UNROLL_COUNT)
#pragma clang loop unroll_count(UNROLL_COUNT)
#else
#pragma clang loop unroll(full)
#endif /* NO_UNROLL */
		/* Unwind python stack */
#ifdef USE_ITER
		int i;
		bpf_for(i, 0, STACK_MAX_LEN) {
#else /* !USE_ITER */
		for (int i = 0; i < STACK_MAX_LEN; ++i) {
#endif
			if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) {
				/* same interning scheme as process_frame_callback */
				int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
				int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
				if (!symbol_id) {
					bpf_map_update_elem(&symbolmap, &sym, &zero, 0);
					symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
					if (!symbol_id)
						return 0;
				}
				if (*symbol_id == new_symbol_id)
					(*symbol_counter)++;
				event->stack[i] = *symbol_id;
				event->stack_len = i + 1;
				frame_ptr = frame.f_back;
			}
		}
#endif /* USE_BPF_LOOP */
		/* NULL f_back means we reached the outermost frame */
		event->stack_complete = frame_ptr == NULL;
	} else {
		event->stack_complete = 1;
	}

	Stats* stats = bpf_map_lookup_elem(&statsmap, &zero);
	if (stats)
		stats->success++;

	event->has_meta = 0;
	/* truncate the payload at the metadata field (has_meta == 0) */
	bpf_perf_event_output(ctx, &perfmap, 0, event, offsetof(Event, metadata));
	return 0;
}

SEC("raw_tracepoint/kfree_skb")
int on_event(struct bpf_raw_tracepoint_args* ctx)
{
	int ret = 0;
	/* the body is invoked five times -- presumably to multiply program
	 * size/complexity for verifier scalability testing; confirm intent
	 * before collapsing into a loop
	 */
	ret |= __on_event(ctx);
	ret |= __on_event(ctx);
	ret |= __on_event(ctx);
	ret |= __on_event(ctx);
	ret |= __on_event(ctx);
	return ret;
}

char _license[] SEC("license") = "GPL";