1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (c) 2020 Facebook */ 3 #include <vmlinux.h> 4 #include <bpf/bpf_core_read.h> 5 #include <bpf/bpf_helpers.h> 6 #include <bpf/bpf_tracing.h> 7 8 #include "profiler.h" 9 #include "err.h" 10 #include "bpf_experimental.h" 11 #include "bpf_compiler.h" 12 #include "bpf_misc.h" 13 14 #ifndef NULL 15 #define NULL 0 16 #endif 17 18 #define O_WRONLY 00000001 19 #define O_RDWR 00000002 20 #define O_DIRECTORY 00200000 21 #define __O_TMPFILE 020000000 22 #define O_TMPFILE (__O_TMPFILE | O_DIRECTORY) 23 #define S_IFMT 00170000 24 #define S_IFSOCK 0140000 25 #define S_IFLNK 0120000 26 #define S_IFREG 0100000 27 #define S_IFBLK 0060000 28 #define S_IFDIR 0040000 29 #define S_IFCHR 0020000 30 #define S_IFIFO 0010000 31 #define S_ISUID 0004000 32 #define S_ISGID 0002000 33 #define S_ISVTX 0001000 34 #define S_ISLNK(m) (((m)&S_IFMT) == S_IFLNK) 35 #define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR) 36 #define S_ISCHR(m) (((m)&S_IFMT) == S_IFCHR) 37 #define S_ISBLK(m) (((m)&S_IFMT) == S_IFBLK) 38 #define S_ISFIFO(m) (((m)&S_IFMT) == S_IFIFO) 39 #define S_ISSOCK(m) (((m)&S_IFMT) == S_IFSOCK) 40 41 #define KILL_DATA_ARRAY_SIZE 8 42 43 struct var_kill_data_arr_t { 44 struct var_kill_data_t array[KILL_DATA_ARRAY_SIZE]; 45 }; 46 47 union any_profiler_data_t { 48 struct var_exec_data_t var_exec; 49 struct var_kill_data_t var_kill; 50 struct var_sysctl_data_t var_sysctl; 51 struct var_filemod_data_t var_filemod; 52 struct var_fork_data_t var_fork; 53 struct var_kill_data_arr_t var_kill_data_arr; 54 }; 55 56 volatile struct profiler_config_struct bpf_config = {}; 57 58 #define FETCH_CGROUPS_FROM_BPF (bpf_config.fetch_cgroups_from_bpf) 59 #define CGROUP_FS_INODE (bpf_config.cgroup_fs_inode) 60 #define CGROUP_LOGIN_SESSION_INODE \ 61 (bpf_config.cgroup_login_session_inode) 62 #define KILL_SIGNALS (bpf_config.kill_signals_mask) 63 #define STALE_INFO (bpf_config.stale_info_secs) 64 #define INODE_FILTER (bpf_config.inode_filter) 65 #define READ_ENVIRON_FROM_EXEC (bpf_config.read_environ_from_exec) 66 #define ENABLE_CGROUP_V1_RESOLVER (bpf_config.enable_cgroup_v1_resolver) 67 68 struct kernfs_iattrs___52 { 69 struct iattr ia_iattr; 70 }; 71 72 struct kernfs_node___52 { 73 union /* kernfs_node_id */ { 74 struct { 75 u32 ino; 76 u32 generation; 77 }; 78 u64 id; 79 } id; 80 }; 81 82 struct { 83 __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 84 __uint(max_entries, 1); 85 __type(key, u32); 86 __type(value, union any_profiler_data_t); 87 } data_heap SEC(".maps"); 88 89 struct { 90 __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); 91 __uint(key_size, sizeof(int)); 92 __uint(value_size, sizeof(int)); 93 } events SEC(".maps"); 94 95 struct { 96 __uint(type, BPF_MAP_TYPE_HASH); 97 __uint(max_entries, KILL_DATA_ARRAY_SIZE); 98 __type(key, u32); 99 __type(value, struct var_kill_data_arr_t); 100 } var_tpid_to_data SEC(".maps"); 101 102 struct { 103 __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 104 __uint(max_entries, profiler_bpf_max_function_id); 105 __type(key, u32); 106 __type(value, struct bpf_func_stats_data); 107 } bpf_func_stats SEC(".maps"); 108 109 struct { 110 __uint(type, BPF_MAP_TYPE_HASH); 111 __type(key, u32); 112 __type(value, bool); 113 __uint(max_entries, 16); 114 } allowed_devices SEC(".maps"); 115 116 struct { 117 __uint(type, BPF_MAP_TYPE_HASH); 118 __type(key, u64); 119 __type(value, bool); 120 __uint(max_entries, 1024); 121 } allowed_file_inodes SEC(".maps"); 122 123 struct { 124 __uint(type, BPF_MAP_TYPE_HASH); 125 __type(key, u64); 126 __type(value, bool); 127 __uint(max_entries, 1024); 128 } allowed_directory_inodes SEC(".maps"); 129 130 struct { 131 __uint(type, BPF_MAP_TYPE_HASH); 132 __type(key, u32); 133 __type(value, bool); 134 __uint(max_entries, 16); 135 } disallowed_exec_inodes SEC(".maps"); 136 137 static INLINE bool IS_ERR(const void* ptr) 138 { 139 return IS_ERR_VALUE((unsigned long)ptr); 140 } 141 142 static INLINE u32 get_userspace_pid() 143 { 144 return bpf_get_current_pid_tgid() >> 32; 145 } 146 147 static INLINE bool is_init_process(u32 tgid) 148 { 149 return tgid == 1 || tgid == 0; 150 } 151 152 static INLINE unsigned long 153 probe_read_lim(void* dst, void* src, unsigned long len, unsigned long max) 154 { 155 len = len < max ? len : max; 156 if (len > 1) { 157 if (bpf_probe_read_kernel(dst, len, src)) 158 return 0; 159 } else if (len == 1) { 160 if (bpf_probe_read_kernel(dst, 1, src)) 161 return 0; 162 } 163 return len; 164 } 165 166 static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct, 167 int spid) 168 { 169 #ifdef UNROLL 170 __pragma_loop_unroll 171 #endif 172 for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) 173 if (arr_struct->array[i].meta.pid == spid) 174 return i; 175 return -1; 176 } 177 178 static INLINE void populate_ancestors(struct task_struct* task, 179 struct ancestors_data_t* ancestors_data) 180 { 181 struct task_struct* parent = task; 182 u32 num_ancestors, ppid; 183 184 ancestors_data->num_ancestors = 0; 185 #ifdef UNROLL 186 __pragma_loop_unroll 187 #endif 188 for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) { 189 parent = BPF_CORE_READ(parent, real_parent); 190 if (parent == NULL) 191 break; 192 ppid = BPF_CORE_READ(parent, tgid); 193 if (is_init_process(ppid)) 194 break; 195 ancestors_data->ancestor_pids[num_ancestors] = ppid; 196 ancestors_data->ancestor_exec_ids[num_ancestors] = 197 BPF_CORE_READ(parent, self_exec_id); 198 ancestors_data->ancestor_start_times[num_ancestors] = 199 BPF_CORE_READ(parent, start_time); 200 ancestors_data->num_ancestors = num_ancestors; 201 } 202 } 203 204 static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node, 205 struct kernfs_node* cgroup_root_node, 206 void* payload, 207 int* root_pos) 208 { 209 void* payload_start = payload; 210 size_t filepart_length; 211 212 #ifdef UNROLL 213 __pragma_loop_unroll 214 #endif 215 for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) { 216 filepart_length = 217 bpf_probe_read_kernel_str(payload, MAX_PATH, 218 BPF_CORE_READ(cgroup_node, name)); 219 if (!cgroup_node) 220 return payload; 221 if (cgroup_node == cgroup_root_node) 222 *root_pos = payload - payload_start; 223 if (bpf_cmp_likely(filepart_length, <=, MAX_PATH)) { 224 payload += filepart_length; 225 } 226 cgroup_node = BPF_CORE_READ(cgroup_node, parent); 227 } 228 return payload; 229 } 230 231 static ino_t get_inode_from_kernfs(struct kernfs_node* node) 232 { 233 struct kernfs_node___52* node52 = (void*)node; 234 235 if (bpf_core_field_exists(node52->id.ino)) { 236 barrier_var(node52); 237 return BPF_CORE_READ(node52, id.ino); 238 } else { 239 barrier_var(node); 240 return (u64)BPF_CORE_READ(node, id); 241 } 242 } 243 244 extern bool CONFIG_CGROUP_PIDS __kconfig __weak; 245 enum cgroup_subsys_id___local { 246 pids_cgrp_id___local = 123, /* value doesn't matter */ 247 }; 248 249 static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, 250 struct task_struct* task, 251 void* payload) 252 { 253 struct kernfs_node* root_kernfs = 254 BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn); 255 struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn); 256 257 #if __has_builtin(__builtin_preserve_enum_value) 258 if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) { 259 int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local, 260 pids_cgrp_id___local); 261 #ifdef UNROLL 262 __pragma_loop_unroll 263 #endif 264 for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 265 struct cgroup_subsys_state* subsys = 266 BPF_CORE_READ(task, cgroups, subsys[i]); 267 if (subsys != NULL) { 268 int subsys_id = BPF_CORE_READ(subsys, ss, id); 269 if (subsys_id == cgrp_id) { 270 proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn); 271 root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn); 272 break; 273 } 274 } 275 } 276 } 277 #endif 278 279 cgroup_data->cgroup_root_inode = get_inode_from_kernfs(root_kernfs); 280 cgroup_data->cgroup_proc_inode = get_inode_from_kernfs(proc_kernfs); 281 282 if (bpf_core_field_exists(root_kernfs->iattr->ia_mtime)) { 283 cgroup_data->cgroup_root_mtime = 284 BPF_CORE_READ(root_kernfs, iattr, ia_mtime.tv_nsec); 285 cgroup_data->cgroup_proc_mtime = 286 BPF_CORE_READ(proc_kernfs, iattr, ia_mtime.tv_nsec); 287 } else { 288 struct kernfs_iattrs___52* root_iattr = 289 (struct kernfs_iattrs___52*)BPF_CORE_READ(root_kernfs, iattr); 290 cgroup_data->cgroup_root_mtime = 291 BPF_CORE_READ(root_iattr, ia_iattr.ia_mtime.tv_nsec); 292 293 struct kernfs_iattrs___52* proc_iattr = 294 (struct kernfs_iattrs___52*)BPF_CORE_READ(proc_kernfs, iattr); 295 cgroup_data->cgroup_proc_mtime = 296 BPF_CORE_READ(proc_iattr, ia_iattr.ia_mtime.tv_nsec); 297 } 298 299 cgroup_data->cgroup_root_length = 0; 300 cgroup_data->cgroup_proc_length = 0; 301 cgroup_data->cgroup_full_length = 0; 302 303 size_t cgroup_root_length = 304 bpf_probe_read_kernel_str(payload, MAX_PATH, 305 BPF_CORE_READ(root_kernfs, name)); 306 if (bpf_cmp_likely(cgroup_root_length, <=, MAX_PATH)) { 307 cgroup_data->cgroup_root_length = cgroup_root_length; 308 payload += cgroup_root_length; 309 } 310 311 size_t cgroup_proc_length = 312 bpf_probe_read_kernel_str(payload, MAX_PATH, 313 BPF_CORE_READ(proc_kernfs, name)); 314 if (bpf_cmp_likely(cgroup_proc_length, <=, MAX_PATH)) { 315 cgroup_data->cgroup_proc_length = cgroup_proc_length; 316 payload += cgroup_proc_length; 317 } 318 319 if (FETCH_CGROUPS_FROM_BPF) { 320 cgroup_data->cgroup_full_path_root_pos = -1; 321 void* payload_end_pos = read_full_cgroup_path(proc_kernfs, root_kernfs, payload, 322 &cgroup_data->cgroup_full_path_root_pos); 323 cgroup_data->cgroup_full_length = payload_end_pos - payload; 324 payload = payload_end_pos; 325 } 326 327 return (void*)payload; 328 } 329 330 static INLINE void* populate_var_metadata(struct var_metadata_t* metadata, 331 struct task_struct* task, 332 u32 pid, void* payload) 333 { 334 u64 uid_gid = bpf_get_current_uid_gid(); 335 336 metadata->uid = (u32)uid_gid; 337 metadata->gid = uid_gid >> 32; 338 metadata->pid = pid; 339 metadata->exec_id = BPF_CORE_READ(task, self_exec_id); 340 metadata->start_time = BPF_CORE_READ(task, start_time); 341 metadata->comm_length = 0; 342 343 size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm); 344 if (bpf_cmp_likely(comm_length, <=, TASK_COMM_LEN)) { 345 metadata->comm_length = comm_length; 346 payload += comm_length; 347 } 348 349 return (void*)payload; 350 } 351 352 static INLINE struct var_kill_data_t* 353 get_var_kill_data(struct pt_regs* ctx, int spid, int tpid, int sig) 354 { 355 int zero = 0; 356 struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero); 357 358 if (kill_data == NULL) 359 return NULL; 360 struct task_struct* task = (struct task_struct*)bpf_get_current_task(); 361 362 void* payload = populate_var_metadata(&kill_data->meta, task, spid, kill_data->payload); 363 payload = populate_cgroup_info(&kill_data->cgroup_data, task, payload); 364 size_t payload_length = payload - (void*)kill_data->payload; 365 kill_data->payload_length = payload_length; 366 populate_ancestors(task, &kill_data->ancestors_info); 367 kill_data->meta.type = KILL_EVENT; 368 kill_data->kill_target_pid = tpid; 369 kill_data->kill_sig = sig; 370 kill_data->kill_count = 1; 371 kill_data->last_kill_time = bpf_ktime_get_ns(); 372 return kill_data; 373 } 374 375 static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig) 376 { 377 if ((KILL_SIGNALS & (1ULL << sig)) == 0) 378 return 0; 379 380 u32 spid = get_userspace_pid(); 381 struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid); 382 383 if (arr_struct == NULL) { 384 struct var_kill_data_t* kill_data = get_var_kill_data(ctx, spid, tpid, sig); 385 int zero = 0; 386 387 if (kill_data == NULL) 388 return 0; 389 arr_struct = bpf_map_lookup_elem(&data_heap, &zero); 390 if (arr_struct == NULL) 391 return 0; 392 bpf_probe_read_kernel(&arr_struct->array[0], 393 sizeof(arr_struct->array[0]), kill_data); 394 } else { 395 int index = get_var_spid_index(arr_struct, spid); 396 397 if (index == -1) { 398 struct var_kill_data_t* kill_data = 399 get_var_kill_data(ctx, spid, tpid, sig); 400 if (kill_data == NULL) 401 return 0; 402 #ifdef UNROLL 403 __pragma_loop_unroll 404 #endif 405 for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) 406 if (arr_struct->array[i].meta.pid == 0) { 407 bpf_probe_read_kernel(&arr_struct->array[i], 408 sizeof(arr_struct->array[i]), 409 kill_data); 410 bpf_map_update_elem(&var_tpid_to_data, &tpid, 411 arr_struct, 0); 412 413 return 0; 414 } 415 return 0; 416 } 417 418 struct var_kill_data_t* kill_data = &arr_struct->array[index]; 419 420 u64 delta_sec = 421 (bpf_ktime_get_ns() - kill_data->last_kill_time) / 1000000000; 422 423 if (delta_sec < STALE_INFO) { 424 kill_data->kill_count++; 425 kill_data->last_kill_time = bpf_ktime_get_ns(); 426 bpf_probe_read_kernel(&arr_struct->array[index], 427 sizeof(arr_struct->array[index]), 428 kill_data); 429 } else { 430 struct var_kill_data_t* kill_data = 431 get_var_kill_data(ctx, spid, tpid, sig); 432 if (kill_data == NULL) 433 return 0; 434 bpf_probe_read_kernel(&arr_struct->array[index], 435 sizeof(arr_struct->array[index]), 436 kill_data); 437 } 438 } 439 bpf_map_update_elem(&var_tpid_to_data, &tpid, arr_struct, 0); 440 return 0; 441 } 442 443 static INLINE void bpf_stats_enter(struct bpf_func_stats_ctx* bpf_stat_ctx, 444 enum bpf_function_id func_id) 445 { 446 int func_id_key = func_id; 447 448 bpf_stat_ctx->start_time_ns = bpf_ktime_get_ns(); 449 bpf_stat_ctx->bpf_func_stats_data_val = 450 bpf_map_lookup_elem(&bpf_func_stats, &func_id_key); 451 if (bpf_stat_ctx->bpf_func_stats_data_val) 452 bpf_stat_ctx->bpf_func_stats_data_val->num_executions++; 453 } 454 455 static INLINE void bpf_stats_exit(struct bpf_func_stats_ctx* bpf_stat_ctx) 456 { 457 if (bpf_stat_ctx->bpf_func_stats_data_val) 458 bpf_stat_ctx->bpf_func_stats_data_val->time_elapsed_ns += 459 bpf_ktime_get_ns() - bpf_stat_ctx->start_time_ns; 460 } 461 462 static INLINE void 463 bpf_stats_pre_submit_var_perf_event(struct bpf_func_stats_ctx* bpf_stat_ctx, 464 struct var_metadata_t* meta) 465 { 466 if (bpf_stat_ctx->bpf_func_stats_data_val) { 467 bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events++; 468 meta->bpf_stats_num_perf_events = 469 bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events; 470 } 471 meta->bpf_stats_start_ktime_ns = bpf_stat_ctx->start_time_ns; 472 meta->cpu_id = bpf_get_smp_processor_id(); 473 } 474 475 static INLINE size_t 476 read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload) 477 { 478 size_t length = 0; 479 size_t filepart_length; 480 struct dentry* parent_dentry; 481 482 #ifdef UNROLL 483 __pragma_loop_unroll 484 #endif 485 for (int i = 0; i < MAX_PATH_DEPTH; i++) { 486 filepart_length = 487 bpf_probe_read_kernel_str(payload, MAX_PATH, 488 BPF_CORE_READ(filp_dentry, d_name.name)); 489 bpf_nop_mov(filepart_length); 490 if (bpf_cmp_unlikely(filepart_length, >, MAX_PATH)) 491 break; 492 payload += filepart_length; 493 length += filepart_length; 494 495 parent_dentry = BPF_CORE_READ(filp_dentry, d_parent); 496 if (filp_dentry == parent_dentry) 497 break; 498 filp_dentry = parent_dentry; 499 } 500 501 return length; 502 } 503 504 static INLINE bool 505 is_ancestor_in_allowed_inodes(struct dentry* filp_dentry) 506 { 507 struct dentry* parent_dentry; 508 #ifdef UNROLL 509 __pragma_loop_unroll 510 #endif 511 for (int i = 0; i < MAX_PATH_DEPTH; i++) { 512 u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino); 513 bool* allowed_dir = bpf_map_lookup_elem(&allowed_directory_inodes, &dir_ino); 514 515 if (allowed_dir != NULL) 516 return true; 517 parent_dentry = BPF_CORE_READ(filp_dentry, d_parent); 518 if (filp_dentry == parent_dentry) 519 break; 520 filp_dentry = parent_dentry; 521 } 522 return false; 523 } 524 525 static INLINE bool is_dentry_allowed_for_filemod(struct dentry* file_dentry, 526 u32* device_id, 527 u64* file_ino) 528 { 529 u32 dev_id = BPF_CORE_READ(file_dentry, d_sb, s_dev); 530 *device_id = dev_id; 531 bool* allowed_device = bpf_map_lookup_elem(&allowed_devices, &dev_id); 532 533 if (allowed_device == NULL) 534 return false; 535 536 u64 ino = BPF_CORE_READ(file_dentry, d_inode, i_ino); 537 *file_ino = ino; 538 bool* allowed_file = bpf_map_lookup_elem(&allowed_file_inodes, &ino); 539 540 if (allowed_file == NULL) 541 if (!is_ancestor_in_allowed_inodes(BPF_CORE_READ(file_dentry, d_parent))) 542 return false; 543 return true; 544 } 545 546 SEC("kprobe/proc_sys_write") 547 ssize_t BPF_KPROBE(kprobe__proc_sys_write, 548 struct file* filp, const char* buf, 549 size_t count, loff_t* ppos) 550 { 551 struct bpf_func_stats_ctx stats_ctx; 552 bpf_stats_enter(&stats_ctx, profiler_bpf_proc_sys_write); 553 554 u32 pid = get_userspace_pid(); 555 int zero = 0; 556 struct var_sysctl_data_t* sysctl_data = 557 bpf_map_lookup_elem(&data_heap, &zero); 558 if (!sysctl_data) 559 goto out; 560 561 struct task_struct* task = (struct task_struct*)bpf_get_current_task(); 562 sysctl_data->meta.type = SYSCTL_EVENT; 563 void* payload = populate_var_metadata(&sysctl_data->meta, task, pid, sysctl_data->payload); 564 payload = populate_cgroup_info(&sysctl_data->cgroup_data, task, payload); 565 566 populate_ancestors(task, &sysctl_data->ancestors_info); 567 568 sysctl_data->sysctl_val_length = 0; 569 sysctl_data->sysctl_path_length = 0; 570 571 size_t sysctl_val_length = bpf_probe_read_kernel_str(payload, 572 CTL_MAXNAME, buf); 573 if (bpf_cmp_likely(sysctl_val_length, <=, CTL_MAXNAME)) { 574 sysctl_data->sysctl_val_length = sysctl_val_length; 575 payload += sysctl_val_length; 576 } 577 578 size_t sysctl_path_length = 579 bpf_probe_read_kernel_str(payload, MAX_PATH, 580 BPF_CORE_READ(filp, f_path.dentry, 581 d_name.name)); 582 if (bpf_cmp_likely(sysctl_path_length, <=, MAX_PATH)) { 583 sysctl_data->sysctl_path_length = sysctl_path_length; 584 payload += sysctl_path_length; 585 } 586 587 bpf_stats_pre_submit_var_perf_event(&stats_ctx, &sysctl_data->meta); 588 unsigned long data_len = payload - (void*)sysctl_data; 589 data_len = data_len > sizeof(struct var_sysctl_data_t) 590 ? sizeof(struct var_sysctl_data_t) 591 : data_len; 592 bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, sysctl_data, data_len); 593 out: 594 bpf_stats_exit(&stats_ctx); 595 return 0; 596 } 597 598 SEC("tracepoint/syscalls/sys_enter_kill") 599 int tracepoint__syscalls__sys_enter_kill(struct syscall_trace_enter* ctx) 600 { 601 struct bpf_func_stats_ctx stats_ctx; 602 603 bpf_stats_enter(&stats_ctx, profiler_bpf_sys_enter_kill); 604 int pid = ctx->args[0]; 605 int sig = ctx->args[1]; 606 int ret = trace_var_sys_kill(ctx, pid, sig); 607 bpf_stats_exit(&stats_ctx); 608 return ret; 609 }; 610 611 SEC("raw_tracepoint/sched_process_exit") 612 int raw_tracepoint__sched_process_exit(void* ctx) 613 { 614 int zero = 0; 615 struct bpf_func_stats_ctx stats_ctx; 616 bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exit); 617 618 u32 tpid = get_userspace_pid(); 619 620 struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid); 621 struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero); 622 623 if (arr_struct == NULL || kill_data == NULL) 624 goto out; 625 626 struct task_struct* task = (struct task_struct*)bpf_get_current_task(); 627 struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn); 628 629 #ifdef UNROLL 630 __pragma_loop_unroll 631 #endif 632 for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) { 633 struct var_kill_data_t* past_kill_data = &arr_struct->array[i]; 634 635 if (past_kill_data != NULL && past_kill_data->kill_target_pid == (pid_t)tpid) { 636 bpf_probe_read_kernel(kill_data, sizeof(*past_kill_data), 637 past_kill_data); 638 void* payload = kill_data->payload; 639 size_t offset = kill_data->payload_length; 640 if (offset >= MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN) 641 return 0; 642 payload += offset; 643 644 kill_data->kill_target_name_length = 0; 645 kill_data->kill_target_cgroup_proc_length = 0; 646 647 size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm); 648 if (bpf_cmp_likely(comm_length, <=, TASK_COMM_LEN)) { 649 kill_data->kill_target_name_length = comm_length; 650 payload += comm_length; 651 } 652 653 size_t cgroup_proc_length = 654 bpf_probe_read_kernel_str(payload, 655 KILL_TARGET_LEN, 656 BPF_CORE_READ(proc_kernfs, name)); 657 if (bpf_cmp_likely(cgroup_proc_length, <=, KILL_TARGET_LEN)) { 658 kill_data->kill_target_cgroup_proc_length = cgroup_proc_length; 659 payload += cgroup_proc_length; 660 } 661 662 bpf_stats_pre_submit_var_perf_event(&stats_ctx, &kill_data->meta); 663 unsigned long data_len = (void*)payload - (void*)kill_data; 664 data_len = data_len > sizeof(struct var_kill_data_t) 665 ? sizeof(struct var_kill_data_t) 666 : data_len; 667 bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, kill_data, data_len); 668 } 669 } 670 bpf_map_delete_elem(&var_tpid_to_data, &tpid); 671 out: 672 bpf_stats_exit(&stats_ctx); 673 return 0; 674 } 675 676 SEC("raw_tracepoint/sched_process_exec") 677 int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx) 678 { 679 struct bpf_func_stats_ctx stats_ctx; 680 bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exec); 681 682 struct linux_binprm* bprm = (struct linux_binprm*)ctx->args[2]; 683 u64 inode = BPF_CORE_READ(bprm, file, f_inode, i_ino); 684 685 bool* should_filter_binprm = bpf_map_lookup_elem(&disallowed_exec_inodes, &inode); 686 if (should_filter_binprm != NULL) 687 goto out; 688 689 int zero = 0; 690 struct var_exec_data_t* proc_exec_data = bpf_map_lookup_elem(&data_heap, &zero); 691 if (!proc_exec_data) 692 goto out; 693 694 if (INODE_FILTER && inode != INODE_FILTER) 695 return 0; 696 697 u32 pid = get_userspace_pid(); 698 struct task_struct* task = (struct task_struct*)bpf_get_current_task(); 699 700 proc_exec_data->meta.type = EXEC_EVENT; 701 proc_exec_data->bin_path_length = 0; 702 proc_exec_data->cmdline_length = 0; 703 proc_exec_data->environment_length = 0; 704 void* payload = populate_var_metadata(&proc_exec_data->meta, task, pid, 705 proc_exec_data->payload); 706 payload = populate_cgroup_info(&proc_exec_data->cgroup_data, task, payload); 707 708 struct task_struct* parent_task = BPF_CORE_READ(task, real_parent); 709 proc_exec_data->parent_pid = BPF_CORE_READ(parent_task, tgid); 710 proc_exec_data->parent_uid = BPF_CORE_READ(parent_task, real_cred, uid.val); 711 proc_exec_data->parent_exec_id = BPF_CORE_READ(parent_task, self_exec_id); 712 proc_exec_data->parent_start_time = BPF_CORE_READ(parent_task, start_time); 713 714 const char* filename = BPF_CORE_READ(bprm, filename); 715 size_t bin_path_length = 716 bpf_probe_read_kernel_str(payload, MAX_FILENAME_LEN, filename); 717 if (bpf_cmp_likely(bin_path_length, <=, MAX_FILENAME_LEN)) { 718 proc_exec_data->bin_path_length = bin_path_length; 719 payload += bin_path_length; 720 } 721 722 void* arg_start = (void*)BPF_CORE_READ(task, mm, arg_start); 723 void* arg_end = (void*)BPF_CORE_READ(task, mm, arg_end); 724 unsigned int cmdline_length = probe_read_lim(payload, arg_start, 725 arg_end - arg_start, MAX_ARGS_LEN); 726 727 if (bpf_cmp_likely(cmdline_length, <=, MAX_ARGS_LEN)) { 728 proc_exec_data->cmdline_length = cmdline_length; 729 payload += cmdline_length; 730 } 731 732 if (READ_ENVIRON_FROM_EXEC) { 733 void* env_start = (void*)BPF_CORE_READ(task, mm, env_start); 734 void* env_end = (void*)BPF_CORE_READ(task, mm, env_end); 735 unsigned long env_len = probe_read_lim(payload, env_start, 736 env_end - env_start, MAX_ENVIRON_LEN); 737 if (cmdline_length <= MAX_ENVIRON_LEN) { 738 proc_exec_data->environment_length = env_len; 739 payload += env_len; 740 } 741 } 742 743 bpf_stats_pre_submit_var_perf_event(&stats_ctx, &proc_exec_data->meta); 744 unsigned long data_len = payload - (void*)proc_exec_data; 745 data_len = data_len > sizeof(struct var_exec_data_t) 746 ? sizeof(struct var_exec_data_t) 747 : data_len; 748 bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, proc_exec_data, data_len); 749 out: 750 bpf_stats_exit(&stats_ctx); 751 return 0; 752 } 753 754 SEC("kretprobe/do_filp_open") 755 int kprobe_ret__do_filp_open(struct pt_regs* ctx) 756 { 757 struct bpf_func_stats_ctx stats_ctx; 758 bpf_stats_enter(&stats_ctx, profiler_bpf_do_filp_open_ret); 759 760 struct file* filp = (struct file*)PT_REGS_RC_CORE(ctx); 761 762 if (filp == NULL || IS_ERR(filp)) 763 goto out; 764 unsigned int flags = BPF_CORE_READ(filp, f_flags); 765 if ((flags & (O_RDWR | O_WRONLY)) == 0) 766 goto out; 767 if ((flags & O_TMPFILE) > 0) 768 goto out; 769 struct inode* file_inode = BPF_CORE_READ(filp, f_inode); 770 umode_t mode = BPF_CORE_READ(file_inode, i_mode); 771 if (S_ISDIR(mode) || S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) || 772 S_ISSOCK(mode)) 773 goto out; 774 775 struct dentry* filp_dentry = BPF_CORE_READ(filp, f_path.dentry); 776 u32 device_id = 0; 777 u64 file_ino = 0; 778 if (!is_dentry_allowed_for_filemod(filp_dentry, &device_id, &file_ino)) 779 goto out; 780 781 int zero = 0; 782 struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero); 783 if (!filemod_data) 784 goto out; 785 786 u32 pid = get_userspace_pid(); 787 struct task_struct* task = (struct task_struct*)bpf_get_current_task(); 788 789 filemod_data->meta.type = FILEMOD_EVENT; 790 filemod_data->fmod_type = FMOD_OPEN; 791 filemod_data->dst_flags = flags; 792 filemod_data->src_inode = 0; 793 filemod_data->dst_inode = file_ino; 794 filemod_data->src_device_id = 0; 795 filemod_data->dst_device_id = device_id; 796 filemod_data->src_filepath_length = 0; 797 filemod_data->dst_filepath_length = 0; 798 799 void* payload = populate_var_metadata(&filemod_data->meta, task, pid, 800 filemod_data->payload); 801 payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload); 802 803 size_t len = read_absolute_file_path_from_dentry(filp_dentry, payload); 804 if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) { 805 payload += len; 806 filemod_data->dst_filepath_length = len; 807 } 808 bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta); 809 unsigned long data_len = payload - (void*)filemod_data; 810 data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len; 811 bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len); 812 out: 813 bpf_stats_exit(&stats_ctx); 814 return 0; 815 } 816 817 SEC("kprobe/vfs_link") 818 int BPF_KPROBE(kprobe__vfs_link, 819 struct dentry* old_dentry, struct mnt_idmap *idmap, 820 struct inode* dir, struct dentry* new_dentry, 821 struct inode** delegated_inode) 822 { 823 struct bpf_func_stats_ctx stats_ctx; 824 bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link); 825 826 u32 src_device_id = 0; 827 u64 src_file_ino = 0; 828 u32 dst_device_id = 0; 829 u64 dst_file_ino = 0; 830 if (!is_dentry_allowed_for_filemod(old_dentry, &src_device_id, &src_file_ino) && 831 !is_dentry_allowed_for_filemod(new_dentry, &dst_device_id, &dst_file_ino)) 832 goto out; 833 834 int zero = 0; 835 struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero); 836 if (!filemod_data) 837 goto out; 838 839 u32 pid = get_userspace_pid(); 840 struct task_struct* task = (struct task_struct*)bpf_get_current_task(); 841 842 filemod_data->meta.type = FILEMOD_EVENT; 843 filemod_data->fmod_type = FMOD_LINK; 844 filemod_data->dst_flags = 0; 845 filemod_data->src_inode = src_file_ino; 846 filemod_data->dst_inode = dst_file_ino; 847 filemod_data->src_device_id = src_device_id; 848 filemod_data->dst_device_id = dst_device_id; 849 filemod_data->src_filepath_length = 0; 850 filemod_data->dst_filepath_length = 0; 851 852 void* payload = populate_var_metadata(&filemod_data->meta, task, pid, 853 filemod_data->payload); 854 payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload); 855 856 size_t len = read_absolute_file_path_from_dentry(old_dentry, payload); 857 if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) { 858 payload += len; 859 filemod_data->src_filepath_length = len; 860 } 861 862 len = read_absolute_file_path_from_dentry(new_dentry, payload); 863 if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) { 864 payload += len; 865 filemod_data->dst_filepath_length = len; 866 } 867 868 bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta); 869 unsigned long data_len = payload - (void*)filemod_data; 870 data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len; 871 bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len); 872 out: 873 bpf_stats_exit(&stats_ctx); 874 return 0; 875 } 876 877 SEC("kprobe/vfs_symlink") 878 int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry, 879 const char* oldname) 880 { 881 struct bpf_func_stats_ctx stats_ctx; 882 bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_symlink); 883 884 u32 dst_device_id = 0; 885 u64 dst_file_ino = 0; 886 if (!is_dentry_allowed_for_filemod(dentry, &dst_device_id, &dst_file_ino)) 887 goto out; 888 889 int zero = 0; 890 struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero); 891 if (!filemod_data) 892 goto out; 893 894 u32 pid = get_userspace_pid(); 895 struct task_struct* task = (struct task_struct*)bpf_get_current_task(); 896 897 filemod_data->meta.type = FILEMOD_EVENT; 898 filemod_data->fmod_type = FMOD_SYMLINK; 899 filemod_data->dst_flags = 0; 900 filemod_data->src_inode = 0; 901 filemod_data->dst_inode = dst_file_ino; 902 filemod_data->src_device_id = 0; 903 filemod_data->dst_device_id = dst_device_id; 904 filemod_data->src_filepath_length = 0; 905 filemod_data->dst_filepath_length = 0; 906 907 void* payload = populate_var_metadata(&filemod_data->meta, task, pid, 908 filemod_data->payload); 909 payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload); 910 911 size_t len = bpf_probe_read_kernel_str(payload, MAX_FILEPATH_LENGTH, 912 oldname); 913 if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) { 914 payload += len; 915 filemod_data->src_filepath_length = len; 916 } 917 len = read_absolute_file_path_from_dentry(dentry, payload); 918 if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) { 919 payload += len; 920 filemod_data->dst_filepath_length = len; 921 } 922 bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta); 923 unsigned long data_len = payload - (void*)filemod_data; 924 data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len; 925 bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len); 926 out: 927 bpf_stats_exit(&stats_ctx); 928 return 0; 929 } 930 931 SEC("raw_tracepoint/sched_process_fork") 932 int raw_tracepoint__sched_process_fork(struct bpf_raw_tracepoint_args* ctx) 933 { 934 struct bpf_func_stats_ctx stats_ctx; 935 bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_fork); 936 937 int zero = 0; 938 struct var_fork_data_t* fork_data = bpf_map_lookup_elem(&data_heap, &zero); 939 if (!fork_data) 940 goto out; 941 942 struct task_struct* parent = (struct task_struct*)ctx->args[0]; 943 struct task_struct* child = (struct task_struct*)ctx->args[1]; 944 fork_data->meta.type = FORK_EVENT; 945 946 void* payload = populate_var_metadata(&fork_data->meta, child, 947 BPF_CORE_READ(child, pid), fork_data->payload); 948 fork_data->parent_pid = BPF_CORE_READ(parent, pid); 949 fork_data->parent_exec_id = BPF_CORE_READ(parent, self_exec_id); 950 fork_data->parent_start_time = BPF_CORE_READ(parent, start_time); 951 bpf_stats_pre_submit_var_perf_event(&stats_ctx, &fork_data->meta); 952 953 unsigned long data_len = payload - (void*)fork_data; 954 data_len = data_len > sizeof(*fork_data) ? sizeof(*fork_data) : data_len; 955 bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, fork_data, data_len); 956 out: 957 bpf_stats_exit(&stats_ctx); 958 return 0; 959 } 960 char _license[] SEC("license") = "GPL"; 961