// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include <vmlinux.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

#include "profiler.h"
#include "err.h"
#include "bpf_experimental.h"
#include "bpf_compiler.h"

#ifndef NULL
#define NULL 0
#endif

#define O_WRONLY 00000001
#define O_RDWR 00000002
#define O_DIRECTORY 00200000
#define __O_TMPFILE 020000000
#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
#define S_IFMT 00170000
#define S_IFSOCK 0140000
#define S_IFLNK 0120000
#define S_IFREG 0100000
#define S_IFBLK 0060000
#define S_IFDIR 0040000
#define S_IFCHR 0020000
#define S_IFIFO 0010000
#define S_ISUID 0004000
#define S_ISGID 0002000
#define S_ISVTX 0001000
#define S_ISLNK(m) (((m)&S_IFMT) == S_IFLNK)
#define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR)
#define S_ISCHR(m) (((m)&S_IFMT) == S_IFCHR)
#define S_ISBLK(m) (((m)&S_IFMT) == S_IFBLK)
#define S_ISFIFO(m) (((m)&S_IFMT) == S_IFIFO)
#define S_ISSOCK(m) (((m)&S_IFMT) == S_IFSOCK)

#define KILL_DATA_ARRAY_SIZE 8

struct var_kill_data_arr_t {
	struct var_kill_data_t array[KILL_DATA_ARRAY_SIZE];
};

union any_profiler_data_t {
	struct var_exec_data_t var_exec;
	struct var_kill_data_t var_kill;
	struct var_sysctl_data_t var_sysctl;
	struct var_filemod_data_t var_filemod;
	struct var_fork_data_t var_fork;
	struct var_kill_data_arr_t var_kill_data_arr;
};

volatile struct profiler_config_struct bpf_config = {};

#define FETCH_CGROUPS_FROM_BPF (bpf_config.fetch_cgroups_from_bpf)
#define CGROUP_FS_INODE (bpf_config.cgroup_fs_inode)
#define CGROUP_LOGIN_SESSION_INODE \
	(bpf_config.cgroup_login_session_inode)
#define KILL_SIGNALS (bpf_config.kill_signals_mask)
#define STALE_INFO (bpf_config.stale_info_secs)
#define INODE_FILTER (bpf_config.inode_filter)
#define READ_ENVIRON_FROM_EXEC (bpf_config.read_environ_from_exec)
#define ENABLE_CGROUP_V1_RESOLVER (bpf_config.enable_cgroup_v1_resolver)

/* Local "flavor" definitions mirroring the kernfs layout of older kernels;
 * the CO-RE field-existence checks below pick whichever layout the running
 * kernel actually has.
 */
struct kernfs_iattrs___52 {
	struct iattr ia_iattr;
};

struct kernfs_node___52 {
	union /* kernfs_node_id */ {
		struct {
			u32 ino;
			u32 generation;
		};
		u64 id;
	} id;
};

struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, 1);
	__type(key, u32);
	__type(value, union any_profiler_data_t);
} data_heap SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
} events SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, KILL_DATA_ARRAY_SIZE);
	__type(key, u32);
	__type(value, struct var_kill_data_arr_t);
} var_tpid_to_data SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, profiler_bpf_max_function_id);
	__type(key, u32);
	__type(value, struct bpf_func_stats_data);
} bpf_func_stats SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, bool);
	__uint(max_entries, 16);
} allowed_devices SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u64);
	__type(value, bool);
	__uint(max_entries, 1024);
} allowed_file_inodes SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u64);
	__type(value, bool);
	__uint(max_entries, 1024);
} allowed_directory_inodes SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, bool);
	__uint(max_entries, 16);
} disallowed_exec_inodes SEC(".maps");

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(arr) (int)(sizeof(arr) / sizeof(arr[0]))
#endif

static INLINE bool IS_ERR(const void* ptr)
{
	return IS_ERR_VALUE((unsigned long)ptr);
}

static INLINE u32 get_userspace_pid()
{
	return bpf_get_current_pid_tgid() >> 32;
}

static INLINE bool is_init_process(u32 tgid)
{
	return tgid == 1 || tgid == 0;
}

/* Copy up to @max bytes from kernel memory @src into @dst; returns the
 * clamped length, or 0 if the probe read fails.
 */
static INLINE unsigned long
probe_read_lim(void* dst, void* src, unsigned long len, unsigned long max)
{
	len = len < max ? len : max;
	if (len > 1) {
		if (bpf_probe_read_kernel(dst, len, src))
			return 0;
	} else if (len == 1) {
		if (bpf_probe_read_kernel(dst, 1, src))
			return 0;
	}
	return len;
}

static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct,
				     int spid)
{
#ifdef UNROLL
	__pragma_loop_unroll
#endif
	for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
		if (arr_struct->array[i].meta.pid == spid)
			return i;
	return -1;
}

static INLINE void populate_ancestors(struct task_struct* task,
				      struct ancestors_data_t* ancestors_data)
{
	struct task_struct* parent = task;
	u32 num_ancestors, ppid;

	ancestors_data->num_ancestors = 0;
#ifdef UNROLL
	__pragma_loop_unroll
#endif
	for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) {
		parent = BPF_CORE_READ(parent, real_parent);
		if (parent == NULL)
			break;
		ppid = BPF_CORE_READ(parent, tgid);
		if (is_init_process(ppid))
			break;
		ancestors_data->ancestor_pids[num_ancestors] = ppid;
		ancestors_data->ancestor_exec_ids[num_ancestors] =
			BPF_CORE_READ(parent, self_exec_id);
		ancestors_data->ancestor_start_times[num_ancestors] =
			BPF_CORE_READ(parent, start_time);
		ancestors_data->num_ancestors = num_ancestors;
	}
}

/* Append the name of each kernfs node from @cgroup_node up to the hierarchy
 * root into @payload; *root_pos records the payload offset at which
 * @cgroup_root_node was encountered.
 */
static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node,
					  struct kernfs_node* cgroup_root_node,
					  void* payload,
					  int* root_pos)
{
	void* payload_start = payload;
	size_t filepart_length;

#ifdef UNROLL
	__pragma_loop_unroll
#endif
	for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
		filepart_length =
			bpf_probe_read_kernel_str(payload, MAX_PATH,
						  BPF_CORE_READ(cgroup_node, name));
		if (!cgroup_node)
			return payload;
		if (cgroup_node == cgroup_root_node)
			*root_pos = payload - payload_start;
		if (bpf_cmp_likely(filepart_length, <=, MAX_PATH)) {
			payload += filepart_length;
		}
		cgroup_node = BPF_CORE_READ(cgroup_node, parent);
	}
	return payload;
}

static ino_t get_inode_from_kernfs(struct kernfs_node* node)
{
	struct kernfs_node___52* node52 = (void*)node;

	if (bpf_core_field_exists(node52->id.ino)) {
		barrier_var(node52);
		return BPF_CORE_READ(node52, id.ino);
	} else {
		barrier_var(node);
		return (u64)BPF_CORE_READ(node, id);
	}
}

extern bool CONFIG_CGROUP_PIDS __kconfig __weak;
enum cgroup_subsys_id___local {
	pids_cgrp_id___local = 123, /* value doesn't matter */
};

static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
					 struct task_struct* task,
					 void* payload)
{
	struct kernfs_node* root_kernfs =
		BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn);
	struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);

#if __has_builtin(__builtin_preserve_enum_value)
	if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) {
		int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local,
						  pids_cgrp_id___local);
#ifdef UNROLL
		__pragma_loop_unroll
#endif
		for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
			struct cgroup_subsys_state* subsys =
				BPF_CORE_READ(task, cgroups, subsys[i]);
			if (subsys != NULL) {
				int subsys_id = BPF_CORE_READ(subsys, ss, id);
				if (subsys_id == cgrp_id) {
					proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn);
					root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn);
					break;
				}
			}
		}
	}
#endif

	cgroup_data->cgroup_root_inode = get_inode_from_kernfs(root_kernfs);
	cgroup_data->cgroup_proc_inode = get_inode_from_kernfs(proc_kernfs);

	if (bpf_core_field_exists(root_kernfs->iattr->ia_mtime)) {
		cgroup_data->cgroup_root_mtime =
			BPF_CORE_READ(root_kernfs, iattr, ia_mtime.tv_nsec);
		cgroup_data->cgroup_proc_mtime =
			BPF_CORE_READ(proc_kernfs, iattr, ia_mtime.tv_nsec);
	} else {
		struct kernfs_iattrs___52* root_iattr =
			(struct kernfs_iattrs___52*)BPF_CORE_READ(root_kernfs, iattr);
		cgroup_data->cgroup_root_mtime =
			BPF_CORE_READ(root_iattr, ia_iattr.ia_mtime.tv_nsec);

		struct kernfs_iattrs___52* proc_iattr =
			(struct kernfs_iattrs___52*)BPF_CORE_READ(proc_kernfs, iattr);
		cgroup_data->cgroup_proc_mtime =
			BPF_CORE_READ(proc_iattr, ia_iattr.ia_mtime.tv_nsec);
	}

	cgroup_data->cgroup_root_length = 0;
	cgroup_data->cgroup_proc_length = 0;
	cgroup_data->cgroup_full_length = 0;

	size_t cgroup_root_length =
		bpf_probe_read_kernel_str(payload, MAX_PATH,
					  BPF_CORE_READ(root_kernfs, name));
	if (bpf_cmp_likely(cgroup_root_length, <=, MAX_PATH)) {
		cgroup_data->cgroup_root_length = cgroup_root_length;
		payload += cgroup_root_length;
	}

	size_t cgroup_proc_length =
		bpf_probe_read_kernel_str(payload, MAX_PATH,
					  BPF_CORE_READ(proc_kernfs, name));
	if (bpf_cmp_likely(cgroup_proc_length, <=, MAX_PATH)) {
		cgroup_data->cgroup_proc_length = cgroup_proc_length;
		payload += cgroup_proc_length;
	}

	if (FETCH_CGROUPS_FROM_BPF) {
		cgroup_data->cgroup_full_path_root_pos = -1;
		void* payload_end_pos = read_full_cgroup_path(proc_kernfs, root_kernfs, payload,
							      &cgroup_data->cgroup_full_path_root_pos);
		cgroup_data->cgroup_full_length = payload_end_pos - payload;
		payload = payload_end_pos;
	}

	return (void*)payload;
}

static INLINE void* populate_var_metadata(struct var_metadata_t* metadata,
					  struct task_struct* task,
					  u32 pid, void* payload)
{
	u64 uid_gid = bpf_get_current_uid_gid();

	metadata->uid = (u32)uid_gid;
	metadata->gid = uid_gid >> 32;
	metadata->pid = pid;
	metadata->exec_id = BPF_CORE_READ(task, self_exec_id);
	metadata->start_time = BPF_CORE_READ(task, start_time);
	metadata->comm_length = 0;

	size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
	if (bpf_cmp_likely(comm_length, <=, TASK_COMM_LEN)) {
		metadata->comm_length = comm_length;
		payload += comm_length;
	}

	return (void*)payload;
}

static INLINE struct var_kill_data_t*
get_var_kill_data(struct pt_regs* ctx, int spid, int tpid, int sig)
{
	int zero = 0;
	struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);

	if (kill_data == NULL)
		return NULL;
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	void* payload = populate_var_metadata(&kill_data->meta, task, spid, kill_data->payload);
	payload = populate_cgroup_info(&kill_data->cgroup_data, task, payload);
	size_t payload_length = payload - (void*)kill_data->payload;
	kill_data->payload_length = payload_length;
	populate_ancestors(task, &kill_data->ancestors_info);
	kill_data->meta.type = KILL_EVENT;
	kill_data->kill_target_pid = tpid;
	kill_data->kill_sig = sig;
	kill_data->kill_count = 1;
	kill_data->last_kill_time = bpf_ktime_get_ns();
	return kill_data;
}

static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig)
{
	if ((KILL_SIGNALS & (1ULL << sig)) == 0)
		return 0;

	u32 spid = get_userspace_pid();
	struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);

	if (arr_struct == NULL) {
		struct var_kill_data_t* kill_data = get_var_kill_data(ctx, spid, tpid, sig);
		int zero = 0;

		if (kill_data == NULL)
			return 0;
		arr_struct = bpf_map_lookup_elem(&data_heap, &zero);
		if (arr_struct == NULL)
			return 0;
		bpf_probe_read_kernel(&arr_struct->array[0],
				      sizeof(arr_struct->array[0]), kill_data);
	} else {
		int index = get_var_spid_index(arr_struct, spid);

		if (index == -1) {
			struct var_kill_data_t* kill_data =
				get_var_kill_data(ctx, spid, tpid, sig);
			if (kill_data == NULL)
				return 0;
#ifdef UNROLL
			__pragma_loop_unroll
#endif
			for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
				if (arr_struct->array[i].meta.pid == 0) {
					bpf_probe_read_kernel(&arr_struct->array[i],
							      sizeof(arr_struct->array[i]),
							      kill_data);
					bpf_map_update_elem(&var_tpid_to_data, &tpid,
							    arr_struct, 0);

					return 0;
				}
			return 0;
		}

		struct var_kill_data_t* kill_data = &arr_struct->array[index];

		u64 delta_sec =
			(bpf_ktime_get_ns() - kill_data->last_kill_time) / 1000000000;

		if (delta_sec < STALE_INFO) {
			kill_data->kill_count++;
			kill_data->last_kill_time = bpf_ktime_get_ns();
			bpf_probe_read_kernel(&arr_struct->array[index],
					      sizeof(arr_struct->array[index]),
					      kill_data);
		} else {
			struct var_kill_data_t* kill_data =
				get_var_kill_data(ctx, spid, tpid, sig);
			if (kill_data == NULL)
				return 0;
			bpf_probe_read_kernel(&arr_struct->array[index],
					      sizeof(arr_struct->array[index]),
					      kill_data);
		}
	}
	bpf_map_update_elem(&var_tpid_to_data, &tpid, arr_struct, 0);
	return 0;
}

static INLINE void bpf_stats_enter(struct bpf_func_stats_ctx* bpf_stat_ctx,
				   enum bpf_function_id func_id)
{
	int func_id_key = func_id;

	bpf_stat_ctx->start_time_ns = bpf_ktime_get_ns();
	bpf_stat_ctx->bpf_func_stats_data_val =
		bpf_map_lookup_elem(&bpf_func_stats, &func_id_key);
	if (bpf_stat_ctx->bpf_func_stats_data_val)
		bpf_stat_ctx->bpf_func_stats_data_val->num_executions++;
}

static INLINE void bpf_stats_exit(struct bpf_func_stats_ctx* bpf_stat_ctx)
{
	if (bpf_stat_ctx->bpf_func_stats_data_val)
		bpf_stat_ctx->bpf_func_stats_data_val->time_elapsed_ns +=
			bpf_ktime_get_ns() - bpf_stat_ctx->start_time_ns;
}

static INLINE void
bpf_stats_pre_submit_var_perf_event(struct bpf_func_stats_ctx* bpf_stat_ctx,
				    struct var_metadata_t* meta)
{
	if (bpf_stat_ctx->bpf_func_stats_data_val) {
		bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events++;
		meta->bpf_stats_num_perf_events =
			bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events;
	}
	meta->bpf_stats_start_ktime_ns = bpf_stat_ctx->start_time_ns;
	meta->cpu_id = bpf_get_smp_processor_id();
}

static INLINE size_t
read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload)
{
	size_t length = 0;
	size_t filepart_length;
	struct dentry* parent_dentry;

#ifdef UNROLL
	__pragma_loop_unroll
#endif
	for (int i = 0; i < MAX_PATH_DEPTH; i++) {
		filepart_length =
			bpf_probe_read_kernel_str(payload, MAX_PATH,
						  BPF_CORE_READ(filp_dentry, d_name.name));
		bpf_nop_mov(filepart_length);
		if (bpf_cmp_unlikely(filepart_length, >, MAX_PATH))
			break;
		payload += filepart_length;
		length += filepart_length;

		parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
		if (filp_dentry == parent_dentry)
			break;
		filp_dentry = parent_dentry;
	}

	return length;
}

static INLINE bool
is_ancestor_in_allowed_inodes(struct dentry* filp_dentry)
{
	struct dentry* parent_dentry;
#ifdef UNROLL
	__pragma_loop_unroll
#endif
	for (int i = 0; i < MAX_PATH_DEPTH; i++) {
		u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino);
		bool* allowed_dir = bpf_map_lookup_elem(&allowed_directory_inodes, &dir_ino);

		if (allowed_dir != NULL)
			return true;
		parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
		if (filp_dentry == parent_dentry)
			break;
		filp_dentry = parent_dentry;
	}
	return false;
}

static INLINE bool is_dentry_allowed_for_filemod(struct dentry* file_dentry,
						 u32* device_id,
						 u64* file_ino)
{
	u32 dev_id = BPF_CORE_READ(file_dentry, d_sb, s_dev);
	*device_id = dev_id;
	bool* allowed_device = bpf_map_lookup_elem(&allowed_devices, &dev_id);

	if (allowed_device == NULL)
		return false;

	u64 ino = BPF_CORE_READ(file_dentry, d_inode, i_ino);
	*file_ino = ino;
	bool* allowed_file = bpf_map_lookup_elem(&allowed_file_inodes, &ino);

	if (allowed_file == NULL)
		if (!is_ancestor_in_allowed_inodes(BPF_CORE_READ(file_dentry, d_parent)))
			return false;
	return true;
}

SEC("kprobe/proc_sys_write")
ssize_t BPF_KPROBE(kprobe__proc_sys_write,
		   struct file* filp, const char* buf,
		   size_t count, loff_t* ppos)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_proc_sys_write);

	u32 pid = get_userspace_pid();
	int zero = 0;
	struct var_sysctl_data_t* sysctl_data =
		bpf_map_lookup_elem(&data_heap, &zero);
	if (!sysctl_data)
		goto out;

	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
	sysctl_data->meta.type = SYSCTL_EVENT;
	void* payload = populate_var_metadata(&sysctl_data->meta, task, pid, sysctl_data->payload);
	payload = populate_cgroup_info(&sysctl_data->cgroup_data, task, payload);

	populate_ancestors(task, &sysctl_data->ancestors_info);

	sysctl_data->sysctl_val_length = 0;
	sysctl_data->sysctl_path_length = 0;

	size_t sysctl_val_length = bpf_probe_read_kernel_str(payload,
							     CTL_MAXNAME, buf);
	if (bpf_cmp_likely(sysctl_val_length, <=, CTL_MAXNAME)) {
		sysctl_data->sysctl_val_length = sysctl_val_length;
		payload += sysctl_val_length;
	}

	size_t sysctl_path_length =
		bpf_probe_read_kernel_str(payload, MAX_PATH,
					  BPF_CORE_READ(filp, f_path.dentry,
							d_name.name));
	if (bpf_cmp_likely(sysctl_path_length, <=, MAX_PATH)) {
		sysctl_data->sysctl_path_length = sysctl_path_length;
		payload += sysctl_path_length;
	}

	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &sysctl_data->meta);
	unsigned long data_len = payload - (void*)sysctl_data;
	data_len = data_len > sizeof(struct var_sysctl_data_t)
		   ? sizeof(struct var_sysctl_data_t)
		   : data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, sysctl_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}

SEC("tracepoint/syscalls/sys_enter_kill")
int tracepoint__syscalls__sys_enter_kill(struct syscall_trace_enter* ctx)
{
	struct bpf_func_stats_ctx stats_ctx;

	bpf_stats_enter(&stats_ctx, profiler_bpf_sys_enter_kill);
	int pid = ctx->args[0];
	int sig = ctx->args[1];
	int ret = trace_var_sys_kill(ctx, pid, sig);
	bpf_stats_exit(&stats_ctx);
	return ret;
};

SEC("raw_tracepoint/sched_process_exit")
int raw_tracepoint__sched_process_exit(void* ctx)
{
	int zero = 0;
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exit);

	u32 tpid = get_userspace_pid();

	struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
	struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);

	if (arr_struct == NULL || kill_data == NULL)
		goto out;

	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
	struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);

#ifdef UNROLL
	__pragma_loop_unroll
#endif
	for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) {
		struct var_kill_data_t* past_kill_data = &arr_struct->array[i];

		if (past_kill_data != NULL && past_kill_data->kill_target_pid == (pid_t)tpid) {
			bpf_probe_read_kernel(kill_data, sizeof(*past_kill_data),
					      past_kill_data);
			void* payload = kill_data->payload;
			size_t offset = kill_data->payload_length;
			if (offset >= MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN)
				return 0;
			payload += offset;

			kill_data->kill_target_name_length = 0;
			kill_data->kill_target_cgroup_proc_length = 0;

			size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
			if (bpf_cmp_likely(comm_length, <=, TASK_COMM_LEN)) {
				kill_data->kill_target_name_length = comm_length;
				payload += comm_length;
			}

			size_t cgroup_proc_length =
				bpf_probe_read_kernel_str(payload,
							  KILL_TARGET_LEN,
							  BPF_CORE_READ(proc_kernfs, name));
			if (bpf_cmp_likely(cgroup_proc_length, <=, KILL_TARGET_LEN)) {
				kill_data->kill_target_cgroup_proc_length = cgroup_proc_length;
				payload += cgroup_proc_length;
			}

			bpf_stats_pre_submit_var_perf_event(&stats_ctx, &kill_data->meta);
			unsigned long data_len = (void*)payload - (void*)kill_data;
			data_len = data_len > sizeof(struct var_kill_data_t)
					   ? sizeof(struct var_kill_data_t)
					   : data_len;
			bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, kill_data, data_len);
		}
	}
	bpf_map_delete_elem(&var_tpid_to_data, &tpid);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}

SEC("raw_tracepoint/sched_process_exec")
int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exec);

	struct linux_binprm* bprm = (struct linux_binprm*)ctx->args[2];
	u64 inode = BPF_CORE_READ(bprm, file, f_inode, i_ino);

	bool* should_filter_binprm = bpf_map_lookup_elem(&disallowed_exec_inodes, &inode);
	if (should_filter_binprm != NULL)
		goto out;

	int zero = 0;
	struct var_exec_data_t* proc_exec_data = bpf_map_lookup_elem(&data_heap, &zero);
	if (!proc_exec_data)
		goto out;

	if (INODE_FILTER && inode != INODE_FILTER)
		return 0;

	u32 pid = get_userspace_pid();
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	proc_exec_data->meta.type = EXEC_EVENT;
	proc_exec_data->bin_path_length = 0;
	proc_exec_data->cmdline_length = 0;
	proc_exec_data->environment_length = 0;
	void* payload = populate_var_metadata(&proc_exec_data->meta, task, pid,
					      proc_exec_data->payload);
	payload = populate_cgroup_info(&proc_exec_data->cgroup_data, task, payload);

	struct task_struct* parent_task = BPF_CORE_READ(task, real_parent);
	proc_exec_data->parent_pid = BPF_CORE_READ(parent_task, tgid);
	proc_exec_data->parent_uid = BPF_CORE_READ(parent_task, real_cred, uid.val);
	proc_exec_data->parent_exec_id = BPF_CORE_READ(parent_task, self_exec_id);
	proc_exec_data->parent_start_time = BPF_CORE_READ(parent_task, start_time);

	const char* filename = BPF_CORE_READ(bprm, filename);
	size_t bin_path_length =
		bpf_probe_read_kernel_str(payload, MAX_FILENAME_LEN, filename);
	if (bpf_cmp_likely(bin_path_length, <=, MAX_FILENAME_LEN)) {
		proc_exec_data->bin_path_length = bin_path_length;
		payload += bin_path_length;
	}

	void* arg_start = (void*)BPF_CORE_READ(task, mm, arg_start);
	void* arg_end = (void*)BPF_CORE_READ(task, mm, arg_end);
	unsigned int cmdline_length = probe_read_lim(payload, arg_start,
						     arg_end - arg_start, MAX_ARGS_LEN);

	if (bpf_cmp_likely(cmdline_length, <=, MAX_ARGS_LEN)) {
		proc_exec_data->cmdline_length = cmdline_length;
		payload += cmdline_length;
	}

	if (READ_ENVIRON_FROM_EXEC) {
		void* env_start = (void*)BPF_CORE_READ(task, mm, env_start);
		void* env_end = (void*)BPF_CORE_READ(task, mm, env_end);
		unsigned long env_len = probe_read_lim(payload, env_start,
						       env_end - env_start, MAX_ENVIRON_LEN);
		if (env_len <= MAX_ENVIRON_LEN) {
			proc_exec_data->environment_length = env_len;
			payload += env_len;
		}
	}

	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &proc_exec_data->meta);
	unsigned long data_len = payload - (void*)proc_exec_data;
	data_len = data_len > sizeof(struct var_exec_data_t)
		   ? sizeof(struct var_exec_data_t)
		   : data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, proc_exec_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}

SEC("kretprobe/do_filp_open")
int kprobe_ret__do_filp_open(struct pt_regs* ctx)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_do_filp_open_ret);

	struct file* filp = (struct file*)PT_REGS_RC_CORE(ctx);

	if (filp == NULL || IS_ERR(filp))
		goto out;
	unsigned int flags = BPF_CORE_READ(filp, f_flags);
	if ((flags & (O_RDWR | O_WRONLY)) == 0)
		goto out;
	if ((flags & O_TMPFILE) > 0)
		goto out;
	struct inode* file_inode = BPF_CORE_READ(filp, f_inode);
	umode_t mode = BPF_CORE_READ(file_inode, i_mode);
	if (S_ISDIR(mode) || S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) ||
	    S_ISSOCK(mode))
		goto out;

	struct dentry* filp_dentry = BPF_CORE_READ(filp, f_path.dentry);
	u32 device_id = 0;
	u64 file_ino = 0;
	if (!is_dentry_allowed_for_filemod(filp_dentry, &device_id, &file_ino))
		goto out;

	int zero = 0;
	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
	if (!filemod_data)
		goto out;

	u32 pid = get_userspace_pid();
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	filemod_data->meta.type = FILEMOD_EVENT;
	filemod_data->fmod_type = FMOD_OPEN;
	filemod_data->dst_flags = flags;
	filemod_data->src_inode = 0;
	filemod_data->dst_inode = file_ino;
	filemod_data->src_device_id = 0;
	filemod_data->dst_device_id = device_id;
	filemod_data->src_filepath_length = 0;
	filemod_data->dst_filepath_length = 0;

	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
					      filemod_data->payload);
	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);

	size_t len = read_absolute_file_path_from_dentry(filp_dentry, payload);
	if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) {
		payload += len;
		filemod_data->dst_filepath_length = len;
	}
	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
	unsigned long data_len = payload - (void*)filemod_data;
	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}

SEC("kprobe/vfs_link")
int BPF_KPROBE(kprobe__vfs_link,
	       struct dentry* old_dentry, struct mnt_idmap *idmap,
	       struct inode* dir, struct dentry* new_dentry,
	       struct inode** delegated_inode)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link);

	u32 src_device_id = 0;
	u64 src_file_ino = 0;
	u32 dst_device_id = 0;
	u64 dst_file_ino = 0;
	if (!is_dentry_allowed_for_filemod(old_dentry, &src_device_id, &src_file_ino) &&
	    !is_dentry_allowed_for_filemod(new_dentry, &dst_device_id, &dst_file_ino))
		goto out;

	int zero = 0;
	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
	if (!filemod_data)
		goto out;

	u32 pid = get_userspace_pid();
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	filemod_data->meta.type = FILEMOD_EVENT;
	filemod_data->fmod_type = FMOD_LINK;
	filemod_data->dst_flags = 0;
	filemod_data->src_inode = src_file_ino;
	filemod_data->dst_inode = dst_file_ino;
	filemod_data->src_device_id = src_device_id;
	filemod_data->dst_device_id = dst_device_id;
	filemod_data->src_filepath_length = 0;
	filemod_data->dst_filepath_length = 0;

	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
					      filemod_data->payload);
	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);

	size_t len = read_absolute_file_path_from_dentry(old_dentry, payload);
	if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) {
		payload += len;
		filemod_data->src_filepath_length = len;
	}

	len = read_absolute_file_path_from_dentry(new_dentry, payload);
	if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) {
		payload += len;
		filemod_data->dst_filepath_length = len;
	}

	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
	unsigned long data_len = payload - (void*)filemod_data;
	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}

SEC("kprobe/vfs_symlink")
int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry,
	       const char* oldname)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_symlink);

	u32 dst_device_id = 0;
	u64 dst_file_ino = 0;
	if (!is_dentry_allowed_for_filemod(dentry, &dst_device_id, &dst_file_ino))
		goto out;

	int zero = 0;
	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
	if (!filemod_data)
		goto out;

	u32 pid = get_userspace_pid();
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	filemod_data->meta.type = FILEMOD_EVENT;
	filemod_data->fmod_type = FMOD_SYMLINK;
	filemod_data->dst_flags = 0;
	filemod_data->src_inode = 0;
	filemod_data->dst_inode = dst_file_ino;
	filemod_data->src_device_id = 0;
	filemod_data->dst_device_id = dst_device_id;
	filemod_data->src_filepath_length = 0;
	filemod_data->dst_filepath_length = 0;

	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
					      filemod_data->payload);
	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);

	size_t len = bpf_probe_read_kernel_str(payload, MAX_FILEPATH_LENGTH,
					       oldname);
	if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) {
		payload += len;
		filemod_data->src_filepath_length = len;
	}
	len = read_absolute_file_path_from_dentry(dentry, payload);
	if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) {
		payload += len;
		filemod_data->dst_filepath_length = len;
	}
	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
	unsigned long data_len = payload - (void*)filemod_data;
	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}

SEC("raw_tracepoint/sched_process_fork")
int raw_tracepoint__sched_process_fork(struct bpf_raw_tracepoint_args* ctx)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_fork);

	int zero = 0;
	struct var_fork_data_t* fork_data = bpf_map_lookup_elem(&data_heap, &zero);
	if (!fork_data)
		goto out;

	struct task_struct* parent = (struct task_struct*)ctx->args[0];
	struct task_struct* child = (struct task_struct*)ctx->args[1];
	fork_data->meta.type = FORK_EVENT;

	void* payload = populate_var_metadata(&fork_data->meta, child,
					      BPF_CORE_READ(child, pid), fork_data->payload);
	fork_data->parent_pid = BPF_CORE_READ(parent, pid);
	fork_data->parent_exec_id = BPF_CORE_READ(parent, self_exec_id);
	fork_data->parent_start_time = BPF_CORE_READ(parent, start_time);
	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &fork_data->meta);

	unsigned long data_len = payload - (void*)fork_data;
	data_len = data_len > sizeof(*fork_data) ? sizeof(*fork_data) : data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, fork_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}
char _license[] SEC("license") = "GPL";