1 // SPDX-License-Identifier: GPL-2.0 2 #include <trace/syscall.h> 3 #include <trace/events/syscalls.h> 4 #include <linux/syscalls.h> 5 #include <linux/slab.h> 6 #include <linux/kernel.h> 7 #include <linux/module.h> /* for MODULE_NAME_LEN via KSYM_SYMBOL_LEN */ 8 #include <linux/ftrace.h> 9 #include <linux/perf_event.h> 10 #include <linux/xarray.h> 11 #include <asm/syscall.h> 12 13 #include "trace_output.h" 14 #include "trace.h" 15 16 static DEFINE_MUTEX(syscall_trace_lock); 17 18 static int syscall_enter_register(struct trace_event_call *event, 19 enum trace_reg type, void *data); 20 static int syscall_exit_register(struct trace_event_call *event, 21 enum trace_reg type, void *data); 22 23 static struct list_head * 24 syscall_get_enter_fields(struct trace_event_call *call) 25 { 26 struct syscall_metadata *entry = call->data; 27 28 return &entry->enter_fields; 29 } 30 31 extern struct syscall_metadata *__start_syscalls_metadata[]; 32 extern struct syscall_metadata *__stop_syscalls_metadata[]; 33 34 static DEFINE_XARRAY(syscalls_metadata_sparse); 35 static struct syscall_metadata **syscalls_metadata; 36 37 #ifndef ARCH_HAS_SYSCALL_MATCH_SYM_NAME 38 static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) 39 { 40 /* 41 * Only compare after the "sys" prefix. Archs that use 42 * syscall wrappers may have syscalls symbols aliases prefixed 43 * with ".SyS" or ".sys" instead of "sys", leading to an unwanted 44 * mismatch. 45 */ 46 return !strcmp(sym + 3, name + 3); 47 } 48 #endif 49 50 #ifdef ARCH_TRACE_IGNORE_COMPAT_SYSCALLS 51 /* 52 * Some architectures that allow for 32bit applications 53 * to run on a 64bit kernel, do not map the syscalls for 54 * the 32bit tasks the same as they do for 64bit tasks. 55 * 56 * *cough*x86*cough* 57 * 58 * In such a case, instead of reporting the wrong syscalls, 59 * simply ignore them. 60 * 61 * For an arch to ignore the compat syscalls it needs to 62 * define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS as well as 63 * define the function arch_trace_is_compat_syscall() to let 64 * the tracing system know that it should ignore it. 65 */ 66 static int 67 trace_get_syscall_nr(struct task_struct *task, struct pt_regs *regs) 68 { 69 if (unlikely(arch_trace_is_compat_syscall(regs))) 70 return -1; 71 72 return syscall_get_nr(task, regs); 73 } 74 #else 75 static inline int 76 trace_get_syscall_nr(struct task_struct *task, struct pt_regs *regs) 77 { 78 return syscall_get_nr(task, regs); 79 } 80 #endif /* ARCH_TRACE_IGNORE_COMPAT_SYSCALLS */ 81 82 static __init struct syscall_metadata * 83 find_syscall_meta(unsigned long syscall) 84 { 85 struct syscall_metadata **start; 86 struct syscall_metadata **stop; 87 char str[KSYM_SYMBOL_LEN]; 88 89 90 start = __start_syscalls_metadata; 91 stop = __stop_syscalls_metadata; 92 kallsyms_lookup(syscall, NULL, NULL, NULL, str); 93 94 if (arch_syscall_match_sym_name(str, "sys_ni_syscall")) 95 return NULL; 96 97 for ( ; start < stop; start++) { 98 if ((*start)->name && arch_syscall_match_sym_name(str, (*start)->name)) 99 return *start; 100 } 101 return NULL; 102 } 103 104 static struct syscall_metadata *syscall_nr_to_meta(int nr) 105 { 106 if (IS_ENABLED(CONFIG_HAVE_SPARSE_SYSCALL_NR)) 107 return xa_load(&syscalls_metadata_sparse, (unsigned long)nr); 108 109 if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) 110 return NULL; 111 112 return syscalls_metadata[nr]; 113 } 114 115 const char *get_syscall_name(int syscall) 116 { 117 struct syscall_metadata *entry; 118 119 entry = syscall_nr_to_meta(syscall); 120 if (!entry) 121 return NULL; 122 123 return entry->name; 124 } 125 126 static enum print_line_t 127 print_syscall_enter(struct trace_iterator *iter, int flags, 128 struct trace_event *event) 129 { 130 struct trace_array *tr = iter->tr; 131 struct trace_seq *s = &iter->seq; 132 struct trace_entry *ent = iter->ent; 133 struct syscall_trace_enter *trace; 134 struct syscall_metadata *entry; 135 int i, syscall; 136 137 trace = (typeof(trace))ent; 138 syscall = trace->nr; 139 entry = syscall_nr_to_meta(syscall); 140 141 if (!entry) 142 goto end; 143 144 if (entry->enter_event->event.type != ent->type) { 145 WARN_ON_ONCE(1); 146 goto end; 147 } 148 149 trace_seq_printf(s, "%s(", entry->name); 150 151 for (i = 0; i < entry->nb_args; i++) { 152 153 if (trace_seq_has_overflowed(s)) 154 goto end; 155 156 /* parameter types */ 157 if (tr && tr->trace_flags & TRACE_ITER_VERBOSE) 158 trace_seq_printf(s, "%s ", entry->types[i]); 159 160 /* parameter values */ 161 trace_seq_printf(s, "%s: %lx%s", entry->args[i], 162 trace->args[i], 163 i == entry->nb_args - 1 ? "" : ", "); 164 } 165 166 trace_seq_putc(s, ')'); 167 end: 168 trace_seq_putc(s, '\n'); 169 170 return trace_handle_return(s); 171 } 172 173 static enum print_line_t 174 print_syscall_exit(struct trace_iterator *iter, int flags, 175 struct trace_event *event) 176 { 177 struct trace_seq *s = &iter->seq; 178 struct trace_entry *ent = iter->ent; 179 struct syscall_trace_exit *trace; 180 int syscall; 181 struct syscall_metadata *entry; 182 183 trace = (typeof(trace))ent; 184 syscall = trace->nr; 185 entry = syscall_nr_to_meta(syscall); 186 187 if (!entry) { 188 trace_seq_putc(s, '\n'); 189 goto out; 190 } 191 192 if (entry->exit_event->event.type != ent->type) { 193 WARN_ON_ONCE(1); 194 return TRACE_TYPE_UNHANDLED; 195 } 196 197 trace_seq_printf(s, "%s -> 0x%lx\n", entry->name, 198 trace->ret); 199 200 out: 201 return trace_handle_return(s); 202 } 203 204 #define SYSCALL_FIELD(_type, _name) { \ 205 .type = #_type, .name = #_name, \ 206 .size = sizeof(_type), .align = __alignof__(_type), \ 207 .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER } 208 209 static int __init 210 __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len) 211 { 212 int i; 213 int pos = 0; 214 215 /* When len=0, we just calculate the needed length */ 216 #define LEN_OR_ZERO (len ? len - pos : 0) 217 218 pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); 219 for (i = 0; i < entry->nb_args; i++) { 220 pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s", 221 entry->args[i], sizeof(unsigned long), 222 i == entry->nb_args - 1 ? "" : ", "); 223 } 224 pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); 225 226 for (i = 0; i < entry->nb_args; i++) { 227 pos += snprintf(buf + pos, LEN_OR_ZERO, 228 ", ((unsigned long)(REC->%s))", entry->args[i]); 229 } 230 231 #undef LEN_OR_ZERO 232 233 /* return the length of print_fmt */ 234 return pos; 235 } 236 237 static int __init set_syscall_print_fmt(struct trace_event_call *call) 238 { 239 char *print_fmt; 240 int len; 241 struct syscall_metadata *entry = call->data; 242 243 if (entry->enter_event != call) { 244 call->print_fmt = "\"0x%lx\", REC->ret"; 245 return 0; 246 } 247 248 /* First: called with 0 length to calculate the needed length */ 249 len = __set_enter_print_fmt(entry, NULL, 0); 250 251 print_fmt = kmalloc(len + 1, GFP_KERNEL); 252 if (!print_fmt) 253 return -ENOMEM; 254 255 /* Second: actually write the @print_fmt */ 256 __set_enter_print_fmt(entry, print_fmt, len + 1); 257 call->print_fmt = print_fmt; 258 259 return 0; 260 } 261 262 static void __init free_syscall_print_fmt(struct trace_event_call *call) 263 { 264 struct syscall_metadata *entry = call->data; 265 266 if (entry->enter_event == call) 267 kfree(call->print_fmt); 268 } 269 270 static int __init syscall_enter_define_fields(struct trace_event_call *call) 271 { 272 struct syscall_trace_enter trace; 273 struct syscall_metadata *meta = call->data; 274 int offset = offsetof(typeof(trace), args); 275 int ret = 0; 276 int i; 277 278 for (i = 0; i < meta->nb_args; i++) { 279 ret = trace_define_field(call, meta->types[i], 280 meta->args[i], offset, 281 sizeof(unsigned long), 0, 282 FILTER_OTHER); 283 if (ret) 284 break; 285 offset += sizeof(unsigned long); 286 } 287 288 return ret; 289 } 290 291 static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) 292 { 293 struct trace_array *tr = data; 294 struct trace_event_file *trace_file; 295 struct syscall_trace_enter *entry; 296 struct syscall_metadata *sys_data; 297 struct trace_event_buffer fbuffer; 298 unsigned long args[6]; 299 int syscall_nr; 300 int size; 301 302 /* 303 * Syscall probe called with preemption enabled, but the ring 304 * buffer and per-cpu data require preemption to be disabled. 305 */ 306 might_fault(); 307 guard(preempt_notrace)(); 308 309 syscall_nr = trace_get_syscall_nr(current, regs); 310 if (syscall_nr < 0 || syscall_nr >= NR_syscalls) 311 return; 312 313 /* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE) */ 314 trace_file = rcu_dereference_sched(tr->enter_syscall_files[syscall_nr]); 315 if (!trace_file) 316 return; 317 318 if (trace_trigger_soft_disabled(trace_file)) 319 return; 320 321 sys_data = syscall_nr_to_meta(syscall_nr); 322 if (!sys_data) 323 return; 324 325 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; 326 327 entry = trace_event_buffer_reserve(&fbuffer, trace_file, size); 328 if (!entry) 329 return; 330 331 entry = ring_buffer_event_data(fbuffer.event); 332 entry->nr = syscall_nr; 333 syscall_get_arguments(current, regs, args); 334 memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args); 335 336 trace_event_buffer_commit(&fbuffer); 337 } 338 339 static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) 340 { 341 struct trace_array *tr = data; 342 struct trace_event_file *trace_file; 343 struct syscall_trace_exit *entry; 344 struct syscall_metadata *sys_data; 345 struct trace_event_buffer fbuffer; 346 int syscall_nr; 347 348 /* 349 * Syscall probe called with preemption enabled, but the ring 350 * buffer and per-cpu data require preemption to be disabled. 351 */ 352 might_fault(); 353 guard(preempt_notrace)(); 354 355 syscall_nr = trace_get_syscall_nr(current, regs); 356 if (syscall_nr < 0 || syscall_nr >= NR_syscalls) 357 return; 358 359 /* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE()) */ 360 trace_file = rcu_dereference_sched(tr->exit_syscall_files[syscall_nr]); 361 if (!trace_file) 362 return; 363 364 if (trace_trigger_soft_disabled(trace_file)) 365 return; 366 367 sys_data = syscall_nr_to_meta(syscall_nr); 368 if (!sys_data) 369 return; 370 371 entry = trace_event_buffer_reserve(&fbuffer, trace_file, sizeof(*entry)); 372 if (!entry) 373 return; 374 375 entry = ring_buffer_event_data(fbuffer.event); 376 entry->nr = syscall_nr; 377 entry->ret = syscall_get_return_value(current, regs); 378 379 trace_event_buffer_commit(&fbuffer); 380 } 381 382 static int reg_event_syscall_enter(struct trace_event_file *file, 383 struct trace_event_call *call) 384 { 385 struct trace_array *tr = file->tr; 386 int ret = 0; 387 int num; 388 389 num = ((struct syscall_metadata *)call->data)->syscall_nr; 390 if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls)) 391 return -ENOSYS; 392 mutex_lock(&syscall_trace_lock); 393 if (!tr->sys_refcount_enter) 394 ret = register_trace_sys_enter(ftrace_syscall_enter, tr); 395 if (!ret) { 396 rcu_assign_pointer(tr->enter_syscall_files[num], file); 397 tr->sys_refcount_enter++; 398 } 399 mutex_unlock(&syscall_trace_lock); 400 return ret; 401 } 402 403 static void unreg_event_syscall_enter(struct trace_event_file *file, 404 struct trace_event_call *call) 405 { 406 struct trace_array *tr = file->tr; 407 int num; 408 409 num = ((struct syscall_metadata *)call->data)->syscall_nr; 410 if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls)) 411 return; 412 mutex_lock(&syscall_trace_lock); 413 tr->sys_refcount_enter--; 414 RCU_INIT_POINTER(tr->enter_syscall_files[num], NULL); 415 if (!tr->sys_refcount_enter) 416 unregister_trace_sys_enter(ftrace_syscall_enter, tr); 417 mutex_unlock(&syscall_trace_lock); 418 } 419 420 static int reg_event_syscall_exit(struct trace_event_file *file, 421 struct trace_event_call *call) 422 { 423 struct trace_array *tr = file->tr; 424 int ret = 0; 425 int num; 426 427 num = ((struct syscall_metadata *)call->data)->syscall_nr; 428 if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls)) 429 return -ENOSYS; 430 mutex_lock(&syscall_trace_lock); 431 if (!tr->sys_refcount_exit) 432 ret = register_trace_sys_exit(ftrace_syscall_exit, tr); 433 if (!ret) { 434 rcu_assign_pointer(tr->exit_syscall_files[num], file); 435 tr->sys_refcount_exit++; 436 } 437 mutex_unlock(&syscall_trace_lock); 438 return ret; 439 } 440 441 static void unreg_event_syscall_exit(struct trace_event_file *file, 442 struct trace_event_call *call) 443 { 444 struct trace_array *tr = file->tr; 445 int num; 446 447 num = ((struct syscall_metadata *)call->data)->syscall_nr; 448 if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls)) 449 return; 450 mutex_lock(&syscall_trace_lock); 451 tr->sys_refcount_exit--; 452 RCU_INIT_POINTER(tr->exit_syscall_files[num], NULL); 453 if (!tr->sys_refcount_exit) 454 unregister_trace_sys_exit(ftrace_syscall_exit, tr); 455 mutex_unlock(&syscall_trace_lock); 456 } 457 458 static int __init init_syscall_trace(struct trace_event_call *call) 459 { 460 int id; 461 int num; 462 463 num = ((struct syscall_metadata *)call->data)->syscall_nr; 464 if (num < 0 || num >= NR_syscalls) { 465 pr_debug("syscall %s metadata not mapped, disabling ftrace event\n", 466 ((struct syscall_metadata *)call->data)->name); 467 return -ENOSYS; 468 } 469 470 if (set_syscall_print_fmt(call) < 0) 471 return -ENOMEM; 472 473 id = trace_event_raw_init(call); 474 475 if (id < 0) { 476 free_syscall_print_fmt(call); 477 return id; 478 } 479 480 return id; 481 } 482 483 static struct trace_event_fields __refdata syscall_enter_fields_array[] = { 484 SYSCALL_FIELD(int, __syscall_nr), 485 { .type = TRACE_FUNCTION_TYPE, 486 .define_fields = syscall_enter_define_fields }, 487 {} 488 }; 489 490 struct trace_event_functions enter_syscall_print_funcs = { 491 .trace = print_syscall_enter, 492 }; 493 494 struct trace_event_functions exit_syscall_print_funcs = { 495 .trace = print_syscall_exit, 496 }; 497 498 struct trace_event_class __refdata event_class_syscall_enter = { 499 .system = "syscalls", 500 .reg = syscall_enter_register, 501 .fields_array = syscall_enter_fields_array, 502 .get_fields = syscall_get_enter_fields, 503 .raw_init = init_syscall_trace, 504 }; 505 506 struct trace_event_class __refdata event_class_syscall_exit = { 507 .system = "syscalls", 508 .reg = syscall_exit_register, 509 .fields_array = (struct trace_event_fields[]){ 510 SYSCALL_FIELD(int, __syscall_nr), 511 SYSCALL_FIELD(long, ret), 512 {} 513 }, 514 .fields = LIST_HEAD_INIT(event_class_syscall_exit.fields), 515 .raw_init = init_syscall_trace, 516 }; 517 518 unsigned long __init __weak arch_syscall_addr(int nr) 519 { 520 return (unsigned long)sys_call_table[nr]; 521 } 522 523 void __init init_ftrace_syscalls(void) 524 { 525 struct syscall_metadata *meta; 526 unsigned long addr; 527 int i; 528 void *ret; 529 530 if (!IS_ENABLED(CONFIG_HAVE_SPARSE_SYSCALL_NR)) { 531 syscalls_metadata = kcalloc(NR_syscalls, 532 sizeof(*syscalls_metadata), 533 GFP_KERNEL); 534 if (!syscalls_metadata) { 535 WARN_ON(1); 536 return; 537 } 538 } 539 540 for (i = 0; i < NR_syscalls; i++) { 541 addr = arch_syscall_addr(i); 542 meta = find_syscall_meta(addr); 543 if (!meta) 544 continue; 545 546 meta->syscall_nr = i; 547 548 if (!IS_ENABLED(CONFIG_HAVE_SPARSE_SYSCALL_NR)) { 549 syscalls_metadata[i] = meta; 550 } else { 551 ret = xa_store(&syscalls_metadata_sparse, i, meta, 552 GFP_KERNEL); 553 WARN(xa_is_err(ret), 554 "Syscall memory allocation failed\n"); 555 } 556 557 } 558 } 559 560 #ifdef CONFIG_PERF_EVENTS 561 562 static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls); 563 static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls); 564 static int sys_perf_refcount_enter; 565 static int sys_perf_refcount_exit; 566 567 static int perf_call_bpf_enter(struct trace_event_call *call, struct pt_regs *regs, 568 struct syscall_metadata *sys_data, 569 struct syscall_trace_enter *rec) 570 { 571 struct syscall_tp_t { 572 struct trace_entry ent; 573 int syscall_nr; 574 unsigned long args[SYSCALL_DEFINE_MAXARGS]; 575 } __aligned(8) param; 576 int i; 577 578 BUILD_BUG_ON(sizeof(param.ent) < sizeof(void *)); 579 580 /* bpf prog requires 'regs' to be the first member in the ctx (a.k.a. ¶m) */ 581 perf_fetch_caller_regs(regs); 582 *(struct pt_regs **)¶m = regs; 583 param.syscall_nr = rec->nr; 584 for (i = 0; i < sys_data->nb_args; i++) 585 param.args[i] = rec->args[i]; 586 return trace_call_bpf(call, ¶m); 587 } 588 589 static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) 590 { 591 struct syscall_metadata *sys_data; 592 struct syscall_trace_enter *rec; 593 struct pt_regs *fake_regs; 594 struct hlist_head *head; 595 unsigned long args[6]; 596 bool valid_prog_array; 597 int syscall_nr; 598 int rctx; 599 int size; 600 601 /* 602 * Syscall probe called with preemption enabled, but the ring 603 * buffer and per-cpu data require preemption to be disabled. 604 */ 605 might_fault(); 606 guard(preempt_notrace)(); 607 608 syscall_nr = trace_get_syscall_nr(current, regs); 609 if (syscall_nr < 0 || syscall_nr >= NR_syscalls) 610 return; 611 if (!test_bit(syscall_nr, enabled_perf_enter_syscalls)) 612 return; 613 614 sys_data = syscall_nr_to_meta(syscall_nr); 615 if (!sys_data) 616 return; 617 618 head = this_cpu_ptr(sys_data->enter_event->perf_events); 619 valid_prog_array = bpf_prog_array_valid(sys_data->enter_event); 620 if (!valid_prog_array && hlist_empty(head)) 621 return; 622 623 /* get the size after alignment with the u32 buffer size field */ 624 size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec); 625 size = ALIGN(size + sizeof(u32), sizeof(u64)); 626 size -= sizeof(u32); 627 628 rec = perf_trace_buf_alloc(size, &fake_regs, &rctx); 629 if (!rec) 630 return; 631 632 rec->nr = syscall_nr; 633 syscall_get_arguments(current, regs, args); 634 memcpy(&rec->args, args, sizeof(unsigned long) * sys_data->nb_args); 635 636 if ((valid_prog_array && 637 !perf_call_bpf_enter(sys_data->enter_event, fake_regs, sys_data, rec)) || 638 hlist_empty(head)) { 639 perf_swevent_put_recursion_context(rctx); 640 return; 641 } 642 643 perf_trace_buf_submit(rec, size, rctx, 644 sys_data->enter_event->event.type, 1, regs, 645 head, NULL); 646 } 647 648 static int perf_sysenter_enable(struct trace_event_call *call) 649 { 650 int ret = 0; 651 int num; 652 653 num = ((struct syscall_metadata *)call->data)->syscall_nr; 654 655 mutex_lock(&syscall_trace_lock); 656 if (!sys_perf_refcount_enter) 657 ret = register_trace_sys_enter(perf_syscall_enter, NULL); 658 if (ret) { 659 pr_info("event trace: Could not activate syscall entry trace point"); 660 } else { 661 set_bit(num, enabled_perf_enter_syscalls); 662 sys_perf_refcount_enter++; 663 } 664 mutex_unlock(&syscall_trace_lock); 665 return ret; 666 } 667 668 static void perf_sysenter_disable(struct trace_event_call *call) 669 { 670 int num; 671 672 num = ((struct syscall_metadata *)call->data)->syscall_nr; 673 674 mutex_lock(&syscall_trace_lock); 675 sys_perf_refcount_enter--; 676 clear_bit(num, enabled_perf_enter_syscalls); 677 if (!sys_perf_refcount_enter) 678 unregister_trace_sys_enter(perf_syscall_enter, NULL); 679 mutex_unlock(&syscall_trace_lock); 680 } 681 682 static int perf_call_bpf_exit(struct trace_event_call *call, struct pt_regs *regs, 683 struct syscall_trace_exit *rec) 684 { 685 struct syscall_tp_t { 686 struct trace_entry ent; 687 int syscall_nr; 688 unsigned long ret; 689 } __aligned(8) param; 690 691 /* bpf prog requires 'regs' to be the first member in the ctx (a.k.a. ¶m) */ 692 perf_fetch_caller_regs(regs); 693 *(struct pt_regs **)¶m = regs; 694 param.syscall_nr = rec->nr; 695 param.ret = rec->ret; 696 return trace_call_bpf(call, ¶m); 697 } 698 699 static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) 700 { 701 struct syscall_metadata *sys_data; 702 struct syscall_trace_exit *rec; 703 struct pt_regs *fake_regs; 704 struct hlist_head *head; 705 bool valid_prog_array; 706 int syscall_nr; 707 int rctx; 708 int size; 709 710 /* 711 * Syscall probe called with preemption enabled, but the ring 712 * buffer and per-cpu data require preemption to be disabled. 713 */ 714 might_fault(); 715 guard(preempt_notrace)(); 716 717 syscall_nr = trace_get_syscall_nr(current, regs); 718 if (syscall_nr < 0 || syscall_nr >= NR_syscalls) 719 return; 720 if (!test_bit(syscall_nr, enabled_perf_exit_syscalls)) 721 return; 722 723 sys_data = syscall_nr_to_meta(syscall_nr); 724 if (!sys_data) 725 return; 726 727 head = this_cpu_ptr(sys_data->exit_event->perf_events); 728 valid_prog_array = bpf_prog_array_valid(sys_data->exit_event); 729 if (!valid_prog_array && hlist_empty(head)) 730 return; 731 732 /* We can probably do that at build time */ 733 size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64)); 734 size -= sizeof(u32); 735 736 rec = perf_trace_buf_alloc(size, &fake_regs, &rctx); 737 if (!rec) 738 return; 739 740 rec->nr = syscall_nr; 741 rec->ret = syscall_get_return_value(current, regs); 742 743 if ((valid_prog_array && 744 !perf_call_bpf_exit(sys_data->exit_event, fake_regs, rec)) || 745 hlist_empty(head)) { 746 perf_swevent_put_recursion_context(rctx); 747 return; 748 } 749 750 perf_trace_buf_submit(rec, size, rctx, sys_data->exit_event->event.type, 751 1, regs, head, NULL); 752 } 753 754 static int perf_sysexit_enable(struct trace_event_call *call) 755 { 756 int ret = 0; 757 int num; 758 759 num = ((struct syscall_metadata *)call->data)->syscall_nr; 760 761 mutex_lock(&syscall_trace_lock); 762 if (!sys_perf_refcount_exit) 763 ret = register_trace_sys_exit(perf_syscall_exit, NULL); 764 if (ret) { 765 pr_info("event trace: Could not activate syscall exit trace point"); 766 } else { 767 set_bit(num, enabled_perf_exit_syscalls); 768 sys_perf_refcount_exit++; 769 } 770 mutex_unlock(&syscall_trace_lock); 771 return ret; 772 } 773 774 static void perf_sysexit_disable(struct trace_event_call *call) 775 { 776 int num; 777 778 num = ((struct syscall_metadata *)call->data)->syscall_nr; 779 780 mutex_lock(&syscall_trace_lock); 781 sys_perf_refcount_exit--; 782 clear_bit(num, enabled_perf_exit_syscalls); 783 if (!sys_perf_refcount_exit) 784 unregister_trace_sys_exit(perf_syscall_exit, NULL); 785 mutex_unlock(&syscall_trace_lock); 786 } 787 788 #endif /* CONFIG_PERF_EVENTS */ 789 790 static int syscall_enter_register(struct trace_event_call *event, 791 enum trace_reg type, void *data) 792 { 793 struct trace_event_file *file = data; 794 795 switch (type) { 796 case TRACE_REG_REGISTER: 797 return reg_event_syscall_enter(file, event); 798 case TRACE_REG_UNREGISTER: 799 unreg_event_syscall_enter(file, event); 800 return 0; 801 802 #ifdef CONFIG_PERF_EVENTS 803 case TRACE_REG_PERF_REGISTER: 804 return perf_sysenter_enable(event); 805 case TRACE_REG_PERF_UNREGISTER: 806 perf_sysenter_disable(event); 807 return 0; 808 case TRACE_REG_PERF_OPEN: 809 case TRACE_REG_PERF_CLOSE: 810 case TRACE_REG_PERF_ADD: 811 case TRACE_REG_PERF_DEL: 812 return 0; 813 #endif 814 } 815 return 0; 816 } 817 818 static int syscall_exit_register(struct trace_event_call *event, 819 enum trace_reg type, void *data) 820 { 821 struct trace_event_file *file = data; 822 823 switch (type) { 824 case TRACE_REG_REGISTER: 825 return reg_event_syscall_exit(file, event); 826 case TRACE_REG_UNREGISTER: 827 unreg_event_syscall_exit(file, event); 828 return 0; 829 830 #ifdef CONFIG_PERF_EVENTS 831 case TRACE_REG_PERF_REGISTER: 832 return perf_sysexit_enable(event); 833 case TRACE_REG_PERF_UNREGISTER: 834 perf_sysexit_disable(event); 835 return 0; 836 case TRACE_REG_PERF_OPEN: 837 case TRACE_REG_PERF_CLOSE: 838 case TRACE_REG_PERF_ADD: 839 case TRACE_REG_PERF_DEL: 840 return 0; 841 #endif 842 } 843 return 0; 844 } 845