// SPDX-License-Identifier: GPL-2.0
#include <trace/syscall.h>
#include <trace/events/syscalls.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/module.h>	/* for MODULE_NAME_LEN via KSYM_SYMBOL_LEN */
#include <linux/ftrace.h>
#include <linux/perf_event.h>
#include <linux/xarray.h>
#include <asm/syscall.h>

#include "trace_output.h"
#include "trace.h"

static DEFINE_MUTEX(syscall_trace_lock);

static int syscall_enter_register(struct trace_event_call *event,
				  enum trace_reg type, void *data);
static int syscall_exit_register(struct trace_event_call *event,
				 enum trace_reg type, void *data);

static struct list_head *
syscall_get_enter_fields(struct trace_event_call *call)
{
	struct syscall_metadata *entry = call->data;

	return &entry->enter_fields;
}

extern struct syscall_metadata *__start_syscalls_metadata[];
extern struct syscall_metadata *__stop_syscalls_metadata[];

static DEFINE_XARRAY(syscalls_metadata_sparse);
static struct syscall_metadata **syscalls_metadata;

#ifndef ARCH_HAS_SYSCALL_MATCH_SYM_NAME
static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
{
	/*
	 * Only compare after the "sys" prefix. Archs that use
	 * syscall wrappers may have syscall symbol aliases prefixed
	 * with ".SyS" or ".sys" instead of "sys", leading to an unwanted
	 * mismatch.
	 */
	return !strcmp(sym + 3, name + 3);
}
#endif
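
/*
 * Illustrative note (not part of the original source): because only the
 * bytes after the three-character prefix are compared, a wrapper alias
 * such as "SyS_read" still matches the metadata name "sys_read":
 *
 *	arch_syscall_match_sym_name("SyS_read", "sys_read")
 *		=> !strcmp("_read", "_read") => true
 */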
#ifdef ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
/*
 * Some architectures that allow for 32bit applications
 * to run on a 64bit kernel do not map the syscalls for
 * the 32bit tasks the same as they do for 64bit tasks.
 *
 * *cough*x86*cough*
 *
 * In such a case, instead of reporting the wrong syscalls,
 * simply ignore them.
 *
 * For an arch to ignore the compat syscalls it needs to
 * define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS as well as
 * define the function arch_trace_is_compat_syscall() to let
 * the tracing system know that it should ignore it.
 */
static int
trace_get_syscall_nr(struct task_struct *task, struct pt_regs *regs)
{
	if (unlikely(arch_trace_is_compat_syscall(regs)))
		return -1;

	return syscall_get_nr(task, regs);
}
#else
static inline int
trace_get_syscall_nr(struct task_struct *task, struct pt_regs *regs)
{
	return syscall_get_nr(task, regs);
}
#endif /* ARCH_TRACE_IGNORE_COMPAT_SYSCALLS */

static __init struct syscall_metadata *
find_syscall_meta(unsigned long syscall)
{
	struct syscall_metadata **start;
	struct syscall_metadata **stop;
	char str[KSYM_SYMBOL_LEN];

	start = __start_syscalls_metadata;
	stop = __stop_syscalls_metadata;
	kallsyms_lookup(syscall, NULL, NULL, NULL, str);

	if (arch_syscall_match_sym_name(str, "sys_ni_syscall"))
		return NULL;

	for ( ; start < stop; start++) {
		if ((*start)->name && arch_syscall_match_sym_name(str, (*start)->name))
			return *start;
	}
	return NULL;
}

static struct syscall_metadata *syscall_nr_to_meta(int nr)
{
	if (IS_ENABLED(CONFIG_HAVE_SPARSE_SYSCALL_NR))
		return xa_load(&syscalls_metadata_sparse, (unsigned long)nr);

	if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
		return NULL;

	return syscalls_metadata[nr];
}

const char *get_syscall_name(int syscall)
{
	struct syscall_metadata *entry;

	entry = syscall_nr_to_meta(syscall);
	if (!entry)
		return NULL;

	return entry->name;
}

static enum print_line_t
print_syscall_enter(struct trace_iterator *iter, int flags,
		    struct trace_event *event)
{
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	struct trace_entry *ent = iter->ent;
	struct syscall_trace_enter *trace;
	struct syscall_metadata *entry;
	int i, syscall;

	trace = (typeof(trace))ent;
	syscall = trace->nr;
	entry = syscall_nr_to_meta(syscall);

	if (!entry)
		goto end;

	if (entry->enter_event->event.type != ent->type) {
		WARN_ON_ONCE(1);
		goto end;
	}

	trace_seq_printf(s, "%s(", entry->name);

	for (i = 0; i < entry->nb_args; i++) {

		if (trace_seq_has_overflowed(s))
			goto end;

		if (i)
			trace_seq_puts(s, ", ");

		/* parameter types */
		if (tr && tr->trace_flags & TRACE_ITER_VERBOSE)
			trace_seq_printf(s, "%s ", entry->types[i]);

		/* parameter values */
		if (trace->args[i] < 10)
			trace_seq_printf(s, "%s: %lu", entry->args[i],
					 trace->args[i]);
		else
			trace_seq_printf(s, "%s: 0x%lx", entry->args[i],
					 trace->args[i]);
	}

	trace_seq_putc(s, ')');
end:
	trace_seq_putc(s, '\n');

	return trace_handle_return(s);
}

static enum print_line_t
print_syscall_exit(struct trace_iterator *iter, int flags,
		   struct trace_event *event)
{
	struct trace_seq *s = &iter->seq;
	struct trace_entry *ent = iter->ent;
	struct syscall_trace_exit *trace;
	int syscall;
	struct syscall_metadata *entry;

	trace = (typeof(trace))ent;
	syscall = trace->nr;
	entry = syscall_nr_to_meta(syscall);

	if (!entry) {
		trace_seq_putc(s, '\n');
		goto out;
	}

	if (entry->exit_event->event.type != ent->type) {
		WARN_ON_ONCE(1);
		return TRACE_TYPE_UNHANDLED;
	}

	trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
			 trace->ret);

out:
	return trace_handle_return(s);
}
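
/*
 * Illustrative example (not part of the original source): for a
 * hypothetical three-argument read(2) call, print_syscall_enter() and
 * print_syscall_exit() above render lines like:
 *
 *	sys_read(fd: 3, buf: 0x7ffc8c14e0d0, count: 0x2000)
 *	sys_read -> 0x2000
 *
 * Argument values below 10 are printed in decimal, everything else in
 * hex; with the "verbose" trace option the argument types are printed
 * as well.
 */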
#define SYSCALL_FIELD(_type, _name) {					\
	.type = #_type, .name = #_name,					\
	.size = sizeof(_type), .align = __alignof__(_type),		\
	.is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }

static int __init
__set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
{
	int i;
	int pos = 0;

	/* When len=0, we just calculate the needed length */
#define LEN_OR_ZERO (len ? len - pos : 0)

	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
	for (i = 0; i < entry->nb_args; i++) {
		pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
				entry->args[i], sizeof(unsigned long),
				i == entry->nb_args - 1 ? "" : ", ");
	}
	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");

	for (i = 0; i < entry->nb_args; i++) {
		pos += snprintf(buf + pos, LEN_OR_ZERO,
				", ((unsigned long)(REC->%s))", entry->args[i]);
	}

#undef LEN_OR_ZERO

	/* return the length of print_fmt */
	return pos;
}

static int __init set_syscall_print_fmt(struct trace_event_call *call)
{
	char *print_fmt;
	int len;
	struct syscall_metadata *entry = call->data;

	if (entry->enter_event != call) {
		call->print_fmt = "\"0x%lx\", REC->ret";
		return 0;
	}

	/* First: called with 0 length to calculate the needed length */
	len = __set_enter_print_fmt(entry, NULL, 0);

	print_fmt = kmalloc(len + 1, GFP_KERNEL);
	if (!print_fmt)
		return -ENOMEM;

	/* Second: actually write the @print_fmt */
	__set_enter_print_fmt(entry, print_fmt, len + 1);
	call->print_fmt = print_fmt;

	return 0;
}

static void __init free_syscall_print_fmt(struct trace_event_call *call)
{
	struct syscall_metadata *entry = call->data;

	if (entry->enter_event == call)
		kfree(call->print_fmt);
}

static int __init syscall_enter_define_fields(struct trace_event_call *call)
{
	struct syscall_trace_enter trace;
	struct syscall_metadata *meta = call->data;
	int offset = offsetof(typeof(trace), args);
	int ret = 0;
	int i;

	for (i = 0; i < meta->nb_args; i++) {
		ret = trace_define_field(call, meta->types[i],
					 meta->args[i], offset,
					 sizeof(unsigned long), 0,
					 FILTER_OTHER);
		if (ret)
			break;
		offset += sizeof(unsigned long);
	}

	return ret;
}
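
/*
 * Illustrative example (not part of the original source): for a
 * hypothetical two-argument syscall with args "fd" and "count" on a
 * 64-bit kernel, the two-pass __set_enter_print_fmt() above produces
 * the print_fmt string:
 *
 *	"fd: 0x%08lx, count: 0x%08lx", ((unsigned long)(REC->fd)),
 *	((unsigned long)(REC->count))
 *
 * The first pass (buf == NULL, len == 0) only measures; the second
 * pass writes into a buffer kmalloc()ed to the measured length plus
 * the terminating NUL.
 */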
static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
{
	struct trace_array *tr = data;
	struct trace_event_file *trace_file;
	struct syscall_trace_enter *entry;
	struct syscall_metadata *sys_data;
	struct trace_event_buffer fbuffer;
	unsigned long args[6];
	int syscall_nr;
	int size;

	/*
	 * Syscall probe called with preemption enabled, but the ring
	 * buffer and per-cpu data require preemption to be disabled.
	 */
	might_fault();
	guard(preempt_notrace)();

	syscall_nr = trace_get_syscall_nr(current, regs);
	if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
		return;

	trace_file = READ_ONCE(tr->enter_syscall_files[syscall_nr]);
	if (!trace_file)
		return;

	if (trace_trigger_soft_disabled(trace_file))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;

	entry = trace_event_buffer_reserve(&fbuffer, trace_file, size);
	if (!entry)
		return;

	entry = ring_buffer_event_data(fbuffer.event);
	entry->nr = syscall_nr;
	syscall_get_arguments(current, regs, args);
	memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args);

	trace_event_buffer_commit(&fbuffer);
}

static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
{
	struct trace_array *tr = data;
	struct trace_event_file *trace_file;
	struct syscall_trace_exit *entry;
	struct syscall_metadata *sys_data;
	struct trace_event_buffer fbuffer;
	int syscall_nr;

	/*
	 * Syscall probe called with preemption enabled, but the ring
	 * buffer and per-cpu data require preemption to be disabled.
	 */
	might_fault();
	guard(preempt_notrace)();

	syscall_nr = trace_get_syscall_nr(current, regs);
	if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
		return;

	trace_file = READ_ONCE(tr->exit_syscall_files[syscall_nr]);
	if (!trace_file)
		return;

	if (trace_trigger_soft_disabled(trace_file))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	entry = trace_event_buffer_reserve(&fbuffer, trace_file, sizeof(*entry));
	if (!entry)
		return;

	entry = ring_buffer_event_data(fbuffer.event);
	entry->nr = syscall_nr;
	entry->ret = syscall_get_return_value(current, regs);

	trace_event_buffer_commit(&fbuffer);
}

static int reg_event_syscall_enter(struct trace_event_file *file,
				   struct trace_event_call *call)
{
	struct trace_array *tr = file->tr;
	int ret = 0;
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;
	if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
		return -ENOSYS;
	mutex_lock(&syscall_trace_lock);
	if (!tr->sys_refcount_enter)
		ret = register_trace_sys_enter(ftrace_syscall_enter, tr);
	if (!ret) {
		WRITE_ONCE(tr->enter_syscall_files[num], file);
		tr->sys_refcount_enter++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

static void unreg_event_syscall_enter(struct trace_event_file *file,
				      struct trace_event_call *call)
{
	struct trace_array *tr = file->tr;
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;
	if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
		return;
	mutex_lock(&syscall_trace_lock);
	tr->sys_refcount_enter--;
	WRITE_ONCE(tr->enter_syscall_files[num], NULL);
	if (!tr->sys_refcount_enter)
		unregister_trace_sys_enter(ftrace_syscall_enter, tr);
	mutex_unlock(&syscall_trace_lock);
}
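
/*
 * Illustrative note (not part of the original source): all syscall
 * events in a trace_array share the two tracepoints sys_enter and
 * sys_exit, so the reg/unreg helpers above and below attach the probe
 * only on the 0 -> 1 refcount transition and detach it when the count
 * drops back to 0. Per-syscall enablement is the WRITE_ONCE/READ_ONCE
 * protected enter_syscall_files[]/exit_syscall_files[] slot that the
 * probes check before recording anything.
 */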
static int reg_event_syscall_exit(struct trace_event_file *file,
				  struct trace_event_call *call)
{
	struct trace_array *tr = file->tr;
	int ret = 0;
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;
	if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
		return -ENOSYS;
	mutex_lock(&syscall_trace_lock);
	if (!tr->sys_refcount_exit)
		ret = register_trace_sys_exit(ftrace_syscall_exit, tr);
	if (!ret) {
		WRITE_ONCE(tr->exit_syscall_files[num], file);
		tr->sys_refcount_exit++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

static void unreg_event_syscall_exit(struct trace_event_file *file,
				     struct trace_event_call *call)
{
	struct trace_array *tr = file->tr;
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;
	if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
		return;
	mutex_lock(&syscall_trace_lock);
	tr->sys_refcount_exit--;
	WRITE_ONCE(tr->exit_syscall_files[num], NULL);
	if (!tr->sys_refcount_exit)
		unregister_trace_sys_exit(ftrace_syscall_exit, tr);
	mutex_unlock(&syscall_trace_lock);
}

static int __init init_syscall_trace(struct trace_event_call *call)
{
	int id;
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;
	if (num < 0 || num >= NR_syscalls) {
		pr_debug("syscall %s metadata not mapped, disabling ftrace event\n",
				((struct syscall_metadata *)call->data)->name);
		return -ENOSYS;
	}

	if (set_syscall_print_fmt(call) < 0)
		return -ENOMEM;

	id = trace_event_raw_init(call);

	if (id < 0) {
		free_syscall_print_fmt(call);
		return id;
	}

	return id;
}

static struct trace_event_fields __refdata syscall_enter_fields_array[] = {
	SYSCALL_FIELD(int, __syscall_nr),
	{ .type = TRACE_FUNCTION_TYPE,
	  .define_fields = syscall_enter_define_fields },
	{}
};

struct trace_event_functions enter_syscall_print_funcs = {
	.trace		= print_syscall_enter,
};

struct trace_event_functions exit_syscall_print_funcs = {
	.trace		= print_syscall_exit,
};

struct trace_event_class __refdata event_class_syscall_enter = {
	.system		= "syscalls",
	.reg		= syscall_enter_register,
	.fields_array	= syscall_enter_fields_array,
	.get_fields	= syscall_get_enter_fields,
	.raw_init	= init_syscall_trace,
};

struct trace_event_class __refdata event_class_syscall_exit = {
	.system		= "syscalls",
	.reg		= syscall_exit_register,
	.fields_array	= (struct trace_event_fields[]){
		SYSCALL_FIELD(int, __syscall_nr),
		SYSCALL_FIELD(long, ret),
		{}
	},
	.fields		= LIST_HEAD_INIT(event_class_syscall_exit.fields),
	.raw_init	= init_syscall_trace,
};

unsigned long __init __weak arch_syscall_addr(int nr)
{
	return (unsigned long)sys_call_table[nr];
}

void __init init_ftrace_syscalls(void)
{
	struct syscall_metadata *meta;
	unsigned long addr;
	int i;
	void *ret;

	if (!IS_ENABLED(CONFIG_HAVE_SPARSE_SYSCALL_NR)) {
		syscalls_metadata = kcalloc(NR_syscalls,
					    sizeof(*syscalls_metadata),
					    GFP_KERNEL);
		if (!syscalls_metadata) {
			WARN_ON(1);
			return;
		}
	}

	for (i = 0; i < NR_syscalls; i++) {
		addr = arch_syscall_addr(i);
		meta = find_syscall_meta(addr);
		if (!meta)
			continue;

		meta->syscall_nr = i;

		if (!IS_ENABLED(CONFIG_HAVE_SPARSE_SYSCALL_NR)) {
			syscalls_metadata[i] = meta;
		} else {
			ret = xa_store(&syscalls_metadata_sparse, i, meta,
					GFP_KERNEL);
			WARN(xa_is_err(ret),
				"Syscall memory allocation failed\n");
		}
	}
}
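
/*
 * Illustrative note (not part of the original source): at boot,
 * init_ftrace_syscalls() above walks the arch syscall table, resolves
 * each entry back to a symbol via kallsyms, and matches it against the
 * metadata emitted by the SYSCALL_DEFINEx() macros. The resulting
 * nr -> metadata map is a plain array for dense syscall number spaces
 * and an xarray when CONFIG_HAVE_SPARSE_SYSCALL_NR is set; both are
 * what syscall_nr_to_meta() consults at trace time.
 */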
#ifdef CONFIG_PERF_EVENTS

static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
static int sys_perf_refcount_enter;
static int sys_perf_refcount_exit;

static int perf_call_bpf_enter(struct trace_event_call *call, struct pt_regs *regs,
			       struct syscall_metadata *sys_data,
			       struct syscall_trace_enter *rec)
{
	struct syscall_tp_t {
		struct trace_entry ent;
		int syscall_nr;
		unsigned long args[SYSCALL_DEFINE_MAXARGS];
	} __aligned(8) param;
	int i;

	BUILD_BUG_ON(sizeof(param.ent) < sizeof(void *));

	/* bpf prog requires 'regs' to be the first member in the ctx (a.k.a. &param) */
	perf_fetch_caller_regs(regs);
	*(struct pt_regs **)&param = regs;
	param.syscall_nr = rec->nr;
	for (i = 0; i < sys_data->nb_args; i++)
		param.args[i] = rec->args[i];
	return trace_call_bpf(call, &param);
}

static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
{
	struct syscall_metadata *sys_data;
	struct syscall_trace_enter *rec;
	struct pt_regs *fake_regs;
	struct hlist_head *head;
	unsigned long args[6];
	bool valid_prog_array;
	int syscall_nr;
	int rctx;
	int size;

	/*
	 * Syscall probe called with preemption enabled, but the ring
	 * buffer and per-cpu data require preemption to be disabled.
	 */
	might_fault();
	guard(preempt_notrace)();

	syscall_nr = trace_get_syscall_nr(current, regs);
	if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
		return;
	if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	head = this_cpu_ptr(sys_data->enter_event->perf_events);
	valid_prog_array = bpf_prog_array_valid(sys_data->enter_event);
	if (!valid_prog_array && hlist_empty(head))
		return;

	/* get the size after alignment with the u32 buffer size field */
	size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
	size = ALIGN(size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	rec = perf_trace_buf_alloc(size, &fake_regs, &rctx);
	if (!rec)
		return;

	rec->nr = syscall_nr;
	syscall_get_arguments(current, regs, args);
	memcpy(&rec->args, args, sizeof(unsigned long) * sys_data->nb_args);

	if ((valid_prog_array &&
	     !perf_call_bpf_enter(sys_data->enter_event, fake_regs, sys_data, rec)) ||
	    hlist_empty(head)) {
		perf_swevent_put_recursion_context(rctx);
		return;
	}

	perf_trace_buf_submit(rec, size, rctx,
			      sys_data->enter_event->event.type, 1, regs,
			      head, NULL);
}
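
/*
 * Illustrative note (not part of the original source): perf prepends a
 * u32 size field to each raw sample, so the ALIGN() arithmetic above
 * sizes the record such that the u32 header plus the record lands on a
 * u64 boundary; subtracting sizeof(u32) afterwards yields the payload
 * size that perf_trace_buf_alloc()/perf_trace_buf_submit() expect.
 */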
static int perf_sysenter_enable(struct trace_event_call *call)
{
	int ret = 0;
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;

	mutex_lock(&syscall_trace_lock);
	if (!sys_perf_refcount_enter)
		ret = register_trace_sys_enter(perf_syscall_enter, NULL);
	if (ret) {
		pr_info("event trace: Could not activate syscall entry trace point");
	} else {
		set_bit(num, enabled_perf_enter_syscalls);
		sys_perf_refcount_enter++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

static void perf_sysenter_disable(struct trace_event_call *call)
{
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;

	mutex_lock(&syscall_trace_lock);
	sys_perf_refcount_enter--;
	clear_bit(num, enabled_perf_enter_syscalls);
	if (!sys_perf_refcount_enter)
		unregister_trace_sys_enter(perf_syscall_enter, NULL);
	mutex_unlock(&syscall_trace_lock);
}

static int perf_call_bpf_exit(struct trace_event_call *call, struct pt_regs *regs,
			      struct syscall_trace_exit *rec)
{
	struct syscall_tp_t {
		struct trace_entry ent;
		int syscall_nr;
		unsigned long ret;
	} __aligned(8) param;

	/* bpf prog requires 'regs' to be the first member in the ctx (a.k.a. &param) */
	perf_fetch_caller_regs(regs);
	*(struct pt_regs **)&param = regs;
	param.syscall_nr = rec->nr;
	param.ret = rec->ret;
	return trace_call_bpf(call, &param);
}

static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
{
	struct syscall_metadata *sys_data;
	struct syscall_trace_exit *rec;
	struct pt_regs *fake_regs;
	struct hlist_head *head;
	bool valid_prog_array;
	int syscall_nr;
	int rctx;
	int size;

	/*
	 * Syscall probe called with preemption enabled, but the ring
	 * buffer and per-cpu data require preemption to be disabled.
	 */
	might_fault();
	guard(preempt_notrace)();

	syscall_nr = trace_get_syscall_nr(current, regs);
	if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
		return;
	if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	head = this_cpu_ptr(sys_data->exit_event->perf_events);
	valid_prog_array = bpf_prog_array_valid(sys_data->exit_event);
	if (!valid_prog_array && hlist_empty(head))
		return;

	/* We can probably do that at build time */
	size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	rec = perf_trace_buf_alloc(size, &fake_regs, &rctx);
	if (!rec)
		return;

	rec->nr = syscall_nr;
	rec->ret = syscall_get_return_value(current, regs);

	if ((valid_prog_array &&
	     !perf_call_bpf_exit(sys_data->exit_event, fake_regs, rec)) ||
	    hlist_empty(head)) {
		perf_swevent_put_recursion_context(rctx);
		return;
	}

	perf_trace_buf_submit(rec, size, rctx, sys_data->exit_event->event.type,
			      1, regs, head, NULL);
}

static int perf_sysexit_enable(struct trace_event_call *call)
{
	int ret = 0;
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;

	mutex_lock(&syscall_trace_lock);
	if (!sys_perf_refcount_exit)
		ret = register_trace_sys_exit(perf_syscall_exit, NULL);
	if (ret) {
		pr_info("event trace: Could not activate syscall exit trace point");
	} else {
		set_bit(num, enabled_perf_exit_syscalls);
		sys_perf_refcount_exit++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

static void perf_sysexit_disable(struct trace_event_call *call)
{
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;

	mutex_lock(&syscall_trace_lock);
	sys_perf_refcount_exit--;
	clear_bit(num, enabled_perf_exit_syscalls);
	if (!sys_perf_refcount_exit)
		unregister_trace_sys_exit(perf_syscall_exit, NULL);
	mutex_unlock(&syscall_trace_lock);
}

#endif /* CONFIG_PERF_EVENTS */
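
/*
 * Illustrative note (not part of the original source): the two
 * *_register() callbacks below are the single entry point the trace
 * event core uses for both tracefs and perf. TRACE_REG_REGISTER and
 * TRACE_REG_UNREGISTER toggle the ftrace path per trace_event_file,
 * the TRACE_REG_PERF_* register/unregister cases toggle the perf path,
 * and the PERF_OPEN/CLOSE/ADD/DEL transitions need no work here.
 */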
static int syscall_enter_register(struct trace_event_call *event,
				  enum trace_reg type, void *data)
{
	struct trace_event_file *file = data;

	switch (type) {
	case TRACE_REG_REGISTER:
		return reg_event_syscall_enter(file, event);
	case TRACE_REG_UNREGISTER:
		unreg_event_syscall_enter(file, event);
		return 0;

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		return perf_sysenter_enable(event);
	case TRACE_REG_PERF_UNREGISTER:
		perf_sysenter_disable(event);
		return 0;
	case TRACE_REG_PERF_OPEN:
	case TRACE_REG_PERF_CLOSE:
	case TRACE_REG_PERF_ADD:
	case TRACE_REG_PERF_DEL:
		return 0;
#endif
	}
	return 0;
}

static int syscall_exit_register(struct trace_event_call *event,
				 enum trace_reg type, void *data)
{
	struct trace_event_file *file = data;

	switch (type) {
	case TRACE_REG_REGISTER:
		return reg_event_syscall_exit(file, event);
	case TRACE_REG_UNREGISTER:
		unreg_event_syscall_exit(file, event);
		return 0;

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		return perf_sysexit_enable(event);
	case TRACE_REG_PERF_UNREGISTER:
		perf_sysexit_disable(event);
		return 0;
	case TRACE_REG_PERF_OPEN:
	case TRACE_REG_PERF_CLOSE:
	case TRACE_REG_PERF_ADD:
	case TRACE_REG_PERF_DEL:
		return 0;
#endif
	}
	return 0;
}
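
/*
 * Illustrative usage (not part of the original source): with tracefs
 * mounted, the events defined by this file can be exercised from the
 * shell, e.g.:
 *
 *	# echo 1 > /sys/kernel/tracing/events/syscalls/sys_enter_openat/enable
 *	# cat /sys/kernel/tracing/trace_pipe
 *
 * which enables the enter event for openat(2) and streams lines
 * formatted by print_syscall_enter() above.
 */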