1 #include <trace/syscall.h> 2 #include <trace/events/syscalls.h> 3 #include <linux/slab.h> 4 #include <linux/kernel.h> 5 #include <linux/ftrace.h> 6 #include <linux/perf_event.h> 7 #include <asm/syscall.h> 8 9 #include "trace_output.h" 10 #include "trace.h" 11 12 static DEFINE_MUTEX(syscall_trace_lock); 13 static int sys_refcount_enter; 14 static int sys_refcount_exit; 15 static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls); 16 static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); 17 18 static int syscall_enter_register(struct ftrace_event_call *event, 19 enum trace_reg type); 20 static int syscall_exit_register(struct ftrace_event_call *event, 21 enum trace_reg type); 22 23 static int syscall_enter_define_fields(struct ftrace_event_call *call); 24 static int syscall_exit_define_fields(struct ftrace_event_call *call); 25 26 static struct list_head * 27 syscall_get_enter_fields(struct ftrace_event_call *call) 28 { 29 struct syscall_metadata *entry = call->data; 30 31 return &entry->enter_fields; 32 } 33 34 static struct list_head * 35 syscall_get_exit_fields(struct ftrace_event_call *call) 36 { 37 struct syscall_metadata *entry = call->data; 38 39 return &entry->exit_fields; 40 } 41 42 struct trace_event_functions enter_syscall_print_funcs = { 43 .trace = print_syscall_enter, 44 }; 45 46 struct trace_event_functions exit_syscall_print_funcs = { 47 .trace = print_syscall_exit, 48 }; 49 50 struct ftrace_event_class event_class_syscall_enter = { 51 .system = "syscalls", 52 .reg = syscall_enter_register, 53 .define_fields = syscall_enter_define_fields, 54 .get_fields = syscall_get_enter_fields, 55 .raw_init = init_syscall_trace, 56 }; 57 58 struct ftrace_event_class event_class_syscall_exit = { 59 .system = "syscalls", 60 .reg = syscall_exit_register, 61 .define_fields = syscall_exit_define_fields, 62 .get_fields = syscall_get_exit_fields, 63 .raw_init = init_syscall_trace, 64 }; 65 66 extern unsigned long __start_syscalls_metadata[]; 67 extern unsigned long __stop_syscalls_metadata[]; 68 69 static struct syscall_metadata **syscalls_metadata; 70 71 static struct syscall_metadata *find_syscall_meta(unsigned long syscall) 72 { 73 struct syscall_metadata *start; 74 struct syscall_metadata *stop; 75 char str[KSYM_SYMBOL_LEN]; 76 77 78 start = (struct syscall_metadata *)__start_syscalls_metadata; 79 stop = (struct syscall_metadata *)__stop_syscalls_metadata; 80 kallsyms_lookup(syscall, NULL, NULL, NULL, str); 81 82 for ( ; start < stop; start++) { 83 /* 84 * Only compare after the "sys" prefix. Archs that use 85 * syscall wrappers may have syscalls symbols aliases prefixed 86 * with "SyS" instead of "sys", leading to an unwanted 87 * mismatch. 88 */ 89 if (start->name && !strcmp(start->name + 3, str + 3)) 90 return start; 91 } 92 return NULL; 93 } 94 95 static struct syscall_metadata *syscall_nr_to_meta(int nr) 96 { 97 if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) 98 return NULL; 99 100 return syscalls_metadata[nr]; 101 } 102 103 enum print_line_t 104 print_syscall_enter(struct trace_iterator *iter, int flags, 105 struct trace_event *event) 106 { 107 struct trace_seq *s = &iter->seq; 108 struct trace_entry *ent = iter->ent; 109 struct syscall_trace_enter *trace; 110 struct syscall_metadata *entry; 111 int i, ret, syscall; 112 113 trace = (typeof(trace))ent; 114 syscall = trace->nr; 115 entry = syscall_nr_to_meta(syscall); 116 117 if (!entry) 118 goto end; 119 120 if (entry->enter_event->event.type != ent->type) { 121 WARN_ON_ONCE(1); 122 goto end; 123 } 124 125 ret = trace_seq_printf(s, "%s(", entry->name); 126 if (!ret) 127 return TRACE_TYPE_PARTIAL_LINE; 128 129 for (i = 0; i < entry->nb_args; i++) { 130 /* parameter types */ 131 if (trace_flags & TRACE_ITER_VERBOSE) { 132 ret = trace_seq_printf(s, "%s ", entry->types[i]); 133 if (!ret) 134 return TRACE_TYPE_PARTIAL_LINE; 135 } 136 /* parameter values */ 137 ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i], 138 trace->args[i], 139 i == entry->nb_args - 1 ? "" : ", "); 140 if (!ret) 141 return TRACE_TYPE_PARTIAL_LINE; 142 } 143 144 ret = trace_seq_putc(s, ')'); 145 if (!ret) 146 return TRACE_TYPE_PARTIAL_LINE; 147 148 end: 149 ret = trace_seq_putc(s, '\n'); 150 if (!ret) 151 return TRACE_TYPE_PARTIAL_LINE; 152 153 return TRACE_TYPE_HANDLED; 154 } 155 156 enum print_line_t 157 print_syscall_exit(struct trace_iterator *iter, int flags, 158 struct trace_event *event) 159 { 160 struct trace_seq *s = &iter->seq; 161 struct trace_entry *ent = iter->ent; 162 struct syscall_trace_exit *trace; 163 int syscall; 164 struct syscall_metadata *entry; 165 int ret; 166 167 trace = (typeof(trace))ent; 168 syscall = trace->nr; 169 entry = syscall_nr_to_meta(syscall); 170 171 if (!entry) { 172 trace_seq_printf(s, "\n"); 173 return TRACE_TYPE_HANDLED; 174 } 175 176 if (entry->exit_event->event.type != ent->type) { 177 WARN_ON_ONCE(1); 178 return TRACE_TYPE_UNHANDLED; 179 } 180 181 ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name, 182 trace->ret); 183 if (!ret) 184 return TRACE_TYPE_PARTIAL_LINE; 185 186 return TRACE_TYPE_HANDLED; 187 } 188 189 extern char *__bad_type_size(void); 190 191 #define SYSCALL_FIELD(type, name) \ 192 sizeof(type) != sizeof(trace.name) ? \ 193 __bad_type_size() : \ 194 #type, #name, offsetof(typeof(trace), name), \ 195 sizeof(trace.name), is_signed_type(type) 196 197 static 198 int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len) 199 { 200 int i; 201 int pos = 0; 202 203 /* When len=0, we just calculate the needed length */ 204 #define LEN_OR_ZERO (len ? len - pos : 0) 205 206 pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); 207 for (i = 0; i < entry->nb_args; i++) { 208 pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s", 209 entry->args[i], sizeof(unsigned long), 210 i == entry->nb_args - 1 ? "" : ", "); 211 } 212 pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); 213 214 for (i = 0; i < entry->nb_args; i++) { 215 pos += snprintf(buf + pos, LEN_OR_ZERO, 216 ", ((unsigned long)(REC->%s))", entry->args[i]); 217 } 218 219 #undef LEN_OR_ZERO 220 221 /* return the length of print_fmt */ 222 return pos; 223 } 224 225 static int set_syscall_print_fmt(struct ftrace_event_call *call) 226 { 227 char *print_fmt; 228 int len; 229 struct syscall_metadata *entry = call->data; 230 231 if (entry->enter_event != call) { 232 call->print_fmt = "\"0x%lx\", REC->ret"; 233 return 0; 234 } 235 236 /* First: called with 0 length to calculate the needed length */ 237 len = __set_enter_print_fmt(entry, NULL, 0); 238 239 print_fmt = kmalloc(len + 1, GFP_KERNEL); 240 if (!print_fmt) 241 return -ENOMEM; 242 243 /* Second: actually write the @print_fmt */ 244 __set_enter_print_fmt(entry, print_fmt, len + 1); 245 call->print_fmt = print_fmt; 246 247 return 0; 248 } 249 250 static void free_syscall_print_fmt(struct ftrace_event_call *call) 251 { 252 struct syscall_metadata *entry = call->data; 253 254 if (entry->enter_event == call) 255 kfree(call->print_fmt); 256 } 257 258 static int syscall_enter_define_fields(struct ftrace_event_call *call) 259 { 260 struct syscall_trace_enter trace; 261 struct syscall_metadata *meta = call->data; 262 int ret; 263 int i; 264 int offset = offsetof(typeof(trace), args); 265 266 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); 267 if (ret) 268 return ret; 269 270 for (i = 0; i < meta->nb_args; i++) { 271 ret = trace_define_field(call, meta->types[i], 272 meta->args[i], offset, 273 sizeof(unsigned long), 0, 274 FILTER_OTHER); 275 offset += sizeof(unsigned long); 276 } 277 278 return ret; 279 } 280 281 static int syscall_exit_define_fields(struct ftrace_event_call *call) 282 { 283 struct syscall_trace_exit trace; 284 int ret; 285 286 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); 287 if (ret) 288 return ret; 289 290 ret = trace_define_field(call, SYSCALL_FIELD(long, ret), 291 FILTER_OTHER); 292 293 return ret; 294 } 295 296 void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id) 297 { 298 struct syscall_trace_enter *entry; 299 struct syscall_metadata *sys_data; 300 struct ring_buffer_event *event; 301 struct ring_buffer *buffer; 302 int size; 303 int syscall_nr; 304 305 syscall_nr = syscall_get_nr(current, regs); 306 if (syscall_nr < 0) 307 return; 308 if (!test_bit(syscall_nr, enabled_enter_syscalls)) 309 return; 310 311 sys_data = syscall_nr_to_meta(syscall_nr); 312 if (!sys_data) 313 return; 314 315 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; 316 317 event = trace_current_buffer_lock_reserve(&buffer, 318 sys_data->enter_event->event.type, size, 0, 0); 319 if (!event) 320 return; 321 322 entry = ring_buffer_event_data(event); 323 entry->nr = syscall_nr; 324 syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args); 325 326 if (!filter_current_check_discard(buffer, sys_data->enter_event, 327 entry, event)) 328 trace_current_buffer_unlock_commit(buffer, event, 0, 0); 329 } 330 331 void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret) 332 { 333 struct syscall_trace_exit *entry; 334 struct syscall_metadata *sys_data; 335 struct ring_buffer_event *event; 336 struct ring_buffer *buffer; 337 int syscall_nr; 338 339 syscall_nr = syscall_get_nr(current, regs); 340 if (syscall_nr < 0) 341 return; 342 if (!test_bit(syscall_nr, enabled_exit_syscalls)) 343 return; 344 345 sys_data = syscall_nr_to_meta(syscall_nr); 346 if (!sys_data) 347 return; 348 349 event = trace_current_buffer_lock_reserve(&buffer, 350 sys_data->exit_event->event.type, sizeof(*entry), 0, 0); 351 if (!event) 352 return; 353 354 entry = ring_buffer_event_data(event); 355 entry->nr = syscall_nr; 356 entry->ret = syscall_get_return_value(current, regs); 357 358 if (!filter_current_check_discard(buffer, sys_data->exit_event, 359 entry, event)) 360 trace_current_buffer_unlock_commit(buffer, event, 0, 0); 361 } 362 363 int reg_event_syscall_enter(struct ftrace_event_call *call) 364 { 365 int ret = 0; 366 int num; 367 368 num = ((struct syscall_metadata *)call->data)->syscall_nr; 369 if (num < 0 || num >= NR_syscalls) 370 return -ENOSYS; 371 mutex_lock(&syscall_trace_lock); 372 if (!sys_refcount_enter) 373 ret = register_trace_sys_enter(ftrace_syscall_enter, NULL); 374 if (!ret) { 375 set_bit(num, enabled_enter_syscalls); 376 sys_refcount_enter++; 377 } 378 mutex_unlock(&syscall_trace_lock); 379 return ret; 380 } 381 382 void unreg_event_syscall_enter(struct ftrace_event_call *call) 383 { 384 int num; 385 386 num = ((struct syscall_metadata *)call->data)->syscall_nr; 387 if (num < 0 || num >= NR_syscalls) 388 return; 389 mutex_lock(&syscall_trace_lock); 390 sys_refcount_enter--; 391 clear_bit(num, enabled_enter_syscalls); 392 if (!sys_refcount_enter) 393 unregister_trace_sys_enter(ftrace_syscall_enter, NULL); 394 mutex_unlock(&syscall_trace_lock); 395 } 396 397 int reg_event_syscall_exit(struct ftrace_event_call *call) 398 { 399 int ret = 0; 400 int num; 401 402 num = ((struct syscall_metadata *)call->data)->syscall_nr; 403 if (num < 0 || num >= NR_syscalls) 404 return -ENOSYS; 405 mutex_lock(&syscall_trace_lock); 406 if (!sys_refcount_exit) 407 ret = register_trace_sys_exit(ftrace_syscall_exit, NULL); 408 if (!ret) { 409 set_bit(num, enabled_exit_syscalls); 410 sys_refcount_exit++; 411 } 412 mutex_unlock(&syscall_trace_lock); 413 return ret; 414 } 415 416 void unreg_event_syscall_exit(struct ftrace_event_call *call) 417 { 418 int num; 419 420 num = ((struct syscall_metadata *)call->data)->syscall_nr; 421 if (num < 0 || num >= NR_syscalls) 422 return; 423 mutex_lock(&syscall_trace_lock); 424 sys_refcount_exit--; 425 clear_bit(num, enabled_exit_syscalls); 426 if (!sys_refcount_exit) 427 unregister_trace_sys_exit(ftrace_syscall_exit, NULL); 428 mutex_unlock(&syscall_trace_lock); 429 } 430 431 int init_syscall_trace(struct ftrace_event_call *call) 432 { 433 int id; 434 435 if (set_syscall_print_fmt(call) < 0) 436 return -ENOMEM; 437 438 id = trace_event_raw_init(call); 439 440 if (id < 0) { 441 free_syscall_print_fmt(call); 442 return id; 443 } 444 445 return id; 446 } 447 448 unsigned long __init arch_syscall_addr(int nr) 449 { 450 return (unsigned long)sys_call_table[nr]; 451 } 452 453 int __init init_ftrace_syscalls(void) 454 { 455 struct syscall_metadata *meta; 456 unsigned long addr; 457 int i; 458 459 syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * 460 NR_syscalls, GFP_KERNEL); 461 if (!syscalls_metadata) { 462 WARN_ON(1); 463 return -ENOMEM; 464 } 465 466 for (i = 0; i < NR_syscalls; i++) { 467 addr = arch_syscall_addr(i); 468 meta = find_syscall_meta(addr); 469 if (!meta) 470 continue; 471 472 meta->syscall_nr = i; 473 syscalls_metadata[i] = meta; 474 } 475 476 return 0; 477 } 478 core_initcall(init_ftrace_syscalls); 479 480 #ifdef CONFIG_PERF_EVENTS 481 482 static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls); 483 static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls); 484 static int sys_perf_refcount_enter; 485 static int sys_perf_refcount_exit; 486 487 static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) 488 { 489 struct syscall_metadata *sys_data; 490 struct syscall_trace_enter *rec; 491 struct hlist_head *head; 492 int syscall_nr; 493 int rctx; 494 int size; 495 496 syscall_nr = syscall_get_nr(current, regs); 497 if (!test_bit(syscall_nr, enabled_perf_enter_syscalls)) 498 return; 499 500 sys_data = syscall_nr_to_meta(syscall_nr); 501 if (!sys_data) 502 return; 503 504 /* get the size after alignment with the u32 buffer size field */ 505 size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec); 506 size = ALIGN(size + sizeof(u32), sizeof(u64)); 507 size -= sizeof(u32); 508 509 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, 510 "perf buffer not large enough")) 511 return; 512 513 rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size, 514 sys_data->enter_event->event.type, regs, &rctx); 515 if (!rec) 516 return; 517 518 rec->nr = syscall_nr; 519 syscall_get_arguments(current, regs, 0, sys_data->nb_args, 520 (unsigned long *)&rec->args); 521 522 head = this_cpu_ptr(sys_data->enter_event->perf_events); 523 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); 524 } 525 526 int perf_sysenter_enable(struct ftrace_event_call *call) 527 { 528 int ret = 0; 529 int num; 530 531 num = ((struct syscall_metadata *)call->data)->syscall_nr; 532 533 mutex_lock(&syscall_trace_lock); 534 if (!sys_perf_refcount_enter) 535 ret = register_trace_sys_enter(perf_syscall_enter, NULL); 536 if (ret) { 537 pr_info("event trace: Could not activate" 538 "syscall entry trace point"); 539 } else { 540 set_bit(num, enabled_perf_enter_syscalls); 541 sys_perf_refcount_enter++; 542 } 543 mutex_unlock(&syscall_trace_lock); 544 return ret; 545 } 546 547 void perf_sysenter_disable(struct ftrace_event_call *call) 548 { 549 int num; 550 551 num = ((struct syscall_metadata *)call->data)->syscall_nr; 552 553 mutex_lock(&syscall_trace_lock); 554 sys_perf_refcount_enter--; 555 clear_bit(num, enabled_perf_enter_syscalls); 556 if (!sys_perf_refcount_enter) 557 unregister_trace_sys_enter(perf_syscall_enter, NULL); 558 mutex_unlock(&syscall_trace_lock); 559 } 560 561 static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) 562 { 563 struct syscall_metadata *sys_data; 564 struct syscall_trace_exit *rec; 565 struct hlist_head *head; 566 int syscall_nr; 567 int rctx; 568 int size; 569 570 syscall_nr = syscall_get_nr(current, regs); 571 if (!test_bit(syscall_nr, enabled_perf_exit_syscalls)) 572 return; 573 574 sys_data = syscall_nr_to_meta(syscall_nr); 575 if (!sys_data) 576 return; 577 578 /* We can probably do that at build time */ 579 size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64)); 580 size -= sizeof(u32); 581 582 /* 583 * Impossible, but be paranoid with the future 584 * How to put this check outside runtime? 585 */ 586 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, 587 "exit event has grown above perf buffer size")) 588 return; 589 590 rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size, 591 sys_data->exit_event->event.type, regs, &rctx); 592 if (!rec) 593 return; 594 595 rec->nr = syscall_nr; 596 rec->ret = syscall_get_return_value(current, regs); 597 598 head = this_cpu_ptr(sys_data->exit_event->perf_events); 599 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); 600 } 601 602 int perf_sysexit_enable(struct ftrace_event_call *call) 603 { 604 int ret = 0; 605 int num; 606 607 num = ((struct syscall_metadata *)call->data)->syscall_nr; 608 609 mutex_lock(&syscall_trace_lock); 610 if (!sys_perf_refcount_exit) 611 ret = register_trace_sys_exit(perf_syscall_exit, NULL); 612 if (ret) { 613 pr_info("event trace: Could not activate" 614 "syscall exit trace point"); 615 } else { 616 set_bit(num, enabled_perf_exit_syscalls); 617 sys_perf_refcount_exit++; 618 } 619 mutex_unlock(&syscall_trace_lock); 620 return ret; 621 } 622 623 void perf_sysexit_disable(struct ftrace_event_call *call) 624 { 625 int num; 626 627 num = ((struct syscall_metadata *)call->data)->syscall_nr; 628 629 mutex_lock(&syscall_trace_lock); 630 sys_perf_refcount_exit--; 631 clear_bit(num, enabled_perf_exit_syscalls); 632 if (!sys_perf_refcount_exit) 633 unregister_trace_sys_exit(perf_syscall_exit, NULL); 634 mutex_unlock(&syscall_trace_lock); 635 } 636 637 #endif /* CONFIG_PERF_EVENTS */ 638 639 static int syscall_enter_register(struct ftrace_event_call *event, 640 enum trace_reg type) 641 { 642 switch (type) { 643 case TRACE_REG_REGISTER: 644 return reg_event_syscall_enter(event); 645 case TRACE_REG_UNREGISTER: 646 unreg_event_syscall_enter(event); 647 return 0; 648 649 #ifdef CONFIG_PERF_EVENTS 650 case TRACE_REG_PERF_REGISTER: 651 return perf_sysenter_enable(event); 652 case TRACE_REG_PERF_UNREGISTER: 653 perf_sysenter_disable(event); 654 return 0; 655 #endif 656 } 657 return 0; 658 } 659 660 static int syscall_exit_register(struct ftrace_event_call *event, 661 enum trace_reg type) 662 { 663 switch (type) { 664 case TRACE_REG_REGISTER: 665 return reg_event_syscall_exit(event); 666 case TRACE_REG_UNREGISTER: 667 unreg_event_syscall_exit(event); 668 return 0; 669 670 #ifdef CONFIG_PERF_EVENTS 671 case TRACE_REG_PERF_REGISTER: 672 return perf_sysexit_enable(event); 673 case TRACE_REG_PERF_UNREGISTER: 674 perf_sysexit_disable(event); 675 return 0; 676 #endif 677 } 678 return 0; 679 } 680