1 #include <trace/syscall.h> 2 #include <trace/events/syscalls.h> 3 #include <linux/kernel.h> 4 #include <linux/ftrace.h> 5 #include <linux/perf_event.h> 6 #include <asm/syscall.h> 7 8 #include "trace_output.h" 9 #include "trace.h" 10 11 static DEFINE_MUTEX(syscall_trace_lock); 12 static int sys_refcount_enter; 13 static int sys_refcount_exit; 14 static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls); 15 static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); 16 17 extern unsigned long __start_syscalls_metadata[]; 18 extern unsigned long __stop_syscalls_metadata[]; 19 20 static struct syscall_metadata **syscalls_metadata; 21 22 static struct syscall_metadata *find_syscall_meta(unsigned long syscall) 23 { 24 struct syscall_metadata *start; 25 struct syscall_metadata *stop; 26 char str[KSYM_SYMBOL_LEN]; 27 28 29 start = (struct syscall_metadata *)__start_syscalls_metadata; 30 stop = (struct syscall_metadata *)__stop_syscalls_metadata; 31 kallsyms_lookup(syscall, NULL, NULL, NULL, str); 32 33 for ( ; start < stop; start++) { 34 /* 35 * Only compare after the "sys" prefix. Archs that use 36 * syscall wrappers may have syscalls symbols aliases prefixed 37 * with "SyS" instead of "sys", leading to an unwanted 38 * mismatch. 39 */ 40 if (start->name && !strcmp(start->name + 3, str + 3)) 41 return start; 42 } 43 return NULL; 44 } 45 46 static struct syscall_metadata *syscall_nr_to_meta(int nr) 47 { 48 if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) 49 return NULL; 50 51 return syscalls_metadata[nr]; 52 } 53 54 enum print_line_t 55 print_syscall_enter(struct trace_iterator *iter, int flags) 56 { 57 struct trace_seq *s = &iter->seq; 58 struct trace_entry *ent = iter->ent; 59 struct syscall_trace_enter *trace; 60 struct syscall_metadata *entry; 61 int i, ret, syscall; 62 63 trace = (typeof(trace))ent; 64 syscall = trace->nr; 65 entry = syscall_nr_to_meta(syscall); 66 67 if (!entry) 68 goto end; 69 70 if (entry->enter_event->id != ent->type) { 71 WARN_ON_ONCE(1); 72 goto end; 73 } 74 75 ret = trace_seq_printf(s, "%s(", entry->name); 76 if (!ret) 77 return TRACE_TYPE_PARTIAL_LINE; 78 79 for (i = 0; i < entry->nb_args; i++) { 80 /* parameter types */ 81 if (trace_flags & TRACE_ITER_VERBOSE) { 82 ret = trace_seq_printf(s, "%s ", entry->types[i]); 83 if (!ret) 84 return TRACE_TYPE_PARTIAL_LINE; 85 } 86 /* parameter values */ 87 ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i], 88 trace->args[i], 89 i == entry->nb_args - 1 ? "" : ", "); 90 if (!ret) 91 return TRACE_TYPE_PARTIAL_LINE; 92 } 93 94 ret = trace_seq_putc(s, ')'); 95 if (!ret) 96 return TRACE_TYPE_PARTIAL_LINE; 97 98 end: 99 ret = trace_seq_putc(s, '\n'); 100 if (!ret) 101 return TRACE_TYPE_PARTIAL_LINE; 102 103 return TRACE_TYPE_HANDLED; 104 } 105 106 enum print_line_t 107 print_syscall_exit(struct trace_iterator *iter, int flags) 108 { 109 struct trace_seq *s = &iter->seq; 110 struct trace_entry *ent = iter->ent; 111 struct syscall_trace_exit *trace; 112 int syscall; 113 struct syscall_metadata *entry; 114 int ret; 115 116 trace = (typeof(trace))ent; 117 syscall = trace->nr; 118 entry = syscall_nr_to_meta(syscall); 119 120 if (!entry) { 121 trace_seq_printf(s, "\n"); 122 return TRACE_TYPE_HANDLED; 123 } 124 125 if (entry->exit_event->id != ent->type) { 126 WARN_ON_ONCE(1); 127 return TRACE_TYPE_UNHANDLED; 128 } 129 130 ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name, 131 trace->ret); 132 if (!ret) 133 return TRACE_TYPE_PARTIAL_LINE; 134 135 return TRACE_TYPE_HANDLED; 136 } 137 138 extern char *__bad_type_size(void); 139 140 #define SYSCALL_FIELD(type, name) \ 141 sizeof(type) != sizeof(trace.name) ? \ 142 __bad_type_size() : \ 143 #type, #name, offsetof(typeof(trace), name), \ 144 sizeof(trace.name), is_signed_type(type) 145 146 int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) 147 { 148 int i; 149 int ret; 150 struct syscall_metadata *entry = call->data; 151 struct syscall_trace_enter trace; 152 int offset = offsetof(struct syscall_trace_enter, args); 153 154 ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" 155 "\tsigned:%u;\n", 156 SYSCALL_FIELD(int, nr)); 157 if (!ret) 158 return 0; 159 160 for (i = 0; i < entry->nb_args; i++) { 161 ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i], 162 entry->args[i]); 163 if (!ret) 164 return 0; 165 ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;" 166 "\tsigned:%u;\n", offset, 167 sizeof(unsigned long), 168 is_signed_type(unsigned long)); 169 if (!ret) 170 return 0; 171 offset += sizeof(unsigned long); 172 } 173 174 trace_seq_puts(s, "\nprint fmt: \""); 175 for (i = 0; i < entry->nb_args; i++) { 176 ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i], 177 sizeof(unsigned long), 178 i == entry->nb_args - 1 ? "" : ", "); 179 if (!ret) 180 return 0; 181 } 182 trace_seq_putc(s, '"'); 183 184 for (i = 0; i < entry->nb_args; i++) { 185 ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))", 186 entry->args[i]); 187 if (!ret) 188 return 0; 189 } 190 191 return trace_seq_putc(s, '\n'); 192 } 193 194 int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s) 195 { 196 int ret; 197 struct syscall_trace_exit trace; 198 199 ret = trace_seq_printf(s, 200 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" 201 "\tsigned:%u;\n" 202 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" 203 "\tsigned:%u;\n", 204 SYSCALL_FIELD(int, nr), 205 SYSCALL_FIELD(long, ret)); 206 if (!ret) 207 return 0; 208 209 return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n"); 210 } 211 212 int syscall_enter_define_fields(struct ftrace_event_call *call) 213 { 214 struct syscall_trace_enter trace; 215 struct syscall_metadata *meta = call->data; 216 int ret; 217 int i; 218 int offset = offsetof(typeof(trace), args); 219 220 ret = trace_define_common_fields(call); 221 if (ret) 222 return ret; 223 224 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); 225 if (ret) 226 return ret; 227 228 for (i = 0; i < meta->nb_args; i++) { 229 ret = trace_define_field(call, meta->types[i], 230 meta->args[i], offset, 231 sizeof(unsigned long), 0, 232 FILTER_OTHER); 233 offset += sizeof(unsigned long); 234 } 235 236 return ret; 237 } 238 239 int syscall_exit_define_fields(struct ftrace_event_call *call) 240 { 241 struct syscall_trace_exit trace; 242 int ret; 243 244 ret = trace_define_common_fields(call); 245 if (ret) 246 return ret; 247 248 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); 249 if (ret) 250 return ret; 251 252 ret = trace_define_field(call, SYSCALL_FIELD(long, ret), 253 FILTER_OTHER); 254 255 return ret; 256 } 257 258 void ftrace_syscall_enter(struct pt_regs *regs, long id) 259 { 260 struct syscall_trace_enter *entry; 261 struct syscall_metadata *sys_data; 262 struct ring_buffer_event *event; 263 struct ring_buffer *buffer; 264 int size; 265 int syscall_nr; 266 267 syscall_nr = syscall_get_nr(current, regs); 268 if (syscall_nr < 0) 269 return; 270 if (!test_bit(syscall_nr, enabled_enter_syscalls)) 271 return; 272 273 sys_data = syscall_nr_to_meta(syscall_nr); 274 if (!sys_data) 275 return; 276 277 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; 278 279 event = trace_current_buffer_lock_reserve(&buffer, 280 sys_data->enter_event->id, size, 0, 0); 281 if (!event) 282 return; 283 284 entry = ring_buffer_event_data(event); 285 entry->nr = syscall_nr; 286 syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args); 287 288 if (!filter_current_check_discard(buffer, sys_data->enter_event, 289 entry, event)) 290 trace_current_buffer_unlock_commit(buffer, event, 0, 0); 291 } 292 293 void ftrace_syscall_exit(struct pt_regs *regs, long ret) 294 { 295 struct syscall_trace_exit *entry; 296 struct syscall_metadata *sys_data; 297 struct ring_buffer_event *event; 298 struct ring_buffer *buffer; 299 int syscall_nr; 300 301 syscall_nr = syscall_get_nr(current, regs); 302 if (syscall_nr < 0) 303 return; 304 if (!test_bit(syscall_nr, enabled_exit_syscalls)) 305 return; 306 307 sys_data = syscall_nr_to_meta(syscall_nr); 308 if (!sys_data) 309 return; 310 311 event = trace_current_buffer_lock_reserve(&buffer, 312 sys_data->exit_event->id, sizeof(*entry), 0, 0); 313 if (!event) 314 return; 315 316 entry = ring_buffer_event_data(event); 317 entry->nr = syscall_nr; 318 entry->ret = syscall_get_return_value(current, regs); 319 320 if (!filter_current_check_discard(buffer, sys_data->exit_event, 321 entry, event)) 322 trace_current_buffer_unlock_commit(buffer, event, 0, 0); 323 } 324 325 int reg_event_syscall_enter(struct ftrace_event_call *call) 326 { 327 int ret = 0; 328 int num; 329 330 num = ((struct syscall_metadata *)call->data)->syscall_nr; 331 if (num < 0 || num >= NR_syscalls) 332 return -ENOSYS; 333 mutex_lock(&syscall_trace_lock); 334 if (!sys_refcount_enter) 335 ret = register_trace_sys_enter(ftrace_syscall_enter); 336 if (ret) { 337 pr_info("event trace: Could not activate" 338 "syscall entry trace point"); 339 } else { 340 set_bit(num, enabled_enter_syscalls); 341 sys_refcount_enter++; 342 } 343 mutex_unlock(&syscall_trace_lock); 344 return ret; 345 } 346 347 void unreg_event_syscall_enter(struct ftrace_event_call *call) 348 { 349 int num; 350 351 num = ((struct syscall_metadata *)call->data)->syscall_nr; 352 if (num < 0 || num >= NR_syscalls) 353 return; 354 mutex_lock(&syscall_trace_lock); 355 sys_refcount_enter--; 356 clear_bit(num, enabled_enter_syscalls); 357 if (!sys_refcount_enter) 358 unregister_trace_sys_enter(ftrace_syscall_enter); 359 mutex_unlock(&syscall_trace_lock); 360 } 361 362 int reg_event_syscall_exit(struct ftrace_event_call *call) 363 { 364 int ret = 0; 365 int num; 366 367 num = ((struct syscall_metadata *)call->data)->syscall_nr; 368 if (num < 0 || num >= NR_syscalls) 369 return -ENOSYS; 370 mutex_lock(&syscall_trace_lock); 371 if (!sys_refcount_exit) 372 ret = register_trace_sys_exit(ftrace_syscall_exit); 373 if (ret) { 374 pr_info("event trace: Could not activate" 375 "syscall exit trace point"); 376 } else { 377 set_bit(num, enabled_exit_syscalls); 378 sys_refcount_exit++; 379 } 380 mutex_unlock(&syscall_trace_lock); 381 return ret; 382 } 383 384 void unreg_event_syscall_exit(struct ftrace_event_call *call) 385 { 386 int num; 387 388 num = ((struct syscall_metadata *)call->data)->syscall_nr; 389 if (num < 0 || num >= NR_syscalls) 390 return; 391 mutex_lock(&syscall_trace_lock); 392 sys_refcount_exit--; 393 clear_bit(num, enabled_exit_syscalls); 394 if (!sys_refcount_exit) 395 unregister_trace_sys_exit(ftrace_syscall_exit); 396 mutex_unlock(&syscall_trace_lock); 397 } 398 399 int init_syscall_trace(struct ftrace_event_call *call) 400 { 401 int id; 402 403 id = register_ftrace_event(call->event); 404 if (!id) 405 return -ENODEV; 406 call->id = id; 407 INIT_LIST_HEAD(&call->fields); 408 return 0; 409 } 410 411 int __init init_ftrace_syscalls(void) 412 { 413 struct syscall_metadata *meta; 414 unsigned long addr; 415 int i; 416 417 syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * 418 NR_syscalls, GFP_KERNEL); 419 if (!syscalls_metadata) { 420 WARN_ON(1); 421 return -ENOMEM; 422 } 423 424 for (i = 0; i < NR_syscalls; i++) { 425 addr = arch_syscall_addr(i); 426 meta = find_syscall_meta(addr); 427 if (!meta) 428 continue; 429 430 meta->syscall_nr = i; 431 syscalls_metadata[i] = meta; 432 } 433 434 return 0; 435 } 436 core_initcall(init_ftrace_syscalls); 437 438 #ifdef CONFIG_EVENT_PROFILE 439 440 static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls); 441 static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls); 442 static int sys_prof_refcount_enter; 443 static int sys_prof_refcount_exit; 444 445 static void prof_syscall_enter(struct pt_regs *regs, long id) 446 { 447 struct syscall_metadata *sys_data; 448 struct syscall_trace_enter *rec; 449 unsigned long flags; 450 char *trace_buf; 451 char *raw_data; 452 int syscall_nr; 453 int rctx; 454 int size; 455 int cpu; 456 457 syscall_nr = syscall_get_nr(current, regs); 458 if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) 459 return; 460 461 sys_data = syscall_nr_to_meta(syscall_nr); 462 if (!sys_data) 463 return; 464 465 /* get the size after alignment with the u32 buffer size field */ 466 size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec); 467 size = ALIGN(size + sizeof(u32), sizeof(u64)); 468 size -= sizeof(u32); 469 470 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 471 "profile buffer not large enough")) 472 return; 473 474 /* Protect the per cpu buffer, begin the rcu read side */ 475 local_irq_save(flags); 476 477 rctx = perf_swevent_get_recursion_context(); 478 if (rctx < 0) 479 goto end_recursion; 480 481 cpu = smp_processor_id(); 482 483 trace_buf = rcu_dereference(perf_trace_buf); 484 485 if (!trace_buf) 486 goto end; 487 488 raw_data = per_cpu_ptr(trace_buf, cpu); 489 490 /* zero the dead bytes from align to not leak stack to user */ 491 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; 492 493 rec = (struct syscall_trace_enter *) raw_data; 494 tracing_generic_entry_update(&rec->ent, 0, 0); 495 rec->ent.type = sys_data->enter_event->id; 496 rec->nr = syscall_nr; 497 syscall_get_arguments(current, regs, 0, sys_data->nb_args, 498 (unsigned long *)&rec->args); 499 perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size); 500 501 end: 502 perf_swevent_put_recursion_context(rctx); 503 end_recursion: 504 local_irq_restore(flags); 505 } 506 507 int prof_sysenter_enable(struct ftrace_event_call *call) 508 { 509 int ret = 0; 510 int num; 511 512 num = ((struct syscall_metadata *)call->data)->syscall_nr; 513 514 mutex_lock(&syscall_trace_lock); 515 if (!sys_prof_refcount_enter) 516 ret = register_trace_sys_enter(prof_syscall_enter); 517 if (ret) { 518 pr_info("event trace: Could not activate" 519 "syscall entry trace point"); 520 } else { 521 set_bit(num, enabled_prof_enter_syscalls); 522 sys_prof_refcount_enter++; 523 } 524 mutex_unlock(&syscall_trace_lock); 525 return ret; 526 } 527 528 void prof_sysenter_disable(struct ftrace_event_call *call) 529 { 530 int num; 531 532 num = ((struct syscall_metadata *)call->data)->syscall_nr; 533 534 mutex_lock(&syscall_trace_lock); 535 sys_prof_refcount_enter--; 536 clear_bit(num, enabled_prof_enter_syscalls); 537 if (!sys_prof_refcount_enter) 538 unregister_trace_sys_enter(prof_syscall_enter); 539 mutex_unlock(&syscall_trace_lock); 540 } 541 542 static void prof_syscall_exit(struct pt_regs *regs, long ret) 543 { 544 struct syscall_metadata *sys_data; 545 struct syscall_trace_exit *rec; 546 unsigned long flags; 547 int syscall_nr; 548 char *trace_buf; 549 char *raw_data; 550 int rctx; 551 int size; 552 int cpu; 553 554 syscall_nr = syscall_get_nr(current, regs); 555 if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) 556 return; 557 558 sys_data = syscall_nr_to_meta(syscall_nr); 559 if (!sys_data) 560 return; 561 562 /* We can probably do that at build time */ 563 size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64)); 564 size -= sizeof(u32); 565 566 /* 567 * Impossible, but be paranoid with the future 568 * How to put this check outside runtime? 569 */ 570 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 571 "exit event has grown above profile buffer size")) 572 return; 573 574 /* Protect the per cpu buffer, begin the rcu read side */ 575 local_irq_save(flags); 576 577 rctx = perf_swevent_get_recursion_context(); 578 if (rctx < 0) 579 goto end_recursion; 580 581 cpu = smp_processor_id(); 582 583 trace_buf = rcu_dereference(perf_trace_buf); 584 585 if (!trace_buf) 586 goto end; 587 588 raw_data = per_cpu_ptr(trace_buf, cpu); 589 590 /* zero the dead bytes from align to not leak stack to user */ 591 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; 592 593 rec = (struct syscall_trace_exit *)raw_data; 594 595 tracing_generic_entry_update(&rec->ent, 0, 0); 596 rec->ent.type = sys_data->exit_event->id; 597 rec->nr = syscall_nr; 598 rec->ret = syscall_get_return_value(current, regs); 599 600 perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size); 601 602 end: 603 perf_swevent_put_recursion_context(rctx); 604 end_recursion: 605 local_irq_restore(flags); 606 } 607 608 int prof_sysexit_enable(struct ftrace_event_call *call) 609 { 610 int ret = 0; 611 int num; 612 613 num = ((struct syscall_metadata *)call->data)->syscall_nr; 614 615 mutex_lock(&syscall_trace_lock); 616 if (!sys_prof_refcount_exit) 617 ret = register_trace_sys_exit(prof_syscall_exit); 618 if (ret) { 619 pr_info("event trace: Could not activate" 620 "syscall entry trace point"); 621 } else { 622 set_bit(num, enabled_prof_exit_syscalls); 623 sys_prof_refcount_exit++; 624 } 625 mutex_unlock(&syscall_trace_lock); 626 return ret; 627 } 628 629 void prof_sysexit_disable(struct ftrace_event_call *call) 630 { 631 int num; 632 633 num = ((struct syscall_metadata *)call->data)->syscall_nr; 634 635 mutex_lock(&syscall_trace_lock); 636 sys_prof_refcount_exit--; 637 clear_bit(num, enabled_prof_exit_syscalls); 638 if (!sys_prof_refcount_exit) 639 unregister_trace_sys_exit(prof_syscall_exit); 640 mutex_unlock(&syscall_trace_lock); 641 } 642 643 #endif 644 645 646