1 #include <trace/syscall.h> 2 #include <trace/events/syscalls.h> 3 #include <linux/slab.h> 4 #include <linux/kernel.h> 5 #include <linux/ftrace.h> 6 #include <linux/perf_event.h> 7 #include <asm/syscall.h> 8 9 #include "trace_output.h" 10 #include "trace.h" 11 12 static DEFINE_MUTEX(syscall_trace_lock); 13 static int sys_refcount_enter; 14 static int sys_refcount_exit; 15 static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls); 16 static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); 17 18 extern unsigned long __start_syscalls_metadata[]; 19 extern unsigned long __stop_syscalls_metadata[]; 20 21 static struct syscall_metadata **syscalls_metadata; 22 23 static struct syscall_metadata *find_syscall_meta(unsigned long syscall) 24 { 25 struct syscall_metadata *start; 26 struct syscall_metadata *stop; 27 char str[KSYM_SYMBOL_LEN]; 28 29 30 start = (struct syscall_metadata *)__start_syscalls_metadata; 31 stop = (struct syscall_metadata *)__stop_syscalls_metadata; 32 kallsyms_lookup(syscall, NULL, NULL, NULL, str); 33 34 for ( ; start < stop; start++) { 35 /* 36 * Only compare after the "sys" prefix. Archs that use 37 * syscall wrappers may have syscalls symbols aliases prefixed 38 * with "SyS" instead of "sys", leading to an unwanted 39 * mismatch. 40 */ 41 if (start->name && !strcmp(start->name + 3, str + 3)) 42 return start; 43 } 44 return NULL; 45 } 46 47 static struct syscall_metadata *syscall_nr_to_meta(int nr) 48 { 49 if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) 50 return NULL; 51 52 return syscalls_metadata[nr]; 53 } 54 55 enum print_line_t 56 print_syscall_enter(struct trace_iterator *iter, int flags) 57 { 58 struct trace_seq *s = &iter->seq; 59 struct trace_entry *ent = iter->ent; 60 struct syscall_trace_enter *trace; 61 struct syscall_metadata *entry; 62 int i, ret, syscall; 63 64 trace = (typeof(trace))ent; 65 syscall = trace->nr; 66 entry = syscall_nr_to_meta(syscall); 67 68 if (!entry) 69 goto end; 70 71 if (entry->enter_event->id != ent->type) { 72 WARN_ON_ONCE(1); 73 goto end; 74 } 75 76 ret = trace_seq_printf(s, "%s(", entry->name); 77 if (!ret) 78 return TRACE_TYPE_PARTIAL_LINE; 79 80 for (i = 0; i < entry->nb_args; i++) { 81 /* parameter types */ 82 if (trace_flags & TRACE_ITER_VERBOSE) { 83 ret = trace_seq_printf(s, "%s ", entry->types[i]); 84 if (!ret) 85 return TRACE_TYPE_PARTIAL_LINE; 86 } 87 /* parameter values */ 88 ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i], 89 trace->args[i], 90 i == entry->nb_args - 1 ? "" : ", "); 91 if (!ret) 92 return TRACE_TYPE_PARTIAL_LINE; 93 } 94 95 ret = trace_seq_putc(s, ')'); 96 if (!ret) 97 return TRACE_TYPE_PARTIAL_LINE; 98 99 end: 100 ret = trace_seq_putc(s, '\n'); 101 if (!ret) 102 return TRACE_TYPE_PARTIAL_LINE; 103 104 return TRACE_TYPE_HANDLED; 105 } 106 107 enum print_line_t 108 print_syscall_exit(struct trace_iterator *iter, int flags) 109 { 110 struct trace_seq *s = &iter->seq; 111 struct trace_entry *ent = iter->ent; 112 struct syscall_trace_exit *trace; 113 int syscall; 114 struct syscall_metadata *entry; 115 int ret; 116 117 trace = (typeof(trace))ent; 118 syscall = trace->nr; 119 entry = syscall_nr_to_meta(syscall); 120 121 if (!entry) { 122 trace_seq_printf(s, "\n"); 123 return TRACE_TYPE_HANDLED; 124 } 125 126 if (entry->exit_event->id != ent->type) { 127 WARN_ON_ONCE(1); 128 return TRACE_TYPE_UNHANDLED; 129 } 130 131 ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name, 132 trace->ret); 133 if (!ret) 134 return TRACE_TYPE_PARTIAL_LINE; 135 136 return TRACE_TYPE_HANDLED; 137 } 138 139 extern char *__bad_type_size(void); 140 141 #define SYSCALL_FIELD(type, name) \ 142 sizeof(type) != sizeof(trace.name) ? \ 143 __bad_type_size() : \ 144 #type, #name, offsetof(typeof(trace), name), \ 145 sizeof(trace.name), is_signed_type(type) 146 147 static 148 int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len) 149 { 150 int i; 151 int pos = 0; 152 153 /* When len=0, we just calculate the needed length */ 154 #define LEN_OR_ZERO (len ? len - pos : 0) 155 156 pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); 157 for (i = 0; i < entry->nb_args; i++) { 158 pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s", 159 entry->args[i], sizeof(unsigned long), 160 i == entry->nb_args - 1 ? "" : ", "); 161 } 162 pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); 163 164 for (i = 0; i < entry->nb_args; i++) { 165 pos += snprintf(buf + pos, LEN_OR_ZERO, 166 ", ((unsigned long)(REC->%s))", entry->args[i]); 167 } 168 169 #undef LEN_OR_ZERO 170 171 /* return the length of print_fmt */ 172 return pos; 173 } 174 175 static int set_syscall_print_fmt(struct ftrace_event_call *call) 176 { 177 char *print_fmt; 178 int len; 179 struct syscall_metadata *entry = call->data; 180 181 if (entry->enter_event != call) { 182 call->print_fmt = "\"0x%lx\", REC->ret"; 183 return 0; 184 } 185 186 /* First: called with 0 length to calculate the needed length */ 187 len = __set_enter_print_fmt(entry, NULL, 0); 188 189 print_fmt = kmalloc(len + 1, GFP_KERNEL); 190 if (!print_fmt) 191 return -ENOMEM; 192 193 /* Second: actually write the @print_fmt */ 194 __set_enter_print_fmt(entry, print_fmt, len + 1); 195 call->print_fmt = print_fmt; 196 197 return 0; 198 } 199 200 static void free_syscall_print_fmt(struct ftrace_event_call *call) 201 { 202 struct syscall_metadata *entry = call->data; 203 204 if (entry->enter_event == call) 205 kfree(call->print_fmt); 206 } 207 208 int syscall_enter_define_fields(struct ftrace_event_call *call) 209 { 210 struct syscall_trace_enter trace; 211 struct syscall_metadata *meta = call->data; 212 int ret; 213 int i; 214 int offset = offsetof(typeof(trace), args); 215 216 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); 217 if (ret) 218 return ret; 219 220 for (i = 0; i < meta->nb_args; i++) { 221 ret = trace_define_field(call, meta->types[i], 222 meta->args[i], offset, 223 sizeof(unsigned long), 0, 224 FILTER_OTHER); 225 offset += sizeof(unsigned long); 226 } 227 228 return ret; 229 } 230 231 int syscall_exit_define_fields(struct ftrace_event_call *call) 232 { 233 struct syscall_trace_exit trace; 234 int ret; 235 236 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); 237 if (ret) 238 return ret; 239 240 ret = trace_define_field(call, SYSCALL_FIELD(long, ret), 241 FILTER_OTHER); 242 243 return ret; 244 } 245 246 void ftrace_syscall_enter(struct pt_regs *regs, long id) 247 { 248 struct syscall_trace_enter *entry; 249 struct syscall_metadata *sys_data; 250 struct ring_buffer_event *event; 251 struct ring_buffer *buffer; 252 int size; 253 int syscall_nr; 254 255 syscall_nr = syscall_get_nr(current, regs); 256 if (syscall_nr < 0) 257 return; 258 if (!test_bit(syscall_nr, enabled_enter_syscalls)) 259 return; 260 261 sys_data = syscall_nr_to_meta(syscall_nr); 262 if (!sys_data) 263 return; 264 265 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; 266 267 event = trace_current_buffer_lock_reserve(&buffer, 268 sys_data->enter_event->id, size, 0, 0); 269 if (!event) 270 return; 271 272 entry = ring_buffer_event_data(event); 273 entry->nr = syscall_nr; 274 syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args); 275 276 if (!filter_current_check_discard(buffer, sys_data->enter_event, 277 entry, event)) 278 trace_current_buffer_unlock_commit(buffer, event, 0, 0); 279 } 280 281 void ftrace_syscall_exit(struct pt_regs *regs, long ret) 282 { 283 struct syscall_trace_exit *entry; 284 struct syscall_metadata *sys_data; 285 struct ring_buffer_event *event; 286 struct ring_buffer *buffer; 287 int syscall_nr; 288 289 syscall_nr = syscall_get_nr(current, regs); 290 if (syscall_nr < 0) 291 return; 292 if (!test_bit(syscall_nr, enabled_exit_syscalls)) 293 return; 294 295 sys_data = syscall_nr_to_meta(syscall_nr); 296 if (!sys_data) 297 return; 298 299 event = trace_current_buffer_lock_reserve(&buffer, 300 sys_data->exit_event->id, sizeof(*entry), 0, 0); 301 if (!event) 302 return; 303 304 entry = ring_buffer_event_data(event); 305 entry->nr = syscall_nr; 306 entry->ret = syscall_get_return_value(current, regs); 307 308 if (!filter_current_check_discard(buffer, sys_data->exit_event, 309 entry, event)) 310 trace_current_buffer_unlock_commit(buffer, event, 0, 0); 311 } 312 313 int reg_event_syscall_enter(struct ftrace_event_call *call) 314 { 315 int ret = 0; 316 int num; 317 318 num = ((struct syscall_metadata *)call->data)->syscall_nr; 319 if (num < 0 || num >= NR_syscalls) 320 return -ENOSYS; 321 mutex_lock(&syscall_trace_lock); 322 if (!sys_refcount_enter) 323 ret = register_trace_sys_enter(ftrace_syscall_enter); 324 if (!ret) { 325 set_bit(num, enabled_enter_syscalls); 326 sys_refcount_enter++; 327 } 328 mutex_unlock(&syscall_trace_lock); 329 return ret; 330 } 331 332 void unreg_event_syscall_enter(struct ftrace_event_call *call) 333 { 334 int num; 335 336 num = ((struct syscall_metadata *)call->data)->syscall_nr; 337 if (num < 0 || num >= NR_syscalls) 338 return; 339 mutex_lock(&syscall_trace_lock); 340 sys_refcount_enter--; 341 clear_bit(num, enabled_enter_syscalls); 342 if (!sys_refcount_enter) 343 unregister_trace_sys_enter(ftrace_syscall_enter); 344 mutex_unlock(&syscall_trace_lock); 345 } 346 347 int reg_event_syscall_exit(struct ftrace_event_call *call) 348 { 349 int ret = 0; 350 int num; 351 352 num = ((struct syscall_metadata *)call->data)->syscall_nr; 353 if (num < 0 || num >= NR_syscalls) 354 return -ENOSYS; 355 mutex_lock(&syscall_trace_lock); 356 if (!sys_refcount_exit) 357 ret = register_trace_sys_exit(ftrace_syscall_exit); 358 if (!ret) { 359 set_bit(num, enabled_exit_syscalls); 360 sys_refcount_exit++; 361 } 362 mutex_unlock(&syscall_trace_lock); 363 return ret; 364 } 365 366 void unreg_event_syscall_exit(struct ftrace_event_call *call) 367 { 368 int num; 369 370 num = ((struct syscall_metadata *)call->data)->syscall_nr; 371 if (num < 0 || num >= NR_syscalls) 372 return; 373 mutex_lock(&syscall_trace_lock); 374 sys_refcount_exit--; 375 clear_bit(num, enabled_exit_syscalls); 376 if (!sys_refcount_exit) 377 unregister_trace_sys_exit(ftrace_syscall_exit); 378 mutex_unlock(&syscall_trace_lock); 379 } 380 381 int init_syscall_trace(struct ftrace_event_call *call) 382 { 383 int id; 384 385 if (set_syscall_print_fmt(call) < 0) 386 return -ENOMEM; 387 388 id = trace_event_raw_init(call); 389 390 if (id < 0) { 391 free_syscall_print_fmt(call); 392 return id; 393 } 394 395 return id; 396 } 397 398 unsigned long __init arch_syscall_addr(int nr) 399 { 400 return (unsigned long)sys_call_table[nr]; 401 } 402 403 int __init init_ftrace_syscalls(void) 404 { 405 struct syscall_metadata *meta; 406 unsigned long addr; 407 int i; 408 409 syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * 410 NR_syscalls, GFP_KERNEL); 411 if (!syscalls_metadata) { 412 WARN_ON(1); 413 return -ENOMEM; 414 } 415 416 for (i = 0; i < NR_syscalls; i++) { 417 addr = arch_syscall_addr(i); 418 meta = find_syscall_meta(addr); 419 if (!meta) 420 continue; 421 422 meta->syscall_nr = i; 423 syscalls_metadata[i] = meta; 424 } 425 426 return 0; 427 } 428 core_initcall(init_ftrace_syscalls); 429 430 #ifdef CONFIG_PERF_EVENTS 431 432 static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls); 433 static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls); 434 static int sys_perf_refcount_enter; 435 static int sys_perf_refcount_exit; 436 437 static void perf_syscall_enter(struct pt_regs *regs, long id) 438 { 439 struct syscall_metadata *sys_data; 440 struct syscall_trace_enter *rec; 441 unsigned long flags; 442 int syscall_nr; 443 int rctx; 444 int size; 445 446 syscall_nr = syscall_get_nr(current, regs); 447 if (!test_bit(syscall_nr, enabled_perf_enter_syscalls)) 448 return; 449 450 sys_data = syscall_nr_to_meta(syscall_nr); 451 if (!sys_data) 452 return; 453 454 /* get the size after alignment with the u32 buffer size field */ 455 size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec); 456 size = ALIGN(size + sizeof(u32), sizeof(u64)); 457 size -= sizeof(u32); 458 459 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, 460 "perf buffer not large enough")) 461 return; 462 463 rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size, 464 sys_data->enter_event->id, &rctx, &flags); 465 if (!rec) 466 return; 467 468 rec->nr = syscall_nr; 469 syscall_get_arguments(current, regs, 0, sys_data->nb_args, 470 (unsigned long *)&rec->args); 471 perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs); 472 } 473 474 int perf_sysenter_enable(struct ftrace_event_call *call) 475 { 476 int ret = 0; 477 int num; 478 479 num = ((struct syscall_metadata *)call->data)->syscall_nr; 480 481 mutex_lock(&syscall_trace_lock); 482 if (!sys_perf_refcount_enter) 483 ret = register_trace_sys_enter(perf_syscall_enter); 484 if (ret) { 485 pr_info("event trace: Could not activate" 486 "syscall entry trace point"); 487 } else { 488 set_bit(num, enabled_perf_enter_syscalls); 489 sys_perf_refcount_enter++; 490 } 491 mutex_unlock(&syscall_trace_lock); 492 return ret; 493 } 494 495 void perf_sysenter_disable(struct ftrace_event_call *call) 496 { 497 int num; 498 499 num = ((struct syscall_metadata *)call->data)->syscall_nr; 500 501 mutex_lock(&syscall_trace_lock); 502 sys_perf_refcount_enter--; 503 clear_bit(num, enabled_perf_enter_syscalls); 504 if (!sys_perf_refcount_enter) 505 unregister_trace_sys_enter(perf_syscall_enter); 506 mutex_unlock(&syscall_trace_lock); 507 } 508 509 static void perf_syscall_exit(struct pt_regs *regs, long ret) 510 { 511 struct syscall_metadata *sys_data; 512 struct syscall_trace_exit *rec; 513 unsigned long flags; 514 int syscall_nr; 515 int rctx; 516 int size; 517 518 syscall_nr = syscall_get_nr(current, regs); 519 if (!test_bit(syscall_nr, enabled_perf_exit_syscalls)) 520 return; 521 522 sys_data = syscall_nr_to_meta(syscall_nr); 523 if (!sys_data) 524 return; 525 526 /* We can probably do that at build time */ 527 size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64)); 528 size -= sizeof(u32); 529 530 /* 531 * Impossible, but be paranoid with the future 532 * How to put this check outside runtime? 533 */ 534 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, 535 "exit event has grown above perf buffer size")) 536 return; 537 538 rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size, 539 sys_data->exit_event->id, &rctx, &flags); 540 if (!rec) 541 return; 542 543 rec->nr = syscall_nr; 544 rec->ret = syscall_get_return_value(current, regs); 545 546 perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs); 547 } 548 549 int perf_sysexit_enable(struct ftrace_event_call *call) 550 { 551 int ret = 0; 552 int num; 553 554 num = ((struct syscall_metadata *)call->data)->syscall_nr; 555 556 mutex_lock(&syscall_trace_lock); 557 if (!sys_perf_refcount_exit) 558 ret = register_trace_sys_exit(perf_syscall_exit); 559 if (ret) { 560 pr_info("event trace: Could not activate" 561 "syscall exit trace point"); 562 } else { 563 set_bit(num, enabled_perf_exit_syscalls); 564 sys_perf_refcount_exit++; 565 } 566 mutex_unlock(&syscall_trace_lock); 567 return ret; 568 } 569 570 void perf_sysexit_disable(struct ftrace_event_call *call) 571 { 572 int num; 573 574 num = ((struct syscall_metadata *)call->data)->syscall_nr; 575 576 mutex_lock(&syscall_trace_lock); 577 sys_perf_refcount_exit--; 578 clear_bit(num, enabled_perf_exit_syscalls); 579 if (!sys_perf_refcount_exit) 580 unregister_trace_sys_exit(perf_syscall_exit); 581 mutex_unlock(&syscall_trace_lock); 582 } 583 584 #endif /* CONFIG_PERF_EVENTS */ 585 586