1 #include <trace/syscall.h> 2 #include <trace/events/syscalls.h> 3 #include <linux/kernel.h> 4 #include <linux/ftrace.h> 5 #include <linux/perf_event.h> 6 #include <asm/syscall.h> 7 8 #include "trace_output.h" 9 #include "trace.h" 10 11 static DEFINE_MUTEX(syscall_trace_lock); 12 static int sys_refcount_enter; 13 static int sys_refcount_exit; 14 static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls); 15 static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); 16 17 extern unsigned long __start_syscalls_metadata[]; 18 extern unsigned long __stop_syscalls_metadata[]; 19 20 static struct syscall_metadata **syscalls_metadata; 21 22 static struct syscall_metadata *find_syscall_meta(unsigned long syscall) 23 { 24 struct syscall_metadata *start; 25 struct syscall_metadata *stop; 26 char str[KSYM_SYMBOL_LEN]; 27 28 29 start = (struct syscall_metadata *)__start_syscalls_metadata; 30 stop = (struct syscall_metadata *)__stop_syscalls_metadata; 31 kallsyms_lookup(syscall, NULL, NULL, NULL, str); 32 33 for ( ; start < stop; start++) { 34 /* 35 * Only compare after the "sys" prefix. Archs that use 36 * syscall wrappers may have syscalls symbols aliases prefixed 37 * with "SyS" instead of "sys", leading to an unwanted 38 * mismatch. 39 */ 40 if (start->name && !strcmp(start->name + 3, str + 3)) 41 return start; 42 } 43 return NULL; 44 } 45 46 static struct syscall_metadata *syscall_nr_to_meta(int nr) 47 { 48 if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) 49 return NULL; 50 51 return syscalls_metadata[nr]; 52 } 53 54 enum print_line_t 55 print_syscall_enter(struct trace_iterator *iter, int flags) 56 { 57 struct trace_seq *s = &iter->seq; 58 struct trace_entry *ent = iter->ent; 59 struct syscall_trace_enter *trace; 60 struct syscall_metadata *entry; 61 int i, ret, syscall; 62 63 trace = (typeof(trace))ent; 64 syscall = trace->nr; 65 entry = syscall_nr_to_meta(syscall); 66 67 if (!entry) 68 goto end; 69 70 if (entry->enter_event->id != ent->type) { 71 WARN_ON_ONCE(1); 72 goto end; 73 } 74 75 ret = trace_seq_printf(s, "%s(", entry->name); 76 if (!ret) 77 return TRACE_TYPE_PARTIAL_LINE; 78 79 for (i = 0; i < entry->nb_args; i++) { 80 /* parameter types */ 81 if (trace_flags & TRACE_ITER_VERBOSE) { 82 ret = trace_seq_printf(s, "%s ", entry->types[i]); 83 if (!ret) 84 return TRACE_TYPE_PARTIAL_LINE; 85 } 86 /* parameter values */ 87 ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i], 88 trace->args[i], 89 i == entry->nb_args - 1 ? "" : ", "); 90 if (!ret) 91 return TRACE_TYPE_PARTIAL_LINE; 92 } 93 94 ret = trace_seq_putc(s, ')'); 95 if (!ret) 96 return TRACE_TYPE_PARTIAL_LINE; 97 98 end: 99 ret = trace_seq_putc(s, '\n'); 100 if (!ret) 101 return TRACE_TYPE_PARTIAL_LINE; 102 103 return TRACE_TYPE_HANDLED; 104 } 105 106 enum print_line_t 107 print_syscall_exit(struct trace_iterator *iter, int flags) 108 { 109 struct trace_seq *s = &iter->seq; 110 struct trace_entry *ent = iter->ent; 111 struct syscall_trace_exit *trace; 112 int syscall; 113 struct syscall_metadata *entry; 114 int ret; 115 116 trace = (typeof(trace))ent; 117 syscall = trace->nr; 118 entry = syscall_nr_to_meta(syscall); 119 120 if (!entry) { 121 trace_seq_printf(s, "\n"); 122 return TRACE_TYPE_HANDLED; 123 } 124 125 if (entry->exit_event->id != ent->type) { 126 WARN_ON_ONCE(1); 127 return TRACE_TYPE_UNHANDLED; 128 } 129 130 ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name, 131 trace->ret); 132 if (!ret) 133 return TRACE_TYPE_PARTIAL_LINE; 134 135 return TRACE_TYPE_HANDLED; 136 } 137 138 extern char *__bad_type_size(void); 139 140 #define SYSCALL_FIELD(type, name) \ 141 sizeof(type) != sizeof(trace.name) ? \ 142 __bad_type_size() : \ 143 #type, #name, offsetof(typeof(trace), name), \ 144 sizeof(trace.name), is_signed_type(type) 145 146 static 147 int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len) 148 { 149 int i; 150 int pos = 0; 151 152 /* When len=0, we just calculate the needed length */ 153 #define LEN_OR_ZERO (len ? len - pos : 0) 154 155 pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); 156 for (i = 0; i < entry->nb_args; i++) { 157 pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s", 158 entry->args[i], sizeof(unsigned long), 159 i == entry->nb_args - 1 ? "" : ", "); 160 } 161 pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); 162 163 for (i = 0; i < entry->nb_args; i++) { 164 pos += snprintf(buf + pos, LEN_OR_ZERO, 165 ", ((unsigned long)(REC->%s))", entry->args[i]); 166 } 167 168 #undef LEN_OR_ZERO 169 170 /* return the length of print_fmt */ 171 return pos; 172 } 173 174 static int set_syscall_print_fmt(struct ftrace_event_call *call) 175 { 176 char *print_fmt; 177 int len; 178 struct syscall_metadata *entry = call->data; 179 180 if (entry->enter_event != call) { 181 call->print_fmt = "\"0x%lx\", REC->ret"; 182 return 0; 183 } 184 185 /* First: called with 0 length to calculate the needed length */ 186 len = __set_enter_print_fmt(entry, NULL, 0); 187 188 print_fmt = kmalloc(len + 1, GFP_KERNEL); 189 if (!print_fmt) 190 return -ENOMEM; 191 192 /* Second: actually write the @print_fmt */ 193 __set_enter_print_fmt(entry, print_fmt, len + 1); 194 call->print_fmt = print_fmt; 195 196 return 0; 197 } 198 199 static void free_syscall_print_fmt(struct ftrace_event_call *call) 200 { 201 struct syscall_metadata *entry = call->data; 202 203 if (entry->enter_event == call) 204 kfree(call->print_fmt); 205 } 206 207 int syscall_enter_define_fields(struct ftrace_event_call *call) 208 { 209 struct syscall_trace_enter trace; 210 struct syscall_metadata *meta = call->data; 211 int ret; 212 int i; 213 int offset = offsetof(typeof(trace), args); 214 215 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); 216 if (ret) 217 return ret; 218 219 for (i = 0; i < meta->nb_args; i++) { 220 ret = trace_define_field(call, meta->types[i], 221 meta->args[i], offset, 222 sizeof(unsigned long), 0, 223 FILTER_OTHER); 224 offset += sizeof(unsigned long); 225 } 226 227 return ret; 228 } 229 230 int syscall_exit_define_fields(struct ftrace_event_call *call) 231 { 232 struct syscall_trace_exit trace; 233 int ret; 234 235 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); 236 if (ret) 237 return ret; 238 239 ret = trace_define_field(call, SYSCALL_FIELD(long, ret), 240 FILTER_OTHER); 241 242 return ret; 243 } 244 245 void ftrace_syscall_enter(struct pt_regs *regs, long id) 246 { 247 struct syscall_trace_enter *entry; 248 struct syscall_metadata *sys_data; 249 struct ring_buffer_event *event; 250 struct ring_buffer *buffer; 251 int size; 252 int syscall_nr; 253 254 syscall_nr = syscall_get_nr(current, regs); 255 if (syscall_nr < 0) 256 return; 257 if (!test_bit(syscall_nr, enabled_enter_syscalls)) 258 return; 259 260 sys_data = syscall_nr_to_meta(syscall_nr); 261 if (!sys_data) 262 return; 263 264 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; 265 266 event = trace_current_buffer_lock_reserve(&buffer, 267 sys_data->enter_event->id, size, 0, 0); 268 if (!event) 269 return; 270 271 entry = ring_buffer_event_data(event); 272 entry->nr = syscall_nr; 273 syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args); 274 275 if (!filter_current_check_discard(buffer, sys_data->enter_event, 276 entry, event)) 277 trace_current_buffer_unlock_commit(buffer, event, 0, 0); 278 } 279 280 void ftrace_syscall_exit(struct pt_regs *regs, long ret) 281 { 282 struct syscall_trace_exit *entry; 283 struct syscall_metadata *sys_data; 284 struct ring_buffer_event *event; 285 struct ring_buffer *buffer; 286 int syscall_nr; 287 288 syscall_nr = syscall_get_nr(current, regs); 289 if (syscall_nr < 0) 290 return; 291 if (!test_bit(syscall_nr, enabled_exit_syscalls)) 292 return; 293 294 sys_data = syscall_nr_to_meta(syscall_nr); 295 if (!sys_data) 296 return; 297 298 event = trace_current_buffer_lock_reserve(&buffer, 299 sys_data->exit_event->id, sizeof(*entry), 0, 0); 300 if (!event) 301 return; 302 303 entry = ring_buffer_event_data(event); 304 entry->nr = syscall_nr; 305 entry->ret = syscall_get_return_value(current, regs); 306 307 if (!filter_current_check_discard(buffer, sys_data->exit_event, 308 entry, event)) 309 trace_current_buffer_unlock_commit(buffer, event, 0, 0); 310 } 311 312 int reg_event_syscall_enter(struct ftrace_event_call *call) 313 { 314 int ret = 0; 315 int num; 316 317 num = ((struct syscall_metadata *)call->data)->syscall_nr; 318 if (num < 0 || num >= NR_syscalls) 319 return -ENOSYS; 320 mutex_lock(&syscall_trace_lock); 321 if (!sys_refcount_enter) 322 ret = register_trace_sys_enter(ftrace_syscall_enter); 323 if (!ret) { 324 set_bit(num, enabled_enter_syscalls); 325 sys_refcount_enter++; 326 } 327 mutex_unlock(&syscall_trace_lock); 328 return ret; 329 } 330 331 void unreg_event_syscall_enter(struct ftrace_event_call *call) 332 { 333 int num; 334 335 num = ((struct syscall_metadata *)call->data)->syscall_nr; 336 if (num < 0 || num >= NR_syscalls) 337 return; 338 mutex_lock(&syscall_trace_lock); 339 sys_refcount_enter--; 340 clear_bit(num, enabled_enter_syscalls); 341 if (!sys_refcount_enter) 342 unregister_trace_sys_enter(ftrace_syscall_enter); 343 mutex_unlock(&syscall_trace_lock); 344 } 345 346 int reg_event_syscall_exit(struct ftrace_event_call *call) 347 { 348 int ret = 0; 349 int num; 350 351 num = ((struct syscall_metadata *)call->data)->syscall_nr; 352 if (num < 0 || num >= NR_syscalls) 353 return -ENOSYS; 354 mutex_lock(&syscall_trace_lock); 355 if (!sys_refcount_exit) 356 ret = register_trace_sys_exit(ftrace_syscall_exit); 357 if (!ret) { 358 set_bit(num, enabled_exit_syscalls); 359 sys_refcount_exit++; 360 } 361 mutex_unlock(&syscall_trace_lock); 362 return ret; 363 } 364 365 void unreg_event_syscall_exit(struct ftrace_event_call *call) 366 { 367 int num; 368 369 num = ((struct syscall_metadata *)call->data)->syscall_nr; 370 if (num < 0 || num >= NR_syscalls) 371 return; 372 mutex_lock(&syscall_trace_lock); 373 sys_refcount_exit--; 374 clear_bit(num, enabled_exit_syscalls); 375 if (!sys_refcount_exit) 376 unregister_trace_sys_exit(ftrace_syscall_exit); 377 mutex_unlock(&syscall_trace_lock); 378 } 379 380 int init_syscall_trace(struct ftrace_event_call *call) 381 { 382 int id; 383 384 if (set_syscall_print_fmt(call) < 0) 385 return -ENOMEM; 386 387 id = trace_event_raw_init(call); 388 389 if (id < 0) { 390 free_syscall_print_fmt(call); 391 return id; 392 } 393 394 return id; 395 } 396 397 unsigned long __init arch_syscall_addr(int nr) 398 { 399 return (unsigned long)sys_call_table[nr]; 400 } 401 402 int __init init_ftrace_syscalls(void) 403 { 404 struct syscall_metadata *meta; 405 unsigned long addr; 406 int i; 407 408 syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * 409 NR_syscalls, GFP_KERNEL); 410 if (!syscalls_metadata) { 411 WARN_ON(1); 412 return -ENOMEM; 413 } 414 415 for (i = 0; i < NR_syscalls; i++) { 416 addr = arch_syscall_addr(i); 417 meta = find_syscall_meta(addr); 418 if (!meta) 419 continue; 420 421 meta->syscall_nr = i; 422 syscalls_metadata[i] = meta; 423 } 424 425 return 0; 426 } 427 core_initcall(init_ftrace_syscalls); 428 429 #ifdef CONFIG_PERF_EVENTS 430 431 static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls); 432 static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls); 433 static int sys_prof_refcount_enter; 434 static int sys_prof_refcount_exit; 435 436 static void prof_syscall_enter(struct pt_regs *regs, long id) 437 { 438 struct syscall_metadata *sys_data; 439 struct syscall_trace_enter *rec; 440 unsigned long flags; 441 int syscall_nr; 442 int rctx; 443 int size; 444 445 syscall_nr = syscall_get_nr(current, regs); 446 if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) 447 return; 448 449 sys_data = syscall_nr_to_meta(syscall_nr); 450 if (!sys_data) 451 return; 452 453 /* get the size after alignment with the u32 buffer size field */ 454 size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec); 455 size = ALIGN(size + sizeof(u32), sizeof(u64)); 456 size -= sizeof(u32); 457 458 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 459 "profile buffer not large enough")) 460 return; 461 462 rec = (struct syscall_trace_enter *)ftrace_perf_buf_prepare(size, 463 sys_data->enter_event->id, &rctx, &flags); 464 if (!rec) 465 return; 466 467 rec->nr = syscall_nr; 468 syscall_get_arguments(current, regs, 0, sys_data->nb_args, 469 (unsigned long *)&rec->args); 470 ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags); 471 } 472 473 int prof_sysenter_enable(struct ftrace_event_call *call) 474 { 475 int ret = 0; 476 int num; 477 478 num = ((struct syscall_metadata *)call->data)->syscall_nr; 479 480 mutex_lock(&syscall_trace_lock); 481 if (!sys_prof_refcount_enter) 482 ret = register_trace_sys_enter(prof_syscall_enter); 483 if (ret) { 484 pr_info("event trace: Could not activate" 485 "syscall entry trace point"); 486 } else { 487 set_bit(num, enabled_prof_enter_syscalls); 488 sys_prof_refcount_enter++; 489 } 490 mutex_unlock(&syscall_trace_lock); 491 return ret; 492 } 493 494 void prof_sysenter_disable(struct ftrace_event_call *call) 495 { 496 int num; 497 498 num = ((struct syscall_metadata *)call->data)->syscall_nr; 499 500 mutex_lock(&syscall_trace_lock); 501 sys_prof_refcount_enter--; 502 clear_bit(num, enabled_prof_enter_syscalls); 503 if (!sys_prof_refcount_enter) 504 unregister_trace_sys_enter(prof_syscall_enter); 505 mutex_unlock(&syscall_trace_lock); 506 } 507 508 static void prof_syscall_exit(struct pt_regs *regs, long ret) 509 { 510 struct syscall_metadata *sys_data; 511 struct syscall_trace_exit *rec; 512 unsigned long flags; 513 int syscall_nr; 514 int rctx; 515 int size; 516 517 syscall_nr = syscall_get_nr(current, regs); 518 if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) 519 return; 520 521 sys_data = syscall_nr_to_meta(syscall_nr); 522 if (!sys_data) 523 return; 524 525 /* We can probably do that at build time */ 526 size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64)); 527 size -= sizeof(u32); 528 529 /* 530 * Impossible, but be paranoid with the future 531 * How to put this check outside runtime? 532 */ 533 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 534 "exit event has grown above profile buffer size")) 535 return; 536 537 rec = (struct syscall_trace_exit *)ftrace_perf_buf_prepare(size, 538 sys_data->exit_event->id, &rctx, &flags); 539 if (!rec) 540 return; 541 542 rec->nr = syscall_nr; 543 rec->ret = syscall_get_return_value(current, regs); 544 545 ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags); 546 } 547 548 int prof_sysexit_enable(struct ftrace_event_call *call) 549 { 550 int ret = 0; 551 int num; 552 553 num = ((struct syscall_metadata *)call->data)->syscall_nr; 554 555 mutex_lock(&syscall_trace_lock); 556 if (!sys_prof_refcount_exit) 557 ret = register_trace_sys_exit(prof_syscall_exit); 558 if (ret) { 559 pr_info("event trace: Could not activate" 560 "syscall exit trace point"); 561 } else { 562 set_bit(num, enabled_prof_exit_syscalls); 563 sys_prof_refcount_exit++; 564 } 565 mutex_unlock(&syscall_trace_lock); 566 return ret; 567 } 568 569 void prof_sysexit_disable(struct ftrace_event_call *call) 570 { 571 int num; 572 573 num = ((struct syscall_metadata *)call->data)->syscall_nr; 574 575 mutex_lock(&syscall_trace_lock); 576 sys_prof_refcount_exit--; 577 clear_bit(num, enabled_prof_exit_syscalls); 578 if (!sys_prof_refcount_exit) 579 unregister_trace_sys_exit(prof_syscall_exit); 580 mutex_unlock(&syscall_trace_lock); 581 } 582 583 #endif /* CONFIG_PERF_EVENTS */ 584 585