1 #include <traceevent/event-parse.h> 2 #include "builtin.h" 3 #include "util/color.h" 4 #include "util/evlist.h" 5 #include "util/machine.h" 6 #include "util/thread.h" 7 #include "util/parse-options.h" 8 #include "util/strlist.h" 9 #include "util/thread_map.h" 10 11 #include <libaudit.h> 12 #include <stdlib.h> 13 14 static struct syscall_fmt { 15 const char *name; 16 const char *alias; 17 bool errmsg; 18 bool timeout; 19 } syscall_fmts[] = { 20 { .name = "access", .errmsg = true, }, 21 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", }, 22 { .name = "connect", .errmsg = true, }, 23 { .name = "fstat", .errmsg = true, .alias = "newfstat", }, 24 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", }, 25 { .name = "futex", .errmsg = true, }, 26 { .name = "open", .errmsg = true, }, 27 { .name = "poll", .errmsg = true, .timeout = true, }, 28 { .name = "ppoll", .errmsg = true, .timeout = true, }, 29 { .name = "read", .errmsg = true, }, 30 { .name = "recvfrom", .errmsg = true, }, 31 { .name = "select", .errmsg = true, .timeout = true, }, 32 { .name = "socket", .errmsg = true, }, 33 { .name = "stat", .errmsg = true, .alias = "newstat", }, 34 }; 35 36 static int syscall_fmt__cmp(const void *name, const void *fmtp) 37 { 38 const struct syscall_fmt *fmt = fmtp; 39 return strcmp(name, fmt->name); 40 } 41 42 static struct syscall_fmt *syscall_fmt__find(const char *name) 43 { 44 const int nmemb = ARRAY_SIZE(syscall_fmts); 45 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp); 46 } 47 48 struct syscall { 49 struct event_format *tp_format; 50 const char *name; 51 bool filtered; 52 struct syscall_fmt *fmt; 53 }; 54 55 static size_t fprintf_duration(unsigned long t, FILE *fp) 56 { 57 double duration = (double)t / NSEC_PER_MSEC; 58 size_t printed = fprintf(fp, "("); 59 60 if (duration >= 1.0) 61 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration); 62 else if (duration >= 0.01) 63 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration); 64 else 65 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration); 66 return printed + fprintf(stdout, "): "); 67 } 68 69 struct thread_trace { 70 u64 entry_time; 71 u64 exit_time; 72 bool entry_pending; 73 unsigned long nr_events; 74 char *entry_str; 75 double runtime_ms; 76 }; 77 78 static struct thread_trace *thread_trace__new(void) 79 { 80 return zalloc(sizeof(struct thread_trace)); 81 } 82 83 static struct thread_trace *thread__trace(struct thread *thread) 84 { 85 struct thread_trace *ttrace; 86 87 if (thread == NULL) 88 goto fail; 89 90 if (thread->priv == NULL) 91 thread->priv = thread_trace__new(); 92 93 if (thread->priv == NULL) 94 goto fail; 95 96 ttrace = thread->priv; 97 ++ttrace->nr_events; 98 99 return ttrace; 100 fail: 101 color_fprintf(stdout, PERF_COLOR_RED, 102 "WARNING: not enough memory, dropping samples!\n"); 103 return NULL; 104 } 105 106 struct trace { 107 int audit_machine; 108 struct { 109 int max; 110 struct syscall *table; 111 } syscalls; 112 struct perf_record_opts opts; 113 struct machine host; 114 u64 base_time; 115 struct strlist *ev_qualifier; 116 unsigned long nr_events; 117 bool sched; 118 bool multiple_threads; 119 double duration_filter; 120 double runtime_ms; 121 }; 122 123 static bool trace__filter_duration(struct trace *trace, double t) 124 { 125 return t < (trace->duration_filter * NSEC_PER_MSEC); 126 } 127 128 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) 129 { 130 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC; 131 132 return fprintf(fp, "%10.3f ", ts); 133 } 134 135 static bool done = false; 136 137 static void sig_handler(int sig __maybe_unused) 138 { 139 done = true; 140 } 141 142 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread, 143 u64 duration, u64 tstamp, FILE *fp) 144 { 145 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp); 146 printed += fprintf_duration(duration, fp); 147 148 if (trace->multiple_threads) 149 printed += fprintf(fp, "%d ", thread->tid); 150 151 return printed; 152 } 153 154 static int trace__process_event(struct machine *machine, union perf_event *event) 155 { 156 int ret = 0; 157 158 switch (event->header.type) { 159 case PERF_RECORD_LOST: 160 color_fprintf(stdout, PERF_COLOR_RED, 161 "LOST %" PRIu64 " events!\n", event->lost.lost); 162 ret = machine__process_lost_event(machine, event); 163 default: 164 ret = machine__process_event(machine, event); 165 break; 166 } 167 168 return ret; 169 } 170 171 static int trace__tool_process(struct perf_tool *tool __maybe_unused, 172 union perf_event *event, 173 struct perf_sample *sample __maybe_unused, 174 struct machine *machine) 175 { 176 return trace__process_event(machine, event); 177 } 178 179 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) 180 { 181 int err = symbol__init(); 182 183 if (err) 184 return err; 185 186 machine__init(&trace->host, "", HOST_KERNEL_ID); 187 machine__create_kernel_maps(&trace->host); 188 189 if (perf_target__has_task(&trace->opts.target)) { 190 err = perf_event__synthesize_thread_map(NULL, evlist->threads, 191 trace__tool_process, 192 &trace->host); 193 } else { 194 err = perf_event__synthesize_threads(NULL, trace__tool_process, 195 &trace->host); 196 } 197 198 if (err) 199 symbol__exit(); 200 201 return err; 202 } 203 204 static int trace__read_syscall_info(struct trace *trace, int id) 205 { 206 char tp_name[128]; 207 struct syscall *sc; 208 const char *name = audit_syscall_to_name(id, trace->audit_machine); 209 210 if (name == NULL) 211 return -1; 212 213 if (id > trace->syscalls.max) { 214 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc)); 215 216 if (nsyscalls == NULL) 217 return -1; 218 219 if (trace->syscalls.max != -1) { 220 memset(nsyscalls + trace->syscalls.max + 1, 0, 221 (id - trace->syscalls.max) * sizeof(*sc)); 222 } else { 223 memset(nsyscalls, 0, (id + 1) * sizeof(*sc)); 224 } 225 226 trace->syscalls.table = nsyscalls; 227 trace->syscalls.max = id; 228 } 229 230 sc = trace->syscalls.table + id; 231 sc->name = name; 232 233 if (trace->ev_qualifier && !strlist__find(trace->ev_qualifier, name)) { 234 sc->filtered = true; 235 /* 236 * No need to do read tracepoint information since this will be 237 * filtered out. 238 */ 239 return 0; 240 } 241 242 sc->fmt = syscall_fmt__find(sc->name); 243 244 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name); 245 sc->tp_format = event_format__new("syscalls", tp_name); 246 247 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) { 248 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias); 249 sc->tp_format = event_format__new("syscalls", tp_name); 250 } 251 252 return sc->tp_format != NULL ? 0 : -1; 253 } 254 255 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, 256 unsigned long *args) 257 { 258 int i = 0; 259 size_t printed = 0; 260 261 if (sc->tp_format != NULL) { 262 struct format_field *field; 263 264 for (field = sc->tp_format->format.fields->next; field; field = field->next) { 265 printed += scnprintf(bf + printed, size - printed, 266 "%s%s: %ld", printed ? ", " : "", 267 field->name, args[i++]); 268 } 269 } else { 270 while (i < 6) { 271 printed += scnprintf(bf + printed, size - printed, 272 "%sarg%d: %ld", 273 printed ? ", " : "", i, args[i]); 274 ++i; 275 } 276 } 277 278 return printed; 279 } 280 281 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel, 282 struct perf_sample *sample); 283 284 static struct syscall *trace__syscall_info(struct trace *trace, 285 struct perf_evsel *evsel, 286 struct perf_sample *sample) 287 { 288 int id = perf_evsel__intval(evsel, sample, "id"); 289 290 if (id < 0) { 291 printf("Invalid syscall %d id, skipping...\n", id); 292 return NULL; 293 } 294 295 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) && 296 trace__read_syscall_info(trace, id)) 297 goto out_cant_read; 298 299 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL)) 300 goto out_cant_read; 301 302 return &trace->syscalls.table[id]; 303 304 out_cant_read: 305 printf("Problems reading syscall %d", id); 306 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL) 307 printf("(%s)", trace->syscalls.table[id].name); 308 puts(" information"); 309 return NULL; 310 } 311 312 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, 313 struct perf_sample *sample) 314 { 315 char *msg; 316 void *args; 317 size_t printed = 0; 318 struct thread *thread; 319 struct syscall *sc = trace__syscall_info(trace, evsel, sample); 320 struct thread_trace *ttrace; 321 322 if (sc == NULL) 323 return -1; 324 325 if (sc->filtered) 326 return 0; 327 328 thread = machine__findnew_thread(&trace->host, sample->tid); 329 ttrace = thread__trace(thread); 330 if (ttrace == NULL) 331 return -1; 332 333 args = perf_evsel__rawptr(evsel, sample, "args"); 334 if (args == NULL) { 335 printf("Problems reading syscall arguments\n"); 336 return -1; 337 } 338 339 ttrace = thread->priv; 340 341 if (ttrace->entry_str == NULL) { 342 ttrace->entry_str = malloc(1024); 343 if (!ttrace->entry_str) 344 return -1; 345 } 346 347 ttrace->entry_time = sample->time; 348 msg = ttrace->entry_str; 349 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name); 350 351 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed, args); 352 353 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) { 354 if (!trace->duration_filter) { 355 trace__fprintf_entry_head(trace, thread, 1, sample->time, stdout); 356 printf("%-70s\n", ttrace->entry_str); 357 } 358 } else 359 ttrace->entry_pending = true; 360 361 return 0; 362 } 363 364 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, 365 struct perf_sample *sample) 366 { 367 int ret; 368 u64 duration = 0; 369 struct thread *thread; 370 struct syscall *sc = trace__syscall_info(trace, evsel, sample); 371 struct thread_trace *ttrace; 372 373 if (sc == NULL) 374 return -1; 375 376 if (sc->filtered) 377 return 0; 378 379 thread = machine__findnew_thread(&trace->host, sample->tid); 380 ttrace = thread__trace(thread); 381 if (ttrace == NULL) 382 return -1; 383 384 ret = perf_evsel__intval(evsel, sample, "ret"); 385 386 ttrace = thread->priv; 387 388 ttrace->exit_time = sample->time; 389 390 if (ttrace->entry_time) { 391 duration = sample->time - ttrace->entry_time; 392 if (trace__filter_duration(trace, duration)) 393 goto out; 394 } else if (trace->duration_filter) 395 goto out; 396 397 trace__fprintf_entry_head(trace, thread, duration, sample->time, stdout); 398 399 if (ttrace->entry_pending) { 400 printf("%-70s", ttrace->entry_str); 401 } else { 402 printf(" ... ["); 403 color_fprintf(stdout, PERF_COLOR_YELLOW, "continued"); 404 printf("]: %s()", sc->name); 405 } 406 407 if (ret < 0 && sc->fmt && sc->fmt->errmsg) { 408 char bf[256]; 409 const char *emsg = strerror_r(-ret, bf, sizeof(bf)), 410 *e = audit_errno_to_name(-ret); 411 412 printf(") = -1 %s %s", e, emsg); 413 } else if (ret == 0 && sc->fmt && sc->fmt->timeout) 414 printf(") = 0 Timeout"); 415 else 416 printf(") = %d", ret); 417 418 putchar('\n'); 419 out: 420 ttrace->entry_pending = false; 421 422 return 0; 423 } 424 425 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel, 426 struct perf_sample *sample) 427 { 428 u64 runtime = perf_evsel__intval(evsel, sample, "runtime"); 429 double runtime_ms = (double)runtime / NSEC_PER_MSEC; 430 struct thread *thread = machine__findnew_thread(&trace->host, sample->tid); 431 struct thread_trace *ttrace = thread__trace(thread); 432 433 if (ttrace == NULL) 434 goto out_dump; 435 436 ttrace->runtime_ms += runtime_ms; 437 trace->runtime_ms += runtime_ms; 438 return 0; 439 440 out_dump: 441 printf("%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n", 442 evsel->name, 443 perf_evsel__strval(evsel, sample, "comm"), 444 (pid_t)perf_evsel__intval(evsel, sample, "pid"), 445 runtime, 446 perf_evsel__intval(evsel, sample, "vruntime")); 447 return 0; 448 } 449 450 static int trace__run(struct trace *trace, int argc, const char **argv) 451 { 452 struct perf_evlist *evlist = perf_evlist__new(); 453 struct perf_evsel *evsel; 454 int err = -1, i; 455 unsigned long before; 456 const bool forks = argc > 0; 457 458 if (evlist == NULL) { 459 printf("Not enough memory to run!\n"); 460 goto out; 461 } 462 463 if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) || 464 perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) { 465 printf("Couldn't read the raw_syscalls tracepoints information!\n"); 466 goto out_delete_evlist; 467 } 468 469 if (trace->sched && 470 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", 471 trace__sched_stat_runtime)) { 472 printf("Couldn't read the sched_stat_runtime tracepoint information!\n"); 473 goto out_delete_evlist; 474 } 475 476 err = perf_evlist__create_maps(evlist, &trace->opts.target); 477 if (err < 0) { 478 printf("Problems parsing the target to trace, check your options!\n"); 479 goto out_delete_evlist; 480 } 481 482 err = trace__symbols_init(trace, evlist); 483 if (err < 0) { 484 printf("Problems initializing symbol libraries!\n"); 485 goto out_delete_maps; 486 } 487 488 perf_evlist__config(evlist, &trace->opts); 489 490 signal(SIGCHLD, sig_handler); 491 signal(SIGINT, sig_handler); 492 493 if (forks) { 494 err = perf_evlist__prepare_workload(evlist, &trace->opts.target, 495 argv, false, false); 496 if (err < 0) { 497 printf("Couldn't run the workload!\n"); 498 goto out_delete_maps; 499 } 500 } 501 502 err = perf_evlist__open(evlist); 503 if (err < 0) { 504 printf("Couldn't create the events: %s\n", strerror(errno)); 505 goto out_delete_maps; 506 } 507 508 err = perf_evlist__mmap(evlist, UINT_MAX, false); 509 if (err < 0) { 510 printf("Couldn't mmap the events: %s\n", strerror(errno)); 511 goto out_close_evlist; 512 } 513 514 perf_evlist__enable(evlist); 515 516 if (forks) 517 perf_evlist__start_workload(evlist); 518 519 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1; 520 again: 521 before = trace->nr_events; 522 523 for (i = 0; i < evlist->nr_mmaps; i++) { 524 union perf_event *event; 525 526 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) { 527 const u32 type = event->header.type; 528 tracepoint_handler handler; 529 struct perf_sample sample; 530 531 ++trace->nr_events; 532 533 err = perf_evlist__parse_sample(evlist, event, &sample); 534 if (err) { 535 printf("Can't parse sample, err = %d, skipping...\n", err); 536 continue; 537 } 538 539 if (trace->base_time == 0) 540 trace->base_time = sample.time; 541 542 if (type != PERF_RECORD_SAMPLE) { 543 trace__process_event(&trace->host, event); 544 continue; 545 } 546 547 evsel = perf_evlist__id2evsel(evlist, sample.id); 548 if (evsel == NULL) { 549 printf("Unknown tp ID %" PRIu64 ", skipping...\n", sample.id); 550 continue; 551 } 552 553 if (sample.raw_data == NULL) { 554 printf("%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", 555 perf_evsel__name(evsel), sample.tid, 556 sample.cpu, sample.raw_size); 557 continue; 558 } 559 560 handler = evsel->handler.func; 561 handler(trace, evsel, &sample); 562 } 563 } 564 565 if (trace->nr_events == before) { 566 if (done) 567 goto out_unmap_evlist; 568 569 poll(evlist->pollfd, evlist->nr_fds, -1); 570 } 571 572 if (done) 573 perf_evlist__disable(evlist); 574 575 goto again; 576 577 out_unmap_evlist: 578 perf_evlist__munmap(evlist); 579 out_close_evlist: 580 perf_evlist__close(evlist); 581 out_delete_maps: 582 perf_evlist__delete_maps(evlist); 583 out_delete_evlist: 584 perf_evlist__delete(evlist); 585 out: 586 return err; 587 } 588 589 static size_t trace__fprintf_threads_header(FILE *fp) 590 { 591 size_t printed; 592 593 printed = fprintf(fp, "\n _____________________________________________________________________\n"); 594 printed += fprintf(fp," __) Summary of events (__\n\n"); 595 printed += fprintf(fp," [ task - pid ] [ events ] [ ratio ] [ runtime ]\n"); 596 printed += fprintf(fp," _____________________________________________________________________\n\n"); 597 598 return printed; 599 } 600 601 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp) 602 { 603 size_t printed = trace__fprintf_threads_header(fp); 604 struct rb_node *nd; 605 606 for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) { 607 struct thread *thread = rb_entry(nd, struct thread, rb_node); 608 struct thread_trace *ttrace = thread->priv; 609 const char *color; 610 double ratio; 611 612 if (ttrace == NULL) 613 continue; 614 615 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0; 616 617 color = PERF_COLOR_NORMAL; 618 if (ratio > 50.0) 619 color = PERF_COLOR_RED; 620 else if (ratio > 25.0) 621 color = PERF_COLOR_GREEN; 622 else if (ratio > 5.0) 623 color = PERF_COLOR_YELLOW; 624 625 printed += color_fprintf(fp, color, "%20s", thread->comm); 626 printed += fprintf(fp, " - %-5d :%11lu [", thread->tid, ttrace->nr_events); 627 printed += color_fprintf(fp, color, "%5.1f%%", ratio); 628 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms); 629 } 630 631 return printed; 632 } 633 634 static int trace__set_duration(const struct option *opt, const char *str, 635 int unset __maybe_unused) 636 { 637 struct trace *trace = opt->value; 638 639 trace->duration_filter = atof(str); 640 return 0; 641 } 642 643 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) 644 { 645 const char * const trace_usage[] = { 646 "perf trace [<options>] [<command>]", 647 "perf trace [<options>] -- <command> [<options>]", 648 NULL 649 }; 650 struct trace trace = { 651 .audit_machine = audit_detect_machine(), 652 .syscalls = { 653 . max = -1, 654 }, 655 .opts = { 656 .target = { 657 .uid = UINT_MAX, 658 .uses_mmap = true, 659 }, 660 .user_freq = UINT_MAX, 661 .user_interval = ULLONG_MAX, 662 .no_delay = true, 663 .mmap_pages = 1024, 664 }, 665 }; 666 const char *ev_qualifier_str = NULL; 667 const struct option trace_options[] = { 668 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", 669 "list of events to trace"), 670 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid", 671 "trace events on existing process id"), 672 OPT_STRING(0, "tid", &trace.opts.target.tid, "tid", 673 "trace events on existing thread id"), 674 OPT_BOOLEAN(0, "all-cpus", &trace.opts.target.system_wide, 675 "system-wide collection from all CPUs"), 676 OPT_STRING(0, "cpu", &trace.opts.target.cpu_list, "cpu", 677 "list of cpus to monitor"), 678 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit, 679 "child tasks do not inherit counters"), 680 OPT_UINTEGER(0, "mmap-pages", &trace.opts.mmap_pages, 681 "number of mmap data pages"), 682 OPT_STRING(0, "uid", &trace.opts.target.uid_str, "user", 683 "user to profile"), 684 OPT_CALLBACK(0, "duration", &trace, "float", 685 "show only events with duration > N.M ms", 686 trace__set_duration), 687 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"), 688 OPT_END() 689 }; 690 int err; 691 char bf[BUFSIZ]; 692 693 argc = parse_options(argc, argv, trace_options, trace_usage, 0); 694 695 if (ev_qualifier_str != NULL) { 696 trace.ev_qualifier = strlist__new(true, ev_qualifier_str); 697 if (trace.ev_qualifier == NULL) { 698 puts("Not enough memory to parse event qualifier"); 699 return -ENOMEM; 700 } 701 } 702 703 err = perf_target__validate(&trace.opts.target); 704 if (err) { 705 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf)); 706 printf("%s", bf); 707 return err; 708 } 709 710 err = perf_target__parse_uid(&trace.opts.target); 711 if (err) { 712 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf)); 713 printf("%s", bf); 714 return err; 715 } 716 717 if (!argc && perf_target__none(&trace.opts.target)) 718 trace.opts.target.system_wide = true; 719 720 err = trace__run(&trace, argc, argv); 721 722 if (trace.sched && !err) 723 trace__fprintf_thread_summary(&trace, stdout); 724 725 return err; 726 } 727